bin/fg21sim-download-cluster-data


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168

#!/usr/bin/env python3
#
# Copyright (c) 2016 Weitian LI <liweitianux@live.com>
# MIT license

"""
Retrieve the galaxy cluster catalog simulated by the *Hubble Volume Project*:

    http://wwwmpa.mpa-garching.mpg.de/galform/virgo/hubble/

The data used by this package is the *cluster catalog* of the *deep wedge*
assuming the *ΛCDM* lightcone geometry,
and can be downloaded from:

    http://www.mpa-garching.mpg.de/galform/virgo/hubble/lcdm.DW.tar.gz

The catalog data is downloaded, extracted, transformed, and finally saved
to a CSV file.


Catalog Description
-------------------
The Hubble Volume Project is a joint effort of the Virgo Consortium and
Collaborators in U.S., Canada, U.K., and Germany.  To study the formation
of clusters of galaxies, filaments and void-structures, a significant
fraction of the entire observable Universe is modeled and simulated by
employing one billion (1e9) mass particles. [Evard2002]_

The ΛCDM cosmological model, one of the two models adopted by the Project,
has the following parameters:

    Ω_m = 0.3, Ω_Λ = 0.7, h = 0.7, σ_8 = 0.9
    Cube side length: 3000 h^-1 Mpc
    Main slice: 3000 x 3000 x 30 h^-3 Mpc^3
    P^3M: z_init = 35, s = 100 h^-1 kpc
    1000^3 particles, 1024^3 mesh
    M_particle = 2.25e12 h^-1 M_⊙

The retrieved catalog of the galaxy clusters is derived from a *spherical
overdensity* method applied to the *deep wedge* light-cone particle data,
with overdensity threshold Δ=200.  The deep wedge lightcone covers
10x10 deg^2, with a maximum LoS distance of 5100 h^-1 Mpc, and a redshift
coverage limit of 4.37.  The coordinates used in the catalog are mapped
to 0-1 unit, and as for the deep wedge catalog, the origin is at (0, 0, 0),
and is directed toward (1, 1, 1).


References
----------
.. [Evard2002]
   Evard, A. E. et al.,
   "Galaxy Clusters in Hubble Volume Simulations: Cosmological Constraints
   from Sky Survey Populations",
   2002, ApJ, 573, 7-36,
   http://adsabs.harvard.edu/abs/2002ApJ...573....7E
"""


import os
import sys
import re
import argparse
import logging
import csv
import urllib.request
import tarfile

from fg21sim.configs import configs
from fg21sim.utils import setup_logging


# URL to the simulated galaxy cluster catalog data (a gzip-compressed tar
# archive); used as the default value of the "--url" command-line option.
DATA_URL = "http://www.mpa-garching.mpg.de/galform/virgo/hubble/lcdm.DW.tar.gz"


def main():
    """
    Download the simulated galaxy cluster catalog and convert it to CSV.

    Workflow:

    1. Parse the command-line arguments and set up logging.
    2. Download the catalog archive from the given URL to
       ``<outfile basename>.tar.gz``.
    3. Extract the single file contained in the archive to
       ``<outfile basename>.txt``.
    4. Parse the whitespace-separated catalog text (comment lines starting
       with ``#`` and blank lines are skipped) and write the rows, preceded
       by a header row, to the output CSV file.

    Raises
    ------
    IOError
        The output file already exists and ``--clobber`` was not given.
    ValueError
        The archive does not contain exactly one regular file, or a data
        line does not have the expected number of columns (or a field
        cannot be converted to its numeric type).
    """
    outfile_default = "HVP_LCDM_DeepWedge_Catalog.csv"

    parser = argparse.ArgumentParser(
        description="Retrieve Simulated galaxy cluster catalog data")
    parser.add_argument("outfile", nargs="?", default=outfile_default,
                        help="output CSV file to save the catalog data " +
                        "(default: %s)" % outfile_default)
    parser.add_argument("-U", "--url", default=DATA_URL,
                        help="URL to the cluster catalog archive " +
                        "(tar.gz) (default: %s)" % DATA_URL)
    parser.add_argument("-C", "--clobber", action="store_true",
                        help="overwrite the existing output file")
    parser.add_argument("-l", "--log", dest="loglevel", default=None,
                        choices=["DEBUG", "INFO", "WARNING",
                                 "ERROR", "CRITICAL"],
                        help="set the log level")
    parser.add_argument("-L", "--logfile", default=None,
                        help="filename where to save the log messages")
    parser.add_argument("-Q", "--quiet", action="store_true",
                        help="be quiet so do not log messages to screen")
    args = parser.parse_args()

    # NOTE(review): an empty string presumably tells ``setup_logging`` to
    # disable the screen handler, while ``None`` keeps the configured
    # default -- confirm against ``fg21sim.utils.setup_logging``.
    log_stream = "" if args.quiet else None

    tool = os.path.basename(sys.argv[0])

    setup_logging(dict_config=configs.logging,
                  level=args.loglevel,
                  stream=log_stream,
                  logfile=args.logfile)
    logger = logging.getLogger(tool)
    logger.info("COMMAND: {0}".format(" ".join(sys.argv)))

    # Refuse to overwrite before doing any (slow) network activity.
    if os.path.exists(args.outfile) and (not args.clobber):
        raise IOError("output file already exists: %s" % args.outfile)

    # Intermediate files are placed next to the output file.
    basename = os.path.splitext(args.outfile)[0]
    fn_catalog = basename + ".tar.gz"
    fn_catalogtxt = basename + ".txt"

    logger.info("Downloading the catalog data from: {0}".format(args.url))
    urllib.request.urlretrieve(args.url, fn_catalog)
    logger.info("Done download the catalog as file: {0}".format(fn_catalog))

    logger.info("Extract the catalog data from the downloaded archive ...")
    with tarfile.open(fn_catalog) as tf:
        members = tf.getmembers()
        if len(members) != 1:
            raise ValueError("Catalog should contain only 1 file, but got %d" %
                             len(members))
        m0 = members[0]
        # Stream the member's content straight into the target file rather
        # than extracting it under its archived name and renaming it: the
        # archived path is never used on disk (no path traversal), and the
        # target may live on a different filesystem than the working dir.
        src = tf.extractfile(m0)
        if src is None:
            raise ValueError("Catalog archive member is not a regular "
                             "file: %s" % m0.name)
        with src, open(fn_catalogtxt, "wb") as dst:
            for chunk in iter(lambda: src.read(65536), b""):
                dst.write(chunk)
    logger.info("Done extract catalog data to file: %s" % fn_catalogtxt)

    # Data column names
    header = ["m", "redshift", "sigma", "ip", "x", "y", "z", "vx", "vy", "vz"]
    # Type converter of each column, in the same order as ``header``
    converters = (
        int,      # m: number of particles
        float,    # redshift
        float,    # sigma: measured 1D velocity dispersion
        int,      # ip: parent flag
        float,    # x: cluster X location in 0-1 units
        float,    # y: cluster Y location in 0-1 units
        float,    # z: cluster Z location in 0-1 units
        float,    # vx: physical peculiar velocity [km/s]
        float,    # vy: physical peculiar velocity [km/s]
        float,    # vz: physical peculiar velocity [km/s]
    )
    # Matches comment lines and blank lines, which carry no data
    skip_pattern = re.compile(r"^\s*#|^\s*$")

    count = 0
    # ``newline=""`` is required by the csv module so that it controls the
    # line endings itself (otherwise blank rows appear on Windows).
    with open(args.outfile, "w", newline="") as csvfile, \
            open(fn_catalogtxt) as catalog:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(header)
        for line in catalog:
            if skip_pattern.match(line):
                # Ignore comment and blank line
                continue
            fields = line.split()
            if len(header) != len(fields):
                raise ValueError("Invalid line: '{0}'".format(line))
            csvwriter.writerow(
                [convert(field)
                 for convert, field in zip(converters, fields)])
            count += 1
    logger.info("Catalog data contains %d clusters" % count)
    logger.info("Cluster catalog data write to: %s" % args.outfile)


# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()