#!/usr/bin/env python3
#
# Copyright (c) 2016-2017 Weitian LI <weitian@aaronly.me>
# MIT license

"""
Retrieve the galaxy cluster catalog simulated by the *Hubble Volume Project*:

    http://wwwmpa.mpa-garching.mpg.de/galform/virgo/hubble/

The data used by this package is the *cluster catalog* of the *deep wedge*
assuming the *ΛCDM* light-cone geometry,
and can be downloaded from:

    http://www.mpa-garching.mpg.de/galform/virgo/hubble/lcdm.DW.tar.gz

The catalog data is downloaded, extracted, transformed, and finally saved
to a CSV file.
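
For reference, a typical invocation looks like the following (the script
filename here is only illustrative)::

    $ python retrieve_hvp_catalog.py -C HVP_LCDM_DeepWedge_Catalog.csv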


Catalog Description
-------------------
The Hubble Volume Project is a joint effort of the Virgo Consortium and
collaborators in the U.S., Canada, U.K., and Germany.  To study the
formation of galaxy clusters, filaments, and voids, a significant fraction
of the entire observable Universe was modeled and simulated by employing
one billion (1e9 = 1000^3) mass particles. [Evrard2002]_

The ΛCDM cosmological model, one of the two models adopted by the Project,
has the following parameters:

    Ω_m = 0.3, Ω_Λ = 0.7, h = 0.7, σ_8 = 0.9
    Cube side length: 3000 h^-1 Mpc
    Main slice: 3000 x 3000 x 30 h^-3 Mpc^3
    P^3M: z_init = 35, s = 100 h^-1 kpc
    1000^3 particles, 1024^3 mesh
    M_particle = 2.25e12 h^-1 M_⊙
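
As a sanity check, the particle mass follows from the mean matter density
and the particle count; a minimal sketch, assuming the standard critical
density ρ_crit = 2.775e11 h^2 M_⊙ Mpc^-3::

    rho_crit = 2.775e11  # critical density [h^2 M_⊙ Mpc^-3]
    # M_particle = Ω_m * ρ_crit * L^3 / N  =>  ~2.25e12 [h^-1 M_⊙]
    m_particle = 0.3 * rho_crit * 3000.0**3 / 1000.0**3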

The retrieved galaxy cluster catalog is derived by applying a *spherical
overdensity* method, with overdensity threshold Δ=200, to the *deep wedge*
light-cone particle data.  The deep wedge light cone covers 10x10 deg^2,
with a maximum line-of-sight (LoS) distance of 5100 h^-1 Mpc and a redshift
coverage limit of 4.37.  The coordinates used in the catalog are normalized
to 0-1 units; for the deep wedge catalog, the LoS originates at (0, 0, 0)
and is directed toward (1, 1, 1).
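
Assuming these unit coordinates scale with the cube side length
(3000 h^-1 Mpc), so that the wedge diagonal sqrt(3) x 3000 ≈ 5200 h^-1 Mpc
roughly matches the quoted maximum LoS distance, a cluster's comoving LoS
distance could be estimated as in this sketch::

    import math
    # (x, y, z): cluster location in 0-1 units (catalog columns)
    d_los = 3000.0 * math.sqrt(x**2 + y**2 + z**2)  # [h^-1 Mpc]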


References
----------
.. [Evrard2002]
   Evrard, A. E., et al.,
   "Galaxy Clusters in Hubble Volume Simulations: Cosmological Constraints
   from Sky Survey Populations",
   2002, ApJ, 573, 7-36,
   http://adsabs.harvard.edu/abs/2002ApJ...573....7E
"""


import os
import sys
import re
import argparse
import logging
import csv
import urllib.request
import tarfile

from fg21sim.configs import CONFIGS
from fg21sim.utils import setup_logging


# URL to the simulated galaxy cluster catalog data
DATA_URL = "http://www.mpa-garching.mpg.de/galform/virgo/hubble/lcdm.DW.tar.gz"


def main():
    outfile_default = "HVP_LCDM_DeepWedge_Catalog.csv"

    parser = argparse.ArgumentParser(
        description="Retrieve Simulated galaxy cluster catalog data")
    parser.add_argument("outfile", nargs="?", default=outfile_default,
                        help="output CSV file to save the catalog data " +
                        "(default: %s)" % outfile_default)
    parser.add_argument("-U", "--url", default=DATA_URL,
                        help="URL to Green's SNRs catalog summary page " +
                        "or a local HTML file (default: %s)" % DATA_URL)
    parser.add_argument("-C", "--clobber", action="store_true",
                        help="overwrite the existing output file")
    parser.add_argument("-l", "--log", dest="loglevel", default=None,
                        choices=["DEBUG", "INFO", "WARNING",
                                 "ERROR", "CRITICAL"],
                        help="set the log level")
    parser.add_argument("-L", "--logfile", default=None,
                        help="filename where to save the log messages")
    parser.add_argument("-Q", "--quiet", action="store_true",
                        help="be quiet so do not log messages to screen")
    args = parser.parse_args()

    log_stream = "" if args.quiet else None
    setup_logging(dict_config=CONFIGS.logging,
                  level=args.loglevel,
                  stream=log_stream,
                  logfile=args.logfile)
    tool = os.path.basename(sys.argv[0])
    logger = logging.getLogger(tool)
    logger.info("COMMAND: {0}".format(" ".join(sys.argv)))

    if os.path.exists(args.outfile) and (not args.clobber):
        raise IOError("output file already exists: %s" % args.outfile)

    basename = os.path.splitext(args.outfile)[0]
    fn_catalog = basename + ".tar.gz"
    fn_catalogtxt = basename + ".txt"

    logger.info("Downloading the catalog data from: {0}".format(args.url))
    urllib.request.urlretrieve(args.url, fn_catalog)
    logger.info("Done download the catalog as file: {0}".format(fn_catalog))

    logger.info("Extract the catalog data from the downloaded archive ...")
    tf = tarfile.open(fn_catalog)
    members = tf.getmembers()
    if len(members) != 1:
        raise ValueError("Catalog should contain only 1 file, but got %d" %
                         len(members))
    m0 = members[0]
    tf.extract(m0)
    tf.close()
    os.rename(m0.name, fn_catalogtxt)
    logger.info("Done extract catalog data to file: %s" % fn_catalogtxt)

    # Data column names
    header = ["m", "redshift", "sigma", "ip", "x", "y", "z", "vx", "vy", "vz"]
    with open(args.outfile, "w", newline="") as csvfile, \
            open(fn_catalogtxt) as txtfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(header)
        i = 0
        for line in txtfile:
            if re.match(r"^\s*#|^\s*$", line):
                # Skip comment and blank lines
                continue
            values = line.split()
            if len(header) != len(values):
                raise ValueError("Invalid line: '{0}'".format(line))
            values = [
                int(values[0]),       # m: number of particles
                float(values[1]),     # redshift
                float(values[2]),     # sigma: measured 1D velocity dispersion
                int(values[3]),       # ip: parent flag
                float(values[4]),     # x: cluster X location in 0-1 units
                float(values[5]),     # y: cluster Y location in 0-1 units
                float(values[6]),     # z: cluster Z location in 0-1 units
                float(values[7]),     # vx: physical peculiar velocity [km/s]
                float(values[8]),     # vy: physical peculiar velocity [km/s]
                float(values[9]),     # vz: physical peculiar velocity [km/s]
            ]
            i += 1
            csvwriter.writerow(values)
    logger.info("Catalog data contains %d clusters" % i)
    logger.info("Cluster catalog data write to: %s" % args.outfile)


if __name__ == "__main__":
    main()