aboutsummaryrefslogtreecommitdiffstats
path: root/fg21sim
diff options
context:
space:
mode:
author Aaron LI <aly@aaronly.me> 2018-01-02 13:35:34 +0800
committer Aaron LI <aly@aaronly.me> 2018-01-02 13:35:34 +0800
commit 34098c6a8b1008f4950e8524936406b6611d74f9 (patch)
tree 1a2014f86cee90b21a2c6fc068a5a84ca3ea5685 /fg21sim
parent 4a49490eb32398255dee3ee252e87aef626fb5d7 (diff)
download fg21sim-34098c6a8b1008f4950e8524936406b6611d74f9.tar.bz2
clusters: rework the reuse of previously dumped cluster catalog
Diffstat (limited to 'fg21sim')
-rw-r--r-- fg21sim/configs/20-extragalactic.conf.spec 17
-rw-r--r-- fg21sim/configs/checkers.py 5
-rw-r--r-- fg21sim/extragalactic/clusters/main.py 79
3 files changed, 65 insertions, 36 deletions
diff --git a/fg21sim/configs/20-extragalactic.conf.spec b/fg21sim/configs/20-extragalactic.conf.spec
index a75907c..02d7f00 100644
--- a/fg21sim/configs/20-extragalactic.conf.spec
+++ b/fg21sim/configs/20-extragalactic.conf.spec
@@ -60,12 +60,19 @@
# used by the following ``[[halos]]`` section.
#
[[clusters]]
- # Output CSV file of the clusters catalog containing the simulated
- # mass, redshift, position, shape, and the recent major merger info.
+ # Output CSV file of the cluster catalog containing the simulated
+ # mass, redshift, position, shape, recent merger info, etc.
catalog_outfile = string(default=None)
- # Whether to directly use the (previously simulated) catalog file
- # specified as the above "catalog_outfile" option?
+ # Whether to dump the raw data of the simulated cluster catalog in
+ # Python native pickle format (i.e., ".pkl") to a file with the same
+ # basename as the above ``catalog_outfile``?
+ # The dumped data can be easily loaded back for reuse.
+ dump_catalog_data = boolean(default=True)
+
+ # Whether to directly use the (previously simulated) catalog data as
+ # specified by the above ``catalog_outfile`` and ``dump_catalog_data``
+ # options?
# NOTE:
# By using an existing catalog, the steps to derive these data are
# simply skipped.
@@ -76,7 +83,7 @@
# tests and only create images at some frequencies necessary for
# testing, then select the satisfying one to continue the simulation
# to generate images at all frequencies.
- use_output_catalog = boolean(default=False)
+ use_dump_catalog_data = boolean(default=False)
# Output CSV file of the halos catalog containing the calculated
# properties of the simulated halos.
diff --git a/fg21sim/configs/checkers.py b/fg21sim/configs/checkers.py
index 78d5b20..9759ef1 100644
--- a/fg21sim/configs/checkers.py
+++ b/fg21sim/configs/checkers.py
@@ -197,8 +197,9 @@ def check_extragalactic_clusters(configs):
key = "extragalactic/psformalism/dndlnm_outfile"
results.update(_check_missing(configs, key))
# catalog required when enabled to use it
- if configs.get(comp+"/use_output_catalog"):
- results.update(_check_existence(configs, comp+"/catalog_outfile"))
+ if configs.get(comp+"/use_dump_catalog_data"):
+ results.update(_check_file_existence(
+ configs, comp+"/catalog_outfile", ext=".pkl"))
else:
results.update(_check_missing(configs, comp+"/catalog_outfile"))
# dumped halos data required when enabled to use it
diff --git a/fg21sim/extragalactic/clusters/main.py b/fg21sim/extragalactic/clusters/main.py
index da3190e..ab5a32e 100644
--- a/fg21sim/extragalactic/clusters/main.py
+++ b/fg21sim/extragalactic/clusters/main.py
@@ -17,14 +17,12 @@ import logging
from collections import OrderedDict
import numpy as np
-import pandas as pd
from .psformalism import PSFormalism
from .formation import ClusterFormation
from .halo import RadioHalo
from ...share import CONFIGS, COSMO
-from ...utils.io import (dataframe_to_csv, csv_to_dataframe,
- pickle_dump, pickle_load)
+from ...utils.io import dataframe_to_csv, pickle_dump, pickle_load
from ...utils.ds import dictlist_to_dataframe
from ...utils.convert import JyPerPix_to_K
from ...sky import get_sky
@@ -76,11 +74,11 @@ class GalaxyClusters:
"""
comp = self.compID
self.catalog_outfile = self.configs.get_path(comp+"/catalog_outfile")
- self.use_output_catalog = self.configs.getn(comp+"/use_output_catalog")
+ self.dump_catalog_data = self.configs.getn(comp+"/dump_catalog_data")
+ self.use_dump_catalog_data = self.configs.getn(
+ comp+"/use_dump_catalog_data")
self.halos_catalog_outfile = self.configs.get_path(
comp+"/halos_catalog_outfile")
- self.halos_data_dumpfile = os.path.splitext(
- self.halos_catalog_outfile)[0] + ".pkl"
self.dump_halos_data = self.configs.getn(comp+"/dump_halos_data")
self.use_dump_halos_data = self.configs.getn(
comp+"/use_dump_halos_data")
@@ -96,8 +94,8 @@ class GalaxyClusters:
self.clobber = self.configs.getn("output/clobber")
logger.info("Loaded and set up configurations")
- if self.use_dump_halos_data and (not self.use_output_catalog):
- self.use_output_catalog = True
+ if self.use_dump_halos_data and (not self.use_dump_catalog_data):
+ self.use_dump_catalog_data = True
logger.warning("Forced to use existing cluster catalog, "
"due to 'use_dump_halos_data=True'")
@@ -392,7 +390,39 @@ class GalaxyClusters:
hdict["template"] = template
logger.info("Done drawn halo template images.")
+ def _save_catalog_data(self, outfile=None, dump=None, clobber=None):
"""
+ Save the simulated cluster catalog (``self.catalog``) by converting
+ it into a Pandas DataFrame and writing into a CSV file.
+
+ If ``dump=True``, then the raw data (``self.catalog``) is dumped
+ into a Python pickle file, making it easier to be loaded back
+ for reuse.
+ """
+ if outfile is None:
+ outfile = self.catalog_outfile
+ if dump is None:
+ dump = self.dump_catalog_data
+ if clobber is None:
+ clobber = self.clobber
+
+ if self.use_dump_catalog_data and os.path.exists(outfile):
+ os.rename(outfile, outfile+".old")
+
+ logger.info("Converting cluster catalog into a Pandas DataFrame ...")
+ keys = list(self.catalog[0].keys())
+ catalog_df = dictlist_to_dataframe(self.catalog, keys=keys)
+ dataframe_to_csv(catalog_df, outfile=outfile,
+ comment=self.comments, clobber=clobber)
+ logger.info("Saved cluster catalog to CSV file: %s" % outfile)
+
+ if dump:
+ outfile = os.path.splitext(outfile)[0] + ".pkl"
+ if self.use_dump_catalog_data and os.path.exists(outfile):
+ os.rename(outfile, outfile+".old")
+ pickle_dump(self.catalog, outfile=outfile, clobber=clobber)
+ logger.info("Dumped catalog raw data to file: %s" % outfile)
+
def _save_halos_data(self, outfile=None, dump=None, clobber=None):
"""
Save the simulated halo data (``self.halos``) by converting it
@@ -463,25 +493,22 @@ class GalaxyClusters:
return
logger.info("{name}: preprocessing ...".format(name=self.name))
- if self.use_output_catalog:
- logger.info("Use existing cluster & halo catalog: %s" %
- self.catalog_outfile)
- self.catalog, self.comments = csv_to_dataframe(
- self.catalog_outfile)
- ncluster = len(self.catalog)
- idx_rmm = ~self.catalog["rmm_z"].isnull()
- nhalo = idx_rmm.sum()
- logger.info("Loaded cluster catalog: %d clusters with %d halos" %
- (ncluster, nhalo))
+ if self.use_dump_catalog_data:
+ infile = os.path.splitext(self.catalog_outfile)[0] + ".pkl"
+ logger.info("Use existing cluster catalog: %s" % infile)
+ self.catalog = pickle_load(infile)
+ self.comments = []
+ logger.info("Loaded cluster catalog of %d clusters" %
+ len(self.catalog))
else:
self._simulate_catalog()
self._process_catalog()
self._simulate_mergers()
if self.use_dump_halos_data:
- logger.info("Use existing dumped halos raw data: %s" %
- self.halos_data_dumpfile)
- self.halos = pickle_load(self.halos_data_dumpfile)
+ infile = os.path.splitext(self.halos_catalog_outfile)[0] + ".pkl"
+ logger.info("Use existing dumped halos raw data: %s" % infile)
+ self.halos = pickle_load(infile)
logger.info("Loaded data of %d halos" % len(self.halos))
else:
self._simulate_halos()
@@ -552,13 +579,7 @@ class GalaxyClusters:
"""
logger.info("{name}: postprocessing ...".format(name=self.name))
# Save the final resulting clusters catalog
- logger.info("Save the resulting catalog ...")
- if self.use_output_catalog:
- logger.info("No need to save the cluster catalog.")
- else:
- dataframe_to_csv(self.catalog, outfile=self.catalog_outfile,
- comment=self.comments, clobber=self.clobber)
-
- # Save the simulated halos catalog and raw data
+ logger.info("Save the cluster catalog ...")
+ self._save_catalog_data()
logger.info("Saving the simulated halos catalog and raw data ...")
self._save_halos_data()