diff options
author | Aaron LI <aly@aaronly.me> | 2018-01-02 13:35:34 +0800 |
---|---|---|
committer | Aaron LI <aly@aaronly.me> | 2018-01-02 13:35:34 +0800 |
commit | 34098c6a8b1008f4950e8524936406b6611d74f9 (patch) | |
tree | 1a2014f86cee90b21a2c6fc068a5a84ca3ea5685 | |
parent | 4a49490eb32398255dee3ee252e87aef626fb5d7 (diff) | |
download | fg21sim-34098c6a8b1008f4950e8524936406b6611d74f9.tar.bz2 |
clusters: rework the reuse of previously dumped cluster catalog
-rw-r--r-- | fg21sim/configs/20-extragalactic.conf.spec | 17 | ||||
-rw-r--r-- | fg21sim/configs/checkers.py | 5 | ||||
-rw-r--r-- | fg21sim/extragalactic/clusters/main.py | 79 |
3 files changed, 65 insertions, 36 deletions
diff --git a/fg21sim/configs/20-extragalactic.conf.spec b/fg21sim/configs/20-extragalactic.conf.spec index a75907c..02d7f00 100644 --- a/fg21sim/configs/20-extragalactic.conf.spec +++ b/fg21sim/configs/20-extragalactic.conf.spec @@ -60,12 +60,19 @@ # used by the following ``[[halos]]`` section. # [[clusters]] - # Output CSV file of the clusters catalog containing the simulated - # mass, redshift, position, shape, and the recent major merger info. + # Output CSV file of the cluster catalog containing the simulated + # mass, redshift, position, shape, recent merger info, etc. catalog_outfile = string(default=None) - # Whether to directly use the (previously simulated) catalog file - # specified as the above "catalog_outfile" option? + # Whether to dump the raw data of the simulated cluster catalog in + # Python native pickle format (i.e., ".pkl") to a file with the same + # basename as the above ``catalog_outfile``? + # The dumped data can be easily loaded back for reuse. + dump_catalog_data = boolean(default=True) + + # Whether to directly use the (previously simulated) catalog data as + # specified by the above "catalog_outfile" and ``dump_catalog_data`` + # options? # NOTE: # By using an existing catalog, the steps to derive these data are # simply skipped. @@ -76,7 +83,7 @@ # tests and only create images at some frequencies necessary for # testing, then select the satisfying one to continue the simulation # to generate images at all frequencies. - use_output_catalog = boolean(default=False) + use_dump_catalog_data = boolean(default=False) # Output CSV file of the halos catalog containing the calculated # properties of the simulated halos. diff --git a/fg21sim/configs/checkers.py b/fg21sim/configs/checkers.py index 78d5b20..9759ef1 100644 --- a/fg21sim/configs/checkers.py +++ b/fg21sim/configs/checkers.py @@ -197,8 +197,9 @@ def check_extragalactic_clusters(configs): key = "extragalactic/psformalism/dndlnm_outfile" results.update(_check_missing(configs, key)) # catalog required when enabled to use it - if configs.get(comp+"/use_output_catalog"): - results.update(_check_existence(configs, comp+"/catalog_outfile")) + if configs.get(comp+"/use_dump_catalog_data"): + results.update(_check_file_existence( + configs, comp+"/catalog_outfile", ext=".pkl")) else: results.update(_check_missing(configs, comp+"/catalog_outfile")) # dumped halos data required when enabled to use it diff --git a/fg21sim/extragalactic/clusters/main.py b/fg21sim/extragalactic/clusters/main.py index da3190e..ab5a32e 100644 --- a/fg21sim/extragalactic/clusters/main.py +++ b/fg21sim/extragalactic/clusters/main.py @@ -17,14 +17,12 @@ import logging from collections import OrderedDict import numpy as np -import pandas as pd from .psformalism import PSFormalism from .formation import ClusterFormation from .halo import RadioHalo from ...share import CONFIGS, COSMO -from ...utils.io import (dataframe_to_csv, csv_to_dataframe, - pickle_dump, pickle_load) +from ...utils.io import dataframe_to_csv, pickle_dump, pickle_load from ...utils.ds import dictlist_to_dataframe from ...utils.convert import JyPerPix_to_K from ...sky import get_sky @@ -76,11 +74,11 @@ class GalaxyClusters: """ comp = self.compID self.catalog_outfile = self.configs.get_path(comp+"/catalog_outfile") - self.use_output_catalog = self.configs.getn(comp+"/use_output_catalog") + self.dump_catalog_data = self.configs.getn(comp+"/dump_catalog_data") + self.use_dump_catalog_data = self.configs.getn( + comp+"/use_dump_catalog_data") self.halos_catalog_outfile = self.configs.get_path( comp+"/halos_catalog_outfile") - self.halos_data_dumpfile = os.path.splitext( - self.halos_catalog_outfile)[0] + ".pkl" self.dump_halos_data = self.configs.getn(comp+"/dump_halos_data") self.use_dump_halos_data = self.configs.getn( comp+"/use_dump_halos_data") @@ -96,8 +94,8 @@ class GalaxyClusters: self.clobber = self.configs.getn("output/clobber") logger.info("Loaded and set up configurations") - if self.use_dump_halos_data and (not self.use_output_catalog): - self.use_output_catalog = True + if self.use_dump_halos_data and (not self.use_dump_catalog_data): + self.use_dump_catalog_data = True logger.warning("Forced to use existing cluster catalog, " "due to 'use_dump_halos_data=True'") @@ -392,7 +390,39 @@ class GalaxyClusters: hdict["template"] = template logger.info("Done drawn halo template images.") + def _save_catalog_data(self, outfile=None, dump=None, clobber=None): """ + Save the simulated cluster catalog (``self.catalog``) by converting + it into a Pandas DataFrame and writing into a CSV file. + + If ``dump=True``, then the raw data (``self.catalog``) is dumped + into a Python pickle file, making it easier to be loaded back + for reuse. + """ + if outfile is None: + outfile = self.catalog_outfile + if dump is None: + dump = self.dump_catalog_data + if clobber is None: + clobber = self.clobber + + if self.use_dump_catalog_data and os.path.exists(outfile): + os.rename(outfile, outfile+".old") + + logger.info("Converting cluster catalog into a Pandas DataFrame ...") + keys = list(self.catalog[0].keys()) + catalog_df = dictlist_to_dataframe(self.catalog, keys=keys) + dataframe_to_csv(catalog_df, outfile=outfile, + comment=self.comments, clobber=clobber) + logger.info("Saved cluster catalog to CSV file: %s" % outfile) + + if dump: + outfile = os.path.splitext(outfile)[0] + ".pkl" + if self.use_dump_catalog_data and os.path.exists(outfile): + os.rename(outfile, outfile+".old") + pickle_dump(self.catalog, outfile=outfile, clobber=clobber) + logger.info("Dumped catalog raw data to file: %s" % outfile) + def _save_halos_data(self, outfile=None, dump=None, clobber=None): """ Save the simulated halo data (``self.halos``) by converting it @@ -463,25 +493,22 @@ class GalaxyClusters: return logger.info("{name}: preprocessing ...".format(name=self.name)) - if self.use_output_catalog: - logger.info("Use existing cluster & halo catalog: %s" % - self.catalog_outfile) - self.catalog, self.comments = csv_to_dataframe( - self.catalog_outfile) - ncluster = len(self.catalog) - idx_rmm = ~self.catalog["rmm_z"].isnull() - nhalo = idx_rmm.sum() - logger.info("Loaded cluster catalog: %d clusters with %d halos" % - (ncluster, nhalo)) + if self.use_dump_catalog_data: + infile = os.path.splitext(self.catalog_outfile)[0] + ".pkl" + logger.info("Use existing cluster catalog: %s" % infile) + self.catalog = pickle_load(infile) + self.comments = [] + logger.info("Loaded cluster catalog of %d clusters" % + len(self.catalog)) else: self._simulate_catalog() self._process_catalog() self._simulate_mergers() if self.use_dump_halos_data: - logger.info("Use existing dumped halos raw data: %s" % - self.halos_data_dumpfile) - self.halos = pickle_load(self.halos_data_dumpfile) + infile = os.path.splitext(self.halos_catalog_outfile)[0] + ".pkl" + logger.info("Use existing dumped halos raw data: %s" % infile) + self.halos = pickle_load(infile) logger.info("Loaded data of %d halos" % len(self.halos)) else: self._simulate_halos() @@ -552,13 +579,7 @@ class GalaxyClusters: """ logger.info("{name}: postprocessing ...".format(name=self.name)) # Save the final resulting clusters catalog - logger.info("Save the resulting catalog ...") - if self.use_output_catalog: - logger.info("No need to save the cluster catalog.") - else: - dataframe_to_csv(self.catalog, outfile=self.catalog_outfile, - comment=self.comments, clobber=self.clobber) - - # Save the simulated halos catalog and raw data + logger.info("Save the cluster catalog ...") + self._save_catalog_data() logger.info("Saving the simulated halos catalog and raw data ...") self._save_halos_data() |