From 34098c6a8b1008f4950e8524936406b6611d74f9 Mon Sep 17 00:00:00 2001
From: Aaron LI <aly@aaronly.me>
Date: Tue, 2 Jan 2018 13:35:34 +0800
Subject: clusters: rework the reuse of previously dumped cluster catalog

---
 fg21sim/extragalactic/clusters/main.py | 79 +++++++++++++++++++++-------------
 1 file changed, 50 insertions(+), 29 deletions(-)
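
The rework replaces the old CSV-based reuse of the cluster catalog (read back
via csv_to_dataframe) with a pickle dump that sits next to the CSV output,
sharing its basename with a ".pkl" extension, controlled by the new
'dump_catalog_data' and 'use_dump_catalog_data' options; 'use_dump_catalog_data'
is still forced on whenever 'use_dump_halos_data' is enabled.  Below is a
minimal standalone sketch of the load side, using plain pickle and a
hypothetical helper name rather than the project's utils.io functions:

    import os
    import pickle

    def load_cluster_catalog(catalog_csv, use_dump_catalog_data):
        # Hypothetical stand-in for the reworked preprocess() logic: the raw
        # catalog (a list of per-cluster dicts) is reloaded from the pickle
        # file sharing the CSV's basename, instead of being re-parsed from
        # the CSV as before.
        dumpfile = os.path.splitext(catalog_csv)[0] + ".pkl"
        if use_dump_catalog_data and os.path.exists(dumpfile):
            with open(dumpfile, "rb") as f:
                return pickle.load(f)
        # Otherwise the pipeline simulates the catalog from scratch
        # (_simulate_catalog / _process_catalog / _simulate_mergers).
        return []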

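On the save side, the new _save_catalog_data() first renames any output that
is about to be overwritten to "<name>.old" (only when the dumped data were
reused), writes the catalog to CSV, and, if 'dump_catalog_data' is set, also
pickles the raw list of dicts next to it.  A roughly equivalent standalone
sketch, using plain pandas/pickle and hypothetical names instead of the
project's dataframe_to_csv / pickle_dump helpers:

    import os
    import pickle

    import pandas as pd

    def save_cluster_catalog(catalog, csv_file, dump=True, backup_old=True):
        # "catalog" is a list of per-cluster dicts, like self.catalog;
        # "backup_old" stands in for the use_dump_catalog_data condition.
        if backup_old and os.path.exists(csv_file):
            os.rename(csv_file, csv_file + ".old")
        pd.DataFrame(catalog).to_csv(csv_file, index=False)

        if dump:
            pklfile = os.path.splitext(csv_file)[0] + ".pkl"
            if backup_old and os.path.exists(pklfile):
                os.rename(pklfile, pklfile + ".old")
            with open(pklfile, "wb") as f:
                pickle.dump(catalog, f)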

diff --git a/fg21sim/extragalactic/clusters/main.py b/fg21sim/extragalactic/clusters/main.py
index da3190e..ab5a32e 100644
--- a/fg21sim/extragalactic/clusters/main.py
+++ b/fg21sim/extragalactic/clusters/main.py
@@ -17,14 +17,12 @@ import logging
 from collections import OrderedDict
 
 import numpy as np
-import pandas as pd
 
 from .psformalism import PSFormalism
 from .formation import ClusterFormation
 from .halo import RadioHalo
 from ...share import CONFIGS, COSMO
-from ...utils.io import (dataframe_to_csv, csv_to_dataframe,
-                         pickle_dump, pickle_load)
+from ...utils.io import dataframe_to_csv, pickle_dump, pickle_load
 from ...utils.ds import dictlist_to_dataframe
 from ...utils.convert import JyPerPix_to_K
 from ...sky import get_sky
@@ -76,11 +74,11 @@ class GalaxyClusters:
         """
         comp = self.compID
         self.catalog_outfile = self.configs.get_path(comp+"/catalog_outfile")
-        self.use_output_catalog = self.configs.getn(comp+"/use_output_catalog")
+        self.dump_catalog_data = self.configs.getn(comp+"/dump_catalog_data")
+        self.use_dump_catalog_data = self.configs.getn(
+            comp+"/use_dump_catalog_data")
         self.halos_catalog_outfile = self.configs.get_path(
             comp+"/halos_catalog_outfile")
-        self.halos_data_dumpfile = os.path.splitext(
-            self.halos_catalog_outfile)[0] + ".pkl"
         self.dump_halos_data = self.configs.getn(comp+"/dump_halos_data")
         self.use_dump_halos_data = self.configs.getn(
             comp+"/use_dump_halos_data")
@@ -96,8 +94,8 @@ class GalaxyClusters:
         self.clobber = self.configs.getn("output/clobber")
         logger.info("Loaded and set up configurations")
 
-        if self.use_dump_halos_data and (not self.use_output_catalog):
-            self.use_output_catalog = True
+        if self.use_dump_halos_data and (not self.use_dump_catalog_data):
+            self.use_dump_catalog_data = True
             logger.warning("Forced to use existing cluster catalog, "
                            "due to 'use_dump_halos_data=True'")
 
@@ -392,7 +390,39 @@ class GalaxyClusters:
             hdict["template"] = template
         logger.info("Done drawn halo template images.")
 
+    def _save_catalog_data(self, outfile=None, dump=None, clobber=None):
         """
+        Save the simulated cluster catalog (``self.catalog``) by converting
+        it into a Pandas DataFrame and writing it to a CSV file.
+
+        If ``dump=True``, the raw catalog data (``self.catalog``) is also
+        dumped into a Python pickle file, making it easier to load back
+        for reuse.
+        """
+        if outfile is None:
+            outfile = self.catalog_outfile
+        if dump is None:
+            dump = self.dump_catalog_data
+        if clobber is None:
+            clobber = self.clobber
+
+        if self.use_dump_catalog_data and os.path.exists(outfile):
+            os.rename(outfile, outfile+".old")
+
+        logger.info("Converting cluster catalog into a Pandas DataFrame ...")
+        keys = list(self.catalog[0].keys())
+        catalog_df = dictlist_to_dataframe(self.catalog, keys=keys)
+        dataframe_to_csv(catalog_df, outfile=outfile,
+                         comment=self.comments, clobber=clobber)
+        logger.info("Saved cluster catalog to CSV file: %s" % outfile)
+
+        if dump:
+            outfile = os.path.splitext(outfile)[0] + ".pkl"
+            if self.use_dump_catalog_data and os.path.exists(outfile):
+                os.rename(outfile, outfile+".old")
+            pickle_dump(self.catalog, outfile=outfile, clobber=clobber)
+            logger.info("Dumped catalog raw data to file: %s" % outfile)
+
     def _save_halos_data(self, outfile=None, dump=None, clobber=None):
         """
         Save the simulated halo data (``self.halos``) by converting it
@@ -463,25 +493,22 @@ class GalaxyClusters:
             return
 
         logger.info("{name}: preprocessing ...".format(name=self.name))
-        if self.use_output_catalog:
-            logger.info("Use existing cluster & halo catalog: %s" %
-                        self.catalog_outfile)
-            self.catalog, self.comments = csv_to_dataframe(
-                self.catalog_outfile)
-            ncluster = len(self.catalog)
-            idx_rmm = ~self.catalog["rmm_z"].isnull()
-            nhalo = idx_rmm.sum()
-            logger.info("Loaded cluster catalog: %d clusters with %d halos" %
-                        (ncluster, nhalo))
+        if self.use_dump_catalog_data:
+            infile = os.path.splitext(self.catalog_outfile)[0] + ".pkl"
+            logger.info("Use existing cluster catalog: %s" % infile)
+            self.catalog = pickle_load(infile)
+            self.comments = []
+            logger.info("Loaded cluster catalog of %d clusters" %
+                        len(self.catalog))
         else:
             self._simulate_catalog()
             self._process_catalog()
             self._simulate_mergers()
 
         if self.use_dump_halos_data:
-            logger.info("Use existing dumped halos raw data: %s" %
-                        self.halos_data_dumpfile)
-            self.halos = pickle_load(self.halos_data_dumpfile)
+            infile = os.path.splitext(self.halos_catalog_outfile)[0] + ".pkl"
+            logger.info("Use existing dumped halos raw data: %s" % infile)
+            self.halos = pickle_load(infile)
             logger.info("Loaded data of %d halos" % len(self.halos))
         else:
             self._simulate_halos()
@@ -552,13 +579,7 @@ class GalaxyClusters:
         """
         logger.info("{name}: postprocessing ...".format(name=self.name))
         # Save the final resulting clusters catalog
-        logger.info("Save the resulting catalog ...")
-        if self.use_output_catalog:
-            logger.info("No need to save the cluster catalog.")
-        else:
-            dataframe_to_csv(self.catalog, outfile=self.catalog_outfile,
-                             comment=self.comments, clobber=self.clobber)
-
-        # Save the simulated halos catalog and raw data
+        logger.info("Save the cluster catalog ...")
+        self._save_catalog_data()
         logger.info("Saving the simulated halos catalog and raw data ...")
         self._save_halos_data()
-- 
cgit v1.2.2