diff options
author | Aaron LI <aly@aaronly.me> | 2017-08-03 08:31:22 +0800 |
---|---|---|
committer | Aaron LI <aly@aaronly.me> | 2017-08-03 08:31:22 +0800 |
commit | 3d366533e4ece7a9ee89cafb751a5e4089185d35 (patch) | |
tree | 32423dc3ee7ce2a24c45d46b342ffaa8a3ed37e9 | |
parent | 4b13e8bd09d2e220aba040576e58dfa60c044c54 (diff) | |
download | fg21sim-3d366533e4ece7a9ee89cafb751a5e4089185d35.tar.bz2 |
utils: Add ds.py with function "dictlist_to_dataframe()"
This function converts a list of dictionaries (created by, e.g.,
GalaxyClusters._simulate_halos()) into a Pandas DataFrame.
Signed-off-by: Aaron LI <aly@aaronly.me>
-rw-r--r-- | fg21sim/utils/ds.py | 82 |
1 files changed, 82 insertions, 0 deletions
# diff --git a/fg21sim/utils/ds.py b/fg21sim/utils/ds.py
# new file mode 100644
#
# Copyright (c) 2017 Weitian LI <weitian@aaronly.me>
# MIT license

"""
Data structure/set utilities.
"""

import logging
# NOTE: the ABCs live in ``collections.abc``; the aliases in ``collections``
# were removed in Python 3.10.
from collections.abc import Iterable

import pandas as pd


logger = logging.getLogger(__name__)


def _is_vector(value):
    """
    Tell whether ``value`` should be expanded into multiple elements,
    i.e., it is iterable but not a string/bytes object.
    """
    return isinstance(value, Iterable) and not isinstance(value, (str, bytes))


def _flatten_list(l):
    """
    Flatten an arbitrarily nested list.

    Strings and bytes are treated as atomic elements and are never
    split into characters.

    Parameters
    ----------
    l : iterable
        The (possibly nested) iterable to be flattened.

    Yields
    ------
    The non-iterable (or string/bytes) elements, in depth-first order.

    Credit
    ------
    * Flatten (an irregular) list of lists
      https://stackoverflow.com/a/2158532
    """
    for el in l:
        if _is_vector(el):
            yield from _flatten_list(el)
        else:
            yield el


def dictlist_to_dataframe(dictlist, keys=None):
    """
    Convert data in the format of a list of dictionaries into a Pandas
    DataFrame by flattening the dictionary keys into columns.

    NOTE
    ----
    If the item ``key`` of the dictionary has a list/vector value, then
    it is split into multiple columns named ``key[0], key[1], ...``.
    A nested list/vector is flattened first, so the index in the column
    name is the position within the *flattened* value.

    Parameters
    ----------
    dictlist : list[dict]
        The input data to be converted; a list of dictionaries, with
        each member dictionary having the same format/structure.
        The column layout is derived from the first dictionary only.
        NOTE: The dictionary may have items with list/vector values,
        but other more complex items (e.g., nested dictionaries) are
        not supported.
    keys : list[str], optional
        The list of dictionary items to be selected for conversion.
        Default: convert all dictionary items.

    Returns
    -------
    dataframe : `~pandas.DataFrame`
        The converted Pandas DataFrame whose columns are the dictionary
        item keys.  If ``dictlist`` is empty, an empty DataFrame is
        returned whose columns are ``keys`` as given (no ``key[i]``
        expansion is possible without data), or no columns at all if
        ``keys`` is None.

    Raises
    ------
    KeyError
        If one of ``keys`` is missing from a dictionary.
    """
    if len(dictlist) == 0:
        # Nothing to derive the column layout from.
        logger.info("DataFrame conversion got empty input data")
        return pd.DataFrame(columns=[] if keys is None else list(keys))

    d0 = dictlist[0]
    if keys is None:
        keys = list(d0)
    logger.info("DataFrame conversion selected keys: %s", keys)

    columns = []
    for k in keys:
        v = d0[k]
        if _is_vector(v):
            # Count the *flattened* elements, so that the number of
            # columns always matches the flattened row data built below
            # (using ``len(v)`` breaks for nested vectors).
            nelem = sum(1 for __ in _flatten_list(v))
            columns.extend("%s[%d]" % (k, i) for i in range(nelem))
        else:
            columns.append(k)
    logger.info("DataFrame number of columns: %d", len(columns))
    logger.debug("DataFrame columns: %s", columns)

    data = [list(_flatten_list([d[k] for k in keys])) for d in dictlist]
    dataframe = pd.DataFrame(data, columns=columns)
    return dataframe