1 files changed, 82 insertions, 0 deletions
diff --git a/fg21sim/utils/ds.py b/fg21sim/utils/ds.py
new file mode 100644
index 0000000..8b69dbc
--- /dev/null
+++ b/fg21sim/utils/ds.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2017 Weitian LI <weitian@aaronly.me>
+# MIT license
+
+"""
+Data structure/set utilities.
+"""
+
+import logging
+from collections.abc import Iterable
+
+import pandas as pd
+
+
+logger = logging.getLogger(__name__)
+
+
+def _flatten_list(l):
+    """
+    Lazily yield the leaves of an arbitrarily nested iterable, depth-first.
+    ``str``/``bytes`` are leaves.  Credit: https://stackoverflow.com/a/2158532
+    """
+    pending = [iter(l)]  # explicit stack, so nesting depth is not limited
+    while pending:
+        for obj in pending[-1]:
+            if isinstance(obj, Iterable) and not isinstance(obj, (str, bytes)):
+                pending.append(iter(obj))  # descend; parent resumes later
+                break
+            yield obj
+        else:
+            pending.pop()  # innermost level exhausted
+
+
+def dictlist_to_dataframe(dictlist, keys=None):
+    """
+    Convert data given as a list of dictionaries into a Pandas
+    DataFrame by flattening the dictionary keys into columns.
+
+    NOTE
+    ----
+    If the item ``key`` of the dictionary has value of a list/vector,
+    then it is split into multiple columns named as ``key[0], key[1], ...``.
+    The column layout is derived from the first dictionary only.
+
+    Parameters
+    ----------
+    dictlist : list[dict]
+        The input data to be converted: a list of dictionaries, with
+        each member dictionary having the same format/structure.
+        NOTE: The dictionary may have items with list/vector as the values,
+              but other more complex items (e.g., nested dictionaries) are
+              neither allowed nor supported.
+    keys : list[str], optional
+        The list of dictionary items to be selected for conversion.
+        Default: convert all dictionary items.
+
+    Returns
+    -------
+    dataframe : `~pandas.DataFrame`
+        The converted Pandas DataFrame whose columns are the dictionary
+        item keys.  An empty ``dictlist`` gives an empty DataFrame.
+    """
+    if len(dictlist) == 0:
+        # Nothing to derive the column layout from (``dictlist[0]`` fails).
+        return pd.DataFrame(columns=[] if keys is None else list(keys))
+
+    d0 = dictlist[0]
+    if keys is None:
+        keys = list(d0.keys())
+    logger.info("DataFrame conversion selected keys: %s", keys)
+
+    columns = []
+    for k in keys:
+        v = d0[k]
+        if isinstance(v, Iterable) and not isinstance(v, (str, bytes)):
+            columns += ["%s[%d]" % (k, i) for i in range(len(v))]
+        else:
+            columns.append(k)
+    logger.info("DataFrame number of columns: %d", len(columns))
+    logger.debug("DataFrame columns: %s", columns)
+
+    data = [list(_flatten_list([d[k] for k in keys])) for d in dictlist]
+    return pd.DataFrame(data, columns=columns)