# Copyright (c) 2017 Weitian LI
# MIT license

"""
Data structure/set utilities.
"""

import logging
from collections.abc import Iterable

import pandas as pd


logger = logging.getLogger(__name__)


def _is_vector(value):
    """
    Tell whether ``value`` should be expanded into multiple elements,
    i.e., it is an iterable but not a string/bytes (which are iterable
    but must be kept as single scalar values).
    """
    return isinstance(value, Iterable) and not isinstance(value, (str, bytes))


def _flatten_list(l):
    """
    Flatten an arbitrarily nested list.

    This is a generator that yields the leaf elements in order.
    Strings and bytes are yielded as a whole rather than being split
    into characters.

    Credit
    ------
    * Flatten (an irregular) list of lists
      https://stackoverflow.com/a/2158532
    """
    for el in l:
        if _is_vector(el):
            yield from _flatten_list(el)
        else:
            yield el


def dictlist_to_dataframe(dictlist, keys=None):
    """
    Convert the data in format of list of dictionaries to be a Pandas
    DataFrame by flattening the dictionary keys into columns.

    NOTE
    ----
    If the item ``key`` of the dictionary has value of a list/vector,
    then it is split into multiple columns named as
    ``key[0], key[1], ...``.  A nested list/vector is flattened
    recursively and the columns are numbered by the flattened position.

    Parameters
    ----------
    dictlist : list[dict]
        The input data to be converted, is a list of dictionaries, with
        each member dictionary having the same format/structure.
        The column layout is derived from the first dictionary only.
        NOTE:
        The dictionary may have items with list/vector as the values,
        but other more complex items (e.g., nested dictionary) are not
        supported.  The vector values must be re-iterable (e.g., list,
        tuple, array) since the first dictionary is traversed twice.
    keys : list[str], optional
        The list of dictionary items to be selected for conversion.
        Default: convert all dictionary items.

    Returns
    -------
    dataframe : `~pandas.DataFrame`
        The converted Pandas DataFrame with columns be the dictionary
        item keys.  If ``dictlist`` is empty, an empty DataFrame is
        returned (with ``keys`` as its columns if they are given).

    Raises
    ------
    KeyError
        If a selected key is missing from one of the dictionaries.
    """
    if not dictlist:
        # No row to derive the column layout from.
        if keys is None:
            return pd.DataFrame()
        return pd.DataFrame(columns=list(keys))

    d0 = dictlist[0]
    if keys is None:
        keys = list(d0.keys())
    logger.info("DataFrame conversion selected keys: %s", keys)

    columns = []
    for k in keys:
        v = d0[k]
        if _is_vector(v):
            # Count the *flattened* elements so that the number of columns
            # always agrees with the flattened row data built below.
            nelem = sum(1 for _ in _flatten_list(v))
            columns.extend("%s[%d]" % (k, i) for i in range(nelem))
        else:
            columns.append(k)
    logger.info("DataFrame number of columns: %d", len(columns))
    logger.debug("DataFrame columns: %s", columns)

    data = [list(_flatten_list(d[k] for k in keys)) for d in dictlist]
    dataframe = pd.DataFrame(data, columns=columns)
    return dataframe