# fg21sim/utils/io.py

# Copyright (c) 2017 Weitian LI <weitian@aaronly.me>
# MIT license

"""
Input/output utilities.
"""

import os
import logging
import pickle
from datetime import datetime

import numpy as np
import pandas as pd
from astropy.io import fits


logger = logging.getLogger(__name__)


def _create_dir(filepath):
    """
    Make sure the directory that will hold ``filepath`` exists, creating
    it (together with any missing parent directories) when necessary.

    Parameters
    ----------
    filepath : str
        Path to the (output) file whose parent directory is required.

    NOTE
    ----
    If the given ``filepath`` is simply the filename without any directory
    path, then just returns.
    """
    dirname = os.path.dirname(filepath)
    # ``dirname == ""`` if ``filepath`` does not contain directory path
    if not dirname or os.path.exists(dirname):
        return
    # ``exist_ok=True`` avoids a spurious ``FileExistsError`` when another
    # process creates the same directory between the check above and
    # this call.
    os.makedirs(dirname, exist_ok=True)
    logger.info("Created output directory: {0}".format(dirname))


def _check_existence(filepath, clobber=False, remove=False):
    """
    Guard against unintentionally overwriting an existing output file.

    Depending on the state of ``filepath`` and the flags:

    * the file does not exist : no action;
    * the file exists and ``clobber`` is False : raise ``OSError``;
    * the file exists, ``clobber`` is True and ``remove`` is False :
      only log a warning that it will be overwritten;
    * the file exists, ``clobber`` is True and ``remove`` is True :
      log a warning and remove the file.
    """
    if not os.path.exists(filepath):
        # Nothing is in the way.
        return

    if not clobber:
        raise OSError("Output file exists: {0}".format(filepath))

    if not remove:
        logger.warning("Existing file will be overwritten.")
        return

    logger.warning("Removed existing file: {0}".format(filepath))
    os.remove(filepath)


def dataframe_to_csv(df, outfile, comment=None, clobber=False):
    """
    Write a Pandas DataFrame to a CSV text file, preceded by a block of
    ``#``-prefixed comment lines.

    Parameters
    ----------
    df : `~pandas.DataFrame`
        The DataFrame to be written out.
    outfile : string
        The path to the output CSV file.  Its directory is created
        when missing.
    comment : list[str], optional
        Comment lines to put before the CSV header row.  Each line is
        stripped and gets the ``#`` prefix added automatically.
        If not given, a default comment recording the module name and the
        current time is used.
    clobber : bool, optional
        Whether to overwrite the existing output file?
        Default: False

    Raises
    ------
    TypeError :
        ``df`` is not a Pandas DataFrame.
    OSError :
        The output file exists and ``clobber`` is False.
    """
    if not isinstance(df, pd.DataFrame):
        raise TypeError("Not a Pandas DataFrame!")

    _create_dir(outfile)
    _check_existence(outfile, clobber=clobber, remove=True)

    # Fall back to a default header comment
    header_lines = comment
    if header_lines is None:
        header_lines = [
            "by %s" % __name__,
            "at %s" % datetime.now().isoformat(),
        ]

    with open(outfile, "w") as csvfile:
        # Assemble the whole comment block first, then write it in one go.
        prefixed = ("# " + text.strip() + "\n" for text in header_lines)
        csvfile.write("".join(prefixed))
        df.to_csv(csvfile, header=True, index=False)
    logger.info("Wrote DataFrame to CSV file: {0}".format(outfile))


def pickle_dump(obj, outfile, clobber=False):
    """
    Dump the given object into the output file using ``pickle.dump()``.

    NOTE
    ----
    The dumped output file is in binary format, and can be loaded back
    using ``pickle.load()``, e.g., the ``pickle_load()`` function below.

    Example
    -------
    >>> a = [1, 2, 3]
    >>> pickle_dump(a, "a.pkl", clobber=True)
    >>> b = pickle_load("a.pkl")
    >>> a == b
    True

    Parameters
    ----------
    obj : object
        The Python object to be pickled.
    outfile : str
        The path/filename to the output file storing the pickled object.
    clobber : bool, optional
        Whether to overwrite the existing output file.
        Default: False

    Raises
    ------
    OSError :
        The output file exists and ``clobber`` is False.
    """
    _create_dir(outfile)
    _check_existence(outfile, clobber=clobber, remove=True)
    # Use a context manager so the file is flushed and closed as soon as
    # the dump finishes, instead of whenever the handle gets collected.
    with open(outfile, "wb") as fh:
        pickle.dump(obj, file=fh)
    logger.info("Pickled data to file: %s" % outfile)


def pickle_load(infile):
    """
    Load the pickled Python object back from the given file.

    NOTE
    ----
    Unpickling can execute arbitrary code; only load files that come
    from a trusted source.

    Parameters
    ----------
    infile : str
        The path/filename to the data file, e.g., dumped by the above
        ``pickle_dump()`` function.

    Returns
    -------
    obj : The loaded Python object from the input file.
    """
    # Close the file deterministically once the object has been loaded.
    with open(infile, "rb") as fh:
        return pickle.load(fh)


def write_fits_image(outfile, image, header=None, float32=True,
                     clobber=False, checksum=False):
    """
    Write the supplied image (together with header information) into
    the output FITS file.

    Parameters
    ----------
    outfile : str
        The path/filename to the output FITS file.
    image : 2D `~numpy.ndarray`
        The image data to be written out to the FITS file.
        NOTE: image.shape: (nrow, ncol)  <->  FITS image: (ncol, nrow)
    header : `~astropy.io.fits.Header`, optional
        The FITS header information for this image
        Default: None
    float32 : bool, optional
        Whether coerce the image data (generally double/float64 data type)
        into single/float32 (in order to save space)?
        Default: True
    clobber : bool, optional
        Whether to overwrite the existing output file.
        Default: False
    checksum : bool, optional
        Whether to calculate the data checksum, which may cost some time?
        Default: False

    Raises
    ------
    OSError :
        The output file exists and ``clobber`` is False.
    """
    _create_dir(outfile)
    _check_existence(outfile, clobber=clobber, remove=True)
    if float32:
        # NOTE: the dtype must be ``np.float32``; the bare name ``float32``
        # refers to the boolean parameter of this function.
        image = np.asarray(image, dtype=np.float32)
    hdu = fits.PrimaryHDU(data=image, header=header)
    hdu.writeto(outfile, checksum=checksum)
    logger.info("Wrote image to FITS file: %s" % outfile)