From 13ea7cb340ae2380da185274c8d9560b6be46b64 Mon Sep 17 00:00:00 2001
From: Aaron LI <aaronly.me@outlook.com>
Date: Fri, 1 Apr 2016 16:18:05 +0800
Subject: correct_crosstalk.py: Add class RMF and update doc

---
 python/correct_crosstalk.py | 340 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 299 insertions(+), 41 deletions(-)

(limited to 'python/correct_crosstalk.py')

diff --git a/python/correct_crosstalk.py b/python/correct_crosstalk.py
index 4c7f820..9f86763 100755
--- a/python/correct_crosstalk.py
+++ b/python/correct_crosstalk.py
@@ -1,40 +1,66 @@
 #!/usr/bin/env python3
-#
-# Correct the crosstalk effect of XMM spectra by subtracting the
-# scattered photons from surrounding regions, and by compensating
-# the photons scattered to surrounding regions, according to the
-# generated crosstalk ARFs.
-#
-# Sample config file (in `ConfigObj' syntax):
-#----------------------
-# fix_negative = True
-# verbose = True
-# clobber = False
-#
-# [reg2]
-# outfile = cc_reg2.pi
-# spec = reg2.pi
-# arf = reg2.arf
-#   [[cross_in]]
-#     [[[in1]]]
-#     spec = reg1.pi
-#     arf = reg1.arf
-#     cross_arf = reg_1-2.arf
-#     [[[in2]]]
-#     spec = reg3.pi
-#     arf = reg3.arf
-#     cross_arf = reg_3-2.arf
-#   [[cross_out]]
-#   cross_arf = reg_2-1.arf, reg_2-3.arf
-#----------------------
+# -*- coding: utf-8 -*-
 #
 # Weitian LI
 # Created: 2016-03-26
-# Updated: 2016-03-28
+# Updated: 2016-04-01
+#
+# ChangeLog:
+# 2016-04-01:
+#   * Greatly update the documentations (e.g., description, sample config)
+#   * Add class `RMF'
+#   * Add method `get_energy()' for class `ARF'
+#
+# TODO/XXX:
+#   * Add background spectrum fields
+#   * Subtract background spectrum before correcting crosstalk effects
+#   * Estimate channel errors by Monte Carlo simulations
+#   * Split classes ARF, RMF, Spectrum to a separate module
 #
 
-from astropy.io import fits
+
+"""
+Correct the crosstalk effect of XMM spectra by subtracting the photons that
+scattered from the surrounding regions due to the finite PSF, and by
+compensating the photons that scattered to the surrounding regions, according
+to the generated crosstalk ARFs by SAS `arfgen'.
+
+
+Sample config file (in `ConfigObj' syntax):
+-----------------------------------------------------------
+verbose = True
+clobber = False
+# whether to fix the negative channel values due to spectral subtractions
+fix_negative = True
+
+[...]
+...
+
+[reg2]
+outfile = cc_reg2.pi
+spec = reg2.pi
+arf = reg2.arf
+  [[cross_in]]
+    [[[in1]]]
+    spec = reg1.pi
+    arf = reg1.arf
+    cross_arf = reg_1-2.arf
+    [[[in2]]]
+    spec = reg3.pi
+    arf = reg3.arf
+    cross_arf = reg_3-2.arf
+  [[cross_out]]
+  cross_arf = reg_2-1.arf, reg_2-3.arf
+
+[...]
+...
+-----------------------------------------------------------
+"""
+
+
 import numpy as np
+import scipy as sp
+from astropy.io import fits
 from configobj import ConfigObj
 
 import sys
@@ -43,22 +69,45 @@ import argparse
 from datetime import datetime
 
 
-class ARF:
+class ARF:  # {{{
     """
-    Deal with X-ray ARF file (.arf)
+    Class to handle the ARF (ancillary/auxiliary response file),
+    which contains the combined instrumental effective area
+    (telescope/filter/detector) and the quantum efficiency (QE) as a
+    function of energy averaged over time.
+    The effective area is [cm^2], and the QE is [counts/photon]; they are
+    multiplied together to create the ARF, resulting in [cm^2 counts/photon].
+
+    **CAVEAT/NOTE**:
+    For the XMM EPIC *pn* and Chandra *ACIS*, the generated ARF does NOT have
+    the same number of data points to that of spectral channels, i.e., the
+    "ENERG_LO" and "ENERG_HI" columns of ARF is different to the "E_MIN" and
+    "E_MAX" columns of RMF.
+    Therefore it is necessary to interpolate and extrapolate the ARF curve
+    in order to match the spectrum (or RMF "EBOUNDS" extension).
+    As for the XMM EPIC *MOS1* and *MOS2*, the ARF data points match the
+    spectral channels, i.e., the energy positions of each ARF data point and
+    spectral channel are consistent.  Thus the interpolation is not needed.
+
+    References:
+    [1] CIAO: Auxiliary Response File
+        http://cxc.harvard.edu/ciao/dictionary/arf.html
+    [2] Definition of RMF and ARF file formats
+        https://heasarc.gsfc.nasa.gov/docs/heasarc/caldb/docs/memos/cal_gen_92_002/cal_gen_92_002.html
     """
     filename = None
-    fitsobj = None
-    header = None
+    fitsobj  = None
+    # only consider the "SPECTRUM" extension
+    header   = None
     energ_lo = None
     energ_hi = None
     specresp = None
 
     def __init__(self, filename):
         self.filename = filename
-        self.fitsobj = fits.open(filename)
-        ext_specresp = self.fitsobj["SPECRESP"]
-        self.header = ext_specresp.header
+        self.fitsobj  = fits.open(filename)
+        ext_specresp  = self.fitsobj["SPECRESP"]
+        self.header   = ext_specresp.header
         self.energ_lo = ext_specresp.data["ENERG_LO"]
         self.energ_hi = ext_specresp.data["ENERG_HI"]
         self.specresp = ext_specresp.data["SPECRESP"]
@@ -69,27 +118,202 @@ class ARF:
         else:
             return self.specresp
 
+    def get_energy(self, mean="geometric"):
+        """
+        Return the mean energy values of the ARF.
 
-class Spectrum:
+        Arguments:
+          * mean: type of the mean energy:
+                  + "geometric": geometric mean, i.e., e = sqrt(e_min*e_max)
+                  + "arithmetic": arithmetic mean, i.e., e = 0.5*(e_min+e_max)
+        """
+        if mean == "geometric":
+            energy = np.sqrt(self.energ_lo * self.energ_hi)
+        elif mean == "arithmetic":
+            energy = 0.5 * (self.energ_lo + self.energ_hi)
+        else:
+            raise ValueError("Invalid mean type: %s" % mean)
+        return energy
+
+    def interpolate(self, x=None, verbose=False):
+        """
+        Cubic interpolate the ARF curve using `scipy.interpolate'
+
+        If the requested point is outside of the data range, the
+        fill value of *zero* is returned.
+
+        Arguments:
+          * x: points at which the interpolation to be calculated.
+
+        Return:
+          If x is None, then the interpolated function is returned,
+          otherwise, the interpolated data are returned.
+        """
+        energy = self.get_energy()
+        arf = self.get_data(copy=False)
+        if verbose:
+            print("INFO: ARF interpolating (this may take a while) ...",
+                    file=sys.stderr)
+        f_interp = sp.interpolate.interp1d(energy, arf, kind="cubic",
+                bounds_error=False, fill_value=0.0, assume_sorted=True)
+        if x is not None:
+            return f_interp(x)
+        else:
+            return f_interp
+# class ARF }}}
+
+
+class RMF:  # {{{
+    """
+    Class to handle the RMF (redistribution matrix file),
+    which maps from energy space into detector pulse height (or position)
+    space.  Since detectors are not perfect, this involves a spreading of
+    the observed counts by the detector resolution, which is expressed as
+    a matrix multiplication.
+    For X-ray spectral analysis, the RMF encodes the probability R(E,p)
+    that a detected photon of energy E will be assisgned to a given
+    channel value (PHA or PI) of p.
+
+    The standard Legacy format [2] for the RMF uses a binary table in which
+    each row contains R(E,p) for a single value of E as a function of p.
+    Non-zero sequences of elements of R(E,p) are encoded using a set of
+    variable length array columns.  This format is compact but hard to
+    manipulate and understand.
+
+    **CAVEAT/NOTE**:
+    + See the above ARF CAVEAT/NOTE for the XMM EPIC pn.
+    + This class (currently) only deals with the "EBOUNDS" extension, which
+      contains the `CHANNEL', `E_MIN' and `E_MAX' columns.  This `CHANNEL'
+      is the same as that of a spectrum.  Therefore, the energy values can
+      be used to interpolate and extrapolate the ARF curve.
+    + The `ENERG_LO' and `ENERG_HI' columns of "MATRIX" extension are the
+      same as that of a ARF.
+
+    References:
+    [1] CIAO: Redistribution Matrix File
+        http://cxc.harvard.edu/ciao/dictionary/rmf.html
+    [2] Definition of RMF and ARF file formats
+        https://heasarc.gsfc.nasa.gov/docs/heasarc/caldb/docs/memos/cal_gen_92_002/cal_gen_92_002.html
+    """
+    filename    = None
+    fitsobj     = None
+    ## extension "MATRIX"
+    hdr_matrix  = None
+    energ_lo    = None
+    energ_hi    = None
+    n_grp       = None
+    f_chan      = None
+    n_chan      = None
+    # raw squeezed RMF matrix data
+    matrix      = None
+    ## extension "EBOUNDS"
+    hdr_ebounds = None
+    channel     = None
+    e_min       = None
+    e_max       = None
+    ## converted 2D RMF matrix/image from the squeezed binary table
+    #  size: len(energ_lo) x len(channel)
+    rmfimg      = None
+
+    def __init__(self, filename):
+        self.filename    = filename
+        self.fitsobj     = fits.open(filename)
+        ## "MATRIX" extension
+        ext_matrix       = self.fitsobj["MATRIX"]
+        self.hdr_matrix  = ext_matrix.header
+        self.energ_lo    = ext_matrix.data["ENERG_LO"]
+        self.energ_hi    = ext_matrix.data["ENERG_HI"]
+        self.n_grp       = ext_matrix.data["N_GRP"]
+        self.f_chan      = ext_matrix.data["F_CHAN"]
+        self.n_chan      = ext_matrix.data["N_CHAN"]
+        self.matrix      = ext_matrix.data["MATRIX"]
+        ## "EBOUNDS" extension
+        ext_ebounds      = self.fitsobj["EBOUNDS"]
+        self.hdr_ebounds = ext_ebounds.header
+        self.channel     = ext_ebounds.data["CHANNEL"]
+        self.e_min       = ext_ebounds.data["E_MIN"]
+        self.e_max       = ext_ebounds.data["E_MAX"]
+
+    def get_energy(self, mean="geometric"):
+        """
+        Return the mean energy values of the RMF "EBOUNDS".
+
+        Arguments:
+          * mean: type of the mean energy:
+                  + "geometric": geometric mean, i.e., e = sqrt(e_min*e_max)
+                  + "arithmetic": arithmetic mean, i.e., e = 0.5*(e_min+e_max)
+        """
+        if mean == "geometric":
+            energy = np.sqrt(self.e_min * self.e_max)
+        elif mean == "arithmetic":
+            energy = 0.5 * (self.e_min + self.e_max)
+        else:
+            raise ValueError("Invalid mean type: %s" % mean)
+        return energy
+
+    def get_rmfimg(self):
+        """
+        Convert the RMF data in squeezed binary table (standard Legacy format)
+        to a 2D image/matrix.
+        """
+        def _make_rmfimg_row(n_channel, dtype, f_chan, n_chan, mat_row):
+            f_chan -= 1  # FITS indices are 1-based
+            idx = np.concatenate([ np.arange(f, f+n) \
+                    for f, n in zip(f_chan, n_chan) ])
+            rmfrow = np.zeros(n_channel, dtype=dtype)
+            rmfrow[idx] = mat_row
+            return rmfrow
+        #
+        if self.rmfimg is None:
+            # Make the 2D RMF matrix/image
+            n_energy  = len(self.energ_lo)
+            n_channel = len(self.channel)
+            rmf_dtype = self.matrix[0].dtype
+            rmfimg = np.zeros(shape=(n_energy, n_channel), dtype=rmf_dtype)
+            for i in range(n_energy):
+                rmfimg[i, :] = _make_rmfimg_row(n_channel, rmf_dtype,
+                        self.f_chan[i], self.n_chan[i], self.matrix[i])
+            self.rmfimg = rmfimg
+        return self.rmfimg
+
+    def write_rmfimg(self, outfile, clobber=False):
+        rmfimg = self.get_rmfimg()
+        # merge headers
+        header = self.hdr_matrix.copy(strip=True)
+        header.extend(self.hdr_ebounds.copy(strip=True))
+        outfits = fits.PrimaryHDU(data=rmfimg, header=header)
+        outfits.writeto(outfile, checksum=True, clobber=clobber)
+# class RMF }}}
+
+
+class Spectrum:  # {{{
     """
     Deal with X-ray spectrum (.pi)
 
+    TODO/XXX:
+    * Add background spectrum fields
+    * Subtract background spectrum before correcting crosstalk effects
+    * Strip keywords ANCRFILE, RESPFILE, BACKFILE from the output spectrum
+    * Estimate channel errors by Monte Carlo simulations
+
     NOTE:
     The "COUNTS" column data are converted from "int32" to "float32".
     """
     filename = None
-    # FITS object return by `fits.open'
+    # FITS object return by `fits.open()'
     fitsobj = None
     # header of "SPECTRUM" extension
     header = None
     # "SPECTRUM" extension data
     channel = None
-    # name of the column containing the spectrum data, either "COUNTS" or "RATE"
+    # name of the column containing the spectrum data ("COUNTS" or "RATE")
     spec_colname = None
     # spectrum data
     spec_data = None
     # ARF object for this spectrum
     arf = None
+    # RMF object for this spectrum
+    rmf = None
 
     def __init__(self, filename, arffile):
         self.filename = filename
@@ -114,7 +338,25 @@ class Spectrum:
         else:
             return self.spec_data
 
+    def get_channel(self, copy=True):
+        if copy:
+            return self.channel.copy()
+        else:
+            return self.channel
+
+    def get_energy(self, mean="geometric"):
+        """
+        Get the energy values of each channel if RMF present.
+        """
+        if self.rmf is None:
+            return None
+        else:
+            return self.rmf.get_energy(mean=mean)
+
     def get_arf(self, copy=True):
+        """
+        Get the corresponding ARF curve data for this spectrum.
+        """
         if self.arf is None:
             return None
         else:
@@ -189,6 +431,15 @@ class Spectrum:
         if i > 0:
             print("*** Fixed negative channels ***", file=sys.stderr)
 
+    def reset_header_keywords(self,
+            keywords=["ANCRFILE", "RESPFILE", "BACKFILE"]):
+        """
+        Reset the keywords to "NONE" to avoid confusion or mistakes.
+        """
+        for kw in keywords:
+            if kw in self.header:
+                header[kw] = "NONE"
+
     def write(self, filename, clobber=False):
         """
         Create a new "SPECTRUM" table/extension and replace the original
@@ -202,9 +453,10 @@ class Spectrum:
                 header=self.header)
         self.fitsobj["SPECTRUM"] = ext_spec
         self.fitsobj.writeto(filename, clobber=clobber, checksum=True)
+# class Spectrum }}}
 
 
-class Crosstalk:
+class Crosstalk:  # {{{
     """
     Crosstalk correction.
     """
@@ -245,6 +497,9 @@ class Crosstalk:
         self.outfile = config["outfile"]
 
     def do_correction(self, fix_negative=False, verbose=False):
+        """
+        Perform the crosstalk correction.
+        """
         self.spectrum.header.add_history("Crosstalk Correction BEGIN")
         self.spectrum.header.add_history("  TOOL: %s @ %s" % (\
                 os.path.basename(sys.argv[0]), datetime.utcnow().isoformat()))
@@ -269,7 +524,10 @@ class Crosstalk:
     def write(self, filename=None, clobber=False):
         if filename is None:
             filename = self.outfile
+        self.spectrum.reset_header_keywords(
+                keywords=["ANCRFILE", "RESPFILE", "BACKFILE"])
         self.spectrum.write(filename, clobber=clobber)
+# class Crosstalk }}}
 
 
 def main():
-- 
cgit v1.2.2