aboutsummaryrefslogtreecommitdiffstats
path: root/astro/fits
diff options
context:
space:
mode:
Diffstat (limited to 'astro/fits')
-rwxr-xr-xastro/fits/merge_fits.py234
1 files changed, 234 insertions, 0 deletions
diff --git a/astro/fits/merge_fits.py b/astro/fits/merge_fits.py
new file mode 100755
index 0000000..e014c69
--- /dev/null
+++ b/astro/fits/merge_fits.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Aaron LI
+# 2015/06/16
+#
+# Changelogs:
+# 0.3.0, 2015/06/17, Aaron LI
+# * added argument '-c/--columns' to specify columns to be merged
+# * added argument 'columns' to function 'merge2fits()'
+# 0.2.0, 2015/06/17, Aaron LI
+# * added function 'del_key_startswith()' to delete header keywords,
+# and the deletion must be repeated until the header length does not
+# decrease any more
+# * ignore the header of the second FITS file to avoid keyword conflictions
+#
+# BUGS:
+# * astropy.io.fits may have bug with header keywords deletion
+#
+# TODO:
+# * to support image FITS merge
+# * to allow specify column list to be merged
+#
+
+"""
+Merge several (>=2) of FITS file.
+
+By default the *first* extend tables are merged and write out to a new
+FITS file containing the *common* columns. If the data types of the
+columns of each FITS table do not match, then the data type of the column
+of the *first* FITS table is used, and other columns are coerced.
+
+If the FITS files have only *1* HDU (i.e., the Primary HDU), then data of
+these HDU's are summed up to make up the output FITS file (an image),
+on conditional that the shapes of all these HDU's are the same.
+"""
+
+__version__ = "0.3.0"
+__date__ = "2015/06/17"
+
+# default blockname to be merged
+BLOCKNAME_DFT = "EVENTS"
+
+DEBUG = True
+
+import sys
+import argparse
+import re
+
+import numpy as np
+try:
+ from astropy.io import fits
+except ImportError:
+ try:
+ import pyfits as fits
+ except ImportError:
+ raise ImportError("cannot import 'astropy.io.fits' or 'pyfits'")
+
+
+def merge2fits(file1, file2, block1=1, block2=1, columns=None):
+ """
+ Merge *two* FITS files of the given blocks (extension table),
+ and return the merged FITS object.
+
+ TODO:
+ * log history to header
+
+ Arguments:
+ file1, file2: input two FITS files
+ block1, block2: table number or table name to be merged
+ columns: the columns to be merged; by default to merge the
+ common columns
+
+ Return:
+ the merged FITS object
+ """
+ # open file if provide filename
+ if isinstance(file1, str):
+ file1 = fits.open(file1)
+ if isinstance(file2, str):
+ file2 = fits.open(file2)
+ # if has only *1* HDU => image
+ if len(file1) == 1:
+ block1 = 0
+ if len(file2) == 1:
+ block2 = 0
+ if block1 == 0 or block2 == 0:
+ # TODO
+ raise NotImplementedError("image FITS merge currently not supported!")
+ # get table to be merged
+ table1 = file1[block1]
+ table2 = file2[block2]
+ # create column names to be merged
+ # get names of all columns (convert to upper case)
+ colnames1 = [col.name.upper() for col in table1.columns]
+ colnames2 = [col.name.upper() for col in table2.columns]
+ colnames_common = list(set(colnames1).intersection(set(colnames2)))
+ # sort the common column names acoording original column orders
+ colnames_common.sort(key = lambda x: colnames1.index(x))
+ if columns is not None:
+ if isinstance(columns, list):
+ columnlist = list(map(str.upper, columns))
+ else:
+ columnlist = list(columns.upper())
+ # check the specified columns whether in the above colnames_common
+ for name in columnlist:
+ if name not in colnames_common:
+ raise ValueError("column '%s' not found in both files" % name)
+ # use the specified columns
+ colnames_common = columnlist
+ # "STATUS" columns don't have equal-length format, so remove it
+ if "STATUS" in colnames_common:
+ colnames_common.remove("STATUS")
+ if DEBUG:
+ print("DEBUG: columns to merge: ", colnames_common, file=sys.stderr)
+ # filter out the common columns
+ nrow1 = table1.data.shape[0]
+ nrow2 = table2.data.shape[0]
+ hdu_merged = fits.BinTableHDU.from_columns(
+ fits.ColDefs([table1.columns[name] for name in colnames_common]),
+ nrows=nrow1+nrow2)
+ for name in colnames_common:
+ if DEBUG:
+ print("DEBUG: merging column: ", name, file=sys.stderr)
+ dtype = hdu_merged.columns[name].array.dtype
+ hdu_merged.columns[name].array[nrow1:] = \
+ table2.columns[name].array.astype(dtype)
+ # process headers, based on the header of the first FITS file
+ # DO NOT strip the base header, in order to keep the position of
+ # XTENSION/BITPIX/NAXIS/NAXIS1/NAXIS2/PCOUNT/GCOUNT/TFIELDS keywords.
+ header = table1.header.copy() # do not strip
+ # IGNORE the header of the second FITS file to avoid keyword conflictions.
+ #header2 = table2.header.copy(strip=True)
+ ## merge two headers; COMMENT and HISTORY needs special handle
+ #for comment in header2["COMMENT"]:
+ # header.add_comment(comment)
+ #for history in header2["HISTORY"]:
+ # header.add_history(history)
+ #if "COMMENT" in header2:
+ # del header2["COMMENT"]
+ #if "HISTORY" in header2:
+ # del header2["HISTORY"]
+ #if "" in header2:
+ # del header2[""]
+ #header.update(header2)
+ # remove the original TLMIN??/TLMAX??/TTYPE??/TFORM??/TUNIT?? keywords
+ del_key_startswith(header,
+ startswith=["TLMIN", "TLMAX", "TTYPE", "TFORM", "TUNIT"],
+ lastlength=len(header))
+ # update with new TLMIN??/TLMAX??/TTYPE??/TFORM??/TUNIT?? keywords
+ header.update(hdu_merged.header)
+ hdu_merged.header = header
+ # copy PrimaryHDU from first FITS
+ primary_hdu = file1[0].copy()
+ # make HDUList and return
+ return fits.HDUList([primary_hdu, hdu_merged])
+
+
+def del_key_startswith(header, startswith, lastlength=0):
+ """
+ Delete the keys which start with the specified strings.
+
+ Arguments:
+ header: FITS table header
+ startswith: a list of strings; If a key starts with any
+ of these strings, then the key-value pair is removed.
+
+ XXX: the deletion must be repeated several times until the
+ length of the header does not decrease any more.
+ (This may be a bug related to the astropy.io.fits???)
+ """
+ if not isinstance(startswith, list):
+ startswith = list(startswith)
+ re_key = re.compile(r"^(%s)" % "|".join(startswith), re.I)
+ for k in header.keys():
+ if re_key.match(k):
+ del header[k]
+ curlength = len(header)
+ if lastlength == curlength:
+ return
+ else:
+ # recursively continue deletion
+ if DEBUG:
+ print("DEBUG: recursively continue header keywords deleteion",
+ file=sys.stderr)
+ del_key_startswith(header, startswith, curlength)
+
+
+def get_filename_blockname(pstr):
+ """
+ Separate privided 'pstr' (parameter string) into filename and
+ blockname. If does not have a blockname, then the default
+ blockname returned.
+ """
+ try:
+ filename, blockname = re.sub(r"[\[\]]", " ", pstr).split()
+ except ValueError:
+ filename = pstr
+ blockname = BLOCKNAME_DFT
+ return (filename, blockname)
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description="Merge several FITS files with the common columns.")
+ parser.add_argument("-V", "--version", action="version",
+ version="%(prog)s " + "%s (%s)" % (__version__, __date__))
+ parser.add_argument("infile1", help="input FITS file 1; " + \
+ "The blockname can be appended, e.g., infile1.fits[EVENTS]")
+ parser.add_argument("infile2", nargs="+",
+ help="input FITS file 2 and more")
+ parser.add_argument("outfile", help="merged output file")
+ parser.add_argument("-c", "--columns", dest="columns",
+ help="list of columns to be merged (comma separated)")
+ parser.add_argument("-C", "--clobber", dest="clobber",
+ action="store_true", help="overwrite output file if exists")
+ args = parser.parse_args()
+ if DEBUG:
+ print("DEBUG: infile2: ", args.infile2, file=sys.stderr)
+
+ if args.columns:
+ columns = args.columns.upper().replace(",", " ").split()
+ file1, block1 = get_filename_blockname(args.infile1)
+ merged_fits = fits.open(file1)
+ for fitsfile in args.infile2:
+ # split filename and block name
+ file2, block2 = get_filename_blockname(fitsfile)
+ merged_fits = merge2fits(merged_fits, file2, block1, block2, columns)
+ merged_fits.writeto(args.outfile, checksum=True, clobber=args.clobber)
+
+
+if __name__ == "__main__":
+ main()
+