69 files changed, 11597 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c3edc65 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +# .gitignore +# + +*~ +*.swp +*.un~ +*_bak + +# python +__init__.py +*.pyc +**/__pycache__ + diff --git a/README.md b/README.md new file mode 100644 index 0000000..47e9250 --- /dev/null +++ b/README.md @@ -0,0 +1,19 @@ +Aly's toolbox +============= + +Aaron LI +Created: 2015-06-06 + + +This repository contains various tools for my daily usage, handy command +line scripts, astronomical data analysis tools, etc. +Some tools are written by myself, some are steal from others :) + + +**NOTE**: This repository is currently very rough, and need many cleanups. + + +# License +These tools are distributed under the MIT license, unless otherwise +declared in the corresponding files. + diff --git a/astro/add_xflt.py b/astro/add_xflt.py new file mode 100755 index 0000000..8a718e6 --- /dev/null +++ b/astro/add_xflt.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Aaron LI +# 2015/06/16 + +""" +Add XFLT#### keywords to the spectrum header according to the provided +region, in order to employ the "PROJCT" model in XSPEC. +""" + +__version__ = "0.1.0" +__date__ = "2015-11-14" + +import sys +import argparse +import subprocess +import re +import os +from collections import OrderedDict + + +def parse_region(regstr): + """ + Parse the given region string into one of the following 4 cases: + 1. annulus + 2. pie (cxc) + 3. pie + annulus (ftools/xmm) + 4. other + + For the first 3 cases, the return is a dictionary like: + { 'shape': 'pie', 'xc': 55, 'yc': 89, + 'radius_in': 50, 'radius_out': 100, + 'angle_begin': 30, 'angle_end': 120 } + Otherwise, return None. + """ + re_annulus = re.compile(r'^.*(?P<shape>annulus)\(\s*(?P<xc>[\d.-]+)\s*,\s*(?P<yc>[\d.-]+)\s*,\s*(?P<radius_in>[\d.-]+)\s*,\s*(?P<radius_out>[\d.-]+)\s*\).*$', re.I) + re_pie_cxc = re.compile(r'^.*(?P<shape>pie)\(\s*(?P<xc>[\d.-]+)\s*,\s*(?P<yc>[\d.-]+)\s*,\s*(?P<radius_in>[\d.-]+)\s*,\s*(?P<radius_out>[\d.-]+)\s*,\s*(?P<angle_begin>[\d.-]+)\s*,\s*(?P<angle_end>[\d.-]+)\s*\).*$', re.I) + re_pie_ft = re.compile(r'^.*(?P<shape>pie)\(\s*(?P<xc>[\d.-]+)\s*,\s*(?P<yc>[\d.-]+)\s*,\s*(?P<angle_begin>[\d.-]+)\s*,\s*(?P<angle_end>[\d.-]+)\s*\).*$', re.I) + m_annulus = re_annulus.match(regstr) + m_pie_cxc = re_pie_cxc.match(regstr) + m_pie_ft = re_pie_ft.match(regstr) + if m_pie_cxc is not None: + # case 2: pie (cxc) + region = OrderedDict([ + ('shape', m_pie_cxc.group('shape').lower()), + ('xc', float(m_pie_cxc.group('xc'))), + ('yc', float(m_pie_cxc.group('yc'))), + ('radius_in', float(m_pie_cxc.group('radius_in'))), + ('radius_out', float(m_pie_cxc.group('radius_out'))), + ('angle_begin', float(m_pie_cxc.group('angle_begin'))), + ('angle_end', float(m_pie_cxc.group('angle_end'))) + ]) + elif m_pie_ft is not None: + # pie (ftools/xmm) + if m_annulus is not None: + # case 3: pie + annulus (ftools/xmm) + region = OrderedDict([ + ('shape', m_pie_ft.group('shape').lower()), + ('xc', float(m_pie_ft.group('xc'))), + ('yc', float(m_pie_ft.group('yc'))), + ('radius_in', float(m_annulus.group('radius_in'))), + ('radius_out', float(m_annulus.group('radius_out'))), + ('angle_begin', float(m_pie_ft.group('angle_begin'))), + ('angle_end', float(m_pie_ft.group('angle_end'))) + ]) + else: + region = None + elif m_annulus is not None: + # case 1: annulus + region = OrderedDict([ + ('shape', m_annulus.group('shape').lower()), + ('xc', float(m_annulus.group('xc'))), + ('yc', float(m_annulus.group('yc'))), + ('radius_in', 
float(m_annulus.group('radius_in'))), + ('radius_out', float(m_annulus.group('radius_out'))) + ]) + else: + region = None + return region + + +def make_xflt(region): + """ + Make a dictionary for the XFLT#### keywords and values according + to the provided region. + + Return: + a dictionary containing the XFLT#### keywords and values, e.g., + { 'XFLT0001': radius_out, 'XFLT0002': radius_out, 'XFLT0003': 0, + 'XFLT0004': angle_begin, 'XFLT0005': angle_end } + """ + if region.get('shape') == 'annulus': + xflt = OrderedDict([ + ('XFLT0001', region.get('radius_out')), + ('XFLT0002', region.get('radius_out')), + ('XFLT0003', 0) + ]) + elif region.get('shape') == 'pie': + xflt = OrderedDict([ + ('XFLT0001', region.get('radius_out')), + ('XFLT0002', region.get('radius_out')), + ('XFLT0003', 0), + ('XFLT0004', region.get('angle_begin')), + ('XFLT0005', region.get('angle_end')) + ]) + else: + xflt = None + return xflt + + +def add_xflt(fitsfile, xflt): + """ + Add XFLT#### keywords to the given FITS file. + """ + if xflt is not None: + for key, val in xflt.items(): + cmd = 'fthedit "%(file)s+1" keyword="%(key)s" operation=add value="%(val)s"' % \ + {'file': fitsfile, 'key': key, 'val': val} + print("CMD: %s" % cmd, file=sys.stderr) + subprocess.call(cmd, shell=True) + + +def main(): + parser = argparse.ArgumentParser( + description="Add XFLT???? keywords to spectrum header") + parser.add_argument("-V", "--version", action="version", + version="%(prog)s " + "%s (%s)" % (__version__, __date__)) + parser.add_argument("spectrum", help="input spectrum; @stack") + parser.add_argument("region", help="extraction region of this spectrum; @stack") + parser.add_argument("arcmin2pix", nargs='?', help="1 arcmin = ? pixel", + default=1.0, type=float) + args = parser.parse_args() + + if args.spectrum[0] == '@' and args.region[0] == '@': + spectrum = map(str.strip, open(args.spectrum[1:]).readlines()) + regionstr = map(str.strip, open(args.region[1:]).readlines()) + else: + spectrum = [ args.spectrum ] + regionstr = [ args.region ] + + for spec, reg in zip(spectrum, regionstr): + print("SPECTRUM: '%s'" % spec) + print("REGION: '%s'" % reg) + region = parse_region(reg) + if region is None: + print("ERROR: invalid region %s" % reg, file=sys.stderr) + sys.exit(11) + else: + # Convert pixel to arcmin + region['radius_in'] = region['radius_in'] / args.arcmin2pix + region['radius_out'] = region['radius_out'] / args.arcmin2pix + xflt = make_xflt(region) + add_xflt(spec, xflt) + + +if __name__ == "__main__": + main() + diff --git a/astro/chandra/blanksky_add_time.py b/astro/chandra/blanksky_add_time.py new file mode 100755 index 0000000..8db3c2b --- /dev/null +++ b/astro/chandra/blanksky_add_time.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Aaron LI +# 2015/06/16 +# +# Changelogs: +# 0.2.0, 2015/06/16, Aaron LI +# * append the new time column to the *last*, rather than inserting +# to the beginning +# * explicitly update header from the new generated table +# +# BUGS: +# * comments of columns will lost after modified by astropy.io.fits, +# which is a bug with this package +# + +""" +Add a time column for the chandra blanksky event file. +The time data are generated with a uniform distribution +between TSTART and TSTOP. 
+""" + +__version__ = "0.2.0" +__date__ = "2015/06/16" + +import sys +import argparse + +import numpy as np +try: + from astropy.io import fits +except ImportError: + try: + import pyfits as fits + except ImportError: + raise ImportError("cannot import 'astropy.io.fits' or 'pyfits'") + + +def add_time_column(fitsfile, blockname="EVENTS"): + """ + Add a time column to the specified block of the input fits file. + The time data are generated with a uniform distribution + between TSTART and TSTOP. + + Return: + A fits object with the new time column. + """ + if isinstance(fitsfile, str): + fitsfile = fits.open(fitsfile) + table = fitsfile[blockname] + tstart = table.header["TSTART"] + tstop = table.header["TSTOP"] + counts = len(table.data) + time_data = np.random.uniform(tstart, tstop, counts) + time_col = fits.Column(name="time", format="1D", unit="s", array=time_data) + # NOTE: append the new time column to the *last*! + # Otherwise the TLMIN??/TLMAX?? keyword pairs, which record the + # minimum/maximum values of corresponding columns, will become + # *out of order*. Therefore the output FITS file causes weird problems + # with DS9 and DM tools. + newtable = fits.BinTableHDU.from_columns( + table.columns + fits.ColDefs([time_col])) + fitsfile[blockname].data = newtable.data + # update header + fitsfile[blockname].header.update(newtable.header) + return fitsfile + + +def main(): + parser = argparse.ArgumentParser( + description="Add a time column for Chandra blanksky event file.") + parser.add_argument("-V", "--version", action="version", + version="%(prog)s " + "%s (%s)" % (__version__, __date__)) + parser.add_argument("infile", help="input chandra blanksky file") + parser.add_argument("outfile", nargs="?", default=None, + help="modified blanksky file. IN-PLACE modification if omitted.") + parser.add_argument("-C", "--clobber", dest="clobber", + action="store_true", help="overwrite output file if exists") + args = parser.parse_args() + + newfits = add_time_column(args.infile) + if args.outfile: + newfits.writeto(args.outfile, clobber=args.clobber) + else: + newfits.writeto(args.infile, clobber=True) + + +if __name__ == "__main__": + main() + diff --git a/astro/fits/merge_fits.py b/astro/fits/merge_fits.py new file mode 100755 index 0000000..e014c69 --- /dev/null +++ b/astro/fits/merge_fits.py @@ -0,0 +1,234 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Aaron LI +# 2015/06/16 +# +# Changelogs: +# 0.3.0, 2015/06/17, Aaron LI +# * added argument '-c/--columns' to specify columns to be merged +# * added argument 'columns' to function 'merge2fits()' +# 0.2.0, 2015/06/17, Aaron LI +# * added function 'del_key_startswith()' to delete header keywords, +# and the deletion must be repeated until the header length does not +# decrease any more +# * ignore the header of the second FITS file to avoid keyword conflictions +# +# BUGS: +# * astropy.io.fits may have bug with header keywords deletion +# +# TODO: +# * to support image FITS merge +# * to allow specify column list to be merged +# + +""" +Merge several (>=2) of FITS file. + +By default the *first* extend tables are merged and write out to a new +FITS file containing the *common* columns. If the data types of the +columns of each FITS table do not match, then the data type of the column +of the *first* FITS table is used, and other columns are coerced. 
+ +If the FITS files have only *1* HDU (i.e., the Primary HDU), then data of +these HDU's are summed up to make up the output FITS file (an image), +on conditional that the shapes of all these HDU's are the same. +""" + +__version__ = "0.3.0" +__date__ = "2015/06/17" + +# default blockname to be merged +BLOCKNAME_DFT = "EVENTS" + +DEBUG = True + +import sys +import argparse +import re + +import numpy as np +try: + from astropy.io import fits +except ImportError: + try: + import pyfits as fits + except ImportError: + raise ImportError("cannot import 'astropy.io.fits' or 'pyfits'") + + +def merge2fits(file1, file2, block1=1, block2=1, columns=None): + """ + Merge *two* FITS files of the given blocks (extension table), + and return the merged FITS object. + + TODO: + * log history to header + + Arguments: + file1, file2: input two FITS files + block1, block2: table number or table name to be merged + columns: the columns to be merged; by default to merge the + common columns + + Return: + the merged FITS object + """ + # open file if provide filename + if isinstance(file1, str): + file1 = fits.open(file1) + if isinstance(file2, str): + file2 = fits.open(file2) + # if has only *1* HDU => image + if len(file1) == 1: + block1 = 0 + if len(file2) == 1: + block2 = 0 + if block1 == 0 or block2 == 0: + # TODO + raise NotImplementedError("image FITS merge currently not supported!") + # get table to be merged + table1 = file1[block1] + table2 = file2[block2] + # create column names to be merged + # get names of all columns (convert to upper case) + colnames1 = [col.name.upper() for col in table1.columns] + colnames2 = [col.name.upper() for col in table2.columns] + colnames_common = list(set(colnames1).intersection(set(colnames2))) + # sort the common column names acoording original column orders + colnames_common.sort(key = lambda x: colnames1.index(x)) + if columns is not None: + if isinstance(columns, list): + columnlist = list(map(str.upper, columns)) + else: + columnlist = list(columns.upper()) + # check the specified columns whether in the above colnames_common + for name in columnlist: + if name not in colnames_common: + raise ValueError("column '%s' not found in both files" % name) + # use the specified columns + colnames_common = columnlist + # "STATUS" columns don't have equal-length format, so remove it + if "STATUS" in colnames_common: + colnames_common.remove("STATUS") + if DEBUG: + print("DEBUG: columns to merge: ", colnames_common, file=sys.stderr) + # filter out the common columns + nrow1 = table1.data.shape[0] + nrow2 = table2.data.shape[0] + hdu_merged = fits.BinTableHDU.from_columns( + fits.ColDefs([table1.columns[name] for name in colnames_common]), + nrows=nrow1+nrow2) + for name in colnames_common: + if DEBUG: + print("DEBUG: merging column: ", name, file=sys.stderr) + dtype = hdu_merged.columns[name].array.dtype + hdu_merged.columns[name].array[nrow1:] = \ + table2.columns[name].array.astype(dtype) + # process headers, based on the header of the first FITS file + # DO NOT strip the base header, in order to keep the position of + # XTENSION/BITPIX/NAXIS/NAXIS1/NAXIS2/PCOUNT/GCOUNT/TFIELDS keywords. + header = table1.header.copy() # do not strip + # IGNORE the header of the second FITS file to avoid keyword conflictions. 
+ #header2 = table2.header.copy(strip=True) + ## merge two headers; COMMENT and HISTORY needs special handle + #for comment in header2["COMMENT"]: + # header.add_comment(comment) + #for history in header2["HISTORY"]: + # header.add_history(history) + #if "COMMENT" in header2: + # del header2["COMMENT"] + #if "HISTORY" in header2: + # del header2["HISTORY"] + #if "" in header2: + # del header2[""] + #header.update(header2) + # remove the original TLMIN??/TLMAX??/TTYPE??/TFORM??/TUNIT?? keywords + del_key_startswith(header, + startswith=["TLMIN", "TLMAX", "TTYPE", "TFORM", "TUNIT"], + lastlength=len(header)) + # update with new TLMIN??/TLMAX??/TTYPE??/TFORM??/TUNIT?? keywords + header.update(hdu_merged.header) + hdu_merged.header = header + # copy PrimaryHDU from first FITS + primary_hdu = file1[0].copy() + # make HDUList and return + return fits.HDUList([primary_hdu, hdu_merged]) + + +def del_key_startswith(header, startswith, lastlength=0): + """ + Delete the keys which start with the specified strings. + + Arguments: + header: FITS table header + startswith: a list of strings; If a key starts with any + of these strings, then the key-value pair is removed. + + XXX: the deletion must be repeated several times until the + length of the header does not decrease any more. + (This may be a bug related to the astropy.io.fits???) + """ + if not isinstance(startswith, list): + startswith = list(startswith) + re_key = re.compile(r"^(%s)" % "|".join(startswith), re.I) + for k in header.keys(): + if re_key.match(k): + del header[k] + curlength = len(header) + if lastlength == curlength: + return + else: + # recursively continue deletion + if DEBUG: + print("DEBUG: recursively continue header keywords deleteion", + file=sys.stderr) + del_key_startswith(header, startswith, curlength) + + +def get_filename_blockname(pstr): + """ + Separate privided 'pstr' (parameter string) into filename and + blockname. If does not have a blockname, then the default + blockname returned. 
+ """ + try: + filename, blockname = re.sub(r"[\[\]]", " ", pstr).split() + except ValueError: + filename = pstr + blockname = BLOCKNAME_DFT + return (filename, blockname) + + +def main(): + parser = argparse.ArgumentParser( + description="Merge several FITS files with the common columns.") + parser.add_argument("-V", "--version", action="version", + version="%(prog)s " + "%s (%s)" % (__version__, __date__)) + parser.add_argument("infile1", help="input FITS file 1; " + \ + "The blockname can be appended, e.g., infile1.fits[EVENTS]") + parser.add_argument("infile2", nargs="+", + help="input FITS file 2 and more") + parser.add_argument("outfile", help="merged output file") + parser.add_argument("-c", "--columns", dest="columns", + help="list of columns to be merged (comma separated)") + parser.add_argument("-C", "--clobber", dest="clobber", + action="store_true", help="overwrite output file if exists") + args = parser.parse_args() + if DEBUG: + print("DEBUG: infile2: ", args.infile2, file=sys.stderr) + + if args.columns: + columns = args.columns.upper().replace(",", " ").split() + file1, block1 = get_filename_blockname(args.infile1) + merged_fits = fits.open(file1) + for fitsfile in args.infile2: + # split filename and block name + file2, block2 = get_filename_blockname(fitsfile) + merged_fits = merge2fits(merged_fits, file2, block1, block2, columns) + merged_fits.writeto(args.outfile, checksum=True, clobber=args.clobber) + + +if __name__ == "__main__": + main() + diff --git a/astro/fitting/fit_betasbp_cut.py b/astro/fitting/fit_betasbp_cut.py new file mode 100755 index 0000000..2d2931e --- /dev/null +++ b/astro/fitting/fit_betasbp_cut.py @@ -0,0 +1,458 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# To fitting the given SBP data with the following beta model: +# s = s0 * pow((1.0+(r/rc)^2), 0.5-3*beta) + c +# And this tool supports the following two requirements for the fitting: +# (1) ignore the specified number of inner-most data points; +# (2) ignore the data points whose radius value less than the given value. +# +# Aaron LI +# 2015/05/29 +# +# Changelogs: +# v0.5.1, 2015/06/07, Aaron LI +# * fixed 'dof' calculation with 'n-p-1' +# * fixed 'par_eps' calculation/configuration +# v0.5.0, 2015/06/07, Aaron LI +# * added 'fit_model_bounds' using 'scipy.optimize.minimize' to +# perform function minimization with bounds +# * split the data cut section to function 'cut_data' +# * added argument 'options' to 'fit_model_bounds' +# v0.4.0, 2015/06/06, Aaron LI +# * replace getopt with 'argparse' +# * added 'get_parameter' to process model parameter initial value and bounds +# * support read parameter bounds from input file +# * added options '--s0', '--rc', '--beta', '--const' to get paramter +# initial values and bounds +# * renamed 'fit_beta_model' to 'fit_model', and added argument 'func' to +# support other models +# v0.3.0, 2015/06/02, Aaron LI +# * can output chi-squared and dof values +# * can output one standard deviation errors on the parameters +# v0.2.0, 2015/05/30: +# * Added option '-n' / '--no-radius' to ignore radius less than the +# given value. +# * Support read model initial parameter values from input file. 
+# +# TODO: +# * calculate fitting parameter's confidence interval / confidence bounds +# * to normalize fitting paramters to be the same order of magnitude +# for better minimization +# Ref: http://stackoverflow.com/questions/21369139/normalization-for-optimization-in-python +# + + +from __future__ import print_function, division + +__version__ = "0.5.1" +__date__ = "2015/06/07" + +import numpy as np +from scipy.optimize import curve_fit, minimize + +import os +import sys +import re +import argparse + + +# modes of to cut data +CUT_POINT = 'CUT_POINT' +CUT_RADIUS = 'CUT_RADIUS' +# default minimize method +MINIMIZE_METHOD = 'L-BFGS-B' + + +def beta_model(r, s0, rc, beta, c): + """ + SBP beta model, with a constant background. + """ + return s0 * np.power((1.0+(r/rc)**2), 0.5-3*beta) + c + + +def calc_chisq(func, xdata, ydata, yerrdata, *args): + """ + Calculate the chi-squared values for the given function according + to the provided data points. + + Arguments: + xdata: x values of data points + ydata: y values of data points + yerrdata: y standard deviation values + args: additional arguments for 'func' + + Return: + chi-squared value + """ + xdata = np.array(xdata) + ydata = np.array(ydata) + yerrdata = np.array(yerrdata) + return np.sum(((ydata - func(xdata, *args)) / yerrdata) ** 2) + + +def fit_model(func, xdata, ydata, yerrdata, p0): + """ + Fit the provided data with the beta model. + + Arguments: + p0: initial values for the parameters of beta model + + Return: + (popt, infodict) + popt: optimal values for the parameters + infodict: + * fvec: the function evaluated at the output parameters + * dof: degree of freedom + * chisq: chi-squared + * perr: one standard deviation errors on the parameters + """ + popt, pcov = curve_fit(func, xdata, ydata, p0=p0, sigma=yerrdata) + # the function evaluated at the output parameters + fvec = lambda x: func(x, *popt) + # degree of freedom + dof = len(xdata) - len(popt) - 1 + # chi squared + chisq = np.sum(((ydata - fvec(xdata)) / yerrdata) ** 2) + # one standard deviation errors on the parameters + perr = np.sqrt(np.diag(pcov)) + infodict = { + 'fvec': fvec, + 'dof': dof, + 'chisq': chisq, + 'perr': perr + } + return (popt, infodict) + + +def fit_model_bounds(func, xdata, ydata, yerrdata, p0=None, + bounds=None, options=None): + """ + Fit the provided data with the beta model, and apply parameter + bounds requirements. + Using 'scipy.optimize.minimize'. 
+ + Arguments: + p0: initial values for the parameters of beta model + bounds: (min, max) pairs for each parameter bound + options: a dict of solver options (=> minimize: options) + + Return: + (popt, infodict) + popt: optimal values for the parameters + infodict: + * fvec: the function evaluated at the output parameters + * dof: degree of freedom + * chisq: chi-squared + * perr: one standard deviation errors on the parameters + """ + # objective function to be minimized, required format of 'f(x, *args)' + f = lambda p: calc_chisq(func, xdata, ydata, yerrdata, *p) + # minimize the given function using 'scipy.optimize.minimize' with bounds + res = minimize(f, p0, method=MINIMIZE_METHOD, bounds=bounds, + options=options) + popt = res.x + #print("DEBUG: minimization results:\n", res, file=sys.stderr) + + # check minimization results + if not res.success: + print("*** WARNING: minimization exited with error: ***\n" + \ + "*** %s ***" % res.message, file=sys.stderr) + + # the function evaluated at the output parameters + fvec = lambda x: func(x, *popt) + # degree of freedom + dof = len(xdata) - len(popt) - 1 + # chi squared + chisq = res.fun + # one standard deviation errors on the parameters + perr = popt * 0.0 # FIXME + infodict = { + 'fvec': fvec, + 'dof': dof, + 'chisq': chisq, + 'perr': perr + } + return (popt, infodict) + + +def cut_data(xdata, ydata, yerrdata, cutmode=CUT_POINT, cutvalue=0): + """ + Cut the given data with the provided cutmode and cutvalue, + return the cut data. + + Arguments: + xdata, ydata, yerrdata: input data (x, y, yerr) + cutmode: 'point' / 'radius'; ignore data by number of data points, + or by radius value less than the given value + cutvalue: the cut limit + + Return: + (xdata_cut, ydata_cut, yerrdata_cut) + """ + if cutmode == CUT_POINT: + xdata_cut = xdata[cutvalue:] + ydata_cut = ydata[cutvalue:] + yerrdata_cut = yerrdata[cutvalue:] + elif cutmode == CUT_RADIUS: + ii = xdata >= cutvalue + xdata_cut = xdata[ii] + ydata_cut = ydata[ii] + yerrdata_cut = yerrdata[ii] + else: + raise ValueError('Unknown cut mode: %s' % cutmode) + return (xdata_cut, ydata_cut, yerrdata_cut) + + +def get_parameter(pstring): + """ + Process the parameter string of the following format, and return + the initial value, lower limit, and upper limit. + "init_value" + "init_value lower upper" + "init_value,lower,upper" + If want to ignore the lower/upper limit, use 'None' (case-insensitive), + and the None is returned. 
+ """ + parameters = pstring.replace(',', ' ').split() + if len(parameters) == 1: + init_value = float(parameters[0]) + return (init_value, None, None) + elif len(parameters) == 3: + init_value = float(parameters[0]) + if parameters[1].upper() == 'NONE': + lower_value = None + else: + lower_value = float(parameters[1]) + if parameters[2].upper() == 'NONE': + upper_value = None + else: + upper_value = float(parameters[2]) + return (init_value, lower_value, upper_value) + else: + raise ValueError('Invalid parameter format: %s' % pstring) + + +def main(): + # options + infile = None + outfilename= None + cutmode = CUT_POINT # ignore data by number of data points + cutvalue = 0 # do not ignore any data by default + # initial values for the four parameters of the beta model + s0_0 = 1.0e-7 + rc_0 = 10.0 + beta_0 = 0.6 + c_0 = 0.0 + # default bounds for the four parameters + s0_lower, s0_upper = None, None + rc_lower, rc_upper = None, None + beta_lower, beta_upper = None, None + c_lower, c_upper = None, None + + # parser for command line options and arguments + parser = argparse.ArgumentParser( + description="Fitting provided data with the beta model.", + epilog="Version: %s (%s)" % (__version__, __date__)) + parser.add_argument("-v", "--verbose", dest="verbose", + action="store_true", default=False, + help="show verbose/debug information (False)") + parser.add_argument("-V", "--version", action="version", + version="%(prog)s " + "%s (%s)" % (__version__, __date__)) + parser.add_argument("-i", "--infile", + dest="infile", required=True, + help="""input data file with the following 4 or 3 columns: + [radius radius_err brightness brightness_err], + [radius brightness brightness_err]. + Note: The initial values and lower/upper limits + for the beta models can also be provided with the + following syntax: + # s0_0 = init_value, lower_limit, upper_limit + # rc_0 = init_value, lower_limit, upper_limit + # beta_0 = init_value, lower_limit, upper_limit + # c_0 = init_value, lower_limit, upper_limit""") + parser.add_argument("-o", "--outfile", dest="outfilename", + help="output file to store the fitted data") + parser.add_argument("-b", "--bounds", dest="bounds", + action="store_true", default=False, + help="whether apply paramenter bounds (False)") + parser.add_argument("-c", "--cut-point", + dest="cut_point", metavar="N", type=int, default=0, + help="number of inner-most data points to be ignored") + parser.add_argument("-n", "--no-radius", + dest="cut_radius", metavar="RADIUS", type=float, default=0.0, + help="ignore data points with smaller radius") + parser.add_argument("--s0", dest="s0", + metavar="init_value,lower,upper", + help="initial value and lower/upper limits for parameter s0. " + \ + "Use 'none' (case-insensitive) to ignore the limit") + parser.add_argument("--rc", dest="rc", + metavar="init_value,lower,upper", + help="initial value and lower/upper limits for parameter rc. " + \ + "Use 'none' (case-insensitive) to ignore the limit") + parser.add_argument("--beta", dest="beta", + metavar="init_value,lower,upper", + help="initial value and lower/upper limits for parameter beta. " + \ + "Use 'none' (case-insensitive) to ignore the limit") + parser.add_argument("--const", dest="const", + metavar="init_value,lower,upper", + help="initial value and lower/upper limits for parameter const. 
" + \ + "Use 'none' (case-insensitive) to ignore the limit") + + args = parser.parse_args() + if args.outfilename: + outfile = open(args.outfilename, 'w') + else: + outfile = sys.stdout + # cut mode and value + if args.cut_point: + cutmode = CUT_POINT + cutvalue = args.cut_point + elif args.cut_radius: + cutmode = CUT_RADIUS + cutvalue = args.cut_radius + + if args.verbose: + print('DEBUG: apply parameter bounds: %s' % args.bounds, + file=sys.stderr) + print("DEBUG: cutmode: %s, cutvalue: %s" % (cutmode, cutvalue)) + + # input data list + r_data = [] + rerr_data = [] + s_data = [] + serr_data = [] + + # regex to match initial parameter names, blank line, and comment line + re_blank = re.compile(r'^\s*$') + re_comment = re.compile(r'^\s*#') + re_s0 = re.compile(r'^\s*#\s*s0_0\s*[:=]') + re_rc = re.compile(r'^\s*#\s*rc_0\s*[:=]') + re_beta = re.compile(r'^\s*#\s*beta_0\s*[:=]') + re_c = re.compile(r'^\s*#\s*c_0\s*[:=]') + for line in open(args.infile, 'r'): + if re_s0.match(line): + # read 's0_0': initial value for parameter 's0' + s0_pstring = re_s0.split(line)[1] + s0_0, s0_lower, s0_upper = get_parameter(s0_pstring) + elif re_rc.match(line): + # read 'rc_0': initial value for parameter 'rc' + rc_pstring = re_rc.split(line)[1] + rc_0, rc_lower, rc_upper = get_parameter(rc_pstring) + elif re_beta.match(line): + # read 'beta_0': initial value for parameter 'beta' + beta_pstring = re_beta.split(line)[1] + beta_0, beta_lower, beta_upper = get_parameter(beta_pstring) + elif re_c.match(line): + # read 'c_0': initial value for parameter 'c' + c_pstring = re_c.split(line)[1] + c_0, c_lower, c_upper = get_parameter(c_pstring) + elif re_blank.match(line): + # ignore blank line + continue + elif re_comment.match(line): + # ignore comment line + continue + else: + try: + r, rerr, s, serr = map(float, line.split()) + except ValueError: + try: + r, s, serr = map(float, line.split()) + rerr = 0.0 + except ValueError: + print('ERROR: unsupported input data format', + file=sys.stderr) + sys.exit(21) + r_data.append(r) + rerr_data.append(rerr) + s_data.append(s) + serr_data.append(serr) + + if args.verbose: + print('DEBUG: infile: s0_0 = %g (%s, %s)' % \ + (s0_0, s0_lower, s0_upper), file=sys.stderr) + print('DEBUG: infile: rc_0 = %g (%s, %s)' % \ + (rc_0, rc_lower, rc_upper), file=sys.stderr) + print('DEBUG: infile: beta_0 = %g (%s, %s)' % \ + (beta_0, beta_lower, beta_upper), file=sys.stderr) + print('DEBUG: infile: c_0 = %g (%s, %s)' % \ + (c_0, c_lower, c_upper), file=sys.stderr) + + # get parameter initial values and bounds from command line arguments + if args.s0: + s0_0, s0_lower, s0_upper = get_parameter(args.s0) + if args.rc: + rc_0, rc_lower, rc_upper = get_parameter(args.rc) + if args.beta: + beta_0, beta_lower, beta_upper = get_parameter(args.beta) + if args.const: + c_0, c_lower, c_upper = get_parameter(args.const) + + if args.verbose: + print('DEBUG: final: s0_0 = %g (%s, %s)' % \ + (s0_0, s0_lower, s0_upper), file=sys.stderr) + print('DEBUG: final: rc_0 = %g (%s, %s)' % \ + (rc_0, rc_lower, rc_upper), file=sys.stderr) + print('DEBUG: final: beta_0 = %g (%s, %s)' % \ + (beta_0, beta_lower, beta_upper), file=sys.stderr) + print('DEBUG: final: c_0 = %g (%s, %s)' % \ + (c_0, c_lower, c_upper), file=sys.stderr) + + # convert to numpy array + r_data = np.array(r_data) + rerr_data = np.array(rerr_data) + s_data = np.array(s_data) + serr_data = np.array(serr_data) + # cut data + r_data_cut, s_data_cut, serr_data_cut = cut_data(r_data, s_data, + serr_data, cutmode=cutmode, cutvalue=cutvalue) + + # 
model parameters + par_names = ["s0", "rc", "beta", "c"] + # initial values + par_0 = np.array([s0_0, rc_0, beta_0, c_0]) + # parameter bounds + par_bounds = np.array([(s0_lower, s0_upper), (rc_lower, rc_upper), + (beta_lower, beta_upper), (c_lower, c_upper)]) + # set eps for the parameters (required for the minimize method, + # otherwise error 'ABNORMAL_TERMINATION_IN_LNSRCH' occurs, which + # may due to the different order of magnitude of each parameters) + par_eps = np.absolute(par_0) * 1e-3 + par_eps[ par_eps<1e-15 ] = 1e-15 # set par_eps >= 1e-14 + if args.verbose: + print("DEBUG: parameters eps:\n", par_eps, file=sys.stderr) + + if args.bounds: + ## 'fit_model_bounds' to perform fitting with bounds + par_fit, infodict = fit_model_bounds(beta_model, r_data_cut, + s_data_cut, serr_data_cut, p0=par_0, bounds=par_bounds, + options={'eps': par_eps}) + else: + # 'fit_model' do not support parameter bounds + par_fit, infodict = fit_model(beta_model, r_data_cut, + s_data_cut, serr_data_cut, p0=par_0) + + fvec = infodict['fvec'] + dof = infodict['dof'] + chisq = infodict['chisq'] + perr = infodict['perr'] + + print("# beta-model fitting results:", file=outfile) + print("# s(r) = s0 * pow((1.0+(r/rc)^2), 0.5-3*beta) + c", file=outfile) + for i in range(len(par_names)): + print("# %s = %g +/- %g" % (par_names[i], par_fit[i], perr[i]), + file=outfile) + print("# chisq / dof = %g / %g = %g" % (chisq, dof, chisq/dof), + file=outfile) + print("# radius(input) brightness(fitted)", file=outfile) + for i in range(len(r_data)): + print("%g %g" % (r_data[i], fvec(r_data[i])), file=outfile) + + if args.outfilename: + outfile.close() + + +if __name__ == '__main__': + main() + diff --git a/astro/lc_clean.py b/astro/lc_clean.py new file mode 100755 index 0000000..7141002 --- /dev/null +++ b/astro/lc_clean.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Aaron LI +# Created: 2016-01-16 +# Updated: 2016-01-16 +# + +""" +Clean the lightcurve by fitting the RATE data with a Gaussian model, +and discard the time bins with RATE beyond [mean-n*sigma, mean+n*sigma]. +""" + +__version__ = "0.1.0" +__date__ = "2016-01-16" + +import sys +import argparse + +from astropy.io import fits +import numpy as np + + +class LightCurve: + """ + X-ray data light curve class + """ + def __init__(self, lcfile): + f = fits.open(lcfile) + self.lc_data = f[1].data + self.lc_header = f[1].header + self.time = self.lc_data['TIME'] + self.rate = self.lc_data['RATE'] + self.rate_err = self.lc_data['ERROR'] + self.TSTART = self.lc_header['TSTART'] + self.TSTOP = self.lc_header['TSTOP'] + self.TIMEDEL = self.lc_header['TIMEDEL'] + self.TIMEPIXR = self.lc_header['TIMEPIXR'] + f.close() + + def sigma_clip(self, nsigma=3, maxiter=10): + """ + Iteratively clip the time bins whose value lie beyond the + range [mean-n*sigma, mean+n*sigma]. 
+ """ + rate = self.rate + keep_idx = np.ones(rate.shape, dtype=bool) # all True's + keep_num = np.sum(keep_idx) + keep_num0 = np.inf + i = 0 + while (keep_num < keep_num0): + if (i >= maxiter): + print("WARNING: maximum iteration limit reached", + file=sys.stderr) + break + keep_num0 = keep_num + i += 1 + mean = np.mean(rate[keep_idx]) + sigma = np.std(rate[keep_idx]) + cut_low = mean - nsigma * sigma + cut_high = mean + nsigma * sigma + keep_idx = np.logical_and((rate >= cut_low), (rate <= cut_high)) + keep_num = np.sum(keep_idx) + # save clip results + self.niter = i + self.keep_idx = keep_idx + self.time_clipped = self.time[keep_idx] + self.rate_clipped = self.rate[keep_idx] + + def make_gti(self, apply_header=True): + """ + Make new GTIs (good time intervals) according to the clipped + time bins. + """ + frac = 0.01 # TIMEDEL fraction to distingush two time bins + gti_start = [] + gti_stop = [] + time_start = self.time_clipped + time_stop = time_start + self.TIMEDEL + # first GTI start time + gti_start.append(time_start[0]) + for tstart, tstop in zip(time_start[1:], time_stop[:-1]): + if (np.abs(tstart-tstop) <= frac * self.TIMEDEL): + # time bin continues + continue + else: + # a new GTI start + gti_start.append(tstart) + gti_stop.append(tstop) + # last GTI stop time + gti_stop.append(time_stop[-1]) + # convert to numpy array + gti_start = np.array(gti_start) + gti_stop = np.array(gti_stop) + if apply_header: + # add TSTART to the time + gti_start += self.TSTART + gti_stop += self.TSTART + # save results + self.gti_start = gti_start + self.gti_stop = gti_stop + + def write_gti(self, filename=None, header=True): + """ + Write generated GTIs to file or screen (default) + """ + if isinstance(filename, str): + outfile = open(filename, 'w') + else: + outfile = sys.stdout + # + if header: + outfile.write('# TSTART\tTSTOP\n') + outfile.write('\n'.join([ '%s\t%s' % (tstart, tstop) \ + for tstart, tstop in zip(self.gti_start, self.gti_stop) ])) + # + if isinstance(filename, str): + outfile.close() + + +def main(): + parser = argparse.ArgumentParser( + description="Clean light curve by sigma clipping") + parser.add_argument("-V", "--version", action="version", + version="%(prog)s " + "%s (%s)" % (__version__, __date__)) + parser.add_argument("infile", + help="input lightcurve file; contains [TIME, RATE] columns") + parser.add_argument("outfile", nargs='?', default=None, + help="output text-format GTI file; for XSELECT filter time") + parser.add_argument("-s", "--nsigma", dest="nsigma", type=float, + default=2.0, help="sigma clipping significant level") + parser.add_argument("-H", "--no-header", dest="noheader", + action="store_true", help="not write header to the output file") + parser.add_argument("-v", "--verbose", dest="verbose", + action="store_true", help="show verbose information") + args = parser.parse_args() + + lc = LightCurve(args.infile) + lc.sigma_clip(nsigma=args.nsigma) + lc.make_gti(apply_header=True) + lc.write_gti(filename=args.outfile, header=(not args.noheader)) + if args.verbose: + exposure = np.sum(lc.gti_stop - lc.gti_start) + print("# Total GTI: %.2f (s)" % exposure) + + +if __name__ == "__main__": + main() + + +# vim: set ts=4 sw=4 tw=0 fenc=utf-8 ft=python: # diff --git a/astro/marx/marx_pntsrc.py b/astro/marx/marx_pntsrc.py new file mode 100755 index 0000000..97b1575 --- /dev/null +++ b/astro/marx/marx_pntsrc.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Aaron LI +# 2015/06/16 + +""" +Run MARX simulation on a given list of point sources, 
merge the +output simulation results, and finally convert into FITS image. +""" + +__version__ = "0.1.0" +__date__ = "2015/06/16" + +import sys +import argparse +import subprocess +import re +import os + + +def marx_pntsrc(pfile, ra, dec, flux, outdir): + """ + Run MARX simulation for the provided point source. + """ + cmd = "marx @@%(pfile)s SourceRA=%(ra)s " % {"pfile": pfile, "ra": ra} + \ + "SourceDEC=%(dec)s SourceFlux=%(flux)s OutputDir=%(outdir)s" % \ + {"dec": dec, "flux": flux, "outdir": outdir} + print("CMD: %s" % cmd, file=sys.stderr) + subprocess.call(cmd, shell=True) + + +def marxcat(indirs, outdir): + """ + Concatenate a list of MARX simulation results. + + Note: the number of MARX results to be concatenated at *one* time + can not be to many, otherwise the 'marxcat' tool will failed. + """ + if isinstance(indirs, list): + pass + elif isinstance(indirs, str): + indirs = indirs.split() + else: + raise ValueError("invalid indirs type: %s" % indirs) + pid = os.getpid() + tempdir = "_marx_tmp%d" % pid + cmd = "cp -a %(marxdir)s %(tempdir)s" % \ + {"marxdir": indirs[0], "tempdir": tempdir} + print("CMD: %s" % cmd, file=sys.stderr) + subprocess.call(cmd, shell=True) + del indirs[0] + while len(indirs) > 0: + # concatenated 10 directories each time + catdirs = indirs[:9] + del indirs[:9] + catdirs = tempdir + " " + " ".join(catdirs) + # concatenate MARX results + cmd = "marxcat %(catdirs)s %(outdir)s" % \ + {"catdirs": catdirs, "outdir": outdir} + print("CMD: %s" % cmd, file=sys.stderr) + subprocess.call(cmd, shell=True) + # move output results to temporary directory + cmd = "rm -rf %(tempdir)s && mv %(outdir)s %(tempdir)s" % \ + {"tempdir": tempdir, "outdir": outdir} + print("CMD: %s" % cmd, file=sys.stderr) + subprocess.call(cmd, shell=True) + cmd = "mv %(tempdir)s %(outdir)s" % \ + {"tempdir": tempdir, "outdir": outdir} + print("CMD: %s" % cmd, file=sys.stderr) + subprocess.call(cmd, shell=True) + + +def marx2fits(indir, outfile, params=""): + """ + Convert the results of MARX simulation into FITS image. 
+ """ + cmd = "marx2fits %(params)s %(indir)s %(outfile)s" % \ + {"params": params, "indir": indir, "outfile": outfile} + print("CMD: %s" % cmd, file=sys.stderr) + subprocess.call(cmd, shell=True) + + +def main(): + parser = argparse.ArgumentParser( + description="Run MARX on a given list of point sources") + parser.add_argument("-V", "--version", action="version", + version="%(prog)s " + "%s (%s)" % (__version__, __date__)) + parser.add_argument("pfile", help="marx paramter file") + parser.add_argument("srclist", help="point source list file") + parser.add_argument("outprefix", help="prefix of output directories") + args = parser.parse_args() + + outdirs = [] + i = 0 + for line in open(args.srclist, "r"): + if re.match(r"^\s*$", line): + # skip blank line + continue + elif re.match(r"^\s*#", line): + # skip comment line + continue + i += 1 + ra, dec, flux = map(float, line.split()) + print("INFO: ra = %g, dec = %g, flux = %g" % (ra, dec, flux), + file=sys.stderr) + outdir = "%sp%03d" % (args.outprefix, i) + print("INFO: outdir = %s" % outdir, file=sys.stderr) + outdirs.append(outdir) + marx_pntsrc(args.pfile, ra, dec, flux, outdir) + # merge results + merged = args.outprefix + "merged" + marxcat(outdirs, merged) + # convert to FITS image + merged_fits = merged + ".fits" + marx2fits(merged, merged_fits) + + +if __name__ == "__main__": + main() + diff --git a/astro/marx/randpoints.py b/astro/marx/randpoints.py new file mode 100755 index 0000000..da0bedc --- /dev/null +++ b/astro/marx/randpoints.py @@ -0,0 +1,327 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Aaron LI +# Created: 2015-12-03 +# Updated: 2015-12-05 +# +# ChangeLog: +# 2015-12-05: +# * Add class "RegionDS9" to parse region +# 2015-12-10: +# * Support both "erg/cm^2/s" and "photon/cm^2/s" flux units +# * Add argument "--outregion" to also save a region file +# + +""" +Generate random point source information for MARX simulation. +And make a point source data list for "marx_pntsrc.py" usage. +""" + +__version__ = "0.3.1" +__date__ = "2015-12-10" + + +import sys +import argparse +import re +import numpy as np + + +class RegionDS9: + """ + Process DS9 regions. + """ + def __init__(self, shape=None, xc=None, yc=None, + width=None, height=None, rotation=None): + self.shape = shape + self.xc = xc + self.yc = yc + self.width = width + self.height = height + self.rotation = rotation + + def parse(self, region): + """ + Parse a DS9 region string and update the instance. + + Region syntax: + box(xc,yc,width,height,rotation) + + Note: + "width", "height" may have a '"' suffix which means "arcsec" instead + of "degree". + """ + re_box = re.compile(r'^\s*(?P<shape>box)\(\s*(?P<xc>[\d.-]+)\s*,\s*(?P<yc>[\d.-]+)\s*,\s*(?P<width>[\d.-]+"?)\s*,\s*(?P<height>[\d.-]+"?)\s*,\s*(?P<rotation>[\d.-]+)\s*\).*$', re.I) + m_box = re_box.match(region) + if m_box is not None: + self.shape = "box" + self.xc = float(m_box.group("xc")) + self.yc = float(m_box.group("yc")) + self.width = self.parse_dms(m_box.group("width")) + self.height = self.parse_dms(m_box.group("height")) + self.rotation = float(m_box.group("rotation")) + else: + raise NotImplementedError("Only 'box' region supported") + + @staticmethod + def parse_dms(ms): + """ + Parse a value in format ?'?" into degree. 
+ """ + re_arcmin = re.compile(r'^\s*(?P<arcmin>[\d.]+)\'.*') + re_arcsec = re.compile(r'^([^\']*\'|)\s*(?P<arcsec>[\d.]+)".*') + m_arcmin = re_arcmin.match(ms) + m_arcsec = re_arcsec.match(ms) + degree = 0.0 + if m_arcmin is not None: + degree += float(m_arcmin.group("arcmin")) / 60.0 + if m_arcsec is not None: + degree += float(m_arcsec.group("arcsec")) / 3600.0 + return degree + + +class RandCoord: + """ + Randomly generate the coordinates of point sources within a given box + region for MARX simulation. + + Arguments: + xc - central X position of the box (degree) + yc - central Y position of the box (degree) + width - width of the box (degree) + height - height of the box (degree) + mindist - minimum distance between each generated coordinate (degree) + """ + + def __init__(self, xc, yc, width, height, mindist=0): + self.xc = xc + self.yc = yc + self.width = width + self.height = height + self.mindist = mindist + # Record the generated coordinates: [(x1,y1), (x2,y2), ...] + self.xy = [] + + def clear(self): + """ + Clear previously generated coordinates. + """ + self.xy = [] + + def generate(self, n=1): + """ + Generate random coordinates. + """ + coord = [] + xmin = self.xc - 0.5 * self.width + xmax = self.xc + 0.5 * self.width + ymin = self.yc - 0.5 * self.height + ymax = self.yc + 0.5 * self.height + i = 0 + while i < n: + x = np.random.uniform(low=xmin, high=xmax) + y = np.random.uniform(low=ymin, high=ymax) + if self.checkDistance((x, y)): + i += 1 + coord.append((x, y)) + self.xy.append((x, y)) + return coord + + def checkDistance(self, coord): + """ + Check whether the given coordinate has a distance larger than + the specified "mindist" + """ + if len(self.xy) == 0: + return True + else: + xy = np.array(self.xy) # each row represents one coordinate + dist2 = (xy[:, 0] - coord[0])**2 + (xy[:, 1] - coord[1])**2 + if all(dist2 >= self.mindist**2): + return True + else: + return False + + +class RandFlux: + """ + Randomly generate the flux of point sources for MARX simulation. + + Arguments: + fmin - minimum flux + fmax - maximum flux + """ + + def __init__(self, fmin, fmax): + self.fmin = fmin + self.fmax = fmax + + @staticmethod + def fluxDensity(S): + """ + The *differential* number count - flux function: dN(>S)/dS + i.e., density function + + Broken power law: + dN/dS = (1) K (S/S_ref)^(-gamma_1); (S < S_b) + (2) K (S_b/S_ref)^(gamma_2-gamma_1) (S/S_ref)^(-gamma_2); (S >= S_b) + K: normalization constant + S_ref: normalization flux; [10^-15 erg/cm^2/s] + gamma_1: faint power-law index + gamma_2: bright power-law index + S_b: break flux; [10^-15 erg/cm^2/s] + + Reference: + [1] Kim et al. 
2007, ApJ, 659, 29 + http://adsabs.harvard.edu/abs/2007ApJ...659...29K + http://hea-www.cfa.harvard.edu/CHAMP/PUBLICATIONS/ChaMP_ncounts.pdf + Table 4: ChaMP: 9.6 [deg^2]: 0.5-8 [keV]: 1.4 (photon index) + Differential number count; broken power law + K (normalization constant): 1557 (+28 / -50) + S_ref (normalization flux): 1.0 [10^-15 erg/cm^2/s] + gamma_1 (faint power-law index): 1.64 (+/- 0.01) + gamma_2 (bright power-law index): 2.48 (+/- 0.05) + S_b (break flux): 22.9 (+/- 1.6) [10^-15 erg/cm^2/s] + f_min (faint flux limit): 0.69 [10^-15 erg/cm^2/s] + f_max (bright flux limit): 6767.74 [10^-15 erg/cm^2/s] + """ + K = 1557 # normalization constant: 1557 (+28 / -50) + S_ref = 1.0 # normalization flux: 1.0 [10^-15 erg/cm^2/s] + gamma_1 = 1.64 # faint power-law index: 1.64 (+/- 0.01) + gamma_2 = 2.48 # bright power-law index: 2.48 (+/- 0.05) + S_b = 22.9 # break flux: 22.9 (+/- 1.6) [10^-15 erg/cm^2/s] + # Adjust unit/magnitude + S = S / 1e-15 # => unit: 10^-15 erg/cm^2/s + if isinstance(S, np.ndarray): + Np = np.zeros(S.shape) + Np[S<=0] = 0.0 + Np[S<=S_b] = K * (S[S<=S_b] / S_ref)**(-gamma_1) + Np[S>S_b] = K * (S_b/S_ref)**(gamma_2-gamma_1) * (S[S>S_b] / S_ref)**(-gamma_2) + else: + # "S" is a single number + if S <= 0.0: + Np = 0.0 + elif S <= S_b: + Np = K * (S/S_ref)**(-gamma_1) + else: + Np = K * (S_b/S_ref)**(gamma_2-gamma_1) * (S/S_ref)**(-gamma_2) + # + return Np + + def generate(self, n=1): + """ + Generate a sample of luminosity values within [min, max] from + the above luminosity distribution. + """ + results = [] + # Get the maximum value of the flux number density function, + # which is a monotonically decreasing. + M = self.fluxDensity(self.fmin) + for i in range(n): + while True: + u = np.random.uniform() * M + y = 10 ** np.random.uniform(low=np.log10(self.fmin), + high=np.log10(self.fmax)) + if u <= self.fluxDensity(y): + results.append(y) + break + return results + + +def main(): + parser = argparse.ArgumentParser( + description="Randomly generate point sources information for MARX") + parser.add_argument("-V", "--version", action="version", + version="%(prog)s " + "v%s (%s)" % (__version__, __date__)) + parser.add_argument("-n", "--number", dest="number", type=int, default=1, + help="number of point sources (default: 1)") + parser.add_argument("-m", "--fmin", dest="fmin", + type=float, default=1e-15, + help="minimum flux (default: 1e-15 erg/cm^2/s)") + parser.add_argument("-M", "--fmax", dest="fmax", + type=float, default=6000e-15, + help="maximum flux (default: 6000e-15 erg/cm^2/s)") + parser.add_argument("-r", "--region", dest="region", required=True, + help="region within which to generate coordinates ('box' only)") + parser.add_argument("-d", "--distance", dest="distance", default="0", + help="minimum distance between coordinates (default: 0) [unit: deg/arcmin]") + parser.add_argument("-u", "--unit", dest="unit", default="erg", + help="unit for input and output flux; 'erg' (default) / 'photon'") + parser.add_argument("-f", "--factor", dest="factor", type=float, + help="conversion factor from 'photon/cm^s/s' to 'erg/cm^2/s' (required if unit='photon')") + parser.add_argument("-o", "--outfile", dest="outfile", + help="output file to save the generate information list") + parser.add_argument("-O", "--outregion", dest="outregion", + help="write the generate information list as a DS9 region file") + + args = parser.parse_args() + + # Check flux unit + if args.unit == "erg": + unit = "erg/cm^2/s" + fmin = args.fmin + fmax = args.fmax + factor = 1.0 + elif args.unit == 
"photon": + unit = "photon/cm^2/s" + factor = args.factor + try: + fmin = args.fmin / factor + fmax = args.fmax / factor + except NameError: + raise ValueError("argument '--factor' required") + else: + raise ValueError("unsupported flux unit") + + region = RegionDS9() + region.parse(args.region) + # Check the box rotation + if not (abs(region.rotation) <= 1.0 or abs(region.rotation-360) <= 1.0): + raise NotImplementedError("rotated 'box' region not supported") + + # Minimum distance between generated coordinates + try: + mindist = float(args.distance) + except ValueError: + mindist = region.parse_dms(args.distance) + + randcoord = RandCoord(region.xc, region.yc, region.width, region.height, + mindist=mindist) + randflux = RandFlux(fmin, fmax) + coord = randcoord.generate(n=args.number) + flux = randflux.generate(n=args.number) + + if args.outfile: + outfile = open(args.outfile, "w") + else: + outfile = sys.stdout + + print("# region: %s" % args.region, file=outfile) + print("# mindist: %.9f [deg]" % mindist, file=outfile) + print("# f_min: %.9g; f_max: %.9g [%s]" % (fmin, fmax, unit), file=outfile) + print("# factor: %g [photon/cm^2/s] / [erg/cm^2/s]" % factor, file=outfile) + print("# R.A.[deg] Dec.[deg] Flux[%s]" % unit, file=outfile) + for ((ra, dec), f) in zip(coord, flux): + print("%.9f %.9f %.9g" % (ra, dec, f*factor), file=outfile) + + if args.outfile: + outfile.close() + + # Save the generated information as a DS9 region file if specified + if args.outregion: + reg_r = '3"' + reg_header = ["# Region file format: DS9 version 4.1", "fk5"] + regions = [ + "circle(%.9f,%.9f,%s) # text={%.9g}" % \ + (ra, dec, reg_r, f*factor) \ + for ((ra, dec), f) in zip(coord, flux) + ] + regfile = open(args.outregion, "w") + regfile.write("\n".join(reg_header + regions)) + regfile.close() + + +if __name__ == "__main__": + main() + diff --git a/astro/query_ned.py b/astro/query_ned.py new file mode 100755 index 0000000..959169f --- /dev/null +++ b/astro/query_ned.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# References: +# [1] astroquery: NedClass +# https://astroquery.readthedocs.org/en/latest/api/astroquery.ned.NedClass.html +# +# Change log: +# 2016-05-25: +# * Also output RA, DEC results +# * Update argument process +# * Simplify queried results process +# * Improve comments a bit +# * Some PEP8 fixes +# +# TODO: +# * allow to query by coordinates & radius range +# * filter queried results according to the type/other... +# * if not queried by name, then try query by coordinates +# + +""" +Query NED with the provided name or coordinate. +NASA/IPAC Extragalactic Database: http://ned.ipac.caltech.edu/ +""" + +import sys +import argparse +import csv +from collections import OrderedDict + +from astroquery.ned import Ned +from astroquery.exceptions import RemoteServiceError +# from astropy import coordinates +# import astropy.units as u + + +__version__ = "0.2.2" +__date__ = "2016-05-25" + + +# Ned configurations +Ned.TIMEOUT = 20 + + +def query_name(name, verbose=False): + """ + Query NED by source name. 
+ """ + try: + q = Ned.query_object(name) + objname = q["Object Name"][0].decode("utf-8") + objtype = q["Type"][0].decode("utf-8") + ra = q["RA(deg)"][0] + dec = q["DEC(deg)"][0] + velocity = q["Velocity"][0] + z = q["Redshift"][0] + z_flag = q["Redshift Flag"][0].decode("utf-8") + refs = q["References"][0] + notes = q["Notes"][0] + if verbose: + print("%s: %s,%s,%s,%s,%s,%s,%s,%s,%s" % + (name, objname, objtype, ra, dec, velocity, z, z_flag, + refs, notes), + file=sys.stderr) + except RemoteServiceError as e: + objname = None + objtype = None + ra = None + dec = None + velocity = None + z = None + z_flag = None + refs = None + notes = None + if verbose: + print("*** %s: not found ***" % name, file=sys.stderr) + # + results = OrderedDict([ + ("Name", name), + ("NED_Name", objname), + ("Type", objtype), + ("RA", ra), + ("DEC", dec), + ("Velocity", velocity), + ("z", z), + ("z_Flag", z_flag), + ("References", refs), + ("Notes", notes), + ]) + return results + + +def main(): + parser = argparse.ArgumentParser( + description="Query NED database by source name") + parser.add_argument("-V", "--version", action="version", + version="%(prog)s " + "%s (%s)" % (__version__, + __date__)) + parser.add_argument("-v", "--verbose", dest="verbose", + action="store_true", + help="show verbose information") + parser.add_argument("-i", "--input", dest="input", required=True, + help="source names to be queried (sep by comma); " + + "or a file contains the names (one per line)") + parser.add_argument("-o", "--output", dest="output", default=sys.stdout, + help="output CSV file with queried data") + args = parser.parse_args() + + try: + names = map(str.strip, open(args.input).readlines()) + except FileNotFoundError: + names = map(str.strip, args.input.split(",")) + + results_list = [] + + for name in names: + qr = query_name(name, verbose=args.verbose) + results_list.append(qr) + + try: + of = open(args.output, "w") + except TypeError: + of = args.output + writer = csv.writer(of) + writer.writerow(results_list[0].keys()) + for res in results_list: + writer.writerow(res.values()) + if of is not sys.stdout: + of.close() + + +if __name__ == "__main__": + main() + + +# vim: set ts=4 sw=4 tw=0 fenc=utf-8 ft=python: # diff --git a/astro/query_simbad.py b/astro/query_simbad.py new file mode 100755 index 0000000..4e7ccd7 --- /dev/null +++ b/astro/query_simbad.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# NOTE: +# * SimbadClass +# https://astroquery.readthedocs.org/en/latest/api/astroquery.simbad.SimbadClass.html +# * All available VOTable fields: +# http://simbad.u-strasbg.fr/simbad/sim-help?Page=sim-fscript#VotableFields +# +# ChangeLog: +# 2016-01-14: +# * Add 'z_value' +# +# TODO: +# * allow to query by coordinates & radius range +# * filter queryed results according to the type/other... +# * if not queryed by name, then try query by coordinates +# + +""" +Query SIMBAD with the provided name or coordinate. +http://simbad.u-strasbg.fr/simbad/ +""" + +__version__ = "0.1.1" +__date__ = "2016-01-14" + + +import sys +import argparse +import csv + +from astroquery.simbad import Simbad +from astropy import coordinates +import astropy.units as u + + +## Simbad configurations +Simbad.ROW_LIMIT = 30 +Simbad.TIMEOUT = 20 + +## Add query items/fields +# otype: standard name of the object type +# rv_value: Radial velocity value. Eventually translated from a redshift +# z_value: Redshift value. 
Eventually translated from a radial velocity +# rvz_qual: Quality code (A: best, .., E: worst) +# rvz_type: stored type of velocity: 'v'=radial velocity, 'z'=redshift +Simbad.reset_votable_fields() +Simbad.add_votable_fields('otype', 'rv_value', 'z_value', 'rvz_qual', 'rvz_type') + + +def query_name(name, verbose=False): + """ + Query SIMBAD by name. + """ + q = Simbad.query_object(name) + try: + main_id = str(q['MAIN_ID'][0], encoding='utf-8') + otype = str(q['OTYPE'][0], encoding='utf-8') + rv = q['RV_VALUE'][0] + z = q['Z_VALUE'][0] + rvz_qual = q['RVZ_QUAL'][0] + rvz_type = q['RVZ_TYPE'][0] + if verbose: + print('%s: %s,%s,%s,%s,%s,%s' % (name, main_id, otype, rv, z, + rvz_qual, rvz_type)) + except (TypeError, KeyError) as e: + main_id = '' + otype = '' + rv = '' + z = '' + rvz_qual = '' + rvz_type = '' + if verbose: + print('*** %s: not found ***' % name, file=sys.stderr) + # + results = { + 'main_id': main_id, + 'otype': otype, + 'rv': rv, + 'z': z, + 'rvz_qual': rvz_qual, + 'rvz_type': rvz_type, + } + return results + + +def main(): + parser = argparse.ArgumentParser( + description="Query SIMBAD ...") + parser.add_argument("-V", "--version", action="version", + version="%(prog)s " + "%s (%s)" % (__version__, __date__)) + parser.add_argument("infile", + help="file contains list of names; one per line") + parser.add_argument("outfile", + help="output with queryed data, empty if not found; CSV format") + parser.add_argument("-v", "--verbose", dest="verbose", + action="store_true", help="show verbose information") + args = parser.parse_args() + + name_list = [] + main_id_list = [] + otype_list = [] + rv_list = [] + z_list = [] + rvz_qual_list = [] + rvz_type_list = [] + + with open(args.infile) as f: + for name in f: + name = str.strip(name) + name_list.append(name) + qr = query_name(name, verbose=args.verbose) + main_id_list.append(qr['main_id']) + otype_list.append(qr['otype']) + rv_list.append(qr['rv']) + z_list.append(qr['z']) + rvz_qual_list.append(qr['rvz_qual']) + rvz_type_list.append(qr['rvz_type']) + + with open(args.outfile, 'w') as of: + writer = csv.writer(of) + writer.writerow([ "Name", "SIMBAD_ID", "Type", + "RV", "z", "RV/z_Quality", "RV/z_Type" ]) + for i in range(len(name_list)): + writer.writerow([ name_list[i], + main_id_list[i], + otype_list[i], + rv_list[i], + z_list[i], + rvz_qual_list[i], + rvz_type_list[i] ]) + + +if __name__ == "__main__": + main() + + +# vim: set ts=4 sw=4 tw=0 fenc=utf-8 ft=python: # diff --git a/audio/ape2id3.py b/audio/ape2id3.py new file mode 100755 index 0000000..8651a2f --- /dev/null +++ b/audio/ape2id3.py @@ -0,0 +1,145 @@ +#! 
/usr/bin/env python +import sys +from optparse import OptionParser +import mutagen +from mutagen.apev2 import APEv2 +from mutagen.id3 import ID3, TXXX + +def convert_gain(gain): + if gain[-3:] == " dB": + gain = gain[:-3] + try: + gain = float(gain) + except ValueError: + raise ValueError, "invalid gain value" + return "%.2f dB" % gain +def convert_peak(peak): + try: + peak = float(peak) + except ValueError: + raise ValueError, "invalid peak value" + return "%.6f" % peak + +REPLAYGAIN_TAGS = ( + ("mp3gain_album_minmax", None), + ("mp3gain_minmax", None), + ("replaygain_album_gain", convert_gain), + ("replaygain_album_peak", convert_peak), + ("replaygain_track_gain", convert_gain), + ("replaygain_track_peak", convert_peak), +) + + +class Logger(object): + def __init__(self, log_level, prog_name): + self.log_level = log_level + self.prog_name = prog_name + self.filename = None + def prefix(self, msg): + if self.filename is None: + return msg + return "%s: %s" % (self.filename, msg) + def debug(self, msg): + if self.log_level >= 4: + print self.prefix(msg) + def info(self, msg): + if self.log_level >= 3: + print self.prefix(msg) + def warning(self, msg): + if self.log_level >= 2: + print self.prefix("WARNING: %s" % msg) + def error(self, msg): + if self.log_level >= 1: + sys.stderr.write("%s: %s\n" % (self.prog_name, msg)) + def critical(self, msg, retval=1): + self.error(msg) + sys.exit(retval) + +class Ape2Id3(object): + def __init__(self, logger, force=False): + self.log = logger + self.force = force + def convert_tag(self, name, value): + pass + def copy_replaygain_tag(self, apev2, id3, name, converter=None): + self.log.debug("processing '%s' tag" % name) + if name not in apev2: + self.log.info("no APEv2 '%s' tag found, skipping tag" % name) + return False + if not self.force and ("TXXX:%s" % name) in id3: + self.log.info("ID3 '%s' tag already exists, skpping tag" % name) + return False + value = str(apev2[name]) + if callable(converter): + self.log.debug("converting APEv2 '%s' tag from '%s'" % + (name, value)) + try: + value = converter(value) + except ValueError: + self.log.warning("invalid value for APEv2 '%s' tag" % name) + return False + self.log.debug("converted APEv2 '%s' tag to '%s'" % (name, value)) + id3.add(TXXX(encoding=1, desc=name, text=value)) + self.log.info("added ID3 '%s' tag with value '%s'" % (name, value)) + return True + def copy_replaygain_tags(self, filename): + self.log.filename = filename + self.log.debug("begin processing file") + try: + apev2 = APEv2(filename) + except mutagen.apev2.error: + self.log.info("no APEv2 tag found, skipping file") + return + except IOError: + e = sys.exc_info() + self.log.error("%s" % e[1]) + return + try: + id3 = ID3(filename) + except mutagen.id3.error: + self.log.info("no ID3 tag found, creating one") + id3 = ID3() + modified = False + for name, converter in REPLAYGAIN_TAGS: + copied = self.copy_replaygain_tag(apev2, id3, name, converter) + if copied: + modified = True + if modified: + self.log.debug("saving modified ID3 tag") + id3.save(filename) + self.log.debug("done processing file") + self.log.filename = None + +def main(prog_name, options, args): + logger = Logger(options.log_level, prog_name) + ape2id3 = Ape2Id3(logger, force=options.force) + for filename in args: + ape2id3.copy_replaygain_tags(filename) + +if __name__ == "__main__": + parser = OptionParser(version="0.1", usage="%prog [OPTION]... 
FILE...", + description="Copy APEv2 ReplayGain tags on " + "FILE(s) to ID3v2.") + parser.add_option("-q", "--quiet", dest="log_level", + action="store_const", const=0, default=1, + help="do not output error messages") + parser.add_option("-v", "--verbose", dest="log_level", + action="store_const", const=3, + help="output warnings and informational messages") + parser.add_option("-d", "--debug", dest="log_level", + action="store_const", const=4, + help="output debug messages") + parser.add_option("-f", "--force", dest="force", + action="store_true", default=False, + help="force overwriting of existing ID3v2 " + "ReplayGain tags") + prog_name = parser.get_prog_name() + options, args = parser.parse_args() + if len(args) < 1: + parser.error("no files specified") + try: + main(prog_name, options, args) + except KeyboardInterrupt: + pass + +# vim: set expandtab shiftwidth=4 softtabstop=4 textwidth=79: diff --git a/audio/m4a2mp3.sh b/audio/m4a2mp3.sh new file mode 100755 index 0000000..5d06cd9 --- /dev/null +++ b/audio/m4a2mp3.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +bitrate=192 + +for i in *.m4a; do + faad -o - "$i" | lame -h -b $bitrate - "${i%m4a}mp3" +done + diff --git a/audio/split2flac b/audio/split2flac new file mode 100755 index 0000000..6622262 --- /dev/null +++ b/audio/split2flac @@ -0,0 +1,752 @@ +#!/bin/sh +# Copyright (c) 2009-2015 Serge "ftrvxmtrx" Ziryukin +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +# Dependencies: +# shntool, cuetools +# SPLIT: flac, wavpack, mac +# CONVERT: flac/flake, faac, libmp4v2, id3lib/mutagen, lame, vorbis-tools +# ART: ImageMagick +# CHARSET: iconv, enca +# GAIN: flac, aacgain, mp3gain, vorbisgain + +# Exit codes: +# 0 - success +# 1 - error in arguments +# 2 - file or path is not accessible +# 3 - something has failed + +[ -n "${XDG_CONFIG_HOME}" ] && CONFIG="${XDG_CONFIG_HOME}/split2flac/split2flac.conf" +[ -r "${CONFIG}" ] || CONFIG="${HOME}/.split2flac" +TMPPIC="${HOME}/.split2flac_cover.jpg" +FAILED="split_failed.txt" + +NOSUBDIRS=0 +NOPIC=0 +REMOVE=0 +NOCOLORS=0 +PIC_SIZE="192x192" +REPLAY_GAIN=0 +FORMAT="${0##*split2}" +DIR="." 
+OUTPATTERN="@artist/{@year - }@album/@track - @title.@ext" +COPYMASKS="[Cc]overs \*.log \*.txt \*.jpg \*.cbr" +COPYFILES=1 +ENCA_ARGS="" + +# codecs default arguments +ENCARGS_flac="-8" +ENCARGS_m4a="-q 500" +ENCARGS_mp3="--preset standard" +ENCARGS_ogg="-q 5" +ENCARGS_wav="" + +# load settings +eval $(cat "${CONFIG}" 2>/dev/null) +DRY=0 +SAVE=0 +NASK=0 +unset PIC INPATH CUE CHARSET +FORCE=0 + +# do not forget to update before commit +VERSION=121 + +HELP="\${cG}split2flac version: ${VERSION} +Splits one big \${cU}APE/FLAC/WV/WAV\$cZ\$cG audio image (or a collection) into \${cU}FLAC/M4A/MP3/OGG_VORBIS/WAV\$cZ\$cG tracks with tagging and renaming. + +Usage: \${cZ}split2\${FORMAT} [\${cU}OPTIONS\$cZ] \${cU}FILE\$cZ [\${cU}OPTIONS\$cZ]\$cZ + \${cZ}split2\${FORMAT} [\${cU}OPTIONS\$cZ] \${cU}DIR\$cZ [\${cU}OPTIONS\$cZ]\$cZ + \$cG-p\$cZ - dry run + \$cG-o \${cU}DIRECTORY\$cZ \$cR*\$cZ - set output directory (current is \$cP\${DIR}\$cZ) + \$cG-of \${cU}'PATTERN'\$cZ \$cR*\$cZ - use specific output naming pattern (current is \$cP'\${OUTPATTERN}'\$cZ) + \$cG-cue \${cU}FILE\$cZ - use file as a cue sheet (does not work with \${cU}DIR\$cZ) + \$cG-cuecharset \${cU}CHARSET\$cZ - convert cue sheet from CHARSET to UTF-8 (no conversion by default) + \$cG-nask\$cZ - do not ask to enter proper charset of a cue sheet (default is to ask) + \$cG-f \${cU}FORMAT\$cZ - use specific output format (current is \$cP\${FORMAT}\$cZ) + \$cG-e \${cU}'ARG1 ARG2'\$cZ \$cR*\$cZ - encoder arguments (current is \$cP'\${ENCARGS}'\$cZ) + \$cG-eh\$cZ - show help for current encoder and exit\$cZ + \$cG-enca \${cU}'ARG1 ARG2'\$cZ \$cR*\$cZ - enca additional arguments (current is \$cP'\${ENCA_ARGS}'\$cZ) + \$cG-c \${cU}FILE\$cZ \$cR*\$cZ - use file as a cover image (does not work with \${cU}DIR\$cZ) + \$cG-nc \${cR}*\$cZ - do not set any cover images + \$cG-C \${cU}MASKS\$cZ \$cR*\$cZ - specify wildcards for files to copy over (current is \$cP'\${COPYMASKS}'\$cZ) + \$cG-nC \${cR}*\$cZ - do not copy any files + \$cG-cs \${cU}WxH\$cZ \$cR*\$cZ - set cover image size (current is \$cP\${PIC_SIZE}\$cZ) + \$cG-d \$cR*\$cZ - create artist/album subdirs (default) + \$cG-nd \$cR*\$cZ - do not create any subdirs + \$cG-D \$cR*\$cZ - delete original file + \$cG-nD \$cR*\$cZ - do not remove the original (default) + \$cG-F\$cZ - force deletion without asking + \$cG-colors\$cZ \$cR*\$cZ - colorized output (default) + \$cG-nocolors\$cZ \$cR*\$cZ - turn off colors + \$cG-g\$cZ \$cR*\$cZ - adjust audio gain + \$cG-ng\$cZ \$cR*\$cZ - do not adjust audio gain (default) + \$cG-s\$cZ - save configuration to \$cP\"\${CONFIG}\"\$cZ + \$cG-h\$cZ - print this message + \$cG-v\$cZ - print version + +\$cR*\$cZ - option affects configuration if \$cP'-s'\$cZ option passed. +\${cP}NOTE: \$cG'-c some_file.jpg -s'\$cP only \${cU}allows\$cZ\$cP cover images, it doesn't set a default one. +\${cZ}Supported \$cU\${cG}FORMATs\${cZ}: flac, m4a, mp3, ogg, wav. +Supported tags for \$cU\${cG}PATTERN\${cZ}: @artist, @album, @year, @track, @performer, @title, @genre, @ext. +@performer tag is useful with 'various artists' albums, when you want to add +each artist's name to the track filename. It works as @artist if track performer is undefined. +Special \"underscored\" tags are also supported (@_artist, @_album, etc). If used, spaces will be replaced with +underscores. It's useful if you want to have filenames without spaces. + +It's better to pass \$cP'-p'\$cZ option to see what will happen when actually splitting tracks. 
+You may want to pass \$cP'-s'\$cZ option for the first run to save default configuration +(output dir, cover image size, etc.) so you won't need to pass a lot of options +every time, just a filename. Script will try to find CUE sheet if it wasn't specified. +It also supports internal CUE sheets (FLAC, APE and WV).\n" + +msg="printf" + +emsg () { + $msg "${cR}$1${cZ}" +} + +SKIP_UPDATE_ENCARGS=0 + +update_encargs () { + if [ ${SKIP_UPDATE_ENCARGS} -eq 0 ]; then + e="\${ENCARGS_${FORMAT}}" + ENCARGS=`eval echo "$e"` + ENCHELP=0 + fi +} + +update_colors () { + if [ "${NOCOLORS}" -eq 0 ]; then + cR="\033[31m" + cG="\033[32m" + cC="\033[35m" + cP="\033[36m" + cU="\033[4m" + cZ="\033[0m" + else + unset cR cG cC cP cU cZ + fi +} + +update_encargs +update_colors + +# parse arguments +while [ "$1" ]; do + case "$1" in + -o) DIR=$2; shift;; + -of) OUTPATTERN=$2; shift;; + -cue) CUE=$2; shift;; + -cuecharset) CHARSET=$2; shift;; + -nask) NASK=1;; + -f) FORMAT=$2; update_encargs; shift;; + -e) ENCARGS=$2; SKIP_UPDATE_ENCARGS=1; shift;; + -eh) ENCHELP=1;; + -enca) ENCA_ARGS=$2; shift;; + -c) NOPIC=0; PIC=$2; shift;; + -nc) NOPIC=1;; + -C) COPYMASKS=$2; COPYFILES=1; shift;; + -nC) COPYFILES=0;; + -cs) PIC_SIZE=$2; shift;; + -d) NOSUBDIRS=0;; + -nd) NOSUBDIRS=1;; + -p) DRY=1;; + -D) REMOVE=1;; + -nD) REMOVE=0;; + -F) FORCE=1;; + -colors) NOCOLORS=0; update_colors;; + -nocolors) NOCOLORS=1; update_colors;; + -g) REPLAY_GAIN=1;; + -ng) REPLAY_GAIN=0;; + -s) SAVE=1;; + -h|--help|-help) eval "$msg \"${HELP}\""; exit 0;; + -v|--version) + $msg "split2${FORMAT} version: ${VERSION}\n\n"; + shntool -v 2>&1 | grep '^shntool'; + flac --version 2>/dev/null; + wavpack --help 2>&1 | grep 'Version'; + mac 2>&1 | grep '(v '; + faac -h 2>&1 | grep '^FAAC'; + oggenc --version 2>/dev/null; + lame --version | grep '^LAME'; + exit 0;; + -*) eval "$msg \"${HELP}\""; emsg "\nUnknown option $1\n"; exit 1;; + *) + if [ -n "${INPATH}" ]; then + eval "$msg \"${HELP}\"" + emsg "\nUnknown option $1\n" + exit 1 + elif [ ! 
-r "$1" ]; then + emsg "Unable to read $1\n" + exit 2 + else + INPATH="$1" + fi;; + esac + shift +done + +eval "export ENCARGS_${FORMAT}=\"${ENCARGS}\"" + +# save configuration if needed +if [ ${SAVE} -eq 1 ]; then + echo "DIR=\"${DIR}\"" > "${CONFIG}" + echo "OUTPATTERN=\"${OUTPATTERN}\"" >> "${CONFIG}" + echo "COPYMASKS=\"${COPYMASKS}\"" >> "${CONFIG}" + echo "COPYFILES=${COPYFILES}" >> "${CONFIG}" + echo "NOSUBDIRS=${NOSUBDIRS}" >> "${CONFIG}" + echo "NOPIC=${NOPIC}" >> "${CONFIG}" + echo "REMOVE=${REMOVE}" >> "${CONFIG}" + echo "PIC_SIZE=${PIC_SIZE}" >> "${CONFIG}" + echo "NOCOLORS=${NOCOLORS}" >> "${CONFIG}" + echo "REPLAY_GAIN=${REPLAY_GAIN}" >> "${CONFIG}" + echo "ENCARGS_flac=\"${ENCARGS_flac}\"" >> "${CONFIG}" + echo "ENCARGS_m4a=\"${ENCARGS_m4a}\"" >> "${CONFIG}" + echo "ENCARGS_mp3=\"${ENCARGS_mp3}\"" >> "${CONFIG}" + echo "ENCARGS_ogg=\"${ENCARGS_ogg}\"" >> "${CONFIG}" + echo "ENCARGS_wav=\"${ENCARGS_wav}\"" >> "${CONFIG}" + echo "ENCA_ARGS=\"${ENCA_ARGS}\"" >> "${CONFIG}" + $msg "${cP}Configuration saved$cZ\n" +fi + +# use flake if possible +command -v flake >/dev/null && FLAC_ENCODER="flake" || FLAC_ENCODER="flac" + +METAFLAC="metaflac --no-utf8-convert" +VORBISCOMMENT="vorbiscomment -R -a" +command -v mid3v2 >/dev/null && ID3TAG="mid3v2" || ID3TAG="id3tag -2" +MP4TAGS="mp4tags" +GETTAG="cueprint -n 1 -t" +VALIDATE="sed s/[^][[:space:][:alnum:]&_#,.'\"\(\)!-]//g" + +# check & print output format +msg_format="${cG}Output format :$cZ" +case ${FORMAT} in + flac) $msg "$msg_format FLAC [using ${FLAC_ENCODER} tool]"; enc_help="${FLAC_ENCODER} -h";; + m4a) $msg "$msg_format M4A"; enc_help="faac --help";; + mp3) $msg "$msg_format MP3"; enc_help="lame --help";; + ogg) $msg "$msg_format OGG VORBIS"; enc_help="oggenc --help";; + wav) $msg "$msg_format WAVE"; enc_help="echo Sorry, no arguments available for this encoder";; + *) emsg "Unknown output format \"${FORMAT}\"\n"; exit 1;; +esac + +$msg " (${ENCARGS})\n" + +if [ ${ENCHELP} -eq 1 ]; then + ${enc_help} + exit 0 +fi + +$msg "${cG}Output dir :$cZ ${DIR:?Output directory was not set}\n" + +# replaces a tag name with the value of the tag. $1=pattern $2=tag_name $3=tag_value +update_pattern_aux () { + tag_name="$2" + tag_value="$3" + expr_match="@${tag_name}" + expr_match_opt="[{]\([^}{]*\)${expr_match}\([^}]*\)[}]" + + echo "$1" | { [ "${tag_value}" ] \ + && sed "s/${expr_match_opt}/\1${tag_value}\2/g;s/${expr_match}/${tag_value}/g" \ + || sed "s/${expr_match_opt}//g;s/${expr_match}//g"; } +} + +# replaces a tag name with the value of the tag. $1=pattern $2=tag_name $3=tag_value +update_pattern () { + # replace '/' with '\' and '&' with '\&' for proper sed call + tag_name=$(echo "$2" | sed 's,/,\\\\,g;s,&,\\&,g') + tag_value=$(echo "$3" | sed 's,/,\\\\,g;s,&,\\&,g') + + v=$(update_pattern_aux "$1" "${tag_name}" "${tag_value}") + update_pattern_aux "$v" "_${tag_name}" $(echo "${tag_value}" | sed "s/ /_/g") +} + +# splits a file +split_file () { + TMPCUE="${HOME}/.split2flac_XXXXX.cue" + FILE="$1" + + if [ ! -r "${FILE}" ]; then + emsg "Can not read the file\n" + return 1 + fi + + # search for a cue sheet if not specified + if [ -z "${CUE}" ]; then + CUE="${FILE}.cue" + if [ ! -r "${CUE}" ]; then + CUE="${FILE%.*}.cue" + if [ ! -r "${CUE}" ]; then + # try to extract internal one + CUESHEET=$(${METAFLAC} --show-tag=CUESHEET "${FILE}" 2>/dev/null | sed 's/^cuesheet=//;s/^CUESHEET=//') + + # try WV internal cue sheet + [ -z "${CUESHEET}" ] && CUESHEET=$(wvunpack -q -c "${FILE}" 2>/dev/null) + + # try APE internal cue sheet (omfg!) 
+ if [ -z "${CUESHEET}" ]; then + APETAGEX=$(tail -c 32 "$1" | cut -b 1-8 2>/dev/null) + if [ "${APETAGEX}" = "APETAGEX" ]; then + LENGTH=$(tail -c 32 "$1" | cut -b 13-16 | od -t u4 | awk '{printf $2}') 2>/dev/null + tail -c ${LENGTH} "$1" | grep -a CUESHEET >/dev/null 2>&1 + if [ $? -eq 0 ]; then + CUESHEET=$(tail -c ${LENGTH} "$1" | sed 's/.*CUESHEET.//g' 2>/dev/null) + [ $? -ne 0 ] && CUESHEET="" + fi + fi + fi + + if [ -n "${CUESHEET}" ]; then + $msg "${cP}Found internal cue sheet$cZ\n" + TMPCUE=$(mktemp "${TMPCUE}") + CUE="${TMPCUE}" + echo "${CUESHEET}" > "${CUE}" + TMPCUE="${HOME}/.split2flac_XXXXX.cue" + + if [ $? -ne 0 ]; then + emsg "Unable to save internal cue sheet\n" + return 1 + fi + else + unset CUE + fi + fi + fi + fi + + # print cue sheet filename + if [ -z "${CUE}" ]; then + emsg "No cue sheet\n" + return 1 + fi + + # cue sheet charset + [ -z "${CHARSET}" ] && CHARSET="utf-8" || $msg "${cG}Cue charset : $cP${CHARSET} -> utf-8$cZ\n" + + CUESHEET=$(iconv -f "${CHARSET}" -t utf-8 "${CUE}" 2>/dev/null) + + # try to guess the charset using enca + if [ $? -ne 0 ]; then + CUESHEET=$(enconv ${ENCA_ARGS} -x utf8 < "${CUE}" 2>/dev/null) + fi + + if [ $? -ne 0 ]; then + [ "${CHARSET}" = "utf-8" ] \ + && emsg "Cue sheet is not utf-8\n" \ + || emsg "Unable to convert cue sheet from ${CHARSET} to utf-8\n" + + if [ ${NASK} -eq 0 ]; then + while [ 1 ]; do + echo -n "Please enter the charset (or just press ENTER to ignore) > " + read CHARSET + + [ -z "${CHARSET}" ] && break + $msg "${cG}Converted cue sheet:$cZ\n" + iconv -f "${CHARSET}" -t utf-8 "${CUE}" || continue + + echo -n "Is this right? [Y/n] > " + read YEP + [ -z "${YEP}" -o "${YEP}" = "y" -o "${YEP}" = "Y" ] && break + done + + CUESHEET=$(iconv -f "${CHARSET}" -t utf-8 "${CUE}" 2>/dev/null) + fi + fi + + # save converted cue sheet + TMPCUE=$(mktemp "${TMPCUE}") + CUE="${TMPCUE}" + echo "${CUESHEET}" > "${CUE}" + + if [ $? -ne 0 ]; then + emsg "Unable to save converted cue sheet\n" + return 1 + fi + + SDIR=$(dirname "${FILE}") + + # search for a front cover image + if [ ${NOPIC} -eq 1 ]; then + unset PIC + elif [ -z "${PIC}" ]; then + # try common names + for i in *[Cc]over*.jpg *[Ff]older*.jpg */*[Cc]over*.jpg */*[Ff]older*.jpg; do + if [ -r "${SDIR}/$i" ]; then + PIC="${SDIR}/$i" + break + fi + done + + # try to extract internal one + if [ -z "${PIC}" ]; then + ${METAFLAC} --export-picture-to="${TMPPIC}" "${FILE}" 2>/dev/null + if [ $? 
-ne 0 ]; then + unset PIC + else + PIC="${TMPPIC}" + fi + fi + fi + + $msg "${cG}Cue sheet :$cZ ${CUE}\n" + $msg "${cG}Cover image :$cZ ${PIC:-not set}\n" + + # file removal warning + if [ ${REMOVE} -eq 1 ]; then + msg_removal="\n${cR}Also remove original" + [ ${FORCE} -eq 1 ] \ + && $msg "$msg_removal (WITHOUT ASKING)$cZ\n" \ + || $msg "$msg_removal if user says 'y'$cZ\n" + fi + + # files to copy over + if [ ${COPYFILES} -eq 1 -a -n "${COPYMASKS}" ]; then + $msg "${cG}Copy over :$cZ ${COPYMASKS}\n" + fi + + # get common tags + TAG_ARTIST=$(${GETTAG} %P "${CUE}" 2>/dev/null) + TAG_ALBUM=$(${GETTAG} %T "${CUE}" 2>/dev/null) + TRACKS_NUM=$(${GETTAG} %N "${CUE}" 2>/dev/null) + + # some cue sheets may have non-audio tracks + # we can check the difference between what cuebreakpoints and cueprint gives us + BREAKPOINTS_NUM=$(($(cuebreakpoints "${CUE}" 2>/dev/null | wc -l) + 1)) + + # too bad, we can't fix that in a _right_ way + if [ ${BREAKPOINTS_NUM} -lt ${TRACKS_NUM} ]; then + emsg "'cueprint' tool reported ${TRACKS_NUM} tracks, " + emsg "but there seem to be only ${BREAKPOINTS_NUM} audio ones\n" + emsg "Sorry, there is no any helpful options in the 'cueprint' tool for this problem.\n" + emsg "You probably remove non-audio tracks from the cue sheet (\"${CUE}\") by hand.\n" + return 1 + fi + + if [ -z "${TRACKS_NUM}" ]; then + emsg "Failed to get number of tracks from CUE sheet.\n" + emsg "There may be an error in the sheet.\n" + emsg "Running ${GETTAG} %N \"${CUE}\" produces this:\n" + ${GETTAG} %N "${CUE}" + return 1 + fi + + TAG_GENRE=$(grep 'REM[ \t]\+GENRE[ \t]\+' "${CUE}" | head -1 | sed 's/REM[ \t]\+GENRE[ \t]\+//;s/^"\(.*\)"$/\1/') + + YEAR=$(awk '{ if (/REM[ \t]+DATE/) { printf "%i", $3; exit } }' < "${CUE}") + YEAR=$(echo ${YEAR} | tr -d -c '[:digit:]') + + unset TAG_DATE + + if [ -n "${YEAR}" ]; then + [ ${YEAR} -ne 0 ] && TAG_DATE="${YEAR}" + fi + + $msg "\n${cG}Artist :$cZ ${TAG_ARTIST}\n" + $msg "${cG}Album :$cZ ${TAG_ALBUM}\n" + [ "${TAG_GENRE}" ] && $msg "${cG}Genre :$cZ ${TAG_GENRE}\n" + [ "${TAG_DATE}" ] && $msg "${cG}Year :$cZ ${TAG_DATE}\n" + $msg "${cG}Tracks :$cZ ${TRACKS_NUM}\n\n" + + # those tags won't change, so update the pattern now + DIR_ARTIST=$(echo "${TAG_ARTIST}" | ${VALIDATE}) + DIR_ALBUM=$(echo "${TAG_ALBUM}" | ${VALIDATE}) + PATTERN=$(update_pattern "${OUTPATTERN}" "artist" "${DIR_ARTIST}") + PATTERN=$(update_pattern "${PATTERN}" "album" "${DIR_ALBUM}") + PATTERN=$(update_pattern "${PATTERN}" "genre" "${TAG_GENRE}") + PATTERN=$(update_pattern "${PATTERN}" "year" "${TAG_DATE}") + PATTERN=$(update_pattern "${PATTERN}" "ext" "${FORMAT}") + + # construct output directory name + OUT="${DIR}" + + if [ ${NOSUBDIRS} -eq 0 ]; then + # add path from the pattern + path=$(dirname "${PATTERN}") + [ "${path}" != "${PATTERN}" ] && OUT="${OUT}/${path}" + fi + + # shnsplit is retarded enough to break on double slash + OUT=$(echo "${OUT}" | sed s,/[/]*,/,g) + + # remove path from the pattern + PATTERN=$(basename "${PATTERN}") + + $msg "${cP}Saving tracks to $cZ\"${OUT}\"\n" + + # split to tracks + if [ ${DRY} -ne 1 ]; then + # remove if empty and create output dir + if [ ${NOSUBDIRS} -eq 0 ]; then + rmdir "${OUT}" 2>/dev/null + mkdir -p "${OUT}" + [ $? 
-ne 0 ] && { emsg "Failed to create output directory ${OUT} (already split?)\n"; return 1; } + fi + + case ${FORMAT} in + flac) ENC="flac ${FLAC_ENCODER} ${ENCARGS} - -o %f"; RG="metaflac --add-replay-gain";; + m4a) ENC="cust ext=m4a faac ${ENCARGS} -o %f -"; RG="aacgain";; + mp3) ENC="cust ext=mp3 lame ${ENCARGS} - %f"; RG="mp3gain";; + ogg) ENC="cust ext=ogg oggenc ${ENCARGS} - -o %f"; RG="vorbisgain -a";; + wav) ENC="wav ${ENCARGS}"; REPLAY_GAIN=0;; + *) emsg "Unknown output format ${FORMAT}\n"; exit 1;; + esac + + # split to tracks + # sed expression is a fix for "shnsplit: error: m:ss.ff format can only be used with CD-quality files" + cuebreakpoints "${CUE}" 2>/dev/null | \ + sed 's/$/0/' | \ + shnsplit -O never -o "${ENC}" -d "${OUT}" -t "%n" "${FILE}" + if [ $? -ne 0 ]; then + emsg "Failed to split\n" + return 1 + fi + + # prepare cover image + if [ -n "${PIC}" ]; then + convert "${PIC}" -resize "${PIC_SIZE}" "${TMPPIC}" + if [ $? -eq 0 ]; then + PIC="${TMPPIC}" + else + $msg "${cR}Failed to convert cover image$cZ\n" + unset PIC + fi + fi + fi + + # set tags and rename + $msg "\n${cP}Setting tags$cZ\n" + + i=1 + while [ $i -le ${TRACKS_NUM} ]; do + TAG_TITLE=$(cueprint -n $i -t %t "${CUE}" 2>/dev/null) + FILE_TRACK="$(printf %02i $i)" + FILE_TITLE=$(echo "${TAG_TITLE}" | ${VALIDATE}) + f="${OUT}/${FILE_TRACK}.${FORMAT}" + + TAG_PERFORMER=$(cueprint -n $i -t %p "${CUE}" 2>/dev/null) + + if [ -n "${TAG_PERFORMER}" -a "${TAG_PERFORMER}" != "${TAG_ARTIST}" ]; then + $msg "$i: $cG${TAG_PERFORMER} - ${TAG_TITLE}$cZ\n" + else + TAG_PERFORMER="${TAG_ARTIST}" + $msg "$i: $cG${TAG_TITLE}$cZ\n" + fi + + FINAL=$(update_pattern "${OUT}/${PATTERN}" "title" "${FILE_TITLE}") + FINAL=$(update_pattern "${FINAL}" "performer" "${TAG_PERFORMER}") + FINAL=$(update_pattern "${FINAL}" "track" "${FILE_TRACK}") + + if [ ${DRY} -ne 1 -a "$f" != "${FINAL}" ]; then + mv "$f" "${FINAL}" + if [ $? -ne 0 ]; then + emsg "Failed to rename track file\n" + return 1 + fi + fi + + if [ ${DRY} -ne 1 ]; then + case ${FORMAT} in + flac) + ${METAFLAC} --remove-all-tags \ + --set-tag="ARTIST=${TAG_PERFORMER}" \ + --set-tag="ALBUM=${TAG_ALBUM}" \ + --set-tag="TITLE=${TAG_TITLE}" \ + --set-tag="TRACKNUMBER=$i" \ + --set-tag="TRACKTOTAL=${TRACKS_NUM}" \ + "${FINAL}" >/dev/null + RES=$? + + [ "${TAG_GENRE}" ] && { ${METAFLAC} --set-tag="GENRE=${TAG_GENRE}" "${FINAL}" >/dev/null; RES=$RES$?; } + [ "${TAG_DATE}" ] && { ${METAFLAC} --set-tag="DATE=${TAG_DATE}" "${FINAL}" >/dev/null; RES=$RES$?; } + [ "${PIC}" ] && { ${METAFLAC} --import-picture-from="${PIC}" "${FINAL}" >/dev/null; RES=$RES$?; } + ;; + + mp3) + ${ID3TAG} --artist="${TAG_PERFORMER}" \ + --album="${TAG_ALBUM}" \ + --song="${TAG_TITLE}" \ + --track="$i" \ + "${FINAL}" >/dev/null + RES=$? + + [ "${TAG_GENRE}" ] && { ${ID3TAG} --genre="${TAG_GENRE}" "${FINAL}" >/dev/null; RES=$RES$?; } + [ "${TAG_DATE}" ] && { ${ID3TAG} --year="${TAG_DATE}" "${FINAL}" >/dev/null; RES=$RES$?; } + ;; + + ogg) + ${VORBISCOMMENT} "${FINAL}" \ + -t "ARTIST=${TAG_PERFORMER}" \ + -t "ALBUM=${TAG_ALBUM}" \ + -t "TITLE=${TAG_TITLE}" \ + -t "TRACKNUMBER=$i" \ + -t "TRACKTOTAL=${TRACKS_NUM}" >/dev/null + RES=$? + + [ "${TAG_GENRE}" ] && { ${VORBISCOMMENT} "${FINAL}" -t "GENRE=${TAG_GENRE}" >/dev/null; RES=$RES$?; } + [ "${TAG_DATE}" ] && { ${VORBISCOMMENT} "${FINAL}" -t "DATE=${TAG_DATE}" >/dev/null; RES=$RES$?; } + ;; + + m4a) + ${MP4TAGS} "${FINAL}" \ + -a "${TAG_PERFORMER}" \ + -A "${TAG_ALBUM}" \ + -s "${TAG_TITLE}" \ + -t "$i" \ + -T "${TRACKS_NUM}" >/dev/null + RES=$? 
+ + [ "${TAG_GENRE}" ] && { ${MP4TAGS} "${FINAL}" -g "${TAG_GENRE}" >/dev/null; RES=$RES$?; } + [ "${TAG_DATE}" ] && { ${MP4TAGS} "${FINAL}" -y "${TAG_DATE}" >/dev/null; RES=$RES$?; } + [ "${PIC}" ] && { ${MP4TAGS} "${FINAL}" -P "${PIC}" >/dev/null; RES=$RES$?; } + ;; + + wav) + RES=0 + ;; + + *) + emsg "Unknown output format ${FORMAT}\n" + return 1 + ;; + esac + + if [ ${RES} -ne 0 ]; then + emsg "Failed to set tags for track\n" + return 1 + fi + fi + + $msg " -> ${cP}${FINAL}$cZ\n" + + i=$(($i + 1)) + done + + # adjust gain + if [ ${REPLAY_GAIN} -ne 0 ]; then + $msg "\n${cP}Adjusting gain$cZ\n" + + if [ ${DRY} -ne 1 ]; then + ${RG} "${OUT}/"*.${FORMAT} >/dev/null + + if [ $? -ne 0 ]; then + emsg "Failed to adjust gain for track\n" + return 1 + fi + fi + fi + + # copy files + if [ ${COPYFILES} -eq 1 -a "${COPYMASKS}" ]; then + old=`pwd` + cd "${SDIR}" + $msg "\n${cG}Copying files:$cZ\n" + eval "for i in ${COPYMASKS}; do \ + test -r \"\$i\" && \ + echo \" +> \$i\" 2>/dev/null; done" + cd "${old}" + if [ ${DRY} -ne 1 ]; then + eval "for i in ${COPYMASKS}; do \ + test -r/\"${SDIR}/\$i\" && \ + cp -r \"${SDIR}/\$i\" \"\${OUT}/\"; done" + fi + fi + + rm -f "${TMPPIC}" + rm -f "${TMPCUE}" + + if [ ${DRY} -ne 1 -a ${REMOVE} -eq 1 ]; then + YEP="n" + + if [ ${FORCE} -ne 1 ]; then + echo -n "Are you sure you want to delete original? [y/N] > " + read YEP + fi + + [ "${YEP}" = "y" -o "${YEP}" = "Y" -o ${FORCE} -eq 1 ] && rm -f "${FILE}" + fi + + return 0 +} + +# searches for files in a directory and splits them +split_collection () { + rm -f "${FAILED}" + NUM_FAILED=0 + OLDIFS=${IFS} + OLDCHARSET="${CHARSET}" + # set IFS to newline. we do not use 'read' here because we may want to ask user for input + IFS=" +" + + for FILE in `find "$1" -iname '*.flac' -o -iname '*.ape' -o -iname '*.tta' -o -iname '*.wv' -o -iname '*.wav'`; do + IFS=${OLDIFS} + CHARSET=${OLDCHARSET} + $msg "$cG>> $cC\"${FILE}\"$cZ\n" + unset PIC CUE + split_file "${FILE}" + + if [ ! $? -eq 0 ]; then + emsg "Failed to split \"${FILE}\"\n" + echo "${FILE}" >> "${FAILED}" + NUM_FAILED=$((${NUM_FAILED} + 1)) + fi + + echo + done + + if [ ${NUM_FAILED} -ne 0 ]; then + emsg "${NUM_FAILED} file(s) failed to split (already split?):\n" + $msg "${cR}\n" + sort "${FAILED}" -o "${FAILED}" + cat "${FAILED}" + emsg "\nThese files are also listed in ${FAILED}.\n" + return 1 + fi + + return 0 +} + +if [ -d "${INPATH}" ]; then + if [ ! -x "${INPATH}" ]; then + emsg "Directory \"${INPATH}\" is not accessible\n" + exit 2 + fi + $msg "${cG}Input dir :$cZ ${INPATH}$cZ\n\n" + split_collection "${INPATH}" +elif [ -n "${INPATH}" ]; then + split_file "${INPATH}" +else + emsg "No input filename given. Use -h for help.\n" + exit 1 +fi + +# exit code of split_collection or split_file +STATUS=$? 
+ +$msg "\n${cP}Finished$cZ\n" + +[ ${STATUS} -ne 0 ] && exit 3 || exit 0 + +### Local Variables: *** +### mode:sh *** +### tab-width:4 *** +### End: *** diff --git a/audio/wma2mp3.sh b/audio/wma2mp3.sh new file mode 100755 index 0000000..db51e56 --- /dev/null +++ b/audio/wma2mp3.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# convert *.wma to *.mp3 + +current_directory=$( pwd ) + +#remove spaces +for i in *.wma; do mv "$i" `echo $i | tr ' ' '_'`; done + +#remove uppercase +for i in *.[Ww][Mm][Aa]; do mv "$i" `echo $i | tr '[A-Z]' '[a-z]'`; done + +#Rip with Mplayer / encode with LAME +for i in *.wma ; do mplayer -vo null -vc dummy -af resample=44100 -ao pcm + -waveheader $i && lame -m s audiodump.wav -o $i; done + +#convert file names +for i in *.wma; do mv "$i" "`basename "$i" .wma`.mp3"; done + +rm audiodump.wav + diff --git a/bin/gen_points.py b/bin/gen_points.py new file mode 100755 index 0000000..57733dc --- /dev/null +++ b/bin/gen_points.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Aaron LI +# 2015/06/19 + +""" +Generate the required number of random points within the required region. +""" + +__version__ = "0.1.0" +__date__ = "2015/06/19" +DEBUG = True + +import sys +import argparse +import random +import time +import re + +from rand.sphere import sphere_point +from region.region import Region + +random.seed(time.time()) + + +def parse_region(regstring): + reg_par = re.sub(r"[(),]", " ", regstring).split() + regtype = reg_par[0].lower() + if regtype == "box": + xc = float(reg_par[1]) + yc = float(reg_par[2]) + width = parse_reg_value(reg_par[3]) + height = parse_reg_value(reg_par[4]) + rotation = float(reg_par[5]) + reg = Region(regtype, xc=xc, yc=yc, + width=width, height=height, rotation=rotation) + else: + raise ValueError("region type '%s' currently not implemented" % regtype) + return reg + + +def parse_reg_value(valstring): + if valstring[-1] == '"': + # arcsec -> deg + value = float(valstring.split('"')[0]) / 60.0 / 60.0 + elif valstring[-1] == "'": + # arcmin -> deg + value = float(valstring.split("'")[0]) / 60.0 + else: + value = float(valstring) + return value + + +def main(): + parser = argparse.ArgumentParser( + description="Generate random point within the given region.") + parser.add_argument("-V", "--version", action="version", + version="%(prog)s " + "%s (%s)" % (__version__, __date__)) + parser.add_argument("-n", "--number", dest="number", + type=int, default=1, + help="number of points to be generated") + parser.add_argument("-r", "--region", dest="region", required=True, + help="DS9 region") + args = parser.parse_args() + + reg = parse_region(args.region) + if DEBUG: + print("DEBUG: region: ", reg.dump(), file=sys.stderr) + + points = [] + while len(points) < args.number: + p = sphere_point(unit="deg") + if reg.is_inside(p): + points.append(p) + print("%s %s" % p) + + +if __name__ == "__main__": + main() + diff --git a/bin/img2list.py b/bin/img2list.py new file mode 100644 index 0000000..48d0de4 --- /dev/null +++ b/bin/img2list.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Aaron LI +# 2015/06/23 +# + + +import numpy as np +from astropy.io import fits + + +def img2list(imgdata, mask=None): + """ + Convert a image matrix to list of point coordinates. + The input image matrix is taken as an integer matrix. + If one pixel has value n (>1), then it is repeated n times. 
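+
+    For example, with an illustrative 2x2 input (not from the original code)
+        imgdata = [[2, 0],
+                   [0, 1]]
+    the result is [[0, 0], [1, 1], [0, 0]]: each pass appends every
+    remaining nonzero pixel once and decrements it, so the number of
+    returned points equals the sum of the pixel values.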
+ """ + img = imgdata.astype(int) + points = [] + ii, jj = np.nonzero(img >= 1) + while len(ii) > 0: + for i, j in zip(ii, jj): + points.append([i, j]) + img[ii, jj] -= 1 + ii, jj = np.nonzero(img >= 1) + return np.array(points) + diff --git a/cli/colors.sh b/cli/colors.sh new file mode 100755 index 0000000..a28c261 --- /dev/null +++ b/cli/colors.sh @@ -0,0 +1,37 @@ +#!/bin/sh +# https://gist.github.com/esundahl/1651086 + +function color_test { + # Daniel Crisman's ANSI color chart script from + # The Bash Prompt HOWTO: 6.1. Colours + # http://www.tldp.org/HOWTO/Bash-Prompt-HOWTO/x329.html + # + # This function echoes a bunch of color codes to the + # terminal to demonstrate what's available. Each + # line is the color code of one forground color, + # out of 17 (default + 16 escapes), followed by a + # test use of that color on all nine background + # colors (default + 8 escapes). + # + + T='gYw' # The test text + + echo -e "\n 40m 41m 42m 43m\ + 44m 45m 46m 47m" + + for FGs in ' m' ' 1m' ' 30m' '1;30m' ' 31m' '1;31m' \ + ' 32m' '1;32m' ' 33m' '1;33m' ' 34m' '1;34m' \ + ' 35m' '1;35m' ' 36m' '1;36m' ' 37m' '1;37m'; + do + FG=${FGs// /} + echo -en " $FGs \033[$FG $T " + for BG in 40m 41m 42m 43m 44m 45m 46m 47m; do + echo -en "$EINS \033[$FG\033[$BG $T \033[0m" + done + echo; + done + echo +} + +color_test + diff --git a/cli/colortest.bash b/cli/colortest.bash new file mode 100755 index 0000000..c777b9e --- /dev/null +++ b/cli/colortest.bash @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# +# ANSI color scheme script featuring Space Invaders +# +# Original: http://crunchbang.org/forums/viewtopic.php?pid=126921%23p126921#p126921 +# Modified by lolilolicon +# + +f=3 b=4 +for j in f b; do + for i in {0..7}; do + printf -v $j$i %b "\e[${!j}${i}m" + done +done +bld=$'\e[1m' +rst=$'\e[0m' + +cat << EOF + + $f1 ▀▄ ▄▀ $f2 ▄▄▄████▄▄▄ $f3 ▄██▄ $f4 ▀▄ ▄▀ $f5 ▄▄▄████▄▄▄ $f6 ▄██▄ $rst + $f1 ▄█▀███▀█▄ $f2███▀▀██▀▀███ $f3▄█▀██▀█▄ $f4 ▄█▀███▀█▄ $f5███▀▀██▀▀███ $f6▄█▀██▀█▄$rst + $f1█▀███████▀█ $f2▀▀███▀▀███▀▀ $f3▀█▀██▀█▀ $f4█▀███████▀█ $f5▀▀███▀▀███▀▀ $f6▀█▀██▀█▀$rst + $f1▀ ▀▄▄ ▄▄▀ ▀ $f2 ▀█▄ ▀▀ ▄█▀ $f3▀▄ ▄▀ $f4▀ ▀▄▄ ▄▄▀ ▀ $f5 ▀█▄ ▀▀ ▄█▀ $f6▀▄ ▄▀$rst + + $bld$f1▄ ▀▄ ▄▀ ▄ $f2 ▄▄▄████▄▄▄ $f3 ▄██▄ $f4▄ ▀▄ ▄▀ ▄ $f5 ▄▄▄████▄▄▄ $f6 ▄██▄ $rst + $bld$f1█▄█▀███▀█▄█ $f2███▀▀██▀▀███ $f3▄█▀██▀█▄ $f4█▄█▀███▀█▄█ $f5███▀▀██▀▀███ $f6▄█▀██▀█▄$rst + $bld$f1▀█████████▀ $f2▀▀▀██▀▀██▀▀▀ $f3▀▀█▀▀█▀▀ $f4▀█████████▀ $f5▀▀▀██▀▀██▀▀▀ $f6▀▀█▀▀█▀▀$rst + $bld$f1 ▄▀ ▀▄ $f2▄▄▀▀ ▀▀ ▀▀▄▄ $f3▄▀▄▀▀▄▀▄ $f4 ▄▀ ▀▄ $f5▄▄▀▀ ▀▀ ▀▀▄▄ $f6▄▀▄▀▀▄▀▄$rst + + + $f7▌$rst + + $f7▌$rst + + $f7 ▄█▄ $rst + $f7▄█████████▄$rst + $f7▀▀▀▀▀▀▀▀▀▀▀$rst + +EOF diff --git a/cli/colortest.lua b/cli/colortest.lua new file mode 100755 index 0000000..8f8c95f --- /dev/null +++ b/cli/colortest.lua @@ -0,0 +1,22 @@ +#!/usr/bin/env lua + +function cl(e) + return string.format('\27[%sm', e) +end + +function print_fg(bg, pre) + for fg = 30,37 do + fg = pre..fg + io.write(cl(bg), cl(fg), string.format(' %6s ', fg), cl(0)) + end +end + +for bg = 40,47 do + io.write(cl(0), ' ', bg, ' ') + print_fg(bg, ' ') + io.write('\n ') + print_fg(bg, '1;') + io.write('\n\n') +end + +-- Andres P diff --git a/cli/colortest.pl b/cli/colortest.pl new file mode 100755 index 0000000..767789e --- /dev/null +++ b/cli/colortest.pl @@ -0,0 +1,365 @@ +#!/usr/bin/env perl + +# by entheon, do whatever the hell you want with this file + +print "\n"; +print "*****************************\n"; +print "* XTERM 256Color Test Chart *\n"; +print "*****************************\n"; +print "* 16 = black\n"; +print "* 255 = 
white\n"; +print "*\n"; +print "* Usage:\n"; +print "* colortest.pl -w\n"; +print "* wide display\n"; +print "*\n"; +print "* colortest.pl -w -r\n"; +print "* wide display reversed\n"; +print "*\n"; +print "* colortest.pl -w -s\n"; +print "* extra spaces padding\n"; +print "*\n"; +print "* colortest.pl -w -r -s\n"; +print "* available combination\n"; +print "*\n"; +print "**************************\n"; + +if( $ARGV[0] eq "-w" || $ARGV[1] eq "-w" || $ARGV[2] eq "-w" ) { + push(@arr, [( "[38;5;16m 16: 00/00/00", "[38;5;17m 17: 00/00/5f", "[38;5;18m 18: 00/00/87", "[38;5;19m 19: 00/00/af", "[38;5;20m 20: 00/00/d7", "[38;5;21m 21: 00/00/ff")] ); + push(@arr, [( "[38;5;22m 22: 00/5f/00", "[38;5;23m 23: 00/5f/5f", "[38;5;24m 24: 00/5f/87", "[38;5;25m 25: 00/5f/af", "[38;5;26m 26: 00/5f/d7", "[38;5;27m 27: 00/5f/ff")] ); + push(@arr, [( "[38;5;28m 28: 00/87/00", "[38;5;29m 29: 00/87/5f", "[38;5;30m 30: 00/87/87", "[38;5;31m 31: 00/87/af", "[38;5;32m 32: 00/87/d7", "[38;5;33m 33: 00/87/ff")] ); + push(@arr, [( "[38;5;34m 34: 00/af/00", "[38;5;35m 35: 00/af/5f", "[38;5;36m 36: 00/af/87", "[38;5;37m 37: 00/af/af", "[38;5;38m 38: 00/af/d7", "[38;5;39m 39: 00/af/ff")] ); + push(@arr, [( "[38;5;40m 40: 00/d7/00", "[38;5;41m 41: 00/d7/5f", "[38;5;42m 42: 00/d7/87", "[38;5;43m 43: 00/d7/af", "[38;5;44m 44: 00/d7/d7", "[38;5;45m 45: 00/d7/ff")] ); + push(@arr, [( "[38;5;46m 46: 00/ff/00", "[38;5;47m 47: 00/ff/5f", "[38;5;48m 48: 00/ff/87", "[38;5;49m 49: 00/ff/af", "[38;5;50m 50: 00/ff/d7", "[38;5;51m 51: 00/ff/ff")] ); + push(@arr, [( "[38;5;52m 52: 5f/00/00", "[38;5;53m 53: 5f/00/5f", "[38;5;54m 54: 5f/00/87", "[38;5;55m 55: 5f/00/af", "[38;5;56m 56: 5f/00/d7", "[38;5;57m 57: 5f/00/ff")] ); + push(@arr, [( "[38;5;58m 58: 5f/5f/00", "[38;5;59m 59: 5f/5f/5f", "[38;5;60m 60: 5f/5f/87", "[38;5;61m 61: 5f/5f/af", "[38;5;62m 62: 5f/5f/d7", "[38;5;63m 63: 5f/5f/ff")] ); + push(@arr, [( "[38;5;64m 64: 5f/87/00", "[38;5;65m 65: 5f/87/5f", "[38;5;66m 66: 5f/87/87", "[38;5;67m 67: 5f/87/af", "[38;5;68m 68: 5f/87/d7", "[38;5;69m 69: 5f/87/ff")] ); + push(@arr, [( "[38;5;70m 70: 5f/af/00", "[38;5;71m 71: 5f/af/5f", "[38;5;72m 72: 5f/af/87", "[38;5;73m 73: 5f/af/af", "[38;5;74m 74: 5f/af/d7", "[38;5;75m 75: 5f/af/ff")] ); + push(@arr, [( "[38;5;76m 76: 5f/d7/00", "[38;5;77m 77: 5f/d7/5f", "[38;5;78m 78: 5f/d7/87", "[38;5;79m 79: 5f/d7/af", "[38;5;80m 80: 5f/d7/d7", "[38;5;81m 81: 5f/d7/ff")] ); + push(@arr, [( "[38;5;82m 82: 5f/ff/00", "[38;5;83m 83: 5f/ff/5f", "[38;5;84m 84: 5f/ff/87", "[38;5;85m 85: 5f/ff/af", "[38;5;86m 86: 5f/ff/d7", "[38;5;87m 87: 5f/ff/ff")] ); + push(@arr, [( "[38;5;88m 88: 87/00/00", "[38;5;89m 89: 87/00/5f", "[38;5;90m 90: 87/00/87", "[38;5;91m 91: 87/00/af", "[38;5;92m 92: 87/00/d7", "[38;5;93m 93: 87/00/ff")] ); + push(@arr, [( "[38;5;94m 94: 87/5f/00", "[38;5;95m 95: 87/5f/5f", "[38;5;96m 96: 87/5f/87", "[38;5;97m 97: 87/5f/af", "[38;5;98m 98: 87/5f/d7", "[38;5;99m 99: 87/5f/ff")] ); + push(@arr, [( "[38;5;100m 100: 87/87/00", "[38;5;101m 101: 87/87/5f", "[38;5;102m 102: 87/87/87", "[38;5;103m 103: 87/87/af", "[38;5;104m 104: 87/87/d7", "[38;5;105m 105: 87/87/ff")] ); + push(@arr, [( "[38;5;106m 106: 87/af/00", "[38;5;107m 107: 87/af/5f", "[38;5;108m 108: 87/af/87", "[38;5;109m 109: 87/af/af", "[38;5;110m 110: 87/af/d7", "[38;5;111m 111: 87/af/ff")] ); + push(@arr, [( "[38;5;112m 112: 87/d7/00", "[38;5;113m 113: 87/d7/5f", "[38;5;114m 114: 87/d7/87", "[38;5;115m 115: 87/d7/af", "[38;5;116m 116: 87/d7/d7", "[38;5;117m 117: 87/d7/ff")] ); + push(@arr, [( "[38;5;118m 118: 87/ff/00", 
"[38;5;119m 119: 87/ff/5f", "[38;5;120m 120: 87/ff/87", "[38;5;121m 121: 87/ff/af", "[38;5;122m 122: 87/ff/d7", "[38;5;123m 123: 87/ff/ff")] ); + push(@arr, [( "[38;5;124m 124: af/00/00", "[38;5;125m 125: af/00/5f", "[38;5;126m 126: af/00/87", "[38;5;127m 127: af/00/af", "[38;5;128m 128: af/00/d7", "[38;5;129m 129: af/00/ff")] ); + push(@arr, [( "[38;5;130m 130: af/5f/00", "[38;5;131m 131: af/5f/5f", "[38;5;132m 132: af/5f/87", "[38;5;133m 133: af/5f/af", "[38;5;134m 134: af/5f/d7", "[38;5;135m 135: af/5f/ff")] ); + push(@arr, [( "[38;5;136m 136: af/87/00", "[38;5;137m 137: af/87/5f", "[38;5;138m 138: af/87/87", "[38;5;139m 139: af/87/af", "[38;5;140m 140: af/87/d7", "[38;5;141m 141: af/87/ff")] ); + push(@arr, [( "[38;5;142m 142: af/af/00", "[38;5;143m 143: af/af/5f", "[38;5;144m 144: af/af/87", "[38;5;145m 145: af/af/af", "[38;5;146m 146: af/af/d7", "[38;5;147m 147: af/af/ff")] ); + push(@arr, [( "[38;5;148m 148: af/d7/00", "[38;5;149m 149: af/d7/5f", "[38;5;150m 150: af/d7/87", "[38;5;151m 151: af/d7/af", "[38;5;152m 152: af/d7/d7", "[38;5;153m 153: af/d7/ff")] ); + push(@arr, [( "[38;5;154m 154: af/ff/00", "[38;5;155m 155: af/ff/5f", "[38;5;156m 156: af/ff/87", "[38;5;157m 157: af/ff/af", "[38;5;158m 158: af/ff/d7", "[38;5;159m 159: af/ff/ff")] ); + push(@arr, [( "[38;5;160m 160: d7/00/00", "[38;5;161m 161: d7/00/5f", "[38;5;162m 162: d7/00/87", "[38;5;163m 163: d7/00/af", "[38;5;164m 164: d7/00/d7", "[38;5;165m 165: d7/00/ff")] ); + push(@arr, [( "[38;5;166m 166: d7/5f/00", "[38;5;167m 167: d7/5f/5f", "[38;5;168m 168: d7/5f/87", "[38;5;169m 169: d7/5f/af", "[38;5;170m 170: d7/5f/d7", "[38;5;171m 171: d7/5f/ff")] ); + push(@arr, [( "[38;5;172m 172: d7/87/00", "[38;5;173m 173: d7/87/5f", "[38;5;174m 174: d7/87/87", "[38;5;175m 175: d7/87/af", "[38;5;176m 176: d7/87/d7", "[38;5;177m 177: d7/87/ff")] ); + push(@arr, [( "[38;5;178m 178: d7/af/00", "[38;5;179m 179: d7/af/5f", "[38;5;180m 180: d7/af/87", "[38;5;181m 181: d7/af/af", "[38;5;182m 182: d7/af/d7", "[38;5;183m 183: d7/af/ff")] ); + push(@arr, [( "[38;5;184m 184: d7/d7/00", "[38;5;185m 185: d7/d7/5f", "[38;5;186m 186: d7/d7/87", "[38;5;187m 187: d7/d7/af", "[38;5;188m 188: d7/d7/d7", "[38;5;189m 189: d7/d7/ff")] ); + push(@arr, [( "[38;5;190m 190: d7/ff/00", "[38;5;191m 191: d7/ff/5f", "[38;5;192m 192: d7/ff/87", "[38;5;193m 193: d7/ff/af", "[38;5;194m 194: d7/ff/d7", "[38;5;195m 195: d7/ff/ff")] ); + push(@arr, [( "[38;5;196m 196: ff/00/00", "[38;5;197m 197: ff/00/5f", "[38;5;198m 198: ff/00/87", "[38;5;199m 199: ff/00/af", "[38;5;200m 200: ff/00/d7", "[38;5;201m 201: ff/00/ff")] ); + push(@arr, [( "[38;5;202m 202: ff/5f/00", "[38;5;203m 203: ff/5f/5f", "[38;5;204m 204: ff/5f/87", "[38;5;205m 205: ff/5f/af", "[38;5;206m 206: ff/5f/d7", "[38;5;207m 207: ff/5f/ff")] ); + push(@arr, [( "[38;5;208m 208: ff/87/00", "[38;5;209m 209: ff/87/5f", "[38;5;210m 210: ff/87/87", "[38;5;211m 211: ff/87/af", "[38;5;212m 212: ff/87/d7", "[38;5;213m 213: ff/87/ff")] ); + push(@arr, [( "[38;5;214m 214: ff/af/00", "[38;5;215m 215: ff/af/5f", "[38;5;216m 216: ff/af/87", "[38;5;217m 217: ff/af/af", "[38;5;218m 218: ff/af/d7", "[38;5;219m 219: ff/af/ff")] ); + push(@arr, [( "[38;5;220m 220: ff/d7/00", "[38;5;221m 221: ff/d7/5f", "[38;5;222m 222: ff/d7/87", "[38;5;223m 223: ff/d7/af", "[38;5;224m 224: ff/d7/d7", "[38;5;225m 225: ff/d7/ff")] ); + push(@arr, [( "[38;5;226m 226: ff/ff/00", "[38;5;227m 227: ff/ff/5f", "[38;5;228m 228: ff/ff/87", "[38;5;229m 229: ff/ff/af", "[38;5;230m 230: ff/ff/d7", "[38;5;231m 231: ff/ff/ff")] ); + push(@arr, [( 
"[38;5;232m 232: 08/08/08", "[38;5;233m 233: 12/12/12", "[38;5;234m 234: 1c/1c/1c", "[38;5;235m 235: 26/26/26", "[38;5;236m 236: 30/30/30", "[38;5;237m 237: 3a/3a/3a")] ); + push(@arr, [( "[38;5;238m 238: 44/44/44", "[38;5;239m 239: 4e/4e/4e", "[38;5;240m 240: 58/58/58", "[38;5;241m 241: 62/62/62", "[38;5;242m 242: 6c/6c/6c", "[38;5;243m 243: 76/76/76")] ); + push(@arr, [( "[38;5;244m 244: 80/80/80", "[38;5;245m 245: 8a/8a/8a", "[38;5;246m 246: 94/94/94", "[38;5;247m 247: 9e/9e/9e", "[38;5;248m 248: a8/a8/a8", "[38;5;249m 249: b2/b2/b2")] ); + push(@arr, [( "[38;5;250m 250: bc/bc/bc", "[38;5;251m 251: c6/c6/c6", "[38;5;252m 252: d0/d0/d0", "[38;5;253m 253: da/da/da", "[38;5;254m 254: e4/e4/e4", "[38;5;255m 255: ee/ee/ee")] ); + + if( $ARGV[0] eq "-s" || $ARGV[1] eq "-s" || $ARGV[2] eq "-s" ){ + $padding = " "; + } + else { + + } + + # display in reverse order + if( $ARGV[0] eq "-r" || $ARGV[1] eq "-r" || $ARGV[2] eq "-r" ){ + for( $dimone = 0; $dimone < scalar @arr; $dimone++ ) { + + $seed = ($dimone % 6) * -1; + for( $dimtwo = 0; $dimtwo < 6; $dimtwo++ ) { + + $movone = $seed; + $movtwo = $seed * -1; + + print $arr[$dimone][$dimtwo] . $padding; + + $seed = $seed+1; + } + + print "\n"; + } + } + else { + for( $dimone = 0; $dimone < scalar @arr; $dimone++ ) { + + $seed = ($dimone % 6) * -1; + for( $dimtwo = 0; $dimtwo < 6; $dimtwo++ ) { + + $movone = $seed; + $movtwo = $seed * -1; + + $newone = $dimone+$movone; + $newtwo = $dimtwo+$movtwo; + + if( $newone < scalar @arr ){ + print $arr[$newone][$newtwo] . $padding; + } + + $seed = $seed+1; + } + + print "\n"; + } + } + print "\n"; + print "\n"; + +} +else { + print "[38;5;16m 16: 00/00/00\n"; + print "[38;5;17m 17: 00/00/5f\n"; + print "[38;5;18m 18: 00/00/87\n"; + print "[38;5;19m 19: 00/00/af\n"; + print "[38;5;20m 20: 00/00/d7\n"; + print "[38;5;21m 21: 00/00/ff\n"; + print "[38;5;22m 22: 00/5f/00\n"; + print "[38;5;23m 23: 00/5f/5f\n"; + print "[38;5;24m 24: 00/5f/87\n"; + print "[38;5;25m 25: 00/5f/af\n"; + print "[38;5;26m 26: 00/5f/d7\n"; + print "[38;5;27m 27: 00/5f/ff\n"; + print "[38;5;28m 28: 00/87/00\n"; + print "[38;5;29m 29: 00/87/5f\n"; + print "[38;5;30m 30: 00/87/87\n"; + print "[38;5;31m 31: 00/87/af\n"; + print "[38;5;32m 32: 00/87/d7\n"; + print "[38;5;33m 33: 00/87/ff\n"; + print "[38;5;34m 34: 00/af/00\n"; + print "[38;5;35m 35: 00/af/5f\n"; + print "[38;5;36m 36: 00/af/87\n"; + print "[38;5;37m 37: 00/af/af\n"; + print "[38;5;38m 38: 00/af/d7\n"; + print "[38;5;39m 39: 00/af/ff\n"; + print "[38;5;40m 40: 00/d7/00\n"; + print "[38;5;41m 41: 00/d7/5f\n"; + print "[38;5;42m 42: 00/d7/87\n"; + print "[38;5;43m 43: 00/d7/af\n"; + print "[38;5;44m 44: 00/d7/d7\n"; + print "[38;5;45m 45: 00/d7/ff\n"; + print "[38;5;46m 46: 00/ff/00\n"; + print "[38;5;47m 47: 00/ff/5f\n"; + print "[38;5;48m 48: 00/ff/87\n"; + print "[38;5;49m 49: 00/ff/af\n"; + print "[38;5;50m 50: 00/ff/d7\n"; + print "[38;5;51m 51: 00/ff/ff\n"; + print "[38;5;52m 52: 5f/00/00\n"; + print "[38;5;53m 53: 5f/00/5f\n"; + print "[38;5;54m 54: 5f/00/87\n"; + print "[38;5;55m 55: 5f/00/af\n"; + print "[38;5;56m 56: 5f/00/d7\n"; + print "[38;5;57m 57: 5f/00/ff\n"; + print "[38;5;58m 58: 5f/5f/00\n"; + print "[38;5;59m 59: 5f/5f/5f\n"; + print "[38;5;60m 60: 5f/5f/87\n"; + print "[38;5;61m 61: 5f/5f/af\n"; + print "[38;5;62m 62: 5f/5f/d7\n"; + print "[38;5;63m 63: 5f/5f/ff\n"; + print "[38;5;64m 64: 5f/87/00\n"; + print "[38;5;65m 65: 5f/87/5f\n"; + print "[38;5;66m 66: 5f/87/87\n"; + print "[38;5;67m 67: 5f/87/af\n"; + print "[38;5;68m 68: 5f/87/d7\n"; + print 
"[38;5;69m 69: 5f/87/ff\n"; + print "[38;5;70m 70: 5f/af/00\n"; + print "[38;5;71m 71: 5f/af/5f\n"; + print "[38;5;72m 72: 5f/af/87\n"; + print "[38;5;73m 73: 5f/af/af\n"; + print "[38;5;74m 74: 5f/af/d7\n"; + print "[38;5;75m 75: 5f/af/ff\n"; + print "[38;5;76m 76: 5f/d7/00\n"; + print "[38;5;77m 77: 5f/d7/5f\n"; + print "[38;5;78m 78: 5f/d7/87\n"; + print "[38;5;79m 79: 5f/d7/af\n"; + print "[38;5;80m 80: 5f/d7/d7\n"; + print "[38;5;81m 81: 5f/d7/ff\n"; + print "[38;5;82m 82: 5f/ff/00\n"; + print "[38;5;83m 83: 5f/ff/5f\n"; + print "[38;5;84m 84: 5f/ff/87\n"; + print "[38;5;85m 85: 5f/ff/af\n"; + print "[38;5;86m 86: 5f/ff/d7\n"; + print "[38;5;87m 87: 5f/ff/ff\n"; + print "[38;5;88m 88: 87/00/00\n"; + print "[38;5;89m 89: 87/00/5f\n"; + print "[38;5;90m 90: 87/00/87\n"; + print "[38;5;91m 91: 87/00/af\n"; + print "[38;5;92m 92: 87/00/d7\n"; + print "[38;5;93m 93: 87/00/ff\n"; + print "[38;5;94m 94: 87/5f/00\n"; + print "[38;5;95m 95: 87/5f/5f\n"; + print "[38;5;96m 96: 87/5f/87\n"; + print "[38;5;97m 97: 87/5f/af\n"; + print "[38;5;98m 98: 87/5f/d7\n"; + print "[38;5;99m 99: 87/5f/ff\n"; + print "[38;5;100m 100 :87/87/00\n"; + print "[38;5;101m 101 :87/87/5f\n"; + print "[38;5;102m 102 :87/87/87\n"; + print "[38;5;103m 103 :87/87/af\n"; + print "[38;5;104m 104 :87/87/d7\n"; + print "[38;5;105m 105 :87/87/ff\n"; + print "[38;5;106m 106 :87/af/00\n"; + print "[38;5;107m 107 :87/af/5f\n"; + print "[38;5;108m 108 :87/af/87\n"; + print "[38;5;109m 109 :87/af/af\n"; + print "[38;5;110m 110 :87/af/d7\n"; + print "[38;5;111m 111 :87/af/ff\n"; + print "[38;5;112m 112 :87/d7/00\n"; + print "[38;5;113m 113 :87/d7/5f\n"; + print "[38;5;114m 114 :87/d7/87\n"; + print "[38;5;115m 115 :87/d7/af\n"; + print "[38;5;116m 116 :87/d7/d7\n"; + print "[38;5;117m 117 :87/d7/ff\n"; + print "[38;5;118m 118 :87/ff/00\n"; + print "[38;5;119m 119 :87/ff/5f\n"; + print "[38;5;120m 120 :87/ff/87\n"; + print "[38;5;121m 121 :87/ff/af\n"; + print "[38;5;122m 122 :87/ff/d7\n"; + print "[38;5;123m 123 :87/ff/ff\n"; + print "[38;5;124m 124 :af/00/00\n"; + print "[38;5;125m 125 :af/00/5f\n"; + print "[38;5;126m 126 :af/00/87\n"; + print "[38;5;127m 127 :af/00/af\n"; + print "[38;5;128m 128 :af/00/d7\n"; + print "[38;5;129m 129 :af/00/ff\n"; + print "[38;5;130m 130 :af/5f/00\n"; + print "[38;5;131m 131 :af/5f/5f\n"; + print "[38;5;132m 132 :af/5f/87\n"; + print "[38;5;133m 133 :af/5f/af\n"; + print "[38;5;134m 134 :af/5f/d7\n"; + print "[38;5;135m 135 :af/5f/ff\n"; + print "[38;5;136m 136 :af/87/00\n"; + print "[38;5;137m 137 :af/87/5f\n"; + print "[38;5;138m 138 :af/87/87\n"; + print "[38;5;139m 139 :af/87/af\n"; + print "[38;5;140m 140 :af/87/d7\n"; + print "[38;5;141m 141 :af/87/ff\n"; + print "[38;5;142m 142 :af/af/00\n"; + print "[38;5;143m 143 :af/af/5f\n"; + print "[38;5;144m 144 :af/af/87\n"; + print "[38;5;145m 145 :af/af/af\n"; + print "[38;5;146m 146 :af/af/d7\n"; + print "[38;5;147m 147 :af/af/ff\n"; + print "[38;5;148m 148 :af/d7/00\n"; + print "[38;5;149m 149 :af/d7/5f\n"; + print "[38;5;150m 150 :af/d7/87\n"; + print "[38;5;151m 151 :af/d7/af\n"; + print "[38;5;152m 152 :af/d7/d7\n"; + print "[38;5;153m 153 :af/d7/ff\n"; + print "[38;5;154m 154 :af/ff/00\n"; + print "[38;5;155m 155 :af/ff/5f\n"; + print "[38;5;156m 156 :af/ff/87\n"; + print "[38;5;157m 157 :af/ff/af\n"; + print "[38;5;158m 158 :af/ff/d7\n"; + print "[38;5;159m 159 :af/ff/ff\n"; + print "[38;5;160m 160 :d7/00/00\n"; + print "[38;5;161m 161 :d7/00/5f\n"; + print "[38;5;162m 162 :d7/00/87\n"; + print "[38;5;163m 163 :d7/00/af\n"; + print 
"[38;5;164m 164 :d7/00/d7\n"; + print "[38;5;165m 165 :d7/00/ff\n"; + print "[38;5;166m 166 :d7/5f/00\n"; + print "[38;5;167m 167 :d7/5f/5f\n"; + print "[38;5;168m 168 :d7/5f/87\n"; + print "[38;5;169m 169 :d7/5f/af\n"; + print "[38;5;170m 170 :d7/5f/d7\n"; + print "[38;5;171m 171 :d7/5f/ff\n"; + print "[38;5;172m 172 :d7/87/00\n"; + print "[38;5;173m 173 :d7/87/5f\n"; + print "[38;5;174m 174 :d7/87/87\n"; + print "[38;5;175m 175 :d7/87/af\n"; + print "[38;5;176m 176 :d7/87/d7\n"; + print "[38;5;177m 177 :d7/87/ff\n"; + print "[38;5;178m 178 :d7/af/00\n"; + print "[38;5;179m 179 :d7/af/5f\n"; + print "[38;5;180m 180 :d7/af/87\n"; + print "[38;5;181m 181 :d7/af/af\n"; + print "[38;5;182m 182 :d7/af/d7\n"; + print "[38;5;183m 183 :d7/af/ff\n"; + print "[38;5;184m 184 :d7/d7/00\n"; + print "[38;5;185m 185 :d7/d7/5f\n"; + print "[38;5;186m 186 :d7/d7/87\n"; + print "[38;5;187m 187 :d7/d7/af\n"; + print "[38;5;188m 188 :d7/d7/d7\n"; + print "[38;5;189m 189 :d7/d7/ff\n"; + print "[38;5;190m 190 :d7/ff/00\n"; + print "[38;5;191m 191 :d7/ff/5f\n"; + print "[38;5;192m 192 :d7/ff/87\n"; + print "[38;5;193m 193 :d7/ff/af\n"; + print "[38;5;194m 194 :d7/ff/d7\n"; + print "[38;5;195m 195 :d7/ff/ff\n"; + print "[38;5;196m 196 :ff/00/00\n"; + print "[38;5;197m 197 :ff/00/5f\n"; + print "[38;5;198m 198 :ff/00/87\n"; + print "[38;5;199m 199 :ff/00/af\n"; + print "[38;5;200m 200 :ff/00/d7\n"; + print "[38;5;201m 201 :ff/00/ff\n"; + print "[38;5;202m 202 :ff/5f/00\n"; + print "[38;5;203m 203 :ff/5f/5f\n"; + print "[38;5;204m 204 :ff/5f/87\n"; + print "[38;5;205m 205 :ff/5f/af\n"; + print "[38;5;206m 206 :ff/5f/d7\n"; + print "[38;5;207m 207 :ff/5f/ff\n"; + print "[38;5;208m 208 :ff/87/00\n"; + print "[38;5;209m 209 :ff/87/5f\n"; + print "[38;5;210m 210 :ff/87/87\n"; + print "[38;5;211m 211 :ff/87/af\n"; + print "[38;5;212m 212 :ff/87/d7\n"; + print "[38;5;213m 213 :ff/87/ff\n"; + print "[38;5;214m 214 :ff/af/00\n"; + print "[38;5;215m 215 :ff/af/5f\n"; + print "[38;5;216m 216 :ff/af/87\n"; + print "[38;5;217m 217 :ff/af/af\n"; + print "[38;5;218m 218 :ff/af/d7\n"; + print "[38;5;219m 219 :ff/af/ff\n"; + print "[38;5;220m 220 :ff/d7/00\n"; + print "[38;5;221m 221 :ff/d7/5f\n"; + print "[38;5;222m 222 :ff/d7/87\n"; + print "[38;5;223m 223 :ff/d7/af\n"; + print "[38;5;224m 224 :ff/d7/d7\n"; + print "[38;5;225m 225 :ff/d7/ff\n"; + print "[38;5;226m 226 :ff/ff/00\n"; + print "[38;5;227m 227 :ff/ff/5f\n"; + print "[38;5;228m 228 :ff/ff/87\n"; + print "[38;5;229m 229 :ff/ff/af\n"; + print "[38;5;230m 230 :ff/ff/d7\n"; + print "[38;5;231m 231 :ff/ff/ff\n"; + print "[38;5;232m 232 :08/08/08\n"; + print "[38;5;233m 233 :12/12/12\n"; + print "[38;5;234m 234 :1c/1c/1c\n"; + print "[38;5;235m 235 :26/26/26\n"; + print "[38;5;236m 236 :30/30/30\n"; + print "[38;5;237m 237 :3a/3a/3a\n"; + print "[38;5;238m 238 :44/44/44\n"; + print "[38;5;239m 239 :4e/4e/4e\n"; + print "[38;5;240m 240 :58/58/58\n"; + print "[38;5;241m 241 :62/62/62\n"; + print "[38;5;242m 242 :6c/6c/6c\n"; + print "[38;5;243m 243 :76/76/76\n"; + print "[38;5;244m 244 :80/80/80\n"; + print "[38;5;245m 245 :8a/8a/8a\n"; + print "[38;5;246m 246 :94/94/94\n"; + print "[38;5;247m 247 :9e/9e/9e\n"; + print "[38;5;248m 248 :a8/a8/a8\n"; + print "[38;5;249m 249 :b2/b2/b2\n"; + print "[38;5;250m 250 :bc/bc/bc\n"; + print "[38;5;251m 251 :c6/c6/c6\n"; + print "[38;5;252m 252 :d0/d0/d0\n"; + print "[38;5;253m 253 :da/da/da\n"; + print "[38;5;254m 254 :e4/e4/e4\n"; + print "[38;5;255m 255 :ee/ee/ee\n"; + print "\n"; + print "\n"; +} +print "0m"; +exit; diff --git 
a/cli/colortest.py b/cli/colortest.py new file mode 100755 index 0000000..2d29590 --- /dev/null +++ b/cli/colortest.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python +# +# http://askubuntu.com/questions/27314/script-to-display-all-terminal-colors + +import sys + +terse = "-t" in sys.argv[1:] or "--terse" in sys.argv[1:] + +for i in range(2 if terse else 10): + for j in range(30, 38): + for k in range(40, 48): + if terse: + print "\33[%d;%d;%dm%d;%d;%d\33[m " % (i, j, k, i, j, k), + else: + print ("%d;%d;%d: \33[%d;%d;%dm Hello, World! \33[m " % + (i, j, k, i, j, k, )) + print + diff --git a/cli/colortest.rb b/cli/colortest.rb new file mode 100755 index 0000000..cc5d6d6 --- /dev/null +++ b/cli/colortest.rb @@ -0,0 +1,26 @@ +#!/usr/bin/env ruby +# coding: utf-8 + +# ANSI color scheme script +# Author: Ivaylo Kuzev < Ivo > +# Original: http://crunchbang.org/forums/viewtopic.php?pid=134749%23p134749#p134749 +# Modified using Ruby. + +CL = "\e[0m" +BO = "\e[1m" + +R = "\e[31m" +G = "\e[32m" +Y = "\e[33m" +B = "\e[34m" +P = "\e[35m" +C = "\e[36m" + +print <<EOF + +#{BO}#{R} ██████ #{CL} #{BO}#{G}██████ #{CL}#{BO}#{Y} ██████#{CL} #{BO}#{B}██████ #{CL} #{BO}#{P} ██████#{CL} #{BO}#{C} ███████#{CL} +#{BO}#{R} ████████#{CL} #{BO}#{G}██ ██ #{CL}#{BO}#{Y}██ #{CL} #{BO}#{B}██ ██#{CL} #{BO}#{P}██████ #{CL} #{BO}#{C} █████████#{CL} +#{R} ██ ████#{CL} #{G}██ ████#{CL}#{Y} ████ #{CL} #{B}████ ██#{CL} #{P}████ #{CL} #{C}█████ #{CL} +#{R} ██ ██#{CL} #{G}██████ #{CL}#{Y} ████████#{CL} #{B}██████ #{CL} #{P}████████#{CL} #{C}██ #{CL} + +EOF diff --git a/cli/colortest.sh b/cli/colortest.sh new file mode 100755 index 0000000..3974d69 --- /dev/null +++ b/cli/colortest.sh @@ -0,0 +1,53 @@ +#!/bin/sh +# Original Posted at http://crunchbang.org/forums/viewtopic.php?pid=126921%23p126921#p126921 +# [ESC] character in original post removed here. + +# ANSI Color -- use these variables to easily have different color +# and format output. Make sure to output the reset sequence after +# colors (f = foreground, b = background), and use the 'off' +# feature for anything you turn on. + +initializeANSI() +{ + esc="$(echo -en '\e')" + + blackf="${esc}[30m"; redf="${esc}[31m"; greenf="${esc}[32m" + yellowf="${esc}[33m" bluef="${esc}[34m"; purplef="${esc}[35m" + cyanf="${esc}[36m"; whitef="${esc}[37m" + + blackb="${esc}[40m"; redb="${esc}[41m"; greenb="${esc}[42m" + yellowb="${esc}[43m" blueb="${esc}[44m"; purpleb="${esc}[45m" + cyanb="${esc}[46m"; whiteb="${esc}[47m" + + boldon="${esc}[1m"; boldoff="${esc}[22m" + italicson="${esc}[3m"; italicsoff="${esc}[23m" + ulon="${esc}[4m"; uloff="${esc}[24m" + invon="${esc}[7m"; invoff="${esc}[27m" + + reset="${esc}[0m" +} + +# note in this first use that switching colors doesn't require a reset +# first - the new color overrides the old one. 
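+#
+# For example (illustrative line, using the variables set by initializeANSI):
+#   echo -e "${redf}red ${greenf}green ${bluef}blue${reset}"
+# prints each word in its own colour; only the trailing ${reset} is needed
+# to restore the terminal defaults.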
+ +#clear + +initializeANSI + +cat << EOF + + ${yellowf} ▄███████▄${reset} ${redf} ▄██████▄${reset} ${greenf} ▄██████▄${reset} ${bluef} ▄██████▄${reset} ${purplef} ▄██████▄${reset} ${cyanf} ▄██████▄${reset} + ${yellowf}▄█████████▀▀${reset} ${redf}▄${whitef}█▀█${redf}██${whitef}█▀█${redf}██▄${reset} ${greenf}▄${whitef}█▀█${greenf}██${whitef}█▀█${greenf}██▄${reset} ${bluef}▄${whitef}█▀█${bluef}██${whitef}█▀█${bluef}██▄${reset} ${purplef}▄${whitef}█▀█${purplef}██${whitef}█▀█${purplef}██▄${reset} ${cyanf}▄${whitef}█▀█${cyanf}██${whitef}█▀█${cyanf}██▄${reset} + ${yellowf}███████▀${reset} ${redf}█${whitef}▄▄█${redf}██${whitef}▄▄█${redf}███${reset} ${greenf}█${whitef}▄▄█${greenf}██${whitef}▄▄█${greenf}███${reset} ${bluef}█${whitef}▄▄█${bluef}██${whitef}▄▄█${bluef}███${reset} ${purplef}█${whitef}▄▄█${purplef}██${whitef}▄▄█${purplef}███${reset} ${cyanf}█${whitef}▄▄█${cyanf}██${whitef}▄▄█${cyanf}███${reset} + ${yellowf}███████▄${reset} ${redf}████████████${reset} ${greenf}████████████${reset} ${bluef}████████████${reset} ${purplef}████████████${reset} ${cyanf}████████████${reset} + ${yellowf}▀█████████▄▄${reset} ${redf}██▀██▀▀██▀██${reset} ${greenf}██▀██▀▀██▀██${reset} ${bluef}██▀██▀▀██▀██${reset} ${purplef}██▀██▀▀██▀██${reset} ${cyanf}██▀██▀▀██▀██${reset} + ${yellowf} ▀███████▀${reset} ${redf}▀ ▀ ▀ ▀${reset} ${greenf}▀ ▀ ▀ ▀${reset} ${bluef}▀ ▀ ▀ ▀${reset} ${purplef}▀ ▀ ▀ ▀${reset} ${cyanf}▀ ▀ ▀ ▀${reset} + + ${boldon}${yellowf} ▄███████▄ ${redf} ▄██████▄ ${greenf} ▄██████▄ ${bluef} ▄██████▄ ${purplef} ▄██████▄ ${cyanf} ▄██████▄${reset} + ${boldon}${yellowf}▄█████████▀▀ ${redf}▄${whitef}█▀█${redf}██${whitef}█▀█${redf}██▄ ${greenf}▄${whitef}█▀█${greenf}██${whitef}█▀█${greenf}██▄ ${bluef}▄${whitef}█▀█${bluef}██${whitef}█▀█${bluef}██▄ ${purplef}▄${whitef}█▀█${purplef}██${whitef}█▀█${purplef}██▄ ${cyanf}▄${whitef}█▀█${cyanf}██${whitef}█▀█${cyanf}██▄${reset} + ${boldon}${yellowf}███████▀ ${redf}█${whitef}▄▄█${redf}██${whitef}▄▄█${redf}███ ${greenf}█${whitef}▄▄█${greenf}██${whitef}▄▄█${greenf}███ ${bluef}█${whitef}▄▄█${bluef}██${whitef}▄▄█${bluef}███ ${purplef}█${whitef}▄▄█${purplef}██${whitef}▄▄█${purplef}███ ${cyanf}█${whitef}▄▄█${cyanf}██${whitef}▄▄█${cyanf}███${reset} + ${boldon}${yellowf}███████▄ ${redf}████████████ ${greenf}████████████ ${bluef}████████████ ${purplef}████████████ ${cyanf}████████████${reset} + ${boldon}${yellowf}▀█████████▄▄ ${redf}██▀██▀▀██▀██ ${greenf}██▀██▀▀██▀██ ${bluef}██▀██▀▀██▀██ ${purplef}██▀██▀▀██▀██ ${cyanf}██▀██▀▀██▀██${reset} + ${boldon}${yellowf} ▀███████▀ ${redf}▀ ▀ ▀ ▀ ${greenf}▀ ▀ ▀ ▀ ${bluef}▀ ▀ ▀ ▀ ${purplef}▀ ▀ ▀ ▀ ${cyanf}▀ ▀ ▀ ▀${reset} + +EOF diff --git a/cli/csv2json.py b/cli/csv2json.py new file mode 100755 index 0000000..54f6be2 --- /dev/null +++ b/cli/csv2json.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# This is a simple tool that converts CSV file into a JSON file. +# The first line of the input CSV file is used as the field names. +# +# Use 'OrderedDict' to keep the input fields order. +# +# Aaron LI +# 2015/06/11 +# + +from __future__ import print_function, division + +__version__ = "0.1.0" +__date__ = "2015/06/11" + +import sys +import argparse +import csv +import json + +from collections import OrderedDict + + +def csv2json(csvfile, jsonfile=None): + """ + Convert CSV data to JSON data. + The first line of CSV data is used as the field names. + + Return: + If jsonfile is None, then return a list of JSON dict. 
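+
+    Example (illustrative file names):
+        csv2json("data.csv")                # -> list of OrderedDict
+        csv2json("data.csv", "data.json")   # write JSON to "data.json"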
+ """ + if not hasattr(csvfile, "read"): + csvfile = open(csvfile, "r") + if (jsonfile is not None) and (not hasattr(jsonfile, "write")): + jsonfile = open(jsonfile, "w") + csvdata = list(csv.reader(csvfile)) + fieldnames = csvdata[0] + # use 'OrderedDict' to keep fields order + jsondata = [ OrderedDict(zip(fieldnames, row)) for row in csvdata[1:] ] + csvfile.close() + if jsonfile is None: + return jsondata + else: + # 'ensure_ascii=False' to support UTF-8 + json.dump(jsondata, jsonfile, ensure_ascii=False, indent=4) + jsonfile.close() + + +def main(): + # command line options & arguments + parser = argparse.ArgumentParser( + description="Simple CSV to JSON convertor") + parser.add_argument("-V", "--version", action="version", + version="%(prog)s " + "%s (%s)" % (__version__, __date__)) + parser.add_argument("csvfile", help="Input CSV file") + parser.add_argument("jsonfile", nargs="?", default=sys.stdout, + help="Output JSON file (default stdout)") + args = parser.parse_args() + + csv2json(args.csvfile, args.jsonfile) + + +if __name__ == "__main__": + main() + diff --git a/cli/jpegs2pdf.sh b/cli/jpegs2pdf.sh new file mode 100755 index 0000000..6d42bab --- /dev/null +++ b/cli/jpegs2pdf.sh @@ -0,0 +1,42 @@ +#!/bin/sh +# +############################################################################# +# +# Shellscript to convert a set of JPEG files to a multipage PDF. +# +# Requirements: (1) Ghostscript needs to be installed on the local system. +# (2) ImageMagick needs to be installed on the local system. +# +# Usage: jpegs2pdf.sh output.pdf file1.jpeg [file2.jpeg [file2.jpeg [...]]] +# +# Copyright (c) 2007, <pipitas@gmail.com> +# Use, distribute and modify without any restrictions. +# +# Versions: +# v1.0.0, Jul 12 2007: initial version +# v1.0.1, Jan 07 2011: set viewJPEG.ps path (self-compiled GS 9.02) +# +############################################################################# + +if [ $# -eq 0 ]; then + echo "Usage:" + echo " `basename $0` output.pdf 1.jpg ..." + exit 1 +fi + +outfile=$1 +shift + +param="" +for i in "$@" ; do + dimension=$(identify -format "%[fx:(w)] %[fx:(h)]" "${i}") + param="${param} <</PageSize [${dimension}]>> setpagedevice (${i}) viewJPEG showpage" +done + +gs \ + -sDEVICE=pdfwrite \ + -dPDFSETTINGS=/prepress \ + -o "$outfile" \ + viewjpeg.ps \ + -c "${param}" + diff --git a/cli/pdfmerge.sh b/cli/pdfmerge.sh new file mode 100755 index 0000000..aef72db --- /dev/null +++ b/cli/pdfmerge.sh @@ -0,0 +1,23 @@ +#!/bin/sh +# +# Merge multiple PDFs with pdftk. +# +# Ref: +# Merging Multiple PDFs under GNU/Linux +# https://blog.dbrgn.ch/2013/8/14/merge-multiple-pdfs/ +# +# Weitian LI +# 2015/01/23 +# + +if [ $# -lt 2 ]; then + printf "Usage: `basename $0` out.pdf in1.pdf ...\n" + exit 1 +fi + +outpdf="$1" +shift + +echo "Input files: $@" +pdftk "$@" cat output "${outpdf}" + diff --git a/cli/shrinkpdf.sh b/cli/shrinkpdf.sh new file mode 100755 index 0000000..1190fce --- /dev/null +++ b/cli/shrinkpdf.sh @@ -0,0 +1,56 @@ +#!/bin/sh +# +# Shrink the size of PDF files by adjust its quality using gs. 
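+#
+# Example invocation (illustrative; the file names are placeholders):
+#   ./shrinkpdf.sh in=input.pdf out=output.pdf quality=ebook imgdpi=96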
+# +# Aaron LI +# 2013/09/18 +# + +case "$1" in + -[hH]*|--[hH]*) + printf "usage:\n" + printf " `basename $0` in=<input.pdf> out=<output.pdf> quality=<screen|ebook|printer|prepress> imgdpi=<img_dpi>\n" + exit 1 + ;; +esac + +getopt_keyval() { + until [ -z "$1" ] + do + key=${1%%=*} # extract key + val=${1#*=} # extract value + keyval="${key}=\"${val}\"" + echo "## getopt: eval '${keyval}'" + eval ${keyval} + shift # shift, process next one + done +} +getopt_keyval "$@" + +if [ -z "${in}" ] || [ -z "${out}" ]; then + printf "Error: 'in' or 'out' not specified\n" + exit 2 +fi +quality=${quality:-ebook} +imgdpi=${imgdpi:-120} + +printf "# in: ${in} +# out: ${out} +# quality: ${quality} +# imgdpi: ${imgdpi}\n" + +gs -dNOPAUSE -dBATCH -dSAFER \ + -sDEVICE=pdfwrite \ + -dCompatibilityLevel=1.4 \ + -dPDFSETTINGS="/${quality}" \ + -dEmbedAllFonts=true \ + -dSubsetFonts=true \ + -dColorImageDownsampleType=/Bicubic \ + -dColorImageResolution=${imgdpi} \ + -dGrayImageDownsampleType=/Bicubic \ + -dGrayImageResolution=${imgdpi} \ + -dMonoImageDownsampleType=/Bicubic \ + -dMonoImageResolution=${imgdpi} \ + -sOutputFile=${out} \ + ${in} + diff --git a/cli/term_color.sh b/cli/term_color.sh new file mode 100755 index 0000000..f10f916 --- /dev/null +++ b/cli/term_color.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# +# This file echoes a bunch of color codes to the +# terminal to demonstrate what's available. Each +# line is the color code of one forground color, +# out of 17 (default + 16 escapes), followed by a +# test use of that color on all nine background +# colors (default + 8 escapes). +# +# Ref: https://wiki.archlinux.org/index.php/X_resources +# + +T='gYw' # The test text + +echo -e "\n 40m 41m 42m 43m\ + 44m 45m 46m 47m"; + +for FGs in ' m' ' 1m' ' 30m' '1;30m' ' 31m' '1;31m' ' 32m' \ + '1;32m' ' 33m' '1;33m' ' 34m' '1;34m' ' 35m' '1;35m' \ + ' 36m' '1;36m' ' 37m' '1;37m'; + do FG=${FGs// /} + echo -en " $FGs \033[$FG $T " + for BG in 40m 41m 42m 43m 44m 45m 46m 47m; + do echo -en "$EINS \033[$FG\033[$BG $T \033[0m"; + done + echo; +done +echo diff --git a/cli/term_color_2.sh b/cli/term_color_2.sh new file mode 100755 index 0000000..4dc2ef2 --- /dev/null +++ b/cli/term_color_2.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Original: http://frexx.de/xterm-256-notes/ +# http://frexx.de/xterm-256-notes/data/colortable16.sh +# Modified by Aaron Griffin +# and further by Kazuo Teramoto +FGNAMES=(' black ' ' red ' ' green ' ' yellow' ' blue ' 'magenta' ' cyan ' ' white ') +BGNAMES=('DFT' 'BLK' 'RED' 'GRN' 'YEL' 'BLU' 'MAG' 'CYN' 'WHT') + +echo " ┌──────────────────────────────────────────────────────────────────────────┐" +for b in {0..8}; do + ((b>0)) && bg=$((b+39)) + + echo -en "\033[0m ${BGNAMES[b]} │ " + + for f in {0..7}; do + echo -en "\033[${bg}m\033[$((f+30))m ${FGNAMES[f]} " + done + + echo -en "\033[0m │" + echo -en "\033[0m\n\033[0m │ " + + for f in {0..7}; do + echo -en "\033[${bg}m\033[1;$((f+30))m ${FGNAMES[f]} " + done + + echo -en "\033[0m │" + echo -e "\033[0m" + + ((b<8)) && + echo " ├──────────────────────────────────────────────────────────────────────────┤" +done +echo " └──────────────────────────────────────────────────────────────────────────┘" diff --git a/cli/term_color_3.sh b/cli/term_color_3.sh new file mode 100755 index 0000000..85b499a --- /dev/null +++ b/cli/term_color_3.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# Original: http://frexx.de/xterm-256-notes/ +# http://frexx.de/xterm-256-notes/data/colortable16.sh +# Modified by Aaron Griffin +# and further by Kazuo Teramoto + + 
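+# This variant prints the same 16-color foreground/background table as
+# term_color_2.sh, but draws the borders with plain ASCII dashes instead
+# of Unicode box-drawing characters.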
+FGNAMES=(' black ' ' red ' ' green ' ' yellow' ' blue ' 'magenta' ' cyan ' ' white ') +BGNAMES=('DFT' 'BLK' 'RED' 'GRN' 'YEL' 'BLU' 'MAG' 'CYN' 'WHT') +echo " ----------------------------------------------------------------------------" +for b in $(seq 0 8); do + if [ "$b" -gt 0 ]; then + bg=$(($b+39)) + fi + + echo -en "\033[0m ${BGNAMES[$b]} : " + for f in $(seq 0 7); do + echo -en "\033[${bg}m\033[$(($f+30))m ${FGNAMES[$f]} " + done + echo -en "\033[0m :" + + echo -en "\033[0m\n\033[0m : " + for f in $(seq 0 7); do + echo -en "\033[${bg}m\033[1;$(($f+30))m ${FGNAMES[$f]} " + done + echo -en "\033[0m :" + echo -e "\033[0m" + + if [ "$b" -lt 8 ]; then + echo " ----------------------------------------------------------------------------" + fi +done +echo " ----------------------------------------------------------------------------" diff --git a/cli/unzip-gbk.py b/cli/unzip-gbk.py new file mode 100755 index 0000000..423e10f --- /dev/null +++ b/cli/unzip-gbk.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# unzip-gbk.py +# +# http://note.ninehills.info/linux-gbk.html +# + +import os +import sys +import zipfile + +print "Processing File " + sys.argv[1] + +file=zipfile.ZipFile(sys.argv[1],"r"); +for name in file.namelist(): + utf8name=name.decode('gbk') + print "Extracting " + utf8name + pathname = os.path.dirname(utf8name) + if not os.path.exists(pathname) and pathname!= "": + os.makedirs(pathname) + data = file.read(name) + if not os.path.exists(utf8name): + fo = open(utf8name, "w") + fo.write(data) + fo.close +file.close() diff --git a/cli/vimpager b/cli/vimpager new file mode 100755 index 0000000..447fd9a --- /dev/null +++ b/cli/vimpager @@ -0,0 +1,85 @@ +#!/bin/sh + +# Script for using ViM as a PAGER. +# Based on Bram's less.sh. +# Version 1.3 + +file="$@" +if [ -z "$file" ]; then file="-"; fi + +if uname -s | grep -iq cygwin; then + cygwin=1 +elif uname -s | grep -iq linux; then + linux=1 +elif uname -s | grep -iq sunos; then + solaris=1 +else + bsd=1 +fi + +less_vim() { + vim -R \ + -c 'let no_plugin_maps = 1' \ + -c 'set scrolloff=999' \ + -c 'runtime! 
macros/less.vim' \ + -c 'set foldlevel=999' \ + -c 'set mouse=h' \ + -c 'set nonu' \ + -c 'nmap <ESC>u :nohlsearch<cr>' \ + "$@" +} + +do_ps() { + if [ $solaris ]; then + ps -u `id -u` -o pid,comm= + elif [ $bsd ]; then + ps -U `id -u` -o pid,comm= + else + ps fuxw + fi +} + +pproc() { + if [ $linux ]; then + ps -p $1 -o comm= + elif [ $cygwin ]; then + ps -p $1 | sed -e 's/^I/ /' | grep -v PID + else + ps -p $1 -o comm= | grep -v PID + fi +} + +ppid() { + if [ $linux ]; then + ps -p $1 -o ppid= + elif [ $cygwin ]; then + ps -p $1 | sed -e 's/^I/ /' | grep -v PID | awk '{print $2}' + else + ps -p $1 -o ppid= | grep -v PID + fi +} + +# Check if called from man, perldoc or pydoc +if do_ps | grep -q '\(py\(thon\|doc\)\|man\|perl\(doc\)\?\([0-9.]*\)\?\)\>'; then + proc=$$ + while next_parent=`ppid $proc` && [ $next_parent != 1 ]; do + if pproc $next_parent | grep -q 'man\>'; then + cat $file | sed -e 's/\[[^m]*m//g' | sed -e 's/.//g' | less_vim -c 'set ft=man' -; exit + elif pproc $next_parent | grep -q 'py\(thon\|doc\)\>'; then + cat $file | sed -e 's/\[[^m]*m//g' | sed -e 's/.//g' | less_vim -c 'set ft=man' -; exit + elif pproc $next_parent | grep -q 'perl\(doc\)\?\([0-9.]*\)\?\>'; then + cat $file | sed -e 's/.//g' | less_vim -c 'set ft=man' -; exit + fi + proc=$next_parent + done +fi + +less_vim "$file" + +# CONTRIBUTORS: +# +# Rafael Kitover +# Antonio Ospite +# Jean-Marie Gaillourdet +# Perry Hargrave +# Koen Smits diff --git a/cluster/kMeans.py b/cluster/kMeans.py new file mode 100644 index 0000000..f4868c6 --- /dev/null +++ b/cluster/kMeans.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Credit: Machine Learning in Action: Chapter 10 +# +# Aaron LI +# 2015/06/23 +# + +""" +k-means clustering algorithm +""" + + +import numpy as np + + +def loadDataSet(fileName): + dataMat = [] + fr = open(fileName) + for line in fr.readlines(): + curLine = line.strip().split('\t') + fltLine = list(map(float, curLine)) + dataMat.append(fltLine) + return np.array(dataMat) + + +def distEclud(vecA, vecB): + return np.sqrt(np.sum(np.power(vecA - vecB, 2))) + + +def randCent(dataSet, k): + n = np.shape(dataSet)[1] + centroids = np.zeros((k, n)) + for j in range(n): + minJ = np.min(dataSet[:, j]) + rangeJ = float(np.max(dataSet[:, j]) - minJ) + centroids[:, j] = minJ + rangeJ * np.random.rand(k) + return centroids + + +def kMeans(dataSet, k, distMeas=distEclud, createCent=randCent): + m = np.shape(dataSet)[0] + clusterAssment = np.zeros((m, 2)) + centroids = createCent(dataSet, k) + clusterChanged = True + iterations = 0 + while clusterChanged: + clusterChanged = False + iterations += 1 + for i in range(m): + minDist = np.inf + minIndex = -1 + # to find the nearest centroid + for j in range(k): + distJI = distMeas(centroids[j, :], dataSet[i, :]) + if distJI < minDist: + minDist = distJI + minIndex = j + if clusterAssment[i, 0] != minIndex: + clusterChanged = True + clusterAssment[i, :] = minIndex, minDist**2 + #print(centroids) + for cent in range(k): + ptsInClust = dataSet[np.nonzero(clusterAssment[:, 0] == cent)] + centroids[cent, :] = np.mean(ptsInClust, axis=0) + result = { + 'k': k, + 'centroids': centroids, + 'labels': clusterAssment[:, 0].astype(int), + 'distance2': clusterAssment[:, 1], + 'accessment': clusterAssment, + 'iterations': iterations + } + return result + diff --git a/img/force_field_transform.jl b/img/force_field_transform.jl new file mode 100644 index 0000000..1b3872a --- /dev/null +++ b/img/force_field_transform.jl @@ -0,0 +1,135 @@ +#!/usr/bin/env julia +# -*- 
coding: utf-8 -*- +# +# Force field transform +# +# Aaron LI +# 2015/07/14 +# + +using FITSIO; +#include("../julia/ndgrid.jl"); + +@everywhere function meshgrid(vx, vy) + m, n = length(vy), length(vx) + vx = reshape(vx, 1, n) + vy = reshape(vy, m, 1) + (repmat(vx, m, 1), repmat(vy, 1, n)) +end + + +# Calculate the forces between the specified point with respect to the image. +@everywhere function force(p0, img) + img = copy(img); + x0, y0 = p0; + v0 = img[y0, x0]; + img[y0, x0] = 0.0; + rows, cols = size(img); + x, y = meshgrid(1:cols, 1:rows); + x[y0, x0] = -1; + y[y0, x0] = -1; + f_x = v0 .* img .* (x-x0) ./ ((x-x0).^2 + (y-y0).^2).^1.5; + f_y = v0 .* img .* (y-y0) ./ ((x-x0).^2 + (y-y0).^2).^1.5; + #return (f_x, f_y); + return (sum(f_x), sum(f_y)); +end + + +# Perform the "force field transform" for the input image. +# +# Return: +# (amplitudes, angles) +# amplitudes: the amplitudes of the resulting forces of each pixel +# angles: the directions of the resulting forces of each pixel, +# in unit radian. +@everywhere function force_field_transform_serial(img, rowstart=1, rowend="end") + rows, cols = size(img) + if rowend == "end" + rowend = rows + end + amplitudes = zeros(rows, cols) + angles = zeros(rows, cols) + t0 = time() + t_p = t0 + 30 # in 30 seconds + for y = rowstart:rowend + for x = 1:cols + t1 = time() + if (t1 >= t_p) + percent = 100*((y-rowstart)*cols + x+1) / ((rowend-rowstart+1)*cols) + @printf("Worker #%d: progress: %.3f%%; %.1f min\n", + myid(), percent, (t1-t0)/60.0) + t_p += 30 # in 30 seconds + end + F_x, F_y = force((x, y), img) + #@printf("F_x, F_y = (%f, %f)\n", F_x, F_y); + amplitudes[y, x] = sqrt(F_x^2 + F_y^2) + angles[y, x] = atan2(F_y, F_x) + end + end + t1 = time() + @printf("Worker #%d: finished in %.1f min!\n", myid(), (t1-t0)/60.0) + return (amplitudes, angles) +end + + +# parallel-capable +function force_field_transform(img) + t0 = time() + rows, cols = size(img) + np = nprocs() + amplitudes = cell(np) + angles = cell(np) + # split rows for each process + rows_chunk = div(rows, np) + rowstart = cell(np) + rowend = cell(np) + @sync begin + for p = 1:np + rowstart[p] = 1 + rows_chunk * (p-1) + if p == np + rowend[p] = rows + else + rowend[p] = rowstart[p] + rows_chunk - 1 + end + # perform transform + @async begin + amplitudes[p], angles[p] = remotecall_fetch(p, + force_field_transform_serial, + img, rowstart[p], rowend[p]) + end + end + end + t1 = time() + @printf("Finished in %.1f min!\n", (t1-t0)/60.0) + return (sum(amplitudes), sum(angles)) +end + + +# arguments +#println(ARGS); +if length(ARGS) != 3 + println("Usage: PROG <input_fits_img> <out_fits_amplitudes> <out_fits_angles>"); + exit(1); +end + +infile = ARGS[1]; +outfile_ampl = ARGS[2]; +outfile_angles = ARGS[3]; + +fits_img = FITS(infile); +img = read(fits_img[1]); +header = read_header(fits_img[1]); + +# perform force field transform +ampl, angles = force_field_transform(img); + +outfits_ampl = FITS(outfile_ampl, "w"); +outfits_angles = FITS(outfile_angles, "w"); +write(outfits_ampl, ampl; header=header); +write(outfits_angles, angles; header=header); + +close(fits_img); +close(outfits_ampl); +close(outfits_angles); + +#= vim: set ts=8 sw=4 tw=0 fenc=utf-8 ft=julia: =# diff --git a/img/force_field_transform.py b/img/force_field_transform.py new file mode 100644 index 0000000..2b185c8 --- /dev/null +++ b/img/force_field_transform.py @@ -0,0 +1,126 @@ +# -*- coding: utf -*- +# +# Force field transform (Hurley et al., 2002, 2005) +# + +""" +Force field transform +""" + +import sys +import time 
+import numpy as np + + +def force(p1, p2): + """ + The force between two points of the image. + + Arguments: + p1, p2: (value, x, y) + + Return: + # (force, angle): value and direction of the force. + # angle: (-pi, pi], with respect to p1. + (f_x, f_y): x and y components of the force + """ + v1, x1, y1 = p1 + v2, x2, y2 = p2 + #force = v1*v2 / ((x1-x2)**2 + (y1-y2)**2) + #angle = np.atan2(y2-y1, x2-x1) + #return (force, angle) + f_x = v1 * v2 * (x2-x1) / ((x2-x1)**2 + (y2-y1)**2)**1.5 + f_y = v1 * v2 * (y2-y1) / ((x2-x1)**2 + (y2-y1)**2)**1.5 + return (f_x, f_y) + + +def force_array(p0, img): + """ + The forces between the input point with respect to the image. + + Arguments: + p0: (x, y), note (x, y) start with zero. + img: input image, a numpy array + + Return: + (f_x, f_y): x and y components of the forces of the same size + of the input image + """ + x0, y0 = p0 + v0 = img[y0, x0] + img[y0, x0] = 0.0 + x, y = np.meshgrid(range(img.shape[1]), range(img.shape[0])) + x[y0, x0] = -1 + y[y0, x0] = -1 + f_x = v0 * img * (x-x0) / ((x-x0)**2 + (y-y0)**2)**1.5 + f_y = v0 * img * (y-y0) / ((x-x0)**2 + (y-y0)**2)**1.5 + return (f_x, f_y) + + +def vector_add(v1, v2): + """ + Add two vectors and return the results. + + Arguments: + v1, v2: two input vectors of format (f_x, f_y) + + Return: + (F_x, F_y) + """ + f1_x, f1_y = v1 + f2_x, f2_y = v2 + return (f1_x+f2_x, f1_y+f2_y) + + +def force_summation(pixel, img): + """ + Calculate the resulting force of the specified pixel with respect to + the image. + + Argument: + pixel: the position (x, y) of the pixel to be calculated + img: the input image + + Return: + (F_x, F_y): x and y components of the resulting force. + """ + img = np.array(img) + x0, y0 = pixel + f_x, f_y = force_array((x0, y0), img) + return (f_x.sum(), f_y.sum()) + + +def force_field_transform(img): + """ + Perform the "force field transform" on the input image. + + Arguments: + img: input 2D image + + Return: + (amplitudes, angles) + amplitudes: the amplitudes of the resulting forces of each pixel + angles: the directions of the resulting forces of each pixel, + in unit radian. 
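+
+    NOTE: each output pixel sums the force contributions from all other
+    pixels, so the cost grows quadratically with the number of pixels;
+    this straightforward implementation is only practical for small images.
+
+    Example (illustrative):
+        >>> amplitudes, angles = force_field_transform(np.ones((32, 32)))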
+ """ + img = np.array(img) + amplitudes = np.zeros(img.shape) + angles = np.zeros(img.shape) + rows, cols = img.shape + t0 = time.time() + t_p = t0 + 30 # in 30 seconds + for y in range(rows): + for x in range(cols): + t1 = time.time() + if t1 >= t_p: + percent = 100 * (y*cols + x + 1) / (rows * cols) + print("progress: %.3f%%; %.1f min" % (percent, (t1-t0)/60.0), + file=sys.stderr) + t_p += 30 # in 30 seconds + f_x, f_y = force_array((x, y), img) + F_x, F_y = f_x.sum(), f_y.sum() + amplitudes[y, x] = np.sqrt(F_x**2 + F_y**2) + angles[y, x] = np.math.atan2(F_y, F_x) + return (amplitudes, angles) + + diff --git a/img/force_field_transform_fft.jl b/img/force_field_transform_fft.jl new file mode 100644 index 0000000..c5bf905 --- /dev/null +++ b/img/force_field_transform_fft.jl @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +# +# To do force field transform using FFT +# +# Aaron LI +# 2015/07/16 +# + +function forcefieldtransform_fft(img) + rows, cols = size(img) + pic = zeros(3*rows, 3*cols) + pic[1:rows, 1:cols] = img + # unit force field + unit_ff = complex(zeros(3*rows, 3*cols)) + for r = 1:(2*rows-1) + for c = 1:(2*cols) + d = (rows+cols*im) - (r+c*im) + if (r, c) == (rows, cols) + unit_ff[r, c] = 0 + 0im + else + unit_ff[r, c] = d / abs(d)^3 + end + end + end + # FIXME matrix sizes + ff = sqrt(rows*cols) * ifft(fft(pic) .* fft(unit_ff)) + #ff_crop = ff[rows:2*rows, cols:2*cols] +end + diff --git a/img/forcefield.jl b/img/forcefield.jl new file mode 100644 index 0000000..bf2c236 --- /dev/null +++ b/img/forcefield.jl @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +# +# Force field transform with specified size of mask. +# +# Aaron LI +# 2015/07/19 +# + +# Make the specified sized force field mask. +# NOTE: the number of rows and cols must be odd. +function ff_mask(rows=5, cols=5) + rows % 2 == 1 || error("rows must be odd number") + cols % 2 == 1 || error("cols must be odd number") + mask = complex(zeros(rows, cols)) + for r = range(-div(rows, 2), rows) + for c = range(-div(cols, 2), cols) + i, j = r + div(rows+1, 2), c + div(cols+1, 2) + #@printf("(r,c) = (%d,%d); (i,j) = (%d,%d)\n", r, c, i, j) + d = c + r*im + if abs(d) < 1e-8 + mask[i, j] = 0.0 + else + mask[i, j] = d / abs(d)^3 + end + end + end + return mask / sum(abs(mask)) +end + + +# Padding image by specified number of rows and cols. +# Default padding mode: mirror +function pad_image(img, pad_rows, pad_cols, mode="mirror") + rows, cols = size(img) + rows_new, cols_new = rows + 2*pad_rows, cols + 2*pad_cols + img_pad = zeros(rows_new, cols_new) + img_pad[(pad_rows+1):(pad_rows+rows), (pad_cols+1):(pad_cols+cols)] = img + for r = 1:rows_new + for c = 1:cols_new + if mode == "mirror" + if r <= pad_rows + r_mirror = 2*(pad_rows+1) - r + elseif r <= pad_rows+rows + r_mirror = r + else + r_mirror = 2*(pad_rows+rows) - r + end + if c <= pad_cols + c_mirror = 2*(pad_cols+1) - c + elseif c <= pad_cols+cols + c_mirror = c + else + c_mirror = 2*(pad_cols+cols) - c + end + if (r_mirror, c_mirror) != (r, c) + #@printf("(%d,%d) <= (%d,%d)\n", r, c, r_mirror, c_mirror) + img_pad[r, c] = img_pad[r_mirror, c_mirror] + end + else + error("mode not supported") + end + end + end + return img_pad +end + + +# Perform force field transform for the image. 
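+#
+# Usage sketch (illustrative):
+#   mask = ff_mask(5, 5)
+#   amplitudes, angles = ff_transform(img, mask)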
+function ff_transform(img, mask, mode="mirror") + rows, cols = size(img) + mask_rows, mask_cols = size(mask) + pad_rows, pad_cols = div(mask_rows, 2), div(mask_cols, 2) + img_pad = pad_image(img, pad_rows, pad_cols) + # result images + ff_amplitudes = zeros(rows, cols) + ff_angles = zeros(rows, cols) + # calculate transformed values + for r = (pad_rows+1):(pad_rows+rows) + for c = (pad_cols+1):(pad_cols+cols) + force = sum(img_pad[r, c] * img_pad[(r-pad_rows):(r+pad_rows), (c-pad_cols):(c+pad_cols)] .* mask) + ff_amplitudes[r-pad_rows, c-pad_cols] = abs(force) + ff_angles[r-pad_rows, c-pad_cols] = angle(force) + end + end + return ff_amplitudes, ff_angles +end + diff --git a/img/png2gif.sh b/img/png2gif.sh new file mode 100644 index 0000000..1357be3 --- /dev/null +++ b/img/png2gif.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +#convert ???.png -background white -alpha remove -resize 50% -layers optimize -delay 5 -loop 0 simu.gif + +[ ! -d gifs ] && mkdir gifs + +for f in ???.png; do + convert $f -trim -resize 50% gifs/${f%.png}.gif +done +gifsicle --delay=5 --loop --colors=256 --optimize=3 gifs/???.gif > simu.gif + diff --git a/julia/anisodiff.jl b/julia/anisodiff.jl new file mode 100644 index 0000000..e01f6db --- /dev/null +++ b/julia/anisodiff.jl @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +# +# ANISODIFF - Anisotropic diffusion +# +# Usage: +# diff = anisodiff(img, niter, kappa, lambda, option) +# +# Arguments: +# | img - input image (2D grayscale) +# | niter - number of iterations +# | kappa - conduction coefficient (gradient modulus threshold) +# | This parameter controls conduction as a function of gradient. +# | If kappa is low, small intensity gradients are able to block +# | conduction and hence diffusion across step edges. A large value +# | reduces the influence of intensity gradients on conduction. +# | lambda - integration constant for stability (0 <= lambda <= 0.25) +# | This parameter controls the diffusion speed, and you +# | usually want it at the maximum value of 0.25. +# | default value: 0.25 +# | option - conduction coefficient functions proposed by Perona & Malik: +# | 1: c(x, y, t) = exp(-(nablaI/kappa).^2) +# | privileges high-contrast edges over low-contrast ones +# | 2: c(x, y, t) = 1 ./ (1 + (nablaI/kappa).^2) +# | privileges wide regions over smaller ones +# | default value: 1 +# +# Returns: +# | diff - anisotropic diffused image +# +# Reference: +# [1] P. Perona and J. Malik. +# Scale-space and edge detection using ansotropic diffusion. +# IEEE Transactions on Pattern Analysis and Machine Intelligence, +# 12(7):629-639, July 1990. +# https://dx.doi.org/10.1109%2F34.56205 +# +# Credits: +# [1] Peter Kovesi +# pk@peterkovesi.com +# MATLAB and Octave Functions for Computer Vision and Image Processing +# http://www.peterkovesi.com/matlabfns/Spatial/anisodiff.m +# -- +# June 2000 original version +# March 2002 corrected diffusion eqn No 2. +# [2] Daniel Lopes +# Anisotropic Diffusion (Perona & Malik) +# http://www.mathworks.com/matlabcentral/fileexchange/14995-anisotropic-diffusion--perona---malik- +# +# +# Aaron LI <aaronly.me@gmail.com> +# 2015/07/17 +# + +include("calc_k_percentile.jl"); + +function anisodiff(img, niter, k=calc_k_percentile, lambda=0.25, option=1) + diff = float(img) + rows, cols = size(diff) + + for i = 1:niter + println("anisodiff - iteration: ", i) + + # Construct diffl which is the same as diff but + # has an extra padding of zeros around it. 
+ diffl = zeros(rows+2, cols+2) + diffl[2:rows+1, 2:cols+1] = diff + + # North, South, East and West differences + deltaN = diffl[1:rows, 2:cols+1] - diff + deltaS = diffl[3:rows+2, 2:cols+1] - diff + deltaE = diffl[2:rows+1, 3:cols+2] - diff + deltaW = diffl[2:rows+1, 1:cols] - diff + + # Calculate the kappa + if isa(k, Function) + kappa = k(diff) + else + kappa = k + end + + println(" kappa: ", kappa) + + # Conduction + if option == 1 + cN = exp(-(deltaN/kappa).^2) + cS = exp(-(deltaS/kappa).^2) + cE = exp(-(deltaE/kappa).^2) + cW = exp(-(deltaW/kappa).^2) + elseif option == 2 + cN = 1 ./ (1 + (deltaN/kappa).^2) + cS = 1 ./ (1 + (deltaS/kappa).^2) + cE = 1 ./ (1 + (deltaE/kappa).^2) + cW = 1 ./ (1 + (deltaW/kappa).^2) + end + + diff += lambda * (cN.*deltaN + cS.*deltaS + cE.*deltaE + cW.*deltaW) + end + + return diff +end + diff --git a/julia/calc_k_percentile.jl b/julia/calc_k_percentile.jl new file mode 100644 index 0000000..36b15e1 --- /dev/null +++ b/julia/calc_k_percentile.jl @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# +# Calculate the percentile of the gradient image, which +# used as the 'kappa' parameter of the anisotropic diffusion. +# +# Credits: +# [1] KAZE: nldiffusion_functions.cpp / compute_k_percentile() +# +# Aaron LI +# 2015/07/20 +# + +include("scharr.jl"); + +function calc_k_percentile(img, percent=0.7, nbins=300) + rows, cols = size(img) + # derivatives of the image + img_gx = scharr(img, 1, 0) + img_gy = scharr(img, 0, 1) + img_modg = sqrt(img_gx.^2 + img_gy.^2) + # histogram + hmax = maximum(img_modg) + hist_e, hist_counts = hist(reshape(img_modg, length(img_modg)), nbins) + hist_cum = cumsum(hist_counts) + # find the percent of the histogram percentile + npoints = sum(img_modg .> 0.0) + nthreshold = npoints * percent + k = sum(hist_cum .<= nthreshold) + kperc = (k == length(hist_cum)) ? 0.03 : (hmax * k / nbins) + return kperc +end + diff --git a/julia/forcefield.jl b/julia/forcefield.jl new file mode 100644 index 0000000..bf2c236 --- /dev/null +++ b/julia/forcefield.jl @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +# +# Force field transform with specified size of mask. +# +# Aaron LI +# 2015/07/19 +# + +# Make the specified sized force field mask. +# NOTE: the number of rows and cols must be odd. +function ff_mask(rows=5, cols=5) + rows % 2 == 1 || error("rows must be odd number") + cols % 2 == 1 || error("cols must be odd number") + mask = complex(zeros(rows, cols)) + for r = range(-div(rows, 2), rows) + for c = range(-div(cols, 2), cols) + i, j = r + div(rows+1, 2), c + div(cols+1, 2) + #@printf("(r,c) = (%d,%d); (i,j) = (%d,%d)\n", r, c, i, j) + d = c + r*im + if abs(d) < 1e-8 + mask[i, j] = 0.0 + else + mask[i, j] = d / abs(d)^3 + end + end + end + return mask / sum(abs(mask)) +end + + +# Padding image by specified number of rows and cols. 
+# Default padding mode: mirror +function pad_image(img, pad_rows, pad_cols, mode="mirror") + rows, cols = size(img) + rows_new, cols_new = rows + 2*pad_rows, cols + 2*pad_cols + img_pad = zeros(rows_new, cols_new) + img_pad[(pad_rows+1):(pad_rows+rows), (pad_cols+1):(pad_cols+cols)] = img + for r = 1:rows_new + for c = 1:cols_new + if mode == "mirror" + if r <= pad_rows + r_mirror = 2*(pad_rows+1) - r + elseif r <= pad_rows+rows + r_mirror = r + else + r_mirror = 2*(pad_rows+rows) - r + end + if c <= pad_cols + c_mirror = 2*(pad_cols+1) - c + elseif c <= pad_cols+cols + c_mirror = c + else + c_mirror = 2*(pad_cols+cols) - c + end + if (r_mirror, c_mirror) != (r, c) + #@printf("(%d,%d) <= (%d,%d)\n", r, c, r_mirror, c_mirror) + img_pad[r, c] = img_pad[r_mirror, c_mirror] + end + else + error("mode not supported") + end + end + end + return img_pad +end + + +# Perform force field transform for the image. +function ff_transform(img, mask, mode="mirror") + rows, cols = size(img) + mask_rows, mask_cols = size(mask) + pad_rows, pad_cols = div(mask_rows, 2), div(mask_cols, 2) + img_pad = pad_image(img, pad_rows, pad_cols) + # result images + ff_amplitudes = zeros(rows, cols) + ff_angles = zeros(rows, cols) + # calculate transformed values + for r = (pad_rows+1):(pad_rows+rows) + for c = (pad_cols+1):(pad_cols+cols) + force = sum(img_pad[r, c] * img_pad[(r-pad_rows):(r+pad_rows), (c-pad_cols):(c+pad_cols)] .* mask) + ff_amplitudes[r-pad_rows, c-pad_cols] = abs(force) + ff_angles[r-pad_rows, c-pad_cols] = angle(force) + end + end + return ff_amplitudes, ff_angles +end + diff --git a/julia/ndgrid.jl b/julia/ndgrid.jl new file mode 100644 index 0000000..688a246 --- /dev/null +++ b/julia/ndgrid.jl @@ -0,0 +1,52 @@ +# This file is a part of Julia. License is MIT: http://julialang.org/license + +ndgrid(v::AbstractVector) = copy(v) + +function ndgrid{T}(v1::AbstractVector{T}, v2::AbstractVector{T}) + m, n = length(v1), length(v2) + v1 = reshape(v1, m, 1) + v2 = reshape(v2, 1, n) + (repmat(v1, 1, n), repmat(v2, m, 1)) +end + +function ndgrid_fill(a, v, s, snext) + for j = 1:length(a) + a[j] = v[div(rem(j-1, snext), s)+1] + end +end + +function ndgrid{T}(vs::AbstractVector{T}...) + n = length(vs) + sz = map(length, vs) + out = ntuple(i->Array(T, sz), n) + s = 1 + for i=1:n + a = out[i]::Array + v = vs[i] + snext = s*size(a,i) + ndgrid_fill(a, v, s, snext) + s = snext + end + out +end + +meshgrid(v::AbstractVector) = meshgrid(v, v) + +function meshgrid{T}(vx::AbstractVector{T}, vy::AbstractVector{T}) + m, n = length(vy), length(vx) + vx = reshape(vx, 1, n) + vy = reshape(vy, m, 1) + (repmat(vx, m, 1), repmat(vy, 1, n)) +end + +function meshgrid{T}(vx::AbstractVector{T}, vy::AbstractVector{T}, + vz::AbstractVector{T}) + m, n, o = length(vy), length(vx), length(vz) + vx = reshape(vx, 1, n, 1) + vy = reshape(vy, m, 1, 1) + vz = reshape(vz, 1, 1, o) + om = ones(Int, m) + on = ones(Int, n) + oo = ones(Int, o) + (vx[om, :, oo], vy[:, on, oo], vz[om, on, :]) +end diff --git a/julia/scharr.jl b/julia/scharr.jl new file mode 100644 index 0000000..02daeb6 --- /dev/null +++ b/julia/scharr.jl @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# +# Calculate the derivatives of an image using the Scharr operator +# of kernal size 3x3. 
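+#
+# Usage sketch (illustrative):
+#   img_gx = scharr(img, 1, 0)   # first-order derivative along x
+#   img_gy = scharr(img, 0, 1)   # first-order derivative along y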
+# +# References: +# [1] https://en.wikipedia.org/wiki/Sobel_operator +# [2] http://docs.opencv.org/doc/tutorials/imgproc/imgtrans/sobel_derivatives/sobel_derivatives.html +# +# Aaron LI +# 2015/07/20 +# + +# Calculate the derivatives of the image using the Scharr operator +# img - input image +# dx - order of the derivative x +# dy - order of the derivative y +function scharr(img, dx, dy) + rows, cols = size(img) + img_d = float(img) + (isa(dx, Int) && dx >= 0) || error("dx should be an integer >= 0") + (isa(dy, Int) && dy >= 0) || error("dy should be an integer >= 0") + # Scharr operator + Gy = [-3.0 -10.0 -3.0; 0.0 0.0 0.0; 3.0 10.0 3.0]; + Gx = Gy' + # calculate the derivatives using convolution + for i = 1:dx + img_d = conv2(img_d, Gx) + end + for i = 1:dy + img_d = conv2(img_d, Gy) + end + # FIXME: 'conv2' will increase the image size + rows_d, cols_d = size(img_d) + return img_d[(div(rows_d-rows, 2)+1):(div(rows_d-rows, 2)+rows), (div(cols_d-cols, 2)+1):(div(cols_d-cols, 2)+cols)] +end + diff --git a/matlab/radialpsd.m b/matlab/radialpsd.m new file mode 100644 index 0000000..cff4f9e --- /dev/null +++ b/matlab/radialpsd.m @@ -0,0 +1,83 @@ +% +% radialpsd - to calculate the radial power spectrum density +% of the given 2d image +% +% Credits: +% [1] Evan Ruzanski +% Radially averaged power spectrum of 2D real-valued matrix +% https://www.mathworks.com/matlabcentral/fileexchange/23636-radially-averaged-power-spectrum-of-2d-real-valued-matrix +% +% Arguments: +% img - input 2d image (grayscale) +% step - radius step between each consecutive two circles +% +% Return: +% psd - vector contains the power at each frequency +% psd_sdd - vector of the corresponding standard deviation +% + +function [psd, psd_std] = radialpsd(img, step) + [N M] = size(img) + + %% Compute power spectrum + imgf = fftshift(fft2(img)) + % Normalize by image size + imgfp = (abs(imgf) / (N*M)) .^ 2 + + %% Adjust PSD size: padding to make a square matrix + dimDiff = abs(N-M) + dimMax = max(N, M) + % To make square matrix + if N > M + % More rows than columns + if ~mod(dimDiff, 2) + % Even difference + % Pad columns to match dimension + imgfp = [NaN(N,dimDiff/2) imgfp NaN(N,dimDiff/2)] + else + % Odd difference + imgfp = [NaN(N,floor(dimDiff/2)) imgfp NaN(N,floor(dimDiff/2)+1)] + end + elseif N < M + % More columns than rows + if ~mod(dimDiff, 2) + % Even difference + % Pad rows to match dimensions + imgfp = [NaN(dimDiff/2,M); imgfp; NaN(dimDiff/2,M)] + else + % Pad rows to match dimensions + imgfp = [NaN(floor(dimDiff/2),M); imgfp; NaN(floor(dimDiff/2)+1,M)] + end + end + + % Only consider one half of spectrum (due to symmetry) + halfDim = floor(dimMax/2) + 1 + + %% Compute radially average power spectrum + % Make Cartesian grid + [X Y] = meshgrid(-dimMax/2:dimMax/2-1, -dimMax/2:dimMax/2-1) + % Convert to polar coordinate axes + [theta rho] = cart2pol(X, Y) + rho = round(rho) + i = cell(floor(dimMax/2)+1, 1) + for r = 0:floor(dimMax/2) + i{r+1} = find(rho == r) + end + % calculate the radial mean power and its standard deviation + Pf = zeros(2, floor(dimMax/2)+1) + for r = 0:floor(dimMax/2) + Pf(1, r+1) = nanmean(imgfp(i{r+1})) + Pf(2, r+1) = nanstd(imgfp(i{r+1})) + end + + % adapt to the given step size + psd = zeros(1, floor(size(Pf, 2) / step)) + psd_std = zeros(size(psd)) + for k = 1:length(psd) + psd(i) = mean(Pf(1, (k*step-step+1):(k*step))) + % approximately calculate the merged standard deviation + psd_std(i) = sqrt(mean(Pf(2, (k*step-step+1):(k*step)) .^ 2)) + end +end + +% vim: set ts=8 sw=4 tw=0 fenc=utf-8 
ft=matlab: % diff --git a/python/adjust_spectrum_error.py b/python/adjust_spectrum_error.py new file mode 100755 index 0000000..0f80ec7 --- /dev/null +++ b/python/adjust_spectrum_error.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +Squeeze the spectrum according to the grouping specification, then +calculate the statistical errors for each group, and apply error +adjustments (e.g., incorporate the systematic uncertainties). +""" + +__version__ = "0.1.0" +__date__ = "2016-01-11" + + +import sys +import argparse + +import numpy as np +from astropy.io import fits + + +class Spectrum: + """ + Spectrum class to keep spectrum information and perform manipulations. + """ + header = None + channel = None + counts = None + grouping = None + quality = None + + def __init__(self, specfile): + f = fits.open(specfile) + spechdu = f['SPECTRUM'] + self.header = spechdu.header + self.channel = spechdu.data.field('CHANNEL') + self.counts = spechdu.data.field('COUNTS') + self.grouping = spechdu.data.field('GROUPING') + self.quality = spechdu.data.field('QUALITY') + f.close() + + def squeezeByGrouping(self): + """ + Squeeze the spectrum according to the grouping specification, + i.e., sum the counts belonging to the same group, and place the + sum as the first channel within each group with other channels + of counts zero's. + """ + counts_squeezed = [] + cnt_sum = 0 + cnt_num = 0 + first = True + for grp, cnt in zip(self.grouping, self.counts): + if first and grp == 1: + # first group + cnt_sum = cnt + cnt_num = 1 + first = False + elif grp == 1: + # save previous group + counts_squeezed.append(cnt_sum) + counts_squeezed += [ 0 for i in range(cnt_num-1) ] + # start new group + cnt_sum = cnt + cnt_num = 1 + else: + # group continues + cnt_sum += cnt + cnt_num += 1 + # last group + # save previous group + counts_squeezed.append(cnt_sum) + counts_squeezed += [ 0 for i in range(cnt_num-1) ] + self.counts_squeezed = np.array(counts_squeezed, dtype=np.int32) + + def calcStatErr(self, gehrels=False): + """ + Calculate the statistical errors for the grouped channels, + and save as the STAT_ERR column. + """ + idx_nz = np.nonzero(self.counts_squeezed) + stat_err = np.zeros(self.counts_squeezed.shape) + if gehrels: + # Gehrels + stat_err[idx_nz] = 1 + np.sqrt(self.counts_squeezed[idx_nz] + 0.75) + else: + stat_err[idx_nz] = np.sqrt(self.counts_squeezed[idx_nz]) + self.stat_err = stat_err + + @staticmethod + def parseSysErr(syserr): + """ + Parse the string format of syserr supplied in the commandline. + """ + items = map(str.strip, syserr.split(',')) + syserr_spec = [] + for item in items: + spec = item.split(':') + try: + spec = (int(spec[0]), int(spec[1]), float(spec[2])) + except: + raise ValueError("invalid syserr specficiation") + syserr_spec.append(spec) + return syserr_spec + + def applySysErr(self, syserr): + """ + Apply systematic error adjustments to the above calculated + statistical errors. + """ + syserr_spec = self.parseSysErr(syserr) + for lo, hi, se in syserr_spec: + err_adjusted = self.stat_err[(lo-1):(hi-1)] * np.sqrt(1+se) + self.stat_err_adjusted = err_adjusted + + def updateHeader(self): + """ + Update header accordingly. + """ + # POISSERR + self.header['POISSERR'] = False + + def write(self, filename, clobber=False): + """ + Write the updated/modified spectrum block to file. 
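+
+        The output table contains the columns: CHANNEL, COUNTS (squeezed
+        by the grouping), STAT_ERR (the adjusted errors), GROUPING, and
+        QUALITY.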
+ """ + channel_col = fits.Column(name='CHANNEL', format='J', + array=self.channel) + counts_col = fits.Column(name='COUNTS', format='J', + array=self.counts_squeezed) + stat_err_col = fits.Column(name='STAT_ERR', format='D', + array=self.stat_err_adjusted) + grouping_col = fits.Column(name='GROUPING', format='I', + array=self.grouping) + quality_col = fits.Column(name='QUALITY', format='I', + array=self.quality) + spec_cols = fits.ColDefs([channel_col, counts_col, stat_err_col, + grouping_col, quality_col]) + spechdu = fits.BinTableHDU.from_columns(spec_cols, header=self.header) + spechdu.writeto(filename, clobber=clobber) + + +def main(): + parser = argparse.ArgumentParser( + description="Apply systematic error adjustments to spectrum.") + parser.add_argument("-V", "--version", action="version", + version="%(prog)s " + "%s (%s)" % (__version__, __date__)) + parser.add_argument("infile", help="input spectrum file") + parser.add_argument("outfile", help="output adjusted spectrum file") + parser.add_argument("-e", "--syserr", dest="syserr", required=True, + help="systematic error specification; " + \ + "syntax: ch1low:ch1high:syserr1,...") + parser.add_argument("-C", "--clobber", dest="clobber", + action="store_true", help="overwrite output file if exists") + parser.add_argument("-G", "--gehrels", dest="gehrels", + action="store_true", help="use Gehrels error?") + args = parser.parse_args() + + spec = Spectrum(args.infile) + spec.squeezeByGrouping() + spec.calcStatErr(gehrels=args.gehrels) + spec.applySysErr(syserr=args.syserr) + spec.updateHeader() + spec.write(args.outfile, clobber=args.clobber) + + +if __name__ == "__main__": + main() + + +# vim: set ts=4 sw=4 tw=0 fenc=utf-8 ft=python: # diff --git a/python/calc_radial_psd.py b/python/calc_radial_psd.py new file mode 100755 index 0000000..23bd819 --- /dev/null +++ b/python/calc_radial_psd.py @@ -0,0 +1,450 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Credit: +# [1] Radially averaged power spectrum of 2D real-valued matrix +# Evan Ruzanski +# 'raPsd2d.m' +# https://www.mathworks.com/matlabcentral/fileexchange/23636-radially-averaged-power-spectrum-of-2d-real-valued-matrix +# +# XXX: +# * If the input image is NOT SQUARE; then are the horizontal frequencies +# the same as the vertical frequencies ?? +# +# Aaron LI <aaronly.me@gmail.com> +# Created: 2015-04-22 +# Updated: 2016-04-28 +# +# Changelog: +# 2016-04-28: +# * Fix wrong meshgrid with respect to the shift zero-frequency component +# * Use "numpy.fft" instead of "scipy.fftpack" +# * Split method "pad_square()" from "calc_radial_psd()" +# * Hide numpy warning when dividing by zero +# * Add method "AstroImage.fix_shapes()" +# * Add support for background subtraction and exposure correction +# * Show verbose information during calculation +# * Add class "AstroImage" +# * Set default value for 'args.png' +# * Rename from 'radialPSD2d.py' to 'calc_radial_psd.py' +# 2016-04-26: +# * Adjust plot function +# * Update normalize argument; Add pixel argument +# 2016-04-25: +# * Update plot function +# * Add command line scripting support +# * Encapsulate the functions within class 'PSD' +# * Update docs/comments +# + +""" +Compute the radially averaged power spectral density (i.e., power spectrum). 
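+
+Example invocation (illustrative; the FITS file names are placeholders):
+    ./calc_radial_psd.py -v -i img.fits -b bkg.fits -e expmap.fits -o psd.txt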
+""" + +__version__ = "0.5.0" +__date__ = "2016-04-28" + + +import sys +import os +import argparse + +import numpy as np +from astropy.io import fits + +import matplotlib.pyplot as plt +from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas +from matplotlib.figure import Figure + +plt.style.use("ggplot") + + +class PSD: + """ + Computes the 2D power spectral density and the radially averaged power + spectral density (i.e., 1D power spectrum). + """ + # 2D image data + img = None + # value and unit of 1 pixel for the input image + pixel = (None, None) + # whether to normalize the power spectral density by image size + normalize = True + # 2D power spectral density + psd2d = None + # 1D (radially averaged) power spectral density + freqs = None + psd1d = None + psd1d_err = None + + def __init__(self, img, pixel=(1.0, "pixel"), normalize=True): + self.img = img.astype(np.float) + self.pixel = pixel + self.normalize = normalize + + def calc_psd2d(self, verbose=False): + """ + Computes the 2D power spectral density of the given image. + Note that the low frequency components are shifted to the center + of the FFT'ed image. + + NOTE: + The zero-frequency component is shifted to position of index (0-based) + (ceil((n-1) / 2), ceil((m-1) / 2)), + where (n, m) are the number of rows and columns of the image/psd2d. + + Return: + 2D power spectral density, which is dimensionless if normalized, + otherwise has unit ${pixel_unit}^2. + """ + if verbose: + print("Calculating 2D power spectral density ... ", + end="", flush=True) + rows, cols = self.img.shape + ## Compute the power spectral density (i.e., power spectrum) + imgf = np.fft.fftshift(np.fft.fft2(self.img)) + if self.normalize: + norm = rows * cols * self.pixel[0]**2 + else: + norm = 1.0 # Do not normalize + self.psd2d = (np.abs(imgf) / norm) ** 2 + if verbose: + print("DONE", flush=True) + return self.psd2d + + def calc_radial_psd1d(self, verbose=False): + """ + Computes the radially averaged power spectral density from the + provided 2D power spectral density. + + Return: + (freqs, radial_psd, radial_psd_err) + freqs: spatial freqencies (unit: ${pixel_unit}^(-1)) + radial_psd: radially averaged power spectral density for each + frequency + radial_psd_err: standard deviations of each radial_psd + """ + if verbose: + print("Calculating radial (1D) power spectral density ... ", + end="", flush=True) + if verbose: + print("padding ... ", end="", flush=True) + psd2d = self.pad_square(self.psd2d, value=np.nan) + dim = psd2d.shape[0] + dim_half = (dim+1) // 2 + # NOTE: + # The zero-frequency component is shifted to position of index + # (0-based): (ceil((n-1) / 2), ceil((m-1) / 2)) + px = np.arange(dim_half-dim, dim_half) + x, y = np.meshgrid(px, px) + rho, phi = self.cart2pol(x, y) + rho = np.around(rho).astype(np.int) + radial_psd = np.zeros(dim_half) + radial_psd_err = np.zeros(dim_half) + if verbose: + print("radially averaging ... 
", + end="", flush=True) + for r in range(dim_half): + # Get the indices of the elements satisfying rho[i,j]==r + ii, jj = (rho == r).nonzero() + # Calculate the mean value at a given radii + data = psd2d[ii, jj] + radial_psd[r] = np.nanmean(data) + radial_psd_err[r] = np.nanstd(data) + # Calculate frequencies + f = np.fft.fftfreq(dim, d=self.pixel[0]) + freqs = np.abs(f[:dim_half]) + # + self.freqs = freqs + self.psd1d = radial_psd + self.psd1d_err = radial_psd_err + if verbose: + print("DONE", end="", flush=True) + return (freqs, radial_psd, radial_psd_err) + + @staticmethod + def cart2pol(x, y): + """ + Convert Cartesian coordinates to polar coordinates. + """ + rho = np.sqrt(x**2 + y**2) + phi = np.arctan2(y, x) + return (rho, phi) + + @staticmethod + def pol2cart(rho, phi): + """ + Convert polar coordinates to Cartesian coordinates. + """ + x = rho * np.cos(phi) + y = rho * np.sin(phi) + return (x, y) + + @staticmethod + def pad_square(data, value=np.nan): + """ + Symmetrically pad the supplied data matrix to make it square. + The padding rows are equally added to the top and bottom, + as well as the columns to the left and right sides. + The padded rows/columns are filled with the specified value. + """ + mat = data.copy() + rows, cols = mat.shape + dim_diff = abs(rows - cols) + dim_max = max(rows, cols) + if rows > cols: + # pad columns + if dim_diff // 2 == 0: + cols_left = np.zeros((rows, dim_diff/2)) + cols_left[:] = value + cols_right = np.zeros((rows, dim_diff/2)) + cols_right[:] = value + mat = np.hstack((cols_left, mat, cols_right)) + else: + cols_left = np.zeros((rows, np.floor(dim_diff/2))) + cols_left[:] = value + cols_right = np.zeros((rows, np.floor(dim_diff/2)+1)) + cols_right[:] = value + mat = np.hstack((cols_left, mat, cols_right)) + elif rows < cols: + # pad rows + if dim_diff // 2 == 0: + rows_top = np.zeros((dim_diff/2, cols)) + rows_top[:] = value + rows_bottom = np.zeros((dim_diff/2, cols)) + rows_bottom[:] = value + mat = np.vstack((rows_top, mat, rows_bottom)) + else: + rows_top = np.zeros((np.floor(dim_diff/2), cols)) + rows_top[:] = value + rows_bottom = np.zeros((np.floor(dim_diff/2)+1, cols)) + rows_bottom[:] = value + mat = np.vstack((rows_top, mat, rows_bottom)) + return mat + + def plot(self, ax=None, fig=None): + """ + Make a plot of the radial (1D) PSD with matplotlib. + """ + if ax is None: + fig, ax = plt.subplots(1, 1) + # + xmin = self.freqs[1] / 1.2 # ignore the first 0 + xmax = self.freqs[-1] + ymin = np.nanmin(self.psd1d) / 10.0 + ymax = np.nanmax(self.psd1d + self.psd1d_err) + # + eb = ax.errorbar(self.freqs, self.psd1d, yerr=self.psd1d_err, + fmt="none") + ax.plot(self.freqs, self.psd1d, "ko") + ax.set_xscale("log") + ax.set_yscale("log") + ax.set_xlim(xmin, xmax) + ax.set_ylim(ymin, ymax) + ax.set_title("Radially Averaged Power Spectral Density") + ax.set_xlabel(r"k (%s$^{-1}$)" % self.pixel[1]) + if self.normalize: + ax.set_ylabel("Power") + else: + ax.set_ylabel(r"Power (%s$^2$)" % self.pixel[1]) + fig.tight_layout() + return (fig, ax) + + +class AstroImage: + """ + Manipulate the astronimcal counts image, as well as the corresponding + exposure map and background map. 
+ """ + # input counts image + image = None + # exposure map with respect to the input counts image + expmap = None + # background map (e.g., stowed background) + bkgmap = None + # exposure time of the input image + exposure = None + # exposure time of the background map + exposure_bkg = None + + def __init__(self, image, expmap=None, bkgmap=None, verbose=False): + self.load_image(image, verbose=verbose) + self.load_expmap(expmap, verbose=verbose) + self.load_bkgmap(bkgmap, verbose=verbose) + + def load_image(self, image, verbose=False): + if verbose: + print("Loading image ... ", end="", flush=True) + with fits.open(image) as imgfits: + self.image = imgfits[0].data.astype(np.float) + self.exposure = imgfits[0].header["EXPOSURE"] + if verbose: + print("DONE", flush=True) + + def load_expmap(self, expmap, verbose=False): + if expmap: + if verbose: + print("Loading exposure map ... ", end="", flush=True) + with fits.open(expmap) as imgfits: + self.expmap = imgfits[0].data.astype(np.float) + if verbose: + print("DONE", flush=True) + + def load_bkgmap(self, bkgmap, verbose=False): + if bkgmap: + if verbose: + print("Loading background map ... ", end="", flush=True) + with fits.open(bkgmap) as imgfits: + self.bkgmap = imgfits[0].data.astype(np.float) + self.exposure_bkg = imgfits[0].header["EXPOSURE"] + if verbose: + print("DONE", flush=True) + + def fix_shapes(self, tolerance=2, verbose=False): + """ + Fix the shapes of self.expmap and self.bkgmap to make them have + the same shape as the self.image. + + NOTE: + * if the image is bigger than the reference image, then its + columns on the right and rows on the botton are clipped; + * if the image is smaller than the reference image, then padding + columns on the right and rows on the botton are added. + * Original images are REPLACED! + + Arguments: + * tolerance: allow absolute difference between images + """ + def _fix_shape(img, ref, tol=tolerance, verbose=verbose): + if img.shape == ref.shape: + if verbose: + print("SKIPPED", flush=True) + return img + elif np.allclose(img.shape, ref.shape, atol=tol): + if verbose: + print(img.shape, "->", ref.shape, flush=True) + rows, cols = img.shape + rows_ref, cols_ref = ref.shape + # rows + if rows > rows_ref: + img_fixed = img[:rows_ref, :] + else: + img_fixed = np.row_stack((img, + np.zeros((rows_ref-rows, cols), dtype=img.dtype))) + # columns + if cols > cols_ref: + img_fixed = img_fixed[:, :cols_ref] + else: + img_fixed = np.column_stack((img_fixed, + np.zeros((rows_ref, cols_ref-cols), dtype=img.dtype))) + return img_fixed + else: + raise ValueError("shape difference exceeds tolerance: " + \ + "(%d, %d) vs. (%d, %d)" % (img.shape + ref.shape)) + # + if self.bkgmap is not None: + if verbose: + print("Fixing shape for bkgmap ... ", end="", flush=True) + self.bkgmap = _fix_shape(self.bkgmap, self.image) + if self.expmap is not None: + if verbose: + print("Fixing shape for expmap ... ", end="", flush=True) + self.expmap = _fix_shape(self.expmap, self.image) + + def subtract_bkg(self, verbose=False): + if verbose: + print("Subtracting background ... ", end="", flush=True) + self.image -= (self.bkgmap / self.exposure_bkg * self.exposure) + if verbose: + print("DONE", flush=True) + + def correct_exposure(self, cut=0.015, verbose=False): + """ + Correct the image for exposure by dividing by the expmap to + create the exposure-corrected image. 
+ + Arguments: + * cut: the threshold percentage with respect to the maximum + exposure map value; and those pixels with lower values + than this threshold will be excluded/clipped (set to ZERO) + if set to None, then skip clipping image + """ + if verbose: + print("Correcting image for exposure ... ", end="", flush=True) + with np.errstate(divide="ignore", invalid="ignore"): + self.image /= self.expmap + # set invalid values to ZERO + self.image[ ~ np.isfinite(self.image) ] = 0.0 + if verbose: + print("DONE", flush=True) + if cut is not None: + # clip image according the exposure threshold + if verbose: + print("Clipping image (%s) ... " % cut, end="", flush=True) + threshold = cut * np.max(self.expmap) + self.image[ self.expmap < threshold ] = 0.0 + if verbose: + print("DONE", flush=True) + + +def main(): + parser = argparse.ArgumentParser( + description="Compute the radially averaged power spectral density", + epilog="Version: %s (%s)" % (__version__, __date__)) + parser.add_argument("-V", "--version", action="version", + version="%(prog)s " + "%s (%s)" % (__version__, __date__)) + parser.add_argument("-v", "--verbose", dest="verbose", + action="store_true", help="show verbose information") + parser.add_argument("-C", "--clobber", dest="clobber", + action="store_true", + help="overwrite the output files if already exist") + parser.add_argument("-i", "--infile", dest="infile", + required=True, help="input image") + parser.add_argument("-b", "--bkgmap", dest="bkgmap", default=None, + help="background map (for background subtraction)") + parser.add_argument("-e", "--expmap", dest="expmap", default=None, + help="exposure map (for exposure correction)") + parser.add_argument("-o", "--outfile", dest="outfile", + required=True, help="output file to store the PSD data") + parser.add_argument("-p", "--png", dest="png", default=None, + help="plot the PSD and save (default: same basename as outfile)") + args = parser.parse_args() + + if args.png is None: + args.png = os.path.splitext(args.outfile)[0] + ".png" + + # Check output files whether already exists + if (not args.clobber) and os.path.exists(args.outfile): + raise ValueError("outfile '%s' already exists" % args.outfile) + if (not args.clobber) and os.path.exists(args.png): + raise ValueError("output png '%s' already exists" % args.png) + + # Load image data + image = AstroImage(image=args.infile, expmap=args.expmap, + bkgmap=args.bkgmap, verbose=args.verbose) + image.fix_shapes(verbose=args.verbose) + if args.bkgmap: + image.subtract_bkg(verbose=args.verbose) + if args.expmap: + image.correct_exposure(verbose=args.verbose) + + # Calculate the power spectral density + psd = PSD(img=image.image, normalize=True) + psd.calc_psd2d(verbose=args.verbose) + freqs, psd1d, psd1d_err = psd.calc_radial_psd1d(verbose=args.verbose) + + # Write out PSD results + psd_data = np.column_stack((freqs, psd1d, psd1d_err)) + np.savetxt(args.outfile, psd_data, header="freqs psd1d psd1d_err") + + # Make and save a plot + fig = Figure(figsize=(10, 8)) + canvas = FigureCanvas(fig) + ax = fig.add_subplot(111) + psd.plot(ax=ax, fig=fig) + fig.savefig(args.png, format="png", dpi=150) + + +if __name__ == "__main__": + main() + diff --git a/python/crosstalk_deprojection.py b/python/crosstalk_deprojection.py new file mode 100755 index 0000000..d5bab05 --- /dev/null +++ b/python/crosstalk_deprojection.py @@ -0,0 +1,1808 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# References: +# [1] Definition of RMF and ARF file formats +# 
https://heasarc.gsfc.nasa.gov/docs/heasarc/caldb/docs/memos/cal_gen_92_002/cal_gen_92_002.html +# [2] The OGIP Spectral File Format +# https://heasarc.gsfc.nasa.gov/docs/heasarc/ofwg/docs/summary/ogip_92_007_summary.html +# [3] CIAO: Auxiliary Response File +# http://cxc.harvard.edu/ciao/dictionary/arf.html +# [4] CIAO: Redistribution Matrix File +# http://cxc.harvard.edu/ciao/dictionary/rmf.html +# [5] astropy - FITS format code +# http://docs.astropy.org/en/stable/io/fits/usage/table.html#column-creation +# [6] XSPEC - Spectral Fitting +# https://heasarc.gsfc.nasa.gov/docs/xanadu/xspec/manual/XspecSpectralFitting.html +# [7] Direct X-ray Spectra Deprojection +# https://www-xray.ast.cam.ac.uk/papers/dsdeproj/ +# Sanders & Fabian 2007, MNRAS, 381, 1381 +# +# +# Weitian LI +# Created: 2016-03-26 +# Updated: 2016-04-20 +# +# ChangeLog: +# 2016-04-20: +# * Add argument 'add_history' to some methods (to avoid many duplicated +# histories due to Monte Carlo) +# * Rename 'reset_header_keywords()' to 'fix_header_keywords()', +# and add mandatory spectral keywords if missing +# * Add method 'fix_header()' to class 'Crosstalk' and 'Deprojection', +# and fix the headers before write spectra +# 2016-04-19: +# * Ignore numpy error due to division by zero +# * Update tool description and sample configuration +# * Add two other main methods: `main_deprojection()' and `main_crosstalk()' +# * Add argument 'group_squeeze' to some methods for better performance +# * Rename from 'correct_crosstalk.py' to 'crosstalk_deprojection.py' +# 2016-04-18: +# * Implement deprojection function: class Deprojection +# * Support spectral grouping (supply the grouping specification) +# * Add grouping, estimate_errors, copy, randomize, etc. methods +# * Utilize the Monte Carlo techniques to estimate the final spectral errors +# * Collect all ARFs and RMFs within dictionaries +# 2016-04-06: +# * Fix `RMF: get_rmfimg()' for XMM EPIC RMF +# 2016-04-02: +# * Interpolate ARF in order to match the spectral channel energies +# * Add version and date information +# * Update documentations +# * Update header history contents +# 2016-04-01: +# * Greatly update the documentations (e.g., description, sample config) +# * Add class `RMF' +# * Add method `get_energy()' for class `ARF' +# * Split out class `SpectrumSet' from `Spectrum' +# * Implement background subtraction +# * Add config `subtract_bkg' and corresponding argument +# +# XXX/FIXME: +# * Deprojection: account for ARF differences across different regions +# +# TODO: +# * Split classes ARF, RMF, Spectrum, and SpectrumSet to a separate module +# + +__version__ = "0.5.2" +__date__ = "2016-04-20" + + +""" +Correct the crosstalk effect of XMM spectra by subtracting the photons +that scattered from the surrounding regions due to the finite PSF, and +by compensating the photons that scattered to the surrounding regions, +according to the generated crosstalk ARFs by SAS `arfgen'. + +After the crosstalk effect being corrected, the deprojection is performed +to deproject the crosstalk-corrected spectra to derive the spectra with +both the crosstalk effect and projection effect corrected. 
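+
+The uncertainties of the final spectra are estimated with a Monte Carlo
+approach: the input spectra are randomized and corrected many times (see
+the `mc_times' option below).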
+ + +Sample config file (in `ConfigObj' syntax): +----------------------------------------------------------- +# operation mode: deprojection, crosstalk, or both (default) +mode = both +# supply a *groupped* spectrum (from which the "GROUPING" and "QUALITY" +# are used to group all the following spectra) +grouping = spec_grp.pi +# whether to subtract the background before crosstalk correction +subtract_bkg = True +# whether to fix the negative channel values due to spectral subtractions +fix_negative = False +# Monte Carlo times for spectral error estimation +mc_times = 5000 +# show progress details and verbose information +verbose = True +# overwrite existing files +clobber = False + +# NOTE: +# ONLY specifiy ONE set of projected spectra (i.e., from the same detector +# of one observation), since ALL the following specified spectra will be +# used for the deprojection. + +[reg1] +... + +[reg2] +outfile = deprojcc_reg2.pi +spec = reg2.pi +arf = reg2.arf +rmf = reg2.rmf +bkg = reg2_bkg.pi + [[cross_in]] + [[[in1]]] + spec = reg1.pi + arf = reg1.arf + rmf = reg1.rmf + bkg = reg1_bkg.pi + cross_arf = reg_1-2.arf + [[[in2]]] + spec = reg3.pi + arf = reg3.arf + rmf = reg3.rmf + bkg = reg3_bkg.pi + cross_arf = reg_3-2.arf + [[cross_out]] + cross_arf = reg_2-1.arf, reg_2-3.arf + +[...] +... +----------------------------------------------------------- +""" + +WARNING = """ +********************************* WARNING ************************************ +The generated spectra are substantially modified (e.g., scale, add, subtract), +therefore, take special care when interpretating the fitting results, +especially the metal abundances and normalizations. +****************************************************************************** +""" + + +import sys +import os +import argparse +from datetime import datetime +from copy import copy + +import numpy as np +import scipy as sp +import scipy.interpolate +from astropy.io import fits +from configobj import ConfigObj + + +def group_data(data, grouping): + """ + Group the data with respect to the supplied `grouping' specification + (i.e., "GROUPING" columns of a spectrum). The channel counts of the + same group are summed up and assigned to the FIRST channel of this + group, while the OTHRE channels are all set to ZERO. + """ + data_grp = np.array(data).copy() + for i in reversed(range(len(data))): + if grouping[i] == 1: + # the beginning channel of a group + continue + else: + # other channels of a group + data_grp[i-1] += data_grp[i] + data_grp[i] = 0 + assert np.isclose(sum(data_grp), sum(data)) + return data_grp + + +class ARF: # {{{ + """ + Class to handle the ARF (ancillary/auxiliary response file), + which contains the combined instrumental effective area + (telescope/filter/detector) and the quantum efficiency (QE) as a + function of energy averaged over time. + The effective area is [cm^2], and the QE is [counts/photon]; they are + multiplied together to create the ARF, resulting in [cm^2 counts/photon]. + + **CAVEAT/NOTE**: + Generally, the "ENERG_LO" and "ENERG_HI" columns of an ARF are *different* + to the "E_MIN" and "E_MAX" columns of a RMF (which are corresponding + to the spectrum channel energies). + For the XMM EPIC *pn* and Chandra *ACIS*, the generated ARF does NOT have + the same number of data points to that of spectral channels, i.e., the + "ENERG_LO" and "ENERG_HI" columns of ARF is different to the "E_MIN" and + "E_MAX" columns of RMF. 
+ Therefore it is necessary to interpolate and extrapolate the ARF curve + in order to match the spectrum (or RMF "EBOUNDS" extension). + As for the XMM EPIC *MOS1* and *MOS2*, the ARF data points match the + spectral channels, i.e., the energy positions of each ARF data point and + spectral channel are consistent. Thus the interpolation is not needed. + + References: + [1] CIAO: Auxiliary Response File + http://cxc.harvard.edu/ciao/dictionary/arf.html + [2] Definition of RMF and ARF file formats + https://heasarc.gsfc.nasa.gov/docs/heasarc/caldb/docs/memos/cal_gen_92_002/cal_gen_92_002.html + """ + filename = None + fitsobj = None + # only consider the "SPECTRUM" extension + header = None + energ_lo = None + energ_hi = None + specresp = None + # function of the interpolated ARF + f_interp = None + # energies of the spectral channels + energy_channel = None + # spectral channel grouping specification + grouping = None + groupped = False + # groupped ARF channels with respect to the grouping + specresp_grp = None + + def __init__(self, filename): + self.filename = filename + self.fitsobj = fits.open(filename) + ext_specresp = self.fitsobj["SPECRESP"] + self.header = ext_specresp.header + self.energ_lo = ext_specresp.data["ENERG_LO"] + self.energ_hi = ext_specresp.data["ENERG_HI"] + self.specresp = ext_specresp.data["SPECRESP"] + + def get_data(self, groupped=False, group_squeeze=False, copy=True): + if groupped: + specresp = self.specresp_grp + if group_squeeze: + specresp = specresp[self.grouping == 1] + else: + specresp = self.specresp + if copy: + return specresp.copy() + else: + return specresp + + def get_energy(self, mean="geometric"): + """ + Return the mean energy values of the ARF. + + Arguments: + * mean: type of the mean energy: + + "geometric": geometric mean, i.e., e = sqrt(e_min*e_max) + + "arithmetic": arithmetic mean, i.e., e = 0.5*(e_min+e_max) + """ + if mean == "geometric": + energy = np.sqrt(self.energ_lo * self.energ_hi) + elif mean == "arithmetic": + energy = 0.5 * (self.energ_lo + self.energ_hi) + else: + raise ValueError("Invalid mean type: %s" % mean) + return energy + + def interpolate(self, x=None, verbose=False): + """ + Cubic interpolate the ARF curve using `scipy.interpolate' + + If the requested point is outside of the data range, the + fill value of *zero* is returned. + + Arguments: + * x: points at which the interpolation to be calculated. + + Return: + If x is None, then the interpolated function is returned, + otherwise, the interpolated data are returned. + """ + if not hasattr(self, "f_interp") or self.f_interp is None: + energy = self.get_energy() + arf = self.get_data(copy=False) + if verbose: + print("INFO: interpolating '%s' (this may take a while) ..." \ + % self.filename, file=sys.stderr) + f_interp = sp.interpolate.interp1d(energy, arf, kind="cubic", + bounds_error=False, fill_value=0.0, assume_sorted=True) + self.f_interp = f_interp + if x is not None: + return self.f_interp(x) + else: + return self.f_interp + + def apply_grouping(self, energy_channel, grouping, verbose=False): + """ + Group the ARF channels (INTERPOLATED with respect to the spectral + channels) by the supplied grouping specification. + + Arguments: + * energy_channel: energies of the spectral channel + * grouping: spectral grouping specification + + Return: `self.specresp_grp' + """ + if self.groupped: + return + if verbose: + print("INFO: Grouping spectrum '%s' ..." 
% self.filename, + file=sys.stderr) + self.energy_channel = energy_channel + self.grouping = grouping + # interpolate the ARF w.r.t the spectral channel energies + arf_interp = self.interpolate(x=energy_channel, verbose=verbose) + self.specresp_grp = group_data(arf_interp, grouping) + self.groupped = True +# class ARF }}} + + +class RMF: # {{{ + """ + Class to handle the RMF (redistribution matrix file), + which maps from energy space into detector pulse height (or position) + space. Since detectors are not perfect, this involves a spreading of + the observed counts by the detector resolution, which is expressed as + a matrix multiplication. + For X-ray spectral analysis, the RMF encodes the probability R(E,p) + that a detected photon of energy E will be assisgned to a given + channel value (PHA or PI) of p. + + The standard Legacy format [2] for the RMF uses a binary table in which + each row contains R(E,p) for a single value of E as a function of p. + Non-zero sequences of elements of R(E,p) are encoded using a set of + variable length array columns. This format is compact but hard to + manipulate and understand. + + **CAVEAT/NOTE**: + + See also the above ARF CAVEAT/NOTE. + + The "EBOUNDS" extension contains the `CHANNEL', `E_MIN' and `E_MAX' + columns. This `CHANNEL' is the same as that of a spectrum. Therefore, + the energy values determined from the `E_MIN' and `E_MAX' columns are + used to interpolate and extrapolate the ARF curve. + + The `ENERG_LO' and `ENERG_HI' columns of the "MATRIX" extension are + the same as that of a ARF. + + References: + [1] CIAO: Redistribution Matrix File + http://cxc.harvard.edu/ciao/dictionary/rmf.html + [2] Definition of RMF and ARF file formats + https://heasarc.gsfc.nasa.gov/docs/heasarc/caldb/docs/memos/cal_gen_92_002/cal_gen_92_002.html + """ + filename = None + fitsobj = None + ## extension "MATRIX" + hdr_matrix = None + energ_lo = None + energ_hi = None + n_grp = None + f_chan = None + n_chan = None + # raw squeezed RMF matrix data + matrix = None + ## extension "EBOUNDS" + hdr_ebounds = None + channel = None + e_min = None + e_max = None + ## converted 2D RMF matrix/image from the squeezed binary table + # size: len(energ_lo) x len(channel) + rmfimg = None + + def __init__(self, filename): + self.filename = filename + self.fitsobj = fits.open(filename) + ## "MATRIX" extension + ext_matrix = self.fitsobj["MATRIX"] + self.hdr_matrix = ext_matrix.header + self.energ_lo = ext_matrix.data["ENERG_LO"] + self.energ_hi = ext_matrix.data["ENERG_HI"] + self.n_grp = ext_matrix.data["N_GRP"] + self.f_chan = ext_matrix.data["F_CHAN"] + self.n_chan = ext_matrix.data["N_CHAN"] + self.matrix = ext_matrix.data["MATRIX"] + ## "EBOUNDS" extension + ext_ebounds = self.fitsobj["EBOUNDS"] + self.hdr_ebounds = ext_ebounds.header + self.channel = ext_ebounds.data["CHANNEL"] + self.e_min = ext_ebounds.data["E_MIN"] + self.e_max = ext_ebounds.data["E_MAX"] + + def get_energy(self, mean="geometric"): + """ + Return the mean energy values of the RMF "EBOUNDS". 
+ + Arguments: + * mean: type of the mean energy: + + "geometric": geometric mean, i.e., e = sqrt(e_min*e_max) + + "arithmetic": arithmetic mean, i.e., e = 0.5*(e_min+e_max) + """ + if mean == "geometric": + energy = np.sqrt(self.e_min * self.e_max) + elif mean == "arithmetic": + energy = 0.5 * (self.e_min + self.e_max) + else: + raise ValueError("Invalid mean type: %s" % mean) + return energy + + def get_rmfimg(self): + """ + Convert the RMF data in squeezed binary table (standard Legacy format) + to a 2D image/matrix. + """ + def _make_rmfimg_row(n_channel, dtype, f_chan, n_chan, mat_row): + # make sure that `f_chan' and `n_chan' are 1-D numpy array + f_chan = np.array(f_chan).reshape(-1) + f_chan -= 1 # FITS indices are 1-based + n_chan = np.array(n_chan).reshape(-1) + idx = np.concatenate([ np.arange(f, f+n) \ + for f, n in zip(f_chan, n_chan) ]) + rmfrow = np.zeros(n_channel, dtype=dtype) + rmfrow[idx] = mat_row + return rmfrow + # + if self.rmfimg is None: + # Make the 2D RMF matrix/image + n_energy = len(self.energ_lo) + n_channel = len(self.channel) + rmf_dtype = self.matrix[0].dtype + rmfimg = np.zeros(shape=(n_energy, n_channel), dtype=rmf_dtype) + for i in np.arange(n_energy)[self.n_grp > 0]: + rmfimg[i, :] = _make_rmfimg_row(n_channel, rmf_dtype, + self.f_chan[i], self.n_chan[i], self.matrix[i]) + self.rmfimg = rmfimg + return self.rmfimg + + def write_rmfimg(self, outfile, clobber=False): + rmfimg = self.get_rmfimg() + # merge headers + header = self.hdr_matrix.copy(strip=True) + header.extend(self.hdr_ebounds.copy(strip=True)) + outfits = fits.PrimaryHDU(data=rmfimg, header=header) + outfits.writeto(outfile, checksum=True, clobber=clobber) +# class RMF }}} + + +class Spectrum: # {{{ + """ + Class that deals with the X-ray spectrum file (usually *.pi). 
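For orientation, a sketch of the typical life cycle of such an object, mirroring how its subclass `SpectrumSet' is driven by the main routines further below (file names are placeholders, and `grouping'/`quality' are arrays taken from a grouped template spectrum):

    spec = Spectrum("reg1.pi", outfile="reg1_new.pi")
    spec.apply_grouping(grouping=grouping, quality=quality)
    spec.estimate_errors(gehrels=True)     # per-group errors: 1 + sqrt(N + 0.75)
    mc_copy = spec.copy().randomize()      # perturbed copy for a Monte Carlo run
    spec.fix_header_keywords()
    spec.write(clobber=True)               # written to the supplied `outfile'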
+ """ + filename = None + # FITS object return by `fits.open()' + fitsobj = None + # header of "SPECTRUM" extension + header = None + # "SPECTRUM" extension data + channel = None + # name of the spectrum data column (i.e., type, "COUNTS" or "RATE") + spec_type = None + # unit of the spectrum data ("count" for "COUNTS", "count/s" for "RATE") + spec_unit = None + # spectrum data + spec_data = None + # estimated spectral errors for each channel/group + spec_err = None + # statistical errors for each channel/group + stat_err = None + # grouping and quality + grouping = None + quality = None + # whether the spectral data being groupped + groupped = False + # several important keywords + EXPOSURE = None + BACKSCAL = None + AREASCAL = None + RESPFILE = None + ANCRFILE = None + BACKFILE = None + # numpy dtype and FITS format code of the spectrum data + spec_dtype = None + spec_fits_format = None + # output filename for writing the spectrum if no filename provided + outfile = None + + def __init__(self, filename, outfile=None): + self.filename = filename + self.fitsobj = fits.open(filename) + ext_spec = self.fitsobj["SPECTRUM"] + self.header = ext_spec.header.copy(strip=True) + colnames = ext_spec.columns.names + if "COUNTS" in colnames: + self.spec_type = "COUNTS" + elif "RATE" in colnames: + self.spec_type = "RATE" + else: + raise ValueError("Invalid spectrum file") + self.channel = ext_spec.data.columns["CHANNEL"].array + col_spec_data = ext_spec.data.columns[self.spec_type] + self.spec_data = col_spec_data.array.copy() + self.spec_unit = col_spec_data.unit + self.spec_dtype = col_spec_data.dtype + self.spec_fits_format = col_spec_data.format + # grouping and quality + if "GROUPING" in colnames: + self.grouping = ext_spec.data.columns["GROUPING"].array + if "QUALITY" in colnames: + self.quality = ext_spec.data.columns["QUALITY"].array + # keywords + self.EXPOSURE = self.header.get("EXPOSURE") + self.BACKSCAL = self.header.get("BACKSCAL") + self.AREASCAL = self.header.get("AREASCAL") + self.RESPFILE = self.header.get("RESPFILE") + self.ANCRFILE = self.header.get("ANCRFILE") + self.BACKFILE = self.header.get("BACKFILE") + # output filename + self.outfile = outfile + + def get_data(self, group_squeeze=False, copy=True): + """ + Get the spectral data (i.e., self.spec_data). + + Arguments: + * group_squeeze: whether squeeze the spectral data according to + the grouping (i.e., exclude the channels that + are not the first channel of the group, which + also have value of ZERO). + This argument is effective only the grouping + being applied. + """ + if group_squeeze and self.groupped: + spec_data = self.spec_data[self.grouping == 1] + else: + spec_data = self.spec_data + if copy: + return spec_data.copy() + else: + return spec_data + + def get_channel(self, copy=True): + if copy: + return self.channel.copy() + else: + return self.channel + + def set_data(self, spec_data, group_squeeze=True): + """ + Set the spectral data of this spectrum to the supplied data. + """ + if group_squeeze and self.groupped: + assert sum(self.grouping == 1) == len(spec_data) + self.spec_data[self.grouping == 1] = spec_data + else: + assert len(self.spec_data) == len(spec_data) + self.spec_data = spec_data.copy() + + def add_stat_err(self, stat_err, group_squeeze=True): + """ + Add the "STAT_ERR" column as the statistical errors of each spectral + group, which are estimated by utilizing the Monte Carlo techniques. 
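Note that together with this column the "POISSERR" keyword is set to False (see below), which, per the OGIP convention, tells fitting packages to read the per-channel statistical errors from "STAT_ERR" instead of assuming Poisson errors; the values themselves come from `calc_median_errors()' further below.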
+ """ + self.stat_err = np.zeros(self.spec_data.shape, + dtype=self.spec_data.dtype) + if group_squeeze and self.groupped: + assert sum(self.grouping == 1) == len(stat_err) + self.stat_err[self.grouping == 1] = stat_err + else: + assert len(self.stat_err) == len(stat_err) + self.stat_err = stat_err.copy() + self.header["POISSERR"] = False + + def apply_grouping(self, grouping=None, quality=None): + """ + Apply the spectral channel grouping specification to the spectrum. + + NOTE: + * The spectral data (i.e., self.spec_data) is MODIFIED! + * The spectral data within the same group are summed up. + * The self grouping is overwritten if `grouping' is supplied, as well + as the self quality. + """ + if grouping is not None: + self.grouping = grouping + if quality is not None: + self.quality = quality + self.spec_data = group_data(self.spec_data, self.grouping) + self.groupped = True + + def estimate_errors(self, gehrels=True): + """ + Estimate the statistical errors of each spectral group (after + applying grouping) for the source spectrum (and background spectrum). + + If `gehrels=True', the statistical error for a spectral group with + N photons is given by `1 + sqrt(N + 0.75)'; otherwise, the error + is given by `sqrt(N)'. + + Results: `self.spec_err' + """ + eps = 1.0e-10 + if gehrels: + self.spec_err = 1.0 + np.sqrt(self.spec_data + 0.75) + else: + self.spec_err = np.sqrt(self.spec_data) + # replace the zeros with a very small value (because + # `np.random.normal' requires `scale' > 0) + self.spec_err[self.spec_err <= 0.0] = eps + + def copy(self): + """ + Return a copy of this object, with the `np.ndarray' properties are + copied. + """ + new = copy(self) + for k, v in self.__dict__.items(): + if isinstance(v, np.ndarray): + setattr(new, k, v.copy()) + return new + + def randomize(self): + """ + Randomize the spectral data according to the estimated spectral + group errors by assuming the normal distribution. + + NOTE: this method should be called AFTER the `copy()' method. + """ + if self.spec_err is None: + raise ValueError("No valid 'spec_err' presents") + if self.groupped: + idx = self.grouping == 1 + self.spec_data[idx] = np.random.normal(self.spec_data[idx], + self.spec_err[idx]) + else: + self.spec_data = np.random.normal(self.spec_data, self.spec_err) + return self + + def fix_header_keywords(self, + reset_kw=["ANCRFILE", "RESPFILE", "BACKFILE"]): + """ + Reset the keywords to "NONE" to avoid confusion or mistakes, + and also add mandatory spectral keywords if missing. + + Reference: + [1] The OGIP Spectral File Format, Sec. 
3.1.1 + https://heasarc.gsfc.nasa.gov/docs/heasarc/ofwg/docs/summary/ogip_92_007_summary.html + """ + default_keywords = { + ## Mandatory keywords + #"EXTNAME" : "SPECTRUM", + "TELESCOP" : "NONE", + "INSTRUME" : "NONE", + "FILTER" : "NONE", + #"EXPOSURE" : <integration_time (s)>, + "BACKFILE" : "NONE", + "CORRFILE" : "NONE", + "CORRSCAL" : 1.0, + "RESPFILE" : "NONE", + "ANCRFILE" : "NONE", + "HDUCLASS" : "OGIP", + "HDUCLAS1" : "SPECTRUM", + "HDUVERS" : "1.2.1", + "POISSERR" : True, + #"CHANTYPE" : "PI", + #"DETCHANS" : <total_number_of_detector_channels>, + ## Optional keywords for further information + "BACKSCAL" : 1.0, + "AREASCAL" : 1.0, + # Type of spectral data: + # (1) "TOTAL": gross spectrum (source+bkg); + # (2) "NET": background-subtracted spectrum + # (3) "BKG" background spectrum + #"HDUCLAS2" : "NET", + # Details of the type of data: + # (1) "COUNT": data stored as counts + # (2) "RATE": data stored as counts/s + "HDUCLAS3" : { "COUNTS":"COUNT", + "RATE":"RATE" }.get(self.spec_type), + } + # add mandatory keywords if missing + for kw, value in default_keywords.items(): + if kw not in self.header: + self.header[kw] = value + # reset the specified keywords + for kw in reset_kw: + self.header[kw] = default_keywords.get(kw) + + def write(self, filename=None, clobber=False): + """ + Create a new "SPECTRUM" table/extension and replace the original + one, then write to output file. + """ + if filename is None: + filename = self.outfile + columns = [ + fits.Column(name="CHANNEL", format="I", array=self.channel), + fits.Column(name=self.spec_type, format=self.spec_fits_format, + unit=self.spec_unit, array=self.spec_data), + ] + if self.grouping is not None: + columns.append(fits.Column(name="GROUPING", + format="I", array=self.grouping)) + if self.quality is not None: + columns.append(fits.Column(name="QUALITY", + format="I", array=self.quality)) + if self.stat_err is not None: + columns.append(fits.Column(name="STAT_ERR", unit=self.spec_unit, + format=self.spec_fits_format, + array=self.stat_err)) + ext_spec_cols = fits.ColDefs(columns) + ext_spec = fits.BinTableHDU.from_columns(ext_spec_cols, + header=self.header) + self.fitsobj["SPECTRUM"] = ext_spec + self.fitsobj.writeto(filename, clobber=clobber, checksum=True) +# class Spectrum }}} + + +class SpectrumSet(Spectrum): # {{{ + """ + This class handles a set of spectrum, including the source spectrum, + RMF, ARF, and the background spectrum. + + **NOTE**: + The "COUNTS" column data are converted from "int32" to "float32", + since this spectrum will be subtracted/compensated according to the + ratios of ARFs. 
+ """ + # ARF object for this spectrum + arf = None + # RMF object for this spectrum + rmf = None + # background Spectrum object for this spectrum + bkg = None + # inner and outer radius of the region from which the spectrum extracted + radius_inner = None + radius_outer = None + # total angular range of the spectral region + angle = None + + # numpy dtype and FITS format code to which the spectrum data be + # converted if the data is "COUNTS" + #_spec_dtype = np.float32 + #_spec_fits_format = "E" + _spec_dtype = np.float64 + _spec_fits_format = "D" + + def __init__(self, filename, outfile=None, arf=None, rmf=None, bkg=None): + super().__init__(filename, outfile) + # convert spectrum data type if necessary + if self.spec_data.dtype != self._spec_dtype: + self.spec_data = self.spec_data.astype(self._spec_dtype) + self.spec_dtype = self._spec_dtype + self.spec_fits_format = self._spec_fits_format + if arf is not None: + if isinstance(arf, ARF): + self.arf = arf + else: + self.arf = ARF(arf) + if rmf is not None: + if isinstance(rmf, RMF): + self.rmf = rmf + else: + self.rmf = RMF(rmf) + if bkg is not None: + if isinstance(bkg, Spectrum): + self.bkg = bkg + else: + self.bkg = Spectrum(bkg) + # convert background spectrum data type if necessary + if self.bkg.spec_data.dtype != self._spec_dtype: + self.bkg.spec_data = self.bkg.spec_data.astype(self._spec_dtype) + self.bkg.spec_dtype = self._spec_dtype + self.bkg.spec_fits_format = self._spec_fits_format + + def get_energy(self, mean="geometric"): + """ + Get the energy values of each channel if RMF present. + + NOTE: + The "E_MIN" and "E_MAX" columns of the RMF is required to calculate + the spectrum channel energies. + And the channel energies are generally different to the "ENERG_LO" + and "ENERG_HI" of the corresponding ARF. + """ + if self.rmf is None: + return None + else: + return self.rmf.get_energy(mean=mean) + + def get_arf(self, mean="geometric", groupped=True, copy=True): + """ + Get the interpolated ARF data w.r.t the spectral channel energies + if the ARF presents. + + Arguments: + * groupped: (bool) whether to get the groupped ARF + + Return: (groupped) interpolated ARF data + """ + if self.arf is None: + return None + else: + return self.arf.get_data(groupped=groupped, copy=copy) + + def read_xflt(self): + """ + Read the XFLT000# keywords from the header, check the validity (e.g., + "XFLT0001" should equals "XFLT0002", "XFLT0003" should equals 0). + Sum all the additional XFLT000# pairs (e.g., ) which describes the + regions angluar ranges. + """ + eps = 1.0e-6 + xflt0001 = float(self.header["XFLT0001"]) + xflt0002 = float(self.header["XFLT0002"]) + xflt0003 = float(self.header["XFLT0003"]) + # XFLT000# validity check + assert np.isclose(xflt0001, xflt0002) + assert abs(xflt0003) < eps + # outer radius of the region + self.radius_outer = xflt0001 + # angular regions + self.angle = 0.0 + num = 4 + while True: + try: + angle_begin = float(self.header["XFLT%04d" % num]) + angle_end = float(self.header["XFLT%04d" % (num+1)]) + num += 2 + except KeyError: + break + self.angle += (angle_end - angle_begin) + # if NO additional XFLT000# keys exist, assume "annulus" region + if self.angle < eps: + self.angle = 360.0 + + def scale(self): + """ + Scale the spectral data (and spectral group errors if present) of + the source spectrum (and background spectra if present) according + to the region angular size to make it correspond to the whole annulus + region (i.e., 360 degrees). 
+ + NOTE: the spectral data and errors (i.e., `self.spec_data', and + `self.spec_err') is MODIFIED! + """ + self.spec_data *= (360.0 / self.angle) + if self.spec_err is not None: + self.spec_err *= (360.0 / self.angle) + # also scale the background spectrum if present + if self.bkg: + self.bkg.spec_data *= (360.0 / self.angle) + if self.bkg.spec_err is not None: + self.bkg.spec_err *= (360.0 / self.angle) + + def apply_grouping(self, grouping=None, quality=None, verbose=False): + """ + Apply the spectral channel grouping specification to the source + spectrum, the ARF (which is used during the later spectral + manipulations), and the background spectrum (if presents). + + NOTE: + * The spectral data (i.e., self.spec_data) is MODIFIED! + * The spectral data within the same group are summed up. + * The self grouping is overwritten if `grouping' is supplied, as well + as the self quality. + """ + super().apply_grouping(grouping=grouping, quality=quality) + # also group the ARF accordingly + self.arf.apply_grouping(energy_channel=self.get_energy(), + grouping=self.grouping, verbose=verbose) + # group the background spectrum if present + if self.bkg: + self.bkg.spec_data = group_data(self.bkg.spec_data, self.grouping) + + def estimate_errors(self, gehrels=True): + """ + Estimate the statistical errors of each spectral group (after + applying grouping) for the source spectrum (and background spectrum). + + If `gehrels=True', the statistical error for a spectral group with + N photons is given by `1 + sqrt(N + 0.75)'; otherwise, the error + is given by `sqrt(N)'. + + Results: `self.spec_err' (and `self.bkg.spec_err') + """ + super().estimate_errors(gehrels=gehrels) + eps = 1.0e-10 + # estimate the errors for background spectrum if present + if self.bkg: + if gehrels: + self.bkg.spec_err = 1.0 + np.sqrt(self.bkg.spec_data + 0.75) + else: + self.bkg.spec_err = np.sqrt(self.bkg.spec_data) + self.bkg.spec_err[self.bkg.spec_err <= 0.0] = eps + + def subtract_bkg(self, inplace=True, add_history=False, verbose=False): + """ + Subtract the background contribution from the source spectrum. + The `EXPOSURE' and `BACKSCAL' values are required to calculate + the fraction/ratio for the background subtraction. + + Arguments: + * inplace: whether replace the `spec_data' with the background- + subtracted spectrum data; If True, the attribute + `spec_bkg_subtracted' is also set to `True' when + the subtraction finished. + The keywords "BACKSCAL" and "AREASCAL" are set to 1.0. 
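The background scaling applied by this method is the standard one:

    ratio    = (EXPOSURE_src / EXPOSURE_bkg)
               * (BACKSCAL_src / BACKSCAL_bkg)
               * (AREASCAL_src / AREASCAL_bkg)
    spec_net = spec_src - ratio * spec_bkg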
+ + Return: + background-subtracted spectrum data + """ + ratio = (self.EXPOSURE / self.bkg.EXPOSURE) * \ + (self.BACKSCAL / self.bkg.BACKSCAL) * \ + (self.AREASCAL / self.bkg.AREASCAL) + operation = " SUBTRACT_BACKGROUND: %s - %s * %s" % \ + (self.filename, ratio, self.bkg.filename) + if verbose: + print(operation, file=sys.stderr) + spec_data_subbkg = self.spec_data - ratio * self.bkg.get_data() + if inplace: + self.spec_data = spec_data_subbkg + self.spec_bkg_subtracted = True + self.BACKSCAL = 1.0 + self.AREASCAL = 1.0 + # update header + self.header["BACKSCAL"] = 1.0 + self.header["AREASCAL"] = 1.0 + self.header["BACKFILE"] = "NONE" + self.header["HDUCLAS2"] = "NET" # background-subtracted spectrum + # also record history + if add_history: + self.header.add_history(operation) + return spec_data_subbkg + + def subtract(self, spectrumset, cross_arf, groupped=False, + group_squeeze=False, add_history=False, verbose=False): + """ + Subtract the photons that originate from the surrounding regions + but were scattered into this spectrum due to the finite PSF. + + The background of this spectrum and the given spectrum should + both be subtracted before applying this subtraction for crosstalk + correction, as well as the below `compensate()' procedure. + + NOTE: + 1. The crosstalk ARF must be provided, since the `spectrumset.arf' + is required to be its ARF without taking crosstalk into account: + spec1_new = spec1 - spec2 * (cross_arf_2_to_1 / arf2) + 2. The ARF are interpolated to match the energies of spetral channels. + """ + operation = " SUBTRACT: %s - (%s/%s) * %s" % (self.filename, + cross_arf.filename, spectrumset.arf.filename, + spectrumset.filename) + if verbose: + print(operation, file=sys.stderr) + energy = self.get_energy() + if groupped: + spectrumset.arf.apply_grouping(energy_channel=energy, + grouping=self.grouping, verbose=verbose) + cross_arf.apply_grouping(energy_channel=energy, + grouping=self.grouping, verbose=verbose) + arfresp_spec = spectrumset.arf.get_data(groupped=True, + group_squeeze=group_squeeze) + arfresp_cross = cross_arf.get_data(groupped=True, + group_squeeze=group_squeeze) + else: + arfresp_spec = spectrumset.arf.interpolate(x=energy, + verbose=verbose) + arfresp_cross = cross_arf.interpolate(x=energy, verbose=verbose) + with np.errstate(divide="ignore", invalid="ignore"): + arf_ratio = arfresp_cross / arfresp_spec + # fix nan/inf values due to division by zero + arf_ratio[ ~ np.isfinite(arf_ratio) ] = 0.0 + spec_data = self.get_data(group_squeeze=group_squeeze) - \ + spectrumset.get_data(group_squeeze=group_squeeze)*arf_ratio + self.set_data(spec_data, group_squeeze=group_squeeze) + # record history + if add_history: + self.header.add_history(operation) + + def compensate(self, cross_arf, groupped=False, group_squeeze=False, + add_history=False, verbose=False): + """ + Compensate the photons that originate from this regions but were + scattered into the surrounding regions due to the finite PSF. 
+ + formula: + spec1_new = spec1 + spec1 * (cross_arf_1_to_2 / arf1) + """ + operation = " COMPENSATE: %s + (%s/%s) * %s" % (self.filename, + cross_arf.filename, self.arf.filename, self.filename) + if verbose: + print(operation, file=sys.stderr) + energy = self.get_energy() + if groupped: + cross_arf.apply_grouping(energy_channel=energy, + grouping=self.grouping, verbose=verbose) + arfresp_this = self.arf.get_data(groupped=True, + group_squeeze=group_squeeze) + arfresp_cross = cross_arf.get_data(groupped=True, + group_squeeze=group_squeeze) + else: + arfresp_this = self.arf.interpolate(x=energy, verbose=verbose) + arfresp_cross = cross_arf.interpolate(x=energy, verbose=verbose) + with np.errstate(divide="ignore", invalid="ignore"): + arf_ratio = arfresp_cross / arfresp_this + # fix nan/inf values due to division by zero + arf_ratio[ ~ np.isfinite(arf_ratio) ] = 0.0 + spec_data = self.get_data(group_squeeze=group_squeeze) + \ + self.get_data(group_squeeze=group_squeeze) * arf_ratio + self.set_data(spec_data, group_squeeze=group_squeeze) + # record history + if add_history: + self.header.add_history(operation) + + def fix_negative(self, add_history=False, verbose=False): + """ + The subtractions may lead to negative counts, it may be necessary + to fix these channels with negative values. + """ + neg_counts = self.spec_data < 0 + N = len(neg_counts) + neg_channels = np.arange(N, dtype=np.int)[neg_counts] + if len(neg_channels) > 0: + print("WARNING: %d channels have NEGATIVE counts" % \ + len(neg_channels), file=sys.stderr) + i = 0 + while len(neg_channels) > 0: + i += 1 + if verbose: + if i == 1: + print("*** Fixing negative channels: iter %d..." % i, + end="", file=sys.stderr) + else: + print("%d..." % i, end="", file=sys.stderr) + for ch in neg_channels: + neg_val = self.spec_data[ch] + if ch < N-2: + self.spec_data[ch] = 0 + self.spec_data[(ch+1):(ch+3)] -= 0.5 * np.abs(neg_val) + else: + # just set to zero if it is the last 2 channels + self.spec_data[ch] = 0 + # update negative channels indices + neg_counts = self.spec_data < 0 + neg_channels = np.arange(N, dtype=np.int)[neg_counts] + if i > 0: + print("FIXED!", file=sys.stderr) + # record history + if add_history: + self.header.add_history(" FIXED NEGATIVE CHANNELS") + + def set_radius_inner(self, radius_inner): + """ + Set the inner radius of the spectral region. + """ + assert radius_inner < self.radius_outer + self.radius_inner = radius_inner + + def copy(self): + """ + Return a copy of this object. + """ + new = super().copy() + if self.bkg: + new.bkg = self.bkg.copy() + return new + + def randomize(self): + """ + Randomize the source (and background if present) spectral data + according to the estimated spectral group errors by assuming the + normal distribution. + + NOTE: this method should be called AFTER the `copy()' method. + """ + super().randomize() + if self.bkg: + self.bkg.spec_data = np.random.normal(self.bkg.spec_data, + self.bkg.spec_err) + self.bkg.spec_data[self.grouping == -1] = 0.0 + return self +# class SpectrumSet }}} + + +class Crosstalk: # {{{ + """ + XMM-Newton PSF Crosstalk effect correction. + """ + # `SpectrumSet' object for the spectrum to be corrected + spectrumset = None + # NOTE/XXX: do NOT use list (e.g., []) here, otherwise, all the + # instances will share these list properties. + # `SpectrumSet' and `ARF' objects corresponding to the spectra from + # which the photons were scattered into this spectrum. 
+ cross_in_specset = None + cross_in_arf = None + # `ARF' objects corresponding to the regions to which the photons of + # this spectrum were scattered into. + cross_out_arf = None + # grouping specification and quality data + grouping = None + quality = None + # whether the spectrum is groupped + groupped = False + + def __init__(self, config, arf_dict={}, rmf_dict={}, + grouping=None, quality=None): + """ + Arguments: + * config: a section of the whole config file (`ConfigObj' object) + """ + self.cross_in_specset = [] + self.cross_in_arf = [] + self.cross_out_arf = [] + # this spectrum to be corrected + self.spectrumset = SpectrumSet(filename=config["spec"], + outfile=config["outfile"], + arf=arf_dict.get(config["arf"], config["arf"]), + rmf=rmf_dict.get(config.get("rmf"), config.get("rmf")), + bkg=config.get("bkg")) + # spectra and cross arf from which photons were scattered in + for reg_in in config["cross_in"].values(): + specset = SpectrumSet(filename=reg_in["spec"], + arf=arf_dict.get(reg_in["arf"], reg_in["arf"]), + rmf=rmf_dict.get(reg_in.get("rmf"), reg_in.get("rmf")), + bkg=reg_in.get("bkg")) + self.cross_in_specset.append(specset) + self.cross_in_arf.append(arf_dict.get(reg_in["cross_arf"], + ARF(reg_in["cross_arf"]))) + # regions into which the photons of this spectrum were scattered into + if "cross_out" in config.sections: + cross_arf = config["cross_out"].as_list("cross_arf") + for arffile in cross_arf: + self.cross_out_arf.append(arf_dict.get(arffile, ARF(arffile))) + # grouping and quality + self.grouping = grouping + self.quality = quality + + def apply_grouping(self, verbose=False): + self.spectrumset.apply_grouping(grouping=self.grouping, + quality=self.quality, verbose=verbose) + # also group the related surrounding spectra + for specset in self.cross_in_specset: + specset.apply_grouping(grouping=self.grouping, + quality=self.quality, verbose=verbose) + self.groupped = True + + def estimate_errors(self, gehrels=True, verbose=False): + if verbose: + print("INFO: Estimating spectral errors ...") + self.spectrumset.estimate_errors(gehrels=gehrels) + # also estimate errors for the related surrounding spectra + for specset in self.cross_in_specset: + specset.estimate_errors(gehrels=gehrels) + + def do_correction(self, subtract_bkg=True, fix_negative=False, + group_squeeze=True, add_history=False, verbose=False): + """ + Perform the crosstalk correction. The background contribution + for each spectrum is subtracted first if `subtract_bkg' is True. + The basic correction procedures are recorded to the header. 
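A sketch of how this class is driven by the main routines near the end of this file (the config section and file names follow the sample config in the module docstring):

    ct = Crosstalk(config["reg2"], arf_dict=arf_dict, rmf_dict=rmf_dict,
                   grouping=grouping, quality=quality)
    ct.apply_grouping()
    ct.estimate_errors(gehrels=True)
    clean_copy = ct.copy()           # kept aside for the Monte Carlo iterations
    ct.do_correction(subtract_bkg=True, fix_negative=False,
                     group_squeeze=True, add_history=True)
    ct.fix_header()
    ct.write(clobber=True)           # writes to the configured `outfile'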
+ """ + if add_history: + self.spectrumset.header.add_history("Crosstalk Correction BEGIN") + self.spectrumset.header.add_history(" TOOL: %s (v%s) @ %s" % (\ + os.path.basename(sys.argv[0]), __version__, + datetime.utcnow().isoformat())) + # background subtraction + if subtract_bkg: + if verbose: + print("INFO: subtract background ...", file=sys.stderr) + self.spectrumset.subtract_bkg(inplace=True, + add_history=add_history, verbose=verbose) + # also apply background subtraction to the surrounding spectra + for specset in self.cross_in_specset: + specset.subtract_bkg(inplace=True, + add_history=add_history, verbose=verbose) + # subtractions + if verbose: + print("INFO: apply subtractions ...", file=sys.stderr) + for specset, cross_arf in zip(self.cross_in_specset, + self.cross_in_arf): + self.spectrumset.subtract(spectrumset=specset, + cross_arf=cross_arf, groupped=self.groupped, + group_squeeze=group_squeeze, add_history=add_history, + verbose=verbose) + # compensations + if verbose: + print("INFO: apply compensations ...", file=sys.stderr) + for cross_arf in self.cross_out_arf: + self.spectrumset.compensate(cross_arf=cross_arf, + groupped=self.groupped, group_squeeze=group_squeeze, + add_history=add_history, verbose=verbose) + # fix negative values in channels + if fix_negative: + if verbose: + print("INFO: fix negative channel values ...", file=sys.stderr) + self.spectrumset.fix_negative(add_history=add_history, + verbose=verbose) + if add_history: + self.spectrumset.header.add_history("END Crosstalk Correction") + + def fix_header(self): + # fix header keywords + self.spectrumset.fix_header_keywords( + reset_kw=["RESPFILE", "ANCRFILE", "BACKFILE"]) + + def copy(self): + new = copy(self) + # properly handle the copy of spectrumsets + new.spectrumset = self.spectrumset.copy() + new.cross_in_specset = [ specset.copy() \ + for specset in self.cross_in_specset ] + return new + + def randomize(self): + self.spectrumset.randomize() + for specset in self.cross_in_specset: + specset.randomize() + return self + + def get_spectrum(self, copy=True): + if copy: + return self.spectrumset.copy() + else: + return self.spectrumset + + def write(self, filename=None, clobber=False): + self.spectrumset.write(filename=filename, clobber=clobber) +# class Crosstalk }}} + + +class Deprojection: # {{{ + """ + Perform the deprojection on a set of PROJECTED spectra with the + assumption of spherical symmetry of the source object, and produce + the DEPROJECTED spectra. 
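Working from the outermost annulus inward (the loop in `do_deprojection()' below), the spectrum per unit volume of each shell is obtained by peeling off the projected contributions of all outer shells:

    deproj_i = spec_i - sum_{j>i} s_j * V(shell_j -> annulus_i)
    s_i      = deproj_i / V(shell_i -> annulus_i)

where s_j is the already-determined spectrum per unit volume of shell j, and V(shell -> annulus) is the projected volume computed by `projected_volume()'.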
+ + NOTE: + * Assumption of the spherical symmetry + * Background should be subtracted before deprojection + * ARF differences of different regions are taken into account + + Reference & Credit: + [1] Direct X-ray Spectra Deprojection + https://www-xray.ast.cam.ac.uk/papers/dsdeproj/ + Sanders & Fabian 2007, MNRAS, 381, 1381 + """ + spectra = None + grouping = None + quality = None + + def __init__(self, spectra, grouping=None, quality=None, verbose=False): + """ + Arguments: + * spectra: a set of spectra from the inner-most to the outer-most + regions (e.g., spectra after correcting crosstalk effect) + * grouping: grouping specification for all the spectra + * quality: quality column for the spectra + """ + self.spectra = [] + for spec in spectra: + if not isinstance(spec, SpectrumSet): + raise ValueError("Not a 'SpectrumSet' object") + spec.read_xflt() + self.spectra.append(spec) + self.spectra = spectra + self.grouping = grouping + self.quality = quality + # sort spectra by `radius_outer' + self.spectra.sort(key=lambda x: x.radius_outer) + # set the inner radii + radii_inner = [0.0] + [ x.radius_outer for x in self.spectra[:-1] ] + for spec, rin in zip(self.spectra, radii_inner): + spec.set_radius_inner(rin) + if verbose: + print("Deprojection: loaded spectrum: radius: (%s, %s)" % \ + (spec.radius_inner, spec.radius_outer), + file=sys.stderr) + # check EXPOSURE validity (all spectra must have the same exposures) + exposures = [ spec.EXPOSURE for spec in self.spectra ] + assert np.allclose(exposures[:-1], exposures[1:]) + + def subtract_bkg(self, verbose=True): + for spec in self.spectra: + if not spec.bkg: + raise ValueError("Spectrum '%s' has NO background" % \ + spec.filename) + spec.subtract_bkg(inplace=True, verbose=verbose) + + def apply_grouping(self, verbose=False): + for spec in self.spectra: + spec.apply_grouping(grouping=self.grouping, quality=self.quality, + verbose=verbose) + + def estimate_errors(self, gehrels=True): + for spec in self.spectra: + spec.estimate_errors(gehrels=gehrels) + + def scale(self): + """ + Scale the spectral data according to the region angular size. + """ + for spec in self.spectra: + spec.scale() + + def do_deprojection(self, group_squeeze=True, + add_history=False, verbose=False): + # + # TODO/XXX: How to apply ARF correction here??? + # + num_spec = len(self.spectra) + tmp_spec_data = self.spectra[0].get_data(group_squeeze=group_squeeze) + spec_shape = tmp_spec_data.shape + spec_dtype = tmp_spec_data.dtype + spec_per_vol = [None] * num_spec + # + for shellnum in reversed(range(num_spec)): + if verbose: + print("DEPROJECTION: deprojecting shell %d ..." 
% shellnum, + file=sys.stderr) + spec = self.spectra[shellnum] + # calculate projected spectrum of outlying shells + proj_spec = np.zeros(spec_shape, spec_dtype) + for outer in range(shellnum+1, num_spec): + vol = self.projected_volume( + r1=self.spectra[outer].radius_inner, + r2=self.spectra[outer].radius_outer, + R1=spec.radius_inner, + R2=spec.radius_outer) + proj_spec += spec_per_vol[outer] * vol + # + this_spec = spec.get_data(group_squeeze=group_squeeze, copy=True) + deproj_spec = this_spec - proj_spec + # calculate the volume that this spectrum is from + this_vol = self.projected_volume( + r1=spec.radius_inner, r2=spec.radius_outer, + R1=spec.radius_inner, R2=spec.radius_outer) + # calculate the spectral data per unit volume + spec_per_vol[shellnum] = deproj_spec / this_vol + # set the spectral data to these deprojected values + self.set_spec_data(spec_per_vol, group_squeeze=group_squeeze) + # add history to header + if add_history: + self.add_history() + + def get_spec_data(self, group_squeeze=True, copy=True): + """ + Extract the spectral data of each spectrum after deprojection + performed. + """ + return [ spec.get_data(group_squeeze=group_squeeze, copy=copy) + for spec in self.spectra ] + + def set_spec_data(self, spec_data, group_squeeze=True): + """ + Set `spec_data' for each spectrum to the deprojected spectral data. + """ + assert len(spec_data) == len(self.spectra) + for spec, data in zip(self.spectra, spec_data): + spec.set_data(data, group_squeeze=group_squeeze) + + def add_stat_err(self, stat_err, group_squeeze=True): + """ + Add the "STAT_ERR" column to each spectrum. + """ + assert len(stat_err) == len(self.spectra) + for spec, err in zip(self.spectra, stat_err): + spec.add_stat_err(err, group_squeeze=group_squeeze) + + def add_history(self): + """ + Append a brief history about this tool to the header. + """ + history = "Deprojected by %s (v%s) @ %s" % ( + os.path.basename(sys.argv[0]), __version__, + datetime.utcnow().isoformat()) + for spec in self.spectra: + spec.header.add_history(history) + + def fix_header(self): + # fix header keywords + for spec in self.spectra: + spec.fix_header_keywords( + reset_kw=["RESPFILE", "ANCRFILE", "BACKFILE"]) + + def write(self, filenames=[], clobber=False): + """ + Write the deprojected spectra to output file. + """ + if filenames == []: + filenames = [ spec.outfile for spec in self.spectra ] + for spec, outfile in zip(self.spectra, filenames): + spec.write(filename=outfile, clobber=clobber) + + @staticmethod + def projected_volume(r1, r2, R1, R2): + """ + Calculate the projected volume of a spherical shell of radii r1 -> r2 + onto an annulus on the sky of radius R1 -> R2. + + This volume is the integral: + Int(R=R1,R2) Int(x=sqrt(r1^2-R^2),sqrt(r2^2-R^2)) 2*pi*R dx dR + = + Int(R=R1,R2) 2*pi*R * (sqrt(r2^2-R^2) - sqrt(r1^2-R^2)) dR + + Note that the above integral is only half the total volume + (i.e., front only). + """ + def sqrt_trunc(x): + if x > 0: + return np.sqrt(x) + else: + return 0.0 + # + p1 = sqrt_trunc(r1**2 - R2**2) + p2 = sqrt_trunc(r1**2 - R1**2) + p3 = sqrt_trunc(r2**2 - R2**2) + p4 = sqrt_trunc(r2**2 - R1**2) + return 2.0 * (2.0/3.0) * np.pi * ((p1**3 - p2**3) + (p4**3 - p3**3)) +# class Deprojection }}} + + +# Helper functions {{{ +def calc_median_errors(results): + """ + Calculate the median and errors for the spectral data gathered + through Monte Carlo simulations. + + TODO: investigate the errors calculation approach used here! 
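As implemented below, the channel-wise median is the 50th percentile along the Monte Carlo axis, and the error is the symmetrized ~68% confidence interval:

    error = sqrt( ((median - q15.85)^2 + (q84.15 - median)^2) / 2 )

where q15.85 and q84.15 are the 15.85th and 84.15th percentiles of the Monte Carlo realizations.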
+ """ + results = np.array(results) + # `results' now has shape: (mc_times, num_spec, num_channel) + # sort by the Monte Carlo simulation axis + results.sort(0) + mc_times = results.shape[0] + medians = results[ int(mc_times * 0.5) ] + lowerpcs = results[ int(mc_times * 0.1585) ] + upperpcs = results[ int(mc_times * 0.8415) ] + errors = np.sqrt(0.5 * ((medians-lowerpcs)**2 + (upperpcs-medians)**2)) + return (medians, errors) + + +def set_argument(name, default, cmdargs, config): + value = default + if name in config.keys(): + value = config.as_bool(name) + value_cmd = vars(cmdargs)[name] + if value_cmd != default: + value = value_cmd # command arguments overwrite others + return value +# helper functions }}} + + +# main routine {{{ +def main(config, subtract_bkg, fix_negative, mc_times, + verbose=False, clobber=False): + # collect ARFs and RMFs into dictionaries (avoid interpolation every time) + arf_files = set() + rmf_files = set() + for region in config.sections: + config_reg = config[region] + arf_files.add(config_reg.get("arf")) + rmf_files.add(config_reg.get("rmf")) + for reg_in in config_reg["cross_in"].values(): + arf_files.add(reg_in.get("arf")) + arf_files.add(reg_in.get("cross_arf")) + if "cross_out" in config_reg.sections: + for arf in config_reg["cross_out"].as_list("cross_arf"): + arf_files.add(arf) + arf_files = arf_files - set([None]) + arf_dict = { arf: ARF(arf) for arf in arf_files } + rmf_files = rmf_files - set([None]) + rmf_dict = { rmf: RMF(rmf) for rmf in rmf_files } + if verbose: + print("INFO: arf_files:", arf_files, file=sys.stderr) + print("INFO: rmf_files:", rmf_files, file=sys.stderr) + + # get the GROUPING and QUALITY data + grouping_fits = fits.open(config["grouping"]) + grouping = grouping_fits["SPECTRUM"].data.columns["GROUPING"].array + quality = grouping_fits["SPECTRUM"].data.columns["QUALITY"].array + # squeeze the groupped spectral data, etc. + group_squeeze = True + + # crosstalk objects (BEFORE background subtraction) + crosstalks_cleancopy = [] + # crosstalk-corrected spectra + cc_spectra = [] + + # correct crosstalk effects for each region first + for region in config.sections: + if verbose: + print("INFO: processing '%s' ..." 
% region, file=sys.stderr) + crosstalk = Crosstalk(config.get(region), + arf_dict=arf_dict, rmf_dict=rmf_dict, + grouping=grouping, quality=quality) + crosstalk.apply_grouping(verbose=verbose) + crosstalk.estimate_errors(verbose=verbose) + # keep a (almost) clean copy of the crosstalk object + crosstalks_cleancopy.append(crosstalk.copy()) + if verbose: + print("INFO: doing crosstalk correction ...", file=sys.stderr) + crosstalk.do_correction(subtract_bkg=subtract_bkg, + fix_negative=fix_negative, group_squeeze=group_squeeze, + add_history=True, verbose=verbose) + cc_spectra.append(crosstalk.get_spectrum(copy=True)) + + # load back the crosstalk-corrected spectra for deprojection + if verbose: + print("INFO: preparing spectra for deprojection ...", file=sys.stderr) + deprojection = Deprojection(spectra=cc_spectra, grouping=grouping, + quality=quality, verbose=verbose) + if verbose: + print("INFO: scaling spectra according the region angular size...", + file=sys.stderr) + deprojection.scale() + if verbose: + print("INFO: doing deprojection ...", file=sys.stderr) + deprojection.do_deprojection(add_history=True, verbose=verbose) + deproj_results = [ deprojection.get_spec_data( + group_squeeze=group_squeeze, copy=True) ] + + # Monte Carlo for spectral group error estimation + print("INFO: Monte Carlo to estimate spectral errors (%d times) ..." % \ + mc_times, file=sys.stderr) + for i in range(mc_times): + if i % 100 == 0: + print("%d..." % i, end="", flush=True, file=sys.stderr) + # correct crosstalk effects + cc_spectra_copy = [] + for crosstalk in crosstalks_cleancopy: + # copy and randomize + crosstalk_copy = crosstalk.copy().randomize() + crosstalk_copy.do_correction(subtract_bkg=subtract_bkg, + fix_negative=fix_negative, group_squeeze=group_squeeze, + add_history=False, verbose=False) + cc_spectra_copy.append(crosstalk_copy.get_spectrum(copy=True)) + # deproject spectra + deprojection_copy = Deprojection(spectra=cc_spectra_copy, + grouping=grouping, quality=quality, verbose=False) + deprojection_copy.scale() + deprojection_copy.do_deprojection(add_history=False, verbose=False) + deproj_results.append(deprojection_copy.get_spec_data( + group_squeeze=group_squeeze, copy=True)) + print("DONE!", flush=True, file=sys.stderr) + + if verbose: + print("INFO: Calculating the median and errors for each spectrum ...", + file=sys.stderr) + medians, errors = calc_median_errors(deproj_results) + deprojection.set_spec_data(medians, group_squeeze=group_squeeze) + deprojection.add_stat_err(errors, group_squeeze=group_squeeze) + if verbose: + print("INFO: Writing the crosstalk-corrected and deprojected " + \ + "spectra with estimated statistical errors ...", file=sys.stderr) + deprojection.fix_header() + deprojection.write(clobber=clobber) +# main routine }}} + + +# main_deprojection routine {{{ +def main_deprojection(config, mc_times, verbose=False, clobber=False): + """ + Only perform the spectral deprojection. 
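This mode is selected via the command-line interface defined at the end of this file, for example (the config file name here is a placeholder):

    ./crosstalk_deprojection.py --mode deprojection --verbose deproj.conf

where the config only needs the top-level `grouping' and `mc_times' options plus, for every region section, the `spec', `arf', `rmf', `bkg', and `outfile' entries.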
+ """ + # collect ARFs and RMFs into dictionaries (avoid interpolation every time) + arf_files = set() + rmf_files = set() + for region in config.sections: + config_reg = config[region] + arf_files.add(config_reg.get("arf")) + rmf_files.add(config_reg.get("rmf")) + arf_files = arf_files - set([None]) + arf_dict = { arf: ARF(arf) for arf in arf_files } + rmf_files = rmf_files - set([None]) + rmf_dict = { rmf: RMF(rmf) for rmf in rmf_files } + if verbose: + print("INFO: arf_files:", arf_files, file=sys.stderr) + print("INFO: rmf_files:", rmf_files, file=sys.stderr) + + # get the GROUPING and QUALITY data + grouping_fits = fits.open(config["grouping"]) + grouping = grouping_fits["SPECTRUM"].data.columns["GROUPING"].array + quality = grouping_fits["SPECTRUM"].data.columns["QUALITY"].array + # squeeze the groupped spectral data, etc. + group_squeeze = True + + # load spectra for deprojection + if verbose: + print("INFO: preparing spectra for deprojection ...", file=sys.stderr) + proj_spectra = [] + for region in config.sections: + config_reg = config[region] + specset = SpectrumSet(filename=config_reg["spec"], + outfile=config_reg["outfile"], + arf=arf_dict.get(config_reg["arf"], config_reg["arf"]), + rmf=rmf_dict.get(config_reg["rmf"], config_reg["rmf"]), + bkg=config_reg["bkg"]) + proj_spectra.append(specset) + + deprojection = Deprojection(spectra=proj_spectra, grouping=grouping, + quality=quality, verbose=verbose) + deprojection.apply_grouping(verbose=verbose) + deprojection.estimate_errors() + if verbose: + print("INFO: scaling spectra according the region angular size ...", + file=sys.stderr) + deprojection.scale() + + # keep a (almost) clean copy of the input projected spectra + proj_spectra_cleancopy = [ spec.copy() for spec in proj_spectra ] + + if verbose: + print("INFO: subtract the background ...", file=sys.stderr) + deprojection.subtract_bkg(verbose=verbose) + if verbose: + print("INFO: doing deprojection ...", file=sys.stderr) + deprojection.do_deprojection(add_history=True, verbose=verbose) + deproj_results = [ deprojection.get_spec_data( + group_squeeze=group_squeeze, copy=True) ] + + # Monte Carlo for spectral group error estimation + print("INFO: Monte Carlo to estimate spectral errors (%d times) ..." % \ + mc_times, file=sys.stderr) + for i in range(mc_times): + if i % 100 == 0: + print("%d..." 
% i, end="", flush=True, file=sys.stderr) + # copy and randomize the input projected spectra + proj_spectra_copy = [ spec.copy().randomize() + for spec in proj_spectra_cleancopy ] + # deproject spectra + deprojection_copy = Deprojection(spectra=proj_spectra_copy, + grouping=grouping, quality=quality, verbose=False) + deprojection_copy.subtract_bkg(verbose=False) + deprojection_copy.do_deprojection(add_history=False, verbose=False) + deproj_results.append(deprojection_copy.get_spec_data( + group_squeeze=group_squeeze, copy=True)) + print("DONE!", flush=True, file=sys.stderr) + + if verbose: + print("INFO: Calculating the median and errors for each spectrum ...", + file=sys.stderr) + medians, errors = calc_median_errors(deproj_results) + deprojection.set_spec_data(medians, group_squeeze=group_squeeze) + deprojection.add_stat_err(errors, group_squeeze=group_squeeze) + if verbose: + print("INFO: Writing the deprojected spectra " + \ + "with estimated statistical errors ...", file=sys.stderr) + deprojection.fix_header() + deprojection.write(clobber=clobber) +# main_deprojection routine }}} + + +# main_crosstalk routine {{{ +def main_crosstalk(config, subtract_bkg, fix_negative, mc_times, + verbose=False, clobber=False): + """ + Only perform the crosstalk correction. + """ + # collect ARFs and RMFs into dictionaries (avoid interpolation every time) + arf_files = set() + rmf_files = set() + for region in config.sections: + config_reg = config[region] + arf_files.add(config_reg.get("arf")) + rmf_files.add(config_reg.get("rmf")) + for reg_in in config_reg["cross_in"].values(): + arf_files.add(reg_in.get("arf")) + arf_files.add(reg_in.get("cross_arf")) + if "cross_out" in config_reg.sections: + for arf in config_reg["cross_out"].as_list("cross_arf"): + arf_files.add(arf) + arf_files = arf_files - set([None]) + arf_dict = { arf: ARF(arf) for arf in arf_files } + rmf_files = rmf_files - set([None]) + rmf_dict = { rmf: RMF(rmf) for rmf in rmf_files } + if verbose: + print("INFO: arf_files:", arf_files, file=sys.stderr) + print("INFO: rmf_files:", rmf_files, file=sys.stderr) + + # get the GROUPING and QUALITY data + if "grouping" in config.keys(): + grouping_fits = fits.open(config["grouping"]) + grouping = grouping_fits["SPECTRUM"].data.columns["GROUPING"].array + quality = grouping_fits["SPECTRUM"].data.columns["QUALITY"].array + group_squeeze = True + else: + grouping = None + quality = None + group_squeeze = False + + # crosstalk objects (BEFORE background subtraction) + crosstalks_cleancopy = [] + # crosstalk-corrected spectra + cc_spectra = [] + + # correct crosstalk effects for each region first + for region in config.sections: + if verbose: + print("INFO: processing '%s' ..." 
% region, file=sys.stderr) + crosstalk = Crosstalk(config.get(region), + arf_dict=arf_dict, rmf_dict=rmf_dict, + grouping=grouping, quality=quality) + if grouping is not None: + crosstalk.apply_grouping(verbose=verbose) + crosstalk.estimate_errors(verbose=verbose) + # keep a (almost) clean copy of the crosstalk object + crosstalks_cleancopy.append(crosstalk.copy()) + if verbose: + print("INFO: doing crosstalk correction ...", file=sys.stderr) + crosstalk.do_correction(subtract_bkg=subtract_bkg, + fix_negative=fix_negative, group_squeeze=group_squeeze, + add_history=True, verbose=verbose) + crosstalk.fix_header() + cc_spectra.append(crosstalk.get_spectrum(copy=True)) + + # spectral data of the crosstalk-corrected spectra + cc_results = [] + cc_results.append([ spec.get_data(group_squeeze=group_squeeze, copy=True) + for spec in cc_spectra ]) + + # Monte Carlo for spectral group error estimation + print("INFO: Monte Carlo to estimate spectral errors (%d times) ..." % \ + mc_times, file=sys.stderr) + for i in range(mc_times): + if i % 100 == 0: + print("%d..." % i, end="", flush=True, file=sys.stderr) + # correct crosstalk effects + cc_spectra_copy = [] + for crosstalk in crosstalks_cleancopy: + # copy and randomize + crosstalk_copy = crosstalk.copy().randomize() + crosstalk_copy.do_correction(subtract_bkg=subtract_bkg, + fix_negative=fix_negative, group_squeeze=group_squeeze, + add_history=False, verbose=False) + cc_spectra_copy.append(crosstalk_copy.get_spectrum(copy=True)) + cc_results.append([ spec.get_data(group_squeeze=group_squeeze, + copy=True) + for spec in cc_spectra_copy ]) + print("DONE!", flush=True, file=sys.stderr) + + if verbose: + print("INFO: Calculating the median and errors for each spectrum ...", + file=sys.stderr) + medians, errors = calc_median_errors(cc_results) + if verbose: + print("INFO: Writing the crosstalk-corrected spectra " + \ + "with estimated statistical errors ...", + file=sys.stderr) + for spec, data, err in zip(cc_spectra, medians, errors): + spec.set_data(data, group_squeeze=group_squeeze) + spec.add_stat_err(err, group_squeeze=group_squeeze) + spec.write(clobber=clobber) +# main_crosstalk routine }}} + + +if __name__ == "__main__": + # arguments' default values + default_mode = "both" + default_mc_times = 5000 + # commandline arguments parser + parser = argparse.ArgumentParser( + description="Correct the crosstalk effects for XMM EPIC spectra", + epilog="Version: %s (%s)" % (__version__, __date__)) + parser.add_argument("config", help="config file in which describes " +\ + "the crosstalk relations ('ConfigObj' syntax)") + parser.add_argument("-m", "--mode", dest="mode", default=default_mode, + help="operation mode (both | crosstalk | deprojection)") + parser.add_argument("-B", "--no-subtract-bkg", dest="subtract_bkg", + action="store_false", help="do NOT subtract background first") + parser.add_argument("-N", "--fix-negative", dest="fix_negative", + action="store_true", help="fix negative channel values") + parser.add_argument("-M", "--mc-times", dest="mc_times", + type=int, default=default_mc_times, + help="Monte Carlo times for error estimation") + parser.add_argument("-C", "--clobber", dest="clobber", + action="store_true", help="overwrite output file if exists") + parser.add_argument("-v", "--verbose", dest="verbose", + action="store_true", help="show verbose information") + args = parser.parse_args() + # merge commandline arguments and config + config = ConfigObj(args.config) + subtract_bkg = set_argument("subtract_bkg", True, args, config) + 
fix_negative = set_argument("fix_negative", False, args, config) + verbose = set_argument("verbose", False, args, config) + clobber = set_argument("clobber", False, args, config) + # operation mode + mode = config.get("mode", default_mode) + if args.mode != default_mode: + mode = args.mode + # Monte Carlo times + mc_times = config.as_int("mc_times") + if args.mc_times != default_mc_times: + mc_times = args.mc_times + + if mode.lower() == "both": + print("MODE: CROSSTALK + DEPROJECTION", file=sys.stderr) + main(config, subtract_bkg=subtract_bkg, fix_negative=fix_negative, + mc_times=mc_times, verbose=verbose, clobber=clobber) + elif mode.lower() == "deprojection": + print("MODE: DEPROJECTION", file=sys.stderr) + main_deprojection(config, mc_times=mc_times, + verbose=verbose, clobber=clobber) + elif mode.lower() == "crosstalk": + print("MODE: CROSSTALK", file=sys.stderr) + main_crosstalk(config, subtract_bkg=subtract_bkg, + fix_negative=fix_negative, mc_times=mc_times, + verbose=verbose, clobber=clobber) + else: + raise ValueError("Invalid operation mode: %s" % mode) + print(WARNING) + +# vim: set ts=4 sw=4 tw=0 fenc=utf-8 ft=python: # diff --git a/python/fit_sbp.py b/python/fit_sbp.py new file mode 100755 index 0000000..c22e0c8 --- /dev/null +++ b/python/fit_sbp.py @@ -0,0 +1,807 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Aaron LI +# Created: 2016-03-13 +# Updated: 2016-04-26 +# +# Changelogs: +# 2016-04-26: +# * Reorder some methods of classes 'FitModelSBeta' and 'FitModelDBeta' +# * Change the output file extension from ".txt" to ".json" +# 2016-04-21: +# * Plot another X axis with unit "r500", with R500 values marked +# * Adjust output image size/resolution +# 2016-04-20: +# * Support "pix" and "kpc" units +# * Allow ignore data w.r.t R500 value +# * Major changes to the config syntax +# * Add commandline argument to select the sbp model +# 2016-04-05: +# * Allow fix parameters +# 2016-03-31: +# * Remove `ci_report()' +# * Add `make_results()' to orgnize all results as s Python dictionary +# * Report results as json string +# 2016-03-28: +# * Add `main()', `make_model()' +# * Use `configobj' to handle configurations +# * Save fit results and plot +# * Add `ci_report()' +# 2016-03-14: +# * Refactor classes `FitModelSBeta' and `FitModelDBeta' +# * Add matplotlib plot support +# * Add `ignore_data()' and `notice_data()' support +# * Add classes `FitModelSBetaNorm' and `FitModelDBetaNorm' +# +# TODO: +# * to allow fit the outer beta component, then fix it, and fit the inner one +# * to integrate basic information of config file to the output json +# * to output the ignored radius range in the same unit as input sbp data +# + +""" +Fit the surface brightness profile (SBP) with the single-beta model: + s(r) = s0 * [1.0 + (r/rc)^2] ^ (0.5-3*beta) + bkg +or the double-beta model: + s(r) = s01 * [1.0 + (r/rc1)^2] ^ (0.5-3*beta1) + + s02 * [1.0 + (r/rc2)^2] ^ (0.5-3*beta2) + bkg + + +Sample config file: +------------------------------------------------- +name = <NAME> +obsid = <OBSID> +r500_pix = <R500_PIX> +r500_kpc = <R500_KPC> + +sbpfile = sbprofile.txt +# unit of radius: pix (default) or kpc +unit = pixel + +# sbp model: "sbeta" or "dbeta" +model = sbeta +#model = dbeta + +# output file to store the fitting results +outfile = sbpfit.json +# output file to save the fitting plot +imgfile = sbpfit.png + +# data range to be ignored during fitting (same unit as the above "unit") +#ignore = 0.0-20.0, +# specify the ignore range w.r.t R500 ("r500_pix" or "r500_kpc" required) 
+#ignore_r500 = 0.0-0.15, + +[sbeta] +# model-related options (OVERRIDE the upper level options) +outfile = sbpfit_sbeta.json +imgfile = sbpfit_sbeta.png +#ignore = 0.0-20.0, +#ignore_r500 = 0.0-0.15, + [[params]] + # model parameters + # name = initial, lower, upper, variable (FIXED/False to fix the parameter) + s0 = 1.0e-8, 0.0, 1.0e-6 + rc = 30.0, 5.0, 1.0e4 + #rc = 30.0, 5.0, 1.0e4, FIXED + beta = 0.7, 0.3, 1.1 + bkg = 1.0e-10, 0.0, 1.0e-8 + + +[dbeta] +outfile = sbpfit_dbeta.json +imgfile = sbpfit_dbeta.png +#ignore = 0.0-20.0, +#ignore_r500 = 0.0-0.15, + [[params]] + s01 = 1.0e-8, 0.0, 1.0e-6 + rc1 = 50.0, 10.0, 1.0e4 + beta1 = 0.7, 0.3, 1.1 + s02 = 1.0e-8, 0.0, 1.0e-6 + rc2 = 30.0, 2.0, 5.0e2 + beta2 = 0.7, 0.3, 1.1 + bkg = 1.0e-10, 0.0, 1.0e-8 +------------------------------------------------- +""" + +__version__ = "0.6.2" +__date__ = "2016-04-26" + + +import os +import sys +import re +import argparse +import json +from collections import OrderedDict + +import numpy as np +import lmfit +import matplotlib.pyplot as plt +from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas +from matplotlib.figure import Figure +from configobj import ConfigObj + + +plt.style.use("ggplot") + + +class FitModel: + """ + Meta-class of the fitting model. + + The supplied `func' should have the following syntax: + y = f(x, params) + where the `params' is `lmfit.Parameters' instance which contains all + the model parameters to be fitted, and should be provided as well. + """ + def __init__(self, name=None, func=None, params=lmfit.Parameters()): + self.name = name + self.func = func + self.params = params + + def f(self, x): + return self.func(x, self.params) + + def get_param(self, name=None): + """ + Return the requested `Parameter' object or the whole + `Parameters' object of no name supplied. + """ + try: + return self.params[name] + except KeyError: + return self.params + + def set_param(self, name, *args, **kwargs): + """ + Set the properties of the specified parameter. + """ + param = self.params[name] + param.set(*args, **kwargs) + + def plot(self, params, xdata, ax): + """ + Plot the fitted model. + """ + f_fitted = lambda x: self.func(x, params) + ydata = f_fitted(xdata) + ax.plot(xdata, ydata, 'k-') + +class FitModelSBeta(FitModel): + """ + The single-beta model to be fitted. + Single-beta model, with a constant background. + """ + params = lmfit.Parameters() + params.add_many( # (name, value, vary, min, max, expr) + ("s0", 1.0e-8, True, 0.0, 1.0e-6, None), + ("rc", 30.0, True, 1.0, 1.0e4, None), + ("beta", 0.7, True, 0.3, 1.1, None), + ("bkg", 1.0e-9, True, 0.0, 1.0e-7, None)) + + def __init__(self): + super(self.__class__, self).__init__(name="Single-beta", + func=self.sbeta, params=self.params) + + @staticmethod + def sbeta(r, params): + parvals = params.valuesdict() + s0 = parvals["s0"] + rc = parvals["rc"] + beta = parvals["beta"] + bkg = parvals["bkg"] + return s0 * np.power((1 + (r/rc)**2), (0.5 - 3*beta)) + bkg + + def plot(self, params, xdata, ax): + """ + Plot the fitted model, as well as the fitted parameters. 
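        Specifically: the fitted core radius `rc' is marked with a vertical
        dashed line, the background level `bkg' with a horizontal dashed
        line, and the best-fit `beta', `rc' and `bkg' values are annotated
        as text on the axes.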
+ """ + super(self.__class__, self).plot(params, xdata, ax) + ydata = self.sbeta(xdata, params) + # fitted paramters + ax.vlines(x=params["rc"].value, ymin=min(ydata), ymax=max(ydata), + linestyles="dashed") + ax.hlines(y=params["bkg"].value, xmin=min(xdata), xmax=max(xdata), + linestyles="dashed") + ax.text(x=params["rc"].value, y=min(ydata), + s="beta: %.2f\nrc: %.2f" % (params["beta"].value, + params["rc"].value)) + ax.text(x=min(xdata), y=min(ydata), + s="bkg: %.3e" % params["bkg"].value, + verticalalignment="top") + + +class FitModelDBeta(FitModel): + """ + The double-beta model to be fitted. + Double-beta model, with a constant background. + + NOTE: + the first beta component (s01, rc1, beta1) describes the main and + outer SBP; while the second beta component (s02, rc2, beta2) accounts + for the central brightness excess. + """ + params = lmfit.Parameters() + params.add("s01", value=1.0e-8, min=0.0, max=1.0e-6) + params.add("rc1", value=50.0, min=10.0, max=1.0e4) + params.add("beta1", value=0.7, min=0.3, max=1.1) + #params.add("df_s0", value=1.0e-8, min=0.0, max=1.0e-6) + #params.add("s02", expr="s01 + df_s0") + params.add("s02", value=1.0e-8, min=0.0, max=1.0e-6) + #params.add("df_rc", value=30.0, min=0.0, max=1.0e4) + #params.add("rc2", expr="rc1 - df_rc") + params.add("rc2", value=20.0, min=1.0, max=5.0e2) + params.add("beta2", value=0.7, min=0.3, max=1.1) + params.add("bkg", value=1.0e-9, min=0.0, max=1.0e-7) + + def __init__(self): + super(self.__class__, self).__init__(name="Double-beta", + func=self.dbeta, params=self.params) + + @classmethod + def dbeta(self, r, params): + return self.beta1(r, params) + self.beta2(r, params) + + @staticmethod + def beta1(r, params): + """ + This beta component describes the main/outer part of the SBP. + """ + parvals = params.valuesdict() + s01 = parvals["s01"] + rc1 = parvals["rc1"] + beta1 = parvals["beta1"] + bkg = parvals["bkg"] + return s01 * np.power((1 + (r/rc1)**2), (0.5 - 3*beta1)) + bkg + + @staticmethod + def beta2(r, params): + """ + This beta component describes the central/excess part of the SBP. + """ + parvals = params.valuesdict() + s02 = parvals["s02"] + rc2 = parvals["rc2"] + beta2 = parvals["beta2"] + return s02 * np.power((1 + (r/rc2)**2), (0.5 - 3*beta2)) + + def plot(self, params, xdata, ax): + """ + Plot the fitted model, and each beta component, + as well as the fitted parameters. + """ + super(self.__class__, self).plot(params, xdata, ax) + beta1_ydata = self.beta1(xdata, params) + beta2_ydata = self.beta2(xdata, params) + ax.plot(xdata, beta1_ydata, 'b-.') + ax.plot(xdata, beta2_ydata, 'b-.') + # fitted paramters + ydata = beta1_ydata + beta2_ydata + ax.vlines(x=params["rc1"].value, ymin=min(ydata), ymax=max(ydata), + linestyles="dashed") + ax.vlines(x=params["rc2"].value, ymin=min(ydata), ymax=max(ydata), + linestyles="dashed") + ax.hlines(y=params["bkg"].value, xmin=min(xdata), xmax=max(xdata), + linestyles="dashed") + ax.text(x=params["rc1"].value, y=min(ydata), + s="beta1: %.2f\nrc1: %.2f" % (params["beta1"].value, + params["rc1"].value)) + ax.text(x=params["rc2"].value, y=min(ydata), + s="beta2: %.2f\nrc2: %.2f" % (params["beta2"].value, + params["rc2"].value)) + ax.text(x=min(xdata), y=min(ydata), + s="bkg: %.3e" % params["bkg"].value, + verticalalignment="top") + + +class FitModelSBetaNorm(FitModel): + """ + The single-beta model to be fitted. + Single-beta model, with a constant background. + Normalized the `s0' and `bkg' parameters by take the logarithm. 
+ """ + params = lmfit.Parameters() + params.add_many( # (name, value, vary, min, max, expr) + ("log10_s0", -8.0, True, -12.0, -6.0, None), + ("rc", 30.0, True, 1.0, 1.0e4, None), + ("beta", 0.7, True, 0.3, 1.1, None), + ("log10_bkg", -9.0, True, -12.0, -7.0, None)) + + @staticmethod + def sbeta(r, params): + parvals = params.valuesdict() + s0 = 10 ** parvals["log10_s0"] + rc = parvals["rc"] + beta = parvals["beta"] + bkg = 10 ** parvals["log10_bkg"] + return s0 * np.power((1 + (r/rc)**2), (0.5 - 3*beta)) + bkg + + def __init__(self): + super(self.__class__, self).__init__(name="Single-beta", + func=self.sbeta, params=self.params) + + def plot(self, params, xdata, ax): + """ + Plot the fitted model, as well as the fitted parameters. + """ + super(self.__class__, self).plot(params, xdata, ax) + ydata = self.sbeta(xdata, params) + # fitted paramters + ax.vlines(x=params["rc"].value, ymin=min(ydata), ymax=max(ydata), + linestyles="dashed") + ax.hlines(y=(10 ** params["bkg"].value), xmin=min(xdata), + xmax=max(xdata), linestyles="dashed") + ax.text(x=params["rc"].value, y=min(ydata), + s="beta: %.2f\nrc: %.2f" % (params["beta"].value, + params["rc"].value)) + ax.text(x=min(xdata), y=min(ydata), + s="bkg: %.3e" % (10 ** params["bkg"].value), + verticalalignment="top") + + +class FitModelDBetaNorm(FitModel): + """ + The double-beta model to be fitted. + Double-beta model, with a constant background. + Normalized the `s01', `s02' and `bkg' parameters by take the logarithm. + + NOTE: + the first beta component (s01, rc1, beta1) describes the main and + outer SBP; while the second beta component (s02, rc2, beta2) accounts + for the central brightness excess. + """ + params = lmfit.Parameters() + params.add("log10_s01", value=-8.0, min=-12.0, max=-6.0) + params.add("rc1", value=50.0, min=10.0, max=1.0e4) + params.add("beta1", value=0.7, min=0.3, max=1.1) + #params.add("df_s0", value=1.0e-8, min=0.0, max=1.0e-6) + #params.add("s02", expr="s01 + df_s0") + params.add("log10_s02", value=-8.0, min=-12.0, max=-6.0) + #params.add("df_rc", value=30.0, min=0.0, max=1.0e4) + #params.add("rc2", expr="rc1 - df_rc") + params.add("rc2", value=20.0, min=1.0, max=5.0e2) + params.add("beta2", value=0.7, min=0.3, max=1.1) + params.add("log10_bkg", value=-9.0, min=-12.0, max=-7.0) + + @staticmethod + def beta1(r, params): + """ + This beta component describes the main/outer part of the SBP. + """ + parvals = params.valuesdict() + s01 = 10 ** parvals["log10_s01"] + rc1 = parvals["rc1"] + beta1 = parvals["beta1"] + bkg = 10 ** parvals["log10_bkg"] + return s01 * np.power((1 + (r/rc1)**2), (0.5 - 3*beta1)) + bkg + + @staticmethod + def beta2(r, params): + """ + This beta component describes the central/excess part of the SBP. + """ + parvals = params.valuesdict() + s02 = 10 ** parvals["log10_s02"] + rc2 = parvals["rc2"] + beta2 = parvals["beta2"] + return s02 * np.power((1 + (r/rc2)**2), (0.5 - 3*beta2)) + + @classmethod + def dbeta(self, r, params): + return self.beta1(r, params) + self.beta2(r, params) + + def __init__(self): + super(self.__class__, self).__init__(name="Double-beta", + func=self.dbeta, params=self.params) + + def plot(self, params, xdata, ax): + """ + Plot the fitted model, and each beta component, + as well as the fitted parameters. 
+ """ + super(self.__class__, self).plot(params, xdata, ax) + beta1_ydata = self.beta1(xdata, params) + beta2_ydata = self.beta2(xdata, params) + ax.plot(xdata, beta1_ydata, 'b-.') + ax.plot(xdata, beta2_ydata, 'b-.') + # fitted paramters + ydata = beta1_ydata + beta2_ydata + ax.vlines(x=params["log10_rc1"].value, ymin=min(ydata), ymax=max(ydata), + linestyles="dashed") + ax.vlines(x=params["rc2"].value, ymin=min(ydata), ymax=max(ydata), + linestyles="dashed") + ax.hlines(y=(10 ** params["bkg"].value), xmin=min(xdata), + xmax=max(xdata), linestyles="dashed") + ax.text(x=params["rc1"].value, y=min(ydata), + s="beta1: %.2f\nrc1: %.2f" % (params["beta1"].value, + params["rc1"].value)) + ax.text(x=params["rc2"].value, y=min(ydata), + s="beta2: %.2f\nrc2: %.2f" % (params["beta2"].value, + params["rc2"].value)) + ax.text(x=min(xdata), y=min(ydata), + s="bkg: %.3e" % (10 ** params["bkg"].value), + verticalalignment="top") + + +class SbpFit: + """ + Class to handle the SBP fitting with single-/double-beta model. + """ + def __init__(self, model, method="lbfgsb", + xdata=None, ydata=None, xerr=None, yerr=None, xunit="pix", + name=None, obsid=None, r500_pix=None, r500_kpc=None): + self.method = method + self.model = model + self.load_data(xdata=xdata, ydata=ydata, xerr=xerr, yerr=yerr, + xunit=xunit) + self.set_source(name=name, obsid=obsid, r500_pix=r500_pix, + r500_kpc=r500_kpc) + + def set_source(self, name, obsid=None, r500_pix=None, r500_kpc=None): + self.name = name + try: + self.obsid = int(obsid) + except TypeError: + self.obsid = None + try: + self.r500_pix = float(r500_pix) + except TypeError: + self.r500_pix = None + try: + self.r500_kpc = float(r500_kpc) + except TypeError: + self.r500_kpc = None + try: + self.kpc_per_pix = self.r500_kpc / self.r500_pix + except (TypeError, ZeroDivisionError): + self.kpc_per_pix = -1 + + def load_data(self, xdata, ydata, xerr, yerr, xunit="pix"): + self.xdata = xdata + self.ydata = ydata + self.xerr = xerr + self.yerr = yerr + if xdata is not None: + self.mask = np.ones(xdata.shape, dtype=np.bool) + else: + self.mask = None + if xunit.lower() in ["pix", "pixel"]: + self.xunit = "pix" + elif xunit.lower() == "kpc": + self.xunit = "kpc" + else: + raise ValueError("invalid xunit: %s" % xunit) + + def ignore_data(self, xmin=None, xmax=None, unit=None): + """ + Ignore the data points within range [xmin, xmax]. + If xmin is None, then xmin=min(xdata); + if xmax is None, then xmax=max(xdata). + + if unit is None, then assume the same unit as `self.xunit'. + """ + if unit is None: + unit = self.xunit + if xmin is not None: + xmin = self.convert_unit(xmin, unit=unit) + else: + xmin = np.min(self.xdata) + if xmax is not None: + xmax = self.convert_unit(xmax, unit=unit) + else: + xmax = np.max(self.xdata) + ignore_idx = np.logical_and(self.xdata >= xmin, self.xdata <= xmax) + self.mask[ignore_idx] = False + # reset `f_residual' + self.f_residual = None + + def notice_data(self, xmin=None, xmax=None, unit=None): + """ + Notice the data points within range [xmin, xmax]. + If xmin is None, then xmin=min(xdata); + if xmax is None, then xmax=max(xdata). + + if unit is None, then assume the same unit as `self.xunit'. 
+ """ + if unit is None: + unit = self.xunit + if xmin is not None: + xmin = self.convert_unit(xmin, unit=unit) + else: + xmin = np.min(self.xdata) + if xmax is not None: + xmax = self.convert_unit(xmax, unit=unit) + else: + xmax = np.max(self.xdata) + notice_idx = np.logical_and(self.xdata >= xmin, self.xdata <= xmax) + self.mask[notice_idx] = True + # reset `f_residual' + self.f_residual = None + + def convert_unit(self, x, unit): + """ + Convert the value x in given unit to be the unit `self.xunit' + """ + if unit == self.xunit: + return x + elif (unit == "pix") and (self.xunit == "kpc"): + return (x / self.r500_pix * self.r500_kpc) + elif (unit == "kpc") and (self.xunit == "pix"): + return (x / self.r500_kpc * self.r500_pix) + elif (unit == "r500") and (self.xunit == "pix"): + return (x * self.r500_pix) + elif (unit == "r500") and (self.xunit == "kpc"): + return (x * self.r500_kpc) + else: + raise ValueError("invalid units: %s vs. %s" % (unit, self.xunit)) + + def convert_to_r500(self, x, unit=None): + """ + Convert the value x in given unit to be in unit "r500". + """ + if unit is None: + unit = self.xunit + if unit == "r500": + return x + elif unit == "pix": + return (x / self.r500_pix) + elif unit == "kpc": + return (x / self.r500_kpc) + else: + raise ValueError("invalid unit: %s" % unit) + + def set_residual(self): + def f_residual(params): + if self.yerr is None: + return self.model.func(self.xdata[self.mask], params) - \ + self.ydata + else: + return (self.model.func(self.xdata[self.mask], params) - \ + self.ydata[self.mask]) / self.yerr[self.mask] + self.f_residual = f_residual + + def fit(self, method=None): + if method is None: + method = self.method + if not hasattr(self, "f_residual") or self.f_residual is None: + self.set_residual() + self.fitter = lmfit.Minimizer(self.f_residual, self.model.params) + self.fitted = self.fitter.minimize(method=method) + self.fitted_model = lambda x: self.model.func(x, self.fitted.params) + + def calc_ci(self, sigmas=[0.68, 0.90]): + # `conf_interval' requires the fitted results have valid `stderr', + # so we need to re-fit the model with method `leastsq'. + fitted = self.fitter.minimize(method="leastsq", + params=self.fitted.params) + self.ci, self.trace = lmfit.conf_interval(self.fitter, fitted, + sigmas=sigmas, trace=True) + + def make_results(self): + """ + Make the `self.results' dictionary which contains all the fitting + results as well as the confidence intervals. 
+ """ + fitted = self.fitted + self.results = OrderedDict() + ## fitting results + self.results.update( + nfev = fitted.nfev, + ndata = fitted.ndata, + nvarys = fitted.nvarys, # number of varible paramters + nfree = fitted.nfree, # degree of freem + chisqr = fitted.chisqr, + redchi = fitted.redchi, + aic = fitted.aic, + bic = fitted.bic) + params = fitted.params + pnames = list(params.keys()) + pvalues = OrderedDict() + for pn in pnames: + par = params.get(pn) + pvalues[pn] = [par.value, par.min, par.max, par.vary] + self.results["params"] = pvalues + ## confidence intervals + if hasattr(self, "ci") and self.ci is not None: + ci = self.ci + ci_values = OrderedDict() + ci_sigmas = [ "ci%02d" % (v[0]*100) for v in ci.get(pnames[0]) ] + ci_names = sorted(list(set(ci_sigmas))) + ci_idx = { k: [] for k in ci_names } + for cn, idx in zip(ci_sigmas, range(len(ci_sigmas))): + ci_idx[cn].append(idx) + # parameters ci + for pn in pnames: + ci_pv = OrderedDict() + pv = [ v[1] for v in ci.get(pn) ] + # best + pv_best = pv[ ci_idx["ci00"][0] ] + ci_pv["best"] = pv_best + # ci of each sigma + pv2 = [ v-pv_best for v in pv ] + for cn in ci_names[1:]: + ci_pv[cn] = [ pv2[idx] for idx in ci_idx[cn] ] + ci_values[pn] = ci_pv + self.results["ci"] = ci_values + + def report(self, outfile=sys.stdout): + if not hasattr(self, "results") or self.results is None: + self.make_results() + jd = json.dumps(self.results, indent=2) + print(jd, file=outfile) + + def plot(self, ax=None, fig=None, r500_axis=True): + """ + Arguments: + * r500_axis: whether to add a second X axis in unit "r500" + """ + if ax is None: + fig, ax = plt.subplots(1, 1) + # noticed data points + eb = ax.errorbar(self.xdata[self.mask], self.ydata[self.mask], + xerr=self.xerr[self.mask], yerr=self.yerr[self.mask], + fmt="none") + # ignored data points + ignore_mask = np.logical_not(self.mask) + if np.sum(ignore_mask) > 0: + eb = ax.errorbar(self.xdata[ignore_mask], self.ydata[ignore_mask], + xerr=self.xerr[ignore_mask], yerr=self.yerr[ignore_mask], + fmt="none") + eb[-1][0].set_linestyle("-.") + # fitted model + xmax = self.xdata[-1] + self.xerr[-1] + xpred = np.power(10, np.linspace(0, np.log10(xmax), 2*len(self.xdata))) + ypred = self.fitted_model(xpred) + ymin = min(min(self.ydata), min(ypred)) + ymax = max(max(self.ydata), max(ypred)) + self.model.plot(params=self.fitted.params, xdata=xpred, ax=ax) + ax.set_xscale("log") + ax.set_yscale("log") + ax.set_xlim(1.0, xmax) + ax.set_ylim(ymin/1.2, ymax*1.2) + name = self.name + if self.obsid is not None: + name += "; %s" % self.obsid + ax.set_title("Fitted Surface Brightness Profile (%s)" % name) + ax.set_xlabel("Radius (%s)" % self.xunit) + ax.set_ylabel(r"Surface Brightness (photons/cm$^2$/pixel$^2$/s)") + ax.text(x=xmax, y=ymax, + s="redchi: %.2f / %.2f = %.2f" % (self.fitted.chisqr, + self.fitted.nfree, self.fitted.chisqr/self.fitted.nfree), + horizontalalignment="right", verticalalignment="top") + plot_ret = [fig, ax] + if r500_axis: + # Add a second X-axis with labels in unit "r500" + # Credit: https://stackoverflow.com/a/28192477/4856091 + try: + ax.title.set_position([0.5, 1.1]) # raise title position + ax2 = ax.twiny() + # NOTE: the ORDER of the following lines MATTERS + ax2.set_xscale(ax.get_xscale()) + ax2_ticks = ax.get_xticks() + ax2.set_xticks(ax2_ticks) + ax2.set_xbound(ax.get_xbound()) + ax2.set_xticklabels([ "%.2g" % self.convert_to_r500(x) + for x in ax2_ticks ]) + ax2.set_xlabel("Radius (r500; r500 = %s pix = %s kpc)" % (\ + self.r500_pix, self.r500_kpc)) + ax2.grid(False) + 
plot_ret.append(ax2) + except ValueError: + # cannot convert X values to unit "r500" + pass + # automatically adjust layout + fig.tight_layout() + return plot_ret + + +def make_model(config, modelname): + """ + Make the model with parameters set according to the config. + """ + if modelname == "sbeta": + # single-beta model + model = FitModelSBeta() + elif modelname == "dbeta": + # double-beta model + model = FitModelDBeta() + else: + raise ValueError("Invalid model: %s" % modelname) + # set initial values and bounds for the model parameters + params = config[modelname]["params"] + for p, value in params.items(): + variable = True + if len(value) == 4 and value[3].upper() in ["FIXED", "FALSE"]: + variable = False + model.set_param(name=p, value=float(value[0]), + min=float(value[1]), max=float(value[2]), vary=variable) + return model + + +def main(): + # parser for command line options and arguments + parser = argparse.ArgumentParser( + description="Fit surface brightness profile with " + \ + "single-/double-beta model", + epilog="Version: %s (%s)" % (__version__, __date__)) + parser.add_argument("-V", "--version", action="version", + version="%(prog)s " + "%s (%s)" % (__version__, __date__)) + parser.add_argument("config", help="Config file for SBP fitting") + # exclusive argument group for model selection + grp_model = parser.add_mutually_exclusive_group(required=False) + grp_model.add_argument("-s", "--sbeta", dest="sbeta", + action="store_true", help="single-beta model for SBP") + grp_model.add_argument("-d", "--dbeta", dest="dbeta", + action="store_true", help="double-beta model for SBP") + # + args = parser.parse_args() + + config = ConfigObj(args.config) + + # determine the model name + if args.sbeta: + modelname = "sbeta" + elif args.dbeta: + modelname = "dbeta" + else: + modelname = config["model"] + + config_model = config[modelname] + # determine the "outfile" and "imgfile" + outfile = config.get("outfile") + outfile = config_model.get("outfile", outfile) + imgfile = config.get("imgfile") + imgfile = config_model.get("imgfile", imgfile) + + # SBP fitting model + model = make_model(config, modelname=modelname) + + # sbp data and fit object + sbpdata = np.loadtxt(config["sbpfile"]) + sbpfit = SbpFit(model=model, xdata=sbpdata[:, 0], xerr=sbpdata[:, 1], + ydata=sbpdata[:, 2], yerr=sbpdata[:, 3], + xunit=config.get("unit", "pix")) + sbpfit.set_source(name=config["name"], obsid=config.get("obsid"), + r500_pix=config.get("r500_pix"), r500_kpc=config.get("r500_kpc")) + + # apply data range ignorance + if "ignore" in config.keys(): + for ig in config.as_list("ignore"): + xmin, xmax = map(float, ig.split("-")) + sbpfit.ignore_data(xmin=xmin, xmax=xmax) + if "ignore_r500" in config.keys(): + for ig in config.as_list("ignore_r500"): + xmin, xmax = map(float, ig.split("-")) + sbpfit.ignore_data(xmin=xmin, xmax=xmax, unit="r500") + + # apply additional data range ignorance specified within model section + if "ignore" in config_model.keys(): + for ig in config_model.as_list("ignore"): + xmin, xmax = map(float, ig.split("-")) + sbpfit.ignore_data(xmin=xmin, xmax=xmax) + if "ignore_r500" in config_model.keys(): + for ig in config_model.as_list("ignore_r500"): + xmin, xmax = map(float, ig.split("-")) + sbpfit.ignore_data(xmin=xmin, xmax=xmax, unit="r500") + + # fit and calculate confidence intervals + sbpfit.fit() + sbpfit.calc_ci() + sbpfit.report() + with open(outfile, "w") as ofile: + sbpfit.report(outfile=ofile) + + # make and save a plot + fig = Figure(figsize=(10, 8)) + canvas = 
FigureCanvas(fig) + ax = fig.add_subplot(111) + sbpfit.plot(ax=ax, fig=fig, r500_axis=True) + fig.savefig(imgfile, dpi=150) + + +if __name__ == "__main__": + main() + +# vim: set ts=4 sw=4 tw=0 fenc=utf-8 ft=python: # diff --git a/python/imapUTF7.py b/python/imapUTF7.py new file mode 100644 index 0000000..2e4db0a --- /dev/null +++ b/python/imapUTF7.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# This code was originally in PloneMailList, a GPL'd software. +# http://svn.plone.org/svn/collective/mxmImapClient/trunk/imapUTF7.py +# http://bugs.python.org/issue5305 +# +# Port to Python 3.x +# Credit: https://github.com/MarechJ/py3_imap_utf7 +# +# 2016-01-23 +# Aaron LI +# + +""" +Imap folder names are encoded using a special version of utf-7 as defined in RFC +2060 section 5.1.3. + +5.1.3. Mailbox International Naming Convention + + By convention, international mailbox names are specified using a + modified version of the UTF-7 encoding described in [UTF-7]. The + purpose of these modifications is to correct the following problems + with UTF-7: + + 1) UTF-7 uses the "+" character for shifting; this conflicts with + the common use of "+" in mailbox names, in particular USENET + newsgroup names. + + 2) UTF-7's encoding is BASE64 which uses the "/" character; this + conflicts with the use of "/" as a popular hierarchy delimiter. + + 3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with + the use of "\" as a popular hierarchy delimiter. + + 4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with + the use of "~" in some servers as a home directory indicator. + + 5) UTF-7 permits multiple alternate forms to represent the same + string; in particular, printable US-ASCII chararacters can be + represented in encoded form. + + In modified UTF-7, printable US-ASCII characters except for "&" + represent themselves; that is, characters with octet values 0x20-0x25 + and 0x27-0x7e. The character "&" (0x26) is represented by the two- + octet sequence "&-". + + All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all + Unicode 16-bit octets) are represented in modified BASE64, with a + further modification from [UTF-7] that "," is used instead of "/". + Modified BASE64 MUST NOT be used to represent any printing US-ASCII + character which can represent itself. + + "&" is used to shift to modified BASE64 and "-" to shift back to US- + ASCII. All names start in US-ASCII, and MUST end in US-ASCII (that + is, a name that ends with a Unicode 16-bit octet MUST end with a "- + "). 
+ + For example, here is a mailbox name which mixes English, Japanese, + and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw- +""" + + +import binascii +import codecs + + +## encoding + +def modified_base64(s:str): + s = s.encode('utf-16be') # UTF-16, big-endian byte order + return binascii.b2a_base64(s).rstrip(b'\n=').replace(b'/', b',') + +def doB64(_in, r): + if _in: + r.append(b'&' + modified_base64(''.join(_in)) + b'-') + del _in[:] + +def encoder(s:str): + r = [] + _in = [] + for c in s: + ordC = ord(c) + if 0x20 <= ordC <= 0x25 or 0x27 <= ordC <= 0x7e: + doB64(_in, r) + r.append(c.encode()) + elif c == '&': + doB64(_in, r) + r.append(b'&-') + else: + _in.append(c) + doB64(_in, r) + return (b''.join(r), len(s)) + + +## decoding + +def modified_unbase64(s:bytes): + b = binascii.a2b_base64(s.replace(b',', b'/') + b'===') + return b.decode('utf-16be') + +def decoder(s:bytes): + r = [] + decode = bytearray() + for c in s: + if c == ord('&') and not decode: + decode.append(ord('&')) + elif c == ord('-') and decode: + if len(decode) == 1: + r.append('&') + else: + r.append(modified_unbase64(decode[1:])) + decode = bytearray() + elif decode: + decode.append(c) + else: + r.append(chr(c)) + if decode: + r.append(modified_unbase64(decode[1:])) + bin_str = ''.join(r) + return (bin_str, len(s)) + + +class StreamReader(codecs.StreamReader): + def decode(self, s, errors='strict'): + return decoder(s) + + +class StreamWriter(codecs.StreamWriter): + def decode(self, s, errors='strict'): + return encoder(s) + + +def imap4_utf_7(name): + if name == 'imap4-utf-7': + return (encoder, decoder, StreamReader, StreamWriter) + + +codecs.register(imap4_utf_7) + + +## testing methods + +def imapUTF7Encode(ust): + "Returns imap utf-7 encoded version of string" + return ust.encode('imap4-utf-7') + +def imapUTF7EncodeSequence(seq): + "Returns imap utf-7 encoded version of strings in sequence" + return [imapUTF7Encode(itm) for itm in seq] + + +def imapUTF7Decode(st): + "Returns utf7 encoded version of imap utf-7 string" + return st.decode('imap4-utf-7') + +def imapUTF7DecodeSequence(seq): + "Returns utf7 encoded version of imap utf-7 strings in sequence" + return [imapUTF7Decode(itm) for itm in seq] + + +def utf8Decode(st): + "Returns utf7 encoded version of imap utf-7 string" + return st.decode('utf-8') + + +def utf7SequenceToUTF8(seq): + "Returns utf7 encoded version of imap utf-7 strings in sequence" + return [itm.decode('imap4-utf-7').encode('utf-8') for itm in seq] + + +__all__ = [ 'imapUTF7Encode', 'imapUTF7Decode' ] + + +if __name__ == '__main__': + testdata = [ + (u'foo\r\n\nbar\n', b'foo&AA0ACgAK-bar&AAo-'), + (u'测试', b'&bUuL1Q-'), + (u'Hello 世界', b'Hello &ThZ1TA-') + ] + for s, e in testdata: + #assert s == decoder(encoder(s)[0])[0] + assert s == imapUTF7Decode(e) + assert e == imapUTF7Encode(s) + assert s == imapUTF7Decode(imapUTF7Encode(s)) + assert e == imapUTF7Encode(imapUTF7Decode(e)) + print("All tests passed!") + +# vim: set ts=4 sw=4 tw=0 fenc=utf-8 ft=python: # diff --git a/python/msvst_starlet.py b/python/msvst_starlet.py new file mode 100755 index 0000000..e534d3d --- /dev/null +++ b/python/msvst_starlet.py @@ -0,0 +1,646 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# References: +# [1] Jean-Luc Starck, Fionn Murtagh & Jalal M. 
Fadili +# Sparse Image and Signal Processing: Wavelets, Curvelets, Morphological Diversity +# Section 3.5, 6.6 +# +# Credits: +# [1] https://github.com/abrazhe/image-funcut/blob/master/imfun/atrous.py +# +# Aaron LI +# Created: 2016-03-17 +# Updated: 2016-04-22 +# +# ChangeLog: +# 2016-04-22: +# * Add argument "end-scale" to specifiy the end denoising scale +# * Check outfile existence first +# * Add argument "start-scale" to specifiy the start denoising scale +# * Fix a bug about "p_cutoff" when "comp" contains ALL False's +# * Show more verbose information/details +# 2016-04-20: +# * Add argparse and main() for scripting +# + +""" +Starlet wavelet transform, i.e., isotropic undecimated wavelet transform +(IUWT), or à trous wavelet transform. +And multi-scale variance stabling transform (MS-VST), which can be used +to effectively remove the Poisson noises. +""" + +__version__ = "0.2.5" +__date__ = "2016-04-22" + + +import sys +import os +import argparse +from datetime import datetime + +import numpy as np +import scipy as sp +from scipy import signal +from astropy.io import fits + + +class B3Spline: # {{{ + """ + B3-spline wavelet. + """ + # scaling function (phi) + dec_lo = np.array([1.0, 4.0, 6.0, 4.0, 1.0]) / 16 + dec_hi = np.array([-1.0, -4.0, 10.0, -4.0, -1.0]) / 16 + rec_lo = np.array([0.0, 0.0, 1.0, 0.0, 0.0]) + rec_hi = np.array([0.0, 0.0, 1.0, 0.0, 0.0]) +# B3Spline }}} + + +class IUWT: # {{{ + """ + Isotropic undecimated wavelet transform. + """ + ## Decomposition filters list: + # a_{scale} = convole(a_0, filters[scale]) + # Note: the zero-th scale filter (i.e., delta function) is the first + # element, thus the array index is the same as the decomposition scale. + filters = [] + + phi = None # wavelet scaling function (2D) + level = 0 # number of transform level + decomposition = None # decomposed coefficients/images + reconstruction = None # reconstructed image + + # convolution boundary condition + boundary = "symm" + + def __init__(self, phi=B3Spline.dec_lo, level=None, boundary="symm", + data=None): + self.set_wavelet(phi=phi) + self.level = level + self.boundary = boundary + self.data = np.array(data) + + def reset(self): + """ + Reset the object attributes. + """ + self.data = None + self.phi = None + self.decomposition = None + self.reconstruction = None + self.level = 0 + self.filters = [] + self.boundary = "symm" + + def load_data(self, data): + self.reset() + self.data = np.array(data) + + def set_wavelet(self, phi): + self.reset() + phi = np.array(phi) + if phi.ndim == 1: + phi_ = phi.reshape(1, -1) + self.phi = np.dot(phi_.T, phi_) + elif phi.ndim == 2: + self.phi = phi + else: + raise ValueError("Invalid phi dimension") + + def calc_filters(self): + """ + Calculate the convolution filters of each scale. + Note: the zero-th scale filter (i.e., delta function) is the first + element, thus the array index is the same as the decomposition scale. + """ + self.filters = [] + # scale 0: delta function + h = np.array([[1]]) # NOTE: 2D + self.filters.append(h) + # scale 1 + h = self.phi[::-1, ::-1] + self.filters.append(h) + for scale in range(2, self.level+1): + h_up = self.zupsample(self.phi, order=scale-1) + h2 = signal.convolve2d(h_up[::-1, ::-1], h, mode="same", + boundary=self.boundary) + self.filters.append(h2) + + def transform(self, data, scale, boundary="symm"): + """ + Perform only one scale wavelet transform for the given data. 
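        (i.e. convolve `data' with the pre-computed filter of the given
        scale to obtain the approximation, and take the difference from
        `data' as the detail band; this helper is also used by
        IUWT_VST.reconstruct() during the iterative reconstruction)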
+ + return: + [ approx, detail ] + """ + self.decomposition = [] + approx = signal.convolve2d(data, self.filters[scale], + mode="same", boundary=self.boundary) + detail = data - approx + return [approx, detail] + + def decompose(self, level, boundary="symm"): + """ + Perform IUWT decomposition in the plain loop way. + The filters of each scale/level are calculated first, then the + approximations of each scale/level are calculated by convolving the + raw/finest image with these filters. + + return: + [ W_1, W_2, ..., W_n, A_n ] + n = level + W: wavelet details + A: approximation + """ + self.boundary = boundary + if self.level != level or self.filters == []: + self.level = level + self.calc_filters() + self.decomposition = [] + approx = self.data + for scale in range(1, level+1): + # approximation: + approx2 = signal.convolve2d(self.data, self.filters[scale], + mode="same", boundary=self.boundary) + # wavelet details: + w = approx - approx2 + self.decomposition.append(w) + if scale == level: + self.decomposition.append(approx2) + approx = approx2 + return self.decomposition + + def decompose_recursive(self, level, boundary="symm"): + """ + Perform the IUWT decomposition in the recursive way. + + return: + [ W_1, W_2, ..., W_n, A_n ] + n = level + W: wavelet details + A: approximation + """ + self.level = level + self.boundary = boundary + self.decomposition = self.__decompose(self.data, self.phi, level=level) + return self.decomposition + + def __decompose(self, data, phi, level): + """ + 2D IUWT decomposition (or stationary wavelet transform). + + This is a convolution version, where kernel is zero-upsampled + explicitly. Not fast. + + Parameters: + - level : level of decomposition + - phi : low-pass filter kernel + - boundary : boundary conditions (passed to scipy.signal.convolve2d, + 'symm' by default) + + Returns: + list of wavelet details + last approximation. Each element in + the list is an image of the same size as the input image. + """ + if level <= 0: + return data + shapecheck = map(lambda a,b:a>b, data.shape, phi.shape) + assert np.all(shapecheck) + # approximation: + approx = signal.convolve2d(data, phi[::-1, ::-1], mode="same", + boundary=self.boundary) + # wavelet details: + w = data - approx + phi_up = self.zupsample(phi, order=1) + shapecheck = map(lambda a,b:a>b, data.shape, phi_up.shape) + if level == 1: + return [w, approx] + elif not np.all(shapecheck): + print("Maximum allowed decomposition level reached", + file=sys.stderr) + return [w, approx] + else: + return [w] + self.__decompose(approx, phi_up, level-1) + + @staticmethod + def zupsample(data, order=1): + """ + Upsample data array by interleaving it with zero's. + + h{up_order: n}[l] = (1) h[l], if l % 2^n == 0; + (2) 0, otherwise + """ + shape = data.shape + new_shape = [ (2**order * (n-1) + 1) for n in shape ] + output = np.zeros(new_shape, dtype=data.dtype) + output[[ slice(None, None, 2**order) for d in shape ]] = data + return output + + def reconstruct(self, decomposition=None): + if decomposition is not None: + reconstruction = np.sum(decomposition, axis=0) + return reconstruction + else: + self.reconstruction = np.sum(self.decomposition, axis=0) + + def get_detail(self, scale): + """ + Get the wavelet detail coefficients of given scale. + Note: 1 <= scale <= level + """ + if scale < 1 or scale > self.level: + raise ValueError("Invalid scale") + return self.decomposition[scale-1] + + def get_approx(self): + """ + Get the approximation coefficients of the largest scale. 
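        Note: the decomposition produced by decompose() is additive, i.e.
            data == W_1 + W_2 + ... + W_n + A_n
        (up to floating-point round-off), which is why reconstruct() simply
        sums all of the coefficient planes.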
+ """ + return self.decomposition[-1] +# IUWT }}} + + +class IUWT_VST(IUWT): # {{{ + """ + IUWT with Multi-scale variance stabling transform. + + Refernce: + [1] Bo Zhang, Jalal M. Fadili & Jean-Luc Starck, + IEEE Trans. Image Processing, 17, 17, 2008 + """ + # VST coefficients and the corresponding asymptotic standard deviation + # of each scale. + vst_coef = [] + + def reset(self): + super(self.__class__, self).reset() + vst_coef = [] + + def __decompose(self): + raise AttributeError("No '__decompose' attribute") + + @staticmethod + def soft_threshold(data, threshold): + if isinstance(data, np.ndarray): + data_th = data.copy() + data_th[np.abs(data) <= threshold] = 0.0 + data_th[data > threshold] -= threshold + data_th[data < -threshold] += threshold + else: + data_th = data + if np.abs(data) <= threshold: + data_th = 0.0 + elif data > threshold: + data_th -= threshold + else: + data_th += threshold + return data_th + + def tau(self, k, scale): + """ + Helper function used in VST coefficients calculation. + """ + return np.sum(np.power(self.filters[scale], k)) + + def filters_product(self, scale1, scale2): + """ + Calculate the scalar product of the filters of two scales, + considering only the overlapped part. + Helper function used in VST coefficients calculation. + """ + if scale1 > scale2: + filter_big = self.filters[scale1] + filter_small = self.filters[scale2] + else: + filter_big = self.filters[scale2] + filter_small = self.filters[scale1] + # crop the big filter to match the size of the small filter + size_big = filter_big.shape + size_small = filter_small.shape + size_diff2 = list(map(lambda a,b: (a-b)//2, size_big, size_small)) + filter_big_crop = filter_big[ + size_diff2[0]:(size_big[0]-size_diff2[0]), + size_diff2[1]:(size_big[1]-size_diff2[1])] + assert(np.all(list(map(lambda a,b: a==b, + size_small, filter_big_crop.shape)))) + product = np.sum(filter_small * filter_big_crop) + return product + + def calc_vst_coef(self): + """ + Calculate the VST coefficients and the corresponding + asymptotic standard deviation of each scale, according to the + calculated filters of each scale/level. + """ + self.vst_coef = [] + for scale in range(self.level+1): + b = 2 * np.sqrt(np.abs(self.tau(1, scale)) / self.tau(2, scale)) + c = 7.0*self.tau(2, scale) / (8.0*self.tau(1, scale)) - \ + self.tau(3, scale) / (2.0*self.tau(2, scale)) + if scale == 0: + std = -1.0 + else: + std = np.sqrt((self.tau(2, scale-1) / \ + (4 * self.tau(1, scale-1)**2)) + \ + (self.tau(2, scale) / (4 * self.tau(1, scale)**2)) - \ + (self.filters_product(scale-1, scale) / \ + (2 * self.tau(1, scale-1) * self.tau(1, scale)))) + self.vst_coef.append({ "b": b, "c": c, "std": std }) + + def vst(self, data, scale, coupled=True): + """ + Perform variance stabling transform + + XXX: parameter `coupled' why?? + Credit: MSVST-V1.0/src/libmsvst/B3VSTAtrous.h + """ + self.vst_coupled = coupled + if self.vst_coef == []: + self.calc_vst_coef() + if coupled: + b = 1.0 + else: + b = self.vst_coef[scale]["b"] + data_vst = b * np.sqrt(np.abs(data + self.vst_coef[scale]["c"])) + return data_vst + + def ivst(self, data, scale, cbias=True): + """ + Inverse variance stabling transform + NOTE: assuming that `a_{j} + c^{j}' are all positive. + + XXX: parameter `cbias' why?? 
+ `bias correction' is recommended while reconstruct the data + after estimation + Credit: MSVST-V1.0/src/libmsvst/B3VSTAtrous.h + """ + self.vst_cbias = cbias + if cbias: + cb = 1.0 / (self.vst_coef[scale]["b"] ** 2) + else: + cb = 0.0 + data_ivst = data ** 2 + cb - self.vst_coef[scale]["c"] + return data_ivst + + def is_significant(self, scale, fdr=0.1, independent=False, verbose=False): + """ + Multiple hypothesis testing with false discovery rate (FDR) control. + + `independent': whether the test statistics of all the null + hypotheses are independent. + If `independent=True': FDR <= (m0/m) * q + otherwise: FDR <= (m0/m) * q * (1 + 1/2 + 1/3 + ... + 1/m) + + References: + [1] False discovery rate - Wikipedia + https://en.wikipedia.org/wiki/False_discovery_rate + """ + coef = self.get_detail(scale) + std = self.vst_coef[scale]["std"] + pvalues = 2.0 * (1.0 - sp.stats.norm.cdf(np.abs(coef) / std)) + p_sorted = pvalues.flatten() + p_sorted.sort() + N = len(p_sorted) + if independent: + cn = 1.0 + else: + cn = np.sum(1.0 / np.arange(1, N+1)) + p_comp = fdr * np.arange(N) / (N * cn) + comp = (p_sorted < p_comp) + if np.sum(comp) == 0: + # `comp' contains ALL False + p_cutoff = 0.0 + else: + # cutoff p-value after FDR control/correction + p_cutoff = np.max(p_sorted[comp]) + sig = (pvalues <= p_cutoff) + if verbose: + print("std/sigma: %g, p_cutoff: %g" % (std, p_cutoff), + flush=True, file=sys.stderr) + return (sig, p_cutoff) + + def denoise(self, fdr=0.1, fdr_independent=False, start_scale=1, + end_scale=None, verbose=False): + """ + Denoise the wavelet coefficients by controlling FDR. + """ + self.fdr = fdr + self.fdr_indepent = fdr_independent + self.denoised = [] + # supports of significant coefficients of each scale + self.sig_supports = [None] # make index match the scale + self.p_cutoff = [None] + if verbose: + print("MSVST denosing ...", flush=True, file=sys.stderr) + for scale in range(1, self.level+1): + coef = self.get_detail(scale) + if verbose: + print("\tScale %d: " % scale, end="", + flush=True, file=sys.stderr) + if (scale < start_scale) or \ + ((end_scale is not None) and scale > end_scale): + if verbose: + print("skipped", flush=True, file=sys.stderr) + sig, p_cutoff = None, None + else: + sig, p_cutoff = self.is_significant(scale, fdr=fdr, + independent=fdr_independent, verbose=verbose) + coef[np.logical_not(sig)] = 0.0 + # + self.denoised.append(coef) + self.sig_supports.append(sig) + self.p_cutoff.append(p_cutoff) + # append the last approximation + self.denoised.append(self.get_approx()) + + def decompose(self, level=5, boundary="symm", verbose=False): + """ + 2D IUWT decomposition with VST. + """ + self.boundary = boundary + if self.level != level or self.filters == []: + self.level = level + self.calc_filters() + self.calc_vst_coef() + self.decomposition = [] + approx = self.data + if verbose: + print("IUWT decomposing (%d levels): " % level, + end="", flush=True, file=sys.stderr) + for scale in range(1, level+1): + if verbose: + print("%d..." 
% scale, end="", flush=True, file=sys.stderr) + # approximation: + approx2 = signal.convolve2d(self.data, self.filters[scale], + mode="same", boundary=self.boundary) + # wavelet details: + w = self.vst(approx, scale=scale-1) - self.vst(approx2, scale=scale) + self.decomposition.append(w) + if scale == level: + self.decomposition.append(approx2) + approx = approx2 + if verbose: + print("DONE!", flush=True, file=sys.stderr) + return self.decomposition + + def reconstruct_ivst(self, denoised=True, positive_project=True): + """ + Reconstruct the original image from the *un-denoised* decomposition + by applying the inverse VST. + + This reconstruction result is also used as the `initial condition' + for the below `iterative reconstruction' algorithm. + + arguments: + * denoised: whether use th denoised data or the direct decomposition + * positive_project: whether replace negative values with zeros + """ + if denoised: + decomposition = self.denoised + else: + decomposition = self.decomposition + self.positive_project = positive_project + details = np.sum(decomposition[:-1], axis=0) + approx = self.vst(decomposition[-1], scale=self.level) + reconstruction = self.ivst(approx+details, scale=0) + if positive_project: + reconstruction[reconstruction < 0.0] = 0.0 + self.reconstruction = reconstruction + return reconstruction + + def reconstruct(self, denoised=True, niter=10, verbose=False): + """ + Reconstruct the original image using iterative method with + L1 regularization, because the denoising violates the exact inverse + procedure. + + arguments: + * denoised: whether use the denoised coefficients + * niter: number of iterations + """ + if denoised: + decomposition = self.denoised + else: + decomposition = self.decomposition + # L1 regularization + lbd = 1.0 + delta = lbd / (niter - 1) + # initial solution + solution = self.reconstruct_ivst(denoised=denoised, + positive_project=True) + # + iuwt = IUWT(level=self.level) + iuwt.calc_filters() + # iterative reconstruction + if verbose: + print("Iteratively reconstructing (%d times): " % niter, + end="", flush=True, file=sys.stderr) + for i in range(niter): + if verbose: + print("%d..." % i, end="", flush=True, file=sys.stderr) + tempd = self.data.copy() + solution_decomp = [] + for scale in range(1, self.level+1): + approx, detail = iuwt.transform(tempd, scale) + approx_sol, detail_sol = iuwt.transform(solution, scale) + # Update coefficients according to the significant supports, + # which are acquired during the denosing precodure with FDR. 
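                # (i.e. wherever the FDR test marked a detail coefficient of
                # the noisy input data as significant, copy that coefficient
                # into the current solution's detail band, then soft-threshold
                # the band with the decreasing L1 weight `lbd')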
+ sig = self.sig_supports[scale] + detail_sol[sig] = detail[sig] + detail_sol = self.soft_threshold(detail_sol, threshold=lbd) + # + solution_decomp.append(detail_sol) + tempd = approx.copy() + solution = approx_sol.copy() + # last approximation (the two are the same) + solution_decomp.append(approx) + # reconstruct + solution = iuwt.reconstruct(decomposition=solution_decomp) + # discard all negative values + solution[solution < 0] = 0.0 + # + lbd -= delta + if verbose: + print("DONE!", flush=True, file=sys.stderr) + # + self.reconstruction = solution + return self.reconstruction +# IUWT_VST }}} + + +def main(): + # commandline arguments parser + parser = argparse.ArgumentParser( + description="Poisson Noise Removal with Multi-scale Variance " + \ + "Stabling Transform and Wavelet Transform", + epilog="Version: %s (%s)" % (__version__, __date__)) + parser.add_argument("-l", "--level", dest="level", + type=int, default=5, + help="level of the IUWT decomposition") + parser.add_argument("-r", "--fdr", dest="fdr", + type=float, default=0.1, + help="false discovery rate") + parser.add_argument("-I", "--fdr-independent", dest="fdr_independent", + action="store_true", default=False, + help="whether the FDR null hypotheses are independent") + parser.add_argument("-s", "--start-scale", dest="start_scale", + type=int, default=1, + help="which scale to start the denoising (inclusive)") + parser.add_argument("-e", "--end-scale", dest="end_scale", + type=int, default=0, + help="which scale to end the denoising (inclusive)") + parser.add_argument("-n", "--niter", dest="niter", + type=int, default=10, + help="number of iterations for reconstruction") + parser.add_argument("-v", "--verbose", dest="verbose", + action="store_true", default=False, + help="show verbose progress") + parser.add_argument("-C", "--clobber", dest="clobber", + action="store_true", default=False, + help="overwrite output file if exists") + parser.add_argument("infile", help="input image with Poisson noises") + parser.add_argument("outfile", help="output denoised image") + args = parser.parse_args() + + if args.end_scale == 0: + args.end_scale = args.level + + if args.verbose: + print("infile: '%s'" % args.infile, file=sys.stderr) + print("outfile: '%s'" % args.outfile, file=sys.stderr) + print("level: %d" % args.level, file=sys.stderr) + print("fdr: %.2f" % args.fdr, file=sys.stderr) + print("fdr_independent: %s" % args.fdr_independent, file=sys.stderr) + print("start_scale: %d" % args.start_scale, file=sys.stderr) + print("end_scale: %d" % args.end_scale, file=sys.stderr) + print("niter: %d\n" % args.niter, flush=True, file=sys.stderr) + + if not args.clobber and os.path.exists(args.outfile): + raise OSError("outfile '%s' already exists" % args.outfile) + + imgfits = fits.open(args.infile) + img = imgfits[0].data + # Remove Poisson noises + msvst = IUWT_VST(data=img) + msvst.decompose(level=args.level, verbose=args.verbose) + msvst.denoise(fdr=args.fdr, fdr_independent=args.fdr_independent, + start_scale=args.start_scale, end_scale=args.end_scale, + verbose=args.verbose) + msvst.reconstruct(denoised=True, niter=args.niter, verbose=args.verbose) + img_denoised = msvst.reconstruction + # Output + imgfits[0].data = img_denoised + imgfits[0].header.add_history("%s: Removed Poisson Noises @ %s" % ( + os.path.basename(sys.argv[0]), datetime.utcnow().isoformat())) + imgfits[0].header.add_history(" TOOL: %s (v%s, %s)" % ( + os.path.basename(sys.argv[0]), __version__, __date__)) + imgfits[0].header.add_history(" PARAM: %s" % " 
".join(sys.argv[1:])) + imgfits.writeto(args.outfile, checksum=True, clobber=args.clobber) + + +if __name__ == "__main__": + main() + diff --git a/python/plot.py b/python/plot.py new file mode 100644 index 0000000..b65f8a3 --- /dev/null +++ b/python/plot.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +# +# Credits: http://www.aosabook.org/en/matplotlib.html +# +# Aaron LI +# 2016-03-14 +# + +# Import the FigureCanvas from the backend of your choice +# and attach the Figure artist to it. +from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas +from matplotlib.figure import Figure +fig = Figure() +canvas = FigureCanvas(fig) + +# Import the numpy library to generate the random numbers. +import numpy as np +x = np.random.randn(10000) + +# Now use a figure method to create an Axes artist; the Axes artist is +# added automatically to the figure container fig.axes. +# Here "111" is from the MATLAB convention: create a grid with 1 row and 1 +# column, and use the first cell in that grid for the location of the new +# Axes. +ax = fig.add_subplot(111) + +# Call the Axes method hist to generate the histogram; hist creates a +# sequence of Rectangle artists for each histogram bar and adds them +# to the Axes container. Here "100" means create 100 bins. +ax.hist(x, 100) + +# Decorate the figure with a title and save it. +ax.set_title('Normal distribution with $\mu=0, \sigma=1$') +fig.savefig('matplotlib_histogram.png') + diff --git a/python/plot_tprofiles_zzh.py b/python/plot_tprofiles_zzh.py new file mode 100644 index 0000000..e5824e9 --- /dev/null +++ b/python/plot_tprofiles_zzh.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +# +# Weitian LI +# 2015-09-11 +# + +""" +Plot a list of *temperature profiles* in a grid of subplots with Matplotlib. +""" + +import matplotlib.pyplot as plt + + +def plot_tprofiles(tplist, nrows, ncols, + xlim=None, ylim=None, logx=False, logy=False, + xlab="", ylab="", title=""): + """ + Plot a list of *temperature profiles* in a grid of subplots of size + nrow x ncol. Each subplot is related to a temperature profile. + All the subplots share the same X and Y axes. + The order is by row. + + The tplist is a list of dictionaries, each of which contains all the + necessary data to make the subplot. + + The dictionary consists of the following components: + tpdat = { + "name": "NAME", + "radius": [[radius points], [radius errors]], + "temperature": [[temperature points], [temperature errors]], + "radius_model": [radus points of the fitted model], + "temperature_model": [ + [fitted model value], + [lower bounds given by the model], + [upper bounds given by the model] + ] + } + + Arguments: + tplist - a list of dictionaries containing the data of each + temperature profile. + Note that the length of this list should equal to nrows*ncols. + nrows - number of rows of the subplots + ncols - number of columns of the subplots + xlim - limits of the X axis + ylim - limits of the Y axis + logx - whether to set the log scale for X axis + logy - whether to set the log scale for Y axis + xlab - label for the X axis + ylab - label for the Y axis + title - title for the whole plot + """ + assert len(tplist) == nrows*ncols, "tplist length != nrows*ncols" + # All subplots share both X and Y axes. + fig, axarr = plt.subplots(nrows, ncols, sharex=True, sharey=True) + # Set title for the whole plot. 
+ if title != "": + fig.suptitle(title) + # Set xlab and ylab for each subplot + if xlab != "": + for ax in axarr[-1, :]: + ax.set_xlabel(xlab) + if ylab != "": + for ax in axarr[:, 0]: + ax.set_ylabel(ylab) + for ax in axarr.reshape(-1): + # Set xlim and ylim. + if xlim is not None: + ax.set_xlim(xlim) + if ylim is not None: + ax.set_ylim(ylim) + # Set xscale and yscale. + if logx: + ax.set_xscale("log", nonposx="clip") + if logy: + ax.set_yscale("log", nonposy="clip") + # Decrease the spacing between the subplots and suptitle + fig.subplots_adjust(top=0.94) + # Eleminate the spaces between each row and column. + fig.subplots_adjust(hspace=0, wspace=0) + # Hide X ticks for all subplots but the bottom row. + plt.setp([ax.get_xticklabels() for ax in axarr[:-1, :].reshape(-1)], + visible=False) + # Hide Y ticks for all subplots but the left column. + plt.setp([ax.get_yticklabels() for ax in axarr[:, 1:].reshape(-1)], + visible=False) + # Plot each temperature profile in the tplist + for i, ax in zip(range(len(tplist)), axarr.reshape(-1)): + tpdat = tplist[i] + # Add text to display the name. + # The text is placed at (0.95, 0.95), i.e., the top-right corner, + # with respect to this subplot, and the top-right part of the text + # is aligned to the above position. + ax_pois = ax.get_position() + ax.text(0.95, 0.95, tpdat["name"], + verticalalignment="top", horizontalalignment="right", + transform=ax.transAxes, color="black", fontsize=10) + # Plot data points + if isinstance(tpdat["radius"][0], list) and \ + len(tpdat["radius"]) == 2 and \ + isinstance(tpdat["temperature"][0], list) and \ + len(tpdat["temperature"]) == 2: + # Data points have symmetric errorbar + ax.errorbar(tpdat["radius"][0], tpdat["temperature"][0], + xerr=tpdat["radius"][1], yerr=tpdat["temperature"][1], + color="black", linewidth=1.5, linestyle="None") + else: + ax.plot(tpdat["radius"], tpdat["temperature"], + color="black", linewidth=1.5, linestyle="None") + # Plot model line and bounds band + if isinstance(tpdat["temperature_model"][0], list) and \ + len(tpdat["temperature_model"]) == 3: + # Model data have bounds + ax.plot(tpdat["radius_model"], tpdat["temperature_model"][0], + color="blue", linewidth=1.0) + # Plot model bounds band + ax.fill_between(tpdat["radius_model"], + y1=tpdat["temperature_model"][1], + y2=tpdat["temperature_model"][2], + color="gray", alpha=0.5) + else: + ax.plot(tpdat["radius_model"], tpdat["temperature_model"], + color="blue", linewidth=1.5) + return (fig, axarr) + +# vim: set ts=4 sw=4 tw=0 fenc=utf-8 ft=python: # diff --git a/python/randomize_events.py b/python/randomize_events.py new file mode 100755 index 0000000..e1a6e31 --- /dev/null +++ b/python/randomize_events.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +# +# Randomize the (X,Y) position of each X-ray photon events according +# to a Gaussian distribution of given sigma. +# +# References: +# [1] G. Scheellenberger, T.H. Reiprich, L. Lovisari, J. Nevalainen & L. David +# 2015, A&A, 575, A30 +# +# +# Aaron LI +# Created: 2016-03-24 +# Updated: 2016-03-24 +# + +from astropy.io import fits +import numpy as np + +import os +import sys +import datetime +import argparse + + +CHANDRA_ARCSEC_PER_PIXEL = 0.492 + +def randomize_events(infile, outfile, sigma, clobber=False): + """ + Randomize the position (X,Y) of each X-ray event according to a + specified size/sigma Gaussian distribution. 
+ """ + sigma_pix = sigma / CHANDRA_ARCSEC_PER_PIXEL + evt_fits = fits.open(infile) + evt_table = evt_fits[1].data + # (X,Y) physical coordinate + evt_x = evt_table["x"] + evt_y = evt_table["y"] + rand_x = np.random.normal(scale=sigma_pix, size=evt_x.shape)\ + .astype(evt_x.dtype) + rand_y = np.random.normal(scale=sigma_pix, size=evt_y.shape)\ + .astype(evt_y.dtype) + evt_x += rand_x + evt_y += rand_y + # Add history to FITS header + evt_hdr = evt_fits[1].header + evt_hdr.add_history("TOOL: %s @ %s" % ( + os.path.basename(sys.argv[0]), + datetime.datetime.utcnow().isoformat())) + evt_hdr.add_history("COMMAND: %s" % " ".join(sys.argv)) + evt_fits.writeto(outfile, clobber=clobber, checksum=True) + + +def main(): + parser = argparse.ArgumentParser( + description="Randomize the (X,Y) of each X-ray event") + parser.add_argument("infile", help="input event file") + parser.add_argument("outfile", help="output randomized event file") + parser.add_argument("-s", "--sigma", dest="sigma", + required=True, type=float, + help="sigma/size of the Gaussian distribution used" + \ + "to randomize the position of events (unit: arcsec)") + parser.add_argument("-C", "--clobber", dest="clobber", + action="store_true", help="overwrite output file if exists") + args = parser.parse_args() + + randomize_events(args.infile, args.outfile, + sigma=args.sigma, clobber=args.clobber) + + +if __name__ == "__main__": + main() + diff --git a/python/rebuild_ipod_db.py b/python/rebuild_ipod_db.py new file mode 100755 index 0000000..20c5454 --- /dev/null +++ b/python/rebuild_ipod_db.py @@ -0,0 +1,595 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# LICENSE: +# --------------------------------------------------------------------------- +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# --------------------------------------------------------------------------- +# +# Based on Matrin Fiedler's "rebuild_db.py" v1.0-rc1 (2006-04-26): +# http://shuffle-db.sourceforge.net/ +# + +from __future__ import print_function + + +__title__ = "iPod Shuffle Database Builder" +__author__ = "Aaron LI" +__version__ = "2.0.2" +__date__ = "2016-04-16" + + +import sys +import os +import operator +import array +import random +import fnmatch +import operator +import string +import argparse +import functools +import shutil +from collections import OrderedDict + + +domains = [] +total_count = 0 + + +class LogObj: + """ + Print and log the process information. 
+ """ + def __init__(self, filename=None): + self.filename = filename + + def open(self): + if self.filename: + try: + self.logfile = open(self.filename, "w") + except IOError: + self.logfile = None + else: + self.logfile = None + + def log(self, line="", end="\n"): + value = line + end + if self.logfile: + self.logfile.write(value) + print(value, end="") + + def close(self): + if self.logfile: + self.logfile.close() + + +class Rule: + """ + A RuleSet for the way to handle the found playable files. + """ + SUPPORT_PROPS = ("filename", "size", "ignore", "type", + "shuffle", "reuse", "bookmark") + + def __init__(self, conditions=None, actions=None): + self.conditions = conditions + self.actions = actions + + @classmethod + def parse(cls, rule): + """ + Parse the whole line of a rule. + + Syntax: + condition1, condition2, ...: action1, action2, ... + + condition examples: + * filename ~ "*.mp3" + * size > 100000 + action examples: + * ignore = 1 + * shuffle = 1 + + Return: a object of this class with the parsed rule. + """ + conditions, actions = rule.split(":") + conditions = list(map(cls.parse_condition, conditions.split(","))) + actions = dict(map(cls.parse_action, actions.split(","))) + return cls(conditions, actions) + + @classmethod + def parse_condition(cls, cond): + sep_pos = min([ cond.find(sep) for sep in "~=<>" \ + if cond.find(sep)>0 ]) + prop = cond[:sep_pos].strip() + if prop not in cls.SUPPORT_PROPS: + raise ValueError("WARNING: unknown property '%s'" % prop) + return (prop, cond[sep_pos], + cls.parse_value(cond[sep_pos+1:].strip())) + + @classmethod + def parse_action(cls, action): + prop, value = map(str.strip, action.split("=", 1)) + if prop not in cls.SUPPORT_PROPS: + raise ValueError("WARNING: unknown property '%s'" % prop) + return (prop, cls.parse_value(value)) + + @staticmethod + def parse_value(value): + value = value.strip().strip('"').strip("'") + try: + return int(value) + except ValueError: + return value + + def match(self, props): + """ + Check whether the given props match all the conditions. + """ + def match_condition(props, cond): + """ + Check whether the given props match the given condition. + """ + try: + prop, op, ref = props[cond[0]], cond[1], cond[2] + except KeyError: + return False + if op == "~": + return fnmatch.fnmatchcase(prop.lower(), ref.lower()) + elif op == "=": + return prop == ref + elif op == ">": + return prop > ref + elif op == "<": + return prop < ref + else: + return False + # + return functools.reduce(operator.and_, + [ match_condition(props, cond) \ + for cond in self.conditions ], + True) + + +class Entries: + """ + Walk through the directory to find all files, and filter by the + extensions to get all the playable files. 
+ """ + PLAYABLE_EXTS = (".mp3", ".m4a", ".m4b", ".m4p", ".aa", ".wav") + + def __init__(self, dirs=[], rename=True, recursive=True, ignore_dup=True): + self.entries = [] + self.add_dirs(dirs=dirs, rename=rename, recursive=recursive, + ignore_dup=ignore_dup) + + def add_dirs(self, dirs=[], rename=True, recursive=True, ignore_dup=True): + for dir in dirs: + self.add_dir(dir=dir, rename=rename, recursive=recursive, + ignore_dup=ignore_dup) + + def add_dir(self, dir, rename=True, recursive=True, ignore_dup=True): + global logobj + if recursive: + # Get all directories, and rename them if needed + dirs = [] + for dirName, subdirList, fileList in os.walk(dir): + dirs.append(dirName) + for dirName in dirs: + newDirName = self.get_newname(dirName) + if rename and newDirName != dirName: + logobj.log("Rename: '%s' -> '%s'" % (dirName, newDirName)) + shutil.move(dirName, newDirName) + # Get all files + files = [] + for dirName, subdirList, fileList in os.walk(dir): + files.extend([ os.path.join(dirName, f) for f in fileList ]) + else: + # rename the directory if needed + newDir = self.get_newname(dir) + if rename and newDir != dir: + logobj.log("Rename: '%s' -> '%s'" % (dir, newDir)) + shutil.move(dir, newDir) + files = [ os.path.join(newDir, f) for f in self.listfiles(newDir) ] + # + for fn in files: + # rename filename if needed + newfn = self.get_newname(fn) + if rename and newfn != fn: + logobj.log("Rename: '%s' -> '%s'" % (fn, newfn)) + shutil.move(fn, newfn) + fn = newfn + # filter by playable extensions + if os.path.splitext(fn)[1].lower() not in self.PLAYABLE_EXTS: + continue + if ignore_dup and (fn in self.entries): + continue + self.entries.append(fn) + print("Entry: %s" % fn) + + @staticmethod + def listfiles(path, ignore_hidden=True): + """ + List only files of a directory + """ + for f in os.listdir(path): + if os.path.isfile(os.path.join(path, f)): + if ignore_hidden and f[0] != ".": + yield f + else: + yield f + + @staticmethod + def get_newname(path): + def conv_char(ch): + safe_char = string.ascii_letters + string.digits + "-_" + if ch in safe_char: + return ch + return "_" + # + if path == ".": + return path + dirname, basename = os.path.split(path) + base, ext = os.path.splitext(basename) + newbase = "".join(map(conv_char, base)) + if basename == newbase+ext: + return os.path.join(dirname, basename) + if os.path.exists("%s/%s%s" % (dirname, newbase, ext)): + i = 0 + while os.path.exists("%s/%s_%d%s" % (dirname, newbase, i, ext)): + i += 1 + newbase += "_%d" % i + newname = "%s/%s%s" % (dirname, newbase, ext) + return newname + + def fix_and_sort(self): + """ + Fix the entries' pathes (should starts with "/"), and sort. + """ + self.entries = [ "/"+f.lstrip("./") for f in self.entries ] + self.entries.sort() + + def apply_rules(self, rules): + """ + Apply rules to the found entries. 
+ The filtered/updated entries and properties are saved in: + 'self.entries_dict' + """ + self.entries_dict = OrderedDict() + + for fn in self.entries: + # set default properties + props = { + "filename": fn, + "size": os.stat(fn[1:]).st_size, + "ignore": 0, + "type": 1, + "shuffle": 1, + "bookmark": 0 + } + # check and apply rules + for rule in rules: + if rule.match(props): + props.update(rule.actions) + # + if props["ignore"]: + continue + # + self.entries_dict[fn] = props + + def get_entries(self): + return self.entries_dict.items() + + +class iTunesSD: + """ + Class to handle the iPod Shuffle main database + "iPod_Control/iTunes/iTunesSD" + """ + def __init__(self, dbfile="./iPod_Control/iTunes/iTunesSD"): + self.dbfile = dbfile + self.load() + + def load(self): + """ + Load original header and entries. + """ + self.old_entries = {} + self.header_main = array.array("B") # unsigned integer array + self.header_entry = array.array("B") # unsigned integer array + db = open(self.dbfile, "rb") + try: + self.header_main.fromfile(db, 18) + self.header_entry.fromfile(db, 33) + db.seek(18) + entry = db.read(558) + while len(entry) == 558: + filename = entry[33::2].split(b"\0", 1)[0] + self.old_entries[filename] = entry + entry = db.read(558) + except EOFError: + pass + db.close() + print("Loaded %d entries from existing database" % \ + len(self.old_entries)) + + def build_header(self, force=False): + global logobj + # rebuild database header + if force or len(self.header_main) != 18: + logobj.log("Rebuild iTunesSD main header ...") + del self.header_main[:] + self.header_main.fromlist([0,0,0,1,6,0,0,0,18] + [0]*9) + if force or len(self.header_entry) != 33: + logobj.log("Rebuild iTunesSD entry header ...") + del self.header_entry[:] + self.header_entry.fromlist([0,2,46,90,165,1] + [0]*20 + \ + [100,0,0,1,0,2,0]) + + def add_entries(self, entries, reuse=True): + """ + Prepare the entries for database + """ + self.entries = OrderedDict() + + for fn, props in entries.get_entries(): + if reuse and props.get("reuse") and (fn in self.old_entries): + # retrieve entry from old entries + entry = self.old_entries[fn] + else: + # build new entry + self.header_entry[29] = props["type"] + entry_data = "".join([ c+"\0" for c in fn[:261] ]) + \ + "\0"*(558 - len(self.header_entry) - 2*len(fn)) + entry = self.header_entry.tostring() + \ + entry_data.encode("utf-8") + # modify the shuffle and bookmark flags + entry = entry[:555] + chr(props["shuffle"]).encode("utf-8") + \ + chr(props["bookmark"]).encode("utf-8") + entry[557] + # + self.entries[fn] = entry + + def write(self, dbfile=None): + if dbfile is None: + dbfile = self.dbfile + # Make a backup + if os.path.exists(dbfile): + shutil.copy2(dbfile, dbfile+"_bak") + + # write main database file + with open(dbfile, "wb") as db: + self.header_main.tofile(db) + for entry in self.entries.values(): + db.write(entry) + # Update database header + num_entries = len(self.entries) + db.seek(0) + db.write(b"\0%c%c" % (num_entries>>8, num_entries&0xFF)) + + +class iTunesPState: + """ + iPod Shuffle playback state database: "iPod_Control/iTunes/iTunesPState" + """ + def __init__(self, dbfile="iPod_Control/iTunes/iTunesPState"): + self.dbfile = dbfile + self.load() + + def load(self): + with open(self.dbfile, "rb") as db: + a = array.array("B") + a.fromstring(db.read()) + self.PState = a.tolist() + + def update(self, volume=None): + if len(self.PState) != 21: + # volume 29, FW ver 1.0 + self.PState = self.listval(29) + [0]*15 + self.listval(1) + # track 0, shuffle mode, 
start of track + self.PState[3:15] = [0]*6 + [1] + [0]*5 + if volume is not None: + self.PState[:3] = self.listval(volume) + + def write(self, dbfile=None): + if dbfile is None: + dbfile = self.dbfile + # Make a backup + if os.path.exists(dbfile): + shutil.copy2(dbfile, dbfile+"_bak") + + with open(dbfile, "wb") as db: + array.array("B", self.PState).tofile(db) + + @staticmethod + def listval(i): + if i < 0: + i += 0x1000000 + return [i&0xFF, (i>>8)&0xFF, (i>>16)&0xFF] + + +class iTunesStats: + """ + iPod Shuffle statistics database: "iPod_Control/iTunes/iTunesStats" + """ + def __init__(self, dbfile="iPod_Control/iTunes/iTunesStats"): + self.dbfile = dbfile + + def write(self, count, dbfile=None): + if dbfile is None: + dbfile = self.dbfile + # Make a backup + if os.path.exists(dbfile): + shutil.copy2(dbfile, dbfile+"_bak") + + with open(dbfile, "wb") as db: + data = self.stringval(count) + "\0"*3 + \ + (self.stringval(18) + "\xff"*3 + "\0"*12) * count + db.write(data.encode("utf-8")) + + @staticmethod + def stringval(i): + if i < 0: + i += 0x1000000 + return "%c%c%c" % (i&0xFF, (i>>8)&0xFF, (i>>16)&0xFF) + + +class iTunesShuffle: + """ + iPod shuffle database: "iPod_Control/iTunes/iTunesShuffle" + """ + def __init__(self, dbfile="iPod_Control/iTunes/iTunesShuffle"): + self.dbfile = dbfile + + def shuffle(self, entries): + """ + Generate the shuffle sequences for the entries, and take care + of the "shuffle" property. + """ + shuffle_prop = [ props["shuffle"] + for fn, props in entries.get_entries() ] + shuffle_idx = [ idx for idx, s in enumerate(shuffle_prop) if s == 1 ] + shuffled = shuffle_idx.copy() + random.seed() + random.shuffle(shuffled) + shuffle_seq = list(range(len(shuffle_prop))) + for i, idx in enumerate(shuffle_idx): + shuffle_seq[idx] = shuffled[i] + self.shuffle_seq = shuffle_seq + + def write(self, dbfile=None): + if dbfile is None: + dbfile = self.dbfile + # Make a backup + if os.path.exists(dbfile): + shutil.copy2(dbfile, dbfile+"_bak") + + with open(dbfile, "wb") as db: + data = "".join(map(iTunesStats.stringval, self.shuffle_seq)) + db.write(data.encode("utf-8")) + + +def main(): + prog_basename = os.path.splitext(os.path.basename(sys.argv[0]))[0] + + # command line arguments + parser = argparse.ArgumentParser( + description="Rebuild iPod Shuffle Database", + epilog="Version: %s (%s)\n\n" % (__version__, __date__) + \ + "Only 1st and 2nd iPod Shuffle supported!\n\n" + \ + "The script must be placed under the iPod's root directory") + parser.add_argument("-f", "--force", dest="force", action="store_true", + help="always rebuild database entries, do NOT reuse old ones") + parser.add_argument("-M", "--no-rename", dest="norename", + action="store_false", default=True, + help="do NOT rename files") + parser.add_argument("-V", "--volume", dest="volume", type=int, + help="set playback volume (0 - 38)") + parser.add_argument("-r", "--rulesfile", dest="rulesfile", + default="%s.rules" % prog_basename, + help="additional rules filename") + parser.add_argument("-l", "--logfile", dest="logfile", + default="%s.log" % prog_basename, + help="log output filename") + parser.add_argument("dirs", nargs="*", + help="directories to be searched for playable files") + args = parser.parse_args() + + flag_reuse = not args.force + + # Start logging + global logobj + logobj = LogObj(args.logfile) + logobj.open() + + # Rules for how to handle the found playable files + rules = [] + # Add default rules + rules.append(Rule(conditions=[("filename", "~", "*.mp3")], + actions={"type":1, "shuffle":1, 
"bookmark":0})) + rules.append(Rule(conditions=[("filename", "~", "*.m4?")], + actions={"type":2, "shuffle":1, "bookmark":0})) + rules.append(Rule(conditions=[("filename", "~", "*.m4b")], + actions={"shuffle":0, "bookmark":1})) + rules.append(Rule(conditions=[("filename", "~", "*.aa")], + actions={"type":1, "shuffle":0, "bookmark":1, "reuse":1})) + rules.append(Rule(conditions=[("filename", "~", "*.wav")], + actions={"type":4, "shuffle":0, "bookmark":0})) + rules.append(Rule(conditions=[("filename", "~", "*.book.???")], + actions={"shuffle":0, "bookmark":1})) + rules.append(Rule(conditions=[("filename", "~", "*.announce.???")], + actions={"shuffle":0, "bookmark":0})) + rules.append(Rule(conditions=[("filename", "~", "/backup/*")], + actions={"ignore":1})) + # Load additional rules + try: + for line in open(args.rulesfile, "r").readlines(): + rules.append(Rule.parse(line)) + logobj.log("Loaded additional rules from file: %s" % args.rulesfile) + except IOError: + pass + + # cd to the directory of this script + os.chdir(os.path.dirname(sys.argv[0])) + + if not os.path.isdir("iPod_Control/iTunes"): + logobj.log("ERROR: No iPod control directory found!") + logobj.log("Please make sure that:") + logobj.log("(*) this script is placed under the iPod's root directory") + logobj.log("(*) the iPod was correctly initialized with iTunes") + sys.exit(1) + + # playable entries + logobj.log("Search for playable entries ...") + entries = Entries() + if args.dirs: + for dir in args.dirs: + entries.add_dir(dir=dir, recursive=True, rename=args.norename) + else: + entries.add_dir(".", recursive=True, rename=args.norename) + entries.fix_and_sort() + logobj.log("Apply rules to entries ...") + entries.apply_rules(rules=rules) + + # read main database file + logobj.log("Update main database ...") + db = iTunesSD(dbfile="iPod_Control/iTunes/iTunesSD") + db.build_header(force=args.force) + db.add_entries(entries=entries, reuse=flag_reuse) + assert len(db.entries) == len(entries.get_entries()) + db.write() + logobj.log("Added %d entries ..." % len(db.entries)) + + # other misc databases + logobj.log("Update playback state database ...") + db_pstate = iTunesPState(dbfile="iPod_Control/iTunes/iTunesPState") + db_pstate.update(volume=args.volume) + db_pstate.write() + logobj.log("Update statistics database ...") + db_stats = iTunesStats(dbfile="iPod_Control/iTunes/iTunesStats") + db_stats.write(count=len(db.entries)) + logobj.log("Update shuffle database ...") + db_shuffle = iTunesShuffle(dbfile="iPod_Control/iTunes/iTunesShuffle") + db_shuffle.shuffle(entries=entries) + db_shuffle.write() + + logobj.log("The iPod Shuffle database was rebuilt successfully!") + + logobj.close() + + +if __name__ == "__main__": + main() + +# vim: set ts=4 sw=4 tw=0 fenc=utf-8 ft=python: # diff --git a/python/splitBoxRegion.py b/python/splitBoxRegion.py new file mode 100755 index 0000000..5254686 --- /dev/null +++ b/python/splitBoxRegion.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8- +# +# Split the strip-shaped CCD gaps regions into a series of small +# square regions, which are used as the input regions of 'roi' to +# determine the corresponding background regions, and finally providied +# to 'dmfilth' in order to fill in the CCD gaps. 
+# +# Aaron LI +# 2015/08/12 +# +# Changelogs: +# v0.1.0, 2015/08/12 +# * initial version +# + + +__version__ = "0.1.0" +__date__ = "2015/08/12" + + +import os +import sys +import re +import math +import argparse +from io import TextIOWrapper + + +## BoxRegion {{{ +class BoxRegion(object): + """ + CIAO/DS9 "rotbox"/"box" region class. + + rotbox/box format: + rotbox(xc, yc, width, height, rotation) + box(xc, yc, width, height, rotation) + Notes: + rotation: [0, 360) (degree) + """ + def __init__(self, xc=None, yc=None, + width=None, height=None, rotation=None): + self.regtype = "rotbox" + self.xc = xc + self.yc = yc + self.width = width + self.height = height + self.rotation = rotation + + def __str__(self): + return "%s(%s,%s,%s,%s,%s)" % (self.regtype, self.xc, self.yc, + self.width, self.height, self.rotation) + + @classmethod + def parse(cls, regstr): + """ + Parse region string. + """ + regex_box = re.compile(r'^\s*(box|rotbox)\(([0-9. ]+),([0-9. ]+),([0-9. ]+),([0-9. ]+),([0-9. ]+)\)\s*$', re.I) + m = regex_box.match(regstr) + if m: + regtype = m.group(1) + xc = float(m.group(2)) + yc = float(m.group(3)) + width = float(m.group(4)) + height = float(m.group(5)) + rotation = float(m.group(6)) + return cls(xc, yc, width, height, rotation) + else: + return None + + def split(self, filename=None): + """ + Split strip-shaped box region into a series small square regions. + """ + angle = self.rotation * math.pi / 180.0 + # to record the center coordinates of each split region + centers = [] + if self.width > self.height: + # number of regions after split + nreg = math.ceil(self.width / self.height) + # width & height of the split region + width = self.width / nreg + height = self.height + # position of the left-most region + x_l = self.xc - 0.5*self.width * math.cos(angle) + y_l = self.yc - 0.5*self.width * math.sin(angle) + for i in range(nreg): + x = x_l + (0.5 + i) * width * math.cos(angle) + y = y_l + (0.5 + i) * width * math.sin(angle) + centers.append((x, y)) + else: + # number of regions after split + nreg = math.ceil(self.height / self.width) + # width & height of the split region + width = self.width + height = self.height / nreg + # position of the left-most region + x_l = self.xc + 0.5*self.height * math.cos(angle + math.pi/2) + y_l = self.yc + 0.5*self.height * math.sin(angle + math.pi/2) + for i in range(nreg): + x = x_l - (0.5 + i) * height * math.cos(angle + math.pi/2) + y = y_l - (0.5 + i) * height * math.sin(angle + math.pi/2) + centers.append((x, y)) + # create split regions + regions = [] + for (x, y) in centers: + regions.append(self.__class__(x, y, width+2, height+2, + self.rotation)) + # write split regions into file if specified + if isinstance(filename, str): + regout = open(filename, "w") + regout.write("\n".join(map(str, regions))) + regout.close() + else: + return regions + +## BoxRegion }}} + + +def main(): + # command line arguments + parser = argparse.ArgumentParser( + description="Split strip-shaped rotbox region into " + \ + "a series of small square regions.", + epilog="Version: %s (%s)" % (__version__, __date__)) + parser.add_argument("-V", "--version", action="version", + version="%(prog)s " + "%s (%s)" % (__version__, __date__)) + parser.add_argument("infile", help="input rotbox region file") + parser.add_argument("outfile", help="output file of the split regions") + args = parser.parse_args() + + outfile = open(args.outfile, "w") + regex_box = re.compile(r'^\s*(box|rotbox)\([0-9., ]+\)\s*$', re.I) + for line in open(args.infile, "r"): + if 
regex_box.match(line): + reg = BoxRegion.parse(line) + split_regs = reg.split() + outfile.write("\n".join(map(str, split_regs)) + "\n") + else: + outfile.write(line) + + outfile.close() + + +if __name__ == "__main__": + main() + diff --git a/python/splitCCDgaps.py b/python/splitCCDgaps.py new file mode 100644 index 0000000..bc26c29 --- /dev/null +++ b/python/splitCCDgaps.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8- +# +# Aaron LI +# 2015/08/12 +# + +""" +Split the long-strip-shaped CCD gaps regions into a series of little +square regions, which are used as the input regions of 'roi' to +determine the corresponding background regions, and finally providied +to 'dmfilth' in order to fill in the CCD gaps. +""" + + +import re +import math +from io import TextIOWrapper + + +class BoxRegion(object): + """ + CIAO/DS9 "rotbox"/"box" region class. + + rotbox/box format: + rotbox(xc, yc, width, height, rotation) + box(xc, yc, width, height, rotation) + Notes: + rotation: [0, 360) (degree) + """ + def __init__(self, xc=None, yc=None, + width=None, height=None, rotation=None): + self.regtype = "rotbox" + self.xc = xc + self.yc = yc + self.width = width + self.height = height + self.rotation = rotation + + def __str__(self): + return "%s(%s,%s,%s,%s,%s)" % (self.regtype, self.xc, self.yc, + self.width, self.height, self.rotation) + + @classmethod + def parse(cls, regstr): + """ + Parse region string. + """ + regex_box = re.compile(r'^(box|rotbox)\(([0-9.]+),([0-9.]+),([0-9.]+),([0-9.]+),([0-9.]+)\)$', re.I) + m = regex_box.match(regstr) + if m: + regtype = m.group(1) + xc = float(m.group(2)) + yc = float(m.group(3)) + width = float(m.group(4)) + height = float(m.group(5)) + rotation = float(m.group(6)) + return cls(xc, yc, width, height, rotation) + else: + return None + + def split(self, filename=None): + """ + Split long-strip-shaped box region into a series square box regions. 
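+
+ Illustrative example (the region parameters are hypothetical):
+     reg = BoxRegion.parse("rotbox(4000,4000,1200,20,45)")
+     subregions = reg.split()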
+ """ + angle = self.rotation * math.pi / 180.0 + # to record the center coordinates of each split region + centers = [] + if self.width > self.height: + # number of regions after split + nreg = math.ceil(self.width / self.height) + # width & height of the split region + width = self.width / nreg + height = self.height + # position of the left-most region + x_l = self.xc - 0.5*self.width * math.cos(angle) + y_l = self.yc - 0.5*self.width * math.sin(angle) + for i in range(nreg): + x = x_l + (0.5 + i) * width * math.cos(angle) + y = y_l + (0.5 + i) * width * math.sin(angle) + centers.append((x, y)) + else: + # number of regions after split + nreg = math.ceil(self.height / self.width) + # width & height of the split region + width = self.width + height = self.height / nreg + # position of the left-most region + x_l = self.xc + 0.5*self.height * math.cos(angle + math.pi/2) + y_l = self.yc + 0.5*self.height * math.sin(angle + math.pi/2) + for i in range(nreg): + x = x_l - (0.5 + i) * height * math.cos(angle + math.pi/2) + y = y_l - (0.5 + i) * height * math.sin(angle + math.pi/2) + centers.append((x, y)) + # create split regions + regions = [] + for (x, y) in centers: + regions.append(self.__class__(x, y, width+2, height+2, + self.rotation)) + # write split regions into file if specified + if isinstance(filename, str): + regout = open(filename, "w") + regout.write("\n".join(map(str, regions))) + regout.close() + else: + return regions + + diff --git a/python/xkeywordsync.py b/python/xkeywordsync.py new file mode 100644 index 0000000..9a7cd79 --- /dev/null +++ b/python/xkeywordsync.py @@ -0,0 +1,533 @@ +#!/bin/usr/env python3 +# -*- coding: utf-8 -*- +# +# Credits: +# [1] Gaute Hope: gauteh/abunchoftags +# https://github.com/gauteh/abunchoftags/blob/master/keywsync.cc +# +# TODO: +# * Support case-insensitive tags merge +# (ref: http://stackoverflow.com/a/1480230) +# * Accept a specified mtime, and only deal with files with newer mtime. +# +# Aaron LI +# Created: 2016-01-24 +# + +""" +Sync message 'X-Keywords' header with notmuch tags. + +* tags-to-keywords: + Check if the messages in the query have a matching 'X-Keywords' header + to the list of notmuch tags. + If not, update the 'X-Keywords' and re-write the message. + +* keywords-to-tags: + Check if the messages in the query have matching notmuch tags to the + 'X-Keywords' header. + If not, update the tags in the notmuch database. + +* merge-keywords-tags: + Merge the 'X-Keywords' labels and notmuch tags, and update both. +""" + +__version__ = "0.1.2" +__date__ = "2016-01-25" + +import os +import sys +import argparse +import email + +# Require Python 3.4, or install package 'enum34' +from enum import Enum + +from notmuch import Database, Query + +from imapUTF7 import imapUTF7Decode, imapUTF7Encode + + +class SyncDirection(Enum): + """ + Synchronization direction + """ + MERGE_KEYWORDS_TAGS = 0 # Merge 'X-Keywords' and notmuch tags and + # update both + KEYWORDS_TO_TAGS = 1 # Sync 'X-Keywords' header to notmuch tags + TAGS_TO_KEYWORDS = 2 # Sync notmuch tags to 'X-Keywords' header + +class SyncMode(Enum): + """ + Sync mode + """ + ADD_REMOVE = 0 # Allow add & remove tags/keywords + ADD_ONLY = 1 # Only allow add tags/keywords + REMOVE_ONLY = 2 # Only allow remove tags/keywords + + +class KwMessage: + """ + Message class to deal with 'X-Keywords' header synchronization + with notmuch tags. 
+ + NOTE: + * The same message may have multiple files with different keywords + (e.g, the same message exported under each label by Gmail) + managed by OfflineIMAP. + For example: a message file in OfflineIMAP synced folder of + '[Gmail]/All Mail' have keywords ['google', 'test']; however, + the file in synced folder 'test' of the same message only have + keywords ['google'] without the keyword 'test'. + * All files associated to the same message are regarded as the same. + The keywords are extracted from all files and merged. + And the same updated keywords are written back to all files, which + results all files finally having the same 'X-Keywords' header. + * You may only sync the '[Gmail]/All Mail' folder without other + folders exported according the labels by Gmail. + """ + # Replace some special characters before mapping keyword to tag + enable_replace_chars = True + chars_replace = { + '/' : '.', + } + # Mapping between (Gmail) keywords and notmuch tags (before ignoring tags) + keywords_mapping = { + '\\Inbox' : 'inbox', + '\\Important' : 'important', + '\\Starred' : 'flagged', + '\\Sent' : 'sent', + '\\Muted' : 'killed', + '\\Draft' : 'draft', + '\\Trash' : 'deleted', + '\\Junk' : 'spam', + } + # Tags ignored from syncing + # These tags are either internal tags or tags handled by maildir flags. + enable_ignore_tags = True + tags_ignored = set([ + 'new', 'unread', 'attachment', 'signed', 'encrypted', + 'flagged', 'replied', 'passed', 'draft', + ]) + # Ignore case when merging tags + tags_ignorecase = True + + # Whether the tags updated against the message 'X-Keywords' header + tags_updated = False + # Added & removed tags for notmuch database against 'X-Keywords' + tags_added = [] + tags_removed = [] + # Newly updated/merged notmuch tags against 'X-Keywords' + tags_new = [] + + # Whether the keywords updated against the notmuch tags + keywords_updated = False + # Added & removed tags for 'X-Keywords' against notmuch database + tags_kw_added = [] + tags_kw_removed = [] + # Newly updated/merged tags for 'X-Keywords' against notmuch database + tags_kw_new = [] + + def __init__(self, msg, filename=None): + self.message = msg + self.filename = filename + self.allfiles = [ fn for fn in msg.get_filenames() ] + self.tags = set(msg.get_tags()) + + def sync(self, direction, mode=SyncMode.ADD_REMOVE, + dryrun=False, verbose=False): + """ + Wrapper function to sync between 'X-Keywords' and notmuch tags. + """ + if direction == SyncDirection.KEYWORDS_TO_TAGS: + self.sync_keywords_to_tags(sync_mode=mode, dryrun=dryrun, + verbose=verbose) + elif direction == SyncDirection.TAGS_TO_KEYWORDS: + self.sync_tags_to_keywords(sync_mode=mode, dryrun=dryrun, + verbose=verbose) + elif direction == SyncDirection.MERGE_KEYWORDS_TAGS: + self.merge_keywords_tags(sync_mode=mode, dryrun=dryrun, + verbose=verbose) + else: + raise ValueError("Invalid sync direction: %s" % direction) + + def sync_keywords_to_tags(self, sync_mode=SyncMode.ADD_REMOVE, + dryrun=False, verbose=False): + """ + Wrapper function to sync 'X-Keywords' to notmuch tags. 
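+
+ This chains get_keywords() -> map_keywords() -> merge_tags(), and finally
+ update_tags() unless dryrun is enabled.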
+ """ + self.get_keywords() + self.map_keywords() + self.merge_tags(sync_direction=SyncDirection.KEYWORDS_TO_TAGS, + sync_mode=sync_mode) + if dryrun or verbose: + print('* MSG: %s' % self.message) + print(' TAG: [%s] +[%s] -[%s] => [%s]' % ( + ','.join(self.tags), ','.join(self.tags_added), + ','.join(self.tags_removed), ','.join(self.tags_new))) + if not dryrun: + self.update_tags() + + def sync_tags_to_keywords(self, sync_mode=SyncMode.ADD_REMOVE, + dryrun=False, verbose=False): + """ + Wrapper function to sync notmuch tags to 'X-Keywords' + """ + self.get_keywords() + self.map_keywords() + self.merge_tags(sync_direction=SyncDirection.TAGS_TO_KEYWORDS, + sync_mode=sync_mode) + keywords_new = self.map_tags(tags=self.tags_kw_new) + if dryrun or verbose: + print('* MSG: %s' % self.message) + print('* FILES: %s' % ' ; '.join(self.allfiles)) + print(' XKW: {%s} +[%s] -[%s] => {%s}' % ( + ','.join(self.keywords), ','.join(self.tags_kw_added), + ','.join(self.tags_kw_removed), ','.join(keywords_new))) + if not dryrun: + self.update_keywords(keywords_new=keywords_new) + + def merge_keywords_tags(self, sync_mode=SyncMode.ADD_REMOVE, + dryrun=False, verbose=False): + """ + Wrapper function to merge 'X-Keywords' and notmuch tags + """ + self.get_keywords() + self.map_keywords() + self.merge_tags(sync_direction=SyncDirection.MERGE_KEYWORDS_TAGS, + sync_mode=sync_mode) + keywords_new = self.map_tags(tags=self.tags_kw_new) + if dryrun or verbose: + print('* MSG: %s' % self.message) + print('* FILES: %s' % ' ; '.join(self.allfiles)) + print(' TAG: [%s] +[%s] -[%s] => [%s]' % ( + ','.join(self.tags), ','.join(self.tags_added), + ','.join(self.tags_removed), ','.join(self.tags_new))) + print(' XKW: {%s} +[%s] -[%s] => {%s}' % ( + ','.join(self.keywords), ','.join(self.tags_kw_added), + ','.join(self.tags_kw_removed), ','.join(keywords_new))) + if not dryrun: + self.update_tags() + self.update_keywords(keywords_new=keywords_new) + + def get_keywords(self): + """ + Get 'X-Keywords' header from all files associated with the same + message, decode, split and merge. + + NOTE: Do NOT simply use the `message.get_header()` method, which + cannot get the complete keywords from all files. + """ + keywords_utf7 = [] + for fn in self.allfiles: + msg = email.message_from_file(open(fn, 'r')) + val = msg['X-Keywords'] + if val: + keywords_utf7.append(val) + else: + print("WARNING: 'X-Keywords' header not found or empty " +\ + "for file: %s" % fn, file=sys.stderr) + keywords_utf7 = ','.join(keywords_utf7) + if keywords_utf7 != '': + keywords = imapUTF7Decode(keywords_utf7.encode()).split(',') + keywords = [ kw.strip() for kw in keywords ] + # Remove duplications + keywords = set(keywords) + else: + keywords = set() + self.keywords = keywords + return keywords + + def map_keywords(self, keywords=None): + """ + Map keywords to notmuch tags according to the mapping table. + """ + if keywords is None: + keywords = self.keywords + if self.enable_replace_chars: + # Replace specified characters in keywords + trans = str.maketrans(self.chars_replace) + keywords = [ kw.translate(trans) for kw in keywords ] + # Map keywords to tags + tags = set([ self.keywords_mapping.get(kw, kw) for kw in keywords ]) + self.tags_kw = tags + return tags + + def map_tags(self, tags=None): + """ + Map tags to keywords according to the inversed mapping table. 
+ """ + if tags is None: + tags = self.tags + if self.enable_replace_chars: + # Inversely replace specified characters in tags + chars_replace_inv = { v: k for k, v in self.chars_replace.items() } + trans = str.maketrans(chars_replace_inv) + tags = [ tag.translate(trans) for tag in tags ] + # Map keywords to tags + keywords_mapping_inv = { v:k for k,v in self.keywords_mapping.items() } + keywords = set([ keywords_mapping_inv.get(tag, tag) for tag in tags ]) + self.keywords_tags = keywords + return keywords + + def merge_tags(self, sync_direction, sync_mode=SyncMode.ADD_REMOVE, + tags_nm=None, tags_kw=None): + """ + Merge the tags from notmuch database and 'X-Keywords' header, + according to the specified sync direction and operation restriction. + + TODO: support case-insensitive set operations + """ + # Added & removed tags for notmuch database against 'X-Keywords' + tags_added = [] + tags_removed = [] + # Newly updated/merged notmuch tags against 'X-Keywords' + tags_new = [] + # Added & removed tags for 'X-Keywords' against notmuch database + tags_kw_added = [] + tags_kw_removed = [] + # Newly updated/merged tags for 'X-Keywords' against notmuch database + tags_kw_new = [] + # + if tags_nm is None: + tags_nm = self.tags + if tags_kw is None: + tags_kw = self.tags_kw + if self.enable_ignore_tags: + # Remove ignored tags before merge + tags_nm2 = tags_nm.difference(self.tags_ignored) + tags_kw2 = tags_kw.difference(self.tags_ignored) + else: + tags_nm2 = tags_nm + tags_kw2 = tags_kw + # + if sync_direction == SyncDirection.KEYWORDS_TO_TAGS: + # Sync 'X-Keywords' to notmuch tags + tags_added = tags_kw2.difference(tags_nm2) + tags_removed = tags_nm2.difference(tags_kw2) + elif sync_direction == SyncDirection.TAGS_TO_KEYWORDS: + # Sync notmuch tags to 'X-Keywords' + tags_kw_added = tags_nm2.difference(tags_kw2) + tags_kw_removed = tags_kw2.difference(tags_nm2) + elif sync_direction == SyncDirection.MERGE_KEYWORDS_TAGS: + # Merge both notmuch tags and 'X-Keywords' + tags_merged = tags_nm2.union(tags_kw2) + # notmuch tags + tags_added = tags_merged.difference(tags_nm2) + tags_removed = tags_nm2.difference(tags_merged) + # tags for 'X-Keywords' + tags_kw_added = tags_merged.difference(tags_kw2) + tags_kw_removed = tags_kw2.difference(tags_merged) + else: + raise ValueError("Invalid synchronization direction") + # Apply sync operation restriction + self.tags_added = [] + self.tags_removed = [] + self.tags_kw_added = [] + self.tags_kw_removed = [] + tags_new = tags_nm # Use un-ignored notmuch tags + tags_kw_new = tags_kw # Use un-ignored 'X-Keywords' tags + if sync_mode != SyncMode.REMOVE_ONLY: + self.tags_added = tags_added + self.tags_kw_added = tags_kw_added + tags_new = tags_new.union(tags_added) + tags_kw_new = tags_kw_new.union(tags_kw_added) + if sync_mode != SyncMode.ADD_ONLY: + self.tags_removed = tags_removed + self.tags_kw_removed = tags_kw_removed + tags_new = tags_new.difference(tags_removed) + tags_kw_new = tags_kw_new.difference(tags_kw_removed) + # + self.tags_new = tags_new + self.tags_kw_new = tags_kw_new + if self.tags_added or self.tags_removed: + self.tags_updated = True + if self.tags_kw_added or self.tags_kw_removed: + self.keywords_updated = True + # + return { + 'tags_updated' : self.tags_updated, + 'tags_added' : self.tags_added, + 'tags_removed' : self.tags_removed, + 'tags_new' : self.tags_new, + 'keywords_updated' : self.keywords_updated, + 'tags_kw_added' : self.tags_kw_added, + 'tags_kw_removed' : self.tags_kw_removed, + 'tags_kw_new' : self.tags_kw_new, + } + + def 
update_keywords(self, keywords_new=None, outfile=None): + """ + Encode the keywords (default: self.keywords_new) and write back to + all message files. + + If parameter 'outfile' specified, then write the updated message + to that file instead of overwriting. + + NOTE: + * The modification time of the message file should be kept to prevent + OfflineIMAP from treating it as a new one (and the previous a + deleted one). + * All files associated with the same message are updated to have + the same 'X-Keywords' header. + """ + if not self.keywords_updated: + # keywords NOT updated, just skip + return + + if keywords_new is None: + keywords_new = self.keywords_new + # + if outfile is not None: + infile = self.allfiles[0:1] + outfile = [ os.path.expanduser(outfile) ] + else: + infile = self.allfiles + outfile = self.allfiles + # + for ifname, ofname in zip(infile, outfile): + msg = email.message_from_file(open(ifname, 'r')) + fstat = os.stat(ifname) + if keywords_new == []: + # Delete 'X-Keywords' header + print("WARNING: delete 'X-Keywords' header from file: %s" % + ifname, file=sys.stderr) + del msg['X-Keywords'] + else: + # Update 'X-Keywords' header + keywords = ','.join(keywords_new) + keywords_utf7 = imapUTF7Encode(keywords).decode() + # Delete then add, to avoid multiple occurrences + del msg['X-Keywords'] + msg['X-Keywords'] = keywords_utf7 + # Write updated message + with open(ofname, 'w') as fp: + fp.write(msg.as_string()) + # Reset the timestamps + os.utime(ofname, ns=(fstat.st_atime_ns, fstat.st_mtime_ns)) + + def update_tags(self, tags_added=None, tags_removed=None): + """ + Update notmuch tags according to keywords. + """ + if not self.tags_updated: + # tags NOT updated, just skip + return + + if tags_added is None: + tags_added = self.tags_added + if tags_removed is None: + tags_removed = self.tags_removed + # Use freeze/thaw for safer transactions to change tag values. + self.message.freeze() + for tag in tags_added: + self.message.add_tag(tag, sync_maildir_flags=False) + for tag in tags_removed: + self.message.remove_tag(tag, sync_maildir_flags=False) + self.message.thaw() + + +def get_notmuch_revision(dbpath=None): + """ + Get the current revision and UUID of notmuch database. 
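+
+ Return a dict of the form (illustrative values):
+     {'revision': 1234, 'uuid': 'ab12cd34-...'}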
+ """ + import subprocess + import tempfile + if dbpath: + tf = tempfile.NamedTemporaryFile() + # Create a minimal notmuch config for the specified dbpath + config = '[database]\npath=%s\n' % os.path.expanduser(dbpath) + tf.file.write(config.encode()) + tf.file.flush() + cmd = 'notmuch --config=%s count --lastmod' % tf.name + output = subprocess.check_output(cmd, shell=True) + tf.close() + else: + cmd = 'notmuch count --lastmod' + output = subprocess.check_output(cmd, shell=True) + # Extract output + dbinfo = output.decode().split() + return { 'revision': int(dbinfo[2]), 'uuid': dbinfo[1] } + + +def main(): + parser = argparse.ArgumentParser( + description="Sync message 'X-Keywords' header with notmuch tags.") + parser.add_argument("-V", "--version", action="version", + version="%(prog)s " + "v%s (%s)" % (__version__, __date__)) + parser.add_argument("-q", "--query", dest="query", required=True, + help="notmuch database query string") + parser.add_argument("-p", "--db-path", dest="dbpath", + help="notmuch database path (default to try user configuration)") + parser.add_argument("-n", "--dry-run", dest="dryrun", + action="store_true", help="dry run") + parser.add_argument("-v", "--verbose", dest="verbose", + action="store_true", help="show verbose information") + # Exclusive argument group for sync mode + exgroup1 = parser.add_mutually_exclusive_group(required=True) + exgroup1.add_argument("-m", "--merge-keywords-tags", + dest="direction_merge", action="store_true", + help="merge 'X-Keywords' and tags and update both") + exgroup1.add_argument("-k", "--keywords-to-tags", + dest="direction_keywords2tags", action="store_true", + help="sync 'X-Keywords' to notmuch tags") + exgroup1.add_argument("-t", "--tags-to-keywords", + dest="direction_tags2keywords", action="store_true", + help="sync notmuch tags to 'X-Keywords'") + # Exclusive argument group for tag operation mode + exgroup2 = parser.add_mutually_exclusive_group(required=False) + exgroup2.add_argument("-a", "--add-only", dest="mode_addonly", + action="store_true", help="only add notmuch tags") + exgroup2.add_argument("-r", "--remove-only", dest="mode_removeonly", + action="store_true", help="only remove notmuch tags") + # Parse + args = parser.parse_args() + # Sync direction + if args.direction_merge: + sync_direction = SyncDirection.MERGE_KEYWORDS_TAGS + elif args.direction_keywords2tags: + sync_direction = SyncDirection.KEYWORDS_TO_TAGS + elif args.direction_tags2keywords: + sync_direction = SyncDirection.TAGS_TO_KEYWORDS + else: + raise ValueError("Invalid synchronization direction") + # Sync mode + if args.mode_addonly: + sync_mode = SyncMode.ADD_ONLY + elif args.mode_removeonly: + sync_mode = SyncMode.REMOVE_ONLY + else: + sync_mode = SyncMode.ADD_REMOVE + # + if args.dbpath: + dbpath = os.path.abspath(os.path.expanduser(args.dbpath)) + else: + dbpath = None + # + db = Database(path=dbpath, create=False, mode=Database.MODE.READ_WRITE) + dbinfo = get_notmuch_revision(dbpath=dbpath) + q = Query(db, args.query) + total_msgs = q.count_messages() + msgs = q.search_messages() + # + if args.verbose: + print("# Notmuch database path: %s" % dbpath) + print("# Database revision: %d (uuid: %s)" % + (dbinfo['revision'], dbinfo['uuid'])) + print("# Query: %s" % args.query) + print("# Sync direction: %s" % sync_direction.name) + print("# Sync mode: %s" % sync_mode.name) + print("# Total messages to check: %d" % total_msgs) + print("# Dryn run: %s" % args.dryrun) + # + for msg in msgs: + kwmsg = KwMessage(msg) + kwmsg.sync(direction=sync_direction, 
mode=sync_mode, + dryrun=args.dryrun, verbose=args.verbose) + # + db.close() + + +if __name__ == "__main__": + main() + +# vim: set ts=4 sw=4 tw=0 fenc=utf-8 ft=python: # diff --git a/r/chisq.R b/r/chisq.R new file mode 100644 index 0000000..41f851f --- /dev/null +++ b/r/chisq.R @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# +# Calculate the chi-squared values between the given data and model values. +# + +calc.chisq <- function(value, error=NULL, model=NULL) { + if (is.data.frame(value)) { + df <- value + value <- df$value + error <- df$error + model <- df$model + } + chisq <- (value - model)^2 + if (! is.null(error)) { + weights <- error ^ (-2) + chisq <- chisq * weights + } + return(sum(chisq)) +} + +# vim: set ts=8 sw=4 tw=0 fenc=utf-8 ft=r: # diff --git a/r/fillpois.R b/r/fillpois.R new file mode 100644 index 0000000..be590f8 --- /dev/null +++ b/r/fillpois.R @@ -0,0 +1,130 @@ +# -*- coding: utf-8 -*- +# +# Identify the abnormal pixels (with relatively larger values) in the +# given X-ray count image, and replace their values with random Poisson +# values whose parameter lambda is determined by the neighboring pixels. +# +# +# Aaron LI +# 2015/09/01 +# Updated: 2015/09/02 +# + +# Fill a vector of a row/column of X-ray image with Poisson distribution. +# +# The parameter lambda of the Poisson distribution is determined by the +# mean value of the left n (default: 3) elements. +# +# If the value of one element is greater than or equal to (>=) +# qpois(prob, lambda), then its value is replaced with rpois(1, lambda). +# +# Arguments: +# vec - input data vector +# n - number of elements used to calculate the lambda (default: 3) +# prob - quantile probability (default: 95%) +# +# Return: +# a vector of the same length with abnormal values replaced +fill.pois.vec <- function(vec, n=3, prob=0.95) { + # Select the given index of element from the vector + # Arguments: + # bc - boundary condition: + # + "cyclic": vec[0, -1] := vec[length(vec), length(vec)-1] + # + "symmetric": vec[0, -1] := vec[1, 2] + elem <- function(vec, index, bc="cyclic") { + if (index <= 0) { + if (bc == "cyclic") { + index <- length(vec) + index + } else if (bc == "symmetric") { + index <- 1 - index + } else { + stop(paste("Invalid boundary condition:", bc, "\n")) + } + } + return(vec[index]) + } + # Calculate the mean value of left n elements for the given element. + mean.left <- function(vec, index, n=3, bc="cyclic") { + elements <- get(class(vec))(n) + for (i in 1:n) { + elements[i] <- elem(vec, index-i) + } + return(mean(elements)) + } + # + vec.filled <- vec + for (i in 1:length(vec)) { + lambda <- mean.left(vec, i, n) + if (vec[i] >= qpois(prob, lambda)) { + vec.filled[i] <- rpois(1, lambda) + } + } + return(vec.filled) +} + + +# Fill a matrix of the X-ray image with Poisson distribution by row or column. +# +# For more details, see 'fill.pois.vec()'. 
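+#
+# Example (illustrative):
+#   img <- matrix(rpois(64*64, lambda=5), nrow=64)
+#   img.filled <- fill.pois.mat(img, byrow=FALSE, n=3, prob=0.95)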
+# +# Arguments: +# mat - input image matrix +# byrow - where Poisson fill the matrix by row (default by column) +# n - number of elements used to calculate the lambda (default: 3) +# prob - quantile probability (default: 95%) +# +# Return: +# a matrix of the same size with abnormal values replaced +fill.pois.mat <- function(mat, byrow=FALSE, n=3, prob=0.95) { + mat.filled <- mat + if (byrow) { + # process by row + rows <- nrow(mat) + for (r in 1:rows) { + vec <- mat[r, ] + vec.filled <- fill.pois.vec(vec, n, prob) + mat.filled[r, ] <- vec.filled + } + } else { + # process by column + cols <- ncol(mat) + for (c in 1:cols) { + vec <- mat[, c] + vec.filled <- fill.pois.vec(vec, n, prob) + mat.filled[, c] <- vec.filled + } + } + return(mat.filled) +} + + +# Identify the abnormal pixels (with relatively larger values) in the +# given X-ray count image, and replace their values with random Poisson +# values whose parameter lambda is determined by the neighboring pixels. +# +# The refilled image is the average of the two images, which are the +# original image processed with 'fill.pois.mat()' by row and column, +# respectively. +# +# TODO: to verify??? +# The two-step procedure is employed to avoid the grid-like pattern/structure +# in the refilled image. +# +# For more details, see 'fill.pois.vec()'. +# +# Arguments: +# img - input image (a matrix) +# n - number of elements used to calculate the lambda (default: 3) +# prob - quantile probability (default: 95%) +# +# Return: +# a matrix of the same size with abnormal values replaced +fill.pois.img <- function(img, n=3, prob=0.95) { + img.fillbycol <- fill.pois.mat(img, byrow=FALSE, n=n, prob=prob) + img.fillbyrow <- fill.pois.mat(img, byrow=TRUE, n=n, prob=prob) + img.filled <- (img.fillbycol + img.fillbyrow) / 2 + return(img.filled) +} + + +# vim: set ts=8 sw=4 tw=0 fenc=utf-8 ft=r: # diff --git a/r/fitdistrplus-example.R b/r/fitdistrplus-example.R new file mode 100644 index 0000000..b5b1a57 --- /dev/null +++ b/r/fitdistrplus-example.R @@ -0,0 +1,54 @@ +n <- 50 +m <- 50 +set.seed(1) +mu <- -0.4 +sig <- 0.12 +x <- matrix(data=rlnorm(n*m, mu, sig), nrow=m) + +library(fitdistrplus) +## Fit a log-normal distribution to the 50 random data set +f <- apply(x, 2, fitdist, "lnorm") + +## Plot the results +for(i in 1:n) +plot(f[[i]]) + +## Save plot in an animated GIF-file +library(animation) +saveGIF({for(i in 1:n) plot(f[[i]])}) + +apply((sapply(f, "[[", "estimate")),1, summary) +# meanlog sdlog +# Min. -0.4347 0.09876 +# 1st Qu. -0.4140 0.11480 +# Median -0.4010 0.12110 +# Mean -0.4011 0.12270 +# 3rd Qu. -0.3899 0.12950 +# Max. -0.3522 0.14780 + + +## How much variance can we expect in the mean and std? +## Expeted mean +ExpectedMean <- function(mu, sig) exp(mu+ sig^2/2) +## Expected std +ExpectedStd <- function(mu, sig) sqrt((exp(sig^2)-1)*exp(2*mu + sig^2)) + +summary(apply(sapply(f, "[[", "estimate"), 2, function(x) ExpectedMean(x[1], x[2]))) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 0.6529 0.6665 0.6747 0.6748 0.6819 0.7087 +summary(apply(sapply(f, "[[", "estimate"), 2, function(x) ExpectedStd(x[1], x[2]))) +# Min. 1st Qu. Median Mean 3rd Qu. Max. +# 0.06604 0.07880 0.08212 0.08316 0.08794 0.10170 + +## Let's look at the goodness of fit statistics to get an +## idea how much variance we can expect there: +gof.ln <- lapply(f, gofstat) +gof.test <- lapply(gof.ln, function(x) data.frame(x[c("chisqpvalue", "cvm", "ad", "ks")])) +apply(do.call("rbind", gof.test), 2, summary) +# chisqpvalue cvm ad ks +# Min. 0.0002673 0.02117 0.1537 0.05438 +# 1st Qu. 
0.1394000 0.03755 0.2708 0.07488 +# Median 0.3578000 0.04841 0.3216 0.08054 +# Mean 0.3814000 0.05489 0.3564 0.08431 +# 3rd Qu. 0.6409000 0.06913 0.4358 0.09436 +# Max. 0.9245000 0.13220 0.7395 0.12570
\ No newline at end of file diff --git a/r/forceFieldTransform.R b/r/forceFieldTransform.R new file mode 100644 index 0000000..92310c1 --- /dev/null +++ b/r/forceFieldTransform.R @@ -0,0 +1,46 @@ +# -*- coding: utf -*- +# +# Calculate the "force field transform" of the image, using the +# specified *cell* size. +# +# The image is padded with the mirrored boundary condition. +# +# NOTE:TODO: +# The transformation output strengths image is NOT normalized! +# +# +# Credit: +# [1] TODO: +# Hurley et al., 2002, 2005 +# +# +# Aaron LI +# 2015/08/28 +# + + +# The attractive force between two points on the image. +# NOTE: the coefficient is ignored +# +# Arguments: +# p0, p1 - (r, c, value), the row and column number of the point position, +# and the value of that point +# +# Return: +# the force vector (f_r, f_c): +# 'f_r': force along the row direction, positive goes downside +# 'f_c': force along the column direction, positive goes to the right +# Note that this is the force that 'p1' act on 'p0', and is directed +# to point 'p1'. +p2p.force <- function(p0, p1) { + r0 = p0[1] + c0 = p0[2] + r1 = p1[1] + c1 = p1[2] + f_r = p0[3]*p1[3] * (r1-r0) / ((r1-r0)^2 + (c1-c0)^2)^1.5 + f_c = p0[3]*p1[3] * (c1-c0) / ((r1-r0)^2 + (c1-c0)^2)^1.5 + return(c(f_r, f_c)) +} + + +# vim: set ts=8 sw=4 tw=0 fenc=utf-8 ft=r: # diff --git a/r/kldiv.R b/r/kldiv.R new file mode 100644 index 0000000..a767038 --- /dev/null +++ b/r/kldiv.R @@ -0,0 +1,349 @@ +# -*- coding: utf-8 -*- +# +# Kullback-Leibler or Jensen-Shannon divergence between two distributions +# +# The Kullback-Leibler divergence is given by: +# D_{KL}(P(x), Q(x)) = sum[ P(x) * log(P(x) / Q(x)) ] +# where P(x) is the underground true distribution, and Q(x) the approximation +# distribution. Thus KL divergence measures the information lost when Q is +# used to approximate P. +# +# The Jensen-Shannon divergence is given by: +# D_{JS}(P, Q) = 0.5 * D_{KL}(P, M) + 0.5 * D_{KL}(Q, M); M = (P+Q)/2 +# This is a symmetrised divergence, and is equal to 1/2 the so-called +# Jeffrey divergence. +# +# Credits: +# [1] Wikipedia - Kullback-Leibler divergence +# https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence +# [2] David Fass, KLDIV +# http://www.mathworks.com/matlabcentral/fileexchange/13089-kldiv/content//kldiv.m +# +# Aaron LI +# 2015/09/04 +# + + +# Calculate the entropy of the probability mass distribution. +# The zeros are ignored. +# +# Arguments: +# x - probability mass distribution +# +# Return: +# entropy in unit "bits" +# +calc.entropy <- function(x) { + x.gt0 <- x[x>0] + return(sum(-x.gt0 * log2(x.gt0))) +} + + +# Calculate the KL divergence of distribution P from Q, or the JS divergence +# between the P and Q distributions. +# +# TODO: +# * to add other methods to deal with zero probabilities: +# - add eps to p.m.f and renormalize +# - Byesian prior +# - smoothing +# +# Credits: +# [1] Calculate the Kullback-Leibler Divergence in practice? +# http://stats.stackexchange.com/questions/97938/calculate-the-kullback-leibler-divergence-in-practice +# [2] How to compute KL-divergence when PMF contains 0s? 
+# http://mathoverflow.net/questions/72668/how-to-compute-kl-divergence-when-pmf-contains-0s +# +# Arguments: +# p - probabilities representing the distribution P (underground true) +# q - probabilities representing the distribution Q (approximation) +# type - which type of divergence to be calculated +# + "kl": (default) Kullback-Leibler divergence +# + "klsym": symmetric variant of the Kullback-Leibler divergence, +# which given by (KL(p, q) + KL(q, p))/2 +# + "js": Jensen-Shannon divergence +# zeros - how to deal with the zeros in each distribution probabilities +# + "ignore": just ignore the data points with probability of zero +# +# Note that the vectors p and q must have the same length, and the +# sum of probabilities p and q must be 1 +/- 1e-5 +# +# Return: +# calculate divergence value in unit "bits" +# +kldiv <- function(p, q, type="kl", zeros="ignore") { + # check length of vectors + stopifnot(length(p) == length(q)) + # validate probabilities + eps_prob <- 1e-5 + stopifnot(abs(sum(p) - 1) <= eps_prob, abs(sum(q) - 1) <= eps_prob) + # how to deal with zero probabilities + if (zeros == "ignore") { + # just ignore the zeros in probabilities + nonzeros <- (p > 0) & (q > 0) + p <- p[nonzeros] + q <- q[nonzeros] + } else { + stop(paste("Unsupported parameter value zeros=", zeros, "\n", sep="")) + } + # check divergence type + if (type == "kl") { + # Kullback-Leibler divergence + div <- sum(p * (log2(p) - log2(q))) + } else if (type == "klsym") { + # symmetric variant KL divergence + div <- 0.5 * (sum(p * (log2(p) - log2(q))) + + sum(q * (log2(q) - log2(p)))) + } else if (type == "js") { + # Jensen-Shannon divergence + m <- (p + q) / 2 + div <- 0.5 * (sum(p * (log2(p) - log2(m))) + + sum(q * (log2(q) - log2(m)))) + } else { + stop(paste("Unsupported parameter value type=", type, "\n", sep="")) + } + return(div) +} + + +# Estimate the probability mass distribution for the observation data, +# using "density()". +# The range of output coordinates of points is set to be: +# from: min(x) - cut*bw +# to: max(x) + cut*bw +# And the probability mass distribution is normalized. +# +# Arguments: +# x - input observation data +# n - number of equally spaced points at which the probability mass is +# to be estimated. +# bw - bandwidth to be used +# kernel - the smoothing kernel +# from - coordinate of the left-most point +# to - coordinate of the right-most point +# cut - c(left, right). Number of bandwidths beyond the left and right +# extremes of the input data. +# This allows the estimated density to drop to approximately zero +# at the extremes. +# If "from" and "to" specified, then "cut" is ignored. 
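+#
+# Example (illustrative):
+#   pm <- estimate.prob.mass(rpois(500, lambda=10))
+#   sum(pm$y)  # ~1, since the mass distribution is renormalized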
+# +# Returns: +# list with following components: +# x - the coordinates of the points where probability mass estimated +# y - the estimated probability mass +# bw - bandwidth used +# kernel - kernel used +# n - sample size +# cut - left and right cut used +# from - coordinate of the left-most point used +# to - coordinate of the right-most point used +# +estimate.prob.mass <- function(x, bw="nrd0", kernel="gaussian", n=512, + from=NULL, to=NULL, cut=c(3,3)) { + data <- x[!is.na(x)] + # calculate the bandwidth + bw <- get(paste("bw.", bw, sep=""))(data) + # determine the estimation range + if (is.null(from)) { + from <- min(data) - cut[1] * bw + } + if (is.null(to)) { + to <- max(data) + cut[2] * bw + } + # estimate with "density()" + d <- density(data, bw=bw, kernel=kernel, n=n, from=from, to=to) + # renormalize the probability mass distribution + pmass <- d$y / sum(d$y) + prob.mass <- list(x=d$x, y=pmass, bw=bw, kernel=kernel, + n=n, from=from, to=to, cut=cut) + return(prob.mass) +} + + +# Estimate the probability mass distribution for the source and corresponding +# background data using 'estimate.prob.mass()'. +# +# The coordinates at which the probability masses are estimated are the same +# for the source and corresponding background probability mass distributions. +# Therefore we can calculate the KL divergence between these two distributions. +# +# Argument: +# srcdata - raw counts data drawn from the source region +# bkgdata - raw counts data drawn from the background region +# +# Return: +# data.frame with the following components: +# x - the coordinates of the points where probability mass estimated +# src - the estimated probability masses of the source data +# bkg - the estimated probability masses of the background data +# +pm.src.bkg <- function(srcdata, bkgdata) { + # compare the data ranges + if (max(srcdata) > max(bkgdata)) { + pm.src <- estimate.prob.mass(srcdata) + from <- pm.src$from + to <- pm.src$to + pm.bkg <- estimate.prob.mass(bkgdata, from=from, to=to) + } else { + pm.bkg <- estimate.prob.mass(bkgdata) + from <- pm.bkg$from + to <- pm.bkg$to + pm.src <- estimate.prob.mass(srcdata, from=from, to=to) + } + df <- data.frame(x=pm.src$x, src=pm.src$y, bkg=pm.bkg$y) + return(df) +} + + +# Calculate the entropies and KL/JS divergences of the source and background +# probability mass distribution group. 
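+#
+# Example (illustrative, with random Poisson counts):
+#   pmdf <- pm.src.bkg(rpois(200, lambda=8), rpois(200, lambda=5))
+#   info <- info.src.bkg(pmdf, comp=c("entropy", "js"))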
+# +# Arguments: +# pmdf - data.frame of the probability mass distribution +# comp - components to be calculated +# + "entropy": entropy of the source and background +# + "kl": KL divergences from source to background and vice versa +# + "klsym": symmetric variant of KL divergence +# + "js": JS divergence +# +# Return: +# list with following components: +# entropy.src - entropy of the source distribution +# entropy.bkg - entropy of the background distribution +# kl.src2bkg - KL divergence from source to background +# kl.bkg2src - KL divergence from background to source +# klsym - symmetric variant KL divergence +# js - JS divergence +info.src.bkg <- function(pmdf, comp=c("entropy", "kl", "klsym", "js")) { + pm.src <- pmdf$src + pm.bkg <- pmdf$bkg + entropy.src <- NULL + entropy.bkg <- NULL + kl.src2bkg <- NULL + kl.bkg2src <- NULL + klsym <- NULL + js <- NULL + if ("entropy" %in% comp) { + entropy.src <- calc.entropy(pm.src) + entropy.bkg <- calc.entropy(pm.bkg) + } + if ("kl" %in% comp) { + kl.src2bkg <- kldiv(pm.src, pm.bkg, type="kl") + kl.bkg2src <- kldiv(pm.bkg, pm.src, type="kl") + } + if ("klsym" %in% comp) { + klsym <- kldiv(pm.src, pm.bkg, type="klsym") + } + if ("js" %in% comp) { + js <- kldiv(pm.src, pm.bkg, type="js") + } + return(list(entropy.src=entropy.src, entropy.bkg=entropy.bkg, + kl.src2bkg=kl.src2bkg, kl.bkg2src=kl.bkg2src, + klsym=klsym, js=js)) +} + + +# Calculate the entropies and KL/JS divergences of the source density +# histogram with respect to the corresponding background data which +# drawn from the estimated Poisson mass distribution. +# +# Arguments: +# src - raw counts data of the source region +# comp - components to be calculated +# + "entropy": entropy of the source and background +# + "kl": KL divergences from source to background and vice versa +# + "klsym": symmetric variant of KL divergence +# + "js": JS divergence +# +# Return: +# list with following components: +# entropy.src - entropy of the source distribution +# entropy.bkg - entropy of the background distribution +# kl.src2bkg - KL divergence from source to background +# kl.bkg2src - KL divergence from background to source +# klsym - symmetric variant KL divergence +# js - JS divergence +# +info.src.pois <- function(src, comp=c("entropy", "kl", "klsym", "js")) { + # make the density histogram of the source counts data + hist.src <- hist(src, breaks=(min(src):(max(src)+1)-0.5), plot=FALSE) + x <- hist.src$mids + pm.src <- hist.src$density + # calculate the corresponding theoretical Poisson density/mass distribution + # as the estimated background + lambda <- mean(src) + pm.pois <- dpois(x, lambda) + pm.pois <- pm.pois / sum(pm.pois) + # calculate the entropy, KL/JS divergences + entropy.src <- NULL + entropy.bkg <- NULL + kl.src2bkg <- NULL + kl.bkg2src <- NULL + klsym <- NULL + js <- NULL + if ("entropy" %in% comp) { + entropy.src <- calc.entropy(pm.src) + entropy.bkg <- calc.entropy(pm.pois) + } + if ("kl" %in% comp) { + kl.src2bkg <- kldiv(pm.src, pm.pois, type="kl") + kl.bkg2src <- kldiv(pm.pois, pm.src, type="kl") + } + if ("klsym" %in% comp) { + klsym <- kldiv(pm.src, pm.pois, type="klsym") + } + if ("js" %in% comp) { + js <- kldiv(pm.src, pm.pois, type="js") + } + return(list(entropy.src=entropy.src, entropy.bkg=entropy.bkg, + kl.src2bkg=kl.src2bkg, kl.bkg2src=kl.bkg2src, + klsym=klsym, js=js)) +} + + +# Calculate the information (e.g., entropy, divergences) for each group of +# region data. 
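+# (For a single region without observed background data, info.src.pois()
+# above can be used directly; an illustrative call is
+# info.src.pois(rpois(500, lambda=4)), which compares the sample histogram
+# of the counts against their fitted Poisson mass distribution.)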
+# If the background data are not provided, then the background is estimated +# with a Poisson density/mass distribution. +info.reglist <- function(srcdatalist, bkgdatalist=NULL) { + if (is.null(bkgdatalist)) { + infofunc <- "info.src.pois" + } else { + infofunc <- "info.src.bkg" + stopifnot(length(srcdatalist) == length(bkgdatalist)) + } + l <- length(srcdatalist) + infodf <- data.frame(entropy.src=numeric(l), entropy.bkg=numeric(l), + kl.src2bkg=numeric(l), kl.bkg2src=numeric(l), + klsym=numeric(l), js=numeric(l)) + for (i in 1:length(srcdatalist)) { + #cat(i, "\n") + if (is.null(bkgdatalist)) { + if (sum(srcdatalist[[i]]) == 0) { + # srcdata all zeros + cat(i, ": WARNING: srcdata are all zeros!\n") + info <- list(entropy.src=NA, entropy.bkg=NA, + kl.src2bkg=NA, kl.bkg2src=NA, + klsym=NA, js=NA) + } else { + info <- get(infofunc)(srcdatalist[[i]]) + } + } else { + if (sum(srcdatalist[[i]]) == 0 || sum(bkgdatalist[[i]]) == 0) { + # srcdata / bkgdata all zeros + cat(i, ": WARNING: srcdata/bkgdata are all zeros!\n") + info <- list(entropy.src=NA, entropy.bkg=NA, + kl.src2bkg=NA, kl.bkg2src=NA, + klsym=NA, js=NA) + } else { + pmdf <- pm.src.bkg(srcdatalist[[i]], bkgdatalist[[i]]) + info <- get(infofunc)(pmdf) + } + } + infodf[i, ] <- info + } + return(infodf) +} + + +# vim: set ts=8 sw=4 tw=0 fenc=utf-8 ft=r: # diff --git a/r/lsos.R b/r/lsos.R new file mode 100644 index 0000000..16a3e7f --- /dev/null +++ b/r/lsos.R @@ -0,0 +1,45 @@ +# -*- encoding: utf-8 -*- + +# Tricks to manage the available memory in an R session +# http://stackoverflow.com/q/1358003/4856091 + +# improved list of objects +.ls.objects <- function(pos=1, pattern, order.by, + decreasing=FALSE, pretty.size=FALSE, + head=FALSE, n=10) { + napply <- function(names, fn) { + sapply(names, function(x) fn(get(x, pos=pos))) + } + names <- ls(pos=pos, pattern=pattern) + obj.class <- napply(names, function(x) as.character(class(x))[1]) + obj.mode <- napply(names, mode) + obj.type <- ifelse(is.na(obj.class), obj.mode, obj.class) + obj.size.bytes <- napply(names, object.size) + if (pretty.size) { + obj.size <- napply(names, function(x) { + format(object.size(x), units="auto") + }) + } else { + obj.size <- obj.size.bytes + } + obj.dim <- t(napply(names, function(x) as.numeric(dim(x))[1:2])) + vec <- is.na(obj.dim)[, 1] & (obj.type != "function") + obj.dim[vec, 1] <- napply(names, length)[vec] + out <- data.frame(obj.type, obj.size, obj.dim) + names(out) <- c("Type", "Size", "Rows", "Columns") + if (! missing(order.by)) + if (order.by == "Size") { + out <- out[order(obj.size.bytes, decreasing=decreasing), ] + } else { + out <- out[order(out[[order.by]], decreasing=decreasing), ] + } + if (head) + out <- head(out, n) + out +} +# shorthand +lsobjs <- function(..., n=10) { + .ls.objects(..., order.by="Size", decreasing=TRUE, + pretty.size=TRUE, head=TRUE, n=n) +} + diff --git a/r/scaleSaliency.R b/r/scaleSaliency.R new file mode 100644 index 0000000..80172bc --- /dev/null +++ b/r/scaleSaliency.R @@ -0,0 +1,246 @@ +# -*- coding: utf-8 -*- +# +# Scale Saliency algorithm +# +# Reference: +# [1] T. Kadir & M. Brady, Saliency, Scale and Image Description. +# 2001, International Journal of Computer Vision, 45(2), 83-105 +# +# Aaron LI +# 2015/07/29 +# + + +# Calculate Shannon entropy from histogram of probability densities. 
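+# For a normalized uniform histogram over k bins the entropy equals log(k)
+# (the natural logarithm is used below), e.g. calcShannonEntropy(rep(1/8, 8))
+# returns log(8).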
+# Arguments: +# phist - histogram of probability density +# Return: +# entropy value +calcShannonEntropy <- function(phist) { + # Helper function to calculate the entropy + # Arguments: + # p - probability density of each bin + # Return: + # p * log(p) if p > 0, otherwise 0 + plogp <- function(p) { + if (p < 1e-10) { + return(0.0) + } else { + return(p * log(p)) + } + } + # calculate the entropy + entropy <- -sum(sapply(phist, plogp)) + return(entropy) +} + + +# Generate the template circular regions for each scale. +# Arguments: +# scale_min - minimum scale (radius of circle) +# scale_max - maximum scale +# Return: +# list of matrix which represent the regions of interest (with value of TRUE) +circleROI <- function(scale_min, scale_max) { + rows <- 2 * scale_max + 1 + cols <- rows + rc <- (rows + 1) / 2 # central row + cc <- (cols + 1) / 2 # central col + roi <- list() + for (s in scale_min:scale_max) { + radius2 <- s^2 + m <- matrix(0, nrow=rows, ncol=cols) + roi[[paste("scale", s, sep="")]] <- + ifelse(((row(m)-rc)^2 + (col(m)-cc)^2) <= radius2, + TRUE, FALSE) + } + return(roi) +} + + +# Calculate the scale saliencies for the 1D case: scalar image +# Arguments: +# img - input *scalar* image +# scale_min - minimum scale (pixels of radius of circle) +# scale_max - maximum scale (NOTE: scale_max >= scale_min+2) +# nbins - how many bins used for histogram +# progressbar - whether to display the progress bar +# Return: +# 6-column data.frame contains the scale saliencies results +calcScaleSaliency1D <- function(img, scale_min, scale_max, nbins, + progressbar=TRUE) { + # check scale range first: must have at least 3 scales + stopifnot(scale_max >= scale_min+2) + # get number of rows and columns + rows <- nrow(img) + cols <- ncol(img) + # determine the saliency calculation region of the image + # FIXME: how to deal with the boundaries??? + row_begin <- scale_max + 1 + col_begin <- scale_max + 1 + row_end <- rows - scale_max + col_end <- cols - scale_max + # templates of regions for each scale + roi <- circleROI(scale_min, scale_max) + # R data frame to store the saliency results + scaleSaliency <- data.frame(row=numeric(0), col=numeric(0), + scale=numeric(0), entropy=numeric(0), + disimilarity=numeric(0), saliency=numeric(0)) + # determine the breakpoints for histogram + hist_breaks <- (0:nbins) * (max(img) - min(img))/nbins + min(img) + if (progressbar) { + # progress bar + pb <- txtProgressBar(min=row_begin, max=row_end, style=3) + } + for (ri in row_begin:row_end) { + if (progressbar) { + # update progress bar + setTxtProgressBar(pb, ri) + } + for (ci in col_begin:col_end) { + # filter out the required size of image block, which is + # used to calculate its histogram, entropy, etc. + imgROI <- img[(ri-scale_max):(ri+scale_max), + (ci-scale_max):(ci+scale_max)] + # vectors to store entropies and distances + entropy <- numeric(scale_max-scale_min+1) + distance <- numeric(scale_max-scale_min+1) + # initial probability density for scale of 's-1' + scaleHistPr0 <- rep(0, nbins) + for (s in scale_min:scale_max) { + scaleROI <- roi[[paste("scale", s, sep="")]] + # NOTE: do not use 'breaks=nbins', since the number is a + # suggestion only and breakpoints will be set to 'prtty' + # values in this case. + scaleHist <- hist(imgROI[scaleROI], + breaks=hist_breaks, plot=FALSE) + scaleHistPr <- scaleHist$counts / sum(scaleHist$counts) + # calculate Shannon entropy + entropy[s-scale_min+1] <- calcShannonEntropy(scaleHistPr) + # FIXME: calculate distance of scales??? 
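+                # The dissimilarity measure used here is the L1 (absolute)
+                # difference between the histograms at scale 's' and the
+                # previous scale 's-1' (cf. Kadir & Brady); at the first
+                # scale the previous histogram is all zeros, so the term
+                # reduces to sum(scaleHistPr) = 1.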
+ distance[s-scale_min+1] <- sum(abs(scaleHistPr-scaleHistPr0)) + # save the probability density of current scale 's' + scaleHistPr0 <- scaleHistPr + } + # smooth the 'distance' vector to reduce the impacts of noise + distance1 <- c(distance[1], distance[1:(length(distance)-1)]) + distance2 <- c(distance[2:length(distance)], + distance[length(distance)]) + distance <- (distance1 + distance + distance2) / 3 + # find the peaks of entropy, and the corresponding scales + peakScale <- c(FALSE, + ((entropy[2:(length(entropy)-1)] > + entropy[1:(length(entropy)-2)]) & + (entropy[2:(length(entropy)-1)] > + entropy[3:length(entropy)])), + FALSE) + #cat("peakScale:", peakScale, "\n") + # calculate the inter-scale saliencies for each entropy peaks + for (s in (scale_min:scale_max)[peakScale]) { + scaleNorm <- s*s / (2*s - 1) + scaleEntropy <- entropy[s-scale_min+1] + disimilarity <- scaleNorm * distance[s-scale_min+1] + saliency <- scaleEntropy * disimilarity + scaleSaliency[nrow(scaleSaliency)+1, ] <- list(ri, ci, s, + scaleEntropy, + disimilarity, + saliency) + } + } + } + if (progressbar) { + # close progress bar + close(pb) + } + return(scaleSaliency) +} + + +# Simple greedy clustering algorithm to filter out salient regions. +# Arguments: +# ssaliency - saliency results from 'calcScaleSaliency*' +# ssaliency_th - inter-scale saliency threshold +# disimilarity_th - disimilarity threshold +# Return: +# clustered & filtered saliency regions +greedyCluster <- function(ssaliency, ssaliency_th, disimilarity_th) { + # filter by global saliency & inter-scale saliency threshold + ssaliency <- ssaliency[((ssaliency$saliency > ssaliency_th) & + (ssaliency$disimilarity > disimilarity_th)), ] + # sort in descending inter-scale saliency + ssaliency <- ssaliency[order(-ssaliency$saliency), ] + # cluster salienct points + clusteredSaliency <- ssaliency[NULL, ] + while (nrow(ssaliency) > 0) { + ss <- ssaliency[1, ] + clusteredSaliency[nrow(clusteredSaliency)+1, ] <- ss + distance2 <- (ssaliency$row - ss$row)^2 + (ssaliency$col - ss$col)^2 + # filter out the points inside the current salient circle + ssaliency <- ssaliency[(distance2 > ss$scale^2), ] + } + return(clusteredSaliency) +} + + +# Plot the image and salient regions with ggplot2 +# Arguments: +# img - input image +# saliency - saliency restults by clusteredSaliency() +plotSalientReg <- function(img, saliency) { + require(reshape2) + require(ggplot2) + plotCircle <- function(xc, yc, radius) { + theta <- seq(0, 2*pi, length.out=100) + gcircle <- annotate("path", + x=xc+radius*cos(theta), + y=yc+radius*sin(theta), + colour="green") + return(gcircle) + } + # plot the image + gp <- ggplot(melt(img), aes(Var2, -Var1, fill=value)) + geom_raster() + # add circles + for (i in 1:nrow(saliency)) { + ss <- saliency[i, ] + gcircle <- plotCircle(ss$col, -ss$row, ss$scale) + gp <- gp + gcircle + } + return(gp) +} + + +# Convert the scale saliency information to DS9 regions. +# +# NOTE: +# However, the rows and columns of the FITS matrix in R correspond +# to the X and Y axes in DS9, which is *swapped*. +# Thus the region width and height correspond to the row range and +# column range, respectively. +# +# Arguments: +# saliency - saliency restults by clusteredSaliency() +# Return: +# vector of DS9 region strings +saliency2region <- function(saliency) { + regions <- with(saliency, + paste("circle(", row, ",", col, ",", scale, ")", + sep="")) + return(regions) +} + + +# Write DS9 region to file with appropriate header information. 
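+#
+# A typical workflow (illustrative thresholds; 'img' is a numeric matrix):
+#   ss  <- calcScaleSaliency1D(img, scale_min=3, scale_max=10, nbins=16)
+#   cs  <- greedyCluster(ss, ssaliency_th=2.0, disimilarity_th=0.5)
+#   reg <- saliency2region(cs)
+#   save.region("salient.reg", reg)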
+# +# Arguments: +# filename - output region file +# region - vector/list of region strings +save.region <- function(filename, region) { + rf <- file(filename, "w") + region.hdr <- c("# Region file format: DS9 version 4.1", "image") + writeLines(region.hdr, rf) + writeLines(region, rf) + close(rf) +} + + +# vim: set ts=8 sw=4 tw=0 fenc=utf-8 ft=r: # diff --git a/rand/luminosity_func.py b/rand/luminosity_func.py new file mode 100644 index 0000000..8cc46ee --- /dev/null +++ b/rand/luminosity_func.py @@ -0,0 +1,96 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- +# +# Aaron LI +# 2015/07/01 +# + +""" +Generate random numbers (i.e., fluxes) with respect to the +provided luminosity function. +""" + +import numpy as np +import random + +def luminosity_func(Lx, N0=1.0): + """ + The *cumulative* luminosity function: N(>=L) + The number of objects with luminosities >= L(x) for each L(x). + """ + # broken power-law model (Xu et al. 2005) + # Nx = (1) N0 * (Lx/L_b)^(-alpha_l); for Lx <= L_b + # (2) N0 * (Lx/L_b)^(-alpha_h); for Lx > L_b + L_b = 4.4e38 # break point (erg/s) (+2.0/-1.4) + alpha_h = 2.28 # (+1.72/-0.53) + alpha_l = 1.08 # (+0.15/-0.33) + if isinstance(Lx, np.ndarray): + Nx = np.zeros(Lx.shape) + Nx[Lx <= 0] = 0.0 + Nx[Lx <= L_b] = N0 * (Lx[Lx <= L_b] / L_b)**(-alpha_l) + Nx[Lx > L_b] = N0 * (Lx[Lx > L_b] / L_b)**(-alpha_h) + else: + # Lx is a single number + if Lx <= 0.0: + Nx = 0.0 + elif Lx <= L_b: + Nx = N0 * (Lx/L_b)**(-alpha_l) + else: + Nx = N0 * (Lx/L_b)**(-alpha_h) + return Nx + + +def luminosity_density(Lx, N0=1.0): + """ + Function of number density at luminosity at Lx. => PDF + + PDF(Lx) = - d(luminosity_func(Lx) / d(Lx) + """ + L_b = 4.4e38 # break point (erg/s) (+2.0/-1.4) + alpha_h = 2.28 # (+1.72/-0.53) + alpha_l = 1.08 # (+0.15/-0.33) + if isinstance(Lx, np.ndarray): + Px = np.zeros(Lx.shape) + Px[Lx<=0] = 0.0 + Px[Lx<=L_b] = N0 * (alpha_l/L_b) * (Lx[Lx<=L_b] / L_b)**(-alpha_l-1) + Px[Lx>L_b] = N0 * (alpha_h/L_b) * (Lx[Lx>L_b] / L_b)**(-alpha_h-1) + else: + # Lx is a single number + if Lx <= 0.0: + Px = 0.0 + elif Lx <= L_b: + Px = N0 * (alpha_l/L_b) * (Lx/L_b)**(-alpha_l-1) + else: + Px = N0 * (alpha_h/L_b) * (Lx/L_b)**(-alpha_h-1) + return Px + + +def luminosity_pdf(Lx): + """ + Probability density function + """ + h = 1e-5 * Lx # step size for numerical deviation + p = - (luminosity_func(Lx+0.5*h) - luminosity_func(Lx-0.5*h)) / h + return p + + +def sampler(min, max, number=1): + """ + Generate a sample of luminosity values within [min, max] from + the above luminosity distribution. + """ + # Get the maximum value of the density function + M = luminosity_density(min) + results = [] + for i in range(number): + while True: + u = random.random() * M + y = random.random() * (max-min) + min + if u <= luminosity_density(y): + results.append(y) + break + if len(results) == 1: + return results[0] + else: + return np.array(results) + diff --git a/rand/pointsrc_coord.py b/rand/pointsrc_coord.py new file mode 100644 index 0000000..1da9be2 --- /dev/null +++ b/rand/pointsrc_coord.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- +# +# Aaron LI +# 2015/07/01 +# + +""" +Generate random coordinates for point sources with respect to the r^{1/4} +distribution. +""" + +import numpy as np +import random + + +def cdf(r, N0=1.0): + """ + Cumulative distribution function of the number of point sources. + + r^{1/4} distribution law: de Vaucouleurs 1948 + """ + return N0 * r**(1.0/4.0) + + +def pdf(r, N0=1.0): + """ + Density function of the number of point sources. 
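+
+    The density is obtained by differentiating the cumulative distribution
+    cdf(r) = N0 * r^{1/4} defined above:
+
+        p(r) = d(cdf)/d(r) = (N0/4) * r^(-3/4)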
+ + pdf = d(pdf) / d(r) + """ + if isinstance(r, np.ndarray): + p = np.zeros(r.shape) + p[r<=0.0] = 0.0 + p[r>0.0] = 0.25 * N0 * r[r>0.0]**(-3.0/4.0) + else: + if r <= 0.0: + p = 0.0 + else: + p = 0.25 * N0 * r**(-3.0/4.0) + return p + + +def sampler(min, max, number=1): + """ + Generate a sample of coordinates (only r) within [min, max] from + the above density distribution. + + min, max: the minimum and maximum r values (in degree) + """ + # Get the maximum value of the density function + M = pdf(min) + results = [] + for i in range(number): + while True: + u = random.random() * M + y = random.random() * (max-min) + min + if u <= pdf(y): + results.append(y) + break + if len(results) == 1: + return results[0] + else: + return np.array(results) + + +def add_angle(r): + """ + Add angle for each r value to make up a coordinate of a polar coordinate. + """ + coords = [] + for ri in r: + theta = random.random() * 360 + coords.append((ri, theta)) + if len(coords) == 1: + return coords[0] + else: + return coords + + +def to_radec(coords, xc=0, yc=0): + """ + Convert the generated coordinates to (ra, dec) (unit: degree). + + xc, yc: the center coordinate (ra, dec) + """ + results = [] + for r, theta in coords: + # FIXME: spherical algebra should be used!!! + dx = r * np.cos(theta*np.pi/180) + dy = r * np.sin(theta*np.pi/180) + x = xc + dx + y = yc + dy + results.append((x, y)) + if len(results) == 1: + return results[0] + else: + return results + diff --git a/rand/sphere.py b/rand/sphere.py new file mode 100644 index 0000000..4220766 --- /dev/null +++ b/rand/sphere.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Randomly pick point on the sphere surface. +# +# References: +# [1] Shpere Poin Picking -- from Wolfram MathWorld +# http://mathworld.wolfram.com/SpherePointPicking.html +# [2] Random Points on a Sphere +# https://www.jasondavies.com/maps/random-points/ +# +# Aaron LI +# 2015/06/18 + +__version__ = "0.1.0" +__date__ = "2015/06/16" + +import math +import random + +def sphere_point(n=1, unit="rad"): + """ + Randomly uniformly pick a point on the sphere surface. + Using the method "Sphere Point Picking" from Wolfram MathWorld. + + Arguments: + n: number of points to be generated + unit: unit of output values: rad/deg + + Return: + (theta, phi): spherical coordinate (unit: rad). + theta: [0, 2\pi); phi: [0 - \pi] + If n > 1, then return a list of (theta, phi) + """ + points = [] + for i in range(n): + u = random.random() + v = random.random() + theta = 2.0 * math.pi * u + phi = math.acos(2.0*v - 1.0) + if unit == "deg": + theta = rad2deg(theta) + phi = rad2deg(phi) + points.append((theta, phi)) + if n == 1: + return points[0] + else: + return points + + +def rad2deg(x): + return x * 180.0 / math.pi + +def deg2rad(x): + return x * math.pi / 180.0 + + diff --git a/region/region.py b/region/region.py new file mode 100644 index 0000000..47a1636 --- /dev/null +++ b/region/region.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +# +# Aaron Li +# 2015/06/19 + +""" +Class Region for regions on the spherical surface. +Used in astronomy to select/define a certian region, e.g, DS9. +""" + +import sys + + +class Region(object): + """ + Basic region class for regions on the spherical surface, + similar definition as to DS9 regions. 
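+
+    Example (illustrative coordinates, in degrees):
+        reg = Region("box", 30.0, 45.0, width=0.2, height=0.1)
+        reg.is_inside((30.05, 44.96))   # => True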
+
+    Coordinate style: (ra, dec)
+    Unit: degree
+    ra: [0, 360)
+    dec: [-90, 90]
+    """
+
+    # currently supported region types (similar to DS9)
+    REGION_TYPES = ["circle", "ellipse", "box", "annulus", "pie", "panda"]
+
+    def __init__(self, regtype, xc, yc,
+                 radius=None, radius2=None,
+                 width=None, height=None, rotation=None,
+                 start=None, end=None):
+        if regtype.lower() not in self.REGION_TYPES:
+            raise ValueError("only the following region types are supported: %s" %
+                             " ".join(self.REGION_TYPES))
+        self.regtype = regtype.lower()
+        self.xc = xc
+        self.yc = yc
+        self.radius = radius
+        self.radius2 = radius2
+        self.width = width
+        self.height = height
+        self.rotation = rotation
+        self.start = start
+        self.end = end
+
+    def __repr__(self):
+        return "Region: %s" % self.regtype
+
+    def dump(self):
+        return {"regtype": self.regtype,
+                "xc": self.xc,
+                "yc": self.yc,
+                "radius": self.radius,
+                "radius2": self.radius2,
+                "width": self.width,
+                "height": self.height,
+                "rotation": self.rotation,
+                "start": self.start,
+                "end": self.end
+        }
+
+    def is_inside(self, point):
+        """
+        Determine whether the given point is inside the region.
+        """
+        x = point[0]
+        y = point[1]
+        if self.regtype == "box":
+            # NOTE: box rotation is currently not supported (ignored)
+            xmin = self.xc - self.width/2.0
+            xmax = self.xc + self.width/2.0
+            ymin = self.yc - self.height/2.0
+            ymax = self.yc + self.height/2.0
+            return all([x >= xmin, x <= xmax, y >= ymin, y <= ymax])
+        else:
+            raise ValueError("region type '%s' currently not implemented" %
+                             self.regtype)
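+
+# A minimal usage sketch (illustrative values; only "box" is handled by
+# is_inside() so far, the other region types raise ValueError):
+#   reg = Region("circle", 10.0, -5.0, radius=0.05)
+#   print(reg)          # => Region: circle
+#   print(reg.dump())   # dictionary of the region parameters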