# Reconstructed from a whitespace-collapsed patch:
#   commit 0c0bd0baec9e987ed5c2ad74aed55e5f4ac0152b
#   Author: Aaron LI, Sat, 15 Oct 2016 09:52:45 +0800
#   Subject: Add data/snr.py to retrieve Green's Galactic SNRs catalog data
#   "Thanks D. A. Green for the Galactic SNRs catalog (294 objects)!
#    http://www.mrao.cam.ac.uk/surveys/snrs/"
# The same commit also creates fg21sim/data/__init__.py containing:
#   # Copyright (c) 2016 Weitian LI
#   # MIT license
#   from .snr import SNRDataGreen, retrieve_snr_data_green
#
# What follows is the content of fg21sim/data/snr.py.

# Copyright (c) 2016 Weitian LI
# MIT license


"""
Retrieve the Galactic SNRs catalog data by parsing the web by /D. A. Green/:

    http://www.mrao.cam.ac.uk/surveys/snrs/
    http://www.mrao.cam.ac.uk/surveys/snrs/snrs.data.html
"""


import os
import re
import logging
from collections import OrderedDict


logger = logging.getLogger(__name__)


# One catalog row: "glon glat  hh mm ss  [+-]dd mm  size  type  flux
# specindex  [other names]".  Compiled once at import time instead of on
# every call to ``SNRDataGreen.parse``.
_ROW_PATTERN = re.compile((
    r"^\s*(?P<glon>\d+\.\d+)\s+(?P<glat>[-+]?\d+\.\d+)\s+"
    r"(?P<ra>\d{2}\s+\d{2}\s+\d{2})\s+(?P<dec>[-+]?\d{2}\s+\d{2})\s+"
    r"(?P<size>[0-9.]+\??|[0-9.]+x[0-9.]+\??|\?)\s+"
    r"(?P<shape>[SCF?]{1,2})\s+"
    r"(?P<flux>\>?\d+\.\d+\??|\>?\d+\??|\?)\s+"
    r"(?P<specindex>\d+\.\d+\??|\d+\??|\?|varies)\s*"
    r"(?P<othernames>.*)$"))
_RA_PATTERN = re.compile(r"(?P<hh>\d+)\s+(?P<mm>\d+)\s+(?P<ss>\d+)")
# The sign is captured separately so that "-00 30" keeps its sign
# (float("-00") would yield -0.0 and lose it in the sum).
_DEC_PATTERN = re.compile(r"(?P<sign>[-+]?)(?P<dd>\d+)\s+(?P<mm>\d+)")


class SNRDataGreen:
    """Class for Green's Galactic SNRs catalog data parse and manipulation.

    The available SNR data (keys of the ``data`` dictionary):
    - glon, glat : Galactic longitude, latitude [degree]
    - ra, dec : Right ascension, Declination (J2000) [degree]
    - size : angular size, stored in the catalog's unit without
             conversion (the catalog summary lists it in arcmin):
             (diameter, diameter, flag) or (major, minor, flag)
    - shape : shape type: (shape, flag) with shapes of "S" (shell),
              "F" (filled-center), "C" (composite), and flag "?" if
              uncertain.
    - flux : flux density at 1 GHz [Jy]: (flux, flag)
    - specindex : Spectral index of the integrated radio emission:
                  (specindex, flag)
    - othernames : list of other name(s) commonly used for the SNR

    For more detailed description about the SNR catalog, refer to:
    http://www.mrao.cam.ac.uk/surveys/snrs/snrs.info.html
    """
    def __init__(self, dstr):
        self.data = self.parse(dstr)

    @classmethod
    def parse(cls, dstr):
        """Parse the SNR data string, for one SNR object.

        Parameters
        ----------
        dstr : str
            String containing the SNR data

        Returns
        -------
        data : dict
            A data dictionary (``OrderedDict``) containing the parsed
            SNR data

        Raises
        ------
        ValueError
            If ``dstr`` does not look like a catalog row.
        """
        match = _ROW_PATTERN.match(dstr)
        if match is None:
            # Fail with a message naming the offending row instead of an
            # opaque AttributeError on ``None.group``.
            raise ValueError("invalid SNR data string: {0!r}".format(dstr))
        data = OrderedDict([
            ("glon", float(match.group("glon"))),
            ("glat", float(match.group("glat"))),
            ("ra", cls._parse_ra(match.group("ra"))),
            ("dec", cls._parse_dec(match.group("dec"))),
            ("size", cls._parse_size(match.group("size"))),
            ("shape", cls._parse_shape(match.group("shape"))),
            ("flux", cls._parse_flux(match.group("flux"))),
            ("specindex", cls._parse_specindex(match.group("specindex"))),
            ("othernames", cls._parse_othernames(match.group("othernames"))),
        ])
        return data

    @staticmethod
    def _parse_ra(s):
        """Parse the R.A. string "hh mm ss" to degree [0, 360)"""
        match = _RA_PATTERN.match(s)
        if match is None:
            raise ValueError("invalid R.A. string: {0!r}".format(s))
        hh = float(match.group("hh"))
        mm = float(match.group("mm"))
        ss = float(match.group("ss"))
        # 1 hour of right ascension corresponds to 15 degrees
        return (hh*15.0 + mm*15.0/60.0 + ss*15.0/3600.0)

    @staticmethod
    def _parse_dec(s):
        """Parse the Dec. string "dd mm" to degree [-90, 90]"""
        match = _DEC_PATTERN.match(s)
        if match is None:
            raise ValueError("invalid Dec. string: {0!r}".format(s))
        sign = -1.0 if match.group("sign") == "-" else 1.0
        dd = float(match.group("dd"))
        mm = float(match.group("mm"))
        return sign * (dd + mm/60.0)

    @staticmethod
    def _parse_size(s):
        """Parse the SNR angular size string.

        Returns
        -------
        major : float
        minor : float
        flag : str
            (diameter, diameter) of the SNR if approximately circular;
            (major axis, minor axis) if SNR is elongated;
            (None, None) if the size is unknown (a bare "?").
            Values are kept in the unit of the input string, no
            conversion is applied.
            Possible flag: "", "?" (uncertain)
        """
        flag = ""
        if s.endswith("?"):
            flag = "?"
            s = s.rstrip("?")
        if not s:
            # A bare "?" is accepted by the row pattern: size unknown.
            return (None, None, flag)
        parts = s.split("x")
        if len(parts) == 2:
            major, minor = float(parts[0]), float(parts[1])
        else:
            major = minor = float(s)
        return (major, minor, flag)

    @staticmethod
    def _parse_shape(s):
        """Parse the SNR shape (a.k.a. type) string.

        Returns
        -------
        shape : str
        flag : str
            Possible shapes are "S" (shell), "F" (filled-center),
            "C" (composite), or None (very uncertain);
            Possible flag: "", "?" (uncertain)
        """
        flag = ""
        if s.endswith("?"):
            flag = "?"
            s = s.rstrip("?")
        shape = s if s != "" else None
        return (shape, flag)

    @staticmethod
    def _parse_flux(s):
        """Parse the flux density string.

        Returns
        -------
        flux : float
        flag : str
            Flux density [ Jy ] at 1GHz, None if the value is uncertain.
            Possible flag: "", "?", ">", ">?"
        """
        flag = ""
        if s.startswith(">"):
            flag += ">"
            s = s.lstrip(">")
        if s.endswith("?"):
            flag += "?"
            s = s.rstrip("?")
        try:
            flux = float(s)
        except ValueError:
            # Nothing numeric left (e.g., a bare "?")
            flux = None
        return (flux, flag)

    @staticmethod
    def _parse_specindex(s):
        """Parse the spectral index string.

        Returns
        -------
        specindex : float
        flag : str
            Spectral index, None if the value is uncertain.
            Possible flag: "", "?", "varies"
        """
        if s == "varies":
            return (None, "varies")
        if s.endswith("?"):
            s = s.rstrip("?")
            try:
                specindex = float(s)
            except ValueError:
                # A bare "?": no value available
                specindex = None
            return (specindex, "?")
        return (float(s), "")

    @staticmethod
    def _parse_othernames(s):
        """Parse the other names string to a list of names.

        Names are separated by "," and returned without surrounding
        whitespace; empty items are dropped.
        """
        names = (name.strip() for name in s.split(","))
        return [name for name in names if name]

    @property
    def name(self):
        """Name built from the coordinates, zero-padded: "Glll.l+bb.b"."""
        pattern = "G{glon:05.1f}{glat:+05.1f}"
        return pattern.format(**self.data)

    @property
    def othernames(self):
        return self.data["othernames"]

    @property
    def glon(self):
        return self.data["glon"]

    @property
    def glat(self):
        return self.data["glat"]

    @property
    def ra(self):
        return self.data["ra"]

    @property
    def dec(self):
        return self.data["dec"]

    @property
    def size(self):
        return self.data["size"]

    @property
    def shape(self):
        return self.data["shape"]

    @property
    def flux(self):
        return self.data["flux"]

    @property
    def specindex(self):
        return self.data["specindex"]

    @property
    def data_flat(self):
        """Get the data with tuple items flattened for easier CSV process"""
        data = OrderedDict([
            ("name", self.name),
            ("glon", self.glon),
            ("glat", self.glat),
            ("ra", self.ra),
            ("dec", self.dec),
            ("size_major", self.size[0]),
            ("size_minor", self.size[1]),
            ("size_flag", self.size[2]),
            ("shape", self.shape[0]),
            ("shape_flag", self.shape[1]),
            ("flux", self.flux[0]),
            ("flux_flag", self.flux[1]),
            ("specindex", self.specindex[0]),
            ("specindex_flag", self.specindex[1]),
            ("othernames", self.othernames),
        ])
        return data


def retrieve_snr_data_green(url):
    """Retrieve D. A. Green's Galactic SNRs catalog and parse the HTML
    contents to extract the catalog data.

    Parameters
    ----------
    url : str
        URL to the D. A. Green's SNRs catalog summary data page,
        can also be the path to the local HTML file.

    Returns
    -------
    snrdata : list[str]
        A string list with each line representing the information of
        one SNR object.

        Data string format:
        - Column 1, 2: Galactic longitude (l) and latitude (b)
        - Column 3-5: R.A. J2000 (hh mm ss)
        - Column 6, 7: Dec. J2000 (dd mm)
        - Column 8: Size [ arcmin ], `r` if circular, `Mxm` if elliptical;
                    may also contain a "?"
        - Column 9: Type (e.g., S, C, S?, C?)
        - Column 10: Flux density at 1 GHz [ Jy ]
        - Column 11: Spectral index (may contain "?" or be "varies")
        - Column 12: Other name(s), separated by ","

    Raises
    ------
    ValueError
        If the page has no ``<pre>`` data block, the block is empty, or
        the number of extracted items is odd.
    requests.HTTPError
        If the remote page cannot be fetched successfully.
    """
    # Imported here so that the pure-parsing ``SNRDataGreen`` class stays
    # usable when these third-party packages are not installed.
    import bs4
    import requests

    # Strip the beginning "file://" if present
    url = re.sub(r"^file://", "", url)
    logger.info("Retrieve Galactic SNRs catalog from: %s", url)
    if os.path.exists(url):
        # A local HTML file
        with open(url) as fp:
            html = fp.read()
    else:
        # Remote web page
        r = requests.get(url)
        r.raise_for_status()
        html = r.text
    logger.info("Parse the HTML contents ...")
    soup = bs4.BeautifulSoup(html, "html.parser")
    snrdata_tag = soup.body.pre if soup.body is not None else None
    if snrdata_tag is None:
        raise ValueError("no <pre> data block found in: {0}".format(url))
    # Children alternate between plain text nodes and tags; ``.string`` is
    # None for a tag without a single string child, treated as empty here.
    snrdata_str = [ch.strip() if isinstance(ch, bs4.element.NavigableString)
                   else (ch.string or "").strip()
                   for ch in snrdata_tag.children]
    if not snrdata_str:
        raise ValueError("empty SNR data block in: {0}".format(url))
    # Remove the header row
    del snrdata_str[0]
    # Strip the trailing dashes/whitespace from the last data row
    if snrdata_str:
        snrdata_str[-1] = re.sub(r"[-\s]*$", "", snrdata_str[-1])
    # The remaining SNR data string list should be even-length, since
    # every two items are the Galactic coordinate and other information.
    if len(snrdata_str) % 2 != 0:
        raise ValueError("length of the parsed SNR data str list is ODD")
    # Concatenate every two items corresponding to the same SNR object
    snrdata_str2 = [" ".join(x) for x in zip(snrdata_str[0::2],
                                             snrdata_str[1::2])]
    logger.info("Done parse the HTML contents: got %d SNR objects",
                len(snrdata_str2))
    return snrdata_str2