diff options
| -rw-r--r-- | fg21sim/data/__init__.py | 4 | ||||
| -rw-r--r-- | fg21sim/data/snr.py | 330 | 
2 files changed, 334 insertions, 0 deletions
# NOTE: reconstructed from the "new file" diff of fg21sim/data/snr.py.
# The same commit also adds fg21sim/data/__init__.py, whose contents are:
#
#     # Copyright (c) 2016 Weitian LI <liweitianux@live.com>
#     # MIT license
#
#     from .snr import SNRDataGreen, retrieve_snr_data_green

# Copyright (c) 2016 Weitian LI <liweitianux@live.com>
# MIT license


"""
Retrieve the Galactic SNRs catalog data by parsing the web pages
maintained by D. A. Green:

    http://www.mrao.cam.ac.uk/surveys/snrs/
    http://www.mrao.cam.ac.uk/surveys/snrs/snrs.data.html
"""


import os
import re
import logging
from collections import OrderedDict


logger = logging.getLogger(__name__)


class SNRDataGreen:
    """Parse and hold one record of Green's Galactic SNRs catalog.

    The parsed record is stored in ``self.data`` (an ``OrderedDict``) with
    the following keys:

    - glon, glat : Galactic longitude, latitude as given in the catalog
    - ra, dec : Right ascension, Declination (J2000) [degree]
    - size : ``(major, minor, flag)``; the values are the numbers given in
             the catalog size column (documented there as arcmin), *no*
             unit conversion is applied; ``(None, None, "?")`` if unknown
    - shape : ``(shape, flag)`` with shapes of "S" (shell),
              "F" (filled-center), "C" (composite), and flag "?" if
              uncertain
    - flux : ``(flux, flag)``; flux density at 1 GHz [Jy]
    - specindex : ``(specindex, flag)``; spectral index of the integrated
                  radio emission
    - othernames : list of other name(s) commonly used for the SNR

    For more detailed description about the SNR catalog, refer to:
    http://www.mrao.cam.ac.uk/surveys/snrs/snrs.info.html
    """

    # One catalog row; compiled once instead of on every ``parse()`` call.
    _ROW_PATTERN = re.compile((
        r"^\s*(?P<glon>\d+\.\d+)\s+(?P<glat>[-+]?\d+\.\d+)\s+"
        r"(?P<ra>\d{2}\s+\d{2}\s+\d{2})\s+(?P<dec>[-+]?\d{2}\s+\d{2})\s+"
        r"(?P<size>[0-9.]+\??|[0-9.]+x[0-9.]+\??|\?)\s+"
        r"(?P<shape>[SCF?]{1,2})\s+"
        r"(?P<flux>\>?\d+\.\d+\??|\>?\d+\??|\?)\s+"
        r"(?P<specindex>\d+\.\d+\??|\d+\??|\?|varies)\s*"
        r"(?P<othernames>.*)$"))

    def __init__(self, dstr):
        self.data = self.parse(dstr)

    @classmethod
    def parse(cls, dstr):
        """Parse the SNR data string, for one SNR object.

        Parameters
        ----------
        dstr : str
            String containing the SNR data

        Returns
        -------
        data : dict
            A data dictionary containing the parsed SNR data

        Raises
        ------
        ValueError
            If ``dstr`` does not have the expected catalog row layout.
        """
        match = cls._ROW_PATTERN.match(dstr)
        if match is None:
            # Fail with a clear message rather than an AttributeError on
            # ``None.group`` further down.
            raise ValueError(
                "unrecognized SNR data string: {0!r}".format(dstr))
        data = OrderedDict([
            ("glon", float(match.group("glon"))),
            ("glat", float(match.group("glat"))),
            ("ra", cls._parse_ra(match.group("ra"))),
            ("dec", cls._parse_dec(match.group("dec"))),
            ("size", cls._parse_size(match.group("size"))),
            ("shape", cls._parse_shape(match.group("shape"))),
            ("flux", cls._parse_flux(match.group("flux"))),
            ("specindex", cls._parse_specindex(match.group("specindex"))),
            ("othernames", cls._parse_othernames(match.group("othernames"))),
        ])
        return data

    @staticmethod
    def _parse_ra(s):
        """Parse the R.A. string "hh mm ss" to degree [0, 360)"""
        pattern = re.compile(r"(?P<hh>\d+)\s+(?P<mm>\d+)\s+(?P<ss>\d+)")
        match = pattern.match(s)
        hh = float(match.group("hh"))
        mm = float(match.group("mm"))
        ss = float(match.group("ss"))
        # 1 hour = 15 degrees
        return (hh*15.0 + mm*15.0/60.0 + ss*15.0/3600.0)

    @staticmethod
    def _parse_dec(s):
        """Parse the Dec. string "dd mm" to degree [-90, 90]"""
        pattern = re.compile(r"(?P<sign>[-+]?)(?P<dd>\d+)\s+(?P<mm>\d+)")
        match = pattern.match(s)
        # The sign is captured separately so that "-00 30" stays negative.
        sign = -1.0 if match.group("sign") == "-" else 1.0
        dd = float(match.group("dd"))
        mm = float(match.group("mm"))
        return sign * (dd + mm/60.0)

    @staticmethod
    def _parse_size(s):
        """Parse the SNR angular size string.

        Returns
        -------
        major : float
        minor : float
        flag : str
            (diameter, diameter) of the SNR if approximately circular;
            (major axis, minor axis) if SNR is elongated.
            The values are the numbers given in the catalog (no unit
            conversion is applied); both are None if the size is unknown,
            i.e., the string is just "?".
            Possible flag: "", "?" (uncertain)
        """
        flag = ""
        if s.endswith("?"):
            flag = "?"
            s = s.rstrip("?")
        if not s:
            # A bare "?" is a valid catalog entry: size unknown.
            return (None, None, flag)
        parts = s.split("x")
        if len(parts) == 2:
            major, minor = float(parts[0]), float(parts[1])
        else:
            major = minor = float(s)
        return (major, minor, flag)

    @staticmethod
    def _parse_shape(s):
        """Parse the SNR shape (a.k.a. type) string.

        Returns
        -------
        shape : str
        flag : str
            Possible shapes are "S" (shell), "F" (filled-center),
            "C" (composite), or None (very uncertain);
            Possible flag: "", "?" (uncertain)
        """
        flag = ""
        if s.endswith("?"):
            flag += "?"
            s = s.rstrip("?")
        shape = s if s != "" else None
        return (shape, flag)

    @staticmethod
    def _parse_flux(s):
        """Parse the flux density string.

        Returns
        -------
        flux : float
        flag : str
            Flux density [ Jy ] at 1GHz, None if the value is uncertain.
            Possible flag: "", "?", ">", ">?"
        """
        flag = ""
        if s.startswith(">"):
            flag += ">"
            s = s.lstrip(">")
        if s.endswith("?"):
            flag += "?"
            s = s.rstrip("?")
        try:
            flux = float(s)
        except ValueError:
            # Nothing numeric left, e.g., the entry was just "?"
            flux = None
        return (flux, flag)

    @staticmethod
    def _parse_specindex(s):
        """Parse the spectral index string.

        Returns
        -------
        specindex : float
        flag : str
            Spectral index, None if the value is uncertain.
            Possible flag: "", "?", "varies"
        """
        if s == "varies":
            specindex = None
            flag = "varies"
        elif s.endswith("?"):
            flag = "?"
            s = s.rstrip("?")
            try:
                specindex = float(s)
            except ValueError:
                # The entry was just "?"
                specindex = None
        else:
            specindex = float(s)
            flag = ""
        return (specindex, flag)

    @staticmethod
    def _parse_othernames(s):
        """Parse the other names string to a list of names.

        The names are comma-separated; surrounding blanks of each name
        are removed and empty names are dropped.
        """
        return [name.strip() for name in s.split(",") if name.strip()]

    @property
    def name(self):
        """Name built from the Galactic coordinates, e.g., "G111.7-02.1"."""
        pattern = "G{glon:05.1f}{glat:+05.1f}"
        return pattern.format(**self.data)

    @property
    def othernames(self):
        return self.data["othernames"]

    @property
    def glon(self):
        return self.data["glon"]

    @property
    def glat(self):
        return self.data["glat"]

    @property
    def ra(self):
        return self.data["ra"]

    @property
    def dec(self):
        return self.data["dec"]

    @property
    def size(self):
        return self.data["size"]

    @property
    def shape(self):
        return self.data["shape"]

    @property
    def flux(self):
        return self.data["flux"]

    @property
    def specindex(self):
        return self.data["specindex"]

    @property
    def data_flat(self):
        """Get the data with tuple items flattened for easier CSV process.

        NOTE: "othernames" is kept as a list.
        """
        data = OrderedDict([
            ("name", self.name),
            ("glon", self.glon),
            ("glat", self.glat),
            ("ra", self.ra),
            ("dec", self.dec),
            ("size_major", self.size[0]),
            ("size_minor", self.size[1]),
            ("size_flag", self.size[2]),
            ("shape", self.shape[0]),
            ("shape_flag", self.shape[1]),
            ("flux", self.flux[0]),
            ("flux_flag", self.flux[1]),
            ("specindex", self.specindex[0]),
            ("specindex_flag", self.specindex[1]),
            ("othernames", self.othernames),
        ])
        return data


def retrieve_snr_data_green(url):
    """Retrieve D. A. Green's Galactic SNRs catalog and parse the HTML
    contents to extract the catalog data.

    Parameters
    ----------
    url : str
        URL to the D. A. Green's SNRs catalog summary data page,
        can also be the path to the local HTML file.

    Returns
    -------
    snrdata : list[str]
        A string list with each line representing the information of
        one SNR object.

        Data string format:
        - Column 1, 2: Galactic longitude (l) and latitude (b)
        - Column 3-5: R.A. J2000 (hh mm ss)
        - Column 6, 7: Dec. J2000 (dd mm)
        - Column 8: Size [ arcmin ], `r` if circular, `Mxm` if elliptical;
                    may also contain a "?"
        - Column 9: Type (e.g., S, C, S?, C?)
        - Column 10: Flux density at 1 GHz [ Jy ]
        - Column 11: Spectral index (may contain "?" or be "varies")
        - Column 12: Other name(s), separated by ","

    Raises
    ------
    ValueError
        If the page has no ``<body><pre>`` data section, or the number of
        extracted items is odd (coordinates and remaining columns of an
        object could not be paired).
    requests.HTTPError
        If the remote page cannot be fetched successfully.
    """
    # The third-party packages are imported here (not at module level) so
    # that ``SNRDataGreen`` can parse already-retrieved data strings
    # without requiring them to be installed.
    import bs4

    # Strip the beginning "file://" if present
    url = re.sub(r"^file://", "", url)
    logger.info("Retrieve Galactic SNRs catalog from: %s", url)
    if os.path.exists(url):
        # A local HTML file
        with open(url) as f:
            html = f.read()
    else:
        # Remote web page
        import requests
        r = requests.get(url)
        r.raise_for_status()
        html = r.text
    logger.info("Parse the HTML contents ...")
    soup = bs4.BeautifulSoup(html, "html.parser")
    snrdata_tag = soup.body.pre if soup.body is not None else None
    if snrdata_tag is None:
        raise ValueError("no <pre> data section found in: {0}".format(url))
    # Children of <pre> are either bare text or tags wrapping a text.
    snrdata_str = [ch.strip() if isinstance(ch, bs4.element.NavigableString)
                   else ch.string.strip()
                   for ch in snrdata_tag.children]
    # Remove the header row
    del snrdata_str[0]
    # Strip the last data row (trailing dashes and blanks)
    snrdata_str[-1] = re.sub(r"[-\s]*$", "", snrdata_str[-1])
    # The remaining SNR data string list should be even-length, since
    # every two items are the Galactic coordinate and other information.
    if len(snrdata_str) % 2 != 0:
        raise ValueError("length of the parsed SNR data str list is ODD")
    # Concatenate every two items corresponding to the same SNR object
    snrdata_str2 = [" ".join(x) for x in zip(snrdata_str[0::2],
                                             snrdata_str[1::2])]
    logger.info("Done parse the HTML contents: got %d SNR objects",
                len(snrdata_str2))
    return snrdata_str2
