From 0c0bd0baec9e987ed5c2ad74aed55e5f4ac0152b Mon Sep 17 00:00:00 2001
From: Aaron LI <aaronly.me@outlook.com>
Date: Sat, 15 Oct 2016 09:52:45 +0800
Subject: Add data/snr.py to retrieve Green's Galactic SNRs catalog data

Thanks D. A. Green for the Galactic SNRs catalog (294 objects)!

http://www.mrao.cam.ac.uk/surveys/snrs/
---
 fg21sim/data/__init__.py |   4 +
 fg21sim/data/snr.py      | 330 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 334 insertions(+)
 create mode 100644 fg21sim/data/__init__.py
 create mode 100644 fg21sim/data/snr.py

diff --git a/fg21sim/data/__init__.py b/fg21sim/data/__init__.py
new file mode 100644
index 0000000..f1805fe
--- /dev/null
+++ b/fg21sim/data/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) 2016 Weitian LI <liweitianux@live.com>
+# MIT license
+
+from .snr import SNRDataGreen, retrieve_snr_data_green
diff --git a/fg21sim/data/snr.py b/fg21sim/data/snr.py
new file mode 100644
index 0000000..a0a0fb9
--- /dev/null
+++ b/fg21sim/data/snr.py
@@ -0,0 +1,330 @@
+# Copyright (c) 2016 Weitian LI <liweitianux@live.com>
+# MIT license
+
+
+"""
+Retrieve the Galactic SNRs catalog data by parsing the web pages of D. A. Green:
+
+    http://www.mrao.cam.ac.uk/surveys/snrs/
+    http://www.mrao.cam.ac.uk/surveys/snrs/snrs.data.html
+"""
+
+
+import os
+import re
+import logging
+from collections import OrderedDict
+
+import requests
+import bs4
+
+
+logger = logging.getLogger(__name__)
+
+
+class SNRDataGreen:
+    """Parse and hold one entry of Green's Galactic SNRs catalog.
+
+    The available SNR data (keys of ``self.data``):
+    - glon, glat : Galactic longitude, latitude (rounded to 0.1deg) [degree]
+    - ra, dec : Right ascension, Declination (J2000) [degree]
+    - size : angular size as given in the catalog, i.e. [arcmin]:
+             (major, minor, flag), major == minor for a single diameter
+    - shape : shape type: (shape, flag) with shapes of "S" (shell),
+              "F" (filled-center), "C" (composite), and flag "?" if uncertain.
+    - flux : flux density at 1 GHz [Jy]: (flux, flag)
+    - specindex : Spectral index of the integrated radio emission:
+                  (specindex, flag)
+    - othernames : list of other name(s) commonly used for the SNR
+
+    For more detailed description about the SNR catalog, refer to:
+    http://www.mrao.cam.ac.uk/surveys/snrs/snrs.info.html
+    """
+    def __init__(self, dstr):
+        self.data = self.parse(dstr)
+
+    @classmethod
+    def parse(cls, dstr):
+        """Parse the SNR data string, for one SNR object.
+
+        Parameters
+        ----------
+        dstr : str
+            String containing the SNR data
+
+        Returns
+        -------
+        data : dict
+            A data dictionary containing the parsed SNR data
+
+        Raises
+        ------
+        ValueError
+            If the string does not match the catalog row format
+        """
+        pattern = re.compile((
+            r"^\s*(?P<glon>\d+\.\d+)\s+(?P<glat>[-+]?\d+\.\d+)\s+"
+            r"(?P<ra>\d{2}\s+\d{2}\s+\d{2})\s+(?P<dec>[-+]?\d{2}\s+\d{2})\s+"
+            r"(?P<size>[0-9.]+\??|[0-9.]+x[0-9.]+\??|\?)\s+"
+            r"(?P<shape>[SCF?]{1,2})\s+"
+            r"(?P<flux>\>?\d+\.\d+\??|\>?\d+\??|\?)\s+"
+            r"(?P<specindex>\d+\.\d+\??|\d+\??|\?|varies)\s*"
+            r"(?P<othernames>.*)$"))
+        match = pattern.match(dstr)
+        if match is None:
+            raise ValueError("invalid SNR data string: %r" % (dstr,))
+        return OrderedDict([
+            ("glon", float(match.group("glon"))),
+            ("glat", float(match.group("glat"))),
+            ("ra", cls._parse_ra(match.group("ra"))),
+            ("dec", cls._parse_dec(match.group("dec"))),
+            ("size", cls._parse_size(match.group("size"))),
+            ("shape", cls._parse_shape(match.group("shape"))),
+            ("flux", cls._parse_flux(match.group("flux"))),
+            ("specindex", cls._parse_specindex(match.group("specindex"))),
+            ("othernames", cls._parse_othernames(match.group("othernames"))),
+        ])
+
+    @staticmethod
+    def _parse_ra(s):
+        """Parse the R.A. string "hh mm ss" to degree [0, 360)"""
+        pattern = re.compile(r"(?P<hh>\d+)\s+(?P<mm>\d+)\s+(?P<ss>\d+)")
+        match = pattern.match(s)
+        hh = float(match.group("hh"))
+        mm = float(match.group("mm"))
+        ss = float(match.group("ss"))
+        return (hh*15.0 + mm*15.0/60.0 + ss*15.0/3600.0)
+
+    @staticmethod
+    def _parse_dec(s):
+        """Parse the Dec. string "dd mm" to degree [-90, 90]"""
+        pattern = re.compile(r"(?P<sign>[-+]?)(?P<dd>\d+)\s+(?P<mm>\d+)")
+        match = pattern.match(s)
+        # Keep the sign apart so that e.g. "-00 30" stays negative
+        sign = -1.0 if match.group("sign") == "-" else 1.0
+        dd = float(match.group("dd"))
+        mm = float(match.group("mm"))
+        return sign * (dd + mm/60.0)
+
+    @staticmethod
+    def _parse_size(s):
+        """Parse the SNR angular size string.
+
+        Returns
+        -------
+        major : float
+        minor : float
+        flag : str
+            (diameter, diameter) of the SNR if approximately circular;
+            (major axis, minor axis) if SNR is elongated;
+            (None, None) if no size is given (a bare "?").
+            Values are kept in the catalog unit [ arcmin ].
+            Possible flag: "", "?" (uncertain)
+        """
+        flag = "?" if s.endswith("?") else ""
+        s = s.rstrip("?")
+        if not s:
+            # A bare "?" carries no number: report the size as unknown
+            return (None, None, flag)
+        try:
+            major, minor = map(float, s.split("x"))
+        except ValueError:
+            major = minor = float(s)
+        return (major, minor, flag)
+
+    @staticmethod
+    def _parse_shape(s):
+        """Parse the SNR shape (a.k.a. type) string.
+
+        Returns
+        -------
+        shape : str
+        flag : str
+            Possible shapes are "S" (shell), "F" (filled-center),
+            "C" (composite), or None (very uncertain);
+            Possible flag: "", "?" (uncertain)
+        """
+        flag = "?" if s.endswith("?") else ""
+        shape = s.rstrip("?") or None
+        return (shape, flag)
+
+    @staticmethod
+    def _parse_flux(s):
+        """Parse the flux density string.
+
+        Returns
+        -------
+        flux : float
+        flag : str
+            Flux density [ Jy ] at 1GHz, None if no number is given.
+            Possible flag: "", "?", ">", ">?"
+        """
+        flag = ""
+        if s.startswith(">"):
+            flag += ">"
+            s = s.lstrip(">")
+        if s.endswith("?"):
+            flag += "?"
+            s = s.rstrip("?")
+        try:
+            flux = float(s)
+        except ValueError:
+            flux = None
+        return (flux, flag)
+
+    @staticmethod
+    def _parse_specindex(s):
+        """Parse the spectral index string.
+
+        Returns
+        -------
+        specindex : float
+        flag : str
+            Spectral index, None if it varies or no number is given.
+            Possible flag: "", "?", "varies"
+        """
+        if s == "varies":
+            specindex = None
+            flag = "varies"
+        elif s.endswith("?"):
+            flag = "?"
+            s = s.rstrip("?")
+            try:
+                specindex = float(s)
+            except ValueError:
+                specindex = None
+        else:
+            specindex = float(s)
+            flag = ""
+        return (specindex, flag)
+
+    @staticmethod
+    def _parse_othernames(s):
+        """Parse the comma-separated other names string to a list of names.
+
+        Surrounding whitespace is stripped and empty items are dropped.
+        """
+        return [name.strip() for name in s.split(",") if name.strip()]
+
+    @property
+    def name(self):
+        """Name built from the Galactic coordinates, e.g., G004.5+06.8"""
+        pattern = "G{glon:05.1f}{glat:+05.1f}"
+        return pattern.format(**self.data)
+
+    @property
+    def othernames(self):
+        return self.data["othernames"]
+
+    @property
+    def glon(self):
+        return self.data["glon"]
+
+    @property
+    def glat(self):
+        return self.data["glat"]
+
+    @property
+    def ra(self):
+        return self.data["ra"]
+
+    @property
+    def dec(self):
+        return self.data["dec"]
+
+    @property
+    def size(self):
+        return self.data["size"]
+
+    @property
+    def shape(self):
+        return self.data["shape"]
+
+    @property
+    def flux(self):
+        return self.data["flux"]
+
+    @property
+    def specindex(self):
+        return self.data["specindex"]
+
+    @property
+    def data_flat(self):
+        """Get the data with tuple items flattened for easier CSV process"""
+        return OrderedDict([
+            ("name", self.name),
+            ("glon", self.glon),
+            ("glat", self.glat),
+            ("ra", self.ra),
+            ("dec", self.dec),
+            ("size_major", self.size[0]),
+            ("size_minor", self.size[1]),
+            ("size_flag", self.size[2]),
+            ("shape", self.shape[0]),
+            ("shape_flag", self.shape[1]),
+            ("flux", self.flux[0]),
+            ("flux_flag", self.flux[1]),
+            ("specindex", self.specindex[0]),
+            ("specindex_flag", self.specindex[1]),
+            ("othernames", self.othernames),
+        ])
+
+
+def retrieve_snr_data_green(url):
+    """Retrieve D. A. Green's Galactic SNRs catalog and parse the HTML
+    contents to extract the catalog data.
+
+    Parameters
+    ----------
+    url : str
+        URL to the D. A. Green's SNRs catalog summary data page,
+        can also be the path to the local HTML file.
+
+    Returns
+    -------
+    snrdata : list[str]
+        One string per SNR object, with the following columns:
+        - Column 1, 2: Galactic longitude (l) and latitude (b)
+        - Column 3-5: R.A. J2000 (hh mm ss)
+        - Column 6, 7: Dec. J2000 (dd mm)
+        - Column 8: Size [ arcmin ]: `r` (circular) or `Mxm`; may end with "?"
+        - Column 9: Type (e.g., S, C, S?, C?)
+        - Column 10: Flux density at 1 GHz [ Jy ]
+        - Column 11: Spectral index (may contain "?" or be "varies")
+        - Column 12: Other name(s), separated by ","
+
+    Raises
+    ------
+    ValueError
+        If the HTML has no <pre> data block or the items cannot be paired
+    """
+    # Strip the beginning "file://" if present
+    url = re.sub(r"^file://", "", url)
+    logger.info("Retrieve Galactic SNRs catalog from: %s", url)
+    if os.path.exists(url):  # a local HTML file
+        with open(url) as f:  # closed as soon as the content is read
+            html = f.read()
+    else:  # a remote web page
+        r = requests.get(url)
+        r.raise_for_status()
+        html = r.text
+    logger.info("Parse the HTML contents ...")
+    soup = bs4.BeautifulSoup(html, "html.parser")
+    snrdata_tag = soup.body.pre if soup.body is not None else None
+    if snrdata_tag is None:
+        raise ValueError("no <pre> data block found in: {0}".format(url))
+    # NOTE: "tag.string" is None for nested markup, "get_text()" is not
+    rows = [ch.strip() if isinstance(ch, bs4.element.NavigableString)
+            else ch.get_text().strip()
+            for ch in snrdata_tag.children]
+    del rows[0]  # remove the header row
+    # Strip the last data row
+    rows[-1] = re.sub(r"[-\s]*$", "", rows[-1])
+    # Every two items are the Galactic coordinate and other information
+    if len(rows) % 2 != 0:
+        raise ValueError("length of the parsed SNR data str list is ODD")
+    # Concatenate every two items corresponding to the same SNR object
+    snrdata = [" ".join(x) for x in zip(rows[0::2], rows[1::2])]
+    logger.info("Done parse the HTML contents: got %d SNR objects",
+                len(snrdata))
+    return snrdata
-- 
cgit v1.2.2