# Copyright (c) 2016 Weitian LI <liweitianux@live.com>
# MIT license


"""
Retrieve the Galactic SNRs catalog data by parsing the web pages maintained
by D. A. Green:

    http://www.mrao.cam.ac.uk/surveys/snrs/
    http://www.mrao.cam.ac.uk/surveys/snrs/snrs.data.html
"""


import os
import re
import logging
from collections import OrderedDict

import requests
import bs4


logger = logging.getLogger(__name__)


class SNRDataGreen:
    """Parse and hold one entry of Green's Galactic SNRs catalog.

    The parsed data (keys of the ``data`` dictionary):
    - glon, glat : Galactic longitude, latitude [degree]
    - ra, dec : Right ascension, Declination [degree]
    - size : angular size: (major, minor, flag); the two numbers are equal
             for a single-diameter entry.  The values are kept as listed
             in the catalog data string (no unit conversion is applied).
    - shape : shape type: (shape, flag) with shapes of "S" (shell),
              "F" (filled-center), "C" (composite), and flag "?" if uncertain.
    - flux : flux density: (flux, flag)
    - specindex : spectral index of the integrated radio emission:
                  (specindex, flag)
    - othernames : list of other name(s) commonly used for the SNR

    For more detailed description about the SNR catalog, refer to:
    http://www.mrao.cam.ac.uk/surveys/snrs/snrs.info.html
    """
    # Layout of one catalog row: "glon glat hh mm ss dd mm size shape flux
    # specindex [othernames]".  Compiled once instead of on every parse.
    _ROW_PATTERN = re.compile((
        r"^\s*(?P<glon>\d+\.\d+)\s+(?P<glat>[-+]?\d+\.\d+)\s+"
        r"(?P<ra>\d{2}\s+\d{2}\s+\d{2})\s+(?P<dec>[-+]?\d{2}\s+\d{2})\s+"
        r"(?P<size>[0-9.]+\??|[0-9.]+x[0-9.]+\??|\?)\s+"
        r"(?P<shape>[SCF?]{1,2})\s+"
        r"(?P<flux>\>?\d+\.\d+\??|\>?\d+\??|\?)\s+"
        r"(?P<specindex>\d+\.\d+\??|\d+\??|\?|varies)\s*"
        r"(?P<othernames>.*)$"))
    _RA_PATTERN = re.compile(r"(?P<hh>\d+)\s+(?P<mm>\d+)\s+(?P<ss>\d+)")
    _DEC_PATTERN = re.compile(r"(?P<sign>[-+]?)(?P<dd>\d+)\s+(?P<mm>\d+)")

    def __init__(self, dstr):
        """Parse the data string ``dstr`` and store the result in ``data``."""
        self.data = self.parse(dstr)

    @classmethod
    def parse(cls, dstr):
        """Parse the SNR data string, for one SNR object.

        Parameters
        ----------
        dstr : str
            String containing the SNR data

        Returns
        -------
        data : OrderedDict
            A data dictionary containing the parsed SNR data

        Raises
        ------
        ValueError
            If ``dstr`` does not follow the expected row layout.
        """
        match = cls._ROW_PATTERN.match(dstr)
        if match is None:
            raise ValueError("invalid SNR data string: {0!r}".format(dstr))
        data = OrderedDict([
            ("glon", float(match.group("glon"))),
            ("glat", float(match.group("glat"))),
            ("ra", cls._parse_ra(match.group("ra"))),
            ("dec", cls._parse_dec(match.group("dec"))),
            ("size", cls._parse_size(match.group("size"))),
            ("shape", cls._parse_shape(match.group("shape"))),
            ("flux", cls._parse_flux(match.group("flux"))),
            ("specindex", cls._parse_specindex(match.group("specindex"))),
            ("othernames", cls._parse_othernames(match.group("othernames"))),
        ])
        return data

    @classmethod
    def _parse_ra(cls, s):
        """Parse the R.A. string "hh mm ss" to degree.

        Raises
        ------
        ValueError
            If ``s`` is not of the form "hh mm ss".
        """
        match = cls._RA_PATTERN.match(s)
        if match is None:
            raise ValueError("invalid R.A. string: {0!r}".format(s))
        hh = float(match.group("hh"))
        mm = float(match.group("mm"))
        ss = float(match.group("ss"))
        # 1 hour of right ascension corresponds to 15 degrees
        return (hh*15.0 + mm*15.0/60.0 + ss*15.0/3600.0)

    @classmethod
    def _parse_dec(cls, s):
        """Parse the Dec. string "dd mm" to degree.

        Raises
        ------
        ValueError
            If ``s`` is not of the form "[+-]dd mm".
        """
        match = cls._DEC_PATTERN.match(s)
        if match is None:
            raise ValueError("invalid Dec. string: {0!r}".format(s))
        # The sign is handled separately so that e.g. "-00 30" stays negative
        sign = -1.0 if match.group("sign") == "-" else 1.0
        dd = float(match.group("dd"))
        mm = float(match.group("mm"))
        return sign * (dd + mm/60.0)

    @staticmethod
    def _parse_size(s):
        """Parse the SNR angular size string.

        Accepted forms are "D" (single diameter), "MxN" (major x minor
        axes), each optionally followed by "?", or a lone "?" when the
        size is unknown.

        Returns
        -------
        major : float
        minor : float
        flag : str
            (diameter, diameter) if a single value is given;
            (major axis, minor axis) if given as "MxN";
            (None, None) if the size is unknown (lone "?").
            The numbers are returned as listed in the string, i.e. no unit
            conversion is applied.
            Possible flag: "", "?" (uncertain)

        Raises
        ------
        ValueError
            If the numeric part cannot be converted to float.
        """
        flag = ""
        if s.endswith("?"):
            flag = "?"
            s = s.rstrip("?")
        if not s:
            # Lone "?": the size is unknown
            return (None, None, flag)
        parts = s.split("x")
        if len(parts) == 2:
            major, minor = float(parts[0]), float(parts[1])
        else:
            major = minor = float(s)
        return (major, minor, flag)

    @staticmethod
    def _parse_shape(s):
        """Parse the SNR shape (a.k.a. type) string.

        Returns
        -------
        shape : str
        flag : str
            Possible shapes are "S" (shell), "F" (filled-center),
            "C" (composite), or None (very uncertain);
            Possible flag: "", "?" (uncertain)
        """
        flag = ""
        if s.endswith("?"):
            flag = "?"
            s = s.rstrip("?")
        shape = s if s else None
        return (shape, flag)

    @staticmethod
    def _parse_flux(s):
        """Parse the flux density string.

        Returns
        -------
        flux : float
        flag : str
            Flux density, None if no numeric value is given (lone "?").
            Possible flag: "", "?", ">", ">?"
        """
        flag = ""
        if s.startswith(">"):
            flag += ">"
            s = s.lstrip(">")
        if s.endswith("?"):
            flag += "?"
            s = s.rstrip("?")
        try:
            flux = float(s)
        except ValueError:
            flux = None
        return (flux, flag)

    @staticmethod
    def _parse_specindex(s):
        """Parse the spectral index string.

        Returns
        -------
        specindex : float
        flag : str
            Spectral index, None if no numeric value is given
            (lone "?" or "varies").
            Possible flag: "", "?", "varies"

        Raises
        ------
        ValueError
            If an unflagged value cannot be converted to float.
        """
        if s == "varies":
            return (None, "varies")
        if not s.endswith("?"):
            return (float(s), "")
        try:
            specindex = float(s.rstrip("?"))
        except ValueError:
            specindex = None
        return (specindex, "?")

    @staticmethod
    def _parse_othernames(s):
        """Parse the comma-separated other names string to a list of names.

        Surrounding whitespace of each name is removed and empty names
        are dropped; an empty/blank string gives an empty list.
        """
        return [name.strip() for name in s.split(",") if name.strip()]

    @property
    def name(self):
        """Name built from the Galactic coordinates, e.g. "G004.5+06.8"."""
        pattern = "G{glon:05.1f}{glat:+05.1f}"
        return pattern.format(**self.data)

    @property
    def othernames(self):
        return self.data["othernames"]

    @property
    def glon(self):
        return self.data["glon"]

    @property
    def glat(self):
        return self.data["glat"]

    @property
    def ra(self):
        return self.data["ra"]

    @property
    def dec(self):
        return self.data["dec"]

    @property
    def size(self):
        return self.data["size"]

    @property
    def shape(self):
        return self.data["shape"]

    @property
    def flux(self):
        return self.data["flux"]

    @property
    def specindex(self):
        return self.data["specindex"]

    @property
    def data_flat(self):
        """Get the data with tuple items flattened for easier CSV process"""
        data = OrderedDict([
            ("name", self.name),
            ("glon", self.glon),
            ("glat", self.glat),
            ("ra", self.ra),
            ("dec", self.dec),
            ("size_major", self.size[0]),
            ("size_minor", self.size[1]),
            ("size_flag", self.size[2]),
            ("shape", self.shape[0]),
            ("shape_flag", self.shape[1]),
            ("flux", self.flux[0]),
            ("flux_flag", self.flux[1]),
            ("specindex", self.specindex[0]),
            ("specindex_flag", self.specindex[1]),
            ("othernames", self.othernames),
        ])
        return data


def retrieve_snr_data_green(url):
    """Retrieve D. A. Green's Galactic SNRs catalog and parse the HTML
    contents to extract the catalog data.

    Parameters
    ----------
    url : str
        URL to the D. A. Green's SNRs catalog summary data page,
        can also be the path to the local HTML file (optionally prefixed
        with "file://").

    Returns
    -------
    snrdata : list[str]
        A string list with each line representing the information of
        one SNR object.

        Data string format:
        - Column 1, 2: Galactic longitude (l) and latitude (b)
        - Column 3-5: R.A. J2000 (hh mm ss)
        - Column 6, 7: Dec. J2000 (dd mm)
        - Column 8: Size [ arcmin ], `r` if circular, `Mxm` if elliptical;
                    may also contain a "?"
        - Column 9: Type (e.g., S, C, S?, C?)
        - Column 10: Flux density at 1 GHz [ Jy ]
        - Column 11: Spectral index (may contain "?" or be "varies")
        - Column 12: Other name(s), separated by ","

    Raises
    ------
    ValueError
        If the HTML contents have no ``<pre>`` data block inside the body,
        the block holds no data rows, or the number of extracted items
        is odd (i.e., the items cannot be paired up per SNR object).
    requests.HTTPError
        If the remote page is requested and answers with an error status.
    """
    # Strip the leading "file://" if present
    url = re.sub(r"^file://", "", url)
    logger.info("Retrieve Galactic SNRs catalog from: %s", url)
    if os.path.exists(url):
        # A local HTML file; the context manager closes the file handle
        with open(url) as fp:
            html = fp.read()
    else:
        # Remote web page
        r = requests.get(url)
        r.raise_for_status()
        html = r.text
    logger.info("Parse the HTML contents ...")
    soup = bs4.BeautifulSoup(html, "html.parser")
    body = soup.body
    snrdata_tag = body.pre if body is not None else None
    if snrdata_tag is None:
        raise ValueError("no <pre> data block found in the HTML contents")
    snrdata_str = [ch.strip() if isinstance(ch, bs4.element.NavigableString)
                   else ch.string.strip()
                   for ch in snrdata_tag.children]
    # Need the header row plus at least one further item
    if len(snrdata_str) < 2:
        raise ValueError("no SNR data rows found in the HTML contents")
    # Remove the header row
    del snrdata_str[0]
    # Strip the trailing dashes/whitespace from the last data row
    snrdata_str[-1] = re.sub(r"[-\s]*$", "", snrdata_str[-1])
    # The remaining SNR data string list should be even-length, since
    # every two items are the Galactic coordinate and other information.
    if len(snrdata_str) % 2 != 0:
        raise ValueError("length of the parsed SNR data str list is ODD")
    # Concatenate every two items corresponding to the same SNR object
    snrdata_str2 = [" ".join(x) for x in zip(snrdata_str[0::2],
                                             snrdata_str[1::2])]
    logger.info("Done parse the HTML contents: got %s SNR objects",
                len(snrdata_str2))
    return snrdata_str2