aboutsummaryrefslogtreecommitdiffstats
path: root/97suifangqa/apps/utils/xpinyin/__init__.py
blob: 291c10a2381354c54a85991a333d695566c56881 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# -*- coding: utf-8 -*-
import os.path


class Pinyin(object):
    """Translate Chinese characters (hanzi) to pinyin.

    Inspired by flyerhzm's `chinese_pinyin`_ gem.

    Usage
    -----
    ::

        In [1]: from xpinyin import Pinyin
        In [2]: p = Pinyin()
        In [3]: p.get_pinyin(u"上海")
        Out[3]: 'shang-hai'
        In [4]: p.get_initial(u"上")
        Out[4]: 'S'

    Characters are looked up by their code point (``ord``), so pass the
    Chinese text as unicode strings.

    .. _chinese_pinyin: https://github.com/flyerhzm/chinese_pinyin
    """

    # Default lookup table: the ``Mandarin.dat`` file next to this module.
    data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'Mandarin.dat')

    def __init__(self, data_path=data_path):
        """Load the lookup table from *data_path*.

        Each non-blank line must have the form ``KEY<TAB>VALUE``, where KEY
        is the upper-case hexadecimal code point of a character and VALUE
        is a space-separated list of readings (only the first one is used
        by the lookup methods).

        :param data_path: path of the data file; defaults to the bundled
            ``Mandarin.dat``.
        :raises IOError: (``OSError`` on Python 3) if the file cannot be
            opened.
        :raises ValueError: if a non-blank line does not contain exactly
            one tab.
        """
        # Public attribute name kept for backward compatibility.
        self.dict = {}
        # The context manager guarantees the file handle is closed, even
        # if a malformed line raises while loading.
        with open(data_path) as data_file:
            for line in data_file:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing one at EOF).
                    continue
                k, v = line.split('\t')
                self.dict[k] = v

    def get_pinyin(self, chars=u'你好', splitter=u'-'):
        """Return the pinyin of *chars*, syllables joined by *splitter*.

        Every character found in the table becomes its own lower-cased
        syllable. Consecutive characters that are not in the table are
        kept verbatim and grouped into a single piece.

        :param chars: text to convert.
        :param splitter: separator placed between the resulting pieces.
        :returns: the joined string (empty when *chars* is empty).
        """
        pieces = []
        # True when the next unknown character must open a new piece
        # instead of extending the current run of unknown characters.
        start_new_run = True
        for char in chars:
            try:
                readings = self.dict["%X" % ord(char)]
            except KeyError:
                if start_new_run:
                    pieces.append(char)
                else:
                    pieces[-1] += char
                start_new_run = False
            else:
                first_reading = readings.split(" ")[0].strip()
                # Drop the reading's last character (presumably the tone
                # digit of the data format) before lower-casing.
                pieces.append(first_reading[:-1].lower())
                start_new_run = True

        return splitter.join(pieces)

    def get_initial(self, chars=u'你好'):
        """Return the initial letter for the first character of *chars*.

        Only ``chars[0]`` is considered. When it is in the table, the
        first letter of its first reading is returned exactly as stored in
        the data file; otherwise the character itself is returned.

        :param chars: text whose first character is looked up.
        :returns: a one-character string, or an empty string when *chars*
            is empty.
        """
        if not chars:
            # Nothing to look up; mirrors get_py(u'') returning u''.
            return u""
        ch = chars[0]
        try:
            readings = self.dict["%X" % ord(ch)]
        except KeyError:
            initial = ch
        else:
            initial = readings.split(" ")[0][0]
        # Concatenating with u"" keeps the result unicode on Python 2.
        return u"" + initial

    def get_py(self, chars=u'你好'):
        """Return the concatenated initials of every character in *chars*.

        Each character is mapped through :meth:`get_initial`.

        :param chars: text to abbreviate.
        :returns: one initial per input character, in order.
        """
        return u"".join([self.get_initial(ch) for ch in chars])