blob: 291c10a2381354c54a85991a333d695566c56881 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
|
# -*- coding: utf-8 -*-
import os.path
class Pinyin(object):
    """Translate Chinese hanzi to pinyin in Python.

    Inspired by flyerhzm's `chinese_pinyin`_ gem.

    usage
    -----
    ::

        In [1]: from xpinyin import Pinyin
        In [2]: p = Pinyin()
        In [3]: p.get_pinyin(u"上海")
        Out[3]: 'shang-hai'
        In [4]: p.get_initial(u"上")
        Out[4]: 'S'
        In [5]: p.get_py(u"上海")
        Out[5]: 'SH'

    Pass the hanzi as unicode text.

    .. _chinese_pinyin: https://github.com/flyerhzm/chinese_pinyin
    """

    # Default lookup table: 'Mandarin.dat' located next to this module.
    data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'Mandarin.dat')

    def __init__(self, data_path=data_path):
        """Load the code-point -> readings table from *data_path*.

        Each non-blank line must be ``<KEY>\\t<READINGS>``.  Lookups use the
        upper-case hexadecimal code point of a character as the key, and the
        first space-separated token of the value as the reading.

        Raises:
            IOError/OSError: if the file cannot be opened.
            ValueError: if a non-blank line does not contain exactly one tab.
        """
        self.dict = {}
        # The context manager closes the handle even if a line fails to parse.
        with open(data_path) as data_file:
            for line in data_file:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                key, readings = line.split('\t')
                # The value keeps its trailing newline; the getters only
                # look at the first token and strip it where needed.
                self.dict[key] = readings

    def get_pinyin(self, chars=u'你好', splitter=u'-'):
        """Return the pinyin of *chars*, with syllables joined by *splitter*.

        A character found in the table contributes its first reading,
        lower-cased and with its last character removed (presumably the
        tone digit, e.g. ``SHANG4`` -> ``shang``).  Characters missing from
        the table are kept verbatim, and a run of consecutive missing
        characters is emitted as a single piece.
        """
        pieces = []
        # True while the previous piece is a run of unmapped characters that
        # the next unmapped character should be glued onto.
        in_literal_run = False
        for char in chars:
            readings = self.dict.get("%X" % ord(char))
            if readings is not None:
                first_reading = readings.split(" ")[0].strip()
                pieces.append(first_reading[:-1].lower())
                in_literal_run = False
            elif in_literal_run:
                pieces[-1] += char
            else:
                pieces.append(char)
                in_literal_run = True
        return splitter.join(pieces)

    def get_initial(self, chars=u'你好'):
        """Return the initial of the first character of *chars*.

        The initial is the first letter of the character's first reading as
        stored in the table.  If the character is not in the table it is
        returned unchanged.  Only ``chars[0]`` is examined; an empty input
        yields an empty string.
        """
        if not chars:
            return u""
        first = chars[0]
        readings = self.dict.get("%X" % ord(first))
        if readings is None:
            return u"" + first
        # Prefixing with u"" keeps the result a unicode string on Python 2.
        return u"" + readings.split(" ")[0][0]

    def get_py(self, chars=u'你好'):
        """Return the concatenated initials of every character in *chars*."""
        return u"".join(self.get_initial(char) for char in chars)
|