diff options
author | Alvin Li <liweitianux@gmail.com> | 2013-08-13 14:13:24 +0800 |
---|---|---|
committer | Alvin Li <liweitianux@gmail.com> | 2013-08-13 14:13:24 +0800 |
commit | 9636d4a6767f49384d5c386bc3f1142c88b90613 (patch) | |
tree | 3a70f6d9e4be1791d36c87cc7cbfd1d5aa2b39dd /97suifangqa/apps/utils/xpinyin/__init__.py | |
parent | 9383d9a8a5988d071766c3d08a5c946e9c5b02ae (diff) | |
download | 97dev-9636d4a6767f49384d5c386bc3f1142c88b90613.tar.bz2 |
cloned from 'bitbucket', 2013/08/13
Diffstat (limited to '97suifangqa/apps/utils/xpinyin/__init__.py')
-rw-r--r-- | 97suifangqa/apps/utils/xpinyin/__init__.py | 70 |
1 files changed, 70 insertions, 0 deletions
# Reconstructed from: diff --git a/97suifangqa/apps/utils/xpinyin/__init__.py b/97suifangqa/apps/utils/xpinyin/__init__.py (new file mode 100644, index 0000000..291c10a)
# -*- coding: utf-8 -*-
import os.path


class Pinyin(object):
    """Translate Chinese hanzi to pinyin in Python.

    Inspired by flyerhzm's `chinese_pinyin`_ gem.

    usage
    -----
    ::
        In [1]: from xpinyin import Pinyin
        In [2]: p = Pinyin()
        In [3]: p.get_pinyin(u"上海")
        Out[3]: 'shang-hai'
        In [4]: p.get_initial(u"上")
        Out[4]: 'S'

    Input should be UTF-8 encoded Chinese characters.

    .. _chinese_pinyin: https://github.com/flyerhzm/chinese_pinyin
    """

    # Default lookup table: ``Mandarin.dat`` located next to this module.
    data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'Mandarin.dat')

    def __init__(self, data_path=data_path):
        """Load the code-point -> readings table from *data_path*.

        Each non-blank line must hold a key and a value separated by a tab.
        Keys are looked up elsewhere as ``"%X" % ord(char)``, i.e. the
        upper-case hexadecimal code point; values are space-separated
        readings.

        Raises:
            IOError/OSError: if the file cannot be opened.
            ValueError: if a non-blank line contains no tab.
        """
        self.dict = {}
        # ``with`` guarantees the handle is closed even if parsing fails.
        with open(data_path) as data_file:
            for line in data_file:
                line = line.rstrip('\r\n')
                if not line:
                    # Tolerate blank lines (e.g. a trailing empty line).
                    continue
                key, value = line.split('\t', 1)
                self.dict[key] = value

    def get_pinyin(self, chars=u'你好', splitter=u'-'):
        """Return the pinyin of *chars*, with tokens joined by *splitter*.

        Every character found in the table becomes its own token: the first
        listed reading, lower-cased, with its last character removed
        (presumably the tone digit; the data file is not visible here).
        Consecutive characters that are not in the table are kept verbatim
        and merged into a single token.
        """
        result = []
        in_unmapped_run = False
        for char in chars:
            try:
                readings = self.dict["%X" % ord(char)]
            except KeyError:
                if in_unmapped_run:
                    # Extend the current run of unknown characters.
                    result[-1] += char
                else:
                    result.append(char)
                in_unmapped_run = True
            else:
                result.append(readings.split(" ")[0].strip()[:-1].lower())
                in_unmapped_run = False

        return splitter.join(result)

    def get_initial(self, chars=u'你好'):
        """Return the initial letter for the first character of *chars*.

        Only ``chars[0]`` is considered. If it is in the table, the first
        letter of its first reading is returned as stored; otherwise the
        character itself is returned. An empty *chars* yields ``u""``.
        """
        if not chars:
            return u""
        ch = chars[0]
        try:
            return u"" + self.dict["%X" % ord(ch)].split(" ")[0][0]
        except KeyError:
            return u"" + ch

    def get_py(self, chars=u'你好'):
        """Return the concatenated initials of every character in *chars*."""
        return u"".join(self.get_initial(ch) for ch in chars)