aboutsummaryrefslogtreecommitdiffstats
path: root/97suifangqa/apps/utils/xpinyin/__init__.py
diff options
context:
space:
mode:
author Alvin Li <liweitianux@gmail.com> 2013-08-13 14:13:24 +0800
committer Alvin Li <liweitianux@gmail.com> 2013-08-13 14:13:24 +0800
commit 9636d4a6767f49384d5c386bc3f1142c88b90613 (patch)
tree 3a70f6d9e4be1791d36c87cc7cbfd1d5aa2b39dd /97suifangqa/apps/utils/xpinyin/__init__.py
parent 9383d9a8a5988d071766c3d08a5c946e9c5b02ae (diff)
download 97dev-9636d4a6767f49384d5c386bc3f1142c88b90613.tar.bz2
cloned from 'bitbucket', 2013/08/13
Diffstat (limited to '97suifangqa/apps/utils/xpinyin/__init__.py')
-rw-r--r-- 97suifangqa/apps/utils/xpinyin/__init__.py 70
1 files changed, 70 insertions, 0 deletions
diff --git a/97suifangqa/apps/utils/xpinyin/__init__.py b/97suifangqa/apps/utils/xpinyin/__init__.py
new file mode 100644
index 0000000..291c10a
--- /dev/null
+++ b/97suifangqa/apps/utils/xpinyin/__init__.py
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+import os.path
+
+
class Pinyin(object):
    """Translate Chinese characters (hanzi) to pinyin.

    Inspired by flyerhzm's `chinese_pinyin`_ gem.

    Usage
    -----
    ::

        In [1]: from xpinyin import Pinyin
        In [2]: p = Pinyin()
        In [3]: p.get_pinyin(u"上海")
        Out[3]: 'shang-hai'
        In [4]: p.get_initial(u"上")
        Out[4]: 'S'
        In [5]: p.get_py(u"上海")
        Out[5]: 'SH'

    Please pass the Chinese characters as UTF-8 text (the examples above
    use unicode strings).

    .. _chinese_pinyin: https://github.com/flyerhzm/chinese_pinyin
    """

    # Default lookup table: 'Mandarin.dat' located next to this module.
    data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'Mandarin.dat')

    def __init__(self, data_path=data_path):
        """Load the code-point -> readings table from ``data_path``.

        Each non-blank line must hold exactly one tab: the text before it
        becomes the key and the text after it (including the trailing
        newline) becomes the value stored in ``self.dict``.

        :param data_path: path of the tab-separated data file.
        :raises ValueError: if a non-blank line does not split into
            exactly two tab-separated fields.
        """
        self.dict = {}
        # ``with`` guarantees the file handle is closed, also on error.
        with open(data_path) as data_file:
            for line in data_file:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing empty line).
                    continue
                k, v = line.split('\t')
                self.dict[k] = v

    def get_pinyin(self, chars=u'你好', splitter=u'-'):
        """Return the pinyin of ``chars`` joined with ``splitter``.

        For every character found in the table, the first space-separated
        reading is taken, its last character is dropped (NOTE(review):
        presumably the tone digit -- confirm against the data file) and
        the rest is lower-cased.  Characters missing from the table are
        passed through unchanged; a run of consecutive missing characters
        is kept together as a single segment.

        :param chars: text to convert.
        :param splitter: separator placed between segments.
        :returns: the joined string; empty when ``chars`` is empty.
        """
        result = []
        # True when the next unknown character must open a new segment
        # rather than extend the previous (also unknown) one.
        start_new_segment = True
        for char in chars:
            key = "%X" % ord(char)
            try:
                result.append(self.dict[key].split(" ")[0].strip()[:-1]
                              .lower())
                start_new_segment = True
            except KeyError:
                if start_new_segment:
                    result.append(char)
                else:
                    result[-1] += char
                start_new_segment = False

        return splitter.join(result)

    def get_initial(self, chars=u'你好'):
        """Return the initial for the first character of ``chars``.

        The initial is the first character of the first reading stored
        for that character.  If the character is not in the table it is
        returned unchanged.  An empty ``chars`` yields an empty string
        instead of raising ``IndexError``.

        :param chars: text whose first character is looked up.
        :returns: a one-character string, or ``u""`` for empty input.
        """
        if not chars:
            return u""
        first = chars[0]
        key = "%X" % ord(first)
        try:
            # ``u"" +`` keeps the historical unicode return type.
            return u"" + self.dict[key].split(" ")[0][0]
        except KeyError:
            return u"" + first

    def get_py(self, chars=u'你好'):
        """Return the concatenated initials of every character in ``chars``.

        :param chars: text to abbreviate.
        :returns: one initial per input character, in order; characters
            missing from the table are kept as they are.
        """
        return u"".join(self.get_initial(ch) for ch in chars)
+