File size: 625 Bytes
751936e f4973d4 751936e f4973d4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
def is_chinese(uchar):
    """Return True when ``uchar`` sorts between U+4E00 and U+9FA5 inclusive.

    The range check is a plain string comparison, so it is meant to be
    called with a single character.

    Reference: https://github.com/fxsjy/jieba/blob/master/jieba/__init__.py#L48
    The jieba pattern cited there runs up to U+9FD5; this function stops at
    U+9FA5, so characters in U+9FA6..U+9FD5 are reported as non-Chinese.
    """
    range_start = u'\u4e00'
    range_end = u'\u9fa5'
    if uchar < range_start:
        return False
    return uchar <= range_end
def has_chinese(text):
    """Return True if at least one character of ``text`` passes ``is_chinese``.

    Iteration stops at the first match. Empty input yields False.
    """
    for ch in text:
        if is_chinese(ch):
            return True
    return False
def get_zh_count(text):
    """Count the characters of ``text`` for which ``is_chinese`` is true.

    Returns 0 for empty input.
    """
    total = 0
    for ch in text:
        # is_chinese returns a bool; adding it contributes 1 for each match.
        total += is_chinese(ch)
    return total
def is_all_chinese(text):
    """Return True if every character of ``text`` passes ``is_chinese``.

    Iteration stops at the first character that fails the check.
    Note that empty input returns True, because there is no character
    that fails.
    """
    for ch in text:
        if not is_chinese(ch):
            return False
    return True
def get_digit_count(text):
    """Count the ASCII digits (0-9) found in ``text``.

    Only the ten characters in "0123456789" are counted; other Unicode
    digits such as full-width forms are ignored. Returns 0 for empty input.
    """
    return sum(1 for ch in text if ch in "0123456789")
|