Spaces:
Sleeping
Sleeping
rt
Browse files- multi_language.py +17 -13
multi_language.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import os
|
2 |
import functools
|
3 |
-
import
|
4 |
import pickle
|
5 |
import time
|
6 |
|
@@ -79,22 +79,26 @@ def lru_file_cache(maxsize=128, ttl=None, filename=None):
|
|
79 |
return decorator_function
|
80 |
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
def extract_chinese_characters(file_path):
|
|
|
84 |
with open(file_path, 'r', encoding='utf-8') as f:
|
85 |
content = f.read()
|
86 |
-
|
87 |
-
|
88 |
-
for
|
89 |
-
if
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
chinese_characters.append(sentence)
|
96 |
-
sentence = {'file':file_path, 'begin':-1, 'end':-1, 'word': ""}
|
97 |
-
return chinese_characters
|
98 |
|
99 |
def extract_chinese_characters_from_directory(directory_path):
|
100 |
chinese_characters = []
|
|
|
1 |
import os
|
2 |
import functools
|
3 |
+
import re
|
4 |
import pickle
|
5 |
import time
|
6 |
|
|
|
79 |
return decorator_function
|
80 |
|
81 |
|
82 |
+
# Compiled once at import time instead of on every call.
# Matches a single code point in the CJK Unified Ideographs block (U+4E00-U+9FFF).
_CHINESE_CHAR_RE = re.compile(r'[\u4e00-\u9fff]')


def contains_chinese(string):
    """
    Return True if the given string contains Chinese characters, False otherwise.

    A character counts as Chinese when it lies in the range U+4E00-U+9FFF;
    one such character anywhere in ``string`` is enough. An empty string
    yields False.
    """
    return _CHINESE_CHAR_RE.search(string) is not None
|
88 |
|
89 |
def extract_chinese_characters(file_path):
    """
    Collect the identifier nodes of a Python source file that contain Chinese.

    The file is read as UTF-8 and parsed with ``ast``. Every ``ast.Name``
    node whose ``id`` contains a Chinese character (as decided by
    ``contains_chinese``) is printed and collected. Only ``ast.Name`` nodes
    are inspected.

    Args:
        file_path: Path of the Python source file to scan.

    Returns:
        A list of the matching ``ast.Name`` nodes, in ``ast.walk`` order.

    Raises:
        SyntaxError: If the file content is not valid Python.
    """
    # Local import: ``ast`` is not among the module-level imports visible here.
    import ast

    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Passing the filename makes a SyntaxError name the offending file
    # instead of the default "<unknown>".
    root = ast.parse(content, filename=str(file_path))

    syntax = []
    for node in ast.walk(root):
        if isinstance(node, ast.Name) and contains_chinese(node.id):
            # Diagnostic output kept from the original implementation.
            print(node.id)
            syntax.append(node)

    return syntax
|
|
|
|
|
|
|
102 |
|
103 |
def extract_chinese_characters_from_directory(directory_path):
|
104 |
chinese_characters = []
|