Spaces:
Sleeping
Sleeping
DWizard
committed on
Commit
·
0a976db
1
Parent(s):
3bfa7ce
spell_check bug fix, add dict
Browse files
Former-commit-id: bf17ca2c7eee77189b866cb1681fc33ea4c226d9
- SRT.py +3 -3
- finetune_data/dict_enzh.csv +15 -1
- finetune_data/dict_freq.txt +6 -6
SRT.py
CHANGED
@@ -418,14 +418,14 @@ class SRT_script():
|
|
418 |
for i in range(len(ready_words)):
|
419 |
word = ready_words[i]
|
420 |
[real_word, pos] = self.get_real_word(word)
|
421 |
-
if not dict.check(word[:pos]):
|
422 |
suggest = term_spellDict.suggest(real_word)
|
423 |
-
if suggest and enchant.utils.levenshtein(
|
424 |
|
425 |
# with open("dislog.log","a") as log:
|
426 |
# if not os.path.exists("dislog.log"):
|
427 |
# log.write("word \t suggest \t levenshtein \n")
|
428 |
-
logging.info(
|
429 |
#print(word + ":" + suggest[0] + ":---:levenshtein:" + str(enchant.utils.levenshtein(word, suggest[0])))
|
430 |
new_word = word.replace(word[:pos],suggest[0])
|
431 |
else:
|
|
|
418 |
for i in range(len(ready_words)):
|
419 |
word = ready_words[i]
|
420 |
[real_word, pos] = self.get_real_word(word)
|
421 |
+
if not dict.check(word[:pos]) and not term_spellDict.check(real_word):
|
422 |
suggest = term_spellDict.suggest(real_word)
|
423 |
+
if suggest and enchant.utils.levenshtein(real_word, suggest[0]) < (len(real_word)+len(suggest[0]))/4: # relax spell check
|
424 |
|
425 |
# with open("dislog.log","a") as log:
|
426 |
# if not os.path.exists("dislog.log"):
|
427 |
# log.write("word \t suggest \t levenshtein \n")
|
428 |
+
logging.info(real_word + "\t" + suggest[0] + "\t" + str(enchant.utils.levenshtein(real_word, suggest[0]))+'\n')
|
429 |
#print(word + ":" + suggest[0] + ":---:levenshtein:" + str(enchant.utils.levenshtein(word, suggest[0])))
|
430 |
new_word = word.replace(word[:pos],suggest[0])
|
431 |
else:
|
finetune_data/dict_enzh.csv
CHANGED
@@ -179,4 +179,18 @@ stalker,追猎
|
|
179 |
disruptor,自爆球
|
180 |
zerg,虫族
|
181 |
protross,神族
|
182 |
-
terran,人族
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
disruptor,自爆球
|
180 |
zerg,虫族
|
181 |
protross,神族
|
182 |
+
terran,人族
|
183 |
+
gas,气矿
|
184 |
+
cannon rush,野炮台
|
185 |
+
supply block,卡人口
|
186 |
+
macro,运营
|
187 |
+
natural expansion,开矿
|
188 |
+
roach warren,蟑螂虫巢
|
189 |
+
proxy,野
|
190 |
+
cyber core,控制芯核
|
191 |
+
prismatic alignment,充能射线
|
192 |
+
flooding,余钱
|
193 |
+
chrono boost,星空加速
|
194 |
+
cheese,狗
|
195 |
+
supply,人口
|
196 |
+
war prism,棱镜
|
finetune_data/dict_freq.txt
CHANGED
@@ -23,8 +23,8 @@ gateway
|
|
23 |
warpgate
|
24 |
immortal
|
25 |
zealot
|
26 |
-
nydus
|
27 |
-
|
28 |
hydralisk
|
29 |
grooved spines
|
30 |
muscular augments
|
@@ -173,12 +173,12 @@ concussive shells
|
|
173 |
stalker
|
174 |
disruptor
|
175 |
zerg
|
176 |
-
|
177 |
terran
|
178 |
starcraft
|
179 |
TvT
|
180 |
Maxpax
|
181 |
-
|
182 |
PvP
|
183 |
ZvZ
|
184 |
TvZ
|
@@ -187,7 +187,7 @@ ZvP
|
|
187 |
PvZ
|
188 |
PvT
|
189 |
ZvT
|
190 |
-
Florencio
|
191 |
cybercore
|
192 |
nest
|
193 |
-
follow-up
|
|
|
|
23 |
warpgate
|
24 |
immortal
|
25 |
zealot
|
26 |
+
nydus
|
27 |
+
worm
|
28 |
hydralisk
|
29 |
grooved spines
|
30 |
muscular augments
|
|
|
173 |
stalker
|
174 |
disruptor
|
175 |
zerg
|
176 |
+
protoss
|
177 |
terran
|
178 |
starcraft
|
179 |
TvT
|
180 |
Maxpax
|
181 |
+
ShowTime
|
182 |
PvP
|
183 |
ZvZ
|
184 |
TvZ
|
|
|
187 |
PvZ
|
188 |
PvT
|
189 |
ZvT
|
|
|
190 |
cybercore
|
191 |
nest
|
192 |
+
follow-up
|
193 |
+
robo
|