Spaces:
Runtime error
Runtime error
Spelling
Browse files
app.py
CHANGED
@@ -7,6 +7,53 @@ tdk1 = pipeline('text-generation', model='notexist/tttf', tokenizer=tokenizer1)
|
|
7 |
tokenizer2 = AutoTokenizer.from_pretrained("notexist/ttte")
|
8 |
tdk2 = pipeline('text-generation', model='notexist/ttte', tokenizer=tokenizer2)
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
def predict(name, sl, topk, topp):
|
11 |
if name == "":
|
12 |
x1 = tdk1(f"<|endoftext|>",
|
@@ -28,9 +75,9 @@ def predict(name, sl, topk, topp):
|
|
28 |
)[0]["generated_text"]
|
29 |
|
30 |
if "[TEXT]" not in x2:
|
31 |
-
return x1[len(f"<|endoftext|>"):]
|
32 |
else:
|
33 |
-
return x1[len(f"<|endoftext|>"):]+"\n\n"+x2[len(f"<|endoftext|>{new_name}\n\n"):].replace("[TEXT]", " "+new_name+" ")
|
34 |
else:
|
35 |
x1 = tdk1(f"<|endoftext|>{name}\n\n",
|
36 |
do_sample=True,
|
@@ -50,9 +97,9 @@ def predict(name, sl, topk, topp):
|
|
50 |
)[0]["generated_text"]
|
51 |
|
52 |
if "[TEXT]" not in x2:
|
53 |
-
return x1[len(f"<|endoftext|>{name}\n\n"):]
|
54 |
else:
|
55 |
-
return x1[len(f"<|endoftext|>{name}\n\n"):]+"\n\n"+x2[len(f"<|endoftext|>{name}\n\n"):].replace("[TEXT]", " "+name+" ")
|
56 |
|
57 |
|
58 |
|
|
|
7 |
tokenizer2 = AutoTokenizer.from_pretrained("notexist/ttte")
|
8 |
tdk2 = pipeline('text-generation', model='notexist/ttte', tokenizer=tokenizer2)
|
9 |
|
10 |
+
|
11 |
+
# Syllable splitter ("speller") borrowed from http://turkceddi.blogspot.com/

# Lower-case letters treated as vowels (circumflexed forms included).
lower_vowel = set("aâeêıîioôöuûü")

# (mask prefix, cut length) pairs, tried in this order. A mask is a string in
# which '1' marks a vowel and '0' marks any other character; when the mask of
# the remaining text starts with the prefix, the first `cut length` characters
# are split off as one syllable.
SPELL_SLICER = (
    ('001000', 5),
    ('000100', 5),
    ('01000', 4),
    ('00100', 4),
    ('00010', 4),
    ('1000', 3),
    ('0100', 3),
    ('0011', 3),
    ('0010', 3),
    ('011', 2),
    ('010', 2),
    ('100', 2),
    ('10', 1),
    ('11', 1),
)
|
15 |
+
|
16 |
+
|
17 |
+
def to_lower(word):
    """Lower-case *word*, mapping 'İ' to 'i' and 'I' to 'ı' first.

    The two capital letters are substituted before ``str.lower()`` runs so
    that the dotted/dotless distinction is kept in the result.
    """
    capital_i_map = str.maketrans({'İ': 'i', 'I': 'ı'})
    return word.translate(capital_i_map).lower()
|
21 |
+
|
22 |
+
|
23 |
+
def wordtoten(word: str):
    """Return a mask of *word*: '1' for each vowel, '0' for anything else.

    A character counts as a vowel when it is a member of ``lower_vowel``;
    the result has the same length as *word*.
    """
    return ''.join('1' if letter in lower_vowel else '0' for letter in word)
|
32 |
+
|
33 |
+
def spellword(word: str):
    """Split *word* into syllables and join them with ``$``.

    The word is lower-cased with ``to_lower`` and converted to a vowel mask
    with ``wordtoten``. Syllables are then cut off the front one at a time,
    using the first ``SPELL_SLICER`` entry whose pattern prefixes the
    remaining mask.

    Args:
        word: Text to syllabify. Every character that is not in
            ``lower_vowel`` is treated like a consonant.

    Returns:
        The syllables joined by ``"$"`` (e.g. ``"mer$ha$ba"`` for
        ``"merhaba"``), or ``False`` when the number of pieces produced
        does not equal the number of vowels. An empty *word* gives ``""``.

    NOTE(review): ``False`` is not a string; a caller that concatenates the
    result must check for it first.
    """
    word = to_lower(word)
    tenword = wordtoten(word)
    # A well-formed result has exactly one syllable per vowel.
    len_spell = tenword.count('1')
    syllable_list = []

    for _ in range(len_spell):
        for pattern, cut in SPELL_SLICER:
            if tenword.startswith(pattern):
                syllable_list.append(word[:cut])
                word = word[cut:]
                tenword = tenword[cut:]
                break
        else:
            # No pattern fits the remaining mask; further passes would see
            # the same state, so stop early.
            break

    if tenword == '0' and syllable_list:
        # A single trailing non-vowel belongs to the last syllable. The
        # emptiness guard prevents an IndexError for input such as "b".
        syllable_list[-1] += word
    elif word:
        # Whatever is left becomes the final piece.
        syllable_list.append(word)

    if len(syllable_list) != len_spell:
        return False

    return "$".join(syllable_list)
|
56 |
+
|
57 |
def predict(name, sl, topk, topp):
|
58 |
if name == "":
|
59 |
x1 = tdk1(f"<|endoftext|>",
|
|
|
75 |
)[0]["generated_text"]
|
76 |
|
77 |
if "[TEXT]" not in x2:
|
78 |
+
return x1[len(f"<|endoftext|>"):]+"\n\n"+spellword(new_name)
|
79 |
else:
|
80 |
+
return x1[len(f"<|endoftext|>"):]+"\n\n"+spellword(new_name)+"\n\n"+x2[len(f"<|endoftext|>{new_name}\n\n"):].replace("[TEXT]", " "+new_name+" ")
|
81 |
else:
|
82 |
x1 = tdk1(f"<|endoftext|>{name}\n\n",
|
83 |
do_sample=True,
|
|
|
97 |
)[0]["generated_text"]
|
98 |
|
99 |
if "[TEXT]" not in x2:
|
100 |
+
return x1[len(f"<|endoftext|>{name}\n\n"):]+"\n\n"+spellword(name)
|
101 |
else:
|
102 |
+
return x1[len(f"<|endoftext|>{name}\n\n"):]+"\n\n"+spellword(name)+"\n\n"+x2[len(f"<|endoftext|>{name}\n\n"):].replace("[TEXT]", " "+name+" ")
|
103 |
|
104 |
|
105 |
|