akoksal committed on
Commit
77c5165
·
1 Parent(s): abf954c
Files changed (1) hide show
  1. app.py +51 -4
app.py CHANGED
@@ -7,6 +7,53 @@ tdk1 = pipeline('text-generation', model='notexist/tttf', tokenizer=tokenizer1)
7
  tokenizer2 = AutoTokenizer.from_pretrained("notexist/ttte")
8
  tdk2 = pipeline('text-generation', model='notexist/ttte', tokenizer=tokenizer2)
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def predict(name, sl, topk, topp):
11
  if name == "":
12
  x1 = tdk1(f"<|endoftext|>",
@@ -28,9 +75,9 @@ def predict(name, sl, topk, topp):
28
  )[0]["generated_text"]
29
 
30
  if "[TEXT]" not in x2:
31
- return x1[len(f"<|endoftext|>"):]
32
  else:
33
- return x1[len(f"<|endoftext|>"):]+"\n\n"+x2[len(f"<|endoftext|>{new_name}\n\n"):].replace("[TEXT]", " "+new_name+" ")
34
  else:
35
  x1 = tdk1(f"<|endoftext|>{name}\n\n",
36
  do_sample=True,
@@ -50,9 +97,9 @@ def predict(name, sl, topk, topp):
50
  )[0]["generated_text"]
51
 
52
  if "[TEXT]" not in x2:
53
- return x1[len(f"<|endoftext|>{name}\n\n"):]
54
  else:
55
- return x1[len(f"<|endoftext|>{name}\n\n"):]+"\n\n"+x2[len(f"<|endoftext|>{name}\n\n"):].replace("[TEXT]", " "+name+" ")
56
 
57
 
58
 
 
7
  tokenizer2 = AutoTokenizer.from_pretrained("notexist/ttte")
8
  tdk2 = pipeline('text-generation', model='notexist/ttte', tokenizer=tokenizer2)
9
 
10
+
11
# Speller borrowed from http://turkceddi.blogspot.com/
# Lowercase vowels recognised by the speller (circumflexed forms included).
lower_vowel = {
    'a', 'â', 'e', 'ê', 'ı', 'î', 'i',
    'o', 'ô', 'ö', 'u', 'û', 'ü',
}

# (pattern, cut) pairs. In a pattern, '1' stands for a vowel and '0' for any
# other character; `cut` is how many leading characters are sliced off as one
# syllable when the pattern matches. Entries are tried in order and the first
# match wins, so the ordering below is significant.
SPELL_SLICER = (
    ('001000', 5),
    ('000100', 5),
    ('01000', 4),
    ('00100', 4),
    ('00010', 4),
    ('1000', 3),
    ('0100', 3),
    ('0011', 3),
    ('0010', 3),
    ('011', 2),
    ('010', 2),
    ('100', 2),
    ('10', 1),
    ('11', 1),
)
15
+
16
+
17
def to_lower(word):
    """Lowercase *word* using Turkish casing for the dotted/dotless I pair.

    'İ' is mapped to 'i' and 'I' to 'ı' before the generic ``str.lower()``
    call handles every remaining character.
    """
    return word.replace('İ', 'i').replace('I', 'ı').lower()
21
+
22
+
23
def wordtoten(word: str):
    """Return a '0'/'1' mask of *word*, one digit per character.

    A character found in ``lower_vowel`` becomes '1'; every other character
    becomes '0'. The mask has the same length as *word*.
    """
    return ''.join('1' if letter in lower_vowel else '0' for letter in word)
32
+
33
def spellword(word: str):
    """Split *word* into syllables and join them with ``$``.

    The word is lowercased with ``to_lower``, turned into a vowel mask with
    ``wordtoten``, and then repeatedly sliced using the first entry of
    ``SPELL_SLICER`` whose pattern prefixes the remaining mask.

    Args:
        word: The word to syllabify.

    Returns:
        The syllables joined by ``$`` (an empty string for empty input), or
        ``False`` when the number of syllables produced differs from the
        number of vowels in the word. ``False`` is not a string, so callers
        must check the result before concatenating it.
    """
    word = to_lower(word)
    tenword = wordtoten(word)
    # One syllable is expected per vowel.
    len_spell = tenword.count('1')
    syllable_list = []

    for _ in range(len_spell):
        for pattern, cut in SPELL_SLICER:
            if tenword.startswith(pattern):
                syllable_list.append(word[:cut])
                word = word[cut:]
                tenword = tenword[cut:]
                break
        else:
            # Nothing matched and the state is unchanged, so further passes
            # cannot match either.
            break

    if tenword == '0' and syllable_list:
        # A single trailing non-vowel closes the previous syllable. The
        # emptiness guard avoids an IndexError for a lone non-vowel input
        # such as "b", which now falls through to the False return below.
        syllable_list[-1] += word
    elif word:
        syllable_list.append(word)

    if len(syllable_list) != len_spell:
        return False

    return "$".join(syllable_list)
56
+
57
  def predict(name, sl, topk, topp):
58
  if name == "":
59
  x1 = tdk1(f"<|endoftext|>",
 
75
  )[0]["generated_text"]
76
 
77
  if "[TEXT]" not in x2:
78
+ return x1[len(f"<|endoftext|>"):]+"\n\n"+spellword(new_name)
79
  else:
80
+ return x1[len(f"<|endoftext|>"):]+"\n\n"+spellword(new_name)+"\n\n"+x2[len(f"<|endoftext|>{new_name}\n\n"):].replace("[TEXT]", " "+new_name+" ")
81
  else:
82
  x1 = tdk1(f"<|endoftext|>{name}\n\n",
83
  do_sample=True,
 
97
  )[0]["generated_text"]
98
 
99
  if "[TEXT]" not in x2:
100
+ return x1[len(f"<|endoftext|>{name}\n\n"):]+"\n\n"+spellword(name)
101
  else:
102
+ return x1[len(f"<|endoftext|>{name}\n\n"):]+"\n\n"+spellword(name)+"\n\n"+x2[len(f"<|endoftext|>{name}\n\n"):].replace("[TEXT]", " "+name+" ")
103
 
104
 
105