shakhovak committed on
Commit
25e5c2b
1 Parent(s): d14b8fb
app.py CHANGED
@@ -15,18 +15,27 @@ class RUAccent:
15
  self.workdir = os.getcwd()
16
 
17
  def load(self, custom_accent=None, custom_omographs=None):
18
-
19
  if custom_omographs is None:
20
  custom_omographs = {}
21
 
22
  if custom_accent is None:
23
  custom_accent = {}
24
 
25
- self.omographs = json.load(open(join_path(self.workdir, "dictionaries", "file_omo.json"), encoding='utf-8'))
 
 
 
 
 
26
 
27
  self.omographs.update(custom_omographs)
28
 
29
- self.accents = json.load(open(join_path(self.workdir, "dictionaries", "file_norm.json"), encoding='utf-8'))
 
 
 
 
 
30
 
31
  self.accents.update(custom_accent)
32
 
@@ -59,7 +68,9 @@ class RUAccent:
59
  founded_omographs = self._process_omographs(text)
60
  omographs_list.extend(founded_omographs)
61
 
62
- processed_text, unknown_words = self._process_accent(text, founded_omographs)
 
 
63
  unknown_list.extend(unknown_words)
64
 
65
  processed_text = " ".join(processed_text)
@@ -67,7 +78,9 @@ class RUAccent:
67
 
68
  accented_sentence.append(processed_text)
69
 
70
- omographs_list = [f"{key}: {value}" for elem in omographs_list for key, value in elem.items()]
 
 
71
  return accented_sentence, list(set(omographs_list)), list(set(unknown_list))
72
 
73
  def _process_yo(self, text):
@@ -84,9 +97,7 @@ class RUAccent:
84
  for i, word in enumerate(splitted_text):
85
  variants = self.omographs.get(word)
86
  if variants:
87
- founded_omographs.append(
88
- {word: self.omographs[word]["acc_variants"]}
89
- )
90
 
91
  return founded_omographs
92
 
@@ -115,11 +126,32 @@ class RUAccent:
115
  text = text.replace(" " + char, char)
116
  return text
117
 
118
- # # Example usage:
119
- # ru_accent = RUAccent()
120
- # ru_accent.load()
121
- #
122
- # text_to_process = "В этом замке совершенно нет ни одного замка. Наверно я не буду ругаться с нига нига нига из-за этого сучонка"
123
- # processed_text = ru_accent.process_all(text_to_process)
124
- #
125
- # print(processed_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  self.workdir = os.getcwd()
16
 
17
  def load(self, custom_accent=None, custom_omographs=None):
 
18
  if custom_omographs is None:
19
  custom_omographs = {}
20
 
21
  if custom_accent is None:
22
  custom_accent = {}
23
 
24
+ self.omographs = json.load(
25
+ open(
26
+ join_path(self.workdir, "dictionaries", "file_omo.json"),
27
+ encoding="utf-8",
28
+ )
29
+ )
30
 
31
  self.omographs.update(custom_omographs)
32
 
33
+ self.accents = json.load(
34
+ open(
35
+ join_path(self.workdir, "dictionaries", "file_norm.json"),
36
+ encoding="utf-8",
37
+ )
38
+ )
39
 
40
  self.accents.update(custom_accent)
41
 
 
68
  founded_omographs = self._process_omographs(text)
69
  omographs_list.extend(founded_omographs)
70
 
71
+ processed_text, unknown_words = self._process_accent(
72
+ text, founded_omographs
73
+ )
74
  unknown_list.extend(unknown_words)
75
 
76
  processed_text = " ".join(processed_text)
 
78
 
79
  accented_sentence.append(processed_text)
80
 
81
+ omographs_list = [
82
+ f"{key}: {value}" for elem in omographs_list for key, value in elem.items()
83
+ ]
84
  return accented_sentence, list(set(omographs_list)), list(set(unknown_list))
85
 
86
  def _process_yo(self, text):
 
97
  for i, word in enumerate(splitted_text):
98
  variants = self.omographs.get(word)
99
  if variants:
100
+ founded_omographs.append({word: self.omographs[word]["acc_variants"]})
 
 
101
 
102
  return founded_omographs
103
 
 
126
  text = text.replace(" " + char, char)
127
  return text
128
 
129
+
130
+ ru_accent = RUAccent()
131
+ ru_accent.load()
132
+
133
+
134
+ title = "Демо для модели расстановки ударения на русском языке"
135
+
136
+
137
+ description = "Для расстановки ударения необходимо ввести текст в поле ниже. Алгоритм обработает текст и выдаст текст с ударениями, а также 2 списка: омографы, если они есть в тексте и слов, не найденных в словаре."
138
+ examples = ["Я иду в замок повесить замок."]
139
+ outputs = [
140
+ gr.Textbox(label="Обработанный текст"),
141
+ gr.Textbox(label="Омографы"),
142
+ gr.Textbox(label="Нет в словаре"),
143
+ ]
144
+
145
+ theme = "huggingface"
146
+
147
+ interface = gr.Interface(
148
+ fn=ru_accent.process_all,
149
+ inputs=gr.Textbox(label="текст для расстановки ударения"),
150
+ outputs=outputs,
151
+ examples=examples,
152
+ title=title,
153
+ description=description,
154
+ )
155
+
156
+ if __name__ == "__main__":
157
+ interface.launch(debug=True, share=True)
dictionaries/file_norm.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebf4187d80e9702f94253d81a48fa3a14d484e2befaeb939fdca99eb6c42f1d5
3
+ size 178087540
dictionaries/file_omo.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba98b20c885cee2f54da731bb068df53fa6960bd3c8ef36417d8f6ffc90acbff
3
+ size 4240115
requirements.txt DELETED
@@ -1,10 +0,0 @@
1
- blinker==1.7.0
2
- click==8.1.7
3
- colorama==0.4.6
4
- Flask==3.0.0
5
- importlib-metadata==7.0.0
6
- itsdangerous==2.1.2
7
- Jinja2==3.1.2
8
- MarkupSafe==2.1.3
9
- Werkzeug==3.0.1
10
- zipp==3.17.0