Spaces:
Runtime error
Runtime error
tonic
committed on
Commit
·
4889640
1
Parent(s):
f8ee061
language list and prompt engineering ;-)
Browse files- app.py +19 -13
- lang_list.py +255 -0
app.py
CHANGED
@@ -5,6 +5,7 @@ from surya.ocr import run_ocr
|
|
5 |
from surya.model.detection.segformer import load_model as load_det_model, load_processor as load_det_processor
|
6 |
from surya.model.recognition.model import load_model as load_rec_model
|
7 |
from surya.model.recognition.processor import load_processor as load_rec_processor
|
|
|
8 |
from gradio_client import Client
|
9 |
from dotenv import load_dotenv
|
10 |
import requests
|
@@ -16,12 +17,14 @@ import re
|
|
16 |
|
17 |
title = "# Welcome to AyaTonic"
|
18 |
description = "Learn a New Language With Aya"
|
19 |
-
|
20 |
# Load environment variables
|
21 |
load_dotenv()
|
22 |
COHERE_API_KEY = os.getenv('CO_API_KEY')
|
23 |
SEAMLESSM4T = os.getenv('SEAMLESSM4T')
|
24 |
|
|
|
|
|
|
|
25 |
|
26 |
# Regular expression patterns for each color
|
27 |
patterns = {
|
@@ -66,14 +69,14 @@ class TaggedPhraseExtractor:
|
|
66 |
co = cohere.Client(COHERE_API_KEY)
|
67 |
audio_client = Client(SEAMLESSM4T)
|
68 |
|
69 |
-
def process_audio_to_text(audio_path):
|
70 |
"""
|
71 |
Convert audio input to text using the Gradio client.
|
72 |
"""
|
73 |
result = audio_client.predict(
|
74 |
audio_path,
|
75 |
-
|
76 |
-
|
77 |
api_name="/s2tt"
|
78 |
)
|
79 |
print("Audio Result: ", result)
|
@@ -85,8 +88,8 @@ def process_text_to_audio(text, target_language="English"):
|
|
85 |
"""
|
86 |
result = audio_client.predict(
|
87 |
text,
|
88 |
-
"English",
|
89 |
target_language,
|
|
|
90 |
api_name="/t2st"
|
91 |
)
|
92 |
return result['audio'] # Adjust based on the actual response
|
@@ -141,20 +144,22 @@ def process_input(image=None, file=None, audio=None, text=""):
|
|
141 |
audio_text = process_audio_to_text(audio)
|
142 |
final_text += "\n" + audio_text
|
143 |
|
|
|
|
|
144 |
response = co.generate(
|
145 |
model='c4ai-aya',
|
146 |
-
prompt=
|
147 |
max_tokens=1024,
|
148 |
temperature=0.5
|
149 |
)
|
|
|
150 |
generated_text = response.generations[0].text
|
151 |
print("Generated Text: ", generated_text)
|
152 |
-
|
153 |
-
# Process generated text with command-nightly model
|
154 |
response = co.generate(
|
155 |
model='command-nightly',
|
156 |
-
prompt=
|
157 |
-
max_tokens=
|
158 |
temperature=0.5
|
159 |
)
|
160 |
processed_text = response.generations[0].text
|
@@ -162,7 +167,6 @@ def process_input(image=None, file=None, audio=None, text=""):
|
|
162 |
audio_output = process_text_to_audio(processed_text)
|
163 |
|
164 |
return processed_text, audio_output
|
165 |
-
|
166 |
# Define Gradio interface
|
167 |
iface = gr.Interface(
|
168 |
fn=process_input,
|
@@ -187,7 +191,7 @@ if __name__ == "__main__":
|
|
187 |
# co = cohere.Client('yhA228YGeZSl1ctten8LQxw2dky2nngHetXFjV2Q') # This is your trial API key
|
188 |
# response = co.generate(
|
189 |
# model='c4ai-aya',
|
190 |
-
# prompt='एक यांत्रिक घड़ी दिन के समय को प्रदान करने
|
191 |
# max_tokens=3674,
|
192 |
# temperature=0.9,
|
193 |
# k=0,
|
@@ -224,7 +228,9 @@ iface = gr.Interface(
|
|
224 |
gr.Image(type="pil", label="Camera Input"),
|
225 |
gr.File(label="File Upload"),
|
226 |
gr.Audio(sources="microphone", type="filepath", label="Mic Input"),
|
227 |
-
gr.Textbox(lines=2, label="Text Input")
|
|
|
|
|
228 |
],
|
229 |
outputs=[
|
230 |
gr.RichTextbox(label="Processed Text"),
|
|
|
5 |
from surya.model.detection.segformer import load_model as load_det_model, load_processor as load_det_processor
|
6 |
from surya.model.recognition.model import load_model as load_rec_model
|
7 |
from surya.model.recognition.processor import load_processor as load_rec_processor
|
8 |
+
from lang_list import LANGUAGE_NAME_TO_CODE, TEXT_SOURCE_LANGUAGE_NAMES, S2ST_TARGET_LANGUAGE_NAMES
|
9 |
from gradio_client import Client
|
10 |
from dotenv import load_dotenv
|
11 |
import requests
|
|
|
17 |
|
18 |
title = "# Welcome to AyaTonic"
|
19 |
description = "Learn a New Language With Aya"
|
|
|
20 |
# Load environment variables
|
21 |
load_dotenv()
|
22 |
COHERE_API_KEY = os.getenv('CO_API_KEY')
|
23 |
SEAMLESSM4T = os.getenv('SEAMLESSM4T')
|
24 |
|
25 |
+
inputlanguage = ""
|
26 |
+
producetext = "\n\nProduce a complete expositional blog post in {target_language} based on the above :"
|
27 |
+
formatinputstring = "\n\nthe above text is a learning aid. you must use rich text format to rewrite the above and add 1 . a red color tags for nouns 2. a blue color tag for verbs 3. a green color tag for adjectives and adverbs:"
|
28 |
|
29 |
# Regular expression patterns for each color
|
30 |
patterns = {
|
|
|
69 |
co = cohere.Client(COHERE_API_KEY)
|
70 |
audio_client = Client(SEAMLESSM4T)
|
71 |
|
72 |
+
def process_audio_to_text(audio_path, inputlanguage="English"):
|
73 |
"""
|
74 |
Convert audio input to text using the Gradio client.
|
75 |
"""
|
76 |
result = audio_client.predict(
|
77 |
audio_path,
|
78 |
+
inputlanguage,
|
79 |
+
inputlanguage,
|
80 |
api_name="/s2tt"
|
81 |
)
|
82 |
print("Audio Result: ", result)
|
|
|
88 |
"""
|
89 |
result = audio_client.predict(
|
90 |
text,
|
|
|
91 |
target_language,
|
92 |
+
target_language, # could be varied for learning content
|
93 |
api_name="/t2st"
|
94 |
)
|
95 |
return result['audio'] # Adjust based on the actual response
|
|
|
144 |
audio_text = process_audio_to_text(audio)
|
145 |
final_text += "\n" + audio_text
|
146 |
|
147 |
+
final_text_with_producetext = final_text + producetext
|
148 |
+
|
149 |
response = co.generate(
|
150 |
model='c4ai-aya',
|
151 |
+
prompt=final_text_with_producetext,
|
152 |
max_tokens=1024,
|
153 |
temperature=0.5
|
154 |
)
|
155 |
+
# add graceful handling for errors (overflow)
|
156 |
generated_text = response.generations[0].text
|
157 |
print("Generated Text: ", generated_text)
|
158 |
+
generated_text_with_format = generated_text + "\n" + formatinputstring
|
|
|
159 |
response = co.generate(
|
160 |
model='command-nightly',
|
161 |
+
prompt=generated_text_with_format,
|
162 |
+
max_tokens=4000,
|
163 |
temperature=0.5
|
164 |
)
|
165 |
processed_text = response.generations[0].text
|
|
|
167 |
audio_output = process_text_to_audio(processed_text)
|
168 |
|
169 |
return processed_text, audio_output
|
|
|
170 |
# Define Gradio interface
|
171 |
iface = gr.Interface(
|
172 |
fn=process_input,
|
|
|
191 |
# co = cohere.Client('yhA228YGeZSl1ctten8LQxw2dky2nngHetXFjV2Q') # This is your trial API key
|
192 |
# response = co.generate(
|
193 |
# model='c4ai-aya',
|
194 |
+
# prompt='एक यांत्रिक घड़ी दिन के समय को प्रदान करने के लिए एक गैर-इलेक्ट्रॉनिक तंत्र का उपयोग करती है। एक मुख्य स्प्रिंग का उपयोग यांत्रिक तंत्र को ऊर्जा संग्रहीत करने के लिए किया जाता है। एक यांत्रिक घड़ी में दांतों का एक कुंडल होता है जो धीरे-धीरे मुख्य स्प्रिंग से संचालित होता है। दांतों के कुंडल को एक यांत्रिक तंत्र में स्थानांतरित करने के लिए पहियों की एक श्रृंखला का उपयोग किया जाता है जो हाथों को घड़ी के चेहरे पर दाईं ओर ले जाता है। घड़ी के तंत्र को स्थिर करने और यह सुनिश्चित करने के लिए कि हाथ सही दिशा में घूमते हैं, एक कंपन का उपयोग किया जाता है। ',
|
195 |
# max_tokens=3674,
|
196 |
# temperature=0.9,
|
197 |
# k=0,
|
|
|
228 |
gr.Image(type="pil", label="Camera Input"),
|
229 |
gr.File(label="File Upload"),
|
230 |
gr.Audio(sources="microphone", type="filepath", label="Mic Input"),
|
231 |
+
gr.Textbox(lines=2, label="Text Input"),
|
232 |
+
gr.Dropdown(choices=TEXT_SOURCE_LANGUAGE_NAMES, label="Input Language"),
|
233 |
+
gr.Dropdown(choices=TEXT_SOURCE_LANGUAGE_NAMES, label="Target Language")
|
234 |
],
|
235 |
outputs=[
|
236 |
gr.RichTextbox(label="Processed Text"),
|
lang_list.py
ADDED
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Language dict from https://huggingface.co/spaces/facebook/seamless-m4t-v2-large/blob/main/lang_list.py
|
2 |
+
language_code_to_name = {
|
3 |
+
"afr": "Afrikaans",
|
4 |
+
"amh": "Amharic",
|
5 |
+
"arb": "Modern Standard Arabic",
|
6 |
+
"ary": "Moroccan Arabic",
|
7 |
+
"arz": "Egyptian Arabic",
|
8 |
+
"asm": "Assamese",
|
9 |
+
"ast": "Asturian",
|
10 |
+
"azj": "North Azerbaijani",
|
11 |
+
"bel": "Belarusian",
|
12 |
+
"ben": "Bengali",
|
13 |
+
"bos": "Bosnian",
|
14 |
+
"bul": "Bulgarian",
|
15 |
+
"cat": "Catalan",
|
16 |
+
"ceb": "Cebuano",
|
17 |
+
"ces": "Czech",
|
18 |
+
"ckb": "Central Kurdish",
|
19 |
+
"cmn": "Mandarin Chinese",
|
20 |
+
"cym": "Welsh",
|
21 |
+
"dan": "Danish",
|
22 |
+
"deu": "German",
|
23 |
+
"ell": "Greek",
|
24 |
+
"eng": "English",
|
25 |
+
"est": "Estonian",
|
26 |
+
"eus": "Basque",
|
27 |
+
"fin": "Finnish",
|
28 |
+
"fra": "French",
|
29 |
+
"gaz": "West Central Oromo",
|
30 |
+
"gle": "Irish",
|
31 |
+
"glg": "Galician",
|
32 |
+
"guj": "Gujarati",
|
33 |
+
"heb": "Hebrew",
|
34 |
+
"hin": "Hindi",
|
35 |
+
"hrv": "Croatian",
|
36 |
+
"hun": "Hungarian",
|
37 |
+
"hye": "Armenian",
|
38 |
+
"ibo": "Igbo",
|
39 |
+
"ind": "Indonesian",
|
40 |
+
"isl": "Icelandic",
|
41 |
+
"ita": "Italian",
|
42 |
+
"jav": "Javanese",
|
43 |
+
"jpn": "Japanese",
|
44 |
+
"kam": "Kamba",
|
45 |
+
"kan": "Kannada",
|
46 |
+
"kat": "Georgian",
|
47 |
+
"kaz": "Kazakh",
|
48 |
+
"kea": "Kabuverdianu",
|
49 |
+
"khk": "Halh Mongolian",
|
50 |
+
"khm": "Khmer",
|
51 |
+
"kir": "Kyrgyz",
|
52 |
+
"kor": "Korean",
|
53 |
+
"lao": "Lao",
|
54 |
+
"lit": "Lithuanian",
|
55 |
+
"ltz": "Luxembourgish",
|
56 |
+
"lug": "Ganda",
|
57 |
+
"luo": "Luo",
|
58 |
+
"lvs": "Standard Latvian",
|
59 |
+
"mai": "Maithili",
|
60 |
+
"mal": "Malayalam",
|
61 |
+
"mar": "Marathi",
|
62 |
+
"mkd": "Macedonian",
|
63 |
+
"mlt": "Maltese",
|
64 |
+
"mni": "Meitei",
|
65 |
+
"mya": "Burmese",
|
66 |
+
"nld": "Dutch",
|
67 |
+
"nno": "Norwegian Nynorsk",
|
68 |
+
"nob": "Norwegian Bokm\u00e5l",
|
69 |
+
"npi": "Nepali",
|
70 |
+
"nya": "Nyanja",
|
71 |
+
"oci": "Occitan",
|
72 |
+
"ory": "Odia",
|
73 |
+
"pan": "Punjabi",
|
74 |
+
"pbt": "Southern Pashto",
|
75 |
+
"pes": "Western Persian",
|
76 |
+
"pol": "Polish",
|
77 |
+
"por": "Portuguese",
|
78 |
+
"ron": "Romanian",
|
79 |
+
"rus": "Russian",
|
80 |
+
"slk": "Slovak",
|
81 |
+
"slv": "Slovenian",
|
82 |
+
"sna": "Shona",
|
83 |
+
"snd": "Sindhi",
|
84 |
+
"som": "Somali",
|
85 |
+
"spa": "Spanish",
|
86 |
+
"srp": "Serbian",
|
87 |
+
"swe": "Swedish",
|
88 |
+
"swh": "Swahili",
|
89 |
+
"tam": "Tamil",
|
90 |
+
"tel": "Telugu",
|
91 |
+
"tgk": "Tajik",
|
92 |
+
"tgl": "Tagalog",
|
93 |
+
"tha": "Thai",
|
94 |
+
"tur": "Turkish",
|
95 |
+
"ukr": "Ukrainian",
|
96 |
+
"urd": "Urdu",
|
97 |
+
"uzn": "Northern Uzbek",
|
98 |
+
"vie": "Vietnamese",
|
99 |
+
"xho": "Xhosa",
|
100 |
+
"yor": "Yoruba",
|
101 |
+
"yue": "Cantonese",
|
102 |
+
"zlm": "Colloquial Malay",
|
103 |
+
"zsm": "Standard Malay",
|
104 |
+
"zul": "Zulu",
|
105 |
+
}
|
106 |
+
LANGUAGE_NAME_TO_CODE = {v: k for k, v in language_code_to_name.items()}
|
107 |
+
|
108 |
+
# Source langs: S2ST / S2TT / ASR don't need source lang
|
109 |
+
# T2TT / T2ST use this
|
110 |
+
text_source_language_codes = [
|
111 |
+
"afr",
|
112 |
+
"amh",
|
113 |
+
"arb",
|
114 |
+
"ary",
|
115 |
+
"arz",
|
116 |
+
"asm",
|
117 |
+
"azj",
|
118 |
+
"bel",
|
119 |
+
"ben",
|
120 |
+
"bos",
|
121 |
+
"bul",
|
122 |
+
"cat",
|
123 |
+
"ceb",
|
124 |
+
"ces",
|
125 |
+
"ckb",
|
126 |
+
"cmn",
|
127 |
+
"cym",
|
128 |
+
"dan",
|
129 |
+
"deu",
|
130 |
+
"ell",
|
131 |
+
"eng",
|
132 |
+
"est",
|
133 |
+
"eus",
|
134 |
+
"fin",
|
135 |
+
"fra",
|
136 |
+
"gaz",
|
137 |
+
"gle",
|
138 |
+
"glg",
|
139 |
+
"guj",
|
140 |
+
"heb",
|
141 |
+
"hin",
|
142 |
+
"hrv",
|
143 |
+
"hun",
|
144 |
+
"hye",
|
145 |
+
"ibo",
|
146 |
+
"ind",
|
147 |
+
"isl",
|
148 |
+
"ita",
|
149 |
+
"jav",
|
150 |
+
"jpn",
|
151 |
+
"kan",
|
152 |
+
"kat",
|
153 |
+
"kaz",
|
154 |
+
"khk",
|
155 |
+
"khm",
|
156 |
+
"kir",
|
157 |
+
"kor",
|
158 |
+
"lao",
|
159 |
+
"lit",
|
160 |
+
"lug",
|
161 |
+
"luo",
|
162 |
+
"lvs",
|
163 |
+
"mai",
|
164 |
+
"mal",
|
165 |
+
"mar",
|
166 |
+
"mkd",
|
167 |
+
"mlt",
|
168 |
+
"mni",
|
169 |
+
"mya",
|
170 |
+
"nld",
|
171 |
+
"nno",
|
172 |
+
"nob",
|
173 |
+
"npi",
|
174 |
+
"nya",
|
175 |
+
"ory",
|
176 |
+
"pan",
|
177 |
+
"pbt",
|
178 |
+
"pes",
|
179 |
+
"pol",
|
180 |
+
"por",
|
181 |
+
"ron",
|
182 |
+
"rus",
|
183 |
+
"slk",
|
184 |
+
"slv",
|
185 |
+
"sna",
|
186 |
+
"snd",
|
187 |
+
"som",
|
188 |
+
"spa",
|
189 |
+
"srp",
|
190 |
+
"swe",
|
191 |
+
"swh",
|
192 |
+
"tam",
|
193 |
+
"tel",
|
194 |
+
"tgk",
|
195 |
+
"tgl",
|
196 |
+
"tha",
|
197 |
+
"tur",
|
198 |
+
"ukr",
|
199 |
+
"urd",
|
200 |
+
"uzn",
|
201 |
+
"vie",
|
202 |
+
"yor",
|
203 |
+
"yue",
|
204 |
+
"zsm",
|
205 |
+
"zul",
|
206 |
+
]
|
207 |
+
TEXT_SOURCE_LANGUAGE_NAMES = sorted([language_code_to_name[code] for code in text_source_language_codes])
|
208 |
+
|
209 |
+
# Target langs:
|
210 |
+
# S2ST / T2ST
|
211 |
+
s2st_target_language_codes = [
|
212 |
+
"eng",
|
213 |
+
"arb",
|
214 |
+
"ben",
|
215 |
+
"cat",
|
216 |
+
"ces",
|
217 |
+
"cmn",
|
218 |
+
"cym",
|
219 |
+
"dan",
|
220 |
+
"deu",
|
221 |
+
"est",
|
222 |
+
"fin",
|
223 |
+
"fra",
|
224 |
+
"hin",
|
225 |
+
"ind",
|
226 |
+
"ita",
|
227 |
+
"jpn",
|
228 |
+
"kor",
|
229 |
+
"mlt",
|
230 |
+
"nld",
|
231 |
+
"pes",
|
232 |
+
"pol",
|
233 |
+
"por",
|
234 |
+
"ron",
|
235 |
+
"rus",
|
236 |
+
"slk",
|
237 |
+
"spa",
|
238 |
+
"swe",
|
239 |
+
"swh",
|
240 |
+
"tel",
|
241 |
+
"tgl",
|
242 |
+
"tha",
|
243 |
+
"tur",
|
244 |
+
"ukr",
|
245 |
+
"urd",
|
246 |
+
"uzn",
|
247 |
+
"vie",
|
248 |
+
]
|
249 |
+
S2ST_TARGET_LANGUAGE_NAMES = sorted([language_code_to_name[code] for code in s2st_target_language_codes])
|
250 |
+
T2ST_TARGET_LANGUAGE_NAMES = S2ST_TARGET_LANGUAGE_NAMES
|
251 |
+
|
252 |
+
# S2TT / T2TT / ASR
|
253 |
+
S2TT_TARGET_LANGUAGE_NAMES = TEXT_SOURCE_LANGUAGE_NAMES
|
254 |
+
T2TT_TARGET_LANGUAGE_NAMES = TEXT_SOURCE_LANGUAGE_NAMES
|
255 |
+
ASR_TARGET_LANGUAGE_NAMES = TEXT_SOURCE_LANGUAGE_NAMES
|