kwabs22 committed • Commit ba25e9b • Parent: 8703cf4

Removed googletrans options to test

Files changed:
- app.py (+61 -61)
- requirements.txt (+0 -2)
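The commit only comments out the googletrans imports and the widgets wired to them, so the change is easy to revert once the test is done. For illustration only (not part of this commit), the same test could be run behind an import guard so app.py keeps working whether or not the package is installed; the GOOGLETRANS_AVAILABLE flag and the translate_or_passthrough helper below are hypothetical names:

# Hypothetical sketch, not from this commit: make googletrans optional so the
# Space still starts while the dependency is being tested or removed.
try:
    from googletrans import Translator, LANGUAGES
    GOOGLETRANS_AVAILABLE = True
    langdropdown_choices = [f"{code}: {name}" for code, name in LANGUAGES.items()]
except ImportError:
    GOOGLETRANS_AVAILABLE = False
    langdropdown_choices = ["en: english", "de: german"]  # minimal fallback for the dropdowns

def translate_or_passthrough(text, dest="de"):
    """Translate with googletrans when it is importable, otherwise echo the input."""
    if not GOOGLETRANS_AVAILABLE:
        return text
    return Translator().translate(text, dest=dest).text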
app.py CHANGED

--- a/app.py
+++ b/app.py
@@ -1,5 +1,5 @@
-from googletrans import Translator
-from googletrans import LANGUAGES
+#from googletrans import Translator
+#from googletrans import LANGUAGES
 import spacy
 import gradio as gr
 import nltk
@@ -85,7 +85,7 @@ nlp_ja = spacy.load("ja_core_news_sm")
 nlp_zh = spacy.load("zh_core_web_sm")
 nlp_en_syllable = spacy.load("en_core_web_sm")
 nlp_en_syllable.add_pipe("syllables", after="tagger") #https://spacy.io/universe/project/spacy_syllables/
-langdropdown_choices = [f"{code}: {name}" for code, name in LANGUAGES.items()]
+#langdropdown_choices = [f"{code}: {name}" for code, name in LANGUAGES.items()]
 
 nlp = spacy.load('en_core_web_sm')
 #translator = Translator()
@@ -160,7 +160,7 @@ def BatchWordChunk(sentence):
 
 # Translate from English to French
 
-langdest = gr.Dropdown(choices=langdropdown_choices, label="Choose Language", value="de: german") #["af", "de", "es", "ko", "ja", "zh-cn"]
+#langdest = gr.Dropdown(choices=langdropdown_choices, label="Choose Language", value="de: german") #["af", "de", "es", "ko", "ja", "zh-cn"]
 
 ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks", "Spelling Chunks"], label="Choose Chunk Type", value="Chunks")
 
@@ -248,8 +248,8 @@ def merge_lines(roman_file, w4w_file, full_mean_file, macaronic_file):
 
     return "\n".join(merged_lines)
 
-TTSLangOptions = gr.Dropdown(choices=langdropdown_choices, value="en: english", label="choose the language of the srt/text accent") #["en", "de", "es", "ja", "ko", "zh-cn"]
-TTSLangOptions2 = gr.Dropdown(choices=langdropdown_choices, value="en: english", label="choose the language of the srt/text accent")
+#TTSLangOptions = gr.Dropdown(choices=langdropdown_choices, value="en: english", label="choose the language of the srt/text accent") #["en", "de", "es", "ja", "ko", "zh-cn"]
+#TTSLangOptions2 = gr.Dropdown(choices=langdropdown_choices, value="en: english", label="choose the language of the srt/text accent")
 
 def TTSforListeningPractice(text, language = "en", Repeat10x = False):
     if Repeat10x:
@@ -1777,32 +1777,32 @@ def lingualinkassist(text, language):
 
 #----------------------------------------------------------------------------------------------------------------------
 
-def w4wsidebysidereadergen(text, langdest):
-    #FrontRevSentChunk as reference
-    FinalOutput = ""
-    Translated = "FWNWO: \n"
-    words = text.split()
-    w4wsidebysidtranslator = Translator()
-    translatedFWO = w4wsidebysidtranslator.translate(text, dest=langdest[:2])
-    translatedNWO = w4wsidebysidtranslator.translate(words, dest=langdest[:2]) #src or dest
-    #print(translated)
-    #print(dir(translatedNWO[0]), "\n")
-    #FinalOutput += "\n" + translated.text
-    for obj in translatedNWO:
-        # print(f"Original Text: {obj.origin}")
-        # print(f"Translated Text: {obj.text}")
-        # print(f"Source Language: {obj.src}")
-        # print(f"Destination Language: {obj.dest}")
-        # print(f"Pronunciation: {obj.pronunciation}\n")
-        FinalOutput += obj.origin + f" ({obj.text}) "
-        Translated += obj.text + " "
-    speech = gTTS(text=FinalOutput, lang=langdest[:2], slow="False")
-    speech.save("CurrentSidebySideTTSFile.mp3")
-
-    FinalOutput = "Side by Side Version: " + FinalOutput
-
-    analysisPrompt = f"{ Translated } and \n\nFWFWO: \n{ translatedFWO.text } \n\nForeign Words Native Word Order and Foreign Word Order \nIf you had to make the notes on the word by word considerations to transform FWNWO to FWFWO what would that be? (A simple game idea where your response will be the rubrik to mark the players response against)"
-    return FinalOutput, Translated, "FWFWO: \n" + translatedFWO.text, "CurrentSidebySideTTSFile.mp3", analysisPrompt
+# def w4wsidebysidereadergen(text, langdest):
+#     #FrontRevSentChunk as reference
+#     FinalOutput = ""
+#     Translated = "FWNWO: \n"
+#     words = text.split()
+#     w4wsidebysidtranslator = Translator()
+#     translatedFWO = w4wsidebysidtranslator.translate(text, dest=langdest[:2])
+#     translatedNWO = w4wsidebysidtranslator.translate(words, dest=langdest[:2]) #src or dest
+#     #print(translated)
+#     #print(dir(translatedNWO[0]), "\n")
+#     #FinalOutput += "\n" + translated.text
+#     for obj in translatedNWO:
+#         # print(f"Original Text: {obj.origin}")
+#         # print(f"Translated Text: {obj.text}")
+#         # print(f"Source Language: {obj.src}")
+#         # print(f"Destination Language: {obj.dest}")
+#         # print(f"Pronunciation: {obj.pronunciation}\n")
+#         FinalOutput += obj.origin + f" ({obj.text}) "
+#         Translated += obj.text + " "
+#     speech = gTTS(text=FinalOutput, lang=langdest[:2], slow="False")
+#     speech.save("CurrentSidebySideTTSFile.mp3")
+
+#     FinalOutput = "Side by Side Version: " + FinalOutput
+
+#     analysisPrompt = f"{ Translated } and \n\nFWFWO: \n{ translatedFWO.text } \n\nForeign Words Native Word Order and Foreign Word Order \nIf you had to make the notes on the word by word considerations to transform FWNWO to FWFWO what would that be? (A simple game idea where your response will be the rubrik to mark the players response against)"
+#     return FinalOutput, Translated, "FWFWO: \n" + translatedFWO.text, "CurrentSidebySideTTSFile.mp3", analysisPrompt
 
 #https://huggingface.co/spaces/Geonmo/nllb-translation-demo/blob/main/app.py
 def nllbtranscload_models():
@@ -2110,33 +2110,33 @@ with gr.Blocks() as lliface: #theme=gr.themes.Glass(primary_hue='green', seconda
 gr.HTML('Memorisation by string comparison idea <br><br>Result of prompt chain starting with: Lets say I have the strings "red" and "ppalgan" how can I guess the second from the first from just spelling (eg. similar words and distance in the alphabet, ...), how can I use python to do this i.e. output of no matching letters, closest letter to r, then e, then d, a dictionary of letters that look similar eg. d and p, l and I a and d etc.')
 gr.Interface(fn=letterbased_guess_word, inputs=["text", "text"], outputs="text", description="letter based guess suggestions (one word to one word is the designed use case)")
 gr.HTML("Side by side reading creator (Google Translate) TODO - Roman output of Non roman characters")
-with gr.Group():
-    with gr.Row():
-        #gr.Interface(fn=w4wsidebysidereadergen, inputs=["text", w4wsidebysidelangdest], outputs=["text", "text", "text"], description="Side by side reading creator")
-        w4wsidebysideinput = gr.Text(label="Paste Text you want to learn here (wordlists and UDHR can be used as )", placeholder="Enter Text Here. One or Two Paragraphs at the longest ideally")
-    with gr.Row():
-        w4wsidebysidelangdest = gr.Dropdown(choices=langdropdown_choices, label="Choose destination language", value="de: german")
-        #w4wsidebysideaudiosidebyside = gr.Checkbox(label="Audio for side by side")
-        w4wsidebysidebtn = gr.Button("Create Side by Side, FWNWO, and FWFWO (Just read start to finish of each till you can read the FWFWO without assistance)")
-    with gr.Row():
-        w4wsidebysideOutput = gr.Text(label="Side by side", placeholder="Side by side will display here")
-        w4wsidebysideFWNWOOutput = gr.Text(label="Foreign Word Native Word Order", placeholder="FW NWO will display here")
-        w4wsidebysideFWFWOOutput = gr.Text(label="Foreign Word Foreign Word Order", placeholder="FW FWO will display here")
-    with gr.Row():
-        w4wsidebysideaudioOutput = gr.Audio(sources=["upload"], label="Side by Side in Audio form")
-    with gr.Row():
-        w4wsidebysideAnalysisPromptOutput = gr.Text(label="Prompt for LLM analysis", placeholder="Prompt for LLM analysis will display here")
-    w4wsidebysidebtn.click(fn=w4wsidebysidereadergen, inputs=[w4wsidebysideinput, w4wsidebysidelangdest], outputs=[w4wsidebysideOutput, w4wsidebysideFWNWOOutput, w4wsidebysideFWFWOOutput, w4wsidebysideaudioOutput, w4wsidebysideAnalysisPromptOutput])
-    with gr.Row():
-        gr.HTML("To be Added")
-    with gr.Row():
-        w4wsidebysideNatSentStructOutput = gr.Text(label="Native Closed class words as Native Sentence Structure", placeholder="Comparison is valuable")
-        w4wsidebysideForSentStructOutput = gr.Text(label="Foreign Closed class words as Foreign Sentence Structure", placeholder="Comparison is valuable")
-    with gr.Row():
-        w4wsidebysideWordCountOutput = gr.Text(label="Word Count", placeholder="Word Count will display here")
-        w4wsidebysideRandomisedOutput = gr.Text(label="Random Order As Test", placeholder="Randomised version (Transition Tab most left bottom) will display here")
-        w4wsidebysideQueGenOutput = gr.Text(label="Questions generation as comprehension test", placeholder="Questions generated will display here")
-        w4wsidebysideUNWFWOOutput = gr.Text(label="HTML as UNWFWO assistant", placeholder="HTML as UNWFWO assistant download will display here")
+# with gr.Group():
+#     with gr.Row():
+#         #gr.Interface(fn=w4wsidebysidereadergen, inputs=["text", w4wsidebysidelangdest], outputs=["text", "text", "text"], description="Side by side reading creator")
+#         w4wsidebysideinput = gr.Text(label="Paste Text you want to learn here (wordlists and UDHR can be used as )", placeholder="Enter Text Here. One or Two Paragraphs at the longest ideally")
+#     with gr.Row():
+#         w4wsidebysidelangdest = gr.Dropdown(choices=langdropdown_choices, label="Choose destination language", value="de: german")
+#         #w4wsidebysideaudiosidebyside = gr.Checkbox(label="Audio for side by side")
+#         w4wsidebysidebtn = gr.Button("Create Side by Side, FWNWO, and FWFWO (Just read start to finish of each till you can read the FWFWO without assistance)")
+#     with gr.Row():
+#         w4wsidebysideOutput = gr.Text(label="Side by side", placeholder="Side by side will display here")
+#         w4wsidebysideFWNWOOutput = gr.Text(label="Foreign Word Native Word Order", placeholder="FW NWO will display here")
+#         w4wsidebysideFWFWOOutput = gr.Text(label="Foreign Word Foreign Word Order", placeholder="FW FWO will display here")
+#     with gr.Row():
+#         w4wsidebysideaudioOutput = gr.Audio(sources=["upload"], label="Side by Side in Audio form")
+#     with gr.Row():
+#         w4wsidebysideAnalysisPromptOutput = gr.Text(label="Prompt for LLM analysis", placeholder="Prompt for LLM analysis will display here")
+#     w4wsidebysidebtn.click(fn=w4wsidebysidereadergen, inputs=[w4wsidebysideinput, w4wsidebysidelangdest], outputs=[w4wsidebysideOutput, w4wsidebysideFWNWOOutput, w4wsidebysideFWFWOOutput, w4wsidebysideaudioOutput, w4wsidebysideAnalysisPromptOutput])
+#     with gr.Row():
+#         gr.HTML("To be Added")
+#     with gr.Row():
+#         w4wsidebysideNatSentStructOutput = gr.Text(label="Native Closed class words as Native Sentence Structure", placeholder="Comparison is valuable")
+#         w4wsidebysideForSentStructOutput = gr.Text(label="Foreign Closed class words as Foreign Sentence Structure", placeholder="Comparison is valuable")
+#     with gr.Row():
+#         w4wsidebysideWordCountOutput = gr.Text(label="Word Count", placeholder="Word Count will display here")
+#         w4wsidebysideRandomisedOutput = gr.Text(label="Random Order As Test", placeholder="Randomised version (Transition Tab most left bottom) will display here")
+#         w4wsidebysideQueGenOutput = gr.Text(label="Questions generation as comprehension test", placeholder="Questions generated will display here")
+#         w4wsidebysideUNWFWOOutput = gr.Text(label="HTML as UNWFWO assistant", placeholder="HTML as UNWFWO assistant download will display here")
 gr.HTML("Side by side reading creator (NLLB-600M (+-3gb / setting has 400 tokens? as max length for inference optimisation?)- 200 languages vs 107 in googletrans) - <a href='https://github.com/facebookresearch/fairseq/tree/nllb'> -- Fairseq Github -- </a> | inspired by - <a href='https://huggingface.co/spaces/Geonmo/nllb-translation-demo'> -- Geonmo NLLB Demo -- </a> | <a href='https://huggingface.co/spaces/vutuka/nllb-vutuka-translation'> -- Vutuka demo -- </a>")
 with gr.Group():
 gr.HTML("Under Construction - generator and cpu based to beat gpu cost, cpu wait time and network dependency for local use")
@@ -2217,7 +2217,7 @@ with gr.Blocks() as lliface: #theme=gr.themes.Glass(primary_hue='green', seconda
 gr.Text("Text to Closed Class + Adjectives + Punctuation or Noun Verb + Punctuation ")
 with gr.Tab("Audio - Only English thoughts as practice"):
 gr.HTML("For Audio Most productive is real time recall of native (where your full reasoning ability will always be) <br><hr> Find Replace new lines of the foreign text with full stops or | to get per word translation")
-gr.Interface(fn=TTSforListeningPractice, inputs=["text", TTSLangOptions2], outputs="audio", description="Paste only english words in foreign order and then keep removing the words from this to practice as effectively")
+# gr.Interface(fn=TTSforListeningPractice, inputs=["text", TTSLangOptions2], outputs="audio", description="Paste only english words in foreign order and then keep removing the words from this to practice as effectively")
 with gr.Tab("Speed through Imagery"):
 gr.HTML("<a href='https://chat.openai.com/g/g-bYMSVlb8y-lingua-link'> -- Lingua Link (Simple GPT for assistinng image creation) -- </a> <br>Use with placeholder generator tab below <br> Best for this is 2 nouns as one phrase i.e. nouns as adjectives and then you can a verb (1000 of those will take you far)")
 with gr.Accordion("More Details - conversation example", open=False):
@@ -2314,7 +2314,7 @@ with gr.Blocks() as lliface: #theme=gr.themes.Glass(primary_hue='green', seconda
 with gr.Row():
 with gr.Column(scale=1):
 gr.HTML("Listening - Songs - Chorus <br> Anticipation of the item to remember is how you learn lyrics that is why songs are easy as if you heard it 10 times already your capacity to anticipate the words is great <br><br> This is where TTS helps as you are ignoring all words except the words just before the actual <br> <b>Tiny Stories dataset is like a graded reader</b> <br>")
-gr.Interface(fn=TTSforListeningPractice, inputs=["text", TTSLangOptions, "checkbox"], outputs="audio", description="Paste chorus lyrics from below here and use TTS or make notes to save here (Or paste anything)")
+# gr.Interface(fn=TTSforListeningPractice, inputs=["text", TTSLangOptions, "checkbox"], outputs="audio", description="Paste chorus lyrics from below here and use TTS or make notes to save here (Or paste anything)")
 with gr.Accordion("TTS Spaces", open=False):
 TTSspaceoptions = gr.Dropdown(choices=["https://suno-bark.hf.space", "https://coqui-xtts.hf.space"], label="existing whisper spaces")
 TTSspaceoptionsbtn = gr.Button("Load a Image as prompt Space")
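The UI text retained in the last large hunk already points to the intended replacement path: nllbtranscload_models and the NLLB-600M checkpoint (200 languages versus 107 in googletrans). For orientation only, a minimal transformers-based version of that path could look like the sketch below; the model id, the FLORES-200 language codes and the 400-token max_length are assumptions drawn from that UI text, not code from this repository.

# Hedged sketch of an NLLB-600M translation helper (assumed names, not this repo's code).
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

def load_nllb_translator(src_lang="eng_Latn", tgt_lang="deu_Latn"):
    """Build a translation pipeline around the distilled 600M NLLB checkpoint."""
    model_name = "facebook/nllb-200-distilled-600M"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return pipeline(
        "translation",
        model=model,
        tokenizer=tokenizer,
        src_lang=src_lang,
        tgt_lang=tgt_lang,
        max_length=400,  # mirrors the 400-token note in the UI text above
    )

translator = load_nllb_translator()
print(translator("The quick brown fox jumps over the lazy dog.")[0]["translation_text"])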
requirements.txt CHANGED

--- a/requirements.txt
+++ b/requirements.txt
@@ -17,5 +17,3 @@ youtube_transcript_api
 spacy_syllables
 whisper
 gradio
-httpx==0.13.3
-googletrans==3.1.0a0
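Dropping googletrans also lets the httpx==0.13.3 pin go, which appears to have been there only for googletrans 3.1.0a0 and is old enough to conflict with recent gradio releases. A hypothetical smoke test for the trimmed environment (module names taken from the imports that remain visible in app.py) could be:

# Hypothetical post-change smoke test: confirm the remaining core imports still resolve.
import importlib

for module in ("spacy", "gradio", "nltk"):
    try:
        importlib.import_module(module)
        print(f"{module}: OK")
    except ImportError as exc:
        print(f"{module}: missing ({exc})")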