Spaces:
Running
Running
kwabs22
committed on
Commit
•
13c20b1
1
Parent(s):
ba25e9b
GPT4 suggested missing file in dockerfile
Browse files- Dockerfile +13 -9
- app.py +60 -60
Dockerfile
CHANGED
@@ -2,9 +2,10 @@ FROM python:3.9
|
|
2 |
|
3 |
WORKDIR /code
|
4 |
|
5 |
-
# Install dependencies for OpenCV
|
6 |
RUN apt-get update && apt-get install -y \
|
7 |
libgl1-mesa-glx \
|
|
|
8 |
&& rm -rf /var/lib/apt/lists/*
|
9 |
|
10 |
# Create a virtual environment and activate it
|
@@ -23,20 +24,23 @@ RUN python -m spacy download zh_core_web_sm
|
|
23 |
RUN python -m spacy download es_core_news_sm
|
24 |
RUN python -m spacy download de_core_news_sm
|
25 |
|
|
|
|
|
|
|
26 |
# Set up a new user named "user" with user ID 1000
|
27 |
RUN useradd -m -u 1000 user
|
28 |
# Switch to the "user" user
|
29 |
USER user
|
30 |
# Set home to the user's home directory
|
31 |
ENV HOME=/home/user \
|
32 |
-
|
33 |
PYTHONPATH=$HOME/app \
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
|
41 |
# Set the working directory to the user's home directory
|
42 |
WORKDIR $HOME/app
|
@@ -44,4 +48,4 @@ WORKDIR $HOME/app
|
|
44 |
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
45 |
COPY --chown=user . $HOME/app
|
46 |
|
47 |
-
CMD ["python", "app.py"]
|
|
|
2 |
|
3 |
WORKDIR /code
|
4 |
|
5 |
+
# Install dependencies for OpenCV and curl for downloading files
|
6 |
RUN apt-get update && apt-get install -y \
|
7 |
libgl1-mesa-glx \
|
8 |
+
curl \
|
9 |
&& rm -rf /var/lib/apt/lists/*
|
10 |
|
11 |
# Create a virtual environment and activate it
|
|
|
24 |
RUN python -m spacy download es_core_news_sm
|
25 |
RUN python -m spacy download de_core_news_sm
|
26 |
|
27 |
+
# Download and place the frpc_linux_amd64_v0.2 file
|
28 |
+
RUN curl -L https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_linux_amd64 -o /opt/venv/lib/python3.9/site-packages/gradio/frpc_linux_amd64_v0.2
|
29 |
+
|
30 |
# Set up a new user named "user" with user ID 1000
|
31 |
RUN useradd -m -u 1000 user
|
32 |
# Switch to the "user" user
|
33 |
USER user
|
34 |
# Set home to the user's home directory
|
35 |
ENV HOME=/home/user \
|
36 |
+
PATH=/home/user/.local/bin:$PATH \
|
37 |
PYTHONPATH=$HOME/app \
|
38 |
+
PYTHONUNBUFFERED=1 \
|
39 |
+
GRADIO_ALLOW_FLAGGING=never \
|
40 |
+
GRADIO_NUM_PORTS=1 \
|
41 |
+
GRADIO_SERVER_NAME=0.0.0.0 \
|
42 |
+
GRADIO_THEME=huggingface \
|
43 |
+
SYSTEM=spaces
|
44 |
|
45 |
# Set the working directory to the user's home directory
|
46 |
WORKDIR $HOME/app
|
|
|
48 |
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
49 |
COPY --chown=user . $HOME/app
|
50 |
|
51 |
+
CMD ["python", "app.py"]
|
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
-
|
2 |
-
|
3 |
import spacy
|
4 |
import gradio as gr
|
5 |
import nltk
|
@@ -85,10 +85,10 @@ nlp_ja = spacy.load("ja_core_news_sm")
|
|
85 |
nlp_zh = spacy.load("zh_core_web_sm")
|
86 |
nlp_en_syllable = spacy.load("en_core_web_sm")
|
87 |
nlp_en_syllable.add_pipe("syllables", after="tagger") #https://spacy.io/universe/project/spacy_syllables/
|
88 |
-
|
89 |
|
90 |
nlp = spacy.load('en_core_web_sm')
|
91 |
-
|
92 |
|
93 |
def Sentencechunker(sentence):
|
94 |
Sentchunks = sentence.split(" ")
|
@@ -160,7 +160,7 @@ def BatchWordChunk(sentence):
|
|
160 |
|
161 |
# Translate from English to French
|
162 |
|
163 |
-
|
164 |
|
165 |
ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks", "Spelling Chunks"], label="Choose Chunk Type", value="Chunks")
|
166 |
|
@@ -248,8 +248,8 @@ def merge_lines(roman_file, w4w_file, full_mean_file, macaronic_file):
|
|
248 |
|
249 |
return "\n".join(merged_lines)
|
250 |
|
251 |
-
|
252 |
-
|
253 |
|
254 |
def TTSforListeningPractice(text, language = "en", Repeat10x = False):
|
255 |
if Repeat10x:
|
@@ -1777,32 +1777,32 @@ def lingualinkassist(text, language):
|
|
1777 |
|
1778 |
#----------------------------------------------------------------------------------------------------------------------
|
1779 |
|
1780 |
-
|
1781 |
-
#
|
1782 |
-
|
1783 |
-
|
1784 |
-
|
1785 |
-
|
1786 |
-
|
1787 |
-
|
1788 |
-
#
|
1789 |
-
#
|
1790 |
-
#
|
1791 |
-
|
1792 |
-
#
|
1793 |
-
#
|
1794 |
-
#
|
1795 |
-
#
|
1796 |
-
#
|
1797 |
-
|
1798 |
-
|
1799 |
-
|
1800 |
-
|
1801 |
-
|
1802 |
-
|
1803 |
-
|
1804 |
-
|
1805 |
-
|
1806 |
|
1807 |
#https://huggingface.co/spaces/Geonmo/nllb-translation-demo/blob/main/app.py
|
1808 |
def nllbtranscload_models():
|
@@ -2110,33 +2110,33 @@ with gr.Blocks() as lliface: #theme=gr.themes.Glass(primary_hue='green', seconda
|
|
2110 |
gr.HTML('Memorisation by string comparison idea <br><br>Result of prompt chain starting with: Lets say I have the strings "red" and "ppalgan" how can I guess the second from the first from just spelling (eg. similar words and distance in the alphabet, ...), how can I use python to do this i.e. output of no matching letters, closest letter to r, then e, then d, a dictionary of letters that look similar eg. d and p, l and I a and d etc.')
|
2111 |
gr.Interface(fn=letterbased_guess_word, inputs=["text", "text"], outputs="text", description="letter based guess suggestions (one word to one word is the designed use case)")
|
2112 |
gr.HTML("Side by side reading creator (Google Translate) TODO - Roman output of Non roman characters")
|
2113 |
-
|
2114 |
-
|
2115 |
-
|
2116 |
-
|
2117 |
-
|
2118 |
-
|
2119 |
-
|
2120 |
-
|
2121 |
-
|
2122 |
-
|
2123 |
-
|
2124 |
-
|
2125 |
-
|
2126 |
-
|
2127 |
-
|
2128 |
-
|
2129 |
-
|
2130 |
-
|
2131 |
-
|
2132 |
-
|
2133 |
-
|
2134 |
-
|
2135 |
-
|
2136 |
-
|
2137 |
-
|
2138 |
-
|
2139 |
-
|
2140 |
gr.HTML("Side by side reading creator (NLLB-600M (+-3gb / setting has 400 tokens? as max length for inference optimisation?)- 200 languages vs 107 in googletrans) - <a href='https://github.com/facebookresearch/fairseq/tree/nllb'> -- Fairseq Github -- </a> | inspired by - <a href='https://huggingface.co/spaces/Geonmo/nllb-translation-demo'> -- Geonmo NLLB Demo -- </a> | <a href='https://huggingface.co/spaces/vutuka/nllb-vutuka-translation'> -- Vutuka demo -- </a>")
|
2141 |
with gr.Group():
|
2142 |
gr.HTML("Under Construction - generator and cpu based to beat gpu cost, cpu wait time and network dependency for local use")
|
|
|
1 |
+
from googletrans import Translator
|
2 |
+
from googletrans import LANGUAGES
|
3 |
import spacy
|
4 |
import gradio as gr
|
5 |
import nltk
|
|
|
85 |
nlp_zh = spacy.load("zh_core_web_sm")
|
86 |
nlp_en_syllable = spacy.load("en_core_web_sm")
|
87 |
nlp_en_syllable.add_pipe("syllables", after="tagger") #https://spacy.io/universe/project/spacy_syllables/
|
88 |
+
langdropdown_choices = [f"{code}: {name}" for code, name in LANGUAGES.items()]
|
89 |
|
90 |
nlp = spacy.load('en_core_web_sm')
|
91 |
+
translator = Translator()
|
92 |
|
93 |
def Sentencechunker(sentence):
|
94 |
Sentchunks = sentence.split(" ")
|
|
|
160 |
|
161 |
# Translate from English to French
|
162 |
|
163 |
+
langdest = gr.Dropdown(choices=langdropdown_choices, label="Choose Language", value="de: german") #["af", "de", "es", "ko", "ja", "zh-cn"]
|
164 |
|
165 |
ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks", "Spelling Chunks"], label="Choose Chunk Type", value="Chunks")
|
166 |
|
|
|
248 |
|
249 |
return "\n".join(merged_lines)
|
250 |
|
251 |
+
TTSLangOptions = gr.Dropdown(choices=langdropdown_choices, value="en: english", label="choose the language of the srt/text accent") #["en", "de", "es", "ja", "ko", "zh-cn"]
|
252 |
+
TTSLangOptions2 = gr.Dropdown(choices=langdropdown_choices, value="en: english", label="choose the language of the srt/text accent")
|
253 |
|
254 |
def TTSforListeningPractice(text, language = "en", Repeat10x = False):
|
255 |
if Repeat10x:
|
|
|
1777 |
|
1778 |
#----------------------------------------------------------------------------------------------------------------------
|
1779 |
|
1780 |
+
def w4wsidebysidereadergen(text, langdest):
    """Build side-by-side reading aids for *text* translated into *langdest*.

    Parameters:
        text: source text to translate (split on whitespace for the
            word-by-word pass).
        langdest: dropdown value of the form "<code>: <name>",
            e.g. "de: german" — the language code before the colon is used.

    Returns a 5-tuple:
        (side-by-side text, FWNWO text, FWFWO text,
         path to the generated TTS mp3, LLM analysis prompt)

    FWNWO = Foreign Words, Native Word Order (word-by-word translation);
    FWFWO = Foreign Words, Foreign Word Order (whole-text translation).
    Side effect: writes "CurrentSidebySideTTSFile.mp3" in the CWD.
    """
    #FrontRevSentChunk as reference
    # Take the full language code before the colon. The previous
    # `langdest[:2]` slice broke multi-part codes such as "zh-cn"
    # (truncated to "zh"); for two-letter codes the result is unchanged.
    lang_code = langdest.split(":")[0].strip()
    FinalOutput = ""
    Translated = "FWNWO: \n"
    words = text.split()
    w4wsidebysidtranslator = Translator()
    # Whole-text translation -> foreign word order (FWFWO).
    translatedFWO = w4wsidebysidtranslator.translate(text, dest=lang_code)
    # Per-word translation (list input) -> native word order (FWNWO).
    translatedNWO = w4wsidebysidtranslator.translate(words, dest=lang_code) #src or dest
    #print(translated)
    #print(dir(translatedNWO[0]), "\n")
    #FinalOutput += "\n" + translated.text
    for obj in translatedNWO:
        # print(f"Original Text: {obj.origin}")
        # print(f"Translated Text: {obj.text}")
        # print(f"Source Language: {obj.src}")
        # print(f"Destination Language: {obj.dest}")
        # print(f"Pronunciation: {obj.pronunciation}\n")
        FinalOutput += obj.origin + f" ({obj.text}) "
        Translated += obj.text + " "
    # BUGFIX: the original passed slow="False" — a non-empty string is
    # truthy, so gTTS produced SLOW speech. `slow` must be a boolean.
    speech = gTTS(text=FinalOutput, lang=lang_code, slow=False)
    speech.save("CurrentSidebySideTTSFile.mp3")

    FinalOutput = "Side by Side Version: " + FinalOutput

    analysisPrompt = f"{ Translated } and \n\nFWFWO: \n{ translatedFWO.text } \n\nForeign Words Native Word Order and Foreign Word Order \nIf you had to make the notes on the word by word considerations to transform FWNWO to FWFWO what would that be? (A simple game idea where your response will be the rubrik to mark the players response against)"
    return FinalOutput, Translated, "FWFWO: \n" + translatedFWO.text, "CurrentSidebySideTTSFile.mp3", analysisPrompt
|
1806 |
|
1807 |
#https://huggingface.co/spaces/Geonmo/nllb-translation-demo/blob/main/app.py
|
1808 |
def nllbtranscload_models():
|
|
|
2110 |
gr.HTML('Memorisation by string comparison idea <br><br>Result of prompt chain starting with: Lets say I have the strings "red" and "ppalgan" how can I guess the second from the first from just spelling (eg. similar words and distance in the alphabet, ...), how can I use python to do this i.e. output of no matching letters, closest letter to r, then e, then d, a dictionary of letters that look similar eg. d and p, l and I a and d etc.')
|
2111 |
gr.Interface(fn=letterbased_guess_word, inputs=["text", "text"], outputs="text", description="letter based guess suggestions (one word to one word is the designed use case)")
|
2112 |
gr.HTML("Side by side reading creator (Google Translate) TODO - Roman output of Non roman characters")
|
2113 |
+
with gr.Group():
|
2114 |
+
with gr.Row():
|
2115 |
+
#gr.Interface(fn=w4wsidebysidereadergen, inputs=["text", w4wsidebysidelangdest], outputs=["text", "text", "text"], description="Side by side reading creator")
|
2116 |
+
w4wsidebysideinput = gr.Text(label="Paste Text you want to learn here (wordlists and UDHR can be used as )", placeholder="Enter Text Here. One or Two Paragraphs at the longest ideally")
|
2117 |
+
with gr.Row():
|
2118 |
+
w4wsidebysidelangdest = gr.Dropdown(choices=langdropdown_choices, label="Choose destination language", value="de: german")
|
2119 |
+
#w4wsidebysideaudiosidebyside = gr.Checkbox(label="Audio for side by side")
|
2120 |
+
w4wsidebysidebtn = gr.Button("Create Side by Side, FWNWO, and FWFWO (Just read start to finish of each till you can read the FWFWO without assistance)")
|
2121 |
+
with gr.Row():
|
2122 |
+
w4wsidebysideOutput = gr.Text(label="Side by side", placeholder="Side by side will display here")
|
2123 |
+
w4wsidebysideFWNWOOutput = gr.Text(label="Foreign Word Native Word Order", placeholder="FW NWO will display here")
|
2124 |
+
w4wsidebysideFWFWOOutput = gr.Text(label="Foreign Word Foreign Word Order", placeholder="FW FWO will display here")
|
2125 |
+
with gr.Row():
|
2126 |
+
w4wsidebysideaudioOutput = gr.Audio(sources=["upload"], label="Side by Side in Audio form")
|
2127 |
+
with gr.Row():
|
2128 |
+
w4wsidebysideAnalysisPromptOutput = gr.Text(label="Prompt for LLM analysis", placeholder="Prompt for LLM analysis will display here")
|
2129 |
+
w4wsidebysidebtn.click(fn=w4wsidebysidereadergen, inputs=[w4wsidebysideinput, w4wsidebysidelangdest], outputs=[w4wsidebysideOutput, w4wsidebysideFWNWOOutput, w4wsidebysideFWFWOOutput, w4wsidebysideaudioOutput, w4wsidebysideAnalysisPromptOutput])
|
2130 |
+
with gr.Row():
|
2131 |
+
gr.HTML("To be Added")
|
2132 |
+
with gr.Row():
|
2133 |
+
w4wsidebysideNatSentStructOutput = gr.Text(label="Native Closed class words as Native Sentence Structure", placeholder="Comparison is valuable")
|
2134 |
+
w4wsidebysideForSentStructOutput = gr.Text(label="Foreign Closed class words as Foreign Sentence Structure", placeholder="Comparison is valuable")
|
2135 |
+
with gr.Row():
|
2136 |
+
w4wsidebysideWordCountOutput = gr.Text(label="Word Count", placeholder="Word Count will display here")
|
2137 |
+
w4wsidebysideRandomisedOutput = gr.Text(label="Random Order As Test", placeholder="Randomised version (Transition Tab most left bottom) will display here")
|
2138 |
+
w4wsidebysideQueGenOutput = gr.Text(label="Questions generation as comprehension test", placeholder="Questions generated will display here")
|
2139 |
+
w4wsidebysideUNWFWOOutput = gr.Text(label="HTML as UNWFWO assistant", placeholder="HTML as UNWFWO assistant download will display here")
|
2140 |
gr.HTML("Side by side reading creator (NLLB-600M (+-3gb / setting has 400 tokens? as max length for inference optimisation?)- 200 languages vs 107 in googletrans) - <a href='https://github.com/facebookresearch/fairseq/tree/nllb'> -- Fairseq Github -- </a> | inspired by - <a href='https://huggingface.co/spaces/Geonmo/nllb-translation-demo'> -- Geonmo NLLB Demo -- </a> | <a href='https://huggingface.co/spaces/vutuka/nllb-vutuka-translation'> -- Vutuka demo -- </a>")
|
2141 |
with gr.Group():
|
2142 |
gr.HTML("Under Construction - generator and cpu based to beat gpu cost, cpu wait time and network dependency for local use")
|