kwabs22 committed on
Commit
13c20b1
1 Parent(s): ba25e9b

GPT4 suggested missing file in dockerfile

Browse files
Files changed (2) hide show
  1. Dockerfile +13 -9
  2. app.py +60 -60
Dockerfile CHANGED
@@ -2,9 +2,10 @@ FROM python:3.9
2
 
3
  WORKDIR /code
4
 
5
- # Install dependencies for OpenCV
6
  RUN apt-get update && apt-get install -y \
7
  libgl1-mesa-glx \
 
8
  && rm -rf /var/lib/apt/lists/*
9
 
10
  # Create a virtual environment and activate it
@@ -23,20 +24,23 @@ RUN python -m spacy download zh_core_web_sm
23
  RUN python -m spacy download es_core_news_sm
24
  RUN python -m spacy download de_core_news_sm
25
 
 
 
 
26
  # Set up a new user named "user" with user ID 1000
27
  RUN useradd -m -u 1000 user
28
  # Switch to the "user" user
29
  USER user
30
  # Set home to the user's home directory
31
  ENV HOME=/home/user \
32
- PATH=/home/user/.local/bin:$PATH \
33
  PYTHONPATH=$HOME/app \
34
- PYTHONUNBUFFERED=1 \
35
- GRADIO_ALLOW_FLAGGING=never \
36
- GRADIO_NUM_PORTS=1 \
37
- GRADIO_SERVER_NAME=0.0.0.0 \
38
- GRADIO_THEME=huggingface \
39
- SYSTEM=spaces
40
 
41
  # Set the working directory to the user's home directory
42
  WORKDIR $HOME/app
@@ -44,4 +48,4 @@ WORKDIR $HOME/app
44
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
45
  COPY --chown=user . $HOME/app
46
 
47
- CMD ["python", "app.py"]
 
2
 
3
  WORKDIR /code
4
 
5
+ # Install dependencies for OpenCV and curl for downloading files
6
  RUN apt-get update && apt-get install -y \
7
  libgl1-mesa-glx \
8
+ curl \
9
  && rm -rf /var/lib/apt/lists/*
10
 
11
  # Create a virtual environment and activate it
 
24
  RUN python -m spacy download es_core_news_sm
25
  RUN python -m spacy download de_core_news_sm
26
 
27
+ # Download and place the frpc_linux_amd64_v0.2 file
28
+ RUN curl -L https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_linux_amd64 -o /opt/venv/lib/python3.9/site-packages/gradio/frpc_linux_amd64_v0.2
29
+
30
  # Set up a new user named "user" with user ID 1000
31
  RUN useradd -m -u 1000 user
32
  # Switch to the "user" user
33
  USER user
34
  # Set home to the user's home directory
35
  ENV HOME=/home/user \
36
+ PATH=/home/user/.local/bin:$PATH \
37
  PYTHONPATH=$HOME/app \
38
+ PYTHONUNBUFFERED=1 \
39
+ GRADIO_ALLOW_FLAGGING=never \
40
+ GRADIO_NUM_PORTS=1 \
41
+ GRADIO_SERVER_NAME=0.0.0.0 \
42
+ GRADIO_THEME=huggingface \
43
+ SYSTEM=spaces
44
 
45
  # Set the working directory to the user's home directory
46
  WORKDIR $HOME/app
 
48
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
49
  COPY --chown=user . $HOME/app
50
 
51
+ CMD ["python", "app.py"]
app.py CHANGED
@@ -1,5 +1,5 @@
1
- #from googletrans import Translator
2
- #from googletrans import LANGUAGES
3
  import spacy
4
  import gradio as gr
5
  import nltk
@@ -85,10 +85,10 @@ nlp_ja = spacy.load("ja_core_news_sm")
85
  nlp_zh = spacy.load("zh_core_web_sm")
86
  nlp_en_syllable = spacy.load("en_core_web_sm")
87
  nlp_en_syllable.add_pipe("syllables", after="tagger") #https://spacy.io/universe/project/spacy_syllables/
88
- #langdropdown_choices = [f"{code}: {name}" for code, name in LANGUAGES.items()]
89
 
90
  nlp = spacy.load('en_core_web_sm')
91
- #translator = Translator()
92
 
93
  def Sentencechunker(sentence):
94
  Sentchunks = sentence.split(" ")
@@ -160,7 +160,7 @@ def BatchWordChunk(sentence):
160
 
161
  # Translate from English to French
162
 
163
- #langdest = gr.Dropdown(choices=langdropdown_choices, label="Choose Language", value="de: german") #["af", "de", "es", "ko", "ja", "zh-cn"]
164
 
165
  ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks", "Spelling Chunks"], label="Choose Chunk Type", value="Chunks")
166
 
@@ -248,8 +248,8 @@ def merge_lines(roman_file, w4w_file, full_mean_file, macaronic_file):
248
 
249
  return "\n".join(merged_lines)
250
 
251
- #TTSLangOptions = gr.Dropdown(choices=langdropdown_choices, value="en: english", label="choose the language of the srt/text accent") #["en", "de", "es", "ja", "ko", "zh-cn"]
252
- #TTSLangOptions2 = gr.Dropdown(choices=langdropdown_choices, value="en: english", label="choose the language of the srt/text accent")
253
 
254
  def TTSforListeningPractice(text, language = "en", Repeat10x = False):
255
  if Repeat10x:
@@ -1777,32 +1777,32 @@ def lingualinkassist(text, language):
1777
 
1778
  #----------------------------------------------------------------------------------------------------------------------
1779
 
1780
- # def w4wsidebysidereadergen(text, langdest):
1781
- # #FrontRevSentChunk as reference
1782
- # FinalOutput = ""
1783
- # Translated = "FWNWO: \n"
1784
- # words = text.split()
1785
- # w4wsidebysidtranslator = Translator()
1786
- # translatedFWO = w4wsidebysidtranslator.translate(text, dest=langdest[:2])
1787
- # translatedNWO = w4wsidebysidtranslator.translate(words, dest=langdest[:2]) #src or dest
1788
- # #print(translated)
1789
- # #print(dir(translatedNWO[0]), "\n")
1790
- # #FinalOutput += "\n" + translated.text
1791
- # for obj in translatedNWO:
1792
- # # print(f"Original Text: {obj.origin}")
1793
- # # print(f"Translated Text: {obj.text}")
1794
- # # print(f"Source Language: {obj.src}")
1795
- # # print(f"Destination Language: {obj.dest}")
1796
- # # print(f"Pronunciation: {obj.pronunciation}\n")
1797
- # FinalOutput += obj.origin + f" ({obj.text}) "
1798
- # Translated += obj.text + " "
1799
- # speech = gTTS(text=FinalOutput, lang=langdest[:2], slow="False")
1800
- # speech.save("CurrentSidebySideTTSFile.mp3")
1801
-
1802
- # FinalOutput = "Side by Side Version: " + FinalOutput
1803
-
1804
- # analysisPrompt = f"{ Translated } and \n\nFWFWO: \n{ translatedFWO.text } \n\nForeign Words Native Word Order and Foreign Word Order \nIf you had to make the notes on the word by word considerations to transform FWNWO to FWFWO what would that be? (A simple game idea where your response will be the rubrik to mark the players response against)"
1805
- # return FinalOutput, Translated, "FWFWO: \n" + translatedFWO.text, "CurrentSidebySideTTSFile.mp3", analysisPrompt
1806
 
1807
  #https://huggingface.co/spaces/Geonmo/nllb-translation-demo/blob/main/app.py
1808
  def nllbtranscload_models():
@@ -2110,33 +2110,33 @@ with gr.Blocks() as lliface: #theme=gr.themes.Glass(primary_hue='green', seconda
2110
  gr.HTML('Memorisation by string comparison idea <br><br>Result of prompt chain starting with: Lets say I have the strings "red" and "ppalgan" how can I guess the second from the first from just spelling (eg. similar words and distance in the alphabet, ...), how can I use python to do this i.e. output of no matching letters, closest letter to r, then e, then d, a dictionary of letters that look similar eg. d and p, l and I a and d etc.')
2111
  gr.Interface(fn=letterbased_guess_word, inputs=["text", "text"], outputs="text", description="letter based guess suggestions (one word to one word is the designed use case)")
2112
  gr.HTML("Side by side reading creator (Google Translate) TODO - Roman output of Non roman characters")
2113
- # with gr.Group():
2114
- # with gr.Row():
2115
- # #gr.Interface(fn=w4wsidebysidereadergen, inputs=["text", w4wsidebysidelangdest], outputs=["text", "text", "text"], description="Side by side reading creator")
2116
- # w4wsidebysideinput = gr.Text(label="Paste Text you want to learn here (wordlists and UDHR can be used as )", placeholder="Enter Text Here. One or Two Paragraphs at the longest ideally")
2117
- # with gr.Row():
2118
- # w4wsidebysidelangdest = gr.Dropdown(choices=langdropdown_choices, label="Choose destination language", value="de: german")
2119
- # #w4wsidebysideaudiosidebyside = gr.Checkbox(label="Audio for side by side")
2120
- # w4wsidebysidebtn = gr.Button("Create Side by Side, FWNWO, and FWFWO (Just read start to finish of each till you can read the FWFWO without assistance)")
2121
- # with gr.Row():
2122
- # w4wsidebysideOutput = gr.Text(label="Side by side", placeholder="Side by side will display here")
2123
- # w4wsidebysideFWNWOOutput = gr.Text(label="Foreign Word Native Word Order", placeholder="FW NWO will display here")
2124
- # w4wsidebysideFWFWOOutput = gr.Text(label="Foreign Word Foreign Word Order", placeholder="FW FWO will display here")
2125
- # with gr.Row():
2126
- # w4wsidebysideaudioOutput = gr.Audio(sources=["upload"], label="Side by Side in Audio form")
2127
- # with gr.Row():
2128
- # w4wsidebysideAnalysisPromptOutput = gr.Text(label="Prompt for LLM analysis", placeholder="Prompt for LLM analysis will display here")
2129
- # w4wsidebysidebtn.click(fn=w4wsidebysidereadergen, inputs=[w4wsidebysideinput, w4wsidebysidelangdest], outputs=[w4wsidebysideOutput, w4wsidebysideFWNWOOutput, w4wsidebysideFWFWOOutput, w4wsidebysideaudioOutput, w4wsidebysideAnalysisPromptOutput])
2130
- # with gr.Row():
2131
- # gr.HTML("To be Added")
2132
- # with gr.Row():
2133
- # w4wsidebysideNatSentStructOutput = gr.Text(label="Native Closed class words as Native Sentence Structure", placeholder="Comparison is valuable")
2134
- # w4wsidebysideForSentStructOutput = gr.Text(label="Foreign Closed class words as Foreign Sentence Structure", placeholder="Comparison is valuable")
2135
- # with gr.Row():
2136
- # w4wsidebysideWordCountOutput = gr.Text(label="Word Count", placeholder="Word Count will display here")
2137
- # w4wsidebysideRandomisedOutput = gr.Text(label="Random Order As Test", placeholder="Randomised version (Transition Tab most left bottom) will display here")
2138
- # w4wsidebysideQueGenOutput = gr.Text(label="Questions generation as comprehension test", placeholder="Questions generated will display here")
2139
- # w4wsidebysideUNWFWOOutput = gr.Text(label="HTML as UNWFWO assistant", placeholder="HTML as UNWFWO assistant download will display here")
2140
  gr.HTML("Side by side reading creator (NLLB-600M (+-3gb / setting has 400 tokens? as max length for inference optimisation?)- 200 languages vs 107 in googletrans) - <a href='https://github.com/facebookresearch/fairseq/tree/nllb'> -- Fairseq Github -- </a> | inspired by - <a href='https://huggingface.co/spaces/Geonmo/nllb-translation-demo'> -- Geonmo NLLB Demo -- </a> | <a href='https://huggingface.co/spaces/vutuka/nllb-vutuka-translation'> -- Vutuka demo -- </a>")
2141
  with gr.Group():
2142
  gr.HTML("Under Construction - generator and cpu based to beat gpu cost, cpu wait time and network dependency for local use")
 
1
+ from googletrans import Translator
2
+ from googletrans import LANGUAGES
3
  import spacy
4
  import gradio as gr
5
  import nltk
 
85
  nlp_zh = spacy.load("zh_core_web_sm")
86
  nlp_en_syllable = spacy.load("en_core_web_sm")
87
  nlp_en_syllable.add_pipe("syllables", after="tagger") #https://spacy.io/universe/project/spacy_syllables/
88
+ langdropdown_choices = [f"{code}: {name}" for code, name in LANGUAGES.items()]
89
 
90
  nlp = spacy.load('en_core_web_sm')
91
+ translator = Translator()
92
 
93
  def Sentencechunker(sentence):
94
  Sentchunks = sentence.split(" ")
 
160
 
161
  # Translate from English to French
162
 
163
+ langdest = gr.Dropdown(choices=langdropdown_choices, label="Choose Language", value="de: german") #["af", "de", "es", "ko", "ja", "zh-cn"]
164
 
165
  ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks", "Spelling Chunks"], label="Choose Chunk Type", value="Chunks")
166
 
 
248
 
249
  return "\n".join(merged_lines)
250
 
251
+ TTSLangOptions = gr.Dropdown(choices=langdropdown_choices, value="en: english", label="choose the language of the srt/text accent") #["en", "de", "es", "ja", "ko", "zh-cn"]
252
+ TTSLangOptions2 = gr.Dropdown(choices=langdropdown_choices, value="en: english", label="choose the language of the srt/text accent")
253
 
254
  def TTSforListeningPractice(text, language = "en", Repeat10x = False):
255
  if Repeat10x:
 
1777
 
1778
  #----------------------------------------------------------------------------------------------------------------------
1779
 
1780
def w4wsidebysidereadergen(text, langdest):
    """Create a word-for-word "side by side" reader for ``text``.

    The text is translated twice with googletrans: once as a whole (foreign
    words in foreign word order, "FWFWO") and once word by word (foreign
    words in native word order, "FWNWO"). The interleaved
    ``original (translation)`` version is also saved as speech to
    ``CurrentSidebySideTTSFile.mp3`` via gTTS.

    Args:
        text: The source text to translate.
        langdest: Destination language, either a bare code (``"de"``) or a
            dropdown value of the form ``"code: name"`` (``"de: german"``).

    Returns:
        A 5-tuple ``(side_by_side, fwnwo, fwfwo, audio_path, analysis_prompt)``.
        For empty or whitespace-only ``text`` the text fields hold only their
        labels, ``audio_path`` is ``None`` and the prompt is empty.
    """
    words = text.split()
    if not words:
        # Nothing to translate or speak; skip the network calls entirely
        # (gTTS cannot synthesise empty text).
        return "Side by Side Version: ", "FWNWO: \n", "FWFWO: \n", None, ""

    # Use the full code before the colon. Slicing the first two characters
    # breaks codes such as "zh-cn" or "ceb".
    lang_code = langdest.partition(":")[0].strip()

    w4wsidebysidtranslator = Translator()
    translatedFWO = w4wsidebysidtranslator.translate(text, dest=lang_code)
    # Passing the list of words yields one translation object per word.
    translatedNWO = list(w4wsidebysidtranslator.translate(words, dest=lang_code))

    FinalOutput = "".join(f"{obj.origin} ({obj.text}) " for obj in translatedNWO)
    Translated = "FWNWO: \n" + "".join(f"{obj.text} " for obj in translatedNWO)

    # slow must be a real bool: the string "False" is truthy and forced
    # slow speech.
    # NOTE(review): gTTS language codes may differ from googletrans codes
    # for some languages - confirm for the languages you rely on.
    speech = gTTS(text=FinalOutput, lang=lang_code, slow=False)
    speech.save("CurrentSidebySideTTSFile.mp3")

    FinalOutput = "Side by Side Version: " + FinalOutput

    analysisPrompt = f"{ Translated } and \n\nFWFWO: \n{ translatedFWO.text } \n\nForeign Words Native Word Order and Foreign Word Order \nIf you had to make the notes on the word by word considerations to transform FWNWO to FWFWO what would that be? (A simple game idea where your response will be the rubrik to mark the players response against)"
    return FinalOutput, Translated, "FWFWO: \n" + translatedFWO.text, "CurrentSidebySideTTSFile.mp3", analysisPrompt
1806
 
1807
  #https://huggingface.co/spaces/Geonmo/nllb-translation-demo/blob/main/app.py
1808
  def nllbtranscload_models():
 
2110
  gr.HTML('Memorisation by string comparison idea <br><br>Result of prompt chain starting with: Lets say I have the strings "red" and "ppalgan" how can I guess the second from the first from just spelling (eg. similar words and distance in the alphabet, ...), how can I use python to do this i.e. output of no matching letters, closest letter to r, then e, then d, a dictionary of letters that look similar eg. d and p, l and I a and d etc.')
2111
  gr.Interface(fn=letterbased_guess_word, inputs=["text", "text"], outputs="text", description="letter based guess suggestions (one word to one word is the designed use case)")
2112
  gr.HTML("Side by side reading creator (Google Translate) TODO - Roman output of Non roman characters")
2113
+ with gr.Group():
2114
+ with gr.Row():
2115
+ #gr.Interface(fn=w4wsidebysidereadergen, inputs=["text", w4wsidebysidelangdest], outputs=["text", "text", "text"], description="Side by side reading creator")
2116
+ w4wsidebysideinput = gr.Text(label="Paste Text you want to learn here (wordlists and UDHR can be used as )", placeholder="Enter Text Here. One or Two Paragraphs at the longest ideally")
2117
+ with gr.Row():
2118
+ w4wsidebysidelangdest = gr.Dropdown(choices=langdropdown_choices, label="Choose destination language", value="de: german")
2119
+ #w4wsidebysideaudiosidebyside = gr.Checkbox(label="Audio for side by side")
2120
+ w4wsidebysidebtn = gr.Button("Create Side by Side, FWNWO, and FWFWO (Just read start to finish of each till you can read the FWFWO without assistance)")
2121
+ with gr.Row():
2122
+ w4wsidebysideOutput = gr.Text(label="Side by side", placeholder="Side by side will display here")
2123
+ w4wsidebysideFWNWOOutput = gr.Text(label="Foreign Word Native Word Order", placeholder="FW NWO will display here")
2124
+ w4wsidebysideFWFWOOutput = gr.Text(label="Foreign Word Foreign Word Order", placeholder="FW FWO will display here")
2125
+ with gr.Row():
2126
+ w4wsidebysideaudioOutput = gr.Audio(sources=["upload"], label="Side by Side in Audio form")
2127
+ with gr.Row():
2128
+ w4wsidebysideAnalysisPromptOutput = gr.Text(label="Prompt for LLM analysis", placeholder="Prompt for LLM analysis will display here")
2129
+ w4wsidebysidebtn.click(fn=w4wsidebysidereadergen, inputs=[w4wsidebysideinput, w4wsidebysidelangdest], outputs=[w4wsidebysideOutput, w4wsidebysideFWNWOOutput, w4wsidebysideFWFWOOutput, w4wsidebysideaudioOutput, w4wsidebysideAnalysisPromptOutput])
2130
+ with gr.Row():
2131
+ gr.HTML("To be Added")
2132
+ with gr.Row():
2133
+ w4wsidebysideNatSentStructOutput = gr.Text(label="Native Closed class words as Native Sentence Structure", placeholder="Comparison is valuable")
2134
+ w4wsidebysideForSentStructOutput = gr.Text(label="Foreign Closed class words as Foreign Sentence Structure", placeholder="Comparison is valuable")
2135
+ with gr.Row():
2136
+ w4wsidebysideWordCountOutput = gr.Text(label="Word Count", placeholder="Word Count will display here")
2137
+ w4wsidebysideRandomisedOutput = gr.Text(label="Random Order As Test", placeholder="Randomised version (Transition Tab most left bottom) will display here")
2138
+ w4wsidebysideQueGenOutput = gr.Text(label="Questions generation as comprehension test", placeholder="Questions generated will display here")
2139
+ w4wsidebysideUNWFWOOutput = gr.Text(label="HTML as UNWFWO assistant", placeholder="HTML as UNWFWO assistant download will display here")
2140
  gr.HTML("Side by side reading creator (NLLB-600M (+-3gb / setting has 400 tokens? as max length for inference optimisation?)- 200 languages vs 107 in googletrans) - <a href='https://github.com/facebookresearch/fairseq/tree/nllb'> -- Fairseq Github -- </a> | inspired by - <a href='https://huggingface.co/spaces/Geonmo/nllb-translation-demo'> -- Geonmo NLLB Demo -- </a> | <a href='https://huggingface.co/spaces/vutuka/nllb-vutuka-translation'> -- Vutuka demo -- </a>")
2141
  with gr.Group():
2142
  gr.HTML("Under Construction - generator and cpu based to beat gpu cost, cpu wait time and network dependency for local use")