Spaces:
Running
Running
kwabs22
committed on
Commit
•
13c20b1
1
Parent(s):
ba25e9b
GPT4 suggested missing file in dockerfile
Browse files- Dockerfile +13 -9
- app.py +60 -60
Dockerfile
CHANGED
@@ -2,9 +2,10 @@ FROM python:3.9
|
|
2 |
|
3 |
WORKDIR /code
|
4 |
|
5 |
-
# Install dependencies for OpenCV
|
6 |
RUN apt-get update && apt-get install -y \
|
7 |
libgl1-mesa-glx \
|
|
|
8 |
&& rm -rf /var/lib/apt/lists/*
|
9 |
|
10 |
# Create a virtual environment and activate it
|
@@ -23,20 +24,23 @@ RUN python -m spacy download zh_core_web_sm
|
|
23 |
RUN python -m spacy download es_core_news_sm
|
24 |
RUN python -m spacy download de_core_news_sm
|
25 |
|
|
|
|
|
|
|
26 |
# Set up a new user named "user" with user ID 1000
|
27 |
RUN useradd -m -u 1000 user
|
28 |
# Switch to the "user" user
|
29 |
USER user
|
30 |
# Set home to the user's home directory
|
31 |
ENV HOME=/home/user \
|
32 |
-
|
33 |
PYTHONPATH=$HOME/app \
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
|
41 |
# Set the working directory to the user's home directory
|
42 |
WORKDIR $HOME/app
|
@@ -44,4 +48,4 @@ WORKDIR $HOME/app
|
|
44 |
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
45 |
COPY --chown=user . $HOME/app
|
46 |
|
47 |
-
CMD ["python", "app.py"]
|
|
|
2 |
|
3 |
WORKDIR /code
|
4 |
|
5 |
+
# Install dependencies for OpenCV and curl for downloading files
|
6 |
RUN apt-get update && apt-get install -y \
|
7 |
libgl1-mesa-glx \
|
8 |
+
curl \
|
9 |
&& rm -rf /var/lib/apt/lists/*
|
10 |
|
11 |
# Create a virtual environment and activate it
|
|
|
24 |
RUN python -m spacy download es_core_news_sm
|
25 |
RUN python -m spacy download de_core_news_sm
|
26 |
|
27 |
+
# Download and place the frpc_linux_amd64_v0.2 file
|
28 |
+
RUN curl -L https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_linux_amd64 -o /opt/venv/lib/python3.9/site-packages/gradio/frpc_linux_amd64_v0.2
|
29 |
+
|
30 |
# Set up a new user named "user" with user ID 1000
|
31 |
RUN useradd -m -u 1000 user
|
32 |
# Switch to the "user" user
|
33 |
USER user
|
34 |
# Set home to the user's home directory
|
35 |
ENV HOME=/home/user \
|
36 |
+
PATH=/home/user/.local/bin:$PATH \
|
37 |
PYTHONPATH=$HOME/app \
|
38 |
+
PYTHONUNBUFFERED=1 \
|
39 |
+
GRADIO_ALLOW_FLAGGING=never \
|
40 |
+
GRADIO_NUM_PORTS=1 \
|
41 |
+
GRADIO_SERVER_NAME=0.0.0.0 \
|
42 |
+
GRADIO_THEME=huggingface \
|
43 |
+
SYSTEM=spaces
|
44 |
|
45 |
# Set the working directory to the user's home directory
|
46 |
WORKDIR $HOME/app
|
|
|
48 |
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
49 |
COPY --chown=user . $HOME/app
|
50 |
|
51 |
+
CMD ["python", "app.py"]
|
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
-
|
2 |
-
|
3 |
import spacy
|
4 |
import gradio as gr
|
5 |
import nltk
|
@@ -85,10 +85,10 @@ nlp_ja = spacy.load("ja_core_news_sm")
|
|
85 |
nlp_zh = spacy.load("zh_core_web_sm")
|
86 |
nlp_en_syllable = spacy.load("en_core_web_sm")
|
87 |
nlp_en_syllable.add_pipe("syllables", after="tagger") #https://spacy.io/universe/project/spacy_syllables/
|
88 |
-
|
89 |
|
90 |
nlp = spacy.load('en_core_web_sm')
|
91 |
-
|
92 |
|
93 |
def Sentencechunker(sentence):
|
94 |
Sentchunks = sentence.split(" ")
|
@@ -160,7 +160,7 @@ def BatchWordChunk(sentence):
|
|
160 |
|
161 |
# Translate from English to French
|
162 |
|
163 |
-
|
164 |
|
165 |
ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks", "Spelling Chunks"], label="Choose Chunk Type", value="Chunks")
|
166 |
|
@@ -248,8 +248,8 @@ def merge_lines(roman_file, w4w_file, full_mean_file, macaronic_file):
|
|
248 |
|
249 |
return "\n".join(merged_lines)
|
250 |
|
251 |
-
|
252 |
-
|
253 |
|
254 |
def TTSforListeningPractice(text, language = "en", Repeat10x = False):
|
255 |
if Repeat10x:
|
@@ -1777,32 +1777,32 @@ def lingualinkassist(text, language):
|
|
1777 |
|
1778 |
#----------------------------------------------------------------------------------------------------------------------
|
1779 |
|
1780 |
-
|
1781 |
-
#
|
1782 |
-
|
1783 |
-
|
1784 |
-
|
1785 |
-
|
1786 |
-
|
1787 |
-
|
1788 |
-
#
|
1789 |
-
#
|
1790 |
-
#
|
1791 |
-
|
1792 |
-
#
|
1793 |
-
#
|
1794 |
-
#
|
1795 |
-
#
|
1796 |
-
#
|
1797 |
-
|
1798 |
-
|
1799 |
-
|
1800 |
-
|
1801 |
-
|
1802 |
-
|
1803 |
-
|
1804 |
-
|
1805 |
-
|
1806 |
|
1807 |
#https://huggingface.co/spaces/Geonmo/nllb-translation-demo/blob/main/app.py
|
1808 |
def nllbtranscload_models():
|
@@ -2110,33 +2110,33 @@ with gr.Blocks() as lliface: #theme=gr.themes.Glass(primary_hue='green', seconda
|
|
2110 |
gr.HTML('Memorisation by string comparison idea <br><br>Result of prompt chain starting with: Lets say I have the strings "red" and "ppalgan" how can I guess the second from the first from just spelling (eg. similar words and distance in the alphabet, ...), how can I use python to do this i.e. output of no matching letters, closest letter to r, then e, then d, a dictionary of letters that look similar eg. d and p, l and I a and d etc.')
|
2111 |
gr.Interface(fn=letterbased_guess_word, inputs=["text", "text"], outputs="text", description="letter based guess suggestions (one word to one word is the designed use case)")
|
2112 |
gr.HTML("Side by side reading creator (Google Translate) TODO - Roman output of Non roman characters")
|
2113 |
-
|
2114 |
-
|
2115 |
-
|
2116 |
-
|
2117 |
-
|
2118 |
-
|
2119 |
-
|
2120 |
-
|
2121 |
-
|
2122 |
-
|
2123 |
-
|
2124 |
-
|
2125 |
-
|
2126 |
-
|
2127 |
-
|
2128 |
-
|
2129 |
-
|
2130 |
-
|
2131 |
-
|
2132 |
-
|
2133 |
-
|
2134 |
-
|
2135 |
-
|
2136 |
-
|
2137 |
-
|
2138 |
-
|
2139 |
-
|
2140 |
gr.HTML("Side by side reading creator (NLLB-600M (+-3gb / setting has 400 tokens? as max length for inference optimisation?)- 200 languages vs 107 in googletrans) - <a href='https://github.com/facebookresearch/fairseq/tree/nllb'> -- Fairseq Github -- </a> | inspired by - <a href='https://huggingface.co/spaces/Geonmo/nllb-translation-demo'> -- Geonmo NLLB Demo -- </a> | <a href='https://huggingface.co/spaces/vutuka/nllb-vutuka-translation'> -- Vutuka demo -- </a>")
|
2141 |
with gr.Group():
|
2142 |
gr.HTML("Under Construction - generator and cpu based to beat gpu cost, cpu wait time and network dependency for local use")
|
|
|
1 |
+
from googletrans import Translator
|
2 |
+
from googletrans import LANGUAGES
|
3 |
import spacy
|
4 |
import gradio as gr
|
5 |
import nltk
|
|
|
85 |
nlp_zh = spacy.load("zh_core_web_sm")
|
86 |
nlp_en_syllable = spacy.load("en_core_web_sm")
|
87 |
nlp_en_syllable.add_pipe("syllables", after="tagger") #https://spacy.io/universe/project/spacy_syllables/
|
88 |
+
langdropdown_choices = [f"{code}: {name}" for code, name in LANGUAGES.items()]
|
89 |
|
90 |
nlp = spacy.load('en_core_web_sm')
|
91 |
+
translator = Translator()
|
92 |
|
93 |
def Sentencechunker(sentence):
|
94 |
Sentchunks = sentence.split(" ")
|
|
|
160 |
|
161 |
# Translate from English to French
|
162 |
|
163 |
+
langdest = gr.Dropdown(choices=langdropdown_choices, label="Choose Language", value="de: german") #["af", "de", "es", "ko", "ja", "zh-cn"]
|
164 |
|
165 |
ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks", "Spelling Chunks"], label="Choose Chunk Type", value="Chunks")
|
166 |
|
|
|
248 |
|
249 |
return "\n".join(merged_lines)
|
250 |
|
251 |
+
TTSLangOptions = gr.Dropdown(choices=langdropdown_choices, value="en: english", label="choose the language of the srt/text accent") #["en", "de", "es", "ja", "ko", "zh-cn"]
|
252 |
+
TTSLangOptions2 = gr.Dropdown(choices=langdropdown_choices, value="en: english", label="choose the language of the srt/text accent")
|
253 |
|
254 |
def TTSforListeningPractice(text, language = "en", Repeat10x = False):
|
255 |
if Repeat10x:
|
|
|
1777 |
|
1778 |
#----------------------------------------------------------------------------------------------------------------------
|
1779 |
|
1780 |
+
def w4wsidebysidereadergen(text, langdest):
    """Build side-by-side reading aids for *text* translated into *langdest*.

    Parameters:
        text: source text to translate (split on whitespace for the
            word-by-word pass).
        langdest: dropdown value of the form "<code>: <name>",
            e.g. "de: german" — the language code before the colon is used.

    Returns a 5-tuple:
        (side-by-side text, FWNWO text, FWFWO text,
         path to the generated TTS mp3, LLM analysis prompt)

    FWNWO = Foreign Words, Native Word Order (word-by-word translation);
    FWFWO = Foreign Words, Foreign Word Order (whole-text translation).
    Side effect: writes "CurrentSidebySideTTSFile.mp3" in the CWD.
    """
    #FrontRevSentChunk as reference
    # Take the full language code before the colon. The previous
    # `langdest[:2]` slice broke multi-part codes such as "zh-cn"
    # (truncated to "zh"); for two-letter codes the result is unchanged.
    lang_code = langdest.split(":")[0].strip()
    FinalOutput = ""
    Translated = "FWNWO: \n"
    words = text.split()
    w4wsidebysidtranslator = Translator()
    # Whole-text translation -> foreign word order (FWFWO).
    translatedFWO = w4wsidebysidtranslator.translate(text, dest=lang_code)
    # Per-word translation (list input) -> native word order (FWNWO).
    translatedNWO = w4wsidebysidtranslator.translate(words, dest=lang_code) #src or dest
    #print(translated)
    #print(dir(translatedNWO[0]), "\n")
    #FinalOutput += "\n" + translated.text
    for obj in translatedNWO:
        # print(f"Original Text: {obj.origin}")
        # print(f"Translated Text: {obj.text}")
        # print(f"Source Language: {obj.src}")
        # print(f"Destination Language: {obj.dest}")
        # print(f"Pronunciation: {obj.pronunciation}\n")
        FinalOutput += obj.origin + f" ({obj.text}) "
        Translated += obj.text + " "
    # BUGFIX: the original passed slow="False" — a non-empty string is
    # truthy, so gTTS produced SLOW speech. `slow` must be a boolean.
    speech = gTTS(text=FinalOutput, lang=lang_code, slow=False)
    speech.save("CurrentSidebySideTTSFile.mp3")

    FinalOutput = "Side by Side Version: " + FinalOutput

    analysisPrompt = f"{ Translated } and \n\nFWFWO: \n{ translatedFWO.text } \n\nForeign Words Native Word Order and Foreign Word Order \nIf you had to make the notes on the word by word considerations to transform FWNWO to FWFWO what would that be? (A simple game idea where your response will be the rubrik to mark the players response against)"
    return FinalOutput, Translated, "FWFWO: \n" + translatedFWO.text, "CurrentSidebySideTTSFile.mp3", analysisPrompt
|
1806 |
|
1807 |
#https://huggingface.co/spaces/Geonmo/nllb-translation-demo/blob/main/app.py
|
1808 |
def nllbtranscload_models():
|
|
|
2110 |
gr.HTML('Memorisation by string comparison idea <br><br>Result of prompt chain starting with: Lets say I have the strings "red" and "ppalgan" how can I guess the second from the first from just spelling (eg. similar words and distance in the alphabet, ...), how can I use python to do this i.e. output of no matching letters, closest letter to r, then e, then d, a dictionary of letters that look similar eg. d and p, l and I a and d etc.')
|
2111 |
gr.Interface(fn=letterbased_guess_word, inputs=["text", "text"], outputs="text", description="letter based guess suggestions (one word to one word is the designed use case)")
|
2112 |
gr.HTML("Side by side reading creator (Google Translate) TODO - Roman output of Non roman characters")
|
2113 |
+
with gr.Group():
|
2114 |
+
with gr.Row():
|
2115 |
+
#gr.Interface(fn=w4wsidebysidereadergen, inputs=["text", w4wsidebysidelangdest], outputs=["text", "text", "text"], description="Side by side reading creator")
|
2116 |
+
w4wsidebysideinput = gr.Text(label="Paste Text you want to learn here (wordlists and UDHR can be used as )", placeholder="Enter Text Here. One or Two Paragraphs at the longest ideally")
|
2117 |
+
with gr.Row():
|
2118 |
+
w4wsidebysidelangdest = gr.Dropdown(choices=langdropdown_choices, label="Choose destination language", value="de: german")
|
2119 |
+
#w4wsidebysideaudiosidebyside = gr.Checkbox(label="Audio for side by side")
|
2120 |
+
w4wsidebysidebtn = gr.Button("Create Side by Side, FWNWO, and FWFWO (Just read start to finish of each till you can read the FWFWO without assistance)")
|
2121 |
+
with gr.Row():
|
2122 |
+
w4wsidebysideOutput = gr.Text(label="Side by side", placeholder="Side by side will display here")
|
2123 |
+
w4wsidebysideFWNWOOutput = gr.Text(label="Foreign Word Native Word Order", placeholder="FW NWO will display here")
|
2124 |
+
w4wsidebysideFWFWOOutput = gr.Text(label="Foreign Word Foreign Word Order", placeholder="FW FWO will display here")
|
2125 |
+
with gr.Row():
|
2126 |
+
w4wsidebysideaudioOutput = gr.Audio(sources=["upload"], label="Side by Side in Audio form")
|
2127 |
+
with gr.Row():
|
2128 |
+
w4wsidebysideAnalysisPromptOutput = gr.Text(label="Prompt for LLM analysis", placeholder="Prompt for LLM analysis will display here")
|
2129 |
+
w4wsidebysidebtn.click(fn=w4wsidebysidereadergen, inputs=[w4wsidebysideinput, w4wsidebysidelangdest], outputs=[w4wsidebysideOutput, w4wsidebysideFWNWOOutput, w4wsidebysideFWFWOOutput, w4wsidebysideaudioOutput, w4wsidebysideAnalysisPromptOutput])
|
2130 |
+
with gr.Row():
|
2131 |
+
gr.HTML("To be Added")
|
2132 |
+
with gr.Row():
|
2133 |
+
w4wsidebysideNatSentStructOutput = gr.Text(label="Native Closed class words as Native Sentence Structure", placeholder="Comparison is valuable")
|
2134 |
+
w4wsidebysideForSentStructOutput = gr.Text(label="Foreign Closed class words as Foreign Sentence Structure", placeholder="Comparison is valuable")
|
2135 |
+
with gr.Row():
|
2136 |
+
w4wsidebysideWordCountOutput = gr.Text(label="Word Count", placeholder="Word Count will display here")
|
2137 |
+
w4wsidebysideRandomisedOutput = gr.Text(label="Random Order As Test", placeholder="Randomised version (Transition Tab most left bottom) will display here")
|
2138 |
+
w4wsidebysideQueGenOutput = gr.Text(label="Questions generation as comprehension test", placeholder="Questions generated will display here")
|
2139 |
+
w4wsidebysideUNWFWOOutput = gr.Text(label="HTML as UNWFWO assistant", placeholder="HTML as UNWFWO assistant download will display here")
|
2140 |
gr.HTML("Side by side reading creator (NLLB-600M (+-3gb / setting has 400 tokens? as max length for inference optimisation?)- 200 languages vs 107 in googletrans) - <a href='https://github.com/facebookresearch/fairseq/tree/nllb'> -- Fairseq Github -- </a> | inspired by - <a href='https://huggingface.co/spaces/Geonmo/nllb-translation-demo'> -- Geonmo NLLB Demo -- </a> | <a href='https://huggingface.co/spaces/vutuka/nllb-vutuka-translation'> -- Vutuka demo -- </a>")
|
2141 |
with gr.Group():
|
2142 |
gr.HTML("Under Construction - generator and cpu based to beat gpu cost, cpu wait time and network dependency for local use")
|