toaster61 committed · commit 43a82b2 · 1 parent: a098627
add fix (maybe) + adding new languages for translator

Files changed:
- Dockerfile (+1 -3)
- gradio_app.py (+18 -6)
Dockerfile
CHANGED
@@ -19,10 +19,8 @@ RUN mkdir translator
 RUN chmod -R 777 translator
 
 # Installing wget and downloading model.
-#RUN apt install wget -y
-#RUN wget -q -O model.bin https://huggingface.co/TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGUF/resolve/main/wizardlm-1.0-uncensored-llama2-13b.Q5_K_M.gguf
-#RUN ls
 ADD https://huggingface.co/TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGUF/resolve/main/wizardlm-1.0-uncensored-llama2-13b.Q5_K_M.gguf /app/model.bin
+RUN chmod -R 777 /app/model.bin
 # You can use other models! Or u can comment this two RUNs and include in Space/repo/Docker image own model with name "model.bin".
 
 # Updating pip and installing everything from requirements
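The net effect of this hunk: instead of the commented-out wget steps, the image now fetches the GGUF weights at build time via ADD and makes the file world-readable. For orientation, a minimal sketch of how gradio_app.py presumably consumes /app/model.bin through llama-cpp-python (the prompt string and constructor defaults here are illustrative assumptions, not taken from this commit):

from llama_cpp import Llama

# The Dockerfile above ADDs the GGUF weights as /app/model.bin (assumed load path).
llm = Llama(model_path="/app/model.bin")

# Same call shape as the llm(...) call visible in generate_answer() below.
output = llm("User: Hello!\nAssistant:", max_tokens=64, stop=["User:"], echo=False)
print(output["choices"][0]["text"])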
gradio_app.py
CHANGED
@@ -23,8 +23,7 @@ print("! INITING DONE !")
 # Preparing things to work
 translator_tokenizer.src_lang = "en"
 title = "llama.cpp API"
-desc = '''<
-<h1>Hello, world!</h1>
+desc = '''<h1>Hello, world!</h1>
 This is showcase how to make own server with Llama2 model.<br>
 I'm using here 7b model just for example. Also here's only CPU power.<br>
 But you can use GPU power as well!<br>
@@ -37,6 +36,21 @@ Or you can once follow steps in Dockerfile and try it on your machine, not in Do
 <br>''' + f"Memory used: {psutil.virtual_memory()[2]}<br>" + '''
 <script>document.write("<b>URL of space:</b> "+window.location.href);</script>'''
 
+'''
+# Defining languages for translator (i just chose popular on my opinion languages!!!)
+ru - Russian
+uk - Ukranian
+zh - Chinese
+de - German
+fr - French
+hi - Hindi
+it - Italian
+ja - Japanese
+es - Spanish
+ar - Arabic
+'''
+languages = ["ru", "uk", "zh", "de", "fr", "hi", "it", "ja", "es", "ar"]
+
 # Loading prompt
 with open('system.prompt', 'r', encoding='utf-8') as f:
     prompt = f.read()
@@ -54,9 +68,7 @@ def generate_answer(request: str, max_tokens: int = 256, language: str = "en", c
     try:
         output = llm(userPrompt, max_tokens=maxTokens, stop=["User:"], echo=False)
         text = output["choices"][0]["text"]
-
-        # russian (ru), ukranian (uk), chinese (zh)
-        if language in ["ru", "uk", "zh"]:
+        if language in languages:
             encoded_input = translator_tokenizer(text, return_tensors="pt")
             generated_tokens = translator_model.generate(
                 **encoded_input, forced_bos_token_id=translator_tokenizer.get_lang_id(language)
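The get_lang_id / forced_bos_token_id pattern in this hunk matches the M2M100 translation API in transformers, which is why widening the check from three hard-coded codes to the `languages` list is enough. A self-contained sketch of the same translation step, assuming the facebook/m2m100_418M checkpoint (an assumption: the commit shows the translator being used, not loaded):

from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

# Assumed checkpoint; swap in whatever gradio_app.py actually loads.
translator_model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
translator_tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
translator_tokenizer.src_lang = "en"  # the LLM answers in English before translation

text = "Hello, world!"
language = "de"  # any entry from the new languages list

encoded_input = translator_tokenizer(text, return_tensors="pt")
generated_tokens = translator_model.generate(
    **encoded_input, forced_bos_token_id=translator_tokenizer.get_lang_id(language)
)
print(translator_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0])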
@@ -76,7 +88,7 @@ demo = gr.Interface(
     inputs=[
         gr.components.Textbox(label="Input"),
         gr.components.Number(value=256),
-        gr.components.Dropdown(label="Target Language", value="en", choices=["en", "ru", "uk", "zh"]),
+        gr.components.Dropdown(label="Target Language", value="en", choices=["en"]+languages),
         gr.components.Textbox(label="Custom system prompt"),
     ],
     outputs=["text"],
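With the Dropdown now built from ["en"] + languages, the selected code flows straight into generate_answer's language parameter. A minimal runnable sketch of that wiring (the stub fn stands in for the real llm-plus-translator body; only the component labels and values are taken from the diff):

import gradio as gr

languages = ["ru", "uk", "zh", "de", "fr", "hi", "it", "ja", "es", "ar"]

def generate_answer(request, max_tokens=256, language="en", custom_prompt=""):
    # Stub: the real function calls llm(...) and translates when language != "en".
    return f"[{language}] {request}"

demo = gr.Interface(
    fn=generate_answer,
    inputs=[
        gr.components.Textbox(label="Input"),
        gr.components.Number(value=256),
        gr.components.Dropdown(label="Target Language", value="en", choices=["en"] + languages),
        gr.components.Textbox(label="Custom system prompt"),
    ],
    outputs=["text"],
)
demo.launch()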