toaster61 committed on
Commit
c43baac
1 Parent(s): fb62a18
Files changed (2)
  1. Dockerfile +1 -1
  2. gradio_app.py +5 -4
Dockerfile CHANGED
@@ -19,7 +19,7 @@ RUN mkdir translator
 RUN chmod -R 777 translator
 
 # Installing wget and downloading model.
-ADD https://huggingface.co/TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGUF/resolve/main/wizardlm-1.0-uncensored-llama2-13b.Q6_K.gguf /app/model.bin
+ADD https://huggingface.co/TheBloke/dolphin-2.2.1-AshhLimaRP-Mistral-7B-GGUF/resolve/main/dolphin-2.2.1-ashhlimarp-mistral-7b.Q5_0.gguf /app/model.bin
 RUN chmod -R 777 /app/model.bin
 # You can use other models! Or you can comment out these two instructions and include your own model named "model.bin" in the Space/repo/Docker image.
 
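The ADD above bakes the GGUF file into the image at build time. If you would rather fetch it when the container starts, a minimal sketch using the huggingface_hub client could look like the following (not part of this commit; repo_id and filename are copied from the + line, while local_dir and the extra pip dependency are assumptions):

# Hypothetical alternative to the Dockerfile ADD: fetch the model at startup.
# Assumes `pip install huggingface_hub`; the local_dir choice is arbitrary.
from huggingface_hub import hf_hub_download

model_path = hf_hub_download(
    repo_id="TheBloke/dolphin-2.2.1-AshhLimaRP-Mistral-7B-GGUF",
    filename="dolphin-2.2.1-ashhlimarp-mistral-7b.Q5_0.gguf",
    local_dir="/app",  # file lands under /app
)
print(model_path)  # hand this path to Llama(model_path=...)

Either way, the app only cares that a readable model file exists at the path passed to Llama().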
gradio_app.py CHANGED
@@ -17,7 +17,7 @@ print("! SETTING MODEL IN EVALUATION MODE !")
 translator_model.eval()
 print("! INITING LLAMA MODEL !")
 llm = Llama(model_path="./model.bin") # LLaMa model
-llama_model_name = "TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGUF"
+llama_model_name = "TheBloke/dolphin-2.2.1-AshhLimaRP-Mistral-7B-GGUF"
 print("! INITING DONE !")
 
 # Preparing things to work
@@ -25,7 +25,7 @@ translator_tokenizer.src_lang = "en"
 title = "llama.cpp API"
 desc = '''<h1>Hello, world!</h1>
 This is a showcase of how to make your own server with a Llama2 model.<br>
-I'm using a 13B model here just as an example. Also, only CPU power is used here.<br>
+I'm using a 7B model here just as an example. Also, only CPU power is used here.<br>
 But you can use GPU power as well!<br><br>
 <h1>How to GPU?</h1>
 Change <code>`CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS"`</code> in the Dockerfile to <code>`CMAKE_ARGS="-DLLAMA_CUBLAS=on"`</code>. You can also try <code>`DLLAMA_CLBLAST`</code> or <code>`DLLAMA_METAL`</code>.<br><br>
@@ -69,7 +69,7 @@ def generate_answer(request: str, max_tokens: int = 256, language: str = "en", c
     try:
         # Temporary fix until I figure out why the output is sometimes empty.
         counter = 1
-        while True:
+        while counter <= 10:
             logs += f"Attempt {counter} to generate answer...\n"
             output = llm(userPrompt, max_tokens=maxTokens, stop=["User:"], echo=False)
             text = output["choices"][0]["text"]
@@ -77,7 +77,8 @@ def generate_answer(request: str, max_tokens: int = 256, language: str = "en", c
                 break
             counter += 1
         logs += f"Final attempt: {counter}\n"
-
+        if len(text.strip()) <= 1:
+            text = "Sorry, but something went wrong while generating the answer. Try again or fix the code. If you are the maintainer of this Space, look into the logs."
 
     if language in languages and language != "en":
         logs += f"\nTranslating from en to {language}"
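Taken together, the last two hunks cap the retry loop at ten attempts and fall back to an apology message when every attempt returns an empty completion. As a self-contained sketch of that pattern (assuming llama-cpp-python; the cap of 10 and the stop/echo arguments mirror the diff, while the function name generate_with_retry is hypothetical):

# Minimal sketch of the bounded-retry pattern from the hunks above.
# Assumes llama-cpp-python is installed and ./model.bin exists.
from llama_cpp import Llama

llm = Llama(model_path="./model.bin")

def generate_with_retry(user_prompt: str, max_tokens: int = 256) -> str:
    text = ""
    counter = 1
    while counter <= 10:  # capped, instead of the old `while True`
        output = llm(user_prompt, max_tokens=max_tokens, stop=["User:"], echo=False)
        text = output["choices"][0]["text"]
        if len(text.strip()) > 1:  # non-empty completion, stop retrying
            break
        counter += 1
    if len(text.strip()) <= 1:  # every attempt came back empty
        text = "Sorry, but something went wrong while generating the answer."
    return text

The emptiness check is oriented so the fallback only overwrites text when all attempts produced a blank completion; a successful generation passes through untouched.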