PawinC committed
Commit
43bf3d6
1 Parent(s): 2ba1842

Reduce number of models to 2

Files changed (1)
  1. app/main.py +22 -22
app/main.py CHANGED
@@ -15,7 +15,7 @@ from typing import Optional
 print("Loading model...")
 SAllm = Llama(model_path="/models/final-gemma2b_SA-Q8_0.gguf", use_mmap=False, use_mlock=True)
 FIllm = Llama(model_path="/models/final-gemma7b_FI-Q8_0.gguf", use_mmap=False, use_mlock=True)
-WIllm = Llama(model_path="/models/final-GemmaWild7b-Q8_0.gguf", use_mmap=False, use_mlock=True)
+# WIllm = Llama(model_path="/models/final-GemmaWild7b-Q8_0.gguf", use_mmap=False, use_mlock=True)
 # n_gpu_layers=28, # Uncomment to use GPU acceleration
 # seed=1337, # Uncomment to set a specific seed
 # n_ctx=2048, # Uncomment to increase the context window
@@ -45,7 +45,7 @@ def check_sentiment(text):
 print("Testing model...")
 assert "positive" in check_sentiment("ดอกไม้ร้านนี้สวยจัง")
 assert ask_llm(FIllm, "Hello!, How are you today?", max_new_tokens=1) #Just checking that it can run
-assert ask_llm(WIllm, "Hello!, How are you today?", max_new_tokens=1) #Just checking that it can run
+# assert ask_llm(WIllm, "Hello!, How are you today?", max_new_tokens=1) #Just checking that it can run
 print("Ready.")
 
 
@@ -127,23 +127,23 @@ async def ask_gemmaFinanceTH(
         return HTTPException(400, QuestionResponse(code=400, answer="Request argument 'prompt' not provided."))
 
 
-@app.post('/questions/open-ended')
-async def ask_gemmaWild(
-    prompt: str = Body(..., embed=True, example="Why is ice cream so delicious?"),
-    temperature: float = Body(0.5, embed=True),
-    max_new_tokens: int = Body(200, embed=True)
-) -> QuestionResponse:
-    """
-    Ask a finetuned Gemma an open-ended question.
-    NOTICE: IT MAY PRODUCE RANDOM/INACCURATE ANSWERS. PLEASE SEEK PROFESSIONAL ADVICE BEFORE DOING ANYTHING SERIOUS.
-    """
-    if prompt:
-        try:
-            print(f'Asking GemmaWild with the question "{prompt}"')
-            result = ask_llm(WIllm, prompt, max_new_tokens=max_new_tokens, temperature=temperature)
-            print(f"Result: {result}")
-            return QuestionResponse(answer=result, question=prompt, config={"temperature": temperature, "max_new_tokens": max_new_tokens})
-        except Exception as e:
-            return HTTPException(500, QuestionResponse(code=500, answer=str(e), question=prompt))
-    else:
-        return HTTPException(400, QuestionResponse(code=400, answer="Request argument 'prompt' not provided."))
+# @app.post('/questions/open-ended')
+# async def ask_gemmaWild(
+#     prompt: str = Body(..., embed=True, example="Why is ice cream so delicious?"),
+#     temperature: float = Body(0.5, embed=True),
+#     max_new_tokens: int = Body(200, embed=True)
+# ) -> QuestionResponse:
+#     """
+#     Ask a finetuned Gemma an open-ended question.
+#     NOTICE: IT MAY PRODUCE RANDOM/INACCURATE ANSWERS. PLEASE SEEK PROFESSIONAL ADVICE BEFORE DOING ANYTHING SERIOUS.
+#     """
+#     if prompt:
+#         try:
+#             print(f'Asking GemmaWild with the question "{prompt}"')
+#             result = ask_llm(WIllm, prompt, max_new_tokens=max_new_tokens, temperature=temperature)
+#             print(f"Result: {result}")
+#             return QuestionResponse(answer=result, question=prompt, config={"temperature": temperature, "max_new_tokens": max_new_tokens})
+#         except Exception as e:
+#             return HTTPException(500, QuestionResponse(code=500, answer=str(e), question=prompt))
+#     else:
+#         return HTTPException(400, QuestionResponse(code=400, answer="Request argument 'prompt' not provided."))
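For reference, a minimal smoke-test sketch of what this commit implies for clients, assuming the FastAPI instance is importable as app from app.main and that no other routes were touched in this commit (the import path and the example prompt are assumptions, not part of the diff):

# Hypothetical check, not part of the repository: verifies the open-ended route is gone.
from fastapi.testclient import TestClient  # requires httpx to be installed

from app.main import app  # assumed import path; importing it loads the two remaining GGUF models

client = TestClient(app)

# The '/questions/open-ended' endpoint is commented out above, so FastAPI should now return 404 for it.
response = client.post(
    "/questions/open-ended",
    json={"prompt": "Why is ice cream so delicious?"},
)
assert response.status_code == 404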