PawinC committed on
Commit
8b750c3
1 Parent(s): 0c228e3

Update app/main.py

Files changed (1)
  1. app/main.py +9 -4
app/main.py CHANGED
@@ -11,13 +11,13 @@ from enum import Enum
 from typing import Optional
 
 print("Loading model...")
-SAllm = Llama(model_path="/models/final-gemma2b_SA-Q8_0.gguf")#,
+SAllm = Llama(model_path="/models/final-gemma2b_SA-Q8_0.gguf", mmap=False, mlock=True)
 # n_gpu_layers=28, # Uncomment to use GPU acceleration
 # seed=1337, # Uncomment to set a specific seed
 # n_ctx=2048, # Uncomment to increase the context window
 #)
 
-# FIllm = Llama(model_path="/models/final-gemma2b_FI-Q8_0.gguf")
+FIllm = Llama(model_path="/models/final-gemma7b_FI-Q8_0.gguf", mmap=False, mlock=True)
 
 # def ask(question, max_new_tokens=200):
 #     output = llm(
@@ -32,6 +32,11 @@ SAllm = Llama(model_path="/models/final-gemma2b_SA-Q8_0.gguf")#,
 def extract_restext(response):
     return response['choices'][0]['text'].strip()
 
+def ask_fi(question, max_new_tokens=200, temperature=0.5):
+    prompt = f"""###User: {question}\n###Assistant:"""
+    result = extract_restext(FIllm(prompt, max_tokens=max_new_tokens, temperature=temperature, stop=["###User:", "###Assistant:"], echo=False))
+    return result
+
 def check_sentiment(text):
     prompt = f'Analyze the sentiment of the tweet enclosed in square brackets, determine if it is positive or negative, and return the answer as the corresponding sentiment label "positive" or "negative" [{text}] ='
     response = SAllm(prompt, max_tokens=3, stop=["\n"], echo=False, temperature=0.5)
@@ -47,6 +52,7 @@ def check_sentiment(text):
 
 print("Testing model...")
 assert "positive" in check_sentiment("ดอกไม้ร้านนี้สวยจัง")
+assert ask_fi("Hello!, How are you today?")
 print("Ready.")
 
 app = FastAPI(
@@ -117,8 +123,7 @@ def ask_gemmaFinanceTH(
     if prompt:
         try:
             print(f'Asking FI with the question "{prompt}"')
-            prompt = f"""###User: {prompt}\n###Assistant:"""
-            result = extract_restext(FIllm(prompt, max_tokens=max_new_tokens, temperature=temperature, stop=["###User:", "###Assistant:"], echo=False))
+            result = ask_fi(prompt, max_new_tokens=max_new_tokens, temperature=temperature)
             print(f"Result: {result}")
             return FI_Response(answer=result, question=prompt, config={"temperature": temperature, "max_new_tokens": max_new_tokens})
         except Exception as e:
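
For reference, here is a minimal standalone sketch of the ask_fi flow this commit introduces, assuming the llama-cpp-python bindings and the model path shown above. Note that llama-cpp-python documents its memory flags as use_mmap / use_mlock; the mmap / mlock spelling in the commit may be rejected or silently ignored depending on the binding version, so the sketch uses the documented names. The example question is illustrative only.

# Standalone sketch (not part of the commit): load the finance model and query it once.
from llama_cpp import Llama

# llama-cpp-python exposes the memory flags as use_mmap / use_mlock.
FIllm = Llama(
    model_path="/models/final-gemma7b_FI-Q8_0.gguf",
    use_mmap=False,  # read the whole GGUF into RAM instead of memory-mapping it
    use_mlock=True,  # pin the weights in RAM so they cannot be swapped out
)

def extract_restext(response):
    # llama-cpp-python returns an OpenAI-style completion dict.
    return response['choices'][0]['text'].strip()

def ask_fi(question, max_new_tokens=200, temperature=0.5):
    # Same prompt template as the commit: stop on the role markers so the
    # model does not continue generating a fake multi-turn conversation.
    prompt = f"###User: {question}\n###Assistant:"
    output = FIllm(prompt, max_tokens=max_new_tokens, temperature=temperature,
                   stop=["###User:", "###Assistant:"], echo=False)
    return extract_restext(output)

# Mirrors the new startup assertion: a non-empty answer means the model responded.
assert ask_fi("Hello! How are you today?")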