PawinC committed
Commit 0c228e3
Parent(s): befe899

Upload main.py

Files changed (1): app/main.py (+6 −6)
app/main.py CHANGED
@@ -2,10 +2,9 @@
 # coding: utf-8
 from os import listdir
 from os.path import isdir
-from fastapi import FastAPI, HTTPException, Request, responses
+from fastapi import FastAPI, HTTPException, Request, responses, Body
 from fastapi.middleware.cors import CORSMiddleware
 from llama_cpp import Llama
-from fastapi import Body

 from pydantic import BaseModel
 from enum import Enum
@@ -18,7 +17,7 @@ SAllm = Llama(model_path="/models/final-gemma2b_SA-Q8_0.gguf")#,
 # n_ctx=2048, # Uncomment to increase the context window
 #)

-FIllm = Llama(model_path="/models/final-gemma2b_FI-Q8_0.gguf")
+# FIllm = Llama(model_path="/models/final-gemma2b_FI-Q8_0.gguf")

 # def ask(question, max_new_tokens=200):
 #     output = llm(
@@ -108,8 +107,8 @@ def perform_sentiment_analysis(prompt: str = Body(..., embed=True, example="I li
 @app.post('/FI')
 def ask_gemmaFinanceTH(
     prompt: str = Body(..., embed=True, example="What's the best way to invest my money"),
-    temperature: float = 0.5,
-    max_new_tokens: int = 200
+    temperature: float = Body(0.5, embed=True),
+    max_new_tokens: int = Body(200, embed=True)
 ) -> FI_Response:
     """
     Ask a finetuned Gemma a finance-related question, just for fun.
@@ -118,9 +117,10 @@ def ask_gemmaFinanceTH(
     if prompt:
         try:
             print(f'Asking FI with the question "{prompt}"')
+            prompt = f"""###User: {prompt}\n###Assistant:"""
             result = extract_restext(FIllm(prompt, max_tokens=max_new_tokens, temperature=temperature, stop=["###User:", "###Assistant:"], echo=False))
             print(f"Result: {result}")
-            return FI_Response(answer=result, question=prompt)
+            return FI_Response(answer=result, question=prompt, config={"temperature": temperature, "max_new_tokens": max_new_tokens})
         except Exception as e:
             return HTTPException(500, FI_Response(code=500, answer=str(e), question=prompt))
     else:
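The parameter change is the core of this commit: with temperature and max_new_tokens declared via Body(0.5, embed=True) and Body(200, embed=True), FastAPI now reads them from the JSON request body alongside prompt rather than from the query string, and the endpoint echoes them back in the new config field of FI_Response. A minimal sketch of a matching request; the localhost:8000 base URL is an assumption, not part of the commit:

    # Sketch of a call against the updated /FI endpoint.
    # Assumes the app is served at localhost:8000 (e.g. via uvicorn).
    import requests

    resp = requests.post(
        "http://localhost:8000/FI",
        json={
            "prompt": "What's the best way to invest my money",
            "temperature": 0.5,     # previously a query parameter, now in the body
            "max_new_tokens": 200,  # likewise moved into the JSON body
        },
    )
    print(resp.json())  # FI_Response, now including the echoed config dict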
 
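The added templating line wraps the raw question in the same ###User:/###Assistant: turn markers that the stop list already names, so generation halts at the next turn boundary. A quick illustration of what FIllm now receives:

    # What the new templating line produces before the text reaches FIllm.
    prompt = "What's the best way to invest my money"
    prompt = f"""###User: {prompt}\n###Assistant:"""
    print(prompt)
    # ###User: What's the best way to invest my money
    # ###Assistant:

Note that because prompt is reassigned before the return, the question field of FI_Response now carries the templated string rather than the user's original text.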