PawinC's picture
Reduce number of models to 2
43bf3d6 verified
raw
history blame
No virus
5.71 kB
#!/usr/bin/env python
# coding: utf-8
from os import listdir
from os.path import isdir
from fastapi import FastAPI, HTTPException, Request, responses, Body
from fastapi.middleware.cors import CORSMiddleware
from llama_cpp import Llama
from pydantic import BaseModel
from enum import Enum
from typing import Optional
# MODEL LOADING, FUNCTIONS, AND TESTING
# Both GGUF models are loaded eagerly at import time, so the process fails
# fast if a model file is missing.  use_mmap=False + use_mlock=True asks
# llama.cpp to read the weights fully into locked RAM (no mmap paging) --
# slower start-up in exchange for steadier inference latency.
print("Loading model...")
# Sentiment-analysis model: Gemma 2B finetune, 8-bit quantized.
SAllm = Llama(model_path="/models/final-gemma2b_SA-Q8_0.gguf", use_mmap=False, use_mlock=True)
# Finance Q&A model: Gemma 7B finetune, 8-bit quantized.
FIllm = Llama(model_path="/models/final-gemma7b_FI-Q8_0.gguf", use_mmap=False, use_mlock=True)
# Third "open-ended" model disabled to fit in memory (see commented route below).
# WIllm = Llama(model_path="/models/final-GemmaWild7b-Q8_0.gguf", use_mmap=False, use_mlock=True)
# n_gpu_layers=28, # Uncomment to use GPU acceleration
# seed=1337, # Uncomment to set a specific seed
# n_ctx=2048, # Uncomment to increase the context window
#)
def extract_restext(response):
    """Pull the generated text out of a llama.cpp completion dict, trimmed of whitespace."""
    first_choice = response["choices"][0]
    return first_choice["text"].strip()
def ask_llm(llm, question, max_new_tokens=200, temperature=0.5):
    """Answer *question* with *llm* using the ###User/###Assistant prompt format.

    Generation stops as soon as the model starts a new turn marker, and the
    stripped completion text is returned.
    """
    prompt = f"""###User: {question}\n###Assistant:"""
    completion = llm(
        prompt,
        max_tokens=max_new_tokens,
        temperature=temperature,
        stop=["###User:", "###Assistant:"],
        echo=False,
    )
    return extract_restext(completion)
def check_sentiment(text):
    """Classify *text* with the sentiment model.

    Returns "positive" or "negative" when the model output contains that
    label, and "unknown" otherwise.
    """
    prompt = f'Analyze the sentiment of the tweet enclosed in square brackets, determine if it is positive or negative, and return the answer as the corresponding sentiment label "positive" or "negative" [{text}] ='
    # 3 tokens is enough for a single label; stop at the first newline.
    raw = SAllm(prompt, max_tokens=3, stop=["\n"], echo=False, temperature=0.5)
    label_text = extract_restext(raw)
    # Check "positive" before "negative", matching the original precedence.
    for label in ("positive", "negative"):
        if label in label_text:
            return label
    return "unknown"
# TESTING THE MODEL
# Import-time smoke tests: if either model cannot produce output, the process
# dies here instead of serving broken endpoints.
# NOTE(review): `assert` is stripped under `python -O`, so these checks vanish
# in optimized runs.
print("Testing model...")
# Thai sample input expected to classify as positive.
assert "positive" in check_sentiment("ดอกไม้ร้านนี้สวยจัง")
assert ask_llm(FIllm, "Hello!, How are you today?", max_new_tokens=1) #Just checking that it can run
# assert ask_llm(WIllm, "Hello!, How are you today?", max_new_tokens=1) #Just checking that it can run
print("Ready.")
# START OF FASTAPI APP
app = FastAPI(
    version="1.0.0",
    title="Gemma Finetuned API",
    description="Gemma Finetuned API for Sentiment Analysis and Finance Questions.",
)

# Public demo API: accept cross-origin requests from any site.
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
# API DATA CLASSES
class SA_Result(str, Enum):
    """Closed set of labels produced by check_sentiment().

    The str mixin makes members JSON-serialize as plain strings in responses.
    """
    positive = "positive"
    negative = "negative"
    unknown = "unknown"
class SAResponse(BaseModel):
    """Response body for the /classifications/sentiment endpoint."""
    # HTTP-style status code echoed in the body.
    code: int = 200
    # The analyzed prompt, echoed back to the caller.
    text: Optional[str] = None
    # Explicit Optional: the original `SA_Result = None` annotation relied on
    # pydantic v1 implicitly permitting a None default on a non-optional
    # field, which fails under stricter validation (pydantic v2).
    result: Optional[SA_Result] = None
class QuestionResponse(BaseModel):
    """Response body for the question-answering endpoints."""
    # HTTP-style status code echoed in the body.
    code: int = 200
    # The question that was asked, echoed back to the caller.
    question: Optional[str] = None
    # Explicit Optional: the original `str = None` annotation relied on
    # pydantic v1 implicitly permitting a None default on a non-optional
    # field, which fails under stricter validation (pydantic v2).
    answer: Optional[str] = None
    # Generation settings used for this answer (temperature, max_new_tokens).
    config: Optional[dict] = None
# API ROUTES
@app.get('/')
def docs():
    """Redirect the bare root URL to the interactive API docs page."""
    docs_redirect = responses.RedirectResponse('./docs')
    return docs_redirect
@app.post('/classifications/sentiment')
async def perform_sentiment_analysis(prompt: str = Body(..., embed=True, example="I like eating fried chicken")) -> SAResponse:
    """Performs a sentiment analysis using a finetuned version of Gemma-7b.

    Returns an SAResponse whose ``result`` is "positive", "negative", or
    "unknown".  Raises HTTP 400 for an empty prompt and HTTP 500 when
    inference fails.
    """
    # BUG FIX: the original code *returned* HTTPException objects.  FastAPI
    # only produces an error response when the exception is *raised*; a
    # returned exception gets serialized against the SAResponse response
    # model and fails.  The 500 branch also put a pydantic model (with an
    # invalid result=str(e)) into the detail -- use a plain string instead.
    if not prompt:
        raise HTTPException(status_code=400, detail="Request argument 'prompt' not provided.")
    try:
        print(f"Checking sentiment for {prompt}")
        result = check_sentiment(prompt)
        print(f"Result: {result}")
        return SAResponse(result=result, text=prompt)
    except Exception as e:
        # Surface model/runtime failures as a proper 500 error response.
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.post('/questions/finance')
async def ask_gemmaFinanceTH(
    prompt: str = Body(..., embed=True, example="What's the best way to invest my money"),
    temperature: float = Body(0.5, embed=True),
    max_new_tokens: int = Body(200, embed=True)
) -> QuestionResponse:
    """
    Ask a finetuned Gemma a finance-related question, just for fun.
    NOTICE: IT MAY PRODUCE RANDOM/INACCURATE ANSWERS. PLEASE SEEK PROFESSIONAL ADVICE BEFORE DOING ANYTHING SERIOUS.
    """
    # BUG FIX: the original code *returned* HTTPException objects.  FastAPI
    # only produces an error response when the exception is *raised*; a
    # returned exception would instead fail QuestionResponse response-model
    # serialization.  Error details are plain strings, as FastAPI expects.
    if not prompt:
        raise HTTPException(status_code=400, detail="Request argument 'prompt' not provided.")
    try:
        print(f'Asking GemmaFinance with the question "{prompt}"')
        result = ask_llm(FIllm, prompt, max_new_tokens=max_new_tokens, temperature=temperature)
        print(f"Result: {result}")
        return QuestionResponse(
            answer=result,
            question=prompt,
            config={"temperature": temperature, "max_new_tokens": max_new_tokens},
        )
    except Exception as e:
        # Surface model/runtime failures as a proper 500 error response.
        raise HTTPException(status_code=500, detail=str(e)) from e
# @app.post('/questions/open-ended')
# async def ask_gemmaWild(
# prompt: str = Body(..., embed=True, example="Why is ice cream so delicious?"),
# temperature: float = Body(0.5, embed=True),
# max_new_tokens: int = Body(200, embed=True)
# ) -> QuestionResponse:
# """
# Ask a finetuned Gemma an open-ended question..
# NOTICE: IT MAY PRODUCE RANDOM/INACCURATE ANSWERS. PLEASE SEEK PROFESSIONAL ADVICE BEFORE DOING ANYTHING SERIOUS.
# """
# if prompt:
# try:
# print(f'Asking GemmaWild with the question "{prompt}"')
# result = ask_llm(WIllm, prompt, max_new_tokens=max_new_tokens, temperature=temperature)
# print(f"Result: {result}")
# return QuestionResponse(answer=result, question=prompt, config={"temperature": temperature, "max_new_tokens": max_new_tokens})
# except Exception as e:
# return HTTPException(500, QuestionResponse(code=500, answer=str(e), question=prompt))
# else:
# return HTTPException(400, QuestionResponse(code=400, answer="Request argument 'prompt' not provided."))