Spaces:
Running
Running
from fastapi import FastAPI | |
import torch | |
import os | |
from llama_cpp import Llama | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import requests | |
# Device label; nothing in the visible code reads it.
device = "cpu"

# Deployment configuration taken from the environment (None when unset).
access_token = os.getenv("access_token")
privateurl = os.getenv("privateurl")

# Tokenizers: in the visible code they are used only to render chat templates
# into prompt text.
# NOTE(review): tokenizer1 is the 0.5B Qwen2 checkpoint while llm1 below is the
# 1.5B GGUF build -- presumably both share one chat template; confirm.
tokenizer1 = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
tokenizer2 = AutoTokenizer.from_pretrained("google/gemma-2-2b-it", token=access_token)
tokenizer3 = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")


def _load_gguf(repo_id, filename):
    """Load a GGUF model from *repo_id* matching *filename*, with logging off."""
    return Llama.from_pretrained(repo_id=repo_id, filename=filename, verbose=False)


# Completion models, loaded in the same order as before.
llm1 = _load_gguf("Qwen/Qwen2-1.5B-Instruct-GGUF", "*q8_0.gguf")
llm2 = _load_gguf("NexaAIDev/gemma-2-2b-it-GGUF", "*q4_K_S.gguf")
llm3 = _load_gguf("microsoft/Phi-3-mini-4k-instruct-gguf", "*q4.gguf")

# ASGI application object.
app = FastAPI()
async def read_root():
    """Return a fixed greeting payload.

    NOTE(review): no route decorator is visible on this coroutine in this
    view of the file -- confirm how it is registered with ``app``.
    """
    greeting = {"Hello": "World!"}
    return greeting
def modelResp1(cookie, target, token, prompt):
    """Generate a short reply with ``llm1`` and POST it to ``privateurl``.

    The prompt is appended to a fixed "Sia" persona conversation, rendered to
    text with ``tokenizer1``'s chat template, and completed by ``llm1`` with a
    limit of 64 new tokens. The completion is then sent as form data.

    Args:
        cookie: Value for the outgoing ``Cookie`` header (stringified).
        target: Value for the ``target`` form field (stringified).
        token: Value for the ``token`` form field (stringified).
        prompt: User message the model should answer.

    Returns:
        None. The HTTP response of the POST is not inspected.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will response in polity and brief."},
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
        {"role": "user", "content": f"{prompt}"},
    ]
    text = tokenizer1.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    output = llm1(
        text,
        max_tokens=64,  # cap the completion at 64 new tokens
        echo=False,  # do not repeat the prompt in the returned text
    )
    response = output['choices'][0]['text']

    # Build per-call copies instead of writing into the module-level dicts, so
    # one request's cookie/token/content never lingers in shared state.
    request_headers = {**headers, 'Cookie': f"{cookie}"}
    request_payload = {
        **payload,
        'token': f"{token}",
        'target': f"{target}",
        'content': response,
    }
    # NOTE(review): no timeout is passed, so a stalled endpoint blocks this
    # call indefinitely -- consider adding one once the desired failure
    # behaviour is agreed.
    requests.post(privateurl, headers=request_headers, data=request_payload)
def modelResp2(prompt):
    """Answer *prompt* with ``llm2`` and return the generated text.

    The conversation has no system message: it consists of a fixed
    identity exchange followed by the user's prompt. It is rendered with
    ``tokenizer2``'s chat template and completed with a 64-token limit.

    Args:
        prompt: User message the model should answer.

    Returns:
        The ``text`` field of the first completion choice.
    """
    conversation = [
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
        {"role": "user", "content": f"{prompt}"},
    ]
    rendered = tokenizer2.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    # 64 new tokens at most; the prompt itself is not echoed back.
    completion = llm2(rendered, max_tokens=64, echo=False)
    return completion['choices'][0]['text']
def modelResp3(prompt):
    """Answer *prompt* with ``llm3`` and return the generated text.

    The prompt is appended to a fixed "Sia" persona conversation, rendered
    with ``tokenizer3``'s chat template, and completed with a limit of 64
    new tokens.

    Args:
        prompt: User message the model should answer.

    Returns:
        The ``text`` field of the first completion choice.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will response in polity and brief."},
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
        {"role": "user", "content": f"{prompt}"},
    ]
    text = tokenizer3.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    # The text was rendered with tokenizer3's template, so it must be completed
    # by the matching model llm3 (the previous code sent it to llm2).
    output = llm3(
        text,
        max_tokens=64,  # cap the completion at 64 new tokens
        echo=False,  # do not repeat the prompt in the returned text
    )
    response = output['choices'][0]['text']
    return response
async def modelApi(data: dict):
    """Run ``modelResp1`` on the request fields and return a fixed greeting.

    Reads ``target_id``, ``Cookie``, ``token`` and ``prompt`` from *data*
    (missing keys become ``None``) and forwards them to ``modelResp1``.

    NOTE(review): no route decorator is visible here, and another function
    named ``modelApi`` is defined later in this file, which rebinds the
    name -- confirm how this handler is registered.
    """
    # modelResp1 is a plain synchronous call; its return value is not used.
    modelResp1(
        target=data.get("target_id"),
        cookie=data.get("Cookie"),
        token=data.get("token"),
        prompt=data.get("prompt"),
    )
    return {"Hello": "World!"}
async def modelApi(data: dict):
    """Return a fixed greeting; the model call is currently disabled.

    NOTE(review): an earlier function in this file has the same name, and
    no route decorator is visible -- confirm how this handler is registered.
    """
    # The prompt is still looked up, but nothing consumes it while the call
    # to modelResp2 stays switched off.
    _prompt = data.get("prompt")
    return dict(Hello="World!")
async def modelApi1(data: dict):
    """Return ``modelResp3``'s reply to the ``prompt`` field of *data*.

    A missing ``prompt`` key is passed on as ``None``.

    NOTE(review): no route decorator is visible in this view of the file --
    confirm how this handler is registered.
    """
    return modelResp3(data.get("prompt"))
# Base HTTP headers for the POST that modelResp1 sends to `privateurl`.
# "Cookie" is an empty placeholder that is filled in per request.
headers = {
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    "Cookie": "",
    "Sec-Ch-Ua": '"Opera";v="95", "Chromium";v="109", "Not;A=Brand";v="24"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": '"Windows"',
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 OPR/95.0.0.0"
    ),
    "X-Requested-With": "XMLHttpRequest",
}
# Form-field template for the POST made by modelResp1: every field starts
# empty and is filled in per request.
payload = dict.fromkeys(("target", "content", "token"), "")