Update app.py
app.py CHANGED
@@ -1,19 +1,32 @@
 from fastapi import FastAPI
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 device = "cpu"
 
 app = FastAPI()
@@ -22,7 +35,7 @@ app = FastAPI()
 async def read_root():
     return {"Hello": "World!"}
 
-def modelResp(prompt):
     messages = [
         {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will respond politely and briefly."},
         {"role": "user", "content": "Who are you?"},
@@ -34,20 +47,16 @@ def modelResp(prompt):
         tokenize=False,
         add_generation_prompt=True
     )
-        do_sample=True
     )
-        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
-    ]
-    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
     return response
 
-def modelResp1(prompt):
     messages = [
         {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will respond politely and briefly."},
         {"role": "user", "content": "Who are you?"},
@@ -59,27 +68,50 @@ def modelResp1(prompt):
         tokenize=False,
         add_generation_prompt=True
     )
-        do_sample=True
     )
-        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
-    ]
-    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
     return response
 
 async def modelApi(data: dict):
     prompt = data.get("prompt")
-    response =
     return response
 
-@app.post("/
 async def modelApi1(data: dict):
     prompt = data.get("prompt")
-    response =
     return response

 from fastapi import FastAPI
 import torch
+import os
+from llama_cpp import Llama
 from transformers import AutoModelForCausalLM, AutoTokenizer
 device = "cpu"
 
+access_token = os.getenv("access_token")
+
+tokenizer1 = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it", token=access_token)
+tokenizer3 = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
+
+llm1 = Llama.from_pretrained(
+    repo_id="Qwen/Qwen2-1.5B-Instruct-GGUF",
+    filename="*q8_0.gguf",
+    verbose=False
+)
 
+llm2 = Llama.from_pretrained(
+    repo_id="NexaAIDev/gemma-2-2b-it-GGUF",
+    filename="*q4_K_S.gguf",
+    verbose=False
 )
 
+llm3 = Llama.from_pretrained(
+    repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
+    filename="*q4.gguf",
+    verbose=False
 )
 
 app = FastAPI()
 async def read_root():
     return {"Hello": "World!"}
 
+def modelResp1(prompt):
     messages = [
         {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will respond politely and briefly."},
         {"role": "user", "content": "Who are you?"},
         {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
         {"role": "user", "content": f"{prompt}"}
     ]
     text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True
     )
+    output = llm1(
+        text,
+        max_tokens=64,  # Generate up to 64 tokens
+        echo=False,     # Whether to echo the prompt
     )
+    response = output['choices'][0]['text']
 
     return response
 
+def modelResp2(prompt):
     messages = [
         {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will respond politely and briefly."},
         {"role": "user", "content": "Who are you?"},
         {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
         {"role": "user", "content": f"{prompt}"}
     ]
     text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True
     )
+    output = llm2(
+        text,
+        max_tokens=64,  # Generate up to 64 tokens
+        echo=False,     # Whether to echo the prompt
     )
+    response = output['choices'][0]['text']
 
     return response
 
+def modelResp3(prompt):
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will respond politely and briefly."},
+        {"role": "user", "content": "Who are you?"},
+        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
+        {"role": "user", "content": f"{prompt}"}
+    ]
+    text = tokenizer3.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    output = llm3(  # Phi-3 GGUF model backing /modelapi3
+        text,
+        max_tokens=64,  # Generate up to 64 tokens
+        echo=False,     # Whether to echo the prompt
+    )
+    response = output['choices'][0]['text']
+
+    return response
+
+@app.post("/modelapi1")
 async def modelApi(data: dict):
     prompt = data.get("prompt")
+    response = modelResp1(prompt)
     return response
 
+@app.post("/modelapi2")
+async def modelApi(data: dict):
+    prompt = data.get("prompt")
+    response = modelResp2(prompt)
+    return response
+
+@app.post("/modelapi3")
 async def modelApi1(data: dict):
     prompt = data.get("prompt")
+    response = modelResp3(prompt)
     return response
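
For quick testing, here is a minimal client sketch for the three endpoints added above. The base URL (port 7860) and the use of the requests library are assumptions for illustration; each handler reads a "prompt" field from the JSON body and returns the generated text.

import requests

# Placeholder base URL; point it at the running Space or a local `uvicorn app:app` server.
BASE_URL = "http://localhost:7860"

def ask(route: str, prompt: str) -> str:
    # The handlers call data.get("prompt") on the JSON body and return the model's text,
    # which FastAPI serializes as a JSON string.
    resp = requests.post(f"{BASE_URL}/{route}", json={"prompt": prompt})
    resp.raise_for_status()
    return resp.json()

if __name__ == "__main__":
    for route in ("modelapi1", "modelapi2", "modelapi3"):
        print(route, "->", ask(route, "What can you help me with?"))

Each route maps to one of the GGUF models loaded at startup (Qwen2, gemma-2, Phi-3), so the same prompt can be compared across the three backends.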