asv7j committed on
Commit
867ef01
·
verified ·
1 Parent(s): 29149f6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -0
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Minimal FastAPI service wrapping the Qwen2 0.5B instruct model.
from fastapi import FastAPI
import time

from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cpu"  # the device to load the model onto

# Single source of truth for the checkpoint name.
_MODEL_ID = "Qwen/Qwen2-0.5B-Instruct"

# Load once at import time so every request reuses the same weights.
model = AutoModelForCausalLM.from_pretrained(
    _MODEL_ID,
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)

app = FastAPI()
16
+
17
@app.get("/")
async def read_root():
    """Root endpoint: returns a static liveness payload."""
    payload = {"Hello": "World!"}
    return payload
20
# One-shot warm-up inference at import time: runs a fixed conversation
# through the model once and logs the reply plus how long it took.
start_time = time.time()  # also read by the /test endpoint; name kept
chat = [
    {"role": "system", "content": "You are a helpful assistant, Sia. You are developed by Sushma. You will response in polity, clear, brief and in short length."},
    {"role": "user", "content": "Who are you?"},
    {"role": "assistant", "content": "I am Sia, a small language model created by Sushma. I am here to assist you. How can I help you today?"},
    {"role": "user", "content": "Hi, How are you?"}
]
# Render the chat into the model's prompt string (no tokenization yet).
prompt = tokenizer.apply_chat_template(
    chat,
    tokenize=False,
    add_generation_prompt=True,
)
encoded = tokenizer([prompt], return_tensors="pt").to(device)

outputs = model.generate(encoded.input_ids, max_new_tokens=64)
# Drop the echoed prompt tokens, keeping only the generated continuation.
trimmed = [out[len(inp):] for inp, out in zip(encoded.input_ids, outputs)]

response = tokenizer.batch_decode(trimmed, skip_special_tokens=True)[0]
print(response)
end_time = time.time()
time_taken = end_time - start_time
print(time_taken)
47
+
48
+
49
@app.get("/test")
async def read_droot():
    """Run a fixed chat prompt through the model and log the result.

    Generates a short reply to a hard-coded conversation, prints the
    decoded response and the elapsed wall-clock generation time, then
    returns a static JSON payload.
    """
    # BUG FIX: the original never set start_time here, so the duration
    # was computed against the module-level start_time captured at
    # import and grew with process uptime. Time this request instead.
    start_time = time.time()
    messages = [
        {"role": "system", "content": "You are a helpful assistant, Sia. You are developed by Sushma. You will response in polity, clear, brief and in short length."},
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma. I am here to assist you. How can I help you today?"},
        {"role": "user", "content": "Hi, How are you?"}
    ]
    # Render the chat into the model's prompt format (string, not ids).
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(device)

    generated_ids = model.generate(
        model_inputs.input_ids,
        max_new_tokens=64
    )
    # Strip the prompt tokens so only the newly generated tail is decoded.
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # NOTE(review): response is only logged, not returned — confirm intended.
    print(response)
    end_time = time.time()
    time_taken = end_time - start_time
    print(time_taken)
    return {"Hello": "World!"}