asv7j committed
Commit f38c19f · verified · 1 Parent(s): fca36f2

Update app.py

Files changed (1): app.py +61 -7
app.py CHANGED
@@ -1,19 +1,32 @@
 from fastapi import FastAPI
 import time
+time = time.time()
 import torch
 
 from transformers import AutoModelForCausalLM, AutoTokenizer
 device = "cpu" # the device to load the model onto
-
+time1 = time.time()
 model = AutoModelForCausalLM.from_pretrained(
     "Qwen/Qwen2-0.5B-Instruct",
     torch_dtype="auto",
     device_map="auto"
 )
+time2 = time.time()
+print(time2-time1)
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
-
+time3 = time.time()
+print(time3-time1)
+model1 = AutoModelForCausalLM.from_pretrained(
+    "Qwen/Qwen2-1.5B-Instruct",
+    torch_dtype="auto",
+    device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
+time4 = time.time()
+print(time4-time3)
 app = FastAPI()
-
+time5 = time.time()
+print(time5-time4)
 @app.get("/")
 async def read_root():
     return {"Hello": "World!"}
@@ -22,25 +35,35 @@ messages = [
     {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will response in polity and brief."},
     {"role": "user", "content": "I'm Alok. Who are you?"},
     {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
-    {"role": "user", "content": "Hi, How are you?"}
+    {"role": "user", "content": "How are you?"}
 ]
+time1 = time.time()
 text = tokenizer.apply_chat_template(
     messages,
     tokenize=False,
     add_generation_prompt=True
 )
+time2 = time.time()
+print(time2-time1)
 model_inputs = tokenizer([text], return_tensors="pt").to(device)
-
+time3 = time.time()
+print(time3-time2)
 generated_ids = model.generate(
     model_inputs.input_ids,
     max_new_tokens=64
 )
+time4 = time.time()
+print(time4-time3)
 generated_ids = [
     output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
 ]
-
+time5 = time.time()
+print(time5-time4)
 response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 print(response)
+time6 = time.time()
+print(time6-time5)
+
 end_time = time.time()
 time_taken = end_time - start_time
 print(time_taken)
@@ -53,7 +76,7 @@ async def read_droot():
     {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will response in polity and brief."},
     {"role": "user", "content": "I'm Alok. Who are you?"},
     {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
-    {"role": "user", "content": "Hi, How are you?"}
+    {"role": "user", "content": "How are you?"}
 ]
 text = tokenizer.apply_chat_template(
     messages,
@@ -77,3 +100,34 @@ async def read_droot():
     print(time_taken)
     return {"Hello": "World!"}
 
+@app.get("/text")
+async def read_droot():
+    starttime = time.time()
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will response in polity and brief."},
+        {"role": "user", "content": "I'm Alok. Who are you?"},
+        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
+        {"role": "user", "content": "How are you?"}
+    ]
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer1([text], return_tensors="pt").to(device)
+
+    generated_ids = model1.generate(
+        model_inputs.input_ids,
+        max_new_tokens=64
+    )
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+
+    response = tokenizer1.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    print(response)
+    end_time = time.time()
+    time_taken = end_time - starttime
+    print(time_taken)
+    return {"Hello": "World!"}
+    #return {response: time}
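
As committed, the new code has three pitfalls worth flagging. `time = time.time()` rebinds the name `time` from the module to a float, so the very next `time.time()` call raises AttributeError. The second `AutoTokenizer.from_pretrained` call assigns the Qwen2-1.5B tokenizer to `tokenizer` instead of `tokenizer1`, so the `/text` handler's `tokenizer1` is a NameError and the 0.5B path silently uses the 1.5B tokenizer. And `start_time` in the module-level timing block is never defined. Below is a minimal sketch of the same setup without those issues; it is not the committed code, and the names `start_time` and `request_start` as well as the JSON response shape are illustrative assumptions.

import time

start_time = time.time()  # assumed name; keeps the 'time' module binding intact

from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cpu"  # the device to load the model onto

# Give each checkpoint its own tokenizer variable so neither is overwritten.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-0.5B-Instruct", torch_dtype="auto", device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

model1 = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-1.5B-Instruct", torch_dtype="auto", device_map="auto"
)
tokenizer1 = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")

print(f"models loaded in {time.time() - start_time:.2f}s")

app = FastAPI()

@app.get("/text")
async def read_droot():
    request_start = time.time()  # assumed name, replacing the committed 'starttime'
    messages = [
        # system prompt kept verbatim from the commit
        {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will response in polity and brief."},
        {"role": "user", "content": "How are you?"},
    ]
    text = tokenizer1.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer1([text], return_tensors="pt").to(device)
    generated_ids = model1.generate(model_inputs.input_ids, max_new_tokens=64)
    # Strip the prompt tokens so only the newly generated text is decoded.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer1.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # Return the text instead of printing it, as the commented-out last line hints.
    return {"response": response, "time_taken": time.time() - request_start}

Served with, e.g., `uvicorn app:app`, a request to `/text` would then return the generated reply and the per-request latency rather than a fixed `{"Hello": "World!"}`, which appears to be what the commented-out `#return {response: time}` line was reaching for.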