asv7j committed · Commit 318fbfd · verified · 1 Parent(s): c79fe72

Update app.py

Files changed (1):
  app.py  +10 -67
app.py CHANGED
@@ -7,45 +7,31 @@ access_token = os.getenv("read_access")
 
 from transformers import AutoModelForCausalLM, AutoTokenizer
 device = "cpu" # the device to load the model onto
-time1 = time.time()
+
 model = AutoModelForCausalLM.from_pretrained(
     "Qwen/Qwen2-0.5B-Instruct",
-    torch_dtype="auto",
     device_map="auto"
 )
-time2 = time.time()
-print(time2-time1)
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
-time3 = time.time()
-print(time3-time1)
+
 model1 = AutoModelForCausalLM.from_pretrained(
     "Qwen/Qwen2-1.5B-Instruct",
-    torch_dtype="auto",
     device_map="auto"
 )
-tokenizer1 = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
-time4 = time.time()
-print(time4-time3)
-app = FastAPI()
-time5 = time.time()
-print(time5-time4)
-
 
 tokenizer2 = AutoTokenizer.from_pretrained("google/gemma-2-2b-it", token=access_token)
 model2 = AutoModelForCausalLM.from_pretrained(
     "google/gemma-2-2b-it",
     device_map="auto",
-    torch_dtype=torch.bfloat16,
     token=access_token
 )
-model3 = AutoModelForCausalLM.from_pretrained(
-    "Qwen/Qwen2-0.5B-Instruct",
-    device_map="auto"
-)
+
+app = FastAPI()
 
 @app.get("/")
 async def read_root():
     return {"Hello": "World!"}
+
 start_time = time.time()
 messages = [
     {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will response in polity and brief."},
@@ -53,32 +39,20 @@ messages = [
     {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
     {"role": "user", "content": "How are you?"}
 ]
-time1 = time.time()
 text = tokenizer.apply_chat_template(
     messages,
     tokenize=False,
     add_generation_prompt=True
 )
-time2 = time.time()
-print(time2-time1)
 model_inputs = tokenizer([text], return_tensors="pt").to(device)
-time3 = time.time()
-print(time3-time2)
 generated_ids = model.generate(
     model_inputs.input_ids,
     max_new_tokens=64
 )
-time4 = time.time()
-print(time4-time3)
 generated_ids = [
     output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
-]
-time5 = time.time()
-print(time5-time4)
+]
 response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-print(response)
-time6 = time.time()
-print(time6-time5)
 
 end_time = time.time()
 time_taken = end_time - start_time
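This module-level block is a warm-up request: one full chat-template → generate → decode round trip at import time, now stripped of the interleaved timing probes. For reference, a self-contained sketch of the same flow (standard transformers API; model name and generation settings as in the diff, messages trimmed):

import time
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cpu"
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "How are you?"},
]
# Render the chat as one prompt string with the assistant turn opened.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)

start = time.time()
generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=64)
# Drop the prompt tokens so only newly generated text is decoded.
new_ids = [out[len(inp):] for inp, out in zip(model_inputs.input_ids, generated_ids)]
print(tokenizer.batch_decode(new_ids, skip_special_tokens=True)[0])
print(time.time() - start)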
@@ -103,7 +77,7 @@ async def read_droot():
 
     generated_ids = model.generate(
         model_inputs.input_ids,
-        max_new_tokens=64
+        max_new_tokens=128
     )
     generated_ids = [
         output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
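(A note on this hunk: max_new_tokens bounds only the tokens generated beyond the prompt, so raising it from 64 to 128 doubles this handler's response budget without changing how much prompt the model accepts.)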
@@ -116,37 +90,6 @@ async def read_droot():
     print(time_taken)
     return {"Hello": "World!"}
 
-@app.get("/teat")
-async def read_droot():
-    starttime = time.time()
-    messages = [
-        {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will response in polity and brief."},
-        {"role": "user", "content": "I'm Alok. Who are you?"},
-        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
-        {"role": "user", "content": "How are you?"}
-    ]
-    text = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-    model_inputs = tokenizer([text], return_tensors="pt").to(device)
-
-    generated_ids = model3.generate(
-        model_inputs.input_ids,
-        max_new_tokens=64
-    )
-    generated_ids = [
-        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
-    ]
-
-    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    print(response)
-    end_time = time.time()
-    time_taken = end_time - starttime
-    print(time_taken)
-    return {"Hello": "World!"}
-
 @app.get("/text")
 async def read_droot():
     starttime = time.time()
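(The /teat route deleted here was the only caller of model3, the duplicate Qwen2-0.5B-Instruct instance removed in the first hunk; the two deletions belong together.)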
@@ -161,7 +104,7 @@ async def read_droot():
         tokenize=False,
         add_generation_prompt=True
     )
-    model_inputs = tokenizer1([text], return_tensors="pt").to(device)
+    model_inputs = tokenizer([text], return_tensors="pt").to(device)
 
     generated_ids = model1.generate(
         model_inputs.input_ids,
@@ -171,7 +114,7 @@ async def read_droot():
         output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
     ]
 
-    response = tokenizer1.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
     print(response)
     end_time = time.time()
     time_taken = end_time - starttime
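These two replacements follow from the first hunk, where tokenizer1 was deleted: the /text handler now reuses the Qwen2-0.5B-Instruct tokenizer with the 1.5B model. That only works when the checkpoints ship an identical tokenizer, as the Qwen2 instruct family does; a small sanity check one could run (not part of the commit), using only standard tokenizer API:

from transformers import AutoTokenizer

# Verify the 0.5B and 1.5B checkpoints share one vocabulary before reusing
# a single tokenizer for both models.
t_small = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
t_large = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
assert t_small.get_vocab() == t_large.get_vocab()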
@@ -205,7 +148,7 @@ async def read_droot():
     ]
 
     response = tokenizer2.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    respons = tokenizer1.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    respons = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
     print(response)
     end_time = time.time()
     time_taken = end_time - starttime
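(One leftover worth flagging: the respons assignment kept by this hunk decodes the same generated_ids a second time into a variable that is never read afterwards; it reads like a typo for response and could be removed outright.)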
 