Rodolfo Torres committed on
Commit c0c69df · 1 parent: 11f8b48

Added benchmarking

Files changed (1)
  1. main.py +75 -2
main.py CHANGED
@@ -1,3 +1,14 @@
+import torch
+
+try:
+    import intel_extension_for_pytorch as ipex
+    ipex_enabled = True
+except ImportError:  # IPEX is optional; fall back to stock PyTorch
+    ipex_enabled = False
+
+import time
+import numpy as np
+
 from fastapi import FastAPI, UploadFile, File, HTTPException
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse
@@ -8,8 +19,7 @@ from fastapi.responses import JSONResponse
 from io import BytesIO
 import PyPDF2
 from newspaper import Article
-import torch
-from transformers import AutoModelForMultipleChoice, AutoTokenizer
+from transformers import AutoModelForMultipleChoice, AutoTokenizer, AutoModelForQuestionAnswering
 
 qa_pipeline = pipeline("question-answering", model="roaltopo/scan-u-doc_question-answer")
 bool_q_pipeline = pipeline("text-classification", model="roaltopo/scan-u-doc_bool-question")
@@ -128,6 +138,69 @@ async def answer_question(uuid: str, question_info: QuestionInfo):
 
     return answer
 
+############
+def get_score1(model_checkpoint, question, context, num_times, warmup_rounds, has_xpu):  # baseline pipeline
+    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+    model = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)
+    model.eval()
+
+    if has_xpu:
+        device = 'xpu'
+    else:
+        device = None
+
+    qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer, torch_dtype=torch.bfloat16, device=device)
+    latency_list = []
+    for i in range(num_times):
+        time_start = time.time()
+        answer = qa_pipeline(question=question, context=context)
+        if i >= warmup_rounds:  # skip warmup iterations when averaging
+            latency_list.append(time.time() - time_start)
+    pipeline_inference_time = np.mean(latency_list)
+    return pipeline_inference_time
+
+def get_score2(model_checkpoint, question, context, num_times, warmup_rounds, has_xpu):  # IPEX-optimized pipeline
+    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+    model = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)
+    model.eval()
+
+    if has_xpu:
+        device = 'xpu'
+    else:
+        device = None
+
+    if ipex_enabled:
+        #################### code changes ####################
+        model = ipex.optimize(model, weights_prepack=False)
+        model = torch.compile(model, backend="ipex")
+        ######################################################
+    with torch.no_grad():
+        qa_pipeline = pipeline("question-answering", model=model, tokenizer=tokenizer, torch_dtype=torch.bfloat16, device=device)
+        latency_list = []
+        for i in range(num_times):
+            time_start = time.time()
+            answer = qa_pipeline(question=question, context=context)
+            if i >= warmup_rounds:  # skip warmup iterations when averaging
+                latency_list.append(time.time() - time_start)
+        pipeline_inference_time = np.mean(latency_list)
+    return pipeline_inference_time
+
+@app.get("/benchmark")
+async def benchmark(question: str, context: str):
+    num_times = 50
+    warmup_rounds = 20
+
+    model_checkpoint = "roaltopo/scan-u-doc_question-answer"
+
+    # torch.xpu is only present with IPEX / an XPU-enabled PyTorch build
+    has_xpu = hasattr(torch, 'xpu') and torch.xpu.device_count() > 0
+    score1 = get_score1(model_checkpoint, question, context, num_times, warmup_rounds, has_xpu)
+    score2 = get_score2(model_checkpoint, question, context, num_times, warmup_rounds, has_xpu)
+
+    return {'has_xpu': has_xpu, 'ipex_enabled': ipex_enabled, 'score1': score1, 'score2': score2}
+
+############
+
 
 app.mount("/", StaticFiles(directory="static", html=True), name="static")
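A quick way to sanity-check the new endpoint is a GET request with the two query parameters. The sketch below is illustrative, not part of the commit: it assumes the app is being served locally (for example with uvicorn on port 8000), and the question/context strings are made up.

# Hypothetical smoke test for the /benchmark endpoint; adjust the host
# and port to wherever the FastAPI app is actually running.
import requests

resp = requests.get(
    "http://localhost:8000/benchmark",
    params={
        "question": "What does the extension optimize?",
        "context": "Intel Extension for PyTorch adds optimizations for Intel CPUs and XPUs.",
    },
    timeout=600,  # 50 pipeline runs per scorer can be slow on CPU
)
resp.raise_for_status()
print(resp.json())  # {'has_xpu': ..., 'ipex_enabled': ..., 'score1': ..., 'score2': ...}

Since score1 and score2 are mean per-request latencies in seconds (averaged over the 30 post-warmup runs), score2 coming in below score1 indicates the IPEX-optimized path is faster.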