Update extract_insights.py
Browse files- extract_insights.py +103 -45
extract_insights.py
CHANGED
@@ -200,52 +200,110 @@ def adjust_prompt_tokens_v1(prompt: str) -> str:
|
|
200 |
return trimmed_text
|
201 |
|
202 |
|
203 |
-
async def process_synchronous_job(
|
204 |
"""
|
205 |
Background task to process the batch job
|
206 |
"""
|
207 |
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
return trimmed_text
|
201 |
|
202 |
|
203 |
+
async def process_synchronous_job(dataset: Dict[str, Any]) -> None:
    """
    Background task to process the batch job

    For every record under ``dataset['data']`` this builds a prompt from the
    record's ``receipt_text``, requests a JSON completion from the chat model,
    inserts the parsed result into ``receipt_radar_structured_data_duplicate``
    and then marks the matching ``receipt_ocr_data`` row as
    "processing completed".

    Args:
        dataset: Mapping whose ``'data'`` entry is an iterable of records.
            Each record is read with ``.get`` for the keys ``user_id``,
            ``message_id``, ``receipt_text`` and ``email``.

    Returns:
        None. Any exception raised while handling a record is printed and
        the loop moves on to the next record.
    """
    # A missing or null 'data' entry is treated as an empty batch; iterating
    # None here would raise TypeError outside the per-record try/except and
    # abort the whole job.
    records = dataset.get('data') or []

    for ds in records:
        user_id = ds.get('user_id')
        message_id = ds.get('message_id')
        raw_text = ds.get('receipt_text')
        email = ds.get('email')
        try:
            prompt = adjust_prompt_tokens_v1(receipt_radar_prompt(raw_text))

            # NOTE(review): this call is not awaited; if `client` is a
            # synchronous client it blocks the event loop for the duration
            # of the request -- confirm against the client's construction.
            completion = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                response_format={"type": "json_object"},
                temperature=0.1
            )
            print("Printing GPT response")
            print(completion.choices[0].message)

            # inserting data into supabase
            insert_data = json.loads(completion.choices[0].message.content)
            insert_data['email'] = email
            insert_data['user_id'] = user_id
            insert_data['message_id'] = message_id
            # A response without 'total_cost' raises KeyError here, which the
            # handler below reports before moving on to the next record.
            insert_data['total_cost'] = parse_number(insert_data['total_cost'])
            print("Printing user_id")
            print(user_id)

            (
                supabase.table("receipt_radar_structured_data_duplicate")
                .insert(insert_data)
                .execute()
            )

            # Only reached when the insert above did not raise.
            update_status_response = (
                supabase.table("receipt_ocr_data")
                .update({"status": "processing completed"})
                .eq("message_id", message_id)
                .eq("user_id", user_id)
                .eq("email", email)
                .execute()
            )

            print(update_status_response)
        except Exception as e:
            # Best-effort batch: report the failure and continue.
            print(f"Error occurred during processing: {e}")
|
259 |
+
|
260 |
+
|
261 |
+
|
262 |
+
|
263 |
+
|
264 |
+
|
265 |
+
|
266 |
+
# try:
|
267 |
+
# prompt = receipt_radar_prompt(raw_text)
|
268 |
+
|
269 |
+
# completion = client.chat.completions.create(
|
270 |
+
# model="gpt-4o-mini",
|
271 |
+
# messages=[
|
272 |
+
# {
|
273 |
+
# "role": "user",
|
274 |
+
# "content": prompt
|
275 |
+
# }
|
276 |
+
# ],
|
277 |
+
# response_format={"type": "json_object"},
|
278 |
+
# temperature=0.1
|
279 |
+
# )
|
280 |
+
# print("Printing GPT response")
|
281 |
+
# print(completion.choices[0].message)
|
282 |
+
|
283 |
+
# # inserting data into supabase
|
284 |
+
# insert_data = json.loads(completion.choices[0].message.content)
|
285 |
+
# insert_data['email'] = email
|
286 |
+
# insert_data['user_id'] = user_id
|
287 |
+
# insert_data['message_id'] = message_id
|
288 |
+
# insert_data['total_cost'] = parse_number(insert_data['total_cost'])
|
289 |
+
# print("Printing user_id")
|
290 |
+
# print(user_id)
|
291 |
+
|
292 |
+
# insert_response = (
|
293 |
+
# supabase.table("receipt_radar_structured_data_duplicate")
|
294 |
+
# .insert(insert_data)
|
295 |
+
# .execute()
|
296 |
+
# )
|
297 |
+
|
298 |
+
# update_status_response = (
|
299 |
+
# supabase.table("receipt_ocr_data")
|
300 |
+
# .update({"status": "processing completed"})
|
301 |
+
# .eq("message_id", message_id)
|
302 |
+
# .eq("user_id", user_id)
|
303 |
+
# .eq("email", email)
|
304 |
+
# .execute()
|
305 |
+
# )
|
306 |
+
|
307 |
+
# print(update_status_response)
|
308 |
+
# except Exception as e:
|
309 |
+
# print(f"Error occurred during processing: {e}")
|