katanaml committed on
Commit
a461c7e
1 Parent(s): 5ba13db
Files changed (5) hide show
  1. app.py +412 -0
  2. bank_statement.png +0 -0
  3. bonds_table.png +0 -0
  4. lab_results.png +0 -0
  5. requirements.txt +1 -0
app.py ADDED
@@ -0,0 +1,412 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import os
4
+ from PIL import Image
5
+ import json
6
+ from datetime import datetime
7
+
8
+
9
# Selectable demo documents. Each entry is [image file, human-readable label, query text].
# The query text is either a JSON schema filled with placeholder values or the wildcard "*".
examples = [
    [
        "bonds_table.png",
        "Bonds table",
        '[{"instrument_name":"example", "valuation":0}]',
    ],
    [
        "lab_results.png",
        "Lab results",
        '{"patient_name": "example", "patient_age": "example", "patient_pid": 0, '
        '"lab_results": [{"investigation": "example", "result": 0.00, '
        '"reference_value": "example", "unit": "example"}]}',
    ],
    ["bank_statement.png", "Bank statement", "*"],
]
15
+
16
# Pre-computed JSON result for the "bonds_table.png" example: one object per
# instrument, holding its name and its valuation.
bonds_json = [
    {"instrument_name": name, "valuation": valuation}
    for name, valuation in (
        ("UNITS BLACKROCK FIX INC DUB FDS PLC ISHS EUR INV GRD CP BD IDX/INST/E", 19049),
        ("UNITS ISHARES III PLC CORE EUR GOVT BOND UCITS ETF/EUR", 83488),
        ("UNITS ISHARES III PLC EUR CORP BOND 1-5YR UCITS ETF/EUR", 213030),
        ("UNIT ISHARES VI PLC/JP MORGAN USD E BOND EUR HED UCITS ETF DIST/HDGD/", 32774),
        ("UNITS XTRACKERS II SICAV/EUR HY CORP BOND UCITS ETF/-1D-/DISTR.", 23643),
    )
]
39
+
40
# Pre-computed JSON result for the "lab_results.png" example: patient header
# fields plus one object per investigation row.
lab_results_json = {
    "patient_name": "Yash M. Patel",
    "patient_age": "21 Years",
    "patient_pid": 555,
    "lab_results": [
        # Each tuple is (investigation, result, reference_value, unit).
        dict(zip(("investigation", "result", "reference_value", "unit"), row))
        for row in (
            ("Hemoglobin (Hb)", 12.5, "13.0 - 17.0", "g/dL"),
            ("RBC COUNT", 5.2, "4.5 - 5.5", "mill/cumm"),
            ("Packed Cell Volume (PCV)", 57.5, "40 - 50", "%"),
            ("Mean Corpuscular Volume (MCV)", 87.75, "83 - 101", "fL"),
            ("MCH", 27.2, "27 - 32", "pg"),
            ("MCHC", 32.8, "32.5 - 34.5", "g/dL"),
            ("RDW", 13.6, "11.6 - 14.0", "%"),
            ("WBC COUNT", 9000, "4000-11000", "cumm"),
            ("Neutrophils", 60, "50 - 62", "%"),
            ("Lymphocytes", 31, "20 - 40", "%"),
            ("Eosinophils", 1, "00 - 06", "%"),
            ("Monocytes", 7, "00 - 10", "%"),
            ("Basophils", 1, "00 - 02", "%"),
            ("Absolute Neutrophils", 6000, "1500 - 7500", "cells/mcL"),
            ("Absolute Lymphocytes", 3100, "1300 - 3500", "cells/mcL"),
            ("Absolute Eosinophils", 100, "00 - 500", "cells/mcL"),
            ("Absolute Monocytes", 700, "200 - 950", "cells/mcL"),
            ("Absolute Basophils", 100, "00 - 300", "cells/mcL"),
            ("Platelet Count", 320000, "150000 - 410000", "cumm"),
        )
    ],
}
161
+
162
# Pre-computed JSON result for the "bank_statement.png" example: statement
# header, account summary and the list of transactions (all amounts are strings).
bank_statement_json = {
    "bank": "First Platypus Bank",
    "address": "1234 Kings St., New York, NY 12123",
    "account_holder": "Mary G. Orta",
    "account_number": "1234567890123",
    "statement_date": "3/1/2022",
    "period_covered": "2/1/2022 - 3/1/2022",
    "account_summary": {
        "balance_on_march_1": "$25,032.23",
        "total_money_in": "$10,234.23",
        "total_money_out": "$10,532.51",
    },
    "transactions": [
        # Each tuple is (date, description, withdrawal, deposit, balance).
        dict(zip(("date", "description", "withdrawal", "deposit", "balance"), row))
        for row in (
            ("02/01", "PGD EasyPay Debit", "203.24", "", "22,098.23"),
            ("02/02", "AB&B Online Payment*****", "71.23", "", "22,027.00"),
            ("02/04", "Check No. 2345", "", "450.00", "22,477.00"),
            ("02/05", "Payroll Direct Dep 23422342 Giants", "", "2,534.65", "25,011.65"),
            ("02/06", "Signature POS Debit - TJP", "84.50", "", "24,927.15"),
            ("02/07", "Check No. 234", "1,400.00", "", "23,527.15"),
            ("02/08", "Check No. 342", "", "25.00", "23,552.15"),
            ("02/09", "FPB AutoPay***** Credit Card", "456.02", "", "23,096.13"),
            ("02/08", "Check No. 123", "", "25.00", "23,552.15"),
            ("02/09", "FPB AutoPay***** Credit Card", "156.02", "", "23,096.13"),
            ("02/08", "Cash Deposit", "", "25.00", "23,552.15"),
        )
    ],
}
254
+
255
+
256
def _parse_query(query):
    """Convert the raw query text into the value sent as the ``fields`` form field.

    Returns a ``(fields, error)`` pair in which exactly one element is ``None``.
    The wildcard ``*`` is passed through unchanged; any other input must be a
    JSON object or an array of JSON objects and is re-serialized with
    ``json.dumps``.
    """
    if query.strip() == "*":
        return "*", None

    try:
        parsed = json.loads(query)
    except json.JSONDecodeError:
        return None, {"error": "Invalid JSON format in query input"}

    # json.loads accepts any JSON value (string, number, ...); only objects
    # and arrays of objects are valid queries.
    if not isinstance(parsed, (dict, list)):
        return None, {
            "error": "Invalid input. Only JSON objects, arrays of objects, or wildcard '*' are allowed."}
    if isinstance(parsed, list) and not all(isinstance(item, dict) for item in parsed):
        return None, {"error": "Invalid input. Arrays must contain only JSON objects."}

    return json.dumps(parsed), None


def run_inference(image_filepath, query, key):
    """Send a document image and a field query to the Sparrow inference API.

    Args:
        image_filepath: Path of the image to process, or ``None`` when no
            image was supplied.
        query: The wildcard ``*`` or JSON text (an object or an array of
            objects) describing the fields to extract.
        key: Sparrow key, forwarded as the ``sparrow_key`` form field.

    Returns:
        The decoded JSON body when the API answers with HTTP 200. In every
        other case a dict with an ``"error"`` message, plus ``"details"``
        holding the response text when the API answered with a non-200 status
        or a body that is not JSON.
    """
    if image_filepath is None:
        return {"error": "No image provided. Please upload an image before submitting."}

    if query is None or query.strip() == "":
        return {"error": "No query provided. Please enter a query before submitting."}

    if key is None or key.strip() == "":
        return {"error": "No Sparrow Key provided. Please enter a Sparrow Key before submitting."}

    # Validate the query before any file is written, so a bad query costs no I/O.
    fields, query_error = _parse_query(query)
    if query_error is not None:
        return query_error

    # Keep the original extension when it is a supported one, otherwise default to PNG.
    file_extension = os.path.splitext(image_filepath)[1].lstrip('.').lower()
    if file_extension not in ('jpg', 'jpeg', 'png'):
        file_extension = 'png'
    # "image/jpg" is not a registered MIME type; JPEG data is "image/jpeg".
    mime_subtype = 'jpeg' if file_extension == 'jpg' else file_extension

    # Microseconds in the timestamp keep two requests arriving within the same
    # second from sharing (and deleting) each other's temporary file.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    filename = f"image_{timestamp}.{file_extension}"
    # Resolved before the try block so the cleanup in ``finally`` always has a real path.
    file_path = os.path.abspath(filename)

    try:
        # Copy the uploaded image into the temporary file.
        try:
            with Image.open(image_filepath) as img:
                img.save(file_path)
        except OSError as exc:
            return {"error": f"Could not read or save the provided image: {exc}"}

        # Prepare the REST API call
        url = 'https://katanaml-sparrow-ml.hf.space/api/v1/sparrow-llm/inference'
        headers = {
            'accept': 'application/json'
        }
        data = {
            'group_by_rows': '',
            'agent': 'sparrow-parse',
            'keywords': '',
            'sparrow_key': key,
            'update_targets': '',
            'debug': 'false',
            'index_name': '',
            'types': '',
            'fields': fields,  # Wildcard as-is, or the re-serialized JSON query
            'options': 'huggingface,katanaml/sparrow-qwen2-vl-7b'
        }

        # Open the file in binary mode and send it
        with open(file_path, "rb") as f:
            files = {
                'file': (filename, f, f'image/{mime_subtype}')
            }
            try:
                response = requests.post(url, headers=headers, files=files, data=data)
            except requests.RequestException as exc:
                return {"error": f"Request to the Sparrow API failed: {exc}"}

        # Process the response and return the JSON data
        if response.status_code != 200:
            return {"error": f"Request failed with status code {response.status_code}", "details": response.text}
        try:
            return response.json()
        except ValueError:
            return {"error": "Response body is not valid JSON", "details": response.text}
    finally:
        # Clean up the temporary file
        if os.path.exists(file_path):
            os.remove(file_path)
349
+
350
+
351
def handle_example(example_image):
    """Resolve a selected example image to the values shown in the UI.

    Returns a 3-tuple ``(image, output, query)``: the example image name, the
    pre-computed JSON result for that example, and the query text stored with
    it in ``examples``. When the selection matches no known example the tuple
    is ``(None, "No example selected.", "")``.
    """
    # Pre-computed result for each known example image.
    canned_outputs = {
        "bonds_table.png": bonds_json,
        "lab_results.png": lab_results_json,
        "bank_statement.png": bank_statement_json,
    }

    for entry in examples:
        # Equality is checked first so the dict lookup only ever sees a value
        # that already equals one of the example file names.
        if entry[0] == example_image and example_image in canned_outputs:
            return example_image, canned_outputs[example_image], entry[2]

    # Default return if no match found
    return None, "No example selected.", ""
367
+
368
+
369
# Define the UI: a single-tab Gradio Blocks app with the inputs on the left
# and the JSON response on the right.
# NOTE(review): the nesting below (handlers and footer placed inside the tab)
# is reconstructed — confirm it against the intended layout.
with gr.Blocks(theme=gr.themes.Ocean()) as demo:
    with gr.Tab(label="Sparrow UI"):
        with gr.Row():
            with gr.Column():
                # type="filepath" hands the image to the click handler as a path.
                input_img = gr.Image(label="Input Document Image", type="filepath")
                query_input = gr.Textbox(label="Query", placeholder="Use * to query all data or JSON schema, e.g.: [{\"instrument_name\": \"example\"}]")
                # Rendered as a password field so the key is not shown in clear text.
                key_input = gr.Textbox(label="Sparrow Key", type="password")
                submit_btn = gr.Button(value="Submit", variant="primary")

                # Radio button for selecting examples; choices are the example image file names
                example_radio = gr.Radio(label="Select Example", choices=[ex[0] for ex in examples])

            with gr.Column():
                # JSON output for structured JSON display
                output_json = gr.JSON(label="Response (JSON)", height=900, min_height=900)


        # Function to handle example selection
        def on_example_select(selected_example):
            # Thin wrapper: returns the (image, output, query) tuple from handle_example
            return handle_example(selected_example)


        # Update image, output JSON, and query when an example is selected
        example_radio.change(on_example_select,
                             inputs=example_radio,
                             outputs=[input_img, output_json, query_input])

        # When submit is clicked, run the inference and show its result in the JSON panel
        submit_btn.click(run_inference, [input_img, query_input, key_input], [output_json])

        # Footer with a link to the Katana ML site
        gr.Markdown(
            """
            ---
            <p style="text-align: center;">
            Visit <a href="https://katanaml.io/" target="_blank">Katana ML</a> for more details.
            </p>
            """
        )

# Launch the app
demo.queue(api_open=False)
demo.launch(debug=True)
bank_statement.png ADDED
bonds_table.png ADDED
lab_results.png ADDED
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ gradio==5.1.0