acverma committed
Commit ba26445
1 Parent(s): 26e6d66

Update app.py

Files changed (1):
  1. app.py +0 -62
app.py CHANGED
@@ -116,17 +116,6 @@ def unnormalize_box(bbox, width, height):
         height * (bbox[3] / 1000),
     ]
 
-#def prepare_examples(examples):
-#   images = examples[image_column_name]
-#   words = examples[text_column_name]
-#   boxes = examples[boxes_column_name]
-#   word_labels = examples[label_column_name]
-
-#   encoding = processor(images, words, boxes=boxes, word_labels=word_labels,
-#                        truncation=True, padding="max_length")
-
-#   return encoding
-
 # we need to define custom features for `set_format` (used later on) to work properly
 features = Features({
     'pixel_values': Array3D(dtype="float32", shape=(3, 224, 224)),
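The hunk above shows only the tail of unnormalize_box and the head of the features definition. For reference, a minimal sketch of the full helper, assuming the standard LayoutLM convention that bounding boxes are normalized to a 0-1000 scale (this reconstruction is not part of the commit):

def unnormalize_box(bbox, width, height):
    # LayoutLM-family boxes come normalized to 0-1000; rescale to pixel space
    return [
        width * (bbox[0] / 1000),
        height * (bbox[1] / 1000),
        width * (bbox[2] / 1000),
        height * (bbox[3] / 1000),
    ]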
@@ -173,58 +162,7 @@ def process_image(image):
 
     return image
 
-#def process_image(image):
-
-    print(type(image))
-    width, height = image.size
-
-    image = example["image"]
-    words = example["tokens"]
-    boxes = example["bboxes"]
-    word_labels = example["ner_tags"]
-
-    for k,v in encoding.items():
-        print(k,v.shape)
-
-    # encode
-
-    encoding = processor(image, truncation=True, boxes=boxes, word_labels=word_labels, return_offsets_mapping=True, return_tensors="pt")
-    offset_mapping = encoding.pop('offset_mapping')
-
-
-
-    # forward pass
-    with torch.no_grad():
-        outputs = model(**encoding)
-
-    # get predictions
-
-    # We take the highest score for each token, using argmax.
-    # This serves as the predicted label for each token.
-    logits = outputs.logits
-    #logits.shape
-    predictions = logits.argmax(-1).squeeze().tolist()
 
-    labels = encoding.labels.squeeze().tolist()
-
-    token_boxes = encoding.bbox.squeeze().tolist()
-    width, height = image.size
-
-
-    # only keep non-subword predictions
-    is_subword = np.array(offset_mapping.squeeze().tolist())[:,0] != 0
-    true_predictions = [id2label[pred] for idx, pred in enumerate(predictions) if not is_subword[idx]]
-    true_boxes = [unnormalize_box(box, width, height) for idx, box in enumerate(token_boxes) if not is_subword[idx]]
-
-    # draw predictions over the image
-    draw = ImageDraw.Draw(image)
-    font = ImageFont.load_default()
-    for prediction, box in zip(true_predictions, true_boxes):
-        predicted_label = id2label(prediction)
-        draw.rectangle(box, outline=label2color[predicted_label])
-        draw.text((box[0]+10, box[1]-10), text=predicted_label, fill=label2color[predicted_label], font=font)
-
-    return image
 
 title = "DocumentAI - Extraction using LayoutLMv3 model"
 description = "Extraction of Form or Invoice Extraction - We use Microsoft's LayoutLMv3 trained on Invoice Dataset to predict the Biller Name, Biller Address, Biller post_code, Due_date, GST, Invoice_date, Invoice_number, Subtotal and Total. To use it, simply upload an image or use the example image below. Results will show up in a few seconds."
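The block deleted above was unreachable duplicate code and carried several bugs: `encoding` is iterated before it is assigned, the processor call passes boxes and word_labels without words, and `id2label(prediction)` calls a dict as if it were a function. For reference, a hedged sketch of the inference path it was attempting; `run_inference` is a hypothetical name, and `processor` (with OCR enabled), `model`, `id2label`, and `label2color` are assumed to be defined elsewhere in app.py, as the surviving code implies:

import numpy as np
import torch
from PIL import ImageDraw, ImageFont

def run_inference(image, processor, model, id2label, label2color):
    # hypothetical helper; mirrors what the deleted block attempted
    width, height = image.size

    # with apply_ocr enabled the processor runs OCR itself, so only the
    # image is needed; offset mapping lets us drop subword tokens later
    encoding = processor(image, return_offsets_mapping=True,
                         return_tensors="pt", truncation=True)
    offset_mapping = encoding.pop("offset_mapping")

    # forward pass
    with torch.no_grad():
        outputs = model(**encoding)

    # the highest-scoring class per token serves as its predicted label
    predictions = outputs.logits.argmax(-1).squeeze().tolist()
    token_boxes = encoding.bbox.squeeze().tolist()

    # keep only the first subword of each word (offset start == 0)
    is_subword = np.array(offset_mapping.squeeze().tolist())[:, 0] != 0
    true_predictions = [id2label[p] for i, p in enumerate(predictions)
                        if not is_subword[i]]
    true_boxes = [unnormalize_box(b, width, height)
                  for i, b in enumerate(token_boxes) if not is_subword[i]]

    # draw the predicted labels over the input image
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()
    for label, box in zip(true_predictions, true_boxes):
        draw.rectangle(box, outline=label2color[label])
        draw.text((box[0] + 10, box[1] - 10), text=label,
                  fill=label2color[label], font=font)
    return image

The title and description strings kept in the context lines are presumably the ones passed to the Gradio interface alongside the surviving process_image.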