mrm8488
/

layoutlm-finetuned-funsd

Token Classification

Transformers

PyTorch

Safetensors

layoutlm

Inference Endpoints

Model card Files Files and versions Community

mrm8488 committed on Aug 1, 2021

Commit

b154577

1 Parent(s): 3f23a54

Update README.md

Browse files

Files changed (1) hide show

README.md +43 -9

README.md CHANGED Viewed

@@ -1,5 +1,4 @@
-# LayoutLM fine-tuned on FUNSD for Document token classification
 ## Usage
@@ -8,12 +7,13 @@ import torch
 import numpy as np
 from PIL import Image, ImageDraw, ImageFont
 import pytesseract
-from transformers import LayoutLMForTokenClassification
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = LayoutLMForTokenClassification.from_pretrained("mrm8488/layoutlm-finetuned-funsd", num_labels=num_labels)
 model.to(device)
@@ -29,10 +29,8 @@ width, height = image.size
 w_scale = 1000/width
 h_scale = 1000/height
-ocr_df = pytesseract.image_to_data(image, output_type='data.frame') \
-ocr_df = ocr_df.dropna() \
-               .assign(left_scaled = ocr_df.left*w_scale,
                        width_scaled = ocr_df.width*w_scale,
                        top_scaled = ocr_df.top*h_scale,
                        height_scaled = ocr_df.height*h_scale,
@@ -41,7 +39,7 @@ ocr_df = ocr_df.dropna() \
 float_cols = ocr_df.select_dtypes('float').columns
 ocr_df[float_cols] = ocr_df[float_cols].round(0).astype(int)
-ocr_df = ocr_df.replace(r'^\s*$', np.nan, regex=True)
 ocr_df = ocr_df.dropna().reset_index(drop=True)
 ocr_df[:20]
@@ -140,5 +138,41 @@ bbox = torch.tensor(token_boxes, device=device).unsqueeze(0)
 outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids)
 ```

+# LayoutLM fine-tuned on FUNSD for Document token classification
 ## Usage
 import numpy as np
 from PIL import Image, ImageDraw, ImageFont
 import pytesseract
+from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+tokenizer = LayoutLMTokenizer.from_pretrained("mrm8488/layoutlm-finetuned-funsd")
+model = LayoutLMForTokenClassification.from_pretrained("mrm8488/layoutlm-finetuned-funsd", num_labels=13)
 model.to(device)
 w_scale = 1000/width
 h_scale = 1000/height
+ocr_df = pytesseract.image_to_data(image, output_type='data.frame')
+ocr_df = ocr_df.dropna() \
+               .assign(left_scaled = ocr_df.left*w_scale,
                        width_scaled = ocr_df.width*w_scale,
                        top_scaled = ocr_df.top*h_scale,
                        height_scaled = ocr_df.height*h_scale,
 float_cols = ocr_df.select_dtypes('float').columns
 ocr_df[float_cols] = ocr_df[float_cols].round(0).astype(int)
+ocr_df = ocr_df.replace(r'^\s*$', np.nan, regex=True)
 ocr_df = ocr_df.dropna().reset_index(drop=True)
 ocr_df[:20]
 outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids)
+token_predictions = outputs.logits.argmax(-1).squeeze().tolist() # the predictions are at the token level
+word_level_predictions = [] # let's turn them into word level predictions
+final_boxes = []
+for id, token_pred, box in zip(input_ids.squeeze().tolist(), token_predictions, token_actual_boxes):
+  if (tokenizer.decode([id]).startswith("##")) or (id in [tokenizer.cls_token_id,
+                                                           tokenizer.sep_token_id,
+                                                          tokenizer.pad_token_id]):
+    # skip prediction + bounding box
+    continue
+  else:
+    word_level_predictions.append(token_pred)
+    final_boxes.append(box)
+#print(word_level_predictions)
+draw = ImageDraw.Draw(image)
+font = ImageFont.load_default()
+def iob_to_label(label):
+  if label != 'O':
+    return label[2:]
+  else:
+    return "other"
+label2color = {'question':'blue', 'answer':'green', 'header':'orange', 'other':'violet'}
+for prediction, box in zip(word_level_predictions, final_boxes):
+    predicted_label = iob_to_label(label_map[prediction]).lower()
+    draw.rectangle(box, outline=label2color[predicted_label])
+    draw.text((box[0] + 10, box[1] - 10), text=predicted_label, fill=label2color[predicted_label], font=font)
+# Display the result (image)
 ```