szczotar
committed on
Commit · 1125d95
1 Parent(s): fe3447a
hanlder
Browse files
- .ipynb_checkpoints/Untitled-checkpoint.ipynb +195 -0
- .ipynb_checkpoints/handler-checkpoint.py +146 -0
- Untitled.ipynb +195 -0
- __pycache__/handler.cpython-311.pyc +0 -0
- handler.py +145 -0
.ipynb_checkpoints/Untitled-checkpoint.ipynb
ADDED
@@ -0,0 +1,195 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "20356e27-98f6-4a19-b0ec-d1d2e92029f1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\ArturSzczotarski\\LLM env\\du_env\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
+      " warnings.warn(\n",
+      "C:\\Users\\ArturSzczotarski\\LLM env\\du_env\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
+      " warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "from handler import EndpointHandler\n",
+    " \n",
+    "# init handler\n",
+    "my_handler = EndpointHandler(path=\"Szczotar93/Layoutlm_Inkaso_2\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "63a53fa9-c2ae-425c-9a8a-ec2415753630",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import LayoutLMForTokenClassification, LayoutLMv2Processor, LayoutLMTokenizer, AutoModelForTokenClassification, AutoProcessor\n",
+    "\n",
+    "from PIL import Image, ImageDraw, ImageFont\n",
+    "import torch\n",
+    "import pandas as pd\n",
+    "import pytesseract\n",
+    "\n",
+    "pytesseract.pytesseract.tesseract_cmd = r'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "684076e3-9ec2-4c99-af1e-6860a3e355e9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from PIL import Image\n",
+    "filename = r\"C:\\Users\\ArturSzczotarski\\LLM env\\du_env\\documentsImages\\test\\2.png\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "660f4096-9122-41fc-b38a-fd5299a16df5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "img = Image.open(filename)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "04ee0572-093c-4e30-872c-24216c807e4c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'C:\\\\Users\\\\ArturSzczotarski\\\\LLM env\\\\du_env\\\\documentsImages\\\\test\\\\2.png'"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "img.filename"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "096b6ffb-767e-45a2-bf4b-1f6d3f67f3a4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'predictions': [[{'word': 'km 1792 /', 'label': 'doc id', 'score': '0.94'},\n",
+       " {'word': 'wezwanie do dokonywania potraceh ztur',\n",
+       " 'label': 'title',\n",
+       " 'score': '0.98'},\n",
+       " {'word': 'kredyt inkaso s. a', 'label': 'creditor name', 'score': '0.95'},\n",
+       " {'word': '02 - 672 warszawa domaniewska 39',\n",
+       " 'label': 'creditor address',\n",
+       " 'score': '0.97'},\n",
+       " {'word': '##ter mateusz garbula kanaria. -',\n",
+       " 'label': 'creditor proxy',\n",
+       " 'score': '0.92'}]]}"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "my_handler({\"inputs\": img})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "52580570-51a6-4d73-aab0-ba3bf2af41f2",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'C:\\\\Users\\\\ArturSzczotarski\\\\LLM env\\\\du_env\\\\documentsImages\\\\test\\\\2.png'"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "img.filename"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "04c995ad-634b-4057-92ad-25d329371911",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "object of type 'PngImageFile' has no len()",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[8], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdata\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DataLoader\n\u001b[1;32m----> 3\u001b[0m dataloader \u001b[38;5;241m=\u001b[39m \u001b[43mDataLoader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mshuffle\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
+      "File \u001b[1;32m~\\LLM env\\du_env\\Lib\\site-packages\\torch\\utils\\data\\dataloader.py:350\u001b[0m, in \u001b[0;36mDataLoader.__init__\u001b[1;34m(self, dataset, batch_size, shuffle, sampler, batch_sampler, num_workers, collate_fn, pin_memory, drop_last, timeout, worker_init_fn, multiprocessing_context, generator, prefetch_factor, persistent_workers, pin_memory_device)\u001b[0m\n\u001b[0;32m 348\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# map-style\u001b[39;00m\n\u001b[0;32m 349\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m shuffle:\n\u001b[1;32m--> 350\u001b[0m sampler \u001b[38;5;241m=\u001b[39m \u001b[43mRandomSampler\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgenerator\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgenerator\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[0;32m 351\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 352\u001b[0m sampler \u001b[38;5;241m=\u001b[39m SequentialSampler(dataset) \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n",
+      "File \u001b[1;32m~\\LLM env\\du_env\\Lib\\site-packages\\torch\\utils\\data\\sampler.py:142\u001b[0m, in \u001b[0;36mRandomSampler.__init__\u001b[1;34m(self, data_source, replacement, num_samples, generator)\u001b[0m\n\u001b[0;32m 139\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreplacement, \u001b[38;5;28mbool\u001b[39m):\n\u001b[0;32m 140\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreplacement should be a boolean value, but got replacement=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreplacement\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 142\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnum_samples\u001b[49m, \u001b[38;5;28mint\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnum_samples \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m 143\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnum_samples should be a positive integer value, but got num_samples=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnum_samples\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
+      "File \u001b[1;32m~\\LLM env\\du_env\\Lib\\site-packages\\torch\\utils\\data\\sampler.py:149\u001b[0m, in \u001b[0;36mRandomSampler.num_samples\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 145\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[0;32m 146\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnum_samples\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mint\u001b[39m:\n\u001b[0;32m 147\u001b[0m \u001b[38;5;66;03m# dataset size might change at runtime\u001b[39;00m\n\u001b[0;32m 148\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_num_samples \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 149\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata_source\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 150\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_num_samples\n",
+      "\u001b[1;31mTypeError\u001b[0m: object of type 'PngImageFile' has no len()"
+     ]
+    }
+   ],
+   "source": [
+    "from torch.utils.data import DataLoader\n",
+    "\n",
+    "dataloader = DataLoader(img, batch_size=1, shuffle=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e604a2bd-a068-46bb-82d8-4fba7fc6212b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "du_env",
+   "language": "python",
+   "name": "du_env"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
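The last executed cell above fails because `DataLoader` expects a sized dataset, and a single `PngImageFile` has no `len()`. A minimal sketch of a fix, assuming the goal was to batch document images; the file path below is illustrative, not one of the original paths:

    from PIL import Image
    from torch.utils.data import DataLoader

    # Wrap the image(s) in a list so the dataset has a length;
    # collate_fn=list keeps PIL images intact instead of tensor-stacking them.
    files = ["2.png"]  # illustrative path
    images = [Image.open(f).convert("RGB") for f in files]
    dataloader = DataLoader(images, batch_size=1, shuffle=True, collate_fn=list)

    for batch in dataloader:
        print(batch[0].size)  # each batch is a list of PIL images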
.ipynb_checkpoints/handler-checkpoint.py
ADDED
@@ -0,0 +1,146 @@
+from typing import Dict, List, Any
+from transformers import LayoutLMForTokenClassification, LayoutLMv2Processor
+from PIL import Image
+import torch
+from subprocess import run
+
+# install tesseract-ocr and pytesseract
+# run("apt install -y tesseract-ocr", shell=True, check=True)
+run("pip install pytesseract", shell=True, check=True)
+
+# set device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+# helper to unnormalize 0-1000-normalized bboxes, e.g. for drawing onto the image
+def unnormalize_box(bbox, width, height):
+    return [
+        int(width * (bbox[0] / 1000)),
+        int(height * (bbox[1] / 1000)),
+        int(width * (bbox[2] / 1000)),
+        int(height * (bbox[3] / 1000)),
+    ]
+
+
+def get_image_from_url(path):
+    # LayoutLMv2Processor requires RGB format
+    return Image.open(path).convert("RGB")
+
+
+def predict(image, processor, model):
+    """Process a document image and run LayoutLM token-classification inference.
+
+    Args:
+        image (PIL.Image): Document image to process
+    Returns:
+        (List[List[Dict]], BatchEncoding): Extracted entities and the encoding
+    """
+    encoding = processor(
+        images=image,
+        return_tensors="pt",
+        padding="max_length",
+        truncation=True,
+    )
+    del encoding["image"]  # LayoutLM doesn't require the image
+    with torch.no_grad():
+        outputs = model(**encoding.to(device))  # move tensors to the model's device
+    results = process_outputs(
+        outputs,
+        encoding=encoding,
+        images=image,
+        model=model,
+        processor=processor,
+        threshold=0.75,
+    )
+    return results, encoding
+
+
+def get_unique_label_list(labels):
+    # strip the B-/I- prefix and deduplicate while preserving order
+    unique_labels = []
+    for label in labels[0]:
+        try:
+            label_short = label.split("-")[1]
+        except IndexError:  # label without a prefix, e.g. "O"
+            label_short = label
+        if label_short not in unique_labels:
+            unique_labels.append(label_short)
+    return unique_labels
+
+
+def process_outputs(outputs, encoding, images, model, processor, threshold):
+    scores, _ = torch.max(outputs.logits.softmax(dim=-1), dim=-1)
+    scores = scores.tolist()
+    predictions = outputs.logits.argmax(-1)
+    labels = [[model.config.id2label[pred.item()] for pred in prediction] for prediction in predictions]
+    results = _process_outputs(
+        encoding=encoding,
+        tokenizer=processor.tokenizer,
+        processor=processor,
+        labels=labels,
+        scores=scores,
+        images=images,
+        threshold=threshold,
+    )
+    return results
+
+
+def _process_outputs(encoding, tokenizer, labels, scores, images, processor, threshold):
+    results = []
+    entities = []
+    unique_labels = get_unique_label_list(labels)
+    entity_token_idxs = []
+    for idx, label in enumerate(unique_labels):
+        score_sum = 0.0
+        if label != "O":
+            # collect every token predicted as this label above the threshold
+            for ix, pred in enumerate(labels[0]):
+                if scores[0][ix] > threshold and label in pred:
+                    score_sum += scores[0][ix]
+                    entity_token_idxs.append(ix)
+            try:
+                score_mean = f"{score_sum / len(entity_token_idxs):.2f}"
+            except ZeroDivisionError:  # no token cleared the threshold
+                score_mean = "0.00"
+            entities.append(
+                {
+                    "word": processor.decode(encoding.input_ids[0][entity_token_idxs]),
+                    "label": unique_labels[idx],
+                    "score": score_mean,
+                }
+            )
+            entity_token_idxs = []
+    results.append(entities)
+    return results
+
+
+class EndpointHandler:
+    def __init__(self, path=""):
+        # load model and processor from path
+        self.model = LayoutLMForTokenClassification.from_pretrained(path).to(device)
+        self.processor = LayoutLMv2Processor.from_pretrained(path)
+
+    def __call__(self, data: Dict[str, Any]) -> Dict[str, List[Any]]:
+        """
+        Args:
+            data (:obj:`dict`):
+                includes the deserialized image file as PIL.Image under "inputs"
+        """
+        # process input
+        image = data.pop("inputs", data)
+        print(image.filename)
+
+        result, encod = predict(image, self.processor, self.model)
+        return {"predictions": result}
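For reference, a worked example of the 0-1000 box convention that `unnormalize_box` inverts; the page size and box values below are made up:

    from handler import unnormalize_box  # same helper as defined above

    # LayoutLM boxes live on a 0-1000 grid regardless of actual page size.
    # On a hypothetical 800x1200-pixel page:
    print(unnormalize_box([100, 50, 400, 80], width=800, height=1200))
    # -> [80, 60, 320, 96]: each x is scaled by width/1000, each y by height/1000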
Untitled.ipynb
ADDED
@@ -0,0 +1,195 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "20356e27-98f6-4a19-b0ec-d1d2e92029f1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\ArturSzczotarski\\LLM env\\du_env\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
+      " warnings.warn(\n",
+      "C:\\Users\\ArturSzczotarski\\LLM env\\du_env\\Lib\\site-packages\\huggingface_hub\\file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
+      " warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "from handler import EndpointHandler\n",
+    " \n",
+    "# init handler\n",
+    "my_handler = EndpointHandler(path=\"Szczotar93/Layoutlm_Inkaso_2\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "63a53fa9-c2ae-425c-9a8a-ec2415753630",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import LayoutLMForTokenClassification, LayoutLMv2Processor, LayoutLMTokenizer, AutoModelForTokenClassification, AutoProcessor\n",
+    "\n",
+    "from PIL import Image, ImageDraw, ImageFont\n",
+    "import torch\n",
+    "import pandas as pd\n",
+    "import pytesseract\n",
+    "\n",
+    "pytesseract.pytesseract.tesseract_cmd = r'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "684076e3-9ec2-4c99-af1e-6860a3e355e9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from PIL import Image\n",
+    "filename = r\"C:\\Users\\ArturSzczotarski\\LLM env\\du_env\\documentsImages\\test\\2.png\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "660f4096-9122-41fc-b38a-fd5299a16df5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "img = Image.open(filename)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "04ee0572-093c-4e30-872c-24216c807e4c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'C:\\\\Users\\\\ArturSzczotarski\\\\LLM env\\\\du_env\\\\documentsImages\\\\test\\\\2.png'"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "img.filename"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "096b6ffb-767e-45a2-bf4b-1f6d3f67f3a4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'predictions': [[{'word': 'km 1792 /', 'label': 'doc id', 'score': '0.94'},\n",
+       " {'word': 'wezwanie do dokonywania potraceh ztur',\n",
+       " 'label': 'title',\n",
+       " 'score': '0.98'},\n",
+       " {'word': 'kredyt inkaso s. a', 'label': 'creditor name', 'score': '0.95'},\n",
+       " {'word': '02 - 672 warszawa domaniewska 39',\n",
+       " 'label': 'creditor address',\n",
+       " 'score': '0.97'},\n",
+       " {'word': '##ter mateusz garbula kanaria. -',\n",
+       " 'label': 'creditor proxy',\n",
+       " 'score': '0.92'}]]}"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "my_handler({\"inputs\": img})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "52580570-51a6-4d73-aab0-ba3bf2af41f2",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'C:\\\\Users\\\\ArturSzczotarski\\\\LLM env\\\\du_env\\\\documentsImages\\\\test\\\\2.png'"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "img.filename"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "04c995ad-634b-4057-92ad-25d329371911",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "object of type 'PngImageFile' has no len()",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[8], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdata\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DataLoader\n\u001b[1;32m----> 3\u001b[0m dataloader \u001b[38;5;241m=\u001b[39m \u001b[43mDataLoader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mshuffle\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
+      "File \u001b[1;32m~\\LLM env\\du_env\\Lib\\site-packages\\torch\\utils\\data\\dataloader.py:350\u001b[0m, in \u001b[0;36mDataLoader.__init__\u001b[1;34m(self, dataset, batch_size, shuffle, sampler, batch_sampler, num_workers, collate_fn, pin_memory, drop_last, timeout, worker_init_fn, multiprocessing_context, generator, prefetch_factor, persistent_workers, pin_memory_device)\u001b[0m\n\u001b[0;32m 348\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# map-style\u001b[39;00m\n\u001b[0;32m 349\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m shuffle:\n\u001b[1;32m--> 350\u001b[0m sampler \u001b[38;5;241m=\u001b[39m \u001b[43mRandomSampler\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgenerator\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgenerator\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[0;32m 351\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 352\u001b[0m sampler \u001b[38;5;241m=\u001b[39m SequentialSampler(dataset) \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n",
+      "File \u001b[1;32m~\\LLM env\\du_env\\Lib\\site-packages\\torch\\utils\\data\\sampler.py:142\u001b[0m, in \u001b[0;36mRandomSampler.__init__\u001b[1;34m(self, data_source, replacement, num_samples, generator)\u001b[0m\n\u001b[0;32m 139\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreplacement, \u001b[38;5;28mbool\u001b[39m):\n\u001b[0;32m 140\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreplacement should be a boolean value, but got replacement=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreplacement\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 142\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnum_samples\u001b[49m, \u001b[38;5;28mint\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnum_samples \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m 143\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnum_samples should be a positive integer value, but got num_samples=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnum_samples\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
+      "File \u001b[1;32m~\\LLM env\\du_env\\Lib\\site-packages\\torch\\utils\\data\\sampler.py:149\u001b[0m, in \u001b[0;36mRandomSampler.num_samples\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 145\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[0;32m 146\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnum_samples\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mint\u001b[39m:\n\u001b[0;32m 147\u001b[0m \u001b[38;5;66;03m# dataset size might change at runtime\u001b[39;00m\n\u001b[0;32m 148\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_num_samples \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 149\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata_source\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 150\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_num_samples\n",
+      "\u001b[1;31mTypeError\u001b[0m: object of type 'PngImageFile' has no len()"
+     ]
+    }
+   ],
+   "source": [
+    "from torch.utils.data import DataLoader\n",
+    "\n",
+    "dataloader = DataLoader(img, batch_size=1, shuffle=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e604a2bd-a068-46bb-82d8-4fba7fc6212b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "du_env",
+   "language": "python",
+   "name": "du_env"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
__pycache__/handler.cpython-311.pyc
ADDED
Binary file (6.83 kB)
handler.py
ADDED
@@ -0,0 +1,145 @@
+from typing import Dict, List, Any
+from transformers import LayoutLMForTokenClassification, LayoutLMv2Processor
+from PIL import Image
+import torch
+from subprocess import run
+
+# install tesseract-ocr and pytesseract
+run("apt install -y tesseract-ocr", shell=True, check=True)
+run("pip install pytesseract", shell=True, check=True)
+
+# set device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+# helper to unnormalize 0-1000-normalized bboxes, e.g. for drawing onto the image
+def unnormalize_box(bbox, width, height):
+    return [
+        int(width * (bbox[0] / 1000)),
+        int(height * (bbox[1] / 1000)),
+        int(width * (bbox[2] / 1000)),
+        int(height * (bbox[3] / 1000)),
+    ]
+
+
+def get_image_from_url(path):
+    # LayoutLMv2Processor requires RGB format
+    return Image.open(path).convert("RGB")
+
+
+def predict(image, processor, model):
+    """Process a document image and run LayoutLM token-classification inference.
+
+    Args:
+        image (PIL.Image): Document image to process
+    Returns:
+        (List[List[Dict]], BatchEncoding): Extracted entities and the encoding
+    """
+    encoding = processor(
+        images=image,
+        return_tensors="pt",
+        padding="max_length",
+        truncation=True,
+    )
+    del encoding["image"]  # LayoutLM doesn't require the image
+    with torch.no_grad():
+        outputs = model(**encoding.to(device))  # move tensors to the model's device
+    results = process_outputs(
+        outputs,
+        encoding=encoding,
+        images=image,
+        model=model,
+        processor=processor,
+        threshold=0.75,
+    )
+    return results, encoding
+
+
+def get_unique_label_list(labels):
+    # strip the B-/I- prefix and deduplicate while preserving order
+    unique_labels = []
+    for label in labels[0]:
+        try:
+            label_short = label.split("-")[1]
+        except IndexError:  # label without a prefix, e.g. "O"
+            label_short = label
+        if label_short not in unique_labels:
+            unique_labels.append(label_short)
+    return unique_labels
+
+
+def process_outputs(outputs, encoding, images, model, processor, threshold):
+    scores, _ = torch.max(outputs.logits.softmax(dim=-1), dim=-1)
+    scores = scores.tolist()
+    predictions = outputs.logits.argmax(-1)
+    labels = [[model.config.id2label[pred.item()] for pred in prediction] for prediction in predictions]
+    results = _process_outputs(
+        encoding=encoding,
+        tokenizer=processor.tokenizer,
+        processor=processor,
+        labels=labels,
+        scores=scores,
+        images=images,
+        threshold=threshold,
+    )
+    return results
+
+
+def _process_outputs(encoding, tokenizer, labels, scores, images, processor, threshold):
+    results = []
+    entities = []
+    unique_labels = get_unique_label_list(labels)
+    entity_token_idxs = []
+    for idx, label in enumerate(unique_labels):
+        score_sum = 0.0
+        if label != "O":
+            # collect every token predicted as this label above the threshold
+            for ix, pred in enumerate(labels[0]):
+                if scores[0][ix] > threshold and label in pred:
+                    score_sum += scores[0][ix]
+                    entity_token_idxs.append(ix)
+            try:
+                score_mean = f"{score_sum / len(entity_token_idxs):.2f}"
+            except ZeroDivisionError:  # no token cleared the threshold
+                score_mean = "0.00"
+            entities.append(
+                {
+                    "word": processor.decode(encoding.input_ids[0][entity_token_idxs]),
+                    "label": unique_labels[idx],
+                    "score": score_mean,
+                }
+            )
+            entity_token_idxs = []
+    results.append(entities)
+    return results
+
+
+class EndpointHandler:
+    def __init__(self, path=""):
+        # load model and processor from path
+        self.model = LayoutLMForTokenClassification.from_pretrained(path).to(device)
+        self.processor = LayoutLMv2Processor.from_pretrained(path, apply_ocr=True)
+
+    def __call__(self, data: Dict[str, Any]) -> Dict[str, List[Any]]:
+        """
+        Args:
+            data (:obj:`dict`):
+                includes the deserialized image file as PIL.Image under "inputs"
+        """
+        # process input
+        image = data.pop("inputs", data)
+
+        result, encod = predict(image, self.processor, self.model)
+        return {"predictions": result}
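A minimal local smoke test of this handler, mirroring the notebook above; the model repo id is taken from the notebook, and the image path is a placeholder:

    from PIL import Image
    from handler import EndpointHandler

    # load model + processor once, then call the handler the way the endpoint would
    my_handler = EndpointHandler(path="Szczotar93/Layoutlm_Inkaso_2")
    image = Image.open("document.png").convert("RGB")  # placeholder path
    print(my_handler({"inputs": image}))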