Spaces:
Runtime error
Runtime error
update evaluation package with evaluation and ocr packages and completed _compute logic.
Browse files- cer.py +0 -2
- evaluation/iou.py +370 -0
- evaluation/metrics.py +589 -0
- iliauniiccocrevaluation.py +72 -28
- ocr/fiftyone.py +26 -0
cer.py
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
def calculate_cer(*args):
|
2 |
-
return -1
|
|
|
|
|
|
evaluation/iou.py
ADDED
@@ -0,0 +1,370 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
import pandas as pd
|
5 |
+
from scipy.sparse import csr_matrix
|
6 |
+
from scipy.sparse.csgraph import connected_components
|
7 |
+
|
8 |
+
|
9 |
+
def bb_intersection_over_union(boxA, boxB):
    """Return the intersection-over-union of two axis-aligned boxes.

    Each box is indexable as ``(x1, y1, x2, y2)`` where positions 0/1 hold the
    low corner and positions 2/3 hold the high corner. A small epsilon is added
    to every width and height, so zero-sized boxes still have a non-zero area.
    """
    eps = 1e-5

    # Corners of the overlap rectangle.
    left = max(boxA[0], boxB[0])
    top = max(boxA[1], boxB[1])
    right = min(boxA[2], boxB[2])
    bottom = min(boxA[3], boxB[3])

    # Negative extents mean the boxes do not overlap on that axis.
    overlap_width = max(0, right - left + eps)
    overlap_height = max(0, bottom - top + eps)
    overlap_area = overlap_width * overlap_height

    area_a = (boxA[2] - boxA[0] + eps) * (boxA[3] - boxA[1] + eps)
    area_b = (boxB[2] - boxB[0] + eps) * (boxB[3] - boxB[1] + eps)

    # Union = sum of both areas minus the part counted twice.
    union_area = float(area_a + area_b - overlap_area)
    return overlap_area / union_area
|
28 |
+
|
29 |
+
|
30 |
+
def bb_intersection_over_union_vectorized(bboxes1, bboxes2):
    """Compute the IOU of every box in ``bboxes1`` against every box in ``bboxes2``.

    Args:
        bboxes1: 2-D array-like with one box per row; the first two values of a
            row are the low corner, the last two the high corner.
        bboxes2: second set of boxes in the same layout.

    Returns:
        Float array of shape ``(len(bboxes1), len(bboxes2))`` where entry
        ``[i, j]`` is the IOU of ``bboxes1[i]`` and ``bboxes2[j]``.

    The inputs are never modified. They are converted to float first: adding
    the epsilon in place to an integer array (typical for pixel coordinates)
    would otherwise fail with a casting error.
    """
    low = np.s_[..., :2]
    high = np.s_[..., 2:]

    eps = 1e-5

    # Fresh float copies, shaped so that broadcasting pairs every row of the
    # first set with every row of the second: (n1, 1, 4) against (1, n2, 4).
    boxes_a = np.array(bboxes1, dtype=float)[:, np.newaxis, :]
    boxes_b = np.array(bboxes2, dtype=float)[np.newaxis, :, :]

    # Same epsilon as the scalar implementation: zero-sized boxes keep a
    # non-zero area, so the division below cannot hit 0 / 0.
    boxes_a[high] += eps
    boxes_b[high] += eps

    overlap_extent = np.minimum(boxes_a[high], boxes_b[high]) - np.maximum(boxes_a[low], boxes_b[low])
    intrs = np.maximum(0.0, overlap_extent).prod(-1)

    areas_a = (boxes_a[high] - boxes_a[low]).prod(-1)
    areas_b = (boxes_b[high] - boxes_b[low]).prod(-1)

    return intrs / (areas_a + areas_b - intrs)
|
61 |
+
|
62 |
+
|
63 |
+
def bb_is_on_same_line_vectorized(bboxes1, bboxes2):
    """Tell, for every pair of boxes, whether the two boxes sit on the same line.

    Only positions 1 and 3 of each box row (its low and high bound on the
    second axis) are used. Two boxes count as being on the same line when the
    midpoint of each one lies within the bounds of the other.

    Returns a boolean array of shape ``(len(bboxes1), len(bboxes2))``.
    """
    # Column vectors for the first set, row vectors for the second, so that
    # every comparison below broadcasts to all (i, j) pairs.
    low_a = bboxes1[:, 1][:, np.newaxis]
    high_a = bboxes1[:, 3][:, np.newaxis]
    low_b = bboxes2[:, 1][np.newaxis, :]
    high_b = bboxes2[:, 3][np.newaxis, :]

    middle_a = (low_a + high_a) / 2
    middle_b = (low_b + high_b) / 2

    b_middle_inside_a = np.bitwise_and(low_a <= middle_b, middle_b <= high_a)
    a_middle_inside_b = np.bitwise_and(low_b <= middle_a, middle_a <= high_b)

    return np.bitwise_and(b_middle_inside_a, a_middle_inside_b)
|
77 |
+
|
78 |
+
|
79 |
+
def iou(ocr1, ocr2):
    """Return the IOU of two OCR items that expose ``x1``, ``y1``, ``x2``, ``y2`` keys."""
    corner_keys = ('x1', 'y1', 'x2', 'y2')
    first_box = tuple(ocr1[key] for key in corner_keys)
    second_box = tuple(ocr2[key] for key in corner_keys)
    return bb_intersection_over_union(first_box, second_box)
|
84 |
+
|
85 |
+
|
86 |
+
def _generate_empty_row(example_row, index):
    """Build a one-row DataFrame that stands in for a missing item.

    The row has the same columns as ``example_row`` and is labelled ``index``.
    Text is blanked, the structural counters and the confidence are zeroed and
    the box is set to the tiny but valid rectangle (0, 0, 1, 1) so that image
    cropping does not fail. The page is taken from the example row (0 if it has
    none). Columns not listed below keep the example row's value; keys listed
    below that the example row lacks are dropped.

    NOTE(review): ``bounding_box`` / ``confidence`` columns, if present, are not
    reset here and therefore keep the example row's values — confirm intended.
    """
    row_values = example_row.to_dict()
    row_values.update(
        {
            'page': row_values.get('page', 0),
            'block': 0,
            'paragraph': 0,
            'word': 0,
            'x1': 0,
            'y1': 0,
            'x2': 1,
            'y2': 1,
            'conf': 0.0,
            'text': "",
        }
    )

    # ``columns`` restricts the frame to the example row's own fields.
    return pd.DataFrame([row_values], columns=example_row.index, index=[index])
|
105 |
+
|
106 |
+
|
107 |
+
def word_or_symbol_pair_matching(df1, df2, pref1, pref2):
    """Match the words or symbols of two OCR results page by page using bounding-box IOU.

    Candidate pairs are processed in decreasing order of IOU and every item can
    take part in at most one pair. Items left without a partner are paired with
    an empty placeholder row (built by ``_generate_empty_row``) and get an IOU
    of 0.0, so every item of both inputs appears exactly once in the result.

    Args:
        df1: first set of items; needs a ``page`` column plus whatever
            ``calculate_ious_fast`` reads. Row labels must be unique and
            mutually comparable numbers (they are used as sort keys).
        df2: second set of items, same requirements.
        pref1: prefix put in front of the column names coming from ``df1``.
        pref2: prefix put in front of the column names coming from ``df2``.

    Returns:
        DataFrame with one row per pair: the ``pref1``-prefixed columns of the
        ``df1`` item, the ``pref2``-prefixed columns of the ``df2`` item and an
        ``iou`` column. Rows are ordered by the ``df2`` item within each page
        and row labels restart at 0 for every page. Pages on which either side
        has no items are skipped; when no page produces pairs, an empty frame
        with the same columns is returned.
    """
    text_pairs_dfs_per_page = []
    unique_page_ids = sorted(set(df1['page'].unique().tolist() + df2['page'].unique().tolist()))

    for page_id in unique_page_ids:
        # extract words for given page only
        df1_page = df1[df1.page == page_id]
        df2_page = df2[df2.page == page_id]

        # Both sides are needed: similarities require two non-empty sets and the
        # placeholder row is cloned from an existing row of each side.
        if df1_page.empty or df2_page.empty:
            continue

        similarity_metrics = calculate_ious_fast(ocr1_df=df1_page, ocr2_df=df2_page)

        # Only overlapping boxes can be paired, so sort just those candidates.
        # np.nonzero walks the matrix row by row and sorted() is stable, hence
        # ties are resolved in df1-then-df2 order.
        rows, cols = np.nonzero(similarity_metrics > 0.0)
        candidates = sorted(
            zip(rows.tolist(), cols.tolist()),
            key=lambda pos: -similarity_metrics[pos[0], pos[1]],
        )

        # Greedy one-to-one assignment, best overlap first.
        paired_items_1 = set()
        paired_items_2 = set()
        pairs = []
        for pos1, pos2 in candidates:
            index1 = df1_page.index[pos1]
            index2 = df2_page.index[pos2]
            if index1 in paired_items_1 or index2 in paired_items_2:
                continue
            paired_items_1.add(index1)
            paired_items_2.add(index2)
            pairs.append((index1, index2, similarity_metrics[pos1, pos2]))

        # Label for the placeholder row: one past the largest label on this
        # page, so it can never collide with a real row and always sorts last.
        empty_item_index = max(df1_page.index.max(), df2_page.index.max()) + 1
        pairs.extend(
            (index1, empty_item_index, 0.0) for index1 in df1_page.index if index1 not in paired_items_1
        )
        pairs.extend(
            (empty_item_index, index2, 0.0) for index2 in df2_page.index if index2 not in paired_items_2
        )

        # sort pairs according to df2 items original indices
        sorted_pairs = sorted(pairs, key=lambda x: (x[1], x[0]))

        # create row for empty items in each dataframe
        df1_page = pd.concat([df1_page, _generate_empty_row(example_row=df1_page.iloc[0], index=empty_item_index)])
        df2_page = pd.concat([df2_page, _generate_empty_row(example_row=df2_page.iloc[0], index=empty_item_index)])

        # generate pairs dataset
        text_pairs_df = pd.concat(
            [
                df1_page.loc[[item[0] for item in sorted_pairs], :].reset_index(drop=True).add_prefix(pref1),
                df2_page.loc[[item[1] for item in sorted_pairs], :].reset_index(drop=True).add_prefix(pref2),
                pd.DataFrame(
                    data=[item[2] for item in sorted_pairs],
                    columns=["iou"]
                )
            ],
            axis=1
        )

        text_pairs_dfs_per_page.append(text_pairs_df)

    if not text_pairs_dfs_per_page:
        # pd.concat refuses an empty list; hand back a well-formed empty result.
        return pd.DataFrame(
            columns=[f"{pref1}{column}" for column in df1.columns]
            + [f"{pref2}{column}" for column in df2.columns]
            + ["iou"]
        )

    return pd.concat(text_pairs_dfs_per_page, axis=0)
|
177 |
+
|
178 |
+
|
179 |
+
def word_or_symbol_group_pair_matching(df1, df2, pref1, pref2):
    """Match groups of words or symbols of two OCR results page by page using bounding-box IOU.

    Works like the non-group matching (greedy, one-to-one, decreasing IOU,
    unmatched items paired with an empty placeholder), except that the items of
    each page are first merged into groups by ``get_connected_components`` and
    the matching is done between those groups.

    Example:
        If one side has the two words ["abc", "d"] and the other side has the
        single word ["abcd"], grouping the first two words and matching the
        group against the single word evaluates the overall text detection
        rather than the exact word or symbol boundaries.

    Note: grouping is restricted to items on one line, to avoid unpredictable
    results when word boxes on neighbouring lines intersect.

    Args:
        df1: first set of items; needs a ``page`` column plus whatever
            ``get_connected_components`` and ``calculate_ious_fast`` read.
        df2: second set of items, same requirements.
        pref1: prefix put in front of the column names coming from ``df1``.
        pref2: prefix put in front of the column names coming from ``df2``.

    Returns:
        DataFrame with one row per pair of groups: the ``pref1``-prefixed
        columns of the ``df1`` group, the ``pref2``-prefixed columns of the
        ``df2`` group and an ``iou`` column. Row labels restart at 0 for every
        page. Pages on which either side has no items are skipped; when no page
        produces pairs, an empty frame with the same columns is returned.
    """
    text_pairs_dfs_per_page = []
    unique_page_ids = sorted(set(df1['page'].unique().tolist() + df2['page'].unique().tolist()))

    for page_id in unique_page_ids:
        # extract words for given page only
        df1_page = df1[df1.page == page_id]
        df2_page = df2[df2.page == page_id]

        # Both sides are needed: grouping and similarities require two
        # non-empty sets and the placeholder row is cloned from an existing row.
        if df1_page.empty or df2_page.empty:
            continue

        df1_page_groups, df2_page_groups = get_connected_components(ocr1_df=df1_page, ocr2_df=df2_page)

        similarity_metrics = calculate_ious_fast(ocr1_df=df1_page_groups, ocr2_df=df2_page_groups)

        # Only overlapping boxes can be paired, so sort just those candidates.
        # np.nonzero walks the matrix row by row and sorted() is stable, hence
        # ties are resolved in df1-then-df2 order.
        rows, cols = np.nonzero(similarity_metrics > 0.0)
        candidates = sorted(
            zip(rows.tolist(), cols.tolist()),
            key=lambda pos: -similarity_metrics[pos[0], pos[1]],
        )

        # Greedy one-to-one assignment, best overlap first.
        paired_items_1 = set()
        paired_items_2 = set()
        pairs = []
        for pos1, pos2 in candidates:
            index1 = df1_page_groups.index[pos1]
            index2 = df2_page_groups.index[pos2]
            if index1 in paired_items_1 or index2 in paired_items_2:
                continue
            paired_items_1.add(index1)
            paired_items_2.add(index2)
            pairs.append((index1, index2, similarity_metrics[pos1, pos2]))

        # Label for the placeholder row: one past the largest group label, so
        # it can never collide with a real row and always sorts last.
        empty_item_index = max(df1_page_groups.index.max(), df2_page_groups.index.max()) + 1
        pairs.extend(
            (index1, empty_item_index, 0.0) for index1 in df1_page_groups.index if index1 not in paired_items_1
        )
        pairs.extend(
            (empty_item_index, index2, 0.0) for index2 in df2_page_groups.index if index2 not in paired_items_2
        )

        # sort pairs according to df2 items original indices
        sorted_pairs = sorted(pairs, key=lambda x: (x[1], x[0]))

        # create row for empty items in each dataframe
        df1_page_groups = pd.concat(
            [df1_page_groups, _generate_empty_row(example_row=df1_page_groups.iloc[0], index=empty_item_index)])
        df2_page_groups = pd.concat(
            [df2_page_groups, _generate_empty_row(example_row=df2_page_groups.iloc[0], index=empty_item_index)])

        # generate pairs dataset
        text_pairs_df = pd.concat(
            [
                df1_page_groups.loc[[item[0] for item in sorted_pairs], :].reset_index(drop=True).add_prefix(pref1),
                df2_page_groups.loc[[item[1] for item in sorted_pairs], :].reset_index(drop=True).add_prefix(pref2),
                pd.DataFrame(
                    data=[item[2] for item in sorted_pairs],
                    columns=["iou"]
                )
            ],
            axis=1
        )

        text_pairs_dfs_per_page.append(text_pairs_df)

    if not text_pairs_dfs_per_page:
        # pd.concat refuses an empty list; hand back a well-formed empty result.
        return pd.DataFrame(
            columns=[f"{pref1}{column}" for column in df1.columns]
            + [f"{pref2}{column}" for column in df2.columns]
            + ["iou"]
        )

    return pd.concat(text_pairs_dfs_per_page, axis=0)
|
261 |
+
|
262 |
+
def calculate_ious_fast(ocr1_df, ocr2_df):
    """Return the pairwise IOU matrix of the ``bounding_box`` columns of two frames.

    The result has one row per ``ocr1_df`` item and one column per ``ocr2_df``
    item. ``None`` is returned when either frame has no boxes.
    """
    if ocr1_df.empty or ocr2_df.empty:
        return None

    # Each cell of the column holds one box; stack them into a 2-D array.
    first_boxes = np.array(ocr1_df["bounding_box"].values.tolist())
    second_boxes = np.array(ocr2_df["bounding_box"].values.tolist())

    if len(first_boxes) == 0 or len(second_boxes) == 0:
        return None

    return bb_intersection_over_union_vectorized(bboxes1=first_boxes, bboxes2=second_boxes)
|
272 |
+
|
273 |
+
|
274 |
+
def calculate_iosl_fast(ocr1_df, ocr2_df):
    """Return the pairwise "is on same line" matrix of the ``bounding_box`` columns of two frames.

    The result has one row per ``ocr1_df`` item and one column per ``ocr2_df``
    item. ``None`` is returned when either frame has no boxes.
    """
    if ocr1_df.empty or ocr2_df.empty:
        return None

    # Each cell of the column holds one box; stack them into a 2-D array.
    first_boxes = np.array(ocr1_df["bounding_box"].values.tolist())
    second_boxes = np.array(ocr2_df["bounding_box"].values.tolist())

    if len(first_boxes) == 0 or len(second_boxes) == 0:
        return None

    return bb_is_on_same_line_vectorized(bboxes1=first_boxes, bboxes2=second_boxes)
|
284 |
+
|
285 |
+
|
286 |
+
def calculate_adjacency_matrix(ocr1_df, ocr2_df):
    """Build the 0/1 adjacency matrix over the items of both OCR results.

    The two frames are stacked (all ``ocr1_df`` rows first, then all
    ``ocr2_df`` rows) and every item is compared with every item of the stacked
    set, including items of its own frame. Items ``i`` and ``j`` are adjacent
    when their boxes have an IOU greater than 0 and they are on the same line
    according to ``calculate_iosl_fast``.

    Returns:
        Integer array of shape ``(n, n)`` with ``n = len(ocr1_df) + len(ocr2_df)``;
        a ``(0, 0)`` array when both frames are empty.
    """
    # Drop the old labels: only row positions matter here, and keeping them as
    # a column would fail if the frames already had an ``index`` column.
    ocr_df = pd.concat([ocr1_df, ocr2_df], axis=0).reset_index(drop=True)

    if ocr_df.empty:
        return np.zeros((0, 0), dtype=int)

    # calculate ious
    ious = calculate_ious_fast(ocr1_df=ocr_df, ocr2_df=ocr_df)

    # calculate `is on same line` property
    iosls = calculate_iosl_fast(ocr1_df=ocr_df, ocr2_df=ocr_df)

    # build adjacency matrix (1s and 0s); the ``np.int`` alias no longer exists
    # in current NumPy, the builtin ``int`` is the same type.
    adjacency_matrix = np.bitwise_and(ious > 0.0, iosls).astype(int)

    return adjacency_matrix
|
303 |
+
|
304 |
+
|
305 |
+
def get_connected_components(ocr1_df, ocr2_df):
    """Group mutually adjacent items of both OCR results and merge every group per side.

    Adjacency comes from ``calculate_adjacency_matrix`` over the stacked items
    of both frames. For each connected component, the ``ocr1_df`` members are
    merged into one row and the ``ocr2_df`` members into another. A merged row
    copies the first member and then replaces:

    - ``bounding_box`` with the box enclosing all member boxes,
    - ``confidence`` with the mean member confidence,
    - ``text`` with the member texts joined by a space, in row order.

    Args:
        ocr1_df: items of the first result; ``bounding_box``, ``confidence`` and
            ``text`` columns are read. Row labels may be arbitrary.
        ocr2_df: items of the second result, same requirements.

    Returns:
        ``(ocr1_df_groups, ocr2_df_groups)``: the merged frames, both relabelled
        0..k-1 and ordered by component label.
    """

    def _aggregate_group_items_into_one(df):
        """Collapse the rows of one group into a single-row DataFrame."""
        if len(df) == 1:
            return df

        # Keep a one-row *frame* (not a Series) so the result concatenates
        # row-wise with the single-item groups; copy to leave the input intact.
        merged = df.iloc[[0]].copy()
        boxes = np.array(df["bounding_box"].values.tolist())

        # One-element list: the single cell receives the enclosing box.
        merged["bounding_box"] = [
            [
                np.min(boxes[:, 0]),
                np.min(boxes[:, 1]),
                np.max(boxes[:, 2]),
                np.max(boxes[:, 3]),
            ]
        ]
        merged["confidence"] = df["confidence"].mean()
        merged["text"] = " ".join(df["text"].tolist())

        return merged

    def _merge_groups(frame, positions_by_label):
        """Merge every group of one side and stack the results in label order."""
        merged_groups = [
            # Component members are row *positions* in the stacked frame, so
            # they must be resolved with iloc; the caller's row labels need not
            # start at 0 (e.g. a page filtered out of a larger frame).
            _aggregate_group_items_into_one(frame.iloc[positions, :])
            for _, positions in sorted(positions_by_label.items())
        ]
        if not merged_groups:
            return frame.iloc[0:0].reset_index(drop=True)
        return pd.concat(merged_groups, axis=0).reset_index(drop=True)

    if ocr1_df.empty and ocr2_df.empty:
        return ocr1_df.reset_index(drop=True), ocr2_df.reset_index(drop=True)

    # 1. calculate adjacency matrix
    adjacency_matrix = calculate_adjacency_matrix(ocr1_df=ocr1_df, ocr2_df=ocr2_df)

    # 2. find connected components
    _, labels = connected_components(csgraph=csr_matrix(adjacency_matrix), directed=False,
                                     return_labels=True)

    # 3. separate df1 and df2 items and group for each connected component;
    #    the first len(ocr1_df) positions of the stacked frame belong to ocr1_df
    first_count = len(ocr1_df)
    first_positions = {}
    second_positions = {}
    for position, label in enumerate(labels.tolist()):
        if position < first_count:
            first_positions.setdefault(label, []).append(position)
        else:
            second_positions.setdefault(label, []).append(position - first_count)

    # 4. merge group items into one
    ocr1_df_groups = _merge_groups(ocr1_df, first_positions)
    ocr2_df_groups = _merge_groups(ocr2_df, second_positions)

    return ocr1_df_groups, ocr2_df_groups
|
evaluation/metrics.py
ADDED
@@ -0,0 +1,589 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
from evaluation.iou import word_or_symbol_pair_matching, word_or_symbol_group_pair_matching
|
4 |
+
|
5 |
+
|
6 |
+
def text_accuracy(df, pref_1, pref_2):
    """Return the fraction of rows whose ``{pref_1}text`` equals ``{pref_2}text``."""
    is_exact_match = df[f'{pref_1}text'] == df[f'{pref_2}text']
    matched_cnt = is_exact_match.sum()
    total_cnt = df.shape[0]
    return matched_cnt / total_cnt
|
8 |
+
|
9 |
+
|
10 |
+
def text_precision(df, pref_1, pref_2):
    """Return the share of rows with a non-empty ``{pref_1}text`` whose text equals ``{pref_2}text``.

    Rows whose ``{pref_1}text`` is falsy (e.g. the empty string) are left out
    of both the numerator and the denominator.
    """
    first_text = df[f'{pref_1}text']
    second_text = df[f'{pref_2}text']

    first_is_filled = first_text.apply(lambda value: bool(value))
    exact_match_cnt = (first_is_filled & (first_text == second_text)).sum()

    return exact_match_cnt / first_is_filled.sum()
|
15 |
+
|
16 |
+
|
17 |
+
def text_recall(df, pref_1, pref_2):
    """Return the share of rows with a non-empty ``{pref_2}text`` whose text equals ``{pref_1}text``.

    Rows whose ``{pref_2}text`` is falsy (e.g. the empty string) are left out
    of both the numerator and the denominator.
    """
    first_text = df[f'{pref_1}text']
    second_text = df[f'{pref_2}text']

    second_is_filled = second_text.apply(lambda value: bool(value))
    exact_match_cnt = (second_is_filled & (first_text == second_text)).sum()

    return exact_match_cnt / second_is_filled.sum()
|
22 |
+
|
23 |
+
|
24 |
+
def text_f1(df, pref_1, pref_2):
    """Return the harmonic mean of ``text_precision`` and ``text_recall``.

    The result is 0.0 as soon as either of the two is 0.
    """
    precision = text_precision(df, pref_1, pref_2)
    recall = text_recall(df, pref_1, pref_2)

    # Guard first: with both values at 0 the formula would divide by zero.
    if precision == 0 or recall == 0:
        return 0.0

    return (2 * precision * recall) / (precision + recall)
|
34 |
+
|
35 |
+
|
36 |
+
def symbol_confusion_matrix(df, pref_1, pref_2):
    """Count how often every ``{pref_1}text`` value is paired with every ``{pref_2}text`` value.

    Returns:
        A tuple ``(confusion_matrix, pair_cnts)``:

        - ``confusion_matrix``: square DataFrame indexed (rows: ``pref_1``
          value, columns: ``pref_2`` value) by the sorted set of all values
          seen in either column; cells hold the pair counts, 0 when a pair
          never occurs.
        - ``pair_cnts``: one row per pair that does occur, with its count,
          sorted by the two text columns.
    """
    first_column = f'{pref_1}text'
    second_column = f'{pref_2}text'

    all_symbols = sorted(set(df[first_column].tolist() + df[second_column].tolist()))
    pair_value_counts = df[[first_column, second_column]].value_counts()

    # Depending on the pandas version the counts column comes out of
    # reset_index() named 0 or already named "count"; the rename covers both.
    pair_cnts = pair_value_counts.reset_index()
    pair_cnts = pair_cnts.rename({0: "count"}, axis=1)
    pair_cnts = pair_cnts.sort_values(by=[first_column, second_column], ascending=True)

    count_by_pair = pair_value_counts.to_dict()

    matrix_rows = []
    for symbol1 in all_symbols:
        matrix_rows.append([count_by_pair.get((symbol1, symbol2), 0) for symbol2 in all_symbols])

    confusion_matrix = pd.DataFrame(matrix_rows, columns=all_symbols, index=all_symbols)

    return confusion_matrix, pair_cnts
|
57 |
+
|
58 |
+
|
59 |
+
def levenstein(text1, text2):
    """Measure how far ``text1`` is from ``text2`` in terms of edit operations.

    Returns a tuple ``(levenstein_similarity, levenstein_distance, edit_operations)``:

    - levenstein_similarity: 1.0 when the distance is 0; otherwise the number of
      "match" operations divided by the number of all operations (0.0 when
      there is no match at all, which includes an empty operation list)
    - levenstein_distance: the distance reported by ``edit_distance``
    - edit_operations: the operation list reported by ``edit_distance``
    """
    levenstein_distance, edit_operations = edit_distance(text1, text2)

    if levenstein_distance == 0:
        return 1.0, levenstein_distance, edit_operations

    matches_cnt = sum(1 for operation in edit_operations if operation[0] == "match")
    if matches_cnt == 0:
        levenstein_similarity = 0.0
    else:
        levenstein_similarity = float(matches_cnt / len(edit_operations))

    return levenstein_similarity, levenstein_distance, edit_operations
|
80 |
+
|
81 |
+
|
82 |
+
def edit_distance(text1, text2):
    """Compute the edit distance between two texts together with one optimal edit script.

    Three edit operations are allowed, each costing 1:
        - Insert a character
        - Delete a character
        - Substitute a character

    The minimum number of operations turning text1 into text2 is found with
    dynamic programming over an (N+1)x(M+1) table, N = len(text1), M = len(text2).

    cost[i][j] is the minimum number of operations to convert text1[:i] into
    text2[:j]; it is the smallest of

        cost[i-1][j-1]      (+1 on mismatch)  -> recorded as "match" / "substitution"
        cost[i-1][j] + 1                      -> recorded as "insertion"
        cost[i][j-1] + 1                      -> recorded as "deletion"

    and on ties the earlier entry of this list wins.

    Example:
        text1 = "horse"
        text2 = "ros"

        DP  _  r  o  s
        _  [0, 1, 2, 3]
        h  [1, 1, 2, 3]
        o  [2, 2, 1, 2]
        r  [3, 2, 2, 2]
        s  [4, 3, 3, 2]
        e  [5, 4, 4, 3]

    Returns:
        ``(distance, operations)``. Every operation is a tuple
        ``(name, text1 character, text2 character, i - 1, j - 1)`` where
        ``(i, j)`` is the table cell the step was taken from; an "insertion"
        carries only the text1 character and a "deletion" only the text2
        character (the other one is ""). Operations are listed from the start
        of the texts to their end. When either text is empty the distance is
        the length of the other text and the operation list is empty.

    NOTE(review): "insertion" marks a text1 character without counterpart in
    text2 and "deletion" the reverse — confirm this naming matches what report
    consumers expect.
    """
    if not text1:
        return len(text2), []
    if not text2:
        return len(text1), []

    row_cnt = len(text1) + 1
    col_cnt = len(text2) + 1

    # cost holds the distances, step remembers which move produced each cell.
    cost = [[0] * col_cnt for _ in range(row_cnt)]
    step = [[None] * col_cnt for _ in range(row_cnt)]

    # Borders: against an empty prefix only one kind of move is possible.
    for i in range(row_cnt):
        cost[i][0] = i
        step[i][0] = "insertion"
    for j in range(col_cnt):
        cost[0][j] = j
        step[0][j] = "deletion"

    for j in range(1, col_cnt):
        for i in range(1, row_cnt):
            same_char = text1[i - 1] == text2[j - 1]
            options = (
                (cost[i - 1][j - 1] + (0 if same_char else 1), "match" if same_char else "substitution"),
                (cost[i - 1][j] + 1, "insertion"),
                (cost[i][j - 1] + 1, "deletion"),
            )
            # min() keeps the first of equally cheap options, which gives the
            # diagonal move priority over insertion, and insertion over deletion.
            cost[i][j], step[i][j] = min(options, key=lambda option: option[0])

    # Walk back from the bottom-right cell to the origin, collecting the moves.
    operations = []
    i = row_cnt - 1
    j = col_cnt - 1
    while i > 0 or j > 0:
        move = step[i][j]
        if move == "match" or move == "substitution":
            operations.append((move, text1[i - 1], text2[j - 1], i - 1, j - 1))
            i -= 1
            j -= 1
        elif move == "insertion":
            operations.append((move, text1[i - 1], "", i - 1, j - 1))
            i -= 1
        else:
            operations.append((move, "", text2[j - 1], i - 1, j - 1))
            j -= 1

    operations.reverse()

    return cost[row_cnt - 1][col_cnt - 1], operations
|
185 |
+
|
186 |
+
|
187 |
+
def levenstein_metrics(df, pref_1="Pred_", pref_2='Tar_'):
    """Run ``levenstein`` on every row of ``df``, comparing ``{pref_1}text`` with ``{pref_2}text``.

    Returns three Series aligned with the rows of ``df``: the similarities, the
    distances and the edit-operation lists.
    """
    first_column = f'{pref_1}text'
    second_column = f'{pref_2}text'

    def _compare_row(row):
        return levenstein(text1=row[first_column], text2=row[second_column])

    # One (similarity, distance, operations) tuple per row.
    per_row_results = df[[first_column, second_column]].apply(_compare_row, axis=1)

    levenstein_similarities = per_row_results.apply(lambda result: result[0])
    levenstein_distances = per_row_results.apply(lambda result: result[1])
    edit_operations = per_row_results.apply(lambda result: result[2])

    return levenstein_similarities, levenstein_distances, edit_operations
|
197 |
+
|
198 |
+
|
199 |
+
def evaluate_by_words(pred_df, target_df, pred_pref='Pred_', target_pref='Target_', **kwargs):
    """Evaluate predicted words against target words after one-to-one IOU matching.

    Words are paired with ``word_or_symbol_pair_matching`` and the paired texts
    are then compared.

    Args:
        pred_df: predicted items.
        target_df: target items.
        pred_pref: column prefix used for the predicted side of the pairs.
        target_pref: column prefix used for the target side of the pairs.
        **kwargs: ``show_hist=True`` additionally plots histograms of the
            similarities and distances and bar charts of the edit operations.

    Returns:
        Report dict with ``accuracy``, ``precision``, ``recall``, ``f1``, the
        ``describe()`` statistics plus raw ``values`` of the levenstein
        distances, levenstein similarities and IOUs, and, per operation type
        ("insertion", "deletion", "substitution"), how often each
        ``"[<pred char>]_[<target char>]"`` combination occurred. When either
        input is empty the scores are ``None`` and the statistics are empty.
    """
    from collections import Counter

    operation_ids = ["insertion", "deletion", "substitution"]

    if pred_df.empty or target_df.empty:
        return {
            "accuracy": None,
            "precision": None,
            "recall": None,
            "f1": None,
            "levenstein_distances_stats": {},
            "levenstein_similarities_stats": {},
            "iou_stats": {},
            "edit_operations_stats": {key: {} for key in operation_ids},
        }

    def _describe_with_values(series):
        """Summary statistics of a Series together with its raw values."""
        return {**series.describe().to_dict(), "values": series.tolist()}

    show_hist = kwargs.get("show_hist", False)
    text_pairs = word_or_symbol_pair_matching(df1=pred_df, df2=target_df, pref1=pred_pref, pref2=target_pref)
    levenstein_similarities, levenstein_distances, edit_operations = levenstein_metrics(
        df=text_pairs, pref_1=pred_pref, pref_2=target_pref
    )

    levenstein_similarities_stats = _describe_with_values(levenstein_similarities)
    levenstein_distances_stats = _describe_with_values(levenstein_distances)
    iou_stats = _describe_with_values(text_pairs.iou)

    # Count the character combinations in one pass over all operation lists;
    # most_common() orders them by decreasing count.
    edit_operations_stats = {
        operation_id: dict(
            Counter(
                f"[{item[1]}]_[{item[2]}]"
                for operations in edit_operations
                for item in operations
                if item[0] == operation_id
            ).most_common()
        )
        for operation_id in operation_ids
    }

    if show_hist is True:
        pd.Series(levenstein_similarities).plot(kind='hist', bins=20, title="Levestein Similarities")
        pd.Series(levenstein_distances).plot(kind='hist', bins=20, title="Levestein Distances")
        for edit_operation_id, edit_operation_data in edit_operations_stats.items():
            pd.Series(edit_operation_data).plot(kind='barh', title=f"{edit_operation_id.capitalize()} Stats")

    return {
        "accuracy": text_accuracy(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "precision": text_precision(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "recall": text_recall(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "f1": text_f1(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "levenstein_distances_stats": levenstein_distances_stats,
        "levenstein_similarities_stats": levenstein_similarities_stats,
        "iou_stats": iou_stats,
        "edit_operations_stats": edit_operations_stats,
    }
|
257 |
+
|
258 |
+
|
259 |
+
def evaluate_by_word_groups(pred_df, target_df, pred_pref='Pred_', target_pref='Target_', **kwargs):
    """Evaluate predicted words against target words after group-wise IOU matching.

    Items are grouped and paired with ``word_or_symbol_group_pair_matching``
    and the paired texts are then compared.

    Args:
        pred_df: predicted items.
        target_df: target items.
        pred_pref: column prefix used for the predicted side of the pairs.
        target_pref: column prefix used for the target side of the pairs.
        **kwargs: ``show_hist=True`` additionally plots histograms of the
            similarities and distances and bar charts of the edit operations.

    Returns:
        Report dict with ``accuracy``, ``precision``, ``recall``, ``f1``, the
        ``describe()`` statistics plus raw ``values`` of the levenstein
        distances, levenstein similarities and IOUs, and, per operation type
        ("insertion", "deletion", "substitution"), how often each
        ``"[<pred char>]_[<target char>]"`` combination occurred. When either
        input is empty the scores are ``None`` and the statistics are empty.
    """
    from collections import Counter

    operation_ids = ["insertion", "deletion", "substitution"]

    if pred_df.empty or target_df.empty:
        return {
            "accuracy": None,
            "precision": None,
            "recall": None,
            "f1": None,
            "levenstein_distances_stats": {},
            "levenstein_similarities_stats": {},
            "iou_stats": {},
            "edit_operations_stats": {key: {} for key in operation_ids},
        }

    def _describe_with_values(series):
        """Summary statistics of a Series together with its raw values."""
        return {**series.describe().to_dict(), "values": series.tolist()}

    show_hist = kwargs.get("show_hist", False)
    text_pairs = word_or_symbol_group_pair_matching(df1=pred_df, df2=target_df, pref1=pred_pref, pref2=target_pref)
    levenstein_similarities, levenstein_distances, edit_operations = levenstein_metrics(
        df=text_pairs, pref_1=pred_pref, pref_2=target_pref
    )

    levenstein_similarities_stats = _describe_with_values(levenstein_similarities)
    levenstein_distances_stats = _describe_with_values(levenstein_distances)
    iou_stats = _describe_with_values(text_pairs.iou)

    # Count the character combinations in one pass over all operation lists;
    # most_common() orders them by decreasing count.
    edit_operations_stats = {
        operation_id: dict(
            Counter(
                f"[{item[1]}]_[{item[2]}]"
                for operations in edit_operations
                for item in operations
                if item[0] == operation_id
            ).most_common()
        )
        for operation_id in operation_ids
    }

    if show_hist is True:
        pd.Series(levenstein_similarities).plot(kind='hist', bins=20, title="Levestein Similarities")
        pd.Series(levenstein_distances).plot(kind='hist', bins=20, title="Levestein Distances")
        for edit_operation_id, edit_operation_data in edit_operations_stats.items():
            pd.Series(edit_operation_data).plot(kind='barh', title=f"{edit_operation_id.capitalize()} Stats")

    return {
        "accuracy": text_accuracy(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "precision": text_precision(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "recall": text_recall(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "f1": text_f1(df=text_pairs, pref_1=pred_pref, pref_2=target_pref),
        "levenstein_distances_stats": levenstein_distances_stats,
        "levenstein_similarities_stats": levenstein_similarities_stats,
        "iou_stats": iou_stats,
        "edit_operations_stats": edit_operations_stats,
    }
|
317 |
+
|
318 |
+
|
319 |
+
def reduce_word_evaluation_results(eval_results):
    """Aggregate per-document word-level evaluation reports into one summary.

    Args:
        eval_results: list of report dicts. Each report is read for the
            scalar keys ``accuracy``, ``precision``, ``recall`` and ``f1``
            (which may be ``None``), for the ``levenstein_similarities_stats``,
            ``levenstein_distances_stats`` and ``iou_stats`` dicts (only their
            optional ``"values"`` list is used), and for
            ``edit_operations_stats`` mapping an operation name to a
            ``{key: count}`` dict.

    Returns:
        dict with:
          * ``accuracy`` / ``precision`` / ``recall`` / ``f1``: ``mean``,
            ``std`` and the per-document ``values``;
          * ``document_count``: number of reports received;
          * ``levenstein_distances_stats`` / ``levenstein_similarities_stats``
            / ``iou_stats``: ``describe()`` statistics over the per-document
            means, plus those means under ``values``;
          * ``edit_operations_stats``: per-operation counts summed over all
            reports.
        For an empty ``eval_results`` every entry is empty and
        ``document_count`` is 0.
    """
    if not eval_results:
        return {
            "accuracy": {},
            "precision": {},
            "recall": {},
            "f1": {},
            "document_count": 0,
            "levenstein_distances_stats": {},
            "levenstein_similarities_stats": {},
            "iou_stats": {},
            "edit_operations_stats": {key: {} for key in ["insertion", "deletion", "substitution"]},
        }

    def _scalar_metric(metric_key):
        # dtype=float turns None (reports of empty documents) into NaN, so the
        # reductions below skip it instead of operating on an object Series.
        per_document = pd.Series([item[metric_key] for item in eval_results], dtype=float)
        return {
            "mean": per_document.mean(),
            "std": per_document.std(),
            "values": per_document.tolist(),
        }

    def _described_means(stats_key):
        # One mean per document; a document without values contributes NaN.
        per_document_means = pd.Series(
            [
                pd.Series(item[stats_key].get('values', []), dtype=float).mean()
                for item in eval_results
            ],
            dtype=float,
        )
        return {
            **per_document_means.describe().to_dict(),
            "values": per_document_means.tolist(),
        }

    # Sum the edit-operation counters of all documents, operation by operation.
    edit_operations_stats = {}
    for eval_result in eval_results:
        for edit_operation, edit_operation_data in eval_result['edit_operations_stats'].items():
            merged = edit_operations_stats.setdefault(edit_operation, {})
            for key, count in edit_operation_data.items():
                merged[key] = merged.get(key, 0) + count

    return {
        "accuracy": _scalar_metric('accuracy'),
        "precision": _scalar_metric('precision'),
        "recall": _scalar_metric('recall'),
        "f1": _scalar_metric('f1'),
        "document_count": len(eval_results),
        "levenstein_distances_stats": _described_means('levenstein_distances_stats'),
        "levenstein_similarities_stats": _described_means('levenstein_similarities_stats'),
        "iou_stats": _described_means('iou_stats'),
        "edit_operations_stats": edit_operations_stats,
    }
|
410 |
+
|
411 |
+
|
412 |
+
def evaluate_by_symbols(pred_df, target_df, pred_pref='Pred_', target_pref='Target_', **kwargs):
    """Build a symbol-level evaluation report for one prediction/target pair.

    Args:
        pred_df: DataFrame of predicted annotations.
        target_df: DataFrame of target annotations.
        pred_pref: prefix forwarded to the matching and metric helpers for the
            prediction side.
        target_pref: prefix forwarded to the matching and metric helpers for
            the target side.
        **kwargs: only ``show_hist`` is read; when it is exactly ``True`` the
            pair counts are plotted as a horizontal bar chart.

    Returns:
        dict with ``accuracy``, ``precision``, ``recall``, ``f1``,
        ``confusion_matrix``, ``pair_counts`` and ``iou_stats``. When either
        input frame is empty the metrics are ``None``, both frames are empty
        DataFrames and ``iou_stats`` is an empty dict.
    """
    # Nothing to match against: hand back the placeholder report.
    if pred_df.empty or target_df.empty:
        return {
            "accuracy": None,
            "precision": None,
            "recall": None,
            "f1": None,
            "confusion_matrix": pd.DataFrame(),
            "pair_counts": pd.DataFrame(),
            "iou_stats": {},
        }

    plot_requested = kwargs.get("show_hist", False)
    matched_pairs = word_or_symbol_pair_matching(
        df1=pred_df, df2=target_df, pref1=pred_pref, pref2=target_pref
    )
    confusion_matrix, pair_counts = symbol_confusion_matrix(
        matched_pairs, pref_1=pred_pref, pref_2=target_pref
    )

    # describe() statistics of the matched pairs' IoU plus the raw values.
    iou_stats = dict(matched_pairs.iou.describe().to_dict())
    iou_stats["values"] = matched_pairs.iou.tolist()

    if plot_requested is True:
        pd.Series(pair_counts).plot(kind='barh', title="Symbol Pair Counts")

    metric_kwargs = {"df": matched_pairs, "pref_1": pred_pref, "pref_2": target_pref}
    report = {}
    report["accuracy"] = text_accuracy(**metric_kwargs)
    report["precision"] = text_precision(**metric_kwargs)
    report["recall"] = text_recall(**metric_kwargs)
    report["f1"] = text_f1(**metric_kwargs)
    report["confusion_matrix"] = confusion_matrix
    report["pair_counts"] = pair_counts
    report["iou_stats"] = iou_stats
    return report
|
449 |
+
|
450 |
+
|
451 |
+
def reduce_pair_counts(pair_counts):
    """Sum several pair-count tables into a single table.

    Each non-empty table is read as "all columns but the last identify a
    pair, the last column is its count". Counts of identical pairs are added
    up across tables.

    Args:
        pair_counts: iterable of DataFrames (empty frames are ignored); may be
            empty or ``None``.

    Returns:
        DataFrame with the column names of the last non-empty input table and
        one row per distinct pair, in order of first appearance. An empty
        DataFrame is returned when there is nothing to aggregate.
    """
    totals = {}
    columns = []
    for pair_count in pair_counts or []:
        if pair_count.empty:
            continue
        columns = pair_count.columns.tolist()
        # Plain tuples: leading items are the pair key, the last is the count.
        for *pair, count in pair_count.itertuples(index=False, name=None):
            pair = tuple(pair)
            totals[pair] = totals.get(pair, 0) + count

    if not totals:
        # Avoid building a frame with placeholder 'index'/0 columns out of an
        # empty, dtype-less Series.
        return pd.DataFrame()

    return pd.DataFrame(
        [[*pair, total] for pair, total in totals.items()],
        columns=columns,
    )
|
472 |
+
|
473 |
+
|
474 |
+
def reduce_confusion_matrices(confusion_matrices):
    """Add several confusion matrices into one square matrix.

    The result is indexed (rows and columns alike) by the sorted union of all
    row and column labels found in the non-empty inputs; cells missing from an
    input count as 0.

    Args:
        confusion_matrices: iterable of DataFrames (empty frames are ignored);
            may be empty or ``None``.

    Returns:
        DataFrame holding the cell-wise sum, or an empty DataFrame when there
        is nothing to aggregate.
    """
    matrices = [matrix for matrix in (confusion_matrices or []) if not matrix.empty]
    if not matrices:
        return pd.DataFrame()

    label_set = set()
    for matrix in matrices:
        label_set.update(matrix.index)
        label_set.update(matrix.columns)
    labels = sorted(label_set)

    # Align every matrix on the common label set, then add them vectorised
    # rather than reading each cell with .loc.
    total = pd.DataFrame(0, index=labels, columns=labels)
    for matrix in matrices:
        total = total.add(matrix.reindex(index=labels, columns=labels, fill_value=0))

    return total
|
508 |
+
|
509 |
+
|
510 |
+
def reduce_symbol_evaluation_results(eval_results):
    """Aggregate per-document symbol-level evaluation reports into one summary.

    Args:
        eval_results: list of report dicts. Each report is read for the
            scalar keys ``accuracy``, ``precision``, ``recall`` and ``f1``
            (which may be ``None``), for ``confusion_matrix`` and
            ``pair_counts`` (DataFrames), and for ``iou_stats`` (only its
            optional ``"values"`` list is used).

    Returns:
        dict with:
          * ``accuracy`` / ``precision`` / ``recall`` / ``f1``: ``mean``,
            ``std`` and the per-document ``values``;
          * ``document_count``: number of reports received;
          * ``pair_counts``: result of ``reduce_pair_counts`` over all
            reports;
          * ``confusion_matrix``: result of ``reduce_confusion_matrices`` over
            all reports;
          * ``iou_stats``: ``describe()`` statistics over the per-document
            mean IoU, plus those means under ``values``.
        For an empty ``eval_results`` the metric entries are empty dicts, the
        two tables are empty DataFrames and ``document_count`` is 0.
    """
    if not eval_results:
        return {
            "accuracy": {},
            "precision": {},
            "recall": {},
            "f1": {},
            "document_count": 0,
            "pair_counts": pd.DataFrame(),
            "confusion_matrix": pd.DataFrame(),
            "iou_stats": {},
        }

    def _scalar_metric(metric_key):
        # dtype=float turns None (reports of empty documents) into NaN, so the
        # reductions below skip it instead of operating on an object Series.
        per_document = pd.Series([item[metric_key] for item in eval_results], dtype=float)
        return {
            "mean": per_document.mean(),
            "std": per_document.std(),
            "values": per_document.tolist(),
        }

    confusion_matrices = [item['confusion_matrix'] for item in eval_results]
    pair_counts = [item['pair_counts'] for item in eval_results]

    # One mean IoU per document; a document without values contributes NaN.
    ious = pd.Series(
        [
            pd.Series(item['iou_stats'].get('values', []), dtype=float).mean()
            for item in eval_results
        ],
        dtype=float,
    )
    iou_stats = {
        **ious.describe().to_dict(),
        "values": ious.tolist(),
    }

    return {
        "accuracy": _scalar_metric('accuracy'),
        "precision": _scalar_metric('precision'),
        "recall": _scalar_metric('recall'),
        "f1": _scalar_metric('f1'),
        "document_count": len(eval_results),
        "pair_counts": reduce_pair_counts(pair_counts),
        "confusion_matrix": reduce_confusion_matrices(confusion_matrices),
        "iou_stats": iou_stats,
    }
|
iliauniiccocrevaluation.py
CHANGED
@@ -13,51 +13,48 @@
|
|
13 |
# limitations under the License.
|
14 |
"""TODO: Add a description here."""
|
15 |
|
16 |
-
import evaluate
|
17 |
import datasets
|
18 |
-
|
19 |
|
20 |
# TODO: Add BibTeX citation
|
21 |
-
from
|
|
|
22 |
|
23 |
_CITATION = """\
|
24 |
@InProceedings{huggingface:module,
|
25 |
-
title = {
|
26 |
-
authors={
|
27 |
-
year={
|
28 |
}
|
29 |
"""
|
30 |
|
31 |
# TODO: Add description of the module here
|
32 |
_DESCRIPTION = """\
|
33 |
-
|
|
|
|
|
34 |
"""
|
35 |
|
36 |
-
|
37 |
# TODO: Add description of the arguments of the module here
|
38 |
_KWARGS_DESCRIPTION = """
|
39 |
Calculates how good are predictions given some references, using certain scores
|
40 |
Args:
|
41 |
-
predictions: list of
|
42 |
-
|
43 |
-
references: list of reference for each prediction. Each
|
44 |
-
reference should be a string with tokens separated by spaces.
|
45 |
Returns:
|
46 |
-
|
47 |
-
another_score: description of the second score,
|
48 |
Examples:
|
49 |
Examples should be written in doctest format, and should illustrate how
|
50 |
to use the function.
|
51 |
|
52 |
-
>>>
|
53 |
-
>>>
|
|
|
|
|
54 |
>>> print(results)
|
55 |
{'accuracy': 1.0}
|
56 |
"""
|
57 |
|
58 |
-
# TODO: Define external resources urls if needed
|
59 |
-
BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
|
60 |
-
|
61 |
|
62 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
63 |
class IliauniIccOCREvaluation(evaluate.Metric):
|
@@ -72,10 +69,52 @@ class IliauniIccOCREvaluation(evaluate.Metric):
|
|
72 |
citation=_CITATION,
|
73 |
inputs_description=_KWARGS_DESCRIPTION,
|
74 |
# This defines the format of each prediction and reference
|
75 |
-
features=datasets.Features(
|
76 |
-
|
77 |
-
|
78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
# Homepage of the module for documentation
|
80 |
homepage="http://module.homepage",
|
81 |
# Additional links to the codebase or references
|
@@ -90,9 +129,14 @@ class IliauniIccOCREvaluation(evaluate.Metric):
|
|
90 |
|
91 |
def _compute(self, predictions, references):
|
92 |
"""Returns the scores"""
|
93 |
-
# TODO: Compute the different scores of the module
|
94 |
-
cer = calculate_cer(predictions, references)
|
95 |
|
96 |
-
|
97 |
-
|
98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
# limitations under the License.
|
14 |
"""TODO: Add a description here."""
|
15 |
|
|
|
16 |
import datasets
|
17 |
+
import evaluate
|
18 |
|
19 |
# TODO: Add BibTeX citation
|
20 |
+
from evaluation.metrics import evaluate_by_words
|
21 |
+
from ocr.fiftyone import FiftyOneOcr
|
22 |
|
23 |
_CITATION = """\
|
24 |
@InProceedings{huggingface:module,
|
25 |
+
title = {Iliauni ICC OCR Evaluation},
|
26 |
+
authors={},
|
27 |
+
year={2022}
|
28 |
}
|
29 |
"""
|
30 |
|
31 |
# TODO: Add description of the module here
|
32 |
_DESCRIPTION = """\
|
33 |
+
Better OCR evaluation metric that enables to evaluate OCR results in various ways. It is robust in a way that
|
34 |
+
it matches the words using their bounding boxes instead of using plain edit distance matching between two texts.
|
35 |
+
Elaborate more on this later.
|
36 |
"""
|
37 |
|
|
|
38 |
# TODO: Add description of the arguments of the module here
|
39 |
_KWARGS_DESCRIPTION = """
|
40 |
Calculates how good are predictions given some references, using certain scores
|
41 |
Args:
|
42 |
+
predictions: list of OCR detections in FiftyOne dataset format.
|
43 |
+
references: list of OCR detections in FiftyOne dataset format.
|
|
|
|
|
44 |
Returns:
|
45 |
+
evaluation_results: dictionary containing multiple metrics
|
|
|
46 |
Examples:
|
47 |
Examples should be written in doctest format, and should illustrate how
|
48 |
to use the function.
|
49 |
|
50 |
+
>>> dataset = load_dataset("anz2/iliauni_icc_georgian_ocr", use_auth_token="<auth token here>")
|
51 |
+
>>> sample = dataset['test'][0]
|
52 |
+
>>> ocr_evaluator = evaluate.load("iliauniiccocrevaluation")
|
53 |
+
>>> results = ocr_evaluator.compute(references=[sample], predictions=[0, 1])
|
54 |
>>> print(results)
|
55 |
{'accuracy': 1.0}
|
56 |
"""
|
57 |
|
|
|
|
|
|
|
58 |
|
59 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
60 |
class IliauniIccOCREvaluation(evaluate.Metric):
|
|
|
69 |
citation=_CITATION,
|
70 |
inputs_description=_KWARGS_DESCRIPTION,
|
71 |
# This defines the format of each prediction and reference
|
72 |
+
features=datasets.Features(
|
73 |
+
{
|
74 |
+
"id": datasets.Value("string"),
|
75 |
+
"filepath": datasets.Value("string"),
|
76 |
+
"tags": datasets.Sequence(datasets.Value("string")),
|
77 |
+
"metadata": datasets.Features(
|
78 |
+
{
|
79 |
+
"size_bytes": datasets.Value("int32"),
|
80 |
+
"mime_type": datasets.Value("string"),
|
81 |
+
"width": datasets.Value("int32"),
|
82 |
+
"height": datasets.Value("int32"),
|
83 |
+
"num_channels": datasets.Value("int32"),
|
84 |
+
"author": datasets.Value("string"),
|
85 |
+
"category": datasets.Value("string"),
|
86 |
+
"document_name": datasets.Value("string"),
|
87 |
+
"source": datasets.Value("string"),
|
88 |
+
"year": datasets.Value("int32")
|
89 |
+
}
|
90 |
+
),
|
91 |
+
"_media_type": datasets.Value("string"),
|
92 |
+
"_rand": datasets.Value("string"),
|
93 |
+
"detections": datasets.Features(
|
94 |
+
{
|
95 |
+
"detections": datasets.Sequence(
|
96 |
+
datasets.Features(
|
97 |
+
{
|
98 |
+
"id": datasets.Value("string"),
|
99 |
+
"attributes": datasets.Sequence(datasets.Value("string")),
|
100 |
+
"tags": datasets.Value("string"),
|
101 |
+
"label": datasets.Value("string"),
|
102 |
+
"bounding_box": datasets.Sequence(datasets.Value("float32")),
|
103 |
+
"confidence": datasets.Value("float32"),
|
104 |
+
"index": datasets.Value("int32"),
|
105 |
+
"page": datasets.Value("int32"),
|
106 |
+
"block": datasets.Value("int32"),
|
107 |
+
"paragraph": datasets.Value("int32"),
|
108 |
+
"word": datasets.Value("int32"),
|
109 |
+
"text": datasets.Value("string"),
|
110 |
+
}
|
111 |
+
)
|
112 |
+
)
|
113 |
+
}
|
114 |
+
),
|
115 |
+
"image": datasets.Image(),
|
116 |
+
}
|
117 |
+
),
|
118 |
# Homepage of the module for documentation
|
119 |
homepage="http://module.homepage",
|
120 |
# Additional links to the codebase or references
|
|
|
129 |
|
130 |
def _compute(self, predictions, references):
    """Evaluate each prediction against its reference, word by word.

    Args:
        predictions: list of OCR samples in FiftyOne dataset format.
        references: list of OCR samples in FiftyOne dataset format, aligned
            index-by-index with ``predictions``.

    Returns:
        list with one ``evaluate_by_words`` report per
        (prediction, reference) pair, in input order.

    Raises:
        ValueError: if ``predictions`` and ``references`` differ in length.
    """
    # Explicit check rather than ``assert``: assertions are stripped when
    # Python runs with -O, which would let zip() silently drop the surplus.
    if len(predictions) != len(references):
        raise ValueError(
            f"predictions and references must have the same length, "
            f"got {len(predictions)} and {len(references)}"
        )

    eval_results = []
    for prediction, reference in zip(predictions, references):
        prediction_df = FiftyOneOcr(data=prediction).get_word_annotations(convert_bbox=True)
        reference_df = FiftyOneOcr(data=reference).get_word_annotations(convert_bbox=True)

        # The evaluation functions in evaluation/metrics.py name their prefix
        # parameters pred_pref/target_pref; pref1/pref2 are not among them.
        # NOTE(review): evaluate_by_words' own signature is not visible from
        # here - confirm it matches evaluate_by_symbols.
        eval_result = evaluate_by_words(
            prediction_df, reference_df, pred_pref="Pred_", target_pref="Tar_"
        )
        eval_results.append(eval_result)

    return eval_results
|
ocr/fiftyone.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
|
4 |
+
|
5 |
+
class FiftyOneOcr:
    """Wrapper around one OCR sample stored as a FiftyOne-style dict.

    The sample is expected to keep its word detections under
    ``data["detections"]["detections"]``.
    """

    def __init__(self, data):
        # Raw sample dict; only read, never modified.
        self.data = data

    def get_word_annotations(self, convert_bbox: bool = True) -> pd.DataFrame:
        """Return the detections as a DataFrame, one row per word annotation.

        Args:
            convert_bbox: when true, convert each ``bounding_box`` from the
                origin-plus-size form ``(x, y, width, height)`` to the
                two-point form ``(x1, y1, x2, y2)``.

        Returns:
            DataFrame built from the sample's detections. It is empty when
            the sample has no detections (missing, ``None`` or empty).
        """
        # ``or`` fallbacks also cover keys that are present but set to None.
        annotations = (self.data.get("detections") or {}).get("detections") or []

        annotations_df = pd.DataFrame(annotations)

        # Without rows (or without the column) there is nothing to convert;
        # indexing the bbox array below would raise on such input.
        has_boxes = not annotations_df.empty and 'bounding_box' in annotations_df.columns

        # convert bounding box into 2 point values format
        if convert_bbox and has_boxes:
            bbox = np.array(annotations_df['bounding_box'].values.tolist())
            # Adding the origin to (width, height) yields the opposite corner.
            bbox[:, 2:] += bbox[:, :2]
            annotations_df['bounding_box'] = bbox.tolist()

        return annotations_df
|