# plano_lit / app_utils.py
# Hosting-page residue: SakshiRathi77 - "Upload 8 files" - 6ef9ea4 (verified)
import glob
import json
import os
import xml.etree.ElementTree as ET
import cv2
# from sklearn.externals import joblib
import joblib
import numpy as np
import pandas as pd
# from .variables import old_ocr_req_cols
# from .skew_correction import PageSkewWraper
# Height/width ratio constant (1.294117647 is numerically 11 / 8.5).
# NOTE(review): not referenced anywhere in the visible part of this file;
# presumably a page aspect ratio left over from an OCR pipeline - confirm.
const_HW = 1.294117647
# Width constant, presumably in pixels and paired with const_HW above.
# NOTE(review): also unused in the visible code - confirm before relying on it.
const_W = 600
# https://www.forbes.com/sites/forbestechcouncil/2020/06/02/leveraging-technologies-to-align-realograms-and-planograms-for-grocery/?sh=506b8b78e86c
# https://stackoverflow.com/questions/39403183/python-opencv-sorting-contours
# http://devdoc.net/linux/OpenCV-3.2.0/da/d0c/tutorial_bounding_rects_circles.html
# https://stackoverflow.com/questions/10297713/find-contour-of-the-set-of-points-in-opencv
# https://stackoverflow.com/questions/16538774/dealing-with-contours-and-bounding-rectangle-in-opencv-2-4-python-2-7
# https://stackoverflow.com/questions/50308055/creating-bounding-boxes-for-contours
# https://stackoverflow.com/questions/57296398/how-can-i-get-better-results-of-bounding-box-using-find-contours-of-opencv
# http://amroamroamro.github.io/mexopencv/opencv/generalContours_demo1.html
# https://gist.github.com/bigsnarfdude/d811e31ee17495f82f10db12651ae82d
# http://man.hubwiz.com/docset/OpenCV.docset/Contents/Resources/Documents/da/d0c/tutorial_bounding_rects_circles.html
# https://www.analyticsvidhya.com/blog/2021/05/document-layout-detection-and-ocr-with-detectron2/
# https://colab.research.google.com/drive/1m6gaQF6Q4M0IaSjoo_4jWllKJjK-i6fw?usp=sharing#scrollTo=lEyl3wYKHAe1
# https://stackoverflow.com/questions/39403183/python-opencv-sorting-contours
# https://docs.opencv.org/2.4/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.html
# https://www.pyimagesearch.com/2016/03/21/ordering-coordinates-clockwise-with-python-and-opencv/
def bucket_sort(df, colmn, ymax_col="ymax", ymin_col="ymin"):
    """Group consecutive, vertically overlapping boxes into numbered lines.

    Rows are walked in their existing order.  The first row of a line fixes
    that line's bottom edge (its ``ymax_col`` value); every following row
    whose ``ymin_col`` lies strictly above that edge joins the same line.
    The first row that does not overlap starts the next line.  Because only
    consecutive rows are compared, the frame is expected to be ordered
    top-to-bottom already (``do_sorting`` sorts by ``ymin`` before calling).

    Args:
        df: Frame holding at least the columns named in ``colmn``.  A
            ``line_number`` column is added to (or reset on) this frame
            in place, filled with 0.
        colmn: Names of the columns to carry into the result.  Must contain
            ``ymax_col`` and ``ymin_col``.  The list is NOT modified.
        ymax_col: Name of the bottom-edge column.
        ymin_col: Name of the top-edge column.

    Returns:
        A new ``DataFrame`` (fresh 0..n-1 index) with the columns of
        ``colmn`` followed by ``line_number``, where line numbers start at 1.

    Raises:
        ValueError: If ``ymax_col`` or ``ymin_col`` is not in ``colmn``.
        KeyError: If a name in ``colmn`` is not a column of ``df``.
    """
    df["line_number"] = 0
    # Build a private column list: appending to the caller's list would leak
    # "line_number" into it and duplicate the column on a second call.
    columns = list(colmn) + ["line_number"]
    ymax_idx = columns.index(ymax_col)
    ymin_idx = columns.index(ymin_col)
    line_idx = columns.index("line_number")

    # Explicit copy: ``.values`` may hand back a view that is not writable.
    rows = np.array(df[columns].values)
    n_rows = len(rows)

    start = 0
    line = 0
    while start < n_rows:
        line += 1
        line_bottom = rows[start][ymax_idx]
        # The first row always opens the line, even when it is degenerate
        # (ymax <= ymin); otherwise the scan could never advance past it.
        rows[start][line_idx] = line
        end = start + 1
        while end < n_rows and line_bottom > rows[end][ymin_idx]:
            rows[end][line_idx] = line
            end += 1
        start = end
    return pd.DataFrame(rows, columns=columns)
def do_sorting(df):
    """Order boxes in reading order: line by line, left to right in a line.

    The input frame is modified in place before the result is built: it is
    sorted by ``ymin`` then ``xmin``, gains an ``idx`` column holding its
    index labels, and loses any existing ``line_number`` column.

    Args:
        df: Frame with ``xmin``, ``ymin``, ``xmax`` and ``ymax`` columns.

    Returns:
        A new frame with the input columns plus ``idx`` and ``line_number``
        (as assigned by ``bucket_sort``), sorted by ``line_number`` then
        ``xmin`` and re-indexed from 0.
    """
    df.sort_values(by=["ymin", "xmin"], inplace=True)
    # Keep the index labels so line numbers can be joined back on afterwards.
    df["idx"] = df.index
    if "line_number" in df.columns:
        print("line number removed")
        df.drop(columns="line_number", inplace=True)

    box_columns = ["xmin", "ymin", "xmax", "ymax", "idx"]
    # bucket_sort works on a copy so the zero-filled helper column it adds
    # never reaches ``df``.
    line_lookup = bucket_sort(df.copy(), box_columns)[["idx", "line_number"]]
    merged = df.merge(line_lookup, on="idx")
    return merged.sort_values(by=["line_number", "xmin"]).reset_index(drop=True)
def xml_to_csv(xml_file):
    """Read the ``<object>`` entries of an annotation XML into a DataFrame.

    Adapted from
    https://gist.github.com/rotemtam/88d9a4efae243fc77ed4a0f9917c8f6c

    Args:
        xml_file: Anything ``xml.etree.ElementTree.parse`` accepts (a path
            or a file-like object).

    Returns:
        A ``DataFrame`` with one row per ``<object>`` and the columns
        ``filename``, ``width``, ``height``, ``cls``, ``xmin``, ``ymin``,
        ``xmax``, ``ymax``.  ``width`` and ``height`` are read from the
        first and second child of ``<size>`` by position.  ``cls`` is the
        text of the object's ``<name>``; the coordinates are parsed as int.
    """
    root = ET.parse(xml_file).getroot()
    records = []
    for obj in root.findall("object"):
        box = obj.find("bndbox")
        corners = [
            int(box.find(tag).text) for tag in ("xmin", "ymin", "xmax", "ymax")
        ]
        label = obj.find("name").text
        # Image-level fields are looked up per object, so a file without any
        # objects never needs <filename> or <size> to be present.
        image_name = root.find("filename").text
        size = root.find("size")
        records.append(
            (image_name, int(size[0].text), int(size[1].text), label, *corners)
        )
    return pd.DataFrame(
        records,
        columns=[
            "filename",
            "width",
            "height",
            "cls",
            "xmin",
            "ymin",
            "xmax",
            "ymax",
        ],
    )
# def annotate_planogram_compliance(img0, sorted_xml_df, wrong_indexes, target_names):
# # annotator = Annotator(img0, line_width=3, pil=True)
# det = sorted_xml_df[['xmin', 'ymin', 'xmax', 'ymax','cls']].values
# # det[:, :4] = scale_coords((640, 640), det[:, :4], img0.shape).round()
# for i, (*xyxy, cls) in enumerate(det):
# c = int(cls) # integer class
# if i in wrong_indexes:
# # print(xyxy, "Wrong detection", (255, 0, 0))
# label = "Wrong detection"
# color = (0,0,255)
# else:
# # print(xyxy, label, (0, 255, 0))
# label = f'{target_names[c]}'
# color = (0,255, 0)
# org = (int(xyxy[0]), int(xyxy[1]) )
# top_left = org
# bottom_right = (int(xyxy[2]), int(xyxy[3]))
# # print("#"*50)
# # print(f"Anooatting cv2 rectangle with shape: { img0.shape}, top left: { top_left}, bottom right: { bottom_right} , color : { color }, thickness: {3}, cv2.LINE_8")
# # print("#"*50)
# cv2.rectangle(img0, top_left, bottom_right , color, 3, cv2.LINE_8)
# cv2.putText(img0, label, tuple(org), cv2. FONT_HERSHEY_SIMPLEX , 0.5, color)
# return img0
def _draw_labelled_boxes(img0, sorted_df, indexes, target_names, color):
    """Draw one labelled rectangle on ``img0`` per (line, position) pair.

    A pair that cannot be drawn (missing row, unknown class, ...) is reported
    on stdout and skipped so that the remaining boxes are still rendered.
    """
    for line_idx, position in zip(*indexes):
        try:
            # ``line_number`` is 1-based while ``line_idx`` is 0-based.
            line_rows = sorted_df[sorted_df["line_number"] == line_idx + 1]
            row = line_rows.iloc[position]
            label = f'{target_names[row["cls"]]}'
            # OpenCV needs plain Python ints for points; values read from a
            # DataFrame row are NumPy scalars, which newer bindings reject.
            top_left = (int(row["xmin"]), int(row["ymin"]))
            bottom_right = (int(row["xmax"]), int(row["ymax"]))
            cv2.rectangle(img0, top_left, bottom_right, color, 3, cv2.LINE_8)
            cv2.putText(
                img0, label, top_left, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color
            )
        except Exception as e:
            print("Error: " + str(e))
            continue


def annotate_planogram_compliance(
    img0, sorted_df, correct_indexes, wrong_indexes, target_names
):
    """Draw compliant and non-compliant detections onto an image.

    Args:
        img0: Image array, drawn on in place.
        sorted_df: Detections with ``xmin``, ``ymin``, ``xmax``, ``ymax``,
            ``cls`` and a 1-based ``line_number`` column.
        correct_indexes: Two parallel sequences ``(lines, positions)``; each
            pair selects the ``positions[i]``-th row of 0-based line
            ``lines[i]``.  Drawn with color ``(0, 255, 0)``.
        wrong_indexes: Same layout as ``correct_indexes``.  Drawn with color
            ``(0, 0, 255)``.
        target_names: Indexable by a row's ``cls`` value; supplies the label.

    Returns:
        ``img0``, the same object that was passed in.

    NOTE(review): the colors are green / red only if ``img0`` is in BGR
    channel order - confirm against the caller.
    """
    _draw_labelled_boxes(img0, sorted_df, correct_indexes, target_names, (0, 255, 0))
    _draw_labelled_boxes(img0, sorted_df, wrong_indexes, target_names, (0, 0, 255))
    return img0