mp-02's picture
Upload folder using huggingface_hub
6d1caf6 verified
raw
history blame
1.04 kB
from paddleocr import PaddleOCR
from PIL import Image
def normalize_bbox(bbox, width, height):
    """Scale a pixel-space box onto a 0-1000 integer grid.

    Args:
        bbox: Indexable of four numbers. Elements 0 and 2 are horizontal
            coordinates (scaled by ``width``); elements 1 and 3 are
            vertical coordinates (scaled by ``height``).
        width: Width of the image the box was measured on, in pixels.
        height: Height of the image the box was measured on, in pixels.

    Returns:
        A list of four ints ``[x0, y0, x1, y1]`` where each coordinate is
        ``1000 * coordinate / dimension`` truncated toward zero.

    Raises:
        ZeroDivisionError: If ``width`` or ``height`` is zero.
    """
    # int() truncates rather than rounds, so 333.33 -> 333 and 666.67 -> 666.
    return [
        int(1000 * (bbox[0] / width)),
        int(1000 * (bbox[1] / height)),
        int(1000 * (bbox[2] / width)),
        int(1000 * (bbox[3] / height)),
    ]
def unnormalize_box(bbox, width, height):
    """Map a box from the 0-1000 grid back to pixel coordinates.

    Args:
        bbox: Indexable of four numbers on the 0-1000 scale. Elements 0
            and 2 are horizontal (scaled by ``width``); elements 1 and 3
            are vertical (scaled by ``height``).
        width: Width of the target image, in pixels.
        height: Height of the target image, in pixels.

    Returns:
        A list of four floats ``[x0, y0, x1, y1]`` in pixel units. The
        values are not rounded, so callers that need integer pixels must
        convert them themselves.
    """
    return [
        width * (bbox[0] / 1000),
        height * (bbox[1] / 1000),
        width * (bbox[2] / 1000),
        height * (bbox[3] / 1000),
    ]
def OCR(image_path: str):
    """Run PaddleOCR on an image file and collect boxes and recognized text.

    Args:
        image_path: Path to the image file. It is opened once with PIL to
            read the image dimensions and passed by path to PaddleOCR.

    Returns:
        A ``(bboxes, words)`` tuple of two parallel lists. ``bboxes[i]`` is
        the box of ``words[i]`` on the 0-1000 scale produced by
        ``normalize_bbox``; ``words[i]`` is the recognized text string.
        Both lists are empty when nothing is detected.
    """
    # NOTE: a new PaddleOCR engine is constructed on every call, exactly as
    # before; callers processing many images may want to cache it themselves.
    ocr = PaddleOCR(use_angle_cls=True)

    # Only the dimensions are needed, so close the file handle right away
    # instead of leaving it open until garbage collection.
    with Image.open(image_path) as image:
        width, height = image.size

    result = ocr.ocr(image_path, cls=True)

    bboxes = []
    words = []
    # PaddleOCR has been observed to return None (or a None page entry) when
    # no text is detected; treat that as "no lines" rather than crashing.
    for res in result or []:
        if not res:
            continue
        for line in res:
            # line[0] holds the corner points of the detected region and
            # line[1][0] the recognized text. Concatenating corners 0 and 2
            # yields [x0, y0, x1, y1].
            # NOTE(review): this presumes corner 0 is top-left and corner 2
            # is bottom-right of a roughly axis-aligned box -- confirm for
            # strongly rotated text.
            bboxes.append(normalize_bbox(line[0][0] + line[0][2], width, height))
            words.append(line[1][0])
    return bboxes, words