from paddleocr import PaddleOCR
from PIL import Image
from numpy import asarray

# Module-level OCR engine, constructed once at import time and reused by OCR().
# Angle classification is enabled; the recognition language is English for now.
ocr = PaddleOCR(use_angle_cls=True, lang="en")  # TODO: switch to Italian

def normalize_bbox(bbox, width, height):
    """Scale a pixel-space box onto an integer 0-1000 grid.

    Args:
        bbox: Indexable box whose items 0 and 2 are x coordinates and whose
            items 1 and 3 are y coordinates.
        width: Horizontal extent the x coordinates are divided by.
        height: Vertical extent the y coordinates are divided by.

    Returns:
        A four-element list of ints; each value is the coordinate's fraction
        of its extent multiplied by 1000 and truncated toward zero.
    """
    # Even positions are x values (scaled by width), odd ones y (by height).
    extents = (width, height, width, height)
    return [
        int(1000 * (bbox[pos] / extent))
        for pos, extent in enumerate(extents)
    ]

def unnormalize_box(bbox, width, height):
    """Map a box from the 0-1000 grid back to the given width and height.

    Args:
        bbox: Indexable box whose items 0 and 2 are x coordinates and whose
            items 1 and 3 are y coordinates, expressed in thousandths.
        width: Extent that the x coordinates are scaled to.
        height: Extent that the y coordinates are scaled to.

    Returns:
        A four-element list; each value is the extent multiplied by the
        coordinate divided by 1000 (no rounding is applied).
    """
    # Even positions are x values (scaled by width), odd ones y (by height).
    extents = (width, height, width, height)
    return [
        extent * (bbox[pos] / 1000)
        for pos, extent in enumerate(extents)
    ]


def OCR(image):
    """Run OCR on an image and return normalized boxes with their texts.

    Args:
        image: Image object exposing ``width`` and ``height`` attributes and
            convertible with ``numpy.asarray`` (presumably a PIL image --
            confirm against callers).

    Returns:
        A ``(bboxes, words)`` tuple of equal-length lists. ``bboxes[i]`` is
        the box of ``words[i]`` on the 0-1000 grid produced by
        ``normalize_bbox``. Both lists are empty when no text is found.
    """
    result = ocr.ocr(asarray(image), cls=True)
    bboxes = []
    words = []
    width, height = image.width, image.height

    # The engine returns one entry per input image. Recent PaddleOCR versions
    # use None for an image with no detected text, so skip empty entries
    # instead of failing when trying to iterate over them.
    for page in result or []:
        if not page:
            continue

        for line in page:
            text = line[1][0]
            if text == "":
                continue

            # line[0] holds the corner points of the detected region; the
            # first and third points are concatenated into [x0, y0, x2, y2]
            # (assumed to be top-left and bottom-right -- TODO confirm for
            # rotated text).
            corners = line[0]
            bboxes.append(normalize_bbox(corners[0] + corners[2], width, height))
            words.append(text)

    return bboxes, words