# cv_quality / ocr_functions.py
# Nassiraaa's picture
# Update ocr_functions.py
# dc649a8 verified
from dotenv import load_dotenv
import io
import boto3
from paddleocr import PaddleOCR
import os
import pytesseract
from PIL import ImageFilter
import numpy as np
def textract_ocr(image, box):
    """Extract the text of one region of an image with AWS Textract.

    The region is cropped, converted to greyscale, PNG-encoded in memory and
    sent to Textract's ``detect_document_text``. Only blocks whose
    ``BlockType`` is ``LINE`` contribute to the output.

    Args:
        image: Source image; it must offer PIL-style ``crop``, ``convert``
            and ``save`` (presumably a ``PIL.Image.Image`` -- confirm
            against callers).
        box: ``(x1, y1, x2, y2)`` rectangle handed to ``image.crop``.

    Returns:
        The detected lines in response order, each one terminated by a
        newline; an empty string when the response has no ``LINE`` block.
    """
    # Credentials are read from the environment below; load_dotenv() is
    # called first so a local .env file can supply them.
    load_dotenv()

    x1, y1, x2, y2 = box
    region = image.crop((x1, y1, x2, y2)).convert("L")

    buffer = io.BytesIO()
    region.save(buffer, format='PNG')

    client = boto3.client(
        'textract',
        region_name='eu-west-3',
        aws_access_key_id=os.getenv("aws_access_key_id"),
        aws_secret_access_key=os.getenv('aws_secret_access_key'),
    )
    response = client.detect_document_text(Document={'Bytes': buffer.getvalue()})

    # Build the result in one pass instead of repeated string concatenation.
    return "".join(
        f"{block['Text']}\n"
        for block in response['Blocks']
        if block['BlockType'] == 'LINE'
    )
def paddle_ocr(image, box):
    """Extract the text of one region of an image with PaddleOCR.

    Args:
        image: Source image; it must offer a PIL-style ``crop`` whose result
            ``numpy.array`` can convert (presumably a ``PIL.Image.Image`` --
            confirm against callers).
        box: ``(x1, y1, x2, y2)`` rectangle handed to ``image.crop``.

    Returns:
        The recognised lines joined with newlines, in the order PaddleOCR
        returned them; an empty string when nothing was detected.
    """
    x1, y1, x2, y2 = box
    pixels = np.array(image.crop((x1, y1, x2, y2)))

    # NOTE: a fresh engine is constructed on every call. Angle classification
    # is switched off both at construction and in the ocr() call.
    ocr = PaddleOCR(use_angle_cls=False, lang='latin')
    result = ocr.ocr(pixels, cls=False)

    # result[0] carries the detections for the image passed in; it is None
    # (or empty) when no text was found.
    lines = result[0] if result else None
    if not lines:
        return ""

    # The earlier code sorted `result` itself (the outer list) rather than
    # the lines, so it never reordered anything and its key function raised
    # IndexError on an empty detection list. The engine's order is kept.
    # Each entry is indexed as entry[1][0] to obtain its text.
    return "\n".join(entry[1][0] for entry in lines)
def tesseract_ocr(image, box):
    """Extract the text of one region of an image with Tesseract.

    The region is cropped, converted to greyscale, median-filtered, binarised
    with a fixed threshold and passed to ``pytesseract.image_to_string`` with
    the ``--psm 6`` option.

    Args:
        image: Source image; it must offer PIL-style ``crop`` and ``convert``
            (presumably a ``PIL.Image.Image`` -- confirm against callers).
        box: ``(x1, y1, x2, y2)`` rectangle handed to ``image.crop``.

    Returns:
        The string returned by ``pytesseract.image_to_string``.
    """
    # Pixels strictly brighter than this become white (255), all others
    # black (0).
    threshold = 180

    x1, y1, x2, y2 = box
    region = image.crop((x1, y1, x2, y2)).convert("L")

    # NOTE(review): a 300-DPI scale factor used to be derived here from the
    # image's "dpi" metadata but was never applied to the image, so that dead
    # computation has been dropped. Add an explicit resize if upscaling is
    # actually wanted.
    denoised = region.filter(ImageFilter.MedianFilter())
    binarized = denoised.point(lambda p: 255 if p > threshold else 0)

    return pytesseract.image_to_string(binarized, config="--psm 6")