import os
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
import gradio as gr
from PIL import Image
from happytransformer import HappyTextToText, TTSettings
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import re
# OCR Predictor initialization
predictor = ocr_predictor(det_arch='db_mobilenet_v3_large', reco_arch='crnn_vgg16_bn', pretrained=True)
# Grammar Correction Model initialization
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
grammar_args = TTSettings(num_beams=5, min_length=1)
# Spell Check Model initialization
tokenizer = AutoTokenizer.from_pretrained("Bhuvana/t5-base-spellchecker", use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained("Bhuvana/t5-base-spellchecker")
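
# Helper: run the spell-check model over a single chunk of text and return the decoded correction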
def correct_spell(inputs):
    input_ids = tokenizer.encode(inputs, return_tensors='pt')
    sample_output = model.generate(
        input_ids,
        do_sample=True,
        max_length=512,
        top_p=0.99,
        num_return_sequences=1
    )
    res = tokenizer.decode(sample_output[0], skip_special_tokens=True)
    return res
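
# Helper: apply a text-processing function sentence by sentence, splitting overlong
# sentences into fixed-size chunks so each call stays within the models' input limits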
def process_text_in_chunks(text, process_function, max_chunk_size=256):
    # Split text into sentences
    sentences = re.split(r'(?<=[.!?])\s+', text)
    processed_text = ""
    for sentence in sentences:
        # Further split long sentences into smaller chunks
        chunks = [sentence[i:i + max_chunk_size] for i in range(0, len(sentence), max_chunk_size)]
        for chunk in chunks:
            processed_text += process_function(chunk)
        processed_text += " "  # Add space after each processed sentence
    return processed_text.strip()
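
# Main Gradio handler: run OCR on the uploaded image, optionally clean up the text,
# and return it along with a downloadable .txt file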
def greet(img, apply_grammar_correction, apply_spell_check):
    img.save("out.jpg")
    doc = DocumentFile.from_images("out.jpg")
    output = predictor(doc)
    res = ""
    for page in output.pages:
        for block in page.blocks:
            for line in block.lines:
                for word in line.words:
                    res += " " + word.value
                res += "\n"  # newline after each OCR line
            res += "\n"      # blank line between blocks
    # Process in chunks for grammar correction
    if apply_grammar_correction:
        res = process_text_in_chunks(res, lambda x: happy_tt.generate_text("grammar: " + x, args=grammar_args).text)
    # Process in chunks for spell check
    if apply_spell_check:
        res = process_text_in_chunks(res, correct_spell)
    _output_name = "RESULT_OCR.txt"
    with open(_output_name, 'w') as f:
        f.write(res)
    return res, _output_name

# Gradio Interface
title = "DocTR OCR with Grammar and Spell Check"
description = "Upload an image to get the OCR results. Optionally, apply grammar and spell check."
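# The three inputs map positionally to greet(img, apply_grammar_correction, apply_spell_check);
# the outputs are the recognized text and the saved .txt file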
demo = gr.Interface(
    fn=greet,
    inputs=[
        gr.Image(type="pil"),
        gr.Checkbox(label="Apply Grammar Correction"),
        gr.Checkbox(label="Apply Spell Check")
    ],
    outputs=["text", "file"],
    title=title,
    description=description,
    examples=[["Examples/Book.png"], ["Examples/News.png"], ["Examples/Manuscript.jpg"], ["Examples/Files.jpg"]]
)
demo.launch(debug=True)