import re

import gradio as gr
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from happytransformer import HappyTextToText, TTSettings
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# OCR predictor initialization (docTR detection + recognition models)
predictor = ocr_predictor(det_arch='db_mobilenet_v3_large', reco_arch='crnn_vgg16_bn', pretrained=True)

# Grammar correction model initialization
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
grammar_args = TTSettings(num_beams=5, min_length=1)

# Spell check model initialization
tokenizer = AutoTokenizer.from_pretrained("Bhuvana/t5-base-spellchecker", use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained("Bhuvana/t5-base-spellchecker")


def correct_spell(inputs):
    """Run the spell-checker model on a chunk of text and return the corrected text."""
    input_ids = tokenizer.encode(inputs, return_tensors='pt')
    sample_output = model.generate(
        input_ids,
        do_sample=True,
        max_length=512,
        top_p=0.99,
        num_return_sequences=1
    )
    return tokenizer.decode(sample_output[0], skip_special_tokens=True)


def process_text_in_chunks(text, process_function, max_chunk_size=256):
    """Apply process_function sentence by sentence, splitting long sentences into chunks."""
    # Split text into sentences on end-of-sentence punctuation
    sentences = re.split(r'(?<=[.!?])\s+', text)
    processed_text = ""

    for sentence in sentences:
        # Further split long sentences into chunks the models can handle
        chunks = [sentence[i:i + max_chunk_size] for i in range(0, len(sentence), max_chunk_size)]
        for chunk in chunks:
            processed_text += process_function(chunk)
        processed_text += " "  # Add a space after each processed sentence

    return processed_text.strip()


def greet(img, apply_grammar_correction, apply_spell_check):
    """Run OCR on the uploaded image and optionally apply grammar correction and spell check."""
    img.save("out.jpg")
    doc = DocumentFile.from_images("out.jpg")
    output = predictor(doc)

    # Reassemble recognized words into lines and blocks of text
    res = ""
    for page in output.pages:
        for block in page.blocks:
            for line in block.lines:
                for word in line.words:
                    res += " " + word.value
                res += "\n"
            res += "\n"

    # Process in chunks for grammar correction
    if apply_grammar_correction:
        res = process_text_in_chunks(
            res, lambda x: happy_tt.generate_text("grammar: " + x, args=grammar_args).text
        )

    # Process in chunks for spell check
    if apply_spell_check:
        res = process_text_in_chunks(res, correct_spell)

    _output_name = "RESULT_OCR.txt"
    with open(_output_name, "w") as f:
        f.write(res)
    return res, _output_name


# Gradio interface
title = "DocTR OCR with Grammar and Spell Check"
description = "Upload an image to get the OCR results. Optionally, apply grammar and spell check."

demo = gr.Interface(
    fn=greet,
    inputs=[
        gr.Image(type="pil"),
        gr.Checkbox(label="Apply Grammar Correction"),
        gr.Checkbox(label="Apply Spell Check")
    ],
    outputs=["text", "file"],
    title=title,
    description=description,
    # Each example must supply a value for every input component
    examples=[
        ["Examples/Book.png", False, False],
        ["Examples/News.png", False, False],
        ["Examples/Manuscript.jpg", False, False],
        ["Examples/Files.jpg", False, False]
    ]
)

demo.launch(debug=True)