Spaces:
Sleeping
Sleeping
File size: 5,363 Bytes
451b7cf d1f8e76 451b7cf d1f8e76 451b7cf d1f8e76 451b7cf d1f8e76 451b7cf d1f8e76 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
import gradio as gr
import spaces
from transformers import AutoModel, AutoTokenizer
import os
import base64
import io
import uuid
import time
import shutil
from pathlib import Path
import re
import easyocr
# OCR engines: the GOT-OCR2.0 CPU checkpoint handles English, EasyOCR handles Hindi.
tokenizer = AutoTokenizer.from_pretrained(
    'RufusRubin777/GOT-OCR2_0_CPU',
    trust_remote_code=True,
    device_map='cpu',
)
model = AutoModel.from_pretrained(
    'RufusRubin777/GOT-OCR2_0_CPU',
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    device_map='cpu',
    use_safetensors=True,
)
# Switch the model to eval mode and keep it on the CPU.
model = model.eval().cpu()
reader = easyocr.Reader(['hi'])

# Working directories; created at import time when they are missing.
UPLOAD_FOLDER = "./uploads"
RESULTS_FOLDER = "./results"
for folder in (UPLOAD_FOLDER, RESULTS_FOLDER):
    if os.path.exists(folder):
        continue
    os.makedirs(folder)
def image_to_base64(image):
    """Return the PNG encoding of ``image`` as a base64 text string.

    ``image`` must provide ``save(file_obj, format=...)``; it is written to an
    in-memory buffer as PNG and the resulting bytes are base64-encoded.
    """
    with io.BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode()
# OCR Processing of the image uploaded by the user
# @spaces.GPU
def run_GOT(image, language):
    """Extract text from an uploaded image with the OCR engine(s) for ``language``.

    Args:
        image: Filesystem path of the uploaded image, or ``None``/empty when
            nothing was uploaded.
        language: ``"English"`` runs the GOT model only, ``"Hindi"`` runs
            EasyOCR only, and any other value runs both and joins the results
            (English first, then Hindi) with a newline.

    Returns:
        The extracted text as one string. On failure, a single string starting
        with ``"Error: "`` is returned (never a tuple, because the handler is
        bound to exactly one output component).
    """
    # Nothing uploaded: report it instead of letting shutil.copy raise.
    if not image:
        return "Error: no image provided"

    image_path = os.path.join(UPLOAD_FOLDER, f"{uuid.uuid4()}.png")
    try:
        # The copy is inside the try so a failed copy is reported like any
        # other OCR error and the finally clause still cleans up.
        shutil.copy(image, image_path)

        sections = []
        if language != "Hindi":
            # GOT reads the private copy made above.
            sections.append(model.chat(tokenizer, image_path, ocr_type='ocr'))
        if language != "English":
            # Each EasyOCR result carries its text at index 1; emit one per line.
            detections = reader.readtext(image)
            sections.append(''.join(f"{detection[1]}\n" for detection in detections))
        return '\n'.join(sections)
    except Exception as e:
        return f"Error: {e}"
    finally:
        # Always drop the temporary copy, whether OCR succeeded or not.
        if os.path.exists(image_path):
            os.remove(image_path)
# Search Functionality
def search_keyword(text, keyword):
    """Highlight every case-insensitive occurrence of ``keyword`` in ``text``.

    Args:
        text: The text to search (the OCR output). ``None`` is treated as "".
        keyword: The literal string to look for. It is matched verbatim, so
            regex metacharacters such as ``+`` or ``(`` have no special meaning.

    Returns:
        An HTML ``<h3>`` fragment. When the keyword occurs, the fragment is the
        text with each occurrence wrapped in ``<mark>``; otherwise (including
        an empty keyword) it is a centered "Keyword not found" message.
    """
    import html

    not_found = '<h3 style="text-align: center;">' + "Keyword not found" + '</h3>'
    text = text or ""
    keyword = keyword or ""
    # An empty pattern matches at every position; treat it as "nothing to find".
    if not keyword:
        return not_found

    # Match on the original text with IGNORECASE so offsets always refer to
    # the string being sliced, and escape the keyword so it is taken literally.
    pattern = re.compile(re.escape(keyword), re.IGNORECASE)

    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        # Text and keyword are untrusted; escape them before embedding in HTML.
        pieces.append(html.escape(text[cursor:match.start()]))
        pieces.append('<mark>' + html.escape(match.group()) + '</mark>')
        cursor = match.end()

    if not pieces:
        return not_found

    pieces.append(html.escape(text[cursor:]))
    return '<h3>' + ''.join(pieces) + '</h3>'
def cleanup_old_files(max_age_seconds=3600):
    """Delete stale files from the upload and results folders.

    Args:
        max_age_seconds: Files whose modification time is more than this many
            seconds in the past are removed. Defaults to one hour.

    Only regular files are removed; subdirectories are skipped because
    ``Path.unlink`` cannot delete them. Files that disappear while the folder
    is being scanned are ignored.
    """
    current_time = time.time()
    for folder in (UPLOAD_FOLDER, RESULTS_FOLDER):
        for file_path in Path(folder).glob('*'):
            try:
                if not file_path.is_file():
                    continue
                if current_time - file_path.stat().st_mtime > max_age_seconds:
                    file_path.unlink()
            except FileNotFoundError:
                # The file was removed between listing and stat/unlink.
                continue
# HTML header for the page (app title and a short description of the OCR
# engines); it is passed to gr.HTML when the interface is built.
title_html = """
<h1> <span class="gradient-text" id="text">Scan Master</span></h1>
<p>Scan Master uses General OCR Theory (GOT), a 580M end-to-end OCR 2.0 model for English optical character recognition and EASYOCR for Hindi optical character recognition. It supports plain text ocr.</p>
"""
# acknowledgement_html = """
# <h3>Acknowledgement</h3>
# <a href="https://huggingface.co/ucaslcl/GOT-OCR2_0">[π Hugging Face]</a>
# <a href="https://arxiv.org/abs/2409.01704">[π Paper]</a>
# <a href="https://github.com/Ucas-HaoranWei/GOT-OCR2.0/">[π GitHub]</a>
# """
# aboutme_html = """
# <h3>About Me</h3>
# <p>Name : Satvik Chandrakar</p>
# <a href="https://github.com/Satvik-ai">[π GitHub]</a> """
# Scan Master web application, assembled with Gradio Blocks.
with gr.Blocks() as scan_master_web_app:
    gr.HTML(title_html)
    gr.Markdown("""
You need to upload your image below and choose appropriate language, then click "Submit" to run the model. More characters will result in longer wait times.""")

    # First row: image upload and language choice on the left, OCR text on the right.
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="filepath", label="Upload your image")
            gr.Markdown("""If your image contains only English text, then choose English option in the language. If it contains only Hindi text, then choose Hindi option in the language. If it contains both the language, then choose the third option.""")
            lang_dropdown = gr.Dropdown(
                choices=["English", "Hindi", "English + Hindi"],
                label="Choose language",
                value="English",
            )
            submit_button = gr.Button("Submit")
        with gr.Column():
            ocr_result = gr.Textbox(label="GOT output")

    # Second row: keyword entry on the left, highlighted search result on the right.
    with gr.Row():
        with gr.Column():
            keyword = gr.Textbox(label="Search a keyword in the extracted text")
            search_button = gr.Button("Search")
        with gr.Column():
            search_result = gr.HTML(label="Search result")

    # gr.HTML(acknowledgement_html)
    # gr.HTML(aboutme_html)

    # Event wiring: "Submit" runs OCR, "Search" highlights the keyword in the OCR text.
    submit_button.click(
        run_GOT,
        inputs=[image_input, lang_dropdown],
        outputs=[ocr_result],
    )
    search_button.click(
        search_keyword,
        inputs=[ocr_result, keyword],
        outputs=[search_result],
    )
if __name__ == "__main__":
    # Remove stale files left by earlier runs, then start the Gradio server.
    cleanup_old_files()
    scan_master_web_app.launch()