File size: 5,363 Bytes
451b7cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d1f8e76
451b7cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d1f8e76
 
 
 
 
 
 
 
 
 
 
 
451b7cf
d1f8e76
451b7cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d1f8e76
 
451b7cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d1f8e76
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import gradio as gr
import spaces
from transformers import AutoModel, AutoTokenizer
import os
import base64
import io
import uuid
import time
import shutil
from pathlib import Path
import re
import easyocr

# OCR models, instantiated once when the module is imported.
# GOT-OCR2.0 (CPU checkpoint): tokenizer + model used by run_GOT for English text.
tokenizer = AutoTokenizer.from_pretrained(
    'RufusRubin777/GOT-OCR2_0_CPU',
    trust_remote_code=True,
    device_map='cpu',
)
model = AutoModel.from_pretrained(
    'RufusRubin777/GOT-OCR2_0_CPU',
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    device_map='cpu',
    use_safetensors=True,
)
# Switch to inference mode and keep the weights on the CPU.
model = model.eval().cpu()
# EasyOCR reader configured for Hindi ('hi'); used by run_GOT for Hindi text.
reader = easyocr.Reader(['hi'])

# Working directories: uploaded images are copied into UPLOAD_FOLDER while
# they are being processed; both folders are swept by cleanup_old_files().
UPLOAD_FOLDER = "./uploads"
RESULTS_FOLDER = "./results"

# exist_ok=True makes creation idempotent and avoids the check-then-create
# race of testing os.path.exists() first.
for folder in [UPLOAD_FOLDER, RESULTS_FOLDER]:
    os.makedirs(folder, exist_ok=True)

def image_to_base64(image):
    """Return *image* serialized as PNG and encoded as a base64 text string.

    ``image`` must expose a ``save(file_obj, format=...)`` method; the PNG
    bytes are written to an in-memory buffer, never to disk.
    """
    with io.BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode()

def _extract_hindi_text(image):
    """Run the EasyOCR reader on *image* and return one recognized string per line."""
    # The second element of each readtext() result is used as the text.
    return ''.join(entry[1] + '\n' for entry in reader.readtext(image))


# OCR Processing of the image uploaded by the user
# @spaces.GPU
def run_GOT(image, language):
    """Extract text from an uploaded image in the requested language.

    Args:
        image: Filesystem path of the uploaded image.
        language: "English" uses the GOT model, "Hindi" uses the EasyOCR
            reader, and any other value runs both and returns the English
            text followed by the Hindi text.

    Returns:
        The extracted text, or a string starting with "Error: " when no image
        was supplied or processing failed. The result is always a single
        string because it feeds a single output component.
    """
    if not image:
        return "Error: No image provided. Please upload an image first."

    # Work on a uniquely named copy so concurrent requests never collide.
    image_path = os.path.join(UPLOAD_FOLDER, f"{uuid.uuid4()}.png")

    try:
        # Copy inside the try block so a failed copy is reported like any
        # other processing error instead of escaping as a raw exception.
        shutil.copy(image, image_path)

        if language == "English":
            return model.chat(tokenizer, image_path, ocr_type='ocr')
        if language == "Hindi":
            return _extract_hindi_text(image)

        # Any other choice: English pass first, then Hindi.
        english_extraction = model.chat(tokenizer, image_path, ocr_type='ocr')
        return english_extraction + '\n' + _extract_hindi_text(image)
    except Exception as e:
        # Top-level UI boundary: surface the failure as text in the output box.
        return f"Error: {str(e)}"
    finally:
        # Always remove the temporary copy, whether OCR succeeded or not.
        if os.path.exists(image_path):
            os.remove(image_path)

# Search Functionality
def search_keyword(text, keyword):
    """Return an HTML fragment of *text* with every occurrence of *keyword* highlighted.

    The search is case-insensitive and treats *keyword* as literal text, not
    as a regular expression. Each match is wrapped in ``<mark>`` tags and the
    whole text is wrapped in ``<h3>``. Text and matches are HTML-escaped so
    they cannot inject markup into the page.

    Args:
        text: The text to search (the OCR output).
        keyword: The literal string to look for.

    Returns:
        The highlighted HTML, or a centered "Keyword not found" heading when
        *text* or *keyword* is empty or there is no match.
    """
    import html  # local import: keeps this block self-contained

    not_found = '<h3 style="text-align: center;">'+"Keyword not found"+'</h3>'

    # An empty keyword would otherwise match at every position.
    if not text or not keyword:
        return not_found

    # re.escape makes characters such as "(" or "." match literally;
    # IGNORECASE matches against the original text so offsets stay valid.
    pattern = re.compile(re.escape(keyword), re.IGNORECASE)

    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()]))
        pieces.append('<mark>' + html.escape(match.group(0)) + '</mark>')
        cursor = match.end()

    if not pieces:
        return not_found

    pieces.append(html.escape(text[cursor:]))
    return '<h3>' + ''.join(pieces) + '</h3>'

def cleanup_old_files(max_age_seconds=3600, folders=None):
    """Delete files whose modification time is older than *max_age_seconds*.

    Args:
        max_age_seconds: Age threshold in seconds; defaults to 3600 (1 hour).
        folders: Iterable of directory paths to sweep. Defaults to
            UPLOAD_FOLDER and RESULTS_FOLDER.

    Only regular files directly inside each folder are removed.
    Subdirectories are skipped because ``Path.unlink`` cannot remove them,
    and folders that do not exist are ignored.
    """
    if folders is None:
        folders = (UPLOAD_FOLDER, RESULTS_FOLDER)

    current_time = time.time()
    for folder in folders:
        folder_path = Path(folder)
        if not folder_path.is_dir():
            continue
        for file_path in folder_path.glob('*'):
            if not file_path.is_file():
                continue
            if current_time - file_path.stat().st_mtime > max_age_seconds:
                file_path.unlink()

# Header markup for the page: app title plus a short description of the two
# OCR engines. Rendered through gr.HTML at the top of the interface.
title_html = """
<h1> <span class="gradient-text" id="text">Scan Master</span></h1>
<p>Scan Master uses General OCR Theory (GOT), a 580M end-to-end OCR 2.0 model for English optical character recognition and EASYOCR for Hindi optical character recognition. It supports plain text ocr.</p>
"""

# acknowledgement_html = """
# <h3>Acknowledgement</h3>
# <a href="https://huggingface.co/ucaslcl/GOT-OCR2_0">[😊 Hugging Face]</a> 
# <a href="https://arxiv.org/abs/2409.01704">[📜 Paper]</a>
# <a href="https://github.com/Ucas-HaoranWei/GOT-OCR2.0/">[🌟 GitHub]</a> 
# """

# aboutme_html = """
# <h3>About Me</h3>
# <p>Name : Satvik Chandrakar</p>
# <a href="https://github.com/Satvik-ai">[🌟 GitHub]</a> """


# Scan Master web application developed using Gradio
# Layout: a header, then one row for OCR (inputs on the left, extracted text
# on the right) and one row for keyword search (query on the left,
# highlighted result on the right). Event wiring follows the layout.
with gr.Blocks() as scan_master_web_app:
    gr.HTML(title_html)
    gr.Markdown("""
    You need to upload your image below and choose appropriate language, then click "Submit" to run the model. More characters will result in longer wait times.""")
    
    # --- OCR row ---
    with gr.Row():
        with gr.Column():
            # type="filepath": the handler receives the upload as a path on disk.
            image_input = gr.Image(type="filepath", label="Upload your image")
            gr.Markdown("""If your image contains only English text, then choose English option in the language. If it contains only Hindi text, then choose Hindi option in the language. If it contains both the language, then choose the third option.""")
            # These strings are compared verbatim in run_GOT; any value other
            # than "English" or "Hindi" selects the combined extraction.
            lang_dropdown = gr.Dropdown(
                choices=[
                    "English",
                    "Hindi",
                    "English + Hindi",
                ],
                label="Choose language",
                value="English"
            )
            submit_button = gr.Button("Submit")
        
        with gr.Column():
            # Receives the OCR text; also serves as the input of the search below.
            ocr_result = gr.Textbox(label="GOT output")
    
    # --- Keyword search row ---
    with gr.Row():
        with gr.Column():
            keyword = gr.Textbox(label="Search a keyword in the extracted text")
            search_button = gr.Button("Search")
        
        with gr.Column():
            # HTML component so the markup returned by search_keyword is rendered.
            search_result = gr.HTML(label="Search result")
    
    # gr.HTML(acknowledgement_html)
    # gr.HTML(aboutme_html)

    # "Submit": run OCR on the uploaded image in the selected language.
    submit_button.click(
        run_GOT,
        inputs=[image_input,lang_dropdown],
        outputs=[ocr_result]
    )

    # "Search": look up the keyword in the current contents of the OCR textbox.
    search_button.click(
        search_keyword,
        inputs=[ocr_result,keyword],
        outputs=[search_result]
    )

# Script entry point: runs only when the file is executed directly.
if __name__ == "__main__":
    # Sweep the upload/result folders once at startup, before serving.
    cleanup_old_files()
    # Start the Gradio server for the interface defined above.
    scan_master_web_app.launch()