Spaces: Runtime error
RufusRubin777 committed · Commit b295a57 · 1 Parent(s): 916ad5f
Update app.py
app.py CHANGED
@@ -16,8 +16,12 @@ def load_models():
 
 RAG, model, processor = load_models()
 
-#
+# Global variable to store extracted text
+extracted_text_global = ""
+
+# Function for OCR extraction
 def extract_text(image):
+    global extracted_text_global
     text_query = "Extract all the text in Sanskrit and English from the image."
 
     # Prepare message for Qwen model
@@ -35,66 +39,86 @@ def extract_text(image):
     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     image_inputs, video_inputs = process_vision_info(messages)
     inputs = processor(
-        text=[text],
+        text=[text],
+        images=image_inputs,
+        videos=video_inputs,
+        padding=True,
+        return_tensors="pt"
     ).to("cpu")  # Use CPU
-
+
     # Generate text
     with torch.no_grad():
         generated_ids = model.generate(**inputs, max_new_tokens=2000)
-    generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
-    extracted_text = processor.batch_decode(
-        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
-    )[0]
 
+    generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
+    extracted_text = processor.batch_decode(
+        generated_ids_trimmed,
+        skip_special_tokens=True,
+        clean_up_tokenization_spaces=False
+    )[0]
+
+    # Store extracted text in global variable
+    extracted_text_global = extracted_text
     return extracted_text
 
-# Function for keyword search
-def search_keyword(
+# Function for keyword search within extracted text
+def search_keyword(keyword):
+    global extracted_text_global
+    if not extracted_text_global:
+        return "No extracted text available. Please extract text first.", "No matches found."
+
     keyword_lower = keyword.lower()
-    sentences =
+    sentences = extracted_text_global.split('. ')
     matched_sentences = []
 
+    # Perform keyword search with highlighting
     for sentence in sentences:
         if keyword_lower in sentence.lower():
             highlighted_sentence = re.sub(
-                f'({re.escape(keyword)})',
+                f'({re.escape(keyword)})',
+                r'<mark>\1</mark>',  # Highlight the matched keyword
+                sentence,
+                flags=re.IGNORECASE
             )
             matched_sentences.append(highlighted_sentence)
 
     search_results_str = "<br>".join(matched_sentences) if matched_sentences else "No matches found."
-
-    return search_results_str
+    return extracted_text_global, search_results_str
 
-# Gradio App
-def
+# Gradio App
+def app_extract(image):
     extracted_text = extract_text(image)
     return extracted_text
 
-def
-
-
-    search_results_str = search_keyword(extracted_text, keyword)
-    return search_results_str
-
-# Gradio Interface
-with gr.Blocks() as iface:
-    extracted_text = gr.State("")  # State to hold extracted text
-
-    with gr.Row():
-        with gr.Column():
-            image_input = gr.Image(type="pil", label="Upload an Image")
-            keyword_input = gr.Textbox(label="Enter keyword to search in extracted text", placeholder="Keyword")
+def app_search(keyword):
+    extracted_text, search_results = search_keyword(keyword)
+    return extracted_text, search_results
 
-
-
-
+# Gradio Interface with two buttons
+iface = gr.Interface(
+    fn=[app_extract, app_search],
+    inputs=[
+        gr.Image(type="pil", label="Upload an Image"),
+        gr.Textbox(label="Enter keyword to search in extracted text", placeholder="Keyword")
+    ],
+    outputs=[
+        gr.Textbox(label="Extracted Text"),
+        gr.HTML(label="Search Results"),
+    ],
+    title="OCR and Keyword Search in Images",
+    live=False,
+    description="First, extract the text from an image, then search for a keyword in the extracted text.",
+    layout="vertical",
+    allow_flagging="never"
+)
 
-
-
+# Create separate buttons
+extract_button = gr.Button("Extract Text")
+search_button = gr.Button("Search Keyword")
 
-
-
-
+# Link buttons to their respective functions
+extract_button.click(fn=app_extract, inputs=[gr.Image(type="pil")], outputs=[gr.Textbox(label="Extracted Text")])
+search_button.click(fn=app_search, inputs=[gr.Textbox(label="Enter keyword")], outputs=[gr.Textbox(label="Extracted Text"), gr.HTML(label="Search Results")])
 
 # Launch Gradio App
 iface.launch()
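
For reference, the highlighting logic that search_keyword() applies per sentence is an ordinary case-insensitive re.sub over text split on '. '. A minimal standalone sketch of just that step (the sample text, keyword, and the helper name highlight_matches are made up for illustration):

import re

def highlight_matches(text, keyword):
    # Mirror search_keyword(): split on '. ', wrap case-insensitive matches in <mark> tags.
    matched = []
    for sentence in text.split('. '):
        if keyword.lower() in sentence.lower():
            matched.append(re.sub(f'({re.escape(keyword)})', r'<mark>\1</mark>', sentence, flags=re.IGNORECASE))
    return "<br>".join(matched) if matched else "No matches found."

print(highlight_matches("Rama went to the forest. Sita followed him.", "sita"))
# -> <mark>Sita</mark> followed him.

Escaping the keyword with re.escape() keeps user input from being interpreted as a regex pattern.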
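One caveat on the new wiring: gr.Interface wraps a single callable, and gr.Button()/.click() calls made outside a gr.Blocks context are not attached to the app, so the two standalone buttons at the end of the file are unlikely to behave as intended (the Space currently shows a runtime error). If the goal is two independent buttons, one for extraction and one for search, a gr.Blocks layout along the lines of the removed code is one way to wire it. The sketch below is illustrative only, not the committed code; it assumes extract_text() and search_keyword() exactly as defined in this commit and keeps the module-level extracted_text_global for state.

import gradio as gr

# Illustrative Blocks wiring; assumes extract_text() and search_keyword() from this commit are in scope.
with gr.Blocks() as iface:
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload an Image")
            keyword_input = gr.Textbox(label="Enter keyword to search in extracted text", placeholder="Keyword")
            extract_button = gr.Button("Extract Text")
            search_button = gr.Button("Search Keyword")
        with gr.Column():
            text_output = gr.Textbox(label="Extracted Text")
            search_output = gr.HTML(label="Search Results")

    # Each button drives one function; search_keyword() reads extracted_text_global set by extract_text().
    extract_button.click(fn=extract_text, inputs=image_input, outputs=text_output)
    search_button.click(fn=lambda keyword: search_keyword(keyword)[1], inputs=keyword_input, outputs=search_output)

iface.launch()

Sending the search results to gr.HTML keeps the <mark>/<br> highlighting rendered instead of shown as raw tags.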