Spaces:
Runtime error
Runtime error
RufusRubin777
committed on
Commit
•
a93895a
1
Parent(s):
588f818
Update app.py
Browse files
app.py
CHANGED
@@ -5,7 +5,6 @@ from byaldi import RAGMultiModalModel
|
|
5 |
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
|
6 |
from qwen_vl_utils import process_vision_info
|
7 |
import torch
|
8 |
-
import re
|
9 |
|
10 |
# Load models
|
11 |
def load_models():
|
@@ -54,39 +53,19 @@ def ocr_and_search(image, keyword):
|
|
54 |
|
55 |
# Save extracted text to JSON
|
56 |
output_json = {"query": text_query, "extracted_text": extracted_text}
|
57 |
-
|
58 |
|
59 |
# Perform keyword search
|
60 |
keyword_lower = keyword.lower()
|
61 |
sentences = extracted_text.split('. ')
|
62 |
matched_sentences = [sentence for sentence in sentences if keyword_lower in sentence.lower()]
|
63 |
-
|
64 |
-
|
65 |
-
#########################################################################################################################################################
|
66 |
|
67 |
-
|
68 |
-
if search_queries:
|
69 |
-
keywords = [kw.strip() for kw in search_queries.split(",")]
|
70 |
-
for keyword in keywords:
|
71 |
-
highlighted_text = re.sub(
|
72 |
-
re.escape(keyword),
|
73 |
-
f"<span style='background-color: #f1c40f; color: #000; font-weight: bold;'>{keyword}</span>",
|
74 |
-
highlighted_text,
|
75 |
-
flags=re.IGNORECASE
|
76 |
-
)
|
77 |
-
|
78 |
-
|
79 |
-
####################################################################################################################################################
|
80 |
-
|
81 |
-
|
82 |
-
return extracted_text, highlighted_text #, json_output
|
83 |
|
84 |
|
85 |
# Gradio App
|
86 |
def app(image, keyword):
|
87 |
|
88 |
-
# extracted_text, search_results, json_output = ocr_and_search(image, keyword)
|
89 |
-
|
90 |
extracted_text, search_results = ocr_and_search(image, keyword)
|
91 |
|
92 |
search_results_str = "\n".join(search_results) if search_results else "No matches found."
|
|
|
5 |
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
|
6 |
from qwen_vl_utils import process_vision_info
|
7 |
import torch
|
|
|
8 |
|
9 |
# Load models
|
10 |
def load_models():
|
|
|
53 |
|
54 |
# Save extracted text to JSON
|
55 |
output_json = {"query": text_query, "extracted_text": extracted_text}
|
56 |
+
json_output = json.dumps(output_json, ensure_ascii=False, indent=4)
|
57 |
|
58 |
# Perform keyword search
|
59 |
keyword_lower = keyword.lower()
|
60 |
sentences = extracted_text.split('. ')
|
61 |
matched_sentences = [sentence for sentence in sentences if keyword_lower in sentence.lower()]
|
|
|
|
|
|
|
62 |
|
63 |
+
return extracted_text, matched_sentences #, json_output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
|
66 |
# Gradio App
|
67 |
def app(image, keyword):
|
68 |
|
|
|
|
|
69 |
extracted_text, search_results = ocr_and_search(image, keyword)
|
70 |
|
71 |
search_results_str = "\n".join(search_results) if search_results else "No matches found."
|