RufusRubin777 committed
Commit a93895a
1 Parent(s): 588f818

Update app.py

Files changed (1):
  1. app.py +2 -23
app.py CHANGED
@@ -5,7 +5,6 @@ from byaldi import RAGMultiModalModel
 from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
 import torch
-import re
 
 # Load models
 def load_models():
@@ -54,39 +53,19 @@ def ocr_and_search(image, keyword):
 
     # Save extracted text to JSON
     output_json = {"query": text_query, "extracted_text": extracted_text}
-    # json_output = json.dumps(output_json, ensure_ascii=False, indent=4)
+    json_output = json.dumps(output_json, ensure_ascii=False, indent=4)
 
     # Perform keyword search
     keyword_lower = keyword.lower()
     sentences = extracted_text.split('. ')
     matched_sentences = [sentence for sentence in sentences if keyword_lower in sentence.lower()]
-
-
-    #########################################################################################################################################################
 
-    highlighted_text = matched_sentences
-    if search_queries:
-        keywords = [kw.strip() for kw in search_queries.split(",")]
-        for keyword in keywords:
-            highlighted_text = re.sub(
-                re.escape(keyword),
-                f"<span style='background-color: #f1c40f; color: #000; font-weight: bold;'>{keyword}</span>",
-                highlighted_text,
-                flags=re.IGNORECASE
-            )
-
-
-    ####################################################################################################################################################
-
-
-    return extracted_text, highlighted_text #, json_output
+    return extracted_text, matched_sentences #, json_output
 
 
 # Gradio App
 def app(image, keyword):
 
-    # extracted_text, search_results, json_output = ocr_and_search(image, keyword)
-
     extracted_text, search_results = ocr_and_search(image, keyword)
 
     search_results_str = "\n".join(search_results) if search_results else "No matches found."
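For context, the keyword search that ocr_and_search keeps after this change splits the OCR output on '. ' and retains the sentences containing the query, case-insensitively. A minimal standalone sketch of that step (find_matches and the sample text are illustrative, not part of app.py):

def find_matches(extracted_text: str, keyword: str) -> list[str]:
    # Split the OCR output into rough sentences and keep those that contain
    # the keyword, ignoring case (mirrors the list comprehension in the diff above).
    keyword_lower = keyword.lower()
    sentences = extracted_text.split('. ')
    return [sentence for sentence in sentences if keyword_lower in sentence.lower()]

# Example usage: prints ['The quick brown fox jumps over the lazy dog']
print(find_matches("The quick brown fox jumps over the lazy dog. Nothing to see here.", "FOX"))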
 
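The highlighting block removed above uses re.sub with re.escape and re.IGNORECASE to wrap keyword hits in a styled <span>, but it passes the list matched_sentences where re.sub expects a string. A hedged sketch of the same technique applied one sentence at a time (highlight_keyword is a hypothetical helper, not code from this repository):

import re

def highlight_keyword(sentences: list[str], keyword: str) -> list[str]:
    # Wrap every case-insensitive occurrence of the keyword in a styled <span>,
    # operating on each sentence string individually.
    pattern = re.compile(re.escape(keyword), flags=re.IGNORECASE)
    return [
        pattern.sub(
            lambda m: f"<span style='background-color: #f1c40f; color: #000; font-weight: bold;'>{m.group(0)}</span>",
            sentence,
        )
        for sentence in sentences
    ]

# Example usage: the matched text keeps its original casing ("Fox").
print(highlight_keyword(["The quick brown Fox jumps"], "fox"))

Substituting via the match object (m.group(0)) preserves the casing found in the OCR text, whereas the removed code re-inserted the raw keyword string.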