akhil-vaidya committed on
Commit b093688
1 Parent(s): 91c864d

search by keyword

Files changed (1):
  app.py +15 -42
app.py CHANGED
@@ -1,4 +1,4 @@
-from transformers import AutoModel, AutoTokenizer, Qwen2VLForConditionalGeneration, AutoProcessor, MllamaForConditionalGeneration
+from transformers import AutoModel, AutoTokenizer, Qwen2VLForConditionalGeneration, AutoProcessor
 import streamlit as st
 import os
 from PIL import Image
@@ -6,7 +6,7 @@ import requests
 import torch
 from torchvision import io
 from typing import Dict
-import base64
+import re
 
 @st.cache_resource
 def init_model():
@@ -71,41 +71,17 @@ def get_quen_op(image_file, model, processor):
     except Exception as e:
         return f"An error occurred: {str(e)}"
 
-@st.cache_resource
-def init_llama():
-    model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
-
-    model = MllamaForConditionalGeneration.from_pretrained(
-        model_id,
-        torch_dtype=torch.bfloat16,
-        device_map="auto",
-        token=os.getenv("access_token")
-    )
-    processor = AutoProcessor.from_pretrained(model_id, token=os.getenv("access_token"))
-    return model, processor
-
-def get_llama_op(image_file, model, processor):
-
-    with open(image_file, "rb") as f:
-        image = base64.b64encode(f.read()).decode('utf-8')
-
-    image = Image.open(image_file)
-    messages = [
-        {"role": "user", "content": [
-            {"type": "image"},
-            {"type": "text", "text": "You are an accurate OCR engine. From the given image, extract the text."}
-        ]}
-    ]
-    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
-    inputs = processor(images=image, text=input_text, return_tensors="pt").to(model.device)
-
-    output = model.generate(**inputs, max_new_tokens=20)
-    return processor.decode(output[0])
-
+# @st.cache_data
 def get_text(image_file, model, tokenizer):
     res = model.chat(tokenizer, image_file, ocr_type='ocr')
     return res
 
+def highlight_text(text, search_term):
+    if not search_term:
+        return text
+    pattern = re.compile(re.escape(search_term), re.IGNORECASE)
+    return pattern.sub(lambda m: f'<span style="background-color: yellow;">{m.group()}</span>', text)
+
 st.title("Image - Text OCR (General OCR Theory - GOT)")
 st.write("Upload an image for OCR")
 
@@ -114,7 +90,6 @@ MODEL, PROCESSOR = init_model()
 image_file = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg'])
 
 if image_file:
-
     if not os.path.exists("images"):
         os.makedirs("images")
     with open(f"images/{image_file.name}", "wb") as f:
@@ -122,14 +97,12 @@ if image_file:
 
     image_file = f"images/{image_file.name}"
 
-    # model, tokenizer = init_gpu_model()
-    # model, tokenizer = init_model()
     text = get_text(image_file, MODEL, PROCESSOR)
 
-    # model, processor = init_llama()
-    # text = get_llama_op(image_file, MODEL, PROCESSOR)
-
-    # model, processor = init_qwen_model()
-    # text = get_quen_op(image_file, model, processor)
     print(text)
-    st.write(text)
+
+    # Add search functionality
+    search_term = st.text_input("Enter a word or phrase to search:")
+    highlighted_text = highlight_text(text, search_term)
+
+    st.markdown(highlighted_text, unsafe_allow_html=True)
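
For reference, the new highlight_text helper is self-contained and can be exercised outside Streamlit. The sketch below copies the committed function verbatim; the sample string and the print call are illustrative only, not part of the app. Because the query goes through re.escape it is matched literally (terms like "C++" or "1.5" are safe), and m.group() preserves the original casing of each match.

import re

# Copy of the helper added in this commit: wraps every case-insensitive,
# literal occurrence of search_term in a yellow <span> for st.markdown.
def highlight_text(text, search_term):
    if not search_term:
        return text
    pattern = re.compile(re.escape(search_term), re.IGNORECASE)
    return pattern.sub(lambda m: f'<span style="background-color: yellow;">{m.group()}</span>', text)

print(highlight_text("Hello World, wonderful world", "world"))
# Hello <span style="background-color: yellow;">World</span>, wonderful <span style="background-color: yellow;">world</span>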
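One design note: the result is rendered with st.markdown(..., unsafe_allow_html=True), so any literal "<", ">", or "&" that the OCR engine extracts will be parsed as markup rather than displayed. If literal display is wanted, a variant could escape the text before highlighting. The following is a hypothetical sketch (the name highlight_text_escaped is not in the commit) using the standard-library html.escape:

import html
import re

def highlight_text_escaped(text, search_term):
    # Hypothetical variant, not part of the commit: escape the raw OCR text
    # first so stray "<", ">", or "&" display literally under unsafe_allow_html.
    escaped = html.escape(text)
    if not search_term:
        return escaped
    # Escape the query the same way so it matches the escaped text; queries
    # that overlap an entity such as "&amp;" remain an unhandled edge case.
    pattern = re.compile(re.escape(html.escape(search_term)), re.IGNORECASE)
    return pattern.sub(lambda m: f'<span style="background-color: yellow;">{m.group()}</span>', escaped)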