Spaces:

akhil-vaidya
/

GOT-OCR

Running

App Files Files Community

akhil-vaidya committed on Sep 29, 2024

Commit

b093688

1 Parent(s): 91c864d

search by keyword

Browse files

Files changed (1) hide show

app.py +15 -42

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from transformers import AutoModel, AutoTokenizer, Qwen2VLForConditionalGeneration, AutoProcessor, MllamaForConditionalGeneration
 import streamlit as st
 import os
 from PIL import Image
@@ -6,7 +6,7 @@ import requests
 import torch
 from torchvision import io
 from typing import Dict
-import base64
 @st.cache_resource
 def init_model():
@@ -71,41 +71,17 @@ def get_quen_op(image_file, model, processor):
     except Exception as e:
         return f"An error occurred: {str(e)}"
-@st.cache_resource
-def init_llama():
-    model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
-    model = MllamaForConditionalGeneration.from_pretrained(
-        model_id,
-        torch_dtype=torch.bfloat16,
-        device_map="auto",
-        token=os.getenv("access_token")
-    )
-    processor = AutoProcessor.from_pretrained(model_id, token=os.getenv("access_token"))
-    return model, processor
-def get_llama_op(image_file, model, processor):
-    with open(image_file, "rb") as f:
-        image = base64.b64encode(f.read()).decode('utf-8')
-    image = Image.open(image_file)
-    messages = [
-        {"role": "user", "content": [
-            {"type": "image"},
-            {"type": "text", "text": "You are an accurate OCR engine. From the given image, extract the text."}
-        ]}
-    ]
-    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
-    inputs = processor(images=image, text=input_text, return_tensors="pt").to(model.device)
-    output = model.generate(**inputs, max_new_tokens=20)
-    return processor.decode(output[0])
 def get_text(image_file, model, tokenizer):
     res = model.chat(tokenizer, image_file, ocr_type='ocr')
     return res
 st.title("Image - Text OCR (General OCR Theory - GOT)")
 st.write("Upload an image for OCR")
@@ -114,7 +90,6 @@ MODEL, PROCESSOR = init_model()
 image_file = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg'])
 if image_file:
     if not os.path.exists("images"):
         os.makedirs("images")
     with open(f"images/{image_file.name}", "wb") as f:
@@ -122,14 +97,12 @@ if image_file:
     image_file = f"images/{image_file.name}"
-    # model, tokenizer = init_gpu_model()
-    # model, tokenizer = init_model()
     text = get_text(image_file, MODEL, PROCESSOR)
-    # model, processor = init_llama()
-    # text = get_llama_op(image_file, MODEL, PROCESSOR)
-    # model, processor = init_qwen_model()
-    # text = get_quen_op(image_file, model, processor)
     print(text)
-    st.write(text)

+from transformers import AutoModel, AutoTokenizer, Qwen2VLForConditionalGeneration, AutoProcessor
 import streamlit as st
 import os
 from PIL import Image
 import torch
 from torchvision import io
 from typing import Dict
+import re
 @st.cache_resource
 def init_model():
     except Exception as e:
         return f"An error occurred: {str(e)}"
+# @st.cache_data
 def get_text(image_file, model, tokenizer):
     """Run OCR on an image via the model's ``chat`` method.

     Args:
         image_file: Image reference forwarded to ``model.chat``; the
             caller in this script passes the path of the saved upload.
         model: Object exposing ``chat(tokenizer, image_file, ocr_type=...)``.
         tokenizer: Passed through as the first argument of ``model.chat``.

     Returns:
         Whatever ``model.chat`` returns for ``ocr_type='ocr'``, unchanged.
     """
     return model.chat(tokenizer, image_file, ocr_type='ocr')
def highlight_text(text, search_term):
    """Return ``text`` as HTML-safe markup with ``search_term`` highlighted.

    The result is meant to be rendered with raw HTML enabled, so every
    piece of the original text is HTML-escaped before it is emitted.
    Otherwise any ``<``, ``>`` or ``&`` in the recognised text would be
    interpreted as markup instead of being displayed.

    Args:
        text: The text to display (e.g. OCR output). Falsy values are
            returned unchanged.
        search_term: Literal word or phrase to highlight, matched
            case-insensitively. If falsy, nothing is highlighted.

    Returns:
        The escaped text, with each match wrapped in a
        ``<span style="background-color: yellow;">`` element.
    """
    import html  # local import keeps this block self-contained

    if not text:
        return text
    if not search_term:
        return html.escape(text, quote=False)

    # re.escape makes the user's input match literally, not as a regex.
    pattern = re.compile(re.escape(search_term), re.IGNORECASE)

    # Escape the gaps and the matches separately so the only live markup
    # in the result is the <span> wrapper added here.
    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()], quote=False))
        pieces.append(
            '<span style="background-color: yellow;">'
            f'{html.escape(match.group(), quote=False)}</span>'
        )
        cursor = match.end()
    pieces.append(html.escape(text[cursor:], quote=False))
    return "".join(pieces)
 st.title("Image - Text OCR (General OCR Theory - GOT)")
 st.write("Upload an image for OCR")
 image_file = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg'])
 if image_file:
     if not os.path.exists("images"):
         os.makedirs("images")
     with open(f"images/{image_file.name}", "wb") as f:
     image_file = f"images/{image_file.name}"
     text = get_text(image_file, MODEL, PROCESSOR)
     print(text)
+    # Add search functionality
+    search_term = st.text_input("Enter a word or phrase to search:")
+    highlighted_text = highlight_text(text, search_term)
+    st.markdown(highlighted_text, unsafe_allow_html=True)