Rick7799 committed
Commit 491e665
1 parent: 3dad239

Update app.py

Files changed (1)
  1. app.py +9 -27
app.py CHANGED
@@ -1,35 +1,17 @@
 import gradio as gr
-import torch
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
-from PIL import Image
+from byaldi import RAGMultiModalModel  # Importing the ColPali model
 
-# Load the ColPali model and tokenizer from Hugging Face
-model_name = "vidore/colpali-v1.2" # Use the correct model identifier
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+# Initialize the ColPali model
+model = RAGMultiModalModel.from_pretrained("vidore/colpali")
 
 def extract_and_search(image, keyword):
-    try:
-        # Convert image to RGB if it's not already in that format
-        if image.mode != 'RGB':
-            image = image.convert('RGB')
+    # Use the model to extract text from the image
+    extracted_text = model.predict(image)  # Replace with actual prediction method
 
-        # Preprocess image: convert to tensor format required by the model
-        inputs = tokenizer(images=image, return_tensors="pt") # Adjust as necessary for your input requirements
-
-        # Extract text from image using ColPali model
-        with torch.no_grad(): # Disable gradient calculation for inference
-            outputs = model.generate(**inputs)
-
-        # Decode outputs to text
-        extracted_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-        # Perform keyword search
-        matching_lines = [line for line in extracted_text.splitlines() if keyword.lower() in line.lower()]
-
-        return extracted_text, matching_lines
-    except Exception as e:
-        return f"Error during extraction: {str(e)}", []
+    # Perform keyword search
+    matching_lines = [line for line in extracted_text.splitlines() if keyword.lower() in line.lower()]
+
+    return extracted_text, matching_lines
 
 # Create Gradio interface
 interface = gr.Interface(
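
Note: byaldi's RAGMultiModalModel wraps ColPali as a visual document retrieval model, so to my knowledge it does not expose a predict() method that returns OCR-style text; the committed model.predict(image) call is marked as a placeholder by its own comment. Below is a minimal sketch of the usual byaldi flow, assuming an index()/search() API and using illustrative paths, index names, and queries rather than anything from the commit.

from byaldi import RAGMultiModalModel

# Load the ColPali checkpoint through byaldi (same call as in the commit).
model = RAGMultiModalModel.from_pretrained("vidore/colpali")

# Build an index over a folder of documents/images.
# input_path and index_name are illustrative values, not from the commit.
model.index(
    input_path="docs/",
    index_name="demo_index",
    store_collection_with_index=False,
    overwrite=True,
)

# Retrieve the pages most relevant to a keyword, instead of
# string-matching lines of OCR output as the committed function does.
results = model.search("invoice total", k=3)
for result in results:
    # Result fields shown here (doc_id, page_num, score) should be
    # verified against the installed byaldi version.
    print(result.doc_id, result.page_num, result.score)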