|
import gradio as gr
|
|
from byaldi import RAGMultiModalModel
|
|
|
|
|
|
# Checkpoint name handed to RAGMultiModalModel.from_pretrained.
COLPALI_CHECKPOINT = "vidore/colpali"

# Loaded once at module import so extract_and_search reuses the same instance
# on every call instead of reloading the model per request.
model = RAGMultiModalModel.from_pretrained(COLPALI_CHECKPOINT)
|
|
|
|
def extract_and_search(image, keyword):
    """Extract text from an image and pick out the lines containing a keyword.

    Args:
        image: Image to read; the Gradio input feeding this function is
            declared with ``type="pil"``. ``None`` (nothing uploaded) is
            accepted and yields empty results.
        keyword: Text to look for, matched case-insensitively as a substring
            of each line. ``None`` is treated like an empty string, which
            matches every line.

    Returns:
        A ``(extracted_text, matching_lines)`` tuple: the full text returned
        by the model and the list of its lines that contain ``keyword``.
    """
    # Gradio passes None when the form is submitted without an upload; return
    # empty results instead of forwarding None to the model.
    if image is None:
        return "", []

    # NOTE(review): byaldi documents index()/search() on RAGMultiModalModel;
    # confirm that predict() exists and returns a plain string here.
    extracted_text = model.predict(image)

    # Lower-case the keyword once rather than once per line.
    needle = (keyword or "").lower()
    matching_lines = [
        line for line in extracted_text.splitlines() if needle in line.lower()
    ]
    return extracted_text, matching_lines
|
|
|
|
|
|
# Input widgets, listed in the positional order extract_and_search takes them.
input_components = [
    gr.Image(type="pil", label="Upload Image"),
    gr.Textbox(label="Enter Keyword"),
]

# Output widgets, one per element of the tuple extract_and_search returns.
output_components = [
    gr.Textbox(label="Extracted Text"),
    gr.Textbox(label="Matching Lines"),
]

# Wire the function and its widgets into a single-page Gradio app.
interface = gr.Interface(
    fn=extract_and_search,
    inputs=input_components,
    outputs=output_components,
    title="ColPali OCR with Keyword Search",
    description="Upload an image and enter a keyword to search within the extracted text.",
)

# share=True asks Gradio to create a public share link in addition to the
# local server.
interface.launch(share=True)
|
|
|