"""Gradio demo: extract text from an uploaded image and search it by keyword.

The app loads the ``vidore/colpali`` checkpoint through byaldi once at import
time, then serves a two-input / two-output Gradio interface.
"""

import gradio as gr
from byaldi import RAGMultiModalModel

# Loaded once at import time so every request reuses the same model instance.
model = RAGMultiModalModel.from_pretrained("vidore/colpali")


def _find_matching_lines(text: str, keyword: str) -> list:
    """Return the lines of *text* that contain *keyword*, ignoring case."""
    # Lower-case the keyword once instead of once per line.
    needle = keyword.lower()
    return [line for line in text.splitlines() if needle in line.lower()]


def extract_and_search(image, keyword):
    """Extract text from *image* and collect the lines containing *keyword*.

    Args:
        image: The uploaded image (the interface below requests it with
            ``type="pil"``). ``None`` means nothing was uploaded.
        keyword: Text to look for; matching is case-insensitive.

    Returns:
        A ``(extracted_text, matching_lines)`` tuple where ``matching_lines``
        is a list of the lines of ``extracted_text`` containing ``keyword``.
        An empty or whitespace-only keyword yields an empty list rather than
        matching every line.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Surface a readable message in the UI instead of failing inside the model.
        raise gr.Error("Please upload an image before searching.")

    # NOTE(review): byaldi's documented RAGMultiModalModel API exposes
    # index()/search() for retrieval; confirm that `predict` exists on the
    # installed version and that it returns a plain string. ColPali is a
    # retrieval model, so a dedicated OCR/VLM model may be needed here.
    extracted_text = model.predict(image)

    query = keyword or ""
    if not query.strip():
        # "" is a substring of every line, so without this guard an empty
        # search box would report the entire text as "matching".
        return extracted_text, []

    return extracted_text, _find_matching_lines(extracted_text, query)


def _extract_and_search_for_ui(image, keyword):
    """Adapt `extract_and_search` output to the two Textbox components."""
    extracted_text, matching_lines = extract_and_search(image, keyword)
    # A Textbox displays a string; passing the list would show its Python
    # repr (brackets and quotes), so join the matches one per line.
    return extracted_text, "\n".join(matching_lines)


interface = gr.Interface(
    fn=_extract_and_search_for_ui,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Enter Keyword"),
    ],
    outputs=[
        gr.Textbox(label="Extracted Text"),
        gr.Textbox(label="Matching Lines"),
    ],
    title="ColPali OCR with Keyword Search",
    description="Upload an image and enter a keyword to search within the extracted text.",
)

# NOTE: share=True asks Gradio to create a publicly reachable link; kept as in
# the original script. The launch stays at module level to preserve behavior.
interface.launch(share=True)