RufusRubin777 committed on
Commit 431e77e
1 Parent(s): a0bcd50

Delete app_1.py

Files changed (1)
  1. app_1.py +0 -92
app_1.py DELETED
@@ -1,92 +0,0 @@
- import gradio as gr
- from PIL import Image
- import json
- from byaldi import RAGMultiModalModel
- from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
- from qwen_vl_utils import process_vision_info
- import torch
- import re
-
- # Load models
- def load_models():
-     RAG = RAGMultiModalModel.from_pretrained("vidore/colpali")
-     model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True, torch_dtype=torch.float32)  # float32 for CPU
-     processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", trust_remote_code=True)
-     return RAG, model, processor
-
- RAG, model, processor = load_models()
-
- # Function for OCR and search
- def ocr_and_search(image, keyword):
-     text_query = "Extract all the text in Sanskrit and English from the image."
-
-     # Prepare message for Qwen model
-     messages = [
-         {
-             "role": "user",
-             "content": [
-                 {"type": "image", "image": image},
-                 {"type": "text", "text": text_query},
-             ],
-         }
-     ]
-
-     # Process the image
-     text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-     image_inputs, video_inputs = process_vision_info(messages)
-     inputs = processor(
-         text=[text],
-         images=image_inputs,
-         videos=video_inputs,
-         padding=True,
-         return_tensors="pt",
-     ).to("cpu")  # Use CPU
-
-     # Generate text
-     with torch.no_grad():
-         generated_ids = model.generate(**inputs, max_new_tokens=2000)
-     generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
-     extracted_text = processor.batch_decode(
-         generated_ids_trimmed,
-         skip_special_tokens=True,
-         clean_up_tokenization_spaces=False
-     )[0]
-
-     # Perform keyword search with highlighting
-     keyword_lower = keyword.lower()
-     sentences = extracted_text.split('. ')
-     matched_sentences = []
-     for sentence in sentences:
-         if keyword_lower in sentence.lower():
-             highlighted_sentence = re.sub(
-                 f'({re.escape(keyword)})',
-                 r'<mark>\1</mark>',
-                 sentence,
-                 flags=re.IGNORECASE
-             )
-             matched_sentences.append(highlighted_sentence)
-
-     return extracted_text, matched_sentences
-
- # Gradio App
- def app(image, keyword):
-     extracted_text, search_results = ocr_and_search(image, keyword)
-     search_results_str = "<br>".join(search_results) if search_results else "No matches found."
-     return extracted_text, search_results_str
-
- # Gradio Interface
- iface = gr.Interface(
-     fn=app,
-     inputs=[
-         gr.Image(type="pil", label="Upload an Image"),
-         gr.Textbox(label="Enter keyword to search in extracted text", placeholder="Keyword")
-     ],
-     outputs=[
-         gr.Textbox(label="Extracted Text"),
-         gr.HTML(label="Search Results"),
-     ],
-     title="OCR and Keyword Search in Images",
- )
-
- # Launch Gradio App
- iface.launch()