arad1367 committed on
Commit
7b52b89
•
1 Parent(s): 4229184

setup is added to handle linux command

Browse files
Files changed (3) hide show
  1. app.py +94 -0
  2. requirements.txt +9 -0
  3. setup.sh +5 -0
app.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import gradio as gr
3
+ from pdf2image import convert_from_path
4
+ from byaldi import RAGMultiModalModel
5
+ from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
6
+ from qwen_vl_utils import process_vision_info
7
+ import torch
8
+ import subprocess
9
+
10
# Install flash-attn at startup (pip leaves it alone when it is already installed).
# FLASH_ATTENTION_SKIP_CUDA_BUILD is passed to the install; presumably it tells the
# flash-attn build to skip compiling CUDA kernels -- TODO confirm.
#
# The variable is merged into a copy of the current environment: passing a bare
# dict as `env` replaces the child's entire environment (PATH, HOME, virtualenv
# variables, ...), which can make `pip` resolve to the wrong interpreter or fail.
import os  # needed only to build the child-process environment

subprocess.run(
    'pip install flash-attn --no-build-isolation',
    env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    shell=True,
)
12
+
13
+ # try:
14
+ # subprocess.check_output(['dpkg', '-s', 'poppler-utils'])
15
+ # except subprocess.CalledProcessError:
16
+ # print("Error: poppler-utils is not installed. Installing...")
17
+ # subprocess.check_call(['sudo', 'apt-get', 'install', '-y', 'poppler-utils'])
18
+
19
# Module-level model setup: the byaldi retrieval model, the Qwen2-VL-2B-Instruct
# generator, and the processor that prepares the generator's inputs.
RAG = RAGMultiModalModel.from_pretrained("vidore/colpali")

model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)
# Move the weights to the GPU and switch to evaluation mode.
model = model.cuda().eval()

processor = AutoProcessor.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct",
    trust_remote_code=True,
)
24
+
25
@spaces.GPU()
def process_pdf_and_query(pdf_file, user_query):
    """Answer a question about an uploaded PDF.

    The PDF is rendered to page images and indexed with the retrieval model;
    the single best-matching page for the query is then passed, together with
    the query text, to the Qwen2-VL model to generate an answer.

    Args:
        pdf_file: The uploaded PDF, either an object exposing the file path
            as ``.name`` or a plain path string. ``None`` when nothing was
            uploaded.
        user_query: The question to ask about the PDF.

    Returns:
        A ``(answer, num_images)`` tuple: the generated answer (or a short
        message when the input is missing or retrieval finds nothing) and the
        number of page images rendered from the PDF.
    """
    # Submitting without an upload gives None; bail out before touching `.name`.
    if pdf_file is None:
        return "Please upload a PDF file.", 0

    # Accept both a file-like wrapper carrying `.name` and a bare path string.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Convert the PDF to images (one per page)
    images = convert_from_path(pdf_path)
    num_images = len(images)

    # Skip the indexing and generation work when there is nothing to ask.
    if not user_query or not user_query.strip():
        return "Please enter a query.", num_images

    # Indexing the PDF in RAG
    RAG.index(
        input_path=pdf_path,
        index_name="image_index",  # index will be saved at index_root/index_name/
        store_collection_with_index=False,
        overwrite=True,
    )

    # Search the query in the RAG model; only the top hit is used
    results = RAG.search(user_query, k=1)
    if not results:
        return "No results found.", num_images

    # page_num is treated as 1-based here, hence the -1 to index `images`
    image_index = results[0]["page_num"] - 1
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image": images[image_index],
                },
                {"type": "text", "text": user_query},
            ],
        }
    ]

    # Build the chat prompt and the vision inputs for the Qwen model
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    inputs = inputs.to("cuda")

    # Generate the output response
    generated_ids = model.generate(**inputs, max_new_tokens=50)
    # Drop the prompt tokens so only the newly generated tokens are decoded
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )

    return output_text[0], num_images
81
+
82
# Define the Gradio Interface
# Components come from the top-level `gr` namespace: the legacy `gr.inputs` /
# `gr.outputs` modules are deprecated and no longer exist in Gradio 4+, so
# referencing them fails at import time with an unpinned `gradio` install.
# NOTE(review): `type` is left at the File component's default so the handler
# still receives a value exposing `.name` -- confirm against the installed Gradio.
pdf_input = gr.File(label="Upload PDF")
query_input = gr.Textbox(label="Enter your query", placeholder="Ask a question about the PDF")
output_text = gr.Textbox(label="Model Answer")
output_images = gr.Textbox(label="Number of Images in PDF")

# Launch the Gradio app
gr.Interface(
    fn=process_pdf_and_query,
    inputs=[pdf_input, query_input],
    outputs=[output_text, output_images],
    title="Multimodal RAG with Image Query - By Pejman Ebrahimi",
).launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ spaces
2
+ gradio
3
+ byaldi
4
+ poppler-utils
5
+ pdf2image
6
+ git+https://github.com/huggingface/transformers.git
7
+ qwen-vl-utils
8
+ torchvision
9
+ torch
setup.sh ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
#!/bin/bash
# Install the system packages the app needs: poppler-utils, via apt.

# Stop at the first failing command (and on unset variables / failed pipes)
# instead of carrying on to the install step after a failed package-list refresh.
set -euo pipefail

# Install poppler-utils
sudo apt-get update
sudo apt-get install -y poppler-utils