File size: 10,837 Bytes
9dcf4fb
 
 
 
 
164d1e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0590c46
90b2605
164d1e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
# Initialize Google API and model
import torch

device = torch.device("cpu")  # Force CPU

import base64
import os
from huggingface_hub import login
import PIL.Image
from byaldi import RAGMultiModalModel
import PIL.Image as PILImage
import io
import textwrap
import google.generativeai as genai
import gradio as gr  # Add Gradio for UI
from PIL import Image as PILImage


import os
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('models/gemini-1.5-flash-latest')

# Load the RAG multi-modal model
RAG = RAGMultiModalModel.from_pretrained("vidore/colpali-v1.2", verbose=1)
RAG.to(device)

# Specify the index path where the index was saved during the first run
index_path = "/home/mohammadaqib/Desktop/project/research/Multi-Modal-RAG/Colpali/BCC"
RAG = RAGMultiModalModel.from_index(index_path)

# Initialize conversation history
conversation_history = []

def get_user_input(query):
    """Process user input."""
    return query

def process_image_from_results(results):
    """Process images from the search results and merge them."""
    image_list = []
    for i in range(min(3, len(results))):
        try:
            # Ensure the result has a base64 attribute
            image_bytes = base64.b64decode(results[i].base64)
            image = PILImage.open(io.BytesIO(image_bytes))  # Open image directly from bytes
            image_list.append(image)
        except AttributeError:
            print(f"Result {i} does not contain a 'base64' attribute.")
    
    # Merge images if any
    if image_list:
        total_width = sum(img.width for img in image_list)
        max_height = max(img.height for img in image_list)

        merged_image = PILImage.new('RGB', (total_width, max_height))
        x_offset = 0
        for img in image_list:
            merged_image.paste(img, (x_offset, 0))
            x_offset += img.width

        # Save the merged image
        merged_image.save('merged_image.jpg')
        return merged_image
    else:
        return None

def generate_answer(query, image):
    """Generate an answer using the Gemini model and the merged image."""
    response = model.generate_content([f'Answer to the question asked using the image. Also mention the reference from image to support your answer. Example, Table Number or Statement number or any metadata. Question: {query}', image], stream=True)
    response.resolve()
    return response.text

def classify_system_question(query):
    """Check if the question is related to the system itself."""
    response = model.generate_content([f"Determine if the question is about the system itself, like 'Who are you?' or 'What can you do?' or 'Introduce yourself' . Answer with 'yes' or 'no'. Question: {query}"], stream=True)
    response.resolve()
    return response.text.strip().lower() == "yes"

def classify_question(query):
    """Classify whether the question is general or domain-specific using Gemini."""
    response = model.generate_content([f"Classify this question as 'general' or 'domain-specific'. Give one word answer i.e general or domain-specific. General questions are greetings and questions involving general knowledge like the capital of France. General questions also involve politics, geography, history, economics, cosmology, information about famous personalities, etc. Question: {query}"], stream=True)
    response.resolve()
    return response.text.strip().lower()  # Assuming the response is either 'general' or 'domain-specific'

def chatbot(query, history):
    max_history_length = 50  # Number of recent exchanges to keep
    
    # Truncate the history to the last `max_history_length` exchanges
    truncated_history = history[-max_history_length:]
    
    # Add user input to the history
    truncated_history.append(("You: " + query, "Model:"))
    
    # Step 1: Check if the question is about the system
    if classify_system_question(query):
        text = "I am an AI assistant capable of answering queries related to the National Building Code of Canada and general questions. I was developed by the research group [SITE] at the University of Alberta. How can I assist you further?"

    else:
        # Step 2: Classify the question as general or domain-specific
        question_type = classify_question(query)
        
        # If the question is general, use Gemini to directly answer it
        if question_type == "general":
            text = model.generate_content([f"Answer this general question: {query}. If it is a greeting respond accordingly and if it is not greeting add a prefix saying that it is a general query."], stream=True)
            text.resolve()
            text = text.text

        else:
            # Step 3: Query the RAG model for domain-specific answers
            results = RAG.search(query, k=3)
            
            # Check if RAG found any results
            if not results:
                text = model.generate_content([f"Answer this question: {query}"], stream=True)
                text.resolve()
                text = text.text
                text = "It is a general query. ANSWER:" + text
            else:
                # Process images from the results
                image = process_image_from_results(results)
                
                # Generate the answer using the Gemini model if an image is found
                if image:
                    text = generate_answer(query, image)
                    text = "It is a query from NBCC. ANSWER:" + text
                    
                    # Check if the answer is a fallback message (indicating no relevant answer)
                    if any(keyword in text.lower() for keyword in [
                        "does not provide", 
                        "cannot answer", 
                        "does not contain", 
                        "no relevant answer", 
                        "not found", 
                        "information unavailable", 
                        "not in the document", 
                        "unable to provide", 
                        "no data", 
                        "missing information", 
                        "no match", 
                        "provided text does not describe",
                        "are not explicitly listed",
                        "are not explicitly mentioned",
                        "no results", 
                        "not available", 
                        "query not found"
                    ]):
                        # Fallback to Gemini for answering
                        text = model.generate_content([f"Answer this general question in concise manner: {query}"], stream=True)
                        text.resolve()
                        text = text.text
                        text = "It is a general query. ANSWER: " + text
                else:
                    text = model.generate_content([f"Answer this question: {query}"], stream=True)
                    text.resolve()
                    text = text.text
                    text = "It is a query from NBCC. ANSWER: " + text

    # Add the model's response to the truncated history
    truncated_history[-1] = (truncated_history[-1][0], "Model: " + text)  # Update the most recent message with model's answer

    # Return the output text, updated state, and chat history (as tuple pairs)
    return text, truncated_history, truncated_history  # Ensure all three outputs are returned in the correct order



import gradio as gr

# Define Gradio interface
with gr.Blocks() as iface:
    # Set the conversation state as an empty list
    state = gr.State([])

    # Custom CSS to beautify the interface
    iface.css = """
    .gradio-container {
        background-color: #f9f9f9;
        border-radius: 15px;
        padding: 20px;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
    }
    .gr-chatbox {
        background-color: #f0f0f0;
        border-radius: 10px;
        padding: 10px;
        max-height: 1000px;
        overflow-y: scroll;
        margin-bottom: 10px;
    }
    .gr-textbox input {
        border-radius: 10px;
        padding: 12px;
        font-size: 16px;
        border: 1px solid #ccc;
        width: 100%;
        margin-top: 10px;
        box-sizing: border-box;
    }
    .gr-textbox input:focus {
        border-color: #4CAF50;
        outline: none;
    }
    .gr-button {
        background-color: #4CAF50;
        color: white;
        padding: 12px;
        border-radius: 10px;
        font-size: 16px;
        border: none;
        cursor: pointer;
    }
    .gr-button:hover {
        background-color: #45a049;
    }
    .gr-chatbot {
        font-family: "Arial", sans-serif;
        font-size: 14px;
    }
    .gr-chatbot .gr-chatbot-user {
        background-color: #e1f5fe;
        border-radius: 10px;
        padding: 8px;
        margin-bottom: 10px;
        max-width: 80%;
    }
    .gr-chatbot .gr-chatbot-model {
        background-color: #ffffff;
        border-radius: 10px;
        padding: 8px;
        margin-bottom: 10px;
        max-width: 80%;
    }
    .gr-chatbot .gr-chatbot-user p,
    .gr-chatbot .gr-chatbot-model p {
        margin: 0;
    }
    #input_box {
        position: fixed;
        bottom: 20px;
        width: 95%;
        padding: 10px;
        border-radius: 10px;
        box-shadow: 0 0 5px rgba(0, 0, 0, 0.2);
    }
    """

    # Add an image at the top of the page
    with gr.Column():
        gr.Image("/home/mohammadaqib/Pictures/Screenshots/site.png",height = 300)  # Use the image URL
        gr.Markdown(
            "# Question Answering System Over National Building Code of Canada"
        )

    # Chatbot UI
    with gr.Row():

        chat_history = gr.Chatbot(label="Chat History", height=250)

    
    # Place input at the bottom
    with gr.Row():
        query = gr.Textbox(
            label="Ask a Question",
            placeholder="Enter your question here...",
            lines=1,
            interactive=True,
            elem_id="input_box"  # Custom ID for styling
        )
    
    # Output for the response
    output_text = gr.Textbox(label="Answer", interactive=False, visible=False)  # Optional to hide
    
    # Define the interaction behavior
    query.submit(
        chatbot, 
        inputs=[query, state], 
        outputs=[output_text, state, chat_history],
        show_progress=True
    ).then(
        lambda _: "",  # Clear the input after submission
        inputs=None,
        outputs=query
    )

    gr.Markdown("<p style='position: fixed; bottom:0; width: 100%; text-align: left; font-style: italic; margin-left: 15%;  font-size: 18px;'>Developed by Mohammad Aqib, MSc Student at the University of Alberta, supervised by Dr. Qipei (Gavin) Mei.</p>", elem_id="footer")




# Launch the interface
iface.launch(share=True)