import gradio as gr
import pandas as pd
import uuid
import pixeltable as pxt
from pixeltable.iterators import DocumentSplitter
import numpy as np
from pixeltable.functions.huggingface import sentence_transformer
from pixeltable.functions import openai
from pixeltable.functions.fireworks import chat_completions as f_chat_completions
from pixeltable.functions.mistralai import chat_completions
from gradio.themes import Monochrome

import os
import getpass

"""## Store OpenAI API Key"""

if 'OPENAI_API_KEY' not in os.environ:
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key:')

if 'FIREWORKS_API_KEY' not in os.environ:
    os.environ['FIREWORKS_API_KEY'] = getpass.getpass('Fireworks API Key:')

if 'MISTRAL_API_KEY' not in os.environ:
    os.environ['MISTRAL_API_KEY'] = getpass.getpass('Mistral AI API Key:')

"""## Creating UDFs: Embedding and Prompt Functions"""

# Set up embedding function
@pxt.expr_udf
def e5_embed(text: str) -> np.ndarray:
    return sentence_transformer(text, model_id='intfloat/e5-large-v2')
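
# Note: E5-family models are trained with "query: "/"passage: " input prefixes;
# this demo embeds raw chunk text for simplicity, which may trade away a little
# retrieval quality. A hedged sketch (hypothetical names) of adding the prefix
# through a plain UDF:
#
#   @pxt.udf
#   def add_passage_prefix(text: str) -> str:
#       return 'passage: ' + text
#
# and then embedding add_passage_prefix(text) inside e5_embed instead of the
# raw text.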

# Create prompt function
@pxt.udf
def create_prompt(top_k_list: list[dict], question: str) -> str:
    concat_top_k = '\n\n'.join(
        elt['text'] for elt in reversed(top_k_list)
    )
    return f'''
    PASSAGES:

    {concat_top_k}

    QUESTION:

    {question}'''
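
# For a row with two retrieved chunks, the rendered prompt looks roughly like:
#
#   PASSAGES:
#
#   <second-most-similar chunk>
#
#   <most-similar chunk>
#
#   QUESTION:
#
#   <question>
#
# reversed() places the most similar passage last, closest to the question,
# where models tend to weight context most reliably.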

"""Gradio Application"""
def process_files(ground_truth_file, pdf_files, chunk_limit, chunk_separator, show_question, show_correct_answer, show_gpt4omini, show_llamav3p23b, show_mistralsmall, progress=gr.Progress()):
    # Ensure a clean slate for the demo by removing and recreating the 'rag_demo' directory
    progress(0, desc="Initializing...")
    
    pxt.drop_dir('rag_demo', force=True)
    pxt.create_dir('rag_demo')

    # Process the ground truth file, which contains questions and correct answers
    # Import as CSV or Excel depending on the file extension
    if ground_truth_file.name.endswith('.csv'):
        queries_t = pxt.io.import_csv('rag_demo.queries', ground_truth_file.name)
    else:
        queries_t = pxt.io.import_excel('rag_demo.queries', ground_truth_file.name)

    progress(0.2, desc="Processing documents...")
    
    # Create a table to store the uploaded PDF documents
    documents_t = pxt.create_table(
        'rag_demo.documents',
        {'document': pxt.DocumentType()}
    )
    
    # Insert the PDF files into the documents table
    documents_t.insert({'document': file.name} for file in pdf_files if file.name.endswith('.pdf'))

    # Create a view that splits the documents into smaller chunks
    chunks_t = pxt.create_view(
        'rag_demo.chunks',
        documents_t,
        iterator=DocumentSplitter.create(
            document=documents_t.document,
            separators=chunk_separator,
            limit=chunk_limit if chunk_separator in ["token_limit", "char_limit"] else None
        )
    )
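
    # The view exposes one row per chunk, with the chunk contents in a 'text'
    # column (used below for indexing and retrieval); chunk rows are derived
    # automatically from every document inserted above.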

    progress(0.4, desc="Generating embeddings...")

    # Add an embedding index to the chunks for similarity search
    chunks_t.add_embedding_index('text', string_embed=e5_embed)

    # Define a query function to retrieve the top-k most similar chunks for a given question
    @chunks_t.query
    def top_k(query_text: str):
        sim = chunks_t.text.similarity(query_text)
        return (
            chunks_t.order_by(sim, asc=False)
            .select(chunks_t.text, sim=sim)
            .limit(5)
        )
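
    # top_k is now callable as a table expression: conceptually,
    # chunks_t.top_k("What was Nvidia's revenue?") (illustrative question)
    # yields the five most similar chunks as a list of {'text': ..., 'sim': ...}
    # dicts, which is exactly the shape create_prompt expects.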

    # Add computed columns to the queries table for context retrieval and prompt creation
    queries_t['question_context'] = chunks_t.top_k(queries_t.question)
    queries_t['prompt'] = create_prompt(
        queries_t.question_context, queries_t.question
    )

    # Prepare messages for the OpenAI API, including system instructions and user prompt
    msgs = [
        {
            'role': 'system',
            'content': 'Read the following passages and answer the question based on their contents.'
        },
        {
            'role': 'user',
            'content': queries_t.prompt
        }
    ]
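
    # Because queries_t.prompt is a column reference, this message list acts as
    # a per-row template: each LLM call below is evaluated once per query row,
    # with that row's retrieved context substituted into the user message.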

    progress(0.6, desc="Querying models...")

    # Add OpenAI response column
    queries_t['response'] = openai.chat_completions(
        model='gpt-4o-mini-2024-07-18',
        messages=msgs,
        max_tokens=300,
        top_p=0.9,
        temperature=0.7
    )

    # Add a response column from a Llama model hosted on Fireworks
    queries_t['response_2'] = f_chat_completions(
        messages=msgs,
        model='accounts/fireworks/models/llama-v3p2-3b-instruct',
        # These parameters are optional and can be used to tune model behavior:
        max_tokens=300,
        top_p=0.9,
        temperature=0.7
    )

    # Add a Mistral Small response column
    queries_t['response_3'] = chat_completions(
        messages=msgs,
        model='mistral-small-latest',
        # These parameters are optional and can be used to tune model behavior:
        max_tokens=300,
        top_p=0.9,
        temperature=0.7
    )

    # Extract the answer text from the API response
    queries_t['gpt4omini'] = queries_t.response.choices[0].message.content
    queries_t['llamav3p23b'] = queries_t.response_2.choices[0].message.content
    queries_t['mistralsmall'] = queries_t.response_3.choices[0].message.content

    # Prepare the output dataframe with selected columns
    columns_to_show = []
    if show_question:
        columns_to_show.append(queries_t.question)
    if show_correct_answer:
        columns_to_show.append(queries_t.correct_answer)
    if show_gpt4omini:
        columns_to_show.append(queries_t.gpt4omini)
    if show_llamav3p23b:
        columns_to_show.append(queries_t.llamav3p23b)
    if show_mistralsmall:
        columns_to_show.append(queries_t.mistralsmall)

    df_output = queries_t.select(*columns_to_show).collect().to_pandas()

    # Return the output dataframe for display
    return df_output

def save_dataframe_as_csv(data):
    print(f"Type of data: {type(data)}")
    if isinstance(data, pd.DataFrame):
        print(f"Shape of DataFrame: {data.shape}")
    if isinstance(data, pd.DataFrame) and not data.empty:
        filename = f"results_{uuid.uuid4().hex[:8]}.csv"
        filepath = os.path.join('tmp', filename)
        os.makedirs('tmp', exist_ok=True)
        data.to_csv(filepath, index=False)
        return filepath
    return None
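
# Illustrative usage (hypothetical data): save_dataframe_as_csv(
# pd.DataFrame({'question': ['q'], 'gpt4omini': ['a']})) writes a file and
# returns a path like 'tmp/results_<8 hex chars>.csv'; a non-DataFrame or an
# empty DataFrame returns None instead.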

# Gradio interface
with gr.Blocks(theme=Monochrome()) as demo:
    gr.Markdown(
        """
        <div style="max-width: 800px; margin: 0 auto;">
            <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" style="max-width: 200px; margin-bottom: 20px;" />
            <h1 style="margin-bottom: 0.5em;">Multi-LLM RAG Benchmark: Document Q&A with Groundtruth Comparison</h1>
        </div>
        """
    )
    gr.HTML(
        """
        <p>
            <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a> is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data.
        </p>
        """
    )

    # Add the disclaimer
    gr.HTML(
        """
        <div style="background-color: #E5DDD4; border: 1px solid #e9ecef; border-radius: 8px; padding: 15px; margin-bottom: 20px;">
            <strong>Disclaimer:</strong> This app is running on OpenAI, Mistral, and Fireworks accounts with my own API keys 😞. This Hugging Face Space uses the free tier (2vCPU, 16GB RAM), which may result in slower processing times, especially for embedding generation and large document processing. Embeddings are generated using the sentence-transformer library with the 'intfloat/e5-large-v2' model. If you wish to use this app with your own hardware or API keys for improved performance, you can:
            <a href="https://huggingface.co/spaces/Pixeltable/Multi-LLM-RAG-with-Groundtruth-Comparison/duplicate" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">duplicate this Hugging Face Space</a>, run it locally, or use <b>Google Colab with the Free limited GPU support</b>.</p>
        </div>
        """
    )
   
    with gr.Row():
        with gr.Column():  
           with gr.Accordion("What This Demo Does", open = True):
            gr.Markdown("""
          1. **Ingests Documents**: Uploads your PDF documents and a ground truth file (CSV or XLSX).
          2. **Process and Retrieve Data**: Store, chunk, index, orchestrate, and retrieve all data.
          4. **Generates Answers**: Leverages OpenAI to produce accurate answers based on the retrieved context.
          5. **Compares Results**: Displays the generated answers alongside the ground truth for easy evaluation.
          """)
        with gr.Column():  
          with gr.Accordion("How to Use", open = True):
            gr.Markdown("""
          1. Upload your ground truth file (CSV or XLSX) with the following two columns: **question** and **correct_answer**.
          2. Upload one or more PDF documents that contain the information to answer these questions.
          3. Click "Process Files and Generate Output" to start the RAG process.
          4. View the results in the table below, comparing AI-generated answers to the ground truth.
          """)
 
    # File upload components for ground truth and PDF documents
    with gr.Row():
        ground_truth_file = gr.File(label="Upload Ground Truth (CSV or XLSX) - expected columns: question | correct_answer", file_count="single")
        pdf_files = gr.File(label="Upload PDF Documents", file_count="multiple")

    # Add controls for chunking parameters
    with gr.Row():
        chunk_limit = gr.Slider(minimum=100, maximum=500, value=300, step=5, label="Chunk Size Limit (used only with the token_limit/char_limit separators)")
        chunk_separator = gr.Dropdown(
            choices=["token_limit", "char_limit", "sentence", "paragraph", "heading"],
            value="token_limit",
            label="Chunk Separator"
        )

    with gr.Row():
        show_question = gr.Checkbox(label="Show Question", value=True)
        show_correct_answer = gr.Checkbox(label="Show Correct Answer", value=True)
        show_gpt4omini = gr.Checkbox(label="Show GPT-4o-mini Answer", value=True)
        show_llamav3p23b = gr.Checkbox(label="Show LLaMA-v3-2-3B Answer", value=True)
        show_mistralsmall = gr.Checkbox(label="Show Mistral-Small Answer", value=True)

    # Button to trigger file processing
    process_button = gr.Button("Process Files and Generate Outputs")

    # Output component to display the results
    df_output = gr.DataFrame(label="Pixeltable Table", wrap=True)

    with gr.Row():
        with gr.Column(scale=1):
            download_button = gr.Button("Download Results as CSV")

            gr.Examples(
                examples=['Zacks-Nvidia-Report.pdf'],
                inputs=[pdf_files])
            gr.Examples(
                examples=['Q-A-Rag.xlsx'],
                inputs=[ground_truth_file])
        with gr.Column(scale=2):
            csv_output = gr.File(label="CSV Download")

    def trigger_download(data):
        csv_path = save_dataframe_as_csv(data)
        return csv_path if csv_path else None

    process_button.click(process_files,
                         inputs=[ground_truth_file,
                                 pdf_files,
                                 chunk_limit,
                                 chunk_separator,
                                 show_question,
                                 show_correct_answer,
                                 show_gpt4omini,
                                 show_llamav3p23b,
                                 show_mistralsmall],
                         outputs=df_output)
    
    download_button.click(
        trigger_download,
        inputs=[df_output],
        outputs=[csv_output]
    )

if __name__ == "__main__":
    demo.launch(show_api=False)