PierreBrunelle
committed on
Commit
•
66368d3
1
Parent(s):
f09e956
Update app.py
Browse files
app.py
CHANGED
@@ -49,11 +49,10 @@ def create_prompt(top_k_list: list[dict], question: str) -> str:
|
|
49 |
{question}'''
|
50 |
|
51 |
"""Gradio Application"""
|
52 |
-
|
53 |
def process_files(ground_truth_file, pdf_files, chunk_limit, chunk_separator, show_question, show_correct_answer, show_gpt4omini, show_llamav3p23b, show_mistralsmall, progress=gr.Progress()):
|
54 |
# Ensure a clean slate for the demo by removing and recreating the 'rag_demo' directory
|
55 |
progress(0, desc="Initializing...")
|
56 |
-
|
57 |
pxt.drop_dir('rag_demo', force=True)
|
58 |
pxt.create_dir('rag_demo')
|
59 |
|
@@ -65,13 +64,13 @@ def process_files(ground_truth_file, pdf_files, chunk_limit, chunk_separator, sh
|
|
65 |
queries_t = pxt.io.import_excel('rag_demo.queries', ground_truth_file.name)
|
66 |
|
67 |
progress(0.2, desc="Processing documents...")
|
68 |
-
|
69 |
# Create a table to store the uploaded PDF documents
|
70 |
documents_t = pxt.create_table(
|
71 |
'rag_demo.documents',
|
72 |
{'document': pxt.DocumentType()}
|
73 |
)
|
74 |
-
|
75 |
# Insert the PDF files into the documents table
|
76 |
documents_t.insert({'document': file.name} for file in pdf_files if file.name.endswith('.pdf'))
|
77 |
|
@@ -206,19 +205,17 @@ with gr.Blocks(theme=Monochrome) as demo:
|
|
206 |
)
|
207 |
|
208 |
# Add the disclaimer
|
209 |
-
gr.
|
210 |
"""
|
211 |
<div style="background-color: #E5DDD4; border: 1px solid #e9ecef; border-radius: 8px; padding: 15px; margin-bottom: 20px;">
|
212 |
-
<strong>Disclaimer:</strong> This
|
213 |
-
|
214 |
-
<a href="https://huggingface.co/spaces/Pixeltable/Multi-LLM-RAG-with-Groundtruth-Comparison?duplicate=true" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">duplicate this Hugging Face Space</a>
|
215 |
-
or run it locally or in Google Colab.
|
216 |
</div>
|
217 |
"""
|
218 |
)
|
219 |
-
|
220 |
with gr.Row():
|
221 |
-
with gr.Column():
|
222 |
with gr.Accordion("What This Demo Does", open = True):
|
223 |
gr.Markdown("""
|
224 |
1. **Ingests Documents**: Uploads your PDF documents and a ground truth file (CSV or XLSX).
|
@@ -226,7 +223,7 @@ with gr.Blocks(theme=Monochrome) as demo:
|
|
226 |
4. **Generates Answers**: Leverages OpenAI to produce accurate answers based on the retrieved context.
|
227 |
5. **Compares Results**: Displays the generated answers alongside the ground truth for easy evaluation.
|
228 |
""")
|
229 |
-
with gr.Column():
|
230 |
with gr.Accordion("How to Use", open = True):
|
231 |
gr.Markdown("""
|
232 |
1. Upload your ground truth file (CSV or XLSX) with the following two columns: **question** and **correct_answer**.
|
@@ -234,7 +231,7 @@ with gr.Blocks(theme=Monochrome) as demo:
|
|
234 |
3. Click "Process Files and Generate Output" to start the RAG process.
|
235 |
4. View the results in the table below, comparing AI-generated answers to the ground truth.
|
236 |
""")
|
237 |
-
|
238 |
# File upload components for ground truth and PDF documents
|
239 |
with gr.Row():
|
240 |
ground_truth_file = gr.File(label="Upload Ground Truth (CSV or XLSX) - Format to respect:question | correct_answer", file_count="single")
|
@@ -267,6 +264,13 @@ with gr.Blocks(theme=Monochrome) as demo:
|
|
267 |
with gr.Row():
|
268 |
with gr.Column(scale=1):
|
269 |
download_button = gr.Button("Download Results as CSV")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
with gr.Column(scale=2):
|
271 |
csv_output = gr.File(label="CSV Download")
|
272 |
|
@@ -285,7 +289,7 @@ with gr.Blocks(theme=Monochrome) as demo:
|
|
285 |
show_llamav3p23b,
|
286 |
show_mistralsmall],
|
287 |
outputs=df_output)
|
288 |
-
|
289 |
download_button.click(
|
290 |
trigger_download,
|
291 |
inputs=[df_output],
|
@@ -293,4 +297,4 @@ with gr.Blocks(theme=Monochrome) as demo:
|
|
293 |
)
|
294 |
|
295 |
if __name__ == "__main__":
|
296 |
-
demo.launch(
|
|
|
49 |
{question}'''
|
50 |
|
51 |
"""Gradio Application"""
|
|
|
52 |
def process_files(ground_truth_file, pdf_files, chunk_limit, chunk_separator, show_question, show_correct_answer, show_gpt4omini, show_llamav3p23b, show_mistralsmall, progress=gr.Progress()):
|
53 |
# Ensure a clean slate for the demo by removing and recreating the 'rag_demo' directory
|
54 |
progress(0, desc="Initializing...")
|
55 |
+
|
56 |
pxt.drop_dir('rag_demo', force=True)
|
57 |
pxt.create_dir('rag_demo')
|
58 |
|
|
|
64 |
queries_t = pxt.io.import_excel('rag_demo.queries', ground_truth_file.name)
|
65 |
|
66 |
progress(0.2, desc="Processing documents...")
|
67 |
+
|
68 |
# Create a table to store the uploaded PDF documents
|
69 |
documents_t = pxt.create_table(
|
70 |
'rag_demo.documents',
|
71 |
{'document': pxt.DocumentType()}
|
72 |
)
|
73 |
+
|
74 |
# Insert the PDF files into the documents table
|
75 |
documents_t.insert({'document': file.name} for file in pdf_files if file.name.endswith('.pdf'))
|
76 |
|
|
|
205 |
)
|
206 |
|
207 |
# Add the disclaimer
|
208 |
+
gr.HTML(
|
209 |
"""
|
210 |
<div style="background-color: #E5DDD4; border: 1px solid #e9ecef; border-radius: 8px; padding: 15px; margin-bottom: 20px;">
|
211 |
+
<strong>Disclaimer:</strong> This app is running on OpenAI, Mistral, and Fireworks accounts with my own API keys 😞. This Hugging Face Space uses the free tier (2vCPU, 16GB RAM), which may result in slower processing times, especially for embedding generation and large document processing. Embeddings are generated using the sentence-transformer library with the 'intfloat/e5-large-v2' model. If you wish to use this app with your own hardware or API keys for improved performance, you can:
|
212 |
+
<a href="https://huggingface.co/spaces/Pixeltable/Multi-LLM-RAG-with-Groundtruth-Comparison/duplicate" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">duplicate this Hugging Face Space</a>, run it locally, or use <b>Google Colab with the Free limited GPU support</b>.</p>
|
|
|
|
|
213 |
</div>
|
214 |
"""
|
215 |
)
|
216 |
+
|
217 |
with gr.Row():
|
218 |
+
with gr.Column():
|
219 |
with gr.Accordion("What This Demo Does", open = True):
|
220 |
gr.Markdown("""
|
221 |
1. **Ingests Documents**: Uploads your PDF documents and a ground truth file (CSV or XLSX).
|
|
|
223 |
4. **Generates Answers**: Leverages OpenAI to produce accurate answers based on the retrieved context.
|
224 |
5. **Compares Results**: Displays the generated answers alongside the ground truth for easy evaluation.
|
225 |
""")
|
226 |
+
with gr.Column():
|
227 |
with gr.Accordion("How to Use", open = True):
|
228 |
gr.Markdown("""
|
229 |
1. Upload your ground truth file (CSV or XLSX) with the following two columns: **question** and **correct_answer**.
|
|
|
231 |
3. Click "Process Files and Generate Output" to start the RAG process.
|
232 |
4. View the results in the table below, comparing AI-generated answers to the ground truth.
|
233 |
""")
|
234 |
+
|
235 |
# File upload components for ground truth and PDF documents
|
236 |
with gr.Row():
|
237 |
ground_truth_file = gr.File(label="Upload Ground Truth (CSV or XLSX) - Format to respect:question | correct_answer", file_count="single")
|
|
|
264 |
with gr.Row():
|
265 |
with gr.Column(scale=1):
|
266 |
download_button = gr.Button("Download Results as CSV")
|
267 |
+
|
268 |
+
gr.Examples(
|
269 |
+
examples=['Zacks-Nvidia-Report.pdf'],
|
270 |
+
inputs=[pdf_files])
|
271 |
+
gr.Examples(
|
272 |
+
examples=['Q-A-Rag.xlsx'],
|
273 |
+
inputs=[ground_truth_files])
|
274 |
with gr.Column(scale=2):
|
275 |
csv_output = gr.File(label="CSV Download")
|
276 |
|
|
|
289 |
show_llamav3p23b,
|
290 |
show_mistralsmall],
|
291 |
outputs=df_output)
|
292 |
+
|
293 |
download_button.click(
|
294 |
trigger_download,
|
295 |
inputs=[df_output],
|
|
|
297 |
)
|
298 |
|
299 |
if __name__ == "__main__":
|
300 |
+
demo.launch(show_api=False)
|