PierreBrunelle committed on
Commit
fb3b5a9
1 Parent(s): 650714a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -22
app.py CHANGED
@@ -1,3 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  import pixeltable as pxt
@@ -12,6 +24,8 @@ import os
12
  if 'OPENAI_API_KEY' not in os.environ:
13
  os.environ['OPENAI_API_KEY'] = getpass.getpass('Enter your OpenAI API key:')
14
 
 
 
15
  # Ensure a clean slate for the demo
16
  pxt.drop_dir('rag_demo', force=True)
17
  pxt.create_dir('rag_demo')
@@ -36,25 +50,25 @@ def create_prompt(top_k_list: list[dict], question: str) -> str:
36
 
37
  {question}'''
38
 
 
 
39
  def process_files(ground_truth_file, pdf_files):
40
  # Process ground truth file
41
  if ground_truth_file.name.endswith('.csv'):
42
- df = pd.read_csv(ground_truth_file.name)
43
  else:
44
- df = pd.read_excel(ground_truth_file.name)
45
-
46
- queries_t = pxt.create_table('rag_demo.queries', df)
47
 
48
  # Process PDF files
49
  documents_t = pxt.create_table(
50
  'rag_demo.documents',
51
  {'document': pxt.DocumentType()}
52
  )
53
-
54
  for pdf_file in pdf_files:
55
  documents_t.insert({'document': pdf_file.name})
56
 
57
- # Create chunks view
58
  chunks_t = pxt.create_view(
59
  'rag_demo.chunks',
60
  documents_t,
@@ -71,12 +85,12 @@ def process_files(ground_truth_file, pdf_files):
71
  # Create top_k query
72
  @chunks_t.query
73
  def top_k(query_text: str):
74
- sim = chunks_t.text.similarity(query_text)
75
- return (
76
- chunks_t.order_by(sim, asc=False)
77
- .select(chunks_t.text, sim=sim)
78
- .limit(5)
79
- )
80
 
81
  # Add computed columns to queries_t
82
  queries_t['question_context'] = chunks_t.top_k(queries_t.Question)
@@ -96,6 +110,12 @@ def process_files(ground_truth_file, pdf_files):
96
  }
97
  ]
98
 
 
 
 
 
 
 
99
  # Add OpenAI response column
100
  queries_t['response'] = openai.chat_completions(
101
  model='gpt-4-0125-preview', messages=messages
@@ -104,10 +124,6 @@ def process_files(ground_truth_file, pdf_files):
104
 
105
  return "Files processed successfully!"
106
 
107
- def query_llm(question):
108
- queries_t = pxt.get_table('rag_demo.queries')
109
- chunks_t = pxt.get_table('rag_demo.chunks')
110
-
111
  # Perform top-k lookup
112
  context = chunks_t.top_k(question).collect()
113
 
@@ -140,21 +156,22 @@ def query_llm(question):
140
  # Gradio interface
141
  with gr.Blocks() as demo:
142
  gr.Markdown("# RAG Demo App")
143
-
144
  with gr.Row():
145
- ground_truth_file = gr.File(label="Upload Ground Truth (CSV or XLSX)")
146
  pdf_files = gr.File(label="Upload PDF Documents", file_count="multiple")
147
-
148
  process_button = gr.Button("Process Files")
149
  process_output = gr.Textbox(label="Processing Output")
150
-
151
  question_input = gr.Textbox(label="Enter your question")
152
  query_button = gr.Button("Query LLM")
153
-
154
  output_dataframe = gr.Dataframe(label="LLM Outputs")
155
 
156
  process_button.click(process_files, inputs=[ground_truth_file, pdf_files], outputs=process_output)
157
  query_button.click(query_llm, inputs=question_input, outputs=output_dataframe)
158
 
159
  if __name__ == "__main__":
160
- demo.launch()
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """LLM Comparison
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/156SKaX3DY6jwOhcpwZVM5AiLscOAbNNJ
8
+ """
9
+
10
+ # Commented out IPython magic to ensure Python compatibility.
11
+ # %pip install -qU pixeltable gradio sentence-transformers tiktoken openai openpyxl
12
+
13
  import gradio as gr
14
  import pandas as pd
15
  import pixeltable as pxt
 
24
  if 'OPENAI_API_KEY' not in os.environ:
25
  os.environ['OPENAI_API_KEY'] = getpass.getpass('Enter your OpenAI API key:')
26
 
27
+ """Pixeltable Set up"""
28
+
29
  # Ensure a clean slate for the demo
30
  pxt.drop_dir('rag_demo', force=True)
31
  pxt.create_dir('rag_demo')
 
50
 
51
  {question}'''
52
 
53
+ """Gradio Application"""
54
+
55
  def process_files(ground_truth_file, pdf_files):
56
  # Process ground truth file
57
  if ground_truth_file.name.endswith('.csv'):
58
+ queries_t = pxt.io.import_csv(rag_demo.queries, ground_truth_file.name)
59
  else:
60
+ queries_t = pxt.io.import_excel(rag_demo.queries, ground_truth_file.name)
 
 
61
 
62
  # Process PDF files
63
  documents_t = pxt.create_table(
64
  'rag_demo.documents',
65
  {'document': pxt.DocumentType()}
66
  )
67
+
68
  for pdf_file in pdf_files:
69
  documents_t.insert({'document': pdf_file.name})
70
 
71
+ # Create chunks view
72
  chunks_t = pxt.create_view(
73
  'rag_demo.chunks',
74
  documents_t,
 
85
  # Create top_k query
86
  @chunks_t.query
87
  def top_k(query_text: str):
88
+ sim = chunks_t.text.similarity(query_text)
89
+ return (
90
+ chunks_t.order_by(sim, asc=False)
91
+ .select(chunks_t.text, sim=sim)
92
+ .limit(5)
93
+ )
94
 
95
  # Add computed columns to queries_t
96
  queries_t['question_context'] = chunks_t.top_k(queries_t.Question)
 
110
  }
111
  ]
112
 
113
+ def query_llm(question, ground_truth_file, pdf_files):
114
+ queries_t = pxt.get_table('rag_demo.queries')
115
+ chunks_t = pxt.get_table('rag_demo.chunks')
116
+
117
+
118
+
119
  # Add OpenAI response column
120
  queries_t['response'] = openai.chat_completions(
121
  model='gpt-4-0125-preview', messages=messages
 
124
 
125
  return "Files processed successfully!"
126
 
 
 
 
 
127
  # Perform top-k lookup
128
  context = chunks_t.top_k(question).collect()
129
 
 
156
  # Gradio interface
157
  with gr.Blocks() as demo:
158
  gr.Markdown("# RAG Demo App")
159
+
160
  with gr.Row():
161
+ ground_truth_file = gr.File(label="Upload Ground Truth (CSV or XLSX)", file_count="single")
162
  pdf_files = gr.File(label="Upload PDF Documents", file_count="multiple")
163
+
164
  process_button = gr.Button("Process Files")
165
  process_output = gr.Textbox(label="Processing Output")
166
+
167
  question_input = gr.Textbox(label="Enter your question")
168
  query_button = gr.Button("Query LLM")
169
+
170
  output_dataframe = gr.Dataframe(label="LLM Outputs")
171
 
172
  process_button.click(process_files, inputs=[ground_truth_file, pdf_files], outputs=process_output)
173
  query_button.click(query_llm, inputs=question_input, outputs=output_dataframe)
174
 
175
  if __name__ == "__main__":
176
+ demo.launch()
177
+