DrishtiSharma committed on
Commit
56f965d
·
verified ·
1 Parent(s): a36388a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +215 -19
app.py CHANGED
@@ -1,7 +1,10 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import sqlite3
 
 
4
  import os
 
5
  import json
6
  from pathlib import Path
7
  import plotly.express as px
@@ -83,6 +86,214 @@ if st.session_state.df is not None and st.session_state.show_preview:
83
  st.subheader("πŸ“‚ Dataset Preview")
84
  st.dataframe(st.session_state.df.head())
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  # SQL-RAG Analysis
87
  if st.session_state.df is not None:
88
  temp_dir = tempfile.TemporaryDirectory()
@@ -208,31 +419,17 @@ if st.session_state.df is not None:
208
  st.markdown(report_result if report_result else "⚠️ No Report Generated.")
209
 
210
  # Step 4: Generate Visualizations
211
- visualizations = []
212
-
213
- fig_salary = px.box(st.session_state.df, x="job_title", y="salary_in_usd",
214
- title="Salary Distribution by Job Title")
215
- visualizations.append(fig_salary)
216
-
217
- fig_experience = px.bar(
218
- st.session_state.df.groupby("experience_level")["salary_in_usd"].mean().reset_index(),
219
- x="experience_level", y="salary_in_usd",
220
- title="Average Salary by Experience Level"
221
- )
222
- visualizations.append(fig_experience)
223
 
224
- fig_employment = px.box(st.session_state.df, x="employment_type", y="salary_in_usd",
225
- title="Salary Distribution by Employment Type")
226
- visualizations.append(fig_employment)
227
 
228
  # Step 5: Insert Visual Insights
229
  st.markdown("### Visual Insights")
230
- for fig in visualizations:
231
- st.plotly_chart(fig, use_container_width=True)
232
 
233
  # Step 6: Display Concise Conclusion
234
  #st.markdown("#### Conclusion")
235
- st.markdown(conclusion_result if conclusion_result else "⚠️ No Conclusion Generated.")
 
 
236
 
237
  # Full Data Visualization Tab
238
  with tab2:
@@ -262,4 +459,3 @@ with st.sidebar:
262
  st.header("πŸ“š Reference:")
263
  st.markdown("[SQL Agents w CrewAI & Llama 3 - Plaban Nayak](https://github.com/plaban1981/Agents/blob/main/SQL_Agents_with_CrewAI_and_Llama_3.ipynb)")
264
 
265
-
 
1
  import streamlit as st
2
  import pandas as pd
3
  import sqlite3
4
+ import tempfile
5
+ from fpdf import FPDF
6
  import os
7
+ import re
8
  import json
9
  from pathlib import Path
10
  import plotly.express as px
 
86
  st.subheader("πŸ“‚ Dataset Preview")
87
  st.dataframe(st.session_state.df.head())
88
 
# Ask GPT-4o for Visualization Suggestions
def ask_gpt4o_for_visualization(query, df, llm):
    """Ask the LLM which chart best answers ``query`` for the given data.

    Args:
        query: Natural-language question from the user.
        df: Object exposing ``columns`` (the available column names).
        llm: Client exposing ``generate(prompt)``.

    Returns:
        The parsed suggestion as a dict (``chart_type``, ``x_axis``,
        ``y_axis`` and optionally ``group_by``), or ``None`` when the reply
        does not contain a JSON object. In that case an error is shown with
        ``st.error``.
    """
    # Column labels are not guaranteed to be strings (e.g. integer headers).
    columns = ', '.join(map(str, df.columns))
    prompt = f"""
    Analyze the query and suggest the best visualization.
    Query: "{query}"
    Available Columns: {columns}
    Respond in this JSON format:
    {{
        "chart_type": "bar/box/line/scatter",
        "x_axis": "column_name",
        "y_axis": "column_name",
        "group_by": "optional_column_name"
    }}
    """
    reply = llm.generate(prompt)
    # NOTE(review): assumes generate() returns the reply text, or an object
    # carrying it in `.content` -- confirm against the client actually used.
    text = reply if isinstance(reply, str) else getattr(reply, "content", None)

    suggestion = None
    if isinstance(text, str):
        # Models frequently wrap the JSON in prose or ``` fences, so keep
        # only the outermost {...} span before parsing.
        match = re.search(r"\{.*\}", text, re.DOTALL)
        if match:
            try:
                suggestion = json.loads(match.group(0))
            except json.JSONDecodeError:
                suggestion = None

    # Callers use dict access on the result, so anything else is a failure.
    if not isinstance(suggestion, dict):
        st.error("⚠️ GPT-4o failed to generate a valid suggestion.")
        return None
    return suggestion
110
+
# Dynamically generate Plotly visualizations based on GPT-4o suggestions
def generate_visualization(suggestion, df):
    """Build a Plotly Express figure from an LLM chart suggestion.

    Args:
        suggestion: Dict with ``chart_type``, ``x_axis``, ``y_axis`` and an
            optional ``group_by`` entry. Anything that is not a dict is
            treated as "no suggestion".
        df: DataFrame passed to Plotly Express as ``data_frame``.

    Returns:
        The generated figure, or ``None`` when the suggestion is missing,
        incomplete, names an unusable chart type, or Plotly raises.
    """
    # The suggestion step yields None when parsing failed and has already
    # reported that to the user, so just bail out here.
    if not isinstance(suggestion, dict):
        return None

    # `or "bar"` also covers an explicit JSON null for chart_type.
    chart_type = str(suggestion.get("chart_type") or "bar").strip().lower()
    x_axis = suggestion.get("x_axis")
    y_axis = suggestion.get("y_axis")
    group_by = suggestion.get("group_by")

    # Ensure required inputs are available
    if not x_axis or not y_axis:
        st.warning("⚠️ GPT-4o did not provide enough information for the visualization.")
        return None

    # chart_type is model output: only resolve public, callable attributes
    # of plotly.express so stray names cannot select submodules or privates.
    plotly_function = None
    if not chart_type.startswith("_"):
        plotly_function = getattr(px, chart_type, None)

    # Handle unsupported chart types gracefully
    if not callable(plotly_function):
        st.warning(f"⚠️ Unsupported chart type '{chart_type}' suggested by GPT-4o.")
        return None

    # Prepare dynamic parameters for Plotly function
    plot_args = {
        "data_frame": df,
        "x": x_axis,
        "y": y_axis,
    }
    # Grouping is optional: drop a placeholder or unknown column instead of
    # letting it fail the whole chart.
    if isinstance(group_by, str) and group_by in df.columns:
        plot_args["color"] = group_by

    try:
        fig = plotly_function(**plot_args)
        x_label = str(x_axis).replace('_', ' ').title()
        y_label = str(y_axis).replace('_', ' ').title()
        fig.update_layout(
            title=f"{chart_type.title()} Plot of {y_label} by {x_label}",
            xaxis_title=x_label,
            yaxis_title=y_label,
        )
        return fig
    except Exception as e:
        # UI boundary: surface any Plotly failure to the user, don't crash.
        st.error(f"⚠️ Failed to generate visualization: {e}")
        return None
153
+
# Function to create TXT file
def create_text_report_with_viz_temp(report, conclusion, visualizations):
    """Write the report, a summary of each figure and the conclusion to a temp file.

    Args:
        report: Report text placed under the "Analysis Report" heading.
        conclusion: Text appended after the visualization summaries.
        visualizations: Iterable of figure objects exposing
            ``layout.title.text``, ``layout.xaxis.title.text``,
            ``layout.yaxis.title.text`` and ``data`` (traces with ``type``).

    Returns:
        Path of the UTF-8 ``.txt`` file. It is created with ``delete=False``,
        so removing it is left to the caller.
    """
    parts = [f"### Analysis Report\n\n{report}\n\n### Visualizations\n"]

    for i, fig in enumerate(visualizations, start=1):
        fig_title = fig.layout.title.text or f"Visualization {i}"
        x_axis = fig.layout.xaxis.title.text or "X-axis"
        y_axis = fig.layout.yaxis.title.text or "Y-axis"

        parts.append(f"\n{i}. {fig_title}\n")
        parts.append(f" - X-axis: {x_axis}\n")
        parts.append(f" - Y-axis: {y_axis}\n")

        if fig.data:
            # Sorted so the report text is stable; raw set order is not.
            trace_types = sorted({trace.type for trace in fig.data})
            parts.append(f" - Chart Type(s): {', '.join(trace_types)}\n")
        else:
            parts.append(" - No data available in this visualization.\n")

    parts.append(f"\n\n\n{conclusion}")

    with tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode='w', encoding='utf-8') as temp_txt:
        temp_txt.write("".join(parts))
    return temp_txt.name
178
+
179
+
def add_stats_to_figure(fig, df, y_axis, chart_type, value_prefix="$"):
    """Overlay summary statistics of ``df[y_axis]`` on bar, line and scatter figures.

    Args:
        fig: Figure exposing ``add_annotation`` and ``add_hline``.
        df: Data indexed by column name; ``df[y_axis]`` must provide
            ``min``, ``max``, ``mean``, ``median`` and ``std``.
        y_axis: Name of the column to summarise.
        chart_type: Lower-case chart kind. Only "bar", "line" and "scatter"
            receive overlays; "box" and "pie" are returned untouched.
        value_prefix: Text placed before each formatted number. Defaults to
            "$", the prefix this function has always used.

    Returns:
        The same ``fig`` object, modified in place when overlays were added.
    """
    # Decide first whether stats apply at all, so box/pie charts never touch
    # the data (their y column may not even be numeric).
    if chart_type == "box":
        # Box plots already show distribution (no extra stats needed)
        return fig
    if chart_type == "pie":
        # Pie charts don't need statistical overlays
        st.info("📊 Pie charts focus on proportions. No additional stats displayed.")
        return fig
    if chart_type not in ("bar", "line", "scatter"):
        st.warning(f"⚠️ No stats added for unsupported chart type: {chart_type}")
        return fig

    try:
        series = df[y_axis]
        min_val = series.min()
        max_val = series.max()
        avg_val = series.mean()
        median_val = series.median()
        std_dev_val = series.std()

        labelled = [
            ("Min", min_val),
            ("Max", max_val),
            ("Average", avg_val),
            ("Median", median_val),
            ("Std Dev", std_dev_val),
        ]
        # Plotly annotations render a small HTML subset (<b>, <br>), not
        # Markdown: "**" and "\n" would show up literally or be ignored.
        stats_text = "📊 <b>Statistics</b><br><br>" + "<br>".join(
            f"- <b>{label}:</b> {value_prefix}{value:,.2f}" for label, value in labelled
        )
    except (TypeError, ValueError):
        # Non-numeric columns cannot be averaged or number-formatted.
        st.warning(f"⚠️ Cannot compute statistics for non-numeric column: {y_axis}")
        return fig

    # Add annotation box
    fig.add_annotation(
        text=stats_text,
        xref="paper", yref="paper",
        x=1.05, y=1,
        showarrow=False,
        align="left",
        font=dict(size=12, color="black"),
        bordercolor="black",
        borderwidth=1,
        bgcolor="rgba(255, 255, 255, 0.8)"
    )

    # Add horizontal lines for min, median, avg, max
    fig.add_hline(y=min_val, line_dash="dot", line_color="red", annotation_text="Min", annotation_position="bottom right")
    fig.add_hline(y=median_val, line_dash="dash", line_color="orange", annotation_text="Median", annotation_position="top right")
    fig.add_hline(y=avg_val, line_dash="dashdot", line_color="green", annotation_text="Avg", annotation_position="top right")
    fig.add_hline(y=max_val, line_dash="dot", line_color="blue", annotation_text="Max", annotation_position="top right")

    return fig
231
+
232
+
def _pdf_safe_text(text):
    """Return ``text`` as a str limited to characters Latin-1 can encode."""
    if text is None:
        return ""
    # The PDF is written with a built-in font (Arial), which is limited to
    # Latin-1; emoji or typographic quotes raise an encoding error in FPDF.
    # Replace what cannot be encoded instead of failing the export.
    return str(text).encode("latin-1", "replace").decode("latin-1")


# Function to create PDF with report text and visualizations
def create_pdf_report_with_viz(report, conclusion, visualizations):
    """Render the report, conclusion and figures into a temporary PDF file.

    Args:
        report: Analysis text; converted with ``str`` and made Latin-1 safe.
        conclusion: Conclusion text; handled like ``report``.
        visualizations: Iterable of figures exposing ``layout`` titles and
            ``write_image(path)``.

    Returns:
        The ``tempfile.NamedTemporaryFile`` object (``delete=False``) whose
        ``.name`` is the path of the written PDF. The handle is returned
        still open, exactly as before; closing and deleting it is up to the
        caller.
    """
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    # Title (no emoji: the built-in font cannot encode it)
    pdf.set_font("Arial", style="B", size=18)
    pdf.cell(0, 10, "Analysis Report", ln=True, align="C")
    pdf.ln(10)

    # Report Content
    pdf.set_font("Arial", style="B", size=14)
    pdf.cell(0, 10, "Analysis", ln=True)
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, _pdf_safe_text(report))

    pdf.ln(10)
    pdf.set_font("Arial", style="B", size=14)
    pdf.cell(0, 10, "Conclusion", ln=True)
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, _pdf_safe_text(conclusion))

    # Add Visualizations
    pdf.add_page()
    pdf.set_font("Arial", style="B", size=16)
    pdf.cell(0, 10, "Visualizations", ln=True)
    pdf.ln(5)

    # The PNGs are only needed while the PDF is assembled, so they live in a
    # directory that is removed as soon as this block exits.
    with tempfile.TemporaryDirectory() as temp_dir:
        for i, fig in enumerate(visualizations, start=1):
            fig_title = fig.layout.title.text or f"Visualization {i}"
            x_axis = fig.layout.xaxis.title.text or "X-axis"
            y_axis = fig.layout.yaxis.title.text or "Y-axis"

            # Save each visualization as a PNG image
            img_path = os.path.join(temp_dir, f"viz_{i}.png")
            fig.write_image(img_path)

            # Insert Title and Description
            pdf.set_font("Arial", style="B", size=14)
            pdf.multi_cell(0, 10, _pdf_safe_text(f"{i}. {fig_title}"))
            pdf.set_font("Arial", size=12)
            pdf.multi_cell(0, 10, _pdf_safe_text(f"X-axis: {x_axis} | Y-axis: {y_axis}"))
            pdf.ln(3)

            # Embed Visualization
            pdf.image(img_path, w=170)
            pdf.ln(10)

    # Save PDF
    temp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    pdf.output(temp_pdf.name)

    return temp_pdf
289
+
# Characters that change how st.markdown renders text. "$" is included
# because Streamlit treats $...$ as LaTeX, which garbles currency amounts.
_MARKDOWN_SPECIAL_CHARS = re.compile(r"[*_`~$]")


def escape_markdown(text):
    """Backslash-escape Markdown control characters so text renders literally.

    Args:
        text: Any value; it is converted with ``str`` first.

    Returns:
        The string with each ``*``, ``_``, `````, ``~`` and ``$`` preceded by
        a backslash.
    """
    # Ensure text is a string
    text = str(text)
    # r"\\" is a literal backslash; \g<0> re-inserts the matched character.
    return _MARKDOWN_SPECIAL_CHARS.sub(r"\\\g<0>", text)
296
+
297
  # SQL-RAG Analysis
298
  if st.session_state.df is not None:
299
  temp_dir = tempfile.TemporaryDirectory()
 
419
  st.markdown(report_result if report_result else "⚠️ No Report Generated.")
420
 
421
  # Step 4: Generate Visualizations
 
 
 
 
 
 
 
 
 
 
 
 
422
 
 
 
 
423
 
424
  # Step 5: Insert Visual Insights
425
  st.markdown("### Visual Insights")
426
+
 
427
 
428
  # Step 6: Display Concise Conclusion
429
  #st.markdown("#### Conclusion")
430
+
431
+ safe_conclusion = escape_markdown(conclusion_result if conclusion_result else "⚠️ No Conclusion Generated.")
432
+ st.markdown(safe_conclusion)
433
 
434
  # Full Data Visualization Tab
435
  with tab2:
 
459
  st.header("πŸ“š Reference:")
460
  st.markdown("[SQL Agents w CrewAI & Llama 3 - Plaban Nayak](https://github.com/plaban1981/Agents/blob/main/SQL_Agents_with_CrewAI_and_Llama_3.ipynb)")
461