DrishtiSharma committed on
Commit
34b8355
·
verified ·
1 Parent(s): 329fe9b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -40
app.py CHANGED
@@ -1,24 +1,32 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import os
4
- from pandasai import SmartDataframe
5
  from pandasai.llm import OpenAI
6
  import tempfile
7
  import matplotlib.pyplot as plt
8
  from datasets import load_dataset
9
- from langchain_groq import ChatGroq
10
- from langchain_openai import ChatOpenAI
11
  import time
12
 
 
 
13
 
 
14
  openai_api_key = os.getenv("OPENAI_API_KEY")
15
 
 
 
 
 
 
 
 
 
 
16
  # Dataset loading without caching to support progress bar
17
  def load_huggingface_dataset(dataset_name):
18
- # Initialize progress bar
19
  progress_bar = st.progress(0)
20
  try:
21
- # Incrementally update progress
22
  progress_bar.progress(10)
23
  dataset = load_dataset(dataset_name, name="sample", split="train", trust_remote_code=True, uniform_split=True)
24
  progress_bar.progress(50)
@@ -26,25 +34,24 @@ def load_huggingface_dataset(dataset_name):
26
  df = dataset.to_pandas()
27
  else:
28
  df = pd.DataFrame(dataset)
29
- progress_bar.progress(100) # Final update to 100%
30
  return df
31
  except Exception as e:
32
- progress_bar.progress(0) # Reset progress bar on failure
33
  raise e
34
 
 
35
  def load_uploaded_csv(uploaded_file):
36
- # Initialize progress bar
37
  progress_bar = st.progress(0)
38
  try:
39
- # Simulate progress
40
  progress_bar.progress(10)
41
- time.sleep(1) # Simulate file processing delay
42
  progress_bar.progress(50)
43
  df = pd.read_csv(uploaded_file)
44
- progress_bar.progress(100) # Final update
45
  return df
46
  except Exception as e:
47
- progress_bar.progress(0) # Reset progress bar on failure
48
  raise e
49
 
50
  # Dataset selection logic
@@ -54,7 +61,6 @@ def load_dataset_into_session():
54
  ["Use Repo Directory Dataset", "Use Hugging Face Dataset", "Upload CSV File"], index=1, horizontal=True
55
  )
56
 
57
- # Option 1: Load dataset from the repo directory
58
  if input_option == "Use Repo Directory Dataset":
59
  file_path = "./source/test.csv"
60
  if st.button("Load Dataset"):
@@ -65,11 +71,8 @@ def load_dataset_into_session():
65
  except Exception as e:
66
  st.error(f"Error loading dataset from the repo directory: {e}")
67
 
68
- # Option 2: Load dataset from Hugging Face
69
  elif input_option == "Use Hugging Face Dataset":
70
- dataset_name = st.text_input(
71
- "Enter Hugging Face Dataset Name:", value="HUPD/hupd"
72
- )
73
  if st.button("Load Dataset"):
74
  try:
75
  st.session_state.df = load_huggingface_dataset(dataset_name)
@@ -77,7 +80,6 @@ def load_dataset_into_session():
77
  except Exception as e:
78
  st.error(f"Error loading Hugging Face dataset: {e}")
79
 
80
- # Option 3: Upload CSV File
81
  elif input_option == "Upload CSV File":
82
  uploaded_file = st.file_uploader("Upload a CSV File:", type=["csv"])
83
  if uploaded_file:
@@ -87,39 +89,23 @@ def load_dataset_into_session():
87
  except Exception as e:
88
  st.error(f"Error reading uploaded file: {e}")
89
 
90
- # Load dataset into session
91
- load_dataset_into_session()
92
-
93
- if "df" in st.session_state and llm:
94
- df = st.session_state.df
95
-
96
- # Display dataset metadata
97
- st.write("### Dataset Metadata")
98
- st.text(f"Number of Rows: {df.shape[0]}")
99
- st.text(f"Number of Columns: {df.shape[1]}")
100
- st.text(f"Column Names: {', '.join(df.columns)}")
101
-
102
- # Display dataset preview
103
- st.write("### Dataset Preview")
104
- num_rows = st.slider("Select number of rows to display:", min_value=5, max_value=50, value=10)
105
- st.dataframe(df.head(num_rows))
106
-
107
-
108
  # Streamlit app main
109
- st.set_page_config(layout='wide')
110
  st.title("ChatCSV powered by LLM")
111
 
 
 
112
 
113
  st.header("Load Your Dataset")
114
  load_dataset_into_session()
115
 
116
- if not st.session_state.df.empty:
117
  st.subheader("Dataset Preview")
118
- st.dataframe(st.session_state.df, use_container_width=True)
 
119
 
120
  st.subheader("Chat with Your Dataset")
121
  user_query = st.text_area("Enter your query:")
122
-
123
  if st.button("Run Query"):
124
  if user_query.strip():
125
  with st.spinner("Processing your query..."):
 
1
  import streamlit as st
2
  import pandas as pd
3
  import os
4
+ from pandasai import SmartDataframe, PandasAI
5
  from pandasai.llm import OpenAI
6
  import tempfile
7
  import matplotlib.pyplot as plt
8
  from datasets import load_dataset
 
 
9
  import time
10
 
11
+ # Set Streamlit page config FIRST
12
+ st.set_page_config(layout='wide')
13
 
14
+ # Set API key
15
  openai_api_key = os.getenv("OPENAI_API_KEY")
16
 
17
+ # Define the LLM
18
+ llm = OpenAI(api_token=openai_api_key)
19
+
20
+ # Chat with CSV
21
+ def chat_with_csv(df, prompt):
22
+ pandas_ai = PandasAI(llm)
23
+ result = pandas_ai.run(df, prompt=prompt)
24
+ return result
25
+
26
  # Dataset loading without caching to support progress bar
27
  def load_huggingface_dataset(dataset_name):
 
28
  progress_bar = st.progress(0)
29
  try:
 
30
  progress_bar.progress(10)
31
  dataset = load_dataset(dataset_name, name="sample", split="train", trust_remote_code=True, uniform_split=True)
32
  progress_bar.progress(50)
 
34
  df = dataset.to_pandas()
35
  else:
36
  df = pd.DataFrame(dataset)
37
+ progress_bar.progress(100)
38
  return df
39
  except Exception as e:
40
+ progress_bar.progress(0)
41
  raise e
42
 
43
+ # Load CSV file
44
  def load_uploaded_csv(uploaded_file):
 
45
  progress_bar = st.progress(0)
46
  try:
 
47
  progress_bar.progress(10)
48
+ time.sleep(1)
49
  progress_bar.progress(50)
50
  df = pd.read_csv(uploaded_file)
51
+ progress_bar.progress(100)
52
  return df
53
  except Exception as e:
54
+ progress_bar.progress(0)
55
  raise e
56
 
57
  # Dataset selection logic
 
61
  ["Use Repo Directory Dataset", "Use Hugging Face Dataset", "Upload CSV File"], index=1, horizontal=True
62
  )
63
 
 
64
  if input_option == "Use Repo Directory Dataset":
65
  file_path = "./source/test.csv"
66
  if st.button("Load Dataset"):
 
71
  except Exception as e:
72
  st.error(f"Error loading dataset from the repo directory: {e}")
73
 
 
74
  elif input_option == "Use Hugging Face Dataset":
75
+ dataset_name = st.text_input("Enter Hugging Face Dataset Name:", value="HUPD/hupd")
 
 
76
  if st.button("Load Dataset"):
77
  try:
78
  st.session_state.df = load_huggingface_dataset(dataset_name)
 
80
  except Exception as e:
81
  st.error(f"Error loading Hugging Face dataset: {e}")
82
 
 
83
  elif input_option == "Upload CSV File":
84
  uploaded_file = st.file_uploader("Upload a CSV File:", type=["csv"])
85
  if uploaded_file:
 
89
  except Exception as e:
90
  st.error(f"Error reading uploaded file: {e}")
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  # Streamlit app main
 
93
  st.title("ChatCSV powered by LLM")
94
 
95
+ if "df" not in st.session_state:
96
+ st.session_state.df = pd.DataFrame()
97
 
98
  st.header("Load Your Dataset")
99
  load_dataset_into_session()
100
 
101
+ if "df" in st.session_state and not st.session_state.df.empty:
102
  st.subheader("Dataset Preview")
103
+ num_rows = st.slider("Select number of rows to display:", min_value=5, max_value=50, value=10)
104
+ st.dataframe(st.session_state.df.head(num_rows))
105
 
106
  st.subheader("Chat with Your Dataset")
107
  user_query = st.text_area("Enter your query:")
108
+
109
  if st.button("Run Query"):
110
  if user_query.strip():
111
  with st.spinner("Processing your query..."):