d / app.py
DrishtiSharma's picture
Update app.py
cb955b0 verified
import streamlit as st
import pandas as pd
import os
from pandasai import SmartDataframe
from pandasai.llm import OpenAI
import tempfile
import matplotlib.pyplot as plt
from datasets import load_dataset
import time
# Page configuration is issued before any other Streamlit call in this script.
st.set_page_config(layout='wide')

# Read the OpenAI API key from the environment.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    # Fail early with a readable message instead of handing an empty key to the
    # LLM wrapper; nothing below can answer queries without it.
    st.error(
        "OPENAI_API_KEY is not set. Define it in the environment and restart the app."
    )
    st.stop()

# LLM shared by every query issued through chat_with_csv.
llm = OpenAI(api_token=openai_api_key)
# Chat with CSV
def chat_with_csv(df, prompt):
    """Answer a natural-language question about ``df``.

    Wraps ``df`` in a ``SmartDataframe`` configured with the module-level
    ``llm`` and forwards ``prompt`` to its ``chat`` method.

    Args:
        df: The DataFrame to query.
        prompt: The user's question as plain text.

    Returns:
        Whatever ``SmartDataframe.chat`` returns for this prompt.
    """
    smart_df = SmartDataframe(df, config={"llm": llm})
    return smart_df.chat(prompt)
# Dataset loading without caching to support progress bar
def load_huggingface_dataset(dataset_name):
    """Load the ``train`` split of a Hugging Face dataset as a pandas DataFrame.

    A Streamlit progress bar is advanced while loading and reset to 0 when
    loading fails.

    Args:
        dataset_name: Hub identifier of the dataset, e.g. ``"HUPD/hupd"``.
            Surrounding whitespace is ignored.

    Returns:
        A ``pandas.DataFrame`` holding the rows of the ``train`` split.

    Raises:
        ValueError: If ``dataset_name`` is empty or only whitespace.
        Exception: Any error raised while loading or converting the dataset
            is re-raised unchanged after the progress bar is reset.
    """
    dataset_name = (dataset_name or "").strip()
    if not dataset_name:
        raise ValueError("Please enter a Hugging Face dataset name.")

    # NOTE(security): trust_remote_code=True lets the dataset repository run its
    # own loading code, and the repository name comes from user input. Kept for
    # backward compatibility -- review before exposing this app publicly.
    load_kwargs = {"split": "train", "trust_remote_code": True}

    # `name="sample"` and `uniform_split=True` look like options of the HUPD
    # loader (the UI default); presumably other datasets reject them, so they
    # are only sent for HUPD -- confirm against the dataset card.
    if dataset_name.casefold() == "hupd/hupd":
        load_kwargs.update(name="sample", uniform_split=True)

    progress_bar = st.progress(0)
    try:
        progress_bar.progress(10)
        dataset = load_dataset(dataset_name, **load_kwargs)
        progress_bar.progress(50)
        if hasattr(dataset, "to_pandas"):
            df = dataset.to_pandas()
        else:
            df = pd.DataFrame(dataset)
        progress_bar.progress(100)
        return df
    except Exception:
        progress_bar.progress(0)
        # Bare raise keeps the original traceback intact for the caller.
        raise
# Load CSV file
def load_uploaded_csv(uploaded_file):
    """Parse an uploaded CSV file into a pandas DataFrame.

    A Streamlit progress bar is advanced while parsing and reset to 0 when
    parsing fails.

    Args:
        uploaded_file: File-like object (or path) accepted by
            ``pandas.read_csv``.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        Exception: Any error raised by ``pandas.read_csv`` is re-raised
            unchanged after the progress bar is reset.
    """
    progress_bar = st.progress(0)
    try:
        progress_bar.progress(10)
        # The same buffer may be handed to this function more than once; rewind
        # it so a previously consumed stream is not parsed as an empty file.
        if hasattr(uploaded_file, "seek"):
            uploaded_file.seek(0)
        progress_bar.progress(50)
        df = pd.read_csv(uploaded_file)
        progress_bar.progress(100)
        return df
    except Exception:
        progress_bar.progress(0)
        # Bare raise keeps the original traceback intact for the caller.
        raise
# Dataset selection logic
def load_dataset_into_session():
    """Render the dataset-source picker and store the chosen data in session state.

    Shows a horizontal radio with three sources (repo file, Hugging Face
    dataset, uploaded CSV; the Hugging Face option is preselected). On a
    successful load the DataFrame is written to ``st.session_state.df`` and a
    success message is shown; any exception is reported with ``st.error``
    instead of propagating. Returns ``None``.
    """
    repo_choice = "Use Repo Directory Dataset"
    hub_choice = "Use Hugging Face Dataset"
    upload_choice = "Upload CSV File"

    selected = st.radio(
        "Select Dataset Input:",
        [repo_choice, hub_choice, upload_choice],
        index=1,
        horizontal=True,
    )

    if selected == repo_choice:
        repo_csv = "./source/test.csv"
        # Nothing to do until the user presses the button.
        if not st.button("Load Dataset"):
            return
        try:
            with st.spinner("Loading dataset from the repo directory..."):
                st.session_state.df = pd.read_csv(repo_csv)
            st.success(f"File loaded successfully from '{repo_csv}'!")
        except Exception as err:
            st.error(f"Error loading dataset from the repo directory: {err}")
        return

    if selected == hub_choice:
        dataset_name = st.text_input("Enter Hugging Face Dataset Name:", value="HUPD/hupd")
        if not st.button("Load Dataset"):
            return
        try:
            st.session_state.df = load_huggingface_dataset(dataset_name)
            st.success(f"Hugging Face Dataset '{dataset_name}' loaded successfully!")
        except Exception as err:
            st.error(f"Error loading Hugging Face dataset: {err}")
        return

    if selected == upload_choice:
        uploaded_file = st.file_uploader("Upload a CSV File:", type=["csv"])
        # No file chosen yet: leave the current session DataFrame untouched.
        if not uploaded_file:
            return
        try:
            st.session_state.df = load_uploaded_csv(uploaded_file)
            st.success("File uploaded successfully!")
        except Exception as err:
            st.error(f"Error reading uploaded file: {err}")
# Streamlit app main
st.title("ChatCSV")

# Seed session state so later reads never hit a missing key.
if "df" not in st.session_state:
    st.session_state.df = pd.DataFrame()
if "user_query" not in st.session_state:
    st.session_state.user_query = ""

st.header("Load Your Dataset")
load_dataset_into_session()

# The preview and chat sections only appear once a non-empty dataset is loaded.
if "df" in st.session_state and not st.session_state.df.empty:
    st.subheader("Dataset Preview")
    num_rows = st.slider("Select number of rows to display:", min_value=5, max_value=50, value=10)
    st.dataframe(st.session_state.df.head(num_rows))

    st.subheader("Chat with Your Dataset")
    # Text area for user query with session state persistence
    st.session_state.user_query = st.text_area("Enter your query:", value=st.session_state.user_query)

    if st.button("Run Query"):
        if st.session_state.user_query.strip():
            with st.spinner("Processing your query..."):
                try:
                    result = chat_with_csv(st.session_state.df, st.session_state.user_query)
                    # Render by result type instead of stringifying everything.
                    # NOTE(review): assumes the chat call can hand back a
                    # DataFrame, a path to a saved chart image, or None --
                    # confirm against the installed pandasai version.
                    if result is None:
                        st.warning("The query did not return a result.")
                    elif isinstance(result, pd.DataFrame):
                        st.dataframe(result)
                    elif (
                        isinstance(result, str)
                        and result.lower().endswith((".png", ".jpg", ".jpeg"))
                        and os.path.isfile(result)
                    ):
                        st.image(result)
                    else:
                        # Original behaviour for text, numbers and anything else.
                        st.success(result)
                except Exception as e:
                    st.error(f"Error processing your query: {e}")
        else:
            st.warning("Please enter a query before running.")