Spaces:
Sleeping
Sleeping
File size: 4,463 Bytes
b193f65 1c6d353 518bfb0 1c6d353 b193f65 34b8355 b193f65 34b8355 b193f65 34b8355 8fcfa20 46133c4 34b8355 329fe9b b193f65 34b8355 b193f65 34b8355 b193f65 34b8355 b193f65 34b8355 b193f65 34b8355 b193f65 34b8355 b193f65 329fe9b b193f65 329fe9b b193f65 34b8355 b193f65 cb955b0 b193f65 f615ebf 34b8355 b193f65 f615ebf b193f65 34b8355 b193f65 34b8355 b193f65 f615ebf b193f65 f615ebf b193f65 46133c4 f615ebf b193f65 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import streamlit as st
import pandas as pd
import os
from pandasai import SmartDataframe
from pandasai.llm import OpenAI
import tempfile
import matplotlib.pyplot as plt
from datasets import load_dataset
import time
# Page configuration is issued before any other Streamlit command in the script.
st.set_page_config(layout='wide')

# The OpenAI credential comes from the environment so it never lives in source.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    # os.getenv returns None when the variable is absent. Stop here with a
    # readable message instead of letting the LLM client fail later on.
    st.error("OPENAI_API_KEY environment variable is not set; cannot start the chat backend.")
    st.stop()

# Shared LLM client used by chat_with_csv for every query.
llm = OpenAI(api_token=openai_api_key)
# Chat with CSV
def chat_with_csv(df, prompt):
    """Ask a natural-language question about ``df`` and return the answer.

    The dataframe is wrapped in a pandasai ``SmartDataframe`` configured with
    the module-level ``llm``; ``prompt`` is then forwarded to its ``chat``
    method.

    Args:
        df: The data to query; handed unchanged to ``SmartDataframe``.
        prompt: The question to ask about the data.

    Returns:
        Whatever ``SmartDataframe.chat`` returns for ``prompt``.
    """
    smart_df = SmartDataframe(df, config={"llm": llm})
    return smart_df.chat(prompt)
# Dataset loading without caching to support progress bar
def load_huggingface_dataset(dataset_name):
    """Load a Hugging Face dataset and return it as a pandas DataFrame.

    A Streamlit progress bar is advanced through 10 / 50 / 100 percent as the
    load proceeds and reset to 0 if anything fails.

    Args:
        dataset_name: Dataset identifier passed to ``datasets.load_dataset``.

    Returns:
        The ``train`` split of the ``sample`` configuration as a DataFrame.

    Raises:
        Exception: Any error raised while loading or converting is re-raised
            unchanged after the progress bar has been reset.
    """
    bar = st.progress(0)
    try:
        bar.progress(10)
        # NOTE(review): name="sample" and uniform_split=True are sent for every
        # dataset; they look tailored to the UI's default "HUPD/hupd" entry and
        # other datasets may reject them - confirm before relying on this.
        hf_data = load_dataset(
            dataset_name,
            name="sample",
            split="train",
            trust_remote_code=True,
            uniform_split=True,
        )
        bar.progress(50)
        # Use the dataset's own converter when it has one; otherwise let pandas
        # build the frame from the object directly.
        frame = (
            hf_data.to_pandas()
            if hasattr(hf_data, "to_pandas")
            else pd.DataFrame(hf_data)
        )
        bar.progress(100)
    except Exception:
        bar.progress(0)
        raise
    return frame
# Load CSV file
def load_uploaded_csv(uploaded_file):
    """Parse an uploaded CSV into a pandas DataFrame, showing a progress bar.

    Args:
        uploaded_file: The CSV source handed to ``pandas.read_csv``.

    Returns:
        The parsed DataFrame.

    Raises:
        Exception: Any error raised while reading is re-raised unchanged after
            the progress bar has been reset to 0.
    """
    indicator = st.progress(0)
    try:
        indicator.progress(10)
        # One-second pause between the 10% and 50% marks; no work happens here.
        time.sleep(1)
        indicator.progress(50)
        frame = pd.read_csv(uploaded_file)
        indicator.progress(100)
    except Exception:
        indicator.progress(0)
        raise
    return frame
# Dataset selection logic
def load_dataset_into_session():
    """Let the user pick a data source and load it into ``st.session_state.df``.

    A horizontal radio offers three sources (repo file, Hugging Face dataset,
    uploaded CSV), with the Hugging Face option preselected. The widgets for
    the chosen source are rendered by the matching helper below. Load failures
    are reported with ``st.error`` and never propagate to the caller.
    """
    choice = st.radio(
        "Select Dataset Input:",
        ["Use Repo Directory Dataset", "Use Hugging Face Dataset", "Upload CSV File"],
        index=1,
        horizontal=True,
    )
    handlers = {
        "Use Repo Directory Dataset": _load_repo_dataset,
        "Use Hugging Face Dataset": _load_hub_dataset,
        "Upload CSV File": _load_uploaded_dataset,
    }
    handler = handlers.get(choice)
    if handler is not None:
        handler()


def _load_repo_dataset():
    """Load the CSV bundled in the repo once the user presses the button."""
    csv_path = "./source/test.csv"
    if not st.button("Load Dataset"):
        return
    try:
        with st.spinner("Loading dataset from the repo directory..."):
            st.session_state.df = pd.read_csv(csv_path)
        st.success(f"File loaded successfully from '{csv_path}'!")
    except Exception as exc:
        st.error(f"Error loading dataset from the repo directory: {exc}")


def _load_hub_dataset():
    """Load the named Hugging Face dataset once the user presses the button."""
    hub_name = st.text_input("Enter Hugging Face Dataset Name:", value="HUPD/hupd")
    if not st.button("Load Dataset"):
        return
    try:
        st.session_state.df = load_huggingface_dataset(hub_name)
        st.success(f"Hugging Face Dataset '{hub_name}' loaded successfully!")
    except Exception as exc:
        st.error(f"Error loading Hugging Face dataset: {exc}")


def _load_uploaded_dataset():
    """Load a CSV from the file uploader whenever a file is present."""
    csv_upload = st.file_uploader("Upload a CSV File:", type=["csv"])
    if not csv_upload:
        return
    try:
        st.session_state.df = load_uploaded_csv(csv_upload)
        st.success("File uploaded successfully!")
    except Exception as exc:
        st.error(f"Error reading uploaded file: {exc}")
def _render_chat_result(result):
    """Show a chat answer with the Streamlit element that fits its type.

    Tables are rendered with ``st.dataframe`` and paths to existing image
    files with ``st.image``; every other value keeps the plain ``st.success``
    banner.

    Args:
        result: The value returned by ``chat_with_csv``.
    """
    if isinstance(result, (pd.DataFrame, pd.Series)):
        st.dataframe(result)
    elif (
        isinstance(result, str)
        and result.lower().endswith((".png", ".jpg", ".jpeg"))
        and os.path.isfile(result)
    ):
        # NOTE(review): assumes chart answers arrive as the path of a saved
        # image file - confirm against the installed pandasai version.
        st.image(result)
    else:
        st.success(result)


# Streamlit app main
st.title("ChatCSV")

# Seed session state so the reads below never hit a missing key.
if "df" not in st.session_state:
    st.session_state.df = pd.DataFrame()
if "user_query" not in st.session_state:
    st.session_state.user_query = ""

st.header("Load Your Dataset")
load_dataset_into_session()

# The preview and chat sections only appear once a non-empty dataset is loaded.
if "df" in st.session_state and not st.session_state.df.empty:
    st.subheader("Dataset Preview")
    num_rows = st.slider(
        "Select number of rows to display:", min_value=5, max_value=50, value=10
    )
    st.dataframe(st.session_state.df.head(num_rows))

    st.subheader("Chat with Your Dataset")
    # Write the text area's value back into session state.
    st.session_state.user_query = st.text_area(
        "Enter your query:", value=st.session_state.user_query
    )
    if st.button("Run Query"):
        if st.session_state.user_query.strip():
            with st.spinner("Processing your query..."):
                try:
                    result = chat_with_csv(
                        st.session_state.df, st.session_state.user_query
                    )
                    _render_chat_result(result)
                except Exception as e:
                    st.error(f"Error processing your query: {e}")
        else:
            st.warning("Please enter a query before running.")
|