d
File size: 4,463 Bytes
b193f65
 
1c6d353
518bfb0
1c6d353
 
 
b193f65
 
 
34b8355
 
b193f65
34b8355
b193f65
 
34b8355
 
 
 
 
8fcfa20
46133c4
34b8355
 
329fe9b
b193f65
 
 
 
 
 
 
 
 
 
34b8355
b193f65
 
34b8355
b193f65
 
34b8355
b193f65
 
 
 
34b8355
b193f65
 
34b8355
b193f65
 
34b8355
b193f65
 
329fe9b
b193f65
 
 
329fe9b
b193f65
 
 
 
 
 
 
 
 
 
 
 
 
34b8355
b193f65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb955b0
b193f65
f615ebf
34b8355
 
b193f65
f615ebf
 
 
 
b193f65
 
 
34b8355
b193f65
34b8355
 
b193f65
 
f615ebf
 
 
 
b193f65
f615ebf
b193f65
 
46133c4
f615ebf
b193f65
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import streamlit as st
import pandas as pd
import os
from pandasai import SmartDataframe
from pandasai.llm import OpenAI
import tempfile
import matplotlib.pyplot as plt
from datasets import load_dataset
import time

# Page configuration is issued before any other Streamlit command in the script.
st.set_page_config(layout='wide')

# Read the OpenAI credential from the environment.
openai_api_key = os.getenv("OPENAI_API_KEY")

# Without a key the LLM client cannot authenticate. Stop here with a readable
# message instead of letting client construction (or the first query) fail
# with a traceback.
if not openai_api_key:
    st.error(
        "The OPENAI_API_KEY environment variable is not set. "
        "Set it and restart the app to chat with your dataset."
    )
    st.stop()

# Shared LLM handle used by chat_with_csv.
llm = OpenAI(api_token=openai_api_key)

# Chat with CSV
def chat_with_csv(df, prompt):
    """Ask a natural-language question about ``df`` and return the answer.

    The dataframe is wrapped in a ``SmartDataframe`` configured with the
    module-level ``llm``; the return value is whatever its ``chat`` method
    produces for ``prompt``.
    """
    smart_frame = SmartDataframe(df, config={"llm": llm})
    return smart_frame.chat(prompt)

# Dataset loading without caching to support progress bar
def load_huggingface_dataset(dataset_name):
    """Load the ``train`` split of a Hugging Face dataset as a DataFrame.

    A Streamlit progress bar is advanced at fixed checkpoints (10, 50, 100)
    and reset to 0 if anything fails; the exception is then re-raised for
    the caller to report.

    Args:
        dataset_name: Dataset identifier passed to ``load_dataset``.

    Returns:
        A pandas DataFrame built from the loaded dataset.
    """
    bar = st.progress(0)
    try:
        bar.progress(10)
        # The "sample" config name and the extra ``uniform_split`` keyword are
        # passed for every dataset name, whatever the caller supplies.
        # NOTE(review): trust_remote_code=True is combined with a
        # caller-supplied dataset name -- confirm this is acceptable for the
        # deployment.
        hf_dataset = load_dataset(
            dataset_name,
            name="sample",
            split="train",
            trust_remote_code=True,
            uniform_split=True,
        )
        bar.progress(50)
        frame = (
            hf_dataset.to_pandas()
            if hasattr(hf_dataset, "to_pandas")
            else pd.DataFrame(hf_dataset)
        )
        bar.progress(100)
        return frame
    except Exception:
        bar.progress(0)
        raise

# Load CSV file
def load_uploaded_csv(uploaded_file):
    """Parse an uploaded CSV into a DataFrame while updating a progress bar.

    Args:
        uploaded_file: File-like object accepted by ``pandas.read_csv``.

    Returns:
        The parsed pandas DataFrame.

    Raises:
        Exception: Whatever ``pandas.read_csv`` raises; the progress bar is
            reset to 0 before the error propagates.
    """
    bar = st.progress(0)
    try:
        bar.progress(10)
        # Fixed one-second pause before parsing (presumably so the progress
        # bar is visible -- confirm it is still wanted).
        time.sleep(1)
        bar.progress(50)
        frame = pd.read_csv(uploaded_file)
        bar.progress(100)
    except Exception:
        bar.progress(0)
        raise
    else:
        return frame

# Dataset selection logic
def load_dataset_into_session():
    """Render the dataset-source picker and load the choice into session state.

    Three sources are offered: a CSV bundled in the repo directory, a Hugging
    Face dataset, and an uploaded CSV file. On success the resulting DataFrame
    is stored in ``st.session_state.df``; on failure an error box is shown and
    the previously stored DataFrame is left untouched.

    An uploaded file is parsed only once. Streamlit re-runs the whole script
    on every widget interaction, and the uploader keeps returning the same
    file, so the identity of the last parsed upload is remembered in
    ``st.session_state["_uploaded_csv_id"]`` and parsing is skipped while it
    is unchanged.
    """
    upload_key = "_uploaded_csv_id"

    input_option = st.radio(
        "Select Dataset Input:",
        ["Use Repo Directory Dataset", "Use Hugging Face Dataset", "Upload CSV File"],
        index=1,
        horizontal=True,
    )

    # Forget the remembered upload as soon as another source is selected, so
    # that uploading the same file again later replaces whatever dataset was
    # loaded in the meantime.
    if input_option != "Upload CSV File" and upload_key in st.session_state:
        del st.session_state[upload_key]

    if input_option == "Use Repo Directory Dataset":
        file_path = "./source/test.csv"
        if st.button("Load Dataset"):
            try:
                with st.spinner("Loading dataset from the repo directory..."):
                    st.session_state.df = pd.read_csv(file_path)
                st.success(f"File loaded successfully from '{file_path}'!")
            except Exception as e:
                st.error(f"Error loading dataset from the repo directory: {e}")

    elif input_option == "Use Hugging Face Dataset":
        dataset_name = st.text_input("Enter Hugging Face Dataset Name:", value="HUPD/hupd")
        if st.button("Load Dataset"):
            try:
                st.session_state.df = load_huggingface_dataset(dataset_name)
                st.success(f"Hugging Face Dataset '{dataset_name}' loaded successfully!")
            except Exception as e:
                st.error(f"Error loading Hugging Face dataset: {e}")

    elif input_option == "Upload CSV File":
        uploaded_file = st.file_uploader("Upload a CSV File:", type=["csv"])
        if uploaded_file is None:
            # No file selected (or it was removed): nothing is remembered.
            if upload_key in st.session_state:
                del st.session_state[upload_key]
        else:
            # Prefer the uploader's per-upload id when the object exposes one;
            # otherwise fall back to the (name, size) pair.
            upload_id = getattr(uploaded_file, "file_id", None) or (
                uploaded_file.name,
                uploaded_file.size,
            )
            try:
                if st.session_state.get(upload_key) != upload_id:
                    st.session_state.df = load_uploaded_csv(uploaded_file)
                    # Recorded only after a successful parse, so a failed
                    # load is retried on the next rerun.
                    st.session_state[upload_key] = upload_id
                st.success("File uploaded successfully!")
            except Exception as e:
                st.error(f"Error reading uploaded file: {e}")

# Streamlit app main
st.title("ChatCSV")

# Ensure session state for dataframe
if "df" not in st.session_state:
    st.session_state.df = pd.DataFrame()

# Ensure session state for user query
if "user_query" not in st.session_state:
    st.session_state.user_query = ""

st.header("Load Your Dataset")
load_dataset_into_session()

# "df" always exists thanks to the initialisation above, so only emptiness
# needs to be checked before showing the preview and chat sections.
if not st.session_state.df.empty:
    st.subheader("Dataset Preview")
    num_rows = st.slider("Select number of rows to display:", min_value=5, max_value=50, value=10)
    st.dataframe(st.session_state.df.head(num_rows))

    st.subheader("Chat with Your Dataset")

    # Text area for user query with session state persistence
    st.session_state.user_query = st.text_area("Enter your query:", value=st.session_state.user_query)

    if st.button("Run Query"):
        if st.session_state.user_query.strip():
            with st.spinner("Processing your query..."):
                try:
                    result = chat_with_csv(st.session_state.df, st.session_state.user_query)
                    # Render the answer according to its type instead of
                    # forcing everything into a text banner: tabular answers
                    # go to a table widget, a path to an existing image file
                    # is shown as an image, anything else keeps the original
                    # success banner.
                    if isinstance(result, (pd.DataFrame, pd.Series)):
                        st.dataframe(result)
                    elif (
                        isinstance(result, str)
                        and result.lower().endswith((".png", ".jpg", ".jpeg"))
                        and os.path.isfile(result)
                    ):
                        st.image(result)
                    else:
                        st.success(result)
                except Exception as e:
                    st.error(f"Error processing your query: {e}")
        else:
            st.warning("Please enter a query before running.")