import os

import pandas as pd
import streamlit as st
from datasets import load_dataset
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from pandasai import SmartDataframe
from pandasai.llm import OpenAI

# Load API keys from a local .env file, if present.
load_dotenv()


def initialize_llm(model_choice):
    """Initialize the chosen LLM based on the user's selection."""
    groq_api_key = os.getenv("GROQ_API_KEY")
    openai_api_key = os.getenv("OPENAI_API_KEY")

    if model_choice == "llama-3.3-70b":
        if not groq_api_key:
            st.error("Groq API key is missing. Please set the GROQ_API_KEY environment variable.")
            return None
        st.success("Using model: llama-3.3-70b (Groq)")
        return ChatGroq(groq_api_key=groq_api_key, model="groq/llama-3.3-70b-versatile")
    elif model_choice == "GPT-4o":
        if not openai_api_key:
            st.error("OpenAI API key is missing. Please set the OPENAI_API_KEY environment variable.")
            return None
        st.success("Using model: GPT-4o (OpenAI)")
        return OpenAI(api_token=openai_api_key)
    return None


def load_dataset_into_session():
    """Load a dataset from Hugging Face or via CSV upload into session state."""
    input_option = st.radio(
        "Select Dataset Input:",
        ["Use Hugging Face Dataset", "Upload CSV File"],
    )

    if input_option == "Use Hugging Face Dataset":
        dataset_name = st.text_input("Enter Hugging Face Dataset Name:", value="HUPD/hupd")
        if st.button("Load Dataset"):
            try:
                dataset = load_dataset(
                    dataset_name,
                    name="sample",
                    split="train",
                    trust_remote_code=True,
                    uniform_split=True,
                )
                st.session_state.df = pd.DataFrame(dataset)
                st.success(f"Dataset '{dataset_name}' loaded successfully!")
                st.dataframe(st.session_state.df.head(10))
            except Exception as e:
                st.error(f"Error loading dataset: {e}")
    elif input_option == "Upload CSV File":
        uploaded_file = st.file_uploader("Upload CSV File:", type=["csv"])
        if uploaded_file:
            try:
                st.session_state.df = pd.read_csv(uploaded_file)
                st.success("File uploaded successfully!")
                st.dataframe(st.session_state.df.head(10))
            except Exception as e:
                st.error(f"Error reading file: {e}")


# Keep the loaded DataFrame across Streamlit reruns.
if "df" not in st.session_state:
    st.session_state.df = None

# Streamlit app
st.title("Chat With Your Dataset Using PandasAI")

# Section 1: LLM Selection
st.sidebar.title("Choose Your LLM")
model_choice = st.sidebar.radio(
    "Select a model:",
    ("GPT-4o", "llama-3.3-70b"),
    help="Choose between OpenAI GPT-4o or Groq Llama-3.3-70b.",
)

# Initialize LLM; stop the app if no usable model is configured.
llm = initialize_llm(model_choice)
if not llm:
    st.stop()

# Section 2: Dataset Loading
st.header("Dataset Selection")
load_dataset_into_session()

# Section 3: Query and Interaction
if st.session_state.df is not None:
    st.subheader("Ask Questions About Your Dataset")
    chat_df = SmartDataframe(st.session_state.df, config={"llm": llm})

    user_query = st.text_input("Ask a question about your data:", "")
    if user_query:
        try:
            response = chat_df.chat(user_query)
            st.write("### Response:")
            st.write(response)

            # Re-run the query when it looks like a plotting request so the chart is rendered.
            if any(
                keyword in user_query.lower()
                for keyword in ["plot", "graph", "draw", "visualize", "chart", "visualise"]
            ):
                st.write("### Generating Plot...")
                try:
                    chat_df.chat(user_query)
                except Exception as e:
                    st.error(f"An error occurred while generating the plot: {e}")
        except Exception as e:
            st.error(f"An error occurred: {e}")
else:
    st.info("Please load a dataset to start interacting.")
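
# Minimal usage sketch. Assumptions (not required by the code above, purely
# illustrative): the script is saved as app.py, and a .env file with the API
# keys sits next to it so load_dotenv() can pick them up.
#
#   # .env
#   OPENAI_API_KEY=...
#   GROQ_API_KEY=...
#
#   # launch the app
#   streamlit run app.py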