Spaces:

DrishtiSharma
/

test-07

Sleeping

App Files Files Community

test-07 / app.py

DrishtiSharma

Update app.py

3f3f7da verified 18 days ago

raw

history blame

4.86 kB

	import streamlit as st
	import pandas as pd
	import plotly.express as px
	from pandasai import Agent
	from langchain_community.embeddings.openai import OpenAIEmbeddings
	from langchain_community.vectorstores import FAISS
	from langchain_openai import ChatOpenAI
	from langchain.chains import RetrievalQA
	from langchain.schema import Document
	from datasets import load_dataset
	import os

	# Page heading shown at the top of the app
	st.title("Dataset Analysis and Visualization")

	# Read both API credentials from the process environment; each is None
	# when the corresponding variable is not set.
	api_key = os.environ.get("OPENAI_API_KEY")
	pandasai_api_key = os.environ.get("PANDASAI_API_KEY")

	# Guarantee that the session always has a "df" entry, so later sections
	# can test it against None without first checking that it exists.
	if "df" not in st.session_state:
	    st.session_state["df"] = None

	# Dataset loading section: the user picks a source, and the resulting
	# dataframe is stored in st.session_state.df for the rest of the script.
	st.subheader("Load Dataset")
	input_option = st.radio(
	    "Select Dataset Input:",
	    ["Use Hugging Face Dataset", "Upload CSV File"],
	)

	if input_option == "Use Hugging Face Dataset":
	    dataset_name = st.text_input(
	        "Enter Hugging Face Dataset Name:",
	        value="HUPD/hupd",
	    )
	    if st.button("Load Dataset"):
	        try:
	            # NOTE(review): trust_remote_code=True is applied to whatever
	            # name the user typed - confirm this is acceptable for the
	            # deployment. The name/uniform_split arguments look specific
	            # to the default HUPD dataset - verify for other datasets.
	            dataset = load_dataset(
	                dataset_name,
	                name="sample",
	                split="train",
	                trust_remote_code=True,
	                uniform_split=True,
	            )
	            # Keep the loaded data in session state as a dataframe
	            st.session_state.df = pd.DataFrame(dataset)
	            st.success(f"Dataset '{dataset_name}' loaded successfully!")
	        except Exception as err:
	            st.error(f"Error loading dataset: {err}")
	elif input_option == "Upload CSV File":
	    uploaded_file = st.file_uploader("Upload CSV File:", type=["csv"])
	    # The "Load CSV" button is only rendered once a file has been chosen.
	    if uploaded_file:
	        if st.button("Load CSV"):
	            try:
	                # Parse the uploaded CSV and keep it in session state
	                st.session_state.df = pd.read_csv(uploaded_file)
	                st.success("File uploaded successfully!")
	            except Exception as err:
	                st.error(f"Error loading file: {err}")

	# Show the loaded dataframe preview and, once a dataset is available, the
	# three analysis tabs (PandasAI, RAG Q&A, visualization). Without a
	# dataset only a warning is shown.
	import re  # needed only by the visualization tab below

	df = st.session_state.df
	if df is not None:
	    st.subheader("Dataset Preview")
	    st.dataframe(df.head(10))

	    # Set up PandasAI Agent
	    agent = Agent(df)

	    # Set up RAG.
	    # The index and chain are kept in session state and reused for as long
	    # as the same dataframe object is loaded, so a rerun triggered by any
	    # widget does not embed the whole dataset again.
	    # They are only built when an OpenAI key is present and there is at
	    # least one row to index; otherwise the RAG tab shows a notice.
	    # NOTE(review): assumes the OpenAI clients fail at construction when
	    # the key is missing - confirm against the installed versions.
	    qa_chain = None
	    if api_key and not df.empty:
	        rag_cache = (
	            st.session_state["rag_cache"]
	            if "rag_cache" in st.session_state
	            else None
	        )
	        if rag_cache is None or rag_cache["df"] is not df:
	            # Convert DataFrame to documents: one "col: value, ..." text
	            # per row, tagged with the row's index label.
	            documents = [
	                Document(
	                    page_content=", ".join(
	                        f"{col}: {row[col]}" for col in df.columns
	                    ),
	                    metadata={"index": index},
	                )
	                for index, row in df.iterrows()
	            ]
	            embeddings = OpenAIEmbeddings()
	            vectorstore = FAISS.from_documents(documents, embeddings)
	            rag_cache = {
	                # Holding the dataframe itself makes the identity check
	                # above reliable for the lifetime of the cache entry.
	                "df": df,
	                "qa_chain": RetrievalQA.from_chain_type(
	                    llm=ChatOpenAI(),
	                    chain_type="stuff",
	                    retriever=vectorstore.as_retriever(),
	                ),
	            }
	            st.session_state["rag_cache"] = rag_cache
	        qa_chain = rag_cache["qa_chain"]

	    # Create tabs for different functionality
	    tab1, tab2, tab3 = st.tabs(["PandasAI Analysis", "RAG Q&A", "Data Visualization"])

	    with tab1:
	        st.header("Data Analysis with PandasAI")
	        pandas_question = st.text_input("Ask a question about your data (PandasAI):")
	        if pandas_question:
	            result = agent.chat(pandas_question)
	            st.write("PandasAI Answer:", result)

	    with tab2:
	        st.header("Q&A with RAG")
	        rag_question = st.text_input("Ask a question about your data (RAG):")
	        if qa_chain is None:
	            st.info("RAG Q&A is unavailable: it needs an OpenAI API key and a non-empty dataset.")
	        elif rag_question:
	            result = qa_chain.run(rag_question)
	            st.write("RAG Answer:", result)

	    with tab3:
	        st.header("Data Visualization")
	        viz_question = st.text_input("What kind of graph would you like to see? (e.g., 'Show a scatter plot of salary vs experience')")
	        if viz_question:
	            try:
	                result = agent.chat(viz_question)

	                # Look for a fenced Python snippet in the answer. The
	                # answer is only searched when it is text, so a non-string
	                # answer falls through to the "failed" message below.
	                code_pattern = r'```python\n(.*?)\n```'
	                code_match = (
	                    re.search(code_pattern, result, re.DOTALL)
	                    if isinstance(result, str)
	                    else None
	                )

	                fig = None
	                if code_match:
	                    viz_code = code_match.group(1)
	                    # Rewrite matplotlib-style calls to plotly express.
	                    # 'plt.show()' must be replaced first: once the generic
	                    # 'plt.' -> 'px.' rewrite has run it can no longer match.
	                    # NOTE(review): the injected line relies on the snippet
	                    # defining x and y - confirm this matches what the
	                    # agent actually generates.
	                    viz_code = viz_code.replace('plt.show()', 'fig = px.scatter(df, x=x, y=y)')
	                    viz_code = viz_code.replace('plt.', 'px.')

	                    # SECURITY: this executes model-generated code. It runs
	                    # in its own namespace so it cannot rebind the script's
	                    # variables, but that is not a sandbox.
	                    namespace = {"px": px, "pd": pd, "df": df, "dfs": [df]}
	                    exec(viz_code, namespace)
	                    fig = namespace.get("fig")

	                if fig is not None:
	                    st.plotly_chart(fig)
	                else:
	                    st.write("Failed to generate a graph. Please try asking differently.")
	            except Exception as e:
	                st.write(f"An error occurred: {str(e)}")
	                st.write("Please try rephrasing your question.")
	else:
	    st.warning("No dataset loaded. Please select a dataset input option above.")

	# Error handling for missing API keys: report every credential that was
	# not found in the environment, OpenAI first and PandasAI second.
	for key_value, missing_message in (
	    (api_key, "Missing OpenAI API Key in environment variables."),
	    (pandasai_api_key, "Missing PandasAI API Key in environment variables."),
	):
	    if not key_value:
	        st.error(missing_message)