DrishtiSharma committed on
Commit
2851a04
·
verified ·
1 Parent(s): 1231f9d

Create error_401.py

Browse files
Files changed (1) hide show
  1. mylab/error_401.py +186 -0
mylab/error_401.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ from datasets import load_dataset
5
+ from pandasai import SmartDataframe
6
+ from pandasai.llm.openai import OpenAI
7
+ from langchain_community.embeddings.openai import OpenAIEmbeddings
8
+ from langchain_community.vectorstores import FAISS
9
+ from langchain_openai import ChatOpenAI
10
+ from langchain.chains import RetrievalQA
11
+ from langchain.schema import Document
12
+ import os
13
+ import logging
14
+
15
# Configure logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


def _mask_secret(secret):
    """Return a log-safe form of *secret*: first 4 chars plus '...', or '<unset>'."""
    return f"{secret[:4]}..." if secret else "<unset>"


# Fetch API keys from environment variables
api_key = os.getenv("OPENAI_API_KEY")
pandasai_api_key = os.getenv("PANDASAI_API_KEY")

# Check for missing keys and raise specific errors
missing_keys = [
    name
    for name, value in (
        ("OPENAI_API_KEY", api_key),
        ("PANDASAI_API_KEY", pandasai_api_key),
    )
    if not value
]

if missing_keys:
    missing_keys_str = ", ".join(missing_keys)
    raise EnvironmentError(
        f"The following API key(s) are missing: {missing_keys_str}. Please set them in the environment."
    )

# SECURITY: never log raw API keys; log only a masked prefix, lazily formatted.
logger.debug("OPENAI_API_KEY: %s", _mask_secret(api_key))
logger.debug("PANDASAI_API_KEY: %s", _mask_secret(pandasai_api_key))

# Title of the app
st.title("PandasAI and RAG Data Analyzer")
41
+
42
# Function to load datasets into session
def load_dataset_into_session():
    """Render the dataset-selection UI and load the chosen data into ``st.session_state.df``.

    Offers three sources: a CSV bundled in the repo, a Hugging Face dataset,
    or a user-uploaded CSV file. On success the DataFrame is stored in
    ``st.session_state.df`` and a 10-row preview is shown; on failure the
    error is surfaced in the UI and logged with a full traceback.
    """
    input_option = st.radio(
        "Select Dataset Input:",
        ["Use Repo Directory Dataset", "Use Hugging Face Dataset", "Upload CSV File"],
    )

    # Option 1: Load dataset from the repo directory
    if input_option == "Use Repo Directory Dataset":
        file_path = "./source/test.csv"
        if st.button("Load Repo Dataset"):
            try:
                st.session_state.df = pd.read_csv(file_path)
                st.success(f"File loaded successfully from '{file_path}'!")
                st.dataframe(st.session_state.df.head(10))
            except Exception as e:
                st.error(f"Error loading dataset from the repo directory: {e}")
                # logger.exception keeps the traceback (logger.error dropped it).
                logger.exception("Error loading dataset from repo directory")

    # Option 2: Load dataset from Hugging Face
    elif input_option == "Use Hugging Face Dataset":
        dataset_name = st.text_input(
            "Enter Hugging Face Dataset Name:", value="HUPD/hupd"
        )
        if st.button("Load Hugging Face Dataset"):
            try:
                # SECURITY: trust_remote_code=True executes code shipped inside the
                # dataset repository -- only load datasets from trusted sources.
                dataset = load_dataset(dataset_name, split="train", trust_remote_code=True)
                # Some dataset objects convert directly; otherwise fall back to
                # building a DataFrame from the iterable of records.
                if hasattr(dataset, "to_pandas"):
                    st.session_state.df = dataset.to_pandas()
                else:
                    st.session_state.df = pd.DataFrame(dataset)
                st.success(f"Hugging Face Dataset '{dataset_name}' loaded successfully!")
                st.dataframe(st.session_state.df.head(10))
            except Exception as e:
                st.error(f"Error loading Hugging Face dataset: {e}")
                logger.exception("Error loading Hugging Face dataset")

    # Option 3: Upload CSV File
    elif input_option == "Upload CSV File":
        uploaded_file = st.file_uploader("Upload a CSV File:", type=["csv"])
        if uploaded_file:
            try:
                st.session_state.df = pd.read_csv(uploaded_file)
                st.success("File uploaded successfully!")
                st.dataframe(st.session_state.df.head(10))
            except Exception as e:
                st.error(f"Error reading uploaded file: {e}")
                logger.exception("Error reading uploaded file")
90
+
91
# Ensure session state for the DataFrame
if "df" not in st.session_state:
    st.session_state.df = None

# Load dataset into session
load_dataset_into_session()

# Check if a dataset is loaded
if st.session_state.df is not None:
    df = st.session_state.df
    try:
        # Initialize OpenAI LLM
        llm = OpenAI(api_token=pandasai_api_key)  # PandasAI LLM

        # Create SmartDataframe for PandasAI
        smart_df = SmartDataframe(df, config={"llm": llm})

        # Convert DataFrame to documents for RAG: one Document per row,
        # serialized as "col: value" pairs with null cells skipped.
        documents = [
            Document(
                page_content=", ".join(
                    [f"{col}: {row[col]}" for col in df.columns if pd.notnull(row[col])]
                ),
                metadata={"index": index},
            )
            for index, row in df.iterrows()
        ]

        # Set up RAG: embed every row-document, index with FAISS, and build a
        # retrieval-augmented QA chain over the index.
        embeddings = OpenAIEmbeddings()
        vectorstore = FAISS.from_documents(documents, embeddings)
        retriever = vectorstore.as_retriever()
        qa_chain = RetrievalQA.from_chain_type(
            llm=ChatOpenAI(),
            chain_type="stuff",
            retriever=retriever,
        )

        # Create tabs
        tab1, tab2, tab3 = st.tabs(
            ["PandasAI Analysis", "RAG Q&A", "Data Visualization"]
        )

        # Tab 1: PandasAI Analysis
        with tab1:
            st.header("PandasAI Analysis")
            pandas_question = st.text_input("Ask a question about the data (PandasAI):")
            if pandas_question:
                try:
                    result = smart_df.chat(pandas_question)
                    if result:
                        st.write("PandasAI Answer:", result)
                    else:
                        st.warning("PandasAI returned no result. Try another question.")
                except Exception as e:
                    st.error(f"Error during PandasAI Analysis: {e}")
                    logger.exception("PandasAI Analysis error")

        # Tab 2: RAG Q&A
        with tab2:
            st.header("RAG Q&A")
            rag_question = st.text_input("Ask a question about the data (RAG):")
            if rag_question:
                try:
                    result = qa_chain.run(rag_question)
                    st.write("RAG Answer:", result)
                except Exception as e:
                    st.error(f"Error during RAG Q&A: {e}")
                    logger.exception("RAG Q&A error")

        # Tab 3: Data Visualization
        with tab3:
            st.header("Data Visualization")
            viz_question = st.text_input(
                "What kind of graph would you like to create? (e.g., 'Show a scatter plot of salary vs experience')"
            )
            if viz_question:
                try:
                    result = smart_df.chat(viz_question)
                    import re

                    # Extract a fenced ```python ... ``` code block from the answer.
                    code_pattern = r"```python\n(.*?)\n```"
                    code_match = re.search(code_pattern, result, re.DOTALL)

                    if code_match:
                        viz_code = code_match.group(1)
                        # NOTE(review): a plain text substitution cannot reliably
                        # turn matplotlib code into valid plotly code -- kept
                        # unchanged for behavioral parity, but fragile.
                        viz_code = viz_code.replace("plt.", "px.")
                        # SECURITY: exec() runs LLM-generated code -- do not use
                        # on untrusted deployments. Run it in an explicit
                        # namespace and fetch 'fig' from there instead of
                        # relying on it leaking into module globals (the old
                        # code raised NameError when 'fig' was never created).
                        exec_namespace = {"df": df, "pd": pd, "px": px, "st": st}
                        exec(viz_code, exec_namespace)
                        fig = exec_namespace.get("fig")
                        if fig is not None:
                            st.plotly_chart(fig)
                        else:
                            st.warning("Generated code did not produce a figure ('fig'). Try a different query.")
                    else:
                        st.warning("Could not generate a graph. Try a different query.")
                except Exception as e:
                    st.error(f"Error during Data Visualization: {e}")
    except Exception as e:
        st.error(f"An error occurred during processing: {e}")
else:
    st.info("Please load a dataset to start analysis.")