DrishtiSharma commited on
Commit
3dc0491
·
verified ·
1 Parent(s): 0625cfa

Create llm_not_gen.py

Browse files
Files changed (1) hide show
  1. mylab/llm_not_gen.py +179 -0
mylab/llm_not_gen.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ from datasets import load_dataset
5
+ from pandasai import Agent
6
+ from langchain_community.embeddings.openai import OpenAIEmbeddings
7
+ from langchain_community.vectorstores import FAISS
8
+ from langchain_openai import ChatOpenAI
9
+ from langchain.chains import RetrievalQA
10
+ from langchain.schema import Document
11
+ import os
12
+ import logging
13
+
14
# Configure module-level logging (DEBUG so data-loading failures are traceable).
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Fetch API keys from environment variables.
api_key = os.getenv("OPENAI_API_KEY")
pandasai_api_key = os.getenv("PANDASAI_API_KEY")

# Collect the names of any unset keys so the error can name them all at once.
missing_keys = [
    key_name
    for key_name, key_value in (
        ("OPENAI_API_KEY", api_key),
        ("PANDASAI_API_KEY", pandasai_api_key),
    )
    if not key_value
]

if missing_keys:
    missing_keys_str = ", ".join(missing_keys)
    raise EnvironmentError(
        f"The following API key(s) are missing: {missing_keys_str}. Please set them in the environment."
    )

# Title of the app
st.title("Data Analyzer")
37
+
38
# Function to load datasets into session
def load_dataset_into_session():
    """Render the dataset-source picker and load the selection into session state.

    Three sources are offered: a CSV shipped in the repo, a Hugging Face
    dataset, and a user-uploaded CSV. On success the DataFrame is stored in
    ``st.session_state.df`` and a 10-row preview is rendered; failures are
    shown in the UI and logged.
    """

    def _store_and_preview(frame, message):
        # Shared success path for all three sources: stash the frame,
        # confirm, and show a short preview (previously duplicated 3x).
        st.session_state.df = frame
        st.success(message)
        st.dataframe(frame.head(10))

    input_option = st.radio(
        "Select Dataset Input:",
        ["Use Repo Directory Dataset", "Use Hugging Face Dataset", "Upload CSV File"],
    )

    # Option 1: Load dataset from the repo directory
    if input_option == "Use Repo Directory Dataset":
        file_path = "./source/test.csv"
        if st.button("Load Repo Dataset"):
            try:
                _store_and_preview(
                    pd.read_csv(file_path),
                    f"File loaded successfully from '{file_path}'!",
                )
            except Exception as e:
                st.error(f"Error loading dataset from the repo directory: {e}")
                logger.error(f"Error loading dataset from repo directory: {e}")

    # Option 2: Load dataset from Hugging Face
    elif input_option == "Use Hugging Face Dataset":
        dataset_name = st.text_input(
            "Enter Hugging Face Dataset Name:", value="HUPD/hupd"
        )
        if st.button("Load Hugging Face Dataset"):
            try:
                # SECURITY: trust_remote_code=True executes code from the
                # dataset repository — only load dataset names you trust.
                dataset = load_dataset(dataset_name, split="train", trust_remote_code=True)
                # Convert the Hugging Face dataset to a Pandas DataFrame.
                if hasattr(dataset, "to_pandas"):
                    frame = dataset.to_pandas()
                else:
                    frame = pd.DataFrame(dataset)
                _store_and_preview(
                    frame,
                    f"Hugging Face Dataset '{dataset_name}' loaded successfully!",
                )
            except Exception as e:
                st.error(f"Error loading Hugging Face dataset: {e}")
                logger.error(f"Error loading Hugging Face dataset: {e}")

    # Option 3: Upload CSV File
    elif input_option == "Upload CSV File":
        uploaded_file = st.file_uploader("Upload a CSV File:", type=["csv"])
        if uploaded_file:
            try:
                _store_and_preview(
                    pd.read_csv(uploaded_file),
                    "File uploaded successfully!",
                )
            except Exception as e:
                st.error(f"Error reading uploaded file: {e}")
                logger.error(f"Error reading uploaded file: {e}")
87
+
88
# Make sure the DataFrame slot exists in session state before any widget
# callback tries to read it.
if "df" not in st.session_state:
    st.session_state["df"] = None

# Render the loader UI (stores the chosen dataset in st.session_state.df).
load_dataset_into_session()
94
+
95
# Check if a dataset is loaded; if so, build the analysis UI.
if st.session_state.df is not None:
    df = st.session_state.df
    try:
        # Initialize the PandasAI Agent for natural-language analysis.
        agent = Agent(df)

        # Convert DataFrame rows into text documents for RAG: one Document
        # per row, serialized as "col: value" pairs (null cells skipped).
        documents = [
            Document(
                page_content=", ".join(
                    f"{col}: {row[col]}" for col in df.columns if pd.notnull(row[col])
                ),
                metadata={"index": index},
            )
            for index, row in df.iterrows()
        ]

        # Set up RAG: embed every row, index with FAISS, answer via retrieval.
        # NOTE(review): this re-embeds the whole dataset on every Streamlit
        # rerun — consider caching the vector store for large frames.
        embeddings = OpenAIEmbeddings()
        vectorstore = FAISS.from_documents(documents, embeddings)
        retriever = vectorstore.as_retriever()
        qa_chain = RetrievalQA.from_chain_type(
            llm=ChatOpenAI(),
            chain_type="stuff",
            retriever=retriever,
        )

        # Create tabs
        tab1, tab2, tab3 = st.tabs(
            ["PandasAI Analysis", "RAG Q&A", "Data Visualization"]
        )

        # Tab 1: PandasAI Analysis
        with tab1:
            st.header("PandasAI Analysis")
            pandas_question = st.text_input("Ask a question about the data (PandasAI):")
            if pandas_question:
                try:
                    result = agent.chat(pandas_question)
                    st.write("PandasAI Answer:", result)
                except Exception as e:
                    st.error(f"Error during PandasAI Analysis: {e}")

        # Tab 2: RAG Q&A
        with tab2:
            st.header("RAG Q&A")
            rag_question = st.text_input("Ask a question about the data (RAG):")
            if rag_question:
                try:
                    result = qa_chain.run(rag_question)
                    st.write("RAG Answer:", result)
                except Exception as e:
                    st.error(f"Error during RAG Q&A: {e}")

        # Tab 3: Data Visualization
        with tab3:
            st.header("Data Visualization")
            viz_question = st.text_input(
                "What kind of graph would you like to create? (e.g., 'Show a scatter plot of salary vs experience')"
            )
            if viz_question:
                try:
                    result = agent.chat(viz_question)

                    # Extract a fenced ```python ...``` block from the reply.
                    # str() guards against non-string agent response objects.
                    import re

                    code_pattern = r"```python\n(.*?)\n```"
                    code_match = re.search(code_pattern, str(result), re.DOTALL)

                    if code_match:
                        viz_code = code_match.group(1)
                        # NOTE(review): naive text substitution — matplotlib
                        # and Plotly are not call-compatible, so this only
                        # rescues replies already close to Plotly syntax.
                        viz_code = viz_code.replace("plt.", "px.")
                        # SECURITY: exec() runs LLM-generated code with the
                        # app's full privileges; do not expose this app to
                        # untrusted users. Execute in an explicit namespace
                        # and fetch `fig` from it instead of relying on the
                        # code leaking a global `fig` (the original raised
                        # NameError when no `fig` was defined).
                        exec_ns = {"px": px, "pd": pd, "st": st, "df": df}
                        exec(viz_code, exec_ns)
                        fig = exec_ns.get("fig")
                        if fig is not None:
                            st.plotly_chart(fig)
                        else:
                            st.warning("Could not generate a graph. Try a different query.")
                    else:
                        st.warning("Could not generate a graph. Try a different query.")
                except Exception as e:
                    st.error(f"Error during Data Visualization: {e}")
    except Exception as e:
        st.error(f"An error occurred during processing: {e}")
else:
    st.info("Please load a dataset to start analysis.")