Spaces:

arithescientist
/

GenBIChatbot

Running

App Files Community

arithescientist committed on Oct 4, 2024

Commit

6dd2b20

•

1 Parent(s): 9bff135

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -144

app.py CHANGED Viewed

@@ -3,13 +3,13 @@ import streamlit as st
 import pandas as pd
 import sqlite3
 import logging
-import json
 from langchain.agents.agent_toolkits import SQLDatabaseToolkit
 from langchain.sql_database import SQLDatabase
-from langchain.prompts import PromptTemplate
-from langchain.chains import LLMChain
-# Import ChatOpenAI from langchain_community
-from langchain_community.chat_models import ChatOpenAI
 # Initialize logging
 logging.basicConfig(level=logging.INFO)
@@ -20,8 +20,6 @@ if 'history' not in st.session_state:
 # OpenAI API key
 openai_api_key = os.getenv("OPENAI_API_KEY")
-# Alternatively, you can set your API key directly
-# openai_api_key = "YOUR_OPENAI_API_KEY"
 # Check if the API key is set
 if not openai_api_key:
@@ -54,122 +52,18 @@ engine = SQLDatabase.from_uri(f"sqlite:///{db_file}", include_tables=[table_name
 # Initialize the LLM
 llm = ChatOpenAI(temperature=0, openai_api_key=openai_api_key)
-# Step 3: Create the agent toolkit (not used directly in the layered approach but kept for completeness)
 toolkit = SQLDatabaseToolkit(db=engine, llm=llm)
-# Step 4: Define the layered functions
-# Layer 1: Understanding the Question
-def parse_user_question(question):
-    parsing_prompt = f"""
-    You are an assistant that extracts key information from user questions for SQL query generation.
-    Given the following question, identify the relevant columns, tables, and any conditions or filters needed.
-    Question: "{question}"
-    Provide your answer in the following JSON format:
-    {{
-        "columns": [list of columns or aggregation functions],
-        "table": "table_name",
-        "conditions": "SQL WHERE clause conditions",
-        "aggregation": "any aggregation functions needed",
-        "group_by": [list of columns to group by],
-        "order_by": "column to order by and direction (e.g., 'Total_Sales DESC')",
-        "limit": "number of records to return"
-    }}
-    Answer:
-    """
-    # Use llm.predict instead of llm()
-    response = llm.predict(parsing_prompt)
-    try:
-        parsed_query = json.loads(response)
-        return parsed_query
-    except json.JSONDecodeError as e:
-        logging.error(f"JSON decoding error: {e}")
-        return None
-# Layer 2: Generating the SQL Query
-def construct_sql_query(parsed_info):
-    if not parsed_info:
-        return None
-    columns = ', '.join(parsed_info.get('columns', ['*']))
-    table = parsed_info.get('table', table_name)
-    conditions = parsed_info.get('conditions', '')
-    group_by = parsed_info.get('group_by', [])
-    order_by = parsed_info.get('order_by', '')
-    limit = parsed_info.get('limit', '')
-    sql_query = f"SELECT {columns} FROM {table}"
-    if conditions:
-        sql_query += f" WHERE {conditions}"
-    if group_by:
-        sql_query += f" GROUP BY {', '.join(group_by)}"
-    if order_by:
-        sql_query += f" ORDER BY {order_by}"
-    if limit:
-        sql_query += f" LIMIT {limit}"
-    return sql_query
-# Layer 3: Executing the Query and Retrieving Data
-def execute_sql_query(sql_query):
-    try:
-        result = pd.read_sql_query(sql_query, conn)
-        return result
-    except Exception as e:
-        logging.error(f"SQL execution error: {e}")
-        return None
-# Layer 4: Formatting and Presenting the Results
-def display_results(result):
-    if result is not None and not result.empty:
-        st.session_state.history.append({"role": "assistant", "content": "Here are the results:"})
-        st.session_state.history.append({"role": "assistant", "content": result.head(10)})
-    else:
-        assistant_response = "The query returned no results. Please try a different question."
-        st.session_state.history.append({"role": "assistant", "content": assistant_response})
-# Layer 5: Generating Insights or Additional Analysis (Optional)
-def generate_insights(question, result):
-    insights_template = """
-    You are an expert data analyst. Based on the user's question and the SQL query result provided below, generate a concise analysis that includes key data insights and actionable recommendations. Limit the response to a maximum of 150 words.
-    User's Question: {question}
-    SQL Query Result:
-    {result}
-    Concise Analysis:
-    """
-    insights_prompt = PromptTemplate(template=insights_template, input_variables=['question', 'result'])
-    insights_chain = LLMChain(llm=llm, prompt=insights_prompt)
-    result_str = result.to_string(index=False)
-    insights = insights_chain.run({'question': question, 'result': result_str})
-    st.session_state.history.append({"role": "assistant", "content": insights})
-# Function to Generate Data Summary (for non-SQL responses)
-def generate_data_summary():
-    summary_prompt = f"""
-    You are an assistant that provides a summary of the dataset.
-    Dataset Description:
-    {data.describe(include='all').to_string()}
-    Provide a concise summary of the dataset, highlighting key statistics and any notable observations.
-    """
-    # Use llm.predict instead of llm()
-    summary = llm.predict(summary_prompt)
-    return summary
-# Step 5: Define the callback function
 def process_input():
     user_prompt = st.session_state['user_input']
@@ -178,30 +72,58 @@ def process_input():
             # Append user message to history
             st.session_state.history.append({"role": "user", "content": user_prompt})
             with st.spinner("Processing..."):
-                # Layer 1: Understand the question
-                parsed_query = parse_user_question(user_prompt)
-                logging.info(f"Parsed Query: {parsed_query}")
-                if parsed_query and parsed_query.get('columns'):
-                    # Layer 2: Generate the SQL query
-                    sql_query = construct_sql_query(parsed_query)
-                    logging.info(f"Constructed SQL Query: {sql_query}")
-                    # Layer 3: Execute the SQL query and get the result
-                    result = execute_sql_query(sql_query)
-                    # Layer 4: Display the results
-                    display_results(result)
-                    # Layer 5: Generate insights (optional)
-                    if result is not None and not result.empty:
-                        generate_insights(user_prompt, result.head(10))
                 else:
-                    # If no columns are identified, provide a summary
-                    summary = generate_data_summary()
-                    st.session_state.history.append({"role": "assistant", "content": summary})
         except Exception as e:
             logging.error(f"An error occurred: {e}")
             assistant_response = f"Error: {e}"
@@ -210,7 +132,7 @@ def process_input():
         # Reset user input
         st.session_state['user_input'] = ''
-# Step 6: Display conversation history
 for message in st.session_state.history:
     if message['role'] == 'user':
         st.markdown(f"**User:** {message['content']}")

 import pandas as pd
 import sqlite3
 import logging
+from langchain.agents import create_sql_agent
 from langchain.agents.agent_toolkits import SQLDatabaseToolkit
+from langchain.agents.agent_types import AgentType
+from langchain.llms import OpenAI
 from langchain.sql_database import SQLDatabase
+from langchain.chat_models import ChatOpenAI
+from langchain.evaluation import load_evaluator
 # Initialize logging
 logging.basicConfig(level=logging.INFO)
 # OpenAI API key
 openai_api_key = os.getenv("OPENAI_API_KEY")
 # Check if the API key is set
 if not openai_api_key:
 # Initialize the LLM
 llm = ChatOpenAI(temperature=0, openai_api_key=openai_api_key)
+# Step 3: Create the agent
 toolkit = SQLDatabaseToolkit(db=engine, llm=llm)
+sql_agent = create_sql_agent(
+    llm=llm,
+    toolkit=toolkit,
+    verbose=True,
+    agent_type=AgentType.OPENAI_FUNCTIONS,
+    max_iterations=5
+)
+# Step 4: Define the callback function
 def process_input():
     user_prompt = st.session_state['user_input']
             # Append user message to history
             st.session_state.history.append({"role": "user", "content": user_prompt})
+            # Use the agent to generate the SQL query and get the response
             with st.spinner("Processing..."):
+                response = sql_agent.run(user_prompt)
+            # Check if the response contains a SQL query
+            if "```sql" in response:
+                # Extract the SQL query
+                start_index = response.find("```sql") + len("```sql")
+                end_index = response.find("```", start_index)
+                sql_query = response[start_index:end_index].strip()
+            else:
+                # If no SQL code is found, assume the entire response is the SQL query
+                sql_query = response.strip()
+            logging.info(f"Generated SQL Query: {sql_query}")
+            # Attempt to execute SQL query and handle exceptions
+            try:
+                result = pd.read_sql_query(sql_query, conn)
+                if result.empty:
+                    assistant_response = "The query returned no results. Please try a different question."
+                    st.session_state.history.append({"role": "assistant", "content": assistant_response})
                 else:
+                    # Limit the result to first 10 rows for display
+                    result_display = result.head(10)
+                    st.session_state.history.append({"role": "assistant", "content": "Here are the results:"})
+                    st.session_state.history.append({"role": "assistant", "content": result_display})
+                    # Generate insights based on the query result
+                    insights_template = """
+                    You are an expert data analyst. Based on the user's question and the SQL query result provided below, generate a concise analysis that includes key data insights and actionable recommendations. Limit the response to a maximum of 150 words.
+                    User's Question: {question}
+                    SQL Query Result:
+                    {result}
+                    Concise Analysis:
+                    """
+                    insights_prompt = PromptTemplate(template=insights_template, input_variables=['question', 'result'])
+                    insights_chain = LLMChain(llm=llm, prompt=insights_prompt)
+                    result_str = result_display.to_string(index=False)
+                    insights = insights_chain.run({'question': user_prompt, 'result': result_str})
+                    # Append the assistant's insights to the history
+                    st.session_state.history.append({"role": "assistant", "content": insights})
+            except Exception as e:
+                logging.error(f"An error occurred during SQL execution: {e}")
+                assistant_response = f"Error executing SQL query: {e}"
+                st.session_state.history.append({"role": "assistant", "content": assistant_response})
         except Exception as e:
             logging.error(f"An error occurred: {e}")
             assistant_response = f"Error: {e}"
         # Reset user input
         st.session_state['user_input'] = ''
+# Step 5: Display conversation history
 for message in st.session_state.history:
     if message['role'] == 'user':
         st.markdown(f"**User:** {message['content']}")