Spaces:

spedrox-sac
/

AI_SQL_query

Sleeping

File size: 6,568 Bytes

6d802e9

import streamlit as st
from huggingface_hub import InferenceClient
from langchain_core.output_parsers import StrOutputParser
import os
from dotenv import load_dotenv
import pandas as pd
import sqlite3
import re

load_dotenv()
token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
api = InferenceClient(token=token)
parser = StrOutputParser()

# Streamlit app
st.title("AiSQL: AI-Powered SQL Query Generator")

# File uploader for CSV
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])

if uploaded_file:
    # Read CSV into DataFrame
    df = pd.read_csv(uploaded_file)
    st.write("Uploaded Data:")
    st.dataframe(df)
    
    # Normalize column names: replace spaces and special characters with underscores
    df.columns = [re.sub(r'\W+', '_', col.strip()) for col in df.columns]
    
    st.write("Normalized Columns in the CSV:")
    st.write(df.columns.tolist())
    
    # Create SQLite in-memory database
    conn = sqlite3.connect(':memory:')
    df.to_sql('data', conn, index=False, if_exists='replace')
    
    # Natural language query input
    nl_query = st.text_area("Enter your query in natural language or in code:")
    
    if st.button("Run Query/Code"):
        try:
            # Generate SQL query using LLM
            system_message = (
                "You are an AI assistant that converts natural language queries into SQL queries based on the following table schema.\n"
                f"Table name: data\n"
                f"Columns: {', '.join(df.columns.tolist())}\n"
                "Provide only the SQL query suggestion in code blocks without any explanations, comments, or other text."
            )
            messages = [
                {"role": "system", "content": system_message},
                {"role": "user", "content": nl_query}
            ]
            llm = api.chat.completions.create(
                model="Qwen/Qwen2.5-Coder-32B-Instruct",
                max_tokens=150,
                messages=messages
            )
            raw_response = llm.choices[0].message['content'].strip()
            
            # Remove code blocks if present
            sql_query = re.sub(r'```sql\n?|\n?```', '', raw_response).strip()
            
            # Additional cleaning: Extract the first SQL statement
            match = re.search(r'\b(SELECT|INSERT|UPDATE|DELETE|CREATE|DROP|ALTER)\b[\s\S]*?;', sql_query, re.IGNORECASE)
            if match:
                sql_query = match.group(0)
            else:
                st.error("Failed to extract a valid SQL query from the response.")
                st.write("**Raw LLM Response:**")
                st.write(raw_response)
                st.stop()
            
            # Validate that the SQL query starts with a valid keyword
            valid_sql_keywords = ['SELECT', 'INSERT', 'UPDATE', 'DELETE', 'CREATE', 'DROP', 'ALTER']
            if not any(sql_query.upper().startswith(keyword) for keyword in valid_sql_keywords):
                st.error("The generated SQL query does not start with a valid SQL command.")
                st.write("**Extracted SQL Query:**")
                st.write(sql_query)
                st.stop()
            
            st.markdown(f"**Generated SQL Query:** `{sql_query}`")
            
            # Execute SQL query
            result = pd.read_sql_query(sql_query, conn)
            st.write("Query Results:")
            st.dataframe(result)
        except Exception as e:
            st.error(f"Error: {e}")
    
    # Generate query suggestions using LLM
    if st.button("Show Query Suggestions"):
        try:
            system_message = (
                "You are an AI assistant that provides SQL query suggestions based on the following table schema.\n"
                f"Table name: data\n"
                f"Columns: {', '.join(df.columns.tolist())}\n"
                "Provide exactly 5 example SQL queries separated by semicolons without any explanations, comments, or code blocks."
            )
            suggestion_messages = [
                {"role": "system", "content": system_message},
                {"role": "user", "content": "Provide SQL query suggestions."}
            ]
            suggestions_llm = api.chat.completions.create(
                model="Qwen/Qwen2.5-Coder-32B-Instruct",
                max_tokens=300,
                messages=suggestion_messages
            )
            raw_suggestions = suggestions_llm.choices[0].message['content']
            # Remove code blocks if present
            suggestions = re.sub(r'```sql\n?|\n?```', '', raw_suggestions).strip()
            
            # Split multiple queries separated by semicolons
            suggestions_list = [query.strip() for query in suggestions.split(';') if query.strip()]
            
            # Validate each suggestion starts with a valid SQL keyword
            valid_sql_keywords = ['SELECT', 'INSERT', 'UPDATE', 'DELETE', 'CREATE', 'DROP', 'ALTER']
            valid_suggestions = []
            for query in suggestions_list:
                if any(query.upper().startswith(keyword) for keyword in valid_sql_keywords):
                    valid_suggestions.append(query + ';')
            
            st.session_state['valid_suggestions'] = valid_suggestions
            
            if valid_suggestions:
                formatted_suggestions = ';\n'.join(valid_suggestions)
                st.write("**Query Suggestions:**")
                st.code(formatted_suggestions, language='sql')
                
                # Optionally, allow users to select a suggestion to execute
                if 'valid_suggestions' in st.session_state:
                    selected_query = st.selectbox("Select a query to execute:", st.session_state['valid_suggestions'])
                    if st.button("Execute Selected Query"):
                        # Execute the selected query
                        try:
                            st.write(f"**Executing SQL Query:** `{selected_query}`")
                            result = pd.read_sql_query(selected_query, conn)
                            st.write("Query Results:")
                            st.dataframe(result)
                        except Exception as e:
                            st.error(f"Error executing selected query: {e}")
            else:
                st.error("No valid SQL query suggestions were generated.")
                st.write("**Raw Suggestions Response:**")
                st.write(suggestions)
        except Exception as e:
            st.error(f"Error generating suggestions: {e}")