Spaces:
Sleeping
Sleeping
File size: 6,568 Bytes
6d802e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import streamlit as st
from huggingface_hub import InferenceClient
from langchain_core.output_parsers import StrOutputParser
import os
from dotenv import load_dotenv
import pandas as pd
import sqlite3
import re
load_dotenv()
token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
api = InferenceClient(token=token)
parser = StrOutputParser()
# Streamlit app
st.title("AiSQL: AI-Powered SQL Query Generator")
# File uploader for CSV
uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
if uploaded_file:
# Read CSV into DataFrame
df = pd.read_csv(uploaded_file)
st.write("Uploaded Data:")
st.dataframe(df)
# Normalize column names: replace spaces and special characters with underscores
df.columns = [re.sub(r'\W+', '_', col.strip()) for col in df.columns]
st.write("Normalized Columns in the CSV:")
st.write(df.columns.tolist())
# Create SQLite in-memory database
conn = sqlite3.connect(':memory:')
df.to_sql('data', conn, index=False, if_exists='replace')
# Natural language query input
nl_query = st.text_area("Enter your query in natural language or in code:")
if st.button("Run Query/Code"):
try:
# Generate SQL query using LLM
system_message = (
"You are an AI assistant that converts natural language queries into SQL queries based on the following table schema.\n"
f"Table name: data\n"
f"Columns: {', '.join(df.columns.tolist())}\n"
"Provide only the SQL query suggestion in code blocks without any explanations, comments, or other text."
)
messages = [
{"role": "system", "content": system_message},
{"role": "user", "content": nl_query}
]
llm = api.chat.completions.create(
model="Qwen/Qwen2.5-Coder-32B-Instruct",
max_tokens=150,
messages=messages
)
raw_response = llm.choices[0].message['content'].strip()
# Remove code blocks if present
sql_query = re.sub(r'```sql\n?|\n?```', '', raw_response).strip()
# Additional cleaning: Extract the first SQL statement
match = re.search(r'\b(SELECT|INSERT|UPDATE|DELETE|CREATE|DROP|ALTER)\b[\s\S]*?;', sql_query, re.IGNORECASE)
if match:
sql_query = match.group(0)
else:
st.error("Failed to extract a valid SQL query from the response.")
st.write("**Raw LLM Response:**")
st.write(raw_response)
st.stop()
# Validate that the SQL query starts with a valid keyword
valid_sql_keywords = ['SELECT', 'INSERT', 'UPDATE', 'DELETE', 'CREATE', 'DROP', 'ALTER']
if not any(sql_query.upper().startswith(keyword) for keyword in valid_sql_keywords):
st.error("The generated SQL query does not start with a valid SQL command.")
st.write("**Extracted SQL Query:**")
st.write(sql_query)
st.stop()
st.markdown(f"**Generated SQL Query:** `{sql_query}`")
# Execute SQL query
result = pd.read_sql_query(sql_query, conn)
st.write("Query Results:")
st.dataframe(result)
except Exception as e:
st.error(f"Error: {e}")
# Generate query suggestions using LLM
if st.button("Show Query Suggestions"):
try:
system_message = (
"You are an AI assistant that provides SQL query suggestions based on the following table schema.\n"
f"Table name: data\n"
f"Columns: {', '.join(df.columns.tolist())}\n"
"Provide exactly 5 example SQL queries separated by semicolons without any explanations, comments, or code blocks."
)
suggestion_messages = [
{"role": "system", "content": system_message},
{"role": "user", "content": "Provide SQL query suggestions."}
]
suggestions_llm = api.chat.completions.create(
model="Qwen/Qwen2.5-Coder-32B-Instruct",
max_tokens=300,
messages=suggestion_messages
)
raw_suggestions = suggestions_llm.choices[0].message['content']
# Remove code blocks if present
suggestions = re.sub(r'```sql\n?|\n?```', '', raw_suggestions).strip()
# Split multiple queries separated by semicolons
suggestions_list = [query.strip() for query in suggestions.split(';') if query.strip()]
# Validate each suggestion starts with a valid SQL keyword
valid_sql_keywords = ['SELECT', 'INSERT', 'UPDATE', 'DELETE', 'CREATE', 'DROP', 'ALTER']
valid_suggestions = []
for query in suggestions_list:
if any(query.upper().startswith(keyword) for keyword in valid_sql_keywords):
valid_suggestions.append(query + ';')
st.session_state['valid_suggestions'] = valid_suggestions
if valid_suggestions:
formatted_suggestions = ';\n'.join(valid_suggestions)
st.write("**Query Suggestions:**")
st.code(formatted_suggestions, language='sql')
# Optionally, allow users to select a suggestion to execute
if 'valid_suggestions' in st.session_state:
selected_query = st.selectbox("Select a query to execute:", st.session_state['valid_suggestions'])
if st.button("Execute Selected Query"):
# Execute the selected query
try:
st.write(f"**Executing SQL Query:** `{selected_query}`")
result = pd.read_sql_query(selected_query, conn)
st.write("Query Results:")
st.dataframe(result)
except Exception as e:
st.error(f"Error executing selected query: {e}")
else:
st.error("No valid SQL query suggestions were generated.")
st.write("**Raw Suggestions Response:**")
st.write(suggestions)
except Exception as e:
st.error(f"Error generating suggestions: {e}") |