Spaces:

spedrox-sac
/

AI_SQL_query

Sleeping

App Files Files Community

AI_SQL_query / app.py

spedrox-sac

Create app.py

6d802e9 verified about 1 month ago

raw

history blame

6.57 kB

	import streamlit as st
	from huggingface_hub import InferenceClient
	from langchain_core.output_parsers import StrOutputParser
	import os
	from dotenv import load_dotenv
	import pandas as pd
	import sqlite3
	import re

	load_dotenv()
	token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
	api = InferenceClient(token=token)
	parser = StrOutputParser()

	# Streamlit app
	st.title("AiSQL: AI-Powered SQL Query Generator")

	# File uploader for CSV
	uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])

	if uploaded_file:
	# Read CSV into DataFrame
	df = pd.read_csv(uploaded_file)
	st.write("Uploaded Data:")
	st.dataframe(df)

	# Normalize column names: replace spaces and special characters with underscores
	df.columns = [re.sub(r'\W+', '_', col.strip()) for col in df.columns]

	st.write("Normalized Columns in the CSV:")
	st.write(df.columns.tolist())

	# Create SQLite in-memory database
	conn = sqlite3.connect(':memory:')
	df.to_sql('data', conn, index=False, if_exists='replace')

	# Natural language query input
	nl_query = st.text_area("Enter your query in natural language or in code:")

	if st.button("Run Query/Code"):
	try:
	# Generate SQL query using LLM
	system_message = (
	"You are an AI assistant that converts natural language queries into SQL queries based on the following table schema.\n"
	f"Table name: data\n"
	f"Columns: {', '.join(df.columns.tolist())}\n"
	"Provide only the SQL query suggestion in code blocks without any explanations, comments, or other text."
	)
	messages = [
	{"role": "system", "content": system_message},
	{"role": "user", "content": nl_query}
	]
	llm = api.chat.completions.create(
	model="Qwen/Qwen2.5-Coder-32B-Instruct",
	max_tokens=150,
	messages=messages
	)
	raw_response = llm.choices[0].message['content'].strip()

	# Remove code blocks if present
	sql_query = re.sub(r'```sql\n?\|\n?```', '', raw_response).strip()

	# Additional cleaning: Extract the first SQL statement
	match = re.search(r'\b(SELECT\|INSERT\|UPDATE\|DELETE\|CREATE\|DROP\|ALTER)\b[\s\S]*?;', sql_query, re.IGNORECASE)
	if match:
	sql_query = match.group(0)
	else:
	st.error("Failed to extract a valid SQL query from the response.")
	st.write("Raw LLM Response:")
	st.write(raw_response)
	st.stop()

	# Validate that the SQL query starts with a valid keyword
	valid_sql_keywords = ['SELECT', 'INSERT', 'UPDATE', 'DELETE', 'CREATE', 'DROP', 'ALTER']
	if not any(sql_query.upper().startswith(keyword) for keyword in valid_sql_keywords):
	st.error("The generated SQL query does not start with a valid SQL command.")
	st.write("Extracted SQL Query:")
	st.write(sql_query)
	st.stop()

	st.markdown(f"Generated SQL Query: `{sql_query}`")

	# Execute SQL query
	result = pd.read_sql_query(sql_query, conn)
	st.write("Query Results:")
	st.dataframe(result)
	except Exception as e:
	st.error(f"Error: {e}")

	# Generate query suggestions using LLM
	if st.button("Show Query Suggestions"):
	try:
	system_message = (
	"You are an AI assistant that provides SQL query suggestions based on the following table schema.\n"
	f"Table name: data\n"
	f"Columns: {', '.join(df.columns.tolist())}\n"
	"Provide exactly 5 example SQL queries separated by semicolons without any explanations, comments, or code blocks."
	)
	suggestion_messages = [
	{"role": "system", "content": system_message},
	{"role": "user", "content": "Provide SQL query suggestions."}
	]
	suggestions_llm = api.chat.completions.create(
	model="Qwen/Qwen2.5-Coder-32B-Instruct",
	max_tokens=300,
	messages=suggestion_messages
	)
	raw_suggestions = suggestions_llm.choices[0].message['content']
	# Remove code blocks if present
	suggestions = re.sub(r'```sql\n?\|\n?```', '', raw_suggestions).strip()

	# Split multiple queries separated by semicolons
	suggestions_list = [query.strip() for query in suggestions.split(';') if query.strip()]

	# Validate each suggestion starts with a valid SQL keyword
	valid_sql_keywords = ['SELECT', 'INSERT', 'UPDATE', 'DELETE', 'CREATE', 'DROP', 'ALTER']
	valid_suggestions = []
	for query in suggestions_list:
	if any(query.upper().startswith(keyword) for keyword in valid_sql_keywords):
	valid_suggestions.append(query + ';')

	st.session_state['valid_suggestions'] = valid_suggestions

	if valid_suggestions:
	formatted_suggestions = ';\n'.join(valid_suggestions)
	st.write("Query Suggestions:")
	st.code(formatted_suggestions, language='sql')

	# Optionally, allow users to select a suggestion to execute
	if 'valid_suggestions' in st.session_state:
	selected_query = st.selectbox("Select a query to execute:", st.session_state['valid_suggestions'])
	if st.button("Execute Selected Query"):
	# Execute the selected query
	try:
	st.write(f"Executing SQL Query: `{selected_query}`")
	result = pd.read_sql_query(selected_query, conn)
	st.write("Query Results:")
	st.dataframe(result)
	except Exception as e:
	st.error(f"Error executing selected query: {e}")
	else:
	st.error("No valid SQL query suggestions were generated.")
	st.write("Raw Suggestions Response:")
	st.write(suggestions)
	except Exception as e:
	st.error(f"Error generating suggestions: {e}")