Update app.py
app.py CHANGED
@@ -30,7 +30,7 @@ else:
 
 # Step 2: Load CSV data into a persistent SQLite database
 db_file = 'my_database.db'
-conn = sqlite3.connect(db_file)
+conn = sqlite3.connect(db_file, check_same_thread=False)  # Allow connection across threads
 data.to_sql(table_name, conn, index=False, if_exists='replace')
 
 # SQL table metadata (for validation and schema)
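Note on the change above: check_same_thread=False only disables sqlite3's same-thread guard; it does not make the shared connection safe for concurrent use, so access from parallel Streamlit sessions still needs to be serialized. A minimal sketch of one way to do that (not part of app.py; conn_lock and run_query are hypothetical names, db_file as in the diff):

import sqlite3
import threading

import pandas as pd

db_file = 'my_database.db'
conn = sqlite3.connect(db_file, check_same_thread=False)
conn_lock = threading.Lock()  # hypothetical helper, not present in app.py

def run_query(sql):  # hypothetical helper, for illustration only
    # Serialize access so only one thread touches the connection at a time.
    with conn_lock:
        return pd.read_sql_query(sql, conn)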
@@ -40,80 +40,45 @@ st.write(f"Valid columns: {valid_columns}")
 # Function to generate SQL query using Hugging Face model
 def generate_sql_query(question, table_name, columns):
     prompt = f"""
-
-
-    - You only use the columns provided.
-    - When performing string comparisons in the WHERE clause, make them case-insensitive by using 'COLLATE NOCASE' or the LOWER() function.
-    - Do not use 'COLLATE NOCASE' in ORDER BY clauses unless sorting a string column.
-    - Do not apply 'COLLATE NOCASE' to numeric columns.
-    If the question is vague or open-ended and does not pertain to specific data retrieval, respond with "NO_SQL" to indicate that a SQL query should not be generated.
+    Generate a valid SQL query using the following columns:
+    {columns}.
     Question: {question}
-    Table name: {table_name}
-    Valid columns: {columns}
-    SQL Query:
     """
-    response = llm(prompt, max_new_tokens=
+    response = llm(prompt, max_new_tokens=50, truncation=True)  # Ensure max tokens are reasonable
     return response[0]['generated_text'].strip()
 
-
 # Function to generate insights using Hugging Face model
 def generate_insights(question, result):
     prompt = f"""
-
-    User's Question: {question}
-    SQL Query Result:
+    Based on the user's question and the SQL query result below, generate concise data insights:
     {result}
-    Concise Analysis (max 200 words):
     """
-    response = llm(prompt, max_new_tokens=100)
+    response = llm(prompt, max_new_tokens=100, truncation=True)
     return response[0]['generated_text'].strip()
 
-
 # Function to classify user query as SQL or Insights
 def classify_query(question):
     prompt = f"""
-
-
-    Question: "{question}"
-    Category (SQL/INSIGHTS):
+    Classify the following question as 'SQL' or 'INSIGHTS':
+    "{question}"
     """
-    response = llm(prompt,
+    response = llm(prompt, max_new_tokens=10, truncation=True)
     category = response[0]['generated_text'].strip().upper()
     return 'SQL' if 'SQL' in category else 'INSIGHTS'
 
 # Function to generate dataset summary
 def generate_dataset_summary(data):
     summary_template = f"""
-
-    Dataset:
+    Provide a brief summary of the dataset:
     {data.head().to_string(index=False)}
-    Dataset Summary:
     """
-    response = llm(summary_template, max_new_tokens=100)
+    response = llm(summary_template, max_new_tokens=100, truncation=True)
     return response[0]['generated_text'].strip()
 
-
-
-
-
-    parsed = sqlparse.parse(query)
-    statements = []
-    for stmt in parsed:
-        tokens = []
-        idx = 0
-        while idx < len(stmt.tokens):
-            token = stmt.tokens[idx]
-            if (token.ttype is sqlparse.tokens.Keyword and token.value.upper() == 'COLLATE'):
-                # Check if the next token is 'NOCASE'
-                next_token = stmt.tokens[idx + 2] if idx + 2 < len(stmt.tokens) else None
-                if next_token and next_token.value.upper() == 'NOCASE':
-                    # Skip 'COLLATE' and 'NOCASE' tokens
-                    idx += 3  # Skip 'COLLATE', whitespace, 'NOCASE'
-                    continue
-            tokens.append(token)
-            idx += 1
-        statements.append(''.join([str(t) for t in tokens]))
-    return ' '.join(statements)
+# Function to validate if the generated SQL query is valid
+def is_valid_sql(query):
+    sql_keywords = ["SELECT", "INSERT", "UPDATE", "DELETE", "CREATE", "DROP", "ALTER"]
+    return any(query.strip().upper().startswith(keyword) for keyword in sql_keywords)
 
 # Define the callback function
 def process_input():
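For context on the new is_valid_sql helper above: it only inspects the leading keyword of the generated text, so output that starts with anything other than a SQL keyword is rejected before execution. A short usage sketch (not part of app.py; the function body is copied from the hunk above):

def is_valid_sql(query):
    sql_keywords = ["SELECT", "INSERT", "UPDATE", "DELETE", "CREATE", "DROP", "ALTER"]
    return any(query.strip().upper().startswith(keyword) for keyword in sql_keywords)

print(is_valid_sql("select * from my_table"))     # True  (comparison is case-insensitive)
print(is_valid_sql("I cannot answer that"))       # False (no leading SQL keyword)
print(is_valid_sql("Here is a query: SELECT 1"))  # False (leading prose)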
@@ -148,32 +113,32 @@ def process_input():
             # Append the assistant's insights to the history
             st.session_state.history.append({"role": "assistant", "content": general_insights})
         else:
-            #
-
-
-
-
-
-
-
-
-
+            # Validate the SQL query
+            if is_valid_sql(generated_sql):
+                # Attempt to execute SQL query and handle exceptions
+                try:
+                    result = pd.read_sql_query(generated_sql, conn)
+
+                    if result.empty:
+                        assistant_response = "The query returned no results. Please try a different question."
+                        st.session_state.history.append({"role": "assistant", "content": assistant_response})
+                    else:
+                        # Convert the result to a string for the insights prompt
+                        result_str = result.head(10).to_string(index=False)  # Limit to first 10 rows
+
+                        # Generate insights and recommendations based on the query result
+                        insights = generate_insights(user_prompt, result_str)
+
+                        # Append the assistant's insights to the history
+                        st.session_state.history.append({"role": "assistant", "content": insights})
+                        # Append the result DataFrame to the history
+                        st.session_state.history.append({"role": "assistant", "content": result})
+                except Exception as e:
+                    logging.error(f"An error occurred during SQL execution: {e}")
+                    assistant_response = f"Error executing SQL query: {e}"
                     st.session_state.history.append({"role": "assistant", "content": assistant_response})
-
-
-                    result_str = result.head(10).to_string(index=False)  # Limit to first 10 rows
-
-                    # Generate insights and recommendations based on the query result
-                    insights = generate_insights(user_prompt, result_str)
-
-                    # Append the assistant's insights to the history
-                    st.session_state.history.append({"role": "assistant", "content": insights})
-                    # Append the result DataFrame to the history
-                    st.session_state.history.append({"role": "assistant", "content": result})
-                except Exception as e:
-                    logging.error(f"An error occurred during SQL execution: {e}")
-                    assistant_response = f"Error executing SQL query: {e}"
-                    st.session_state.history.append({"role": "assistant", "content": assistant_response})
+            else:
+                st.session_state.history.append({"role": "assistant", "content": "Generated text is not a valid SQL query."})
         else:  # INSIGHTS category
             # Generate dataset summary
             dataset_summary = generate_dataset_summary(data)
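The hunk above validates the generated SQL, executes it, and then either reports an empty result, surfaces the execution error, or turns the first rows into insights. A condensed sketch of the same flow as a standalone function (not part of app.py; answer_with_sql is a hypothetical name, and is_valid_sql, generate_insights, and conn are assumed to be defined as in the hunks above):

import logging

import pandas as pd

def answer_with_sql(generated_sql, user_prompt, conn):  # hypothetical helper
    # Returns the messages to append to the chat history, mirroring process_input().
    if not is_valid_sql(generated_sql):
        return ["Generated text is not a valid SQL query."]
    try:
        result = pd.read_sql_query(generated_sql, conn)
    except Exception as e:
        logging.error(f"An error occurred during SQL execution: {e}")
        return [f"Error executing SQL query: {e}"]
    if result.empty:
        return ["The query returned no results. Please try a different question."]
    result_str = result.head(10).to_string(index=False)  # limit to the first 10 rows
    return [generate_insights(user_prompt, result_str), result]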