hmrizal committed on
Commit
fdad3c6
·
verified ·
1 Parent(s): b4dec03

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -48
app.py CHANGED
@@ -4,11 +4,7 @@ import uuid
4
  import threading
5
  import pandas as pd
6
  import numpy as np
7
- from langchain.document_loaders.csv_loader import CSVLoader
8
- from langchain.embeddings import HuggingFaceEmbeddings
9
- from langchain.vectorstores import FAISS
10
  from langchain.llms import CTransformers
11
- from langchain_experimental.agents import create_pandas_dataframe_agent
12
  from langchain.chains import LLMChain
13
  from langchain.prompts import PromptTemplate
14
 
@@ -25,14 +21,13 @@ def initialize_model_once():
25
  """Initialize model once using CTransformers API"""
26
  with MODEL_CACHE["init_lock"]:
27
  if MODEL_CACHE["model"] is None:
28
- # Load Phi-2 model (smaller than Mistral)
29
  MODEL_CACHE["model"] = CTransformers(
30
  model="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
31
  model_file="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
32
  model_type="llama",
33
  max_new_tokens=512,
34
  temperature=0.1,
35
- top_p=0.9,
36
  repetition_penalty=1.1,
37
  context_length=2048
38
  )
@@ -80,18 +75,19 @@ class ChatBot:
80
  llm = initialize_model_once()
81
 
82
  query_template = """
83
- Kamu adalah asisten yang mengubah pertanyaan natural language menjadi kode Python dengan pandas.
84
 
85
  Informasi tentang DataFrame:
86
  - Nama kolom: {column_names}
87
  - Jumlah baris: {num_rows}
88
- - Sample data:
89
  {sample_data}
90
 
91
  Pertanyaan pengguna: {question}
92
 
93
- Ubah pertanyaan tersebut menjadi kode pandas yang bisa dijalankan. Kode harus ringkas, efisien, dan menggunakan variabel 'df'.
94
- Berikan HANYA kode python saja, tanpa backtick, tanpa penjelasan.
 
95
 
96
  Kode:
97
  """
@@ -118,8 +114,34 @@ class ChatBot:
118
  print(traceback.format_exc())
119
  return f"Error pemrosesan file: {str(e)}"
120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  def chat(self, message, history):
122
- if self.df is None or self.query_chain is None:
123
  return "Mohon upload file CSV terlebih dahulu."
124
 
125
  try:
@@ -132,64 +154,84 @@ class ChatBot:
132
  elif "jumlah baris" in message_lower or "berapa baris" in message_lower:
133
  return f"Jumlah baris dalam CSV: {self.csv_info['rows']}"
134
 
135
- # Get sample data for context
136
- sample_str = self.df.head(3).to_string()
137
-
138
- # Translate question to pandas code
139
- code_response = self.query_chain.run(
140
- column_names=str(self.csv_info["column_names"]),
141
- num_rows=self.csv_info["rows"],
142
- sample_data=sample_str,
143
- question=message
144
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
- # Clean and execute the code
147
  try:
148
- code = code_response.strip()
149
- # Add safety prefix to prevent malicious code
150
- if not code.startswith("df"):
151
- code = "result = " + code
152
- else:
153
- code = "result = " + code
154
-
155
- # Create local context with the dataframe
156
- locals_dict = {"df": self.df, "pd": pd, "np": np}
157
-
158
- # Execute the code
159
  print(f"Executing code: {code}")
160
- exec(code, {"pd": pd, "np": np}, locals_dict)
161
- result = locals_dict.get("result", "No result returned")
162
 
163
- # Format the result
164
  if isinstance(result, pd.DataFrame):
165
  if len(result) > 5:
166
- result_str = result.head(5).to_string() + f"\n\n[{len(result)} baris ditemukan]"
167
  else:
168
  result_str = result.to_string()
169
  elif isinstance(result, (pd.Series, np.ndarray)):
 
 
 
 
 
170
  result_str = str(result)
 
 
171
  else:
172
  result_str = str(result)
173
 
174
- # Build the response
175
- response = f"Hasil analisis untuk pertanyaan: '{message}'\n\n"
176
- response += f"Kode yang digunakan:\n```python\n{code}\n```\n\n"
177
- response += f"Output:\n{result_str}"
178
 
179
  self.chat_history.append((message, response))
180
  return response
181
 
182
  except Exception as e:
183
- error_msg = f"Error mengeksekusi kode: {str(e)}\nKode yang dihasilkan:\n```python\n{code}\n```"
184
- print(error_msg)
185
- return error_msg
186
 
187
  except Exception as e:
188
  import traceback
189
  print(traceback.format_exc())
190
  return f"Error: {str(e)}"
191
 
192
- # UI Code
193
  def create_gradio_interface():
194
  with gr.Blocks(title="CSV Data Analyzer") as interface:
195
  session_id = gr.State(lambda: str(uuid.uuid4()))
@@ -209,9 +251,9 @@ def create_gradio_interface():
209
  with gr.Accordion("Contoh Pertanyaan", open=False):
210
  gr.Markdown("""
211
  - "Berapa jumlah data yang memiliki nilai Glucose di atas 150?"
212
- - "Bagaimana distribusi kolom Age?"
213
- - "Hitung nilai rata-rata dan standar deviasi untuk setiap kolom numerik"
214
- - "Buat tabel frekuensi untuk kolom Outcome"
215
  """)
216
 
217
  with gr.Column(scale=2):
 
4
  import threading
5
  import pandas as pd
6
  import numpy as np
 
 
 
7
  from langchain.llms import CTransformers
 
8
  from langchain.chains import LLMChain
9
  from langchain.prompts import PromptTemplate
10
 
 
21
  """Initialize model once using CTransformers API"""
22
  with MODEL_CACHE["init_lock"]:
23
  if MODEL_CACHE["model"] is None:
24
+ # Load TinyLlama model
25
  MODEL_CACHE["model"] = CTransformers(
26
  model="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
27
  model_file="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
28
  model_type="llama",
29
  max_new_tokens=512,
30
  temperature=0.1,
 
31
  repetition_penalty=1.1,
32
  context_length=2048
33
  )
 
75
  llm = initialize_model_once()
76
 
77
  query_template = """
78
+ Kamu adalah asisten data yang mengubah pertanyaan bahasa natural menjadi kode Python dengan Pandas.
79
 
80
  Informasi tentang DataFrame:
81
  - Nama kolom: {column_names}
82
  - Jumlah baris: {num_rows}
83
+ - Sampel data:
84
  {sample_data}
85
 
86
  Pertanyaan pengguna: {question}
87
 
88
+ Buat kode Python menggunakan pandas untuk menjawab pertanyaan tersebut.
89
+ Berikan HANYA kode Python saja, tanpa penjelasan atau apapun.
90
+ Kode harus menggunakan variabel 'df' sebagai nama DataFrame.
91
 
92
  Kode:
93
  """
 
114
  print(traceback.format_exc())
115
  return f"Error pemrosesan file: {str(e)}"
116
 
117
def execute_query(self, code):
    """Execute a snippet of pandas code against the loaded DataFrame.

    The snippet runs in a fresh namespace containing ``df`` (``self.df``),
    ``pd`` and ``np``.

    Args:
        code: Python source text to execute.

    Returns:
        The value of ``result`` if the snippet assigned it. Otherwise the
        value of the last variable the snippet created; if there is no such
        variable, or its value is ``None``, ``self.df`` is returned.

    Raises:
        Exception: with the message ``"Gagal menjalankan kode: ..."`` when
            the snippet raises; the original error is kept as ``__cause__``.
    """
    # SECURITY NOTE(review): exec() runs arbitrary Python with full builtins
    # and there is no sandbox or timeout here. The snippet must be treated as
    # untrusted whenever it originates from model output or user text.

    # A single dict is used as the execution namespace on purpose: when exec()
    # gets separate globals and locals, lambdas and nested functions inside
    # the snippet cannot see ``df`` and fail with NameError.
    namespace = {"df": self.df, "pd": pd, "np": np}
    try:
        exec(code, namespace)
    except Exception as e:
        raise Exception(f"Gagal menjalankan kode: {str(e)}") from e

    if "result" in namespace:
        return namespace["result"]

    # No explicit ``result``: fall back to the most recently created name.
    # ``__builtins__`` is injected by exec() itself and is never a user value.
    reserved = {"df", "pd", "np", "__builtins__"}
    last_var = None
    for var_name, var_value in namespace.items():
        if var_name not in reserved:
            last_var = var_value

    if last_var is not None:
        return last_var
    return self.df  # Nothing usable was produced; hand back the DataFrame.
142
+
143
  def chat(self, message, history):
144
+ if self.df is None:
145
  return "Mohon upload file CSV terlebih dahulu."
146
 
147
  try:
 
154
  elif "jumlah baris" in message_lower or "berapa baris" in message_lower:
155
  return f"Jumlah baris dalam CSV: {self.csv_info['rows']}"
156
 
157
+ # Handle pre-defined analysis questions
158
+ if "glucose di atas 150" in message_lower:
159
+ code = "result = len(df[df['Glucose'] > 150])"
160
+ else:
161
+ # Get sample data for context
162
+ sample_str = self.df.head(3).to_string()
163
+
164
+ # Translate question to pandas code using LLM
165
+ try:
166
+ code_response = self.query_chain.run(
167
+ column_names=str(self.csv_info["column_names"]),
168
+ num_rows=self.csv_info["rows"],
169
+ sample_data=sample_str,
170
+ question=message
171
+ )
172
+
173
+ # Clean the code
174
+ code = code_response.strip().replace("```python", "").replace("```", "").strip()
175
+
176
+ # Add result variable if not present
177
+ if not any(line.strip().startswith("result =") for line in code.split("\n")):
178
+ if code.startswith("df."):
179
+ code = "result = " + code
180
+ else:
181
+ code = "result = df." + code
182
+ except Exception as e:
183
+ # Fallback for common queries if LLM fails
184
+ if "rata-rata" in message_lower or "mean" in message_lower:
185
+ code = "result = df.describe()"
186
+ elif "jumlah" in message_lower or "count" in message_lower:
187
+ code = "result = df.count()"
188
+ elif "distribusi" in message_lower:
189
+ col = next((c for c in self.csv_info["column_names"] if c.lower() in message_lower), None)
190
+ if col:
191
+ code = f"result = df['{col}'].value_counts()"
192
+ else:
193
+ code = "result = df.describe()"
194
+ else:
195
+ return f"Maaf, saya tidak dapat memproses pertanyaan ini. Error: {str(e)}"
196
 
197
+ # Execute the code and get result
198
  try:
 
 
 
 
 
 
 
 
 
 
 
199
  print(f"Executing code: {code}")
200
+ result = self.execute_query(code)
 
201
 
202
+ # Format result based on its type
203
  if isinstance(result, pd.DataFrame):
204
  if len(result) > 5:
205
+ result_str = result.head(5).to_string() + f"\n\n[Total {len(result)} baris]"
206
  else:
207
  result_str = result.to_string()
208
  elif isinstance(result, (pd.Series, np.ndarray)):
209
+ if len(result) > 10:
210
+ result_str = str(result[:10]) + f"\n\n[Total {len(result)} item]"
211
+ else:
212
+ result_str = str(result)
213
+ elif hasattr(result, "__len__") and not isinstance(result, (str, int, float)):
214
  result_str = str(result)
215
+ if len(result) > 0:
216
+ result_str += f"\n\n[Total {len(result)} item]"
217
  else:
218
  result_str = str(result)
219
 
220
+ # Format response
221
+ response = f"Hasil analisis:\n\n{result_str}\n\nKode yang dijalankan:\n```python\n{code}\n```"
 
 
222
 
223
  self.chat_history.append((message, response))
224
  return response
225
 
226
  except Exception as e:
227
+ return f"Error saat menganalisis data: {str(e)}\n\nKode yang dicoba:\n```python\n{code}\n```"
 
 
228
 
229
  except Exception as e:
230
  import traceback
231
  print(traceback.format_exc())
232
  return f"Error: {str(e)}"
233
 
234
+ # UI Code (tidak berubah dari sebelumnya)
235
  def create_gradio_interface():
236
  with gr.Blocks(title="CSV Data Analyzer") as interface:
237
  session_id = gr.State(lambda: str(uuid.uuid4()))
 
251
  with gr.Accordion("Contoh Pertanyaan", open=False):
252
  gr.Markdown("""
253
  - "Berapa jumlah data yang memiliki nilai Glucose di atas 150?"
254
+ - "Hitung nilai rata-rata setiap kolom numerik"
255
+ - "Berapa banyak data untuk setiap kelompok dalam kolom Outcome?"
256
+ - "Berapa jumlah baris dalam dataset ini?"
257
  """)
258
 
259
  with gr.Column(scale=2):