Spaces:

hmrizal
/

CSVBot-Llama2

Sleeping

App Files Files Community

hmrizal committed on 7 days ago

Commit

fdad3c6

verified ·

1 Parent(s): b4dec03

Update app.py

Browse files

Files changed (1) hide show

app.py +90 -48

app.py CHANGED Viewed

@@ -4,11 +4,7 @@ import uuid
 import threading
 import pandas as pd
 import numpy as np
-from langchain.document_loaders.csv_loader import CSVLoader
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.vectorstores import FAISS
 from langchain.llms import CTransformers
-from langchain_experimental.agents import create_pandas_dataframe_agent
 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
@@ -25,14 +21,13 @@ def initialize_model_once():
     """Initialize model once using CTransformers API"""
     with MODEL_CACHE["init_lock"]:
         if MODEL_CACHE["model"] is None:
-            # Load Phi-2 model (smaller than Mistral)
             MODEL_CACHE["model"] = CTransformers(
                 model="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
                 model_file="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
                 model_type="llama",
                 max_new_tokens=512,
                 temperature=0.1,
-                top_p=0.9,
                 repetition_penalty=1.1,
                 context_length=2048
             )
@@ -80,18 +75,19 @@ class ChatBot:
                 llm = initialize_model_once()
                 query_template = """
-                Kamu adalah asisten yang mengubah pertanyaan natural language menjadi kode Python dengan pandas.
                 Informasi tentang DataFrame:
                 - Nama kolom: {column_names}
                 - Jumlah baris: {num_rows}
-                - Sample data:
                 {sample_data}
                 Pertanyaan pengguna: {question}
-                Ubah pertanyaan tersebut menjadi kode pandas yang bisa dijalankan. Kode harus ringkas, efisien, dan menggunakan variabel 'df'.
-                Berikan HANYA kode python saja, tanpa backtick, tanpa penjelasan.
                 Kode:
                 """
@@ -118,8 +114,34 @@ class ChatBot:
             print(traceback.format_exc())
             return f"Error pemrosesan file: {str(e)}"
     def chat(self, message, history):
-        if self.df is None or self.query_chain is None:
             return "Mohon upload file CSV terlebih dahulu."
         try:
@@ -132,64 +154,84 @@ class ChatBot:
             elif "jumlah baris" in message_lower or "berapa baris" in message_lower:
                 return f"Jumlah baris dalam CSV: {self.csv_info['rows']}"
-            # Get sample data for context
-            sample_str = self.df.head(3).to_string()
-            # Translate question to pandas code
-            code_response = self.query_chain.run(
-                column_names=str(self.csv_info["column_names"]),
-                num_rows=self.csv_info["rows"],
-                sample_data=sample_str,
-                question=message
-            )
-            # Clean and execute the code
             try:
-                code = code_response.strip()
-                # Add safety prefix to prevent malicious code
-                if not code.startswith("df"):
-                    code = "result = " + code
-                else:
-                    code = "result = " + code
-                # Create local context with the dataframe
-                locals_dict = {"df": self.df, "pd": pd, "np": np}
-                # Execute the code
                 print(f"Executing code: {code}")
-                exec(code, {"pd": pd, "np": np}, locals_dict)
-                result = locals_dict.get("result", "No result returned")
-                # Format the result
                 if isinstance(result, pd.DataFrame):
                     if len(result) > 5:
-                        result_str = result.head(5).to_string() + f"\n\n[{len(result)} baris ditemukan]"
                     else:
                         result_str = result.to_string()
                 elif isinstance(result, (pd.Series, np.ndarray)):
                     result_str = str(result)
                 else:
                     result_str = str(result)
-                # Build the response
-                response = f"Hasil analisis untuk pertanyaan: '{message}'\n\n"
-                response += f"Kode yang digunakan:\n```python\n{code}\n```\n\n"
-                response += f"Output:\n{result_str}"
                 self.chat_history.append((message, response))
                 return response
             except Exception as e:
-                error_msg = f"Error mengeksekusi kode: {str(e)}\nKode yang dihasilkan:\n```python\n{code}\n```"
-                print(error_msg)
-                return error_msg
         except Exception as e:
             import traceback
             print(traceback.format_exc())
             return f"Error: {str(e)}"
-# UI Code
 def create_gradio_interface():
     with gr.Blocks(title="CSV Data Analyzer") as interface:
         session_id = gr.State(lambda: str(uuid.uuid4()))
@@ -209,9 +251,9 @@ def create_gradio_interface():
                 with gr.Accordion("Contoh Pertanyaan", open=False):
                     gr.Markdown("""
                     - "Berapa jumlah data yang memiliki nilai Glucose di atas 150?"
-                    - "Bagaimana distribusi kolom Age?"
-                    - "Hitung nilai rata-rata dan standar deviasi untuk setiap kolom numerik"
-                    - "Buat tabel frekuensi untuk kolom Outcome"
                     """)
             with gr.Column(scale=2):

 import threading
 import pandas as pd
 import numpy as np
 from langchain.llms import CTransformers
 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
     """Initialize model once using CTransformers API"""
     with MODEL_CACHE["init_lock"]:
         if MODEL_CACHE["model"] is None:
+            # Load TinyLlama model
             MODEL_CACHE["model"] = CTransformers(
                 model="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
                 model_file="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
                 model_type="llama",
                 max_new_tokens=512,
                 temperature=0.1,
                 repetition_penalty=1.1,
                 context_length=2048
             )
                 llm = initialize_model_once()
                 query_template = """
+                Kamu adalah asisten data yang mengubah pertanyaan bahasa natural menjadi kode Python dengan Pandas.
                 Informasi tentang DataFrame:
                 - Nama kolom: {column_names}
                 - Jumlah baris: {num_rows}
+                - Sampel data:
                 {sample_data}
                 Pertanyaan pengguna: {question}
+                Buat kode Python menggunakan pandas untuk menjawab pertanyaan tersebut.
+                Berikan HANYA kode Python saja, tanpa penjelasan atau apapun.
+                Kode harus menggunakan variabel 'df' sebagai nama DataFrame.
                 Kode:
                 """
             print(traceback.format_exc())
             return f"Error pemrosesan file: {str(e)}"
+    def execute_query(self, code):
+        """Run a string of pandas code against the loaded DataFrame via exec().
+
+        Args:
+            code: Python source text; it may read ``df``, ``pd`` and ``np``.
+
+        Returns:
+            The value bound to ``result`` if the code assigns it; otherwise the
+            last non-None value bound to any name other than ``df``/``pd``/``np``;
+            otherwise ``self.df``.
+
+        Raises:
+            Exception: wraps any Exception raised while running the code.
+
+        NOTE(review): nothing here sandboxes the code. exec() is given no
+        restricted builtins and no timeout, and chat() passes it text
+        produced by the LLM -- treat that input as untrusted.
+        """
+        try:
+            # Namespace the code writes into; seeded with the DataFrame and libraries
+            local_vars = {"df": self.df, "pd": pd, "np": np}
+            # Run the code. No timeout is enforced here: a long-running or
+            # non-terminating statement blocks the caller.
+            # NOTE(review): globals and locals are separate dicts, and `df` is
+            # only in the locals one. Per the exec() docs the code then runs as
+            # if inside a class body, so lambdas / nested functions in `code`
+            # presumably cannot see `df` -- confirm whether that is intended.
+            exec(code, {"pd": pd, "np": np}, local_vars)
+            # Prefer an explicit `result` binding
+            if "result" in local_vars:
+                return local_vars["result"]
+            else:
+                # No `result`: walk the namespace in dict order and keep the
+                # value of the last name that is not one of the seeded entries
+                last_var = None
+                for var_name, var_value in local_vars.items():
+                    if var_name not in ["df", "pd", "np"] and var_name != "__builtins__":
+                        last_var = var_value
+                if last_var is not None:
+                    return last_var
+                else:
+                    # Nothing new was bound (or the last binding was None):
+                    # fall back to the DataFrame itself
+                    return self.df  # Return the dataframe as default
+        except Exception as e:
+            # Re-raised as a plain Exception with a user-facing message; the
+            # original exception type is not preserved for callers
+            raise Exception(f"Gagal menjalankan kode: {str(e)}")
     def chat(self, message, history):
+        if self.df is None:
             return "Mohon upload file CSV terlebih dahulu."
         try:
             elif "jumlah baris" in message_lower or "berapa baris" in message_lower:
                 return f"Jumlah baris dalam CSV: {self.csv_info['rows']}"
+            # Handle pre-defined analysis questions
+            if "glucose di atas 150" in message_lower:
+                code = "result = len(df[df['Glucose'] > 150])"
+            else:
+                # Get sample data for context
+                sample_str = self.df.head(3).to_string()
+                # Translate question to pandas code using LLM
+                try:
+                    code_response = self.query_chain.run(
+                        column_names=str(self.csv_info["column_names"]),
+                        num_rows=self.csv_info["rows"],
+                        sample_data=sample_str,
+                        question=message
+                    )
+                    # Clean the code
+                    code = code_response.strip().replace("```python", "").replace("```", "").strip()
+                    # Add result variable if not present
+                    if not any(line.strip().startswith("result =") for line in code.split("\n")):
+                        if code.startswith("df."):
+                            code = "result = " + code
+                        else:
+                            code = "result = df." + code
+                except Exception as e:
+                    # Fallback for common queries if LLM fails
+                    if "rata-rata" in message_lower or "mean" in message_lower:
+                        code = "result = df.describe()"
+                    elif "jumlah" in message_lower or "count" in message_lower:
+                        code = "result = df.count()"
+                    elif "distribusi" in message_lower:
+                        col = next((c for c in self.csv_info["column_names"] if c.lower() in message_lower), None)
+                        if col:
+                            code = f"result = df['{col}'].value_counts()"
+                        else:
+                            code = "result = df.describe()"
+                    else:
+                        return f"Maaf, saya tidak dapat memproses pertanyaan ini. Error: {str(e)}"
+            # Execute the code and get result
             try:
                 print(f"Executing code: {code}")
+                result = self.execute_query(code)
+                # Format result based on its type
                 if isinstance(result, pd.DataFrame):
                     if len(result) > 5:
+                        result_str = result.head(5).to_string() + f"\n\n[Total {len(result)} baris]"
                     else:
                         result_str = result.to_string()
                 elif isinstance(result, (pd.Series, np.ndarray)):
+                    if len(result) > 10:
+                        result_str = str(result[:10]) + f"\n\n[Total {len(result)} item]"
+                    else:
+                        result_str = str(result)
+                elif hasattr(result, "__len__") and not isinstance(result, (str, int, float)):
                     result_str = str(result)
+                    if len(result) > 0:
+                        result_str += f"\n\n[Total {len(result)} item]"
                 else:
                     result_str = str(result)
+                # Format response
+                response = f"Hasil analisis:\n\n{result_str}\n\nKode yang dijalankan:\n```python\n{code}\n```"
                 self.chat_history.append((message, response))
                 return response
             except Exception as e:
+                return f"Error saat menganalisis data: {str(e)}\n\nKode yang dicoba:\n```python\n{code}\n```"
         except Exception as e:
             import traceback
             print(traceback.format_exc())
             return f"Error: {str(e)}"
+# UI Code (tidak berubah dari sebelumnya)
 def create_gradio_interface():
     with gr.Blocks(title="CSV Data Analyzer") as interface:
         session_id = gr.State(lambda: str(uuid.uuid4()))
                 with gr.Accordion("Contoh Pertanyaan", open=False):
                     gr.Markdown("""
                     - "Berapa jumlah data yang memiliki nilai Glucose di atas 150?"
+                    - "Hitung nilai rata-rata setiap kolom numerik"
+                    - "Berapa banyak data untuk setiap kelompok dalam kolom Outcome?"
+                    - "Berapa jumlah baris dalam dataset ini?"
                     """)
             with gr.Column(scale=2):