Spaces:

hmrizal
/

CSVBot-Llama2

Running

File size: 9,053 Bytes

71a08c8
 
 
 
 
df0d042
 
 
e9a5be2
df0d042
71a08c8
 
 
 
 
 
 
 
 
 
 
e9a5be2
71a08c8
 
df0d042
e9a5be2
df0d042
 
 
71a08c8
df0d042
 
 
71a08c8
 
 
 
 
 
 
 
df0d042
71a08c8
 
 
 
 
 
 
 
 
 
 
df0d042
71a08c8
df0d042
a61644e
df0d042
 
71a08c8
88c17a0
71a08c8
df0d042
71a08c8
df0d042
 
 
 
 
 
 
 
 
 
 
 
71a08c8
88c17a0
df0d042
 
 
71a08c8
df0d042
 
 
 
 
 
 
 
 
 
 
 
 
71a08c8
a61644e
df0d042
71a08c8
 
df0d042
71a08c8
 
 
 
 
 
df0d042
71a08c8
 
 
df0d042
 
71a08c8
df0d042
 
71a08c8
df0d042
 
 
 
 
 
 
 
 
 
 
71a08c8
 
 
 
 
df0d042
71a08c8
df0d042
71a08c8
 
 
df0d042
 
71a08c8
 
 
 
 
 
 
 
 
df0d042
71a08c8
df0d042
 
 
 
 
 
71a08c8
 
 
 
 
 
 
 
df0d042
 
71a08c8
 
 
 
 
a61644e
71a08c8

import gradio as gr
import os
import uuid
import threading
import pandas as pd
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain.chains import ConversationalRetrievalChain

# Module-wide model cache: "model" holds the loaded LLM (None until the first
# call to initialize_model_once) and "init_lock" guards its construction.
MODEL_CACHE = {"model": None, "init_lock": threading.Lock()}

# Root directory for per-session files; created on import if it is missing.
os.makedirs(name="user_data", exist_ok=True)

def initialize_model_once():
    """Return the shared CTransformers LLM, constructing it on first use.

    The check and the construction both happen while holding
    ``MODEL_CACHE["init_lock"]``, so the model is built at most once and the
    result is stored in ``MODEL_CACHE["model"]`` for every later call.

    Returns:
        The cached ``CTransformers`` instance.
    """
    with MODEL_CACHE["init_lock"]:
        cached = MODEL_CACHE["model"]
        if cached is not None:
            return cached

        # Mistral-7B-Instruct-v0.2, Q4_K_M quantized GGUF file.
        generation_settings = {
            "max_new_tokens": 512,
            "temperature": 0.2,
            "top_p": 0.9,
            "repetition_penalty": 1.2,
        }
        MODEL_CACHE["model"] = CTransformers(
            model="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
            model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
            model_type="mistral",
            **generation_settings,
        )
        return MODEL_CACHE["model"]

class ChatBot:
    """Per-session CSV question-answering bot.

    Each instance owns a working directory ``user_data/<session_id>``, the
    retrieval chain built from the uploaded CSV, and the conversation history
    that is passed to that chain on every turn.
    """

    def __init__(self, session_id):
        """Set up empty session state and create the session directory.

        Args:
            session_id: Identifier used as the directory name under
                ``user_data``.
        """
        self.session_id = session_id
        # (question, answer) tuples handed to the chain as "chat_history".
        self.chat_history = []
        # Stays None until process_file() has built a chain successfully.
        self.chain = None
        self.user_dir = f"user_data/{session_id}"
        os.makedirs(self.user_dir, exist_ok=True)

    def process_file(self, file):
        """Validate an uploaded CSV, index it, and build the retrieval chain.

        Steps: parse the file with pandas and save a copy in the session
        directory, load it as documents with ``CSVLoader``, embed the
        documents into a FAISS index (also saved in the session directory),
        then create a ``ConversationalRetrievalChain`` over that index.

        Args:
            file: The uploaded file as delivered by Gradio; either an object
                with a ``name`` attribute holding the path, or something
                whose ``str()`` is the path. ``None`` means nothing was
                uploaded.

        Returns:
            str: A user-facing status message. Failures are reported in the
            returned message rather than raised.
        """
        if file is None:
            return "Mohon upload file CSV terlebih dahulu."

        try:
            # Handle file from Gradio
            file_path = file.name if hasattr(file, 'name') else str(file)

            # Verify the file parses as CSV and keep a copy for this session.
            try:
                df = pd.read_csv(file_path)
                user_file_path = f"{self.user_dir}/uploaded.csv"
                df.to_csv(user_file_path, index=False)
                print(f"CSV verified: {df.shape[0]} rows, {len(df.columns)} columns")
            except Exception as e:
                return f"Error membaca CSV: {str(e)}"

            # Load documents; note this reads the original upload path, not
            # the copy saved above.
            try:
                loader = CSVLoader(file_path=file_path, encoding="utf-8", csv_args={'delimiter': ','})
                data = loader.load()
                print(f"Documents loaded: {len(data)}")
            except Exception as e:
                return f"Error loading documents: {str(e)}"

            # Create vector database
            try:
                db_path = f"{self.user_dir}/db_faiss"
                embeddings = HuggingFaceEmbeddings(
                    model_name='sentence-transformers/all-MiniLM-L6-v2',
                    model_kwargs={'device': 'cpu'}  # Explicitly set to CPU
                )

                db = FAISS.from_documents(data, embeddings)
                db.save_local(db_path)
                print(f"Vector database created at {db_path}")
            except Exception as e:
                return f"Error creating vector database: {str(e)}"

            # Create LLM and chain
            try:
                llm = initialize_model_once()
                self.chain = ConversationalRetrievalChain.from_llm(
                    llm=llm,
                    retriever=db.as_retriever(search_kwargs={"k": 4}),
                    return_source_documents=True
                )
                print("Chain created successfully")
            except Exception as e:
                return f"Error creating chain: {str(e)}"

            # Seed the chat history with a summary of the file so the chain
            # sees the row/column counts and column names as prior context.
            file_info = f"CSV berhasil dimuat dengan {df.shape[0]} baris dan {len(df.columns)} kolom. Kolom: {', '.join(df.columns.tolist())}"
            self.chat_history.append(("System", file_info))

            return "File CSV berhasil diproses! Anda dapat mulai chat dengan Mistral 7B."
        except Exception as e:
            import traceback
            print(traceback.format_exc())
            return f"Error pemrosesan file: {str(e)}"

    def chat(self, message, history):
        """Answer ``message`` using the retrieval chain built by process_file.

        Args:
            message: The user's question.
            history: Chat history as held by the UI. Not used here; the bot
                passes its own ``self.chat_history`` to the chain. Kept so
                existing callers keep working.

        Returns:
            str: The model's answer followed by excerpts of up to two source
            documents, or a user-facing message when no CSV has been
            processed, the message is blank, or the chain raised.
        """
        if self.chain is None:
            return "Mohon upload file CSV terlebih dahulu."

        # Do not invoke the chain (or record a turn) for blank input.
        if not message or not message.strip():
            return "Mohon ketik pesan terlebih dahulu."

        try:
            # Process with the chain
            result = self.chain({"question": message, "chat_history": self.chat_history})
            raw_answer = result["answer"]

            # Build the text shown to the user: answer plus source excerpts.
            display_answer = raw_answer
            sources = result.get("source_documents", [])
            if sources:
                excerpts = "".join(
                    f"{i}. {doc.page_content[:100]}...\n"
                    for i, doc in enumerate(sources[:2], 1)  # Limit to top 2 sources
                )
                display_answer += "\n\nSumber:\n" + excerpts

            # Record only the model's own answer. The source excerpts are UI
            # decoration; storing them would feed them back to the chain as
            # conversation context on every later turn.
            self.chat_history.append((message, raw_answer))

            return display_answer
        except Exception as e:
            import traceback
            print(traceback.format_exc())
            return f"Error: {str(e)}"

# UI code and handler functions (same as in the previous version)
def create_gradio_interface():
    """Assemble the Gradio Blocks UI for chatting with an uploaded CSV.

    The left column holds the CSV upload, the process button and a model-info
    accordion; the right column holds the chat window, the message box and
    the send / clear buttons. The session id (a UUID string) and the ChatBot
    instance are carried between events in ``gr.State`` components.

    Returns:
        The constructed ``gr.Blocks`` object; launching is left to the caller.
    """
    with gr.Blocks(title="Chat with CSV using Mistral 7B") as app:
        session_state = gr.State(lambda: str(uuid.uuid4()))
        bot_state = gr.State(lambda: None)

        gr.HTML("<h1 style='text-align: center;'>Chat with CSV using Mistral 7B</h1>")
        gr.HTML("<h3 style='text-align: center;'>Asisten analisis CSV yang powerful</h3>")

        with gr.Row():
            # Left column: upload and model information.
            with gr.Column(scale=1):
                csv_upload = gr.File(label="Upload CSV Anda", file_types=[".csv"])
                process_btn = gr.Button("Proses CSV")

                with gr.Accordion("Informasi Model", open=False):
                    gr.Markdown("""
                    **Model**: Mistral-7B-Instruct-v0.2-GGUF
                    
                    **Fitur**:
                    - GGUF model yang dioptimalkan untuk CPU
                    - Efisien untuk analisis data dan percakapan
                    - Manajemen sesi per pengguna
                    """)

            # Right column: conversation.
            with gr.Column(scale=2):
                chat_window = gr.Chatbot(label="Riwayat Chat", height=400)
                message_box = gr.Textbox(
                    label="Ketik pesan Anda",
                    placeholder="Tanyakan tentang data CSV Anda...",
                    lines=2,
                )
                send_btn = gr.Button("Kirim")
                clear_btn = gr.Button("Bersihkan Chat")

        # ---- Event handlers -------------------------------------------------

        def handle_process_file(file, sess_id):
            """Start a fresh ChatBot for the session and index the upload."""
            chatbot = ChatBot(sess_id)
            return chatbot, [(None, chatbot.process_file(file))]

        def user_message_submitted(message, history, chatbot, sess_id):
            """Append the pending user turn and clear the message box."""
            return history + [(message, None)], "", chatbot, sess_id

        def bot_response(history, chatbot, sess_id):
            """Fill in the bot's reply for the last (pending) history entry."""
            if chatbot is not None:
                user_message = history[-1][0]
                reply = chatbot.chat(user_message, history[:-1])
                history[-1] = (user_message, reply)
                return chatbot, history

            # No CSV processed yet: create the session bot and ask for a file.
            chatbot = ChatBot(sess_id)
            history[-1] = (history[-1][0], "Mohon upload file CSV terlebih dahulu.")
            return chatbot, history

        def handle_clear_chat(chatbot):
            """Reset the bot's stored history (if any) and empty the window."""
            if chatbot is None:
                return chatbot, []
            chatbot.chat_history = []
            return chatbot, []

        # ---- Event wiring (registration order matches the original) --------

        process_btn.click(
            fn=handle_process_file,
            inputs=[csv_upload, session_state],
            outputs=[bot_state, chat_window],
        )

        # The send button and pressing Enter in the message box run the same
        # two-step pipeline: record the user turn, then generate the reply.
        for register_event in (send_btn.click, message_box.submit):
            register_event(
                fn=user_message_submitted,
                inputs=[message_box, chat_window, bot_state, session_state],
                outputs=[chat_window, message_box, bot_state, session_state],
            ).then(
                fn=bot_response,
                inputs=[chat_window, bot_state, session_state],
                outputs=[bot_state, chat_window],
            )

        clear_btn.click(
            fn=handle_clear_chat,
            inputs=[bot_state],
            outputs=[bot_state, chat_window],
        )

    return app

# Build and launch the interface only when this file is executed as a script
# (importing the module does not start the server).
if __name__ == "__main__":
    demo = create_gradio_interface()
    # share=True is passed through to Gradio's launch(); presumably it requests
    # a public share link in addition to the local server — confirm in the docs.
    demo.launch(share=True)