"""Gradio app for chatting with an uploaded CSV file using Mistral 7B (GGUF).

A CSV is embedded into a per-session FAISS index and queried through a
LangChain ``ConversationalRetrievalChain`` backed by a CTransformers model
that is loaded once and shared between sessions.
"""

import os
import threading
import traceback
import uuid

import gradio as gr
import pandas as pd
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain.vectorstores import FAISS

# Global model cache: the LLM is loaded lazily and reused by every session.
MODEL_CACHE = {
    "model": None,
    "init_lock": threading.Lock(),
}

# Create directories for user data
os.makedirs("user_data", exist_ok=True)


def initialize_model_once():
    """Return the shared CTransformers LLM, loading it on first use.

    The lock is held for the whole check-and-load so concurrent callers
    cannot load the model twice.
    """
    with MODEL_CACHE["init_lock"]:
        if MODEL_CACHE["model"] is None:
            # Load Mistral-7B-Instruct-v0.2.Q4_K_M.gguf model.
            # Generation settings must go through ``config``: the LangChain
            # CTransformers wrapper has no top-level fields for them, so
            # passing them as direct keyword arguments does not apply them.
            MODEL_CACHE["model"] = CTransformers(
                model="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
                model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
                model_type="mistral",
                config={
                    "max_new_tokens": 512,
                    "temperature": 0.2,
                    "top_p": 0.9,
                    "repetition_penalty": 1.2,
                },
            )
    return MODEL_CACHE["model"]


class ChatBot:
    """Per-session chat state: uploaded CSV, vector index and retrieval chain."""

    def __init__(self, session_id):
        """Create the session's working directory under ``user_data/``."""
        self.session_id = session_id
        # List of (question, answer) tuples passed to the chain as history.
        self.chat_history = []
        # Set by process_file(); stays None until a CSV has been processed.
        self.chain = None
        self.user_dir = f"user_data/{session_id}"
        os.makedirs(self.user_dir, exist_ok=True)

    def process_file(self, file):
        """Validate the uploaded CSV, index it, and build the retrieval chain.

        Args:
            file: The value delivered by the Gradio file component; either an
                object with a ``name`` attribute holding the path, or
                something convertible to a path string.

        Returns:
            A user-facing status message (success or the failing step's error).
        """
        if file is None:
            return "Mohon upload file CSV terlebih dahulu."

        try:
            # Handle file from Gradio
            file_path = file.name if hasattr(file, "name") else str(file)

            # Verify and save CSV
            try:
                df = pd.read_csv(file_path)
                user_file_path = f"{self.user_dir}/uploaded.csv"
                df.to_csv(user_file_path, index=False)
                print(f"CSV verified: {df.shape[0]} rows, {len(df.columns)} columns")
            except Exception as e:
                return f"Error membaca CSV: {str(e)}"

            # Load document
            try:
                loader = CSVLoader(
                    file_path=file_path,
                    encoding="utf-8",
                    csv_args={"delimiter": ","},
                )
                data = loader.load()
                print(f"Documents loaded: {len(data)}")
            except Exception as e:
                return f"Error loading documents: {str(e)}"

            # Create vector database
            try:
                db_path = f"{self.user_dir}/db_faiss"
                embeddings = HuggingFaceEmbeddings(
                    model_name="sentence-transformers/all-MiniLM-L6-v2",
                    model_kwargs={"device": "cpu"},  # Explicitly set to CPU
                )
                db = FAISS.from_documents(data, embeddings)
                db.save_local(db_path)
                print(f"Vector database created at {db_path}")
            except Exception as e:
                return f"Error creating vector database: {str(e)}"

            # Create LLM and chain
            try:
                llm = initialize_model_once()
                self.chain = ConversationalRetrievalChain.from_llm(
                    llm=llm,
                    retriever=db.as_retriever(search_kwargs={"k": 4}),
                    return_source_documents=True,
                )
                print("Chain created successfully")
            except Exception as e:
                return f"Error creating chain: {str(e)}"

            # Add file info to chat history
            file_info = (
                f"CSV berhasil dimuat dengan {df.shape[0]} baris dan "
                f"{len(df.columns)} kolom. Kolom: {', '.join(df.columns.tolist())}"
            )
            self.chat_history.append(("System", file_info))

            return "File CSV berhasil diproses! Anda dapat mulai chat dengan Mistral 7B."
        except Exception as e:
            # UI boundary: log the traceback and report instead of crashing.
            print(traceback.format_exc())
            return f"Error pemrosesan file: {str(e)}"

    def chat(self, message, history):
        """Answer ``message`` using the retrieval chain.

        Args:
            message: The user's question.
            history: The UI-side history; unused here because the chain is
                driven by ``self.chat_history``.

        Returns:
            The model's answer, followed by up to two source excerpts when
            the chain returned source documents, or an error message.
        """
        if self.chain is None:
            return "Mohon upload file CSV terlebih dahulu."

        try:
            # Process with the chain
            result = self.chain(
                {"question": message, "chat_history": self.chat_history}
            )
            answer = result["answer"]

            # Keep only the raw answer in the history so the source excerpts
            # shown to the user are not fed back into later prompts.
            self.chat_history.append((message, answer))

            sources = result.get("source_documents", [])
            if not sources:
                return answer

            # Limit to top 2 sources
            listing = "".join(
                f"{i}. {doc.page_content[:100]}...\n"
                for i, doc in enumerate(sources[:2], 1)
            )
            return f"{answer}\n\nSumber:\n{listing}"
        except Exception as e:
            # UI boundary: log the traceback and report instead of crashing.
            print(traceback.format_exc())
            return f"Error: {str(e)}"


# UI code and handler functions are the same as before
def create_gradio_interface():
    """Build and return the Gradio Blocks interface for the CSV chat app."""
    with gr.Blocks(title="Chat with CSV using Mistral 7B") as interface:
        session_id = gr.State(lambda: str(uuid.uuid4()))
        chatbot_state = gr.State(lambda: None)

        # NOTE(review): the heading markup was lost in the original text and
        # has been reconstructed; confirm the intended tags and styling.
        gr.HTML("<h1 style='text-align: center;'>Chat with CSV using Mistral 7B</h1>")
        gr.HTML("<h3 style='text-align: center;'>Asisten analisis CSV yang powerful</h3>")

        with gr.Row():
            with gr.Column(scale=1):
                file_input = gr.File(
                    label="Upload CSV Anda",
                    file_types=[".csv"],
                )
                process_button = gr.Button("Proses CSV")

                with gr.Accordion("Informasi Model", open=False):
                    gr.Markdown("""
                    **Model**: Mistral-7B-Instruct-v0.2-GGUF

                    **Fitur**:
                    - GGUF model yang dioptimalkan untuk CPU
                    - Efisien untuk analisis data dan percakapan
                    - Manajemen sesi per pengguna
                    """)

            with gr.Column(scale=2):
                chatbot_interface = gr.Chatbot(
                    label="Riwayat Chat",
                    height=400,
                )
                message_input = gr.Textbox(
                    label="Ketik pesan Anda",
                    placeholder="Tanyakan tentang data CSV Anda...",
                    lines=2,
                )
                submit_button = gr.Button("Kirim")
                clear_button = gr.Button("Bersihkan Chat")

        # Handler functions
        def handle_process_file(file, sess_id):
            """Create a fresh ChatBot for the session and index the upload."""
            chatbot = ChatBot(sess_id)
            result = chatbot.process_file(file)
            return chatbot, [(None, result)]

        process_button.click(
            fn=handle_process_file,
            inputs=[file_input, session_id],
            outputs=[chatbot_state, chatbot_interface],
        )

        def user_message_submitted(message, history, chatbot, sess_id):
            """Append the pending user message and clear the input box."""
            # Tolerate an unset chat history.
            history = (history or []) + [(message, None)]
            return history, "", chatbot, sess_id

        def bot_response(history, chatbot, sess_id):
            """Fill in the bot's reply for the last pending message."""
            if chatbot is None:
                chatbot = ChatBot(sess_id)
                history[-1] = (
                    history[-1][0],
                    "Mohon upload file CSV terlebih dahulu.",
                )
                return chatbot, history

            user_message = history[-1][0]
            response = chatbot.chat(user_message, history[:-1])
            history[-1] = (user_message, response)
            return chatbot, history

        # The button click and pressing Enter in the textbox share one flow.
        for trigger in (submit_button.click, message_input.submit):
            trigger(
                fn=user_message_submitted,
                inputs=[message_input, chatbot_interface, chatbot_state, session_id],
                outputs=[chatbot_interface, message_input, chatbot_state, session_id],
            ).then(
                fn=bot_response,
                inputs=[chatbot_interface, chatbot_state, session_id],
                outputs=[chatbot_state, chatbot_interface],
            )

        def handle_clear_chat(chatbot):
            """Reset both the stored chain history and the visible chat."""
            if chatbot is not None:
                chatbot.chat_history = []
            return chatbot, []

        clear_button.click(
            fn=handle_clear_chat,
            inputs=[chatbot_state],
            outputs=[chatbot_state, chatbot_interface],
        )

    return interface


# Launch the interface
if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch(share=True)