File size: 9,053 Bytes
71a08c8
 
 
 
 
df0d042
 
 
e9a5be2
df0d042
71a08c8
 
 
 
 
 
 
 
 
 
 
e9a5be2
71a08c8
 
df0d042
e9a5be2
df0d042
 
 
71a08c8
df0d042
 
 
71a08c8
 
 
 
 
 
 
 
df0d042
71a08c8
 
 
 
 
 
 
 
 
 
 
df0d042
71a08c8
df0d042
a61644e
df0d042
 
71a08c8
88c17a0
71a08c8
df0d042
71a08c8
df0d042
 
 
 
 
 
 
 
 
 
 
 
71a08c8
88c17a0
df0d042
 
 
71a08c8
df0d042
 
 
 
 
 
 
 
 
 
 
 
 
71a08c8
a61644e
df0d042
71a08c8
 
df0d042
71a08c8
 
 
 
 
 
df0d042
71a08c8
 
 
df0d042
 
71a08c8
df0d042
 
71a08c8
df0d042
 
 
 
 
 
 
 
 
 
 
71a08c8
 
 
 
 
df0d042
71a08c8
df0d042
71a08c8
 
 
df0d042
 
71a08c8
 
 
 
 
 
 
 
 
df0d042
71a08c8
df0d042
 
 
 
 
 
71a08c8
 
 
 
 
 
 
 
df0d042
 
71a08c8
 
 
 
 
a61644e
71a08c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import gradio as gr
import os
import uuid
import threading
import pandas as pd
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain.chains import ConversationalRetrievalChain

# Process-wide cache: the loaded LLM (None until first use) and the lock
# that guards its one-time initialisation.
MODEL_CACHE = dict(model=None, init_lock=threading.Lock())

# Root folder under which each session gets its own sub-directory.
os.makedirs("user_data", exist_ok=True)

def initialize_model_once():
    """Return the shared CTransformers LLM, creating it on the first call.

    The instance is stored in ``MODEL_CACHE["model"]``; the check-and-create
    step runs while holding ``MODEL_CACHE["init_lock"]`` so that only one
    caller constructs the model.

    Returns:
        The cached ``CTransformers`` instance.
    """
    with MODEL_CACHE["init_lock"]:
        if MODEL_CACHE["model"] is None:
            # Load Mistral-7B-Instruct-v0.2.Q4_K_M.gguf model
            MODEL_CACHE["model"] = CTransformers(
                model="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
                model_file="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
                model_type="mistral",
                # Generation settings are passed through ``config``: per the
                # LangChain CTransformers documentation that dict is what gets
                # handed to ctransformers, whereas sampling options given as
                # top-level keyword arguments are not declared fields of the
                # wrapper and do not reach the model.
                config={
                    "max_new_tokens": 512,
                    "temperature": 0.2,
                    "top_p": 0.9,
                    "repetition_penalty": 1.2,
                },
            )

    return MODEL_CACHE["model"]

class ChatBot:
    """Per-session assistant that answers questions about one uploaded CSV.

    An instance keeps its files under ``user_data/<session_id>``, builds a
    FAISS index plus a ``ConversationalRetrievalChain`` in ``process_file``
    and answers questions through ``chat``.
    """

    def __init__(self, session_id):
        """Set up empty session state and create the session directory.

        Args:
            session_id: Identifier used as the directory name under
                ``user_data``.
        """
        self.session_id = session_id
        # (question, answer) pairs handed to the chain as ``chat_history``.
        self.chat_history = []
        # Stays None until process_file() has built a chain successfully.
        self.chain = None
        self.user_dir = f"user_data/{session_id}"
        os.makedirs(self.user_dir, exist_ok=True)

    def process_file(self, file):
        """Validate the uploaded CSV, index it and build the retrieval chain.

        Args:
            file: Upload object exposing a ``name`` path attribute, or a
                plain path; ``None`` means nothing was uploaded.

        Returns:
            A user-facing status string. Failures are reported through this
            string rather than raised.
        """
        if file is None:
            return "Mohon upload file CSV terlebih dahulu."

        try:
            # Handle file from Gradio
            file_path = file.name if hasattr(file, 'name') else str(file)

            # Verify the CSV parses and keep a copy in the session directory
            try:
                df = pd.read_csv(file_path)
                user_file_path = f"{self.user_dir}/uploaded.csv"
                df.to_csv(user_file_path, index=False)
                print(f"CSV verified: {df.shape[0]} rows, {len(df.columns)} columns")
            except Exception as e:
                return f"Error membaca CSV: {str(e)}"

            # Load the rows as documents
            try:
                loader = CSVLoader(file_path=file_path, encoding="utf-8", csv_args={'delimiter': ','})
                data = loader.load()
                print(f"Documents loaded: {len(data)}")
            except Exception as e:
                return f"Error loading documents: {str(e)}"

            # Create vector database
            try:
                db_path = f"{self.user_dir}/db_faiss"
                embeddings = HuggingFaceEmbeddings(
                    model_name='sentence-transformers/all-MiniLM-L6-v2',
                    model_kwargs={'device': 'cpu'}  # Explicitly set to CPU
                )

                db = FAISS.from_documents(data, embeddings)
                db.save_local(db_path)
                print(f"Vector database created at {db_path}")
            except Exception as e:
                return f"Error creating vector database: {str(e)}"

            # Create LLM and chain
            try:
                llm = initialize_model_once()
                self.chain = ConversationalRetrievalChain.from_llm(
                    llm=llm,
                    retriever=db.as_retriever(search_kwargs={"k": 4}),
                    return_source_documents=True
                )
                print("Chain created successfully")
            except Exception as e:
                return f"Error creating chain: {str(e)}"

            # Add file info to chat history
            file_info = f"CSV berhasil dimuat dengan {df.shape[0]} baris dan {len(df.columns)} kolom. Kolom: {', '.join(df.columns.tolist())}"
            self.chat_history.append(("System", file_info))

            return "File CSV berhasil diproses! Anda dapat mulai chat dengan Mistral 7B."
        except Exception as e:
            import traceback
            print(traceback.format_exc())
            return f"Error pemrosesan file: {str(e)}"

    def chat(self, message, history):
        """Answer ``message`` using the chain built by ``process_file``.

        Args:
            message: The user's question.
            history: UI-side history; accepted for interface compatibility
                but not read, since ``self.chat_history`` is what the chain
                receives.

        Returns:
            The answer, followed by up to two truncated source snippets when
            the chain returned any, or an error/status string.
        """
        if self.chain is None:
            return "Mohon upload file CSV terlebih dahulu."

        try:
            # Process with the chain
            result = self.chain({"question": message, "chat_history": self.chat_history})
            answer = result["answer"]

            # Build the text shown to the user: the answer plus a short
            # listing of the top 2 sources, each cut to 100 characters.
            displayed = answer
            sources = result.get("source_documents", [])
            if sources:
                listing = "".join(
                    f"{i}. {doc.page_content[:100]}...\n"
                    for i, doc in enumerate(sources[:2], 1)
                )
                displayed = f"{answer}\n\nSumber:\n{listing}"

            # Store only the bare answer: the source listing is display
            # decoration and would otherwise be fed back to the chain as
            # part of the conversation on every later turn.
            self.chat_history.append((message, answer))

            return displayed
        except Exception as e:
            import traceback
            print(traceback.format_exc())
            return f"Error: {str(e)}"

# UI layout and event handlers
def create_gradio_interface():
    """Build the Gradio Blocks app and wire up its event handlers.

    The left column holds the CSV upload and model information; the right
    column holds the chat window, message box and buttons. A ``ChatBot`` and
    a session id are carried in ``gr.State`` components.

    Returns:
        The assembled ``gr.Blocks`` interface (not yet launched).
    """
    app_title = "Chat with CSV using Mistral 7B"

    with gr.Blocks(title=app_title) as interface:
        # Both states are given callables; Gradio is expected to call them to
        # produce the initial value (see the gr.State documentation).
        session_id = gr.State(lambda: str(uuid.uuid4()))
        chatbot_state = gr.State(lambda: None)

        gr.HTML(f"<h1 style='text-align: center;'>{app_title}</h1>")
        gr.HTML("<h3 style='text-align: center;'>Asisten analisis CSV yang powerful</h3>")

        with gr.Row():
            with gr.Column(scale=1):
                file_input = gr.File(label="Upload CSV Anda", file_types=[".csv"])
                process_button = gr.Button("Proses CSV")

                with gr.Accordion("Informasi Model", open=False):
                    gr.Markdown("""
                    **Model**: Mistral-7B-Instruct-v0.2-GGUF
                    
                    **Fitur**:
                    - GGUF model yang dioptimalkan untuk CPU
                    - Efisien untuk analisis data dan percakapan
                    - Manajemen sesi per pengguna
                    """)

            with gr.Column(scale=2):
                chatbot_interface = gr.Chatbot(label="Riwayat Chat", height=400)
                message_input = gr.Textbox(
                    label="Ketik pesan Anda",
                    placeholder="Tanyakan tentang data CSV Anda...",
                    lines=2,
                )
                submit_button = gr.Button("Kirim")
                clear_button = gr.Button("Bersihkan Chat")

        # ---- Handlers -----------------------------------------------------

        def handle_process_file(file, sess_id):
            """Create a new ChatBot for the upload and show its status line."""
            bot = ChatBot(sess_id)
            status = bot.process_file(file)
            return bot, [(None, status)]

        def user_message_submitted(message, history, chatbot, sess_id):
            """Append the pending user turn and clear the input box."""
            return [*history, (message, None)], "", chatbot, sess_id

        def bot_response(history, chatbot, sess_id):
            """Fill in the reply for the last (still unanswered) turn."""
            if chatbot is None:
                # No CSV processed yet: create the session object and ask
                # the user to upload first.
                chatbot = ChatBot(sess_id)
                question = history[-1][0]
                reply = "Mohon upload file CSV terlebih dahulu."
            else:
                question = history[-1][0]
                reply = chatbot.chat(question, history[:-1])
            history[-1] = (question, reply)
            return chatbot, history

        def handle_clear_chat(chatbot):
            """Empty the visible chat and the bot's own history."""
            if chatbot is None:
                return chatbot, []
            chatbot.chat_history = []
            return chatbot, []

        # ---- Wiring (registration order kept: process, send x2, clear) ----

        process_button.click(
            fn=handle_process_file,
            inputs=[file_input, session_id],
            outputs=[chatbot_state, chatbot_interface],
        )

        # The send button and pressing Enter in the textbox run the same
        # two-step flow: record the user turn, then produce the reply.
        for register in (submit_button.click, message_input.submit):
            register(
                fn=user_message_submitted,
                inputs=[message_input, chatbot_interface, chatbot_state, session_id],
                outputs=[chatbot_interface, message_input, chatbot_state, session_id],
            ).then(
                fn=bot_response,
                inputs=[chatbot_interface, chatbot_state, session_id],
                outputs=[chatbot_state, chatbot_interface],
            )

        clear_button.click(
            fn=handle_clear_chat,
            inputs=[chatbot_state],
            outputs=[chatbot_state, chatbot_interface],
        )

    return interface

# Launch the interface
# Script entry point: this branch is skipped when the module is imported.
if __name__ == "__main__":
    # Build the Blocks app and keep it in the module-level name ``demo``.
    demo = create_gradio_interface()
    # NOTE(review): share=True presumably asks Gradio for a public share
    # link in addition to the local server — confirm against launch() docs.
    demo.launch(share=True)