File size: 7,205 Bytes
d6ad8ad
a325830
40ca111
 
f7ef9f6
40ca111
 
a325830
40ca111
a325830
40ca111
 
ecb0859
40ca111
 
b571d20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40ca111
2385eda
d60f2e2
9657a0d
d60f2e2
9657a0d
 
d60f2e2
 
9657a0d
 
e96d1ec
9657a0d
 
b18a293
c7f8d65
9657a0d
 
 
e96d1ec
 
2385eda
e96d1ec
2385eda
9657a0d
 
e96d1ec
 
9657a0d
 
2385eda
e96d1ec
c7f8d65
9657a0d
 
 
 
 
 
 
 
d60f2e2
 
40ca111
 
9657a0d
40ca111
f7ef9f6
 
2385eda
 
40ca111
f7ef9f6
 
54188da
40ca111
9657a0d
 
 
 
 
 
 
 
 
 
40ca111
f7ef9f6
b571d20
 
9657a0d
 
a325830
9657a0d
 
e433088
a325830
9657a0d
a325830
f7ef9f6
a325830
40ca111
9657a0d
40ca111
f7ef9f6
643d9db
f7ef9f6
 
643d9db
f7ef9f6
 
 
40ca111
9657a0d
40ca111
90ea7ba
40ca111
98032d3
b571d20
9657a0d
 
 
 
 
 
 
 
 
 
 
 
 
 
b571d20
9657a0d
 
 
 
b571d20
 
 
 
 
 
 
9657a0d
 
b571d20
1ff4c99
d9103af
b571d20
40ca111
a325830
 
40ca111
b571d20
40ca111
a325830
f7ef9f6
9657a0d
 
f7ef9f6
9657a0d
 
98032d3
9657a0d
 
98032d3
 
5e3c1f6
 
98032d3
 
a325830
f7ef9f6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import json
import gradio as gr
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from llama_cpp import Llama
from llama_index.core import VectorStoreIndex, Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from huggingface_hub import hf_hub_download
from llama_index.core.llms import ChatMessage
from llama_index.core.chat_engine.condense_plus_context import CondensePlusContextChatEngine
from llama_index.core.schema import Document

# ===================================
# 1️⃣ Fungsi Membaca Data PKB.json
# ===================================
def read_pkb_json():
    """Load pkb.json and flatten it into one markdown-ish text blob.

    Expected layout: data["perjanjian_kerja_bersama"] maps chapter keys to
    dicts holding a "judul" plus article dicts, each with "judul" and "isi"
    (items are plain strings or {"istilah": ..., "definisi": ...} dicts).
    Returns an error string (prefixed with ❌) instead of raising.
    """
    try:
        with open("pkb.json", "r", encoding="utf-8") as file:
            data = json.load(file)

        lines = ["=== Perjanjian Kerja Bersama ==="]
        for chapter in data["perjanjian_kerja_bersama"].values():
            lines.append(f"\n## {chapter['judul']} ##")
            for key, article in chapter.items():
                if key == "judul":
                    continue  # skip the chapter title entry itself
                lines.append(f"\n### {article['judul']} ###")
                for item in article["isi"]:
                    if isinstance(item, dict):
                        lines.append(f"- {item['istilah']}: {item['definisi']}")
                    else:
                        lines.append(f"- {item}")
        # join + trailing newline reproduces the original line-by-line format
        return "\n".join(lines) + "\n"
    except Exception as e:
        return f"❌ ERROR membaca PKB.json: {str(e)}"

# ===================================
# 2️⃣ Fungsi Membaca Data Google Spreadsheet
# ===================================
def read_google_sheets():
    """Read four fixed worksheets from one spreadsheet into a single string.

    Each sheet's rows are rendered as " | "-joined lines under a header
    naming the sheet. Missing worksheets add an inline ❌ line; any other
    failure returns an error string (prefixed with ❌) instead of raising.
    """
    try:
        # OAuth scopes: Sheets plus Drive (needed by gspread for access)
        scope = [
            "https://www.googleapis.com/auth/spreadsheets",
            "https://www.googleapis.com/auth/drive",
        ]

        # Service-account credentials come from credentials.json on disk
        credentials = ServiceAccountCredentials.from_json_keyfile_name("credentials.json", scope)
        gclient = gspread.authorize(credentials)

        # Single spreadsheet; its worksheets are listed below
        SPREADSHEET_ID = "1e_cNMhwF-QYpyYUpqQh-XCw-OdhWS6EuYsoBUsVtdNg"
        sheet_names = ["datatarget", "datacuti", "dataabsen", "datalembur"]

        chunks = []
        workbook = gclient.open_by_key(SPREADSHEET_ID)
        for sheet_name in sheet_names:
            try:
                rows = workbook.worksheet(sheet_name).get_all_values()
            except gspread.exceptions.WorksheetNotFound:
                chunks.append(f"❌ ERROR: Worksheet {sheet_name} tidak ditemukan.")
                continue

            # Header line distinguishes which sheet the rows came from
            chunks.append(f"=== Data dari {sheet_name.upper()} ===")
            chunks.extend(" | ".join(row) for row in rows)
            chunks.append("\n")  # blank separator between sheets

        return "\n".join(chunks)

    except gspread.exceptions.SpreadsheetNotFound:
        return "❌ ERROR: Spreadsheet tidak ditemukan. Pastikan ID/nama benar!"

    except Exception as e:
        return f"❌ ERROR: {str(e)}"

# ===================================
# 2️⃣ Fungsi untuk Mengunduh Model Llama
# ===================================
def initialize_llama_model():
    """Fetch the Zephyr-7B GGUF weights from the HF Hub (cached under ./models).

    Returns the local filesystem path of the downloaded model file.
    """
    return hf_hub_download(
        repo_id="TheBLoke/zephyr-7b-beta-GGUF",
        filename="zephyr-7b-beta.Q4_K_M.gguf",
        cache_dir="./models",
    )

# ===================================
# 3️⃣ Inisialisasi Model dan Pengaturan
# ===================================
def initialize_settings(model_path):
    """Install a LlamaCPP model (temperature 0.7) as the global llama_index LLM.

    Side effect only: mutates ``Settings.llm``; returns None.
    """
    llm = LlamaCPP(model_path=model_path, temperature=0.7)
    Settings.llm = llm

# ===================================
# 4️⃣ Inisialisasi Index dari Data Spreadsheet
# ===================================
def initialize_index():
    """Build a VectorStoreIndex over the spreadsheet data plus the PKB text.

    Also installs a multilingual MiniLM embedding model as the global
    ``Settings.embed_model`` (side effect). Returns the index.
    """
    combined_text = "\n".join([read_google_sheets(), read_pkb_json()])
    docs = [Document(text=combined_text)]

    # Small chunks with slight overlap keep retrieval granular
    splitter = SentenceSplitter(chunk_size=150, chunk_overlap=10)
    chunks = splitter.get_nodes_from_documents(docs)

    Settings.embed_model = HuggingFaceEmbedding(
        "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )

    return VectorStoreIndex(chunks)

# ===================================
# 5️⃣ Inisialisasi Mesin Chatbot
# ===================================
def initialize_chat_engine(index):
    """Wrap *index* in a verbose CondensePlusContext chat engine (top-3 retrieval)."""
    return CondensePlusContextChatEngine.from_defaults(
        retriever=index.as_retriever(similarity_top_k=3),
        verbose=True,
    )

# ===================================
# 6️⃣ Fungsi untuk Menghasilkan Respons Chatbot
# ===================================
def generate_response(message, history, chat_engine):
    """Answer *message* with the shared chat engine and append the turn to history.

    Args:
        message: the user's question (str).
        history: list of (user, bot) tuples, or None for a fresh session.
        chat_engine: the engine built once at startup by initialize_chat_engine.

    Returns:
        The updated history list with the new (message, answer) tuple appended.

    Fixes vs. the previous version:
    - The per-call rebuild of the entire index/engine (Google Sheets only) has
      been removed: it shadowed the ``chat_engine`` parameter, re-fetched the
      sheets on every message, and silently dropped the PKB context that the
      startup index includes.
    - The system prompt was constructed but never used (dead code); it is now
      passed to ``stream_chat`` via ``chat_history`` so the engine actually
      receives it.
    """
    if history is None:
        history = []

    # System prompt: Indonesian-only, professional-but-friendly HRD assistant.
    system_prompt = ChatMessage(
        role="system",
        content=(
            "Anda adalah chatbot yang dirancang khusus untuk berbicara dalam Bahasa Indonesia. "
            "Anda adalah chatbot HRD yang membantu karyawan dalam memahami administrasi dan data perusahaan. "
            "Anda tidak diperbolehkan menjawab dalam bahasa lain, termasuk Inggris. "
            "Gunakan gaya bahasa profesional tetapi tetap ramah. "
            "Jika informasi tidak tersedia dalam dokumen, katakan dengan sopan bahwa Anda tidak tahu. "
            "Pastikan setiap jawaban diberikan secara ringkas, jelas, dan sesuai konteks."
            "Jawaban harus singkat, jelas, dan dalam Bahasa Indonesia."
        ),
    )

    # Stream the answer and collapse the token generator into one string.
    response = chat_engine.stream_chat(message, chat_history=[system_prompt])
    text = "".join(response.response_gen)

    history.append((message, text))
    return history

# ===================================
# 7️⃣ Fungsi Utama untuk Menjalankan Aplikasi
# ===================================
def main():
    """Download the model, configure llama_index, build the index, and serve the UI."""
    initialize_settings(initialize_llama_model())
    engine = initialize_chat_engine(initialize_index())

    # Closure binds the startup engine so Gradio's fn only takes (message, history)
    def chatbot_response(message, history):
        return generate_response(message, history, engine)

    gr.Interface(
        fn=chatbot_response,
        inputs=["text"],
        outputs=["text"],
    ).launch()

# Start the app only when run as a script (not when imported as a module).
if __name__ == "__main__":
    main()