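"""HRD chatbot: answers employee questions in Bahasa Indonesia, grounding its
replies in pkb.json (the collective labor agreement) and company Google Sheets,
served by a local Zephyr-7B GGUF model through llama.cpp."""
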
import json

import gradio as gr
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from huggingface_hub import hf_hub_download
from llama_index.core import VectorStoreIndex, Settings
from llama_index.core.chat_engine.condense_plus_context import CondensePlusContextChatEngine
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP

# ===================================
# 1️⃣ Read the PKB.json data
# ===================================
def read_pkb_json():
    """Flatten the PKB (collective labor agreement) JSON into plain text."""
    try:
        with open("pkb.json", "r", encoding="utf-8") as file:
            data = json.load(file)
        pkb_text = "=== Perjanjian Kerja Bersama ===\n"
        for bab, content in data["perjanjian_kerja_bersama"].items():
            pkb_text += f"\n## {content['judul']} ##\n"
            for pasal, pasal_data in content.items():
                if pasal != "judul":  # every key except "judul" is a pasal (article)
                    pkb_text += f"\n### {pasal_data['judul']} ###\n"
                    for item in pasal_data["isi"]:
                        if isinstance(item, dict):  # glossary entry: term + definition
                            pkb_text += f"- {item['istilah']}: {item['definisi']}\n"
                        else:  # plain clause text
                            pkb_text += f"- {item}\n"
        return pkb_text
    except Exception as e:
        return f"❌ ERROR membaca PKB.json: {str(e)}"

# ===================================
# 2️⃣ Read data from the Google Spreadsheet
# ===================================
def read_google_sheets():
    try:
        # Access scopes for Google Sheets & Drive
        scope = ["https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive"]
        # Load service-account credentials from credentials.json
        creds = ServiceAccountCredentials.from_json_keyfile_name("credentials.json", scope)
        client = gspread.authorize(creds)
        # Spreadsheet ID (one workbook holds all the sheets)
        SPREADSHEET_ID = "1e_cNMhwF-QYpyYUpqQh-XCw-OdhWS6EuYsoBUsVtdNg"
        # 📌 Worksheets to read
        sheet_names = ["datatarget", "datacuti", "dataabsen", "datalembur"]
        all_data = []  # 🔹 Collected rows from every sheet
        # 📌 Read each worksheet in turn
        spreadsheet = client.open_by_key(SPREADSHEET_ID)
        for sheet_name in sheet_names:
            try:
                sheet = spreadsheet.worksheet(sheet_name)
                data = sheet.get_all_values()
                # Prefix each sheet's rows with its name so the sources stay distinguishable
                all_data.append(f"=== Data dari {sheet_name.upper()} ===")
                all_data.extend([" | ".join(row) for row in data])
                all_data.append("\n")  # separate sheets with a blank line
            except gspread.exceptions.WorksheetNotFound:
                all_data.append(f"❌ ERROR: Worksheet {sheet_name} tidak ditemukan.")
        # Join everything into one long string
        formatted_text = "\n".join(all_data)
        return formatted_text
    except gspread.exceptions.SpreadsheetNotFound:
        return "❌ ERROR: Spreadsheet tidak ditemukan. Pastikan ID/nama benar!"
    except Exception as e:
        return f"❌ ERROR: {str(e)}"

# ===================================
# 3️⃣ Download the Llama model
# ===================================
def initialize_llama_model():
    # Fetch the quantized GGUF weights from Hugging Face, cached under ./models
    model_path = hf_hub_download(
        repo_id="TheBloke/zephyr-7b-beta-GGUF",
        filename="zephyr-7b-beta.Q4_K_M.gguf",
        cache_dir="./models",
    )
    return model_path

# ===================================
# 4️⃣ Initialize the model settings
# ===================================
def initialize_settings(model_path):
    # Register the local llama.cpp model as the global LLM for llama_index
    Settings.llm = LlamaCPP(
        model_path=model_path,
        temperature=0.7,
    )
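
# Optional LlamaCPP knobs (a sketch; these values are assumptions to tune for
# your hardware, not part of the original configuration):
#
# Settings.llm = LlamaCPP(
#     model_path=model_path,
#     temperature=0.7,
#     max_new_tokens=512,                 # cap reply length
#     context_window=3900,                # Zephyr-7B supports ~4k context tokens
#     model_kwargs={"n_gpu_layers": -1},  # offload all layers to a GPU if present
# )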

# ===================================
# 5️⃣ Build the index from the spreadsheet and PKB data
# ===================================
def initialize_index():
    # Merge both sources into one document, then chunk it for retrieval
    text_data = read_google_sheets() + "\n" + read_pkb_json()
    documents = [Document(text=text_data)]
    # Small chunks keep individual spreadsheet rows retrievable on their own
    parser = SentenceSplitter(chunk_size=150, chunk_overlap=10)
    nodes = parser.get_nodes_from_documents(documents)
    # Multilingual embedding model so the Indonesian text embeds sensibly
    Settings.embed_model = HuggingFaceEmbedding(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    index = VectorStoreIndex(nodes)
    return index
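
# Design note: the index is built once at startup, so later edits to the
# spreadsheet or pkb.json are only picked up after a restart (or another
# call to initialize_index()).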

# ===================================
# 6️⃣ Initialize the chat engine
# ===================================
# System persona, kept in Bahasa Indonesia on purpose: it doubles as the
# language constraint for the model.
SYSTEM_PROMPT = (
    "Anda adalah chatbot yang dirancang khusus untuk berbicara dalam Bahasa Indonesia. "
    "Anda adalah chatbot HRD yang membantu karyawan dalam memahami administrasi dan data perusahaan. "
    "Anda tidak diperbolehkan menjawab dalam bahasa lain, termasuk Inggris. "
    "Gunakan gaya bahasa profesional tetapi tetap ramah. "
    "Jika informasi tidak tersedia dalam dokumen, katakan dengan sopan bahwa Anda tidak tahu. "
    "Pastikan setiap jawaban diberikan secara ringkas, jelas, dan sesuai konteks. "
    "Jawaban harus singkat, jelas, dan dalam Bahasa Indonesia."
)

def initialize_chat_engine(index):
    retriever = index.as_retriever(similarity_top_k=3)
    # Condense chat history into a standalone question, retrieve context for it,
    # and answer with the system prompt applied on every turn.
    chat_engine = CondensePlusContextChatEngine.from_defaults(
        retriever=retriever,
        system_prompt=SYSTEM_PROMPT,
        verbose=True,
    )
    return chat_engine

# ===================================
# 7️⃣ Generate a chatbot response
# ===================================
def generate_response(message, history, chat_engine):
    # gr.ChatInterface supplies and tracks the history itself; the shared chat
    # engine already holds the retriever and system prompt, so there is no need
    # to rebuild the index on every message.
    response = chat_engine.stream_chat(message)
    return "".join(response.response_gen)
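
# Streaming variant (a sketch, not in the original): gr.ChatInterface accepts a
# generator, so the wrapper in main() could yield partial text as tokens arrive:
#
# def chatbot_response(message, history):
#     partial = ""
#     for token in chat_engine.stream_chat(message).response_gen:
#         partial += token
#         yield partial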

# ===================================
# 8️⃣ Application entry point
# ===================================
def main():
    model_path = initialize_llama_model()
    initialize_settings(model_path)
    index = initialize_index()
    chat_engine = initialize_chat_engine(index)

    def chatbot_response(message, history):
        return generate_response(message, history, chat_engine)

    # gr.ChatInterface passes (message, history) on each turn and renders a chat UI
    gr.ChatInterface(fn=chatbot_response).launch()

if __name__ == "__main__":
    main()
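
# Rough dependency list (assumed, unpinned):
#   pip install gradio gspread oauth2client huggingface_hub llama-cpp-python \
#       llama-index llama-index-llms-llama-cpp llama-index-embeddings-huggingface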