Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
-
import json
|
2 |
import gradio as gr
|
3 |
import gspread
|
4 |
from oauth2client.service_account import ServiceAccountCredentials
|
5 |
from llama_cpp import Llama
|
6 |
-
from llama_index.core import VectorStoreIndex, Settings
|
7 |
from llama_index.core.node_parser import SentenceSplitter
|
8 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
9 |
from llama_index.llms.llama_cpp import LlamaCPP
|
@@ -12,9 +11,9 @@ from llama_index.core.llms import ChatMessage
|
|
12 |
from llama_index.core.chat_engine.condense_plus_context import CondensePlusContextChatEngine
|
13 |
from llama_index.core.schema import Document
|
14 |
|
15 |
-
#
|
16 |
-
# 1️⃣ Fungsi Membaca Data Google Spreadsheet
|
17 |
-
#
|
18 |
def read_google_sheets():
|
19 |
try:
|
20 |
scope = ["https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive"]
|
@@ -22,7 +21,7 @@ def read_google_sheets():
|
|
22 |
client = gspread.authorize(creds)
|
23 |
|
24 |
SPREADSHEET_ID = "1e_cNMhwF-QYpyYUpqQh-XCw-OdhWS6EuYsoBUsVtdNg"
|
25 |
-
sheet_names = ["datatarget", "datacuti", "dataabsen", "datalembur"
|
26 |
|
27 |
all_data = []
|
28 |
spreadsheet = client.open_by_key(SPREADSHEET_ID)
|
@@ -45,9 +44,31 @@ def read_google_sheets():
|
|
45 |
except Exception as e:
|
46 |
return f"❌ ERROR: {str(e)}"
|
47 |
|
48 |
-
#
|
49 |
-
# 2️⃣
|
50 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
def initialize_llama_model():
|
52 |
model_path = hf_hub_download(
|
53 |
repo_id="TheBLoke/zephyr-7b-beta-GGUF",
|
@@ -56,20 +77,23 @@ def initialize_llama_model():
|
|
56 |
)
|
57 |
return model_path
|
58 |
|
59 |
-
#
|
60 |
-
#
|
61 |
-
#
|
62 |
def initialize_settings(model_path):
    """Assign a LlamaCPP instance built from ``model_path`` to ``Settings.llm``.

    Args:
        model_path: Value forwarded to ``LlamaCPP`` as ``model_path``.
    """
    llm = LlamaCPP(temperature=0.7, model_path=model_path)
    Settings.llm = llm
|
64 |
|
65 |
-
#
|
66 |
-
#
|
67 |
-
#
|
68 |
def initialize_index():
|
69 |
text_data = read_google_sheets()
|
70 |
document = Document(text=text_data)
|
71 |
-
|
72 |
-
|
|
|
|
|
|
|
73 |
|
74 |
embedding = HuggingFaceEmbedding("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
75 |
Settings.embed_model = embedding
|
@@ -77,26 +101,17 @@ def initialize_index():
|
|
77 |
index = VectorStoreIndex(nodes)
|
78 |
return index
|
79 |
|
|
|
|
|
|
|
80 |
def initialize_chat_engine(index):
    """Build a CondensePlusContextChatEngine that retrieves from ``index``.

    Args:
        index: Object exposing ``as_retriever``; the retriever is requested
            with ``similarity_top_k=3``.

    Returns:
        The engine returned by ``CondensePlusContextChatEngine.from_defaults``.
    """
    return CondensePlusContextChatEngine.from_defaults(
        retriever=index.as_retriever(similarity_top_k=3),
        # Per the original author's note: verbose is turned off so that no
        # document references show up.
        verbose=False,
    )
|
87 |
|
88 |
-
#
|
89 |
-
#
|
90 |
-
#
|
91 |
-
def clean_response(response):
    """Join a streamed chat response into a single cleaned-up string.

    Args:
        response: Object whose ``response_gen`` attribute is an iterable of
            text chunks.

    Returns:
        The concatenated text with every run of consecutive newlines
        collapsed to one newline, the role labels ``user:``, ``jawaban:``
        and ``assistant:`` removed, and surrounding whitespace stripped.
    """
    import re  # local import so the block does not depend on file-level imports

    text = "".join(response.response_gen)
    # A single replace("\n\n", "\n") pass still leaves "\n\n" behind when the
    # model emits three or more newlines in a row; collapse whole runs instead.
    text = re.sub(r"\n{2,}", "\n", text).strip()
    # Strip the role labels in the same order as before.
    for label in ("user:", "jawaban:", "assistant:"):
        text = text.replace(label, "")
    return text.strip()
|
96 |
-
|
97 |
-
# ===================================
|
98 |
-
# 6️⃣ Fungsi untuk Menghasilkan Respons Chatbot
|
99 |
-
# ===================================
|
100 |
def generate_response(message, history, chat_engine):
|
101 |
if history is None:
|
102 |
history = []
|
@@ -119,24 +134,20 @@ def generate_response(message, history, chat_engine):
|
|
119 |
history.append((message, cleaned_text)) # 🔹 Pastikan hanya teks yang masuk ke history
|
120 |
return cleaned_text
|
121 |
|
122 |
-
#
|
123 |
-
#
|
124 |
-
#
|
125 |
def main():
    """Prepare the model, index and chat engine, then launch the Gradio UI.

    The steps run in order: download/locate the model, configure the LLM
    settings, build the index, create the chat engine, and finally start a
    text-in/text-out ``gr.Interface``.
    """
    model_path = initialize_llama_model()
    initialize_settings(model_path)

    index = initialize_index()
    chat_engine = initialize_chat_engine(index)

    def chatbot_response(message, history=None):
        # gr.Interface with a single text input calls fn with exactly one
        # argument, so ``history`` must be optional; generate_response
        # substitutes an empty list when it receives None.
        return generate_response(message, history, chat_engine)

    gr.Interface(
        fn=chatbot_response,
        inputs=["text"],
        outputs=["text"],
    ).launch()
|
140 |
|
141 |
if __name__ == "__main__":
|
142 |
main()
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import gspread
|
3 |
from oauth2client.service_account import ServiceAccountCredentials
|
4 |
from llama_cpp import Llama
|
5 |
+
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
|
6 |
from llama_index.core.node_parser import SentenceSplitter
|
7 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
8 |
from llama_index.llms.llama_cpp import LlamaCPP
|
|
|
11 |
from llama_index.core.chat_engine.condense_plus_context import CondensePlusContextChatEngine
|
12 |
from llama_index.core.schema import Document
|
13 |
|
14 |
+
# ==============================
|
15 |
+
# 1️⃣ Fungsi Membaca Data Google Spreadsheet (Cuti, Lembur, Absen, Target)
|
16 |
+
# ==============================
|
17 |
def read_google_sheets():
|
18 |
try:
|
19 |
scope = ["https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive"]
|
|
|
21 |
client = gspread.authorize(creds)
|
22 |
|
23 |
SPREADSHEET_ID = "1e_cNMhwF-QYpyYUpqQh-XCw-OdhWS6EuYsoBUsVtdNg"
|
24 |
+
sheet_names = ["datatarget", "datacuti", "dataabsen", "datalembur"]
|
25 |
|
26 |
all_data = []
|
27 |
spreadsheet = client.open_by_key(SPREADSHEET_ID)
|
|
|
44 |
except Exception as e:
|
45 |
return f"❌ ERROR: {str(e)}"
|
46 |
|
47 |
+
# ==============================
|
48 |
+
# 2️⃣ Fungsi Membaca Data dari File TXT (PKB dan Dokumen Lainnya)
|
49 |
+
# ==============================
|
50 |
+
def read_txt_documents():
    """Load the fixed set of text files under ``bahandokumen/``.

    Returns:
        Whatever ``SimpleDirectoryReader(input_files=...).load_data()``
        returns for the files listed below, in this order.
    """
    txt_paths = [
        "bahandokumen/K3.txt",
        "bahandokumen/bonus.txt",
        "bahandokumen/absensi.txt",
        "bahandokumen/cuti.txt",
        "bahandokumen/disiplinkerja.txt",
        "bahandokumen/fasilitas&bantuan.txt",
        "bahandokumen/fasilitaskerja.txt",
        "bahandokumen/hak.txt",
        "bahandokumen/hubunganpengusaha&serikat.txt",
        "bahandokumen/istilah.txt",
        "bahandokumen/jaminanserikat.txt",
        "bahandokumen/jamkes.txt",
        "bahandokumen/jamsos.txt",
        "bahandokumen/keluhkesah.txt",
        "bahandokumen/kenaikanupah.txt",
        "bahandokumen/kewajiban.txt",
        "bahandokumen/kompensasi.txt",
        "bahandokumen/larangan.txt",
        "bahandokumen/lembur.txt",
        "bahandokumen/luaskesepakatan.txt",
        "bahandokumen/mogok.txt",
        "bahandokumen/pelanggaran&sanksi.txt",
        "bahandokumen/pendidikan.txt",
        "bahandokumen/pengangkatan.txt",
        "bahandokumen/penilaian&promosi.txt",
        "bahandokumen/pensiun.txt",
        "bahandokumen/perjadin.txt",
        "bahandokumen/pesangon.txt",
        "bahandokumen/phk.txt",
        "bahandokumen/pihak.txt",
        "bahandokumen/pkb.txt",
        "bahandokumen/resign.txt",
        "bahandokumen/sanksi.txt",
        "bahandokumen/shift.txt",
        "bahandokumen/syaratkerja.txt",
        "bahandokumen/sisacuti.txt",
        "bahandokumen/target.txt",
        "bahandokumen/tatacara.txt",
        "bahandokumen/tka.txt",
        "bahandokumen/tunjangan.txt",
        "bahandokumen/uangpisah.txt",
        "bahandokumen/upah.txt",
        "bahandokumen/upahlembur.txt",
        "bahandokumen/waktukerja.txt",
    ]
    reader = SimpleDirectoryReader(input_files=txt_paths)
    return reader.load_data()
|
68 |
+
|
69 |
+
# ==============================
|
70 |
+
# 3️⃣ Inisialisasi Model Llama
|
71 |
+
# ==============================
|
72 |
def initialize_llama_model():
|
73 |
model_path = hf_hub_download(
|
74 |
repo_id="TheBLoke/zephyr-7b-beta-GGUF",
|
|
|
77 |
)
|
78 |
return model_path
|
79 |
|
80 |
+
# ==============================
|
81 |
+
# 4️⃣ Inisialisasi Pengaturan Model
|
82 |
+
# ==============================
|
83 |
def initialize_settings(model_path):
    """Assign a LlamaCPP instance built from ``model_path`` to ``Settings.llm``.

    Args:
        model_path: Value forwarded to ``LlamaCPP`` as ``model_path``.
    """
    llm = LlamaCPP(temperature=0.7, model_path=model_path)
    Settings.llm = llm
|
85 |
|
86 |
+
# ==============================
|
87 |
+
# 5️⃣ Inisialisasi Index
|
88 |
+
# ==============================
|
89 |
def initialize_index():
|
90 |
text_data = read_google_sheets()
|
91 |
document = Document(text=text_data)
|
92 |
+
txt_documents = read_txt_documents()
|
93 |
+
|
94 |
+
all_documents = [document] + txt_documents # Gabungkan dokumen CSV dan TXT
|
95 |
+
parser = SentenceSplitter(chunk_size=150, chunk_overlap=10)
|
96 |
+
nodes = parser.get_nodes_from_documents(all_documents)
|
97 |
|
98 |
embedding = HuggingFaceEmbedding("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
99 |
Settings.embed_model = embedding
|
|
|
101 |
index = VectorStoreIndex(nodes)
|
102 |
return index
|
103 |
|
104 |
+
# ==============================
|
105 |
+
# 6️⃣ Inisialisasi Chat Engine
|
106 |
+
# ==============================
|
107 |
def initialize_chat_engine(index):
    """Build a CondensePlusContextChatEngine that retrieves from ``index``.

    Args:
        index: Object exposing ``as_retriever``; the retriever is requested
            with ``similarity_top_k=3``.

    Returns:
        The engine returned by ``CondensePlusContextChatEngine.from_defaults``.
    """
    return CondensePlusContextChatEngine.from_defaults(
        retriever=index.as_retriever(similarity_top_k=3),
        verbose=False,
    )
|
111 |
|
112 |
+
# ==============================
|
113 |
+
# 7️⃣ Fungsi untuk Menjawab Chat
|
114 |
+
# ==============================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
def generate_response(message, history, chat_engine):
|
116 |
if history is None:
|
117 |
history = []
|
|
|
134 |
history.append((message, cleaned_text)) # 🔹 Pastikan hanya teks yang masuk ke history
|
135 |
return cleaned_text
|
136 |
|
137 |
+
# ==============================
|
138 |
+
# 8️⃣ Fungsi Utama Menjalankan Aplikasi
|
139 |
+
# ==============================
|
140 |
def main():
    """Prepare the model, index and chat engine, then launch the Gradio UI.

    The steps run in order: download/locate the model, configure the LLM
    settings, build the index, create the chat engine, and finally start a
    text-in/text-out ``gr.Interface``.
    """
    model_path = initialize_llama_model()
    initialize_settings(model_path)

    index = initialize_index()
    chat_engine = initialize_chat_engine(index)

    def chatbot_response(message, history=None):
        # gr.Interface with a single text input calls fn with exactly one
        # argument, so ``history`` must be optional; generate_response
        # substitutes an empty list when it receives None.
        return generate_response(message, history, chat_engine)

    gr.Interface(fn=chatbot_response, inputs="text", outputs="text").launch()
|
|
|
|
|
|
|
|
|
151 |
|
152 |
if __name__ == "__main__":
|
153 |
main()
|