Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import gradio as gr
|
2 |
import gspread
|
3 |
-
import re
|
4 |
from oauth2client.service_account import ServiceAccountCredentials
|
5 |
from llama_cpp import Llama
|
6 |
from llama_index.core import VectorStoreIndex, Settings
|
@@ -15,72 +14,60 @@ from llama_index.core.schema import Document
|
|
15 |
# ===================================
|
16 |
# 1️⃣ Fungsi Membaca Data Google Spreadsheet
|
17 |
# ===================================
|
18 |
-
def read_google_sheets(
|
19 |
try:
|
20 |
scope = ["https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive"]
|
21 |
creds = ServiceAccountCredentials.from_json_keyfile_name("credentials.json", scope)
|
22 |
client = gspread.authorize(creds)
|
23 |
-
|
24 |
SPREADSHEET_ID = "1e_cNMhwF-QYpyYUpqQh-XCw-OdhWS6EuYsoBUsVtdNg"
|
|
|
|
|
|
|
25 |
spreadsheet = client.open_by_key(SPREADSHEET_ID)
|
26 |
-
sheet = spreadsheet.worksheet(sheet_name)
|
27 |
-
data = sheet.get_all_values()
|
28 |
-
|
29 |
-
if sheet_name == "pkb" and query:
|
30 |
-
filtered_data = []
|
31 |
-
for row in data[1:]:
|
32 |
-
bab, pasal, judul, isi = row
|
33 |
-
if query.lower() in judul.lower() or query.lower() in isi.lower():
|
34 |
-
filtered_data.append(f"Bab {bab}, Pasal {pasal}: {judul}\n{isi}\n")
|
35 |
-
return "\n".join(filtered_data) if filtered_data else "Maaf, tidak ditemukan pasal terkait."
|
36 |
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
except Exception as e:
|
41 |
return f"❌ ERROR: {str(e)}"
|
42 |
|
43 |
# ===================================
|
44 |
-
# 2️⃣
|
45 |
-
# ===================================
|
46 |
-
def get_relevant_sheet(question):
|
47 |
-
question = question.lower()
|
48 |
-
if re.search(r"(jam kerja|aturan perusahaan|kerja lembur|hak|kewajiban|sanksi|upah|phk|jaminan sosial|disiplin kerja|larangan|upah|sanksi|pesangon|peraturan|sistem kerja)", question):
|
49 |
-
return "pkb"
|
50 |
-
if re.search(r"(target|aktual|selisih).*?(mesin|bulan)", question):
|
51 |
-
return "datatarget"
|
52 |
-
if re.search(r"(sisa cuti|cuti terpakai|cuti.*(nama karyawan)?)", question):
|
53 |
-
return "datacuti"
|
54 |
-
if re.search(r"(lembur|lembur bulan ini.*(nama karyawan)?)", question):
|
55 |
-
return "datalembur"
|
56 |
-
if re.search(r"(kehadiran|hadir|sakit|izin|absen.*(nama karyawan)?)", question):
|
57 |
-
return "dataabsen"
|
58 |
-
return None
|
59 |
-
|
60 |
-
# ===================================
|
61 |
-
# 3️⃣ Inisialisasi Model Llama
|
62 |
# ===================================
|
63 |
def initialize_llama_model():
|
64 |
model_path = hf_hub_download(
|
65 |
-
repo_id="
|
66 |
filename="zephyr-7b-beta.Q4_K_M.gguf",
|
67 |
cache_dir="./models"
|
68 |
)
|
69 |
return model_path
|
70 |
|
71 |
# ===================================
|
72 |
-
#
|
73 |
# ===================================
|
74 |
def initialize_settings(model_path):
|
75 |
Settings.llm = LlamaCPP(model_path=model_path, temperature=0.7)
|
76 |
|
77 |
# ===================================
|
78 |
-
#
|
79 |
# ===================================
|
80 |
def initialize_index():
|
81 |
-
text_data = read_google_sheets(
|
82 |
document = Document(text=text_data)
|
83 |
-
parser = SentenceSplitter(chunk_size=
|
84 |
nodes = parser.get_nodes_from_documents([document])
|
85 |
|
86 |
embedding = HuggingFaceEmbedding("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
@@ -93,21 +80,21 @@ def initialize_chat_engine(index):
|
|
93 |
retriever = index.as_retriever(similarity_top_k=1)
|
94 |
chat_engine = CondensePlusContextChatEngine.from_defaults(
|
95 |
retriever=retriever,
|
96 |
-
verbose=False
|
97 |
)
|
98 |
return chat_engine
|
99 |
|
100 |
# ===================================
|
101 |
-
#
|
102 |
# ===================================
|
103 |
def clean_response(response):
|
104 |
text = "".join(response.response_gen) # Gabungkan teks yang dihasilkan
|
105 |
text = text.replace("\n\n", "\n").strip() # Hilangkan newline berlebihan
|
106 |
-
text = text.
|
107 |
return text
|
108 |
|
109 |
# ===================================
|
110 |
-
#
|
111 |
# ===================================
|
112 |
def generate_response(message, history, chat_engine):
|
113 |
if history is None:
|
@@ -132,18 +119,18 @@ def generate_response(message, history, chat_engine):
|
|
132 |
return cleaned_text
|
133 |
|
134 |
# ===================================
|
135 |
-
#
|
136 |
# ===================================
|
137 |
def main():
|
138 |
model_path = initialize_llama_model()
|
139 |
initialize_settings(model_path)
|
140 |
-
|
141 |
index = initialize_index()
|
142 |
chat_engine = initialize_chat_engine(index)
|
143 |
-
|
144 |
def chatbot_response(message, history):
|
145 |
return generate_response(message, history, chat_engine)
|
146 |
-
|
147 |
gr.Interface(
|
148 |
fn=chatbot_response,
|
149 |
inputs=["text"],
|
|
|
1 |
import gradio as gr
|
2 |
import gspread
|
|
|
3 |
from oauth2client.service_account import ServiceAccountCredentials
|
4 |
from llama_cpp import Llama
|
5 |
from llama_index.core import VectorStoreIndex, Settings
|
|
|
14 |
# ===================================
|
15 |
# 1️⃣ Fungsi Membaca Data Google Spreadsheet
|
16 |
# ===================================
|
17 |
+
def read_google_sheets(
    spreadsheet_id="1e_cNMhwF-QYpyYUpqQh-XCw-OdhWS6EuYsoBUsVtdNg",
    sheet_names=("datatarget", "datacuti", "dataabsen", "datalembur", "pkb"),
    credentials_file="credentials.json",
):
    """Read several worksheets of a Google Spreadsheet into one text blob.

    Each worksheet is rendered as a ``=== Data dari <NAME> ===`` header
    followed by one line per row, with the row's cells joined by ``" | "``.

    Args:
        spreadsheet_id: Key of the spreadsheet to open. Defaults to the
            spreadsheet this application was written for.
        sheet_names: Worksheet titles to read, in output order.
        credentials_file: Path to the service-account JSON key file.

    Returns:
        The concatenated text of all worksheets. Failures are NOT raised:
        a missing worksheet contributes an inline ``❌ ERROR`` line, and any
        other failure makes the whole return value an ``❌ ERROR: ...``
        string, so callers receive a string in every case.
    """
    scope = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive",
    ]
    try:
        creds = ServiceAccountCredentials.from_json_keyfile_name(credentials_file, scope)
        client = gspread.authorize(creds)
        spreadsheet = client.open_by_key(spreadsheet_id)

        all_data = []
        for sheet_name in sheet_names:
            try:
                rows = spreadsheet.worksheet(sheet_name).get_all_values()
            except gspread.exceptions.WorksheetNotFound:
                # One missing worksheet must not abort the others; record
                # the problem inline and keep going.
                all_data.append(f"❌ ERROR: Worksheet {sheet_name} tidak ditemukan.")
                continue

            all_data.append(f"=== Data dari {sheet_name.upper()} ===")
            all_data.extend(" | ".join(row) for row in rows)
            # Separator entry: yields blank lines between worksheets once
            # everything is joined with "\n" below.
            all_data.append("\n")

        return "\n".join(all_data).strip()

    except gspread.exceptions.SpreadsheetNotFound:
        return "❌ ERROR: Spreadsheet tidak ditemukan!"
    except Exception as e:
        # Top-level boundary: credential, network and API errors are all
        # reported as text instead of propagating.
        return f"❌ ERROR: {str(e)}"
|
46 |
|
47 |
# ===================================
|
48 |
+
# 2️⃣ Inisialisasi Model Llama
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
# ===================================
|
50 |
def initialize_llama_model():
    """Fetch the quantized Zephyr-7B-beta GGUF file from the Hugging Face Hub.

    The file is stored under ``./models``.

    Returns:
        Whatever ``hf_hub_download`` returns for the requested file
        (per the huggingface_hub docs, the local path of the cached file).
    """
    model_path = hf_hub_download(
        # The namespace is spelled "TheBloke" on the Hub. The previous
        # "TheBLoke" spelling relied on case-insensitive resolution and
        # would also produce a differently named cache directory.
        repo_id="TheBloke/zephyr-7b-beta-GGUF",
        filename="zephyr-7b-beta.Q4_K_M.gguf",
        cache_dir="./models",
    )
    return model_path
|
57 |
|
58 |
# ===================================
|
59 |
+
# 3️⃣ Inisialisasi Pengaturan Model
|
60 |
# ===================================
|
61 |
def initialize_settings(model_path, temperature=0.7):
    """Install a LlamaCPP model as the global llama_index LLM.

    Args:
        model_path: Path to the local model file handed to ``LlamaCPP``.
        temperature: Sampling temperature passed to ``LlamaCPP``. Defaults
            to 0.7, the value that was previously hard-coded.
    """
    # Settings is process-wide state: every engine created afterwards
    # picks up this LLM.
    Settings.llm = LlamaCPP(model_path=model_path, temperature=temperature)
|
63 |
|
64 |
# ===================================
|
65 |
+
# 4️⃣ Inisialisasi Index & Chat Engine
|
66 |
# ===================================
|
67 |
def initialize_index():
|
68 |
+
text_data = read_google_sheets()
|
69 |
document = Document(text=text_data)
|
70 |
+
parser = SentenceSplitter(chunk_size=100, chunk_overlap=30)
|
71 |
nodes = parser.get_nodes_from_documents([document])
|
72 |
|
73 |
embedding = HuggingFaceEmbedding("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
|
|
|
80 |
retriever = index.as_retriever(similarity_top_k=1)
|
81 |
chat_engine = CondensePlusContextChatEngine.from_defaults(
|
82 |
retriever=retriever,
|
83 |
+
verbose=False # ❌ Hapus verbose agar tidak ada referensi dokumen
|
84 |
)
|
85 |
return chat_engine
|
86 |
|
87 |
# ===================================
|
88 |
+
# 5️⃣ Fungsi untuk Merapikan Jawaban Chatbot
|
89 |
# ===================================
|
90 |
def clean_response(response):
    """Join a streamed chat response into one tidy string.

    Args:
        response: Object exposing ``response_gen``, an iterable of text
            fragments (the streaming tokens of the answer).

    Returns:
        The concatenated text with role labels (``user:``, ``jawaban:``,
        ``assistant:``, in any letter case) removed, every run of
        consecutive newlines collapsed to a single newline, and
        leading/trailing whitespace stripped.
    """
    # Imported locally so the function does not depend on a file-level
    # ``import re`` being present.
    import re

    text = "".join(response.response_gen)
    # Strip role labels first so that any newline run they leave behind is
    # collapsed by the next step. Case-insensitive: models frequently emit
    # "User:" / "Assistant:" with a capital letter.
    text = re.sub(r"(?:user|jawaban|assistant):", "", text, flags=re.IGNORECASE)
    # A single str.replace("\n\n", "\n") pass leaves "\n\n" behind for runs
    # of three or more newlines; the quantifier collapses any run length.
    text = re.sub(r"\n{2,}", "\n", text)
    return text.strip()
|
95 |
|
96 |
# ===================================
|
97 |
+
# 6️⃣ Fungsi untuk Menghasilkan Respons Chatbot
|
98 |
# ===================================
|
99 |
def generate_response(message, history, chat_engine):
|
100 |
if history is None:
|
|
|
119 |
return cleaned_text
|
120 |
|
121 |
# ===================================
|
122 |
+
# 7️⃣ Fungsi Utama untuk Menjalankan Aplikasi
|
123 |
# ===================================
|
124 |
def main():
|
125 |
model_path = initialize_llama_model()
|
126 |
initialize_settings(model_path)
|
127 |
+
|
128 |
index = initialize_index()
|
129 |
chat_engine = initialize_chat_engine(index)
|
130 |
+
|
131 |
def chatbot_response(message, history):
|
132 |
return generate_response(message, history, chat_engine)
|
133 |
+
|
134 |
gr.Interface(
|
135 |
fn=chatbot_response,
|
136 |
inputs=["text"],
|