BramLeo committed on
Commit
233e3ae
·
verified ·
1 Parent(s): 9223e62

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -55
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
2
  import gspread
3
  from oauth2client.service_account import ServiceAccountCredentials
4
  from llama_cpp import Llama
5
- from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
6
  from llama_index.core.node_parser import SentenceSplitter
7
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
8
  from llama_index.llms.llama_cpp import LlamaCPP
@@ -11,9 +11,9 @@ from llama_index.core.llms import ChatMessage
11
  from llama_index.core.chat_engine.condense_plus_context import CondensePlusContextChatEngine
12
  from llama_index.core.schema import Document
13
 
14
- # ==============================
15
- # 1️⃣ Fungsi Membaca Data Google Spreadsheet (Cuti, Lembur, Absen, Target)
16
- # ==============================
17
  def read_google_sheets():
18
  try:
19
  scope = ["https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive"]
@@ -21,7 +21,7 @@ def read_google_sheets():
21
  client = gspread.authorize(creds)
22
 
23
  SPREADSHEET_ID = "1e_cNMhwF-QYpyYUpqQh-XCw-OdhWS6EuYsoBUsVtdNg"
24
- sheet_names = ["datatarget", "datacuti", "dataabsen", "datalembur"]
25
 
26
  all_data = []
27
  spreadsheet = client.open_by_key(SPREADSHEET_ID)
@@ -44,31 +44,9 @@ def read_google_sheets():
44
  except Exception as e:
45
  return f"❌ ERROR: {str(e)}"
46
 
47
- # ==============================
48
- # 2️⃣ Fungsi Membaca Data dari File TXT (PKB dan Dokumen Lainnya)
49
- # ==============================
50
- def read_txt_documents():
51
- documents = SimpleDirectoryReader(input_files=[
52
- "bahandokumen/K3.txt", "bahandokumen/bonus.txt", "bahandokumen/absensi.txt",
53
- "bahandokumen/cuti.txt", "bahandokumen/disiplinkerja.txt", "bahandokumen/fasilitas&bantuan.txt",
54
- "bahandokumen/fasilitaskerja.txt", "bahandokumen/hak.txt", "bahandokumen/hubunganpengusaha&serikat.txt",
55
- "bahandokumen/istilah.txt", "bahandokumen/jaminanserikat.txt", "bahandokumen/jamkes.txt",
56
- "bahandokumen/jamsos.txt", "bahandokumen/keluhkesah.txt", "bahandokumen/kenaikanupah.txt",
57
- "bahandokumen/kewajiban.txt", "bahandokumen/kompensasi.txt", "bahandokumen/larangan.txt",
58
- "bahandokumen/lembur.txt", "bahandokumen/luaskesepakatan.txt", "bahandokumen/mogok.txt",
59
- "bahandokumen/pelanggaran&sanksi.txt", "bahandokumen/pendidikan.txt", "bahandokumen/pengangkatan.txt",
60
- "bahandokumen/penilaian&promosi.txt", "bahandokumen/pensiun.txt", "bahandokumen/perjadin.txt",
61
- "bahandokumen/pesangon.txt", "bahandokumen/phk.txt", "bahandokumen/pihak.txt", "bahandokumen/pkb.txt",
62
- "bahandokumen/resign.txt", "bahandokumen/sanksi.txt", "bahandokumen/shift.txt", "bahandokumen/syaratkerja.txt",
63
- "bahandokumen/sisacuti.txt", "bahandokumen/target.txt", "bahandokumen/tatacara.txt", "bahandokumen/tka.txt",
64
- "bahandokumen/tunjangan.txt", "bahandokumen/uangpisah.txt", "bahandokumen/upah.txt", "bahandokumen/upahlembur.txt",
65
- "bahandokumen/waktukerja.txt"
66
- ]).load_data()
67
- return documents
68
-
69
- # ==============================
70
- # 3️⃣ Inisialisasi Model Llama
71
- # ==============================
72
  def initialize_llama_model():
73
  model_path = hf_hub_download(
74
  repo_id="TheBLoke/zephyr-7b-beta-GGUF",
@@ -77,23 +55,20 @@ def initialize_llama_model():
77
  )
78
  return model_path
79
 
80
- # ==============================
81
- # 4️⃣ Inisialisasi Pengaturan Model
82
- # ==============================
83
  def initialize_settings(model_path):
84
  Settings.llm = LlamaCPP(model_path=model_path, temperature=0.7)
85
 
86
- # ==============================
87
- # 5️⃣ Inisialisasi Index
88
- # ==============================
89
  def initialize_index():
90
  text_data = read_google_sheets()
91
  document = Document(text=text_data)
92
- txt_documents = read_txt_documents()
93
-
94
- all_documents = [document] + txt_documents # Gabungkan dokumen CSV dan TXT
95
- parser = SentenceSplitter(chunk_size=150, chunk_overlap=10)
96
- nodes = parser.get_nodes_from_documents(all_documents)
97
 
98
  embedding = HuggingFaceEmbedding("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
99
  Settings.embed_model = embedding
@@ -101,17 +76,26 @@ def initialize_index():
101
  index = VectorStoreIndex(nodes)
102
  return index
103
 
104
- # ==============================
105
- # 6️⃣ Inisialisasi Chat Engine
106
- # ==============================
107
  def initialize_chat_engine(index):
108
  retriever = index.as_retriever(similarity_top_k=3)
109
- chat_engine = CondensePlusContextChatEngine.from_defaults(retriever=retriever, verbose=False)
 
 
 
110
  return chat_engine
111
 
112
- # ==============================
113
- # 7️⃣ Fungsi untuk Menjawab Chat
114
- # ==============================
 
 
 
 
 
 
 
 
 
115
  def generate_response(message, history, chat_engine):
116
  if history is None:
117
  history = []
@@ -134,20 +118,24 @@ def generate_response(message, history, chat_engine):
134
  history.append((message, cleaned_text)) # 🔹 Pastikan hanya teks yang masuk ke history
135
  return cleaned_text
136
 
137
- # ==============================
138
- # 8️⃣ Fungsi Utama Menjalankan Aplikasi
139
- # ==============================
140
  def main():
141
  model_path = initialize_llama_model()
142
  initialize_settings(model_path)
143
-
144
  index = initialize_index()
145
  chat_engine = initialize_chat_engine(index)
146
-
147
  def chatbot_response(message, history):
148
  return generate_response(message, history, chat_engine)
149
-
150
- gr.Interface(fn=chatbot_response, inputs="text", outputs="text").launch()
 
 
 
 
151
 
152
  if __name__ == "__main__":
153
  main()
 
2
  import gspread
3
  from oauth2client.service_account import ServiceAccountCredentials
4
  from llama_cpp import Llama
5
+ from llama_index.core import VectorStoreIndex, Settings
6
  from llama_index.core.node_parser import SentenceSplitter
7
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
8
  from llama_index.llms.llama_cpp import LlamaCPP
 
11
  from llama_index.core.chat_engine.condense_plus_context import CondensePlusContextChatEngine
12
  from llama_index.core.schema import Document
13
 
14
+ # ===================================
15
+ # 1️⃣ Fungsi Membaca Data Google Spreadsheet
16
+ # ===================================
17
  def read_google_sheets():
18
  try:
19
  scope = ["https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive"]
 
21
  client = gspread.authorize(creds)
22
 
23
  SPREADSHEET_ID = "1e_cNMhwF-QYpyYUpqQh-XCw-OdhWS6EuYsoBUsVtdNg"
24
+ sheet_names = ["datatarget", "datacuti", "dataabsen", "datalembur", "pkb"]
25
 
26
  all_data = []
27
  spreadsheet = client.open_by_key(SPREADSHEET_ID)
 
44
  except Exception as e:
45
  return f"❌ ERROR: {str(e)}"
46
 
47
+ # ===================================
48
+ # 2️⃣ Inisialisasi Model Llama
49
+ # ===================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  def initialize_llama_model():
51
  model_path = hf_hub_download(
52
  repo_id="TheBLoke/zephyr-7b-beta-GGUF",
 
55
  )
56
  return model_path
57
 
58
# ===================================
# 3. Configure the global model settings
# ===================================
def initialize_settings(model_path):
    """Install the downloaded GGUF model as LlamaIndex's global LLM."""
    llm = LlamaCPP(model_path=model_path, temperature=0.7)  # same temperature as before
    Settings.llm = llm
63
 
64
# ===================================
# 4. Build the vector index from spreadsheet data
# ===================================
def initialize_index(
    embed_model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    chunk_size=100,
    chunk_overlap=30,
):
    """Fetch the Google Sheets text, split it into nodes, and build a vector index.

    Parameters
    ----------
    embed_model_name : str
        HuggingFace model id used for embeddings. Defaults to the multilingual
        MiniLM model the app previously hard-coded.
    chunk_size : int
        Maximum chunk size used by the sentence splitter (default 100, as before).
    chunk_overlap : int
        Overlap between consecutive chunks (default 30, as before).

    Returns
    -------
    VectorStoreIndex
        Index built over the spreadsheet contents; also sets the global
        ``Settings.embed_model`` as a side effect.
    """
    # NOTE(review): read_google_sheets() returns an error *string* on failure
    # (it formats the exception into "❌ ERROR: ..."), so a failed fetch would
    # be silently indexed as document text — confirm this is intended.
    text_data = read_google_sheets()
    document = Document(text=text_data)

    parser = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    nodes = parser.get_nodes_from_documents([document])

    embedding = HuggingFaceEmbedding(embed_model_name)
    Settings.embed_model = embedding

    index = VectorStoreIndex(nodes)
    return index
78
 
 
 
 
79
def initialize_chat_engine(index):
    """Create a condense-plus-context chat engine over the given index.

    Retrieval uses the top-3 most similar nodes; verbose output is disabled
    so no retrieval traces / document references are printed with answers.
    """
    retriever = index.as_retriever(similarity_top_k=3)
    return CondensePlusContextChatEngine.from_defaults(
        retriever=retriever,
        verbose=False,  # verbose stays off: keeps document references out of replies
    )
86
 
87
# ===================================
# 5. Tidy up the chatbot answer text
# ===================================
def clean_response(response):
    """Collapse a streamed response into one clean answer string.

    Parameters
    ----------
    response : object
        A LlamaIndex streaming response exposing ``response_gen``, an
        iterable of text chunks.

    Returns
    -------
    str
        The joined text with runs of blank lines collapsed and echoed role
        labels ("user:", "jawaban:", "assistant:") removed.
    """
    text = "".join(response.response_gen)  # concatenate the streamed chunks
    # BUG FIX: a single replace("\n\n", "\n") only halves a run of newlines,
    # so "\n\n\n\n" still leaves "\n\n" behind. Loop until no double
    # newlines remain, matching the stated intent of removing excess blank lines.
    while "\n\n" in text:
        text = text.replace("\n\n", "\n")
    text = text.strip()
    # Strip role labels the model sometimes echoes into its own answer.
    text = text.replace("user:", "").replace("jawaban:", "").replace("assistant:", "").strip()
    return text
95
+
96
+ # ===================================
97
+ # 6️⃣ Fungsi untuk Menghasilkan Respons Chatbot
98
+ # ===================================
99
  def generate_response(message, history, chat_engine):
100
  if history is None:
101
  history = []
 
118
  history.append((message, cleaned_text)) # 🔹 Pastikan hanya teks yang masuk ke history
119
  return cleaned_text
120
 
121
# ===================================
# 7. Application entry point
# ===================================
def main():
    """Wire together the model, index, and chat engine, then launch the Gradio UI."""
    model_path = initialize_llama_model()
    initialize_settings(model_path)

    index = initialize_index()
    chat_engine = initialize_chat_engine(index)

    # Closure so Gradio's (message, history) callback can reach the chat engine.
    def chatbot_response(message, history):
        return generate_response(message, history, chat_engine)

    ui = gr.Interface(
        fn=chatbot_response,
        inputs=["text"],
        outputs=["text"],
    )
    ui.launch()
139
 
140
  if __name__ == "__main__":
141
  main()