Spaces:

BramLeo
/

adminzy

Paused

App Files Community

BramLeo committed on Feb 4

Commit

9cfd8ad

verified ·

1 Parent(s): 9b34bdd

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -15

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gradio as gr
 import gdown
 import pandas as pd
 import os
 import threading
 import schedule
 import time
@@ -17,7 +18,7 @@ from llama_index.core.node_parser import SentenceSplitter
 # Fungsi untuk mengunduh model Llama
 def initialize_llama_model():
     model_path = hf_hub_download(
-        repo_id="TheBLoke/zephyr-7b-beta-GGUF",
         filename="zephyr-7b-beta.Q4_K_M.gguf",
         cache_dir="./models"
     )
@@ -32,25 +33,43 @@ def initialize_settings(model_path):
 # Fungsi untuk mengunduh file CSV terbaru dari Google Drive
 def download_csv_from_drive():
-    csv_url = "https://drive.google.com/uc?id=1UIx369_8GlzPiKArMVg8v-IwC6hYTYA0"  # Ganti dengan ID file Google Drive kamu
     output_csv = "bahandokumen/data.csv"
     if os.path.exists(output_csv):
         os.remove(output_csv)  # Hapus file lama
     print("🔄 Mengunduh file CSV terbaru...")
-    gdown.download(csv_url, output_csv, quiet=False)
     return output_csv
-# Fungsi untuk update index (dijalankan 1x sehari)
 def update_index():
     print(f"🔄 [{datetime.now()}] Mengupdate index dengan data terbaru...")
     csv_file = download_csv_from_drive()
     # Baca CSV dengan Pandas
     df = pd.read_csv(csv_file)
-    # Konversi isi CSV menjadi dokumen teks
     documents = [Document(text=" | ".join(map(str, row.values))) for _, row in df.iterrows()]
     # Tambahkan file dokumen lain
@@ -65,16 +84,12 @@ def update_index():
     ]).load_data()
     documents.extend(text_documents)
-    # Parsing dokumen menjadi nodes
     parser = SentenceSplitter(chunk_size=150, chunk_overlap=10)
     nodes = parser.get_nodes_from_documents(documents)
-    # Gunakan model embedding
     embedding = HuggingFaceEmbedding("firqaaa/indo-sentence-bert-base")
     Settings.embed_model = embedding
-    # Buat index vektor
     global index
     index = VectorStoreIndex(nodes)
     print("✅ Index berhasil diperbarui!")
@@ -85,7 +100,7 @@ def initialize_chat_engine():
 # Fungsi untuk menghasilkan respons chatbot
 def generate_response(message, history):
-    chat_engine = initialize_chat_engine()  # Gunakan index yang sudah diperbarui
     response = chat_engine.stream_chat(message)
     text = "".join(response.response_gen)
     history.append((message, text))
@@ -117,12 +132,12 @@ def main():
     initialize_settings(model_path)
     print("🔄 Inisialisasi index pertama kali...")
-    update_index()  # Buat index pertama kali
     print("⏳ Menjalankan scheduler update index (setiap jam 12 malam)...")
-    start_scheduler()  # Jalankan update otomatis di background
-    launch_gradio()  # Jalankan chatbot
 if __name__ == "__main__":
     main()

 import gdown
 import pandas as pd
 import os
+import shutil
 import threading
 import schedule
 import time
 # Fungsi untuk mengunduh model Llama
 def initialize_llama_model():
     model_path = hf_hub_download(
+        repo_id="TheBloke/zephyr-7b-beta-GGUF",
         filename="zephyr-7b-beta.Q4_K_M.gguf",
         cache_dir="./models"
     )
 # Fungsi untuk mengunduh file CSV terbaru dari Google Drive
 def download_csv_from_drive():
+    csv_url = "https://drive.google.com/uc?id=1UIx369_8GlzPiKArMVg8v-IwC6hYTYA0"  # Ganti dengan ID file terbaru
     output_csv = "bahandokumen/data.csv"
     if os.path.exists(output_csv):
         os.remove(output_csv)  # Hapus file lama
     print("🔄 Mengunduh file CSV terbaru...")
+    try:
+        gdown.download(csv_url, output_csv, quiet=False)
+        if os.path.exists(output_csv):
+            print(f"✅ File berhasil diunduh: {output_csv}")
+        else:
+            print("❌ Gagal mengunduh file. Cek kembali link Google Drive.")
+    except Exception as e:
+        print(f"❌ Terjadi kesalahan saat mengunduh file: {e}")
     return output_csv
+# Fungsi untuk menyimpan file ke Hugging Face Spaces
+def save_to_huggingface():
+    src = "bahandokumen/data.csv"
+    dst = "/home/user/app/bahandokumen/data.csv"  # Jalur sesuai dengan HF Spaces
+    if os.path.exists(src):
+        shutil.copy(src, dst)
+        print(f"✅ File {src} berhasil disalin ke {dst}")
+    else:
+        print("❌ File tidak ditemukan, pastikan berhasil diunduh.")
+# Fungsi untuk update index chatbot
 def update_index():
     print(f"🔄 [{datetime.now()}] Mengupdate index dengan data terbaru...")
     csv_file = download_csv_from_drive()
+    save_to_huggingface()
     # Baca CSV dengan Pandas
     df = pd.read_csv(csv_file)
     documents = [Document(text=" | ".join(map(str, row.values))) for _, row in df.iterrows()]
     # Tambahkan file dokumen lain
     ]).load_data()
     documents.extend(text_documents)
     parser = SentenceSplitter(chunk_size=150, chunk_overlap=10)
     nodes = parser.get_nodes_from_documents(documents)
     embedding = HuggingFaceEmbedding("firqaaa/indo-sentence-bert-base")
     Settings.embed_model = embedding
     global index
     index = VectorStoreIndex(nodes)
     print("✅ Index berhasil diperbarui!")
 # Fungsi untuk menghasilkan respons chatbot
 def generate_response(message, history):
+    chat_engine = initialize_chat_engine()
     response = chat_engine.stream_chat(message)
     text = "".join(response.response_gen)
     history.append((message, text))
     initialize_settings(model_path)
     print("🔄 Inisialisasi index pertama kali...")
+    update_index()
     print("⏳ Menjalankan scheduler update index (setiap jam 12 malam)...")
+    start_scheduler()
+    launch_gradio()
 if __name__ == "__main__":
     main()