Update app.py
Browse files
app.py
CHANGED
@@ -11,6 +11,32 @@ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
|
11 |
from huggingface_hub import hf_hub_download
|
12 |
from llama_index.core.node_parser import SentenceSplitter
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
# Fungsi untuk mengunduh model Llama
|
15 |
def initialize_llama_model():
|
16 |
# Unduh model jika belum ada di direktori kerja
|
@@ -23,9 +49,11 @@ def initialize_llama_model():
|
|
23 |
|
24 |
# Fungsi untuk mengatur konfigurasi Settings
|
25 |
def initialize_settings(model_path):
|
26 |
-
Settings.llm =
|
27 |
model_path=model_path,
|
28 |
-
|
|
|
|
|
29 |
)
|
30 |
|
31 |
# Fungsi untuk Menginisialisasi Index
|
@@ -87,10 +115,10 @@ def initialize_index():
|
|
87 |
def initialize_chat_engine(index):
|
88 |
from llama_index.core.prompts import PromptTemplate
|
89 |
from llama_index.core.chat_engine.condense_plus_context import CondensePlusContextChatEngine
|
90 |
-
retriever = index.as_retriever(similarity_top_k=
|
91 |
chat_engine = CondensePlusContextChatEngine.from_defaults(
|
92 |
retriever=retriever,
|
93 |
-
verbose=
|
94 |
)
|
95 |
return chat_engine
|
96 |
|
|
|
11 |
from huggingface_hub import hf_hub_download
|
12 |
from llama_index.core.node_parser import SentenceSplitter
|
13 |
|
14 |
+
# Reinstall llama-cpp-python with CUDA support
def install_llama_with_cuda():
    """Install the packages listed in requirements.txt with CUDA build flags.

    Every non-empty, non-comment line of ``requirements.txt`` (read from the
    current working directory) is installed with ``pip`` while
    ``CMAKE_ARGS=-DGGML_CUDA=on`` is present in the environment.

    Nothing is installed when ``nvcc`` cannot be found on PATH. Failures are
    reported on stdout instead of being raised, so the function always
    returns ``None``.
    """
    # Function-scope imports keep this block self-contained regardless of
    # what the top of the file imports.
    import os
    import shutil
    import subprocess
    import sys

    try:
        # Check for the CUDA Toolkit before touching any package: a CUDA
        # build cannot succeed without the compiler.
        if not shutil.which("nvcc"):
            print("CUDA Toolkit tidak ditemukan. Pastikan sudah diinstal.")
            return

        # Read requirements.txt, skipping blank lines and comments so they
        # are not handed to pip as package names.
        with open("requirements.txt", "r", encoding="utf-8") as f:
            stripped_lines = (line.strip() for line in f)
            packages = [
                entry
                for entry in stripped_lines
                if entry and not entry.startswith("#")
            ]

        print("Memasang ulang llama-cpp-python dengan dukungan CUDA...")

        # Extend the current environment instead of replacing it; passing
        # only CMAKE_ARGS would drop PATH, HOME, etc. from the child process.
        build_env = {**os.environ, "CMAKE_ARGS": "-DGGML_CUDA=on"}

        # Install each package with CMAKE_ARGS set for CUDA support.
        for package in packages:
            subprocess.run(
                # Use the running interpreter's pip so the packages land in
                # the same environment as this application.
                [sys.executable, "-m", "pip", "install", package],
                env=build_env,
                check=True,
            )

        print("llama-cpp-python berhasil diinstal ulang dengan dukungan CUDA.")
    except subprocess.CalledProcessError as e:
        print(f"Error saat menginstal ulang llama-cpp-python: {e}")
    except Exception as e:
        # Deliberate best-effort boundary: report and continue rather than
        # abort application start-up (e.g. requirements.txt is missing).
        print(f"Kesalahan umum: {e}")
|
39 |
+
|
40 |
# Fungsi untuk mengunduh model Llama
|
41 |
def initialize_llama_model():
|
42 |
# Unduh model jika belum ada di direktori kerja
|
|
|
49 |
|
50 |
# Configure the shared Settings object
def initialize_settings(model_path):
    """Build a ``Llama`` model from *model_path* and store it on ``Settings.llm``.

    Args:
        model_path: Path of the model file, forwarded unchanged to ``Llama``.
    """
    llm_options = {
        "model_path": model_path,
        "n_gpu_layers": 1,  # adjust to what your device can handle
        "temperature": 0.7,  # adjust for faster responses
        "top_p": 0.9,  # reduces token exploration
    }
    Settings.llm = Llama(**llm_options)
|
58 |
|
59 |
# Fungsi untuk Menginisialisasi Index
|
|
|
115 |
def initialize_chat_engine(index):
    """Create a condense-plus-context chat engine backed by *index*.

    Args:
        index: Object exposing ``as_retriever``; it is asked for a retriever
            with ``similarity_top_k=3``.

    Returns:
        The engine produced by ``CondensePlusContextChatEngine.from_defaults``
        with that retriever and ``verbose=True``.
    """
    # Imports are done at call time, in the same order as before.
    from llama_index.core.prompts import PromptTemplate
    from llama_index.core.chat_engine.condense_plus_context import CondensePlusContextChatEngine

    top_k_retriever = index.as_retriever(similarity_top_k=3)
    return CondensePlusContextChatEngine.from_defaults(
        retriever=top_k_retriever,
        verbose=True,
    )
|
124 |
|