emar committed
Commit 94f0475 • 1 Parent(s): 3d9b1a5

updated stuff switched to a smaller model

Files changed:
- app.py (+58 -40)
- requirements.txt (+11 -3)
- storage/default__vector_store.json (+1 -1)
- storage/docstore.json (+1 -1)
- storage/index_store.json (+1 -1)
app.py
CHANGED
@@ -1,6 +1,6 @@
 import os
 import gradio as gr
-import
+from dotenv import load_dotenv
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core import (
     VectorStoreIndex,
@@ -8,59 +8,77 @@ from llama_index.core import (
     StorageContext,
     load_index_from_storage, Settings,
 )
-from
+from llama_index.llms.huggingface import HuggingFaceLLM
+from transformers import BitsAndBytesConfig
 
+load_dotenv()
 hf_token = os.getenv('HF_TOKEN')
 
-# Configure the settings
-Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
-
-# Load the Llama 3 model and tokenizer
-model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
-tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_token)
-model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=hf_token, torch_dtype=torch.float16)
 
+# Path to your local corpus directory
+PERSIST_DIR = './storage'
+corpus_directory = 'articles'
 
-
-
-
-
-
-    device=0 if torch.cuda.is_available() else -1,
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    # bnb_4bit_compute_dtype=torch.float16,
+    # bnb_4bit_quant_type="nf4",
+    # bnb_4bit_use_double_quant=True,
 )
 
-#
-
+# Configure the settings
+Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
+
+Settings.llm = HuggingFaceLLM(
+    model_name="microsoft/Phi-3-small-8k-instruct",
+    tokenizer_name="microsoft/Phi-3-small-8k-instruct",
+    context_window=3900,
+    max_new_tokens=500,
+    model_kwargs={"quantization_config": quantization_config},
+    generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
+    # messages_to_prompt=messages_to_prompt,
+    # completion_to_prompt=completion_to_prompt,
+    device_map="auto",
+)
 
-
-
-
+if not os.path.exists(PERSIST_DIR):
+    # load the documents and create the index
+    documents = SimpleDirectoryReader(corpus_directory).load_data()
+    index = VectorStoreIndex.from_documents(documents)
+    # store it for later
+    index.storage_context.persist(persist_dir=PERSIST_DIR)
+else:
+    # load the existing index
+    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
+    index = load_index_from_storage(storage_context)
 
 query_engine = index.as_query_engine()
 
-def chatbot_response(user_input):
-    # Retrieve context from the vector store
-    context = query_engine.query(user_input)
-    context_str = str(context)
 
-
-
 
-    # Generate a response using the Llama 3 pipeline
-    outputs = llama3_pipeline(
-        combined_input,
-        max_new_tokens=256,
-        eos_token_id=tokenizer.eos_token_id,
-        do_sample=True,
-        temperature=0.6,
-        top_p=0.9,
-    )
-    assistant_response = outputs[0]["generated_text"].split("Assistant:")[1].strip()
-    return assistant_response
+# def chat():
+#     print("Chatbot is ready. Type 'exit' to end the conversation.")
+#     while True:
+#         user_input = input("You: ")
+#         if user_input.lower() == 'exit':
+#             print("Ending the chat. Goodbye!")
+#             break
+#         response = query_engine.query(user_input)
+#         print(f"Chatbot: {response}")
+
+def chatbot_response(message, history):
+    response = query_engine.query(message)
+    return str(response)
 
-
-
 
+iface = gr.ChatInterface(
+    fn=chatbot_response,
+    title="UESP Lore Chatbot",
+    description="Ask questions about The Elder Scrolls lore!",
+    examples=["Who is Vivec?", "Tell me about the Oblivion Crisis", "Who is King Edward?"],
+    cache_examples=True,
+)
 
 # Launch the interface
 if __name__ == "__main__":
-
+    # chat()
+    iface.launch()
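A quick, illustrative sanity check of the new setup, run outside the Space build (this sketch is not part of the commit; it assumes the 'articles' corpus or the persisted './storage' index is present locally, a valid HF_TOKEN is set, and the 4-bit Phi-3 model fits on the available GPU):

    # Minimal smoke test: importing app runs its module-level setup
    # (embedding model, quantized Phi-3 LLM, vector index) without starting
    # the Gradio server, since iface.launch() sits behind the __main__ guard.
    from app import query_engine, chatbot_response

    print(query_engine.query("Who is Vivec?"))                        # LlamaIndex Response object
    print(chatbot_response("Tell me about the Oblivion Crisis", []))  # plain string, as gr.ChatInterface expects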
requirements.txt
CHANGED
@@ -1,6 +1,14 @@
+python-dotenv~=1.0.1
 llama_index
-llama-index-llms-ollama
 llama-index-embeddings-huggingface
+llama_index-llms-huggingface
+torch~=2.3.1
+transformers~=4.41.2
+numpy~=1.23.2
+openai~=1.35.3
+scikit-learn~=1.5.0
+requests~=2.32.3
+protobuf==3.20.1
 gradio
-
-
+bitsandbytes
+flash-attn==0.2.4
storage/default__vector_store.json
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0b7f240d19ce80f9b2cdc3fe4819dbf024b823cfdccf53af1ad904d9a629e66e
 size 200618254
storage/docstore.json
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:135f2765537408ee1299e62888e82f2f07d42a00e8dc97dadd089ae46c17f64f
 size 55587608
storage/index_store.json
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:407c0a3877b2d21c624d4aadcce49644edb452fe2b917c39cdce9a630e5ef0d5
 size 958771