emar committed on
Commit
94f0475
1 Parent(s): 3d9b1a5

Updated stuff; switched to a smaller model

Browse files
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  import gradio as gr
3
- import torch
4
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
5
  from llama_index.core import (
6
  VectorStoreIndex,
@@ -8,59 +8,77 @@ from llama_index.core import (
8
  StorageContext,
9
  load_index_from_storage, Settings,
10
  )
11
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 
12
 
 
13
  hf_token = os.getenv('HF_TOKEN')
14
 
15
- # Configure the settings
16
- Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
17
-
18
- # Load the Llama 3 model and tokenizer
19
- model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
20
- tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_token)
21
- model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=hf_token, torch_dtype=torch.float16)
22
 
 
 
 
23
 
24
- # Initialize the Llama 3 pipeline
25
- llama3_pipeline = pipeline(
26
- "text-generation",
27
- model=model,
28
- tokenizer=tokenizer,
29
- device=0 if torch.cuda.is_available() else -1,
30
  )
31
 
32
- # Path to your local corpus directory
33
- PERSIST_DIR = './storage'
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- # Load the existing index
36
- storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
37
- index = load_index_from_storage(storage_context)
 
 
 
 
 
 
 
38
 
39
  query_engine = index.as_query_engine()
40
 
41
- def chatbot_response(user_input):
42
- # Retrieve context from the vector store
43
- context = query_engine.query(user_input)
44
- context_str = str(context)
45
 
46
- # Combine user input with retrieved context
47
- combined_input = f"{context_str}\n\nUser: {user_input}\nAssistant:"
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- # Generate a response using the Llama 3 pipeline
50
- outputs = llama3_pipeline(
51
- combined_input,
52
- max_new_tokens=256,
53
- eos_token_id=tokenizer.eos_token_id,
54
- do_sample=True,
55
- temperature=0.6,
56
- top_p=0.9,
57
- )
58
- assistant_response = outputs[0]["generated_text"].split("Assistant:")[1].strip()
59
- return assistant_response
60
 
61
- # Create a Gradio interface
62
- interface = gr.Interface(fn=chatbot_response, inputs="text", outputs="text", title="Elder Scrolls Lore Chatbox - This one sucks more than the local and chatgpt one because of the way the data is passed.")
 
 
 
 
 
63
 
64
  # Launch the interface
65
  if __name__ == "__main__":
66
- interface.launch()
 
 
1
  import os
2
  import gradio as gr
3
+ from dotenv import load_dotenv
4
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
5
  from llama_index.core import (
6
  VectorStoreIndex,
 
8
  StorageContext,
9
  load_index_from_storage, Settings,
10
  )
11
+ from llama_index.llms.huggingface import HuggingFaceLLM
12
+ from transformers import BitsAndBytesConfig
13
 
14
+ load_dotenv()
15
  hf_token = os.getenv('HF_TOKEN')
16
 
 
 
 
 
 
 
 
17
 
18
+ # Paths to the persisted index storage (PERSIST_DIR) and the local corpus directory
19
+ PERSIST_DIR = './storage'
20
+ corpus_directory = 'articles'
21
 
22
+ quantization_config = BitsAndBytesConfig(
23
+ load_in_4bit=True,
24
+ # bnb_4bit_compute_dtype=torch.float16,
25
+ # bnb_4bit_quant_type="nf4",
26
+ # bnb_4bit_use_double_quant=True,
 
27
  )
28
 
29
+ # Configure the settings
30
+ Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
31
+
32
+ Settings.llm = HuggingFaceLLM(
33
+ model_name="microsoft/Phi-3-small-8k-instruct",
34
+ tokenizer_name="microsoft/Phi-3-small-8k-instruct",
35
+ context_window=3900,
36
+ max_new_tokens=500,
37
+ model_kwargs={"quantization_config": quantization_config},
38
+ generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
39
+ # messages_to_prompt=messages_to_prompt,
40
+ # completion_to_prompt=completion_to_prompt,
41
+ device_map="auto",
42
+ )
43
 
44
+ if not os.path.exists(PERSIST_DIR):
45
+ # load the documents and create the index
46
+ documents = SimpleDirectoryReader(corpus_directory).load_data()
47
+ index = VectorStoreIndex.from_documents(documents)
48
+ # store it for later
49
+ index.storage_context.persist(persist_dir=PERSIST_DIR)
50
+ else:
51
+ # load the existing index
52
+ storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
53
+ index = load_index_from_storage(storage_context)
54
 
55
  query_engine = index.as_query_engine()
56
 
 
 
 
 
57
 
58
+ # def chat():
59
+ # print("Chatbot is ready. Type 'exit' to end the conversation.")
60
+ # while True:
61
+ # user_input = input("You: ")
62
+ # if user_input.lower() == 'exit':
63
+ # print("Ending the chat. Goodbye!")
64
+ # break
65
+ # response = query_engine.query(user_input)
66
+ # print(f"Chatbot: {response}")
67
+
68
+ def chatbot_response(message, history):
69
+ response = query_engine.query(message)
70
+ return str(response)
71
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
+ iface = gr.ChatInterface(
74
+ fn=chatbot_response,
75
+ title="UESP Lore Chatbot",
76
+ description="Ask questions about The Elder Scrolls lore!",
77
+ examples=["Who is Vivec?", "Tell me about the Oblivion Crisis", "Who is King Edward?"],
78
+ cache_examples=True,
79
+ )
80
 
81
  # Launch the interface
82
  if __name__ == "__main__":
83
+ # chat()
84
+ iface.launch()
requirements.txt CHANGED
@@ -1,6 +1,14 @@
 
1
  llama_index
2
- llama-index-llms-ollama
3
  llama-index-embeddings-huggingface
 
 
 
 
 
 
 
 
4
  gradio
5
- torch
6
- transformers
 
1
+ python-dotenv~=1.0.1
2
  llama_index
 
3
  llama-index-embeddings-huggingface
4
+ llama_index-llms-huggingface
5
+ torch~=2.3.1
6
+ transformers~=4.41.2
7
+ numpy~=1.23.2
8
+ openai~=1.35.3
9
+ scikit-learn~=1.5.0
10
+ requests~=2.32.3
11
+ protobuf==3.20.1
12
  gradio
13
+ bitsandbytes
14
+ flash-attn==0.2.4
storage/default__vector_store.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0da22f26551b6605b28f9262e36c0952d1d907c2bc2170aa31ee6d923087650
3
  size 200618254
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b7f240d19ce80f9b2cdc3fe4819dbf024b823cfdccf53af1ad904d9a629e66e
3
  size 200618254
storage/docstore.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b99f747606c5b6661654f45b174819907ade179ea87d7b160e296a5ee93d28d9
3
  size 55587608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:135f2765537408ee1299e62888e82f2f07d42a00e8dc97dadd089ae46c17f64f
3
  size 55587608
storage/index_store.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47cb58780a732145dd0bf65a2f59d40e8c0efe79bee17dbae61b860edba853ac
3
  size 958771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:407c0a3877b2d21c624d4aadcce49644edb452fe2b917c39cdce9a630e5ef0d5
3
  size 958771