emar committed
Commit 39596d4
1 Parent(s): 64b21dd

attempting some control

Files changed (1)
  1. app.py +31 -4
app.py CHANGED

@@ -5,6 +5,10 @@ from llama_index.core import (
     StorageContext,
     load_index_from_storage, Settings,
 )
+from llama_index.core import PromptHelper
+from llama_index.core.indices.vector_store import VectorIndexRetriever
+from llama_index.core.query_engine import RetrieverQueryEngine
+from llama_index.core.postprocessor import SentenceTransformerRerank, SimilarityPostprocessor
 from llama_index.llms.huggingface import HuggingFaceLLM
 import torch
 PERSIST_DIR = './storage'
@@ -18,14 +22,37 @@ Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5",
 Settings.llm = HuggingFaceLLM(
     model_name="meta-llama/Meta-Llama-3-8B-Instruct",
     tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
-    context_window=2048,
-    max_new_tokens=256,
-    generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
+    context_window=4096,
+    max_new_tokens=512,
+    generate_kwargs={"temperature": 0.3, "top_k": 50, "top_p": 0.85},
     device_map="auto",
 )
 
 storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
 index = load_index_from_storage(storage_context)
+
+prompt_helper = PromptHelper(
+    context_window=4096,
+    num_output=512,
+    chunk_overlap_ratio=0.1,
+    chunk_size_limit=None,
+)
+
+retriever = VectorIndexRetriever(
+    index=index,
+    similarity_top_k=5,
+)
+
+query_engine = RetrieverQueryEngine.from_args(
+    retriever,
+    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
+    prompt_helper=prompt_helper,
+)
+
+def chatbot_response(message, history):
+    # Add a custom prompt template
+    prompt = f"Based on the Elder Scrolls lore, please answer the following question:\n\n{message}\n\nAnswer:"
+    response = query_engine.query(prompt)
+    return str(response)
-query_engine = index.as_query_engine()
 
 
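SentenceTransformerRerank is imported in this commit but never used. Presumably it was meant as a second node postprocessor; a minimal sketch of how it could slot in after the similarity cutoff (the model name and top_n below are assumptions, not part of this commit):

# Hypothetical follow-up, not in this commit: drop weak matches first,
# then rerank the survivors with a cross-encoder before synthesis.
reranker = SentenceTransformerRerank(
    model="cross-encoder/ms-marco-MiniLM-L-2-v2",  # the library's default; an assumption here
    top_n=3,  # keep the three best nodes after reranking
)

query_engine = RetrieverQueryEngine.from_args(
    retriever,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7), reranker],
    prompt_helper=prompt_helper,
)

Postprocessors run in list order, so the 0.7 cutoff (which reads the retriever's cosine scores) is applied before the cross-encoder rescores the remaining nodes.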
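app.py is a Space entry point, and chatbot_response(message, history) matches the callback signature gradio.ChatInterface expects, so the UI hookup presumably looks something like the sketch below. None of this wiring appears in the diff; the import, title, and launch call are assumptions.

# Hypothetical Gradio wiring, assuming ChatInterface drives chatbot_response;
# not part of this commit.
import gradio as gr

demo = gr.ChatInterface(
    fn=chatbot_response,  # (message, history) -> str, defined above in app.py
    title="Elder Scrolls Lore Chatbot",  # illustrative title
)

if __name__ == "__main__":
    demo.launch()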