# UESP Lore Chatbot: a RAG app that answers Elder Scrolls lore questions from a
# persisted LlamaIndex vector store, using Meta-Llama-3-8B-Instruct for generation.
import spaces  # Hugging Face Spaces ZeroGPU helper; see the @spaces.GPU decorator below
import gradio as gr
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import (
    StorageContext,
    load_index_from_storage, Settings, PromptHelper
)
from llama_index.core.indices.vector_store import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SentenceTransformerRerank, SimilarityPostprocessor
from llama_index.llms.huggingface import HuggingFaceLLM
import torch
PERSIST_DIR = './storage'  # directory holding the prebuilt, persisted index

# Configure the global LlamaIndex settings.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # kept for local runs; unused on Spaces

# The embedder is pinned to CPU: on ZeroGPU Spaces, CUDA is only available inside
# @spaces.GPU-decorated functions, so model setup at import time must stay on CPU.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")

Settings.llm = HuggingFaceLLM(
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
    tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
    context_window=2048,
    max_new_tokens=256,
    # do_sample=True is required for temperature/top_k/top_p to take effect;
    # transformers otherwise falls back to greedy decoding and ignores them.
    generate_kwargs={"do_sample": True, "temperature": 0.7, "top_k": 50, "top_p": 0.95},
    device_map="auto",
)
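
# Optional memory-saving variant (a sketch, not used here): the 8B model can be
# loaded in 4-bit via bitsandbytes, which must be installed. This passes a
# transformers BitsAndBytesConfig through HuggingFaceLLM's model_kwargs:
#
# from transformers import BitsAndBytesConfig
# Settings.llm = HuggingFaceLLM(
#     model_name="meta-llama/Meta-Llama-3-8B-Instruct",
#     tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
#     context_window=2048,
#     max_new_tokens=256,
#     model_kwargs={"quantization_config": BitsAndBytesConfig(load_in_4bit=True)},
#     device_map="auto",
# )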

# Load the prebuilt vector index from disk rather than re-embedding at startup.
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)
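
# ./storage is assumed to already contain a persisted index. For reference, a
# minimal sketch of how one could be built (the source directory is hypothetical):
#
# from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
# documents = SimpleDirectoryReader("./lore_pages").load_data()
# index = VectorStoreIndex.from_documents(documents)
# index.storage_context.persist(persist_dir=PERSIST_DIR)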

# Earlier retrieval pipeline, kept for reference: a manual retriever with a
# similarity cutoff instead of the cross-encoder reranker used below.
# prompt_helper = PromptHelper(
#     context_window=4096,
#     num_output=512,
#     chunk_overlap_ratio=0.1,
#     chunk_size_limit=None
# )

# retriever = VectorIndexRetriever(
#     index=index,
#     similarity_top_k=5,
# )

# query_engine = RetrieverQueryEngine.from_args(
#     retriever,
#     node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
#     prompt_helper=prompt_helper
# )

# Two-stage retrieval: fetch a pool of candidates, then let a cross-encoder
# reranker keep only the best top_n of them.
rerank = SentenceTransformerRerank(
    model="BAAI/bge-reranker-large", top_n=5
)
# similarity_top_k must exceed the reranker's top_n, otherwise there is nothing
# to rerank: retrieve 10 candidates and keep the best 5.
query_engine = index.as_query_engine(streaming=True, similarity_top_k=10, node_postprocessors=[rerank])


# def chatbot_response(message, history):
#     # Add a custom prompt template
#     prompt = f"Based on the Elder Scrolls lore, please answer the following question:\n\n{message}\n\nAnswer:"
#     response = query_engine.query(prompt)
#     return str(response)


@spaces.GPU  # request a ZeroGPU slot for the duration of each query
def chatbot_response(message, history):
    response = query_engine.query(message)
    # str() on a StreamingResponse consumes the token stream and returns the full text.
    return str(response)
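
# Streaming variant (a sketch): because the query engine was built with
# streaming=True, .query() returns a StreamingResponse whose .response_gen
# yields text chunks. gr.ChatInterface accepts a generator function that
# yields the progressively growing reply:
#
# @spaces.GPU
# def chatbot_response_streaming(message, history):
#     streaming_response = query_engine.query(message)
#     partial = ""
#     for token in streaming_response.response_gen:
#         partial += token
#         yield partial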

iface = gr.ChatInterface(
    fn=chatbot_response,
    title="UESP Lore Chatbot: currently running on Meta-Llama-3-8B-Instruct. It works 'okay'.",
    description="GitHub page for use cases, general information, local installs, etc.: https://github.com/emarron/UESP-lore",
    examples=["Who is Zaraphus?", "What is the relation between Vivec and CHIM?", "What is the Lunar Lorkhan?"],
    cache_examples=True,
)

if __name__ == "__main__":
    iface.launch()