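# Dinosaurs Diversity RAG demo: retrieval-augmented question answering over a
# persisted Chroma vector store of dinosaur-diversity documents, using BGE
# embeddings for retrieval, a local Zephyr 7B Beta GGUF model for generation,
# and Gradio for the web UI.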
import os
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.chains import RetrievalQA
import gradio as gr

# local_llm = "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q5_K_S.gguf"

# Load model directly
# from transformers import AutoModel
# local_llm = AutoModel.from_pretrained("TheBloke/zephyr-7B-beta-GGUF")


# Generation settings passed to ctransformers.
config = {
    "max_new_tokens": 1024,
    "repetition_penalty": 1.1,
    "temperature": 0.1,
    "top_k": 50,
    "top_p": 0.9,
    "stream": True,
    "threads": int(os.cpu_count() / 2),  # use half of the available CPU cores
}


# Load the quantized Zephyr 7B Beta model through LangChain's CTransformers
# wrapper so the resulting LLM can be passed to RetrievalQA below.  The raw
# ctransformers loader is kept for reference, but it returns a plain
# ctransformers model that LangChain chains cannot use directly.
llm_init = CTransformers(
    model="TheBloke/zephyr-7B-beta-GGUF",
    model_file="zephyr-7b-beta.Q4_0.gguf",
    model_type="mistral",
    lib="avx2",  # AVX2 build for CPU inference
    config=config,
)

# from ctransformers import AutoModelForCausalLM
# llm_init = AutoModelForCausalLM.from_pretrained("TheBloke/zephyr-7B-beta-GGUF")

prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know; don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below.
Helpful answer:
"""

model_name = "BAAI/bge-large-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": False}

embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)
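# BAAI/bge-large-en produces 1024-dimensional English sentence embeddings; the
# same embedding function must be used here as was used when the Chroma store
# was built, and it runs on CPU to match the CPU-only LLM setup.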

prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)
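# The Chroma store at "stores/dino_cosine" is assumed to have been persisted in
# advance with the same BGE embeddings.  A minimal ingestion sketch (the source
# PDF path and chunking parameters below are illustrative assumptions):
#
# from langchain.document_loaders import PyPDFLoader
# from langchain.text_splitter import RecursiveCharacterTextSplitter
#
# docs = PyPDFLoader("data/dinosaur_diversity.pdf").load()
# chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(docs)
# db = Chroma.from_documents(
#     chunks,
#     embeddings,
#     collection_metadata={"hnsw:space": "cosine"},
#     persist_directory="stores/dino_cosine",
# )
# db.persist()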

load_vector_store = Chroma(
    persist_directory="stores/dino_cosine", embedding_function=embeddings
)

retriever = load_vector_store.as_retriever(search_kwargs={"k": 1})  # return only the top matching chunk

# query = "How many genera of dinosaurs are currently known?"

# semantic_search = retriever.get_relevant_documents(query)

# chain_type_kwargs = {"prompt": prompt}

# qa = RetrievalQA.from_chain_type(
#     llm=llm_init,
#     chain_type="stuff",
#     retriever=retriever,
#     verbose=True,
#     chain_type_kwargs=chain_type_kwargs,
#     return_source_documents=True,
# )

sample_query = [
    "How many genera of dinosaurs are currently known?",
    "What methods are used to account for the incompleteness of the fossil record?",
    "Were Dinosaurs in Decline Before the Cretaceous-Tertiary Boundary?",
]


def get_response(query):
    chain_type_kwargs = {"prompt": prompt}
    qa = RetrievalQA.from_chain_type(
        llm=llm_init,
        chain_type="stuff",
        retriever=retriever,
        verbose=True,
        chain_type_kwargs=chain_type_kwargs,
        return_source_documents=True,
    )
    response = qa(query)
    # The chain returns a dict: "result" is the generated answer and
    # "source_documents" holds the retrieved context chunks.
    return response["result"]
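# Note: get_response rebuilds the RetrievalQA chain on every request; it could
# equally be constructed once at module level and reused across queries.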


query_input = gr.Text(
    label="Query",
    show_label=True,
    max_lines=2,
    container=False,
    placeholder="Enter your question",
)

gIface = gr.Interface(
    fn=get_response,
    inputs=query_input,
    outputs="text",
    title="Dinosaurs Diversity RAG AI",
    description="RAG demo using Zephyr 7B Beta and Langchain",
    examples=sample_query,
    allow_flagging="never",
)

gIface.launch()
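
# launch() serves the app locally (Gradio defaults to http://127.0.0.1:7860);
# pass share=True to launch() for a temporary public link.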

# llm_chain = LLMChain(prompt=prompt, llm=llm_init, verbose=True)