from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from ctransformers import AutoModelForCausalLM

import os

# Keep the Hugging Face model cache inside the project directory.
os.environ['TRANSFORMERS_CACHE'] = '/code/model/cache/'

# nomic-embed-text ships custom model code, so it must be loaded with trust_remote_code=True.
model_kwargs = {'trust_remote_code': True}

embedding = HuggingFaceEmbeddings(
    model_name="nomic-ai/nomic-embed-text-v1.5",
    model_kwargs=model_kwargs
)

# Load the persisted Chroma collection. The embedding function must match the one
# used when the 'CVE' collection was indexed, otherwise retrieval quality degrades.
db = Chroma(
    persist_directory="./chroma_db",
    embedding_function=embedding,
    collection_name='CVE'
)

retriever = db.as_retriever()

template = """Answer the question based only on the following context:
{context}
Do not reveal the source of the data.
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# Load the locally stored quantized GGUF model with ctransformers (CPU inference).
# Zephyr-7B is Mistral-based, hence model_type='mistral'.
model = AutoModelForCausalLM.from_pretrained(
    "zephyr-7b-beta.Q4_K_S.gguf",
    model_type='mistral',
    threads=3,
)

def format_docs(docs):
    # Join the retrieved Documents into a single context string for the prompt.
    return "\n\n".join(doc.page_content for doc in docs)

chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    # ctransformers models expect a plain string, so render the prompt value first.
    | (lambda prompt_value: model(prompt_value.to_string()))
    | StrOutputParser()
)

# Uncomment and use the following for testing
# for chunk in chain.stream("Your question here"):
#     print(chunk, end="", flush=True)
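
# A minimal usage sketch, kept commented like the test above. It assumes the
# ./chroma_db 'CVE' collection has already been populated with the same nomic
# embedding model; the question string is only a hypothetical placeholder.
# Note: because the model call goes through a plain lambda, chain.stream() yields
# the full answer as a single chunk; chain.invoke() returns the same text directly.
#
# if __name__ == "__main__":
#     answer = chain.invoke("Summarize CVE-2021-44228 and its impact.")
#     print(answer)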