Spaces:
Runtime error
Runtime error
File size: 3,343 Bytes
973e898 91bdf3d 973e898 035fd3a a3a6d28 035fd3a 973e898 50a222c fa47481 50a222c 973e898 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import os
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.chains import RetrievalQA
import gradio as gr
# local_llm = "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q5_K_S.gguf"
# Load model directly
# from transformers import AutoModel
# local_llm = AutoModel.from_pretrained("TheBloke/zephyr-7B-beta-GGUF")
config = {
"max_new_token": 1024,
"repetition_penalty": 1.1,
"temperature": 0.1,
"top_k": 50,
"top_p": 0.9,
"stream": True,
"threads": int(os.cpu_count() / 2),
}
# local_llm = CTransformers(
# model = "TheBloke/zephyr-7B-beta-GGUF",
# model_file = "zephyr-7b-beta.Q4_0.gguf",
# model_type="mistral",
# lib="avx2", #for CPU use
# **config
# )
from ctransformers import AutoModelForCausalLM
local_llm = AutoModelForCausalLM.from_pretrained("TheBloke/zephyr-7B-beta-GGUF")
llm_init = CTransformers(model=local_llm, model_type="mistral", lib="avx2", **config)
prompt_template = """Use the following piece of information to answers the question asked by the user.
Don't try to make up the answer if you don't know the answer, simply say I don't know.
Context: {context}
Question: {question}
Only helpful answer below.
Helpful answer:
"""
model_name = "BAAI/bge-large-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": False}
embeddings = HuggingFaceBgeEmbeddings(
model_name=model_name,
model_kwargs=model_kwargs,
encode_kwargs=encode_kwargs,
)
prompt = PromptTemplate(
template=prompt_template, input_variables=["context", "question"]
)
load_vector_store = Chroma(
persist_directory="stores/dino_cosine", embedding_function=embeddings
)
retriever = load_vector_store.as_retriever(search_kwargs={"k": 1})
# query = "How many genera of dinosaurs currently known?"
# semantic_search = retriever.get_relevant_documents(query)
# chain_type_kwargs = {"prompt": prompt}
# qa = RetrievalQA.from_chain_type(
# llm=llm_init,
# chain_type="stuff",
# retriever=retriever,
# verbose=True,
# chain_type_kwargs=chain_type_kwargs,
# return_source_documents=True,
# )
sample_query = [
"How many genera of dinosaurs currently known?",
"What methods are used to account for the incompleteness of the fossil record?",
"Were Dinosaurs in Decline Before the Cretaceous or Tertiary Boundary?",
]
def get_response(input):
query = input
chain_type_kwargs = {"prompt": prompt}
qa = RetrievalQA.from_chain_type(
llm=llm_init,
chain_type="stuff",
retriever=retriever,
verbose=True,
chain_type_kwargs=chain_type_kwargs,
return_source_documents=True,
)
response = qa(query)
return response
input = gr.Text(
label="Query",
show_label=True,
max_lines=2,
container=False,
placeholder="Enter your question",
)
gIface = gr.Interface(
fn=get_response,
inputs=input,
outputs="text",
title="Dinosaurs Diversity RAG AI",
description="RAG demo using Zephyr 7B Beta and Langchain",
examples=sample_query,
allow_flagging="never",
)
gIface.launch()
# llm_chain = LLMChain(prompt=prompt, llm=llm_init, verbose=True) |