Spaces:
Sleeping
Sleeping
File size: 3,297 Bytes
dad4fc7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
import time
import streamlit as st
from langchain_community.llms import CTransformers
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
def load_llm():
    """Load the fine-tuned resume-bot GGUF model via LangChain's CTransformers.

    Returns:
        A ``CTransformers`` LLM configured to generate at most 128 new
        tokens with a sampling temperature of 0.5.
    """
    model_name = 'aryarishit/phi3-unsloth-resumebot-GGUF'

    # Generation settings must go through ``config``: that dict is what the
    # wrapper hands to the underlying ctransformers model. Passing
    # ``max_new_tokens`` / ``temperature`` as top-level keyword arguments does
    # not reach the model, because they are not declared fields of the
    # wrapper. NOTE(review): confirm against the installed langchain version.
    generation_config = {
        'max_new_tokens': 128,
        'temperature': 0.5,
    }
    return CTransformers(model=model_name, config=generation_config)
def get_index():
    """Build a vector index over the documents stored in ``Info_docs``.

    As a side effect this configures the global llama-index ``Settings``:
    the embedding model, no LLM, a chunk size of 84 and a chunk overlap of 25.

    Returns:
        The ``VectorStoreIndex`` built from the loaded documents.
    """
    Settings.embed_model = HuggingFaceEmbedding(
        model_name="sentence-transformers/all-mpnet-base-v2"
    )
    documents = SimpleDirectoryReader('Info_docs').load_data()

    # llama-index's own LLM is switched off here; the chunking values below
    # are applied before the documents are indexed.
    Settings.llm = None
    Settings.chunk_size = 84
    Settings.chunk_overlap = 25

    return VectorStoreIndex.from_documents(documents)
def get_context(index, query, top_k=2, similarity_cutoff=0.2):
    """Retrieve the text chunks most relevant to ``query`` from ``index``.

    Args:
        index: The vector index to search.
        query: The user's question.
        top_k: Maximum number of chunks the retriever returns.
        similarity_cutoff: Cutoff handed to ``SimilarityPostprocessor`` to
            filter the retrieved chunks by similarity score.

    Returns:
        The text of every surviving source node, each followed by a blank
        line, concatenated into one string. Empty string when no node
        survives the cutoff.
    """
    retriever = VectorIndexRetriever(
        index=index,
        similarity_top_k=top_k,
    )
    query_engine = RetrieverQueryEngine(
        retriever=retriever,
        node_postprocessors=[
            SimilarityPostprocessor(similarity_cutoff=similarity_cutoff),
        ],
    )

    response = query_engine.query(query)

    # Only the retrieved source chunks are used; each is terminated by a
    # blank line so chunks stay visually separated in the prompt.
    return "".join(node.text + "\n\n" for node in response.source_nodes)
def get_alpaca_prompt(context, query):
    """Fill the instruction template with the retrieved context and question.

    Args:
        context: Retrieved document text placed after ``Context:``.
        query: The user's question placed after ``Question:``.

    Returns:
        The complete prompt string, ending in ``Answer:``.
    """
    # The backslash in the closing tag is escaped explicitly: a bare "\I" is
    # an invalid escape sequence and triggers a warning on recent Python
    # versions. The runtime text is unchanged.
    # NOTE(review): the closing tag uses a backslash rather than a forward
    # slash, and the wording contains typos ("pricesly"). Both are kept
    # verbatim because the fine-tuned model may depend on this exact template;
    # confirm against the training prompt before changing them.
    instruction_template = (
        "[INST]Consider you are assistant to Rishit Arya, and answers on "
        "behalf of him, Given the following context and a question, "
        "generate an answer based on the given context only. If the answer "
        "to the question is not found in the context, strictly state "
        "\"I don't know.\" only, don't try to make up an answer.Answer "
        "pricesly to what is asked it as if you are answering to Rishit's "
        "potential client. \nContext:{}\nQuestion:{}[\\INST] \nAnswer:"
    )
    return instruction_template.format(context, query)
# ---------------------------------------------------------------------------
# Streamlit page: load the model and index once per session, then answer
# questions typed by the user.
# ---------------------------------------------------------------------------
st.title("Ask-Rishit")

# The loaded model and index are kept in session_state so they are only
# built when the stored value is still None.
for _state_key in ('llm', 'embeddings'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None

if st.session_state['llm'] is None:
    with st.spinner('Loading the model'):
        st.session_state['llm'] = load_llm()

if st.session_state['embeddings'] is None:
    # Despite the key name, this stores the vector index itself.
    st.session_state['embeddings'] = get_index()

query = st.text_input('Enter your Question')

resources_ready = (
    st.session_state['llm'] is not None
    and st.session_state['embeddings'] is not None
)

# st.button is evaluated first so the button is always rendered.
if st.button('Generate') and resources_ready:
    if not query.strip():
        # Avoid a retrieval + generation round trip on an empty question.
        st.warning('Please enter a question before generating.')
    else:
        with st.spinner('Generating.......'):
            llm = st.session_state['llm']
            index = st.session_state['embeddings']

            context = get_context(index, query)
            st.write("Context: " + context)

            prompt = get_alpaca_prompt(context, query)

            # perf_counter is monotonic, so the elapsed time cannot be
            # skewed by wall-clock adjustments.
            start_time = time.perf_counter()
            response = llm.invoke(prompt)
            time_taken = round(time.perf_counter() - start_time, 2)

        st.write(response)
        st.caption('Time taken:' + str(time_taken))
|