# Import necessary libraries
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_community.llms import HuggingFacePipeline
from langchain.chains.question_answering import load_qa_chain

# Load and process documents
data_dir = "data"  # renamed from `dir` to avoid shadowing the built-in

def load_docs(directory):
    """Load every document in `directory` using DirectoryLoader's defaults."""
    loader = DirectoryLoader(directory)
    return loader.load()

docs = load_docs(data_dir)

def split_docs(docs, chunk_size=512, chunk_overlap=20):
    """Split documents into overlapping chunks sized for the embedding model."""
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return text_splitter.split_documents(docs)

docs = split_docs(docs)

# Initialize embeddings and vector store
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
persist_directory = "chroma_db"

# Build the Chroma index from the chunked documents and write it to disk
vectordb = Chroma.from_documents(docs, embeddings, persist_directory=persist_directory)
vectordb.persist()

# Reload the persisted index to confirm it can be opened independently
new_db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

def get_similar_docs(query, k=2, score=False):
    """Return the k chunks most similar to `query`, optionally with distances."""
    if score:
        return new_db.similarity_search_with_score(query, k=k)
    return new_db.similarity_search(query, k=k)

# Load the LLM from Hugging Face. Larger alternatives that drop in with no
# other changes: "HuggingFaceH4/zephyr-7b-beta", "gpt2", "bigscience/bloom-1b7"
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama_v1.1")
model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama_v1.1")

text_generation_pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    temperature=0.2,
    do_sample=True,
    repetition_penalty=1.1,
    return_full_text=True,  # keep the prompt so "Helpful Answer:" can be located
    max_new_tokens=400,
)

llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# "stuff" concatenates all retrieved chunks into a single prompt
chain = load_qa_chain(llm, chain_type="stuff")

def get_helpful_answer(text):
    """Extract the text that follows the chain's "Helpful Answer:" marker."""
    index = text.find("Helpful Answer:")
    # If "Helpful Answer:" is not found, return an empty string
    if index == -1:
        return ""
    # Skip past the marker itself and return everything after it
    index += len("Helpful Answer:")
    return text[index:].strip()

def get_answer(query):
    """Retrieve the most similar chunks and run the QA chain over them."""
    similar_docs = get_similar_docs(query)
    answer = chain.run(input_documents=similar_docs, question=query)
    return get_helpful_answer(answer)
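
# Example usage: a minimal sketch of querying the pipeline end to end.
# Assumes the "data" directory above contains at least one document; the
# sample question below is a placeholder and should match your own corpus.
if __name__ == "__main__":
    question = "What is this document collection about?"

    # Inspect retrieval first: lower Chroma distances mean closer matches
    for doc, distance in get_similar_docs(question, k=2, score=True):
        print(f"[distance={distance:.4f}] {doc.page_content[:100]}...")

    # Then run the full retrieval-augmented QA chain
    print(get_answer(question))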