#create all embeddings, combine all answers and create a merged DB import os from langchain.embeddings.openai import OpenAIEmbeddings from langchain.text_splitter import CharacterTextSplitter from langchain.vectorstores import FAISS import time from lc_base.chain import openai_chain from lc_base.database import Data from lc_base.logs import save_log # Store all reports into input_dir and the generated DB for all reports will be saved in output_dir input_dir = os.path.join("inputs", "policy") output_dir = os.path.join("outputs", "faiss", "policy_stuff70") combined_dir = os.path.join("outputs", "combined", "policy_stuff70", "faiss_index") search_type = "stuff" #map_reduce, stuff model_type = "gpt-4-1106-preview" #gpt-3.5-turbo, gpt-4-1106-preview top_k = 70 default_query = ''' Please generate a comprehensive summary of this document. Ensure the summary is presented in a formal style, and if there are any contradictions or variations in the findings, address them appropriately. The summary should be approximately 1/3 of your input and can be structured in paragraphs or bullet points. ''' data = Data(inp_dir=input_dir, out_dir=output_dir) data.check_output() data.get_faiss_embeddings() list_dir = os.listdir(output_dir) comb_response = '' for dir in list_dir: path = os.path.join(output_dir, dir, 'faiss_index') chain = openai_chain(inp_dir=path) print('Getting reponse for ' + str(dir)) query = default_query start_time = time.time() response = chain.get_response(query, k=top_k, type=search_type, model_name=model_type) print(response) time_taken = time.time() - start_time save_log(file_path='logs/combined_policy.csv', query=query, response=response, model_name=model_type, time_taken=time_taken, inp=input_dir, data=dir) comb_response += str(response) # Split the texts text_splitter = CharacterTextSplitter( separator = "\n", chunk_size = 1000, chunk_overlap = 200, length_function = len, ) texts = text_splitter.split_text(comb_response) # Initialize OPENAI embeddings embedding = OpenAIEmbeddings() # Create Embedding db = FAISS.from_texts(texts, embedding) # Save Embedding db.save_local(combined_dir)