# NOTE(review): the following lines were scraped web-page metadata (not Python)
# and would raise a SyntaxError; commented out so the file parses.
# Spaces: Sleeping / File size: 1,710 Bytes / commit 3369d9f
#Alternative to save_db + combine.py, create all embeddings and combine all answers
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from chain import openai_chain
from database import Data
# Store all reports in input_dir; a per-report FAISS DB is written to output_dir.
input_dir = os.path.join("inputs", "papers")
output_dir = os.path.join("outputs", "faiss", "papers")
# Destination for the combined index built from all per-report answers.
combined_dir = os.path.join("outputs", "combined", "papers_gpt4turbo_mapred5", "faiss_index")

search_type = "map_reduce"  # options: map_reduce, stuff
model_type = "gpt-4-1106-preview"  # options: gpt-3.5-turbo, gpt-4-1106-preview
top_k = 5  # number of chunks retrieved per query
default_query = 'What are the topics discussed in this context? Please explain in detail.'

# Build (or verify) one FAISS index per report under output_dir.
data = Data(inp_dir=input_dir, out_dir=output_dir)
data.check_output()
data.get_faiss_embeddings()

# Query every per-report index and collect the answers.
# Accumulate into a list and join once (avoids quadratic string concat);
# `report_dir` avoids shadowing the builtin `dir`.
responses = []
for report_dir in os.listdir(output_dir):
    path = os.path.join(output_dir, report_dir, 'faiss_index')
    chain = openai_chain(inp_dir=path)
    print(f'Getting response for {report_dir}')  # fixed typo: "reponse"
    response = chain.get_response(default_query, k=top_k, type=search_type, model_name=model_type)
    responses.append(str(response))
    print(response)
comb_response = ''.join(responses)

# Split the combined answers into overlapping chunks for embedding.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)
texts = text_splitter.split_text(comb_response)

# Embed the chunks with OpenAI and persist the combined FAISS index.
embedding = OpenAIEmbeddings()
db = FAISS.from_texts(texts, embedding)
db.save_local(combined_dir)
# (removed stray "|" left over from page extraction)