Spaces:
Sleeping
Sleeping
# DEPRECATED: use keypoints.py to get the combined answer instead.
import pandas as pd | |
import os | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain.vectorstores import FAISS | |
# Build one combined text from the first 'response' value of every CSV in
# `folder`. Every directory entry is read with pandas, so the folder is
# expected to contain only CSV files that have a 'response' column.
folder = 'paper_csvs'

# os.listdir() returns entries in arbitrary order; sorting makes the combined
# text (and therefore the chunk boundaries computed from it) reproducible
# across runs and machines.
list_dirs = sorted(os.listdir(folder))

# Only row 0 of each file is used. The pieces are concatenated back-to-back
# with no separator, exactly as the previous `+=` loop did.
result = ''.join(
    str(pd.read_csv(os.path.join(folder, name))['response'].iloc[0])
    for name in list_dirs
)

# len() on a str counts characters.
print(len(result))
# Sizes recorded from earlier runs (the original notes say "words", but the
# value printed above is a character count -- TODO confirm which was meant):
#   21000 words - consultation reports
#   12988 words - academic papers
# Split the combined text into overlapping chunks.
# The splitter breaks on newlines; chunk_size and chunk_overlap are measured
# with the built-in len, i.e. in characters.
splitter_options = {
    "separator": "\n",
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "length_function": len,
}
text_splitter = CharacterTextSplitter(**splitter_options)
texts = text_splitter.split_text(result)
# Embed every chunk and build a FAISS vector store from the results.
# OpenAIEmbeddings is constructed with no arguments, so its configuration
# comes entirely from the library defaults (presumably credentials are read
# from the environment -- confirm before running).
embedding = OpenAIEmbeddings()
db = FAISS.from_texts(texts=texts, embedding=embedding)

# Persist the index on disk so it can be reloaded later without re-embedding.
db.save_local(folder_path="paper_combined/faiss_index")