Spaces:
Sleeping
Sleeping
File size: 907 Bytes
3369d9f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
# DEPRECATED: use keypoints.py instead to get the combined answer.
import pandas as pd
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
# Build a single FAISS index from the first 'response' value of every CSV
# found in `folder`, then save that index to disk.

# Directory whose entries are each loaded with pandas.read_csv below.
folder = 'paper_csvs'

# os.listdir() returns entries in arbitrary order; sort them so the combined
# text (and therefore the chunks and the saved index) is reproducible.
list_dirs = sorted(os.listdir(folder))

# Collect the first 'response' cell of each CSV and join once at the end
# rather than growing a string with += inside the loop.
# NOTE(review): the pieces are joined with no separator, as in the original
# loop, so the end of one response runs straight into the start of the
# next -- confirm that this is intended before adding a "\n" here.
responses = []
for name in list_dirs:
    path = os.path.join(folder, name)
    df = pd.read_csv(path)
    responses.append(str(df['response'].iloc[0]))
result = ''.join(responses)

# Size of the combined text. len() counts characters, not words; the figures
# below are the original author's notes, kept verbatim.
print(len(result))
#21000 words - consultation reports
#12988 words - academic papers

# Split the combined text on newlines into chunks of up to 1000 characters
# (measured with len), with a 200-character overlap between chunks.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)
texts = text_splitter.split_text(result)

# Embed every chunk and build the FAISS vector store from them.
# NOTE(review): OpenAIEmbeddings() is constructed without arguments, so it
# presumably picks up its API key from the environment -- confirm.
embedding = OpenAIEmbeddings()
db = FAISS.from_texts(texts, embedding)

# Save the index locally.
db.save_local("paper_combined/faiss_index")