# importing required libraries
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import wikipedia
import pandas as pd
from tqdm import tqdm

# reading names of the players in the data and displaying a few of them
players = pd.read_csv("artifacts/data.csv", encoding = "latin-1")["Name"].to_list()
print(players[:5])

# extracting information about the players from their wikipedia pages,
# tagging each page with the player's name in upper case
content = ""
for player in tqdm(players, desc = "Fetching Data : "):
    text = wikipedia.page(player, auto_suggest = False).content
    content += player.upper() + "\n" + text + "\n"

# configuring the embedding function for the text chunks
model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name = model_name)

# splitting the text into overlapping chunks of at most 750 characters
text_splitter = RecursiveCharacterTextSplitter(
    separators = [".", "\n"],
    chunk_size = 750,
    chunk_overlap = 125,
    length_function = len
)

# storing the text chunks in the vectorstore
documents = text_splitter.split_text(content)
vectorstore = FAISS.from_texts(documents, embeddings)

# saving the FAISS vectorstore (forward slash keeps the path portable;
# a bare backslash in a string literal is an invalid escape sequence)
vectorstore.save_local("artifacts/FAISS-Vectorstore")
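
# a minimal sketch (not part of the original script) of loading the saved
# vectorstore back and querying it, to sanity-check the artifact above;
# the query string is only an assumed example, and newer LangChain releases
# additionally require allow_dangerous_deserialization = True in load_local
vectorstore = FAISS.load_local("artifacts/FAISS-Vectorstore", embeddings)
results = vectorstore.similarity_search("When did the player make his debut?", k = 3)
for doc in results:
    # each result is a Document; print the start of its chunk text
    print(doc.page_content[:100], "\n---")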