Rauhan committed on
Commit
ac318d7
1 Parent(s): 0f48f27

Adding Files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ artifacts/FAISS-Vectorstore/index.faiss filter=lfs diff=lfs merge=lfs -text
37
+ artifacts/FAISS-Vectorstore/index.pkl filter=lfs diff=lfs merge=lfs -text
Football_Players'_RAG_Model.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # importing required libraries
2
+ from langchain.embeddings import HuggingFaceEmbeddings
3
+ from langchain.llms import OpenAI
4
+ from langchain.vectorstores import FAISS
5
+ from langchain.prompts import PromptTemplate
6
+ from dotenv import load_dotenv
7
+ import gradio as gr
8
+ import os
9
+
10
# setting up the environment variables
# load_dotenv reads KEY=VALUE pairs from the given file into os.environ.
load_dotenv("secrets.env")

# initialising the locally saved vectorstore from artifacts
# The embedding model here must match the one used when the index was built
# (faiss_setup.py uses the same model name).
model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# Forward slash instead of a backslash: the backslash form contained an
# invalid "\F" escape sequence and only resolved to a directory on Windows;
# "artifacts/FAISS-Vectorstore" works on Windows, Linux and macOS alike.
# NOTE(review): recent LangChain releases require
# allow_dangerous_deserialization=True for load_local — confirm against the
# installed version before adding it.
vectorstore = FAISS.load_local("artifacts/FAISS-Vectorstore", embeddings)
17
+
18
+ # creating a generate_response function to take the input query and show the output
19
def generate_response(input_query):
    """Answer a football question from the local vectorstore via an OpenAI LLM.

    The query is matched against the FAISS vectorstore, the best-matching
    chunks are placed into a prompt together with the highest relevance
    score, and the prompt is sent to the OpenAI completion model.

    Args:
        input_query: The user's question as text.

    Returns:
        The model's answer as a stripped string, or a fixed fallback message
        when the query is blank or nothing sufficiently relevant is found.
    """
    # Kept local so the function stays self-contained.
    top_k = 4
    min_relevance = 0.25
    not_found_message = (
        "Sorry, I wasn't able to find anything relevant in the knowledge base."
    )

    # A blank query cannot retrieve anything meaningful; skip the embedding
    # and the paid LLM call entirely.
    if input_query is None or not str(input_query).strip():
        return "Please enter a question about one of the footballers."

    result = vectorstore.similarity_search_with_relevance_scores(input_query, k=top_k)

    # max() on an empty sequence raises ValueError, so guard explicitly.
    if not result:
        return not_found_message

    # Each hit is a (document, relevance_score) pair.
    score = max(relevance for _, relevance in result)

    # Enforce the threshold in code: the prompt asks the model to do the same,
    # but a sampled completion cannot be relied on to follow it.
    if score < min_relevance:
        return not_found_message

    PROMPT_TEMPLATE = """
    Consider yourself to be a football expert who has been given the task to answer a question
    based on some of the content you are provided with along with their some relevance scores.
    Please restrict your knowledge to only the given content and do not add up anything on your own.
    Also make sure that if the top relevance score is less than 0.25, generate a response that you weren't able
    to find anything relevant from the knowledge base.

    Here's the question which you have been asked :
    {question}

    Here's the content you are provided with :
    {content}

    Here's the maximum relevance score :
    {score}
    """

    content = "\n-----\n".join(document.page_content for document, _ in result)

    prompt = PromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt.format(question=input_query, content=content, score=score)

    llm = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), temperature=0.95)
    response = llm.predict(prompt).strip()

    return response
48
+
49
# Static copy shown at the top of the Gradio page.
app_title = "Football RAG System : Top Footballers' Profiles Powered by RAG"
app_description = (
    "This innovative project reimagines the way we interact with football history. "
    "Leveraging the power of AI, it dives deep into the lives of 35 legendary players, starting with \"The Guardian\"'s prestigious list. "
    "By extracting and processing Wikipedia content, along with crafting original text, it creates rich profiles teeming with insights. "
    "These profiles are then cleverly segmented and stored in a local vectorstore, powered by cutting-edge open-source tools like Hugging Face embeddings and FAISS. "
    "This clever setup allows users to ask questions about these footballing greats, with the system efficiently retrieving relevant information and using OpenAI's GPT-3.5 language model to weave a tapestry of personalized responses. "
    "It's not just about stats and facts; it's about bringing these legends back to life through the magic of AI-driven storytelling."
)

# One text box in, one text field out, both wired to generate_response.
interface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(),
    outputs=gr.Text(),
    title=app_title,
    description=app_description,
)

# Start the web app.
interface.launch()
artifacts/FAISS-Vectorstore/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8e7a0a8a7a50cdc600891360b4c8608b08849075247e12cc6309c58e2e5d9fd
3
+ size 8100909
artifacts/FAISS-Vectorstore/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d23dce15b2bf3d6d6537b4fc34513cf86f1ce3717f598df5419ec07843c49de
3
+ size 2049644
artifacts/data.csv ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ No,Name,Position,Club,Nationality,Age at Dec 20 2012,Birthplace
2
+ 1,Lionel Messi,Forward,Barcelona,Argentina,25,Rosario
3
+ 2,Cristiano Ronaldo,Forward,Real Madrid,Portugal,27,Funchal
4
+ 3,Xavi Hernández,Midfielder,Barcelona,Spain,32,Terrassa
5
+ 4,Andres Iniesta,Midfielder,Barcelona,Spain,28,Fuentealbilla
6
+ 5,Zlatan Ibrahimovic,Forward,PSG,Sweden,31,Malmö
7
+ 6,Radamel Falcao,Forward,Atletico Madrid,Colombia,26,Santa Marta
8
+ 7,Robin van Persie,Forward,Man Utd,Netherlands,29,Rotterdam
9
+ 8,Andrea Pirlo,Midfielder,Juventus,Italy,33,Flero
10
+ 9,Yaya Toure,Midfielder,Man City,Ivory Coast,29,Bouake
11
+ 10,Edinson Cavani,Forward,Napoli,Uruguay,25,Salto
12
+ 11,Sergio Aguero,Forward,Man City,Argentina,24,Quilmes
13
+ 12,Iker Casillas,Goalkeeper,Real Madrid,Spain,31,Móstoles
14
+ 13,Neymar Jr,Forward,Santos,Brazil,20,Mogi das Cruzes
15
+ 14,Sergio Busquets,Defender/midfielder,Barcelona,Spain,24,Sabadell
16
+ 15,Xabi Alonso,Midfielder,Real Madrid,Spain,31,Tolosa
17
+ 16,Thiago Silva,Defender,PSG,Brazil,28,Rio de Janeiro
18
+ 17,Mesut Ozil,Midfielder,Real Madrid,Germany,24,Gelsenkirchen
19
+ 18,David Silva,Midfielder/forward,Man City,Spain,26,Arguineguin
20
+ 19,Bastian Schweinsteiger,Midfielder,Bayern Munich,Germany,28,Kolbermoor
21
+ 20,Gianluigi Buffon,Goalkeeper,Juventus,Italy,34,Carrara
22
+ 21,Luis Suarez,Forward,Liverpool,Uruguay,25,Salto
23
+ 22,Sergio Ramos,Defender,Real Madrid,Spain,26,"Camas, Seville"
24
+ 23,Vincent Kompany,Defender,Man City,Belgium,26,Uccle
25
+ 24,Gerard Pique,Defender,Barcelona,Spain,25,Barcelona
26
+ 25,Philipp Lahm,Defender,Bayern Munich,Germany,29,Munich
27
+ 26,Willian Borges,Midfielder,Shakhtar Donetsk,Brazil,24,Ribeirão Pires
28
+ 27,Marco Reus,Forward,Borussia Dortmund,Germany,23,Dortmund
29
+ 28,Franck Ribery,Midfielder,Bayern Munich,France,29,Boulogne-sur-Mer
30
+ 29,Manuel Neuer,Goalkeeper,Bayern Munich,Germany,26,Gelsenkirchen
31
+ 30,Ashley Cole,Defender,Chelsea,England,32,Stepney
32
+ 31,Wayne Rooney,Forward,Man Utd,England,27,Croxteth
33
+ 32,Juan Mata,Midfielder,Chelsea,Spain,24,Villafranca Montes de Oca
34
+ 33,Thomas Muller,Forward,Bayern Munich,Germany,23,Weilheim in Oberbayern
35
+ 34,Mario Götze,Midfielder/forward,Borussia Dortmund,Germany,20,Memmingen
36
+ 35,Karim Benzema,Forward,Real Madrid,France,24,Lyon
faiss_setup.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # importing required libraries
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain.vectorstores import FAISS
4
+ from langchain.embeddings import HuggingFaceEmbeddings
5
+ import wikipedia
6
+ import pandas as pd
7
+ from tqdm import tqdm
8
+
9
# reading the names of the players from the data file
# Forward slashes are used for every path below: the backslash forms
# ("artifacts\data.csv", "artifacts\FAISS-Vectorstore") contained invalid
# escape sequences and only resolved to the intended locations on Windows.
# The latin-1 encoding is unchanged from the original script.
players = pd.read_csv("artifacts/data.csv", encoding="latin-1")["Name"].to_list()

# extracting information about the players from their wikipedia pages
# Each page is prefixed with the upper-cased player name and terminated by a
# newline; the pieces are collected in a list and joined once rather than
# growing one string with += on every iteration.
pages = []
for player in tqdm(players, desc="Fetching Data : "):
    # auto_suggest=False looks the title up exactly as written in the CSV.
    text = wikipedia.page(player, auto_suggest=False).content
    pages.append(player.upper() + text + "\n")
content = "".join(pages)

# configuring the embedding function for the text chunks
# app.py loads the index with the same model name; keep the two in sync.
model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# splitting the text into text chunks
# chunk_size / chunk_overlap are measured with len, i.e. in characters.
text_splitter = RecursiveCharacterTextSplitter(
    separators=[".", "\n"],
    chunk_size=750,
    chunk_overlap=125,
    length_function=len,
)

# storing the text chunks into the vectorstore
documents = text_splitter.split_text(content)
vectorstore = FAISS.from_texts(documents, embeddings)

# saving the FAISS vectorstore
vectorstore.save_local("artifacts/FAISS-Vectorstore")
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
pandas
wikipedia
openai
gradio
langchain
faiss-cpu
tiktoken
sentence-transformers
# imported directly by app.py (dotenv) and faiss_setup.py (tqdm)
python-dotenv
tqdm
secrets.env ADDED
@@ -0,0 +1 @@
 
 
1
# Never commit a real credential. Revoke the key that was published here and
# supply a fresh one through the deployment's secret store or an untracked file.
OPENAI_API_KEY=your-openai-api-key-here