Spaces:
Sleeping
Sleeping
Adding Files
Browse files- .gitattributes +2 -0
- Football_Players'_RAG_Model.ipynb +0 -0
- app.py +57 -0
- artifacts/FAISS-Vectorstore/index.faiss +3 -0
- artifacts/FAISS-Vectorstore/index.pkl +3 -0
- artifacts/data.csv +36 -0
- faiss_setup.py +35 -0
- requirements.txt +8 -0
- secrets.env +1 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
artifacts/FAISS-Vectorstore/index.faiss filter=lfs diff=lfs merge=lfs -text
|
37 |
+
artifacts/FAISS-Vectorstore/index.pkl filter=lfs diff=lfs merge=lfs -text
|
Football_Players'_RAG_Model.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
app.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# importing required libraries
|
2 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
3 |
+
from langchain.llms import OpenAI
|
4 |
+
from langchain.vectorstores import FAISS
|
5 |
+
from langchain.prompts import PromptTemplate
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
import gradio as gr
|
8 |
+
import os
|
9 |
+
|
10 |
+
# Load environment variables from the local env file; OPENAI_API_KEY is read
# from the environment later, when the LLM client is created.
load_dotenv("secrets.env")

# Load the locally saved FAISS vectorstore from the artifacts directory.
# The embedding model configured here is used to embed incoming queries.
model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# Forward slashes are accepted on every OS. The previous literal
# "artifacts\FAISS-Vectorstore" contained the invalid escape "\F" and, on
# non-Windows hosts, was interpreted as a single file name rather than a
# directory path.
vectorstore = FAISS.load_local("artifacts/FAISS-Vectorstore", embeddings)
|
17 |
+
|
18 |
+
# creating a generate_response function to take the input query and show the output
|
19 |
+
def generate_response(input_query):
    """Answer a football question from retrieved context using an OpenAI LLM.

    The query is matched against the module-level ``vectorstore``; the four
    best-matching chunks, together with the highest relevance score, are
    placed into the prompt template and sent to the LLM.

    Args:
        input_query: The user's question as plain text.

    Returns:
        The LLM's reply with surrounding whitespace stripped. A fixed message
        is returned, without calling the LLM, when the query is blank or when
        the vectorstore returns no matches.
    """
    # A blank query carries nothing to retrieve against; skip the search and
    # the API call entirely.
    if not input_query or not input_query.strip():
        return "Please enter a question about one of the footballers."

    result = vectorstore.similarity_search_with_relevance_scores(input_query, k=4)

    # max() over an empty sequence raises ValueError, so handle the
    # "nothing retrieved" case explicitly.
    if not result:
        return "I wasn't able to find anything relevant in the knowledge base."

    PROMPT_TEMPLATE = """
Consider yourself to be a football expert who has been given the task to answer a question
based on some of the content you are provided with along with their some relevance scores.
Please restrict your knowledge to only the given content and do not add up anything on your own.
Also make sure that if the top relevance score is less than 0.25, generate a response that you weren't able
to find anything relevant from the knowledge base.

Here's the question which you have been asked :
{question}

Here's the content you are provided with :
{content}

Here's the maximum relevance score :
{score}
"""

    # Each result item is a (document, relevance_score) pair.
    content = "\n-----\n".join(doc.page_content for doc, _ in result)
    score = max(relevance for _, relevance in result)

    prompt = PromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt.format(question=input_query, content=content, score=score)

    llm = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), temperature=0.95)
    response = llm.predict(prompt).strip()

    return response
|
48 |
+
|
49 |
+
# Static text displayed at the top of the Gradio page.
APP_TITLE = "Football RAG System : Top Footballers' Profiles Powered by RAG"
APP_DESCRIPTION = "This innovative project reimagines the way we interact with football history. Leveraging the power of AI, it dives deep into the lives of 35 legendary players, starting with \"The Guardian\"'s prestigious list. By extracting and processing Wikipedia content, along with crafting original text, it creates rich profiles teeming with insights. These profiles are then cleverly segmented and stored in a local vectorstore, powered by cutting-edge open-source tools like Hugging Face embeddings and FAISS. This clever setup allows users to ask questions about these footballing greats, with the system efficiently retrieving relevant information and using OpenAI's GPT-3.5 language model to weave a tapestry of personalized responses. It's not just about stats and facts; it's about bringing these legends back to life through the magic of AI-driven storytelling."

# One text box for the question, one text area for the generated answer.
question_box = gr.Textbox()
answer_box = gr.Text()

# Wire generate_response to the two components and start the web app.
interface = gr.Interface(
    fn=generate_response,
    inputs=question_box,
    outputs=answer_box,
    title=APP_TITLE,
    description=APP_DESCRIPTION,
)

interface.launch()
|
artifacts/FAISS-Vectorstore/index.faiss
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8e7a0a8a7a50cdc600891360b4c8608b08849075247e12cc6309c58e2e5d9fd
|
3 |
+
size 8100909
|
artifacts/FAISS-Vectorstore/index.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d23dce15b2bf3d6d6537b4fc34513cf86f1ce3717f598df5419ec07843c49de
|
3 |
+
size 2049644
|
artifacts/data.csv
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
No,Name,Position,Club,Nationality,Age at Dec 20 2012,Birthplace
|
2 |
+
1,Lionel Messi,Forward,Barcelona,Argentina,25,Rosario
|
3 |
+
2,Cristiano Ronaldo,Forward,Real Madrid,Portugal,27,Funchal
|
4 |
+
3,Xavi Hernández,Midfielder,Barcelona,Spain,32,Terrassa
|
5 |
+
4,Andres Iniesta,Midfielder,Barcelona,Spain,28,Fuentealbilla
|
6 |
+
5,Zlatan Ibrahimovic,Forward,PSG,Sweden,31,Malmö
|
7 |
+
6,Radamel Falcao,Forward,Atletico Madrid,Colombia,26,Santa Marta
|
8 |
+
7,Robin van Persie,Forward,Man Utd,Netherlands,29,Rotterdam
|
9 |
+
8,Andrea Pirlo,Midfielder,Juventus,Italy,33,Flero
|
10 |
+
9,Yaya Toure,Midfielder,Man City,Ivory Coast,29,Bouake
|
11 |
+
10,Edinson Cavani,Forward,Napoli,Uruguay,25,Salto
|
12 |
+
11,Sergio Aguero,Forward,Man City,Argentina,24,Quilmes
|
13 |
+
12,Iker Casillas,Goalkeeper,Real Madrid,Spain,31,Móstoles
|
14 |
+
13,Neymar Jr,Forward,Santos,Brazil,20,Mogi das Cruzes
|
15 |
+
14,Sergio Busquets,Defender/midfielder,Barcelona,Spain,24,Sabadell
|
16 |
+
15,Xabi Alonso,Midfielder,Real Madrid,Spain,31,Tolosa
|
17 |
+
16,Thiago Silva,Defender,PSG,Brazil,28,Rio de Janeiro
|
18 |
+
17,Mesut Ozil,Midfielder,Real Madrid,Germany,24,Gelsenkirchen
|
19 |
+
18,David Silva,Midfielder/forward,Man City,Spain,26,Arguineguin
|
20 |
+
19,Bastian Schweinsteiger,Midfielder,Bayern Munich,Germany,28,Kolbermoor
|
21 |
+
20,Gianluigi Buffon,Goalkeeper,Juventus,Italy,34,Carrara
|
22 |
+
21,Luis Suarez,Forward,Liverpool,Uruguay,25,Salto
|
23 |
+
22,Sergio Ramos,Defender,Real Madrid,Spain,26,"Camas, Seville"
|
24 |
+
23,Vincent Kompany,Defender,Man City,Belgium,26,Uccle
|
25 |
+
24,Gerard Pique,Defender,Barcelona,Spain,25,Barcelona
|
26 |
+
25,Philipp Lahm,Defender,Bayern Munich,Germany,29,Munich
|
27 |
+
26,Willian Borges,Midfielder,Shakhtar Donetsk,Brazil,24,Ribeirão Pires
|
28 |
+
27,Marco Reus,Forward,Borussia Dortmund,Germany,23,Dortmund
|
29 |
+
28,Franck Ribery,Midfielder,Bayern Munich,France,29,Boulogne-sur-Mer
|
30 |
+
29,Manuel Neuer,Goalkeeper,Bayern Munich,Germany,26,Gelsenkirchen
|
31 |
+
30,Ashley Cole,Defender,Chelsea,England,32,Stepney
|
32 |
+
31,Wayne Rooney,Forward,Man Utd,England,27,Croxteth
|
33 |
+
32,Juan Mata,Midfielder,Chelsea,Spain,24,Villafranca Montes de Oca
|
34 |
+
33,Thomas Muller,Forward,Bayern Munich,Germany,23,Weilheim in Oberbayern
|
35 |
+
34,Mario Götze,Midfielder/forward,Borussia Dortmund,Germany,20,Memmingen
|
36 |
+
35,Karim Benzema,Forward,Real Madrid,France,24,Lyon
|
faiss_setup.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# importing required libraries
|
2 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
3 |
+
from langchain.vectorstores import FAISS
|
4 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
5 |
+
import wikipedia
|
6 |
+
import pandas as pd
|
7 |
+
from tqdm import tqdm
|
8 |
+
|
9 |
+
# Read the player names from the "Name" column of the CSV.
# Forward slashes keep the path portable; the previous literal
# "artifacts\data.csv" contained the invalid escape "\d" and only resolved
# as a path on Windows.
# NOTE(review): the file is decoded as Latin-1 — confirm this matches the
# CSV's real encoding, since accented names feed directly into the
# Wikipedia lookup below.
players = pd.read_csv("artifacts/data.csv", encoding="latin-1")["Name"].to_list()

# Fetch every player's Wikipedia article. Each article is prefixed with the
# upper-cased player name and terminated by a newline; the pieces are
# collected in a list and joined once instead of growing a string with +=.
articles = []
for player in tqdm(players, desc="Fetching Data : "):
    text = wikipedia.page(player, auto_suggest=False).content
    articles.append(player.upper() + text + "\n")
content = "".join(articles)

# Configure the embedding function for the text chunks.
model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# Split the combined text into overlapping chunks, measured in characters.
text_splitter = RecursiveCharacterTextSplitter(
    separators=[".", "\n"],
    chunk_size=750,
    chunk_overlap=125,
    length_function=len,
)

# Embed the chunks and store them in a FAISS vectorstore.
documents = text_splitter.split_text(content)
vectorstore = FAISS.from_texts(documents, embeddings)

# Save the vectorstore under the artifacts directory (portable path, see the
# note on the CSV path above for why backslashes were removed).
vectorstore.save_local("artifacts/FAISS-Vectorstore")
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pandas
|
2 |
+
wikipedia
|
3 |
+
openai
|
4 |
+
gradio
|
5 |
+
langchain
|
6 |
+
faiss-cpu
|
7 |
+
tiktoken
|
8 |
+
sentence-transformers
|
secrets.env
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
OPENAI_API_KEY=REDACTED
|