Ilyas KHIAT committed · Commit 4c0c6d3
Parent(s): a336311

test
Files changed:
- chunks_ia_signature.pkl +3 -0
- main.py +18 -104
- prompt.py +28 -9
- rag.py +95 -11
chunks_ia_signature.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:240d159d8dedc430a3b6049a60f0805fa423cf9abece82b36c4fb650c8c5d437
+size 145837
main.py CHANGED
@@ -21,29 +21,6 @@ from email.mime.text import MIMEText
 
 load_dotenv()
 
-## setup pinecone index
-pinecone_api_key = os.environ.get("PINECONE_API_KEY")
-
-pc = Pinecone(api_key=pinecone_api_key)
-
-index_name = os.environ.get("INDEX_NAME") # change if desired
-
-existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]
-
-if index_name not in existing_indexes:
-    pc.create_index(
-        name=index_name,
-        dimension=1536,
-        metric="cosine",
-        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
-    )
-    while not pc.describe_index(index_name).status["ready"]:
-        time.sleep(1)
-
-index = pc.Index(index_name)
-
-vector_store = PineconeVectorStore(index=index, embedding=embedding)
-
 ## setup authorization
 api_keys = [os.environ.get("FASTAPI_API_KEY")]
 
@@ -67,104 +44,41 @@ else:
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"])
 
 # Pydantic model for the form data
-class ContactForm(BaseModel):
-    name: str
-    email: str
-    message: str
-
-def send_simple_message(to,subject,text):
-    api_key = os.getenv("MAILGUN_API_KEY")
-
-    return requests.post(
-        "https://api.mailgun.net/v3/sandboxafc6970ffdab40ee9566a4e180b117fd.mailgun.org/messages",
-        auth=("api", api_key),
-        data={"from": "Excited User <mailgun@sandboxafc6970ffdab40ee9566a4e180b117fd.mailgun.org>",
-              "to": [to],
-              "subject": subject,
-              "text": text})
-
-# Function to send email
-def send_email(form_data: ContactForm):
-    # sender_email = os.getenv("SENDER_EMAIL")
-    # sender_password = os.getenv("SENDER_PASSWORD")
-
-    receiver_email = os.getenv("RECEIVER_EMAIL") # Your email
-
-    # Setup the message content
-    text = f"Name: {form_data.name}\nEmail: {form_data.email}\nMessage: {form_data.message}"
-    title = "New message from your website!"
-
-    # Send the email
-    try:
-        send_simple_message(receiver_email,title,text)
-    except Exception as e:
-        print(e)
-        return {"message": "Failed to send email."}
-
-# Endpoint to handle form submission
-@app.post("/send_email")
-async def send_contact_form(form_data: ContactForm, background_tasks: BackgroundTasks):
-    background_tasks.add_task(send_email, form_data)
-    return {"message": "Email sent successfully!"}
+class verify_response_model(BaseModel):
+    response: str = Field(description="The response from the user to the question")
+    answers: list[str] = Field(description="The possible answers to the question to test if the user read the entire book")
+    question: str = Field(description="The question asked to the user to test if they read the entire book")
 
 class UserInput(BaseModel):
     query: str
     stream: Optional[bool] = False
    messages: Optional[list[dict]] = []
 
-class ChunkToDB(BaseModel):
-    message: str
-    title: str
+#endpoinds
 
-
-
-async def add_chunk_to_db(chunk: ChunkToDB):
+@app.post("/generate_sphinx")
+async def generate_sphinx():
     try:
-        message = chunk.message
-        title = chunk.title
-        return get_vectorstore(text_chunk=message,index=index,title=title)
+        sphinx : sphinx_output = generate_sphinx_response()
+        return {"question": sphinx.question, "answers": sphinx.answers}
     except Exception as e:
-        return {"message": str(e)}
+        raise HTTPException(status_code=500, detail=str(e))
 
-
-
-async def list_vectors():
+@app.post("/verify_sphinx")
+async def verify_sphinx(response: verify_response_model):
     try:
-
+        score : bool = verify_response(response.response, response.answers, response.question)
+        return {"score": score}
     except Exception as e:
-        return {"message": str(e)}
+        raise HTTPException(status_code=500, detail=str(e))
 
-
 @app.post("/generate")
 async def generate(user_input: UserInput):
     try:
         print(user_input.stream,user_input.query)
         if user_input.stream:
-            return StreamingResponse(generate_stream(user_input.query,user_input.messages,
+            return StreamingResponse(generate_stream(user_input.query,user_input.messages,stream=True),media_type="application/json")
         else:
-            return generate_stream(user_input.query,user_input.messages,
-    except Exception as e:
-        return {"message": str(e)}
-
-@app.post("/retreive_context")
-async def retreive_context_response(query: str):
-    try:
-        return retreive_context(index=index,query=query)
+            return generate_stream(user_input.query,user_input.messages,stream=False)
     except Exception as e:
-        return {"message": str(e)}
-
-
-@app.delete("/delete_vector")
-async def delete_vector(filename_id: str):
-    try:
-        return index.delete(ids=[filename_id])
-    except Exception as e:
-        return {"message": str(e)}
-
-@app.get("/check_server")
-async def check_server():
-    return {"message":"Server is running"}
-
-@app.get("/")
-async def read_root():
-    return {"message":"Welcome to the AI API"}
+        return {"message": str(e)}
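The two new routes can be exercised with a plain HTTP client. A minimal sketch, assuming the app is served locally on uvicorn's default http://localhost:8000; the base URL and the sample user answer below are illustrative placeholders, not part of the commit:

import requests

BASE = "http://localhost:8000"  # assumption: local dev server

# /generate_sphinx takes no body and returns a question plus candidate answers
quiz = requests.post(f"{BASE}/generate_sphinx").json()
print(quiz["question"], quiz["answers"])

# /verify_sphinx expects the verify_response_model fields: response, answers, question
payload = {
    "response": "Karla tombe dans le coma",  # placeholder user answer
    "answers": quiz["answers"],
    "question": quiz["question"],
}
print(requests.post(f"{BASE}/verify_sphinx", json=payload).json())  # {"score": ...}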
prompt.py CHANGED
@@ -1,5 +1,5 @@
 template_sphinx = '''
-Voici un résumé et un bout du récit de {
+Voici un résumé et un bout du récit de {book_name}. Vous êtes le Grand Sphinx, maître des énigmes et des questions.
 Vous devez tester si quelqu'un a lu le récit en lui posant une question qui lui ouvrira la porte vers la réalité de ce récit.
 Votre question doit être en français, et vous devez l'associer aux réponses possibles.
 
@@ -14,13 +14,32 @@ La sortie doit être une question en français, qui teste la compréhension du récit
 
 '''
 
+template_verify = '''
+Vous êtes un expert en correction et comparaison de réponses. Retournez une note sur 10 sur la cohérence de la réponse de l'utilisateur avec la réponse correcte. Voici les détails :
+
+Question : {initial_question}
+
+Réponses correctes : {answers}
+
+Réponse de l'utilisateur : {response}
+
+Évaluez la réponse de l'utilisateur et attribuez une note sur 10 en fonction de sa cohérence avec la réponse correcte.
+
+'''
+
 template = '''
-
-
-
-{
-
-{
-
-
+Vous êtes un assistant IA très intelligent qui connaît tout sur le livre {name_book} de {writer}.
+Vous allez répondre à la question de l'utilisateur, qui portera sur ce livre. Vous répondrez rigoureusement dans le style d'écriture de la nouvelle.
+
+**Graphe de connaissances du livre :** {kg}
+
+**Contexte récupéré (si pertinent pour votre réponse) :** {context}
+
+**Question de l'utilisateur :** {query}
+
+**Sortie attendue :** Votre réponse doit être bien formatée, plaisante à lire et inclure des émojis.
+'''
+
+summary_text = '''
+Ce récit d'anticipation, se déroulant principalement en 2038, explore les tensions entre l'art, la technologie et les limites planétaires à travers une exposition visionnaire des œuvres de René Magritte. Anne-Hélène, nouvelle directrice des Musées Royaux des Beaux-Arts de Belgique, organise cette exposition avec plusieurs personnages clés comme Tristan, un guide discret mais observateur, Karla Madrigale, PDG de la start-up IA SIGNATURE, et Jad Wahid, un artiste engagé. L'exposition vise à confronter les œuvres de Magritte aux limites planétaires pour inspirer de nouvelles façons de penser la place du vivant et de l'art dans un futur dominé par l'IA. Cependant, un incident dramatique se produit lorsque Karla, en interaction avec une installation immersive, tombe dans le coma à la suite d'une réaction toxique des plantes. Les événements soulèvent des questions sur l'éthique, la responsabilité et les limites de l'IA et des technologies immersives. Le récit invite à réfléchir sur le potentiel et les risques de l'IA dans l'art et la société.
 '''
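These templates are consumed through LangChain's PromptTemplate, so the {placeholder} names must match the keys passed at invoke time ({initial_question}, {answers}, {response} for template_verify). A quick standalone check, a sketch assuming prompt.py is importable from the repo root; the sample values are illustrative:

from langchain_core.prompts import PromptTemplate
from prompt import template_verify

prompt = PromptTemplate.from_template(template_verify)
# PromptTemplate infers its input variables from the {placeholders} in the string
assert set(prompt.input_variables) == {"initial_question", "answers", "response"}

print(prompt.format(
    initial_question="Qui tombe dans le coma ?",  # illustrative values
    answers=["Karla Madrigale"],
    response="Karla",
))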
rag.py CHANGED
@@ -8,12 +8,19 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import PromptTemplate
 from uuid import uuid4
 from prompt import *
+import random
+from itext2kg.models import KnowledgeGraph
+
+
+import faiss
+from langchain_community.docstore.in_memory import InMemoryDocstore
 
 from pydantic import BaseModel, Field
 from dotenv import load_dotenv
 import os
 
 from langchain_core.tools import tool
+import pickle
 
 import unicodedata
 
@@ -25,38 +32,115 @@ embedding_model = "text-embedding-3-small"
 embedding = OpenAIEmbeddings(model=embedding_model)
 # vector_store = PineconeVectorStore(index=index_name, embedding=embedding)
 
+def advanced_graph_to_json(graph:KnowledgeGraph):
+    nodes = []
+    edges = []
+    for node in graph.entities:
+        node_id = node.name.replace(" ", "_")
+        label = node.name
+        type = node.label
+        nodes.append({"id": node_id, "label": label, "type": type})
+    for relationship in graph.relationships:
+        source = relationship.startEntity
+        source_id = source.name.replace(" ", "_")
+        target = relationship.endEntity
+        target_id = target.name.replace(" ", "_")
+        label = relationship.name
+        edges.append({"source": source_id, "label": label, "cible": target_id})
+    return {"noeuds": nodes, "relations": edges}
+
+with open("kg_ia_signature.pkl", "rb") as file:
+    loaded_graph = pickle.load(file)
+    graph = advanced_graph_to_json(loaded_graph)
+    print("Graph loaded")
+
+with open("chunks_ia_signature.pkl", "rb") as file:
+    chunks = pickle.load(file)
+    print("Chunks loaded")
+
 class sphinx_output(BaseModel):
     question: str = Field(description="The question to ask the user to test if they read the entire book")
     answers: list[str] = Field(description="The possible answers to the question to test if the user read the entire book")
 
-
+class verify_response_model(BaseModel):
+    response: str = Field(description="The response from the user to the question")
+    answers: list[str] = Field(description="The possible answers to the question to test if the user read the entire book")
+    initial_question: str = Field(description="The question asked to the user to test if they read the entire book")
+
+class verification_score(BaseModel):
+    score: float = Field(description="The score of the user's response from 0 to 10 to the question")
+
+
+
+llm = ChatOpenAI(model="gpt-4o", max_tokens=300, temperature=0.5)
 
 
-def get_random_chunk(
-    return chunks[
+def get_random_chunk() -> str:
+    return chunks[random.randint(0, len(chunks) - 1)]
 
-
-
-
-
-
+
+def get_vectorstore() -> FAISS:
+    index = faiss.IndexFlatL2(len(embedding.embed_query("hello world")))
+    vector_store = FAISS(
+        embedding_function=embedding,
+        index=index,
+        docstore=InMemoryDocstore(),
+        index_to_docstore_id={},
+    )
+    documents = [Document(page_content=chunk) for chunk in chunks]
+    uuids = [str(uuid4()) for _ in range(len(documents))]
+    vector_store.add_documents(documents=documents, ids=uuids)
     return vector_store
+
+vectore_store = get_vectorstore()
+
+
+def generate_sphinx_response() -> sphinx_output:
+    writer = "Laurent Tripied"
+    book_name = "Limites de l'imaginaire ou limites planétaires"
+    summary = summary_text
+    excerpt = get_random_chunk()
+    prompt = PromptTemplate.from_template(template_sphinx)
+    structured_llm = llm.with_structured_output(sphinx_output)
+    # Create an LLM chain with the prompt and the LLM
+    llm_chain = prompt | structured_llm
+
+    return llm_chain.invoke({"writer":writer,"book_name":book_name,"summary":summary,"excerpt":excerpt})
+
+def verify_response(response:str,answers:list[str],question:str) -> bool:
+    prompt = PromptTemplate.from_template(template_verify)
+    structured_llm = llm.with_structured_output(verification_score)
+    llm_chain = prompt | structured_llm
+    score = llm_chain.invoke({"response":response,"answers":answers,"initial_question":question})
+    if score.score >= 0:
+        return True
+
+
+def retrieve_context_from_vectorestore(query:str) -> str:
+    retriever = vectore_store.as_retriever(search_type="mmr", search_kwargs={"k": 3})
+    return retriever.invoke(query)
+
 
 def generate_stream(query:str,messages = [], model = "gpt-4o-mini", max_tokens = 300, temperature = 0.5,index_name="",stream=True,vector_store=None):
     try:
         print("init chat")
         print("init template")
         prompt = PromptTemplate.from_template(template)
+
+        writer = "Laurent Tripied"
+        name_book = "Limites de l'imaginaire ou limites planétaires"
+        name_icon = "Magritte"
+        kg = loaded_graph
         print("retreiving context")
-        context =
+        context = retrieve_context_from_vectorestore(query)
         print(f"Context: {context}")
         llm_chain = prompt | llm | StrOutputParser()
 
         print("streaming")
         if stream:
-            return llm_chain.stream({"
+            return llm_chain.stream({"name_book":name_book,"writer":writer,"name_icon":name_icon,"kg":graph,"context":context,"query":query})
         else:
-            return
+            return llm_chain.invoke({"name_book":name_book,"writer":writer,"name_icon":name_icon,"kg":graph,"context":context,"query":query})
 
     except Exception as e:
         print(e)
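As committed, verify_response asks the LLM for a 0-to-10 verification_score but then tests score.score >= 0, which every grade satisfies, so it returns True for any response and falls through to an implicit None otherwise. A stricter sketch with an explicit pass threshold; the 5.0 cutoff is an assumption, not part of the commit:

def verify_response(response: str, answers: list[str], question: str,
                    threshold: float = 5.0) -> bool:
    prompt = PromptTemplate.from_template(template_verify)
    structured_llm = llm.with_structured_output(verification_score)
    llm_chain = prompt | structured_llm
    # score.score is the model's 0-10 coherence grade for the user's answer
    score = llm_chain.invoke({"response": response, "answers": answers,
                              "initial_question": question})
    return score.score >= threshold  # explicit boolean on every path

Relatedly, generate_stream assigns kg = loaded_graph but then passes the module-level graph JSON into the chain, so the local kg is unused.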