Spaces:
Running
Running
Ilyas KHIAT
committed on
Commit
•
9b1fd5f
1
Parent(s):
78d0151
choisir par scene
Browse files- rag.py +33 -6
- scenes.pkl +3 -0
rag.py
CHANGED
@@ -9,6 +9,7 @@ from uuid import uuid4
|
|
9 |
from prompt import *
|
10 |
import random
|
11 |
from itext2kg.models import KnowledgeGraph
|
|
|
12 |
|
13 |
|
14 |
import faiss
|
@@ -57,6 +58,10 @@ with open("chunks_ia_signature.pkl", "rb") as file:
|
|
57 |
chunks = pickle.load(file)
|
58 |
print("Chunks loaded")
|
59 |
|
|
|
|
|
|
|
|
|
60 |
class sphinx_output(BaseModel):
|
61 |
question: str = Field(description="The question to ask the user to test if they read the entire book")
|
62 |
answers: list[str] = Field(description="The possible answers to the question to test if the user read the entire book")
|
@@ -70,12 +75,30 @@ class verification_score(BaseModel):
|
|
70 |
score: float = Field(description="The score of the user's response from 0 to 10 to the question")
|
71 |
|
72 |
|
73 |
-
|
74 |
llm = ChatOpenAI(model="gpt-4o", max_tokens=300, temperature=0.5)
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
|
81 |
def get_vectorstore() -> FAISS:
|
@@ -98,7 +121,9 @@ def generate_sphinx_response() -> sphinx_output:
|
|
98 |
writer = "Laurent Tripied"
|
99 |
book_name = "Limites de l'imaginaire ou limites planétaires"
|
100 |
summary = summary_text
|
101 |
-
excerpt = get_random_chunk()
|
|
|
|
|
102 |
prompt = PromptTemplate.from_template(template_sphinx)
|
103 |
structured_llm = llm.with_structured_output(sphinx_output)
|
104 |
# Create an LLM chain with the prompt and the LLM
|
@@ -106,6 +131,8 @@ def generate_sphinx_response() -> sphinx_output:
|
|
106 |
|
107 |
return llm_chain.invoke({"writer":writer,"book_name":book_name,"summary":summary,"excerpt":excerpt})
|
108 |
|
|
|
|
|
109 |
def verify_response(response:str,answers:list[str],question:str) -> bool:
|
110 |
prompt = PromptTemplate.from_template(template_verify)
|
111 |
structured_llm = llm.with_structured_output(verification_score)
|
@@ -120,7 +147,7 @@ def retrieve_context_from_vectorestore(query:str) -> str:
|
|
120 |
return retriever.invoke(query)
|
121 |
|
122 |
|
123 |
-
def generate_stream(query:str,messages = [], model = "gpt-4o-mini", max_tokens = 300, temperature =
|
124 |
try:
|
125 |
print("init chat")
|
126 |
print("init template")
|
|
|
9 |
from prompt import *
|
10 |
import random
|
11 |
from itext2kg.models import KnowledgeGraph
|
12 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
13 |
|
14 |
|
15 |
import faiss
|
|
|
58 |
chunks = pickle.load(file)
|
59 |
print("Chunks loaded")
|
60 |
|
61 |
+
with open("scenes.pkl", "rb") as file:
|
62 |
+
scenes = pickle.load(file)
|
63 |
+
print("Scenes loaded")
|
64 |
+
|
65 |
class sphinx_output(BaseModel):
|
66 |
question: str = Field(description="The question to ask the user to test if they read the entire book")
|
67 |
answers: list[str] = Field(description="The possible answers to the question to test if the user read the entire book")
|
|
|
75 |
score: float = Field(description="The score of the user's response from 0 to 10 to the question")
|
76 |
|
77 |
|
|
|
78 |
llm = ChatOpenAI(model="gpt-4o", max_tokens=300, temperature=0.5)
|
79 |
|
80 |
+
def split_texts(text: str) -> list[str]:
    """Split *text* into overlapping chunks.

    The text is handed to a ``RecursiveCharacterTextSplitter`` configured for
    chunks of at most 1000 units with a 200-unit overlap, where size is
    measured with ``len`` and separators are treated as literal strings
    rather than regular expressions.

    Args:
        text: The text to split.

    Returns:
        Whatever ``split_text`` produces for *text* (a list of chunk strings).
    """
    splitter_options = {
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
        "is_separator_regex": False,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)
|
88 |
+
|
89 |
+
#########################################################################
|
90 |
+
### PAR ICI , CHOISIR UNE SCENE SPECIFIQUE DANS L'ARGUMENT DE LA FONCTION
|
91 |
+
def get_random_chunk(scene_specific=5):
    """Return a randomly chosen excerpt, optionally restricted to one scene.

    Args:
        scene_specific: 1-based number of the scene to draw the excerpt from.
            Any falsy value (``None``, ``0``) disables scene selection and
            the excerpt is drawn from the module-level ``chunks`` instead.

    Returns:
        A ``(chunk, scene_specific)`` tuple: the chosen excerpt and the scene
        argument exactly as it was received, so the caller can tell which
        mode was used.

    Raises:
        IndexError: If ``scene_specific`` is not in ``1..len(scenes)``.
            Negative values are rejected explicitly; without the check they
            would wrap around through negative indexing and silently return
            an excerpt from the wrong scene.
        ValueError: If the selected scene splits into no chunks at all.
    """
    if not scene_specific:
        # No scene requested: draw from the chunks of the whole book.
        return random.choice(chunks), scene_specific

    if not 1 <= scene_specific <= len(scenes):
        raise IndexError(
            f"scene_specific must be between 1 and {len(scenes)}, "
            f"got {scene_specific}"
        )

    # Scene numbers are 1-based, the list is 0-based.
    chunks_scene = split_texts(scenes[scene_specific - 1])
    print(f"Scene {scene_specific} has {len(chunks_scene)} chunks")
    print([chunk[0:50] for chunk in chunks_scene])
    print('---')

    if not chunks_scene:
        # Fail with an explicit message instead of an opaque "empty range"
        # error coming from the random module.
        raise ValueError(f"Scene {scene_specific} produced no chunks")

    chunk_chosen = random.choice(chunks_scene)
    print(f"Chosen chunk: {chunk_chosen}")
    return chunk_chosen, scene_specific
|
102 |
|
103 |
|
104 |
def get_vectorstore() -> FAISS:
|
|
|
121 |
writer = "Laurent Tripied"
|
122 |
book_name = "Limites de l'imaginaire ou limites planétaires"
|
123 |
summary = summary_text
|
124 |
+
excerpt , scene_number = get_random_chunk()
|
125 |
+
if scene_number:
|
126 |
+
summary = "scene " + str(scene_number)
|
127 |
prompt = PromptTemplate.from_template(template_sphinx)
|
128 |
structured_llm = llm.with_structured_output(sphinx_output)
|
129 |
# Create an LLM chain with the prompt and the LLM
|
|
|
131 |
|
132 |
return llm_chain.invoke({"writer":writer,"book_name":book_name,"summary":summary,"excerpt":excerpt})
|
133 |
|
134 |
+
#############################################################
|
135 |
+
### PAR ICI , CHOISIR LE DEGRE DE SEVERITE DE LA VERIFICATION
|
136 |
def verify_response(response:str,answers:list[str],question:str) -> bool:
|
137 |
prompt = PromptTemplate.from_template(template_verify)
|
138 |
structured_llm = llm.with_structured_output(verification_score)
|
|
|
147 |
return retriever.invoke(query)
|
148 |
|
149 |
|
150 |
+
def generate_stream(query:str,messages = [], model = "gpt-4o-mini", max_tokens = 300, temperature = 1,index_name="",stream=True,vector_store=None):
|
151 |
try:
|
152 |
print("init chat")
|
153 |
print("init template")
|
scenes.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17fc4636b752c5b8f1434d0c97c95ea3b12605b083689e6d79daacd060f6c110
|
3 |
+
size 142917
|