Ilyas KHIAT committed on
Commit 9b1fd5f
1 Parent(s): 78d0151

choose by scene

Files changed (2):
  1. rag.py +33 -6
  2. scenes.pkl +3 -0
rag.py CHANGED
@@ -9,6 +9,7 @@ from uuid import uuid4
 from prompt import *
 import random
 from itext2kg.models import KnowledgeGraph
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 
 
 import faiss
@@ -57,6 +58,10 @@ with open("chunks_ia_signature.pkl", "rb") as file:
     chunks = pickle.load(file)
     print("Chunks loaded")
 
+with open("scenes.pkl", "rb") as file:
+    scenes = pickle.load(file)
+    print("Scenes loaded")
+
 class sphinx_output(BaseModel):
     question: str = Field(description="The question to ask the user to test if they read the entire book")
     answers: list[str] = Field(description="The possible answers to the question to test if the user read the entire book")
@@ -70,12 +75,30 @@ class verification_score(BaseModel):
     score: float = Field(description="The score of the user's response from 0 to 10 to the question")
 
 
-
 llm = ChatOpenAI(model="gpt-4o", max_tokens=300, temperature=0.5)
 
-
-def get_random_chunk() -> str:
-    return chunks[random.randint(0, len(chunks) - 1)]
+def split_texts(text: str) -> list[str]:
+    splitter = RecursiveCharacterTextSplitter(
+        chunk_size=1000,
+        chunk_overlap=200,
+        length_function=len,
+        is_separator_regex=False,
+    )
+    return splitter.split_text(text)
+
+#########################################################################
+### OVER HERE: CHOOSE A SPECIFIC SCENE IN THE FUNCTION'S ARGUMENT
+def get_random_chunk(scene_specific=5):
+    if scene_specific:
+        chunks_scene = split_texts(scenes[scene_specific - 1])
+        print(f"Scene {scene_specific} has {len(chunks_scene)} chunks")
+        print([chunk[0:50] for chunk in chunks_scene])
+        print('---')
+        chunk_chosen = chunks_scene[random.randint(0, len(chunks_scene) - 1)]
+        print(f"Chosen chunk: {chunk_chosen}")
+        return chunk_chosen, scene_specific
+
+    return chunks[random.randint(0, len(chunks) - 1)], scene_specific
 
 
 def get_vectorstore() -> FAISS:
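
Note: get_random_chunk() now returns a (chunk, scene_number) tuple instead of a bare string, and the default scene_specific=5 means a plain call draws from scene 5 rather than the whole book. A minimal usage sketch of the new contract (assuming rag.py is importable and both pickle files are present):

    from rag import get_random_chunk

    # Scene numbers are 1-based: scene_specific=5 reads scenes[4].
    excerpt, scene_number = get_random_chunk(scene_specific=5)

    # Any falsy value (0, None) skips the scene branch and falls back to a
    # random chunk of the whole book; the falsy value is passed through,
    # which is what lets callers detect that no scene was used.
    excerpt, scene_number = get_random_chunk(scene_specific=0)
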
@@ -98,7 +121,9 @@ def generate_sphinx_response() -> sphinx_output:
     writer = "Laurent Tripied"
     book_name = "Limites de l'imaginaire ou limites planétaires"
     summary = summary_text
-    excerpt = get_random_chunk()
+    excerpt, scene_number = get_random_chunk()
+    if scene_number:
+        summary = "scene " + str(scene_number)
     prompt = PromptTemplate.from_template(template_sphinx)
     structured_llm = llm.with_structured_output(sphinx_output)
     # Create an LLM chain with the prompt and the LLM
@@ -106,6 +131,8 @@ def generate_sphinx_response() -> sphinx_output:
 
     return llm_chain.invoke({"writer":writer,"book_name":book_name,"summary":summary,"excerpt":excerpt})
 
+#############################################################
+### OVER HERE: CHOOSE THE SEVERITY LEVEL OF THE VERIFICATION
 def verify_response(response:str, answers:list[str], question:str) -> bool:
     prompt = PromptTemplate.from_template(template_verify)
     structured_llm = llm.with_structured_output(verification_score)
@@ -120,7 +147,7 @@ def retrieve_context_from_vectorestore(query:str) -> str:
     return retriever.invoke(query)
 
 
-def generate_stream(query:str, messages=[], model="gpt-4o-mini", max_tokens=300, temperature=0.5, index_name="", stream=True, vector_store=None):
+def generate_stream(query:str, messages=[], model="gpt-4o-mini", max_tokens=300, temperature=1, index_name="", stream=True, vector_store=None):
     try:
         print("init chat")
         print("init template")
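
Note: this hunk only raises the default temperature from 0.5 to 1. A hypothetical call under the new default (the function body is outside this diff, so the streamed return type is an assumption):

    # Assumes generate_stream yields text deltas when stream=True (not
    # confirmed by this diff); all other parameters keep their defaults.
    for token in generate_stream("What happens in scene 5?"):
        print(token, end="")
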
scenes.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17fc4636b752c5b8f1434d0c97c95ea3b12605b083689e6d79daacd060f6c110
+size 142917
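
Note: scenes.pkl is tracked with Git LFS, so the diff shows only the pointer file; the roughly 140 KB payload lives in LFS storage. Given how rag.py consumes it (scenes[scene_specific - 1] fed to a text splitter), it is presumably a pickled list of scene strings. A hypothetical sketch of how such a file could be produced:

    import pickle

    # Hypothetical content: one string per scene of the book, in order,
    # so that scene N is found at index N - 1.
    scenes = ["Scene 1 text ...", "Scene 2 text ...", "Scene 3 text ..."]

    with open("scenes.pkl", "wb") as file:
        pickle.dump(scenes, file)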