Ilyas KHIAT committed on
Commit
ef73c14
·
1 Parent(s): f1342ba

multipage et ux ++

Browse files
agents_page/recommended_agent.py CHANGED
@@ -1,5 +1,71 @@
1
  import streamlit as st
2
-
 
3
  #st.set_page_config(page_title="Agents recommandés", page_icon="", layout="wide")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
- st.title("Agents recommandés")
 
1
  import streamlit as st
2
+ from utils.audit.response_llm import generate_response_via_langchain
3
+ from textwrap import dedent
4
  #st.set_page_config(page_title="Agents recommandés", page_icon="", layout="wide")
5
def remove_images_from_content(content):
    """Return a copy of ``content`` where each page entry has lost its ``"images"`` key.

    ``content`` maps a page identifier to a dict holding that page's data.
    A new outer dict and new per-page dicts are built; the input mapping is
    left untouched.
    """
    return {
        page_name: {
            field: payload
            for field, payload in page_data.items()
            if field != "images"
        }
        for page_name, page_data in content.items()
    }
14
+
15
def recommended_agent_main():
    """Render the "Agents recommandés" page.

    Reads the audit produced by the audit page from ``st.session_state``
    (``audit`` and ``audit_simplified``), shows an editable prompt and, when
    the button is pressed, streams an LLM answer built from that prompt plus
    the audited content. What ``st.write_stream`` returns is kept in
    ``st.session_state.response_llm`` so it can be shown again on later reruns.
    """
    st.title("Agents recommandés")

    # The previous guard compared the string literal "audit" to None, which is
    # always False. An empty dict (the audit page's initial value) must also be
    # treated as "no audit yet", otherwise the key lookups below raise KeyError.
    has_audit = "audit" in st.session_state and st.session_state.audit
    has_summary = "audit_simplified" in st.session_state and st.session_state.audit_simplified
    if not has_audit or not has_summary:
        st.error("Veuillez d'abord effectuer un audit pour obtenir des recommandations d'agents.")
        return

    audit = st.session_state.audit_simplified
    content = st.session_state.audit["content"]

    if "response_llm" not in st.session_state:
        st.session_state.response_llm = ""

    file_type = audit.get("type de fichier")
    if file_type == "pdf":
        # images are dropped from the PDF content before it is sent to the LLM
        content = remove_images_from_content(content)
    elif file_type == "audio":
        # only the transcript is sent, not the raw audio samples
        content = content["transcription"]

    ressources = content

    # dedent keeps the literal readable here while giving the user a
    # flush-left prompt to edit
    prompt = dedent('''\
        Tu es designer en intelligence artificielle (IA) spécialisé dans la création d'agents IA autonomes et performants.
        A partir de ressources fournies par l'utilisateur (texte, documents, images, audio), tu es chargé de suggérer la création d'agents autonomes pour mettre en pratique les informations contenues dans les ressources fournies.

        Tu proposes deux solutions :

        Sol. A : 1 seul agent IA dont tu suggéreras :
        * Nom
        * Rôle
        * Objectifs
        * Outils utilisés par l'agent
        * Tâches réalisées par l'agents
        * Compétences de l'agent (backstory)

        Sol. B : 1 équipe d'agents tu suggéreras :
        * Le nombre d'agents
        * Pour chacune d'eux [Nom, Rôle, Objectifs, Outils utilisés par l'agent, Tâches réalisées par l'agents, Compétences de l'agent (backstory)]

        Une fois ce travail réalisé, tu proposes une série de 3 missions avec objectifs SMART pour chacun des agents Sol. A et Sol. B en présentation les résultats dans un tableau contenant :
        Nom de l’agent
        Objectifs à atteindre
        ''')

    # display the prompt and let the user modify it
    prompt_modified = st.text_area("Prompt", prompt, height=300)
    prompt_modified = dedent(prompt_modified)
    if st.button("Générer les recommandations"):
        resource_prompt = f'''Ressources fournies par l'utilisateur :{ressources}'''
        prompt_modified = f"{prompt_modified}\n{resource_prompt}"
        st.session_state.response_llm = st.write_stream(generate_response_via_langchain(query=prompt_modified,stream=True))
    elif st.session_state.response_llm:
        st.info("la dernière réponse générée est affichée ci-dessous")
        st.write(st.session_state.response_llm)
70
 
71
+ recommended_agent_main()
audit_page/audit.py CHANGED
@@ -1,7 +1,7 @@
1
  import streamlit as st
2
  import pymupdf as fitz
3
  import pyperclip
4
- from utils.audit.audit_doc import audit_descriptif_pdf,audit_text,audit_descriptif_word
5
  import dotenv
6
  from utils.audit.audit_audio import evaluate_audio_quality
7
  from PIL import Image
@@ -26,15 +26,16 @@ def classify_file(file):
26
 
27
  #display content
28
  def display_content_doc(content:dict,col:st):
29
-
30
  number_of_pages = len(content)
31
- col.info("si vous choisissez 0, vous verrez le contenu de toutes les pages")
32
 
33
  number = col.number_input("Numéro de page", min_value=0, max_value=number_of_pages, value=0,key="number_page_content")
34
  #0 means all pages
35
  if number > 0:
36
  page : dict = content[f"page_{number-1}"]
 
37
  option = col.radio("Type de contenu",list(content[f"page_0"].keys()), index=0,horizontal=True)
 
38
  if option == "images":
39
  if number == 0:
40
  images = [img for page in content.values() for img in page["images"]]
@@ -62,55 +63,132 @@ def display_content_doc(content:dict,col:st):
62
  links = [link for page in content.values() for link in page["liens"]]
63
  else:
64
  links = page["liens"]
 
65
  for i, link in enumerate(links):
66
- col.markdown(f"- {i+1}: {link['uri']} (page {link['page']})")
 
 
 
 
 
 
67
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
- def display_audit_pdf(uploaded_file,col:st):
72
- if st.session_state.name_file != uploaded_file.name:
73
- st.session_state.name_file = uploaded_file.name
74
- with st.spinner("Analyse du document..."):
75
- st.session_state.audit = audit_descriptif_pdf(uploaded_file,200)
76
  audit = st.session_state.audit["audit"]
77
- content = st.session_state.audit["content"]
78
- #global audit
79
- audit_simplified = {
80
- "Nombre de pages": audit["number_of_pages"],
81
- "Nombre d'images": audit["number_of_images"],
82
- "Nombre de liens": audit["number_of_links"],
83
- "Nombre de tableaux": audit["number_of_tables"],
84
- "Nombre de tokens": audit["number_of_tokens"],
85
- "Nombre de mots": audit["number_of_words"],
86
- "Mots clés": audit["key_words"]
87
- }
88
 
89
  well_formatted_audit = "Contenus audités\n"
90
  for key, value in audit_simplified.items():
91
  well_formatted_audit += f"- {key}: {value}\n"
92
 
93
-
94
  col.code(well_formatted_audit)
95
 
96
- #audit par page
97
- with col.expander("Audit par page"):
98
- number = st.number_input("Numéro de page", min_value=1, max_value=audit["number_of_pages"], value=1,key="number_page_audit")
99
- audit_page = audit[f"page_{number-1}"]
100
- audit_page = {
101
- "Nombre d'images": audit_page["number_of_images"],
102
- "Nombre de liens": audit_page["number_of_links"],
103
- "Nombre de tableaux": audit_page["number_of_tables"],
104
- "Nombre de tokens": audit_page["number_of_tokens"],
105
- "Nombre de mots": audit_page["number_of_words"],
106
- }
107
- well_formatted_audit_page = "Audit descriptif\n"
108
- for key, value in audit_page.items():
109
- well_formatted_audit_page += f"- {key}: {value}\n"
110
-
111
- st.code(well_formatted_audit_page)
 
 
112
 
113
- return content
114
 
115
 
116
  def audit_main():
@@ -128,97 +206,22 @@ def audit_main():
128
  st.session_state.audit = {}
129
  if "name_file" not in st.session_state:
130
  st.session_state.name_file = ""
 
 
131
 
132
  # File uploader
133
  uploaded_file = col1.file_uploader("Télécharger un ou plusieurs documents")
134
 
135
  if uploaded_file is not None:
136
  type = classify_file(uploaded_file)
 
137
 
138
  col1.write(f"Type de fichier: {type}")
139
 
140
- col1.write("### Synthèse audit du ou des document(s) téléchargé(s)")
141
-
142
-
143
-
144
- if type == "pdf":
145
- content = display_audit_pdf(uploaded_file,col1)
146
- with col2.expander("Contenu"):
147
- display_content_doc(content,st)
148
-
149
- elif type == "audio":
150
- if st.session_state.name_file != uploaded_file.name:
151
- st.session_state.name_file = uploaded_file.name
152
- with st.spinner("Analyse de l'audio..."):
153
- st.session_state.audit = evaluate_audio_quality(uploaded_file)
154
- audit = st.session_state.audit
155
-
156
- #audit global simplifié
157
- audit_simplified = {
158
- "Durée": f"{audit['duration']:0.2f} minutes",
159
- "Nombre de mots": audit["number_of_words"],
160
- "Nombre de tokens": audit["number_of_tokens"],
161
- "Volume": f"{audit['volume']:0.2f} dBFS (déciBels Full Scale)",
162
- "SNR": f"{max(audit['SNR'],0):0.2f} dB (Ratio Signal / Bruit)",
163
- }
164
-
165
- well_formatted_audit = "Contenus audités\n"
166
- for key, value in audit_simplified.items():
167
- well_formatted_audit += f"- {key}: {value}\n"
168
-
169
- col1.code(well_formatted_audit)
170
-
171
- with col2.expander("Transcription"):
172
- st.write(audit["transcription"])
173
- if st.button("📋",key="copy_transcription"):
174
- pyperclip.copy(audit["transcription"])
175
- st.success("Transcription copiée dans le presse-papier")
176
-
177
- elif type == "text":
178
- text = uploaded_file.read().decode("utf-8")
179
- if st.session_state.name_file != uploaded_file.name:
180
- st.session_state.name_file = uploaded_file.name
181
- with st.spinner("Analyse du texte..."):
182
- st.session_state.audit = audit_text(text)
183
- audit = st.session_state.audit
184
-
185
- #audit global simplifié
186
- audit_simplified = {
187
- "Nombre de tokens": audit["number_of_tokens"],
188
- "Nombre de mots": audit["number_of_words"]
189
- }
190
-
191
- well_formatted_audit = "Audit descriptif\n"
192
- for key, value in audit_simplified.items():
193
- well_formatted_audit += f"- {key}: {value}\n"
194
-
195
- col1.code(well_formatted_audit)
196
-
197
- with col2.expander("Texte"):
198
- st.text_area("Texte",text,height=200)
199
-
200
- elif type == "word":
201
- if st.session_state.name_file != uploaded_file.name:
202
- st.session_state.name_file = uploaded_file.name
203
- with st.spinner("Analyse du document..."):
204
- st.session_state.audit = audit_descriptif_word(uploaded_file)
205
- audit = st.session_state.audit
206
-
207
- #global audit
208
- audit_simplified = {
209
- "Nombre de pages": audit["number_of_paragraphs"],
210
- "Nombre d'images": audit["number_of_images"],
211
- "Nombre de liens": audit["number_of_links"],
212
- "Nombre de tableaux": audit["number_of_tables"],
213
- "Nombre de tokens": audit["number_of_tokens"],
214
- "Nombre de mots": audit["number_of_words"]
215
- }
216
-
217
- well_formatted_audit = "Contenus audités\n"
218
- for key, value in audit_simplified.items():
219
- well_formatted_audit += f"- {key}: {value}\n"
220
-
221
- st.code(well_formatted_audit)
222
 
 
 
 
223
 
224
  audit_main()
 
1
  import streamlit as st
2
  import pymupdf as fitz
3
  import pyperclip
4
+ from utils.audit.audit_doc import audit_descriptif_pdf,audit_text
5
  import dotenv
6
  from utils.audit.audit_audio import evaluate_audio_quality
7
  from PIL import Image
 
26
 
27
  #display content
28
  def display_content_doc(content:dict,col:st):
 
29
  number_of_pages = len(content)
30
+ col.info("Note : Si vous choisissez 0, vous verrez le contenu de toutes les pages")
31
 
32
  number = col.number_input("Numéro de page", min_value=0, max_value=number_of_pages, value=0,key="number_page_content")
33
  #0 means all pages
34
  if number > 0:
35
  page : dict = content[f"page_{number-1}"]
36
+
37
  option = col.radio("Type de contenu",list(content[f"page_0"].keys()), index=0,horizontal=True)
38
+
39
  if option == "images":
40
  if number == 0:
41
  images = [img for page in content.values() for img in page["images"]]
 
63
  links = [link for page in content.values() for link in page["liens"]]
64
  else:
65
  links = page["liens"]
66
+
67
  for i, link in enumerate(links):
68
+ col.markdown(f"- {i+1}: [{link['uri']}]({link["uri"]}) (page {link['page']})")
69
+
70
+ elif option == "tableaux":
71
+ if number == 0:
72
+ tables = [table for page in content.values() for table in page["tableaux"]]
73
+ else:
74
+ tables = page["tableaux"]
75
 
76
+ for i, table in enumerate(tables):
77
+ col.write(f"Tableau {i+1}")
78
+ col.write(table)
79
+
80
def display_content_audio(content:dict,col:st):
    """Show an audio audit's transcription, a copy button and an audio player.

    ``content`` must provide the keys ``"transcription"``, ``"audio_data"``
    and ``"frame_rate"``. ``col`` is not used: every element is emitted
    through ``st``.
    """
    st.write("##### Transcription")
    st.write(content["transcription"])
    if st.button("📋",key="copy_transcription"):
        # pyperclip raises when the host has no clipboard mechanism; report it
        # instead of crashing the page.
        try:
            pyperclip.copy(content["transcription"])
        except pyperclip.PyperclipException:
            st.error("Impossible de copier la transcription : aucun presse-papier disponible")
        else:
            st.success("Transcription copiée dans le presse-papier")

    # NOTE(review): the sample rate is doubled here — presumably to compensate
    # for how the samples were extracted; confirm against evaluate_audio_quality.
    st.audio(content["audio_data"],sample_rate=content["frame_rate"]*2)
88
 
89
def display_content_text(content,col:st):
    """Show ``content`` in a 200-pixel-high text area labelled "Texte".

    ``col`` is not used; the widget is emitted through ``st``.
    """
    st.text_area(label="Texte", value=content, height=200)
91
+
92
def handle_display_content(col:st):
    """Render the audited content inside a "Contenu" expander created on ``col``.

    The file type is read from ``st.session_state.audit_simplified`` and the
    content from ``st.session_state.audit``. Types other than "pdf", "audio"
    and "text" render nothing.
    """
    audit_state = st.session_state.audit
    file_kind = st.session_state.audit_simplified["type de fichier"]

    # file type -> (renderer, object handed to the renderer as its second argument)
    renderers = {
        "pdf": (display_content_doc, st),
        "audio": (display_content_audio, col),
        "text": (display_content_text, col),
    }
    if file_kind not in renderers:
        return

    render, target = renderers[file_kind]
    with col.expander("Contenu"):
        render(audit_state["content"], target)
104
+
105
+
106
+
107
def handle_audit(uploaded_file,type:str):
    """Run the audit matching ``type`` and store its results in the session state.

    The analysis runs only when ``uploaded_file.name`` differs from
    ``st.session_state.name_file``; otherwise the audit already stored in
    ``st.session_state.audit`` is reused. In both cases a flat summary is
    rebuilt and stored in ``st.session_state.audit_simplified``.

    Args:
        uploaded_file: the uploaded file object; its ``name`` is the cache key.
        type: "pdf", "audio" or "text". Any other value leaves the session
            state untouched.
    """
    if type not in ("pdf", "audio", "text"):
        return

    if st.session_state.name_file != uploaded_file.name:
        if type == "pdf":
            with st.spinner("Analyse du document..."):
                result = audit_descriptif_pdf(uploaded_file,100)
        elif type == "audio":
            with st.spinner("Analyse de l'audio..."):
                result = evaluate_audio_quality(uploaded_file)
        else:
            # the file is only read and decoded when a fresh analysis is needed
            text = uploaded_file.read().decode("utf-8")
            with st.spinner("Analyse du texte..."):
                result = audit_text(text)
        st.session_state.audit = result
        # Record the name only after the analysis succeeded, so a failed run is
        # retried on the next rerun instead of reusing a stale or empty audit.
        st.session_state.name_file = uploaded_file.name

    audit = st.session_state.audit["audit"]

    # simplified global audit
    if type == "pdf":
        audit_simplified = {
            "type de fichier": type,
            "Nombre de pages": audit["number_of_pages"],
            "Nombre d'images": audit["number_of_images"],
            "Nombre de liens": audit["number_of_links"],
            "Nombre de tableaux": audit["number_of_tables"],
            "Nombre de tokens": audit["number_of_tokens"],
            "Nombre de mots": audit["number_of_words"],
            "Mots clés": audit["key_words"]
        }
    elif type == "audio":
        audit_simplified = {
            "type de fichier": type,
            "Durée": f"{audit['duration']:0.2f} minutes",
            "Nombre de mots": audit["number_of_words"],
            "Nombre de tokens": audit["number_of_tokens"],
            "Volume": f"{audit['volume']:0.2f} dBFS (déciBels Full Scale)",
            # a negative SNR is displayed as 0
            "SNR": f"{max(audit['SNR'],0):0.2f} dB (Ratio Signal / Bruit)",
        }
    else:
        audit_simplified = {
            "type de fichier": type,
            "Nombre de tokens": audit["number_of_tokens"],
            "Nombre de mots": audit["number_of_words"]
        }
    st.session_state.audit_simplified = audit_simplified
158
+
159
+
160
 
161
+
162
def display_audit(col:st):
    """Show the audit summary on ``col`` and, for PDFs, a per-page breakdown.

    The summary comes from ``st.session_state.audit_simplified`` and the
    detailed figures from ``st.session_state.audit["audit"]``.
    """
    summary = st.session_state.audit_simplified
    details = st.session_state.audit["audit"]

    bullet_lines = [f"- {label}: {val}\n" for label, val in summary.items()]
    col.code("Contenus audités\n" + "".join(bullet_lines))

    # only "pdf" audits carry per-page entries
    if summary["type de fichier"] != "pdf":
        return

    with col.expander("Audit par page"):
        page_number = st.number_input("Numéro de page", min_value=1, max_value=details["number_of_pages"], value=1,key="number_page_audit")
        # the widget is 1-based, the per-page keys are 0-based
        raw_page = details[f"page_{page_number-1}"]
        labelled_fields = (
            ("Nombre d'images", "number_of_images"),
            ("Nombre de liens", "number_of_links"),
            ("Nombre de tableaux", "number_of_tables"),
            ("Nombre de tokens", "number_of_tokens"),
            ("Nombre de mots", "number_of_words"),
        )
        page_report = "Audit descriptif\n" + "".join(
            f"- {label}: {raw_page[field]}\n" for label, field in labelled_fields
        )

        st.code(page_report)
191
 
 
192
 
193
 
194
  def audit_main():
 
206
  st.session_state.audit = {}
207
  if "name_file" not in st.session_state:
208
  st.session_state.name_file = ""
209
+ if "audit_simplified" not in st.session_state:
210
+ st.session_state.audit_simplified = {}
211
 
212
  # File uploader
213
  uploaded_file = col1.file_uploader("Télécharger un ou plusieurs documents")
214
 
215
  if uploaded_file is not None:
216
  type = classify_file(uploaded_file)
217
+ handle_audit(uploaded_file,type)
218
 
219
  col1.write(f"Type de fichier: {type}")
220
 
221
+ col1.write("### Synthèse audit de(s) document(s) téléchargé(s)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
223
+ if "audit" in st.session_state and st.session_state.audit != {}:
224
+ display_audit(col1)
225
+ handle_display_content(col2)
226
 
227
  audit_main()
requirements.txt CHANGED
@@ -11,3 +11,4 @@ nltk
11
  rake_nltk
12
  python-docx
13
  pillow
 
 
11
  rake_nltk
12
  python-docx
13
  pillow
14
+ pandas
utils/audit/audit_audio.py CHANGED
@@ -39,7 +39,7 @@ def evaluate_audio_quality(file) -> dict:
39
  audio_data = np.array(audio.get_array_of_samples())
40
 
41
  #number of minutes
42
- duration = len(audio_data) / audio.frame_rate / 60
43
 
44
  # Calculate volume
45
  volume = audio.dBFS
@@ -49,6 +49,24 @@ def evaluate_audio_quality(file) -> dict:
49
 
50
  #get the transcription of the audio
51
  transcription = transcript_audio_func(file)
52
-
53
- return {"volume": volume, "SNR": snr,"transcription": transcription,"number_of_tokens": count_tokens(transcription),"duration": duration, "number_of_words": len(transcription.split())}
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  audio_data = np.array(audio.get_array_of_samples())
40
 
41
  #number of minutes
42
+ duration = len(audio_data) / audio.frame_rate*2 / 60
43
 
44
  # Calculate volume
45
  volume = audio.dBFS
 
49
 
50
  #get the transcription of the audio
51
  transcription = transcript_audio_func(file)
 
 
52
 
53
+ audit = {
54
+ "volume": volume,
55
+ "SNR": snr,
56
+ "duration": duration,
57
+ "number_of_tokens": count_tokens(transcription),
58
+ "number_of_words": len(transcription.split())
59
+ }
60
+
61
+ content = {
62
+ "transcription": transcription,
63
+ "audio_data": audio_data,
64
+ "frame_rate": audio.frame_rate
65
+ }
66
+
67
+ audit_global = {
68
+ "audit": audit,
69
+ "content": content
70
+ }
71
+
72
+ return audit_global
utils/audit/audit_doc.py CHANGED
@@ -98,7 +98,8 @@ def audit_descriptif_pdf(file,max_img_width) -> dict:
98
  page_content = {
99
  "images": [],
100
  "texte": "",
101
- "liens": []
 
102
  }
103
 
104
  #number of images
@@ -140,7 +141,10 @@ def audit_descriptif_pdf(file,max_img_width) -> dict:
140
  audit_dict_doc["number_of_links"] += number_links
141
 
142
  #number of tables
143
- number_tables = len(page.find_tables().tables)
 
 
 
144
  audit_dict_page["number_of_tables"] = number_tables
145
  audit_dict_doc["number_of_tables"] += number_tables
146
 
@@ -170,7 +174,8 @@ def audit_descriptif_pdf(file,max_img_width) -> dict:
170
  - {list_key_words_text}
171
  Veuillez extraire les cinq mots clés les plus pertinents de cette liste. Chaque mot clé doit contenir au maximum deux mots.
172
 
173
- REPONSE:
 
174
  '''
175
  key_words_extracted = extract_relevant_keywords(prompt)
176
  audit_dict_doc["key_words"] = "\n" + key_words_extracted
@@ -186,45 +191,14 @@ def audit_descriptif_pdf(file,max_img_width) -> dict:
186
  def audit_text(text: str) -> dict:
187
  audit_dict = {
188
  "number_of_tokens": count_tokens(text),
189
- "number_of_words": len(text.split())
190
  }
191
 
192
- return audit_dict
193
-
194
- def audit_descriptif_word(file) -> dict:
195
- document = Document(io.BytesIO(file.read()))
196
-
197
- audit_dict_doc = {
198
- "number_of_paragraphs": 0,
199
- "number_of_images": 0,
200
- "number_of_links": 0,
201
- "number_of_tables": 0,
202
- "number_of_tokens": 0,
203
- "number_of_words": 0
204
  }
205
 
206
- for para in document.paragraphs:
207
- audit_dict_doc["number_of_paragraphs"] += 1
208
- text = para.text
209
-
210
- # Count tokens and words in the paragraph
211
- number_tokens = count_tokens(text)
212
- number_words = len(text.split())
213
-
214
- audit_dict_doc["number_of_tokens"] += number_tokens
215
- audit_dict_doc["number_of_words"] += number_words
216
-
217
- # Count links (assuming they are hyperlinks)
218
- for run in para.runs:
219
- if run.link:
220
- audit_dict_doc["number_of_links"] += 1
221
-
222
- for table in document.tables:
223
- audit_dict_doc["number_of_tables"] += 1
224
-
225
- # Counting images (inline shapes and pictures)
226
- for shape in document.inline_shapes:
227
- audit_dict_doc["number_of_images"] += 1
228
 
229
- return audit_dict_doc
230
 
 
98
  page_content = {
99
  "images": [],
100
  "texte": "",
101
+ "liens": [],
102
+ "tableaux": []
103
  }
104
 
105
  #number of images
 
141
  audit_dict_doc["number_of_links"] += number_links
142
 
143
  #number of tables
144
+ tables = page.find_tables().tables
145
+ number_tables = len(tables)
146
+ for tab in tables:
147
+ page_content["tableaux"].append(tab.to_pandas())
148
  audit_dict_page["number_of_tables"] = number_tables
149
  audit_dict_doc["number_of_tables"] += number_tables
150
 
 
174
  - {list_key_words_text}
175
  Veuillez extraire les cinq mots clés les plus pertinents de cette liste. Chaque mot clé doit contenir au maximum deux mots.
176
 
177
+ TA REPONSE DOIT RESPECTER LE FORMAT SUIVANT :
178
+ key_word1, key_word2, key_word3, key_word4, key_word5
179
  '''
180
  key_words_extracted = extract_relevant_keywords(prompt)
181
  audit_dict_doc["key_words"] = "\n" + key_words_extracted
 
191
  def audit_text(text: str) -> dict:
192
  audit_dict = {
193
  "number_of_tokens": count_tokens(text),
194
+ "number_of_words": len(text.split()),
195
  }
196
 
197
+ global_audit = {
198
+ "audit": audit_dict,
199
+ "content": text
 
 
 
 
 
 
 
 
 
200
  }
201
 
202
+ return global_audit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
 
204
 
utils/audit/response_llm.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+ from langchain_openai import ChatOpenAI
3
+ from langchain_core.output_parsers import StrOutputParser
4
+ from langchain_core.prompts import PromptTemplate
5
+
6
def generate_response_openai(prompt: str,stream:bool = False) -> str:
    """Send ``prompt`` as a single user message to ``gpt-4o-mini`` and return the reply text.

    Args:
        prompt: the user message content.
        stream: when True the completion is requested in streaming mode and
            the chunks are concatenated before returning, so the caller gets
            a plain string in both modes.
    """
    client = OpenAI()
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "user", "content": prompt}
        ],
        stream=stream
    )

    if not stream:
        return response.choices[0].message.content

    # A streaming call yields chunks rather than one completion object, so
    # ``response.choices`` is not available; collect the text deltas instead.
    pieces = []
    for chunk in response:
        if chunk.choices and chunk.choices[0].delta.content:
            pieces.append(chunk.choices[0].delta.content)
    return "".join(pieces)
17
+
18
+
19
def generate_response_via_langchain(query: str, stream: bool = False, model: str = "gpt-4o-mini") :
    """Run ``query`` through a prompt -> chat model -> string parser chain.

    Args:
        query: text substituted into the ``{query}`` slot of the prompt template.
        stream: when True, return what the chain's ``stream`` call yields;
            otherwise return the result of ``invoke``.
        model: model name handed to ``ChatOpenAI``.
    """
    chain = (
        PromptTemplate.from_template("You are an agent designer expert: {query}")
        | ChatOpenAI(model=model)
        | StrOutputParser()
    )
    inputs = {"query": query}
    return chain.stream(inputs) if stream else chain.invoke(inputs)