Ilyas KHIAT committed on
Commit 56a3465 · 1 Parent(s): 83bb015

first push
.gitattributes copy ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
+ __pycache__/
+ .env
+ .streamlit/.env
.streamlit/config.toml ADDED
@@ -0,0 +1,8 @@
+ [server]
+ maxUploadSize = 20
+
+ [theme]
+ base = "light"
+ primaryColor = "#63abdf"
+ secondaryBackgroundColor = "#fbf7f1"
+ textColor = "#011166"
README copy.md ADDED
@@ -0,0 +1,12 @@
+ ---
+ title: Théo Pratik
+ emoji: ⚡
+ colorFrom: purple
+ colorTo: blue
+ sdk: streamlit
+ sdk_version: 1.37.1
+ app_file: app.py
+ pinned: false
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
agents_page/catalogue.py ADDED
@@ -0,0 +1,5 @@
+ import streamlit as st
+
+ # st.set_page_config(page_title="Catalogue des agents (via bziiit.com)", page_icon="", layout="wide")
+
+ st.title("Catalogue des agents (via bziiit.com)")
agents_page/recommended_agent.py ADDED
@@ -0,0 +1,187 @@
+ import streamlit as st
+ from utils.audit.response_llm import generate_response_via_langchain
+ from textwrap import dedent
+ from langchain_openai import ChatOpenAI
+ from langchain_mistralai import ChatMistralAI
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_core.messages import AIMessage, HumanMessage
+
+ # st.set_page_config(page_title="Agents recommandés", page_icon="", layout="wide")
+
+ def remove_images_from_content(content):
+     filtered_content = {}
+
+     for page, data in content.items():
+         # Create a new dictionary excluding the "images" key
+         filtered_data = {key: value for key, value in data.items() if key != "images"}
+         filtered_content[page] = filtered_data
+
+     return filtered_content
+
+ def get_response(user_query, chat_history, db, llm=None, history_limit=10, stream=True):
+     retriever = db.as_retriever()
+     context = retriever.invoke(user_query)
+     template = """
+     Étant donné l'historique de la conversation : {chat_history}, le contexte qui est le document : {context}, et la question de l'utilisateur : {user_question}, réponds comme un expert en agents IA.
+     Assurez-vous que la réponse soit adaptée au niveau d'expertise de l'utilisateur et aux spécificités du contexte fourni.
+     """
+
+     prompt = ChatPromptTemplate.from_template(template)
+
+     # llm = ChatOpenAI(model="gpt-4o")
+     if not llm:
+         llm = ChatOpenAI(model="gpt-4o-mini")
+     elif llm == "GPT-4o":
+         llm = ChatOpenAI(model="gpt-4o")
+     elif llm == "Mistral Large 2 (FR)":
+         llm = ChatMistralAI(model_name="mistral-large-2407")
+     elif llm == "GPT-4o-mini":
+         llm = ChatOpenAI(model="gpt-4o-mini")
+     elif llm == "Mistral Nemo (FR)":
+         llm = ChatMistralAI(model_name="open-mistral-nemo-2407")
+
+     chain = prompt | llm
+
+     if not stream:
+         return chain.invoke({
+             "context": context,
+             "chat_history": chat_history[-history_limit:],
+             "user_question": user_query,
+         })
+
+     chain = chain | StrOutputParser()
+
+     if history_limit:
+         return chain.stream({
+             "context": context,
+             "chat_history": chat_history[-history_limit:],
+             "user_question": user_query,
+         })
+
+     return chain.stream({
+         "context": context,
+         "chat_history": chat_history,
+         "user_question": user_query,
+     })
+
+ def handle_display_models(index, models_names):
+     model = st.radio("Choisir un modèle", models_names, index=index)
+     return model
+
+ def recommended_agent_main():
+     st.title("Agents recommandés")
+     models_names = ["GPT-4o", "GPT-4o-mini", "Mistral Nemo (FR)", "Mistral Large 2 (FR)"]
+
+     if "chat_history" not in st.session_state:
+         st.session_state.chat_history = []
+
+     if "model" not in st.session_state:
+         st.session_state.model = "GPT-4o-mini"
+
+     header = st.container()
+     col1, col2 = header.columns([1, 2])
+
+     with col1.popover("Modèles disponibles"):
+         new_model = handle_display_models(models_names.index(st.session_state.model), models_names)
+
+     st.session_state.model = new_model
+
+     st.markdown(f"- **{st.session_state.model}**")
+
+     if "audit" not in st.session_state or st.session_state.audit == {}:
+         st.error("Veuillez d'abord effectuer un audit pour obtenir des recommandations d'agents.")
+         return
+
+     audit = st.session_state.audit_simplified
+     content = st.session_state.audit["content"]
+
+     if "response_llm" not in st.session_state:
+         st.session_state.response_llm = ""
+
+     # Filter the content: drop images if the file type is pdf
+     if audit["type de fichier"] == "pdf":
+         content = remove_images_from_content(content)
+     # Drop the raw audio if the type is audio and keep only the transcript
+     elif audit["type de fichier"] == "audio":
+         content = content["transcription"]
+
+     ressources = content
+
+     prompt = '''
+     Tu es designer en intelligence artificielle (IA) spécialisé dans la création d'agents IA autonomes et performants.
+
+     À partir des ressources fournies par l'utilisateur (texte, documents, images, audio), tu es chargé de réaliser les tâches suivantes :
+
+     A/ Faire un résumé des ressources fournies en 500 caractères maximum
+
+     B/ Suggérer la création d'agents autonomes pour mettre en pratique les informations contenues dans les ressources fournies.
+
+     Tu proposes deux solutions :
+
+     Sol. A : 1 seul agent IA dont tu suggéreras :
+     * Nom
+     * Rôle
+     * Objectifs
+     * Outils utilisés par l'agent
+     * Tâches réalisées par l'agent
+     * Compétences de l'agent (backstory)
+
+     Sol. B : 1 équipe d'agents dont tu suggéreras :
+     * Le nombre d'agents
+     * Pour chacun d'eux [Nom, Rôle, Objectifs, Outils utilisés par l'agent, Tâches réalisées par l'agent, Compétences de l'agent (backstory)]
+
+     Une fois ce travail réalisé, tu proposes une série de 3 missions avec objectifs SMART pour chacun des agents Sol. A et Sol. B en présentant les résultats dans un tableau contenant :
+     * Nom de l'agent
+     * Objectifs à atteindre
+     '''
+
+     # Display the default prompt and let the user modify it
+     prompt_modified = st.text_area("Prompt par défaut (que vous pouvez modifier, compléter)", prompt, height=300)
+     prompt_modified = dedent(prompt_modified)
+
+     if st.button("Générer les recommandations"):
+         resource_prompt = f'''Ressources fournies par l'utilisateur : {ressources}'''
+         prompt_modified = f"{prompt_modified}\n{resource_prompt}"
+         st.session_state.chat_history = []
+         with st.chat_message("AI"):
+             st.session_state.response_llm = st.write_stream(generate_response_via_langchain(query=prompt_modified, stream=True))
+
+         st.session_state.chat_history.append(AIMessage(content=st.session_state.response_llm))
+
+     elif st.session_state.response_llm:
+         st.info("La dernière réponse générée est affichée ci-dessous")
+         with st.chat_message("AI"):
+             st.write(st.session_state.response_llm)
+
+     for message in st.session_state.chat_history[1:]:
+         if isinstance(message, AIMessage):
+             with st.chat_message("AI"):
+                 st.markdown(message.content)
+         elif isinstance(message, HumanMessage):
+             with st.chat_message("Moi"):
+                 st.write(message.content)
+
+     user_query = st.chat_input("Par ici ...")
+     if user_query is not None and user_query != "":
+         st.session_state.chat_history.append(HumanMessage(content=user_query))
+
+         with st.chat_message("Moi"):
+             st.markdown(user_query)
+
+         with st.chat_message("AI"):
+             st.markdown(f"**{st.session_state.model}**")
+             response = st.write_stream(get_response(user_query, st.session_state.chat_history, db=st.session_state.vectorstore, llm=st.session_state.model, stream=True))
+             st.session_state.chat_history.append(AIMessage(content=response))
+
+ recommended_agent_main()
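A note on `get_response` above: it builds a LangChain Expression Language (LCEL) pipeline with the `|` operator, then either `invoke`s it once or `stream`s it so that `st.write_stream` can render tokens as they arrive. A minimal sketch of that composition in isolation, assuming an `OPENAI_API_KEY` is available in the environment:

```python
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Same shape as get_response: prompt -> model -> string parser.
prompt = ChatPromptTemplate.from_template("Réponds brièvement : {user_question}")
chain = prompt | ChatOpenAI(model="gpt-4o-mini") | StrOutputParser()

# invoke() returns one complete string.
print(chain.invoke({"user_question": "Qu'est-ce qu'un agent IA ?"}))

# stream() yields string chunks, which is exactly what st.write_stream consumes.
for chunk in chain.stream({"user_question": "Qu'est-ce qu'un agent IA ?"}):
    print(chunk, end="", flush=True)
```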
app.py ADDED
@@ -0,0 +1,33 @@
+ import streamlit as st
+ import dotenv
+ import os
+
+ def main():
+     dotenv.load_dotenv(dotenv_path=os.path.join('.streamlit', '.env'))
+
+     st.set_page_config(page_title="RAG Agent", page_icon="🤖", layout="wide")
+
+     audit_page = st.Page("audit_page/audit.py", title="Audit", icon="📋", default=True)
+     dialog_page = st.Page("audit_page/dialogue_doc.py", title="Dialoguer avec le document", icon="💬")
+     kg_page = st.Page("audit_page/knowledge_graph.py", title="Graphe de connaissance", icon="🧠")
+     agents_page = st.Page("agents_page/catalogue.py", title="Catalogue des agents", icon="📇")
+     compte_rendu = st.Page("audit_page/compte_rendu.py", title="Compte rendu", icon="📝")
+     recommended_agents = st.Page("agents_page/recommended_agent.py", title="Agents recommandés", icon="⭐")
+     chatbot = st.Page("chatbot_page/chatbot.py", title="Chatbot", icon="💬")
+     documentation = st.Page("doc_page/documentation.py", title="Documentation", icon="📚")
+
+     pg = st.navigation(
+         {
+             "Audit de contenus": [audit_page, dialog_page],
+             "Equipe d'agents IA": [recommended_agents],
+             "Chatbot": [chatbot],
+             "Documentation": [documentation]
+         }
+     )
+
+     pg.run()
+
+ if __name__ == "__main__":
+     main()
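For context, `st.navigation` (available in the pinned Streamlit 1.37.1) returns the `st.Page` selected in the sidebar, and `pg.run()` executes that page's script top to bottom on every rerun; this is why each page module in this commit ends with a module-level call such as `audit_main()` or `chatbot_main()`. A minimal two-page sketch with hypothetical file names:

```python
import streamlit as st

# pages/home.py and pages/about.py are hypothetical scripts that render
# themselves when executed (e.g. by calling their own main() at module level).
home = st.Page("pages/home.py", title="Home", icon="🏠", default=True)
about = st.Page("pages/about.py", title="About", icon="ℹ️")

pg = st.navigation({"Main": [home, about]})
pg.run()  # runs the selected page script on this rerun
```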
audit_page/audit.py ADDED
@@ -0,0 +1,259 @@
+ import streamlit as st
+ import pymupdf as fitz
+ from utils.audit.audit_doc import audit_descriptif_pdf, audit_text
+ from utils.audit.rag import setup_rag
+ import dotenv
+ from utils.audit.audit_audio import evaluate_audio_quality
+ from PIL import Image
+ from io import BytesIO
+ from st_copy_to_clipboard import st_copy_to_clipboard
+ import os
+
+ # Function to classify file type
+ def classify_file(file):
+     if file.type.startswith("image/"):
+         return "image"
+     elif file.type == "application/pdf":
+         return "pdf"
+     elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+         return "word"
+     elif file.type.startswith("audio/"):
+         return "audio"
+     elif file.type.startswith("text/"):
+         return "text"
+     else:
+         return "unknown"
+
+ # Display the extracted content of a document
+ def display_content_doc(content: dict, col: st):
+     number_of_pages = len(content)
+     col.info("Note : Si vous choisissez 0, vous verrez le contenu de toutes les pages")
+
+     number = col.number_input("Numéro de page", min_value=0, max_value=number_of_pages, value=0, key="number_page_content")
+     # 0 means all pages
+     if number > 0:
+         page: dict = content[f"page_{number-1}"]
+
+     option = col.radio("Type de contenu", list(content["page_0"].keys()), index=0, horizontal=True)
+
+     if option == "images":
+         if number == 0:
+             images = [img for page in content.values() for img in page["images"]]
+         else:
+             images = page["images"]
+         col1, col2, col3 = col.columns(3)
+         for i, (img_bytes, img_width, img_height) in enumerate(images):
+             try:
+                 if i % 3 == 0:
+                     col1.image(Image.open(BytesIO(img_bytes)), caption="", width=img_width)
+                 elif i % 3 == 1:
+                     col2.image(Image.open(BytesIO(img_bytes)), caption="", width=img_width)
+                 else:
+                     col3.image(Image.open(BytesIO(img_bytes)), caption="", width=img_width)
+             except Exception:
+                 pass
+
+     elif option == "texte":
+         if number == 0:
+             text = "-------------------\n".join([page["texte"] for page in content.values()])
+         else:
+             text = page["texte"]
+
+         col.code(text, language="text")
+
+     elif option == "liens":
+         if number == 0:
+             links = [link for page in content.values() for link in page["liens"]]
+         else:
+             links = page["liens"]
+
+         for i, link in enumerate(links):
+             col.markdown(f"- {i+1}: [{link['uri']}]({link['uri']}) (page {link['page']})")
+
+     elif option == "tableaux":
+         if number == 0:
+             tables = [table for page in content.values() for table in page["tableaux"]]
+         else:
+             tables = page["tableaux"]
+
+         for i, table in enumerate(tables):
+             col.write(f"Tableau {i+1}")
+             col.write(table)
+
+ def display_content_audio(content: dict, col: st):
+     st.write("##### Transcription")
+     st.write(content["transcription"])
+     # if st.button("📋", key="copy_transcription"):
+     st_copy_to_clipboard(content["transcription"])
+     # st.success("Transcription copiée dans le presse-papier")
+
+     st.audio(content["audio_data"], sample_rate=content["frame_rate"] * 2)
+
+ def display_content_text(content, col: st):
+     st.text_area("Texte", content, height=200)
+
+ def handle_display_content(col: st):
+     audit = st.session_state.audit
+     type = st.session_state.audit_simplified["type de fichier"]
+     if type == "pdf":
+         with col.expander("Contenu"):
+             display_content_doc(audit["content"], st)
+     elif type == "audio":
+         with col.expander("Contenu"):
+             display_content_audio(audit["content"], col)
+     elif type == "text":
+         with col.expander("Contenu"):
+             display_content_text(audit["content"], col)
+
+ def handle_audit(uploaded_file, type: str):
+     if type == "pdf":
+         if st.session_state.name_file != uploaded_file.name:
+             st.session_state.name_file = uploaded_file.name
+             with st.spinner("Analyse du document..."):
+                 st.session_state.audit = {}
+                 st.session_state.audit = audit_descriptif_pdf(uploaded_file, 100)
+             with st.spinner("Préparation de la DB..."):
+                 vectorstore = setup_rag(type, st.session_state.audit["content"])
+                 st.session_state.vectorstore = vectorstore
+             st.session_state.graph = None
+             st.session_state.cr = ""
+
+         audit = st.session_state.audit["audit"]
+         # Simplified global audit
+         audit_simplified = {
+             "type de fichier": type,
+             "Nombre de pages": audit["number_of_pages"],
+             "Nombre d'images": audit["number_of_images"],
+             "Nombre de liens": audit["number_of_links"],
+             "Nombre de tableaux": audit["number_of_tables"],
+             "Nombre de tokens": audit["number_of_tokens"],
+             "Nombre de mots": audit["number_of_words"],
+             "Mots clés": audit["key_words"]
+         }
+         st.session_state.audit_simplified = audit_simplified
+
+     elif type == "audio":
+         if st.session_state.name_file != uploaded_file.name:
+             st.session_state.name_file = uploaded_file.name
+             with st.spinner("Analyse de l'audio..."):
+                 st.session_state.audit = {}
+                 st.session_state.audit = evaluate_audio_quality(uploaded_file)
+             with st.spinner("Préparation de la DB..."):
+                 vectorstore = setup_rag(type, st.session_state.audit["content"])
+                 st.session_state.vectorstore = vectorstore
+             st.session_state.graph = None
+             st.session_state.cr = ""
+
+         audit = st.session_state.audit["audit"]
+         # Simplified global audit
+         audit_simplified = {
+             "type de fichier": type,
+             "Durée": f"{audit['duration']:0.2f} minutes",
+             "Nombre de mots": audit["number_of_words"],
+             "Nombre de tokens": audit["number_of_tokens"],
+             "Volume": f"{audit['volume']:0.2f} dBFS (déciBels Full Scale)",
+             "SNR": f"{max(audit['SNR'], 0):0.2f} dB (Ratio Signal / Bruit)",
+         }
+         st.session_state.audit_simplified = audit_simplified
+
+     elif type == "text":
+         text = uploaded_file.read().decode("utf-8")
+         if st.session_state.name_file != uploaded_file.name:
+             st.session_state.name_file = uploaded_file.name
+             with st.spinner("Analyse du texte..."):
+                 st.session_state.audit = {}
+                 st.session_state.audit = audit_text(text)
+
+         audit = st.session_state.audit["audit"]
+         # Simplified global audit
+         audit_simplified = {
+             "type de fichier": type,
+             "Nombre de tokens": audit["number_of_tokens"],
+             "Nombre de mots": audit["number_of_words"],
+             "Mots clés": audit["key_words"]
+         }
+         st.session_state.audit_simplified = audit_simplified
+
+ def display_audit(col: st):
+     # Simplified global audit
+     audit_simplified = st.session_state.audit_simplified
+     audit = st.session_state.audit["audit"]
+
+     well_formatted_audit = "Contenus audités\n"
+     for key, value in audit_simplified.items():
+         well_formatted_audit += f"- {key}: {value}\n"
+
+     col.code(well_formatted_audit)
+
+     if audit_simplified["type de fichier"] == "pdf":  # i.e. a type that contains pages
+         # Per-page audit
+         with col.expander("Audit par page"):
+             number = st.number_input("Numéro de page", min_value=1, max_value=audit["number_of_pages"], value=1, key="number_page_audit")
+             audit_page = audit[f"page_{number-1}"]
+             audit_page = {
+                 "Nombre d'images": audit_page["number_of_images"],
+                 "Nombre de liens": audit_page["number_of_links"],
+                 "Nombre de tableaux": audit_page["number_of_tables"],
+                 "Nombre de tokens": audit_page["number_of_tokens"],
+                 "Nombre de mots": audit_page["number_of_words"],
+             }
+             well_formatted_audit_page = "Audit descriptif\n"
+             for key, value in audit_page.items():
+                 well_formatted_audit_page += f"- {key}: {value}\n"
+
+             st.code(well_formatted_audit_page)
+
+ def audit_main():
+     # st.set_page_config(page_title="Audit des documents", page_icon=":page_with_curl:", layout="wide")
+     # Streamlit app
+     st.title("Audit des documents")
+
+     notice = "Les formats autorisés sont les suivants :\n- **format texte** : txt, word, pdf\n- **format image** : png, jpg\n- **format audio** : wav, MP3"
+
+     col1, col2 = st.columns([4, 3])
+     col1.markdown(notice)
+
+     if "audit" not in st.session_state:
+         st.session_state.audit = {}
+     if "name_file" not in st.session_state:
+         st.session_state.name_file = ""
+     if "audit_simplified" not in st.session_state:
+         st.session_state.audit_simplified = {}
+     if "vectorstore" not in st.session_state:
+         st.session_state.vectorstore = None
+     if "cr" not in st.session_state:
+         st.session_state.cr = ""
+     if "graph" not in st.session_state:
+         st.session_state.graph = None
+
+     # File uploader
+     uploaded_file = col1.file_uploader("Télécharger un ou plusieurs documents")
+
+     if uploaded_file is not None:
+         type = classify_file(uploaded_file)
+         handle_audit(uploaded_file, type)
+
+         col1.write(f"Type de fichier: {type}")
+
+     col1.write("### Synthèse audit de(s) document(s) téléchargé(s)")
+
+     if "audit" in st.session_state and st.session_state.audit != {}:
+         display_audit(col1)
+         handle_display_content(col2)
+
+     # init graph and cr
+
+ audit_main()
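The `setup_rag` helper is not part of this commit, but the pages that consume `st.session_state.vectorstore` only rely on the standard LangChain retriever interface: `as_retriever().invoke(query)` returns the most relevant chunks, which are then inlined into a prompt. A minimal sketch of that retrieve-then-prompt pattern, assuming an in-memory FAISS index from `langchain_community` (the project's actual `setup_rag` may differ):

```python
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Hypothetical stand-in for setup_rag(): index a few text chunks.
vectorstore = FAISS.from_texts(
    ["Texte de la page 1 ...", "Texte de la page 2 ..."],
    OpenAIEmbeddings(),
)

# Same retrieval step as in the dialogue pages: the retriever returns
# the most relevant Documents for the user's question.
retriever = vectorstore.as_retriever()
context = retriever.invoke("De quoi parle la page 1 ?")
wrapped_prompt = f"Étant donné le contexte suivant {context}, réponds à la question."
```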
audit_page/compte_rendu.py ADDED
@@ -0,0 +1,130 @@
+ import streamlit as st
+ from textwrap import dedent
+ from utils.audit.rag import get_text_from_content_for_doc, get_text_from_content_for_audio
+ from utils.audit.response_llm import generate_response_via_langchain
+ from langchain_core.messages import AIMessage, HumanMessage
+
+ def cr_main():
+     st.title("Compte rendu")
+
+     if "audit" not in st.session_state or st.session_state.audit == {}:
+         st.error("Veuillez d'abord effectuer un audit pour générer un compte rendu.")
+         return
+
+     if "cr" not in st.session_state:
+         st.session_state.cr = ""
+
+     if "cr_chat_history" not in st.session_state:
+         st.session_state.cr_chat_history = []
+
+     audit = st.session_state.audit_simplified
+     content = st.session_state.audit["content"]
+
+     if audit["type de fichier"] == "pdf":
+         text = get_text_from_content_for_doc(content)
+     elif audit["type de fichier"] == "audio":
+         text = get_text_from_content_for_audio(content)
+
+     prompt_cr = dedent(f'''
+     À partir du document ci-dessous, générez un compte rendu détaillé contenant les sections suivantes :
+
+     1. **Résumé** : Fournissez une synthèse du document, en mettant en avant les points principaux, les relations essentielles, les concepts, les dates et les lieux, les conclusions et les détails importants.
+
+     2. **Notes** :
+     - Présentez les points clés sous forme de liste à puces avec des émojis pertinents pour souligner la nature de chaque point.
+     - N'oubliez pas de relever toutes les entités et les relations.
+     - Incluez des sous-points (sans émojis) sous les points principaux pour offrir des détails ou explications supplémentaires.
+
+     3. **Actions** : Identifiez et listez les actions spécifiques, tâches ou étapes recommandées ou nécessaires selon le contenu du document.
+
+     **Document :**
+
+     {text}
+
+     **Format de sortie :**
+
+     ### Résumé :
+     [Fournissez un résumé concis du document ici ; n'oubliez pas de relever toutes les entités et les relations.]
+
+     ### Notes :
+     - 📌 **Point Principal 1**
+       - Sous-point A
+       - Sous-point B
+     - 📈 **Point Principal 2**
+       - Sous-point C
+       - Sous-point D
+     - 📝 **Point Principal 3**
+       - Sous-point E
+       - Sous-point F
+
+     ### Actions :
+     1. [Action 1]
+     2. [Action 2]
+     3. [Action 3]
+     4. ...
+
+     ---
+     ''')
+
+     if st.button("Générer compte rendu"):
+         with st.spinner("Génération du compte rendu..."):
+             cr = generate_response_via_langchain(prompt_cr, stream=False, model="gpt-4o")
+             st.session_state.cr = cr
+             st.session_state.cr_chat_history = []
+     else:
+         cr = st.session_state.cr
+
+     if cr:
+         col1, col2 = st.columns([2.5, 1.5])
+
+         with col1.container(border=True, height=800):
+             st.markdown("##### Compte rendu")
+             st.markdown("### Mots clés extraits:")
+             st.write(f"- {audit['Mots clés'].strip()}")
+             st.write(cr)
+             # if st.button("📋", key="copy_transcription"):
+             #     pyperclip.copy(cr)
+             #     st.success("Transcription copiée dans le presse-papier")
+
+         with col2.container(border=True, height=800):
+             st.markdown("##### Dialoguer avec le CR")
+
+             user_query = st.chat_input("Par ici ...")
+             if user_query is not None and user_query != "":
+                 st.session_state.cr_chat_history.append(HumanMessage(content=user_query))
+
+             with st.container(height=650, border=False):
+                 for message in st.session_state.cr_chat_history:
+                     if isinstance(message, AIMessage):
+                         with st.chat_message("AI"):
+                             st.markdown(message.content)
+                     elif isinstance(message, HumanMessage):
+                         with st.chat_message("Moi"):
+                             st.write(message.content)
+
+                 # Check whether the last message is a human message
+                 if len(st.session_state.cr_chat_history) > 0:
+                     last_message = st.session_state.cr_chat_history[-1]
+                     if isinstance(last_message, HumanMessage):
+                         with st.chat_message("AI"):
+                             retriever = st.session_state.vectorstore.as_retriever()
+                             context = retriever.invoke(last_message.content)
+                             wrapped_prompt = f'''Étant donné le contexte suivant {context} et le compte rendu du document {cr}, {last_message.content}'''
+                             response = st.write_stream(generate_response_via_langchain(wrapped_prompt, stream=True))
+                             st.session_state.cr_chat_history.append(AIMessage(content=response))
+
+ cr_main()
audit_page/dialogue_doc.py ADDED
@@ -0,0 +1,337 @@
+ import streamlit as st
+ from textwrap import dedent
+ from utils.audit.rag import get_text_from_content_for_doc, get_text_from_content_for_audio
+ from utils.audit.response_llm import generate_response_via_langchain
+ from langchain_core.messages import AIMessage, HumanMessage
+ from st_copy_to_clipboard import st_copy_to_clipboard
+ from utils.kg.construct_kg import get_graph
+ from audit_page.knowledge_graph import *
+ import json
+ from time import sleep
+
+ def graph_doc_to_json(graph):
+     nodes = []
+     edges = []
+     for node in graph.nodes:
+         node_id = node.id.replace(" ", "_")
+         label = node.id
+         type = node.type
+         nodes.append({"id": node_id, "label": label, "type": type})
+     for relationship in graph.relationships:
+         source = relationship.source
+         source_id = source.id.replace(" ", "_")
+         target = relationship.target
+         target_id = target.id.replace(" ", "_")
+         label = relationship.type
+         edges.append({"source": source_id, "label": label, "cible": target_id})
+     return {"noeuds": nodes, "relations": edges}
+
+ def chat_history_formatter(chat_history):
+     formatted_chat = ""
+     for message in chat_history:
+         if isinstance(message, AIMessage):
+             formatted_chat += f"AI:{message.content}\n\n"
+         elif isinstance(message, HumanMessage):
+             formatted_chat += f"Human:{message.content}\n\n"
+     return formatted_chat
+
+ def filter_correspondance(source_list: list[str], ref_dict: dict, reverse=False):
+     source_list = [item.lower().strip() for item in source_list]
+     if reverse:
+         return [key for key, value in ref_dict.items() if value.lower().strip() in source_list]
+     else:
+         return [value for key, value in ref_dict.items() if key.lower().strip() in source_list]
+
+ @st.fragment()
+ def radio_choice():
+     options = ["compte_rendu", "graphe de connaissance"]
+     choice = st.radio("Choisissez une option", options, index=st.session_state.radio_choice, horizontal=True, label_visibility="collapsed")
+     sleep(1)
+     if choice and options.index(choice) != st.session_state.radio_choice:
+         sleep(1)
+         st.session_state.radio_choice = options.index(choice)
+     return choice
+
+ def doc_dialog_main():
+     st.title("Dialogue avec le document")
+
+     if "audit" not in st.session_state or st.session_state.audit == {}:
+         st.error("Veuillez d'abord effectuer un audit pour générer le compte rendu ou le graphe de connaissance.")
+         return
+
+     # Init the report (cr) and its chat history
+     if "cr" not in st.session_state:
+         st.session_state.cr = ""
+     if "cr_chat_history" not in st.session_state:
+         st.session_state.cr_chat_history = []
+
+     # Init the graph and filter views
+     if "graph" not in st.session_state:
+         st.session_state.graph = None
+
+     if "filter_views" not in st.session_state:
+         st.session_state.filter_views = {}
+     if "current_view" not in st.session_state:
+         st.session_state.current_view = None
+     if "node_types" not in st.session_state:
+         st.session_state.node_types = None
+     if "chat_graph_history" not in st.session_state:
+         st.session_state.chat_graph_history = []
+
+     # Init a radio button for the choice
+     if "radio_choice" not in st.session_state:
+         st.session_state.radio_choice = None
+
+     # choice = radio_choice()
+
+     options = ["compte_rendu", "graphe de connaissance"]
+     choice = st.radio("Choisissez une option", options, index=st.session_state.radio_choice, horizontal=True, label_visibility="collapsed")
+     if choice and options.index(choice) != st.session_state.radio_choice:
+         st.session_state.radio_choice = options.index(choice)
+
+     audit = st.session_state.audit_simplified
+     content = st.session_state.audit["content"]
+
+     if audit["type de fichier"] == "pdf":
+         text = get_text_from_content_for_doc(content)
+     elif audit["type de fichier"] == "audio":
+         text = get_text_from_content_for_audio(content)
+     elif audit["type de fichier"] == "text":
+         text = content
+
+     prompt_cr = dedent(f'''
+     À partir du document ci-dessous, générez un compte rendu détaillé contenant les sections suivantes :
+
+     1. **Résumé** : Fournissez une synthèse complète du document, en mettant en avant les points principaux, les relations essentielles, les concepts, les dates et les lieux, les conclusions et les détails importants.
+
+     2. **Notes** :
+     - Présentez les points clés sous forme de liste à puces avec des émojis pertinents pour souligner la nature de chaque point.
+     - N'oubliez pas de relever toutes les entités et les relations.
+     - Incluez des sous-points (sans émojis) sous les points principaux pour offrir des détails ou explications supplémentaires.
+
+     3. **Actions** : Identifiez et listez les actions spécifiques, tâches ou étapes recommandées ou nécessaires selon le contenu du document.
+
+     **Document :**
+
+     {text}
+
+     **Format de sortie :**
+
+     ### Résumé :
+     [Fournissez un résumé concis du document ici ; n'oubliez pas de relever toutes les entités et les relations.]
+
+     ### Notes :
+     - 📌 **Point Principal 1**
+       - Sous-point A
+       - Sous-point B
+     - 📈 **Point Principal 2**
+       - Sous-point C
+       - Sous-point D
+     - 📝 **Point Principal 3**
+       - Sous-point E
+       - Sous-point F
+
+     ### Actions :
+     1. [Action 1]
+     2. [Action 2]
+     3. [Action 3]
+     4. ...
+
+     ---
+     ''')
+
+     if choice == "compte_rendu":
+         if "cr" not in st.session_state or st.session_state.cr == "":
+             with st.spinner("Génération du compte rendu..."):
+                 cr = generate_response_via_langchain(prompt_cr, stream=False, model="gpt-4o")
+                 st.session_state.cr = cr
+                 st.session_state.cr_chat_history = []
+         else:
+             cr = st.session_state.cr
+
+         if cr:
+             col1, col2 = st.columns([2.5, 1.5])
+
+             with col1.container(border=True, height=850):
+                 st.markdown("##### Compte rendu")
+                 keywords_paragraph = f"### Mots clés extraits:\n- {audit['Mots clés'].strip()}"
+                 with st.container(height=650, border=False):
+                     st.markdown(keywords_paragraph)
+                     st.write(cr)
+                 with st.container(height=50, border=False):
+                     st_copy_to_clipboard(keywords_paragraph + "\n\n" + cr, key="cp_but_cr")
+
+             with col2.container(border=True, height=850):
+                 st.markdown("##### Dialoguer avec le CR")
+
+                 user_query = st.chat_input("Par ici ...")
+                 if user_query is not None and user_query != "":
+                     st.session_state.cr_chat_history.append(HumanMessage(content=user_query))
+
+                 with st.container(height=600, border=False):
+                     for message in st.session_state.cr_chat_history:
+                         if isinstance(message, AIMessage):
+                             with st.chat_message("AI"):
+                                 st.markdown(message.content)
+                         elif isinstance(message, HumanMessage):
+                             with st.chat_message("Human"):
+                                 st.write(message.content)
+
+                     # Check whether the last message is a human message
+                     if len(st.session_state.cr_chat_history) > 0:
+                         last_message = st.session_state.cr_chat_history[-1]
+                         if isinstance(last_message, HumanMessage):
+                             with st.chat_message("AI"):
+                                 retriever = st.session_state.vectorstore.as_retriever()
+                                 context = retriever.invoke(last_message.content)
+                                 wrapped_prompt = f'''Étant donné le contexte suivant {context} et le compte rendu du document {cr}, {last_message.content}'''
+                                 response = st.write_stream(generate_response_via_langchain(wrapped_prompt, stream=True))
+                                 st.session_state.cr_chat_history.append(AIMessage(content=response))
+
+                 with st.container(height=50, border=False):
+                     chat_formatted = chat_history_formatter(st.session_state.cr_chat_history)
+                     st_copy_to_clipboard(chat_formatted, key="cp_but_cr_chat", show_text=False)
+
+     elif choice == "graphe de connaissance":
+         if "graph" not in st.session_state or st.session_state.graph is None:
+             with st.spinner("Génération du graphe..."):
+                 keywords_list = [keyword.strip() for keyword in audit["Mots clés"].strip().split(",")]
+                 allowed_nodes_types = keywords_list + ["Person", "Organization", "Location", "Event", "Date", "Time", "Ressource", "Concept"]
+
+                 number_tokens = audit["Nombre de tokens"]
+                 if number_tokens > 10000:
+                     if st.session_state.cr == "":
+                         st.session_state.cr = generate_response_via_langchain(prompt_cr, stream=False, model="gpt-4o")
+                     text = st.session_state.cr
+
+                 graph = get_graph(text, allowed_nodes=allowed_nodes_types)
+                 st.session_state.graph = graph
+                 st.session_state.filter_views = {}
+                 st.session_state.current_view = None
+                 st.session_state.node_types = None
+                 st.session_state.chat_graph_history = []
+
+                 node_types = get_node_types(graph[0])
+                 sorted_node_types = sorted(list(node_types), key=lambda x: x.lower())
+                 nodes_type_dict = list_to_dict_colors(sorted_node_types)
+                 st.session_state.node_types = nodes_type_dict
+                 st.session_state.filter_views["Vue par défaut"] = list(node_types)
+                 st.session_state.current_view = "Vue par défaut"
+         else:
+             graph = st.session_state.graph
+
+         if graph is not None:
+             edges, nodes, config = convert_neo4j_to_agraph(graph[0], st.session_state.node_types)
+
+             col1, col2 = st.columns([2.5, 1.5])
+
+             with col1.container(border=True, height=850):
+                 st.write("##### Visualisation du graphe (**" + st.session_state.current_view + "**)")
+                 filter_col, add_view_col, change_view_col, color_col = st.columns([9, 1, 1, 1])
+
+                 if color_col.button("🎨", help="Changer la couleur"):
+                     change_color_dialog()
+
+                 if change_view_col.button("🔍", help="Changer de vue"):
+                     change_view_dialog()
+
+                 # Mark every label that is also a keyword in audit["Mots clés"]
+                 keywords_list = [keyword.strip().lower() for keyword in audit["Mots clés"].strip().split(",")]
+                 dict_filters = {label: "Mot clé : " + label if label.strip().lower() in keywords_list else label for label in st.session_state.filter_views[st.session_state.current_view]}
+
+                 default_target_filter = filter_correspondance(st.session_state.filter_views[st.session_state.current_view], dict_filters)
+                 sorted_default_target_filter = sorted(default_target_filter, key=lambda x: x.lower())
+                 target_filter = filter_correspondance(list(st.session_state.node_types.keys()), dict_filters)
+                 target_filter = sorted(target_filter, key=lambda x: x.lower())
+                 filter = filter_col.multiselect("Filtrer selon l'étiquette", target_filter, placeholder="Sélectionner une ou plusieurs étiquettes", default=sorted_default_target_filter, label_visibility="collapsed")
+                 filter = filter_correspondance(filter, dict_filters, reverse=True)
+                 if add_view_col.button("➕", help="Ajouter une vue"):
+                     add_view_dialog(filter)
+                 if filter:
+                     nodes = filter_nodes_by_types(nodes, filter)
+
+                 selected = display_graph(edges, nodes, config)
+
+                 with st.container(height=50, border=False):
+                     graph_json = graph_doc_to_json(graph[0])
+                     st_copy_to_clipboard(json.dumps(graph_json), key="cp_but_graph")
+
+             with col2.container(border=True, height=850):
+                 st.markdown("##### Dialoguer avec le graphe")
+
+                 user_query = st.chat_input("Par ici ...")
+                 if user_query is not None and user_query != "":
+                     st.session_state.chat_graph_history.append(HumanMessage(content=user_query))
+
+                 with st.container(height=600, border=False):
+                     for message in st.session_state.chat_graph_history:
+                         if isinstance(message, AIMessage):
+                             with st.chat_message("AI"):
+                                 st.markdown(message.content)
+                         elif isinstance(message, HumanMessage):
+                             with st.chat_message("Human"):
+                                 st.write(message.content)
+
+                     # Check whether the last message is a human message
+                     if len(st.session_state.chat_graph_history) > 0:
+                         last_message = st.session_state.chat_graph_history[-1]
+                         if isinstance(last_message, HumanMessage):
+                             with st.chat_message("AI"):
+                                 retriever = st.session_state.vectorstore.as_retriever()
+                                 context = retriever.invoke(last_message.content)
+                                 wrapped_prompt = f"Étant donné le contexte suivant {context}, et le graphe de connaissance : {graph}, {last_message.content}"
+                                 response = st.write_stream(generate_response_via_langchain(wrapped_prompt, stream=True))
+                                 st.session_state.chat_graph_history.append(AIMessage(content=response))
+
+                     if selected is not None:
+                         with st.chat_message("AI"):
+                             st.markdown(f" EXPLORER LES DONNEES CONTENUES DANS **{selected}**")
+
+                             prompts = [f"Extrait moi toutes les informations du noeud ''{selected}'' ➡️",
+                                        f"Montre moi les conversations autour du noeud ''{selected}'' ➡️"]
+
+                             for i, prompt in enumerate(prompts):
+                                 button = st.button(prompt, key=f"p_{i}", on_click=lambda i=i: st.session_state.chat_graph_history.append(HumanMessage(content=prompts[i])))
+
+                 with st.container(height=50, border=False):
+                     st_copy_to_clipboard(chat_history_formatter(st.session_state.chat_graph_history), key="cp_but_graph_chat", show_text=False)
+
+ doc_dialog_main()
audit_page/knowledge_graph.py ADDED
@@ -0,0 +1,333 @@
+ import streamlit as st
+ from utils.kg.construct_kg import get_graph
+ from utils.audit.rag import get_text_from_content_for_doc, get_text_from_content_for_audio
+ from streamlit_agraph import agraph, Node, Edge, Config
+ import random
+ import math
+ from utils.audit.response_llm import generate_response_via_langchain
+ from langchain_core.messages import AIMessage, HumanMessage
+
+ def if_node_exists(nodes, node_id):
+     """
+     Check if a node already exists in the node list.
+
+     Args:
+         nodes (list): The Agraph nodes collected so far.
+         node_id (str): The id of the node to check.
+
+     Returns:
+         bool: True if the node exists, False otherwise.
+     """
+     for node in nodes:
+         if node.id == node_id:
+             return True
+     return False
+
+ def generate_random_color():
+     r = random.randint(180, 255)
+     g = random.randint(180, 255)
+     b = random.randint(180, 255)
+     return (r, g, b)
+
+ def rgb_to_hex(rgb):
+     return '#{:02x}{:02x}{:02x}'.format(rgb[0], rgb[1], rgb[2])
+
+ def get_node_types(graph):
+     node_types = set()
+     for node in graph.nodes:
+         node_types.add(node.type)
+     for relationship in graph.relationships:
+         source = relationship.source
+         target = relationship.target
+         node_types.add(source.type)
+         node_types.add(target.type)
+     return node_types
+
+ def color_distance(color1, color2):
+     # Calculate Euclidean distance between two RGB colors
+     return math.sqrt((color1[0] - color2[0]) ** 2 + (color1[1] - color2[1]) ** 2 + (color1[2] - color2[2]) ** 2)
+
+ def generate_distinct_colors(num_colors, min_distance=30):
+     colors = []
+     while len(colors) < num_colors:
+         new_color = generate_random_color()
+         if all(color_distance(new_color, existing_color) >= min_distance for existing_color in colors):
+             colors.append(new_color)
+     return [rgb_to_hex(color) for color in colors]
+
+ def list_to_dict_colors(node_types: set):
+     number_of_colors = len(node_types)
+     colors = generate_distinct_colors(number_of_colors)
+
+     node_colors = {}
+     for i, node_type in enumerate(node_types):
+         node_colors[node_type] = colors[i]
+
+     return node_colors
+
+ def convert_neo4j_to_agraph(neo4j_graph, node_colors):
+     """
+     Converts a Neo4j-style graph document into an Agraph representation.
+
+     Args:
+         neo4j_graph: A graph document exposing `nodes` (objects with `id` and `type`)
+             and `relationships` (objects with `source`, `target`, and `type`).
+         node_colors (dict): Mapping from node type to hex color.
+
+     Returns:
+         (edges, nodes, config): The Agraph edges, nodes, and display config.
+     """
+     nodes = []
+     edges = []
+
+     # Creating Agraph nodes
+     for node in neo4j_graph.nodes:
+         # Use the node id as the Agraph node id
+         node_id = node.id.replace(" ", "_")  # Replace spaces with underscores for ids
+         label = node.id
+         type = node.type
+         size = 25  # Default size, can be customized
+         shape = "circle"  # Default shape, can be customized
+
+         # For example purposes, no images are added, but you can set 'image' if needed.
+         new_node = Node(id=node_id, title=type, label=label, size=size, shape=shape, color=node_colors[type])
+         if not if_node_exists(nodes, new_node.id):
+             nodes.append(new_node)
+
+     # Creating Agraph edges
+     for relationship in neo4j_graph.relationships:
+         size = 25  # Default size, can be customized
+         shape = "circle"  # Default shape, can be customized
+
+         source = relationship.source
+         source_type = source.type
+         source_id = source.id.replace(" ", "_")
+         label_source = source.id
+
+         source_node = Node(id=source_id, title=source_type, label=label_source, size=size, shape=shape, color=node_colors[source_type])
+         if not if_node_exists(nodes, source_node.id):
+             nodes.append(source_node)
+
+         target = relationship.target
+         target_type = target.type
+         target_id = target.id.replace(" ", "_")
+         label_target = target.id
+
+         target_node = Node(id=target_id, title=target_type, label=label_target, size=size, shape=shape, color=node_colors[target_type])
+         if not if_node_exists(nodes, target_node.id):
+             nodes.append(target_node)
+
+         label = relationship.type
+
+         edges.append(Edge(source=source_id, label=label, target=target_id))
+
+     # Define the configuration for Agraph
+     config = Config(width=1200, height=800, directed=True, physics=True, hierarchical=True, from_json="config.json")
+
+     return edges, nodes, config
+
+ def display_graph(edges, nodes, config):
+     # Display the Agraph visualization
+     return agraph(edges=edges, nodes=nodes, config=config)
+
+ def filter_nodes_by_types(nodes: list[Node], node_types_filter: list) -> list[Node]:
+     filtered_nodes = []
+     for node in nodes:
+         if node.title in node_types_filter:  # the title holds the type of the node
+             filtered_nodes.append(node)
+     return filtered_nodes
+
+ @st.dialog(title="Changer la vue")
+ def change_view_dialog():
+     st.write("Changer la vue")
+
+     for index, item in enumerate(st.session_state.filter_views.keys()):
+         emp = st.empty()
+         col1, col2, col3 = emp.columns([8, 1, 1])
+
+         if index > 0 and col2.button("🗑️", key=f"del{index}"):
+             del st.session_state.filter_views[item]
+             st.session_state.current_view = "Vue par défaut"
+             st.rerun()
+         but_content = "🔍" if st.session_state.current_view != item else "✅"
+         if col3.button(but_content, key=f"valid{index}"):
+             st.session_state.current_view = item
+             st.rerun()
+         if len(st.session_state.filter_views.keys()) > index:
+             with col1.expander(item):
+                 if index > 0:
+                     change_name = st.text_input("Nom de la vue", label_visibility="collapsed", placeholder="Changez le nom de la vue", key=f"change_name{index}")
+                     if st.button("Renommer", key=f"rename{index}"):
+                         if change_name != "":
+                             st.session_state.filter_views[change_name] = st.session_state.filter_views.pop(item)
+                             st.session_state.current_view = change_name
+                             st.rerun()
+                 st.markdown("\n".join(f"- {label.strip()}" for label in st.session_state.filter_views[item]))
+         else:
+             emp.empty()
+
+ @st.dialog(title="Ajouter une vue")
+ def add_view_dialog(filters):
+     st.write("Ajouter une vue")
+     view_name = st.text_input("Nom de la vue")
+     st.markdown("Les filtres actuels :")
+     st.write(filters)
+     if st.button("Ajouter la vue"):
+         st.session_state.filter_views[view_name] = filters
+         st.session_state.current_view = view_name
+         st.rerun()
+
+ @st.dialog(title="Changer la couleur")
+ def change_color_dialog():
+     st.write("Changer la couleur")
+     for node_type, color in st.session_state.node_types.items():
+         color = st.color_picker(f"La couleur de l'entité **{node_type.strip()}**", color)
+         st.session_state.node_types[node_type] = color
+
+     if st.button("Valider"):
+         st.rerun()
+
+ def kg_main():
+     # st.set_page_config(page_title="Graphe de connaissance", page_icon="", layout="wide")
+
+     if "audit" not in st.session_state or st.session_state.audit == {}:
+         st.error("Veuillez d'abord effectuer un audit pour visualiser le graphe de connaissance.")
+         return
+
+     if "cr" not in st.session_state:
+         st.error("Veuillez d'abord effectuer un compte rendu pour visualiser le graphe de connaissance.")
+         return
+
+     if "graph" not in st.session_state:
+         st.session_state.graph = None
+
+     if "filter_views" not in st.session_state:
+         st.session_state.filter_views = {}
+     if "current_view" not in st.session_state:
+         st.session_state.current_view = None
+
+     st.title("Graphe de connaissance")
+
+     if "node_types" not in st.session_state:
+         st.session_state.node_types = None
+
+     if "summary" not in st.session_state:
+         st.session_state.summary = None
+
+     if "chat_graph_history" not in st.session_state:
+         st.session_state.chat_graph_history = []
+
+     audit = st.session_state.audit_simplified
+     # content = st.session_state.audit["content"]
+
+     # if audit["type de fichier"] == "pdf":
+     #     text = get_text_from_content_for_doc(content)
+     # elif audit["type de fichier"] == "audio":
+     #     text = get_text_from_content_for_audio(content)
+
+     text = st.session_state.cr + "\nmots clés : " + audit["Mots clés"]
+
+     # summary_prompt = f"Voici un ensemble de documents : {text}. À partir de ces documents, veuillez fournir des résumés concis en vous concentrant sur l'extraction des relations essentielles et des événements. Il est crucial d'inclure les dates des actions ou des événements, car elles seront utilisées pour l'analyse chronologique. Par exemple : 'Sam a été licencié par le conseil d'administration d'OpenAI le 17 novembre 2023 (17 novembre, vendredi)', ce qui illustre la relation entre Sam et OpenAI ainsi que la date de l'événement."
+
+     if st.button("Générer le graphe"):
+         # with st.spinner("Extractions des relations..."):
+         #     sum = generate_response_openai(summary_prompt, model="gpt-4o")
+         #     st.session_state.summary = sum
+
+         with st.spinner("Génération du graphe..."):
+             keywords_list = audit["Mots clés"].strip().split(",")
+             allowed_nodes_types = keywords_list + ["Person", "Organization", "Location", "Event", "Date", "Time", "Ressource", "Concept"]
+             graph = get_graph(text, allowed_nodes=allowed_nodes_types)
+             st.session_state.graph = graph
+
+             node_types = get_node_types(graph[0])
+             nodes_type_dict = list_to_dict_colors(node_types)
+             st.session_state.node_types = nodes_type_dict
+             st.session_state.filter_views["Vue par défaut"] = list(node_types)
+             st.session_state.current_view = "Vue par défaut"
+     else:
+         graph = st.session_state.graph
+
+     if graph is not None:
+         edges, nodes, config = convert_neo4j_to_agraph(graph[0], st.session_state.node_types)
+
+         col1, col2 = st.columns([2.5, 1.5])
+
+         with col1.container(border=True, height=800):
+             st.write("##### Visualisation du graphe (**" + st.session_state.current_view + "**)")
+             filter_col, add_view_col, change_view_col, color_col = st.columns([9, 1, 1, 1])
+
+             if color_col.button("🎨", help="Changer la couleur"):
+                 change_color_dialog()
+
+             if change_view_col.button("🔍", help="Changer de vue"):
+                 change_view_dialog()
+
+             filter = filter_col.multiselect("Filtrer selon l'étiquette", st.session_state.node_types.keys(), placeholder="Sélectionner une ou plusieurs étiquettes", default=st.session_state.filter_views[st.session_state.current_view], label_visibility="collapsed")
+
+             if add_view_col.button("➕", help="Ajouter une vue"):
+                 add_view_dialog(filter)
+
+             if filter:
+                 nodes = filter_nodes_by_types(nodes, filter)
+
+             selected = display_graph(edges, nodes, config)
+
+         with col2.container(border=True, height=800):
+             st.markdown("##### Dialoguer avec le graphe")
+
+             user_query = st.chat_input("Par ici ...")
+             if user_query is not None and user_query != "":
+                 st.session_state.chat_graph_history.append(HumanMessage(content=user_query))
+
+             with st.container(height=650, border=False):
+                 for message in st.session_state.chat_graph_history:
+                     if isinstance(message, AIMessage):
+                         with st.chat_message("AI"):
+                             st.markdown(message.content)
+                     elif isinstance(message, HumanMessage):
+                         with st.chat_message("Moi"):
+                             st.write(message.content)
+
+                 # Check whether the last message is a human message
+                 if len(st.session_state.chat_graph_history) > 0:
+                     last_message = st.session_state.chat_graph_history[-1]
+                     if isinstance(last_message, HumanMessage):
+                         with st.chat_message("AI"):
+                             retriever = st.session_state.vectorstore.as_retriever()
+                             context = retriever.invoke(last_message.content)
+                             wrapped_prompt = f"Étant donné le contexte suivant {context}, et le graphe de connaissance : {graph}, {last_message.content}"
+                             response = st.write_stream(generate_response_via_langchain(wrapped_prompt, stream=True))
+                             st.session_state.chat_graph_history.append(AIMessage(content=response))
+
+                 if selected is not None:
+                     with st.chat_message("AI"):
+                         st.markdown(f" EXPLORER LES DONNEES CONTENUES DANS **{selected}**")
+
+                         prompts = [f"Extrait moi toutes les informations du noeud ''{selected}'' ➡️",
+                                    f"Montre moi les conversations autour du noeud ''{selected}'' ➡️"]
+
+                         for i, prompt in enumerate(prompts):
+                             button = st.button(prompt, key=f"p_{i}", on_click=lambda i=i: st.session_state.chat_graph_history.append(HumanMessage(content=prompts[i])))
+
+         node_types = st.session_state.node_types
chatbot_page/chatbot.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ import streamlit as st
+ from langchain_openai import ChatOpenAI
+ from langchain_mistralai import ChatMistralAI
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_core.messages import AIMessage, HumanMessage
+ 
+ def get_response(user_query, chat_history, context, llm=None, history_limit=5, stream=True):
+ 
+     template = """
+     Étant donné l'historique de la conversation : {chat_history}, le contexte : {context}, et la question de l'utilisateur : {user_question}, veuillez fournir une réponse détaillée et complète. La réponse doit inclure un ou plusieurs des éléments suivants :
+ 
+     1. Une explication claire des concepts clés et des termes liés au sujet.
+     2. Un aperçu des meilleures pratiques, des stratégies courantes ou des cadres de référence pertinents pour la discussion.
+     3. Des exemples spécifiques ou des études de cas illustrant les principes abordés.
+     4. Les défis potentiels ou les considérations à prendre en compte.
+     5. Des suggestions de ressources supplémentaires ou des actions que l'utilisateur peut entreprendre pour approfondir sa compréhension.
+ 
+     Assurez-vous que la réponse soit adaptée au niveau d'expertise de l'utilisateur et aux spécificités du contexte fourni.
+     """
+ 
+     prompt = ChatPromptTemplate.from_template(template)
+ 
+     # map the display name chosen in the UI to the matching chat model
+     if not llm:
+         llm = ChatOpenAI(model="gpt-4o-mini")
+     elif llm == "GPT-4o":
+         llm = ChatOpenAI(model="gpt-4o")
+     elif llm == "Mistral Large 2 (FR)":
+         llm = ChatMistralAI(model_name="mistral-large-2407")
+     elif llm == "GPT-4o-mini":
+         llm = ChatOpenAI(model="gpt-4o-mini")
+     elif llm == "Mistral Nemo (FR)":
+         llm = ChatMistralAI(model_name="open-mistral-nemo-2407")
+ 
+     chain = prompt | llm
+ 
+     if not stream:
+         # blocking call: returns the raw AIMessage from the model
+         return chain.invoke({
+             "context": context,
+             "chat_history": chat_history[-history_limit:],
+             "user_question": user_query,
+         })
+ 
+     # streaming: parse each chunk to a plain string
+     chain = chain | StrOutputParser()
+ 
+     if history_limit:
+         return chain.stream({
+             "context": context,
+             "chat_history": chat_history[-history_limit:],
+             "user_question": user_query,
+         })
+ 
+     return chain.stream({
+         "context": context,
+         "chat_history": chat_history,
+         "user_question": user_query,
+     })
+ 
+ def handle_display_models(index, models_names):
+     model = st.radio("Choisir un modèle", models_names, index=index)
+     return model
+ 
+ 
+ def chatbot_main():
+     st.title("Chatbot")
+     models_names = ["GPT-4o", "GPT-4o-mini"]
+ 
+     if "chat_history" not in st.session_state:
+         st.session_state.chat_history = [
+             AIMessage(content="Salut, que puis-je faire pour vous ?"),
+         ]
+ 
+     if "model" not in st.session_state:
+         st.session_state.model = "GPT-4o-mini"
+ 
+     header = st.container()
+     col1, col2 = header.columns([1, 2])
+ 
+     with col1.popover("Modèles disponibles"):
+         new_model = handle_display_models(models_names.index(st.session_state.model), models_names)
+ 
+     st.session_state.model = new_model
+ 
+     st.markdown(f"- **{st.session_state.model}**")
+ 
+     for message in st.session_state.chat_history:
+         if isinstance(message, AIMessage):
+             with st.chat_message("AI"):
+                 st.markdown(message.content)
+         elif isinstance(message, HumanMessage):
+             with st.chat_message("Moi"):
+                 st.write(message.content)
+ 
+     if "response_llm" not in st.session_state:
+         st.session_state.response_llm = ""
+ 
+     user_query = st.chat_input("Par ici ...")
+     if user_query is not None and user_query != "":
+         st.session_state.chat_history.append(HumanMessage(content=user_query))
+ 
+         with st.chat_message("Moi"):
+             st.markdown(user_query)
+ 
+         with st.chat_message("AI"):
+             st.markdown(f"**{st.session_state.model}**")
+ 
+             response = st.write_stream(get_response(user_query, st.session_state.chat_history, context=st.session_state.response_llm, llm=st.session_state.model, stream=True))
+             st.session_state.chat_history.append(AIMessage(content=response))
+ 
+ 
+ chatbot_main()
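Outside st.write_stream, the generator returned by get_response can be consumed directly. A minimal sketch, assuming OPENAI_API_KEY is set in the environment (the history and question are illustrative):

from langchain_core.messages import AIMessage, HumanMessage

history = [AIMessage(content="Salut, que puis-je faire pour vous ?"),
           HumanMessage(content="Explique-moi le RAG en deux phrases.")]

# stream=True (the default) yields plain string chunks
chunks = get_response("Explique-moi le RAG en deux phrases.", history, context="", llm="GPT-4o-mini")
print("".join(chunks))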
config.json ADDED
@@ -0,0 +1,26 @@
+ {
+     "height": "600px",
+     "width": "1200px",
+     "autoResize": true,
+ 
+     "physics": {
+         "enabled": true,
+ 
+         "barnesHut": {
+             "avoidOverlap": 1,
+             "theta": 0.1,
+             "gravitationalConstant": -10000,
+             "centralGravity": 1,
+             "springLength": 50,
+             "springConstant": 0,
+             "damping": 0.5
+         },
+         "stabilization": {
+             "enabled": true,
+             "iterations": 1000,
+             "updateInterval": 50,
+             "onlyDynamicEdges": false,
+             "fit": true
+         }
+     }
+ }
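This file mirrors the vis.js option tree consumed by streamlit-agraph. A sketch of loading it for the graph component; it is an assumption that the installed streamlit-agraph version forwards every nested key verbatim through Config:

import json
from streamlit_agraph import Config

with open("config.json") as f:
    options = json.load(f)

config = Config(**options)  # forward the vis.js-style options to the component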
doc_page/documentation.py ADDED
@@ -0,0 +1,3 @@
+ import streamlit as st
+ 
+ st.title("Documentation")
packages.txt ADDED
@@ -0,0 +1,3 @@
+ xclip
+ xsel
+ python3-pyperclip
requirements.txt ADDED
@@ -0,0 +1,28 @@
+ streamlit==1.37.0
+ pyperclip
+ tiktoken
+ pydub
+ numpy
+ scipy
+ textstat
+ pymupdf
+ openai
+ nltk
+ rake_nltk
+ python-docx
+ pillow
+ pandas
+ langchain
+ langchain-core
+ langchainhub
+ langchain-openai
+ langchain-mistralai
+ faiss-cpu
+ langchain-community
+ python-dotenv
+ langchain-experimental
+ neo4j
+ streamlit-agraph
+ st-copy-to-clipboard
+ clipboard
utils/audit/audit_audio.py ADDED
@@ -0,0 +1,72 @@
+ import numpy as np
+ import scipy.io.wavfile as wavfile
+ from pydub import AudioSegment
+ import io
+ import tiktoken
+ from openai import OpenAI
+ 
+ def transcript_audio_func(audio_file):
+     client = OpenAI()
+     transcription = client.audio.transcriptions.create(
+         model="whisper-1",
+         file=audio_file
+     )
+ 
+     return transcription.text
+ 
+ def count_tokens(input_string: str) -> int:
+     tokenizer = tiktoken.get_encoding("cl100k_base")
+     tokens = tokenizer.encode(input_string)
+     return len(tokens)
+ 
+ # Function to calculate SNR (here: ratio of total power to variance,
+ # i.e. the DC offset is treated as the signal component)
+ def calculate_snr(audio_data):
+     signal = np.asarray(audio_data, dtype=np.float64)  # cast to float to avoid integer overflow when squaring
+     noise = signal - np.mean(signal)
+     signal_power = np.mean(signal ** 2)
+     noise_power = np.mean(noise ** 2)
+     snr = 10 * np.log10(signal_power / noise_power)
+     return snr
+ 
+ # Function to evaluate audio quality
+ def evaluate_audio_quality(file) -> dict:
+     try:
+         audio = AudioSegment.from_file(file)
+     except Exception:
+         audio = AudioSegment.from_file(io.BytesIO(file.read()))
+ 
+     audio_data = np.array(audio.get_array_of_samples())
+ 
+     # duration in minutes (samples are interleaved across channels)
+     duration = len(audio_data) / (audio.frame_rate * audio.channels) / 60
+ 
+     # Calculate volume
+     volume = audio.dBFS
+ 
+     # Calculate SNR
+     snr = calculate_snr(audio_data)
+ 
+     # get the transcription of the audio; rewind first, since reading
+     # the AudioSegment above consumed the file object
+     if hasattr(file, "seek"):
+         file.seek(0)
+     transcription = transcript_audio_func(file)
+ 
+     audit = {
+         "volume": volume,
+         "SNR": snr,
+         "duration": duration,
+         "number_of_tokens": count_tokens(transcription),
+         "number_of_words": len(transcription.split())
+     }
+ 
+     content = {
+         "transcription": transcription,
+         "audio_data": audio_data,
+         "frame_rate": audio.frame_rate
+     }
+ 
+     audit_global = {
+         "audit": audit,
+         "content": content
+     }
+ 
+     return audit_global
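calculate_snr can be sanity-checked on synthetic samples. A small sketch (the signal parameters are arbitrary):

import numpy as np
from utils.audit.audit_audio import calculate_snr

rate = 16000
t = np.arange(rate) / rate
clean = np.sin(2 * np.pi * 440 * t) + 1.0     # 440 Hz tone with a DC offset
noisy = clean + 0.05 * np.random.randn(rate)  # add white noise

print(f"SNR: {calculate_snr(noisy):.1f} dB")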
utils/audit/audit_doc.py ADDED
@@ -0,0 +1,217 @@
+ import pymupdf
+ import tiktoken
+ import textstat
+ from docx import Document
+ import io
+ # from rake_nltk import Rake
+ # import nltk
+ # from nltk.corpus import stopwords
+ from openai import OpenAI
+ 
+ # Download NLTK stopwords
+ # nltk.download('stopwords')
+ # nltk.download('punkt')
+ 
+ # helper that calls gpt-4o-mini
+ def extract_relevant_keywords(prompt: str) -> str:
+     client = OpenAI()
+     response = client.chat.completions.create(
+         model="gpt-4o-mini",
+         messages=[
+             {"role": "user", "content": prompt}
+         ]
+     )
+     return response.choices[0].message.content
+ 
+ 
+ def evaluate_text_quality(text: str) -> dict:
+     # Calculate readability metrics
+     flesch_reading_ease = textstat.flesch_reading_ease(text)
+     flesch_kincaid_grade = textstat.flesch_kincaid_grade(text)
+     gunning_fog = textstat.gunning_fog(text)
+     smog_index = textstat.smog_index(text)
+     automated_readability_index = textstat.automated_readability_index(text)
+ 
+     # Normalize readability scores to a 0-1 scale
+     def normalize_score(score, min_score, max_score):
+         return (score - min_score) / (max_score - min_score)
+ 
+     # Normalize each readability score
+     n_flesch_reading_ease = normalize_score(flesch_reading_ease, 0, 100)
+     n_flesch_kincaid_grade = 1 - normalize_score(flesch_kincaid_grade, 0, 18)  # higher is more difficult
+     n_gunning_fog = 1 - normalize_score(gunning_fog, 0, 18)  # higher is more difficult
+     n_smog_index = 1 - normalize_score(smog_index, 0, 18)  # higher is more difficult
+     n_automated_readability_index = 1 - normalize_score(automated_readability_index, 0, 18)  # higher is more difficult
+ 
+     # Weights for each metric (adjust these as needed)
+     weights = {
+         "flesch_reading_ease": 0.25,
+         "flesch_kincaid_grade": 0.25,
+         "gunning_fog": 0.2,
+         "smog_index": 0.15,
+         "automated_readability_index": 0.15
+     }
+ 
+     # Calculate the global readability score
+     global_score = (
+         n_flesch_reading_ease * weights["flesch_reading_ease"] +
+         n_flesch_kincaid_grade * weights["flesch_kincaid_grade"] +
+         n_gunning_fog * weights["gunning_fog"] +
+         n_smog_index * weights["smog_index"] +
+         n_automated_readability_index * weights["automated_readability_index"]
+     )
+ 
+     # Scale the global score to 0-5
+     global_score_0_5 = global_score * 5
+ 
+     # return the raw metrics together with the aggregated 0-5 score
+     # (the function computed the score but never returned it)
+     return {
+         "flesch_reading_ease": flesch_reading_ease,
+         "flesch_kincaid_grade": flesch_kincaid_grade,
+         "gunning_fog": gunning_fog,
+         "smog_index": smog_index,
+         "automated_readability_index": automated_readability_index,
+         "global_score": global_score_0_5
+     }
+ 
+ # def extract_keywords(text):
+ #     rake = Rake(stopwords.words('french'))
+ #     rake.extract_keywords_from_text(text)
+ #     return rake.get_ranked_phrases()
+ 
+ 
+ def count_tokens(input_string: str) -> int:
+     tokenizer = tiktoken.get_encoding("cl100k_base")
+     tokens = tokenizer.encode(input_string)
+     return len(tokens)
+ 
+ def audit_descriptif_pdf(file, max_img_width) -> dict:
+     document = pymupdf.open(stream=file.read())
+ 
+     audit_dict_doc = {
+         "number_of_pages": len(document),
+         "number_of_images": 0,
+         "number_of_links": 0,
+         "number_of_tables": 0,
+         "number_of_tokens": 0,
+         "number_of_words": 0,
+         "key_words": []
+     }
+ 
+     doc_content = dict()
+ 
+     for page in document:
+ 
+         audit_dict_page = {}
+         page_content = {
+             "images": [],
+             "texte": "",
+             "liens": [],
+             "tableaux": []
+         }
+ 
+         # number of images
+         images = page.get_images()
+         number_images = len(images)
+         audit_dict_page["number_of_images"] = number_images
+         audit_dict_doc["number_of_images"] += number_images
+ 
+         # get images
+         for _, img in enumerate(images):
+             xref = img[0]
+             base_image = document.extract_image(xref)
+ 
+             image_bytes = base_image["image"]
+             image_width = base_image["width"]
+             image_height = base_image["height"]
+ 
+             # Adjust image size if it exceeds the maximum width
+             if image_width > max_img_width:
+                 ratio = max_img_width / image_width
+                 image_width = max_img_width
+                 image_height = int(image_height * ratio)
+ 
+             page_content["images"].append((image_bytes, image_width, image_height))
+ 
+         # get links with uri
+         links = []
+         for link in page.get_links():
+             if link['kind'] == pymupdf.LINK_URI and 'uri' in link:
+                 links.append({"uri": link["uri"], "page": page.number})
+ 
+         page_content["liens"] = links
+ 
+         # number of links
+         number_links = len(links)
+         audit_dict_page["number_of_links"] = number_links
+         audit_dict_doc["number_of_links"] += number_links
+ 
+         # number of tables
+         tables = page.find_tables().tables
+         number_tables = len(tables)
+         for tab in tables:
+             page_content["tableaux"].append(tab.to_pandas())
+         audit_dict_page["number_of_tables"] = number_tables
+         audit_dict_doc["number_of_tables"] += number_tables
+ 
+         # number of tokens and words
+         text = page.get_text("text")
+         number_tokens = count_tokens(text)
+         number_words = len(text.split())
+ 
+         audit_dict_page["number_of_tokens"] = number_tokens
+         audit_dict_page["number_of_words"] = number_words
+ 
+         # get text
+         page_content["texte"] = text
+ 
+         audit_dict_doc["number_of_tokens"] += number_tokens
+         audit_dict_doc["number_of_words"] += number_words
+ 
+         audit_dict_doc[f"page_{page.number}"] = audit_dict_page
+ 
+         doc_content[f"page_{page.number}"] = page_content
+ 
+     # Extract key words from the document
+     text = " ".join([page["texte"] for page in doc_content.values()])
+     # key_words = extract_keywords(text)
+     # list_key_words_text = "\n".join(key_words[:10])
+     prompt = f'''Voici le document :
+     - {text}
+     Veuillez extraire les cinq mots clés les plus pertinents de ce document. Chaque mot clé doit contenir au maximum deux mots.
+ 
+     TA RÉPONSE DOIT RESPECTER LE FORMAT SUIVANT :
+     key_word1, key_word2, key_word3, key_word4, key_word5
+     '''
+     key_words_extracted = extract_relevant_keywords(prompt)
+     audit_dict_doc["key_words"] = "\n" + key_words_extracted
+ 
+     # merge the two dicts
+     global_audit = {
+         "audit": audit_dict_doc,
+         "content": doc_content
+     }
+ 
+     return global_audit
+ 
+ def audit_text(text: str) -> dict:
+ 
+     prompt = f'''Voici le document :
+     - {text}
+     Veuillez extraire les cinq mots clés les plus pertinents de ce document. Chaque mot clé doit contenir au maximum deux mots.
+ 
+     TA RÉPONSE DOIT RESPECTER LE FORMAT SUIVANT :
+     key_word1, key_word2, key_word3, key_word4, key_word5
+     '''
+     key_words_extracted = extract_relevant_keywords(prompt)
+ 
+     audit_dict = {
+         "number_of_tokens": count_tokens(text),
+         "number_of_words": len(text.split()),
+     }
+ 
+     audit_dict["key_words"] = "\n" + key_words_extracted
+ 
+     global_audit = {
+         "audit": audit_dict,
+         "content": text
+     }
+ 
+     return global_audit
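The text path can be exercised without a PDF. A short sketch, assuming OPENAI_API_KEY is set (the sample text is illustrative):

from utils.audit.audit_doc import audit_text, evaluate_text_quality

sample = "L'intelligence artificielle transforme la relation client des PME."
result = audit_text(sample)
print(result["audit"])               # token/word counts plus extracted key words
print(evaluate_text_quality(sample)) # readability metrics and the 0-5 global score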
utils/audit/rag.py ADDED
@@ -0,0 +1,44 @@
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_openai import OpenAIEmbeddings
+ from langchain_community.vectorstores import FAISS
+ 
+ 
+ def get_text_from_content_for_doc(content):
+     text = ""
+     for page in content:
+         text += content[page]["texte"]
+     return text
+ 
+ def get_text_from_content_for_audio(content):
+     return content["transcription"]
+ 
+ 
+ def get_text_chunks(text):
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=500,      # the character length of each chunk
+         chunk_overlap=100,   # the character overlap between consecutive chunks
+         length_function=len  # the length function - here, character length (the python len() fn.)
+     )
+     chunks = text_splitter.split_text(text)
+     return chunks
+ 
+ def get_vectorstore(text_chunks):
+     embedding = OpenAIEmbeddings(model="text-embedding-3-small")
+     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embedding)
+     return vectorstore
+ 
+ def setup_rag(file_type, content):
+     if file_type == "pdf":
+         text = get_text_from_content_for_doc(content)
+     elif file_type == "audio":
+         text = get_text_from_content_for_audio(content)
+     else:
+         # fail early instead of hitting an unbound variable below
+         raise ValueError(f"Unsupported file type: {file_type}")
+ 
+     chunks = get_text_chunks(text)
+ 
+     vectorstore = get_vectorstore(chunks)
+ 
+     return vectorstore
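setup_rag expects the content dict produced by the audit helpers. A minimal sketch for the audio path (the transcription is illustrative):

from utils.audit.rag import setup_rag

content = {"transcription": "Le client demande une offre adaptée aux PME."}
vectorstore = setup_rag("audio", content)

docs = vectorstore.as_retriever().invoke("Que demande le client ?")
print(docs[0].page_content)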
utils/audit/response_llm.py ADDED
@@ -0,0 +1,35 @@
+ from openai import OpenAI
+ from langchain_openai import ChatOpenAI
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_core.prompts import PromptTemplate
+ 
+ def generate_response_openai(prompt: str, stream: bool = False, model="gpt-4o-mini") -> str:
+     client = OpenAI()
+     response = client.chat.completions.create(
+         model=model,
+         messages=[
+             {"role": "user", "content": prompt}
+         ],
+         stream=stream
+     )
+ 
+     if stream:
+         # with stream=True the API returns an iterator of chunks,
+         # not a completion object with .choices
+         return response
+ 
+     return response.choices[0].message.content
+ 
+ 
+ def generate_response_via_langchain(query: str, stream: bool = False, model: str = "gpt-4o-mini"):
+     # Define the prompt template
+     template = "{query}"
+     prompt = PromptTemplate.from_template(template)
+ 
+     # Initialize the OpenAI LLM with the specified model
+     llm = ChatOpenAI(model=model)
+ 
+     # Create an LLM chain with the prompt and the LLM
+     llm_chain = prompt | llm | StrOutputParser()
+ 
+     if stream:
+         # Return a generator that yields streamed responses
+         return llm_chain.stream({"query": query})
+ 
+     # Invoke the LLM chain and return the result
+     return llm_chain.invoke({"query": query})
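Both helpers read OPENAI_API_KEY from the environment. A short sketch of the LangChain variant in its two modes:

from utils.audit.response_llm import generate_response_via_langchain

# blocking call: returns the full answer as a string
print(generate_response_via_langchain("Résume le RAG en une phrase."))

# streaming call: returns a generator of string chunks
for chunk in generate_response_via_langchain("Résume le RAG en une phrase.", stream=True):
    print(chunk, end="", flush=True)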
utils/audit/transcript_audio.py ADDED
@@ -0,0 +1,10 @@
+ from openai import OpenAI
+ 
+ def transcript_audio_func(audio_file):
+     client = OpenAI()
+     transcription = client.audio.transcriptions.create(
+         model="whisper-1",  # "whisper" is not a valid model id
+         file=audio_file
+     )
+ 
+     return transcription.text
utils/kg/barnes_algo.py ADDED
@@ -0,0 +1,211 @@
+ from __future__ import division
+ import numpy as np
+ import math
+ import matplotlib.pyplot as plt
+ import matplotlib.animation as animation
+ import matplotlib.patches as patches
+ import random
+ 
+ theta = 0.5
+ AU = (149.6e6 * 1000)  # 149.6 million km, in meters.
+ G = 6.67408e-11  # m^3 kg^-1 s^-2
+ fig1 = plt.figure()
+ sim = fig1.add_subplot(111, aspect='equal')
+ fig2 = plt.figure()
+ quadt = fig2.add_subplot(111, aspect='equal')
+ 
+ class Node:
+     children = None
+     mass = None
+     center_of_mass = None
+     bbox = None
+     vx = vy = None
+ 
+ def quad_insert(root, x, y, m):
+     if root.mass is None:  # when the root is empty, add the first particle
+         root.mass = m
+         root.center_of_mass = [x, y]
+         return
+     elif root.children is None:
+         root.children = [None, None, None, None]
+         old_quadrant = quadrant_of_particle(root.bbox, root.center_of_mass[0], root.center_of_mass[1])
+         if root.children[old_quadrant] is None:
+             root.children[old_quadrant] = Node()
+             root.children[old_quadrant].bbox = quadrant_bbox(root.bbox, old_quadrant)
+         quad_insert(root.children[old_quadrant], root.center_of_mass[0], root.center_of_mass[1], root.mass)
+         new_quadrant = quadrant_of_particle(root.bbox, x, y)
+         if root.children[new_quadrant] is None:
+             root.children[new_quadrant] = Node()
+             root.children[new_quadrant].bbox = quadrant_bbox(root.bbox, new_quadrant)
+         quad_insert(root.children[new_quadrant], x, y, m)
+         root.center_of_mass[0] = (root.center_of_mass[0]*root.mass + x*m) / (root.mass + m)
+         root.center_of_mass[1] = (root.center_of_mass[1]*root.mass + y*m) / (root.mass + m)
+         root.mass = root.mass + m
+     else:
+         new_quadrant = quadrant_of_particle(root.bbox, x, y)
+         if root.children[new_quadrant] is None:
+             root.children[new_quadrant] = Node()
+             root.children[new_quadrant].bbox = quadrant_bbox(root.bbox, new_quadrant)
+         quad_insert(root.children[new_quadrant], x, y, m)
+         root.center_of_mass[0] = (root.center_of_mass[0]*root.mass + x*m) / (root.mass + m)
+         root.center_of_mass[1] = (root.center_of_mass[1]*root.mass + y*m) / (root.mass + m)
+         root.mass = root.mass + m
+ 
+ def display(root):
+     if root.mass is None:
+         return
+     if root.children is not None:
+         x = (root.bbox[0] + root.bbox[1]) / 2
+         y = (root.bbox[2] + root.bbox[3]) / 2
+         width = x - root.bbox[0]
+         plt_node(root.bbox[0], root.bbox[2], width)
+         plt_node(root.bbox[0], y, width)
+         plt_node(x, root.bbox[2], width)
+         plt_node(x, y, width)
+         for i in range(4):  # range, not the Python 2 xrange
+             if root.children[i] is not None:
+                 display(root.children[i])
+     else:
+         quadt.scatter(root.center_of_mass[0], root.center_of_mass[1])
+ 
+ def integrate(particles):
+     bodies = particles
+     n = len(bodies)
+     timestep = 24*3600  # one day
+     years = 2 * 365  # number of daily steps to simulate (two Earth years)
+     for day in range(years):
+         particles_force = {}
+         root = Node()
+         root.center_of_mass = []
+         root.bbox = find_root_bbox(bodies)
+         for i in range(n):
+             quad_insert(root, bodies[i][3], bodies[i][4], bodies[i][2])
+         for i in range(n):
+             total_fx, total_fy = compute_force(root, bodies[i][3], bodies[i][4], bodies[i][2])
+             particles_force[bodies[i][0]] = (total_fx, total_fy)
+         for i in range(n):
+             fx, fy = particles_force[bodies[i][0]]
+             bodies[i][5] += fx / bodies[i][2] * timestep
+             bodies[i][6] += fy / bodies[i][2] * timestep
+ 
+             bodies[i][3] += bodies[i][5] * timestep
+             bodies[i][4] += bodies[i][6] * timestep
+             sim.scatter(bodies[i][3], bodies[i][4], c=bodies[i][1])
+     display(root)
+     quadt.scatter(root.center_of_mass[0], root.center_of_mass[1], c='red', marker='x')
+ 
+ def compute_force(root, x, y, m):
+     if root.mass is None:
+         return 0, 0
+     if root.center_of_mass[0] == x and root.center_of_mass[1] == y and root.mass == m:
+         return 0, 0
+     d = root.bbox[1] - root.bbox[0]
+     r = distance(x, y, root.center_of_mass[0], root.center_of_mass[1])
+     if d/r < theta or root.children is None:
+         return force(m, x, y, root.mass, root.center_of_mass[0], root.center_of_mass[1])
+     else:
+         fx = 0.0
+         fy = 0.0
+         for i in range(4):
+             if root.children[i] is not None:
+                 fx += compute_force(root.children[i], x, y, m)[0]
+                 fy += compute_force(root.children[i], x, y, m)[1]
+         return fx, fy
+ 
+ ################################################# SUPPORTING FUNCTIONS ##############################################################
+ 
+ def force(m, x, y, mcm, xcm, ycm):
+     d = distance(x, y, xcm, ycm)
+     f = G*m*mcm/(d**2)
+     dx = xcm - x
+     dy = ycm - y
+     angle = math.atan2(dy, dx)
+     fx = math.cos(angle) * f
+     fy = math.sin(angle) * f
+     return fx, fy
+ 
+ def distance(x1, y1, x2, y2):
+     return math.sqrt((x2-x1)**2 + (y2-y1)**2)
+ 
+ def plt_node(x, y, width):
+     quadt.add_patch(patches.Rectangle((x, y), width, width, fill=False))
+ 
+ def find_root_bbox(array):
+     """Create a suitable square boundary box for the input particles
+     """
+     if len(array) == 0 or len(array) == 1:
+         return None
+     xmin, xmax, ymin, ymax = array[0][3], array[0][3], array[0][4], array[0][4]
+     for i in range(len(array)):
+         if array[i][3] > xmax:
+             xmax = array[i][3]
+         if array[i][3] < xmin:
+             xmin = array[i][3]
+         if array[i][4] > ymax:
+             ymax = array[i][4]
+         if array[i][4] < ymin:
+             ymin = array[i][4]
+     if xmax - xmin == ymax - ymin:
+         return xmin, xmax, ymin, ymax
+     elif xmax - xmin > ymax - ymin:
+         return xmin, xmax, ymin, ymax + (xmax - xmin - ymax + ymin)
+     else:
+         return xmin, xmax + (ymax - ymin - xmax + xmin), ymin, ymax
+ 
+ def quadrant_of_particle(bbox, x, y):
+     """Return the quadrant index of the particle (x, y)
+     """
+     if y >= (bbox[3] + bbox[2])/2:
+         if x <= (bbox[1] + bbox[0])/2:
+             return 0
+         else:
+             return 1
+     else:
+         if x >= (bbox[1] + bbox[0])/2:
+             return 2
+         else:
+             return 3
+ 
+ def quadrant_bbox(bbox, quadrant):
+     """Return the coordinates of the quadrant
+     """
+     x = (bbox[0] + bbox[1]) / 2
+     y = (bbox[2] + bbox[3]) / 2
+     # Quadrant 0: (xmin, x, y, ymax)
+     if quadrant == 0:
+         return bbox[0], x, y, bbox[3]
+     # Quadrant 1: (x, xmax, y, ymax)
+     elif quadrant == 1:
+         return x, bbox[1], y, bbox[3]
+     # Quadrant 2: (x, xmax, ymin, y)
+     elif quadrant == 2:
+         return x, bbox[1], bbox[2], y
+     # Quadrant 3: (xmin, x, ymin, y)
+     elif quadrant == 3:
+         return bbox[0], x, bbox[2], y
+ 
+ def data_from_file(filename, array):
+     with open(filename) as f:
+         for line in f:
+             if line[0] == '#':
+                 continue
+             else:
+                 name, color, m, x, y, vx, vy = line.split(',')
+                 array.append([name, color, float(m), float(x)*AU, float(y)*AU, float(vx)*1000, float(vy)*1000])
+ 
+ if __name__ == '__main__':
+     filename = 'solar-system.txt'
+     particles = []
+     data_from_file(filename, particles)
+     #root = Node()
+     #root.center_of_mass = []
+     #root.bbox = find_root_bbox(particles)
+     #for i in range(len(particles)):
+     #    quad_insert(root, particles[i][3], particles[i][4], particles[i][2])
+     #print('Boundary box: ', root.bbox)
+     #print('Total mass: ', root.mass)
+     #print('Coordinate of center of mass: ', root.center_of_mass)
+     #plt.scatter(root.center_of_mass[0], root.center_of_mass[1], c='r', marker='x', s=50)
+     #print('Theta: ', theta)
+     integrate(particles)
+     plt.show()
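The quadtree can be exercised without the solar-system data file. A sketch that inserts three bodies in the [name, color, mass, x, y, vx, vy] layout expected by integrate (the values are arbitrary):

from utils.kg.barnes_algo import Node, quad_insert, find_root_bbox, compute_force

bodies = [
    ["a", "b", 1.0e24, 0.0,    0.0,    0.0, 0.0],
    ["b", "g", 5.0e23, 1.0e11, 0.0,    0.0, 0.0],
    ["c", "r", 2.0e23, 0.0,    2.0e11, 0.0, 0.0],
]

root = Node()
root.center_of_mass = []
root.bbox = find_root_bbox(bodies)
for name, color, m, x, y, vx, vy in bodies:
    quad_insert(root, x, y, m)

print(root.mass, root.center_of_mass)         # total mass and barycentre
print(compute_force(root, 0.0, 0.0, 1.0e24))  # net force on the first body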
utils/kg/construct_kg.py ADDED
@@ -0,0 +1,20 @@
+ from langchain_community.graphs import Neo4jGraph
+ from langchain_experimental.graph_transformers import LLMGraphTransformer
+ from langchain_openai import ChatOpenAI
+ from langchain_core.documents import Document
+ 
+ def get_graph(text, allowed_nodes=None, prompt=None):
+ 
+     llm = ChatOpenAI(temperature=0, model_name="gpt-4o-2024-08-06")
+ 
+     if allowed_nodes:
+         llm_transformer = LLMGraphTransformer(llm=llm, allowed_nodes=allowed_nodes)
+     else:
+         llm_transformer = LLMGraphTransformer(llm=llm)
+     documents = [Document(page_content=text)]
+ 
+     graph_documents = llm_transformer.convert_to_graph_documents(documents)
+ 
+     return graph_documents
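get_graph returns LangChain GraphDocument objects whose nodes and relationships can be inspected directly. A short sketch, assuming OPENAI_API_KEY is set (the sample text is illustrative):

from utils.kg.construct_kg import get_graph

text = "Marie dirige l'agence bziiit, qui accompagne des PME à Bordeaux."
graph_documents = get_graph(text, allowed_nodes=["Person", "Organization", "City"])

for node in graph_documents[0].nodes:
    print(node.id, node.type)
for rel in graph_documents[0].relationships:
    print(rel.source.id, rel.type, rel.target.id)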