Spaces:
Running
Running
Ilyas KHIAT
commited on
Commit
·
56a3465
1
Parent(s):
83bb015
first push
Browse files- .gitattributes copy +35 -0
- .gitignore +3 -0
- .streamlit/config.toml +8 -0
- README copy.md +12 -0
- agents_page/catalogue.py +5 -0
- agents_page/recommended_agent.py +187 -0
- app.py +33 -0
- audit_page/audit.py +259 -0
- audit_page/compte_rendu.py +130 -0
- audit_page/dialogue_doc.py +337 -0
- audit_page/knowledge_graph.py +333 -0
- chatbot_page/chatbot.py +119 -0
- config.json +26 -0
- doc_page/documentation.py +3 -0
- packages.txt +3 -0
- requirements.txt +28 -0
- utils/audit/audit_audio.py +72 -0
- utils/audit/audit_doc.py +217 -0
- utils/audit/rag.py +44 -0
- utils/audit/response_llm.py +35 -0
- utils/audit/transcript_audio.py +10 -0
- utils/kg/barnes_algo.py +211 -0
- utils/kg/construct_kg.py +20 -0
.gitattributes copy
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
__pycache__/
|
2 |
+
.env
|
3 |
+
.streamlit/.env
|
.streamlit/config.toml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[server]
|
2 |
+
maxUploadSize = 20
|
3 |
+
|
4 |
+
[theme]
|
5 |
+
base="light"
|
6 |
+
primaryColor="#63abdf"
|
7 |
+
secondaryBackgroundColor="#fbf7f1"
|
8 |
+
textColor="#011166"
|
README copy.md
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Théo Pratik
|
3 |
+
emoji: ⚡
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: blue
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.37.1
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
---
|
11 |
+
|
12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
agents_page/catalogue.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
#st.set_page_config(page_title="Catalogue des agents (via bziiit.com)", page_icon="", layout="wide")
|
4 |
+
|
5 |
+
st.title("Catalogue des agents (via bziiit.com)")
|
agents_page/recommended_agent.py
ADDED
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from utils.audit.response_llm import generate_response_via_langchain
|
3 |
+
from textwrap import dedent
|
4 |
+
import streamlit as st
|
5 |
+
from langchain_openai import ChatOpenAI
|
6 |
+
from langchain_mistralai import ChatMistralAI
|
7 |
+
from langchain_core.prompts import ChatPromptTemplate
|
8 |
+
from langchain_core.output_parsers import StrOutputParser
|
9 |
+
from langchain_core.messages import AIMessage, HumanMessage
|
10 |
+
|
11 |
+
#st.set_page_config(page_title="Agents recommandés", page_icon="", layout="wide")
|
12 |
+
def remove_images_from_content(content):
|
13 |
+
filtered_content = {}
|
14 |
+
|
15 |
+
for page, data in content.items():
|
16 |
+
# Create a new dictionary excluding the "images" key
|
17 |
+
filtered_data = {key: value for key, value in data.items() if key != "images"}
|
18 |
+
filtered_content[page] = filtered_data
|
19 |
+
|
20 |
+
return filtered_content
|
21 |
+
|
22 |
+
def get_response(user_query, chat_history, db,llm=None,history_limit=10,stream=True):
|
23 |
+
retriever = db.as_retriever()
|
24 |
+
context = retriever.invoke(user_query)
|
25 |
+
template = """
|
26 |
+
Étant donné l'historique de la conversation : {chat_history}, le contexte qui est le document : {context}, et la question de l'utilisateur : {user_question}, repond comme un expert en agent IA.
|
27 |
+
Assurez-vous que la réponse soit adaptée au niveau d'expertise de l'utilisateur et aux spécificités du contexte fourni.
|
28 |
+
|
29 |
+
"""
|
30 |
+
|
31 |
+
prompt = ChatPromptTemplate.from_template(template)
|
32 |
+
|
33 |
+
#llm = ChatOpenAI(model="gpt-4o")
|
34 |
+
if not llm:
|
35 |
+
llm = ChatOpenAI(model="gpt-4o-mini")
|
36 |
+
elif llm == "GPT-4o":
|
37 |
+
llm = ChatOpenAI(model="gpt-4o")
|
38 |
+
elif llm == "Mistral Large 2 (FR)":
|
39 |
+
llm = ChatMistralAI(model_name="mistral-large-2407")
|
40 |
+
elif llm == "GPT-4o-mini":
|
41 |
+
llm = ChatOpenAI(model="gpt-4o-mini")
|
42 |
+
elif llm == "Mistral Nemo (FR)":
|
43 |
+
llm = ChatMistralAI(model_name="open-mistral-nemo-2407")
|
44 |
+
|
45 |
+
|
46 |
+
chain = prompt | llm
|
47 |
+
|
48 |
+
if not stream:
|
49 |
+
return chain.invoke({
|
50 |
+
"context": context,
|
51 |
+
"chat_history": chat_history[-history_limit:],
|
52 |
+
"user_question": user_query,
|
53 |
+
})
|
54 |
+
|
55 |
+
chain = chain | StrOutputParser()
|
56 |
+
|
57 |
+
if history_limit:
|
58 |
+
return chain.stream({
|
59 |
+
"context": context,
|
60 |
+
"chat_history": chat_history[-history_limit:],
|
61 |
+
"user_question": user_query,
|
62 |
+
})
|
63 |
+
|
64 |
+
return chain.stream({
|
65 |
+
"context": context,
|
66 |
+
"chat_history": chat_history,
|
67 |
+
"user_question": user_query,
|
68 |
+
})
|
69 |
+
|
70 |
+
def handle_display_models(index, models_names):
|
71 |
+
model = st.radio("Choisir un modèle",models_names, index=index)
|
72 |
+
return model
|
73 |
+
|
74 |
+
def recommended_agent_main():
|
75 |
+
st.title("Agents recommandés")
|
76 |
+
models_names = ["GPT-4o", "GPT-4o-mini","Mistral Nemo (FR)","Mistral Large 2 (FR)"]
|
77 |
+
|
78 |
+
if "chat_history" not in st.session_state:
|
79 |
+
st.session_state.chat_history = [
|
80 |
+
]
|
81 |
+
|
82 |
+
if "model" not in st.session_state:
|
83 |
+
st.session_state.model = "GPT-4o-mini"
|
84 |
+
|
85 |
+
header = st.container()
|
86 |
+
col1, col2 = header.columns([1, 2])
|
87 |
+
|
88 |
+
with col1.popover("Modèles disponibles"):
|
89 |
+
new_model = handle_display_models(models_names.index(st.session_state.model), models_names)
|
90 |
+
|
91 |
+
st.session_state.model = new_model
|
92 |
+
|
93 |
+
st.markdown(f"- **{st.session_state.model}**")
|
94 |
+
|
95 |
+
if "audit" not in st.session_state or st.session_state.audit == {}:
|
96 |
+
st.error("Veuillez d'abord effectuer un audit pour obtenir des recommandations d'agents.")
|
97 |
+
return
|
98 |
+
|
99 |
+
audit = st.session_state.audit_simplified
|
100 |
+
content = st.session_state.audit["content"]
|
101 |
+
|
102 |
+
if "response_llm" not in st.session_state:
|
103 |
+
st.session_state.response_llm = ""
|
104 |
+
|
105 |
+
|
106 |
+
#filter content, delete images if type is pdf
|
107 |
+
if audit["type de fichier"] == "pdf":
|
108 |
+
content = remove_images_from_content(content)
|
109 |
+
#delete audio if type is audio and keep transcript
|
110 |
+
elif audit["type de fichier"] == "audio":
|
111 |
+
content = content["transcription"]
|
112 |
+
|
113 |
+
ressources = content
|
114 |
+
|
115 |
+
prompt = '''
|
116 |
+
Tu es designer en intelligence artificielle (IA) spécialisé dans la création d'agents IA autonomes et performants.
|
117 |
+
|
118 |
+
A partir de ressources fournies par l'utilisateur (texte, documents, images, audio), tu es chargé de réaliser les tâches suivantes :
|
119 |
+
|
120 |
+
A/ Faire un résumé des ressources fournies en 500 caractères maximum
|
121 |
+
|
122 |
+
B/ Suggérer la création d'agents autonomes pour mettre en pratique les informations contenues dans les ressources fournies.
|
123 |
+
|
124 |
+
Tu proposes deux solutions :
|
125 |
+
|
126 |
+
Sol. A : 1 seul agent IA dont tu suggéreras :
|
127 |
+
* Nom
|
128 |
+
* Rôle
|
129 |
+
* Objectifs
|
130 |
+
* Outils utilisés par l'agent
|
131 |
+
* Tâches réalisées par l'agents
|
132 |
+
* Compétences de l'agent (backstory)
|
133 |
+
|
134 |
+
Sol. B : 1 équipe d'agents tu suggéreras :
|
135 |
+
* Le nombre d'agents
|
136 |
+
* Pour chacune d'eux [Nom, Rôle, Objectifs, Outils utilisés par l'agent, Tâches réalisées par l'agents, Compétences de l'agent (backstory)]
|
137 |
+
|
138 |
+
Une fois ce travail réalisé, tu proposes une série de 3 missions avec objectifs SMART pour chacun des agents Sol. A et Sol. B en présentation les résultats dans un tableau contenant :
|
139 |
+
* Nom de l’agent
|
140 |
+
* Objectifs à atteindre
|
141 |
+
|
142 |
+
'''
|
143 |
+
|
144 |
+
#display prompt and modify it
|
145 |
+
prompt_modified = st.text_area("Prompt par défaut (que vous pouvez modifier, compléter)", prompt, height=300)
|
146 |
+
prompt_modified = dedent(prompt_modified)
|
147 |
+
|
148 |
+
if st.button("Générer les recommandations"):
|
149 |
+
resource_prompt = f'''Ressources fournies par l'utilisateur :{ressources}'''
|
150 |
+
prompt_modified = f"{prompt_modified}\n{resource_prompt}"
|
151 |
+
st.session_state.chat_history = []
|
152 |
+
with st.chat_message("AI"):
|
153 |
+
st.session_state.response_llm = st.write_stream(generate_response_via_langchain(query=prompt_modified,stream=True))
|
154 |
+
|
155 |
+
st.session_state.chat_history.append(AIMessage(content=st.session_state.response_llm))
|
156 |
+
|
157 |
+
elif st.session_state.response_llm:
|
158 |
+
st.info("la dernière réponse générée est affichée ci-dessous")
|
159 |
+
with st.chat_message("AI"):
|
160 |
+
st.write(st.session_state.response_llm)
|
161 |
+
|
162 |
+
for message in st.session_state.chat_history[1:]:
|
163 |
+
if isinstance(message, AIMessage):
|
164 |
+
with st.chat_message("AI"):
|
165 |
+
st.markdown(message.content)
|
166 |
+
elif isinstance(message, HumanMessage):
|
167 |
+
with st.chat_message("Moi"):
|
168 |
+
st.write(message.content)
|
169 |
+
|
170 |
+
user_query = st.chat_input("Par ici ...")
|
171 |
+
if user_query is not None and user_query != "":
|
172 |
+
st.session_state.chat_history.append(HumanMessage(content=user_query))
|
173 |
+
|
174 |
+
with st.chat_message("Moi"):
|
175 |
+
st.markdown(user_query)
|
176 |
+
|
177 |
+
with st.chat_message("AI"):
|
178 |
+
st.markdown(f"**{st.session_state.model}**")
|
179 |
+
|
180 |
+
|
181 |
+
response = st.write_stream(get_response(user_query, st.session_state.chat_history,db=st.session_state.vectorstore, llm=st.session_state.model, stream=True))
|
182 |
+
st.session_state.chat_history.append(AIMessage(content=response))
|
183 |
+
|
184 |
+
|
185 |
+
|
186 |
+
|
187 |
+
recommended_agent_main()
|
app.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import dotenv
|
3 |
+
import os
|
4 |
+
|
5 |
+
def main():
|
6 |
+
|
7 |
+
dotenv.load_dotenv(dotenv_path=os.path.join('.streamlit', '.env'))
|
8 |
+
|
9 |
+
st.set_page_config(page_title="RAG Agent", page_icon="🤖", layout="wide")
|
10 |
+
|
11 |
+
audit_page = st.Page("audit_page/audit.py", title="Audit", icon="📋", default=True)
|
12 |
+
dialog_page = st.Page("audit_page/dialogue_doc.py", title="Dialoguer avec le document", icon="💬")
|
13 |
+
kg_page = st.Page("audit_page/knowledge_graph.py", title="Graphe de connaissance", icon="🧠")
|
14 |
+
agents_page = st.Page("agents_page/catalogue.py", title="Catalogue des agents", icon="📇")
|
15 |
+
compte_rendu = st.Page("audit_page/compte_rendu.py", title="Compte rendu", icon="📝")
|
16 |
+
recommended_agents = st.Page("agents_page/recommended_agent.py", title="Agents recommandés", icon="⭐")
|
17 |
+
chatbot = st.Page("chatbot_page/chatbot.py", title="Chatbot", icon="💬")
|
18 |
+
documentation = st.Page("doc_page/documentation.py", title="Documentation", icon="📚")
|
19 |
+
|
20 |
+
pg = st.navigation(
|
21 |
+
{
|
22 |
+
"Audit de contenus": [audit_page,dialog_page],
|
23 |
+
"Equipe d'agents IA": [recommended_agents],
|
24 |
+
"Chatbot": [chatbot],
|
25 |
+
"Documentation": [documentation]
|
26 |
+
}
|
27 |
+
)
|
28 |
+
|
29 |
+
pg.run()
|
30 |
+
|
31 |
+
|
32 |
+
if __name__ == "__main__":
|
33 |
+
main()
|
audit_page/audit.py
ADDED
@@ -0,0 +1,259 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pymupdf as fitz
|
3 |
+
import pyperclip
|
4 |
+
import clipboard
|
5 |
+
from utils.audit.audit_doc import audit_descriptif_pdf,audit_text
|
6 |
+
from utils.audit.rag import setup_rag
|
7 |
+
import dotenv
|
8 |
+
from utils.audit.audit_audio import evaluate_audio_quality
|
9 |
+
from PIL import Image
|
10 |
+
from io import BytesIO
|
11 |
+
import st_copy_to_clipboard
|
12 |
+
import os
|
13 |
+
|
14 |
+
|
15 |
+
# Function to classify file type
|
16 |
+
def classify_file(file):
|
17 |
+
if file.type.startswith("image/"):
|
18 |
+
return "image"
|
19 |
+
elif file.type == "application/pdf":
|
20 |
+
return "pdf"
|
21 |
+
elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
|
22 |
+
return "word"
|
23 |
+
elif file.type.startswith("audio/"):
|
24 |
+
return "audio"
|
25 |
+
elif file.type.startswith("text/"):
|
26 |
+
return "text"
|
27 |
+
else:
|
28 |
+
return "unknown"
|
29 |
+
|
30 |
+
#display content
|
31 |
+
def display_content_doc(content:dict,col:st):
|
32 |
+
number_of_pages = len(content)
|
33 |
+
col.info("Note : Si vous choisissez 0, vous verrez le contenu de toutes les pages")
|
34 |
+
|
35 |
+
number = col.number_input("Numéro de page", min_value=0, max_value=number_of_pages, value=0,key="number_page_content")
|
36 |
+
#0 means all pages
|
37 |
+
if number > 0:
|
38 |
+
page : dict = content[f"page_{number-1}"]
|
39 |
+
|
40 |
+
option = col.radio("Type de contenu",list(content[f"page_0"].keys()), index=0,horizontal=True)
|
41 |
+
|
42 |
+
if option == "images":
|
43 |
+
if number == 0:
|
44 |
+
images = [img for page in content.values() for img in page["images"]]
|
45 |
+
else:
|
46 |
+
images = page["images"]
|
47 |
+
col1,col2,col3 = col.columns(3)
|
48 |
+
for i, (img_bytes, img_width, img_height) in enumerate(images):
|
49 |
+
try:
|
50 |
+
if i%3 == 0:
|
51 |
+
col1.image(Image.open(BytesIO(img_bytes)), caption=f'', width=img_width)
|
52 |
+
elif i%3 == 1:
|
53 |
+
col2.image(Image.open(BytesIO(img_bytes)), caption=f'', width=img_width)
|
54 |
+
else:
|
55 |
+
col3.image(Image.open(BytesIO(img_bytes)), caption=f'', width=img_width)
|
56 |
+
except:
|
57 |
+
pass
|
58 |
+
|
59 |
+
elif option == "texte":
|
60 |
+
if number == 0:
|
61 |
+
text = "-------------------\n".join([page["texte"] for page in content.values()])
|
62 |
+
else:
|
63 |
+
text = page["texte"]
|
64 |
+
|
65 |
+
col.code(text,language="text")
|
66 |
+
|
67 |
+
elif option == "liens":
|
68 |
+
if number == 0:
|
69 |
+
links = [link for page in content.values() for link in page["liens"]]
|
70 |
+
else:
|
71 |
+
links = page["liens"]
|
72 |
+
|
73 |
+
for i, link in enumerate(links):
|
74 |
+
col.markdown(f"- {i+1}: [{link['uri']}]({link['uri']}) (page {link['page']})")
|
75 |
+
|
76 |
+
elif option == "tableaux":
|
77 |
+
if number == 0:
|
78 |
+
tables = [table for page in content.values() for table in page["tableaux"]]
|
79 |
+
else:
|
80 |
+
tables = page["tableaux"]
|
81 |
+
|
82 |
+
for i, table in enumerate(tables):
|
83 |
+
col.write(f"Tableau {i+1}")
|
84 |
+
col.write(table)
|
85 |
+
|
86 |
+
def display_content_audio(content:dict,col:st):
|
87 |
+
st.write("##### Transcription")
|
88 |
+
st.write(content["transcription"])
|
89 |
+
# if st.button("📋",key="copy_transcription"):
|
90 |
+
st_copy_to_clipboard(content["transcription"])
|
91 |
+
# st.success("Transcription copiée dans le presse-papier")
|
92 |
+
|
93 |
+
st.audio(content["audio_data"],sample_rate=content["frame_rate"]*2)
|
94 |
+
|
95 |
+
def display_content_text(content,col:st):
|
96 |
+
st.text_area("Texte",content,height=200)
|
97 |
+
|
98 |
+
def handle_display_content(col:st):
|
99 |
+
audit = st.session_state.audit
|
100 |
+
type = st.session_state.audit_simplified["type de fichier"]
|
101 |
+
if type == "pdf":
|
102 |
+
with col.expander("Contenu"):
|
103 |
+
display_content_doc(audit["content"],st)
|
104 |
+
elif type == "audio":
|
105 |
+
with col.expander("Contenu"):
|
106 |
+
display_content_audio(audit["content"],col)
|
107 |
+
elif type == "text":
|
108 |
+
with col.expander("Contenu"):
|
109 |
+
display_content_text(audit["content"],col)
|
110 |
+
|
111 |
+
|
112 |
+
|
113 |
+
def handle_audit(uploaded_file,type:str):
|
114 |
+
if type == "pdf":
|
115 |
+
if st.session_state.name_file != uploaded_file.name:
|
116 |
+
st.session_state.name_file = uploaded_file.name
|
117 |
+
with st.spinner("Analyse du document..."):
|
118 |
+
st.session_state.audit = {}
|
119 |
+
|
120 |
+
st.session_state.audit = audit_descriptif_pdf(uploaded_file,100)
|
121 |
+
with st.spinner("Préparation de la DB..."):
|
122 |
+
vectorstore = setup_rag(type,st.session_state.audit["content"])
|
123 |
+
st.session_state.vectorstore = vectorstore
|
124 |
+
st.session_state.graph = None
|
125 |
+
st.session_state.cr = ""
|
126 |
+
|
127 |
+
audit = st.session_state.audit["audit"]
|
128 |
+
#global audit
|
129 |
+
audit_simplified = {
|
130 |
+
"type de fichier": type,
|
131 |
+
"Nombre de pages": audit["number_of_pages"],
|
132 |
+
"Nombre d'images": audit["number_of_images"],
|
133 |
+
"Nombre de liens": audit["number_of_links"],
|
134 |
+
"Nombre de tableaux": audit["number_of_tables"],
|
135 |
+
"Nombre de tokens": audit["number_of_tokens"],
|
136 |
+
"Nombre de mots": audit["number_of_words"],
|
137 |
+
"Mots clés": audit["key_words"]
|
138 |
+
}
|
139 |
+
st.session_state.audit_simplified = audit_simplified
|
140 |
+
|
141 |
+
elif type == "audio":
|
142 |
+
if st.session_state.name_file != uploaded_file.name:
|
143 |
+
st.session_state.name_file = uploaded_file.name
|
144 |
+
with st.spinner("Analyse de l'audio..."):
|
145 |
+
st.session_state.audit = {}
|
146 |
+
st.session_state.audit = evaluate_audio_quality(uploaded_file)
|
147 |
+
with st.spinner("Préparation de la DB..."):
|
148 |
+
vectorstore = setup_rag(type,st.session_state.audit["content"])
|
149 |
+
st.session_state.vectorstore = vectorstore
|
150 |
+
st.session_state.graph = None
|
151 |
+
st.session_state.cr = ""
|
152 |
+
|
153 |
+
audit = st.session_state.audit["audit"]
|
154 |
+
#audit global simplifié
|
155 |
+
audit_simplified = {
|
156 |
+
"type de fichier": type,
|
157 |
+
"Durée": f"{audit['duration']:0.2f} minutes",
|
158 |
+
"Nombre de mots": audit["number_of_words"],
|
159 |
+
"Nombre de tokens": audit["number_of_tokens"],
|
160 |
+
"Volume": f"{audit['volume']:0.2f} dBFS (déciBels Full Scale)",
|
161 |
+
"SNR": f"{max(audit['SNR'],0):0.2f} dB (Ratio Signal / Bruit)",
|
162 |
+
}
|
163 |
+
st.session_state.audit_simplified = audit_simplified
|
164 |
+
|
165 |
+
elif type == "text":
|
166 |
+
text = uploaded_file.read().decode("utf-8")
|
167 |
+
if st.session_state.name_file != uploaded_file.name:
|
168 |
+
st.session_state.name_file = uploaded_file.name
|
169 |
+
with st.spinner("Analyse du texte..."):
|
170 |
+
st.session_state.audit = {}
|
171 |
+
st.session_state.audit = audit_text(text)
|
172 |
+
audit = st.session_state.audit["audit"]
|
173 |
+
#audit global simplifié
|
174 |
+
audit_simplified = {
|
175 |
+
"type de fichier": type,
|
176 |
+
"Nombre de tokens": audit["number_of_tokens"],
|
177 |
+
"Nombre de mots": audit["number_of_words"],
|
178 |
+
"Mots clés": audit["key_words"]
|
179 |
+
}
|
180 |
+
st.session_state.audit_simplified = audit_simplified
|
181 |
+
|
182 |
+
|
183 |
+
|
184 |
+
|
185 |
+
def display_audit(col:st):
|
186 |
+
#audit global simplifié
|
187 |
+
audit_simplified = st.session_state.audit_simplified
|
188 |
+
audit = st.session_state.audit["audit"]
|
189 |
+
|
190 |
+
well_formatted_audit = "Contenus audités\n"
|
191 |
+
for key, value in audit_simplified.items():
|
192 |
+
well_formatted_audit += f"- {key}: {value}\n"
|
193 |
+
|
194 |
+
col.code(well_formatted_audit)
|
195 |
+
|
196 |
+
if audit_simplified["type de fichier"] == "pdf": #cad un type qui contient des pages
|
197 |
+
#audit par page
|
198 |
+
with col.expander("Audit par page"):
|
199 |
+
number = st.number_input("Numéro de page", min_value=1, max_value=audit["number_of_pages"], value=1,key="number_page_audit")
|
200 |
+
audit_page = audit[f"page_{number-1}"]
|
201 |
+
audit_page = {
|
202 |
+
|
203 |
+
"Nombre d'images": audit_page["number_of_images"],
|
204 |
+
"Nombre de liens": audit_page["number_of_links"],
|
205 |
+
"Nombre de tableaux": audit_page["number_of_tables"],
|
206 |
+
"Nombre de tokens": audit_page["number_of_tokens"],
|
207 |
+
"Nombre de mots": audit_page["number_of_words"],
|
208 |
+
}
|
209 |
+
well_formatted_audit_page = "Audit descriptif\n"
|
210 |
+
for key, value in audit_page.items():
|
211 |
+
well_formatted_audit_page += f"- {key}: {value}\n"
|
212 |
+
|
213 |
+
st.code(well_formatted_audit_page)
|
214 |
+
|
215 |
+
|
216 |
+
|
217 |
+
def audit_main():
|
218 |
+
|
219 |
+
#st.set_page_config(page_title="Audit des documents", page_icon=":page_with_curl:", layout="wide")
|
220 |
+
# Streamlit app
|
221 |
+
st.title("Audit des documents")
|
222 |
+
|
223 |
+
notice = "Les formats autorisés sont les suivants :\n- **format texte** : txt, word, pdf\n- **format image** : png, jpg\n- **format audio** : wav, MP3"
|
224 |
+
|
225 |
+
col1, col2 = st.columns([4, 3])
|
226 |
+
col1.markdown(notice)
|
227 |
+
|
228 |
+
if "audit" not in st.session_state:
|
229 |
+
st.session_state.audit = {}
|
230 |
+
if "name_file" not in st.session_state:
|
231 |
+
st.session_state.name_file = ""
|
232 |
+
if "audit_simplified" not in st.session_state:
|
233 |
+
st.session_state.audit_simplified = {}
|
234 |
+
if "vectorstore" not in st.session_state:
|
235 |
+
st.session_state.vectorstore = None
|
236 |
+
if "cr" not in st.session_state:
|
237 |
+
st.session_state.cr = ""
|
238 |
+
if "graph" not in st.session_state:
|
239 |
+
st.session_state.graph = None
|
240 |
+
|
241 |
+
# File uploader
|
242 |
+
uploaded_file = col1.file_uploader("Télécharger un ou plusieurs documents")
|
243 |
+
|
244 |
+
if uploaded_file is not None:
|
245 |
+
type = classify_file(uploaded_file)
|
246 |
+
handle_audit(uploaded_file,type)
|
247 |
+
|
248 |
+
col1.write(f"Type de fichier: {type}")
|
249 |
+
|
250 |
+
col1.write("### Synthèse audit de(s) document(s) téléchargé(s)")
|
251 |
+
|
252 |
+
if "audit" in st.session_state and st.session_state.audit != {}:
|
253 |
+
display_audit(col1)
|
254 |
+
handle_display_content(col2)
|
255 |
+
|
256 |
+
#init graph and cr
|
257 |
+
|
258 |
+
|
259 |
+
audit_main()
|
audit_page/compte_rendu.py
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from textwrap import dedent
|
3 |
+
from utils.audit.rag import get_text_from_content_for_doc,get_text_from_content_for_audio
|
4 |
+
from utils.audit.response_llm import generate_response_via_langchain
|
5 |
+
from langchain_core.messages import AIMessage, HumanMessage
|
6 |
+
import pyperclip
|
7 |
+
|
8 |
+
|
9 |
+
def cr_main():
|
10 |
+
st.title("Compte rendu")
|
11 |
+
|
12 |
+
if "audit" not in st.session_state or st.session_state.audit == {}:
|
13 |
+
st.error("Veuillez d'abord effectuer un audit pour générer un compte rendu.")
|
14 |
+
return
|
15 |
+
|
16 |
+
if "cr" not in st.session_state:
|
17 |
+
st.session_state.cr = ""
|
18 |
+
|
19 |
+
if "cr_chat_history" not in st.session_state:
|
20 |
+
st.session_state.cr_chat_history = [
|
21 |
+
]
|
22 |
+
|
23 |
+
audit = st.session_state.audit_simplified
|
24 |
+
content = st.session_state.audit["content"]
|
25 |
+
|
26 |
+
if audit["type de fichier"] == "pdf":
|
27 |
+
text = get_text_from_content_for_doc(content)
|
28 |
+
elif audit["type de fichier"] == "audio":
|
29 |
+
text = get_text_from_content_for_audio(content)
|
30 |
+
|
31 |
+
prompt_cr = dedent(f'''
|
32 |
+
|
33 |
+
À partir du document ci-dessous, générez un compte rendu détaillé contenant les sections suivantes :
|
34 |
+
|
35 |
+
2. **Résumé** : Fournissez une synthèse du document, en mettant en avant les points principaux, les relations essentielles, les concepts , les dates et les lieux, les conclusions et les détails importants.
|
36 |
+
|
37 |
+
3. **Notes** :
|
38 |
+
- Présentez les points clés sous forme de liste à puces avec des émojis pertinents pour souligner la nature de chaque point.
|
39 |
+
- N'oubliez pas de relever tout les entités et les relations.
|
40 |
+
- Incluez des sous-points (sans émojis) sous les points principaux pour offrir des détails ou explications supplémentaires.
|
41 |
+
|
42 |
+
4. **Actions** : Identifiez et listez les actions spécifiques, tâches ou étapes recommandées ou nécessaires selon le contenu du document.
|
43 |
+
|
44 |
+
**Document :**
|
45 |
+
|
46 |
+
{text}
|
47 |
+
|
48 |
+
**Format de sortie :**
|
49 |
+
|
50 |
+
|
51 |
+
### Résumé :
|
52 |
+
[Fournissez un résumé concis du document ici;n'oubliez pas de relever tout les entités et les relations.]
|
53 |
+
|
54 |
+
### Notes :
|
55 |
+
- 📌 **Point Principal 1**
|
56 |
+
- Sous-point A
|
57 |
+
- Sous-point B
|
58 |
+
- 📈 **Point Principal 2**
|
59 |
+
- Sous-point C
|
60 |
+
- Sous-point D
|
61 |
+
- 📝 **Point Principal 3**
|
62 |
+
- Sous-point E
|
63 |
+
- Sous-point F
|
64 |
+
|
65 |
+
### Actions :
|
66 |
+
1. [Action 1]
|
67 |
+
2. [Action 2]
|
68 |
+
3. [Action 3]
|
69 |
+
4. ...
|
70 |
+
|
71 |
+
---
|
72 |
+
''')
|
73 |
+
|
74 |
+
|
75 |
+
|
76 |
+
if st.button("Générer compte rendu"):
|
77 |
+
|
78 |
+
with st.spinner("Génération du compte rendu..."):
|
79 |
+
cr = generate_response_via_langchain(prompt_cr,stream=False,model="gpt-4o")
|
80 |
+
st.session_state.cr = cr
|
81 |
+
st.session_state.cr_chat_history = []
|
82 |
+
|
83 |
+
else:
|
84 |
+
cr = st.session_state.cr
|
85 |
+
|
86 |
+
if cr:
|
87 |
+
col1, col2 = st.columns([2.5, 1.5])
|
88 |
+
|
89 |
+
with col1.container(border=True,height=800):
|
90 |
+
st.markdown("##### Compte rendu")
|
91 |
+
st.markdown("### Mots clés extraits:")
|
92 |
+
st.write(f"- {audit['Mots clés'].strip()}")
|
93 |
+
st.write(cr)
|
94 |
+
# if st.button("📋",key="copy_transcription"):
|
95 |
+
# #pyperclip.copy(cr)
|
96 |
+
# st.success("Transcription copiée dans le presse-papier")
|
97 |
+
|
98 |
+
with col2.container(border=True,height=800):
|
99 |
+
st.markdown("##### Dialoguer avec le CR")
|
100 |
+
|
101 |
+
user_query = st.chat_input("Par ici ...")
|
102 |
+
if user_query is not None and user_query != "":
|
103 |
+
st.session_state.cr_chat_history.append(HumanMessage(content=user_query))
|
104 |
+
|
105 |
+
with st.container(height=650, border=False):
|
106 |
+
for message in st.session_state.cr_chat_history:
|
107 |
+
if isinstance(message, AIMessage):
|
108 |
+
with st.chat_message("AI"):
|
109 |
+
st.markdown(message.content)
|
110 |
+
elif isinstance(message, HumanMessage):
|
111 |
+
with st.chat_message("Moi"):
|
112 |
+
st.write(message.content)
|
113 |
+
|
114 |
+
#check if last message is human message
|
115 |
+
if len(st.session_state.cr_chat_history) > 0:
|
116 |
+
last_message = st.session_state.cr_chat_history[-1]
|
117 |
+
if isinstance(last_message, HumanMessage):
|
118 |
+
with st.chat_message("AI"):
|
119 |
+
retreive = st.session_state.vectorstore.as_retriever()
|
120 |
+
context = retreive.invoke(last_message.content)
|
121 |
+
wrapped_prompt = f'''Étant donné le contexte suivant {context} et le compte rendu du document {cr}, {last_message.content}'''
|
122 |
+
response = st.write_stream(generate_response_via_langchain(wrapped_prompt,stream=True))
|
123 |
+
st.session_state.cr_chat_history.append(AIMessage(content=response))
|
124 |
+
|
125 |
+
|
126 |
+
cr_main()
|
127 |
+
|
128 |
+
|
129 |
+
|
130 |
+
|
audit_page/dialogue_doc.py
ADDED
@@ -0,0 +1,337 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from textwrap import dedent
|
3 |
+
from utils.audit.rag import get_text_from_content_for_doc,get_text_from_content_for_audio
|
4 |
+
from utils.audit.response_llm import generate_response_via_langchain
|
5 |
+
from langchain_core.messages import AIMessage, HumanMessage
|
6 |
+
from st_copy_to_clipboard import st_copy_to_clipboard
|
7 |
+
from utils.kg.construct_kg import get_graph
|
8 |
+
from audit_page.knowledge_graph import *
|
9 |
+
import json
|
10 |
+
import clipboard
|
11 |
+
from time import sleep
|
12 |
+
|
13 |
+
def graph_doc_to_json(graph):
    """Serialize a LangChain graph document into a plain JSON-ready dict.

    Each node keeps its human-readable id as ``label`` while ``id`` gets
    spaces replaced with underscores (agraph-safe identifiers).  Each
    relationship is exported as a source/label/cible triple using the
    same underscore convention.
    """
    noeuds = [
        {"id": n.id.replace(" ", "_"), "label": n.id, "type": n.type}
        for n in graph.nodes
    ]
    relations = [
        {
            "source": rel.source.id.replace(" ", "_"),
            "label": rel.type,
            "cible": rel.target.id.replace(" ", "_"),
        }
        for rel in graph.relationships
    ]
    return {"noeuds": noeuds, "relations": relations}
+
|
30 |
+
def chat_history_formatter(chat_history):
    """Render a LangChain message history as plain text.

    AI messages become ``AI:<content>`` lines and human messages become
    ``Human:<content>`` lines, each followed by a blank line; messages of
    any other type are silently skipped.
    """
    parts = []
    for msg in chat_history:
        if isinstance(msg, AIMessage):
            parts.append(f"AI:{msg.content}\n\n")
        elif isinstance(msg, HumanMessage):
            parts.append(f"Human:{msg.content}\n\n")
    return "".join(parts)
+
|
39 |
+
def filter_correspondance(source_list:list[str],ref_dict:dict,reverse=False):
    """Translate between the keys and display values of *ref_dict*.

    Matching is case-insensitive and ignores surrounding whitespace.
    With ``reverse=False`` (default) returns the values whose key occurs
    in *source_list*; with ``reverse=True`` returns the keys whose value
    occurs in *source_list*.  Result order follows *ref_dict* iteration
    order, not *source_list* order.
    """
    wanted = {entry.lower().strip() for entry in source_list}
    if reverse:
        return [k for k, v in ref_dict.items() if v.lower().strip() in wanted]
    return [v for k, v in ref_dict.items() if k.lower().strip() in wanted]
|
48 |
+
@st.fragment()
def radio_choice():
    # Streamlit fragment: reruns in isolation when the radio widget changes,
    # instead of rerunning the whole page.
    # Persists the selected index in st.session_state.radio_choice so the
    # selection survives full-page reruns; returns the selected option label.
    options = ["compte_rendu","graphe de connaissance"]
    choice = st.radio("Choisissez une option",options,index=st.session_state.radio_choice,horizontal=True,label_visibility="collapsed")
    # NOTE(review): the sleep(1) calls look like a workaround for a rerun/race
    # issue with fragments — confirm whether they are still required.
    sleep(1)
    if choice and options.index(choice) != st.session_state.radio_choice:
        sleep(1)
        st.session_state.radio_choice = options.index(choice)
    return choice
|
58 |
+
|
59 |
+
def doc_dialog_main():
    """Page entry point: chat with an audited document.

    Offers two modes selected by a radio button:
    - "compte_rendu": generates (once) and displays an LLM summary of the
      audited document, with a side chat grounded in a RAG retriever.
    - "graphe de connaissance": builds (once) a knowledge graph from the
      document, renders it with streamlit-agraph, and offers a graph-aware
      side chat.
    All heavy artifacts (cr, graph, views, chat histories) are cached in
    st.session_state so reruns are cheap.

    NOTE(review): this body was reconstructed from a diff rendering that lost
    indentation; the nesting of the Streamlit `with` blocks reflects the
    apparent control flow — verify against the original file.
    """
    st.title("Dialogue avec le document")

    # An audit (uploaded + analysed document) is a hard prerequisite.
    if "audit" not in st.session_state or st.session_state.audit == {}:
        st.error("Veuillez d'abord effectuer un audit pour générer le compte rendu ou le graphe de connaissance.")
        return

    #init cr and chat history cr
    if "cr" not in st.session_state:
        st.session_state.cr = ""
    if "cr_chat_history" not in st.session_state:
        st.session_state.cr_chat_history = [
        ]

    #init graph and filter views
    if "graph" not in st.session_state:
        st.session_state.graph = None

    if "filter_views" not in st.session_state:
        st.session_state.filter_views = {}
    if "current_view" not in st.session_state:
        st.session_state.current_view = None
    if "node_types" not in st.session_state:
        st.session_state.node_types = None
    # if "summary" not in st.session_state:
    #     st.session_state.summary = None
    if "chat_graph_history" not in st.session_state:
        st.session_state.chat_graph_history = []

    #init a radio button for the choice
    if "radio_choice" not in st.session_state:
        st.session_state.radio_choice = None
    # if "choice" not in st.session_state:
    #     st.session_state.choice = st.radio("Choisissez une option",["compte_rendu","graphe de connaissance"],index=st.session_state.radio_choice,horizontal=True,label_visibility="collapsed")

    # choice = radio_choice()

    # Mode selector; the chosen index is persisted across reruns.
    options = ["compte_rendu","graphe de connaissance"]
    choice = st.radio("Choisissez une option",options,index=st.session_state.radio_choice,horizontal=True,label_visibility="collapsed")
    if choice and options.index(choice) != st.session_state.radio_choice:
        st.session_state.radio_choice = options.index(choice)

    audit = st.session_state.audit_simplified
    content = st.session_state.audit["content"]

    # Normalise the audited content to plain text according to its type.
    # NOTE(review): if the type is none of pdf/audio/text, `text` stays
    # unbound and the prompt below would raise NameError — confirm upstream
    # guarantees one of these three types.
    if audit["type de fichier"] == "pdf":
        text = get_text_from_content_for_doc(content)
    elif audit["type de fichier"] == "audio":
        text = get_text_from_content_for_audio(content)
    elif audit["type de fichier"] == "text":
        text = content

    # Prompt used both for the "compte rendu" mode and as a fallback summary
    # when the document is too long for direct graph extraction.
    prompt_cr = dedent(f'''

    À partir du document ci-dessous, générez un compte rendu détaillé contenant les sections suivantes :

    2. **Résumé** : Fournissez une synthèse complète du document, en mettant en avant les points principaux, les relations essentielles, les concepts , les dates et les lieux, les conclusions et les détails importants.

    3. **Notes** :
    - Présentez les points clés sous forme de liste à puces avec des émojis pertinents pour souligner la nature de chaque point.
    - N'oubliez pas de relever tout les entités et les relations.
    - Incluez des sous-points (sans émojis) sous les points principaux pour offrir des détails ou explications supplémentaires.

    4. **Actions** : Identifiez et listez les actions spécifiques, tâches ou étapes recommandées ou nécessaires selon le contenu du document.

    **Document :**

    {text}

    **Format de sortie :**


    ### Résumé :
    [Fournissez un résumé concis du document ici;n'oubliez pas de relever tout les entités et les relations.]

    ### Notes :
    - 📌 **Point Principal 1**
      - Sous-point A
      - Sous-point B
    - 📈 **Point Principal 2**
      - Sous-point C
      - Sous-point D
    - 📝 **Point Principal 3**
      - Sous-point E
      - Sous-point F

    ### Actions :
    1. [Action 1]
    2. [Action 2]
    3. [Action 3]
    4. ...

    ---
    ''')


    if choice == "compte_rendu":
        # Generate the summary only once per audited document.
        if "cr" not in st.session_state or st.session_state.cr == "":
            with st.spinner("Génération du compte rendu..."):
                cr = generate_response_via_langchain(prompt_cr,stream=False,model="gpt-4o")
                st.session_state.cr = cr
                st.session_state.cr_chat_history = []
        else:
            cr = st.session_state.cr

        if cr:
            col1, col2 = st.columns([2.5, 1.5])

            # Left column: the summary itself plus a copy-to-clipboard widget.
            with col1.container(border=True,height=850):
                st.markdown("##### Compte rendu")
                keywords_paragraph = f"### Mots clés extraits:\n- {audit['Mots clés'].strip()}"
                with st.container(height=650,border=False):
                    st.markdown(keywords_paragraph)
                    st.write(cr)
                # col_copy , col_success = st.columns([1,11])
                # if col_copy.button("📋",key="copy_cr"):
                with st.container(height=50,border=False):
                    st_copy_to_clipboard(keywords_paragraph+"\n\n"+cr,key="cp_but_cr")
                # col_success.success("Compte rendu copié dans le presse-papier")

            # Right column: RAG-grounded chat about the summary.
            with col2.container(border=True,height=850):
                st.markdown("##### Dialoguer avec le CR")

                user_query = st.chat_input("Par ici ...")
                if user_query is not None and user_query != "":
                    st.session_state.cr_chat_history.append(HumanMessage(content=user_query))

                with st.container(height=600, border=False):
                    for message in st.session_state.cr_chat_history:
                        if isinstance(message, AIMessage):
                            with st.chat_message("AI"):
                                st.markdown(message.content)
                        elif isinstance(message, HumanMessage):
                            with st.chat_message("Human"):
                                st.write(message.content)

                    #check if last message is human message
                    # A trailing human message means the AI has not answered yet:
                    # retrieve context from the vector store and stream a reply.
                    if len(st.session_state.cr_chat_history) > 0:
                        last_message = st.session_state.cr_chat_history[-1]
                        if isinstance(last_message, HumanMessage):
                            with st.chat_message("AI"):
                                retreive = st.session_state.vectorstore.as_retriever()
                                context = retreive.invoke(last_message.content)
                                wrapped_prompt = f'''Étant donné le contexte suivant {context} et le compte rendu du document {cr}, {last_message.content}'''
                                response = st.write_stream(generate_response_via_langchain(wrapped_prompt,stream=True))
                                st.session_state.cr_chat_history.append(AIMessage(content=response))
                # col_copy_c , col_success_c = st.columns([1,7])
                # if col_copy_c.button("📋",key="copy_cr_chat"):
                with st.container(height=50,border=False):
                    chat_formatted = chat_history_formatter(st.session_state.cr_chat_history)
                    st_copy_to_clipboard(chat_formatted,key="cp_but_cr_chat",show_text=False)
                # col_success_c.success("Historique copié !")

    elif choice == "graphe de connaissance":
        # Build the knowledge graph only once; the keywords extracted during
        # the audit are added to the allowed node types.
        if "graph" not in st.session_state or st.session_state.graph == None:
            with st.spinner("Génération du graphe..."):
                keywords_list = [keyword.strip() for keyword in audit["Mots clés"].strip().split(",")]
                allowed_nodes_types =keywords_list+ ["Person","Organization","Location","Event","Date","Time","Ressource","Concept"]

                # Long documents are summarised first to keep the graph
                # extraction prompt within a manageable size.
                number_tokens = audit["Nombre de tokens"]
                if number_tokens > 10000:
                    if st.session_state.cr == "":
                        st.session_state.cr = generate_response_via_langchain(prompt_cr,stream=False,model="gpt-4o")
                    text = st.session_state.cr

                graph = get_graph(text,allowed_nodes=allowed_nodes_types)
                st.session_state.graph = graph
                # Reset every graph-derived cache before repopulating it.
                st.session_state.filter_views = {}
                st.session_state.current_view = None
                st.session_state.node_types = None
                st.session_state.chat_graph_history = []

                node_types = get_node_types(graph[0])
                list_node_types = list(node_types)
                sorted_node_types = sorted(list_node_types,key=lambda x: x.lower())
                print(sorted_node_types)
                nodes_type_dict = list_to_dict_colors(sorted_node_types)
                st.session_state.node_types = nodes_type_dict
                st.session_state.filter_views["Vue par défaut"] = list(node_types)
                st.session_state.current_view = "Vue par défaut"
        else:
            graph = st.session_state.graph

        if graph is not None:
            #st.write(graph)

            edges,nodes,config = convert_neo4j_to_agraph(graph[0],st.session_state.node_types)

            col1, col2 = st.columns([2.5, 1.5])

            # Left column: graph visualisation with view/colour management.
            with col1.container(border=True,height=850):
                st.write("##### Visualisation du graphe (**"+st.session_state.current_view+"**)")
                filter_col,add_view_col,change_view_col,color_col = st.columns([9,1,1,1])

                if color_col.button("🎨",help="Changer la couleur"):
                    change_color_dialog()

                if change_view_col.button("🔍",help="Changer de vue"):
                    change_view_dialog()


                #add mots cles to evry label in audit["Mots clés"]
                #filter_labels = [ label + " (mot clé)" if label.strip().lower() in audit["Mots clés"].strip().lower().split(",") else label for label in st.session_state.filter_views[st.session_state.current_view] ]
                # Display labels are prefixed with "Mot clé : " when the node
                # type comes from the audit keywords; filter_correspondance
                # maps between display labels and raw node types.
                keywords_list = [keyword.strip().lower() for keyword in audit["Mots clés"].strip().split(",")]
                dict_filters = {label: "Mot clé : "+label if label.strip().lower() in keywords_list else label for label in st.session_state.filter_views[st.session_state.current_view]}

                default_target_filter = filter_correspondance(st.session_state.filter_views[st.session_state.current_view],dict_filters)
                # st.write(default_target_filter)
                # st.write(dict_filters)
                sorted_default_target_filter = sorted(default_target_filter,key=lambda x: x.lower())
                target_filter = filter_correspondance(list(st.session_state.node_types.keys()),dict_filters)
                target_filter = sorted(target_filter,key=lambda x: x.lower())
                filter = filter_col.multiselect("Filtrer selon l'étiquette",target_filter,placeholder="Sélectionner une ou plusieurs étiquettes",default=default_target_filter,label_visibility="collapsed")
                filter = filter_correspondance(filter,dict_filters,reverse=True)
                if add_view_col.button("➕",help="Ajouter une vue"):
                    add_view_dialog(filter)
                if filter:
                    nodes = filter_nodes_by_types(nodes,filter)

                # Returns the id of the clicked node (or None).
                selected = display_graph(edges,nodes,config)

                # col_copy , col_success = st.columns([1,11])
                # if col_copy.button("📋",key="copy_graph"):
                with st.container(height=50,border=False):
                    graph_json = graph_doc_to_json(graph[0])
                    st_copy_to_clipboard(json.dumps(graph_json),key="cp_but_graph")
                # col_success.success("Graphe copié dans le presse-papier")

            # Right column: graph-aware chat, with quick prompts when a node
            # is selected in the visualisation.
            with col2.container(border=True,height=850):
                st.markdown("##### Dialoguer avec le graphe")

                user_query = st.chat_input("Par ici ...")
                if user_query is not None and user_query != "":
                    st.session_state.chat_graph_history.append(HumanMessage(content=user_query))

                with st.container(height=600, border=False):
                    for message in st.session_state.chat_graph_history:
                        if isinstance(message, AIMessage):
                            with st.chat_message("AI"):
                                st.markdown(message.content)
                        elif isinstance(message, HumanMessage):
                            with st.chat_message("Human"):
                                st.write(message.content)

                    #check if last message is human message
                    if len(st.session_state.chat_graph_history) > 0:
                        last_message = st.session_state.chat_graph_history[-1]
                        if isinstance(last_message, HumanMessage):
                            with st.chat_message("AI"):
                                retreive = st.session_state.vectorstore.as_retriever()
                                context = retreive.invoke(last_message.content)
                                wrapped_prompt = f"Étant donné le contexte suivant {context}, et le graph de connaissance: {graph}, {last_message.content}"
                                response = st.write_stream(generate_response_via_langchain(wrapped_prompt,stream=True))
                                st.session_state.chat_graph_history.append(AIMessage(content=response))

                    # Offer canned exploration prompts for the clicked node.
                    if selected is not None:
                        with st.chat_message("AI"):
                            st.markdown(f" EXPLORER LES DONNEES CONTENUES DANS **{selected}**")

                        prompts = [f"Extrait moi toutes les informations du noeud ''{selected}'' ➡️",
                                   f"Montre moi les conversations autour du noeud ''{selected}'' ➡️"]

                        for i,prompt in enumerate(prompts):
                            button = st.button(prompt,key=f"p_{i}",on_click=lambda i=i: st.session_state.chat_graph_history.append(HumanMessage(content=prompts[i])))

                # col_copy_c , col_success_c = st.columns([1,7])
                # if col_copy_c.button("📋",key="copy_graph_chat"):
                with st.container(height=50,border=False):
                    st_copy_to_clipboard(chat_history_formatter(st.session_state.chat_graph_history),key="cp_but_graph_chat",show_text=False)
                # col_success_c.success("Historique copié !")



doc_dialog_main()
|
audit_page/knowledge_graph.py
ADDED
@@ -0,0 +1,333 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from utils.kg.construct_kg import get_graph
|
3 |
+
from utils.audit.rag import get_text_from_content_for_doc,get_text_from_content_for_audio
|
4 |
+
from streamlit_agraph import agraph, Node, Edge, Config
|
5 |
+
import random
|
6 |
+
import math
|
7 |
+
from utils.audit.response_llm import generate_response_via_langchain
|
8 |
+
from langchain_core.messages import AIMessage, HumanMessage
|
9 |
+
from langchain_core.prompts import PromptTemplate
|
10 |
+
|
11 |
+
def if_node_exists(nodes, node_id):
    """
    Check if a node id is already present in a node list.

    Args:
        nodes (list): Agraph Node objects collected so far.
        node_id (str): The id of the node to check.

    Returns:
        return_value: True if a node with that id exists, False otherwise.
    """
    return any(existing.id == node_id for existing in nodes)
|
27 |
+
def generate_random_color():
    """Return a random light RGB triple, each channel drawn from 180..255.

    The restricted range keeps colours pale enough for dark node labels
    to stay readable.
    """
    return tuple(random.randint(180, 255) for _ in range(3))
+
|
33 |
+
def rgb_to_hex(rgb):
    """Format an (r, g, b) sequence as a lowercase '#rrggbb' string."""
    return f"#{rgb[0]:02x}{rgb[1]:02x}{rgb[2]:02x}"
+
|
36 |
+
def get_node_types(graph):
    """Collect the set of node types appearing anywhere in the graph.

    Scans both ``graph.nodes`` and the endpoints of every relationship,
    since a relationship may reference a node type that is absent from
    the declared node list.
    """
    types = {node.type for node in graph.nodes}
    for rel in graph.relationships:
        types.add(rel.source.type)
        types.add(rel.target.type)
    return types
+
|
47 |
+
def color_distance(color1, color2):
    # Euclidean distance between two RGB triples (used to keep the random
    # palette perceptually spread out).
    return math.dist(color1, color2)
+
|
51 |
+
def generate_distinct_colors(num_colors, min_distance=30):
    """Sample *num_colors* random light colours pairwise at least
    *min_distance* apart (Euclidean RGB), returned as hex strings.

    Rejection-samples candidates until enough sufficiently distinct
    colours are found.
    """
    palette = []
    while len(palette) < num_colors:
        candidate = generate_random_color()
        if any(color_distance(candidate, seen) < min_distance for seen in palette):
            continue  # too close to an already chosen colour; resample
        palette.append(candidate)
    return [rgb_to_hex(rgb) for rgb in palette]
|
59 |
+
def list_to_dict_colors(node_types:set):
    """Assign one distinct random hex colour to every node type.

    Returns a mapping {node_type: '#rrggbb'} in the iteration order of
    *node_types*.
    """
    palette = generate_distinct_colors(len(node_types))
    return {node_type: colour for node_type, colour in zip(node_types, palette)}
|
70 |
+
|
71 |
+
def convert_neo4j_to_agraph(neo4j_graph, node_colors):
    """
    Converts a Neo4j graph into an Agraph format.

    Args:
        neo4j_graph (dict): A dictionary representing the Neo4j graph with keys 'nodes' and 'relationships'.
                            'nodes' is a list of dicts with each dict having 'id' and 'type' keys.
                            'relationships' is a list of dicts with 'source', 'target', and 'type' keys.
        node_colors (dict): Mapping from node type to hex colour string.

    Returns:
        return_value: (edges, nodes, config) ready to be passed to agraph().
    """
    DEFAULT_SIZE = 25    # default node size, can be customized
    DEFAULT_SHAPE = "circle"  # default node shape, can be customized

    nodes = []
    edges = []

    def register(raw):
        # Agraph ids cannot contain spaces, so the id uses underscores while
        # the human-readable id stays as the label; the node type is carried
        # in the tooltip (title) and drives the colour.
        candidate = Node(id=raw.id.replace(" ", "_"), title=raw.type, label=raw.id,
                         size=DEFAULT_SIZE, shape=DEFAULT_SHAPE, color=node_colors[raw.type])
        if not if_node_exists(nodes, candidate.id):
            nodes.append(candidate)
        return candidate.id

    # Creating Agraph nodes
    for node in neo4j_graph.nodes:
        register(node)

    # Creating Agraph edges; endpoints are registered too, in case a
    # relationship references a node missing from neo4j_graph.nodes.
    for relationship in neo4j_graph.relationships:
        source_id = register(relationship.source)
        target_id = register(relationship.target)
        edges.append(Edge(source=source_id, label=relationship.type, target=target_id))

    # Define the configuration for Agraph; visual options come from config.json.
    config = Config(width=1200, height=800, directed=True, physics=True, hierarchical=True,from_json="config.json")

    return edges, nodes, config
|
133 |
+
def display_graph(edges, nodes, config):
    """Render the agraph visualisation and return the clicked node id (or None)."""
    selection = agraph(edges=edges, nodes=nodes, config=config)
    return selection
+
|
137 |
+
|
138 |
+
|
139 |
+
def filter_nodes_by_types(nodes:list[Node], node_types_filter:list) -> list[Node]:
    """Keep only the nodes whose type is in *node_types_filter*.

    The node's type is stored in its ``title`` attribute (see
    convert_neo4j_to_agraph).
    """
    return [node for node in nodes if node.title in node_types_filter]
+
|
146 |
+
@st.dialog(title="Changer la vue")
def change_view_dialog():
    # Modal listing every saved filter view with delete / select / rename
    # actions.  Each action mutates st.session_state and triggers st.rerun().
    # NOTE(review): nesting reconstructed from a diff rendering that lost
    # indentation — verify against the original file.
    st.write("Changer la vue")

    for index, item in enumerate(st.session_state.filter_views.keys()):
        emp = st.empty()
        col1, col2, col3 = emp.columns([8, 1, 1])

        # index 0 is the default view and can be neither deleted nor renamed.
        if index > 0 and col2.button("🗑️", key=f"del{index}"):
            del st.session_state.filter_views[item]
            st.session_state.current_view = "Vue par défaut"
            st.rerun()
        # ✅ marks the currently active view; 🔍 selects another one.
        but_content = "🔍" if st.session_state.current_view != item else "✅"
        if col3.button(but_content, key=f"valid{index}"):
            st.session_state.current_view = item
            st.rerun()
        # Guard: the dict may have shrunk after a delete within this loop.
        if len(st.session_state.filter_views.keys()) > index:
            with col1.expander(item):
                if index > 0:
                    change_name = st.text_input("Nom de la vue", label_visibility="collapsed", placeholder="Changez le nom de la vue",key=f"change_name{index}")
                    if st.button("Renommer",key=f"rename{index}"):
                        if change_name != "":
                            st.session_state.filter_views[change_name] = st.session_state.filter_views.pop(item)
                            st.session_state.current_view = change_name
                            st.rerun()
                st.markdown("\n".join(f"- {label.strip()}" for label in st.session_state.filter_views[item]))
        else:
            emp.empty()
|
175 |
+
@st.dialog(title="Ajouter une vue")
def add_view_dialog(filters):
    # Modal that saves the currently selected multiselect filters as a named
    # view in st.session_state.filter_views and makes it the active view.
    st.write("Ajouter une vue")
    view_name = st.text_input("Nom de la vue")
    st.markdown("les filtres actuels:")
    st.write(filters)
    if st.button("Ajouter la vue"):
        # NOTE(review): an empty view_name is accepted here — confirm whether
        # that is intended.
        st.session_state.filter_views[view_name] = filters
        st.session_state.current_view = view_name
        st.rerun()
|
186 |
+
@st.dialog(title="Changer la couleur")
def change_color_dialog():
    # Modal exposing one colour picker per node type; the chosen colours are
    # written straight back into st.session_state.node_types.
    st.write("Changer la couleur")
    for node_type,color in st.session_state.node_types.items():
        color = st.color_picker(f"La couleur de l'entité **{node_type.strip()}**",color)
        st.session_state.node_types[node_type] = color

    # "Valider" just closes the dialog via a rerun; colours are already saved.
    if st.button("Valider"):
        st.rerun()
196 |
+
|
197 |
+
|
198 |
+
def kg_main():
    """Standalone knowledge-graph page: builds and displays a graph from the
    audit summary (cr) and offers a graph-aware side chat.

    Requires a prior audit and a generated compte rendu; both are read from
    st.session_state.  The graph, its node-type colours, the filter views and
    the chat history are cached in session state.

    NOTE(review): body reconstructed from a diff rendering that lost
    indentation; verify the Streamlit `with` nesting against the original.
    """
    #st.set_page_config(page_title="Graphe de connaissance", page_icon="", layout="wide")



    if "audit" not in st.session_state or st.session_state.audit == {}:
        st.error("Veuillez d'abord effectuer un audit pour visualiser le graphe de connaissance.")
        return

    if "cr" not in st.session_state:
        st.error("Veuillez d'abord effectuer un compte rendu pour visualiser le graphe de connaissance.")
        return

    if "graph" not in st.session_state:
        st.session_state.graph = None

    if "filter_views" not in st.session_state:
        st.session_state.filter_views = {}
    if "current_view" not in st.session_state:
        st.session_state.current_view = None

    st.title("Graphe de connaissance")

    if "node_types" not in st.session_state:
        st.session_state.node_types = None

    if "summary" not in st.session_state:
        st.session_state.summary = None

    if "chat_graph_history" not in st.session_state:
        st.session_state.chat_graph_history = []

    audit = st.session_state.audit_simplified
    # content = st.session_state.audit["content"]

    # if audit["type de fichier"] == "pdf":
    #     text = get_text_from_content_for_doc(content)
    # elif audit["type de fichier"] == "audio":
    #     text = get_text_from_content_for_audio(content)

    # Graph extraction runs on the compte rendu plus the audit keywords,
    # not on the raw document.
    text = st.session_state.cr + "mots clés" + audit["Mots clés"]

    #summary_prompt = f"Voici un ensemble de documents : {text}. À partir de ces documents, veuillez fournir des résumés concis en vous concentrant sur l'extraction des relations essentielles et des événements. Il est crucial d'inclure les dates des actions ou des événements, car elles seront utilisées pour l'analyse chronologique. Par exemple : 'Sam a été licencié par le conseil d'administration d'OpenAI le 17 novembre 2023 (17 novembre, vendredi)', ce qui illustre la relation entre Sam et OpenAI ainsi que la date de l'événement."

    if st.button("Générer le graphe"):
        # with st.spinner("Extractions des relations..."):
        #     sum = generate_response_openai(summary_prompt,model="gpt-4o")
        #     st.session_state.summary = sum

        with st.spinner("Génération du graphe..."):
            # Audit keywords extend the allowed node types for extraction.
            keywords_list = audit["Mots clés"].strip().split(",")
            allowed_nodes_types =keywords_list+ ["Person","Organization","Location","Event","Date","Time","Ressource","Concept"]
            graph = get_graph(text,allowed_nodes=allowed_nodes_types)
            st.session_state.graph = graph

            node_types = get_node_types(graph[0])
            nodes_type_dict = list_to_dict_colors(node_types)
            st.session_state.node_types = nodes_type_dict
            st.session_state.filter_views["Vue par défaut"] = list(node_types)
            st.session_state.current_view = "Vue par défaut"

    else:
        graph = st.session_state.graph

    if graph is not None:
        #st.write(graph)

        edges,nodes,config = convert_neo4j_to_agraph(graph[0],st.session_state.node_types)

        col1, col2 = st.columns([2.5, 1.5])

        # Left column: graph visualisation with view/colour management.
        with col1.container(border=True,height=800):
            st.write("##### Visualisation du graphe (**"+st.session_state.current_view+"**)")
            filter_col,add_view_col,change_view_col,color_col = st.columns([9,1,1,1])

            if color_col.button("🎨",help="Changer la couleur"):
                change_color_dialog()

            if change_view_col.button("🔍",help="Changer de vue"):
                change_view_dialog()


            #add mots cles to evry label in audit["Mots clés"]
            #filter_labels = [ label + " (mot clé)" if label.strip().lower() in audit["Mots clés"].strip().lower().split(",") else label for label in st.session_state.filter_views[st.session_state.current_view] ]
            filter = filter_col.multiselect("Filtrer selon l'étiquette",st.session_state.node_types.keys(),placeholder="Sélectionner une ou plusieurs étiquettes",default=st.session_state.filter_views[st.session_state.current_view],label_visibility="collapsed")

            if add_view_col.button("➕",help="Ajouter une vue"):
                add_view_dialog(filter)

            if filter:
                nodes = filter_nodes_by_types(nodes,filter)

            # Returns the id of the clicked node (or None).
            selected = display_graph(edges,nodes,config)

        # Right column: graph-aware chat with quick prompts for the
        # selected node.
        with col2.container(border=True,height=800):
            st.markdown("##### Dialoguer avec le graphe")

            user_query = st.chat_input("Par ici ...")
            if user_query is not None and user_query != "":
                st.session_state.chat_graph_history.append(HumanMessage(content=user_query))

            with st.container(height=650, border=False):
                for message in st.session_state.chat_graph_history:
                    if isinstance(message, AIMessage):
                        with st.chat_message("AI"):
                            st.markdown(message.content)
                    elif isinstance(message, HumanMessage):
                        with st.chat_message("Moi"):
                            st.write(message.content)

                #check if last message is human message
                # A trailing human message means the AI has not answered yet.
                if len(st.session_state.chat_graph_history) > 0:
                    last_message = st.session_state.chat_graph_history[-1]
                    if isinstance(last_message, HumanMessage):
                        with st.chat_message("AI"):
                            retreive = st.session_state.vectorstore.as_retriever()
                            context = retreive.invoke(last_message.content)
                            wrapped_prompt = f"Étant donné le contexte suivant {context}, et le graph de connaissance: {graph}, {last_message.content}"
                            response = st.write_stream(generate_response_via_langchain(wrapped_prompt,stream=True))
                            st.session_state.chat_graph_history.append(AIMessage(content=response))

                if selected is not None:
                    with st.chat_message("AI"):
                        st.markdown(f" EXPLORER LES DONNEES CONTENUES DANS **{selected}**")

                    prompts = [f"Extrait moi toutes les informations du noeud ''{selected}'' ➡️",
                               f"Montre moi les conversations autour du noeud ''{selected}'' ➡️"]

                    for i,prompt in enumerate(prompts):
                        button = st.button(prompt,key=f"p_{i}",on_click=lambda i=i: st.session_state.chat_graph_history.append(HumanMessage(content=prompts[i])))




# NOTE(review): this module-level read runs at import time and appears unused
# afterwards — presumably a leftover; confirm it can be removed.
node_types = st.session_state.node_types
|
chatbot_page/chatbot.py
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from langchain_openai import ChatOpenAI
|
3 |
+
from langchain_mistralai import ChatMistralAI
|
4 |
+
from langchain_core.prompts import ChatPromptTemplate
|
5 |
+
from langchain_core.output_parsers import StrOutputParser
|
6 |
+
from langchain_core.messages import AIMessage, HumanMessage
|
7 |
+
|
8 |
+
def get_response(user_query, chat_history, context,llm=None,history_limit=5,stream=True):
    """Answer a user question with an LLM, grounded in RAG context and recent chat history.

    Args:
        user_query: the user's question.
        chat_history: full message list; only the trailing ``history_limit`` entries are sent.
        context: retrieved context injected into the prompt.
        llm: UI display name of the model ("GPT-4o", "GPT-4o-mini",
            "Mistral Large 2 (FR)", "Mistral Nemo (FR)") or None for the default.
            FIX: unknown names now fall back to gpt-4o-mini instead of leaving
            ``llm`` as a raw string and crashing when building ``prompt | llm``.
        history_limit: number of trailing history messages to include (falsy = all).
        stream: if True, return a generator of string chunks; otherwise return the
            raw model response from ``chain.invoke`` (original behavior preserved).
    """

    template = """
    Étant donné l'historique de la conversation : {chat_history}, le contexte : {context}, et la question de l'utilisateur : {user_question}, veuillez fournir une réponse détaillée et complète. La réponse doit inclure un ou plusieurs des éléments suivants :

    1. Une explication claire des concepts clés et des termes liés au sujet.
    2. Un aperçu des meilleures pratiques, des stratégies courantes ou des cadres de référence pertinents pour la discussion.
    3. Des exemples spécifiques ou des études de cas illustrant les principes abordés.
    4. Les défis potentiels ou les considérations à prendre en compte.
    5. Des suggestions de ressources supplémentaires ou des actions que l'utilisateur peut entreprendre pour approfondir sa compréhension.

    Assurez-vous que la réponse soit adaptée au niveau d'expertise de l'utilisateur et aux spécificités du contexte fourni.

    """

    prompt = ChatPromptTemplate.from_template(template)

    # Map the UI display name to a concrete chat model; None, "GPT-4o-mini" and
    # any unrecognized name all resolve to the default model.
    if llm == "GPT-4o":
        model = ChatOpenAI(model="gpt-4o")
    elif llm == "Mistral Large 2 (FR)":
        model = ChatMistralAI(model_name="mistral-large-2407")
    elif llm == "Mistral Nemo (FR)":
        model = ChatMistralAI(model_name="open-mistral-nemo-2407")
    else:
        model = ChatOpenAI(model="gpt-4o-mini")

    chain = prompt | model

    payload = {
        "context": context,
        # Falsy history_limit means "send the whole history" (original semantics).
        "chat_history": chat_history[-history_limit:] if history_limit else chat_history,
        "user_question": user_query,
    }

    if not stream:
        # Non-streaming callers receive the raw model message (no string parsing),
        # exactly as before.
        return chain.invoke(payload)

    # Streaming callers get parsed string chunks suitable for st.write_stream.
    return (chain | StrOutputParser()).stream(payload)
|
61 |
+
|
62 |
+
def handle_display_models(index, models_names):
    """Render a radio selector for the available models and return the chosen name."""
    selection = st.radio("Choisir un modèle", models_names, index=index)
    return selection
|
65 |
+
|
66 |
+
|
67 |
+
def chatbot_main():
    """Render the chatbot page: model picker, message history, and a streaming chat loop.

    Relies on st.session_state keys: chat_history (message list), model (display
    name), response_llm (RAG/context string filled elsewhere — used as context here).
    """
    st.title("Chatbot")
    # Display names understood by get_response's dispatch.
    models_names = ["GPT-4o", "GPT-4o-mini"]

    # Seed the conversation with a greeting on first load.
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = [
            AIMessage(content="Salut, Que puis-je faire pour vous ?"),
        ]

    if "model" not in st.session_state:
        st.session_state.model = "GPT-4o-mini"

    header = st.container()
    col1, col2 = header.columns([1, 2])

    # Model selector lives in a popover; the radio is pre-selected on the
    # currently active model.
    with col1.popover("Modèles disponibles"):
        new_model = handle_display_models(models_names.index(st.session_state.model), models_names)

    st.session_state.model = new_model

    st.markdown(f"- **{st.session_state.model}**")

    # Replay the stored conversation so it survives Streamlit reruns.
    for message in st.session_state.chat_history:
        if isinstance(message, AIMessage):
            with st.chat_message("AI"):
                st.markdown(message.content)
        elif isinstance(message, HumanMessage):
            with st.chat_message("Moi"):
                st.write(message.content)

    # Context fed to get_response; presumably populated by the audit pages —
    # empty string when nothing has been analyzed yet.
    if "response_llm" not in st.session_state:
        st.session_state.response_llm = ""

    user_query = st.chat_input("Par ici ...")
    if user_query is not None and user_query != "":
        st.session_state.chat_history.append(HumanMessage(content=user_query))

        with st.chat_message("Moi"):
            st.markdown(user_query)

        with st.chat_message("AI"):
            st.markdown(f"**{st.session_state.model}**")

            # Stream the answer token-by-token; st.write_stream returns the full
            # concatenated text, which is then persisted in the history.
            response = st.write_stream(get_response(user_query, st.session_state.chat_history, context=st.session_state.response_llm, llm=st.session_state.model, stream=True))
            st.session_state.chat_history.append(AIMessage(content=response))
|
113 |
+
|
114 |
+
|
115 |
+
|
116 |
+
|
117 |
+
|
118 |
+
|
119 |
+
chatbot_main()
|
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"height": "600px",
|
3 |
+
"width": "1200px",
|
4 |
+
"autoResize": true,
|
5 |
+
|
6 |
+
"physics":{
|
7 |
+
"enabled": true,
|
8 |
+
|
9 |
+
"barnesHut": {
|
10 |
+
"avoidOverlap": 1,
|
11 |
+
"theta": 0.1,
|
12 |
+
"gravitationalConstant": -10000,
|
13 |
+
"centralGravity": 1,
|
14 |
+
"springLength": 50,
|
15 |
+
"springConstant": 0,
|
16 |
+
"damping": 0.5
|
17 |
+
},
|
18 |
+
"stabilization": {
|
19 |
+
"enabled": true,
|
20 |
+
"iterations": 1000,
|
21 |
+
"updateInterval": 50,
|
22 |
+
"onlyDynamicEdges": false,
|
23 |
+
"fit": true
|
24 |
+
}
|
25 |
+
}
|
26 |
+
}
|
doc_page/documentation.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st

# Placeholder documentation page for the Streamlit multipage app.
st.title("Documentation")
|
packages.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
xclip
|
2 |
+
xsel
|
3 |
+
python3-pyperclip
|
requirements.txt
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit==1.37.0
|
2 |
+
pyperclip
|
3 |
+
tiktoken
|
4 |
+
pydub
|
5 |
+
numpy
|
6 |
+
scipy
|
7 |
+
textstat
|
8 |
+
pymupdf
|
9 |
+
openai
|
10 |
+
nltk
|
11 |
+
rake_nltk
|
12 |
+
python-docx
|
13 |
+
pillow
|
14 |
+
pandas
|
15 |
+
langchain
|
16 |
+
langchain-core
|
17 |
+
langchainhub
|
18 |
+
langchain-openai
|
19 |
+
langchain-mistralai
|
20 |
+
faiss-cpu
|
21 |
+
langchain-community
|
22 |
+
python-dotenv
|
23 |
+
langchain-experimental
|
24 |
+
neo4j
|
25 |
+
streamlit-agraph
|
26 |
+
st-copy-to-clipboard
|
27 |
+
clipboard
|
28 |
+
|
utils/audit/audit_audio.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import numpy as np
|
3 |
+
import scipy.io.wavfile as wavfile
|
4 |
+
from pydub import AudioSegment
|
5 |
+
import io
|
6 |
+
import tiktoken
|
7 |
+
from openai import OpenAI
|
8 |
+
|
9 |
+
def transcript_audio_func(audio_file):
    """Transcribe *audio_file* with OpenAI Whisper and return the raw text."""
    client = OpenAI()
    result = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
    )
    return result.text
|
17 |
+
|
18 |
+
def count_tokens(input_string: str) -> int:
    """Return the number of cl100k_base (GPT-4 family) tokens in *input_string*."""
    encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(input_string))
|
22 |
+
|
23 |
+
# Function to calculate SNR
|
24 |
+
def calculate_snr(audio_data):
|
25 |
+
signal = audio_data
|
26 |
+
noise = audio_data - np.mean(audio_data)
|
27 |
+
signal_power = np.mean(signal ** 2)
|
28 |
+
noise_power = np.mean(noise ** 2)
|
29 |
+
snr = 10 * np.log10(signal_power / noise_power)
|
30 |
+
return snr
|
31 |
+
|
32 |
+
# Function to evaluate audio quality
|
33 |
+
def evaluate_audio_quality(file) -> dict:
|
34 |
+
try:
|
35 |
+
audio = AudioSegment.from_file(file)
|
36 |
+
except:
|
37 |
+
audio = AudioSegment.from_file(io.BytesIO(file.read()))
|
38 |
+
|
39 |
+
audio_data = np.array(audio.get_array_of_samples())
|
40 |
+
|
41 |
+
#number of minutes
|
42 |
+
duration = len(audio_data) / audio.frame_rate*2 / 60
|
43 |
+
|
44 |
+
# Calculate volume
|
45 |
+
volume = audio.dBFS
|
46 |
+
|
47 |
+
# Calculate SNR
|
48 |
+
snr = calculate_snr(audio_data)
|
49 |
+
|
50 |
+
#get the transcription of the audio
|
51 |
+
transcription = transcript_audio_func(file)
|
52 |
+
|
53 |
+
audit = {
|
54 |
+
"volume": volume,
|
55 |
+
"SNR": snr,
|
56 |
+
"duration": duration,
|
57 |
+
"number_of_tokens": count_tokens(transcription),
|
58 |
+
"number_of_words": len(transcription.split())
|
59 |
+
}
|
60 |
+
|
61 |
+
content = {
|
62 |
+
"transcription": transcription,
|
63 |
+
"audio_data": audio_data,
|
64 |
+
"frame_rate": audio.frame_rate
|
65 |
+
}
|
66 |
+
|
67 |
+
audit_global = {
|
68 |
+
"audit": audit,
|
69 |
+
"content": content
|
70 |
+
}
|
71 |
+
|
72 |
+
return audit_global
|
utils/audit/audit_doc.py
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import pymupdf
|
3 |
+
import tiktoken
|
4 |
+
import textstat
|
5 |
+
from docx import Document
|
6 |
+
import io
|
7 |
+
# from rake_nltk import Rake
|
8 |
+
# import nltk
|
9 |
+
# from nltk.corpus import stopwords
|
10 |
+
from openai import OpenAI
|
11 |
+
|
12 |
+
# Download NLTK stopwords
|
13 |
+
# nltk.download('stopwords')
|
14 |
+
# nltk.download('punkt')
|
15 |
+
|
16 |
+
#function to use gpt4o-mini
|
17 |
+
def extract_relevant_keywords(prompt: str) -> str:
    """Send *prompt* to gpt-4o-mini and return the assistant's reply text."""
    client = OpenAI()
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content
|
26 |
+
|
27 |
+
|
28 |
+
def evaluate_text_quality(text: str) -> dict:
    """Score the readability of *text* using several textstat metrics.

    FIX: the original computed every score but had no return statement, so the
    function always returned None despite its ``-> dict`` annotation.

    Returns the raw metric values plus "global_score", a weighted aggregate
    scaled to 0-5 (higher = more readable).
    """
    # Calculate readability metrics
    flesch_reading_ease = textstat.flesch_reading_ease(text)
    flesch_kincaid_grade = textstat.flesch_kincaid_grade(text)
    gunning_fog = textstat.gunning_fog(text)
    smog_index = textstat.smog_index(text)
    automated_readability_index = textstat.automated_readability_index(text)

    # Normalize readability scores to a 0-1 scale.
    def normalize_score(score, min_score, max_score):
        return (score - min_score) / (max_score - min_score)

    # Grade-level metrics are inverted: a higher grade means harder to read.
    n_flesch_reading_ease = normalize_score(flesch_reading_ease, 0, 100)
    n_flesch_kincaid_grade = 1 - normalize_score(flesch_kincaid_grade, 0, 18)
    n_gunning_fog = 1 - normalize_score(gunning_fog, 0, 18)
    n_smog_index = 1 - normalize_score(smog_index, 0, 18)
    n_automated_readability_index = 1 - normalize_score(automated_readability_index, 0, 18)

    # Weights for each metric (adjust these as needed).
    weights = {
        "flesch_reading_ease": 0.25,
        "flesch_kincaid_grade": 0.25,
        "gunning_fog": 0.2,
        "smog_index": 0.15,
        "automated_readability_index": 0.15,
    }

    # Weighted average of the normalized scores.
    global_score = (
        n_flesch_reading_ease * weights["flesch_reading_ease"] +
        n_flesch_kincaid_grade * weights["flesch_kincaid_grade"] +
        n_gunning_fog * weights["gunning_fog"] +
        n_smog_index * weights["smog_index"] +
        n_automated_readability_index * weights["automated_readability_index"]
    )

    # Scale the global score to 0-5.
    global_score_0_5 = global_score * 5

    return {
        "flesch_reading_ease": flesch_reading_ease,
        "flesch_kincaid_grade": flesch_kincaid_grade,
        "gunning_fog": gunning_fog,
        "smog_index": smog_index,
        "automated_readability_index": automated_readability_index,
        "global_score": global_score_0_5,
    }
|
67 |
+
|
68 |
+
# def extract_keywords(text):
|
69 |
+
# rake = Rake(stopwords.words('french'))
|
70 |
+
# rake.extract_keywords_from_text(text)
|
71 |
+
# return rake.get_ranked_phrases()
|
72 |
+
|
73 |
+
|
74 |
+
|
75 |
+
def count_tokens(input_string: str) -> int:
    """Return the number of cl100k_base tokens in *input_string*."""
    return len(tiktoken.get_encoding("cl100k_base").encode(input_string))
|
79 |
+
|
80 |
+
def audit_descriptif_pdf(file,max_img_width) -> dict:
    """Walk a PDF and build a descriptive audit alongside its raw content.

    Args:
        file: uploaded file object; read fully into pymupdf.
        max_img_width: images wider than this are scaled down, keeping aspect ratio.

    Returns:
        {"audit": document- and per-page counters (pages, images, links, tables,
         tokens, words, LLM-extracted keywords),
         "content": per-page images, text, links, and tables (as DataFrames)}.
    """
    document = pymupdf.open(stream=file.read())

    # Document-wide counters; per-page dicts are added under "page_<n>" keys below.
    audit_dict_doc = {
        "number_of_pages": len(document),
        "number_of_images": 0,
        "number_of_links": 0,
        "number_of_tables": 0,
        "number_of_tokens": 0,
        "number_of_words": 0,
        "key_words": []
    }

    doc_content = dict()

    for page in document:

        audit_dict_page = {}
        page_content = {
            "images": [],
            "texte": "",
            "liens": [],
            "tableaux": []
        }

        #number of images
        images = page.get_images()
        number_images = len(images)
        audit_dict_page["number_of_images"] = number_images
        audit_dict_doc["number_of_images"] += number_images

        #get images
        for _, img in enumerate(images):
            xref = img[0]  # xref is the image's cross-reference id inside the PDF
            base_image = document.extract_image(xref)

            image_bytes = base_image["image"]
            image_width = base_image["width"]
            image_height = base_image["height"]

            # Adjust image size if it exceeds the maximum width (preserve aspect ratio).
            if image_width > max_img_width:
                ratio = max_img_width / image_width
                image_width = max_img_width
                image_height = int(image_height * ratio)

            page_content["images"].append((image_bytes, image_width, image_height))

        #get links with uri — only external URI links are kept, not internal jumps
        links = []
        for link in page.get_links():
            if link['kind'] == pymupdf.LINK_URI and 'uri' in link:
                links.append({"uri": link["uri"], "page": page.number})

        page_content["liens"] = links

        #number of links
        number_links = len(links)
        audit_dict_page["number_of_links"] = number_links
        audit_dict_doc["number_of_links"] += number_links

        #number of tables — pymupdf's table detector; each table is kept as a DataFrame
        tables = page.find_tables().tables
        number_tables = len(tables)
        for tab in tables:
            page_content["tableaux"].append(tab.to_pandas())
        audit_dict_page["number_of_tables"] = number_tables
        audit_dict_doc["number_of_tables"] += number_tables

        #number of tokens and words
        text = page.get_text("text")
        number_tokens = count_tokens(text)
        number_words = len(text.split())

        audit_dict_page["number_of_tokens"] = number_tokens
        audit_dict_page["number_of_words"] = number_words

        #get text
        page_content["texte"] = text

        audit_dict_doc["number_of_tokens"] += number_tokens
        audit_dict_doc["number_of_words"] += number_words

        audit_dict_doc[f"page_{page.number}"] = audit_dict_page

        doc_content[f"page_{page.number}"] = page_content

    # Extract key words from the document via gpt-4o-mini over the full text.
    text = " ".join([page["texte"] for page in doc_content.values()])
    # key_words = extract_keywords(text)
    # list_key_words_text = "\n".join(key_words[:10])
    prompt = f'''Voici le document:
    - {text}
    Veuillez extraire les cinq mots clés les plus pertinents de cette liste. Chaque mot clé doit contenir au maximum deux mots.

    TA REPONSE DOIT RESPECTER LE FORMAT SUIVANT :
    key_word1, key_word2, key_word3, key_word4, key_word5
    '''
    key_words_extracted = extract_relevant_keywords(prompt)
    audit_dict_doc["key_words"] = "\n" + key_words_extracted

    #merge 2 dicts
    global_audit = {
        "audit": audit_dict_doc,
        "content": doc_content
    }

    return global_audit
|
190 |
+
|
191 |
+
def audit_text(text: str) -> dict:
    """Audit raw text: token/word counts plus five LLM-extracted keywords.

    Returns {"audit": counters and keywords, "content": the original text}.
    """
    prompt = f'''Voici le document:
    - {text}
    Veuillez extraire les cinq mots clés les plus pertinents de cette liste. Chaque mot clé doit contenir au maximum deux mots.

    TA REPONSE DOIT RESPECTER LE FORMAT SUIVANT :
    key_word1, key_word2, key_word3, key_word4, key_word5
    '''
    keywords = extract_relevant_keywords(prompt)

    metrics = {
        "number_of_tokens": count_tokens(text),
        "number_of_words": len(text.split()),
    }
    metrics["key_words"] = "\n" + keywords

    return {
        "audit": metrics,
        "content": text,
    }
|
216 |
+
|
217 |
+
|
utils/audit/rag.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
2 |
+
from langchain_openai import OpenAIEmbeddings
|
3 |
+
from langchain_community.vectorstores import FAISS
|
4 |
+
|
5 |
+
|
6 |
+
|
7 |
+
def get_text_from_content_for_doc(content):
    """Concatenate the 'texte' field of every page in a parsed-PDF content dict."""
    return "".join(page_data["texte"] for page_data in content.values())
|
12 |
+
|
13 |
+
def get_text_from_content_for_audio(content):
    """Pull the Whisper transcription out of an audio content dict."""
    return content["transcription"]
|
15 |
+
|
16 |
+
|
17 |
+
def get_text_chunks(text):
    """Split *text* into overlapping character chunks suitable for embedding.

    Chunks are 500 characters with a 100-character overlap, measured with len().
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
        length_function=len,
    )
    return splitter.split_text(text)
|
25 |
+
|
26 |
+
def get_vectorstore(text_chunks):
    """Embed *text_chunks* with text-embedding-3-small and index them in FAISS."""
    embedder = OpenAIEmbeddings(model="text-embedding-3-small")
    return FAISS.from_texts(texts=text_chunks, embedding=embedder)
|
30 |
+
|
31 |
+
def setup_rag(file_type,content):
    """Build a FAISS vectorstore from parsed document or audio content.

    Args:
        file_type: "pdf" (content is a per-page dict) or "audio" (content holds
            a "transcription" key).
        content: the parsed content structure matching *file_type*.

    Raises:
        ValueError: for any other *file_type*. (FIX: the original fell through
        with ``text`` unbound and crashed with a confusing NameError.)
    """
    if file_type == "pdf":
        text = get_text_from_content_for_doc(content)
    elif file_type == "audio":
        text = get_text_from_content_for_audio(content)
    else:
        raise ValueError(f"Unsupported file_type: {file_type!r} (expected 'pdf' or 'audio')")

    chunks = get_text_chunks(text)
    vectorstore = get_vectorstore(chunks)

    return vectorstore
|
43 |
+
|
44 |
+
|
utils/audit/response_llm.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from openai import OpenAI
|
2 |
+
from langchain_openai import ChatOpenAI
|
3 |
+
from langchain_core.output_parsers import StrOutputParser
|
4 |
+
from langchain_core.prompts import PromptTemplate
|
5 |
+
|
6 |
+
def generate_response_openai(prompt: str,stream:bool = False,model = "gpt-4o-mini") -> str:
    """Send *prompt* to the OpenAI chat API and return the reply.

    With ``stream=False`` returns the full reply string. With ``stream=True``
    returns a generator of text deltas (suitable for st.write_stream).

    FIX: the original always accessed ``response.choices[0].message.content``,
    which raises AttributeError when ``stream=True`` because the API then
    returns a Stream of chunks, not a completion object.
    """
    client = OpenAI()
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "user", "content": prompt}
        ],
        stream=stream
    )

    if stream:
        # Streamed chunks carry the text in choices[0].delta.content (may be None).
        return (chunk.choices[0].delta.content or "" for chunk in response)

    return response.choices[0].message.content
|
17 |
+
|
18 |
+
|
19 |
+
def generate_response_via_langchain(query: str, stream: bool = False, model: str = "gpt-4o-mini") :
    """Run *query* through a minimal prompt -> ChatOpenAI -> string pipeline.

    Returns a chunk generator when ``stream`` is True, otherwise the full string.
    """
    # The prompt is a pass-through template: the query IS the prompt.
    chain = PromptTemplate.from_template("{query}") | ChatOpenAI(model=model) | StrOutputParser()
    payload = {"query": query}
    return chain.stream(payload) if stream else chain.invoke(payload)
|
utils/audit/transcript_audio.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from openai import OpenAI
|
2 |
+
|
3 |
+
def transcript_audio_func(audio_file):
    """Transcribe *audio_file* with OpenAI Whisper and return the raw text.

    FIX: the model id must be "whisper-1" — "whisper" is not a valid OpenAI
    model name and the API rejects it. This also matches the duplicate of this
    helper in utils/audit/audit_audio.py.
    """
    client = OpenAI()
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file
    )

    return transcription.text
|
utils/kg/barnes_algo.py
ADDED
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import division
|
2 |
+
import numpy as np
|
3 |
+
import math
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import matplotlib.animation as animation
|
6 |
+
import matplotlib.patches as patches
|
7 |
+
import random
|
8 |
+
|
9 |
+
theta = 0.5
|
10 |
+
AU = (149.6e6 * 1000) # 149.6 million km, in meters.
|
11 |
+
G = 6.67408e-11 #m^3 kg^-1 s^-2
|
12 |
+
fig1 = plt.figure()
|
13 |
+
sim = fig1.add_subplot(111, aspect='equal')
|
14 |
+
fig2 = plt.figure()
|
15 |
+
quadt = fig2.add_subplot(111, aspect='equal')
|
16 |
+
|
17 |
+
class Node:
    """Quadtree node for the Barnes-Hut approximation.

    NOTE: these are class-level attributes serving as per-instance defaults;
    instances rebind them (they never mutate the shared None), so sharing the
    class defaults is safe here.
    """
    children = None         # list of 4 child Nodes (one per quadrant) or None for a leaf
    mass = None             # total mass of the subtree; None marks an empty node
    center_of_mass = None   # [x, y] barycenter of all particles in the subtree
    bbox = None             # (xmin, xmax, ymin, ymax) square bounding box
    vx = vy = None          # velocity components — not used by the tree code itself
|
23 |
+
|
24 |
+
def quad_insert(root, x, y, m):
    """Insert a particle of mass *m* at (x, y) into the quadtree rooted at *root*.

    Three cases:
      * empty node  -> store the particle directly;
      * leaf node   -> split into four quadrants, reinsert the resident particle,
                       then insert the new one;
      * internal    -> descend into the quadrant containing (x, y).
    Mass and center of mass are updated incrementally as a weighted average.
    """
    if root.mass is None: #when the root is empty, add the first particle
        root.mass = m
        root.center_of_mass = [x,y]
        return
    elif root.children is None:
        # Leaf holding one particle: subdivide and push the resident particle down.
        root.children = [None,None,None,None]
        old_quadrant = quadrant_of_particle(root.bbox, root.center_of_mass[0], root.center_of_mass[1])
        if root.children[old_quadrant] is None:
            root.children[old_quadrant] = Node()
            root.children[old_quadrant].bbox = quadrant_bbox(root.bbox,old_quadrant)
        quad_insert(root.children[old_quadrant], root.center_of_mass[0], root.center_of_mass[1], root.mass)
        new_quadrant = quadrant_of_particle(root.bbox, x, y)
        if root.children[new_quadrant] is None:
            root.children[new_quadrant] = Node()
            root.children[new_quadrant].bbox = quadrant_bbox(root.bbox,new_quadrant)
        quad_insert(root.children[new_quadrant], x, y, m)
        # Weighted-average update of the aggregate center of mass, then total mass.
        root.center_of_mass[0] = (root.center_of_mass[0]*root.mass + x*m) / (root.mass + m)
        root.center_of_mass[1] = (root.center_of_mass[1]*root.mass + y*m) / (root.mass + m)
        root.mass = root.mass + m
    else:
        # Internal node: insert into the quadrant containing the new particle.
        new_quadrant = quadrant_of_particle(root.bbox, x, y)
        if root.children[new_quadrant] is None:
            root.children[new_quadrant] = Node()
            root.children[new_quadrant].bbox = quadrant_bbox(root.bbox, new_quadrant)
        quad_insert(root.children[new_quadrant], x, y, m)
        root.center_of_mass[0] = (root.center_of_mass[0]*root.mass + x*m) / (root.mass + m)
        root.center_of_mass[1] = (root.center_of_mass[1]*root.mass + y*m) / (root.mass + m)
        root.mass = root.mass + m
|
53 |
+
|
54 |
+
def display(root):
    """Recursively draw the quadtree on the *quadt* axes.

    Internal nodes draw the outlines of their four sub-quadrants; leaf nodes
    scatter their particle's position.

    FIX: replaced Python 2 ``xrange`` (NameError on Python 3) with ``range``.
    """
    if root.mass is None:
        return
    if root.children is not None:
        # Midpoint and half-width of this cell define its four sub-quadrants.
        x = (root.bbox[0] + root.bbox[1]) / 2
        y = (root.bbox[2] + root.bbox[3]) / 2
        width = x-root.bbox[0]
        plt_node(root.bbox[0], root.bbox[2], width)
        plt_node(root.bbox[0], y, width)
        plt_node(x, root.bbox[2], width)
        plt_node(x, y, width)
        for i in range(4):
            if root.children[i] is not None:
                display(root.children[i])
    else:
        quadt.scatter(root.center_of_mass[0], root.center_of_mass[1])
|
70 |
+
|
71 |
+
def integrate(particles):
    """Advance the N-body system with Barnes-Hut force evaluation.

    Each step rebuilds the quadtree, evaluates forces at the current positions,
    then applies a simple Euler update of velocities and positions, scattering
    each body's new position on the *sim* axes. Particle rows are
    [name, color, mass, x, y, vx, vy].

    FIX: replaced Python 2 ``xrange`` (NameError on Python 3) with ``range``.
    """
    bodies = particles
    n = len(bodies)
    timestep = 24*3600 #one day
    years = 2 * 365 #how many Earth years that simulate (loop count is in days)
    for day in range(years):
        particles_force = {}
        # Fresh tree every step at the current positions.
        root = Node()
        root.center_of_mass = []
        root.bbox = find_root_bbox(bodies)
        for i in range(n):
            quad_insert(root, bodies[i][3], bodies[i][4], bodies[i][2])
        # Evaluate all forces before moving anything, keyed by body name.
        for i in range(n):
            total_fx, total_fy = compute_force(root,bodies[i][3],bodies[i][4],bodies[i][2])
            particles_force[bodies[i][0]] = (total_fx, total_fy)
        for i in range(n):
            fx, fy = particles_force[bodies[i][0]]
            # Euler step: a = F/m, v += a*dt, then x += v*dt.
            bodies[i][5] += fx / bodies[i][2] * timestep
            bodies[i][6] += fy / bodies[i][2] * timestep

            bodies[i][3] += bodies[i][5] * timestep
            bodies[i][4] += bodies[i][6] * timestep
            sim.scatter(bodies[i][3], bodies[i][4], c=bodies[i][1])
    # Draw the final tree and mark the global center of mass.
    display(root)
    quadt.scatter(root.center_of_mass[0], root.center_of_mass[1], c='red', marker='x')
|
96 |
+
|
97 |
+
def compute_force(root,x,y,m):
    """Barnes-Hut force on the particle (x, y, m) from the subtree *root*.

    If the cell is far enough away (size/distance < theta) or is a leaf, it is
    treated as a single body at its center of mass; otherwise the children are
    summed recursively. The particle's own node contributes zero.

    FIXES: replaced Python 2 ``xrange`` (NameError on Python 3) with ``range``-free
    iteration, and recurse ONCE per child instead of twice — the original called
    compute_force(child, ...) separately for fx and fy, doubling the work at
    every level (exponential blow-up with tree depth).
    """
    if root.mass is None:
        return 0, 0
    # Skip self-interaction (exact match on position and mass).
    if root.center_of_mass[0] == x and root.center_of_mass[1] == y and root.mass == m:
        return 0, 0
    d = root.bbox[1]-root.bbox[0]
    r = distance(x,y, root.center_of_mass[0], root.center_of_mass[1])
    if d/r < theta or root.children is None:
        return force(m, x, y, root.mass, root.center_of_mass[0], root.center_of_mass[1])
    else:
        fx = 0.0
        fy = 0.0
        for child in root.children:
            if child is not None:
                cfx, cfy = compute_force(child, x, y, m)
                fx += cfx
                fy += cfy
        return fx, fy
|
114 |
+
|
115 |
+
################################################# SUPPORTING FUNCTION ##############################################################
|
116 |
+
|
117 |
+
def force(m, x, y, mcm, xcm, ycm):
    """Newtonian gravitational pull of a mass *mcm* at (xcm, ycm) on *m* at (x, y).

    Returns the (fx, fy) components pointing from (x, y) toward (xcm, ycm).
    """
    dx = xcm - x
    dy = ycm - y
    d = distance(x, y, xcm, ycm)
    magnitude = G * m * mcm / (d ** 2)
    heading = math.atan2(dy, dx)
    return magnitude * math.cos(heading), magnitude * math.sin(heading)
|
126 |
+
|
127 |
+
def distance(x1, y1, x2, y2):
    """Euclidean distance between (x1, y1) and (x2, y2)."""
    return math.hypot(x2 - x1, y2 - y1)
|
129 |
+
|
130 |
+
def plt_node(x, y, width):
    """Draw the square outline of a quadtree cell on the quadtree axes."""
    outline = patches.Rectangle((x, y), width, width, fill=False)
    quadt.add_patch(outline)
|
132 |
+
|
133 |
+
def find_root_bbox(array):
    """Create a suitable square boundary box for the input particles.

    Particle rows carry x at index 3 and y at index 4. The shorter axis is
    extended so the box is square. Returns (xmin, xmax, ymin, ymax), or None
    for fewer than two particles.

    FIX: replaced Python 2 ``xrange`` (NameError on Python 3) with min/max scans.
    """
    if len(array) == 0 or len(array) == 1:
        return None
    xs = [p[3] for p in array]
    ys = [p[4] for p in array]
    xmin, xmax = min(xs), max(xs)
    ymin, ymax = min(ys), max(ys)
    if xmax - xmin == ymax - ymin:
        return xmin, xmax, ymin, ymax
    elif xmax - xmin > ymax - ymin:
        # Grow the y-span to match the x-span.
        return xmin, xmax, ymin, ymax+(xmax-xmin-ymax+ymin)
    else:
        # Grow the x-span to match the y-span.
        return xmin, xmax+(ymax-ymin-xmax+xmin), ymin, ymax
|
154 |
+
|
155 |
+
def quadrant_of_particle(bbox, x, y):
    """Return the quadrant index of (x, y) within *bbox*.

    0 = upper-left, 1 = upper-right, 2 = lower-right, 3 = lower-left
    (points exactly on a midline go to the lower-numbered side, as before).
    """
    mid_x = (bbox[0] + bbox[1]) / 2
    mid_y = (bbox[2] + bbox[3]) / 2
    if y >= mid_y:
        return 0 if x <= mid_x else 1
    return 2 if x >= mid_x else 3
|
168 |
+
|
169 |
+
def quadrant_bbox(bbox,quadrant):
    """Return the (xmin, xmax, ymin, ymax) coordinates of the given quadrant.

    Quadrant numbering matches quadrant_of_particle: 0 upper-left,
    1 upper-right, 2 lower-right, 3 lower-left. Unknown indices yield None.
    """
    x = (bbox[0] + bbox[1]) / 2
    y = (bbox[2] + bbox[3]) / 2
    sub_boxes = {
        0: (bbox[0], x, y, bbox[3]),  # upper-left
        1: (x, bbox[1], y, bbox[3]),  # upper-right
        2: (x, bbox[1], bbox[2], y),  # lower-right
        3: (bbox[0], x, bbox[2], y),  # lower-left
    }
    return sub_boxes.get(quadrant)
|
186 |
+
|
187 |
+
def data_from_file(filename, array):
    """Append particle rows parsed from *filename* into *array* (in place).

    Each CSV line is name,color,mass,x,y,vx,vy with positions in AU and
    velocities in km/s; both are converted to SI units (m, m/s) here.
    Lines starting with '#' are skipped.
    """
    with open(filename) as f:
        for line in f:
            if line[0] == '#':
                continue
            name, color, m, x, y, vx, vy = line.split(',')
            row = [name, color, float(m),
                   float(x) * AU, float(y) * AU,
                   float(vx) * 1000, float(vy) * 1000]
            array.append(row)
|
195 |
+
|
196 |
+
if __name__ == '__main__':
    # Load the solar-system bodies and run the simulation: trajectories are
    # scattered on fig1 (sim axes), the final quadtree on fig2 (quadt axes).
    filename = ('solar-system.txt')
    particles = []
    data_from_file(filename, particles)
    # Legacy debugging snippet kept from the Python 2 original (note the
    # Python 2 `print` statements — do not uncomment as-is on Python 3):
    #root = Node()
    #root.center_of_mass = []
    #root.bbox = find_root_bbox(particles)
    #for i in xrange(len(particles)):
    #    quad_insert(root, particles[i][3], particles[i][4], particles[i][2])
    #print 'Boundary box: ',root.bbox
    #print 'Total mass: ',root.mass
    #print 'Coordinate of center of mass: ',root.center_of_mass
    #plt.scatter(root.center_of_mass[0], root.center_of_mass[1], c='r', marker='x', s=50)
    #print 'Theta: ', theta
    integrate(particles)
    plt.show()
|
utils/kg/construct_kg.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_community.graphs import Neo4jGraph
|
2 |
+
from langchain_experimental.graph_transformers import LLMGraphTransformer
|
3 |
+
from langchain_openai import ChatOpenAI
|
4 |
+
from langchain_core.documents import Document
|
5 |
+
|
6 |
+
def get_graph(text,allowed_nodes=None,prompt=None):
    """Convert raw text into LLM-extracted graph documents (nodes + relationships).

    Args:
        text: source text to analyze.
        allowed_nodes: optional whitelist of node labels passed to the transformer.
        prompt: accepted for interface compatibility; not used by this implementation.
    """
    llm = ChatOpenAI(temperature=0, model_name="gpt-4o-2024-08-06")

    transformer_kwargs = {"llm": llm}
    if allowed_nodes:
        transformer_kwargs["allowed_nodes"] = allowed_nodes
    llm_transformer = LLMGraphTransformer(**transformer_kwargs)

    documents = [Document(page_content=text)]
    return llm_transformer.convert_to_graph_documents(documents)
|
19 |
+
|
20 |
+
|