Ilyas KHIAT committed
Commit e346593
1 Parent(s): bdd1430
.chainlit/config.toml ADDED
@@ -0,0 +1,97 @@
+ [project]
+ # Whether to enable telemetry (default: true). No personal data is collected.
+ enable_telemetry = true
+
+
+ # List of environment variables to be provided by each user to use the app.
+ user_env = []
+
+ # Duration (in seconds) during which the session is saved when the connection is lost
+ session_timeout = 3600
+
+ # Enable third parties caching (e.g LangChain cache)
+ cache = false
+
+ # Authorized origins
+ allow_origins = ["*"]
+
+ # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
+ # follow_symlink = false
+
+ [features]
+ # Show the prompt playground
+ prompt_playground = true
+
+ # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
+ unsafe_allow_html = false
+
+ # Process and display mathematical expressions. This can clash with "$" characters in messages.
+ latex = false
+
+ # Authorize users to upload files with messages
+ multi_modal = true
+
+ # Allows user to use speech to text
+ [features.speech_to_text]
+ enabled = false
+ # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
+ # language = "en-US"
+
+ [UI]
+ # Name of the app and chatbot.
+ name = "Chatbot"
+
+ # Show the readme while the thread is empty.
+ show_readme_as_default = true
+
+ # Description of the app and chatbot. This is used for HTML tags.
+ # description = ""
+
+ # Large size content are by default collapsed for a cleaner ui
+ default_collapse_content = true
+
+ # The default value for the expand messages settings.
+ default_expand_messages = false
+
+ # Hide the chain of thought details from the user in the UI.
+ hide_cot = false
+
+ # Link to your github repo. This will add a github button in the UI's header.
+ # github = ""
+
+ # Specify a CSS file that can be used to customize the user interface.
+ # The CSS file can be served from the public directory or via an external link.
+ # custom_css = "/public/test.css"
+
+ # Specify a Javascript file that can be used to customize the user interface.
+ # The Javascript file can be served from the public directory.
+ # custom_js = "/public/test.js"
+
+ # Specify a custom font url.
+ # custom_font = "https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700&display=swap"
+
+ # Override default MUI light theme. (Check theme.ts)
+ [UI.theme]
+ #font_family = "Inter, sans-serif"
+ [UI.theme.light]
+ #background = "#FAFAFA"
+ #paper = "#FFFFFF"
+
+ [UI.theme.light.primary]
+ #main = "#F80061"
+ #dark = "#980039"
+ #light = "#FFE7EB"
+
+ # Override default MUI dark theme. (Check theme.ts)
+ [UI.theme.dark]
+ #background = "#FAFAFA"
+ #paper = "#FFFFFF"
+
+ [UI.theme.dark.primary]
+ #main = "#F80061"
+ #dark = "#980039"
+ #light = "#FFE7EB"
+
+
+ [meta]
+ generated_by = "1.0.301"
.chainlit/translations/en-US.json ADDED
@@ -0,0 +1,155 @@
+ {
+   "components": {
+     "atoms": {
+       "buttons": {
+         "userButton": {
+           "menu": {
+             "settings": "Settings",
+             "settingsKey": "S",
+             "APIKeys": "API Keys",
+             "logout": "Logout"
+           }
+         }
+       }
+     },
+     "molecules": {
+       "newChatButton": {
+         "newChat": "New Chat"
+       },
+       "tasklist": {
+         "TaskList": {
+           "title": "\ud83d\uddd2\ufe0f Task List",
+           "loading": "Loading...",
+           "error": "An error occurred"
+         }
+       },
+       "attachments": {
+         "cancelUpload": "Cancel upload",
+         "removeAttachment": "Remove attachment"
+       },
+       "newChatDialog": {
+         "createNewChat": "Create new chat?",
+         "clearChat": "This will clear the current messages and start a new chat.",
+         "cancel": "Cancel",
+         "confirm": "Confirm"
+       },
+       "settingsModal": {
+         "expandMessages": "Expand Messages",
+         "hideChainOfThought": "Hide Chain of Thought",
+         "darkMode": "Dark Mode"
+       }
+     },
+     "organisms": {
+       "chat": {
+         "history": {
+           "index": {
+             "lastInputs": "Last Inputs",
+             "noInputs": "Such empty...",
+             "loading": "Loading..."
+           }
+         },
+         "inputBox": {
+           "input": {
+             "placeholder": "Type your message here..."
+           },
+           "speechButton": {
+             "start": "Start recording",
+             "stop": "Stop recording"
+           },
+           "SubmitButton": {
+             "sendMessage": "Send message",
+             "stopTask": "Stop Task"
+           },
+           "UploadButton": {
+             "attachFiles": "Attach files"
+           },
+           "waterMark": {
+             "text": "Built with"
+           }
+         },
+         "Messages": {
+           "index": {
+             "running": "Running",
+             "executedSuccessfully": "executed successfully",
+             "failed": "failed",
+             "feedbackUpdated": "Feedback updated",
+             "updating": "Updating"
+           }
+         },
+         "dropScreen": {
+           "dropYourFilesHere": "Drop your files here"
+         },
+         "index": {
+           "failedToUpload": "Failed to upload",
+           "cancelledUploadOf": "Cancelled upload of",
+           "couldNotReachServer": "Could not reach the server",
+           "continuingChat": "Continuing previous chat"
+         },
+         "settings": {
+           "settingsPanel": "Settings panel",
+           "reset": "Reset",
+           "cancel": "Cancel",
+           "confirm": "Confirm"
+         }
+       },
+       "threadHistory": {
+         "sidebar": {
+           "filters": {
+             "FeedbackSelect": {
+               "feedbackAll": "Feedback: All",
+               "feedbackPositive": "Feedback: Positive",
+               "feedbackNegative": "Feedback: Negative"
+             },
+             "SearchBar": {
+               "search": "Search"
+             }
+           },
+           "DeleteThreadButton": {
+             "confirmMessage": "This will delete the thread as well as its messages and elements.",
+             "cancel": "Cancel",
+             "confirm": "Confirm",
+             "deletingChat": "Deleting chat",
+             "chatDeleted": "Chat deleted"
+           },
+           "index": {
+             "pastChats": "Past Chats"
+           },
+           "ThreadList": {
+             "empty": "Empty..."
+           },
+           "TriggerButton": {
+             "closeSidebar": "Close sidebar",
+             "openSidebar": "Open sidebar"
+           }
+         },
+         "Thread": {
+           "backToChat": "Go back to chat",
+           "chatCreatedOn": "This chat was created on"
+         }
+       },
+       "header": {
+         "chat": "Chat",
+         "readme": "Readme"
+       }
+     }
+   },
+   "hooks": {
+     "useLLMProviders": {
+       "failedToFetchProviders": "Failed to fetch providers:"
+     }
+   },
+   "pages": {
+     "Design": {},
+     "Env": {
+       "savedSuccessfully": "Saved successfully",
+       "requiredApiKeys": "Required API Keys",
+       "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage."
+     },
+     "Page": {
+       "notPartOfProject": "You are not part of this project."
+     },
+     "ResumeButton": {
+       "resumeChat": "Resume Chat"
+     }
+   }
+ }
.chainlit/translations/pt-BR.json ADDED
@@ -0,0 +1,155 @@
+ {
+   "components": {
+     "atoms": {
+       "buttons": {
+         "userButton": {
+           "menu": {
+             "settings": "Configura\u00e7\u00f5es",
+             "settingsKey": "S",
+             "APIKeys": "Chaves de API",
+             "logout": "Sair"
+           }
+         }
+       }
+     },
+     "molecules": {
+       "newChatButton": {
+         "newChat": "Nova Conversa"
+       },
+       "tasklist": {
+         "TaskList": {
+           "title": "\ud83d\uddd2\ufe0f Lista de Tarefas",
+           "loading": "Carregando...",
+           "error": "Ocorreu um erro"
+         }
+       },
+       "attachments": {
+         "cancelUpload": "Cancelar envio",
+         "removeAttachment": "Remover anexo"
+       },
+       "newChatDialog": {
+         "createNewChat": "Criar novo chat?",
+         "clearChat": "Isso limpar\u00e1 as mensagens atuais e iniciar\u00e1 uma nova conversa.",
+         "cancel": "Cancelar",
+         "confirm": "Confirmar"
+       },
+       "settingsModal": {
+         "expandMessages": "Expandir Mensagens",
+         "hideChainOfThought": "Esconder Sequ\u00eancia de Pensamento",
+         "darkMode": "Modo Escuro"
+       }
+     },
+     "organisms": {
+       "chat": {
+         "history": {
+           "index": {
+             "lastInputs": "\u00daltimas Entradas",
+             "noInputs": "Vazio...",
+             "loading": "Carregando..."
+           }
+         },
+         "inputBox": {
+           "input": {
+             "placeholder": "Digite sua mensagem aqui..."
+           },
+           "speechButton": {
+             "start": "Iniciar grava\u00e7\u00e3o",
+             "stop": "Parar grava\u00e7\u00e3o"
+           },
+           "SubmitButton": {
+             "sendMessage": "Enviar mensagem",
+             "stopTask": "Parar Tarefa"
+           },
+           "UploadButton": {
+             "attachFiles": "Anexar arquivos"
+           },
+           "waterMark": {
+             "text": "Constru\u00eddo com"
+           }
+         },
+         "Messages": {
+           "index": {
+             "running": "Executando",
+             "executedSuccessfully": "executado com sucesso",
+             "failed": "falhou",
+             "feedbackUpdated": "Feedback atualizado",
+             "updating": "Atualizando"
+           }
+         },
+         "dropScreen": {
+           "dropYourFilesHere": "Solte seus arquivos aqui"
+         },
+         "index": {
+           "failedToUpload": "Falha ao enviar",
+           "cancelledUploadOf": "Envio cancelado de",
+           "couldNotReachServer": "N\u00e3o foi poss\u00edvel conectar ao servidor",
+           "continuingChat": "Continuando o chat anterior"
+         },
+         "settings": {
+           "settingsPanel": "Painel de Configura\u00e7\u00f5es",
+           "reset": "Redefinir",
+           "cancel": "Cancelar",
+           "confirm": "Confirmar"
+         }
+       },
+       "threadHistory": {
+         "sidebar": {
+           "filters": {
+             "FeedbackSelect": {
+               "feedbackAll": "Feedback: Todos",
+               "feedbackPositive": "Feedback: Positivo",
+               "feedbackNegative": "Feedback: Negativo"
+             },
+             "SearchBar": {
+               "search": "Buscar"
+             }
+           },
+           "DeleteThreadButton": {
+             "confirmMessage": "Isso deletar\u00e1 a conversa, assim como suas mensagens e elementos.",
+             "cancel": "Cancelar",
+             "confirm": "Confirmar",
+             "deletingChat": "Deletando conversa",
+             "chatDeleted": "Conversa deletada"
+           },
+           "index": {
+             "pastChats": "Conversas Anteriores"
+           },
+           "ThreadList": {
+             "empty": "Vazio..."
+           },
+           "TriggerButton": {
+             "closeSidebar": "Fechar barra lateral",
+             "openSidebar": "Abrir barra lateral"
+           }
+         },
+         "Thread": {
+           "backToChat": "Voltar para a conversa",
+           "chatCreatedOn": "Esta conversa foi criada em"
+         }
+       },
+       "header": {
+         "chat": "Conversa",
+         "readme": "Leia-me"
+       }
+     }
+   },
+   "hooks": {
+     "useLLMProviders": {
+       "failedToFetchProviders": "Falha ao buscar provedores:"
+     }
+   },
+   "pages": {
+     "Design": {},
+     "Env": {
+       "savedSuccessfully": "Salvo com sucesso",
+       "requiredApiKeys": "Chaves de API necess\u00e1rias",
+       "requiredApiKeysInfo": "Para usar este aplicativo, as seguintes chaves de API s\u00e3o necess\u00e1rias. As chaves s\u00e3o armazenadas localmente em seu dispositivo."
+     },
+     "Page": {
+       "notPartOfProject": "Voc\u00ea n\u00e3o faz parte deste projeto."
+     },
+     "ResumeButton": {
+       "resumeChat": "Continuar Conversa"
+     }
+   }
+ }
Dockerfile ADDED
@@ -0,0 +1,26 @@
+ FROM python:3.10
+
+ WORKDIR /code
+
+ COPY ./requirements.txt /code/requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+ #RUN apt update && apt install -y ffmpeg
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ WORKDIR $HOME/app
+
+ COPY --chown=user . $HOME/app
+
+ #COPY . .
+ #COPY .chainlit .chainlit
+
+ CMD ["chainlit", "run", "rag_app.py", "--host", "0.0.0.0", "--port", "7860"]
+ # CMD ["ls", "-a"]
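Note on this image (editorial, not part of the commit): it installs the Python dependencies, switches to a non-root `user`, copies the app into `$HOME/app`, and serves it with Chainlit on port 7860, the port Hugging Face Spaces expects. To run it locally, something like `docker build -t assistant-pac .` followed by `docker run -e MISTRAL_API_KEY=... -p 7860:7860 assistant-pac` should work; the image name and the way the key is injected are assumptions, the app only requires that `MISTRAL_API_KEY` is present in its environment (see `rag_module.py`).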
chainlit.md ADDED
@@ -0,0 +1,10 @@
+ # Bienvenue sur le chatbot ASSISTANT PAC 🚀🤖
+
+ Bonjour et bienvenue,
+
+ Je suis un agent intelligent pour vous aider à trouver les aides financières proposées par la PAC.
+ Mes connaissances se basent sur la documentation officielle fournie par le Ministère de l'Agriculture et de la Souveraineté alimentaire sur la PAC 2023-2027.
+ Vous retrouverez ainsi des informations concernant les aides découplées et les aides couplées.
+
+ Posez votre question en fonction de votre statut et de votre spécialité.
+
rag_app.py ADDED
@@ -0,0 +1,69 @@
+ import re
+ import json
+ import chainlit as cl
+
+ from langchain_community.vectorstores import FAISS
+ from rag_module import RagModule
+
+ from collections import defaultdict
+
+
+ prompt_template = """
+ Tu t'appelles ASSISTANT PAC, agent intelligent spécialisé sur les aides financières agricoles, et ta mission est d'aider les agriculteurs (rices) et porteurs de projets agricoles à identifier les aides agricoles PAC disponibles.
+ Tu comprends et génères les réponses en français, jamais en anglais.
+
+ Merci de bien vouloir répondre aux questions en utilisant seulement le contexte suivant.
+ contexte: {context}
+
+ historique: {history}
+
+ question: {question}
+ réponse:
+ """
+
+
+ ##------------ CHAINLIT ---------------##
+ @cl.on_chat_start
+ async def start():
+     # Build the vector store and the QA chain once per session.
+     rag = RagModule()
+     db = rag.get_faiss_db()
+     qa_chain = rag.retrieval_qa_memory_chain(db, prompt_template)
+
+     msg = cl.Message(content="Lancement du bot...", author="Assistant PAC")
+     await msg.send()
+     msg.content = "Bonjour et bienvenue sur le Chatbot spécialisé dans les aides de la PAC (Politique agricole commune). Posez directement votre question pour être conseillé."
+     await msg.update()
+
+     # Keep the chain in the user session so on_message can reuse it.
+     cl.user_session.set("chain", qa_chain)
+
+
+ @cl.on_message
+ async def main(message):
+     rag = RagModule()
+     chain = cl.user_session.get("chain")
+
+     cb = cl.AsyncLangchainCallbackHandler(
+         stream_final_answer=True,
+         answer_prefix_tokens=["FINAL", "ANSWER"]
+     )
+     cb.answer_reached = True
+
+     # Callbacks are passed through the runnable config so the streaming handler is actually used.
+     response = await chain.ainvoke(message.content, config={"callbacks": [cb]})
+
+     answer = response.get('result')
+     sources = rag.get_sources_document(response.get('source_documents'))
+
+     # One inline PDF element per source file cited by the retriever.
+     elements = [cl.Pdf(name="Pdf", display="inline", path=path) for path in sources]
+
+     if response.get('source_documents'):
+         answer = rag.shape_answer_with_source(answer, sources)
+     else:
+         answer += "\nNo sources found"
+
+     await cl.Message(content=answer, elements=elements, author="Assistant PAC").send()
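For orientation, here is a sketch (editorial, not part of the commit) of the objects `main()` passes between the chain and the helpers in `rag_module.py`; the file name and page numbers are hypothetical, the real values come from the PDFs in `./data/`:

```python
# Hypothetical illustration of the data handled in main() above.
response = {
    "result": "Les aides découplées comprennent ...",  # text generated by the LLM
    "source_documents": [...],  # LangChain Documents with .metadata["source"] and .metadata["page"]
}

# get_sources_document() groups the retrieved documents by file path:
sources = {"data/aides_decouplees.pdf": [0, 3]}  # hypothetical PDF and page numbers

# shape_answer_with_source() then appends one line per file to the answer:
#   "\nFichier: aides_decouplees.pdf - Page: [0, 3]"
```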
rag_module.py ADDED
@@ -0,0 +1,221 @@
+
+ #load & split data
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ # embed data
+ from langchain_mistralai import MistralAIEmbeddings
+ # vector store
+ from langchain_community.vectorstores import FAISS
+ # prompt
+ from langchain.prompts import PromptTemplate
+ # memory
+ from langchain.memory import ConversationBufferMemory
+ #llm
+ from langchain_mistralai.chat_models import ChatMistralAI
+
+ #chain modules
+ from langchain.chains import RetrievalQA
+ from langchain.embeddings import CacheBackedEmbeddings
+ from langchain.storage import LocalFileStore
+
+ from langchain_community.document_loaders import PyPDFLoader
+
+ # import PyPDF2
+ import os
+ import re
+ from dotenv import load_dotenv
+ load_dotenv()
+ from collections import defaultdict
+
+ api_key = os.environ.get("MISTRAL_API_KEY")
+
+
+ def extract_pdfs_from_folder(folder_path):
+     """Load every PDF found in folder_path with PyPDFLoader and return the concatenated list of page documents."""
+     pdf_files = []
+     for file_name in os.listdir(folder_path):
+         if file_name.endswith(".pdf"):
+             pdf_files.append(os.path.join(folder_path, file_name))
+
+     extracted_texts = []
+     for pdf_file in pdf_files:
+         loader = PyPDFLoader(pdf_file)
+         pages = loader.load()
+         extracted_texts += pages
+
+     return extracted_texts
+
+
+ class RagModule():
+     def __init__(self):
+         self.mistral_api_key = api_key
+         self.model_name_embedding = "mistral-embed"
+         self.embedding_model = MistralAIEmbeddings(model=self.model_name_embedding, mistral_api_key=self.mistral_api_key)
+
+         self.chunk_size = 1000
+         self.chunk_overlap = 120
+         self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap)
+         self.db_faiss_path = "data/vector_store"
+         #params llm
+         self.llm_model = "mistral-small"
+         self.max_tokens = 512
+         self.top_p = 0.5
+         self.temperature = 0.1
+
+     def split_text(self, text: str) -> list:
+         """Split a raw text into chunks using the configured RecursiveCharacterTextSplitter.
+
+         Args:
+             text (str): raw text to split
+
+         Returns:
+             list: list of text chunks
+         """
+         texts = self.text_splitter.split_text(text)
+         return texts
+
+     def get_metadata(self, texts: list) -> list:
+         """Build one metadata dict per chunk, identifying each chunk by its paragraph index.
+
+         Args:
+             texts (list): list of text chunks
+
+         Returns:
+             list: list of {"source": "Paragraphe: i"} dicts, one per chunk
+         """
+         metadatas = [{"source": f'Paragraphe: {i}'} for i in range(len(texts))]
+         return metadatas
+
+     def get_faiss_db(self):
+         """Build the FAISS vector store from the PDFs in ./data/ using cache-backed Mistral embeddings, save it locally and return it."""
+         data = extract_pdfs_from_folder("./data/")
+
+         text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=1000,
+             chunk_overlap=100
+         )
+         chunked_documents = text_splitter.split_documents(data)
+
+         # Cache embeddings on disk so unchanged chunks are not re-embedded on every start.
+         store = LocalFileStore("./cache/")
+         embedder = CacheBackedEmbeddings.from_bytes_store(self.embedding_model, store, namespace=self.embedding_model.model)
+
+         vector_store = FAISS.from_documents(chunked_documents, embedder)
+         vector_store.save_local("faiss_index")
+
+         return vector_store
+
+     def set_custom_prompt(self, prompt_template: str):
+         """Instantiate the prompt template used for Q&A retrieval.
+
+         Args:
+             prompt_template (str): template string; its placeholders are inferred as input variables
+         """
+         prompt = PromptTemplate.from_template(
+             template=prompt_template,
+         )
+
+         return prompt
+
+     def load_mistral(self):
+         """Instantiate the Mistral chat LLM."""
+         model_kwargs = {
+             "mistral_api_key": self.mistral_api_key,
+             "model": self.llm_model,
+             "max_tokens": self.max_tokens,
+             "top_p": self.top_p,
+             "temperature": self.temperature,
+         }
+
+         llm = ChatMistralAI(**model_kwargs)
+
+         return llm
+
+     def retrieval_qa_memory_chain(self, db, prompt_template):
+         """Build a RetrievalQA chain with conversation memory on top of the given vector store."""
+         llm = self.load_mistral()
+         prompt = self.set_custom_prompt(prompt_template)
+         memory = ConversationBufferMemory(
+             memory_key='history',
+             input_key='question'
+         )
+         chain_type_kwargs = {
+             "prompt": prompt,
+             "memory": memory
+         }
+
+         qa_chain = RetrievalQA.from_chain_type(
+             llm=llm,
+             chain_type='stuff',
+             retriever=db.as_retriever(search_kwargs={"k": 5}),
+             chain_type_kwargs=chain_type_kwargs,
+             return_source_documents=True,
+         )
+
+         return qa_chain
+
+     def retrieval_qa_chain(self, db, prompt_template):
+         """Build a RetrievalQA chain without memory on top of the given vector store."""
+         llm = self.load_mistral()
+         prompt = self.set_custom_prompt(prompt_template)
+
+         chain_type_kwargs = {
+             "prompt": prompt,
+         }
+
+         qa_chain = RetrievalQA.from_chain_type(
+             llm=llm,
+             chain_type='stuff',
+             retriever=db.as_retriever(search_kwargs={"k": 3}),
+             chain_type_kwargs=chain_type_kwargs,
+             return_source_documents=True,
+         )
+
+         return qa_chain
+
+     def get_sources_document(self, source_documents: list) -> dict:
+         """Generate a dictionary mapping each source file path to the list of pages cited from it.
+
+         Args:
+             source_documents (list): source_documents returned by the RAG chain
+
+         Returns:
+             dict: {
+                 path/to/file1: [0, 1, 3],
+                 path/to/file2: [5, 2]
+             }
+         """
+         sources = defaultdict(list)
+         for doc in source_documents:
+             sources[doc.metadata["source"]].append(doc.metadata["page"])
+
+         return sources
+
+     def shape_answer_with_source(self, answer: str, sources: dict):
+         """Append one "Fichier: ... - Page: ..." line per source file to the answer.
+
+         Args:
+             answer (str): answer generated by the chain
+             sources (dict): mapping of file path to list of pages, as returned by get_sources_document
+         """
+         pattern = r"^(.+)\/([^\/]+)$"
+
+         source_msg = ""
+         for path, page in sources.items():
+             file = re.findall(pattern, path)[0][1]
+             source_msg += f"\nFichier: {file} - Page: {page}"
+
+         answer += f"\n{source_msg}"
+
+         return answer
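A minimal way to exercise `RagModule` outside Chainlit (editorial sketch: it assumes `MISTRAL_API_KEY` is exported and that `./data/` contains the PAC PDFs; the template and question below are placeholders, the real app uses the richer `prompt_template` from `rag_app.py`):

```python
# Sketch: build the index and query the memory-backed RetrievalQA chain directly.
from rag_module import RagModule

# Any template exposing {context}, {history} and {question} placeholders works here.
template = "contexte: {context}\n\nhistorique: {history}\n\nquestion: {question}\nréponse:"

rag = RagModule()
db = rag.get_faiss_db()                                   # embeds ./data/*.pdf and saves the FAISS index
chain = rag.retrieval_qa_memory_chain(db, template)

result = chain.invoke("Quelles sont les aides découplées ?")        # example question
print(result["result"])                                             # generated answer
print(dict(rag.get_sources_document(result["source_documents"])))   # {path: [pages]}
```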
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ langchain==0.1.7
+ langchain-core==0.1.23
+ langchain-mistralai==0.0.4
+ langchain-community==0.0.20
+ faiss-cpu==1.7.4
+ python-dotenv==1.0.1
+ chainlit
+ openai
+ pypdf==4.0.2
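A note on the pins (editorial): `rag_module.py` relies on LangChain 0.1.x APIs (`RetrievalQA`, `ConversationBufferMemory`, `CacheBackedEmbeddings`) and on `langchain-mistralai` 0.0.4 for `ChatMistralAI` / `MistralAIEmbeddings`, so the LangChain packages should be upgraded together rather than independently. `chainlit` and `openai` are left unpinned, and `openai` does not appear to be imported by the application code.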