Ilyas KHIAT committed
Commit e346593
1 Parent(s): bdd1430
.chainlit/config.toml ADDED
@@ -0,0 +1,97 @@
+ [project]
+ # Whether to enable telemetry (default: true). No personal data is collected.
+ enable_telemetry = true
+
+
+ # List of environment variables to be provided by each user to use the app.
+ user_env = []
+
+ # Duration (in seconds) during which the session is saved when the connection is lost
+ session_timeout = 3600
+
+ # Enable third parties caching (e.g LangChain cache)
+ cache = false
+
+ # Authorized origins
+ allow_origins = ["*"]
+
+ # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
+ # follow_symlink = false
+
+ [features]
+ # Show the prompt playground
+ prompt_playground = true
+
+ # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
+ unsafe_allow_html = false
+
+ # Process and display mathematical expressions. This can clash with "$" characters in messages.
+ latex = false
+
+ # Authorize users to upload files with messages
+ multi_modal = true
+
+ # Allows user to use speech to text
+ [features.speech_to_text]
+ enabled = false
+ # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
+ # language = "en-US"
+
+ [UI]
+ # Name of the app and chatbot.
+ name = "Chatbot"
+
+ # Show the readme while the thread is empty.
+ show_readme_as_default = true
+
+ # Description of the app and chatbot. This is used for HTML tags.
+ # description = ""
+
+ # Large size content are by default collapsed for a cleaner ui
+ default_collapse_content = true
+
+ # The default value for the expand messages settings.
+ default_expand_messages = false
+
+ # Hide the chain of thought details from the user in the UI.
+ hide_cot = false
+
+ # Link to your github repo. This will add a github button in the UI's header.
+ # github = ""
+
+ # Specify a CSS file that can be used to customize the user interface.
+ # The CSS file can be served from the public directory or via an external link.
+ # custom_css = "/public/test.css"
+
+ # Specify a Javascript file that can be used to customize the user interface.
+ # The Javascript file can be served from the public directory.
+ # custom_js = "/public/test.js"
+
+ # Specify a custom font url.
+ # custom_font = "https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700&display=swap"
+
+ # Override default MUI light theme. (Check theme.ts)
+ [UI.theme]
+ #font_family = "Inter, sans-serif"
+ [UI.theme.light]
+ #background = "#FAFAFA"
+ #paper = "#FFFFFF"
+
+ [UI.theme.light.primary]
+ #main = "#F80061"
+ #dark = "#980039"
+ #light = "#FFE7EB"
+
+ # Override default MUI dark theme. (Check theme.ts)
+ [UI.theme.dark]
+ #background = "#FAFAFA"
+ #paper = "#FFFFFF"
+
+ [UI.theme.dark.primary]
+ #main = "#F80061"
+ #dark = "#980039"
+ #light = "#FFE7EB"
+
+
+ [meta]
+ generated_by = "1.0.301"
.chainlit/translations/en-US.json ADDED
@@ -0,0 +1,155 @@
+ {
+   "components": {
+     "atoms": {
+       "buttons": {
+         "userButton": {
+           "menu": {
+             "settings": "Settings",
+             "settingsKey": "S",
+             "APIKeys": "API Keys",
+             "logout": "Logout"
+           }
+         }
+       }
+     },
+     "molecules": {
+       "newChatButton": {
+         "newChat": "New Chat"
+       },
+       "tasklist": {
+         "TaskList": {
+           "title": "\ud83d\uddd2\ufe0f Task List",
+           "loading": "Loading...",
+           "error": "An error occurred"
+         }
+       },
+       "attachments": {
+         "cancelUpload": "Cancel upload",
+         "removeAttachment": "Remove attachment"
+       },
+       "newChatDialog": {
+         "createNewChat": "Create new chat?",
+         "clearChat": "This will clear the current messages and start a new chat.",
+         "cancel": "Cancel",
+         "confirm": "Confirm"
+       },
+       "settingsModal": {
+         "expandMessages": "Expand Messages",
+         "hideChainOfThought": "Hide Chain of Thought",
+         "darkMode": "Dark Mode"
+       }
+     },
+     "organisms": {
+       "chat": {
+         "history": {
+           "index": {
+             "lastInputs": "Last Inputs",
+             "noInputs": "Such empty...",
+             "loading": "Loading..."
+           }
+         },
+         "inputBox": {
+           "input": {
+             "placeholder": "Type your message here..."
+           },
+           "speechButton": {
+             "start": "Start recording",
+             "stop": "Stop recording"
+           },
+           "SubmitButton": {
+             "sendMessage": "Send message",
+             "stopTask": "Stop Task"
+           },
+           "UploadButton": {
+             "attachFiles": "Attach files"
+           },
+           "waterMark": {
+             "text": "Built with"
+           }
+         },
+         "Messages": {
+           "index": {
+             "running": "Running",
+             "executedSuccessfully": "executed successfully",
+             "failed": "failed",
+             "feedbackUpdated": "Feedback updated",
+             "updating": "Updating"
+           }
+         },
+         "dropScreen": {
+           "dropYourFilesHere": "Drop your files here"
+         },
+         "index": {
+           "failedToUpload": "Failed to upload",
+           "cancelledUploadOf": "Cancelled upload of",
+           "couldNotReachServer": "Could not reach the server",
+           "continuingChat": "Continuing previous chat"
+         },
+         "settings": {
+           "settingsPanel": "Settings panel",
+           "reset": "Reset",
+           "cancel": "Cancel",
+           "confirm": "Confirm"
+         }
+       },
+       "threadHistory": {
+         "sidebar": {
+           "filters": {
+             "FeedbackSelect": {
+               "feedbackAll": "Feedback: All",
+               "feedbackPositive": "Feedback: Positive",
+               "feedbackNegative": "Feedback: Negative"
+             },
+             "SearchBar": {
+               "search": "Search"
+             }
+           },
+           "DeleteThreadButton": {
+             "confirmMessage": "This will delete the thread as well as its messages and elements.",
+             "cancel": "Cancel",
+             "confirm": "Confirm",
+             "deletingChat": "Deleting chat",
+             "chatDeleted": "Chat deleted"
+           },
+           "index": {
+             "pastChats": "Past Chats"
+           },
+           "ThreadList": {
+             "empty": "Empty..."
+           },
+           "TriggerButton": {
+             "closeSidebar": "Close sidebar",
+             "openSidebar": "Open sidebar"
+           }
+         },
+         "Thread": {
+           "backToChat": "Go back to chat",
+           "chatCreatedOn": "This chat was created on"
+         }
+       },
+       "header": {
+         "chat": "Chat",
+         "readme": "Readme"
+       }
+     }
+   },
+   "hooks": {
+     "useLLMProviders": {
+       "failedToFetchProviders": "Failed to fetch providers:"
+     }
+   },
+   "pages": {
+     "Design": {},
+     "Env": {
+       "savedSuccessfully": "Saved successfully",
+       "requiredApiKeys": "Required API Keys",
+       "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage."
+     },
+     "Page": {
+       "notPartOfProject": "You are not part of this project."
+     },
+     "ResumeButton": {
+       "resumeChat": "Resume Chat"
+     }
+   }
+ }
.chainlit/translations/pt-BR.json ADDED
@@ -0,0 +1,155 @@
+ {
+   "components": {
+     "atoms": {
+       "buttons": {
+         "userButton": {
+           "menu": {
+             "settings": "Configura\u00e7\u00f5es",
+             "settingsKey": "S",
+             "APIKeys": "Chaves de API",
+             "logout": "Sair"
+           }
+         }
+       }
+     },
+     "molecules": {
+       "newChatButton": {
+         "newChat": "Nova Conversa"
+       },
+       "tasklist": {
+         "TaskList": {
+           "title": "\ud83d\uddd2\ufe0f Lista de Tarefas",
+           "loading": "Carregando...",
+           "error": "Ocorreu um erro"
+         }
+       },
+       "attachments": {
+         "cancelUpload": "Cancelar envio",
+         "removeAttachment": "Remover anexo"
+       },
+       "newChatDialog": {
+         "createNewChat": "Criar novo chat?",
+         "clearChat": "Isso limpar\u00e1 as mensagens atuais e iniciar\u00e1 uma nova conversa.",
+         "cancel": "Cancelar",
+         "confirm": "Confirmar"
+       },
+       "settingsModal": {
+         "expandMessages": "Expandir Mensagens",
+         "hideChainOfThought": "Esconder Sequ\u00eancia de Pensamento",
+         "darkMode": "Modo Escuro"
+       }
+     },
+     "organisms": {
+       "chat": {
+         "history": {
+           "index": {
+             "lastInputs": "\u00daltimas Entradas",
+             "noInputs": "Vazio...",
+             "loading": "Carregando..."
+           }
+         },
+         "inputBox": {
+           "input": {
+             "placeholder": "Digite sua mensagem aqui..."
+           },
+           "speechButton": {
+             "start": "Iniciar grava\u00e7\u00e3o",
+             "stop": "Parar grava\u00e7\u00e3o"
+           },
+           "SubmitButton": {
+             "sendMessage": "Enviar mensagem",
+             "stopTask": "Parar Tarefa"
+           },
+           "UploadButton": {
+             "attachFiles": "Anexar arquivos"
+           },
+           "waterMark": {
+             "text": "Constru\u00eddo com"
+           }
+         },
+         "Messages": {
+           "index": {
+             "running": "Executando",
+             "executedSuccessfully": "executado com sucesso",
+             "failed": "falhou",
+             "feedbackUpdated": "Feedback atualizado",
+             "updating": "Atualizando"
+           }
+         },
+         "dropScreen": {
+           "dropYourFilesHere": "Solte seus arquivos aqui"
+         },
+         "index": {
+           "failedToUpload": "Falha ao enviar",
+           "cancelledUploadOf": "Envio cancelado de",
+           "couldNotReachServer": "N\u00e3o foi poss\u00edvel conectar ao servidor",
+           "continuingChat": "Continuando o chat anterior"
+         },
+         "settings": {
+           "settingsPanel": "Painel de Configura\u00e7\u00f5es",
+           "reset": "Redefinir",
+           "cancel": "Cancelar",
+           "confirm": "Confirmar"
+         }
+       },
+       "threadHistory": {
+         "sidebar": {
+           "filters": {
+             "FeedbackSelect": {
+               "feedbackAll": "Feedback: Todos",
+               "feedbackPositive": "Feedback: Positivo",
+               "feedbackNegative": "Feedback: Negativo"
+             },
+             "SearchBar": {
+               "search": "Buscar"
+             }
+           },
+           "DeleteThreadButton": {
+             "confirmMessage": "Isso deletar\u00e1 a conversa, assim como suas mensagens e elementos.",
+             "cancel": "Cancelar",
+             "confirm": "Confirmar",
+             "deletingChat": "Deletando conversa",
+             "chatDeleted": "Conversa deletada"
+           },
+           "index": {
+             "pastChats": "Conversas Anteriores"
+           },
+           "ThreadList": {
+             "empty": "Vazio..."
+           },
+           "TriggerButton": {
+             "closeSidebar": "Fechar barra lateral",
+             "openSidebar": "Abrir barra lateral"
+           }
+         },
+         "Thread": {
+           "backToChat": "Voltar para a conversa",
+           "chatCreatedOn": "Esta conversa foi criada em"
+         }
+       },
+       "header": {
+         "chat": "Conversa",
+         "readme": "Leia-me"
+       }
+     }
+   },
+   "hooks": {
+     "useLLMProviders": {
+       "failedToFetchProviders": "Falha ao buscar provedores:"
+     }
+   },
+   "pages": {
+     "Design": {},
+     "Env": {
+       "savedSuccessfully": "Salvo com sucesso",
+       "requiredApiKeys": "Chaves de API necess\u00e1rias",
+       "requiredApiKeysInfo": "Para usar este aplicativo, as seguintes chaves de API s\u00e3o necess\u00e1rias. As chaves s\u00e3o armazenadas localmente em seu dispositivo."
+     },
+     "Page": {
+       "notPartOfProject": "Voc\u00ea n\u00e3o faz parte deste projeto."
+     },
+     "ResumeButton": {
+       "resumeChat": "Continuar Conversa"
+     }
+   }
+ }
Dockerfile ADDED
@@ -0,0 +1,26 @@
+ FROM python:3.10
+
+ WORKDIR /code
+
+ COPY ./requirements.txt /code/requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+ #RUN apt update && apt install -y ffmpeg
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ WORKDIR $HOME/app
+
+ COPY --chown=user . $HOME/app
+
+ #COPY . .
+ #COPY .chainlit .chainlit
+
+ CMD ["chainlit", "run", "rag_app.py", "--host", "0.0.0.0", "--port", "7860"]
+ # CMD ["ls", "-a"]
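Note on this image (editorial, not part of the commit): it installs the Python dependencies, switches to a non-root `user`, copies the app into `$HOME/app`, and serves it with Chainlit on port 7860, the port Hugging Face Spaces expects. To run it locally, something like `docker build -t assistant-pac .` followed by `docker run -e MISTRAL_API_KEY=... -p 7860:7860 assistant-pac` should work; the image name and the way the key is injected are assumptions, the app only requires that `MISTRAL_API_KEY` is present in its environment (see `rag_module.py`).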
chainlit.md ADDED
@@ -0,0 +1,10 @@
+ # Bienvenue sur le chatbot ASSISTANT PAC 🚀🤖
+
+ Bonjour et bienvenue,
+
+ Je suis un agent intelligent pour vous aider à trouver les aides financières proposées par la PAC.
+ Mes connaissances se basent sur la documentation officielle fournie par le Ministère de l'Agriculture et de la Souveraineté alimentaire sur la PAC 2023-2027.
+ Vous retrouverez ainsi des informations concernant les aides découplées et les aides couplées.
+
+ Posez votre question en fonction de votre statut et de votre spécialité.
+
rag_app.py ADDED
@@ -0,0 +1,69 @@
+ import re
+ import json
+ import chainlit as cl
+
+ from langchain_community.vectorstores import FAISS
+ from rag_module import RagModule
+
+ from collections import defaultdict
+
+
+ prompt_template = """
+ Tu t'appelles ASSISTANT PAC, agent intelligent spécialisé sur les aides financières agricoles, et ta mission est d'aider les agriculteurs (rices) et porteurs de projets agricoles à identifier les aides agricoles PAC disponibles.
+ Tu comprends et génères les réponses en français, jamais en anglais.
+
+ Merci de bien vouloir répondre aux questions en utilisant seulement le contexte suivant.
+ contexte: {context}
+
+ historique: {history}
+
+ question: {question}
+ réponse:
+ """
+
+
+ ##------------ CHAINLIT ---------------##
+ @cl.on_chat_start
+ async def start():
+     # Build the vector store and the QA chain once per session.
+     rag = RagModule()
+     db = rag.get_faiss_db()
+     qa_chain = rag.retrieval_qa_memory_chain(db, prompt_template)
+
+     msg = cl.Message(content="Lancement du bot...", author="Assistant PAC")
+     await msg.send()
+     msg.content = "Bonjour et bienvenue sur le Chatbot spécialisé dans les aides de la PAC (Politique agricole commune). Posez directement votre question pour être conseillé."
+     await msg.update()
+
+     # Keep the chain in the user session so on_message can reuse it.
+     cl.user_session.set("chain", qa_chain)
+
+
+ @cl.on_message
+ async def main(message):
+     rag = RagModule()
+     chain = cl.user_session.get("chain")
+
+     cb = cl.AsyncLangchainCallbackHandler(
+         stream_final_answer=True,
+         answer_prefix_tokens=["FINAL", "ANSWER"]
+     )
+     cb.answer_reached = True
+
+     # Callbacks are passed through the runnable config so the streaming handler is actually used.
+     response = await chain.ainvoke(message.content, config={"callbacks": [cb]})
+
+     answer = response.get('result')
+     sources = rag.get_sources_document(response.get('source_documents'))
+
+     # One inline PDF element per source file cited by the retriever.
+     elements = [cl.Pdf(name="Pdf", display="inline", path=path) for path in sources]
+
+     if response.get('source_documents'):
+         answer = rag.shape_answer_with_source(answer, sources)
+     else:
+         answer += "\nNo sources found"
+
+     await cl.Message(content=answer, elements=elements, author="Assistant PAC").send()
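For orientation, here is a sketch (editorial, not part of the commit) of the objects `main()` passes between the chain and the helpers in `rag_module.py`; the file name and page numbers are hypothetical, the real values come from the PDFs in `./data/`:

```python
# Hypothetical illustration of the data handled in main() above.
response = {
    "result": "Les aides découplées comprennent ...",  # text generated by the LLM
    "source_documents": [...],  # LangChain Documents with .metadata["source"] and .metadata["page"]
}

# get_sources_document() groups the retrieved documents by file path:
sources = {"data/aides_decouplees.pdf": [0, 3]}  # hypothetical PDF and page numbers

# shape_answer_with_source() then appends one line per file to the answer:
#   "\nFichier: aides_decouplees.pdf - Page: [0, 3]"
```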
rag_module.py ADDED
@@ -0,0 +1,221 @@
+
+ #load & split data
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ # embed data
+ from langchain_mistralai import MistralAIEmbeddings
+ # vector store
+ from langchain_community.vectorstores import FAISS
+ # prompt
+ from langchain.prompts import PromptTemplate
+ # memory
+ from langchain.memory import ConversationBufferMemory
+ #llm
+ from langchain_mistralai.chat_models import ChatMistralAI
+
+ #chain modules
+ from langchain.chains import RetrievalQA
+ from langchain.embeddings import CacheBackedEmbeddings
+ from langchain.storage import LocalFileStore
+
+ from langchain_community.document_loaders import PyPDFLoader
+
+ # import PyPDF2
+ import os
+ import re
+ from dotenv import load_dotenv
+ load_dotenv()
+ from collections import defaultdict
+
+ api_key = os.environ.get("MISTRAL_API_KEY")
+
+
+ def extract_pdfs_from_folder(folder_path):
+     """Load every PDF found in folder_path with PyPDFLoader and return the concatenated list of page documents."""
+     pdf_files = []
+     for file_name in os.listdir(folder_path):
+         if file_name.endswith(".pdf"):
+             pdf_files.append(os.path.join(folder_path, file_name))
+
+     extracted_texts = []
+     for pdf_file in pdf_files:
+         loader = PyPDFLoader(pdf_file)
+         pages = loader.load()
+         extracted_texts += pages
+
+     return extracted_texts
+
+
+ class RagModule():
+     def __init__(self):
+         self.mistral_api_key = api_key
+         self.model_name_embedding = "mistral-embed"
+         self.embedding_model = MistralAIEmbeddings(model=self.model_name_embedding, mistral_api_key=self.mistral_api_key)
+
+         self.chunk_size = 1000
+         self.chunk_overlap = 120
+         self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap)
+         self.db_faiss_path = "data/vector_store"
+         #params llm
+         self.llm_model = "mistral-small"
+         self.max_tokens = 512
+         self.top_p = 0.5
+         self.temperature = 0.1
+
+     def split_text(self, text: str) -> list:
+         """Split a raw text into chunks using the configured RecursiveCharacterTextSplitter.
+
+         Args:
+             text (str): raw text to split
+
+         Returns:
+             list: list of text chunks
+         """
+         texts = self.text_splitter.split_text(text)
+         return texts
+
+     def get_metadata(self, texts: list) -> list:
+         """Build one metadata dict per chunk, identifying each chunk by its paragraph index.
+
+         Args:
+             texts (list): list of text chunks
+
+         Returns:
+             list: list of {"source": "Paragraphe: i"} dicts, one per chunk
+         """
+         metadatas = [{"source": f'Paragraphe: {i}'} for i in range(len(texts))]
+         return metadatas
+
+     def get_faiss_db(self):
+         """Build the FAISS vector store from the PDFs in ./data/ using cache-backed Mistral embeddings, save it locally and return it."""
+         data = extract_pdfs_from_folder("./data/")
+
+         text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=1000,
+             chunk_overlap=100
+         )
+         chunked_documents = text_splitter.split_documents(data)
+
+         # Cache embeddings on disk so unchanged chunks are not re-embedded on every start.
+         store = LocalFileStore("./cache/")
+         embedder = CacheBackedEmbeddings.from_bytes_store(self.embedding_model, store, namespace=self.embedding_model.model)
+
+         vector_store = FAISS.from_documents(chunked_documents, embedder)
+         vector_store.save_local("faiss_index")
+
+         return vector_store
+
+     def set_custom_prompt(self, prompt_template: str):
+         """Instantiate the prompt template used for Q&A retrieval.
+
+         Args:
+             prompt_template (str): template string; its placeholders are inferred as input variables
+         """
+         prompt = PromptTemplate.from_template(
+             template=prompt_template,
+         )
+
+         return prompt
+
+     def load_mistral(self):
+         """Instantiate the Mistral chat LLM."""
+         model_kwargs = {
+             "mistral_api_key": self.mistral_api_key,
+             "model": self.llm_model,
+             "max_tokens": self.max_tokens,
+             "top_p": self.top_p,
+             "temperature": self.temperature,
+         }
+
+         llm = ChatMistralAI(**model_kwargs)
+
+         return llm
+
+     def retrieval_qa_memory_chain(self, db, prompt_template):
+         """Build a RetrievalQA chain with conversation memory on top of the given vector store."""
+         llm = self.load_mistral()
+         prompt = self.set_custom_prompt(prompt_template)
+         memory = ConversationBufferMemory(
+             memory_key='history',
+             input_key='question'
+         )
+         chain_type_kwargs = {
+             "prompt": prompt,
+             "memory": memory
+         }
+
+         qa_chain = RetrievalQA.from_chain_type(
+             llm=llm,
+             chain_type='stuff',
+             retriever=db.as_retriever(search_kwargs={"k": 5}),
+             chain_type_kwargs=chain_type_kwargs,
+             return_source_documents=True,
+         )
+
+         return qa_chain
+
+     def retrieval_qa_chain(self, db, prompt_template):
+         """Build a RetrievalQA chain without memory on top of the given vector store."""
+         llm = self.load_mistral()
+         prompt = self.set_custom_prompt(prompt_template)
+
+         chain_type_kwargs = {
+             "prompt": prompt,
+         }
+
+         qa_chain = RetrievalQA.from_chain_type(
+             llm=llm,
+             chain_type='stuff',
+             retriever=db.as_retriever(search_kwargs={"k": 3}),
+             chain_type_kwargs=chain_type_kwargs,
+             return_source_documents=True,
+         )
+
+         return qa_chain
+
+     def get_sources_document(self, source_documents: list) -> dict:
+         """Generate a dictionary mapping each source file path to the list of pages cited from it.
+
+         Args:
+             source_documents (list): source_documents returned by the RAG chain
+
+         Returns:
+             dict: {
+                 path/to/file1: [0, 1, 3],
+                 path/to/file2: [5, 2]
+             }
+         """
+         sources = defaultdict(list)
+         for doc in source_documents:
+             sources[doc.metadata["source"]].append(doc.metadata["page"])
+
+         return sources
+
+     def shape_answer_with_source(self, answer: str, sources: dict):
+         """Append one "Fichier: ... - Page: ..." line per source file to the answer.
+
+         Args:
+             answer (str): answer generated by the chain
+             sources (dict): mapping of file path to list of pages, as returned by get_sources_document
+         """
+         pattern = r"^(.+)\/([^\/]+)$"
+
+         source_msg = ""
+         for path, page in sources.items():
+             file = re.findall(pattern, path)[0][1]
+             source_msg += f"\nFichier: {file} - Page: {page}"
+
+         answer += f"\n{source_msg}"
+
+         return answer
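A minimal way to exercise `RagModule` outside Chainlit (editorial sketch: it assumes `MISTRAL_API_KEY` is exported and that `./data/` contains the PAC PDFs; the template and question below are placeholders, the real app uses the richer `prompt_template` from `rag_app.py`):

```python
# Sketch: build the index and query the memory-backed RetrievalQA chain directly.
from rag_module import RagModule

# Any template exposing {context}, {history} and {question} placeholders works here.
template = "contexte: {context}\n\nhistorique: {history}\n\nquestion: {question}\nréponse:"

rag = RagModule()
db = rag.get_faiss_db()                                   # embeds ./data/*.pdf and saves the FAISS index
chain = rag.retrieval_qa_memory_chain(db, template)

result = chain.invoke("Quelles sont les aides découplées ?")        # example question
print(result["result"])                                             # generated answer
print(dict(rag.get_sources_document(result["source_documents"])))   # {path: [pages]}
```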
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ langchain==0.1.7
+ langchain-core==0.1.23
+ langchain-mistralai==0.0.4
+ langchain-community==0.0.20
+ faiss-cpu==1.7.4
+ python-dotenv==1.0.1
+ chainlit
+ openai
+ pypdf==4.0.2
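A note on the pins (editorial): `rag_module.py` relies on LangChain 0.1.x APIs (`RetrievalQA`, `ConversationBufferMemory`, `CacheBackedEmbeddings`) and on `langchain-mistralai` 0.0.4 for `ChatMistralAI` / `MistralAIEmbeddings`, so the LangChain packages should be upgraded together rather than independently. `chainlit` and `openai` are left unpinned, and `openai` does not appear to be imported by the application code.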