import os
import json
import bcrypt
import pandas as pd
import numpy as np
from typing import List
from pathlib import Path
from langchain_huggingface import HuggingFaceEndpoint
from langchain.schema.runnable.config import RunnableConfig
from langchain.schema import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

from langchain.agents import AgentExecutor
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent, create_csv_agent

import chainlit as cl
from chainlit.input_widget import TextInput, Select, Switch, Slider

from deep_translator import GoogleTranslator

@cl.password_auth_callback
def auth_callback(username: str, password: str):
    # Look up the submitted username in the JSON user list stored in the environment.
    auth = json.loads(os.environ['CHAINLIT_AUTH_LOGIN'])
    user = next((d for d in auth if d['ident'] == username), None)
    if user is None:
        return None
    ident, pwd, role = user['ident'], user['pwd'], user['role']
    # checkpw against a freshly salted hash of the stored value amounts to an equality check on the credentials.
    resultLogAdmin = bcrypt.checkpw(username.encode('utf-8'), bcrypt.hashpw(ident.encode('utf-8'), bcrypt.gensalt()))
    resultPwdAdmin = bcrypt.checkpw(password.encode('utf-8'), bcrypt.hashpw(pwd.encode('utf-8'), bcrypt.gensalt()))
    if resultLogAdmin and resultPwdAdmin and role == "admindatapcc":
        return cl.User(
            identifier=ident + " : 🧑‍💼 Admin Datapcc", metadata={"role": "admin", "provider": "credentials"}
        )
    elif resultLogAdmin and resultPwdAdmin and role == "userdatapcc":
        return cl.User(
            identifier=ident + " : 🧑‍🎓 User Datapcc", metadata={"role": "user", "provider": "credentials"}
        )
    return None
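# Note: CHAINLIT_AUTH_LOGIN is assumed to hold a JSON list of user records with
# "ident", "pwd" and "role" keys (the fields read above), e.g.
#   CHAINLIT_AUTH_LOGIN='[{"ident": "admin", "pwd": "secret", "role": "admindatapcc"}]'
# The values shown here are purely illustrative.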
        
def create_agent(filename: str):
    """
    Create an agent that can access and use a large language model (LLM).

    Args:
        filename: The path to the CSV file that contains the data.

    Returns:
        An agent that can access and use the LLM.
    """

    # Create the Hugging Face endpoint LLM; HUGGINGFACEHUB_API_TOKEN is read from the environment.
    repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    llm = HuggingFaceEndpoint(
        repo_id=repo_id, max_new_tokens=5300, temperature=0.1, task="text-generation", streaming=True
    )

    # Read the CSV file into a Pandas DataFrame.
    df = pd.read_csv(filename)

    # Create a Pandas DataFrame agent.
    return create_pandas_dataframe_agent(llm, df, verbose=False)

def query_agent(agent, query):
    """
    Query an agent and return the response as a string.

    Args:
        agent: The agent to query.
        query: The query to ask the agent.

    Returns:
        The response from the agent as a string.
    """

    prompt = (
        """
            For the following query, if it requires drawing a table, reply as follows:
            {"table": {"columns": ["column1", "column2", ...], "data": [[value1, value2, ...], [value1, value2, ...], ...]}}

            If the query requires creating a bar chart, reply as follows:
            {"bar": {"columns": ["A", "B", "C", ...], "data": [25, 24, 10, ...]}}

            If the query requires creating a line chart, reply as follows:
            {"line": {"columns": ["A", "B", "C", ...], "data": [25, 24, 10, ...]}}

            There can only be two types of chart, "bar" and "line".

            If it is just asking a question that requires neither, reply as follows:
            {"answer": "answer"}
            Example:
            {"answer": "The title with the highest rating is 'Gilead'"}

            If you do not know the answer, reply as follows:
            {"answer": "I do not know."}

            Return all output as a string.

            All strings in the "columns" list and in the data list should be in double quotes.

            For example: {"columns": ["title", "ratings_count"], "data": [["Gilead", 361], ["Spider's Web", 5164]]}

            Let's think step by step.

            Below is the query.
            Query: 
            """
        + query
    )

    # Run the prompt through the agent.
    response = agent.run(prompt)

    # Convert the response to a string.
    return str(response)
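
# Minimal usage sketch (illustrative; assumes HUGGINGFACEHUB_API_TOKEN is set and the
# CSV file exists at the given path):
#
#   agent = create_agent("./public/ExpeCFA_LP_CAA.csv")
#   print(query_agent(agent, "How many respondents are in the survey?"))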
    
def decode_response(response: str) -> dict:
    """This function converts the string response from the model to a dictionary object.

    Args:
        response (str): response from the model

    Returns:
        dict: dictionary with response data
    """
    return json.loads(response)
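
# Example (illustrative): a plain-answer payload decodes to a dict,
#   decode_response('{"answer": "I do not know."}') -> {"answer": "I do not know."}
# Malformed model output would raise json.JSONDecodeError here.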

async def write_response(response_dict: dict):
    """
    Write a response from an agent to the Chainlit app.

    Args:
        response_dict: The response from the agent.

    Returns:
        None.
    """

    # Check if the response is a plain answer, then send it translated to French.
    if "answer" in response_dict:
        await cl.Message(author="COPILOT", content=GoogleTranslator(source='auto', target='fr').translate(response_dict["answer"])).send()
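
# Hypothetical extension (not part of the original app): the prompt in query_agent also
# allows "table", "bar" and "line" payloads, but only "answer" is rendered above. A
# sketch of how a "table" payload could be shown with the same Chainlit API; the helper
# name is an assumption.
async def write_table_response(response_dict: dict):
    if "table" in response_dict:
        table = response_dict["table"]
        # Rebuild the DataFrame described by the agent and render it as plain text.
        df = pd.DataFrame(table["data"], columns=table["columns"])
        await cl.Message(author="COPILOT", content=df.to_string(index=False)).send()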

@cl.set_chat_profiles
async def chat_profile():
    return [
        cl.ChatProfile(name="Traitement des données d'enquête : «Expé CFA : questionnaire auprès des professionnels de la branche de l'agencement»",markdown_description="Vidéo exploratoire autour de l'événement",icon="/public/logo-ofipe.png",),
    ]
    
@cl.set_starters
async def set_starters():
    return [
        cl.Starter(
            label="Répartition du nombre de CAA dans les entreprises",
            message="Quel est le nombre de chargé.e d'affaires en agencement dans chaque type d'entreprises?",
            icon="/public/request-theme.svg",
            )
    ]
    
@cl.on_message
async def on_message(message: cl.Message):
    await cl.Message(content="> SURVEYIA").send()
    agent = create_agent("./public/ExpeCFA_LP_CAA.csv")

    # Query the agent.
    response = query_agent(agent=agent, query=message.content)

    # Decode the response.
    decoded_response = decode_response(response)

    # Write the response to the Chainlit app.
    await write_response(decoded_response)