Spaces:

DHEIVER
/

Medchat-Mixtral-Chat

Sleeping

App Files Files Community

DHEIVER committed on May 4, 2024

Commit

10f4f78

verified ·

1 Parent(s): 9248299

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -95

app.py CHANGED Viewed

@@ -1,105 +1,106 @@
 from huggingface_hub import InferenceClient
 import gradio as gr
-class MixtralChatInterface:
-    def __init__(self, model_name):
-        self.client = InferenceClient(model_name)
-        self.additional_inputs=[
-            gr.Textbox(
-                label="System Prompt",
-                max_lines=1,
-                interactive=True,
-            ),
-            gr.Slider(
-                label="Temperature",
-                value=0.9,
-                minimum=0.0,
-                maximum=1.0,
-                step=0.05,
-                interactive=True,
-                info="Higher values produce more diverse outputs",
-            ),
-            gr.Slider(
-                label="Max new tokens",
-                value=256,
-                minimum=0,
-                maximum=1048,
-                step=64,
-                interactive=True,
-                info="The maximum numbers of new tokens",
-            ),
-            gr.Slider(
-                label="Top-p (nucleus sampling)",
-                value=0.90,
-                minimum=0.0,
-                maximum=1,
-                step=0.05,
-                interactive=True,
-                info="Higher values sample more low-probability tokens",
-            ),
-            gr.Slider(
-                label="Repetition penalty",
-                value=1.2,
-                minimum=1.0,
-                maximum=2.0,
-                step=0.05,
-                interactive=True,
-                info="Penalize repeated tokens",
-            )
-        ]
-        self.examples=[
-            ["I'm planning a vacation to Japan. Can you suggest a one-week itinerary including must-visit places and local cuisines to try?", None, None, None, None, None, ],
-            ["Can you write a short story about a time-traveling detective who solves historical mysteries?", None, None, None, None, None,],
-            ["I'm trying to learn French. Can you provide some common phrases that would be useful for a beginner, along with their pronunciations?", None, None, None, None, None,],
-            ["I have chicken, rice, and bell peppers in my kitchen. Can you suggest an easy recipe I can make with these ingredients?", None, None, None, None, None,],
-            ["Can you explain how the QuickSort algorithm works and provide a Python implementation?", None, None, None, None, None,],
-            ["What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?", None, None, None, None, None,],
-        ]
-    def format_prompt(self, message, history):
-        prompt = "<s>"
-        for user_prompt, bot_response in history:
-            prompt += f"[INST] {user_prompt} [/INST]"
-            prompt += f" {bot_response}</s> "
-        prompt += f"[INST] {message} [/INST]"
-        return prompt
-    def generate(self, prompt, history, system_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
-        temperature = float(temperature)
-        if temperature < 1e-2:
-            temperature = 1e-2
-        top_p = float(top_p)
-        generate_kwargs = dict(
-            temperature=temperature,
-            max_new_tokens=max_new_tokens,
-            top_p=top_p,
-            repetition_penalty=repetition_penalty,
-            do_sample=True,
-            seed=42,
-        )
-        formatted_prompt = self.format_prompt(f"{system_prompt}, {prompt}", history)
-        stream = self.client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-        output = ""
-        for response in stream:
-            output += response.token.text
-            yield output
-        return output
-    def launch(self):
-        gr.ChatInterface(
-            fn=self.generate,
-            chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
-            additional_inputs=self.additional_inputs,
-            title="Mixtral 46.7B",
-            examples=self.examples,
-            concurrency_limit=20,
-        ).launch(show_api=False)
-# Usage
-if __name__ == "__main__":
-    model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
-    chat_interface = MixtralChatInterface(model_name)
-    chat_interface.launch()

 from huggingface_hub import InferenceClient
 import gradio as gr
+# Inference client for the Mixtral instruct model; gerar() streams through it
+client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+# Format the input prompt from the dialogue history plus the new message
+def formatar_prompt(mensagem, historico):
+    """Return the full prompt string for one generation request.
+
+    The result starts with ``<s>``. Each ``(user, bot)`` pair in
+    ``historico`` is written as ``[INST] user [/INST] bot</s> `` and
+    ``mensagem`` is appended last as an open ``[INST] ... [/INST]`` turn.
+    """
+    partes = ["<s>"]
+    for turno_usuario, turno_bot in historico:
+        partes.append(f"[INST] {turno_usuario} [/INST] {turno_bot}</s> ")
+    partes.append(f"[INST] {mensagem} [/INST]")
+    return "".join(partes)
+# Generate the model's reply, streamed token by token
+def gerar(
+    prompt, historico, prompt_sistema, temperatura=0.9, max_tokens_novos=256, top_p=0.95, penalidade_repeticao=1.0,
+):
+    """Yield the model's reply to ``prompt`` as progressively longer text.
+
+    Args:
+        prompt: The new user message.
+        historico: Earlier ``(user, bot)`` turns, passed to
+            ``formatar_prompt``.
+        prompt_sistema: Optional system instruction. When it contains
+            non-whitespace text the message sent to the model is
+            ``"<prompt_sistema>, <prompt>"``; when it is ``None`` or blank
+            the message is sent unchanged.
+        temperatura: Sampling temperature; values below 0.01 are raised
+            to 0.01.
+        max_tokens_novos: Forwarded as ``max_new_tokens``.
+        top_p: Forwarded as ``top_p`` after conversion to ``float``.
+        penalidade_repeticao: Forwarded as ``repetition_penalty``.
+
+    Yields:
+        The reply text accumulated so far, once per streamed token.
+    """
+    temperatura = max(float(temperatura), 1e-2)
+    kwargs_geracao = dict(
+        temperature=temperatura,
+        max_new_tokens=max_tokens_novos,
+        top_p=float(top_p),
+        repetition_penalty=penalidade_repeticao,
+        do_sample=True,
+        seed=42,
+    )
+    # Only prepend the system prompt when there is one: formatting None or ""
+    # into the message would send "None, ..." or ", ..." to the model.
+    if prompt_sistema and str(prompt_sistema).strip():
+        mensagem = f"{prompt_sistema}, {prompt}"
+    else:
+        mensagem = prompt
+    prompt_formatada = formatar_prompt(mensagem, historico)
+    stream = client.text_generation(prompt_formatada, **kwargs_geracao, stream=True, details=True, return_full_text=False)
+    output = ""
+    for resposta in stream:
+        output += resposta.token.text
+        yield output
+# Additional inputs for the model, created in the same order as gerar()'s
+# parameters that follow (prompt, historico)
+_campo_prompt_sistema = gr.Textbox(
+    label="Prompt do Sistema",
+    max_lines=1,
+    interactive=True,
+)
+_controle_temperatura = gr.Slider(
+    label="Temperatura",
+    info="Valores mais altos produzem saídas mais diversas",
+    minimum=0.0,
+    maximum=1.0,
+    step=0.05,
+    value=0.9,
+    interactive=True,
+)
+_controle_max_tokens = gr.Slider(
+    label="Máximo de Novos Tokens",
+    info="O número máximo de novos tokens",
+    minimum=0,
+    maximum=1048,
+    step=64,
+    value=256,
+    interactive=True,
+)
+_controle_top_p = gr.Slider(
+    label="Top-p (amostragem de núcleo)",
+    info="Valores mais altos amostram mais tokens de baixa probabilidade",
+    minimum=0.0,
+    maximum=1,
+    step=0.05,
+    value=0.90,
+    interactive=True,
+)
+_controle_penalidade = gr.Slider(
+    label="Penalidade de Repetição",
+    info="Penalize tokens repetidos",
+    minimum=1.0,
+    maximum=2.0,
+    step=0.05,
+    value=1.2,
+    interactive=True,
+)
+inputs_adicionais = [
+    _campo_prompt_sistema,
+    _controle_temperatura,
+    _controle_max_tokens,
+    _controle_top_p,
+    _controle_penalidade,
+]
+# Example prompts; each row is the message followed by None in the five
+# additional-input positions
+_perguntas_exemplo = [
+    "Estou planejando férias no Japão. Você pode sugerir um itinerário de uma semana incluindo lugares imperdíveis e culinárias locais para experimentar?",
+    "Você pode escrever uma história curta sobre um detetive viajante do tempo que resolve mistérios históricos?",
+    "Estou tentando aprender francês. Você pode fornecer algumas frases comuns que seriam úteis para um iniciante, juntamente com suas pronúncias?",
+    "Eu tenho frango, arroz e pimentões na minha cozinha. Você pode sugerir uma receita fácil que eu possa fazer com esses ingredientes?",
+    "Você pode explicar como o algoritmo QuickSort funciona e fornecer uma implementação em Python?",
+    "Quais são algumas características únicas do Rust que o destacam em comparação com outras linguagens de programação de sistemas como C ++?",
+]
+exemplos = [[pergunta, None, None, None, None, None] for pergunta in _perguntas_exemplo]
+# Chat interface: build the chat window, wire it to gerar(), then launch
+_janela_chat = gr.Chatbot(
+    show_label=False,
+    show_share_button=False,
+    show_copy_button=True,
+    likeable=True,
+    layout="panel",
+)
+_interface_chat = gr.ChatInterface(
+    fn=gerar,
+    chatbot=_janela_chat,
+    additional_inputs=inputs_adicionais,
+    title="Mixtral 46.7B",
+    examples=exemplos,
+    concurrency_limit=20,
+)
+_interface_chat.launch(show_api=False)