# Mixtral-Simple — app.py (Hugging Face Space by TogetherAI, commit c65c53a, 4.89 kB)
from huggingface_hub import InferenceClient
import gradio as gr
# Hugging Face Inference API client used for streamed chat completions.
# Fix: a bare "Mistral-7B-Instruct-v0.2" is not a valid Hub repo id — the
# owning-organization namespace is required for the endpoint to resolve.
client = InferenceClient(
    "mistralai/Mistral-7B-Instruct-v0.2"
)
def format_prompt(message, history):
    """Build the Mistral-instruct conversation string sent to the model.

    The fixed (German-output) system instruction comes first, followed by one
    ``[INST] user [/INST] assistant</s>`` segment per past exchange, and
    finally the new user message wrapped in ``[INST] ... [/INST]``.

    Args:
        message: The latest user message.
        history: Iterable of ``(user_prompt, bot_response)`` pairs.

    Returns:
        The fully assembled prompt string.
    """
    system_turn = "<s>You are an experienced Senior Javascript Developer Assistant, specialized in supporting the development of web applications with modern technologies. Your expertise includes:\\n\\n Next.js: A React framework for server-side rendering and static page generation.\\n Yarn: A fast, reliable and secure dependency manager.\\n Tailwind CSS and Tailwind UI: A utility-first CSS framework and collection of pre-built components. \\n Radix: A collection of UI components for building high quality, accessible design systems and web apps.\\n Huggingface, Replicate, Llama2 and everything related to LLM.\\n OpenAI API: An API for accessing powerful AI models from OpenAI.\\n Langchain JS: A Javascript client for the Langchain API that allows blockchain transactions to be written in natural language.\\n\\nIn your first interaction, ask for specific requirements of the development project. After you have received the information, proceed as follows:\\n\\nInquire. You ask up to five precise questions to obtain in-depth details about the project that are essential for technical implementation and support. You wait for the answers before proceeding.\\n\\n Describe the technical requirements. You list the technical challenges and requirements of the project to get an overview of the problems to be solved.\\n\\n Create a technical plan. You develop a comprehensive plan that describes the steps to implement the requirements using the mentioned technologies.\\n\\nAfter that, you offer different options on how the project can be further developed:\\n\\n/Discussion - You discuss the current state of the code and possible improvements or changes.\\n\\n/Code review - You conduct a code review to identify best practices and ensure that the code is clean and maintainable. \\n\\n/Structuring - You help to structure the application to create a solid basis for further development.\\n\\n/Debugging - You assist in debugging problems and find efficient solutions for bugs that occur.\\n\\n/Performance Optimization - You analyze the application for performance bottlenecks and suggest optimizations.\\n\\nAfter each step you can request additional information, which is marked with the instruction \\\"+context information\\\". You always include this context in the execution of the commands. After each response, you can use the options to continue the interaction. Write in German [/INST]</s>"
    # One segment per completed exchange, in order.
    past_turns = "".join(
        f"[INST] {user_turn} [/INST] {bot_turn}</s> "
        for user_turn, bot_turn in history
    )
    return system_turn + past_turns + f"[INST] {message} [/INST]"
def generate(
    prompt, history, temperature=0.9, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0,
):
    """Stream a chat completion from the Inference API.

    Args:
        prompt: Latest user message.
        history: List of ``(user, assistant)`` exchange pairs.
        temperature: Sampling temperature; clamped to a minimum of 1e-2.
        max_new_tokens: Upper bound on generated tokens.
        top_p: Nucleus-sampling probability mass.
        repetition_penalty: Penalty factor for repeated tokens.

    Yields:
        The accumulated response text after each streamed token, which lets
        the Gradio chat UI render the reply incrementally.
    """
    # Gradio sliders deliver floats — coerce/clamp everything before the API
    # call; in particular max_new_tokens must be an integer token count.
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=int(max_new_tokens),
        top_p=top_p,
        repetition_penalty=float(repetition_penalty),
        do_sample=True,
        seed=42,  # fixed seed: reproducible sampling for identical inputs
    )

    formatted_prompt = format_prompt(prompt, history)
    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )

    output = ""
    for response in stream:
        output += response.token.text
        yield output
    # NOTE: the original ended with `return output`, which in a generator only
    # sets StopIteration.value — Gradio ignores it, so it was dropped.
def _slider(label, value, minimum, maximum, step, info):
    # Shared factory: every extra control is an interactive gr.Slider.
    return gr.Slider(
        label=label,
        value=value,
        minimum=minimum,
        maximum=maximum,
        step=step,
        interactive=True,
        info=info,
    )


# Sampling controls shown alongside the chat box; wired into generate()
# by gr.ChatInterface in positional order.
additional_inputs = [
    _slider("Temperature", 0.9, 0.0, 1.0, 0.05,
            "Higher values produce more diverse outputs"),
    _slider("Max new tokens", 512, 0, 1048, 64,
            "The maximum numbers of new tokens"),
    _slider("Top-p (nucleus sampling)", 0.90, 0.0, 1, 0.05,
            "Higher values sample more low-probability tokens"),
    _slider("Repetition penalty", 1.2, 1.0, 2.0, 0.05,
            "Penalize repeated tokens"),
]
# Custom CSS injected into the Blocks page: makes the element with id "mkd"
# a fixed-height, scrollable, bordered box.
css = """
#mkd {
height: 500px;
overflow: auto;
border: 1px solid #ccc;
}
"""
# Assemble and launch the Gradio app: a heading plus a streaming chat
# interface backed by generate(), with the sampling sliders attached.
with gr.Blocks(css=css, theme="ParityError/Interstellar") as demo:
    # Fix: the original markup was "<h1><center>AI Assistant<h1><center>",
    # which opens two more tags instead of closing the first pair.
    gr.HTML("<h1><center>AI Assistant</center></h1>")
    gr.ChatInterface(
        generate,
        additional_inputs=additional_inputs,
        examples=[["Was ist der Sinn des Lebens?"], ["Schreibe mir ein Rezept über Honigkuchenpferde"]],
    )

# Queue requests so the streaming endpoint tolerates concurrent users.
demo.queue(concurrency_count=75, max_size=100).launch(debug=True)