# filename: gradio_app.py
import os

import gradio as gr
from huggingface_hub import InferenceClient

MODEL_LIST = ["mistralai/Mistral-Nemo-Instruct-2407"]
HF_TOKEN = os.environ.get("HF_TOKEN", None)
# Fall back to the first entry in MODEL_LIST when MODEL_ID is not set
MODEL = os.environ.get("MODEL_ID", MODEL_LIST[0])

# Initialize the InferenceClient
client = InferenceClient(
    MODEL,
    token=HF_TOKEN,
)
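# Assumed environment setup (not part of the original file): HF_TOKEN should be a
# Hugging Face access token with inference permissions; MODEL_ID is optional since
# the first entry of MODEL_LIST is used as a fallback. For example:
#   export HF_TOKEN=hf_xxxxxxxxxxxxxxxx
#   export MODEL_ID=mistralai/Mistral-Nemo-Instruct-2407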
def chat_with_model(system_prompt, user_message):
    # Prepare messages for the chat completion
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ]
    # Collect the streamed response from the model
    response = ""
    for message in client.chat_completion(
        messages=messages,
        max_tokens=500,
        stream=True,
    ):
        # Some stream chunks carry no content, so guard against None
        response += message.choices[0].delta.content or ""
    return response
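# Optional streaming variant (a sketch, not part of the original app): Gradio treats a
# generator function as a streaming output, so yielding the growing response updates the
# output textbox as chunks arrive. To try it, pass chat_with_model_stream as fn= below.
def chat_with_model_stream(system_prompt, user_message):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ]
    response = ""
    for message in client.chat_completion(messages=messages, max_tokens=500, stream=True):
        # Accumulate streamed tokens and yield the partial text for live display
        response += message.choices[0].delta.content or ""
        yield response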
# Create the Gradio interface
iface = gr.Interface(
    fn=chat_with_model,
    inputs=[
        gr.Textbox(label="System Prompt", placeholder="Enter the system prompt here..."),
        gr.Textbox(label="User Message", placeholder="Ask a question..."),
    ],
    outputs=gr.Textbox(label="Response"),
    title="Mistral Chatbot",
    description="Chat with the Mistral model using your own system prompts.",
)
# Launch the app
if __name__ == "__main__":
    iface.launch(show_api=True, share=False, show_error=True)
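
# A minimal client-side usage sketch (assumptions: the app is running locally on
# Gradio's default port 7860, gradio_client is installed, and the /predict endpoint
# name comes from the default gr.Interface API exposed by show_api=True):
#
#   from gradio_client import Client
#   api = Client("http://127.0.0.1:7860")
#   reply = api.predict("You are a helpful assistant.", "Summarize Mistral NeMo in one line.", api_name="/predict")
#   print(reply)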