# filename: gradio_app.py
import os

import gradio as gr
from huggingface_hub import InferenceClient

MODEL_LIST = ["mistralai/Mistral-Nemo-Instruct-2407"]
HF_TOKEN = os.environ.get("HF_TOKEN", None)
# Fall back to the first listed model when MODEL_ID is not set
MODEL = os.environ.get("MODEL_ID", MODEL_LIST[0])

# Initialize the InferenceClient
client = InferenceClient(
    MODEL,
    token=HF_TOKEN,
)


def chat_with_model(system_prompt, user_message):
    # Prepare messages for the chat completion
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ]

    # Collect the streamed response from the model
    response = ""
    for message in client.chat_completion(
        messages=messages,
        max_tokens=500,
        stream=True,
    ):
        # The final streamed chunk's delta may carry no content, so guard against None
        response += message.choices[0].delta.content or ""

    return response


# Create the Gradio interface
iface = gr.Interface(
    fn=chat_with_model,
    inputs=[
        gr.Textbox(label="System Prompt", placeholder="Enter the system prompt here..."),
        gr.Textbox(label="User Message", placeholder="Ask a question..."),
    ],
    outputs=gr.Textbox(label="Response"),
    title="Mistral Chatbot",
    description="Chat with the Mistral model using your own system prompts.",
)

# Launch the app
if __name__ == "__main__":
    iface.launch(show_api=True, share=False, show_error=True)
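
# --- Optional: stream the reply to the UI as it is generated ---
# A minimal sketch, not part of the original app: Gradio treats a generator
# function as a progressively-updating output, so yielding the growing string
# surfaces tokens as they arrive instead of one final block. It reuses the
# `client`, message format, and 500-token cap from chat_with_model above; to
# try it, define it before gr.Interface and pass fn=chat_with_model_streaming.
def chat_with_model_streaming(system_prompt, user_message):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ]
    partial = ""
    for message in client.chat_completion(
        messages=messages,
        max_tokens=500,
        stream=True,
    ):
        partial += message.choices[0].delta.content or ""
        yield partial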
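
# --- Optional: run the model locally with transformers instead of the API ---
# A hedged sketch reconstructing what the script's original torch/transformers
# imports (AutoModelForCausalLM, TextIteratorStreamer, Thread) pointed at:
# local generation with token streaming. It assumes a GPU with enough memory
# for the listed model; the function name below is illustrative, not original
# code, and loading the model inside the function keeps the sketch
# self-contained (in practice, load it once at module level).
import torch
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer


def local_chat(system_prompt, user_message):
    tokenizer = AutoTokenizer.from_pretrained(MODEL_LIST[0], token=HF_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_LIST[0],
        torch_dtype=torch.bfloat16,
        device_map="auto",
        token=HF_TOKEN,
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # generate() blocks, so run it on a thread and read decoded text off the streamer
    Thread(
        target=model.generate,
        kwargs=dict(input_ids=input_ids, max_new_tokens=500, streamer=streamer),
    ).start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial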