import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model and tokenizer
model_name = "ministral/Ministral-3b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def generate_response(prompt, max_new_tokens=400):
    inputs = tokenizer(prompt, return_tensors="pt")
    # max_new_tokens bounds only the generated text; max_length would
    # count the prompt tokens as well. pad_token_id silences the warning
    # emitted when the tokenizer defines no pad token.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens, not the echoed prompt
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

def chat(message, history):
    # Flatten the chat history (a list of [user, assistant] pairs in
    # Gradio's default tuple format) into a plain-text prompt
    history_text = "\n".join(f"Human: {user}\nAI: {ai}" for user, ai in history)
    prompt = f"{history_text}\nHuman: {message}\nAI:"
    return generate_response(prompt)

# Create the Gradio interface
iface = gr.ChatInterface(
    fn=chat,
    title="Ministral 3B Chat",
    description="Chat with the Ministral 3B model. Type your message below.",
    examples=[
        "Tell me a short story about a robot.",
        "What are the benefits of exercise?",
        "Explain quantum computing in simple terms.",
    ],
    cache_examples=False,
)

# Launch the interface
iface.launch()