import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the Ministral 3B instruct model and its tokenizer once at startup.
model_name = "ministral/Ministral-3b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
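
# Note: this loads the model on CPU. On a CUDA machine you could pass
# device_map="auto" to from_pretrained (requires the accelerate package);
# generate_response below moves its inputs to model.device either way.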

def generate_response(prompt, max_new_tokens=400):
    # Tokenize the prompt and move it to whichever device the model is on.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # max_new_tokens bounds only the generated text, independent of prompt length.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
    )
    # generate() echoes the prompt before the new tokens; slice the prompt off
    # so only the model's reply is decoded.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

def chat(message, history):
    # ChatInterface passes history as [user, assistant] pairs by default;
    # rebuild the running conversation as a plain-text prompt.
    history_text = "\n".join(f"Human: {user}\nAI: {ai}" for user, ai in history)
    prompt = f"{history_text}\nHuman: {message}\nAI:"
    response = generate_response(prompt)
    # The model may continue with an imagined "Human:" turn; trim the reply there.
    return response.split("\nHuman:")[0].strip()
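
# If your Gradio version prefers gr.ChatInterface(type="messages"), history
# arrives as openai-style {"role", "content"} dicts rather than pairs. A
# minimal sketch of the equivalent callback, assuming that format
# (chat_messages is a hypothetical name; pass fn=chat_messages together with
# type="messages" to gr.ChatInterface):
def chat_messages(message, history):
    turns = [
        f"{'Human' if m['role'] == 'user' else 'AI'}: {m['content']}"
        for m in history
    ]
    prompt = "\n".join(turns + [f"Human: {message}", "AI:"])
    return generate_response(prompt).split("\nHuman:")[0].strip()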

# Wire the chat function into Gradio's built-in chat UI.
iface = gr.ChatInterface(
    fn=chat,
    title="Ministral 3B Chat",
    description="Chat with the Ministral 3B model. Type your message below.",
    examples=[
        "Tell me a short story about a robot.",
        "What are the benefits of exercise?",
        "Explain quantum computing in simple terms.",
    ],
    cache_examples=False,
)

# Start the local web UI.
iface.launch()
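
# Running the script starts a local server at http://127.0.0.1:7860 by
# default; launch(share=True) additionally creates a temporary public link.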