import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

model_id = "MaxBlumenfeld/smollm2-135m-bootleg-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)


def generate_response(message, temperature=0.7, max_length=200):
    # Wrap the user message in the Human/Assistant template the model expects.
    prompt = f"Human: {message}\nAssistant:"
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,  # pass input_ids and attention_mask together to avoid a generate() warning
            max_length=int(max_length),  # Gradio sliders yield floats; generate() expects an int. Note: max_length counts the prompt tokens too.
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the text after the final "Assistant:" marker.
    return response.split("Assistant:")[-1].strip()


with gr.Blocks() as demo:
    gr.Markdown("# SmolLM2 Bootleg Instruct Chat")
    with gr.Row():
        with gr.Column():
            message = gr.Textbox(label="Message")
            temp = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature")
            max_len = gr.Slider(minimum=50, maximum=500, value=200, step=1, label="Max Length")
            submit = gr.Button("Send")
        with gr.Column():
            output = gr.Textbox(label="Response")
    submit.click(
        generate_response,
        inputs=[message, temp, max_len],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()
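
# A minimal sketch of assumed usage, not part of the original app: you can
# exercise generate_response without launching the Gradio UI, e.g. as a quick
# smoke test from a Python REPL. The module name "app" and the example
# question are hypothetical; adjust to wherever this file is saved.
#
#   >>> from app import generate_response  # assumes this file is app.py
#   >>> generate_response("What is the capital of France?", temperature=0.5, max_length=100)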