import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# The same repo provides both the tokenizer and the model weights.
TOKENIZER_REPO = "MediaTek-Research/Breeze-7B-Instruct-v1_0"

tokenizer = AutoTokenizer.from_pretrained(
    TOKENIZER_REPO, local_files_only=False, use_fast=True
)
model = AutoModelForCausalLM.from_pretrained(
    TOKENIZER_REPO,
    device_map="auto",
    local_files_only=False,
    torch_dtype=torch.bfloat16,
)


def generate(text):
    text = text.strip()
    if not text:
        # apply_chat_template cannot build a prompt from an empty conversation.
        return ""
    # The textbox input is the user's turn, so tag it with the "user" role
    # (the Breeze chat template expects user/assistant turns).
    chat_data = [{"role": "user", "content": text}]
    input_ids = tokenizer.apply_chat_template(chat_data, return_tensors="pt").to(
        model.device
    )
    outputs = model.generate(
        input_ids,
        max_new_tokens=128,
        # do_sample=True is required for top_p/top_k/temperature to take
        # effect; values this low keep decoding close to greedy.
        do_sample=True,
        top_p=0.01,
        top_k=85,
        repetition_penalty=1.1,
        temperature=0.01,
    )
    # Note: decoding outputs[0] returns the prompt followed by the reply.
    reply = tokenizer.decode(outputs[0])
    print(reply)
    return reply


gradio_app = gr.Interface(
    generate,
    inputs=gr.Text(),
    outputs=gr.Text(),
    title="test",
)

if __name__ == "__main__":
    gradio_app.launch()
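
# --- Optional: token-by-token streaming (sketch) ---
# The Interface above only shows text once generate() finishes. If you want
# the output box to fill in incrementally, transformers' TextIteratorStreamer
# can run generation in a background thread and yield partial text to Gradio,
# which treats a generator function as a streaming output. This is a minimal
# sketch reusing the `model` and `tokenizer` defined above; the name
# `generate_stream` is just for illustration. To try it, move this definition
# above gr.Interface(...) and pass `generate_stream` instead of `generate`
# (nothing below the blocking launch() call runs as written).

from threading import Thread

from transformers import TextIteratorStreamer


def generate_stream(text):
    text = text.strip()
    if not text:
        yield ""
        return
    chat_data = [{"role": "user", "content": text}]
    input_ids = tokenizer.apply_chat_template(chat_data, return_tensors="pt").to(
        model.device
    )
    # skip_prompt=True drops the echoed prompt so only the reply is streamed.
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # generate() blocks, so run it in a thread and consume the streamer here.
    thread = Thread(
        target=model.generate,
        kwargs=dict(
            inputs=input_ids,
            streamer=streamer,
            max_new_tokens=128,
            repetition_penalty=1.1,
        ),
    )
    thread.start()
    # Each yield replaces the textbox contents with the text so far.
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial
    thread.join()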