"""Gradio text demo that completes a prompt with a pretrained LLaMA causal LM."""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM

# Hugging Face Hub id used for both the model weights and the tokenizer.
MODEL_ID = "we1kkk/chinese-llama-alpaca-plus-lora-7b"

# Upper bound on the number of tokens generated per request. This counts only
# newly generated tokens, so a long prompt does not use up the budget.
MAX_NEW_TOKENS = 30

# Loaded once at import time so every request reuses the same objects.
model = LlamaForCausalLM.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)


def greet(prompt: str) -> str:
    """Generate text from *prompt* and return the decoded result.

    The prompt is tokenized into PyTorch tensors, passed to
    ``model.generate`` and the first returned sequence is decoded with
    special tokens stripped.

    Args:
        prompt: Text typed into the Gradio input box.

    Returns:
        The decoded first sequence returned by ``model.generate``.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    # Pass the attention mask explicitly rather than letting generate() infer
    # it, and bound the output by new tokens instead of total length.
    generate_ids = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=MAX_NEW_TOKENS,
    )
    decoded = tokenizer.batch_decode(
        generate_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    # A single prompt is submitted, so only the first decoded entry is used.
    return decoded[0]


iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()