from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
import gradio as gr
from peft import PeftModel
import spaces

# Use the first GPU if available, otherwise fall back to CPU
device = 0 if torch.cuda.is_available() else -1


def load_model():
    # Load the base model and its tokenizer
    base_model_name = "Qwen/Qwen2.5-1.5B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    base_model = AutoModelForCausalLM.from_pretrained(base_model_name)

    # Attach the PEFT adapter on top of the base model
    peft_model = PeftModel.from_pretrained(
        base_model,
        "ombhojane/smile-small",
    )

    return pipeline(
        "text-generation",
        model=peft_model,
        tokenizer=tokenizer,
        device=device,
    )


pipe = load_model()


@spaces.GPU
def generate_response(message):
    messages = [
        {"role": "user", "content": message},
    ]
    # Generate up to 200 new tokens for the reply
    outputs = pipe(messages, max_new_tokens=200, num_return_sequences=1)
    # With chat-style input, 'generated_text' is the full conversation
    # (a list of message dicts); the assistant's reply is the last entry
    return outputs[0]["generated_text"][-1]["content"]


# Create the Gradio interface
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your message here..."),
    outputs=gr.Textbox(lines=5),
    title="Text Generation App",
    description="Enter a prompt and get an AI-generated text response",
)

# Launch the app
if __name__ == "__main__":
    demo.launch()