from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
import gradio as gr
from peft import PeftModel, PeftConfig
import spaces
# Use the GPU if available
device = 0 if torch.cuda.is_available() else -1
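# Note (an assumption, not from the original file): on Hugging Face ZeroGPU
# Spaces, CUDA may only be attached inside functions decorated with
# @spaces.GPU, so this module-level check can report no GPU even when the
# decorated handler ultimately runs on one.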
def load_model():
    # Load the base model and tokenizer
    base_model_name = "Qwen/Qwen2.5-1.5B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    base_model = AutoModelForCausalLM.from_pretrained(base_model_name)

    # Load the PEFT adapter on top of the base model
    peft_model = PeftModel.from_pretrained(
        base_model,
        "ombhojane/smile-small",
    )

    return pipeline(
        "text-generation",
        model=peft_model,
        tokenizer=tokenizer,
        device=device
    )
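# Optional variant (a hedged sketch, not part of the original app): merging
# the LoRA adapter into the base weights with PeftModel.merge_and_unload()
# removes the adapter indirection at inference time, at the cost of no longer
# being able to swap adapters:
#
#     merged_model = peft_model.merge_and_unload()
#     return pipeline("text-generation", model=merged_model,
#                     tokenizer=tokenizer, device=device)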
pipe = load_model()
@spaces.GPU
def generate_response(message):
    messages = [
        {"role": "user", "content": message}
    ]
    # Generate the response; passing a list of chat messages makes the
    # pipeline apply the model's chat template
    generated_text = pipe(messages, max_new_tokens=200, num_return_sequences=1)
    # In chat mode, 'generated_text' is the full conversation; return only
    # the assistant's reply rather than the whole message list
    return generated_text[0]['generated_text'][-1]['content']
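# For reference, the chat-mode output of the text-generation pipeline has
# roughly this shape (hence the [-1]['content'] indexing above):
#
#     [{'generated_text': [
#         {'role': 'user', 'content': 'Hello'},
#         {'role': 'assistant', 'content': '...model reply...'},
#     ]}]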
# Create Gradio interface
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your message here..."),
    outputs=gr.Textbox(lines=5),
    title="Text Generation App",
    description="Enter a prompt and get an AI-generated text response"
)
# Launch the app
if __name__ == "__main__":
    demo.launch()
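# To run locally (assuming the dependencies imported above are installed):
#     python app.py
# Gradio serves the UI on http://localhost:7860 by default.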