"""Gradio demo that serves text generation from a causal language model."""

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr

# Upper bound on the number of tokens generated per request. This is applied
# as `max_new_tokens`, so it limits only the continuation and not the prompt.
MAX_NEW_TOKENS = 200

# Load the model and tokenizer
model_name = "NoaiGPT/merged-llama3-8b-instruct-1720894657"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Create a text generation pipeline on the same device the model was moved to,
# so the device choice is made in exactly one place.
text_generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
)


# Define the prediction function
def generate_text(prompt):
    """Generate a continuation of *prompt* with the text-generation pipeline.

    Args:
        prompt: Text entered by the user in the Gradio textbox.

    Returns:
        The ``"generated_text"`` field of the first (and only) sequence
        returned by the pipeline, or an empty string when *prompt* is empty
        or contains only whitespace.
    """
    # Nothing to continue from: skip the model call for blank input.
    if not prompt or not prompt.strip():
        return ""

    # `max_new_tokens` is used instead of `max_length` because `max_length`
    # counts the prompt tokens as well, which leaves little or no room for
    # generation when the prompt is long.
    outputs = text_generator(
        prompt,
        max_new_tokens=MAX_NEW_TOKENS,
        num_return_sequences=1,
    )
    return outputs[0]["generated_text"]


# Define the Gradio interface
interface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs="text",
    title="LLaMA 3 Text Generation",
    description="Generate text using the LLaMA 3 model fine-tuned for instruction-following tasks.",
)

# Launch the interface
interface.launch()