# Simple ZeroGPU example: Meta-Llama-3 chat behind a Gradio text interface.
import os

import gradio as gr
import spaces
import torch
from diffusers import DiffusionPipeline  # NOTE(review): unused here — confirm and drop if nothing else needs it
from transformers import pipeline

# Gated model: an HF token with access to Meta-Llama-3 must be set in the env.
token = os.getenv("HUGGINGFACE_API_TOKEN")

# Build the text-generation pipeline once at startup. device_map="auto" lets
# accelerate place the weights, so no manual .to('cuda') call is needed (a
# transformers Pipeline object has no .to() method anyway).
pipe = pipeline(
    "text-generation",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    torch_dtype=torch.bfloat16,
    device_map="auto",
    token=token,
)


@spaces.GPU
def generate(prompt):
    """Run one chat turn through the model and return the assistant's reply.

    Args:
        prompt: The user's message for this turn.

    Returns:
        The generated assistant message content as a string.
    """
    # Inference must happen here, per request — not at module import time —
    # so that each call actually uses its `prompt` argument.
    chat = [{"role": "user", "content": prompt}]
    response = pipe(chat, max_new_tokens=512)
    # Chat-format output: generated_text is the full message list; the last
    # entry is the assistant turn just produced.
    return response[0]["generated_text"][-1]["content"]


gr.Interface(
    fn=generate,
    inputs=gr.Text(),
    outputs=gr.Text(),
).launch()