|
import spaces |
|
import gradio as gr |
|
|
|
from airllm import HuggingFaceModelLoader, AutoModelForCausalLM |
|
|
|
# Hugging Face Hub repository of the checkpoint this app serves.
MODEL_REPO_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

# Built once at import time so that every request reuses the same objects.
# NOTE(review): confirm that the installed airllm release actually exports
# HuggingFaceModelLoader and AutoModelForCausalLM, and that from_pretrained
# accepts a loader object rather than a repo id string -- TODO verify.
model_loader = HuggingFaceModelLoader(MODEL_REPO_ID)
model = AutoModelForCausalLM.from_pretrained(model_loader)
|
|
|
@spaces.GPU
def generate_text(input_text):
    """Generate text for ``input_text`` using the module-level ``model``.

    Args:
        input_text: Prompt string entered by the user.

    Returns:
        The first generated sequence decoded to a string with special tokens
        removed, or an empty string when the prompt is empty or
        whitespace-only.
    """
    # Nothing to continue from: skip the expensive model call entirely.
    if not input_text or not input_text.strip():
        return ""

    input_ids = model.tokenizer.encode(input_text, return_tensors="pt")
    # max_new_tokens bounds only the generated part; max_length would also
    # count the prompt tokens, leaving long prompts with no room for output.
    output = model.generate(input_ids, max_new_tokens=100)
    # Strip special-token markers so they do not show up in the UI text.
    return model.tokenizer.decode(output[0], skip_special_tokens=True)
|
|
|
|
|
# Single-textbox web UI: the prompt is passed to generate_text and the
# returned string is rendered as plain text.
iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(placeholder="Enter prompt..."),
    outputs="text",
    # The checkpoint loaded by this script is Meta-Llama-3-8B-Instruct, so the
    # title must advertise the 8B model rather than 70B.
    title="LLaMA 3 8B Text Generation",
)
|
|
|
# Start the Gradio server, listening on every network interface at port 7860.
launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
iface.launch(**launch_options)