|
import os |
|
import spaces |
|
import torch |
|
import transformers |
|
import gradio as gr |
|
|
|
# Hugging Face token for the gated Llama model; a missing variable raises
# KeyError at import time so the Space fails fast with a clear cause.
access_token = os.environ["HF_ACCESS_TOKEN"]

# Gated instruct checkpoint on the HF Hub; access must be granted to the token above.
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
|
|
# Build the text-generation pipeline once at module import (first run
# downloads the model weights from the Hub, which can take a while).
pipeline = transformers.pipeline(

    "text-generation",

    model=model_id,

    # bfloat16 halves memory relative to fp32 on supported hardware
    model_kwargs={"torch_dtype": torch.bfloat16},

    # let accelerate place the weights on the available GPU(s)/CPU
    device_map="auto",

    token=access_token

)
|
|
|
@spaces.GPU
def generate_text(input_text):
    """Summarize *input_text* with the Llama chat pipeline.

    Parameters
    ----------
    input_text : str
        Raw text supplied by the Gradio textbox.

    Returns
    -------
    str
        The assistant's reply (the summary) as plain text.
    """
    # A chat-style prompt must be a *list* of role/content messages; the
    # original passed a bare dict, which the pipeline does not accept as
    # a conversation.
    messages = [{"role": "system", "content": "Summarize the following: "+input_text}]

    output = pipeline(
        messages,
        max_new_tokens=256,
    )

    # With chat input, the pipeline returns the whole conversation under
    # "generated_text"; the last message is the assistant's reply.
    # Returning plain text (not the raw output structure) is what
    # Gradio's outputs="text" component renders correctly.
    return output[0]["generated_text"][-1]["content"]
|
|
|
|
|
# Single-textbox UI: the user pastes text, the model returns a summary.
prompt_box = gr.Textbox(placeholder="Enter prompt...")

iface = gr.Interface(
    fn=generate_text,
    inputs=prompt_box,
    outputs="text",
    title="LLaMA 3 8B Text Generation",
)

# Bind to all interfaces on the standard Spaces port.
iface.launch(server_name="0.0.0.0", server_port=7860)