Spaces:

Kr08
/

Llama

Sleeping

File size: 808 Bytes

ce4b75d
686678a
e86ad04
 
951adef
 
d690585
6a968bc
951adef
6a968bc
 
 
 
 
fc6d403
6a968bc
951adef
686678a
 
6a968bc
498e38f
 
6a968bc
 
26a2377
951adef
 
686678a
 
 
 
6a968bc
951adef
 
686678a

import os
import spaces
import torch
import transformers
import gradio as gr

# Hugging Face token is read from the environment so it never lives in the
# source; a missing HF_ACCESS_TOKEN raises KeyError at start-up.
access_token = os.environ["HF_ACCESS_TOKEN"]
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Build the text-generation pipeline once at import time so every request
# reuses the same loaded model. bfloat16 is requested through model_kwargs
# and device placement is delegated to the library via device_map="auto".
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    token=access_token,
    device_map="auto",
    model_kwargs=dict(torch_dtype=torch.bfloat16),
)

@spaces.GPU
def generate_text(input_text):
    """Summarize ``input_text`` using the module-level chat pipeline.

    Args:
        input_text: Text entered by the user in the Gradio textbox.

    Returns:
        The assistant's reply as a plain string, or an empty string when
        ``input_text`` is empty, whitespace-only, or ``None``.
    """
    # Skip the (expensive) model call when there is nothing to summarize;
    # this also avoids a TypeError from concatenating None below.
    if not input_text or not input_text.strip():
        return ""

    # Chat input must be a *list* of message dicts; a single bare dict is not
    # recognised as a conversation by the text-generation pipeline. The
    # instruction goes in the system turn, the user's text in a user turn.
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant that summarizes text.",
        },
        {"role": "user", "content": "Summarize the following: " + input_text},
    ]
    output = pipeline(messages, max_new_tokens=256)

    # For chat input the pipeline returns the whole conversation under
    # "generated_text"; the last message is the newly generated assistant
    # reply, and its "content" is the string the "text" output should show.
    return output[0]["generated_text"][-1]["content"]


# Minimal Gradio UI: one textbox in, plain text out, backed by generate_text.
iface = gr.Interface(
    title="LLaMA 3 8B Text Generation",
    fn=generate_text,
    inputs=gr.Textbox(placeholder="Enter prompt..."),
    outputs="text",
)

# Bind to all network interfaces on port 7860.
iface.launch(server_port=7860, server_name="0.0.0.0")