# Spaces: Sleeping
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Demo script: serve StarCoder2 through Gradio's hosted-model loader, then run
# one local completion with transformers and print the decoded result.

checkpoint = "bigcode/starcoder2-7b"

# NOTE(review): when run as a plain script, `launch()` normally blocks until the
# server is stopped, so the local generation below would only run afterwards.
# Statement order is kept as in the original — confirm this is intended.
gr.load(f"models/{checkpoint}").launch()

# Use the GPU when one is available; fall back to CPU so that `.to(device)`
# does not fail on CPU-only hosts.
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

# Encode the prompt as a PyTorch tensor on the same device as the model.
inputs = tokenizer.encode("def print_hello_world():", return_tensors="pt").to(device)
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))