## How to use

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import textwrap

MAX_NEW_TOKENS = 300
model_name = "acul3/bloomz-3b-Instruction"

# Load the model in 8-bit (requires bitsandbytes) and the tokenizer once.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    load_in_8bit=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)


def generate_text(text):
    # The model expects a "User: ... \n Asisten: ..." prompt format.
    text = "User: " + text + "\nAsisten: "
    input_ids = tokenizer(text, return_tensors="pt").input_ids.to("cuda")
    generated_ids = model.generate(
        input_ids,
        max_new_tokens=MAX_NEW_TOKENS,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        top_p=0.95,
        temperature=0.5,
        penalty_alpha=0.6,
        top_k=4,
        repetition_penalty=1.03,
        num_return_sequences=1,
    )
    # Wrap the decoded output to 128 characters per line and strip the prompt prefix.
    result = textwrap.wrap(tokenizer.decode(generated_ids[0], skip_special_tokens=True), width=128)
    result[0] = result[0].split("Asisten:")[-1]
    return "\n".join(result)


print(generate_text("cara merebus telur"))  # "how to boil eggs"
```
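
On recent `transformers` releases, passing `load_in_8bit=True` directly to `from_pretrained` is deprecated in favor of an explicit `BitsAndBytesConfig`. The snippet below is a minimal sketch of the equivalent loading call, assuming `bitsandbytes` is installed; it is an alternative, not part of the original example.

```python
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Equivalent 8-bit loading via an explicit quantization config
# (newer transformers versions prefer this over the bare load_in_8bit kwarg).
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    "acul3/bloomz-3b-Instruction",
    device_map="auto",
    quantization_config=quantization_config,
)
```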