Uploaded model
- Developed by: ak0327
- License: apache-2.0
- Finetuned from model: llm-jp/llm-jp-3-13b
This model was trained 2x faster with Unsloth and Hugging Face's TRL library.
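The training script itself is not included in this card. As a rough sketch only, a QLoRA fine-tune of the base model with Unsloth and TRL typically looks like the following; the hyperparameters and the train_dataset variable are placeholders, not the settings actually used for this model, and depending on your trl version some SFTTrainer arguments may need to be passed through SFTConfig instead.

from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments

# Load the base model in 4-bit and attach LoRA adapters (Unsloth)
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="llm-jp/llm-jp-3-13b",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
)

# Supervised fine-tuning with TRL; train_dataset must provide a "text" column
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,  # placeholder: your formatted dataset
    dataset_text_field="text",
    max_seq_length=2048,
    args=TrainingArguments(
        output_dir="outputs",
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        num_train_epochs=1,
        learning_rate=2e-4,
        logging_steps=10,
        optim="adamw_8bit",
    ),
)
trainer.train()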
How to use
import os

import torch
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Hugging Face access token, assumed to be set in the environment beforehand
HF_TOKEN = os.environ.get("HF_TOKEN")


def load_model(model_name):
    # QLoRA config: load the weights in 4-bit NF4 with bfloat16 compute
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=False,
    )

    # Load model
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        token=HF_TOKEN,
    )

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
        token=HF_TOKEN,
    )
    return model, tokenizer
def inference(datasets, model, tokenizer):
    _results = []
    for data in tqdm(datasets):
        input = data["input"]
        prompt = f"""### 指示
{input}
### 回答:
"""
        encoded_input = tokenizer.encode_plus(
            prompt,
            add_special_tokens=False,
            return_tensors="pt",
            padding=True,
            truncation=True,
        ).to(model.device)
        tokenized_input = encoded_input["input_ids"]
        attention_mask = encoded_input["attention_mask"]

        with torch.no_grad():
            outputs = model.generate(
                tokenized_input,
                attention_mask=attention_mask,
                max_new_tokens=100,
                do_sample=False,
                repetition_penalty=1.2,
                pad_token_id=tokenizer.pad_token_id,
            )[0]

        # Decode only the newly generated tokens (everything after the prompt)
        output = tokenizer.decode(
            outputs[tokenized_input.size(1):],
            skip_special_tokens=True,
        )
        _results.append({
            "task_id": data["task_id"],
            "input": input,
            "output": output,
        })
    return _results
model_name = "ak0327/llm-jp-3-13b-ft-5"
model, tokenizer = load_model(model_name)
datasets = load_test_datasets()  # your datasets
results = inference(datasets, model, tokenizer)
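load_test_datasets is not defined in this card. As a minimal sketch, assuming the evaluation data is a JSONL file whose lines each contain a "task_id" and an "input" field (the file names below are placeholders), loading the data and saving the generated results could look like this:

import json

def load_test_datasets(path="tasks.jsonl"):
    # Hypothetical helper: read one JSON object per line, each with "task_id" and "input"
    with open(path, encoding="utf-8") as f:
        return [json.loads(line) for line in f]

# Write the results back out as JSONL, one object per line
with open("results.jsonl", "w", encoding="utf-8") as f:
    for r in results:
        f.write(json.dumps(r, ensure_ascii=False) + "\n")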