Uploaded model
- Developed by: HBD007
- License: apache-2.0
- Finetuned from model: llm-jp/llm-jp-3-13b
This model was trained 2x faster with Unsloth and Hugging Face's TRL library.
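
The adapter was produced by LoRA fine-tuning the base model with Unsloth and TRL. The exact training script is not part of this card; the sketch below only illustrates that kind of setup, and the dataset file, LoRA hyperparameters, and training arguments are assumptions, not the values actually used for this adapter.

```python
# Minimal sketch of an Unsloth + TRL LoRA fine-tune (assumed values, not the actual recipe).
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="llm-jp/llm-jp-3-13b",
    max_seq_length=512,   # assumed context length
    load_in_4bit=True,    # assumed quantized loading
)
model = FastLanguageModel.get_peft_model(
    model,
    r=16,                 # assumed LoRA rank
    lora_alpha=16,
    lora_dropout=0.0,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)

# Hypothetical instruction dataset already rendered into a "text" column.
dataset = load_dataset("json", data_files="train.jsonl", split="train")

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=512,
    args=TrainingArguments(
        output_dir="outputs",
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        num_train_epochs=1,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=10,
    ),
)
trainer.train()

# Push only the LoRA adapter weights to the Hub.
model.push_to_hub("HBD007/llm-jp-3-13b-LLM2024-lora")
```
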
Inference code
```python
# Import the required libraries
import json
from datetime import datetime
from pathlib import Path

import jsonlines
import torch
from huggingface_hub import get_token
from peft import PeftModel
from tqdm.notebook import tqdm
from transformers import pipeline
from transformers.pipelines.text_generation import TextGenerationPipeline
from unsloth import FastLanguageModel

# Constants
HF_TOKEN = get_token()
PROMPT_TEMPLATE = "\n".join(
    [
        "### 指示",
        "{}",
        "### 回答",
        "{}",
    ]
)
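
# For reference, PROMPT_TEMPLATE.format("<instruction text>", "") renders as
# ("<instruction text>" below is an illustrative placeholder):
#
#   ### 指示
#   <instruction text>
#   ### 回答
#   (empty completion slot for the model to fill in)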

# Configuration
base_model_id = "llm-jp/llm-jp-3-13b"  # base model to load
adapter_id = "HBD007/llm-jp-3-13b-LLM2024-lora"  # LoRA adapter
input_data_path = Path("data/elyza-tasks-100-TV_0.jsonl")  # path to the input data
output_file_path = Path(
    f"inference_results-{datetime.now().strftime('%Y%m%d-%H%M%S')}.jsonl"
)

# Load the evaluation data
datasets_list = [obj for obj in jsonlines.open(input_data_path)]

# Load the base model and tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=base_model_id,
    trust_remote_code=True,
    token=HF_TOKEN,
)

# Load the LoRA adapter
model = PeftModel.from_pretrained(
    model,
    adapter_id,
    token=HF_TOKEN,
)

# Switch the model to inference mode
model = FastLanguageModel.for_inference(model)

# Build the text-generation pipeline
generator: TextGenerationPipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    use_cache=True,
    do_sample=False,
    repetition_penalty=1.2,
)

# Print GPU status
if torch.cuda.is_available():
    gpu_stats = torch.cuda.get_device_properties(0)
    start_gpu_memory = round(torch.cuda.max_memory_reserved() / (1024**3), 3)
    max_memory = round(gpu_stats.total_memory / (1024**3), 3)
    print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
    print(f"{start_gpu_memory} GB of memory reserved.")

# Run inference over all tasks
results = []
for dt in tqdm(datasets_list):
    input_text = dt["input"]
    task_id = dt["task_id"]
    generated = generator(
        text_inputs=PROMPT_TEMPLATE.format(input_text, ""),
        return_full_text=False,
    )
    results.append(
        {
            "task_id": task_id,
            "input": input_text,
            "output": generated[0]["generated_text"],
        }
    )

# Save the results as JSON Lines
with output_file_path.open("w", encoding="utf-8") as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)
        f.write("\n")

print(f"Inference results saved to {output_file_path}")
```