Uploaded model

  • Developed by: HBD007
  • License: apache-2.0
  • Finetuned from model: llm-jp/llm-jp-3-13b

This Llama model was trained 2x faster with Unsloth and Hugging Face's TRL library.

Inference code

# Import the required libraries
import json
from datetime import datetime
from pathlib import Path

import jsonlines
import torch
from huggingface_hub import get_token
from peft import PeftModel
from tqdm.notebook import tqdm
from transformers import pipeline
from transformers.pipelines.text_generation import TextGenerationPipeline
from unsloth import FastLanguageModel

# Constants
# Hugging Face access token obtained from huggingface_hub.get_token(); it is
# passed as `token=` when the base model and the LoRA adapter are loaded.
HF_TOKEN = get_token()
# Prompt layout: an instruction header, the instruction text, an answer
# header, and the answer slot. Both "{}" placeholders are filled positionally
# with str.format().
PROMPT_TEMPLATE = (
    "### 指示\n"
    "{}\n"
    "### 回答\n"
    "{}"
)

# Run configuration
base_model_id = "llm-jp/llm-jp-3-13b"  # base model to load
adapter_id = "HBD007/llm-jp-3-13b-LLM2024-lora"  # LoRA adapter to apply
input_data_path = Path("data/elyza-tasks-100-TV_0.jsonl")  # input data path
# The output name embeds the current local time (second resolution), so
# separate runs write to separate files.
output_file_path = Path(f"inference_results-{datetime.now():%Y%m%d-%H%M%S}.jsonl")

# Load the input records from the JSON Lines file into a list.
# The reader is opened as a context manager so the underlying file handle is
# closed once every record has been read, instead of being left open.
with jsonlines.open(input_data_path) as reader:
    datasets_list = list(reader)

# Load the base model and its tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=base_model_id,
    trust_remote_code=True,
    token=HF_TOKEN,
)

# Load the LoRA adapter on top of the base model
model = PeftModel.from_pretrained(
    model,
    adapter_id,
    token=HF_TOKEN,
)

# Switch the model to inference mode
# NOTE(review): `model` is rebound to the return value of for_inference();
# confirm the installed Unsloth version returns the model rather than None.
model = FastLanguageModel.for_inference(model)

# Create the text-generation pipeline from the loaded model and tokenizer.
# Sampling is disabled (do_sample=False) and a repetition penalty of 1.2 is
# applied to every generation call made through this pipeline.
# NOTE(review): no max_new_tokens / max_length is passed here or at call
# time, so output length is left to library/model defaults — confirm intended.
generator: TextGenerationPipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    use_cache=True,
    do_sample=False,
    repetition_penalty=1.2,
)

# Report the GPU status: device name, total memory, and the peak amount of
# memory reserved so far. Nothing is printed when CUDA is unavailable.
if torch.cuda.is_available():
    gpu_stats = torch.cuda.get_device_properties(0)
    # Byte counts are divided by 2**30 and rounded to three decimals.
    max_memory = round(gpu_stats.total_memory / 2**30, 3)
    start_gpu_memory = round(torch.cuda.max_memory_reserved() / 2**30, 3)
    print(
        f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.",
        f"{start_gpu_memory} GB of memory reserved.",
        sep="\n",
    )

# Generate an answer for every input record and collect the results.
results = []
for record in tqdm(datasets_list):
    # Read both fields up front so a malformed record fails before generation.
    question, task_id = record["input"], record["task_id"]
    # The answer slot of the template is left empty for the model to complete.
    prompt = PROMPT_TEMPLATE.format(question, "")
    completions = generator(text_inputs=prompt, return_full_text=False)
    results.append(
        {
            "task_id": task_id,
            "input": question,
            "output": completions[0]["generated_text"],
        }
    )

# Save the results as JSON Lines: one JSON object per line, UTF-8 encoded,
# with non-ASCII characters written as-is rather than escaped.
with output_file_path.open("w", encoding="utf-8") as out_file:
    out_file.writelines(
        json.dumps(record, ensure_ascii=False) + "\n" for record in results
    )

print(f"Inference results saved to {output_file_path}")
Downloads last month

-

Downloads are not tracked for this model. How to track
Inference API
Unable to determine this model’s pipeline type. Check the docs.

Model tree for HBD007/llm-jp-3-13b-LLM2024-lora

Finetuned
(1145)
this model