Uploaded model

  • Developed by: CLRafaelR
  • License: apache-2.0
  • Finetuned from model: llm-jp/llm-jp-3-13b

This Llama model was trained 2x faster with Unsloth and Hugging Face's TRL library.

ライセンス

cc-by-nc-sa

実行方法

必要パッケージのロード

# Shell commands that install the notebook's dependencies; they are executed
# one by one, in the order listed.
setup_commands = (
    "pip install torch==2.2.1+cu121 torchvision --index-url https://download.pytorch.org/whl/cu121",
    'pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"',
    'pip install --no-deps "xformers<0.0.26" --force-reinstall',
    "pip install flash-attn==2.6.3",
    "pip install schedulefree",
    "pip install ipywidgets --upgrade",
    "pip install langchain langchain-community langchain-huggingface faiss-cpu jq polars",
)
for setup_command in setup_commands:
    get_ipython().system(setup_command)


from unsloth import FastLanguageModel
from peft import PeftModel
import torch
import json
from tqdm import tqdm
import re
import gc
import datetime
from transformers.trainer_utils import set_seed
from datasets import load_dataset
import os
import getpass
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
import polars as pl
from langchain_community.document_loaders import HuggingFaceDatasetLoader
from langchain_community.vectorstores import FAISS
from pprint import pprint
from typing import List
from langchain_core.documents import Document
from langchain_core.runnables import chain
import time
from transformers import TextStreamer


# Make sure a Hugging Face access token is present in the environment,
# prompting for it (without echo) only when it has not been provided already.
if not os.environ.get("HF_TOKEN"):
    os.environ["HF_TOKEN"] = getpass.getpass(
        "Enter your Hugging Face API key: ",
    )
# Bind the module-level name unconditionally: when the variable was already
# set in the environment the prompt above is skipped, and HF_TOKEN would
# otherwise stay undefined for the code that uses it later.
HF_TOKEN = os.environ["HF_TOKEN"]


def flush() -> None:
    """Run garbage collection, then empty the CUDA cache and reset its peak-memory stats."""
    cleanup_steps = (
        gc.collect,
        torch.cuda.empty_cache,
        torch.cuda.reset_peak_memory_stats,
    )
    for cleanup_step in cleanup_steps:
        cleanup_step()


# Fix the random seed to 2024 via transformers' set_seed (presumably so that
# repeated runs are reproducible).
set_seed(2024)

主機モデルのロード

# Hub ids of the base model and of the fine-tuned adapter applied on top of it
model_id = "llm-jp/llm-jp-3-13b"
adapter_id = "CLRafaelR/llm-jp-3-13b-ogawa-brewery"

# None leaves the dtype choice to the library
dtype = None
# True because a 13B model is being loaded here
load_in_4bit = True

# Load the base model together with its tokenizer
base_model_options = {
    "model_name": model_id,
    "dtype": dtype,
    "load_in_4bit": load_in_4bit,
    "trust_remote_code": True,
}
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_options)

# Wrap the base model with the adapter weights, authenticating with the token
model = PeftModel.from_pretrained(model, adapter_id, token=HF_TOKEN)

RAG構築

下記を参考に作成しました

[llm-book/chapter13/13-3-2-rag-instruct-langchain.ipynb at main · ghmagazine/llm-book](https://github.com/ghmagazine/llm-book/blob/main/chapter13/13-3-2-rag-instruct-langchain.ipynb)

埋め込みモデルのロード

# Name of the embedding model on the Hugging Face Hub
embedding_model_name = "pkshatech/GLuCoSE-base-ja-v2"

# Options handed to HuggingFaceEmbeddings as `model_kwargs`; the nested
# `model_kwargs` entry asks for float16 (presumably the weight precision).
embedding_init_options = {
    "model_kwargs": {
        "torch_dtype": torch.float16,
        # "device": "cuda",
    }
}
# Options handed to HuggingFaceEmbeddings as `encode_kwargs`
embedding_encode_options = {"normalize_embeddings": False}

# Initialise the embedding model from its name
embedding_model = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs=embedding_init_options,
    encode_kwargs=embedding_encode_options,
)

ベクトルデータベースの構築

# Dataset whose tasks are indexed for retrieval
data_name = "elyza/ELYZA-tasks-100"

# Keep the "test" split of the dataset at hand
ELYZA_tasks_100 = load_dataset(data_name)["test"]

# Load the same dataset as documents, using the "input" column as page content
loader = HuggingFaceDatasetLoader(path=data_name, page_content_column="input")
documents = loader.load()

# Embed the documents and build the FAISS index, with normalize_L2 enabled
vectorstore = FAISS.from_documents(
    documents=documents,
    embedding=embedding_model,
    normalize_L2=True,
)


@chain
def retriever(
    query: str,
    k: int = 4,
    score_threshold=0.8,
) -> List[Document]:
    """Return the indexed documents whose relevance to *query* exceeds a threshold.

    Args:
        query: Text to search the vector store with.
        k: Maximum number of candidates requested from the vector store.
        score_threshold: Candidates are kept only when their relevance score
            is strictly greater than this value.

    Returns:
        The surviving documents, in the order the vector store returned them.
        Each one has its score stored under ``metadata["score"]``. The list is
        empty when nothing passes the threshold or the search returns nothing.
    """
    # The threshold is applied below rather than by the library: the former
    # `kwargs={"score_threshold": ...}` argument was passed as one keyword
    # literally named `kwargs`, so it never supplied a `score_threshold`.
    scored_docs = vectorstore.similarity_search_with_relevance_scores(
        query,
        k=k,
    )

    # Iterate the (document, score) pairs directly. Unpacking `zip(*pairs)`
    # into two names raised ValueError whenever the search came back empty.
    filtered_docs = []
    for doc, score in scored_docs:
        if score > score_threshold:
            doc.metadata["score"] = score
            print(round(score, 3))
            filtered_docs.append(doc)
    return filtered_docs


# Try the retriever on a sample query, overriding the function's default
# score_threshold of 0.8 with 0.45; k keeps its default.
retrieved_documents = retriever.invoke(
    "IMEとして機能してください",
    # k=1,
    score_threshold=0.45,
)

評価用データセットの読み込み

# Read the evaluation tasks. A record may span several physical lines, so
# lines are accumulated until the buffer parses as one complete JSON value.
datasets = []
with open(
    "../confidential/data/elyza-tasks-100-TV_0.jsonl",
    "r",
    encoding="utf-8",  # explicit, so decoding does not depend on the platform default
) as f:
    item = ""
    for line in f:
        item += line.strip()
        if not item.endswith("}"):
            continue
        try:
            record = json.loads(item)
        except json.JSONDecodeError:
            # The "}" closed a nested object (or sat at the end of a wrapped
            # string) rather than the record itself: keep accumulating.
            continue
        datasets.append(record)
        item = ""

# Anything left over never formed a valid record; fail loudly instead of
# silently evaluating on a truncated dataset.
if item:
    raise ValueError(
        f"Unparsed trailing content in evaluation file: {item[:80]!r}"
    )

推論

# 推論するためにモデルのモードを変更
FastLanguageModel.for_inference(model)

streamer = TextStreamer(
    tokenizer,
    skip_prompt=True,
    skip_special_tokens=True,
)

# Generate an answer for every evaluation task and time the whole run.
results = []
start_time = time.time()
for dt in tqdm(datasets):
    # Named `task_input` rather than `input` so the builtin is not shadowed.
    task_input = dt["input"]

    # Ask the retriever for at most one similar task from the indexed
    # (original) ELYZA-tasks-100 data.
    raw_shots = retriever.invoke(
        task_input,
        k=1,
        score_threshold=0.45,
    )

    if not raw_shots:
        # No similar task was found in the original ELYZA-tasks-100.
        prompt_inst_answer = f"""### 指示\n\n{task_input}\n\n### 回答\n\n"""
        # Disabled alternative prompt (step-by-step reasoning), kept for reference:
        # prompt_inst_answer = f"""### 指示\n\n下のタスクへの最終回答に必要な<思考過程>を順序だてて考え、3つの番号付き箇条書きだけで出力してください。\n\nその後で、<最終回答>を出力してください。\n\n### タスク\n\n{task_input}\n\n### 回答\n\n<思考過程><最終回答>に必要な思考過程3点です。\n\n1. """
    else:
        # A similar task was found in the original ELYZA-tasks-100.
        shots = []
        for i, raw_shot in enumerate(raw_shots):
            # The dataset loader is expected to store page_content as a
            # JSON-encoded string (quoted, with \uXXXX escapes) — confirm
            # against the loader's documentation. json.loads is the exact
            # inverse; the former unicode-escape round trip kept the
            # surrounding quotes and mishandled surrogate pairs and raw
            # non-ASCII text.
            try:
                shot_task = json.loads(raw_shot.page_content)
            except json.JSONDecodeError:
                # Not JSON after all: fall back to the raw text.
                shot_task = raw_shot.page_content
            shot = f"""### タスク{i + 1}\n\n{shot_task}\n\n### タスク{i + 1}の回答\n\n{raw_shot.metadata['output']}"""
            shots.append(shot)
        # These two values are only consumed by the disabled few-shot prompt below.
        formatted_shots = "\n\n".join(shots)
        num_shots = len(shots)
        # print(formatted_shots, "\n\n", "=" * 10)
        prompt_inst_answer = f"""### 指示\n\n{task_input}\n\n### 回答\n\n"""
        # Disabled alternative prompt (few-shot), kept for reference:
        # prompt_inst_answer = f"""### 指示\n\n以下の類似したタスクを解いてください。\n\n{formatted_shots}\n\n### タスク{num_shots + 1}\n\n{task_input}\n\n### タスク{num_shots + 1}の回答\n\n先に解いたタスクと同じ方法で、順序立てて考えます。"""

    print(
        "=" * 16,
        "\n\n",
        prompt_inst_answer,
    )

    inputs = tokenizer(
        [prompt_inst_answer],
        return_tensors="pt",
    ).to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        use_cache=True,
        do_sample=False,
        # do_sample=True,
        # num_beams=5,
        repetition_penalty=1.2,
        streamer=streamer,
    )

    # Decode only the tokens that follow the prompt in the generated sequence.
    prediction = tokenizer.decode(
        outputs[0][inputs.input_ids.shape[-1] :],
        skip_special_tokens=True,
    )

    results.append(
        {
            "task_id": dt["task_id"],
            "input": task_input,
            "output": prediction,
        }
    )

    # Run the memory clean-up helper after each task.
    flush()

    print("-" * 16)
end_time = time.time()

elapsed_time = datetime.timedelta(seconds=end_time - start_time)
print(f"{elapsed_time} elapsed.")

jsonlファイルとして実行結果を保存

# Output file stem: "./<adapter name>_output_<timestamp>", timestamp taken in UTC+9
jst = datetime.timezone(datetime.timedelta(hours=9))
timestamp = datetime.datetime.now(jst).strftime("%Y%m%d_%H%M")
adapter_name = adapter_id.split("/")[1]
file_name = f"./{adapter_name}_output_{timestamp}"

# Write one JSON object per line; ensure_ascii=False keeps non-ASCII text unescaped
with open(f"{file_name}.jsonl", "w", encoding="utf-8") as f:
    for result in results:
        f.write(json.dumps(result, ensure_ascii=False))
        f.write("\n")

# Build a polars DataFrame from the results
df = pl.DataFrame(results)

# Export the DataFrame as an xlsx file
df.write_excel(f"{file_name}.xlsx")
Downloads last month

-

Downloads are not tracked for this model. How to track
Inference API
Unable to determine this model’s pipeline type. Check the docs .

Model tree for CLRafaelR/llm-jp-3-13b-ogawa-brewery

Finetuned
(1141)
this model