Uploaded model
- Developed by: CLRafaelR
- License: apache-2.0
- Finetuned from model: llm-jp/llm-jp-3-13b
This llama model was trained 2x faster with Unsloth and Hugging Face's TRL library.
ライセンス
cc-by-nc-sa
実行方法
必要パッケージのロード
# Install the notebook's dependencies by shelling out through IPython.
# The commands run one after another in the order listed below.
setup_commands = [
    "pip install torch==2.2.1+cu121 torchvision --index-url https://download.pytorch.org/whl/cu121",
    'pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"',
    'pip install --no-deps "xformers<0.0.26" --force-reinstall',
    "pip install flash-attn==2.6.3",
    "pip install schedulefree",
    "pip install ipywidgets --upgrade",
    "pip install langchain langchain-community langchain-huggingface faiss-cpu jq polars",
]
for setup_command in setup_commands:
    get_ipython().system(setup_command)
from unsloth import FastLanguageModel
from peft import PeftModel
import torch
import json
from tqdm import tqdm
import re
import gc
import datetime
from transformers.trainer_utils import set_seed
from datasets import load_dataset
import os
import getpass
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
import polars as pl
from langchain_community.document_loaders import HuggingFaceDatasetLoader
from langchain_community.vectorstores import FAISS
from pprint import pprint
from typing import List
from langchain_core.documents import Document
from langchain_core.runnables import chain
import time
from transformers import TextStreamer
# Take the Hugging Face token from the environment; when it is missing or
# empty, prompt for it without echo and store it back in the environment.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    HF_TOKEN = getpass.getpass("Enter your Hugging Face API key: ")
    os.environ["HF_TOKEN"] = HF_TOKEN
def flush() -> None:
    """Release unreferenced Python objects and cached CUDA memory.

    Always runs the garbage collector. The CUDA cache is emptied and the
    peak-memory statistics are reset only when CUDA is available, so the
    function is a safe no-op for the GPU part on a machine without a
    usable GPU instead of raising.
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats()
# Fix the random seed (2024) through transformers' set_seed before the model is loaded and inference is run.
set_seed(2024)
主機モデルのロード
# Base checkpoint, and the PEFT adapter that is applied on top of it.
model_id = "llm-jp/llm-jp-3-13b"
adapter_id = "CLRafaelR/llm-jp-3-13b-ogawa-brewery"

dtype = None  # None lets the dtype be selected automatically
load_in_4bit = True  # load in 4-bit because a 13B model is handled here

base_model_options = {
    "model_name": model_id,
    "dtype": dtype,
    "load_in_4bit": load_in_4bit,
    "trust_remote_code": True,
}
base_model, tokenizer = FastLanguageModel.from_pretrained(**base_model_options)

# Attach the adapter weights to the base model just loaded.
model = PeftModel.from_pretrained(base_model, adapter_id, token=HF_TOKEN)
RAG構築
下記を参考に作成しました
llm-book/chapter13/13-3-2-rag-instruct-langchain.ipynb at main · ghmagazine/llm-book https://github.com/ghmagazine/llm-book/blob/main/chapter13/13-3-2-rag-instruct-langchain.ipynb
埋め込みモデルのロード
# Name of the embedding model on the Hugging Face Hub.
embedding_model_name = "pkshatech/GLuCoSE-base-ja-v2"

# Loader options: the inner "model_kwargs" dict requests float16 weights.
embedding_load_options = {
    "model_kwargs": {
        "torch_dtype": torch.float16,
        # "device": "cuda",
    }
}
# Encoding options: embeddings are not normalised at encode time.
embedding_encode_options = {"normalize_embeddings": False}

# Initialise the embedding model from its name.
embedding_model = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs=embedding_load_options,
    encode_kwargs=embedding_encode_options,
)
ベクトルデータベースの構築
# Dataset whose "input" column supplies the text of the indexed documents.
data_name = "elyza/ELYZA-tasks-100"
ELYZA_tasks_100 = load_dataset(data_name)["test"]

# Load every row as a Document: "input" becomes page_content.
# NOTE(review): the inference loop decodes escape sequences in page_content
# before using it, which suggests the loader stores the text in an escaped
# form — if so, the embeddings below are computed on that escaped text; confirm.
loader = HuggingFaceDatasetLoader(path=data_name, page_content_column="input")
documents = loader.load()

# Build the FAISS index over the documents, with L2 normalisation enabled.
vectorstore = FAISS.from_documents(
    documents=documents,
    embedding=embedding_model,
    normalize_L2=True,
)
@chain
def retriever(
    query: str,
    k: int = 4,
    score_threshold=0.8,
) -> List[Document]:
    """Return the indexed documents whose relevance to *query* beats a threshold.

    Args:
        query: Text to search the vector store with.
        k: Maximum number of candidates requested from the vector store.
        score_threshold: Candidates are kept only when their relevance score
            is strictly greater than this value.

    Returns:
        The kept documents, in the order the vector store returned them.
        Each kept document gets its score stored under ``metadata["score"]``
        and the rounded score is printed. The list is empty when nothing
        passes the threshold or the search returns no candidates.
    """
    # The threshold is applied below rather than by the vector store. The
    # previous call passed it nested under a literal ``kwargs`` key, which
    # the search never read, so that argument has been removed.
    scored_docs = vectorstore.similarity_search_with_relevance_scores(
        query,
        k=k,
    )

    # Iterate over the (document, score) pairs directly: unpacking through
    # ``zip(*...)`` raises ValueError when the search returns nothing.
    filtered_docs = []
    for doc, score in scored_docs:
        if score > score_threshold:
            doc.metadata["score"] = score
            print(round(score, 3))
            filtered_docs.append(doc)
    return filtered_docs
# Try the retriever on a sample query; k is not passed, so the retriever's
# default applies (the k=1 variant is left disabled).
sample_query = "IMEとして機能してください"
retrieved_documents = retriever.invoke(
    sample_query,
    # k=1,
    score_threshold=0.45,
)
評価用データセットの読み込み
# Load the evaluation tasks into a list of dicts.
# Stripped lines are accumulated until the buffer ends with "}", at which
# point the buffer is parsed as one JSON object; this lets a record span
# several physical lines.
datasets = []
with open(
    "../confidential/data/elyza-tasks-100-TV_0.jsonl",
    "r",
    # Decode explicitly as UTF-8 so reading does not depend on the platform's
    # default encoding (the results file is written as UTF-8 as well).
    encoding="utf-8",
) as f:
    item = ""
    for line in f:
        line = line.strip()
        item += line
        if item.endswith("}"):
            datasets.append(json.loads(item))
            item = ""
推論
def _decode_page_content(page_content):
    """Return retrieved page content with its JSON escaping undone.

    The content is parsed as a JSON value, which decodes \\uXXXX escapes
    (including surrogate pairs) and removes the surrounding quotes. Text that
    is not valid JSON is returned unchanged. Round-tripping through the
    'unicode-escape' codec is avoided because it corrupts any literal
    non-ASCII character.
    """
    try:
        return json.loads(page_content)
    except json.JSONDecodeError:
        return page_content


# Switch the model to inference mode before generating.
FastLanguageModel.for_inference(model)

# Print generated text as it is produced, without the prompt and special tokens.
streamer = TextStreamer(
    tokenizer,
    skip_prompt=True,
    skip_special_tokens=True,
)

results = []
start_time = time.time()

for dt in tqdm(datasets):
    # Named task_input (not input) so the builtin input() is not shadowed.
    task_input = dt["input"]

    # Look up at most one similar task from the vector store.
    raw_shots = retriever.invoke(
        task_input,
        k=1,
        score_threshold=0.45,
    )

    if not raw_shots:
        # No task similar to the current one was found in the original ELYZA-tasks-100.
        prompt_inst_answer = f"""### 指示\n\n{task_input}\n\n### 回答\n\n"""
        # Disabled alternative prompt:
        # prompt_inst_answer = f"""### 指示\n\n下のタスクへの最終回答に必要な<思考過程>を順序だてて考え、3つの番号付き箇条書きだけで出力してください。\n\nその後で、<最終回答>を出力してください。\n\n### タスク\n\n{task_input}\n\n### 回答\n\n<思考過程><最終回答>に必要な思考過程3点です。\n\n1. """
    else:
        # A task similar to the current one was found in the original ELYZA-tasks-100.
        shots = []
        for i, raw_shot in enumerate(raw_shots):
            shot_task = _decode_page_content(raw_shot.page_content)
            shot = f"""### タスク{i + 1}\n\n{shot_task}\n\n### タスク{i + 1}の回答\n\n{raw_shot.metadata['output']}"""
            shots.append(shot)
        formatted_shots = "\n\n".join(shots)
        num_shots = len(shots)
        # The active prompt does not include the shots; they are only used by
        # the disabled few-shot alternative below.
        prompt_inst_answer = f"""### 指示\n\n{task_input}\n\n### 回答\n\n"""
        # prompt_inst_answer = f"""### 指示\n\n以下の類似したタスクを解いてください。\n\n{formatted_shots}\n\n### タスク{num_shots + 1}\n\n{task_input}\n\n### タスク{num_shots + 1}の回答\n\n先に解いたタスクと同じ方法で、順序立てて考えます。"""

    print(
        "=" * 16,
        "\n\n",
        prompt_inst_answer,
    )

    inputs = tokenizer(
        [prompt_inst_answer],
        return_tensors="pt",
    ).to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        use_cache=True,
        do_sample=False,
        # do_sample=True,
        # num_beams=5,
        repetition_penalty=1.2,
        streamer=streamer,
    )

    # Decode only the tokens that follow the prompt.
    prediction = tokenizer.decode(
        outputs[0][inputs.input_ids.shape[-1] :],
        skip_special_tokens=True,
    )

    results.append(
        {
            "task_id": dt["task_id"],
            "input": task_input,
            "output": prediction,
        }
    )

    # Free memory between tasks.
    flush()
    print("-" * 16)

end_time = time.time()
elapsed_time = datetime.timedelta(seconds=end_time - start_time)
print(f"{elapsed_time} elapsed.")
jsonlファイルとして実行結果を保存
# Output path stem: "./<adapter name>_output_<YYYYMMDD_HHMM>", stamped with
# the current time in the UTC+9 timezone.
utc_plus_9 = datetime.timezone(datetime.timedelta(hours=9))
timestamp = datetime.datetime.now(utc_plus_9).strftime("%Y%m%d_%H%M")
adapter_name = adapter_id.split("/")[1]
file_name = f"./{adapter_name}_output_{timestamp}"

# Write one JSON object per line; non-ASCII characters are written as-is.
with open(f"{file_name}.jsonl", mode="w", encoding="utf-8") as jsonl_file:
    for record in results:
        jsonl_file.write(json.dumps(record, ensure_ascii=False) + "\n")

# Build a polars DataFrame from the results.
df = pl.DataFrame(results)
# Export the DataFrame as an xlsx file.
df.write_excel(f"{file_name}.xlsx")
Model tree for CLRafaelR/llm-jp-3-13b-ogawa-brewery
Base model
llm-jp/llm-jp-3-13b