initial code for Chinese/English translation
This view is limited to 50 files because it contains too many changes. See raw diff for the full change set.
- .gitattributes +18 -0
- .gitignore +152 -0
- datasets/mac/mac-test.tsv +3 -0
- datasets/mac/mac-train.tsv +3 -0
- datasets/mac/mac.tsv +3 -0
- eval_modules/calc_repetitions.py +79 -0
- llama-factory/config/llama3_8b_lora_sft.yaml +46 -0
- llama-factory/config/qwen2_0.5b_lora_sft.yaml +42 -0
- llama-factory/config/qwen2_0.5b_lora_sft_unsloth.yaml +45 -0
- llama-factory/config/qwen2_1.5b_lora_sft.yaml +42 -0
- llama-factory/config/qwen2_1.5b_lora_sft_unsloth.yaml +45 -0
- llama-factory/config/qwen2_7b_lora_sft.yaml +45 -0
- llama-factory/config/qwen2_7b_lora_sft_unsloth.yaml +45 -0
- llama-factory/data/alpaca_mac.json +3 -0
- llama-factory/data/dataset_info.json +3 -0
- llama-factory/inference/qwen2_1.5b_lora_sft.yaml +4 -0
- llm_toolkit/chat.py +88 -0
- llm_toolkit/eval.py +67 -0
- llm_toolkit/eval_lf.py +110 -0
- llm_toolkit/llm_utils.py +165 -0
- llm_toolkit/translation_engine.py +130 -0
- llm_toolkit/translation_utils.py +420 -0
- llm_toolkit/tune.py +143 -0
- notebooks/00_Data_Analysis.ipynb +0 -0
- notebooks/01_Qwen2-0.5B_Unsloth.ipynb +0 -0
- notebooks/02_Qwen2-1.5B_Unsloth.ipynb +0 -0
- notebooks/03_Qwen2-0.5B_1.5B-4bit.ipynb +0 -0
- notebooks/04_tune-small-no-flash-attn.ipynb +0 -0
- notebooks/05_tune-small-with-flash-attn.ipynb +0 -0
- notebooks/06_tune-small-py3.11.ipynb +0 -0
- notebooks/07_tune-lf-py3.11.ipynb +0 -0
- notebooks/07r2_tune-lf-py3.11.ipynb +0 -0
- notebooks/08_eval-lf-py3.11.ipynb +0 -0
- requirements.txt +15 -1
- results/experiment-1-results.csv +3 -0
- results/experiment-2-results.csv +3 -0
- results/experiment-3-results.csv +3 -0
- results/mac-results-no-flash-attn.csv +3 -0
- results/mac-results-with-flash-attn.csv +3 -0
- results/mac-results.csv +3 -0
- results/mac-results_final.csv +3 -0
- results/mac-results_lf-r2.csv +3 -0
- results/mac-results_lf-r3.csv +3 -0
- results/mac-results_lf.csv +3 -0
- results/mac-results_py3.11.csv +3 -0
- results/mac-results_v3.csv +3 -0
- results/model_training_evaluation_times.csv +3 -0
- scripts/lf-api.sh +8 -0
- scripts/tune-large.sh +24 -0
- scripts/tune-lf.sh +9 -0
.gitattributes
CHANGED
@@ -33,3 +33,21 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+results/mac-results.csv filter=lfs diff=lfs merge=lfs -text
+results/mac-results_lf.csv filter=lfs diff=lfs merge=lfs -text
+results/mac-results_lf-r3.csv filter=lfs diff=lfs merge=lfs -text
+results/experiment-1-results.csv filter=lfs diff=lfs merge=lfs -text
+results/mac-results-no-flash-attn.csv filter=lfs diff=lfs merge=lfs -text
+results/mac-results_lf-r2.csv filter=lfs diff=lfs merge=lfs -text
+results/model_training_evaluation_times.csv filter=lfs diff=lfs merge=lfs -text
+results/experiment-3-results.csv filter=lfs diff=lfs merge=lfs -text
+results/mac-results_final.csv filter=lfs diff=lfs merge=lfs -text
+results/mac-results_py3.11.csv filter=lfs diff=lfs merge=lfs -text
+results/experiment-2-results.csv filter=lfs diff=lfs merge=lfs -text
+results/mac-results-with-flash-attn.csv filter=lfs diff=lfs merge=lfs -text
+results/mac-results_v3.csv filter=lfs diff=lfs merge=lfs -text
+llama-factory/data/alpaca_mac.json filter=lfs diff=lfs merge=lfs -text
+llama-factory/data/dataset_info.json filter=lfs diff=lfs merge=lfs -text
+datasets/mac/mac-test.tsv filter=lfs diff=lfs merge=lfs -text
+datasets/mac/mac-train.tsv filter=lfs diff=lfs merge=lfs -text
+datasets/mac/mac.tsv filter=lfs diff=lfs merge=lfs -text

.gitignore
ADDED
@@ -0,0 +1,152 @@
*.out
*.log
*/outputs/
*/models/
*/wandb/
*/cs605-nlp-assignment-2*/
*/augmented_data/
*/inflaton/
*/llama.cpp/
wandb/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
# *.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# JetBrains
.idea

*.db

.DS_Store
/outputs
/models
/llama.cpp
/llama-factory/saves
/llama-factory/saves-1

datasets/mac/mac-test.tsv
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d5663c7521eaf9942a9fea40b2950a46e37b761b22cc698eb6fe6b57bf70d0c4
size 253194

datasets/mac/mac-train.tsv
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:424f0adcb2727eec322acef12294f4efb10412fc0b0529887d28dddc5171af05
size 1031685

datasets/mac/mac.tsv
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:93f3ab2ba07b67b0a3f9ff05291c1b6748851999cda050bc165f8dd259daa2aa
size 1289106

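The three .tsv entries above are Git LFS pointers rather than the data itself, so the corpus has to be fetched (for example with git lfs pull) before the scripts in this commit can read it. Below is a minimal inspection sketch, assuming the chinese/english column names that llm_toolkit/translation_utils.py references further down; it is not part of the commit.

import pandas as pd

# mac.tsv is tab-separated; the column names are assumed from the fields
# used by load_translation_dataset() in llm_toolkit/translation_utils.py.
df = pd.read_csv("datasets/mac/mac.tsv", sep="\t")
print(len(df), df.columns.tolist())
print(df.head(3))
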
eval_modules/calc_repetitions.py
ADDED
@@ -0,0 +1,79 @@
import os
import re
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
import nltk
import evaluate

meteor = evaluate.load("meteor")

print(f"loading: {__file__}")

# final version
pattern_excessive_whitespaces = re.compile(r"\s{5,}")
pattern_text_repetitions = re.compile(r"(.{5}.*)\s*((\1)\s*)+", re.M | re.DOTALL)


def del_excessive_whitespaces(text, debug=False):
    count = 0

    if isinstance(text, str):
        if debug:
            print("----detect excessive whitespaces----")
        count = len(text)
        text = pattern_excessive_whitespaces.sub("", text)
        count -= len(text)
        if debug and count:
            print(f"removed excessive whitespaces: {count}")
    return text, count


# final version for repetition detection
def detect_text_repetitions(text, debug=False):
    count = 0

    if isinstance(text, str):
        if debug:
            print("----detect text repetitions----")
        matches = pattern_text_repetitions.finditer(text)
        for match in matches:
            if debug:
                print(match)
                for groupNum in range(0, len(match.groups())):
                    groupNum = groupNum + 1
                    print(
                        "Group {groupNum} found at {start}-{end}: `{group}`".format(
                            groupNum=groupNum,
                            start=match.start(groupNum),
                            end=match.end(groupNum),
                            group=match.group(groupNum),
                        )
                    )

            start, end = match.span()
            count += end - start

    return count


def detect_repetitions(text, debug=False):
    text, count_excessive_whitespaces = del_excessive_whitespaces(text, debug=debug)
    count_text_repetitions = detect_text_repetitions(text, debug=debug)
    total_repetitions = count_excessive_whitespaces + count_text_repetitions

    result = (count_excessive_whitespaces, count_text_repetitions, total_repetitions)

    if debug:
        print(result)
    return result


def detect_scores(text, debug=False):
    newline_score, repetition_score, total_repetitions = detect_repetitions(
        text, debug=debug
    )
    return pd.Series([newline_score, repetition_score, total_repetitions])

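detect_scores above returns a pandas Series, so it is meant to be applied row-wise to a results table and expanded into three columns. A small usage sketch follows; the "prediction" column name is illustrative and not taken from this commit.

import pandas as pd
from eval_modules.calc_repetitions import detect_scores

df = pd.read_csv("results/mac-results.csv")
# each row's (whitespace, repetition, total) counts expand into three columns
df[["newline_score", "repetition_score", "total_repetitions"]] = df[
    "prediction"  # hypothetical column holding a model's raw output
].apply(detect_scores)
print(df[["newline_score", "repetition_score", "total_repetitions"]].describe())
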
llama-factory/config/llama3_8b_lora_sft.yaml
ADDED
@@ -0,0 +1,46 @@
### model
model_name_or_path: gradientai/Llama-3-8B-Instruct-Gradient-1048k

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
quantization_bit: 4   # use 4-bit QLoRA
loraplus_lr_ratio: 16.0   # use LoRA+ with lambda=16.0
# use_unsloth: true   # use UnslothAI's LoRA optimization for 2x faster training

### dataset
dataset: alpaca_mac
template: llama3
cutoff_len: 1024
max_samples: 4528
overwrite_cache: true
preprocessing_num_workers: 16

### output
# output_dir: saves/llama3-8b/lora/sft
output_dir: /Workspace/Users/donghao.huang@mastercard.com/lf-saves/llama3-8b/lora/sft/
logging_steps: 10
save_steps: 560
plot_loss: true
overwrite_output_dir: true
# resume_from_checkpoint: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 6.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.01
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 560

report_to: none

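The Llama-3 config above and the six Qwen2 configs that follow share the same training and eval hyperparameters; they differ only in the base model, the prompt template (llama3 vs chatml), the output directory and reporting backend, and whether the quantization_bit / loraplus_lr_ratio / use_unsloth switches are enabled. With per_device_train_batch_size 1 and gradient_accumulation_steps 8, the effective batch size is 8. A quick sanity-check sketch (assumes PyYAML is installed; it simply reads one of the config files added in this commit):

import yaml

with open("llama-factory/config/qwen2_0.5b_lora_sft.yaml") as f:
    cfg = yaml.safe_load(f)

effective_batch = (
    cfg["per_device_train_batch_size"] * cfg["gradient_accumulation_steps"]
)
print(cfg["model_name_or_path"], cfg["template"], effective_batch)  # ... chatml 8
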
llama-factory/config/qwen2_0.5b_lora_sft.yaml
ADDED
@@ -0,0 +1,42 @@
### model
model_name_or_path: Qwen/Qwen2-0.5B-Instruct

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all

### dataset
dataset: alpaca_mac
template: chatml
cutoff_len: 1024
max_samples: 4528
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/qwen2-0.5b/lora/sft
logging_steps: 10
save_steps: 560
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 6.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.01
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 560

report_to: wandb
run_name: qwen2_0.5b_lora_sft   # optional

llama-factory/config/qwen2_0.5b_lora_sft_unsloth.yaml
ADDED
@@ -0,0 +1,45 @@
### model
model_name_or_path: Qwen/Qwen2-0.5B-Instruct

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
quantization_bit: 4   # use 4-bit QLoRA
loraplus_lr_ratio: 16.0   # use LoRA+ with lambda=16.0
use_unsloth: true   # use UnslothAI's LoRA optimization for 2x faster training

### dataset
dataset: alpaca_mac
template: chatml
cutoff_len: 1024
max_samples: 4528
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/qwen2-0.5b/lora/sft
logging_steps: 10
save_steps: 560
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 6.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.01
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 560

report_to: wandb
run_name: qwen2_0.5b_lora_sft   # optional

llama-factory/config/qwen2_1.5b_lora_sft.yaml
ADDED
@@ -0,0 +1,42 @@
### model
model_name_or_path: Qwen/Qwen2-1.5B-Instruct

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all

### dataset
dataset: alpaca_mac
template: chatml
cutoff_len: 1024
max_samples: 4528
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/qwen2-1.5b/lora/sft
logging_steps: 10
save_steps: 560
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 6.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.01
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 560

report_to: wandb
run_name: qwen2_1.5b_lora_sft   # optional

llama-factory/config/qwen2_1.5b_lora_sft_unsloth.yaml
ADDED
@@ -0,0 +1,45 @@
### model
model_name_or_path: Qwen/Qwen2-1.5B-Instruct

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
quantization_bit: 4   # use 4-bit QLoRA
loraplus_lr_ratio: 16.0   # use LoRA+ with lambda=16.0
use_unsloth: true   # use UnslothAI's LoRA optimization for 2x faster training

### dataset
dataset: alpaca_mac
template: chatml
cutoff_len: 1024
max_samples: 4528
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/qwen2-1.5b/lora/sft
logging_steps: 10
save_steps: 560
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 6.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.01
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 560

report_to: wandb
run_name: qwen2_1.5b_lora_sft   # optional

llama-factory/config/qwen2_7b_lora_sft.yaml
ADDED
@@ -0,0 +1,45 @@
### model
model_name_or_path: Qwen/Qwen2-7B-Instruct

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
quantization_bit: 4   # use 4-bit QLoRA
loraplus_lr_ratio: 16.0   # use LoRA+ with lambda=16.0
# use_unsloth: true   # use UnslothAI's LoRA optimization for 2x faster training

### dataset
dataset: alpaca_mac
template: chatml
cutoff_len: 1024
max_samples: 4528
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/qwen2-7b/lora/sft
logging_steps: 10
save_steps: 560
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 6.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.01
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 560

report_to: wandb
run_name: qwen2_7b_lora_sft   # optional

llama-factory/config/qwen2_7b_lora_sft_unsloth.yaml
ADDED
@@ -0,0 +1,45 @@
### model
model_name_or_path: Qwen/Qwen2-7B-Instruct

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
quantization_bit: 4   # use 4-bit QLoRA
loraplus_lr_ratio: 16.0   # use LoRA+ with lambda=16.0
use_unsloth: true   # use UnslothAI's LoRA optimization for 2x faster training

### dataset
dataset: alpaca_mac
template: chatml
cutoff_len: 1024
max_samples: 4528
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/qwen2-7b/lora/sft
logging_steps: 10
save_steps: 560
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 6.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.01
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 560

report_to: wandb
run_name: qwen2_7b_lora_sft   # optional

llama-factory/data/alpaca_mac.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6f03e62eb461c2204bbaef55f2de28ec115b1a5834b81f03b10f157551d5fe9f
size 2240344

llama-factory/data/dataset_info.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:84bce610296ed7e729647e85d25576b6226d20ddf0bca4982fb1deb02de35911
size 13560

llama-factory/inference/qwen2_1.5b_lora_sft.yaml
ADDED
@@ -0,0 +1,4 @@
model_name_or_path: Qwen/Qwen2-1.5B-Instruct
adapter_name_or_path: saves/qwen2-1.5b/lora/sft/checkpoint-1680
template: chatml
finetuning_type: lora

llm_toolkit/chat.py
ADDED
@@ -0,0 +1,88 @@
import os
import sys
from llamafactory.chat import ChatModel
from llamafactory.extras.misc import torch_gc

from dotenv import find_dotenv, load_dotenv

found_dotenv = find_dotenv(".env")

if len(found_dotenv) == 0:
    found_dotenv = find_dotenv(".env.example")
print(f"loading env vars from: {found_dotenv}")
load_dotenv(found_dotenv, override=False)

path = os.path.dirname(found_dotenv)
print(f"Adding {path} to sys.path")
sys.path.append(path)

from llm_toolkit.translation_engine import *
from llm_toolkit.translation_utils import *

model_name = os.getenv("MODEL_NAME")
load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
eval_base_model = os.getenv("EVAL_BASE_MODEL") == "true"
eval_fine_tuned = os.getenv("EVAL_FINE_TUNED") == "true"
save_fine_tuned_model = os.getenv("SAVE_FINE_TUNED") == "true"
num_train_epochs = int(os.getenv("NUM_TRAIN_EPOCHS") or 0)
data_path = os.getenv("DATA_PATH")
results_path = os.getenv("RESULTS_PATH")

max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!
dtype = (
    None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
)

print(
    model_name,
    load_in_4bit,
    max_seq_length,
    num_train_epochs,
    dtype,
    data_path,
    results_path,
    eval_base_model,
    eval_fine_tuned,
    save_fine_tuned_model,
)

adapter_name_or_path = (
    sys.argv[1]
    if len(sys.argv) > 1
    else "llama-factory/saves/qwen2-0.5b/lora/sft/checkpoint-560"
)

args = dict(
    model_name_or_path=model_name,  # use bnb-4bit-quantized Llama-3-8B-Instruct model
    adapter_name_or_path=adapter_name_or_path,  # load the saved LoRA adapters
    template="chatml",  # same to the one in training
    finetuning_type="lora",  # same to the one in training
    quantization_bit=4,  # load 4-bit quantized model
)
chat_model = ChatModel(args)

messages = []
print(
    "Welcome to the CLI application, use `clear` to remove the history, use `exit` to exit the application."
)
while True:
    query = input("\nUser: ")
    if query.strip() == "exit":
        break
    if query.strip() == "clear":
        messages = []
        torch_gc()
        print("History has been removed.")
        continue

    messages.append({"role": "user", "content": query})
    print("Assistant: ", end="", flush=True)

    response = ""
    for new_text in chat_model.stream_chat(messages):
        print(new_text, end="", flush=True)
        response += new_text
    print()
    messages.append({"role": "assistant", "content": response})

torch_gc()

llm_toolkit/eval.py
ADDED
@@ -0,0 +1,67 @@
import os
import sys
import torch
from dotenv import find_dotenv, load_dotenv

found_dotenv = find_dotenv(".env")

if len(found_dotenv) == 0:
    found_dotenv = find_dotenv(".env.example")
print(f"loading env vars from: {found_dotenv}")
load_dotenv(found_dotenv, override=False)

path = os.path.dirname(found_dotenv)
print(f"Adding {path} to sys.path")
sys.path.append(path)

from llm_toolkit.translation_engine import *
from llm_toolkit.translation_utils import *

model_name = os.getenv("MODEL_NAME")
adapter_name_or_path = os.getenv("ADAPTER_NAME_OR_PATH")
load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
data_path = os.getenv("DATA_PATH")
results_path = os.getenv("RESULTS_PATH")

print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)

gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"(1) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

model, tokenizer = load_model(
    model_name, load_in_4bit=load_in_4bit, adapter_name_or_path=adapter_name_or_path
)

gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"(2) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

datasets = load_translation_dataset(data_path, tokenizer)

print("Evaluating model: " + model_name)
predictions = eval_model(model, tokenizer, datasets["test"])

gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"(3) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

if adapter_name_or_path is not None:
    model_name += "_" + adapter_name_or_path.split("/")[-1]

save_results(
    model_name,
    results_path,
    datasets["test"],
    predictions,
    debug=True,
)

metrics = calc_metrics(datasets["test"]["english"], predictions, debug=True)
print(metrics)

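llm_toolkit/eval.py is driven entirely by environment variables loaded from .env (or .env.example). The variable names below come from the os.getenv calls above; the values are placeholders for illustration, not settings taken from this commit:

import os

os.environ["MODEL_NAME"] = "Qwen/Qwen2-1.5B-Instruct"  # placeholder
os.environ["ADAPTER_NAME_OR_PATH"] = (
    "llama-factory/saves/qwen2-1.5b/lora/sft/checkpoint-560"  # placeholder
)
os.environ["LOAD_IN_4BIT"] = "true"
os.environ["DATA_PATH"] = "datasets/mac/mac.tsv"
os.environ["RESULTS_PATH"] = "results/mac-results.csv"
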
llm_toolkit/eval_lf.py
ADDED
@@ -0,0 +1,110 @@
import os
import sys
import torch
from dotenv import find_dotenv, load_dotenv
from llamafactory.chat import ChatModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

found_dotenv = find_dotenv(".env")

if len(found_dotenv) == 0:
    found_dotenv = find_dotenv(".env.example")
print(f"loading env vars from: {found_dotenv}")
load_dotenv(found_dotenv, override=False)

path = os.path.dirname(found_dotenv)
print(f"Adding {path} to sys.path")
sys.path.append(path)

from llm_toolkit.translation_utils import *

model_name = os.getenv("MODEL_NAME")
adapter_name_or_path = os.getenv("ADAPTER_NAME_OR_PATH")
load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
data_path = os.getenv("DATA_PATH")
results_path = os.getenv("RESULTS_PATH")

print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)


def load_model(
    model_name,
    max_seq_length=2048,
    dtype=torch.bfloat16,
    load_in_4bit=False,
    adapter_name_or_path=None,
):
    print(f"loading model: {model_name}")

    if adapter_name_or_path:
        template = "llama3" if "llama-3" in model_name.lower() else "chatml"

        args = dict(
            model_name_or_path=model_name,
            adapter_name_or_path=adapter_name_or_path,  # load the saved LoRA adapters
            template=template,  # same to the one in training
            finetuning_type="lora",  # same to the one in training
            quantization_bit=4 if load_in_4bit else None,  # load 4-bit quantized model
        )
        chat_model = ChatModel(args)
        return chat_model.engine.model, chat_model.engine.tokenizer

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=load_in_4bit,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=False,
        bnb_4bit_compute_dtype=dtype,
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        torch_dtype=dtype,
        trust_remote_code=True,
        device_map="auto",
    )

    return model, tokenizer


gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"(1) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

model, tokenizer = load_model(
    model_name, load_in_4bit=load_in_4bit, adapter_name_or_path=adapter_name_or_path
)

gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"(2) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

datasets = load_translation_dataset(data_path, tokenizer)

print("Evaluating model: " + model_name)
predictions = eval_model(model, tokenizer, datasets["test"])

gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"(3) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

if adapter_name_or_path is not None:
    model_name += "_" + adapter_name_or_path.split("/")[-1]

save_results(
    model_name,
    results_path,
    datasets["test"],
    predictions,
    debug=True,
)

metrics = calc_metrics(datasets["test"]["english"], predictions, debug=True)
print(metrics)

llm_toolkit/llm_utils.py
ADDED
@@ -0,0 +1,165 @@
import os
import re
import sys
import torch
from llamafactory.chat import ChatModel
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextStreamer


def load_model(
    model_name,
    max_seq_length=2048,
    dtype=torch.bfloat16,
    load_in_4bit=False,
    adapter_name_or_path=None,
):
    print(f"loading model: {model_name}")

    if adapter_name_or_path:
        template = "llama3" if "llama-3" in model_name.lower() else "chatml"

        args = dict(
            model_name_or_path=model_name,
            adapter_name_or_path=adapter_name_or_path,  # load the saved LoRA adapters
            template=template,  # same to the one in training
            finetuning_type="lora",  # same to the one in training
            quantization_bit=4 if load_in_4bit else None,  # load 4-bit quantized model
        )
        chat_model = ChatModel(args)
        return chat_model.engine.model, chat_model.engine.tokenizer

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=load_in_4bit,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=False,
        bnb_4bit_compute_dtype=dtype,
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        torch_dtype=dtype,
        trust_remote_code=True,
        device_map="auto",
    ) if load_in_4bit else AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=dtype,
        trust_remote_code=True,
        device_map="auto",
    )

    return model, tokenizer


def test_model(model, tokenizer, prompt):
    inputs = tokenizer(
        [prompt],
        return_tensors="pt",
    ).to("cuda")

    text_streamer = TextStreamer(tokenizer)

    _ = model.generate(
        **inputs, max_new_tokens=2048, streamer=text_streamer, use_cache=True
    )


def extract_answer(text, debug=False):
    if text:
        # Remove the begin and end tokens
        text = re.sub(
            r".*?(assistant|\[/INST\]).+?\b", "", text, flags=re.DOTALL | re.MULTILINE
        )
        if debug:
            print("--------\nstep 1:", text)

        text = re.sub(r"<.+?>.*", "", text, flags=re.DOTALL | re.MULTILINE)
        if debug:
            print("--------\nstep 2:", text)

        text = re.sub(
            r".*?end_header_id\|>\n\n", "", text, flags=re.DOTALL | re.MULTILINE
        )
        if debug:
            print("--------\nstep 3:", text)

    return text


def eval_model(model, tokenizer, eval_dataset):
    total = len(eval_dataset)
    predictions = []
    for i in tqdm(range(total)):
        inputs = tokenizer(
            eval_dataset["prompt"][i : i + 1],
            return_tensors="pt",
        ).to("cuda")

        outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)
        decoded_output = tokenizer.batch_decode(outputs)
        debug = i == 0
        decoded_output = [
            extract_answer(output, debug=debug) for output in decoded_output
        ]
        predictions.extend(decoded_output)

    return predictions


def save_model(
    model,
    tokenizer,
    include_gguf=True,
    include_merged=True,
    publish=True,
):
    try:
        token = os.getenv("HF_TOKEN") or None
        model_name = os.getenv("MODEL_NAME")

        save_method = "lora"
        quantization_method = "q5_k_m"

        model_names = get_model_names(
            model_name, save_method=save_method, quantization_method=quantization_method
        )

        model.save_pretrained(model_names["local"])
        tokenizer.save_pretrained(model_names["local"])

        if publish:
            model.push_to_hub(
                model_names["hub"],
                token=token,
            )
            tokenizer.push_to_hub(
                model_names["hub"],
                token=token,
            )

        if include_merged:
            model.save_pretrained_merged(
                model_names["local"] + "-merged", tokenizer, save_method=save_method
            )
            if publish:
                model.push_to_hub_merged(
                    model_names["hub"] + "-merged",
                    tokenizer,
                    save_method="lora",
                    token="",
                )

        if include_gguf:
            model.save_pretrained_gguf(
                model_names["local-gguf"],
                tokenizer,
                quantization_method=quantization_method,
            )

            if publish:
                model.push_to_hub_gguf(
                    model_names["hub-gguf"],
                    tokenizer,
                    quantization_method=quantization_method,
                    token=token,
                )
    except Exception as e:
        print(e)

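extract_answer above strips the chat scaffolding from a decoded generation in three regex passes: everything up to the assistant marker, any <...> special tokens, and any llama-3 end_header residue. A quick check of the intended behaviour, with an illustrative sample string that is not taken from this commit:

from llm_toolkit.llm_utils import extract_answer

raw = "<|im_start|>assistant\nThe train arrived at the station.<|im_end|>"
# pass 1 drops "<|im_start|>assistant\n"; pass 2 drops "<|im_end|>" and anything after it
print(extract_answer(raw))  # expected: "The train arrived at the station."
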
llm_toolkit/translation_engine.py
ADDED
@@ -0,0 +1,130 @@
import os
import pandas as pd
import torch
from unsloth import FastLanguageModel, is_bfloat16_supported
from trl import SFTTrainer
from transformers import TrainingArguments, TextStreamer
from llm_toolkit.translation_utils import *
from llamafactory.chat import ChatModel

print(f"loading {__file__}")


def get_model_names(
    model_name, save_method="merged_4bit_forced", quantization_method="q5_k_m"
):
    hub_model = model_name.split("/")[-1] + "-MAC-"
    local_model = "models/" + hub_model

    return {
        "local": local_model + save_method,
        "local-gguf": local_model + quantization_method,
        "hub": hub_model + save_method,
        "hub-gguf": hub_model + "gguf-" + quantization_method,
    }


def load_model(
    model_name,
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=False,
    template="chatml",
    adapter_name_or_path=None,
):
    print(f"loading model: {model_name}")

    if adapter_name_or_path:
        args = dict(
            model_name_or_path=model_name,
            adapter_name_or_path=adapter_name_or_path,  # load the saved LoRA adapters
            template=template,  # same to the one in training
            finetuning_type="lora",  # same to the one in training
            quantization_bit=4,  # load 4-bit quantized model
        )
        chat_model = ChatModel(args)
        return chat_model.engine.model, chat_model.engine.tokenizer

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,  # YOUR MODEL YOU USED FOR TRAINING
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
        trust_remote_code=True,
    )
    FastLanguageModel.for_inference(model)

    return model, tokenizer


def test_model(model, tokenizer, prompt):
    inputs = tokenizer(
        [prompt],
        return_tensors="pt",
    ).to("cuda")

    text_streamer = TextStreamer(tokenizer)

    _ = model.generate(
        **inputs, max_new_tokens=128, streamer=text_streamer, use_cache=True
    )


def load_trainer(
    model,
    tokenizer,
    dataset,
    num_train_epochs,
    max_seq_length=2048,
    fp16=False,
    bf16=False,
    output_dir="./outputs",
):
    model = FastLanguageModel.get_peft_model(
        model,
        r=16,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],
        lora_alpha=16,
        lora_dropout=0,  # Supports any, but = 0 is optimized
        bias="none",  # Supports any, but = "none" is optimized
        # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
        use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
        random_state=3407,
        use_rslora=False,  # We support rank stabilized LoRA
        loftq_config=None,  # And LoftQ
    )

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=max_seq_length,
        dataset_num_proc=2,
        packing=False,  # Can make training 5x faster for short sequences.
        args=TrainingArguments(
            per_device_train_batch_size=2,
            gradient_accumulation_steps=4,
            warmup_steps=5,
            num_train_epochs=num_train_epochs,
            learning_rate=2e-4,
            fp16=not is_bfloat16_supported(),
            bf16=is_bfloat16_supported(),
            logging_steps=100,
            optim="adamw_8bit",
            weight_decay=0.01,
            lr_scheduler_type="linear",
            seed=3407,
            output_dir=output_dir,
        ),
    )

    return trainer

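translation_engine.load_model falls back to unsloth's FastLanguageModel when no LoRA adapter path is given, and test_model streams a completion for a raw prompt. A minimal smoke-test sketch follows; the model name and Chinese sentence are illustrative, and the prompt wording mirrors the instruction used in translation_utils.py:

from llm_toolkit.translation_engine import load_model, test_model

# assumes a CUDA device; load_in_4bit is optional
model, tokenizer = load_model("Qwen/Qwen2-0.5B-Instruct", load_in_4bit=True)

prompt = (
    "Please translate the following Chinese text into English and provide "
    "only the translated content, nothing else.\n你好，世界。"
)
test_model(model, tokenizer, prompt)  # streams the generation to stdout
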
llm_toolkit/translation_utils.py
ADDED
@@ -0,0 +1,420 @@
import os
import re
import pandas as pd
import evaluate
import seaborn as sns
import matplotlib.pyplot as plt
from datasets import load_dataset
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from tqdm import tqdm

print(f"loading {__file__}")

bleu = evaluate.load("bleu")
rouge = evaluate.load("rouge")
meteor = evaluate.load("meteor")
accuracy = evaluate.load("accuracy")


def extract_answer(text, debug=False):
    if text:
        # Remove the begin and end tokens
        text = re.sub(
            r".*?(assistant|\[/INST\]).+?\b", "", text, flags=re.DOTALL | re.MULTILINE
        )
        if debug:
            print("--------\nstep 1:", text)

        text = re.sub(r"<.+?>.*", "", text, flags=re.DOTALL | re.MULTILINE)
        if debug:
            print("--------\nstep 2:", text)

        text = re.sub(
            r".*?end_header_id\|>\n\n", "", text, flags=re.DOTALL | re.MULTILINE
        )
        if debug:
            print("--------\nstep 3:", text)

    return text


def calc_metrics(references, predictions, debug=False):
    assert len(references) == len(
        predictions
    ), f"lengths are difference: {len(references)} != {len(predictions)}"

    predictions = [extract_answer(text) for text in predictions]

    correct = [1 if ref == pred else 0 for ref, pred in zip(references, predictions)]
    accuracy = sum(correct) / len(references)

    results = {"accuracy": accuracy}
    if debug:
        correct_ids = [i for i, c in enumerate(correct) if c == 1]
        results["correct_ids"] = correct_ids

    results["meteor"] = meteor.compute(predictions=predictions, references=references)[
        "meteor"
    ]

    results["bleu_scores"] = bleu.compute(
        predictions=predictions, references=references, max_order=4
    )
    results["rouge_scores"] = rouge.compute(
        predictions=predictions, references=references
    )
    return results


def save_results(model_name, results_path, dataset, predictions, debug=False):
    if not os.path.exists(results_path):
        # Get the directory part of the file path
        dir_path = os.path.dirname(results_path)

        # Create all directories in the path (if they don't exist)
        os.makedirs(dir_path, exist_ok=True)
        df = dataset.to_pandas()
        df.drop(columns=["text", "prompt"], inplace=True)
    else:
        df = pd.read_csv(results_path, on_bad_lines="warn")

    df[model_name] = predictions

    if debug:
        print(df.head(1))

    df.to_csv(results_path, index=False)


def load_translation_dataset(data_path, tokenizer=None):
    train_data_file = data_path.replace(".tsv", "-train.tsv")
    test_data_file = data_path.replace(".tsv", "-test.tsv")

    if not os.path.exists(train_data_file):
        print("generating train/test data files")
        dataset = load_dataset(
            "csv", data_files=data_path, delimiter="\t", split="train"
        )
        print(len(dataset))
        dataset = dataset.filter(lambda x: x["chinese"] and x["english"])

        datasets = dataset.train_test_split(test_size=0.2)
        print(len(dataset))

        # Convert to pandas DataFrame
        train_df = pd.DataFrame(datasets["train"])
        test_df = pd.DataFrame(datasets["test"])

        # Save to TSV
        train_df.to_csv(train_data_file, sep="\t", index=False)
        test_df.to_csv(test_data_file, sep="\t", index=False)

    print("loading train/test data files")
    datasets = load_dataset(
        "csv",
        data_files={"train": train_data_file, "test": test_data_file},
        delimiter="\t",
    )

    if tokenizer:
        translation_prompt = "Please translate the following Chinese text into English and provide only the translated content, nothing else.\n{}"

        def formatting_prompts_func(examples):
            inputs = examples["chinese"]
            outputs = examples["english"]

            messages = [
                {
                    "role": "system",
                    "content": "You are an expert in translating Chinese to English.",
                },
                None,
            ]

            model_name = os.getenv("MODEL_NAME")

            if "mistral" in model_name.lower():
                messages = messages[1:]

            texts = []
            prompts = []
            for input, output in zip(inputs, outputs):
                prompt = translation_prompt.format(input)
                messages[-1] = {"role": "user", "content": prompt}

                prompt = tokenizer.apply_chat_template(
                    messages, tokenize=False, add_generation_prompt=True
                )
                prompts.append(prompt)
                texts.append(prompt + output + tokenizer.eos_token)
            return {"text": texts, "prompt": prompts}

        datasets = datasets.map(
            formatting_prompts_func,
            batched=True,
        )

    print(datasets)
    return datasets


def eval_model(model, tokenizer, eval_dataset):
    total = len(eval_dataset)
    predictions = []
    for i in tqdm(range(total)):
        inputs = tokenizer(
            eval_dataset["prompt"][i : i + 1],
            return_tensors="pt",
        ).to("cuda")

        outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)
        decoded_output = tokenizer.batch_decode(outputs)
        debug = i == 0
        decoded_output = [
            extract_answer(output, debug=debug) for output in decoded_output
        ]
        predictions.extend(decoded_output)

    return predictions


def save_model(
    model,
    tokenizer,
    include_gguf=True,
    include_merged=True,
    publish=True,
):
    try:
        token = os.getenv("HF_TOKEN") or None
        model_name = os.getenv("MODEL_NAME")

        save_method = "lora"
        quantization_method = "q5_k_m"

        model_names = get_model_names(
            model_name, save_method=save_method, quantization_method=quantization_method
        )

        model.save_pretrained(model_names["local"])
        tokenizer.save_pretrained(model_names["local"])

        if publish:
            model.push_to_hub(
                model_names["hub"],
                token=token,
            )
            tokenizer.push_to_hub(
                model_names["hub"],
                token=token,
            )

        if include_merged:
            model.save_pretrained_merged(
                model_names["local"] + "-merged", tokenizer, save_method=save_method
            )
            if publish:
                model.push_to_hub_merged(
                    model_names["hub"] + "-merged",
                    tokenizer,
                    save_method="lora",
                    token="",
                )

        if include_gguf:
            model.save_pretrained_gguf(
                model_names["local-gguf"],
                tokenizer,
                quantization_method=quantization_method,
            )

            if publish:
                model.push_to_hub_gguf(
                    model_names["hub-gguf"],
                    tokenizer,
                    quantization_method=quantization_method,
                    token=token,
                )
    except Exception as e:
        print(e)


def get_metrics(df):
    metrics_df = pd.DataFrame(df.columns.T)[2:]
    metrics_df.rename(columns={0: "model"}, inplace=True)
    metrics_df["model"] = metrics_df["model"].apply(lambda x: x.split("/")[-1])
    metrics_df.reset_index(inplace=True)
    metrics_df = metrics_df.drop(columns=["index"])

    accuracy = []
    meteor = []
    bleu_1 = []
    rouge_l = []
    all_metrics = []
    for col in df.columns[2:]:
        metrics = calc_metrics(df["english"], df[col], debug=True)
        print(f"{col}: {metrics}")

        accuracy.append(metrics["accuracy"])
        meteor.append(metrics["meteor"])
        bleu_1.append(metrics["bleu_scores"]["bleu"])
        rouge_l.append(metrics["rouge_scores"]["rougeL"])
        all_metrics.append(metrics)

    metrics_df["accuracy"] = accuracy
    metrics_df["meteor"] = meteor
    metrics_df["bleu_1"] = bleu_1
    metrics_df["rouge_l"] = rouge_l
    metrics_df["all_metrics"] = all_metrics

    return metrics_df


def plot_metrics(metrics_df, figsize=(14, 5), ylim=(0, 0.44)):
    plt.figure(figsize=figsize)
    df_melted = pd.melt(
        metrics_df, id_vars="model", value_vars=["meteor", "bleu_1", "rouge_l"]
    )

    barplot = sns.barplot(x="variable", y="value", hue="model", data=df_melted)

    # Set different hatches for each model
    hatches = ["/", "\\", "|", "-", "+", "x", "o", "O", ".", "*", "//", "\\\\"]

    # Create a dictionary to map models to hatches
    model_hatches = {
        model: hatches[i % len(hatches)]
        for i, model in enumerate(metrics_df["model"].unique())
    }

    # Apply hatches based on the model
    num_vars = len(df_melted["variable"].unique())
    for i, bar in enumerate(barplot.patches):
        model = df_melted["model"].iloc[i // num_vars]
        bar.set_hatch(model_hatches[model])

    # Manually update legend to match the bar hatches
    handles, labels = barplot.get_legend_handles_labels()
    for handle, model in zip(handles, metrics_df["model"].unique()):
        handle.set_hatch(model_hatches[model])

    barplot.set_xticklabels(["METEOR", "BLEU-1", "ROUGE-L"])
    for p in barplot.patches:
        if p.get_height() == 0:
            continue
        barplot.annotate(
            f"{p.get_height():.2f}",
            (p.get_x() + p.get_width() / 2.0, p.get_height()),
            ha="center",
            va="center",
            xytext=(0, 10),
            textcoords="offset points",
        )

    barplot.set(ylim=ylim, ylabel="Scores", xlabel="Metrics")
    plt.legend(bbox_to_anchor=(0.5, -0.1), loc="upper center")
    plt.show()


def plot_times(perf_df, ylim=0.421):
    # Adjusted code to put "train-time" bars in red at the bottom

    fig, ax1 = plt.subplots(figsize=(12, 10))

    color_train = "tab:red"
    color_eval = "orange"
    ax1.set_xlabel("Models")
    ax1.set_ylabel("Time (mins)")
    ax1.set_xticks(range(len(perf_df["model"])))  # Set x-ticks positions
    ax1.set_xticklabels(perf_df["model"], rotation=90)

    # Plot "train-time" first so it's at the bottom
    ax1.bar(
        perf_df["model"],
        perf_df["train-time(mins)"],
        color=color_train,
        label="train-time",
    )

    # Then, plot "eval-time" on top of "train-time"
    ax1.bar(
        perf_df["model"],
        perf_df["eval-time(mins)"],
        bottom=perf_df["train-time(mins)"],
        color=color_eval,
        label="eval-time",
    )

    ax1.tick_params(axis="y")
    ax1.legend(loc="upper left")

    if "meteor" in perf_df.columns:
        ax2 = ax1.twinx()
        color_meteor = "tab:blue"
        ax2.set_ylabel("METEOR", color=color_meteor)
        ax2.plot(
            perf_df["model"],
            perf_df["meteor"],
            color=color_meteor,
            marker="o",
            label="meteor",
        )
        ax2.tick_params(axis="y", labelcolor=color_meteor)
        ax2.legend(loc="upper right")
        ax2.set_ylim(ax2.get_ylim()[0], ylim)

    # Show numbers in bars
    for p in ax1.patches:
        height = p.get_height()
        if height == 0:  # Skip bars with height 0
            continue
        ax1.annotate(
            f"{height:.2f}",
            (p.get_x() + p.get_width() / 2.0, p.get_y() + height),
            ha="center",
            va="center",
            xytext=(0, -10),
            textcoords="offset points",
        )

    fig.tight_layout()
    plt.show()


def translate_via_llm(text):
    base_url = os.getenv("OPENAI_BASE_URL") or "http://localhost:8000/v1"
    llm = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        base_url=base_url,
    )

    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "human",
                "Please translate the following Chinese text into English and provide only the translated content, nothing else.\n{input}",
|
401 |
+
),
|
402 |
+
]
|
403 |
+
)
|
404 |
+
|
405 |
+
chain = prompt | llm
|
406 |
+
response = chain.invoke(
|
407 |
+
{
|
408 |
+
"input": text,
|
409 |
+
}
|
410 |
+
)
|
411 |
+
return response.content
|
412 |
+
|
413 |
+
|
414 |
+
def translate(text, cache_dict):
|
415 |
+
if text in cache_dict:
|
416 |
+
return cache_dict[text]
|
417 |
+
else:
|
418 |
+
translated_text = translate_via_llm(text)
|
419 |
+
cache_dict[text] = translated_text
|
420 |
+
return translated_text
|
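A minimal usage sketch for the reporting helpers above (not part of the commit). It assumes a results CSV laid out the way get_metrics expects: two reference columns (Chinese source and English reference) followed by one prediction column per model; the file path is illustrative.
import pandas as pd
from llm_toolkit.translation_utils import get_metrics, plot_metrics

df = pd.read_csv("results/mac-results.csv")  # illustrative path; any results CSV with this layout
metrics_df = get_metrics(df)  # one row per model with accuracy, METEOR, BLEU-1 and ROUGE-L
plot_metrics(metrics_df)      # grouped bar chart of the three text metrics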
llm_toolkit/tune.py
ADDED
@@ -0,0 +1,143 @@
+import os
+import sys
+import torch
+from dotenv import find_dotenv, load_dotenv
+
+found_dotenv = find_dotenv(".env")
+
+if len(found_dotenv) == 0:
+    found_dotenv = find_dotenv(".env.example")
+print(f"loading env vars from: {found_dotenv}")
+load_dotenv(found_dotenv, override=False)
+
+path = os.path.dirname(found_dotenv)
+print(f"Adding {path} to sys.path")
+sys.path.append(path)
+
+from llm_toolkit.translation_engine import *
+from llm_toolkit.translation_utils import *
+
+
+model_name = os.getenv("MODEL_NAME")
+load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
+eval_base_model = os.getenv("EVAL_BASE_MODEL") == "true"
+eval_fine_tuned = os.getenv("EVAL_FINE_TUNED") == "true"
+save_fine_tuned_model = os.getenv("SAVE_FINE_TUNED") == "true"
+num_train_epochs = int(os.getenv("NUM_TRAIN_EPOCHS") or 0)
+data_path = os.getenv("DATA_PATH")
+results_path = os.getenv("RESULTS_PATH")
+
+max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!
+dtype = (
+    None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
+)
+
+print(
+    model_name,
+    load_in_4bit,
+    max_seq_length,
+    num_train_epochs,
+    dtype,
+    data_path,
+    results_path,
+    eval_base_model,
+    eval_fine_tuned,
+    save_fine_tuned_model,
+)
+
+gpu_stats = torch.cuda.get_device_properties(0)
+start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+print(f"(1) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+print(f"{start_gpu_memory} GB of memory reserved.")
+
+model, tokenizer = load_model(model_name, load_in_4bit=load_in_4bit)
+
+gpu_stats = torch.cuda.get_device_properties(0)
+start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+print(f"(2) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+print(f"{start_gpu_memory} GB of memory reserved.")
+
+datasets = load_translation_dataset(data_path, tokenizer)
+
+if eval_base_model:
+    print("Evaluating base model: " + model_name)
+    predictions = eval_model(model, tokenizer, datasets["test"])
+
+    # calc_metrics(datasets["test"]["english"], predictions, debug=True)
+
+    save_results(
+        model_name,
+        results_path,
+        datasets["test"],
+        predictions,
+        debug=True,
+    )
+
+gpu_stats = torch.cuda.get_device_properties(0)
+start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+print(f"(3) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+print(f"{start_gpu_memory} GB of memory reserved.")
+
+
+def is_bfloat16_supported():
+    return True
+
+
+trainer = load_trainer(
+    model,
+    tokenizer,
+    datasets["train"],
+    num_train_epochs,
+    fp16=not is_bfloat16_supported(),
+    bf16=is_bfloat16_supported(),
+)
+
+gpu_stats = torch.cuda.get_device_properties(0)
+start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+print(f"(4) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+print(f"{start_gpu_memory} GB of memory reserved.")
+
+trainer_stats = trainer.train()
+
+# @title Show final memory and time stats
+used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
+used_percentage = round(used_memory / max_memory * 100, 3)
+lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
+print(f"(5) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
+print(
+    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training."
+)
+print(f"Peak reserved memory = {used_memory} GB.")
+print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
+print(f"Peak reserved memory % of max memory = {used_percentage} %.")
+print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")
+
+if eval_fine_tuned:
+    print("Evaluating fine-tuned model: " + model_name)
+    FastLanguageModel.for_inference(model)  # Enable native 2x faster inference
+    predictions = eval_model(model, tokenizer, datasets["test"])
+
+    # calc_metrics(datasets["test"]["english"], predictions, debug=True)
+
+    save_results(
+        model_name + "(finetuned)",
+        results_path,
+        datasets["test"],
+        predictions,
+        debug=True,
+    )
+
+gpu_stats = torch.cuda.get_device_properties(0)
+start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+print(f"(6) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+print(f"{start_gpu_memory} GB of memory reserved.")
+
+if save_fine_tuned_model:
+    save_model(model, tokenizer)
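tune.py is driven entirely by environment variables loaded from .env (falling back to .env.example): MODEL_NAME, LOAD_IN_4BIT, EVAL_BASE_MODEL, EVAL_FINE_TUNED, SAVE_FINE_TUNED, NUM_TRAIN_EPOCHS, DATA_PATH and RESULTS_PATH. It optionally evaluates the base model, fine-tunes it via load_trainer, optionally re-evaluates the fine-tuned model and saves it via save_model, logging GPU memory usage at each stage.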
notebooks/00_Data_Analysis.ipynb
ADDED
The diff for this file is too large to render. See raw diff.
notebooks/01_Qwen2-0.5B_Unsloth.ipynb
ADDED
The diff for this file is too large to render. See raw diff.
notebooks/02_Qwen2-1.5B_Unsloth.ipynb
ADDED
The diff for this file is too large to render. See raw diff.
notebooks/03_Qwen2-0.5B_1.5B-4bit.ipynb
ADDED
The diff for this file is too large to render. See raw diff.
notebooks/04_tune-small-no-flash-attn.ipynb
ADDED
The diff for this file is too large to render. See raw diff.
notebooks/05_tune-small-with-flash-attn.ipynb
ADDED
The diff for this file is too large to render. See raw diff.
notebooks/06_tune-small-py3.11.ipynb
ADDED
The diff for this file is too large to render. See raw diff.
notebooks/07_tune-lf-py3.11.ipynb
ADDED
The diff for this file is too large to render. See raw diff.
notebooks/07r2_tune-lf-py3.11.ipynb
ADDED
The diff for this file is too large to render. See raw diff.
notebooks/08_eval-lf-py3.11.ipynb
ADDED
The diff for this file is too large to render. See raw diff.
requirements.txt
CHANGED
@@ -1 +1,15 @@
-
+nltk==3.8.1
+python-dotenv==1.0.1
+black==24.4.0
+evaluate==0.4.2
+rouge_score==0.1.2
+pytest==8.2.1
+seaborn==0.13.2
+scikit-learn==1.5.0
+jupyter
+ipywidgets
+packaging
+# triton
+# xformers
+langchain_openai==0.1.13
+wandb==0.17.4
results/experiment-1-results.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfb0c7a3813e9c98c9245c9303b2fb95c1fd7d6a92dd4e0d9d3fe4e4d29a8849
+size 2072299
results/experiment-2-results.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1c99b9bb0c6539a9ff3c9198d730f110c5b6371cba803e1992802beb13e3600
+size 2038783
results/experiment-3-results.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0b8dcb783ed847422ca4f2000b5106742b992537f4b84da6b5ca0b4c22bf0dd
+size 1427300
results/mac-results-no-flash-attn.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89144b0a3e727b326be559637312e353208a7e506b7c0c701ce8e4392e4cbb5e
+size 2129451
results/mac-results-with-flash-attn.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c73be2c390511d0a59090b57c53f0a66c0d4c4648c209ef7155aa97ff73c0b9
+size 1461478
results/mac-results.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7eb1c66dd7162f27a969599ddb3695c3ac82a88bff15cd57d7ed00ca86ab19cd
+size 2072299
results/mac-results_final.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aacf61087ae3b1fd622407c75d0a969b232517c7489841da722e0228bb69a310
+size 2334006
results/mac-results_lf-r2.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25c14d76c8d71ecbce6bc83d641ec4f54f6c0e188fccfcfd8536758a12ed456a
+size 2442353
results/mac-results_lf-r3.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ea9402ad5c87e3b7dcb570cf0a3c0bf33bef093c522d4d2ba6dbf633e21f035
+size 531603
results/mac-results_lf.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5acc087808de5df6839cbf7b170094c6e63445aab4bea15e4be9564b905eb51
+size 3236072
results/mac-results_py3.11.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4adb0922c02cc435858b4ba44b4cdaaee4afe6fcc8721a795d740c36d8d94c2c
+size 1463058
results/mac-results_v3.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bfe9ce9720d0cf67ba118d8b2d82f8f6c0bd0f763a8aa00fc1f43f58e544157
+size 1683953
results/model_training_evaluation_times.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5691ccd7fafb765772c2e5da0ada82bd2f3532459dcfed8517565e7cc0d9f1a8
+size 441
scripts/lf-api.sh
ADDED
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+BASEDIR=$(dirname "$0")
+cd $BASEDIR/../llama-factory
+echo Current Directory:
+pwd
+
+API_PORT=8000 llamafactory-cli api $1
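Once this script has started the OpenAI-compatible API on port 8000, translate_via_llm from llm_toolkit/translation_utils.py can talk to it directly, since its default OPENAI_BASE_URL is http://localhost:8000/v1. A minimal sketch (assumptions: the server is running locally, the served model is used regardless of the hard-coded "gpt-4o" name, and OPENAI_API_KEY only needs a placeholder value for the client):
import os

os.environ["OPENAI_BASE_URL"] = "http://localhost:8000/v1"  # matches API_PORT=8000 above
os.environ.setdefault("OPENAI_API_KEY", "EMPTY")  # placeholder; assumed unchecked by the local server

from llm_toolkit.translation_utils import translate

cache = {}
print(translate("你好，世界。", cache))  # translates the Chinese greeting; repeated calls reuse the cache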
scripts/tune-large.sh
ADDED
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+BASEDIR=$(dirname "$0")
+cd $BASEDIR
+echo Current Directory:
+pwd
+
+nvidia-smi
+uname -a
+cat /etc/os-release
+lscpu
+grep MemTotal /proc/meminfo
+
+# pip install -r requirements.txt
+# FLASH_ATTENTION_FORCE_BUILD=TRUE pip install --upgrade flash-attn
+
+# export MODEL_NAME=unsloth/Qwen2-72B-Instruct-bnb-4bit
+# echo Tuning $MODEL_NAME
+# python tune.py
+
+export MODEL_NAME=unsloth/llama-3-70b-Instruct-bnb-4bit
+echo Tuning $MODEL_NAME
+python tune.py
+
scripts/tune-lf.sh
ADDED
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+BASEDIR=$(dirname "$0")
+cd $BASEDIR/../llama-factory
+echo Current Directory:
+pwd
+
+YAML=$1 python -c 'import os, json, sys, yaml; filename=os.getenv("YAML"); y=yaml.safe_load(open(filename)) ; print(f"{filename}:\n", json.dumps(y, indent=2))'
+llamafactory-cli train $1
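Because $1 is used after the cd into llama-factory/, the config path should be given relative to that directory, for example scripts/tune-lf.sh config/qwen2_0.5b_lora_sft.yaml from the repository root: the script pretty-prints that config and then launches the corresponding LLaMA-Factory LoRA SFT run. Any of the YAML files under llama-factory/config/ can be passed the same way.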