Spaces:
Build error
Build error
tuning mistral cn
Browse files
llama-factory/saves/Llama3.1-70B-Chinese-Chat/trainer_log.jsonl
CHANGED
@@ -21,3 +21,27 @@
|
|
21 |
{"current_steps": 95, "total_steps": 350, "loss": 0.2454, "learning_rate": 9.131193871579975e-05, "epoch": 0.5401563610518835, "percentage": 27.14, "elapsed_time": "4:29:36", "remaining_time": "12:03:41", "throughput": "0.00", "total_tokens": 0}
|
22 |
{"current_steps": 100, "total_steps": 350, "loss": 0.3725, "learning_rate": 8.985662536114613e-05, "epoch": 0.5685856432125089, "percentage": 28.57, "elapsed_time": "4:40:59", "remaining_time": "11:42:29", "throughput": "0.00", "total_tokens": 0}
|
23 |
{"current_steps": 105, "total_steps": 350, "loss": 0.2387, "learning_rate": 8.83022221559489e-05, "epoch": 0.5970149253731343, "percentage": 30.0, "elapsed_time": "4:52:18", "remaining_time": "11:22:04", "throughput": "0.00", "total_tokens": 0}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
{"current_steps": 95, "total_steps": 350, "loss": 0.2454, "learning_rate": 9.131193871579975e-05, "epoch": 0.5401563610518835, "percentage": 27.14, "elapsed_time": "4:29:36", "remaining_time": "12:03:41", "throughput": "0.00", "total_tokens": 0}
|
22 |
{"current_steps": 100, "total_steps": 350, "loss": 0.3725, "learning_rate": 8.985662536114613e-05, "epoch": 0.5685856432125089, "percentage": 28.57, "elapsed_time": "4:40:59", "remaining_time": "11:42:29", "throughput": "0.00", "total_tokens": 0}
|
23 |
{"current_steps": 105, "total_steps": 350, "loss": 0.2387, "learning_rate": 8.83022221559489e-05, "epoch": 0.5970149253731343, "percentage": 30.0, "elapsed_time": "4:52:18", "remaining_time": "11:22:04", "throughput": "0.00", "total_tokens": 0}
|
24 |
+
{"current_steps": 105, "total_steps": 350, "eval_loss": 0.23953676223754883, "epoch": 0.5970149253731343, "percentage": 30.0, "elapsed_time": "5:18:59", "remaining_time": "12:24:19", "throughput": "0.00", "total_tokens": 0}
|
25 |
+
{"current_steps": 110, "total_steps": 350, "loss": 0.2324, "learning_rate": 8.665259359149132e-05, "epoch": 0.6254442075337597, "percentage": 31.43, "elapsed_time": "5:30:28", "remaining_time": "12:01:02", "throughput": "0.00", "total_tokens": 0}
|
26 |
+
{"current_steps": 115, "total_steps": 350, "loss": 0.2409, "learning_rate": 8.491184090430364e-05, "epoch": 0.6538734896943852, "percentage": 32.86, "elapsed_time": "5:41:50", "remaining_time": "11:38:31", "throughput": "0.00", "total_tokens": 0}
|
27 |
+
{"current_steps": 120, "total_steps": 350, "loss": 0.2512, "learning_rate": 8.308429187984297e-05, "epoch": 0.6823027718550106, "percentage": 34.29, "elapsed_time": "5:53:15", "remaining_time": "11:17:05", "throughput": "0.00", "total_tokens": 0}
|
28 |
+
{"current_steps": 125, "total_steps": 350, "loss": 0.2347, "learning_rate": 8.117449009293668e-05, "epoch": 0.7107320540156361, "percentage": 35.71, "elapsed_time": "6:04:36", "remaining_time": "10:56:17", "throughput": "0.00", "total_tokens": 0}
|
29 |
+
{"current_steps": 130, "total_steps": 350, "loss": 0.2357, "learning_rate": 7.91871836117395e-05, "epoch": 0.7391613361762616, "percentage": 37.14, "elapsed_time": "6:16:03", "remaining_time": "10:36:23", "throughput": "0.00", "total_tokens": 0}
|
30 |
+
{"current_steps": 135, "total_steps": 350, "loss": 0.2607, "learning_rate": 7.712731319328798e-05, "epoch": 0.767590618336887, "percentage": 38.57, "elapsed_time": "6:27:28", "remaining_time": "10:17:04", "throughput": "0.00", "total_tokens": 0}
|
31 |
+
{"current_steps": 140, "total_steps": 350, "loss": 0.2508, "learning_rate": 7.500000000000001e-05, "epoch": 0.7960199004975125, "percentage": 40.0, "elapsed_time": "6:38:57", "remaining_time": "9:58:25", "throughput": "0.00", "total_tokens": 0}
|
32 |
+
{"current_steps": 140, "total_steps": 350, "eval_loss": 0.22614409029483795, "epoch": 0.7960199004975125, "percentage": 40.0, "elapsed_time": "7:05:38", "remaining_time": "10:38:28", "throughput": "0.00", "total_tokens": 0}
|
33 |
+
{"current_steps": 145, "total_steps": 350, "loss": 0.2338, "learning_rate": 7.281053286765815e-05, "epoch": 0.8244491826581379, "percentage": 41.43, "elapsed_time": "7:17:07", "remaining_time": "10:18:00", "throughput": "0.00", "total_tokens": 0}
|
34 |
+
{"current_steps": 150, "total_steps": 350, "loss": 0.2547, "learning_rate": 7.056435515653059e-05, "epoch": 0.8528784648187633, "percentage": 42.86, "elapsed_time": "7:28:28", "remaining_time": "9:57:58", "throughput": "0.00", "total_tokens": 0}
|
35 |
+
{"current_steps": 155, "total_steps": 350, "loss": 0.2481, "learning_rate": 6.826705121831976e-05, "epoch": 0.8813077469793887, "percentage": 44.29, "elapsed_time": "7:39:56", "remaining_time": "9:38:38", "throughput": "0.00", "total_tokens": 0}
|
36 |
+
{"current_steps": 160, "total_steps": 350, "loss": 0.2431, "learning_rate": 6.592433251258423e-05, "epoch": 0.9097370291400142, "percentage": 45.71, "elapsed_time": "7:51:18", "remaining_time": "9:19:40", "throughput": "0.00", "total_tokens": 0}
|
37 |
+
{"current_steps": 165, "total_steps": 350, "loss": 0.228, "learning_rate": 6.354202340715026e-05, "epoch": 0.9381663113006397, "percentage": 47.14, "elapsed_time": "8:02:39", "remaining_time": "9:01:09", "throughput": "0.00", "total_tokens": 0}
|
38 |
+
{"current_steps": 170, "total_steps": 350, "loss": 0.2229, "learning_rate": 6.112604669781572e-05, "epoch": 0.9665955934612651, "percentage": 48.57, "elapsed_time": "8:14:06", "remaining_time": "8:43:09", "throughput": "0.00", "total_tokens": 0}
|
39 |
+
{"current_steps": 175, "total_steps": 350, "loss": 0.2356, "learning_rate": 5.868240888334653e-05, "epoch": 0.9950248756218906, "percentage": 50.0, "elapsed_time": "8:25:33", "remaining_time": "8:25:33", "throughput": "0.00", "total_tokens": 0}
|
40 |
+
{"current_steps": 175, "total_steps": 350, "eval_loss": 0.21728534996509552, "epoch": 0.9950248756218906, "percentage": 50.0, "elapsed_time": "8:52:16", "remaining_time": "8:52:16", "throughput": "0.00", "total_tokens": 0}
|
41 |
+
{"current_steps": 180, "total_steps": 350, "loss": 0.2172, "learning_rate": 5.621718523237427e-05, "epoch": 1.023454157782516, "percentage": 51.43, "elapsed_time": "9:03:32", "remaining_time": "8:33:21", "throughput": "0.00", "total_tokens": 0}
|
42 |
+
{"current_steps": 185, "total_steps": 350, "loss": 0.2047, "learning_rate": 5.373650467932122e-05, "epoch": 1.0518834399431414, "percentage": 52.86, "elapsed_time": "9:14:59", "remaining_time": "8:14:59", "throughput": "0.00", "total_tokens": 0}
|
43 |
+
{"current_steps": 190, "total_steps": 350, "loss": 0.2212, "learning_rate": 5.124653458690365e-05, "epoch": 1.080312722103767, "percentage": 54.29, "elapsed_time": "9:26:29", "remaining_time": "7:57:02", "throughput": "0.00", "total_tokens": 0}
|
44 |
+
{"current_steps": 195, "total_steps": 350, "loss": 0.211, "learning_rate": 4.875346541309637e-05, "epoch": 1.1087420042643923, "percentage": 55.71, "elapsed_time": "9:37:51", "remaining_time": "7:39:19", "throughput": "0.00", "total_tokens": 0}
|
45 |
+
{"current_steps": 200, "total_steps": 350, "loss": 0.2332, "learning_rate": 4.626349532067879e-05, "epoch": 1.1371712864250179, "percentage": 57.14, "elapsed_time": "9:49:13", "remaining_time": "7:21:54", "throughput": "0.00", "total_tokens": 0}
|
46 |
+
{"current_steps": 205, "total_steps": 350, "loss": 0.2216, "learning_rate": 4.378281476762576e-05, "epoch": 1.1656005685856432, "percentage": 58.57, "elapsed_time": "10:00:34", "remaining_time": "7:04:47", "throughput": "0.00", "total_tokens": 0}
|
47 |
+
{"current_steps": 210, "total_steps": 350, "loss": 0.2079, "learning_rate": 4.131759111665349e-05, "epoch": 1.1940298507462686, "percentage": 60.0, "elapsed_time": "10:11:55", "remaining_time": "6:47:56", "throughput": "0.00", "total_tokens": 0}
|
llm_toolkit/setup_lf.py
CHANGED
@@ -56,5 +56,5 @@ file.close()
|
|
56 |
y = yaml.safe_load(open(filename))
|
57 |
print(f"{filename}:\n", json.dumps(y, indent=2))
|
58 |
|
59 |
-
dataset = load_alpaca_data(data_path, using_p1=False)
|
60 |
-
print_row_details(dataset, [0, -1])
|
|
|
56 |
y = yaml.safe_load(open(filename))
|
57 |
print(f"{filename}:\n", json.dumps(y, indent=2))
|
58 |
|
59 |
+
#dataset = load_alpaca_data(data_path, using_p1=False)
|
60 |
+
#print_row_details(dataset, [0, -1])
|
scripts/tune-lf_v2.sh
CHANGED
@@ -9,7 +9,7 @@ export ORG_NAME=$1
|
|
9 |
export MODEL_NAME=$2
|
10 |
export CHAT_TEMPLATE=$3
|
11 |
export DATA_PATH=../datasets/mgtv
|
12 |
-
export YAML=config/
|
13 |
|
14 |
export PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True"
|
15 |
|
|
|
9 |
export MODEL_NAME=$2
|
10 |
export CHAT_TEMPLATE=$3
|
11 |
export DATA_PATH=../datasets/mgtv
|
12 |
+
export YAML=config/mgtv_template.yaml
|
13 |
|
14 |
export PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True"
|
15 |
|
scripts/tune-lf_v2_4bit.sh
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/sh
|
2 |
+
|
3 |
+
BASEDIR=$(dirname "$0")
|
4 |
+
cd $BASEDIR/../llama-factory/
|
5 |
+
echo Current Directory:
|
6 |
+
pwd
|
7 |
+
|
8 |
+
export ORG_NAME=$1
|
9 |
+
export MODEL_NAME=$2
|
10 |
+
export CHAT_TEMPLATE=$3
|
11 |
+
export DATA_PATH=../datasets/mgtv
|
12 |
+
export YAML=config/mgtv_template_4bit.yaml
|
13 |
+
|
14 |
+
export PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True"
|
15 |
+
|
16 |
+
python ../llm_toolkit/setup_lf.py
|
17 |
+
llamafactory-cli train config/models/$MODEL_NAME.yaml
|
scripts/tune-mgtv-4bit.sh
CHANGED
@@ -19,7 +19,7 @@ export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
|
|
19 |
|
20 |
export LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_4bit.csv
|
21 |
|
22 |
-
$BASEDIR/scripts/tune-
|
23 |
|
24 |
-
$BASEDIR/scripts/tune-
|
25 |
|
|
|
19 |
|
20 |
export LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_4bit.csv
|
21 |
|
22 |
+
$BASEDIR/scripts/tune-lf_v2_4bit.sh Qwen Qwen2-72B-Instruct qwen
|
23 |
|
24 |
+
$BASEDIR/scripts/tune-lf_v2_4bit.sh shenzhi-wang Llama3.1-70B-Chinese-Chat llama3
|
25 |
|
scripts/tune-mgtv.sh
CHANGED
@@ -1 +1 @@
|
|
1 |
-
tune-mgtv-
|
|
|
1 |
+
tune-mgtv-bf16.sh
|