{
  "best_metric": 3.6455252170562744,
  "best_model_checkpoint": "/Users/frapadovani/Desktop/babyLM_controlled/models_trained/convergence_french/random_sentence_french/checkpoint-8000",
  "epoch": 0.23687561069493382,
  "eval_steps": 2000,
  "global_step": 8000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.059218902673733455,
      "grad_norm": 1.0812722444534302,
      "learning_rate": 0.0001,
      "loss": 4.7523,
      "step": 2000
    },
    {
      "epoch": 0.059218902673733455,
      "eval_loss": 4.0236735343933105,
      "eval_runtime": 5.173,
      "eval_samples_per_second": 425.477,
      "eval_steps_per_second": 26.677,
      "step": 2000
    },
    {
      "epoch": 0.11843780534746691,
      "grad_norm": 1.2557106018066406,
      "learning_rate": 0.0001,
      "loss": 3.8958,
      "step": 4000
    },
    {
      "epoch": 0.11843780534746691,
      "eval_loss": 3.8087611198425293,
      "eval_runtime": 5.2666,
      "eval_samples_per_second": 417.913,
      "eval_steps_per_second": 26.203,
      "step": 4000
    },
    {
      "epoch": 0.17765670802120037,
      "grad_norm": 1.3326870203018188,
      "learning_rate": 0.0001,
      "loss": 3.7344,
      "step": 6000
    },
    {
      "epoch": 0.17765670802120037,
      "eval_loss": 3.704986333847046,
      "eval_runtime": 5.2277,
      "eval_samples_per_second": 421.023,
      "eval_steps_per_second": 26.398,
      "step": 6000
    },
    {
      "epoch": 0.23687561069493382,
      "grad_norm": 1.38993239402771,
      "learning_rate": 0.0001,
      "loss": 3.6379,
      "step": 8000
    },
    {
      "epoch": 0.23687561069493382,
      "eval_loss": 3.6455252170562744,
      "eval_runtime": 5.1485,
      "eval_samples_per_second": 427.5,
      "eval_steps_per_second": 26.804,
      "step": 8000
    }
  ],
  "logging_steps": 2000,
  "max_steps": 33773,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 2000,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.001
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 155952611328000.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}