Ning Sun committed
Commit 6ac943d · 1 Parent(s): 8c77083

initial upload
fold0/config.yaml ADDED
@@ -0,0 +1,208 @@
+ seed_everything: 42
+ trainer:
+   accelerator: auto
+   strategy:
+     class_path: lightning.pytorch.strategies.DDPStrategy
+     init_args:
+       accelerator: null
+       parallel_devices: null
+       cluster_environment: null
+       checkpoint_io: null
+       precision_plugin: null
+       ddp_comm_state: null
+       ddp_comm_hook: null
+       ddp_comm_wrapper: null
+       model_averaging_period: null
+       process_group_backend: null
+       timeout: 0:30:00
+       start_method: popen
+       output_device: null
+       dim: 0
+       broadcast_buffers: true
+       process_group: null
+       bucket_cap_mb: 25
+       find_unused_parameters: false
+       check_reduction: false
+       gradient_as_bucket_view: false
+       static_graph: false
+       delay_all_reduce_named_params: null
+       param_to_hook_all_reduce: null
+       mixed_precision: null
+       device_mesh: null
+   devices: auto
+   num_nodes: 2
+   precision: 32
+   logger:
+     class_path: lightning.pytorch.loggers.WandbLogger
+     init_args:
+       name: BLAT_ECOLX_Gonzalez_2019_indels
+       save_dir: logs
+       version: null
+       offline: false
+       dir: null
+       id: null
+       anonymous: null
+       project: GBFT_PROTEINFM_DMS
+       log_model: false
+       experiment: null
+       prefix: ''
+       checkpoint_name: null
+       job_type: null
+       config: null
+       entity: null
+       reinit: null
+       tags: null
+       group: null
+       notes: null
+       magic: null
+       config_exclude_keys: null
+       config_include_keys: null
+       mode: null
+       allow_val_change: null
+       resume: null
+       force: null
+       tensorboard: null
+       sync_tensorboard: null
+       monitor_gym: null
+       save_code: true
+       settings: null
+   callbacks:
+   - class_path: lightning.pytorch.callbacks.LearningRateMonitor
+     init_args:
+       logging_interval: step
+       log_momentum: false
+       log_weight_decay: false
+   - class_path: lightning.pytorch.callbacks.ModelCheckpoint # save ckpt at the end of each epoch, and save the best val_spearman ckpt
+     init_args:
+       dirpath: null
+       filename: epoch_{epoch}-val_mcc:{val_spearman:.3f}
+       monitor: val_spearman
+       verbose: false
+       save_last: true
+       save_top_k: 1
+       save_weights_only: false
+       mode: max
+       auto_insert_metric_name: true
+       every_n_train_steps: null
+       train_time_interval: null
+       every_n_epochs: 1
+       save_on_train_epoch_end: null
+       enable_version_counter: true
+   - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping
+     dict_kwargs:
+       monitor: val_spearman
+       mode: max
+       patience: 5
+   fast_dev_run: false
+   max_epochs: null
+   min_epochs: null
+   max_steps: 10000
+   min_steps: null
+   max_time: null
+   limit_train_batches: null
+   limit_val_batches: null
+   limit_test_batches: null
+   limit_predict_batches: null
+   overfit_batches: 0.0
+   val_check_interval: null
+   check_val_every_n_epoch: 1
+   num_sanity_val_steps: null
+   log_every_n_steps: 50
+   enable_checkpointing: null
+   enable_progress_bar: null
+   enable_model_summary: null
+   accumulate_grad_batches: 1
+   gradient_clip_val: 0.1
+   gradient_clip_algorithm: null
+   deterministic: null
+   benchmark: null
+   inference_mode: true
+   use_distributed_sampler: true
+   profiler:
+     class_path: lightning.pytorch.profilers.PyTorchProfiler
+     init_args:
+       dirpath: null
+       filename: null
+       group_by_input_shapes: false
+       emit_nvtx: false
+       export_to_chrome: true
+       row_limit: 20
+       sort_by_key: null
+       record_module_names: true
+       table_kwargs: null
+       record_shapes: false
+     dict_kwargs:
+       profile_memory: true
+   detect_anomaly: false
+   barebones: false
+   plugins: null
+   sync_batchnorm: false
+   reload_dataloaders_every_n_epochs: 0
+   default_root_dir: logs
+ model:
+   class_path: genbio_finetune.tasks.SequenceRegression
+   init_args:
+     backbone:
+       class_path: genbio_finetune.models.proteinfm
+       init_args:
+         from_scratch: false
+         use_peft: true
+         save_peft_only: true
+         lora_r: 16
+         lora_alpha: 32
+         lora_dropout: 0.05
+         config_overwrites: null
+         model_init_args: null
+         max_length: 2048
+     adapter:
+       class_path: genbio_finetune.models.MLPPoolAdapter
+       init_args:
+         pooling: mean_pooling
+         hidden_sizes:
+         - 128
+         bias: true
+         dropout: 0.1
+         dropout_in_middle: false
+     optimizer:
+       class_path: torch.optim.AdamW
+       init_args:
+         lr: 0.0001
+         betas:
+         - 0.9
+         - 0.95
+         eps: 1.0e-08
+         weight_decay: 0.01
+         amsgrad: false
+         maximize: false
+         foreach: null
+         capturable: false
+         differentiable: false
+         fused: null
+     lr_scheduler:
+       class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
+       init_args:
+         warmup_ratio: 0.05
+     strict_loading: true
+     reset_optimizer_states: false
+ data:
+   class_path: genbio_finetune.data.DMSFitnessPrediction
+   init_args:
+     path: genbio-ai/ProteinGYM-DMS
+     mutation_type: indels
+     task: BLAT_ECOLX_Gonzalez_2019_indels
+     normalize: true
+     train_split_name: train
+     test_split_files: null
+     valid_split_files: null
+     random_seed: 42
+     batch_size: 2
+     shuffle: true
+     sampler: null
+     num_workers: 0
+     pin_memory: true
+     persistent_workers: false
+     cv_num_folds: 5
+     cv_test_fold_id: 0
+     cv_enable_val_fold: true
+     cv_fold_id_col: fold_id
+ ckpt_path: null
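
All five config.yaml files added in this commit are identical except for data.init_args.cv_test_fold_id (0 through 4). A minimal sketch, assuming PyYAML is available and fold0/config.yaml is used as the template (the script name is hypothetical), of how the remaining fold configs could be derived:

# derive_fold_configs.py -- illustrative only
import copy
import pathlib
import yaml

# Load the fold-0 config as the shared template.
template = yaml.safe_load(pathlib.Path("fold0/config.yaml").read_text())

for fold in range(1, 5):
    cfg = copy.deepcopy(template)
    # The held-out test fold is the only field that differs between folds.
    cfg["data"]["init_args"]["cv_test_fold_id"] = fold
    out = pathlib.Path(f"fold{fold}") / "config.yaml"
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(yaml.safe_dump(cfg, sort_keys=False))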
fold0/model.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:444cce4d074864a21f4e5a7389e77cb278911182534318b732b87423ee629d21
+ size 147655384
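
model.ckpt is stored through Git LFS, so the file above is only a pointer recording the object's sha256 and size. A minimal sketch, assuming the huggingface_hub client is installed, for resolving the actual checkpoint; the repo_id below is a placeholder and is not recorded in this commit:

# fetch_ckpt.py -- illustrative only
from huggingface_hub import hf_hub_download

repo_id = "<namespace>/<repo-name>"  # placeholder; substitute the real repository
local_path = hf_hub_download(repo_id=repo_id, filename="fold0/model.ckpt")
print(local_path)  # local cache path of the downloaded checkpoint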
fold1/config.yaml ADDED
@@ -0,0 +1,208 @@
+ seed_everything: 42
+ trainer:
+   accelerator: auto
+   strategy:
+     class_path: lightning.pytorch.strategies.DDPStrategy
+     init_args:
+       accelerator: null
+       parallel_devices: null
+       cluster_environment: null
+       checkpoint_io: null
+       precision_plugin: null
+       ddp_comm_state: null
+       ddp_comm_hook: null
+       ddp_comm_wrapper: null
+       model_averaging_period: null
+       process_group_backend: null
+       timeout: 0:30:00
+       start_method: popen
+       output_device: null
+       dim: 0
+       broadcast_buffers: true
+       process_group: null
+       bucket_cap_mb: 25
+       find_unused_parameters: false
+       check_reduction: false
+       gradient_as_bucket_view: false
+       static_graph: false
+       delay_all_reduce_named_params: null
+       param_to_hook_all_reduce: null
+       mixed_precision: null
+       device_mesh: null
+   devices: auto
+   num_nodes: 2
+   precision: 32
+   logger:
+     class_path: lightning.pytorch.loggers.WandbLogger
+     init_args:
+       name: BLAT_ECOLX_Gonzalez_2019_indels
+       save_dir: logs
+       version: null
+       offline: false
+       dir: null
+       id: null
+       anonymous: null
+       project: GBFT_PROTEINFM_DMS
+       log_model: false
+       experiment: null
+       prefix: ''
+       checkpoint_name: null
+       job_type: null
+       config: null
+       entity: null
+       reinit: null
+       tags: null
+       group: null
+       notes: null
+       magic: null
+       config_exclude_keys: null
+       config_include_keys: null
+       mode: null
+       allow_val_change: null
+       resume: null
+       force: null
+       tensorboard: null
+       sync_tensorboard: null
+       monitor_gym: null
+       save_code: true
+       settings: null
+   callbacks:
+   - class_path: lightning.pytorch.callbacks.LearningRateMonitor
+     init_args:
+       logging_interval: step
+       log_momentum: false
+       log_weight_decay: false
+   - class_path: lightning.pytorch.callbacks.ModelCheckpoint # save ckpt at the end of each epoch, and save the best val_spearman ckpt
+     init_args:
+       dirpath: null
+       filename: epoch_{epoch}-val_mcc:{val_spearman:.3f}
+       monitor: val_spearman
+       verbose: false
+       save_last: true
+       save_top_k: 1
+       save_weights_only: false
+       mode: max
+       auto_insert_metric_name: true
+       every_n_train_steps: null
+       train_time_interval: null
+       every_n_epochs: 1
+       save_on_train_epoch_end: null
+       enable_version_counter: true
+   - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping
+     dict_kwargs:
+       monitor: val_spearman
+       mode: max
+       patience: 5
+   fast_dev_run: false
+   max_epochs: null
+   min_epochs: null
+   max_steps: 10000
+   min_steps: null
+   max_time: null
+   limit_train_batches: null
+   limit_val_batches: null
+   limit_test_batches: null
+   limit_predict_batches: null
+   overfit_batches: 0.0
+   val_check_interval: null
+   check_val_every_n_epoch: 1
+   num_sanity_val_steps: null
+   log_every_n_steps: 50
+   enable_checkpointing: null
+   enable_progress_bar: null
+   enable_model_summary: null
+   accumulate_grad_batches: 1
+   gradient_clip_val: 0.1
+   gradient_clip_algorithm: null
+   deterministic: null
+   benchmark: null
+   inference_mode: true
+   use_distributed_sampler: true
+   profiler:
+     class_path: lightning.pytorch.profilers.PyTorchProfiler
+     init_args:
+       dirpath: null
+       filename: null
+       group_by_input_shapes: false
+       emit_nvtx: false
+       export_to_chrome: true
+       row_limit: 20
+       sort_by_key: null
+       record_module_names: true
+       table_kwargs: null
+       record_shapes: false
+     dict_kwargs:
+       profile_memory: true
+   detect_anomaly: false
+   barebones: false
+   plugins: null
+   sync_batchnorm: false
+   reload_dataloaders_every_n_epochs: 0
+   default_root_dir: logs
+ model:
+   class_path: genbio_finetune.tasks.SequenceRegression
+   init_args:
+     backbone:
+       class_path: genbio_finetune.models.proteinfm
+       init_args:
+         from_scratch: false
+         use_peft: true
+         save_peft_only: true
+         lora_r: 16
+         lora_alpha: 32
+         lora_dropout: 0.05
+         config_overwrites: null
+         model_init_args: null
+         max_length: 2048
+     adapter:
+       class_path: genbio_finetune.models.MLPPoolAdapter
+       init_args:
+         pooling: mean_pooling
+         hidden_sizes:
+         - 128
+         bias: true
+         dropout: 0.1
+         dropout_in_middle: false
+     optimizer:
+       class_path: torch.optim.AdamW
+       init_args:
+         lr: 0.0001
+         betas:
+         - 0.9
+         - 0.95
+         eps: 1.0e-08
+         weight_decay: 0.01
+         amsgrad: false
+         maximize: false
+         foreach: null
+         capturable: false
+         differentiable: false
+         fused: null
+     lr_scheduler:
+       class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
+       init_args:
+         warmup_ratio: 0.05
+     strict_loading: true
+     reset_optimizer_states: false
+ data:
+   class_path: genbio_finetune.data.DMSFitnessPrediction
+   init_args:
+     path: genbio-ai/ProteinGYM-DMS
+     mutation_type: indels
+     task: BLAT_ECOLX_Gonzalez_2019_indels
+     normalize: true
+     train_split_name: train
+     test_split_files: null
+     valid_split_files: null
+     random_seed: 42
+     batch_size: 2
+     shuffle: true
+     sampler: null
+     num_workers: 0
+     pin_memory: true
+     persistent_workers: false
+     cv_num_folds: 5
+     cv_test_fold_id: 1
+     cv_enable_val_fold: true
+     cv_fold_id_col: fold_id
+ ckpt_path: null
fold1/model.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:09a7f6e5597b190f26e447ad584ee7b85a40bb21b278f85de1dac8d7e8bf2caf
+ size 147655192
fold2/config.yaml ADDED
@@ -0,0 +1,208 @@
+ seed_everything: 42
+ trainer:
+   accelerator: auto
+   strategy:
+     class_path: lightning.pytorch.strategies.DDPStrategy
+     init_args:
+       accelerator: null
+       parallel_devices: null
+       cluster_environment: null
+       checkpoint_io: null
+       precision_plugin: null
+       ddp_comm_state: null
+       ddp_comm_hook: null
+       ddp_comm_wrapper: null
+       model_averaging_period: null
+       process_group_backend: null
+       timeout: 0:30:00
+       start_method: popen
+       output_device: null
+       dim: 0
+       broadcast_buffers: true
+       process_group: null
+       bucket_cap_mb: 25
+       find_unused_parameters: false
+       check_reduction: false
+       gradient_as_bucket_view: false
+       static_graph: false
+       delay_all_reduce_named_params: null
+       param_to_hook_all_reduce: null
+       mixed_precision: null
+       device_mesh: null
+   devices: auto
+   num_nodes: 2
+   precision: 32
+   logger:
+     class_path: lightning.pytorch.loggers.WandbLogger
+     init_args:
+       name: BLAT_ECOLX_Gonzalez_2019_indels
+       save_dir: logs
+       version: null
+       offline: false
+       dir: null
+       id: null
+       anonymous: null
+       project: GBFT_PROTEINFM_DMS
+       log_model: false
+       experiment: null
+       prefix: ''
+       checkpoint_name: null
+       job_type: null
+       config: null
+       entity: null
+       reinit: null
+       tags: null
+       group: null
+       notes: null
+       magic: null
+       config_exclude_keys: null
+       config_include_keys: null
+       mode: null
+       allow_val_change: null
+       resume: null
+       force: null
+       tensorboard: null
+       sync_tensorboard: null
+       monitor_gym: null
+       save_code: true
+       settings: null
+   callbacks:
+   - class_path: lightning.pytorch.callbacks.LearningRateMonitor
+     init_args:
+       logging_interval: step
+       log_momentum: false
+       log_weight_decay: false
+   - class_path: lightning.pytorch.callbacks.ModelCheckpoint # save ckpt at the end of each epoch, and save the best val_spearman ckpt
+     init_args:
+       dirpath: null
+       filename: epoch_{epoch}-val_mcc:{val_spearman:.3f}
+       monitor: val_spearman
+       verbose: false
+       save_last: true
+       save_top_k: 1
+       save_weights_only: false
+       mode: max
+       auto_insert_metric_name: true
+       every_n_train_steps: null
+       train_time_interval: null
+       every_n_epochs: 1
+       save_on_train_epoch_end: null
+       enable_version_counter: true
+   - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping
+     dict_kwargs:
+       monitor: val_spearman
+       mode: max
+       patience: 5
+   fast_dev_run: false
+   max_epochs: null
+   min_epochs: null
+   max_steps: 10000
+   min_steps: null
+   max_time: null
+   limit_train_batches: null
+   limit_val_batches: null
+   limit_test_batches: null
+   limit_predict_batches: null
+   overfit_batches: 0.0
+   val_check_interval: null
+   check_val_every_n_epoch: 1
+   num_sanity_val_steps: null
+   log_every_n_steps: 50
+   enable_checkpointing: null
+   enable_progress_bar: null
+   enable_model_summary: null
+   accumulate_grad_batches: 1
+   gradient_clip_val: 0.1
+   gradient_clip_algorithm: null
+   deterministic: null
+   benchmark: null
+   inference_mode: true
+   use_distributed_sampler: true
+   profiler:
+     class_path: lightning.pytorch.profilers.PyTorchProfiler
+     init_args:
+       dirpath: null
+       filename: null
+       group_by_input_shapes: false
+       emit_nvtx: false
+       export_to_chrome: true
+       row_limit: 20
+       sort_by_key: null
+       record_module_names: true
+       table_kwargs: null
+       record_shapes: false
+     dict_kwargs:
+       profile_memory: true
+   detect_anomaly: false
+   barebones: false
+   plugins: null
+   sync_batchnorm: false
+   reload_dataloaders_every_n_epochs: 0
+   default_root_dir: logs
+ model:
+   class_path: genbio_finetune.tasks.SequenceRegression
+   init_args:
+     backbone:
+       class_path: genbio_finetune.models.proteinfm
+       init_args:
+         from_scratch: false
+         use_peft: true
+         save_peft_only: true
+         lora_r: 16
+         lora_alpha: 32
+         lora_dropout: 0.05
+         config_overwrites: null
+         model_init_args: null
+         max_length: 2048
+     adapter:
+       class_path: genbio_finetune.models.MLPPoolAdapter
+       init_args:
+         pooling: mean_pooling
+         hidden_sizes:
+         - 128
+         bias: true
+         dropout: 0.1
+         dropout_in_middle: false
+     optimizer:
+       class_path: torch.optim.AdamW
+       init_args:
+         lr: 0.0001
+         betas:
+         - 0.9
+         - 0.95
+         eps: 1.0e-08
+         weight_decay: 0.01
+         amsgrad: false
+         maximize: false
+         foreach: null
+         capturable: false
+         differentiable: false
+         fused: null
+     lr_scheduler:
+       class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
+       init_args:
+         warmup_ratio: 0.05
+     strict_loading: true
+     reset_optimizer_states: false
+ data:
+   class_path: genbio_finetune.data.DMSFitnessPrediction
+   init_args:
+     path: genbio-ai/ProteinGYM-DMS
+     mutation_type: indels
+     task: BLAT_ECOLX_Gonzalez_2019_indels
+     normalize: true
+     train_split_name: train
+     test_split_files: null
+     valid_split_files: null
+     random_seed: 42
+     batch_size: 2
+     shuffle: true
+     sampler: null
+     num_workers: 0
+     pin_memory: true
+     persistent_workers: false
+     cv_num_folds: 5
+     cv_test_fold_id: 2
+     cv_enable_val_fold: true
+     cv_fold_id_col: fold_id
+ ckpt_path: null
fold2/model.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f39d7d180a9996038632c1aac19de2f20e84de2377df4c8b9f0b1be8a5c57f0
+ size 147655192
fold3/config.yaml ADDED
@@ -0,0 +1,208 @@
+ seed_everything: 42
+ trainer:
+   accelerator: auto
+   strategy:
+     class_path: lightning.pytorch.strategies.DDPStrategy
+     init_args:
+       accelerator: null
+       parallel_devices: null
+       cluster_environment: null
+       checkpoint_io: null
+       precision_plugin: null
+       ddp_comm_state: null
+       ddp_comm_hook: null
+       ddp_comm_wrapper: null
+       model_averaging_period: null
+       process_group_backend: null
+       timeout: 0:30:00
+       start_method: popen
+       output_device: null
+       dim: 0
+       broadcast_buffers: true
+       process_group: null
+       bucket_cap_mb: 25
+       find_unused_parameters: false
+       check_reduction: false
+       gradient_as_bucket_view: false
+       static_graph: false
+       delay_all_reduce_named_params: null
+       param_to_hook_all_reduce: null
+       mixed_precision: null
+       device_mesh: null
+   devices: auto
+   num_nodes: 2
+   precision: 32
+   logger:
+     class_path: lightning.pytorch.loggers.WandbLogger
+     init_args:
+       name: BLAT_ECOLX_Gonzalez_2019_indels
+       save_dir: logs
+       version: null
+       offline: false
+       dir: null
+       id: null
+       anonymous: null
+       project: GBFT_PROTEINFM_DMS
+       log_model: false
+       experiment: null
+       prefix: ''
+       checkpoint_name: null
+       job_type: null
+       config: null
+       entity: null
+       reinit: null
+       tags: null
+       group: null
+       notes: null
+       magic: null
+       config_exclude_keys: null
+       config_include_keys: null
+       mode: null
+       allow_val_change: null
+       resume: null
+       force: null
+       tensorboard: null
+       sync_tensorboard: null
+       monitor_gym: null
+       save_code: true
+       settings: null
+   callbacks:
+   - class_path: lightning.pytorch.callbacks.LearningRateMonitor
+     init_args:
+       logging_interval: step
+       log_momentum: false
+       log_weight_decay: false
+   - class_path: lightning.pytorch.callbacks.ModelCheckpoint # save ckpt at the end of each epoch, and save the best val_spearman ckpt
+     init_args:
+       dirpath: null
+       filename: epoch_{epoch}-val_mcc:{val_spearman:.3f}
+       monitor: val_spearman
+       verbose: false
+       save_last: true
+       save_top_k: 1
+       save_weights_only: false
+       mode: max
+       auto_insert_metric_name: true
+       every_n_train_steps: null
+       train_time_interval: null
+       every_n_epochs: 1
+       save_on_train_epoch_end: null
+       enable_version_counter: true
+   - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping
+     dict_kwargs:
+       monitor: val_spearman
+       mode: max
+       patience: 5
+   fast_dev_run: false
+   max_epochs: null
+   min_epochs: null
+   max_steps: 10000
+   min_steps: null
+   max_time: null
+   limit_train_batches: null
+   limit_val_batches: null
+   limit_test_batches: null
+   limit_predict_batches: null
+   overfit_batches: 0.0
+   val_check_interval: null
+   check_val_every_n_epoch: 1
+   num_sanity_val_steps: null
+   log_every_n_steps: 50
+   enable_checkpointing: null
+   enable_progress_bar: null
+   enable_model_summary: null
+   accumulate_grad_batches: 1
+   gradient_clip_val: 0.1
+   gradient_clip_algorithm: null
+   deterministic: null
+   benchmark: null
+   inference_mode: true
+   use_distributed_sampler: true
+   profiler:
+     class_path: lightning.pytorch.profilers.PyTorchProfiler
+     init_args:
+       dirpath: null
+       filename: null
+       group_by_input_shapes: false
+       emit_nvtx: false
+       export_to_chrome: true
+       row_limit: 20
+       sort_by_key: null
+       record_module_names: true
+       table_kwargs: null
+       record_shapes: false
+     dict_kwargs:
+       profile_memory: true
+   detect_anomaly: false
+   barebones: false
+   plugins: null
+   sync_batchnorm: false
+   reload_dataloaders_every_n_epochs: 0
+   default_root_dir: logs
+ model:
+   class_path: genbio_finetune.tasks.SequenceRegression
+   init_args:
+     backbone:
+       class_path: genbio_finetune.models.proteinfm
+       init_args:
+         from_scratch: false
+         use_peft: true
+         save_peft_only: true
+         lora_r: 16
+         lora_alpha: 32
+         lora_dropout: 0.05
+         config_overwrites: null
+         model_init_args: null
+         max_length: 2048
+     adapter:
+       class_path: genbio_finetune.models.MLPPoolAdapter
+       init_args:
+         pooling: mean_pooling
+         hidden_sizes:
+         - 128
+         bias: true
+         dropout: 0.1
+         dropout_in_middle: false
+     optimizer:
+       class_path: torch.optim.AdamW
+       init_args:
+         lr: 0.0001
+         betas:
+         - 0.9
+         - 0.95
+         eps: 1.0e-08
+         weight_decay: 0.01
+         amsgrad: false
+         maximize: false
+         foreach: null
+         capturable: false
+         differentiable: false
+         fused: null
+     lr_scheduler:
+       class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
+       init_args:
+         warmup_ratio: 0.05
+     strict_loading: true
+     reset_optimizer_states: false
+ data:
+   class_path: genbio_finetune.data.DMSFitnessPrediction
+   init_args:
+     path: genbio-ai/ProteinGYM-DMS
+     mutation_type: indels
+     task: BLAT_ECOLX_Gonzalez_2019_indels
+     normalize: true
+     train_split_name: train
+     test_split_files: null
+     valid_split_files: null
+     random_seed: 42
+     batch_size: 2
+     shuffle: true
+     sampler: null
+     num_workers: 0
+     pin_memory: true
+     persistent_workers: false
+     cv_num_folds: 5
+     cv_test_fold_id: 3
+     cv_enable_val_fold: true
+     cv_fold_id_col: fold_id
+ ckpt_path: null
fold3/model.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6b607eaff29156c240801bc9ed5840c8947f986c96cb2a71031bd2ebf87b5917
+ size 147655192
fold4/config.yaml ADDED
@@ -0,0 +1,208 @@
+ seed_everything: 42
+ trainer:
+   accelerator: auto
+   strategy:
+     class_path: lightning.pytorch.strategies.DDPStrategy
+     init_args:
+       accelerator: null
+       parallel_devices: null
+       cluster_environment: null
+       checkpoint_io: null
+       precision_plugin: null
+       ddp_comm_state: null
+       ddp_comm_hook: null
+       ddp_comm_wrapper: null
+       model_averaging_period: null
+       process_group_backend: null
+       timeout: 0:30:00
+       start_method: popen
+       output_device: null
+       dim: 0
+       broadcast_buffers: true
+       process_group: null
+       bucket_cap_mb: 25
+       find_unused_parameters: false
+       check_reduction: false
+       gradient_as_bucket_view: false
+       static_graph: false
+       delay_all_reduce_named_params: null
+       param_to_hook_all_reduce: null
+       mixed_precision: null
+       device_mesh: null
+   devices: auto
+   num_nodes: 2
+   precision: 32
+   logger:
+     class_path: lightning.pytorch.loggers.WandbLogger
+     init_args:
+       name: BLAT_ECOLX_Gonzalez_2019_indels
+       save_dir: logs
+       version: null
+       offline: false
+       dir: null
+       id: null
+       anonymous: null
+       project: GBFT_PROTEINFM_DMS
+       log_model: false
+       experiment: null
+       prefix: ''
+       checkpoint_name: null
+       job_type: null
+       config: null
+       entity: null
+       reinit: null
+       tags: null
+       group: null
+       notes: null
+       magic: null
+       config_exclude_keys: null
+       config_include_keys: null
+       mode: null
+       allow_val_change: null
+       resume: null
+       force: null
+       tensorboard: null
+       sync_tensorboard: null
+       monitor_gym: null
+       save_code: true
+       settings: null
+   callbacks:
+   - class_path: lightning.pytorch.callbacks.LearningRateMonitor
+     init_args:
+       logging_interval: step
+       log_momentum: false
+       log_weight_decay: false
+   - class_path: lightning.pytorch.callbacks.ModelCheckpoint # save ckpt at the end of each epoch, and save the best val_spearman ckpt
+     init_args:
+       dirpath: null
+       filename: epoch_{epoch}-val_mcc:{val_spearman:.3f}
+       monitor: val_spearman
+       verbose: false
+       save_last: true
+       save_top_k: 1
+       save_weights_only: false
+       mode: max
+       auto_insert_metric_name: true
+       every_n_train_steps: null
+       train_time_interval: null
+       every_n_epochs: 1
+       save_on_train_epoch_end: null
+       enable_version_counter: true
+   - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping
+     dict_kwargs:
+       monitor: val_spearman
+       mode: max
+       patience: 5
+   fast_dev_run: false
+   max_epochs: null
+   min_epochs: null
+   max_steps: 10000
+   min_steps: null
+   max_time: null
+   limit_train_batches: null
+   limit_val_batches: null
+   limit_test_batches: null
+   limit_predict_batches: null
+   overfit_batches: 0.0
+   val_check_interval: null
+   check_val_every_n_epoch: 1
+   num_sanity_val_steps: null
+   log_every_n_steps: 50
+   enable_checkpointing: null
+   enable_progress_bar: null
+   enable_model_summary: null
+   accumulate_grad_batches: 1
+   gradient_clip_val: 0.1
+   gradient_clip_algorithm: null
+   deterministic: null
+   benchmark: null
+   inference_mode: true
+   use_distributed_sampler: true
+   profiler:
+     class_path: lightning.pytorch.profilers.PyTorchProfiler
+     init_args:
+       dirpath: null
+       filename: null
+       group_by_input_shapes: false
+       emit_nvtx: false
+       export_to_chrome: true
+       row_limit: 20
+       sort_by_key: null
+       record_module_names: true
+       table_kwargs: null
+       record_shapes: false
+     dict_kwargs:
+       profile_memory: true
+   detect_anomaly: false
+   barebones: false
+   plugins: null
+   sync_batchnorm: false
+   reload_dataloaders_every_n_epochs: 0
+   default_root_dir: logs
+ model:
+   class_path: genbio_finetune.tasks.SequenceRegression
+   init_args:
+     backbone:
+       class_path: genbio_finetune.models.proteinfm
+       init_args:
+         from_scratch: false
+         use_peft: true
+         save_peft_only: true
+         lora_r: 16
+         lora_alpha: 32
+         lora_dropout: 0.05
+         config_overwrites: null
+         model_init_args: null
+         max_length: 2048
+     adapter:
+       class_path: genbio_finetune.models.MLPPoolAdapter
+       init_args:
+         pooling: mean_pooling
+         hidden_sizes:
+         - 128
+         bias: true
+         dropout: 0.1
+         dropout_in_middle: false
+     optimizer:
+       class_path: torch.optim.AdamW
+       init_args:
+         lr: 0.0001
+         betas:
+         - 0.9
+         - 0.95
+         eps: 1.0e-08
+         weight_decay: 0.01
+         amsgrad: false
+         maximize: false
+         foreach: null
+         capturable: false
+         differentiable: false
+         fused: null
+     lr_scheduler:
+       class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
+       init_args:
+         warmup_ratio: 0.05
+     strict_loading: true
+     reset_optimizer_states: false
+ data:
+   class_path: genbio_finetune.data.DMSFitnessPrediction
+   init_args:
+     path: genbio-ai/ProteinGYM-DMS
+     mutation_type: indels
+     task: BLAT_ECOLX_Gonzalez_2019_indels
+     normalize: true
+     train_split_name: train
+     test_split_files: null
+     valid_split_files: null
+     random_seed: 42
+     batch_size: 2
+     shuffle: true
+     sampler: null
+     num_workers: 0
+     pin_memory: true
+     persistent_workers: false
+     cv_num_folds: 5
+     cv_test_fold_id: 4
+     cv_enable_val_fold: true
+     cv_fold_id_col: fold_id
+ ckpt_path: null
fold4/model.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7062398fd8c838ea432c0949c8cf8c53fbd93ad36fc8ecb18a7bb2a5373e6a5a
+ size 147655192
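
Each pointer file above records the sha256 oid and byte size of its checkpoint, so downloaded copies can be checked against this commit. A small sketch, assuming the five model.ckpt files have already been fetched into the matching fold directories (the script name is hypothetical; expected values are copied from the pointers above):

# verify_ckpts.py -- illustrative only
import hashlib
import os

# (sha256 oid, size in bytes) taken from the git-lfs pointers in this commit
expected = {
    "fold0/model.ckpt": ("444cce4d074864a21f4e5a7389e77cb278911182534318b732b87423ee629d21", 147655384),
    "fold1/model.ckpt": ("09a7f6e5597b190f26e447ad584ee7b85a40bb21b278f85de1dac8d7e8bf2caf", 147655192),
    "fold2/model.ckpt": ("2f39d7d180a9996038632c1aac19de2f20e84de2377df4c8b9f0b1be8a5c57f0", 147655192),
    "fold3/model.ckpt": ("6b607eaff29156c240801bc9ed5840c8947f986c96cb2a71031bd2ebf87b5917", 147655192),
    "fold4/model.ckpt": ("7062398fd8c838ea432c0949c8cf8c53fbd93ad36fc8ecb18a7bb2a5373e6a5a", 147655192),
}

def sha256_of(path, chunk_size=1 << 20):
    # Hash the file in chunks so the 140+ MB checkpoints are not read into memory at once.
    digest = hashlib.sha256()
    with open(path, "rb") as handle:
        for block in iter(lambda: handle.read(chunk_size), b""):
            digest.update(block)
    return digest.hexdigest()

for path, (oid, size) in expected.items():
    assert os.path.getsize(path) == size, f"size mismatch for {path}"
    assert sha256_of(path) == oid, f"sha256 mismatch for {path}"
    print(f"{path}: matches its LFS pointer")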