Ning Sun committed
Commit 3f955da
1 Parent(s): 5036c6f

initial upload

fold0/config.yaml ADDED
@@ -0,0 +1,200 @@
+ seed_everything: 42
+ trainer:
+   accelerator: auto
+   strategy:
+     class_path: lightning.pytorch.strategies.DDPStrategy
+     init_args:
+       accelerator: null
+       parallel_devices: null
+       cluster_environment: null
+       checkpoint_io: null
+       precision_plugin: null
+       ddp_comm_state: null
+       ddp_comm_hook: null
+       ddp_comm_wrapper: null
+       model_averaging_period: null
+       process_group_backend: null
+       timeout: 0:30:00
+       start_method: popen
+       output_device: null
+       dim: 0
+       broadcast_buffers: true
+       process_group: null
+       bucket_cap_mb: 25
+       find_unused_parameters: false
+       check_reduction: false
+       gradient_as_bucket_view: false
+       static_graph: false
+       delay_all_reduce_named_params: null
+       param_to_hook_all_reduce: null
+       mixed_precision: null
+       device_mesh: null
+   devices: auto
+   num_nodes: 2
+   precision: 32
+   logger:
+     class_path: lightning.pytorch.loggers.WandbLogger
+     init_args:
+       name: B1LPA6_ECOSM_Russ_2020_indels
+       save_dir: logs
+       version: null
+       offline: false
+       dir: null
+       id: null
+       anonymous: null
+       project: GBFT_PROTEINFM_DMS
+       log_model: false
+       experiment: null
+       prefix: ''
+       checkpoint_name: null
+       job_type: null
+       config: null
+       entity: null
+       reinit: null
+       tags: null
+       group: null
+       notes: null
+       magic: null
+       config_exclude_keys: null
+       config_include_keys: null
+       mode: null
+       allow_val_change: null
+       resume: null
+       force: null
+       tensorboard: null
+       sync_tensorboard: null
+       monitor_gym: null
+       save_code: true
+       settings: null
+   callbacks:
+   - class_path: lightning.pytorch.callbacks.LearningRateMonitor
+     init_args:
+       logging_interval: step
+       log_momentum: false
+       log_weight_decay: false
+   - class_path: lightning.pytorch.callbacks.ModelCheckpoint # save a ckpt at the end of each epoch, and keep the best val_spearman ckpt
+     init_args:
+       dirpath: null
+       filename: epoch_{epoch}-val_mcc:{val_spearman:.3f}
+       monitor: val_spearman
+       verbose: false
+       save_last: true
+       save_top_k: 1
+       save_weights_only: false
+       mode: max
+       auto_insert_metric_name: true
+       every_n_train_steps: null
+       train_time_interval: null
+       every_n_epochs: 1
+       save_on_train_epoch_end: null
+       enable_version_counter: true
+   - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping
+     dict_kwargs:
+       monitor: val_spearman
+       mode: max
+       patience: 5
+   fast_dev_run: false
+   max_epochs: null
+   min_epochs: null
+   max_steps: 10000
+   min_steps: null
+   max_time: null
+   limit_train_batches: null
+   limit_val_batches: null
+   limit_test_batches: null
+   limit_predict_batches: null
+   overfit_batches: 0.0
+   val_check_interval: null
+   check_val_every_n_epoch: 1
+   num_sanity_val_steps: null
+   log_every_n_steps: 50
+   enable_checkpointing: null
+   enable_progress_bar: null
+   enable_model_summary: null
+   accumulate_grad_batches: 1
+   gradient_clip_val: 0.1
+   gradient_clip_algorithm: null
+   deterministic: null
+   benchmark: null
+   inference_mode: true
+   use_distributed_sampler: true
+   profiler:
+     class_path: lightning.pytorch.profilers.PyTorchProfiler
+     init_args:
+       dirpath: null
+       filename: null
+       group_by_input_shapes: false
+       emit_nvtx: false
+       export_to_chrome: true
+       row_limit: 20
+       sort_by_key: null
+       record_module_names: true
+       table_kwargs: null
+       record_shapes: false
+     dict_kwargs:
+       profile_memory: true
+   detect_anomaly: false
+   barebones: false
+   plugins: null
+   sync_batchnorm: false
+   reload_dataloaders_every_n_epochs: 0
+   default_root_dir: logs
+ model:
+   class_path: genbio_finetune.tasks.SequenceRegression
+   init_args:
+     backbone:
+       class_path: genbio_finetune.models.proteinfm
+       init_args:
+         from_scratch: false
+         use_peft: true
+         save_peft_only: true
+         lora_r: 16
+         lora_alpha: 32
+         lora_dropout: 0.05
+         config_overwrites: null
+         model_init_args: null
+         max_length: 2048
+     adapter: genbio_finetune.models.MLPMeanPoolAdapter
+     optimizer:
+       class_path: torch.optim.AdamW
+       init_args:
+         lr: 0.0001
+         betas:
+         - 0.9
+         - 0.95
+         eps: 1.0e-08
+         weight_decay: 0.01
+         amsgrad: false
+         maximize: false
+         foreach: null
+         capturable: false
+         differentiable: false
+         fused: null
+     lr_scheduler:
+       class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
+       init_args:
+         warmup_ratio: 0.05
+     strict_loading: true
+     reset_optimizer_states: false
+ data:
+   class_path: genbio_finetune.data.DMSFitnessPrediction
+   init_args:
+     path: genbio-ai/ProteinGYM-DMS
+     mutation_type: indels
+     task: B1LPA6_ECOSM_Russ_2020_indels
+     normalize: true
+     train_split_name: train
+     test_split_files: null
+     valid_split_files: null
+     random_seed: 42
+     batch_size: 1
+     shuffle: true
+     sampler: null
+     num_workers: 0
+     pin_memory: true
+     persistent_workers: false
+     cv_num_folds: 5
+     cv_test_fold_id: 0
+     cv_enable_val_fold: true
+     cv_fold_id_col: fold_id
+ ckpt_path: null
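
The five fold*/config.yaml files in this commit are identical LightningCLI-style configs except for data.init_args.cv_test_fold_id, which selects the held-out fold. A minimal sketch (not part of this upload; assumes PyYAML is installed and the files sit in the current directory) that makes this explicit:

```python
# Read each fold's config and print the held-out fold id and DMS task,
# confirming the configs differ only in cv_test_fold_id.
import yaml

for fold in range(5):
    with open(f"fold{fold}/config.yaml") as f:
        cfg = yaml.safe_load(f)
    data_args = cfg["data"]["init_args"]
    print(f"fold{fold}: cv_test_fold_id={data_args['cv_test_fold_id']}, "
          f"task={data_args['task']}")
```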
fold0/model.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:595b7b922be8dcaf8de3378372cadd3d9c3c4bda8aad5779ec5efd57e29f0a07
+ size 147655000
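
Each model.ckpt is stored with Git LFS, so the three lines above are only a pointer; the actual weights are identified by the sha256 oid. A short sketch (assuming the checkpoint has already been fetched, e.g. with `git lfs pull`) for verifying a local copy against the pointer:

```python
# Verify fold0/model.ckpt against the oid recorded in its LFS pointer above.
import hashlib

EXPECTED_OID = "595b7b922be8dcaf8de3378372cadd3d9c3c4bda8aad5779ec5efd57e29f0a07"

sha = hashlib.sha256()
with open("fold0/model.ckpt", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)
assert sha.hexdigest() == EXPECTED_OID, "checksum mismatch"
print("fold0/model.ckpt matches its LFS pointer")
```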
fold1/config.yaml ADDED
@@ -0,0 +1,200 @@
+ seed_everything: 42
+ trainer:
+   accelerator: auto
+   strategy:
+     class_path: lightning.pytorch.strategies.DDPStrategy
+     init_args:
+       accelerator: null
+       parallel_devices: null
+       cluster_environment: null
+       checkpoint_io: null
+       precision_plugin: null
+       ddp_comm_state: null
+       ddp_comm_hook: null
+       ddp_comm_wrapper: null
+       model_averaging_period: null
+       process_group_backend: null
+       timeout: 0:30:00
+       start_method: popen
+       output_device: null
+       dim: 0
+       broadcast_buffers: true
+       process_group: null
+       bucket_cap_mb: 25
+       find_unused_parameters: false
+       check_reduction: false
+       gradient_as_bucket_view: false
+       static_graph: false
+       delay_all_reduce_named_params: null
+       param_to_hook_all_reduce: null
+       mixed_precision: null
+       device_mesh: null
+   devices: auto
+   num_nodes: 2
+   precision: 32
+   logger:
+     class_path: lightning.pytorch.loggers.WandbLogger
+     init_args:
+       name: B1LPA6_ECOSM_Russ_2020_indels
+       save_dir: logs
+       version: null
+       offline: false
+       dir: null
+       id: null
+       anonymous: null
+       project: GBFT_PROTEINFM_DMS
+       log_model: false
+       experiment: null
+       prefix: ''
+       checkpoint_name: null
+       job_type: null
+       config: null
+       entity: null
+       reinit: null
+       tags: null
+       group: null
+       notes: null
+       magic: null
+       config_exclude_keys: null
+       config_include_keys: null
+       mode: null
+       allow_val_change: null
+       resume: null
+       force: null
+       tensorboard: null
+       sync_tensorboard: null
+       monitor_gym: null
+       save_code: true
+       settings: null
+   callbacks:
+   - class_path: lightning.pytorch.callbacks.LearningRateMonitor
+     init_args:
+       logging_interval: step
+       log_momentum: false
+       log_weight_decay: false
+   - class_path: lightning.pytorch.callbacks.ModelCheckpoint # save a ckpt at the end of each epoch, and keep the best val_spearman ckpt
+     init_args:
+       dirpath: null
+       filename: epoch_{epoch}-val_mcc:{val_spearman:.3f}
+       monitor: val_spearman
+       verbose: false
+       save_last: true
+       save_top_k: 1
+       save_weights_only: false
+       mode: max
+       auto_insert_metric_name: true
+       every_n_train_steps: null
+       train_time_interval: null
+       every_n_epochs: 1
+       save_on_train_epoch_end: null
+       enable_version_counter: true
+   - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping
+     dict_kwargs:
+       monitor: val_spearman
+       mode: max
+       patience: 5
+   fast_dev_run: false
+   max_epochs: null
+   min_epochs: null
+   max_steps: 10000
+   min_steps: null
+   max_time: null
+   limit_train_batches: null
+   limit_val_batches: null
+   limit_test_batches: null
+   limit_predict_batches: null
+   overfit_batches: 0.0
+   val_check_interval: null
+   check_val_every_n_epoch: 1
+   num_sanity_val_steps: null
+   log_every_n_steps: 50
+   enable_checkpointing: null
+   enable_progress_bar: null
+   enable_model_summary: null
+   accumulate_grad_batches: 1
+   gradient_clip_val: 0.1
+   gradient_clip_algorithm: null
+   deterministic: null
+   benchmark: null
+   inference_mode: true
+   use_distributed_sampler: true
+   profiler:
+     class_path: lightning.pytorch.profilers.PyTorchProfiler
+     init_args:
+       dirpath: null
+       filename: null
+       group_by_input_shapes: false
+       emit_nvtx: false
+       export_to_chrome: true
+       row_limit: 20
+       sort_by_key: null
+       record_module_names: true
+       table_kwargs: null
+       record_shapes: false
+     dict_kwargs:
+       profile_memory: true
+   detect_anomaly: false
+   barebones: false
+   plugins: null
+   sync_batchnorm: false
+   reload_dataloaders_every_n_epochs: 0
+   default_root_dir: logs
+ model:
+   class_path: genbio_finetune.tasks.SequenceRegression
+   init_args:
+     backbone:
+       class_path: genbio_finetune.models.proteinfm
+       init_args:
+         from_scratch: false
+         use_peft: true
+         save_peft_only: true
+         lora_r: 16
+         lora_alpha: 32
+         lora_dropout: 0.05
+         config_overwrites: null
+         model_init_args: null
+         max_length: 2048
+     adapter: genbio_finetune.models.MLPMeanPoolAdapter
+     optimizer:
+       class_path: torch.optim.AdamW
+       init_args:
+         lr: 0.0001
+         betas:
+         - 0.9
+         - 0.95
+         eps: 1.0e-08
+         weight_decay: 0.01
+         amsgrad: false
+         maximize: false
+         foreach: null
+         capturable: false
+         differentiable: false
+         fused: null
+     lr_scheduler:
+       class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
+       init_args:
+         warmup_ratio: 0.05
+     strict_loading: true
+     reset_optimizer_states: false
+ data:
+   class_path: genbio_finetune.data.DMSFitnessPrediction
+   init_args:
+     path: genbio-ai/ProteinGYM-DMS
+     mutation_type: indels
+     task: B1LPA6_ECOSM_Russ_2020_indels
+     normalize: true
+     train_split_name: train
+     test_split_files: null
+     valid_split_files: null
+     random_seed: 42
+     batch_size: 1
+     shuffle: true
+     sampler: null
+     num_workers: 0
+     pin_memory: true
+     persistent_workers: false
+     cv_num_folds: 5
+     cv_test_fold_id: 1
+     cv_enable_val_fold: true
+     cv_fold_id_col: fold_id
+ ckpt_path: null
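
The backbone settings above (use_peft: true, lora_r: 16, lora_alpha: 32, lora_dropout: 0.05) describe a standard LoRA adapter configuration. How genbio_finetune wires this internally is not shown in this commit; the sketch below only illustrates the equivalent hyperparameters with the Hugging Face peft library, and target_modules is a hypothetical placeholder rather than the modules actually adapted in genbio_finetune.models.proteinfm.

```python
# Illustrative only: the same LoRA hyperparameters expressed with peft.
from peft import LoraConfig

lora_cfg = LoraConfig(
    r=16,               # lora_r
    lora_alpha=32,      # lora_alpha
    lora_dropout=0.05,  # lora_dropout
    target_modules=["query", "key", "value"],  # hypothetical placeholder
)
print(lora_cfg)
```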
fold1/model.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:595b7b922be8dcaf8de3378372cadd3d9c3c4bda8aad5779ec5efd57e29f0a07
+ size 147655000
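
If you prefer not to clone the repository with Git LFS, the per-fold checkpoints can also be fetched individually with huggingface_hub. The repository id below is a placeholder; substitute the actual id of this upload.

```python
# Hypothetical sketch: download one fold's checkpoint from the Hub.
from huggingface_hub import hf_hub_download

ckpt_path = hf_hub_download(
    repo_id="genbio-ai/<this-model-repo>",  # placeholder repo id
    filename="fold1/model.ckpt",
)
print(ckpt_path)
```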
fold2/config.yaml ADDED
@@ -0,0 +1,200 @@
+ seed_everything: 42
+ trainer:
+   accelerator: auto
+   strategy:
+     class_path: lightning.pytorch.strategies.DDPStrategy
+     init_args:
+       accelerator: null
+       parallel_devices: null
+       cluster_environment: null
+       checkpoint_io: null
+       precision_plugin: null
+       ddp_comm_state: null
+       ddp_comm_hook: null
+       ddp_comm_wrapper: null
+       model_averaging_period: null
+       process_group_backend: null
+       timeout: 0:30:00
+       start_method: popen
+       output_device: null
+       dim: 0
+       broadcast_buffers: true
+       process_group: null
+       bucket_cap_mb: 25
+       find_unused_parameters: false
+       check_reduction: false
+       gradient_as_bucket_view: false
+       static_graph: false
+       delay_all_reduce_named_params: null
+       param_to_hook_all_reduce: null
+       mixed_precision: null
+       device_mesh: null
+   devices: auto
+   num_nodes: 2
+   precision: 32
+   logger:
+     class_path: lightning.pytorch.loggers.WandbLogger
+     init_args:
+       name: B1LPA6_ECOSM_Russ_2020_indels
+       save_dir: logs
+       version: null
+       offline: false
+       dir: null
+       id: null
+       anonymous: null
+       project: GBFT_PROTEINFM_DMS
+       log_model: false
+       experiment: null
+       prefix: ''
+       checkpoint_name: null
+       job_type: null
+       config: null
+       entity: null
+       reinit: null
+       tags: null
+       group: null
+       notes: null
+       magic: null
+       config_exclude_keys: null
+       config_include_keys: null
+       mode: null
+       allow_val_change: null
+       resume: null
+       force: null
+       tensorboard: null
+       sync_tensorboard: null
+       monitor_gym: null
+       save_code: true
+       settings: null
+   callbacks:
+   - class_path: lightning.pytorch.callbacks.LearningRateMonitor
+     init_args:
+       logging_interval: step
+       log_momentum: false
+       log_weight_decay: false
+   - class_path: lightning.pytorch.callbacks.ModelCheckpoint # save a ckpt at the end of each epoch, and keep the best val_spearman ckpt
+     init_args:
+       dirpath: null
+       filename: epoch_{epoch}-val_mcc:{val_spearman:.3f}
+       monitor: val_spearman
+       verbose: false
+       save_last: true
+       save_top_k: 1
+       save_weights_only: false
+       mode: max
+       auto_insert_metric_name: true
+       every_n_train_steps: null
+       train_time_interval: null
+       every_n_epochs: 1
+       save_on_train_epoch_end: null
+       enable_version_counter: true
+   - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping
+     dict_kwargs:
+       monitor: val_spearman
+       mode: max
+       patience: 5
+   fast_dev_run: false
+   max_epochs: null
+   min_epochs: null
+   max_steps: 10000
+   min_steps: null
+   max_time: null
+   limit_train_batches: null
+   limit_val_batches: null
+   limit_test_batches: null
+   limit_predict_batches: null
+   overfit_batches: 0.0
+   val_check_interval: null
+   check_val_every_n_epoch: 1
+   num_sanity_val_steps: null
+   log_every_n_steps: 50
+   enable_checkpointing: null
+   enable_progress_bar: null
+   enable_model_summary: null
+   accumulate_grad_batches: 1
+   gradient_clip_val: 0.1
+   gradient_clip_algorithm: null
+   deterministic: null
+   benchmark: null
+   inference_mode: true
+   use_distributed_sampler: true
+   profiler:
+     class_path: lightning.pytorch.profilers.PyTorchProfiler
+     init_args:
+       dirpath: null
+       filename: null
+       group_by_input_shapes: false
+       emit_nvtx: false
+       export_to_chrome: true
+       row_limit: 20
+       sort_by_key: null
+       record_module_names: true
+       table_kwargs: null
+       record_shapes: false
+     dict_kwargs:
+       profile_memory: true
+   detect_anomaly: false
+   barebones: false
+   plugins: null
+   sync_batchnorm: false
+   reload_dataloaders_every_n_epochs: 0
+   default_root_dir: logs
+ model:
+   class_path: genbio_finetune.tasks.SequenceRegression
+   init_args:
+     backbone:
+       class_path: genbio_finetune.models.proteinfm
+       init_args:
+         from_scratch: false
+         use_peft: true
+         save_peft_only: true
+         lora_r: 16
+         lora_alpha: 32
+         lora_dropout: 0.05
+         config_overwrites: null
+         model_init_args: null
+         max_length: 2048
+     adapter: genbio_finetune.models.MLPMeanPoolAdapter
+     optimizer:
+       class_path: torch.optim.AdamW
+       init_args:
+         lr: 0.0001
+         betas:
+         - 0.9
+         - 0.95
+         eps: 1.0e-08
+         weight_decay: 0.01
+         amsgrad: false
+         maximize: false
+         foreach: null
+         capturable: false
+         differentiable: false
+         fused: null
+     lr_scheduler:
+       class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
+       init_args:
+         warmup_ratio: 0.05
+     strict_loading: true
+     reset_optimizer_states: false
+ data:
+   class_path: genbio_finetune.data.DMSFitnessPrediction
+   init_args:
+     path: genbio-ai/ProteinGYM-DMS
+     mutation_type: indels
+     task: B1LPA6_ECOSM_Russ_2020_indels
+     normalize: true
+     train_split_name: train
+     test_split_files: null
+     valid_split_files: null
+     random_seed: 42
+     batch_size: 1
+     shuffle: true
+     sampler: null
+     num_workers: 0
+     pin_memory: true
+     persistent_workers: false
+     cv_num_folds: 5
+     cv_test_fold_id: 2
+     cv_enable_val_fold: true
+     cv_fold_id_col: fold_id
+ ckpt_path: null
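
With warmup_ratio: 0.05 and max_steps: 10000, the CosineWithWarmup scheduler implies roughly 500 warmup steps followed by a cosine decay of the 1e-4 learning rate. genbio_finetune.lr_schedulers.CosineWithWarmup itself is not reproduced in this commit; the sketch below is only a generic approximation of that schedule shape using torch's LambdaLR.

```python
# Generic cosine-with-warmup shape implied by warmup_ratio=0.05, max_steps=10000.
# This approximates, but is not, genbio_finetune.lr_schedulers.CosineWithWarmup.
import math
import torch

MAX_STEPS = 10_000
WARMUP_STEPS = int(0.05 * MAX_STEPS)  # 500 steps

def lr_lambda(step: int) -> float:
    if step < WARMUP_STEPS:
        return step / max(1, WARMUP_STEPS)                      # linear warmup
    progress = (step - WARMUP_STEPS) / max(1, MAX_STEPS - WARMUP_STEPS)
    return 0.5 * (1.0 + math.cos(math.pi * progress))           # cosine decay

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.AdamW(params, lr=1e-4, betas=(0.9, 0.95), weight_decay=0.01)
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
```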
fold2/model.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:83ac4a00cede3b520566ba41d467cc76036d08d1a4b48f68f44f86580044e1fb
+ size 147655000
fold3/config.yaml ADDED
@@ -0,0 +1,200 @@
+ seed_everything: 42
+ trainer:
+   accelerator: auto
+   strategy:
+     class_path: lightning.pytorch.strategies.DDPStrategy
+     init_args:
+       accelerator: null
+       parallel_devices: null
+       cluster_environment: null
+       checkpoint_io: null
+       precision_plugin: null
+       ddp_comm_state: null
+       ddp_comm_hook: null
+       ddp_comm_wrapper: null
+       model_averaging_period: null
+       process_group_backend: null
+       timeout: 0:30:00
+       start_method: popen
+       output_device: null
+       dim: 0
+       broadcast_buffers: true
+       process_group: null
+       bucket_cap_mb: 25
+       find_unused_parameters: false
+       check_reduction: false
+       gradient_as_bucket_view: false
+       static_graph: false
+       delay_all_reduce_named_params: null
+       param_to_hook_all_reduce: null
+       mixed_precision: null
+       device_mesh: null
+   devices: auto
+   num_nodes: 2
+   precision: 32
+   logger:
+     class_path: lightning.pytorch.loggers.WandbLogger
+     init_args:
+       name: B1LPA6_ECOSM_Russ_2020_indels
+       save_dir: logs
+       version: null
+       offline: false
+       dir: null
+       id: null
+       anonymous: null
+       project: GBFT_PROTEINFM_DMS
+       log_model: false
+       experiment: null
+       prefix: ''
+       checkpoint_name: null
+       job_type: null
+       config: null
+       entity: null
+       reinit: null
+       tags: null
+       group: null
+       notes: null
+       magic: null
+       config_exclude_keys: null
+       config_include_keys: null
+       mode: null
+       allow_val_change: null
+       resume: null
+       force: null
+       tensorboard: null
+       sync_tensorboard: null
+       monitor_gym: null
+       save_code: true
+       settings: null
+   callbacks:
+   - class_path: lightning.pytorch.callbacks.LearningRateMonitor
+     init_args:
+       logging_interval: step
+       log_momentum: false
+       log_weight_decay: false
+   - class_path: lightning.pytorch.callbacks.ModelCheckpoint # save a ckpt at the end of each epoch, and keep the best val_spearman ckpt
+     init_args:
+       dirpath: null
+       filename: epoch_{epoch}-val_mcc:{val_spearman:.3f}
+       monitor: val_spearman
+       verbose: false
+       save_last: true
+       save_top_k: 1
+       save_weights_only: false
+       mode: max
+       auto_insert_metric_name: true
+       every_n_train_steps: null
+       train_time_interval: null
+       every_n_epochs: 1
+       save_on_train_epoch_end: null
+       enable_version_counter: true
+   - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping
+     dict_kwargs:
+       monitor: val_spearman
+       mode: max
+       patience: 5
+   fast_dev_run: false
+   max_epochs: null
+   min_epochs: null
+   max_steps: 10000
+   min_steps: null
+   max_time: null
+   limit_train_batches: null
+   limit_val_batches: null
+   limit_test_batches: null
+   limit_predict_batches: null
+   overfit_batches: 0.0
+   val_check_interval: null
+   check_val_every_n_epoch: 1
+   num_sanity_val_steps: null
+   log_every_n_steps: 50
+   enable_checkpointing: null
+   enable_progress_bar: null
+   enable_model_summary: null
+   accumulate_grad_batches: 1
+   gradient_clip_val: 0.1
+   gradient_clip_algorithm: null
+   deterministic: null
+   benchmark: null
+   inference_mode: true
+   use_distributed_sampler: true
+   profiler:
+     class_path: lightning.pytorch.profilers.PyTorchProfiler
+     init_args:
+       dirpath: null
+       filename: null
+       group_by_input_shapes: false
+       emit_nvtx: false
+       export_to_chrome: true
+       row_limit: 20
+       sort_by_key: null
+       record_module_names: true
+       table_kwargs: null
+       record_shapes: false
+     dict_kwargs:
+       profile_memory: true
+   detect_anomaly: false
+   barebones: false
+   plugins: null
+   sync_batchnorm: false
+   reload_dataloaders_every_n_epochs: 0
+   default_root_dir: logs
+ model:
+   class_path: genbio_finetune.tasks.SequenceRegression
+   init_args:
+     backbone:
+       class_path: genbio_finetune.models.proteinfm
+       init_args:
+         from_scratch: false
+         use_peft: true
+         save_peft_only: true
+         lora_r: 16
+         lora_alpha: 32
+         lora_dropout: 0.05
+         config_overwrites: null
+         model_init_args: null
+         max_length: 2048
+     adapter: genbio_finetune.models.MLPMeanPoolAdapter
+     optimizer:
+       class_path: torch.optim.AdamW
+       init_args:
+         lr: 0.0001
+         betas:
+         - 0.9
+         - 0.95
+         eps: 1.0e-08
+         weight_decay: 0.01
+         amsgrad: false
+         maximize: false
+         foreach: null
+         capturable: false
+         differentiable: false
+         fused: null
+     lr_scheduler:
+       class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
+       init_args:
+         warmup_ratio: 0.05
+     strict_loading: true
+     reset_optimizer_states: false
+ data:
+   class_path: genbio_finetune.data.DMSFitnessPrediction
+   init_args:
+     path: genbio-ai/ProteinGYM-DMS
+     mutation_type: indels
+     task: B1LPA6_ECOSM_Russ_2020_indels
+     normalize: true
+     train_split_name: train
+     test_split_files: null
+     valid_split_files: null
+     random_seed: 42
+     batch_size: 1
+     shuffle: true
+     sampler: null
+     num_workers: 0
+     pin_memory: true
+     persistent_workers: false
+     cv_num_folds: 5
+     cv_test_fold_id: 3
+     cv_enable_val_fold: true
+     cv_fold_id_col: fold_id
+ ckpt_path: null
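
The callbacks block maps one-to-one onto Lightning callback objects: a per-step learning-rate monitor, a checkpoint that keeps the single best val_spearman epoch plus the last one, and early stopping after 5 epochs without improvement. For reference, the roughly equivalent Python (the EarlyStopping entry is passed via dict_kwargs in the YAML, but the arguments are identical):

```python
# Reference only: the callbacks section expressed as Lightning objects.
from lightning.pytorch.callbacks import (
    EarlyStopping,
    LearningRateMonitor,
    ModelCheckpoint,
)

callbacks = [
    LearningRateMonitor(logging_interval="step"),
    ModelCheckpoint(
        filename="epoch_{epoch}-val_mcc:{val_spearman:.3f}",
        monitor="val_spearman",
        mode="max",
        save_top_k=1,
        save_last=True,
        every_n_epochs=1,
    ),
    EarlyStopping(monitor="val_spearman", mode="max", patience=5),
]
```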
fold4/config.yaml ADDED
@@ -0,0 +1,200 @@
+ seed_everything: 42
+ trainer:
+   accelerator: auto
+   strategy:
+     class_path: lightning.pytorch.strategies.DDPStrategy
+     init_args:
+       accelerator: null
+       parallel_devices: null
+       cluster_environment: null
+       checkpoint_io: null
+       precision_plugin: null
+       ddp_comm_state: null
+       ddp_comm_hook: null
+       ddp_comm_wrapper: null
+       model_averaging_period: null
+       process_group_backend: null
+       timeout: 0:30:00
+       start_method: popen
+       output_device: null
+       dim: 0
+       broadcast_buffers: true
+       process_group: null
+       bucket_cap_mb: 25
+       find_unused_parameters: false
+       check_reduction: false
+       gradient_as_bucket_view: false
+       static_graph: false
+       delay_all_reduce_named_params: null
+       param_to_hook_all_reduce: null
+       mixed_precision: null
+       device_mesh: null
+   devices: auto
+   num_nodes: 2
+   precision: 32
+   logger:
+     class_path: lightning.pytorch.loggers.WandbLogger
+     init_args:
+       name: B1LPA6_ECOSM_Russ_2020_indels
+       save_dir: logs
+       version: null
+       offline: false
+       dir: null
+       id: null
+       anonymous: null
+       project: GBFT_PROTEINFM_DMS
+       log_model: false
+       experiment: null
+       prefix: ''
+       checkpoint_name: null
+       job_type: null
+       config: null
+       entity: null
+       reinit: null
+       tags: null
+       group: null
+       notes: null
+       magic: null
+       config_exclude_keys: null
+       config_include_keys: null
+       mode: null
+       allow_val_change: null
+       resume: null
+       force: null
+       tensorboard: null
+       sync_tensorboard: null
+       monitor_gym: null
+       save_code: true
+       settings: null
+   callbacks:
+   - class_path: lightning.pytorch.callbacks.LearningRateMonitor
+     init_args:
+       logging_interval: step
+       log_momentum: false
+       log_weight_decay: false
+   - class_path: lightning.pytorch.callbacks.ModelCheckpoint # save a ckpt at the end of each epoch, and keep the best val_spearman ckpt
+     init_args:
+       dirpath: null
+       filename: epoch_{epoch}-val_mcc:{val_spearman:.3f}
+       monitor: val_spearman
+       verbose: false
+       save_last: true
+       save_top_k: 1
+       save_weights_only: false
+       mode: max
+       auto_insert_metric_name: true
+       every_n_train_steps: null
+       train_time_interval: null
+       every_n_epochs: 1
+       save_on_train_epoch_end: null
+       enable_version_counter: true
+   - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping
+     dict_kwargs:
+       monitor: val_spearman
+       mode: max
+       patience: 5
+   fast_dev_run: false
+   max_epochs: null
+   min_epochs: null
+   max_steps: 10000
+   min_steps: null
+   max_time: null
+   limit_train_batches: null
+   limit_val_batches: null
+   limit_test_batches: null
+   limit_predict_batches: null
+   overfit_batches: 0.0
+   val_check_interval: null
+   check_val_every_n_epoch: 1
+   num_sanity_val_steps: null
+   log_every_n_steps: 50
+   enable_checkpointing: null
+   enable_progress_bar: null
+   enable_model_summary: null
+   accumulate_grad_batches: 1
+   gradient_clip_val: 0.1
+   gradient_clip_algorithm: null
+   deterministic: null
+   benchmark: null
+   inference_mode: true
+   use_distributed_sampler: true
+   profiler:
+     class_path: lightning.pytorch.profilers.PyTorchProfiler
+     init_args:
+       dirpath: null
+       filename: null
+       group_by_input_shapes: false
+       emit_nvtx: false
+       export_to_chrome: true
+       row_limit: 20
+       sort_by_key: null
+       record_module_names: true
+       table_kwargs: null
+       record_shapes: false
+     dict_kwargs:
+       profile_memory: true
+   detect_anomaly: false
+   barebones: false
+   plugins: null
+   sync_batchnorm: false
+   reload_dataloaders_every_n_epochs: 0
+   default_root_dir: logs
+ model:
+   class_path: genbio_finetune.tasks.SequenceRegression
+   init_args:
+     backbone:
+       class_path: genbio_finetune.models.proteinfm
+       init_args:
+         from_scratch: false
+         use_peft: true
+         save_peft_only: true
+         lora_r: 16
+         lora_alpha: 32
+         lora_dropout: 0.05
+         config_overwrites: null
+         model_init_args: null
+         max_length: 2048
+     adapter: genbio_finetune.models.MLPMeanPoolAdapter
+     optimizer:
+       class_path: torch.optim.AdamW
+       init_args:
+         lr: 0.0001
+         betas:
+         - 0.9
+         - 0.95
+         eps: 1.0e-08
+         weight_decay: 0.01
+         amsgrad: false
+         maximize: false
+         foreach: null
+         capturable: false
+         differentiable: false
+         fused: null
+     lr_scheduler:
+       class_path: genbio_finetune.lr_schedulers.CosineWithWarmup
+       init_args:
+         warmup_ratio: 0.05
+     strict_loading: true
+     reset_optimizer_states: false
+ data:
+   class_path: genbio_finetune.data.DMSFitnessPrediction
+   init_args:
+     path: genbio-ai/ProteinGYM-DMS
+     mutation_type: indels
+     task: B1LPA6_ECOSM_Russ_2020_indels
+     normalize: true
+     train_split_name: train
+     test_split_files: null
+     valid_split_files: null
+     random_seed: 42
+     batch_size: 1
+     shuffle: true
+     sampler: null
+     num_workers: 0
+     pin_memory: true
+     persistent_workers: false
+     cv_num_folds: 5
+     cv_test_fold_id: 4
+     cv_enable_val_fold: true
+     cv_fold_id_col: fold_id
+ ckpt_path: null
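
Model selection in every fold monitors val_spearman, i.e. the Spearman rank correlation between predicted and measured DMS fitness scores. The exact metric implementation lives inside genbio_finetune and is not part of this commit; a minimal stand-in using torchmetrics looks like this:

```python
# Stand-in for the monitored metric: Spearman correlation between
# predicted and measured fitness values (illustrative tensors).
import torch
from torchmetrics.regression import SpearmanCorrCoef

spearman = SpearmanCorrCoef()
preds = torch.tensor([0.1, 0.4, 0.35, 0.8])
target = torch.tensor([0.0, 0.5, 0.3, 0.9])
print(spearman(preds, target))  # rank correlation in [-1, 1]
```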
model.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7fa409a3e49cacb6d72f1bc5c05e6eb6dba526d9643776a23cbae83c5d5e2996
+ size 147655000
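
The top-level model.ckpt is another LFS pointer. Because the configs set use_peft: true and save_peft_only: true, the checkpoint may contain only the adapter/LoRA weights rather than the full backbone; a hedged sketch for inspecting it locally without assuming its internal layout (key names depend on the Lightning version and the task class):

```python
# Inspect the downloaded checkpoint; weights_only=False is needed because
# Lightning checkpoints can contain non-tensor metadata.
import torch

ckpt = torch.load("model.ckpt", map_location="cpu", weights_only=False)
print(sorted(ckpt.keys())[:10])
state_dict = ckpt.get("state_dict", ckpt)
print(f"{len(state_dict)} entries in the state dict")
```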