Commit
•
0f12a50
1
Parent(s):
fa91aa0
Add new models (#15)
Browse files- Add new models (1858fd735f444bce488d2184d74eb07657119618)
Co-authored-by: Ivan Arcuschin <iarcuschin@users.noreply.huggingface.co>
This view is limited to 50 files because it contains too many changes.
See raw diff
- 102/edges.pkl +3 -0
- 102/ll_model.pth +3 -0
- 102/ll_model_cfg.pkl +3 -0
- 102/meta.json +1 -0
- 104/edges.pkl +3 -0
- 104/ll_model.pth +3 -0
- 104/ll_model_cfg.pkl +3 -0
- 104/meta.json +1 -0
- 105/edges.pkl +3 -0
- 105/ll_model.pth +3 -0
- 105/ll_model_cfg.pkl +3 -0
- 105/meta.json +1 -0
- 123/edges.pkl +3 -0
- 123/ll_model.pth +3 -0
- 123/ll_model_cfg.pkl +3 -0
- 123/meta.json +1 -0
- 46/edges.pkl +3 -0
- 46/ll_model.pth +3 -0
- 46/ll_model_cfg.pkl +3 -0
- 46/meta.json +1 -0
- 50/edges.pkl +3 -0
- 50/ll_model.pth +3 -0
- 50/ll_model_cfg.pkl +3 -0
- 50/meta.json +1 -0
- 52/edges.pkl +3 -0
- 52/ll_model.pth +3 -0
- 52/ll_model_cfg.pkl +3 -0
- 52/meta.json +1 -0
- 53/edges.pkl +3 -0
- 53/ll_model.pth +3 -0
- 53/ll_model_cfg.pkl +3 -0
- 53/meta.json +1 -0
- 54/edges.pkl +3 -0
- 54/ll_model.pth +3 -0
- 54/ll_model_cfg.pkl +3 -0
- 54/meta.json +1 -0
- 55/edges.pkl +3 -0
- 55/ll_model.pth +3 -0
- 55/ll_model_cfg.pkl +3 -0
- 55/meta.json +1 -0
- 60/edges.pkl +3 -0
- 60/ll_model.pth +3 -0
- 60/ll_model_cfg.pkl +3 -0
- 60/meta.json +1 -0
- 62/edges.pkl +3 -0
- 62/ll_model.pth +3 -0
- 62/ll_model_cfg.pkl +3 -0
- 62/meta.json +1 -0
- 64/edges.pkl +3 -0
- 64/ll_model.pth +3 -0
102/edges.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
|
3 |
+
size 113
|
102/ll_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:feb81e3d0658fbd63f8e19b5ec8b036dd18f5e58ef62d8e0a57048e6ca450c7d
|
3 |
+
size 15082
|
102/ll_model_cfg.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ecb69a2f8949b8c7b19adc4c33aed4e99913687a72b169b64a317d2e7878dc97
|
3 |
+
size 1093
|
102/meta.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.4, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-82-siit-weigth-0.4", "wandb_name": "case-102-seed-82-s-0.4-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 82, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
104/edges.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
|
3 |
+
size 113
|
104/ll_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
|
3 |
+
size 14698
|
104/ll_model_cfg.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
|
3 |
+
size 1093
|
104/meta.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-104-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
105/edges.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
|
3 |
+
size 113
|
105/ll_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c32e3e97b224cc8d115b70d7661b8b8433bfcd41a9f58d1d7409ce9367ea27a
|
3 |
+
size 15018
|
105/ll_model_cfg.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57dece33450716de87f38b12c0955174849706456cc698dd4fcce152feba3cbf
|
3 |
+
size 1093
|
105/meta.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-105-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
123/edges.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
|
3 |
+
size 113
|
123/ll_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b640bb628be87f8ebf41e6bb8b5351a6d69bfa11699bf2e1cea140f6f6f9f95
|
3 |
+
size 15082
|
123/ll_model_cfg.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f027ad4de6828c6f5bb7a3f3c8aabd9658526e33b2284eba366977823b89c0a
|
3 |
+
size 1093
|
123/meta.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-123-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
46/edges.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
|
3 |
+
size 113
|
46/ll_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
|
3 |
+
size 14698
|
46/ll_model_cfg.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
|
3 |
+
size 1093
|
46/meta.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-46-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
50/edges.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
|
3 |
+
size 113
|
50/ll_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
|
3 |
+
size 14698
|
50/ll_model_cfg.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
|
3 |
+
size 1093
|
50/meta.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-50-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
52/edges.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
|
3 |
+
size 113
|
52/ll_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
|
3 |
+
size 14698
|
52/ll_model_cfg.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
|
3 |
+
size 1093
|
52/meta.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-52-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
53/edges.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:994f3bdfbe5b148e1da38018a24a1567c1d86e5de1c18e9b4d62af358812c709
|
3 |
+
size 189
|
53/ll_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c16ef496b74897cf166795daa3a019dd6adde99efe3edd12d540676047ff695b
|
3 |
+
size 14762
|
53/ll_model_cfg.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:067caa3354f7d42fd2eb39aeacec1719e1e8aa60f9c707b5be1dff2a7a5eac4c
|
3 |
+
size 1093
|
53/meta.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-53-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
54/edges.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
|
3 |
+
size 113
|
54/ll_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
|
3 |
+
size 14698
|
54/ll_model_cfg.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
|
3 |
+
size 1093
|
54/meta.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-54-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
55/edges.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
|
3 |
+
size 113
|
55/ll_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
|
3 |
+
size 14698
|
55/ll_model_cfg.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
|
3 |
+
size 1093
|
55/meta.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-55-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
60/edges.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
|
3 |
+
size 113
|
60/ll_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
|
3 |
+
size 14698
|
60/ll_model_cfg.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
|
3 |
+
size 1093
|
60/meta.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-60-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
62/edges.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
|
3 |
+
size 113
|
62/ll_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44b75705cc0241f73d43d60bd6dba9014f5643c80a0da7d58b49ea7bdc1526fe
|
3 |
+
size 14698
|
62/ll_model_cfg.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a2d11862991a5791d7bd91005f7c0f928d522a579988c7000192e53ff05de81
|
3 |
+
size 1093
|
62/meta.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-62-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
|
64/edges.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
|
3 |
+
size 113
|
64/ll_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
|
3 |
+
size 14698
|