cybershiptrooper iarcuschin commited on
Commit
0f12a50
1 Parent(s): fa91aa0

Add new models (#15)

Browse files

- Add new models (1858fd735f444bce488d2184d74eb07657119618)


Co-authored-by: Ivan Arcuschin <iarcuschin@users.noreply.huggingface.co>

This view is limited to 50 files because it contains too many changes.   See raw diff
102/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
3
+ size 113
102/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feb81e3d0658fbd63f8e19b5ec8b036dd18f5e58ef62d8e0a57048e6ca450c7d
3
+ size 15082
102/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecb69a2f8949b8c7b19adc4c33aed4e99913687a72b169b64a317d2e7878dc97
3
+ size 1093
102/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.4, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-82-siit-weigth-0.4", "wandb_name": "case-102-seed-82-s-0.4-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 82, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
104/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
3
+ size 113
104/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
3
+ size 14698
104/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
3
+ size 1093
104/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-104-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
105/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
3
+ size 113
105/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c32e3e97b224cc8d115b70d7661b8b8433bfcd41a9f58d1d7409ce9367ea27a
3
+ size 15018
105/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57dece33450716de87f38b12c0955174849706456cc698dd4fcce152feba3cbf
3
+ size 1093
105/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-105-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
123/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
3
+ size 113
123/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b640bb628be87f8ebf41e6bb8b5351a6d69bfa11699bf2e1cea140f6f6f9f95
3
+ size 15082
123/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f027ad4de6828c6f5bb7a3f3c8aabd9658526e33b2284eba366977823b89c0a
3
+ size 1093
123/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-123-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
46/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
3
+ size 113
46/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
3
+ size 14698
46/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
3
+ size 1093
46/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-46-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
50/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
3
+ size 113
50/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
3
+ size 14698
50/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
3
+ size 1093
50/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-50-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
52/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
3
+ size 113
52/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
3
+ size 14698
52/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
3
+ size 1093
52/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-52-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
53/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:994f3bdfbe5b148e1da38018a24a1567c1d86e5de1c18e9b4d62af358812c709
3
+ size 189
53/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c16ef496b74897cf166795daa3a019dd6adde99efe3edd12d540676047ff695b
3
+ size 14762
53/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:067caa3354f7d42fd2eb39aeacec1719e1e8aa60f9c707b5be1dff2a7a5eac4c
3
+ size 1093
53/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-53-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
54/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
3
+ size 113
54/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
3
+ size 14698
54/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
3
+ size 1093
54/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-54-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
55/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
3
+ size 113
55/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
3
+ size 14698
55/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
3
+ size 1093
55/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-55-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
60/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
3
+ size 113
60/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
3
+ size 14698
60/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db76917a80935813594508b1d79b7a0836a20daf67e263161432430b095fd111
3
+ size 1093
60/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-60-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
62/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
3
+ size 113
62/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44b75705cc0241f73d43d60bd6dba9014f5643c80a0da7d58b49ea7bdc1526fe
3
+ size 14698
62/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a2d11862991a5791d7bd91005f7c0f928d522a579988c7000192e53ff05de81
3
+ size 1093
62/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.7, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train-seed-67-siit-weigth-0.7", "wandb_name": "case-62-seed-67-s-0.7-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
64/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
3
+ size 113
64/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddb4db579c13e1bf510ead6a01c343582eedff2f1d799f4d733d72f5f9da50ab
3
+ size 14698