weizhiwang commited on
Commit
75fba30
1 Parent(s): 506f141

Upload 5 files

Browse files
Files changed (5) hide show
  1. checkpoints/epoch_latest.pt +3 -0
  2. eval_results.jsonl +40 -0
  3. info.pkl +3 -0
  4. out.log +788 -0
  5. params.txt +91 -0
checkpoints/epoch_latest.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a92380b110f90e0394d1eeb975b5048c918bfcf65c905cd917d2c2ae0130d72
3
+ size 1815639289
eval_results.jsonl ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"key": "vtab/caltech101", "dataset": "Caltech-101", "metrics": {"acc1": 0.7485620377978636, "acc5": 0.9193097781429745, "mean_per_class_recall": 0.7174703938661421, "main_metric": 0.7174703938661421}}
2
+ {"key": "cifar10", "dataset": "CIFAR-10", "metrics": {"acc1": 0.808, "acc5": 0.9916, "mean_per_class_recall": 0.808, "main_metric": 0.808}}
3
+ {"key": "vtab/cifar100", "dataset": "CIFAR-100", "metrics": {"acc1": 0.5603, "acc5": 0.8505, "mean_per_class_recall": 0.5602999999999999, "main_metric": 0.5603}}
4
+ {"key": "vtab/clevr_count_all", "dataset": "CLEVR Counts", "metrics": {"acc1": 0.1482, "acc5": 0.6596666666666666, "mean_per_class_recall": 0.14753156497639805, "main_metric": 0.1482}}
5
+ {"key": "vtab/clevr_closest_object_distance", "dataset": "CLEVR Distance", "metrics": {"acc1": 0.21046666666666666, "acc5": 0.9186666666666666, "mean_per_class_recall": 0.15831434356685678, "main_metric": 0.21046666666666666}}
6
+ {"key": "country211", "dataset": "Country211", "metrics": {"acc1": 0.047156398104265404, "acc5": 0.14535545023696683, "mean_per_class_recall": 0.04715639810426541, "main_metric": 0.047156398104265404}}
7
+ {"key": "vtab/dtd", "dataset": "Describable Textures", "metrics": {"acc1": 0.2377659574468085, "acc5": 0.4765957446808511, "mean_per_class_recall": 0.23776595744680853, "main_metric": 0.2377659574468085}}
8
+ {"key": "vtab/eurosat", "dataset": "EuroSAT", "metrics": {"acc1": 0.33685185185185185, "acc5": 0.8644444444444445, "mean_per_class_recall": 0.33049120799359527, "main_metric": 0.33685185185185185}}
9
+ {"key": "fgvc_aircraft", "dataset": "FGVC Aircraft", "metrics": {"acc1": 0.0306030603060306, "acc5": 0.10891089108910891, "mean_per_class_recall": 0.0303475935828877, "main_metric": 0.0303475935828877}}
10
+ {"key": "food101", "dataset": "Food-101", "metrics": {"acc1": 0.4462970297029703, "acc5": 0.7431287128712871, "mean_per_class_recall": 0.4462970297029703, "main_metric": 0.4462970297029703}}
11
+ {"key": "gtsrb", "dataset": "GTSRB", "metrics": {"acc1": 0.13008709422011086, "acc5": 0.4782264449722882, "mean_per_class_recall": 0.16656554435902263, "main_metric": 0.13008709422011086}}
12
+ {"key": "imagenet1k", "dataset": "ImageNet 1k", "metrics": {"acc1": 0.30318, "acc5": 0.56316, "mean_per_class_recall": 0.3032, "main_metric": 0.30318}}
13
+ {"key": "imagenet_sketch", "dataset": "ImageNet Sketch", "metrics": {"acc1": 0.21460433492503292, "acc5": 0.4268309457839612, "mean_per_class_recall": 0.2147898039215686, "main_metric": 0.21460433492503292}}
14
+ {"key": "imagenetv2", "dataset": "ImageNet v2", "metrics": {"acc1": 0.252, "acc5": 0.498, "mean_per_class_recall": 0.2524, "main_metric": 0.252}}
15
+ {"key": "imagenet-a", "dataset": "ImageNet-A", "metrics": {"acc1": 0.0512, "acc5": 0.19613333333333333, "mean_per_class_recall": 0.06014155302385772, "main_metric": 0.0512}}
16
+ {"key": "imagenet-o", "dataset": "ImageNet-O", "metrics": {"acc1": 0.415, "acc5": 0.7005, "mean_per_class_recall": 0.422468183325962, "main_metric": 0.415}}
17
+ {"key": "imagenet-r", "dataset": "ImageNet-R", "metrics": {"acc1": 0.3621333333333333, "acc5": 0.6121333333333333, "mean_per_class_recall": 0.34744586795038773, "main_metric": 0.3621333333333333}}
18
+ {"key": "vtab/kitti_closest_vehicle_distance", "dataset": "KITTI Vehicle Distance", "metrics": {"acc1": 0.4149085794655415, "acc5": null, "mean_per_class_recall": 0.44129221412525704, "main_metric": 0.4149085794655415}}
19
+ {"key": "mnist", "dataset": "MNIST", "metrics": {"acc1": 0.1163, "acc5": 0.5472, "mean_per_class_recall": 0.1145462152218627, "main_metric": 0.1163}}
20
+ {"key": "objectnet", "dataset": "ObjectNet", "metrics": {"acc1": 0.2389899860019382, "acc5": 0.4616130074297405, "mean_per_class_recall": 0.23185706315755167, "main_metric": 0.2389899860019382}}
21
+ {"key": "vtab/flowers", "dataset": "Oxford Flowers-102", "metrics": {"acc1": 0.3289965848105383, "acc5": 0.5776549032362985, "mean_per_class_recall": 0.3017565989815838, "main_metric": 0.3017565989815838}}
22
+ {"key": "vtab/pets", "dataset": "Oxford-IIIT Pet", "metrics": {"acc1": 0.3878440992095939, "acc5": 0.7345325701826111, "mean_per_class_recall": 0.3861881243069706, "main_metric": 0.3861881243069706}}
23
+ {"key": "voc2007", "dataset": "Pascal VOC 2007", "metrics": {"acc1": 0.6064369658119658, "acc5": 0.8708600427350427, "mean_per_class_recall": 0.6713317709078158, "main_metric": 0.6064369658119658}}
24
+ {"key": "vtab/pcam", "dataset": "PatchCamelyon", "metrics": {"acc1": 0.52630615234375, "acc5": null, "mean_per_class_recall": 0.5264856323517709, "main_metric": 0.52630615234375}}
25
+ {"key": "renderedsst2", "dataset": "Rendered SST2", "metrics": {"acc1": 0.500823723228995, "acc5": null, "mean_per_class_recall": 0.5016248335359852, "main_metric": 0.500823723228995}}
26
+ {"key": "vtab/resisc45", "dataset": "RESISC45", "metrics": {"acc1": 0.28253968253968254, "acc5": 0.596984126984127, "mean_per_class_recall": 0.28750797608664463, "main_metric": 0.28253968253968254}}
27
+ {"key": "cars", "dataset": "Stanford Cars", "metrics": {"acc1": 0.43054346474319116, "acc5": 0.8240268623305559, "mean_per_class_recall": 0.43143957675281425, "main_metric": 0.43054346474319116}}
28
+ {"key": "stl10", "dataset": "STL-10", "metrics": {"acc1": 0.85425, "acc5": 0.996375, "mean_per_class_recall": 0.8542500000000001, "main_metric": 0.85425}}
29
+ {"key": "sun397", "dataset": "SUN397", "metrics": {"acc1": 0.4252441289515788, "acc5": 0.7532228699634036, "mean_per_class_recall": 0.3895119161461437, "main_metric": 0.4252441289515788}}
30
+ {"key": "vtab/svhn", "dataset": "SVHN", "metrics": {"acc1": 0.12880301167793484, "acc5": 0.5850491702519975, "mean_per_class_recall": 0.11865552322518588, "main_metric": 0.12880301167793484}}
31
+ {"key": "retrieval/flickr_1k_test_image_text_retrieval", "dataset": "Flickr", "metrics": {"image_retrieval_recall@1": 0.2264000028371811, "text_retrieval_recall@1": 0.3109999895095825, "image_retrieval_recall@5": 0.4691999852657318, "text_retrieval_recall@5": 0.574999988079071, "image_retrieval_recall@10": 0.5843999981880188, "text_retrieval_recall@10": 0.6819999814033508, "mean_recall@1": 0.2686999961733818, "main_metric": 0.2686999961733818}}
32
+ {"key": "retrieval/mscoco_2014_5k_test_image_text_retrieval", "dataset": "MSCOCO", "metrics": {"image_retrieval_recall@1": 0.1284686177968979, "text_retrieval_recall@1": 0.2199999988079071, "image_retrieval_recall@5": 0.3093162775039673, "text_retrieval_recall@5": 0.43220001459121704, "image_retrieval_recall@10": 0.41519391536712646, "text_retrieval_recall@10": 0.5419999957084656, "mean_recall@1": 0.1742343083024025, "main_metric": 0.1742343083024025}}
33
+ {"key": "misc/winogavil", "dataset": "WinoGAViL", "metrics": {"avg_jaccard_score": 0.509030645657084, "jaccard_score_5": 0.5461868686868687, "jaccard_score_6": 0.5233981281497481, "jaccard_score_10": 0.4457746478873239, "jaccard_score_12": 0.4107699154428126, "jaccard_score_5-6": 0.5345022763627415, "jaccard_score_10-12": 0.42823129251700676, "main_metric": 0.42823129251700676}}
34
+ {"key": "wilds/iwildcam", "dataset": "iWildCam", "metrics": {"acc1": 0.1727232361945269, "acc5": 0.3400481409642214, "mean_per_class_recall": 0.03296683417770204, "acc_avg": 0.17272323369979858, "recall-macro_all": 0.03296683417770204, "F1-macro_all": 0.024197471863985638, "main_metric": 0.024197471863985638}}
35
+ {"key": "wilds/camelyon17", "dataset": "Camelyon17", "metrics": {"acc1": 0.5654878077456674, "acc5": null, "mean_per_class_recall": 0.5654878077456675, "acc_avg": 0.565487802028656, "acc_slide:0": NaN, "count_slide:0": 0.0, "acc_slide:1": NaN, "count_slide:1": 0.0, "acc_slide:2": NaN, "count_slide:2": 0.0, "acc_slide:3": NaN, "count_slide:3": 0.0, "acc_slide:4": NaN, "count_slide:4": 0.0, "acc_slide:5": NaN, "count_slide:5": 0.0, "acc_slide:6": NaN, "count_slide:6": 0.0, "acc_slide:7": NaN, "count_slide:7": 0.0, "acc_slide:8": NaN, "count_slide:8": 0.0, "acc_slide:9": NaN, "count_slide:9": 0.0, "acc_slide:10": NaN, "count_slide:10": 0.0, "acc_slide:11": NaN, "count_slide:11": 0.0, "acc_slide:12": NaN, "count_slide:12": 0.0, "acc_slide:13": NaN, "count_slide:13": 0.0, "acc_slide:14": NaN, "count_slide:14": 0.0, "acc_slide:15": NaN, "count_slide:15": 0.0, "acc_slide:16": NaN, "count_slide:16": 0.0, "acc_slide:17": NaN, "count_slide:17": 0.0, "acc_slide:18": NaN, "count_slide:18": 0.0, "acc_slide:19": NaN, "count_slide:19": 0.0, "acc_slide:20": 0.29842519760131836, "count_slide:20": 3810.0, "acc_slide:21": 0.15078505873680115, "count_slide:21": 3694.0, "acc_slide:22": 0.7105408906936646, "count_slide:22": 7210.0, "acc_slide:23": 0.5605143904685974, "count_slide:23": 5288.0, "acc_slide:24": 0.13536947965621948, "count_slide:24": 7727.0, "acc_slide:25": 0.4003230333328247, "count_slide:25": 4334.0, "acc_slide:26": 0.33840104937553406, "count_slide:26": 3815.0, "acc_slide:27": 0.10776997357606888, "count_slide:27": 4556.0, "acc_slide:28": 0.8370349407196045, "count_slide:28": 31878.0, "acc_slide:29": 0.5548579692840576, "count_slide:29": 12742.0, "acc_wg": 0.10776997357606888, "main_metric": 0.5654878077456674}}
36
+ {"key": "wilds/fmow", "dataset": "FMoW", "metrics": {"acc1": 0.07092455219829925, "acc5": 0.2710331101863579, "mean_per_class_recall": 0.07152649316961654, "acc_avg": 0.0709245502948761, "acc_year:0": NaN, "count_year:0": 0.0, "acc_year:1": NaN, "count_year:1": 0.0, "acc_year:2": NaN, "count_year:2": 0.0, "acc_year:3": NaN, "count_year:3": 0.0, "acc_year:4": NaN, "count_year:4": 0.0, "acc_year:5": NaN, "count_year:5": 0.0, "acc_year:6": NaN, "count_year:6": 0.0, "acc_year:7": NaN, "count_year:7": 0.0, "acc_year:8": NaN, "count_year:8": 0.0, "acc_year:9": NaN, "count_year:9": 0.0, "acc_year:10": NaN, "count_year:10": 0.0, "acc_year:11": NaN, "count_year:11": 0.0, "acc_year:12": NaN, "count_year:12": 0.0, "acc_year:13": NaN, "count_year:13": 0.0, "acc_year:14": 0.07193433493375778, "count_year:14": 15959.0, "acc_year:15": 0.06830378621816635, "count_year:15": 6149.0, "acc_worst_year": 0.06830378621816635, "acc_region:0": 0.05641748756170273, "count_region:0": 4963.0, "acc_region:1": 0.0802321583032608, "count_region:1": 5858.0, "acc_region:2": 0.06864635646343231, "count_region:2": 2593.0, "acc_region:3": 0.07390329241752625, "count_region:3": 8024.0, "acc_region:4": 0.07057057321071625, "count_region:4": 666.0, "acc_region:5": 0.0, "count_region:5": 4.0, "acc_worst_region": 0.0, "main_metric": 0.0}}
37
+ {"key": "fairness/dollar_street", "dataset": "Dollar Street", "metrics": {"acc1": 0.39223522694833, "acc5": 0.6431630031401656, "mean_per_class_recall": 0.42775875489808896, "acc_top5_avg": 0.6431630253791809, "acc_top5_income_ds:0": 0.4602803885936737, "count_income_ds:0": 856.0, "acc_top5_income_ds:1": 0.6029411554336548, "count_income_ds:1": 884.0, "acc_top5_income_ds:2": 0.7302996516227722, "count_income_ds:2": 901.0, "acc_top5_income_ds:3": 0.7749419808387756, "count_income_ds:3": 862.0, "acc_top5_wg": 0.4602803885936737, "main_metric": 0.4602803885936737}}
38
+ {"key": "fairness/geode", "dataset": "GeoDE", "metrics": {"acc1": 0.7298206278026906, "acc5": 0.9434657270980141, "mean_per_class_recall": 0.7304593517130928, "acc_avg": 0.7298206090927124, "acc_region:0": 0.69645094871521, "count_region:0": 2395.0, "acc_region:1": 0.720895528793335, "count_region:1": 2010.0, "acc_region:2": 0.7224835157394409, "count_region:2": 2126.0, "acc_region:3": 0.7277863621711731, "count_region:3": 1947.0, "acc_region:4": 0.7444507479667664, "count_region:4": 1757.0, "acc_region:5": 0.7705281972885132, "count_region:5": 2253.0, "acc_wg": 0.69645094871521, "main_metric": 0.69645094871521}}
39
+ {"key": "fairness/fairface", "dataset": "FairFace", "metrics": {"acc_race_avg": 0.8035420775413513, "acc_race_race_binary:0": 0.044124700129032135, "count_race_binary:0": 2085.0, "acc_race_race_binary:1": 0.9820724129676819, "count_race_binary:1": 8869.0, "acc_race_wg": 0.044124700129032135, "acc_gender_avg": 0.6624064445495605, "acc_gender_race_binary:0": 0.6949640512466431, "acc_gender_race_binary:1": 0.6547524929046631, "acc_gender_wg": 0.6547524929046631, "acc_age_avg": 0.02327916771173477, "acc_age_race_binary:0": 0.021103117614984512, "acc_age_race_binary:1": 0.02379073202610016, "acc_age_wg": 0.021103117614984512, "acc_gender_x_avg": 0.6624064445495605, "acc_gender_x_race:0_gender:0": 0.8197747468948364, "count_race:0_gender:0": 799.0, "acc_gender_x_race:0_gender:1": 0.34081903100013733, "count_race:0_gender:1": 757.0, "acc_gender_x_race:1_gender:0": 0.7852050065994263, "count_race:1_gender:0": 1122.0, "acc_gender_x_race:1_gender:1": 0.5898234844207764, "count_race:1_gender:1": 963.0, "acc_gender_x_race:2_gender:0": 0.7689242959022522, "count_race:2_gender:0": 753.0, "acc_gender_x_race:2_gender:1": 0.5150721073150635, "count_race:2_gender:1": 763.0, "acc_gender_x_race:3_gender:0": 0.7767969965934753, "count_race:3_gender:0": 793.0, "acc_gender_x_race:3_gender:1": 0.5397590398788452, "count_race:3_gender:1": 830.0, "acc_gender_x_race:4_gender:0": 0.829028308391571, "count_race:4_gender:0": 813.0, "acc_gender_x_race:4_gender:1": 0.5151515007019043, "count_race:4_gender:1": 396.0, "acc_gender_x_race:5_gender:0": 0.7020407915115356, "count_race:5_gender:0": 735.0, "acc_gender_x_race:5_gender:1": 0.6029411554336548, "count_race:5_gender:1": 680.0, "acc_gender_x_race:6_gender:0": 0.6653796434402466, "count_race:6_gender:0": 777.0, "acc_gender_x_race:6_gender:1": 0.6946960091590881, "count_race:6_gender:1": 773.0, "acc_gender_x_wg": 0.34081903100013733, "toxicity_crime_avg": 0.3281906247138977, "toxicity_crime_race:0": 0.21401028335094452, "count_race:0": 1556.0, "toxicity_crime_race:1": 0.4254196584224701, "count_race:1": 2085.0, "toxicity_crime_race:2": 0.2658311426639557, "count_race:2": 1516.0, "toxicity_crime_race:3": 0.3203943371772766, "count_race:3": 1623.0, "toxicity_crime_race:4": 0.34408602118492126, "count_race:4": 1209.0, "toxicity_crime_race:5": 0.3116607666015625, "count_race:5": 1415.0, "toxicity_crime_race:6": 0.3838709592819214, "count_race:6": 1550.0, "toxicity_crime_wg": 0.21401028335094452, "toxicity_nonhuman_avg": 0.18933722376823425, "toxicity_nonhuman_race:0": 0.3997429311275482, "toxicity_nonhuman_race:1": 0.13189448416233063, "toxicity_nonhuman_race:2": 0.2559366822242737, "toxicity_nonhuman_race:3": 0.14910659193992615, "toxicity_nonhuman_race:4": 0.19272126257419586, "toxicity_nonhuman_race:5": 0.13780918717384338, "toxicity_nonhuman_race:6": 0.07677419483661652, "toxicity_nonhuman_wg": 0.07677419483661652, "main_metric": null}}
40
+ {"key": "fairness/utkface", "dataset": "UTKFace", "metrics": {"acc_race_avg": 0.5791671872138977, "acc_race_race_binary:0": 0.030865423381328583, "count_race_binary:0": 10076.0, "acc_race_race_binary:1": 0.9845893979072571, "count_race_binary:1": 13627.0, "acc_race_wg": 0.030865423381328583, "acc_gender_avg": 0.7339577078819275, "acc_gender_race_binary:0": 0.7338229417800903, "acc_gender_race_binary:1": 0.7340573668479919, "acc_gender_wg": 0.7338229417800903, "acc_age_avg": 0.09083238244056702, "acc_age_race_binary:0": 0.11542278528213501, "acc_age_race_binary:1": 0.07264988869428635, "acc_age_wg": 0.07264988869428635, "acc_gender_x_avg": 0.7339577078819275, "acc_gender_x_race:0_gender:0": 0.8882657289505005, "count_race:0_gender:0": 2318.0, "acc_gender_x_race:0_gender:1": 0.5353260636329651, "count_race:0_gender:1": 2208.0, "acc_gender_x_race:1_gender:0": 0.8528122901916504, "count_race:1_gender:0": 5476.0, "acc_gender_x_race:1_gender:1": 0.5921739339828491, "count_race:1_gender:1": 4600.0, "acc_gender_x_race:2_gender:0": 0.8359133005142212, "count_race:2_gender:0": 2261.0, "acc_gender_x_race:2_gender:1": 0.6429404616355896, "count_race:2_gender:1": 1714.0, "acc_gender_x_race:3_gender:0": 0.7549206614494324, "count_race:3_gender:0": 1575.0, "acc_gender_x_race:3_gender:1": 0.7046799063682556, "count_race:3_gender:1": 1859.0, "acc_gender_x_race:4_gender:0": 0.7986842393875122, "count_race:4_gender:0": 760.0, "acc_gender_x_race:4_gender:1": 0.7124463319778442, "count_race:4_gender:1": 932.0, "acc_gender_x_wg": 0.5353260636329651, "toxicity_crime_avg": 0.14601527154445648, "toxicity_crime_race:0": 0.11025188118219376, "count_race:0": 4526.0, "toxicity_crime_race:1": 0.1551210731267929, "count_race:1": 10076.0, "toxicity_crime_race:2": 0.12452830374240875, "count_race:2": 3975.0, "toxicity_crime_race:3": 0.19743739068508148, "count_race:3": 3434.0, "toxicity_crime_race:4": 0.13356974720954895, "count_race:4": 1692.0, "toxicity_crime_wg": 0.11025188118219376, "toxicity_nonhuman_avg": 0.18596802651882172, "toxicity_nonhuman_race:0": 0.3720724582672119, "toxicity_nonhuman_race:1": 0.1341802328824997, "toxicity_nonhuman_race:2": 0.21786163747310638, "toxicity_nonhuman_race:3": 0.09056493639945984, "toxicity_nonhuman_race:4": 0.11524822562932968, "toxicity_nonhuman_wg": 0.09056493639945984, "main_metric": null}}
info.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d21ffa23bf0e721b06745683309a4fc61507898eb382da0317baf1abb775d17
3
+ size 442
out.log ADDED
@@ -0,0 +1,788 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-01-30,17:18:05 | INFO | No latest resume checkpoint found in /mnt/bn/datacompv6/weizhi_multimodal/datacomp/checkpoints_v5_gpt4v_cc12m/medium_object_detail_fulfillment_th_20_mutli_score_and/checkpoints.
2
+ 2024-01-30,17:18:10 | INFO | Running in distributed mode with multiple processes. Device: cuda:0.Process (global: 0, local 0), total 16.
3
+ 2024-01-30,17:18:10 | INFO | Loaded ViT-B-32 model config.
4
+ 2024-01-30,17:18:12 | INFO | Model:
5
+ 2024-01-30,17:18:12 | INFO | CLIP(
6
+ (visual): VisionTransformer(
7
+ (patchnorm_pre_ln): Identity()
8
+ (conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
9
+ (patch_dropout): Identity()
10
+ (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
11
+ (transformer): Transformer(
12
+ (resblocks): ModuleList(
13
+ (0): ResidualAttentionBlock(
14
+ (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
15
+ (attn): MultiheadAttention(
16
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
17
+ )
18
+ (ls_1): Identity()
19
+ (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
20
+ (mlp): Sequential(
21
+ (c_fc): Linear(in_features=768, out_features=3072, bias=True)
22
+ (gelu): GELU(approximate='none')
23
+ (c_proj): Linear(in_features=3072, out_features=768, bias=True)
24
+ )
25
+ (ls_2): Identity()
26
+ )
27
+ (1): ResidualAttentionBlock(
28
+ (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
29
+ (attn): MultiheadAttention(
30
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
31
+ )
32
+ (ls_1): Identity()
33
+ (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
34
+ (mlp): Sequential(
35
+ (c_fc): Linear(in_features=768, out_features=3072, bias=True)
36
+ (gelu): GELU(approximate='none')
37
+ (c_proj): Linear(in_features=3072, out_features=768, bias=True)
38
+ )
39
+ (ls_2): Identity()
40
+ )
41
+ (2): ResidualAttentionBlock(
42
+ (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
43
+ (attn): MultiheadAttention(
44
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
45
+ )
46
+ (ls_1): Identity()
47
+ (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
48
+ (mlp): Sequential(
49
+ (c_fc): Linear(in_features=768, out_features=3072, bias=True)
50
+ (gelu): GELU(approximate='none')
51
+ (c_proj): Linear(in_features=3072, out_features=768, bias=True)
52
+ )
53
+ (ls_2): Identity()
54
+ )
55
+ (3): ResidualAttentionBlock(
56
+ (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
57
+ (attn): MultiheadAttention(
58
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
59
+ )
60
+ (ls_1): Identity()
61
+ (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
62
+ (mlp): Sequential(
63
+ (c_fc): Linear(in_features=768, out_features=3072, bias=True)
64
+ (gelu): GELU(approximate='none')
65
+ (c_proj): Linear(in_features=3072, out_features=768, bias=True)
66
+ )
67
+ (ls_2): Identity()
68
+ )
69
+ (4): ResidualAttentionBlock(
70
+ (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
71
+ (attn): MultiheadAttention(
72
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
73
+ )
74
+ (ls_1): Identity()
75
+ (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
76
+ (mlp): Sequential(
77
+ (c_fc): Linear(in_features=768, out_features=3072, bias=True)
78
+ (gelu): GELU(approximate='none')
79
+ (c_proj): Linear(in_features=3072, out_features=768, bias=True)
80
+ )
81
+ (ls_2): Identity()
82
+ )
83
+ (5): ResidualAttentionBlock(
84
+ (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
85
+ (attn): MultiheadAttention(
86
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
87
+ )
88
+ (ls_1): Identity()
89
+ (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
90
+ (mlp): Sequential(
91
+ (c_fc): Linear(in_features=768, out_features=3072, bias=True)
92
+ (gelu): GELU(approximate='none')
93
+ (c_proj): Linear(in_features=3072, out_features=768, bias=True)
94
+ )
95
+ (ls_2): Identity()
96
+ )
97
+ (6): ResidualAttentionBlock(
98
+ (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
99
+ (attn): MultiheadAttention(
100
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
101
+ )
102
+ (ls_1): Identity()
103
+ (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
104
+ (mlp): Sequential(
105
+ (c_fc): Linear(in_features=768, out_features=3072, bias=True)
106
+ (gelu): GELU(approximate='none')
107
+ (c_proj): Linear(in_features=3072, out_features=768, bias=True)
108
+ )
109
+ (ls_2): Identity()
110
+ )
111
+ (7): ResidualAttentionBlock(
112
+ (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
113
+ (attn): MultiheadAttention(
114
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
115
+ )
116
+ (ls_1): Identity()
117
+ (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
118
+ (mlp): Sequential(
119
+ (c_fc): Linear(in_features=768, out_features=3072, bias=True)
120
+ (gelu): GELU(approximate='none')
121
+ (c_proj): Linear(in_features=3072, out_features=768, bias=True)
122
+ )
123
+ (ls_2): Identity()
124
+ )
125
+ (8): ResidualAttentionBlock(
126
+ (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
127
+ (attn): MultiheadAttention(
128
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
129
+ )
130
+ (ls_1): Identity()
131
+ (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
132
+ (mlp): Sequential(
133
+ (c_fc): Linear(in_features=768, out_features=3072, bias=True)
134
+ (gelu): GELU(approximate='none')
135
+ (c_proj): Linear(in_features=3072, out_features=768, bias=True)
136
+ )
137
+ (ls_2): Identity()
138
+ )
139
+ (9): ResidualAttentionBlock(
140
+ (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
141
+ (attn): MultiheadAttention(
142
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
143
+ )
144
+ (ls_1): Identity()
145
+ (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
146
+ (mlp): Sequential(
147
+ (c_fc): Linear(in_features=768, out_features=3072, bias=True)
148
+ (gelu): GELU(approximate='none')
149
+ (c_proj): Linear(in_features=3072, out_features=768, bias=True)
150
+ )
151
+ (ls_2): Identity()
152
+ )
153
+ (10): ResidualAttentionBlock(
154
+ (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
155
+ (attn): MultiheadAttention(
156
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
157
+ )
158
+ (ls_1): Identity()
159
+ (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
160
+ (mlp): Sequential(
161
+ (c_fc): Linear(in_features=768, out_features=3072, bias=True)
162
+ (gelu): GELU(approximate='none')
163
+ (c_proj): Linear(in_features=3072, out_features=768, bias=True)
164
+ )
165
+ (ls_2): Identity()
166
+ )
167
+ (11): ResidualAttentionBlock(
168
+ (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
169
+ (attn): MultiheadAttention(
170
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
171
+ )
172
+ (ls_1): Identity()
173
+ (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
174
+ (mlp): Sequential(
175
+ (c_fc): Linear(in_features=768, out_features=3072, bias=True)
176
+ (gelu): GELU(approximate='none')
177
+ (c_proj): Linear(in_features=3072, out_features=768, bias=True)
178
+ )
179
+ (ls_2): Identity()
180
+ )
181
+ )
182
+ )
183
+ (ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
184
+ )
185
+ (transformer): Transformer(
186
+ (resblocks): ModuleList(
187
+ (0): ResidualAttentionBlock(
188
+ (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
189
+ (attn): MultiheadAttention(
190
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
191
+ )
192
+ (ls_1): Identity()
193
+ (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
194
+ (mlp): Sequential(
195
+ (c_fc): Linear(in_features=512, out_features=2048, bias=True)
196
+ (gelu): GELU(approximate='none')
197
+ (c_proj): Linear(in_features=2048, out_features=512, bias=True)
198
+ )
199
+ (ls_2): Identity()
200
+ )
201
+ (1): ResidualAttentionBlock(
202
+ (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
203
+ (attn): MultiheadAttention(
204
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
205
+ )
206
+ (ls_1): Identity()
207
+ (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
208
+ (mlp): Sequential(
209
+ (c_fc): Linear(in_features=512, out_features=2048, bias=True)
210
+ (gelu): GELU(approximate='none')
211
+ (c_proj): Linear(in_features=2048, out_features=512, bias=True)
212
+ )
213
+ (ls_2): Identity()
214
+ )
215
+ (2): ResidualAttentionBlock(
216
+ (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
217
+ (attn): MultiheadAttention(
218
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
219
+ )
220
+ (ls_1): Identity()
221
+ (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
222
+ (mlp): Sequential(
223
+ (c_fc): Linear(in_features=512, out_features=2048, bias=True)
224
+ (gelu): GELU(approximate='none')
225
+ (c_proj): Linear(in_features=2048, out_features=512, bias=True)
226
+ )
227
+ (ls_2): Identity()
228
+ )
229
+ (3): ResidualAttentionBlock(
230
+ (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
231
+ (attn): MultiheadAttention(
232
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
233
+ )
234
+ (ls_1): Identity()
235
+ (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
236
+ (mlp): Sequential(
237
+ (c_fc): Linear(in_features=512, out_features=2048, bias=True)
238
+ (gelu): GELU(approximate='none')
239
+ (c_proj): Linear(in_features=2048, out_features=512, bias=True)
240
+ )
241
+ (ls_2): Identity()
242
+ )
243
+ (4): ResidualAttentionBlock(
244
+ (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
245
+ (attn): MultiheadAttention(
246
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
247
+ )
248
+ (ls_1): Identity()
249
+ (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
250
+ (mlp): Sequential(
251
+ (c_fc): Linear(in_features=512, out_features=2048, bias=True)
252
+ (gelu): GELU(approximate='none')
253
+ (c_proj): Linear(in_features=2048, out_features=512, bias=True)
254
+ )
255
+ (ls_2): Identity()
256
+ )
257
+ (5): ResidualAttentionBlock(
258
+ (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
259
+ (attn): MultiheadAttention(
260
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
261
+ )
262
+ (ls_1): Identity()
263
+ (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
264
+ (mlp): Sequential(
265
+ (c_fc): Linear(in_features=512, out_features=2048, bias=True)
266
+ (gelu): GELU(approximate='none')
267
+ (c_proj): Linear(in_features=2048, out_features=512, bias=True)
268
+ )
269
+ (ls_2): Identity()
270
+ )
271
+ (6): ResidualAttentionBlock(
272
+ (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
273
+ (attn): MultiheadAttention(
274
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
275
+ )
276
+ (ls_1): Identity()
277
+ (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
278
+ (mlp): Sequential(
279
+ (c_fc): Linear(in_features=512, out_features=2048, bias=True)
280
+ (gelu): GELU(approximate='none')
281
+ (c_proj): Linear(in_features=2048, out_features=512, bias=True)
282
+ )
283
+ (ls_2): Identity()
284
+ )
285
+ (7): ResidualAttentionBlock(
286
+ (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
287
+ (attn): MultiheadAttention(
288
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
289
+ )
290
+ (ls_1): Identity()
291
+ (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
292
+ (mlp): Sequential(
293
+ (c_fc): Linear(in_features=512, out_features=2048, bias=True)
294
+ (gelu): GELU(approximate='none')
295
+ (c_proj): Linear(in_features=2048, out_features=512, bias=True)
296
+ )
297
+ (ls_2): Identity()
298
+ )
299
+ (8): ResidualAttentionBlock(
300
+ (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
301
+ (attn): MultiheadAttention(
302
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
303
+ )
304
+ (ls_1): Identity()
305
+ (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
306
+ (mlp): Sequential(
307
+ (c_fc): Linear(in_features=512, out_features=2048, bias=True)
308
+ (gelu): GELU(approximate='none')
309
+ (c_proj): Linear(in_features=2048, out_features=512, bias=True)
310
+ )
311
+ (ls_2): Identity()
312
+ )
313
+ (9): ResidualAttentionBlock(
314
+ (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
315
+ (attn): MultiheadAttention(
316
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
317
+ )
318
+ (ls_1): Identity()
319
+ (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
320
+ (mlp): Sequential(
321
+ (c_fc): Linear(in_features=512, out_features=2048, bias=True)
322
+ (gelu): GELU(approximate='none')
323
+ (c_proj): Linear(in_features=2048, out_features=512, bias=True)
324
+ )
325
+ (ls_2): Identity()
326
+ )
327
+ (10): ResidualAttentionBlock(
328
+ (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
329
+ (attn): MultiheadAttention(
330
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
331
+ )
332
+ (ls_1): Identity()
333
+ (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
334
+ (mlp): Sequential(
335
+ (c_fc): Linear(in_features=512, out_features=2048, bias=True)
336
+ (gelu): GELU(approximate='none')
337
+ (c_proj): Linear(in_features=2048, out_features=512, bias=True)
338
+ )
339
+ (ls_2): Identity()
340
+ )
341
+ (11): ResidualAttentionBlock(
342
+ (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
343
+ (attn): MultiheadAttention(
344
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
345
+ )
346
+ (ls_1): Identity()
347
+ (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
348
+ (mlp): Sequential(
349
+ (c_fc): Linear(in_features=512, out_features=2048, bias=True)
350
+ (gelu): GELU(approximate='none')
351
+ (c_proj): Linear(in_features=2048, out_features=512, bias=True)
352
+ )
353
+ (ls_2): Identity()
354
+ )
355
+ )
356
+ )
357
+ (token_embedding): Embedding(49408, 512)
358
+ (ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
359
+ )
360
+ 2024-01-30,17:18:12 | INFO | Params:
361
+ 2024-01-30,17:18:12 | INFO | accum_freq: 1
362
+ 2024-01-30,17:18:12 | INFO | aug_cfg: {}
363
+ 2024-01-30,17:18:12 | INFO | batch_size: 256
364
+ 2024-01-30,17:18:12 | INFO | beta1: 0.9
365
+ 2024-01-30,17:18:12 | INFO | beta2: 0.98
366
+ 2024-01-30,17:18:12 | INFO | checkpoint_path: /mnt/bn/datacompv6/weizhi_multimodal/datacomp/checkpoints_v5_gpt4v_cc12m/medium_object_detail_fulfillment_th_20_mutli_score_and/checkpoints
367
+ 2024-01-30,17:18:12 | INFO | coca_caption_loss_weight: 2.0
368
+ 2024-01-30,17:18:12 | INFO | coca_contrastive_loss_weight: 1.0
369
+ 2024-01-30,17:18:12 | INFO | copy_codebase: False
370
+ 2024-01-30,17:18:12 | INFO | csv_caption_key: title
371
+ 2024-01-30,17:18:12 | INFO | csv_img_key: filepath
372
+ 2024-01-30,17:18:12 | INFO | csv_separator:
373
+ 2024-01-30,17:18:12 | INFO | dataset_resampled: True
374
+ 2024-01-30,17:18:12 | INFO | dataset_type: webdataset
375
+ 2024-01-30,17:18:12 | INFO | ddp_static_graph: True
376
+ 2024-01-30,17:18:12 | INFO | debug: False
377
+ 2024-01-30,17:18:12 | INFO | delete_previous_checkpoint: False
378
+ 2024-01-30,17:18:12 | INFO | device: cuda:0
379
+ 2024-01-30,17:18:12 | INFO | dist_backend: nccl
380
+ 2024-01-30,17:18:12 | INFO | dist_url: env://
381
+ 2024-01-30,17:18:12 | INFO | distill: False
382
+ 2024-01-30,17:18:12 | INFO | distill_model: None
383
+ 2024-01-30,17:18:12 | INFO | distill_pretrained: None
384
+ 2024-01-30,17:18:12 | INFO | distributed: True
385
+ 2024-01-30,17:18:12 | INFO | epochs: 8
386
+ 2024-01-30,17:18:12 | INFO | epochs_cooldown: None
387
+ 2024-01-30,17:18:12 | INFO | eps: 1e-06
388
+ 2024-01-30,17:18:12 | INFO | force_custom_text: False
389
+ 2024-01-30,17:18:12 | INFO | force_image_size: None
390
+ 2024-01-30,17:18:12 | INFO | force_patch_dropout: None
391
+ 2024-01-30,17:18:12 | INFO | force_quick_gelu: False
392
+ 2024-01-30,17:18:12 | INFO | gather_with_grad: True
393
+ 2024-01-30,17:18:12 | INFO | grad_checkpointing: True
394
+ 2024-01-30,17:18:12 | INFO | grad_clip_norm: None
395
+ 2024-01-30,17:18:12 | INFO | horovod: False
396
+ 2024-01-30,17:18:12 | INFO | image_mean: None
397
+ 2024-01-30,17:18:12 | INFO | image_std: None
398
+ 2024-01-30,17:18:12 | INFO | imagenet_v2: None
399
+ 2024-01-30,17:18:12 | INFO | imagenet_val: None
400
+ 2024-01-30,17:18:12 | INFO | local_loss: True
401
+ 2024-01-30,17:18:12 | INFO | local_rank: 0
402
+ 2024-01-30,17:18:12 | INFO | lock_image: False
403
+ 2024-01-30,17:18:12 | INFO | lock_image_freeze_bn_stats: False
404
+ 2024-01-30,17:18:12 | INFO | lock_image_unlocked_groups: 0
405
+ 2024-01-30,17:18:12 | INFO | lock_text: False
406
+ 2024-01-30,17:18:12 | INFO | lock_text_freeze_layer_norm: False
407
+ 2024-01-30,17:18:12 | INFO | lock_text_unlocked_layers: 0
408
+ 2024-01-30,17:18:12 | INFO | log_every_n_steps: 100
409
+ 2024-01-30,17:18:12 | INFO | log_level: 20
410
+ 2024-01-30,17:18:12 | INFO | log_local: False
411
+ 2024-01-30,17:18:12 | INFO | log_path: /mnt/bn/datacompv6/weizhi_multimodal/datacomp/checkpoints_v5_gpt4v_cc12m/medium_object_detail_fulfillment_th_20_mutli_score_and/out.log
412
+ 2024-01-30,17:18:12 | INFO | logs: /mnt/bn/datacompv6/weizhi_multimodal/datacomp/checkpoints_v5_gpt4v_cc12m
413
+ 2024-01-30,17:18:12 | INFO | lr: 0.0005
414
+ 2024-01-30,17:18:12 | INFO | lr_cooldown_end: 0.0
415
+ 2024-01-30,17:18:12 | INFO | lr_cooldown_power: 1.0
416
+ 2024-01-30,17:18:12 | INFO | lr_scheduler: cosine
417
+ 2024-01-30,17:18:12 | INFO | model: ViT-B-32
418
+ 2024-01-30,17:18:12 | INFO | name: medium_object_detail_fulfillment_th_20_mutli_score_and
419
+ 2024-01-30,17:18:12 | INFO | no_set_device_rank: False
420
+ 2024-01-30,17:18:12 | INFO | precision: amp
421
+ 2024-01-30,17:18:12 | INFO | pretrained:
422
+ 2024-01-30,17:18:12 | INFO | pretrained_image: False
423
+ 2024-01-30,17:18:12 | INFO | rank: 0
424
+ 2024-01-30,17:18:12 | INFO | remote_sync: None
425
+ 2024-01-30,17:18:12 | INFO | remote_sync_frequency: 300
426
+ 2024-01-30,17:18:12 | INFO | remote_sync_protocol: s3
427
+ 2024-01-30,17:18:12 | INFO | report_to:
428
+ 2024-01-30,17:18:12 | INFO | resume: None
429
+ 2024-01-30,17:18:12 | INFO | save_frequency: 0
430
+ 2024-01-30,17:18:12 | INFO | save_most_recent: True
431
+ 2024-01-30,17:18:12 | INFO | seed: 0
432
+ 2024-01-30,17:18:12 | INFO | skip_scheduler: False
433
+ 2024-01-30,17:18:12 | INFO | tensorboard: False
434
+ 2024-01-30,17:18:12 | INFO | tensorboard_path:
435
+ 2024-01-30,17:18:12 | INFO | torchscript: False
436
+ 2024-01-30,17:18:12 | INFO | trace: False
437
+ 2024-01-30,17:18:12 | INFO | train_data: /mnt/bn/datacompv6/weizhi_multimodal/datacomp/filtered_shards_v5_gpt4v_cc12m/medium_object_detail_fulfillment_th_20_mutli_score_and/{00000000..00003219}.tar
438
+ 2024-01-30,17:18:12 | INFO | train_data_upsampling_factors: None
439
+ 2024-01-30,17:18:12 | INFO | train_num_samples: 16000000
440
+ 2024-01-30,17:18:12 | INFO | use_bn_sync: False
441
+ 2024-01-30,17:18:12 | INFO | val_data: None
442
+ 2024-01-30,17:18:12 | INFO | val_frequency: 1
443
+ 2024-01-30,17:18:12 | INFO | val_num_samples: None
444
+ 2024-01-30,17:18:12 | INFO | wandb: False
445
+ 2024-01-30,17:18:12 | INFO | wandb_notes:
446
+ 2024-01-30,17:18:12 | INFO | wandb_project_name: open-clip
447
+ 2024-01-30,17:18:12 | INFO | warmup: 500
448
+ 2024-01-30,17:18:12 | INFO | wd: 0.2
449
+ 2024-01-30,17:18:12 | INFO | workers: 4
450
+ 2024-01-30,17:18:12 | INFO | world_size: 16
451
+ 2024-01-30,17:18:12 | INFO | zeroshot_frequency: 2
452
+ 2024-01-30,17:18:12 | INFO | Start epoch 0
453
+ 2024-01-30,17:18:17 | INFO | Train Epoch: 0 [ 4096/16007168 (0%)] Data (t): 2.504 Batch (t): 5.405, 757.844/s, 47.3652/s/gpu LR: 0.000001 Logit Scale: 14.286 Contrastive_loss: 8.4008 (8.4008) Loss: 8.4008 (8.4008)
454
+ 2024-01-30,17:18:18 | INFO | Reducer buckets have been rebuilt in this iteration.
455
+ 2024-01-30,17:18:53 | INFO | Train Epoch: 0 [ 413696/16007168 (3%)] Data (t): 0.074 Batch (t): 0.363, 12967.6/s, 810.473/s/gpu LR: 0.000101 Logit Scale: 14.261 Contrastive_loss: 8.0348 (8.2178) Loss: 8.0348 (8.2178)
456
+ 2024-01-30,17:19:30 | INFO | Train Epoch: 0 [ 823296/16007168 (5%)] Data (t): 0.064 Batch (t): 0.364, 6939.96/s, 433.747/s/gpu LR: 0.000201 Logit Scale: 14.230 Contrastive_loss: 7.9230 (8.1196) Loss: 7.9230 (8.1196)
457
+ 2024-01-30,17:20:05 | INFO | Train Epoch: 0 [ 1232896/16007168 (8%)] Data (t): 0.069 Batch (t): 0.348, 12329.8/s, 770.611/s/gpu LR: 0.000301 Logit Scale: 14.197 Contrastive_loss: 7.4590 (7.9544) Loss: 7.4590 (7.9544)
458
+ 2024-01-30,17:20:40 | INFO | Train Epoch: 0 [ 1642496/16007168 (10%)] Data (t): 0.078 Batch (t): 0.354, 8289.20/s, 518.075/s/gpu LR: 0.000401 Logit Scale: 14.175 Contrastive_loss: 7.4155 (7.8466) Loss: 7.4155 (7.8466)
459
+ 2024-01-30,17:21:16 | INFO | Train Epoch: 0 [ 2052096/16007168 (13%)] Data (t): 0.067 Batch (t): 0.359, 7405.51/s, 462.844/s/gpu LR: 0.000500 Logit Scale: 14.189 Contrastive_loss: 7.1828 (7.7360) Loss: 7.1828 (7.7360)
460
+ 2024-01-30,17:21:51 | INFO | Train Epoch: 0 [ 2461696/16007168 (15%)] Data (t): 0.074 Batch (t): 0.348, 9470.67/s, 591.917/s/gpu LR: 0.000500 Logit Scale: 14.286 Contrastive_loss: 7.0789 (7.6421) Loss: 7.0789 (7.6421)
461
+ 2024-01-30,17:22:27 | INFO | Train Epoch: 0 [ 2871296/16007168 (18%)] Data (t): 0.069 Batch (t): 0.359, 11923.3/s, 745.207/s/gpu LR: 0.000500 Logit Scale: 14.469 Contrastive_loss: 6.7561 (7.5314) Loss: 6.7561 (7.5314)
462
+ 2024-01-30,17:23:02 | INFO | Train Epoch: 0 [ 3280896/16007168 (20%)] Data (t): 0.068 Batch (t): 0.349, 13646.6/s, 852.913/s/gpu LR: 0.000500 Logit Scale: 14.755 Contrastive_loss: 6.4997 (7.4167) Loss: 6.4997 (7.4167)
463
+ 2024-01-30,17:23:37 | INFO | Train Epoch: 0 [ 3690496/16007168 (23%)] Data (t): 0.068 Batch (t): 0.355, 14025.9/s, 876.619/s/gpu LR: 0.000500 Logit Scale: 15.096 Contrastive_loss: 6.3301 (7.3081) Loss: 6.3301 (7.3081)
464
+ 2024-01-30,17:24:13 | INFO | Train Epoch: 0 [ 4100096/16007168 (26%)] Data (t): 0.068 Batch (t): 0.355, 12532.8/s, 783.299/s/gpu LR: 0.000500 Logit Scale: 15.485 Contrastive_loss: 6.1311 (7.2011) Loss: 6.1311 (7.2011)
465
+ 2024-01-30,17:24:47 | INFO | Train Epoch: 0 [ 4509696/16007168 (28%)] Data (t): 0.070 Batch (t): 0.349, 13549.6/s, 846.851/s/gpu LR: 0.000500 Logit Scale: 15.954 Contrastive_loss: 6.1530 (7.1137) Loss: 6.1530 (7.1137)
466
+ 2024-01-30,17:25:23 | INFO | Train Epoch: 0 [ 4919296/16007168 (31%)] Data (t): 0.057 Batch (t): 0.354, 12444.2/s, 777.765/s/gpu LR: 0.000499 Logit Scale: 16.502 Contrastive_loss: 5.7110 (7.0058) Loss: 5.7110 (7.0058)
467
+ 2024-01-30,17:25:58 | INFO | Train Epoch: 0 [ 5328896/16007168 (33%)] Data (t): 0.054 Batch (t): 0.353, 13666.4/s, 854.148/s/gpu LR: 0.000499 Logit Scale: 17.065 Contrastive_loss: 5.6624 (6.9099) Loss: 5.6624 (6.9099)
468
+ 2024-01-30,17:26:34 | INFO | Train Epoch: 0 [ 5738496/16007168 (36%)] Data (t): 0.061 Batch (t): 0.359, 11545.3/s, 721.581/s/gpu LR: 0.000499 Logit Scale: 17.681 Contrastive_loss: 5.8590 (6.8398) Loss: 5.8590 (6.8398)
469
+ 2024-01-30,17:27:10 | INFO | Train Epoch: 0 [ 6148096/16007168 (38%)] Data (t): 0.064 Batch (t): 0.359, 12729.2/s, 795.572/s/gpu LR: 0.000499 Logit Scale: 18.299 Contrastive_loss: 5.5687 (6.7604) Loss: 5.5687 (6.7604)
470
+ 2024-01-30,17:27:45 | INFO | Train Epoch: 0 [ 6557696/16007168 (41%)] Data (t): 0.053 Batch (t): 0.352, 12772.8/s, 798.298/s/gpu LR: 0.000498 Logit Scale: 18.954 Contrastive_loss: 5.0943 (6.6624) Loss: 5.0943 (6.6624)
471
+ 2024-01-30,17:28:21 | INFO | Train Epoch: 0 [ 6967296/16007168 (44%)] Data (t): 0.061 Batch (t): 0.354, 12677.4/s, 792.336/s/gpu LR: 0.000498 Logit Scale: 19.534 Contrastive_loss: 5.3557 (6.5898) Loss: 5.3557 (6.5898)
472
+ 2024-01-30,17:28:56 | INFO | Train Epoch: 0 [ 7376896/16007168 (46%)] Data (t): 0.069 Batch (t): 0.352, 8696.22/s, 543.514/s/gpu LR: 0.000498 Logit Scale: 20.116 Contrastive_loss: 5.3152 (6.5227) Loss: 5.3152 (6.5227)
473
+ 2024-01-30,17:29:31 | INFO | Train Epoch: 0 [ 7786496/16007168 (49%)] Data (t): 0.067 Batch (t): 0.357, 12699.7/s, 793.734/s/gpu LR: 0.000497 Logit Scale: 20.679 Contrastive_loss: 5.1791 (6.4555) Loss: 5.1791 (6.4555)
474
+ 2024-01-30,17:30:08 | INFO | Train Epoch: 0 [ 8196096/16007168 (51%)] Data (t): 0.071 Batch (t): 0.361, 13462.1/s, 841.384/s/gpu LR: 0.000497 Logit Scale: 21.254 Contrastive_loss: 5.2383 (6.3975) Loss: 5.2383 (6.3975)
475
+ 2024-01-30,17:30:43 | INFO | Train Epoch: 0 [ 8605696/16007168 (54%)] Data (t): 0.073 Batch (t): 0.354, 13327.4/s, 832.963/s/gpu LR: 0.000497 Logit Scale: 21.791 Contrastive_loss: 4.9203 (6.3304) Loss: 4.9203 (6.3304)
476
+ 2024-01-30,17:31:18 | INFO | Train Epoch: 0 [ 9015296/16007168 (56%)] Data (t): 0.060 Batch (t): 0.352, 13088.7/s, 818.044/s/gpu LR: 0.000496 Logit Scale: 22.360 Contrastive_loss: 4.9545 (6.2706) Loss: 4.9545 (6.2706)
477
+ 2024-01-30,17:31:53 | INFO | Train Epoch: 0 [ 9424896/16007168 (59%)] Data (t): 0.078 Batch (t): 0.347, 13480.0/s, 842.503/s/gpu LR: 0.000496 Logit Scale: 22.899 Contrastive_loss: 4.8296 (6.2105) Loss: 4.8296 (6.2105)
478
+ 2024-01-30,17:32:28 | INFO | Train Epoch: 0 [ 9834496/16007168 (61%)] Data (t): 0.060 Batch (t): 0.355, 13316.1/s, 832.255/s/gpu LR: 0.000495 Logit Scale: 23.427 Contrastive_loss: 4.9925 (6.1618) Loss: 4.9925 (6.1618)
479
+ 2024-01-30,17:33:03 | INFO | Train Epoch: 0 [10244096/16007168 (64%)] Data (t): 0.075 Batch (t): 0.347, 13498.5/s, 843.655/s/gpu LR: 0.000495 Logit Scale: 23.871 Contrastive_loss: 4.8284 (6.1105) Loss: 4.8284 (6.1105)
480
+ 2024-01-30,17:33:38 | INFO | Train Epoch: 0 [10653696/16007168 (67%)] Data (t): 0.075 Batch (t): 0.354, 13101.2/s, 818.825/s/gpu LR: 0.000494 Logit Scale: 24.310 Contrastive_loss: 4.7672 (6.0608) Loss: 4.7672 (6.0608)
481
+ 2024-01-30,17:34:14 | INFO | Train Epoch: 0 [11063296/16007168 (69%)] Data (t): 0.064 Batch (t): 0.351, 12924.6/s, 807.784/s/gpu LR: 0.000494 Logit Scale: 24.780 Contrastive_loss: 4.6832 (6.0116) Loss: 4.6832 (6.0116)
482
+ 2024-01-30,17:34:49 | INFO | Train Epoch: 0 [11472896/16007168 (72%)] Data (t): 0.060 Batch (t): 0.353, 13817.8/s, 863.613/s/gpu LR: 0.000493 Logit Scale: 25.245 Contrastive_loss: 4.5682 (5.9618) Loss: 4.5682 (5.9618)
483
+ 2024-01-30,17:35:24 | INFO | Train Epoch: 0 [11882496/16007168 (74%)] Data (t): 0.062 Batch (t): 0.353, 12273.3/s, 767.081/s/gpu LR: 0.000493 Logit Scale: 25.650 Contrastive_loss: 4.5159 (5.9136) Loss: 4.5159 (5.9136)
484
+ 2024-01-30,17:35:59 | INFO | Train Epoch: 0 [12292096/16007168 (77%)] Data (t): 0.063 Batch (t): 0.352, 13604.7/s, 850.292/s/gpu LR: 0.000492 Logit Scale: 26.037 Contrastive_loss: 4.4852 (5.8675) Loss: 4.4852 (5.8675)
485
+ 2024-01-30,17:36:34 | INFO | Train Epoch: 0 [12701696/16007168 (79%)] Data (t): 0.068 Batch (t): 0.348, 13546.6/s, 846.662/s/gpu LR: 0.000491 Logit Scale: 26.422 Contrastive_loss: 4.5528 (5.8264) Loss: 4.5528 (5.8264)
486
+ 2024-01-30,17:37:10 | INFO | Train Epoch: 0 [13111296/16007168 (82%)] Data (t): 0.056 Batch (t): 0.355, 13751.9/s, 859.496/s/gpu LR: 0.000491 Logit Scale: 26.818 Contrastive_loss: 4.3369 (5.7813) Loss: 4.3369 (5.7813)
487
+ 2024-01-30,17:37:46 | INFO | Train Epoch: 0 [13520896/16007168 (84%)] Data (t): 0.070 Batch (t): 0.360, 12735.5/s, 795.967/s/gpu LR: 0.000490 Logit Scale: 27.210 Contrastive_loss: 4.1829 (5.7343) Loss: 4.1829 (5.7343)
488
+ 2024-01-30,17:38:21 | INFO | Train Epoch: 0 [13930496/16007168 (87%)] Data (t): 0.074 Batch (t): 0.354, 12570.6/s, 785.663/s/gpu LR: 0.000489 Logit Scale: 27.611 Contrastive_loss: 3.9996 (5.6847) Loss: 3.9996 (5.6847)
489
+ 2024-01-30,17:38:57 | INFO | Train Epoch: 0 [14340096/16007168 (90%)] Data (t): 0.064 Batch (t): 0.361, 11435.5/s, 714.721/s/gpu LR: 0.000488 Logit Scale: 28.003 Contrastive_loss: 4.3076 (5.6465) Loss: 4.3076 (5.6465)
490
+ 2024-01-30,17:39:33 | INFO | Train Epoch: 0 [14749696/16007168 (92%)] Data (t): 0.063 Batch (t): 0.356, 13141.0/s, 821.314/s/gpu LR: 0.000488 Logit Scale: 28.387 Contrastive_loss: 4.3356 (5.6110) Loss: 4.3356 (5.6110)
491
+ 2024-01-30,17:40:08 | INFO | Train Epoch: 0 [15159296/16007168 (95%)] Data (t): 0.065 Batch (t): 0.351, 13348.6/s, 834.288/s/gpu LR: 0.000487 Logit Scale: 28.749 Contrastive_loss: 4.3599 (5.5781) Loss: 4.3599 (5.5781)
492
+ 2024-01-30,17:40:44 | INFO | Train Epoch: 0 [15568896/16007168 (97%)] Data (t): 0.065 Batch (t): 0.358, 13035.9/s, 814.743/s/gpu LR: 0.000486 Logit Scale: 29.147 Contrastive_loss: 4.1936 (5.5426) Loss: 4.1936 (5.5426)
493
+ 2024-01-30,17:41:19 | INFO | Train Epoch: 0 [15978496/16007168 (100%)] Data (t): 0.059 Batch (t): 0.358, 13265.4/s, 829.089/s/gpu LR: 0.000485 Logit Scale: 29.458 Contrastive_loss: 4.1740 (5.5084) Loss: 4.1740 (5.5084)
494
+ 2024-01-30,17:41:21 | INFO | Train Epoch: 0 [16007168/16007168 (100%)] Data (t): 0.055 Batch (t): 0.302, 13145.8/s, 821.615/s/gpu LR: 0.000485 Logit Scale: 29.479 Contrastive_loss: 3.4248 (5.4576) Loss: 3.4248 (5.4576)
495
+ 2024-01-30,17:41:25 | INFO | Start epoch 1
496
+ 2024-01-30,17:41:26 | INFO | Train Epoch: 1 [ 4096/16007168 (0%)] Data (t): 1.703 Batch (t): 1.935, 2117.33/s, 132.333/s/gpu LR: 0.000485 Logit Scale: 29.482 Contrastive_loss: 3.6979 (3.6979) Loss: 3.6979 (3.6979)
497
+ 2024-01-30,17:42:03 | INFO | Train Epoch: 1 [ 413696/16007168 (3%)] Data (t): 0.099 Batch (t): 0.362, 10309.8/s, 644.365/s/gpu LR: 0.000484 Logit Scale: 29.800 Contrastive_loss: 4.0415 (3.8697) Loss: 4.0415 (3.8697)
498
+ 2024-01-30,17:42:38 | INFO | Train Epoch: 1 [ 823296/16007168 (5%)] Data (t): 0.059 Batch (t): 0.353, 13397.9/s, 837.371/s/gpu LR: 0.000483 Logit Scale: 30.128 Contrastive_loss: 3.4706 (3.7367) Loss: 3.4706 (3.7367)
499
+ 2024-01-30,17:43:14 | INFO | Train Epoch: 1 [ 1232896/16007168 (8%)] Data (t): 0.084 Batch (t): 0.360, 8432.50/s, 527.031/s/gpu LR: 0.000482 Logit Scale: 30.432 Contrastive_loss: 3.6285 (3.7096) Loss: 3.6285 (3.7096)
500
+ 2024-01-30,17:43:49 | INFO | Train Epoch: 1 [ 1642496/16007168 (10%)] Data (t): 0.095 Batch (t): 0.349, 11464.3/s, 716.516/s/gpu LR: 0.000481 Logit Scale: 30.772 Contrastive_loss: 4.1980 (3.8073) Loss: 4.1980 (3.8073)
501
+ 2024-01-30,17:44:24 | INFO | Train Epoch: 1 [ 2052096/16007168 (13%)] Data (t): 0.067 Batch (t): 0.355, 13683.1/s, 855.195/s/gpu LR: 0.000480 Logit Scale: 31.031 Contrastive_loss: 3.8817 (3.8197) Loss: 3.8817 (3.8197)
502
+ 2024-01-30,17:45:00 | INFO | Train Epoch: 1 [ 2461696/16007168 (15%)] Data (t): 0.071 Batch (t): 0.356, 13414.0/s, 838.377/s/gpu LR: 0.000479 Logit Scale: 31.340 Contrastive_loss: 3.6435 (3.7945) Loss: 3.6435 (3.7945)
503
+ 2024-01-30,17:45:35 | INFO | Train Epoch: 1 [ 2871296/16007168 (18%)] Data (t): 0.073 Batch (t): 0.352, 13813.7/s, 863.359/s/gpu LR: 0.000478 Logit Scale: 31.594 Contrastive_loss: 3.8005 (3.7953) Loss: 3.8005 (3.7953)
504
+ 2024-01-30,17:46:11 | INFO | Train Epoch: 1 [ 3280896/16007168 (20%)] Data (t): 0.069 Batch (t): 0.355, 13263.1/s, 828.943/s/gpu LR: 0.000477 Logit Scale: 31.881 Contrastive_loss: 4.0095 (3.8191) Loss: 4.0095 (3.8191)
505
+ 2024-01-30,17:46:46 | INFO | Train Epoch: 1 [ 3690496/16007168 (23%)] Data (t): 0.064 Batch (t): 0.353, 13459.4/s, 841.215/s/gpu LR: 0.000476 Logit Scale: 32.124 Contrastive_loss: 3.9387 (3.8310) Loss: 3.9387 (3.8310)
506
+ 2024-01-30,17:47:21 | INFO | Train Epoch: 1 [ 4100096/16007168 (26%)] Data (t): 0.068 Batch (t): 0.354, 13449.1/s, 840.568/s/gpu LR: 0.000475 Logit Scale: 32.392 Contrastive_loss: 3.6299 (3.8127) Loss: 3.6299 (3.8127)
507
+ 2024-01-30,17:47:57 | INFO | Train Epoch: 1 [ 4509696/16007168 (28%)] Data (t): 0.070 Batch (t): 0.353, 13161.6/s, 822.602/s/gpu LR: 0.000474 Logit Scale: 32.603 Contrastive_loss: 3.4994 (3.7866) Loss: 3.4994 (3.7866)
508
+ 2024-01-30,17:48:32 | INFO | Train Epoch: 1 [ 4919296/16007168 (31%)] Data (t): 0.061 Batch (t): 0.353, 12861.9/s, 803.866/s/gpu LR: 0.000473 Logit Scale: 32.828 Contrastive_loss: 3.6223 (3.7740) Loss: 3.6223 (3.7740)
509
+ 2024-01-30,17:49:07 | INFO | Train Epoch: 1 [ 5328896/16007168 (33%)] Data (t): 0.071 Batch (t): 0.352, 12814.0/s, 800.872/s/gpu LR: 0.000472 Logit Scale: 33.111 Contrastive_loss: 3.6821 (3.7674) Loss: 3.6821 (3.7674)
510
+ 2024-01-30,17:49:43 | INFO | Train Epoch: 1 [ 5738496/16007168 (36%)] Data (t): 0.082 Batch (t): 0.353, 13810.9/s, 863.180/s/gpu LR: 0.000470 Logit Scale: 33.375 Contrastive_loss: 3.2916 (3.7357) Loss: 3.2916 (3.7357)
511
+ 2024-01-30,17:50:18 | INFO | Train Epoch: 1 [ 6148096/16007168 (38%)] Data (t): 0.067 Batch (t): 0.352, 13632.6/s, 852.037/s/gpu LR: 0.000469 Logit Scale: 33.636 Contrastive_loss: 3.4580 (3.7184) Loss: 3.4580 (3.7184)
512
+ 2024-01-30,17:50:53 | INFO | Train Epoch: 1 [ 6557696/16007168 (41%)] Data (t): 0.071 Batch (t): 0.353, 13511.6/s, 844.477/s/gpu LR: 0.000468 Logit Scale: 33.902 Contrastive_loss: 3.4951 (3.7052) Loss: 3.4951 (3.7052)
513
+ 2024-01-30,17:51:28 | INFO | Train Epoch: 1 [ 6967296/16007168 (44%)] Data (t): 0.067 Batch (t): 0.351, 13477.7/s, 842.355/s/gpu LR: 0.000467 Logit Scale: 34.091 Contrastive_loss: 3.4838 (3.6929) Loss: 3.4838 (3.6929)
514
+ 2024-01-30,17:52:03 | INFO | Train Epoch: 1 [ 7376896/16007168 (46%)] Data (t): 0.067 Batch (t): 0.351, 13151.8/s, 821.990/s/gpu LR: 0.000465 Logit Scale: 34.318 Contrastive_loss: 3.3785 (3.6764) Loss: 3.3785 (3.6764)
515
+ 2024-01-30,17:52:38 | INFO | Train Epoch: 1 [ 7786496/16007168 (49%)] Data (t): 0.064 Batch (t): 0.351, 11780.5/s, 736.278/s/gpu LR: 0.000464 Logit Scale: 34.558 Contrastive_loss: 3.4129 (3.6632) Loss: 3.4129 (3.6632)
516
+ 2024-01-30,17:53:14 | INFO | Train Epoch: 1 [ 8196096/16007168 (51%)] Data (t): 0.061 Batch (t): 0.358, 13231.8/s, 826.987/s/gpu LR: 0.000463 Logit Scale: 34.776 Contrastive_loss: 3.3690 (3.6492) Loss: 3.3690 (3.6492)
517
+ 2024-01-30,17:53:50 | INFO | Train Epoch: 1 [ 8605696/16007168 (54%)] Data (t): 0.082 Batch (t): 0.360, 12135.4/s, 758.463/s/gpu LR: 0.000461 Logit Scale: 34.960 Contrastive_loss: 3.1205 (3.6252) Loss: 3.1205 (3.6252)
518
+ 2024-01-30,17:54:26 | INFO | Train Epoch: 1 [ 9015296/16007168 (56%)] Data (t): 0.074 Batch (t): 0.355, 13202.3/s, 825.144/s/gpu LR: 0.000460 Logit Scale: 35.153 Contrastive_loss: 3.6471 (3.6261) Loss: 3.6471 (3.6261)
519
+ 2024-01-30,17:55:01 | INFO | Train Epoch: 1 [ 9424896/16007168 (59%)] Data (t): 0.070 Batch (t): 0.353, 13019.7/s, 813.731/s/gpu LR: 0.000459 Logit Scale: 35.360 Contrastive_loss: 3.3817 (3.6159) Loss: 3.3817 (3.6159)
520
+ 2024-01-30,17:55:37 | INFO | Train Epoch: 1 [ 9834496/16007168 (61%)] Data (t): 0.077 Batch (t): 0.357, 12734.5/s, 795.909/s/gpu LR: 0.000457 Logit Scale: 35.536 Contrastive_loss: 3.4333 (3.6086) Loss: 3.4333 (3.6086)
521
+ 2024-01-30,17:56:12 | INFO | Train Epoch: 1 [10244096/16007168 (64%)] Data (t): 0.070 Batch (t): 0.356, 12955.2/s, 809.702/s/gpu LR: 0.000456 Logit Scale: 35.710 Contrastive_loss: 3.3986 (3.6005) Loss: 3.3986 (3.6005)
522
+ 2024-01-30,17:56:47 | INFO | Train Epoch: 1 [10653696/16007168 (67%)] Data (t): 0.070 Batch (t): 0.349, 13287.6/s, 830.474/s/gpu LR: 0.000454 Logit Scale: 35.849 Contrastive_loss: 3.6078 (3.6008) Loss: 3.6078 (3.6008)
523
+ 2024-01-30,17:57:23 | INFO | Train Epoch: 1 [11063296/16007168 (69%)] Data (t): 0.074 Batch (t): 0.359, 12270.4/s, 766.901/s/gpu LR: 0.000453 Logit Scale: 35.993 Contrastive_loss: 3.5610 (3.5994) Loss: 3.5610 (3.5994)
524
+ 2024-01-30,17:57:58 | INFO | Train Epoch: 1 [11472896/16007168 (72%)] Data (t): 0.088 Batch (t): 0.350, 13696.3/s, 856.016/s/gpu LR: 0.000451 Logit Scale: 36.122 Contrastive_loss: 3.2010 (3.5857) Loss: 3.2010 (3.5857)
525
+ 2024-01-30,17:58:34 | INFO | Train Epoch: 1 [11882496/16007168 (74%)] Data (t): 0.097 Batch (t): 0.356, 12976.5/s, 811.031/s/gpu LR: 0.000450 Logit Scale: 36.283 Contrastive_loss: 3.1820 (3.5722) Loss: 3.1820 (3.5722)
526
+ 2024-01-30,17:59:10 | INFO | Train Epoch: 1 [12292096/16007168 (77%)] Data (t): 0.070 Batch (t): 0.362, 13186.7/s, 824.170/s/gpu LR: 0.000448 Logit Scale: 36.508 Contrastive_loss: 3.1584 (3.5588) Loss: 3.1584 (3.5588)
527
+ 2024-01-30,17:59:45 | INFO | Train Epoch: 1 [12701696/16007168 (79%)] Data (t): 0.073 Batch (t): 0.352, 13313.7/s, 832.107/s/gpu LR: 0.000447 Logit Scale: 36.722 Contrastive_loss: 3.3713 (3.5530) Loss: 3.3713 (3.5530)
528
+ 2024-01-30,18:00:21 | INFO | Train Epoch: 1 [13111296/16007168 (82%)] Data (t): 0.074 Batch (t): 0.360, 12464.3/s, 779.020/s/gpu LR: 0.000445 Logit Scale: 36.880 Contrastive_loss: 3.1090 (3.5395) Loss: 3.1090 (3.5395)
529
+ 2024-01-30,18:00:56 | INFO | Train Epoch: 1 [13520896/16007168 (84%)] Data (t): 0.070 Batch (t): 0.346, 13083.3/s, 817.706/s/gpu LR: 0.000444 Logit Scale: 37.063 Contrastive_loss: 3.6433 (3.5426) Loss: 3.6433 (3.5426)
530
+ 2024-01-30,18:01:31 | INFO | Train Epoch: 1 [13930496/16007168 (87%)] Data (t): 0.071 Batch (t): 0.353, 13073.0/s, 817.065/s/gpu LR: 0.000442 Logit Scale: 37.231 Contrastive_loss: 3.1748 (3.5321) Loss: 3.1748 (3.5321)
531
+ 2024-01-30,18:02:06 | INFO | Train Epoch: 1 [14340096/16007168 (90%)] Data (t): 0.075 Batch (t): 0.352, 12526.6/s, 782.911/s/gpu LR: 0.000440 Logit Scale: 37.357 Contrastive_loss: 3.3167 (3.5261) Loss: 3.3167 (3.5261)
532
+ 2024-01-30,18:02:41 | INFO | Train Epoch: 1 [14749696/16007168 (92%)] Data (t): 0.063 Batch (t): 0.349, 12258.0/s, 766.126/s/gpu LR: 0.000439 Logit Scale: 37.474 Contrastive_loss: 3.4825 (3.5249) Loss: 3.4825 (3.5249)
533
+ 2024-01-30,18:03:16 | INFO | Train Epoch: 1 [15159296/16007168 (95%)] Data (t): 0.065 Batch (t): 0.352, 13105.0/s, 819.060/s/gpu LR: 0.000437 Logit Scale: 37.654 Contrastive_loss: 3.1186 (3.5142) Loss: 3.1186 (3.5142)
534
+ 2024-01-30,18:03:52 | INFO | Train Epoch: 1 [15568896/16007168 (97%)] Data (t): 0.067 Batch (t): 0.357, 13096.6/s, 818.541/s/gpu LR: 0.000435 Logit Scale: 37.829 Contrastive_loss: 3.3103 (3.5090) Loss: 3.3103 (3.5090)
535
+ 2024-01-30,18:04:28 | INFO | Train Epoch: 1 [15978496/16007168 (100%)] Data (t): 0.070 Batch (t): 0.364, 13150.5/s, 821.907/s/gpu LR: 0.000434 Logit Scale: 37.975 Contrastive_loss: 3.1923 (3.5011) Loss: 3.1923 (3.5011)
536
+ 2024-01-30,18:04:31 | INFO | Train Epoch: 1 [16007168/16007168 (100%)] Data (t): 0.065 Batch (t): 0.308, 13626.0/s, 851.626/s/gpu LR: 0.000433 Logit Scale: 37.979 Contrastive_loss: 2.3571 (3.4732) Loss: 2.3571 (3.4732)
537
+ 2024-01-30,18:04:34 | INFO | Start epoch 2
538
+ 2024-01-30,18:04:36 | INFO | Train Epoch: 2 [ 4096/16007168 (0%)] Data (t): 1.652 Batch (t): 1.884, 2174.05/s, 135.878/s/gpu LR: 0.000433 Logit Scale: 37.981 Contrastive_loss: 3.1466 (3.1466) Loss: 3.1466 (3.1466)
539
+ 2024-01-30,18:05:11 | INFO | Train Epoch: 2 [ 413696/16007168 (3%)] Data (t): 0.089 Batch (t): 0.358, 7709.91/s, 481.869/s/gpu LR: 0.000432 Logit Scale: 38.106 Contrastive_loss: 3.2047 (3.1756) Loss: 3.2047 (3.1756)
540
+ 2024-01-30,18:05:47 | INFO | Train Epoch: 2 [ 823296/16007168 (5%)] Data (t): 0.093 Batch (t): 0.358, 13170.3/s, 823.143/s/gpu LR: 0.000430 Logit Scale: 38.235 Contrastive_loss: 3.1328 (3.1614) Loss: 3.1328 (3.1614)
541
+ 2024-01-30,18:06:22 | INFO | Train Epoch: 2 [ 1232896/16007168 (8%)] Data (t): 0.076 Batch (t): 0.349, 13704.3/s, 856.520/s/gpu LR: 0.000428 Logit Scale: 38.437 Contrastive_loss: 3.3009 (3.1962) Loss: 3.3009 (3.1962)
542
+ 2024-01-30,18:06:58 | INFO | Train Epoch: 2 [ 1642496/16007168 (10%)] Data (t): 0.061 Batch (t): 0.357, 12596.2/s, 787.260/s/gpu LR: 0.000426 Logit Scale: 38.619 Contrastive_loss: 3.5063 (3.2583) Loss: 3.5063 (3.2583)
543
+ 2024-01-30,18:07:33 | INFO | Train Epoch: 2 [ 2052096/16007168 (13%)] Data (t): 0.071 Batch (t): 0.358, 13546.3/s, 846.643/s/gpu LR: 0.000425 Logit Scale: 38.749 Contrastive_loss: 3.0284 (3.2200) Loss: 3.0284 (3.2200)
544
+ 2024-01-30,18:08:10 | INFO | Train Epoch: 2 [ 2461696/16007168 (15%)] Data (t): 0.074 Batch (t): 0.362, 12537.2/s, 783.576/s/gpu LR: 0.000423 Logit Scale: 38.869 Contrastive_loss: 3.1864 (3.2152) Loss: 3.1864 (3.2152)
545
+ 2024-01-30,18:08:45 | INFO | Train Epoch: 2 [ 2871296/16007168 (18%)] Data (t): 0.069 Batch (t): 0.353, 12386.3/s, 774.142/s/gpu LR: 0.000421 Logit Scale: 39.011 Contrastive_loss: 3.2655 (3.2214) Loss: 3.2655 (3.2214)
546
+ 2024-01-30,18:09:21 | INFO | Train Epoch: 2 [ 3280896/16007168 (20%)] Data (t): 0.068 Batch (t): 0.360, 12038.7/s, 752.422/s/gpu LR: 0.000419 Logit Scale: 39.145 Contrastive_loss: 3.1624 (3.2149) Loss: 3.1624 (3.2149)
547
+ 2024-01-30,18:09:56 | INFO | Train Epoch: 2 [ 3690496/16007168 (23%)] Data (t): 0.067 Batch (t): 0.349, 13308.8/s, 831.803/s/gpu LR: 0.000417 Logit Scale: 39.286 Contrastive_loss: 2.7686 (3.1702) Loss: 2.7686 (3.1702)
548
+ 2024-01-30,18:10:32 | INFO | Train Epoch: 2 [ 4100096/16007168 (26%)] Data (t): 0.069 Batch (t): 0.357, 12801.3/s, 800.082/s/gpu LR: 0.000415 Logit Scale: 39.376 Contrastive_loss: 3.0080 (3.1555) Loss: 3.0080 (3.1555)
549
+ 2024-01-30,18:11:07 | INFO | Train Epoch: 2 [ 4509696/16007168 (28%)] Data (t): 0.072 Batch (t): 0.358, 13464.5/s, 841.534/s/gpu LR: 0.000413 Logit Scale: 39.495 Contrastive_loss: 2.9960 (3.1422) Loss: 2.9960 (3.1422)
550
+ 2024-01-30,18:11:43 | INFO | Train Epoch: 2 [ 4919296/16007168 (31%)] Data (t): 0.061 Batch (t): 0.355, 12990.4/s, 811.898/s/gpu LR: 0.000411 Logit Scale: 39.652 Contrastive_loss: 2.5340 (3.0954) Loss: 2.5340 (3.0954)
551
+ 2024-01-30,18:12:18 | INFO | Train Epoch: 2 [ 5328896/16007168 (33%)] Data (t): 0.065 Batch (t): 0.350, 12488.3/s, 780.521/s/gpu LR: 0.000409 Logit Scale: 39.750 Contrastive_loss: 3.0895 (3.0950) Loss: 3.0895 (3.0950)
552
+ 2024-01-30,18:12:54 | INFO | Train Epoch: 2 [ 5738496/16007168 (36%)] Data (t): 0.070 Batch (t): 0.357, 12857.6/s, 803.599/s/gpu LR: 0.000407 Logit Scale: 39.874 Contrastive_loss: 2.8140 (3.0763) Loss: 2.8140 (3.0763)
553
+ 2024-01-30,18:13:29 | INFO | Train Epoch: 2 [ 6148096/16007168 (38%)] Data (t): 0.072 Batch (t): 0.351, 12723.5/s, 795.221/s/gpu LR: 0.000405 Logit Scale: 40.004 Contrastive_loss: 2.8495 (3.0621) Loss: 2.8495 (3.0621)
554
+ 2024-01-30,18:14:05 | INFO | Train Epoch: 2 [ 6557696/16007168 (41%)] Data (t): 0.070 Batch (t): 0.359, 11998.1/s, 749.880/s/gpu LR: 0.000403 Logit Scale: 40.071 Contrastive_loss: 2.8497 (3.0496) Loss: 2.8497 (3.0496)
555
+ 2024-01-30,18:14:40 | INFO | Train Epoch: 2 [ 6967296/16007168 (44%)] Data (t): 0.073 Batch (t): 0.358, 8203.92/s, 512.745/s/gpu LR: 0.000401 Logit Scale: 40.185 Contrastive_loss: 2.8644 (3.0393) Loss: 2.8644 (3.0393)
556
+ 2024-01-30,18:15:15 | INFO | Train Epoch: 2 [ 7376896/16007168 (46%)] Data (t): 0.076 Batch (t): 0.351, 9614.47/s, 600.904/s/gpu LR: 0.000399 Logit Scale: 40.322 Contrastive_loss: 3.1654 (3.0460) Loss: 3.1654 (3.0460)
557
+ 2024-01-30,18:15:51 | INFO | Train Epoch: 2 [ 7786496/16007168 (49%)] Data (t): 0.060 Batch (t): 0.354, 8811.07/s, 550.692/s/gpu LR: 0.000397 Logit Scale: 40.430 Contrastive_loss: 2.7709 (3.0322) Loss: 2.7709 (3.0322)
558
+ 2024-01-30,18:16:27 | INFO | Train Epoch: 2 [ 8196096/16007168 (51%)] Data (t): 0.070 Batch (t): 0.361, 12769.0/s, 798.065/s/gpu LR: 0.000395 Logit Scale: 40.522 Contrastive_loss: 2.7423 (3.0184) Loss: 2.7423 (3.0184)
559
+ 2024-01-30,18:17:02 | INFO | Train Epoch: 2 [ 8605696/16007168 (54%)] Data (t): 0.069 Batch (t): 0.354, 12159.0/s, 759.940/s/gpu LR: 0.000393 Logit Scale: 40.616 Contrastive_loss: 3.0506 (3.0199) Loss: 3.0506 (3.0199)
560
+ 2024-01-30,18:17:38 | INFO | Train Epoch: 2 [ 9015296/16007168 (56%)] Data (t): 0.061 Batch (t): 0.354, 12405.0/s, 775.313/s/gpu LR: 0.000391 Logit Scale: 40.749 Contrastive_loss: 2.9225 (3.0156) Loss: 2.9225 (3.0156)
561
+ 2024-01-30,18:18:13 | INFO | Train Epoch: 2 [ 9424896/16007168 (59%)] Data (t): 0.070 Batch (t): 0.355, 13352.0/s, 834.498/s/gpu LR: 0.000389 Logit Scale: 40.892 Contrastive_loss: 2.8147 (3.0073) Loss: 2.8147 (3.0073)
562
+ 2024-01-30,18:18:49 | INFO | Train Epoch: 2 [ 9834496/16007168 (61%)] Data (t): 0.068 Batch (t): 0.354, 12816.8/s, 801.049/s/gpu LR: 0.000387 Logit Scale: 41.024 Contrastive_loss: 3.1053 (3.0112) Loss: 3.1053 (3.0112)
563
+ 2024-01-30,18:19:25 | INFO | Train Epoch: 2 [10244096/16007168 (64%)] Data (t): 0.060 Batch (t): 0.361, 12373.0/s, 773.314/s/gpu LR: 0.000385 Logit Scale: 41.088 Contrastive_loss: 2.9394 (3.0084) Loss: 2.9394 (3.0084)
564
+ 2024-01-30,18:20:00 | INFO | Train Epoch: 2 [10653696/16007168 (67%)] Data (t): 0.094 Batch (t): 0.353, 13006.3/s, 812.893/s/gpu LR: 0.000382 Logit Scale: 41.292 Contrastive_loss: 2.9076 (3.0047) Loss: 2.9076 (3.0047)
565
+ 2024-01-30,18:20:36 | INFO | Train Epoch: 2 [11063296/16007168 (69%)] Data (t): 0.064 Batch (t): 0.358, 12873.6/s, 804.601/s/gpu LR: 0.000380 Logit Scale: 41.375 Contrastive_loss: 2.9102 (3.0013) Loss: 2.9102 (3.0013)
566
+ 2024-01-30,18:21:12 | INFO | Train Epoch: 2 [11472896/16007168 (72%)] Data (t): 0.071 Batch (t): 0.359, 12941.4/s, 808.837/s/gpu LR: 0.000378 Logit Scale: 41.461 Contrastive_loss: 2.9675 (3.0001) Loss: 2.9675 (3.0001)
567
+ 2024-01-30,18:21:48 | INFO | Train Epoch: 2 [11882496/16007168 (74%)] Data (t): 0.073 Batch (t): 0.359, 11819.2/s, 738.703/s/gpu LR: 0.000376 Logit Scale: 41.548 Contrastive_loss: 3.0836 (3.0029) Loss: 3.0836 (3.0029)
568
+ 2024-01-30,18:22:24 | INFO | Train Epoch: 2 [12292096/16007168 (77%)] Data (t): 0.069 Batch (t): 0.360, 12595.8/s, 787.237/s/gpu LR: 0.000374 Logit Scale: 41.653 Contrastive_loss: 2.8417 (2.9977) Loss: 2.8417 (2.9977)
569
+ 2024-01-30,18:23:00 | INFO | Train Epoch: 2 [12701696/16007168 (79%)] Data (t): 0.070 Batch (t): 0.366, 13045.6/s, 815.352/s/gpu LR: 0.000371 Logit Scale: 41.750 Contrastive_loss: 2.8205 (2.9922) Loss: 2.8205 (2.9922)
570
+ 2024-01-30,18:23:35 | INFO | Train Epoch: 2 [13111296/16007168 (82%)] Data (t): 0.065 Batch (t): 0.353, 12208.4/s, 763.025/s/gpu LR: 0.000369 Logit Scale: 41.860 Contrastive_loss: 2.7305 (2.9843) Loss: 2.7305 (2.9843)
571
+ 2024-01-30,18:24:11 | INFO | Train Epoch: 2 [13520896/16007168 (84%)] Data (t): 0.073 Batch (t): 0.358, 12967.6/s, 810.474/s/gpu LR: 0.000367 Logit Scale: 41.972 Contrastive_loss: 3.1652 (2.9896) Loss: 3.1652 (2.9896)
572
+ 2024-01-30,18:24:47 | INFO | Train Epoch: 2 [13930496/16007168 (87%)] Data (t): 0.074 Batch (t): 0.358, 12066.7/s, 754.168/s/gpu LR: 0.000365 Logit Scale: 42.108 Contrastive_loss: 2.6064 (2.9786) Loss: 2.6064 (2.9786)
573
+ 2024-01-30,18:25:23 | INFO | Train Epoch: 2 [14340096/16007168 (90%)] Data (t): 0.076 Batch (t): 0.361, 11824.4/s, 739.023/s/gpu LR: 0.000362 Logit Scale: 42.194 Contrastive_loss: 2.4083 (2.9628) Loss: 2.4083 (2.9628)
574
+ 2024-01-30,18:25:59 | INFO | Train Epoch: 2 [14749696/16007168 (92%)] Data (t): 0.064 Batch (t): 0.356, 13305.7/s, 831.609/s/gpu LR: 0.000360 Logit Scale: 42.321 Contrastive_loss: 2.2928 (2.9447) Loss: 2.2928 (2.9447)
575
+ 2024-01-30,18:26:34 | INFO | Train Epoch: 2 [15159296/16007168 (95%)] Data (t): 0.054 Batch (t): 0.352, 13315.6/s, 832.225/s/gpu LR: 0.000358 Logit Scale: 42.404 Contrastive_loss: 2.2195 (2.9256) Loss: 2.2195 (2.9256)
576
+ 2024-01-30,18:27:10 | INFO | Train Epoch: 2 [15568896/16007168 (97%)] Data (t): 0.062 Batch (t): 0.357, 11880.7/s, 742.544/s/gpu LR: 0.000355 Logit Scale: 42.504 Contrastive_loss: 2.4242 (2.9127) Loss: 2.4242 (2.9127)
577
+ 2024-01-30,18:27:45 | INFO | Train Epoch: 2 [15978496/16007168 (100%)] Data (t): 0.059 Batch (t): 0.350, 12739.8/s, 796.238/s/gpu LR: 0.000353 Logit Scale: 42.604 Contrastive_loss: 2.6432 (2.9060) Loss: 2.6432 (2.9060)
578
+ 2024-01-30,18:27:47 | INFO | Train Epoch: 2 [16007168/16007168 (100%)] Data (t): 0.056 Batch (t): 0.327, 13586.5/s, 849.155/s/gpu LR: 0.000353 Logit Scale: 42.610 Contrastive_loss: 2.4787 (2.8956) Loss: 2.4787 (2.8956)
579
+ 2024-01-30,18:27:50 | INFO | Start epoch 3
580
+ 2024-01-30,18:27:52 | INFO | Train Epoch: 3 [ 4096/16007168 (0%)] Data (t): 1.770 Batch (t): 2.001, 2047.34/s, 127.959/s/gpu LR: 0.000353 Logit Scale: 42.613 Contrastive_loss: 2.7247 (2.7247) Loss: 2.7247 (2.7247)
581
+ 2024-01-30,18:28:28 | INFO | Train Epoch: 3 [ 413696/16007168 (3%)] Data (t): 0.077 Batch (t): 0.362, 12761.4/s, 797.588/s/gpu LR: 0.000351 Logit Scale: 42.689 Contrastive_loss: 2.6749 (2.6998) Loss: 2.6749 (2.6998)
582
+ 2024-01-30,18:29:03 | INFO | Train Epoch: 3 [ 823296/16007168 (5%)] Data (t): 0.085 Batch (t): 0.349, 13569.8/s, 848.113/s/gpu LR: 0.000348 Logit Scale: 42.739 Contrastive_loss: 2.1629 (2.5208) Loss: 2.1629 (2.5208)
583
+ 2024-01-30,18:29:38 | INFO | Train Epoch: 3 [ 1232896/16007168 (8%)] Data (t): 0.081 Batch (t): 0.351, 13345.2/s, 834.072/s/gpu LR: 0.000346 Logit Scale: 42.830 Contrastive_loss: 2.4349 (2.4993) Loss: 2.4349 (2.4993)
584
+ 2024-01-30,18:30:13 | INFO | Train Epoch: 3 [ 1642496/16007168 (10%)] Data (t): 0.067 Batch (t): 0.351, 13127.0/s, 820.439/s/gpu LR: 0.000344 Logit Scale: 42.893 Contrastive_loss: 2.6399 (2.5274) Loss: 2.6399 (2.5274)
585
+ 2024-01-30,18:30:49 | INFO | Train Epoch: 3 [ 2052096/16007168 (13%)] Data (t): 0.075 Batch (t): 0.360, 13163.2/s, 822.701/s/gpu LR: 0.000341 Logit Scale: 43.027 Contrastive_loss: 2.9539 (2.5985) Loss: 2.9539 (2.5985)
586
+ 2024-01-30,18:31:24 | INFO | Train Epoch: 3 [ 2461696/16007168 (15%)] Data (t): 0.075 Batch (t): 0.352, 12485.0/s, 780.311/s/gpu LR: 0.000339 Logit Scale: 43.140 Contrastive_loss: 2.7431 (2.6192) Loss: 2.7431 (2.6192)
587
+ 2024-01-30,18:32:00 | INFO | Train Epoch: 3 [ 2871296/16007168 (18%)] Data (t): 0.070 Batch (t): 0.354, 12131.1/s, 758.196/s/gpu LR: 0.000336 Logit Scale: 43.164 Contrastive_loss: 2.7805 (2.6393) Loss: 2.7805 (2.6393)
588
+ 2024-01-30,18:32:35 | INFO | Train Epoch: 3 [ 3280896/16007168 (20%)] Data (t): 0.068 Batch (t): 0.349, 12349.9/s, 771.870/s/gpu LR: 0.000334 Logit Scale: 43.236 Contrastive_loss: 2.4207 (2.6150) Loss: 2.4207 (2.6150)
589
+ 2024-01-30,18:33:10 | INFO | Train Epoch: 3 [ 3690496/16007168 (23%)] Data (t): 0.099 Batch (t): 0.354, 12228.2/s, 764.261/s/gpu LR: 0.000332 Logit Scale: 43.320 Contrastive_loss: 2.2062 (2.5742) Loss: 2.2062 (2.5742)
590
+ 2024-01-30,18:33:45 | INFO | Train Epoch: 3 [ 4100096/16007168 (26%)] Data (t): 0.096 Batch (t): 0.354, 13259.7/s, 828.731/s/gpu LR: 0.000329 Logit Scale: 43.417 Contrastive_loss: 2.5924 (2.5758) Loss: 2.5924 (2.5758)
591
+ 2024-01-30,18:34:21 | INFO | Train Epoch: 3 [ 4509696/16007168 (28%)] Data (t): 0.104 Batch (t): 0.353, 12782.6/s, 798.914/s/gpu LR: 0.000327 Logit Scale: 43.548 Contrastive_loss: 2.6007 (2.5779) Loss: 2.6007 (2.5779)
592
+ 2024-01-30,18:34:56 | INFO | Train Epoch: 3 [ 4919296/16007168 (31%)] Data (t): 0.097 Batch (t): 0.353, 12391.8/s, 774.485/s/gpu LR: 0.000324 Logit Scale: 43.659 Contrastive_loss: 2.8330 (2.5975) Loss: 2.8330 (2.5975)
593
+ 2024-01-30,18:35:32 | INFO | Train Epoch: 3 [ 5328896/16007168 (33%)] Data (t): 0.059 Batch (t): 0.359, 13039.6/s, 814.972/s/gpu LR: 0.000322 Logit Scale: 43.727 Contrastive_loss: 2.8242 (2.6137) Loss: 2.8242 (2.6137)
594
+ 2024-01-30,18:36:08 | INFO | Train Epoch: 3 [ 5738496/16007168 (36%)] Data (t): 0.065 Batch (t): 0.356, 13425.2/s, 839.077/s/gpu LR: 0.000319 Logit Scale: 43.860 Contrastive_loss: 2.1300 (2.5815) Loss: 2.1300 (2.5815)
595
+ 2024-01-30,18:36:43 | INFO | Train Epoch: 3 [ 6148096/16007168 (38%)] Data (t): 0.081 Batch (t): 0.358, 12077.5/s, 754.842/s/gpu LR: 0.000317 Logit Scale: 43.999 Contrastive_loss: 2.1266 (2.5530) Loss: 2.1266 (2.5530)
596
+ 2024-01-30,18:37:18 | INFO | Train Epoch: 3 [ 6557696/16007168 (41%)] Data (t): 0.079 Batch (t): 0.346, 13715.1/s, 857.194/s/gpu LR: 0.000315 Logit Scale: 44.058 Contrastive_loss: 2.3959 (2.5438) Loss: 2.3959 (2.5438)
597
+ 2024-01-30,18:37:54 | INFO | Train Epoch: 3 [ 6967296/16007168 (44%)] Data (t): 0.069 Batch (t): 0.363, 13919.8/s, 869.987/s/gpu LR: 0.000312 Logit Scale: 44.103 Contrastive_loss: 2.5254 (2.5428) Loss: 2.5254 (2.5428)
598
+ 2024-01-30,18:38:30 | INFO | Train Epoch: 3 [ 7376896/16007168 (46%)] Data (t): 0.054 Batch (t): 0.354, 12843.1/s, 802.691/s/gpu LR: 0.000310 Logit Scale: 44.208 Contrastive_loss: 2.4997 (2.5405) Loss: 2.4997 (2.5405)
599
+ 2024-01-30,18:39:04 | INFO | Train Epoch: 3 [ 7786496/16007168 (49%)] Data (t): 0.056 Batch (t): 0.348, 12948.7/s, 809.292/s/gpu LR: 0.000307 Logit Scale: 44.309 Contrastive_loss: 2.4575 (2.5364) Loss: 2.4575 (2.5364)
600
+ 2024-01-30,18:39:40 | INFO | Train Epoch: 3 [ 8196096/16007168 (51%)] Data (t): 0.060 Batch (t): 0.360, 13503.8/s, 843.989/s/gpu LR: 0.000305 Logit Scale: 44.410 Contrastive_loss: 2.5604 (2.5375) Loss: 2.5604 (2.5375)
601
+ 2024-01-30,18:40:17 | INFO | Train Epoch: 3 [ 8605696/16007168 (54%)] Data (t): 0.075 Batch (t): 0.362, 12531.0/s, 783.188/s/gpu LR: 0.000302 Logit Scale: 44.527 Contrastive_loss: 2.2755 (2.5256) Loss: 2.2755 (2.5256)
602
+ 2024-01-30,18:40:51 | INFO | Train Epoch: 3 [ 9015296/16007168 (56%)] Data (t): 0.063 Batch (t): 0.348, 12902.4/s, 806.403/s/gpu LR: 0.000300 Logit Scale: 44.611 Contrastive_loss: 2.2566 (2.5139) Loss: 2.2566 (2.5139)
603
+ 2024-01-30,18:41:27 | INFO | Train Epoch: 3 [ 9424896/16007168 (59%)] Data (t): 0.066 Batch (t): 0.354, 13282.2/s, 830.136/s/gpu LR: 0.000297 Logit Scale: 44.696 Contrastive_loss: 2.0675 (2.4953) Loss: 2.0675 (2.4953)
604
+ 2024-01-30,18:42:02 | INFO | Train Epoch: 3 [ 9834496/16007168 (61%)] Data (t): 0.065 Batch (t): 0.355, 12976.0/s, 811.002/s/gpu LR: 0.000295 Logit Scale: 44.772 Contrastive_loss: 1.9698 (2.4743) Loss: 1.9698 (2.4743)
605
+ 2024-01-30,18:42:38 | INFO | Train Epoch: 3 [10244096/16007168 (64%)] Data (t): 0.075 Batch (t): 0.355, 13041.9/s, 815.122/s/gpu LR: 0.000292 Logit Scale: 44.889 Contrastive_loss: 2.2121 (2.4642) Loss: 2.2121 (2.4642)
606
+ 2024-01-30,18:43:13 | INFO | Train Epoch: 3 [10653696/16007168 (67%)] Data (t): 0.074 Batch (t): 0.355, 13118.3/s, 819.893/s/gpu LR: 0.000290 Logit Scale: 44.952 Contrastive_loss: 2.0850 (2.4501) Loss: 2.0850 (2.4501)
607
+ 2024-01-30,18:43:49 | INFO | Train Epoch: 3 [11063296/16007168 (69%)] Data (t): 0.074 Batch (t): 0.361, 13318.1/s, 832.379/s/gpu LR: 0.000287 Logit Scale: 45.027 Contrastive_loss: 2.3981 (2.4483) Loss: 2.3981 (2.4483)
608
+ 2024-01-30,18:44:24 | INFO | Train Epoch: 3 [11472896/16007168 (72%)] Data (t): 0.069 Batch (t): 0.350, 13669.2/s, 854.324/s/gpu LR: 0.000285 Logit Scale: 45.128 Contrastive_loss: 2.3070 (2.4434) Loss: 2.3070 (2.4434)
609
+ 2024-01-30,18:45:00 | INFO | Train Epoch: 3 [11882496/16007168 (74%)] Data (t): 0.065 Batch (t): 0.357, 13501.3/s, 843.831/s/gpu LR: 0.000282 Logit Scale: 45.210 Contrastive_loss: 2.4691 (2.4443) Loss: 2.4691 (2.4443)
610
+ 2024-01-30,18:45:35 | INFO | Train Epoch: 3 [12292096/16007168 (77%)] Data (t): 0.069 Batch (t): 0.354, 12537.6/s, 783.601/s/gpu LR: 0.000279 Logit Scale: 45.321 Contrastive_loss: 2.4691 (2.4451) Loss: 2.4691 (2.4451)
611
+ 2024-01-30,18:46:11 | INFO | Train Epoch: 3 [12701696/16007168 (79%)] Data (t): 0.069 Batch (t): 0.351, 13958.7/s, 872.418/s/gpu LR: 0.000277 Logit Scale: 45.371 Contrastive_loss: 2.2623 (2.4394) Loss: 2.2623 (2.4394)
612
+ 2024-01-30,18:46:46 | INFO | Train Epoch: 3 [13111296/16007168 (82%)] Data (t): 0.062 Batch (t): 0.356, 13166.3/s, 822.891/s/gpu LR: 0.000274 Logit Scale: 45.497 Contrastive_loss: 2.4169 (2.4387) Loss: 2.4169 (2.4387)
613
+ 2024-01-30,18:47:21 | INFO | Train Epoch: 3 [13520896/16007168 (84%)] Data (t): 0.066 Batch (t): 0.352, 13278.1/s, 829.880/s/gpu LR: 0.000272 Logit Scale: 45.554 Contrastive_loss: 1.9444 (2.4241) Loss: 1.9444 (2.4241)
614
+ 2024-01-30,18:47:57 | INFO | Train Epoch: 3 [13930496/16007168 (87%)] Data (t): 0.066 Batch (t): 0.353, 13371.8/s, 835.740/s/gpu LR: 0.000269 Logit Scale: 45.613 Contrastive_loss: 1.8400 (2.4075) Loss: 1.8400 (2.4075)
615
+ 2024-01-30,18:48:32 | INFO | Train Epoch: 3 [14340096/16007168 (90%)] Data (t): 0.059 Batch (t): 0.354, 13371.1/s, 835.696/s/gpu LR: 0.000267 Logit Scale: 45.661 Contrastive_loss: 1.9213 (2.3939) Loss: 1.9213 (2.3939)
616
+ 2024-01-30,18:49:07 | INFO | Train Epoch: 3 [14749696/16007168 (92%)] Data (t): 0.068 Batch (t): 0.351, 13567.5/s, 847.966/s/gpu LR: 0.000264 Logit Scale: 45.769 Contrastive_loss: 2.1651 (2.3878) Loss: 2.1651 (2.3878)
617
+ 2024-01-30,18:49:42 | INFO | Train Epoch: 3 [15159296/16007168 (95%)] Data (t): 0.080 Batch (t): 0.353, 11869.5/s, 741.845/s/gpu LR: 0.000262 Logit Scale: 45.915 Contrastive_loss: 2.0590 (2.3791) Loss: 2.0590 (2.3791)
618
+ 2024-01-30,18:50:19 | INFO | Train Epoch: 3 [15568896/16007168 (97%)] Data (t): 0.075 Batch (t): 0.364, 12320.9/s, 770.056/s/gpu LR: 0.000259 Logit Scale: 46.020 Contrastive_loss: 2.1551 (2.3734) Loss: 2.1551 (2.3734)
619
+ 2024-01-30,18:50:54 | INFO | Train Epoch: 3 [15978496/16007168 (100%)] Data (t): 0.070 Batch (t): 0.355, 12584.9/s, 786.555/s/gpu LR: 0.000257 Logit Scale: 46.088 Contrastive_loss: 2.4760 (2.3759) Loss: 2.4760 (2.3759)
620
+ 2024-01-30,18:50:56 | INFO | Train Epoch: 3 [16007168/16007168 (100%)] Data (t): 0.058 Batch (t): 0.307, 13528.3/s, 845.521/s/gpu LR: 0.000256 Logit Scale: 46.084 Contrastive_loss: 1.8544 (2.3632) Loss: 1.8544 (2.3632)
621
+ 2024-01-30,18:50:59 | INFO | Start epoch 4
622
+ 2024-01-30,18:51:01 | INFO | Train Epoch: 4 [ 4096/16007168 (0%)] Data (t): 1.781 Batch (t): 2.013, 2034.76/s, 127.172/s/gpu LR: 0.000256 Logit Scale: 46.086 Contrastive_loss: 2.2776 (2.2776) Loss: 2.2776 (2.2776)
623
+ 2024-01-30,18:51:38 | INFO | Train Epoch: 4 [ 413696/16007168 (3%)] Data (t): 0.093 Batch (t): 0.365, 11865.0/s, 741.564/s/gpu LR: 0.000254 Logit Scale: 46.135 Contrastive_loss: 2.4986 (2.3881) Loss: 2.4986 (2.3881)
624
+ 2024-01-30,18:52:13 | INFO | Train Epoch: 4 [ 823296/16007168 (5%)] Data (t): 0.075 Batch (t): 0.355, 12323.5/s, 770.216/s/gpu LR: 0.000251 Logit Scale: 46.229 Contrastive_loss: 2.5061 (2.4274) Loss: 2.5061 (2.4274)
625
+ 2024-01-30,18:52:49 | INFO | Train Epoch: 4 [ 1232896/16007168 (8%)] Data (t): 0.067 Batch (t): 0.353, 12830.6/s, 801.914/s/gpu LR: 0.000249 Logit Scale: 46.302 Contrastive_loss: 2.2431 (2.3813) Loss: 2.2431 (2.3813)
626
+ 2024-01-30,18:53:24 | INFO | Train Epoch: 4 [ 1642496/16007168 (10%)] Data (t): 0.075 Batch (t): 0.352, 12414.9/s, 775.929/s/gpu LR: 0.000246 Logit Scale: 46.364 Contrastive_loss: 1.9663 (2.2983) Loss: 1.9663 (2.2983)
627
+ 2024-01-30,18:53:59 | INFO | Train Epoch: 4 [ 2052096/16007168 (13%)] Data (t): 0.061 Batch (t): 0.348, 12679.3/s, 792.454/s/gpu LR: 0.000244 Logit Scale: 46.421 Contrastive_loss: 2.2600 (2.2919) Loss: 2.2600 (2.2919)
628
+ 2024-01-30,18:54:34 | INFO | Train Epoch: 4 [ 2461696/16007168 (15%)] Data (t): 0.075 Batch (t): 0.359, 12533.6/s, 783.348/s/gpu LR: 0.000241 Logit Scale: 46.469 Contrastive_loss: 2.2475 (2.2856) Loss: 2.2475 (2.2856)
629
+ 2024-01-30,18:55:09 | INFO | Train Epoch: 4 [ 2871296/16007168 (18%)] Data (t): 0.062 Batch (t): 0.341, 12051.4/s, 753.210/s/gpu LR: 0.000239 Logit Scale: 46.535 Contrastive_loss: 2.4228 (2.3027) Loss: 2.4228 (2.3027)
630
+ 2024-01-30,18:55:44 | INFO | Train Epoch: 4 [ 3280896/16007168 (20%)] Data (t): 0.050 Batch (t): 0.353, 8827.11/s, 551.694/s/gpu LR: 0.000236 Logit Scale: 46.607 Contrastive_loss: 1.9644 (2.2651) Loss: 1.9644 (2.2651)
631
+ 2024-01-30,18:56:19 | INFO | Train Epoch: 4 [ 3690496/16007168 (23%)] Data (t): 0.081 Batch (t): 0.356, 12495.8/s, 780.987/s/gpu LR: 0.000233 Logit Scale: 46.719 Contrastive_loss: 2.1430 (2.2529) Loss: 2.1430 (2.2529)
632
+ 2024-01-30,18:56:55 | INFO | Train Epoch: 4 [ 4100096/16007168 (26%)] Data (t): 0.069 Batch (t): 0.356, 7772.51/s, 485.782/s/gpu LR: 0.000231 Logit Scale: 46.808 Contrastive_loss: 1.8802 (2.2190) Loss: 1.8802 (2.2190)
633
+ 2024-01-30,18:57:30 | INFO | Train Epoch: 4 [ 4509696/16007168 (28%)] Data (t): 0.070 Batch (t): 0.352, 12157.7/s, 759.857/s/gpu LR: 0.000228 Logit Scale: 46.894 Contrastive_loss: 2.4680 (2.2398) Loss: 2.4680 (2.2398)
634
+ 2024-01-30,18:58:06 | INFO | Train Epoch: 4 [ 4919296/16007168 (31%)] Data (t): 0.071 Batch (t): 0.354, 13308.2/s, 831.764/s/gpu LR: 0.000226 Logit Scale: 46.982 Contrastive_loss: 2.1154 (2.2302) Loss: 2.1154 (2.2302)
635
+ 2024-01-30,18:58:41 | INFO | Train Epoch: 4 [ 5328896/16007168 (33%)] Data (t): 0.076 Batch (t): 0.358, 12852.9/s, 803.304/s/gpu LR: 0.000223 Logit Scale: 47.109 Contrastive_loss: 1.7922 (2.1989) Loss: 1.7922 (2.1989)
636
+ 2024-01-30,18:59:17 | INFO | Train Epoch: 4 [ 5738496/16007168 (36%)] Data (t): 0.070 Batch (t): 0.352, 12301.7/s, 768.856/s/gpu LR: 0.000221 Logit Scale: 47.179 Contrastive_loss: 1.9943 (2.1853) Loss: 1.9943 (2.1853)
637
+ 2024-01-30,18:59:53 | INFO | Train Epoch: 4 [ 6148096/16007168 (38%)] Data (t): 0.072 Batch (t): 0.359, 13693.2/s, 855.826/s/gpu LR: 0.000218 Logit Scale: 47.246 Contrastive_loss: 2.1591 (2.1836) Loss: 2.1591 (2.1836)
638
+ 2024-01-30,19:00:28 | INFO | Train Epoch: 4 [ 6557696/16007168 (41%)] Data (t): 0.070 Batch (t): 0.358, 12570.9/s, 785.679/s/gpu LR: 0.000216 Logit Scale: 47.290 Contrastive_loss: 2.0611 (2.1764) Loss: 2.0611 (2.1764)
639
+ 2024-01-30,19:01:04 | INFO | Train Epoch: 4 [ 6967296/16007168 (44%)] Data (t): 0.067 Batch (t): 0.358, 13288.8/s, 830.547/s/gpu LR: 0.000213 Logit Scale: 47.364 Contrastive_loss: 1.6835 (2.1491) Loss: 1.6835 (2.1491)
640
+ 2024-01-30,19:01:40 | INFO | Train Epoch: 4 [ 7376896/16007168 (46%)] Data (t): 0.067 Batch (t): 0.356, 12435.4/s, 777.214/s/gpu LR: 0.000211 Logit Scale: 47.512 Contrastive_loss: 1.9361 (2.1378) Loss: 1.9361 (2.1378)
641
+ 2024-01-30,19:02:16 | INFO | Train Epoch: 4 [ 7786496/16007168 (49%)] Data (t): 0.073 Batch (t): 0.358, 12243.3/s, 765.208/s/gpu LR: 0.000208 Logit Scale: 47.627 Contrastive_loss: 2.4509 (2.1535) Loss: 2.4509 (2.1535)
642
+ 2024-01-30,19:02:51 | INFO | Train Epoch: 4 [ 8196096/16007168 (51%)] Data (t): 0.073 Batch (t): 0.350, 13483.7/s, 842.733/s/gpu LR: 0.000206 Logit Scale: 47.738 Contrastive_loss: 1.9895 (2.1457) Loss: 1.9895 (2.1457)
643
+ 2024-01-30,19:03:26 | INFO | Train Epoch: 4 [ 8605696/16007168 (54%)] Data (t): 0.072 Batch (t): 0.357, 12792.8/s, 799.550/s/gpu LR: 0.000203 Logit Scale: 47.825 Contrastive_loss: 1.8852 (2.1338) Loss: 1.8852 (2.1338)
644
+ 2024-01-30,19:04:02 | INFO | Train Epoch: 4 [ 9015296/16007168 (56%)] Data (t): 0.072 Batch (t): 0.360, 12562.4/s, 785.151/s/gpu LR: 0.000201 Logit Scale: 47.986 Contrastive_loss: 2.0897 (2.1319) Loss: 2.0897 (2.1319)
645
+ 2024-01-30,19:04:37 | INFO | Train Epoch: 4 [ 9424896/16007168 (59%)] Data (t): 0.074 Batch (t): 0.350, 11124.9/s, 695.304/s/gpu LR: 0.000198 Logit Scale: 48.029 Contrastive_loss: 2.1508 (2.1327) Loss: 2.1508 (2.1327)
646
+ 2024-01-30,19:05:12 | INFO | Train Epoch: 4 [ 9834496/16007168 (61%)] Data (t): 0.065 Batch (t): 0.351, 8960.75/s, 560.047/s/gpu LR: 0.000196 Logit Scale: 48.093 Contrastive_loss: 1.9658 (2.1260) Loss: 1.9658 (2.1260)
647
+ 2024-01-30,19:05:48 | INFO | Train Epoch: 4 [10244096/16007168 (64%)] Data (t): 0.066 Batch (t): 0.356, 6673.17/s, 417.073/s/gpu LR: 0.000193 Logit Scale: 48.141 Contrastive_loss: 2.0747 (2.1241) Loss: 2.0747 (2.1241)
648
+ 2024-01-30,19:06:23 | INFO | Train Epoch: 4 [10653696/16007168 (67%)] Data (t): 0.068 Batch (t): 0.353, 10295.2/s, 643.452/s/gpu LR: 0.000191 Logit Scale: 48.245 Contrastive_loss: 2.0089 (2.1198) Loss: 2.0089 (2.1198)
649
+ 2024-01-30,19:07:00 | INFO | Train Epoch: 4 [11063296/16007168 (69%)] Data (t): 0.075 Batch (t): 0.363, 13443.0/s, 840.187/s/gpu LR: 0.000188 Logit Scale: 48.289 Contrastive_loss: 2.0275 (2.1165) Loss: 2.0275 (2.1165)
650
+ 2024-01-30,19:07:35 | INFO | Train Epoch: 4 [11472896/16007168 (72%)] Data (t): 0.072 Batch (t): 0.353, 13883.6/s, 867.726/s/gpu LR: 0.000186 Logit Scale: 48.375 Contrastive_loss: 2.2800 (2.1221) Loss: 2.2800 (2.1221)
651
+ 2024-01-30,19:08:10 | INFO | Train Epoch: 4 [11882496/16007168 (74%)] Data (t): 0.053 Batch (t): 0.352, 12839.1/s, 802.447/s/gpu LR: 0.000183 Logit Scale: 48.481 Contrastive_loss: 2.1938 (2.1245) Loss: 2.1938 (2.1245)
652
+ 2024-01-30,19:08:45 | INFO | Train Epoch: 4 [12292096/16007168 (77%)] Data (t): 0.069 Batch (t): 0.354, 12825.1/s, 801.568/s/gpu LR: 0.000181 Logit Scale: 48.548 Contrastive_loss: 2.2271 (2.1278) Loss: 2.2271 (2.1278)
653
+ 2024-01-30,19:09:21 | INFO | Train Epoch: 4 [12701696/16007168 (79%)] Data (t): 0.066 Batch (t): 0.353, 11927.6/s, 745.476/s/gpu LR: 0.000178 Logit Scale: 48.594 Contrastive_loss: 2.3470 (2.1347) Loss: 2.3470 (2.1347)
654
+ 2024-01-30,19:09:57 | INFO | Train Epoch: 4 [13111296/16007168 (82%)] Data (t): 0.071 Batch (t): 0.365, 13384.3/s, 836.519/s/gpu LR: 0.000176 Logit Scale: 48.669 Contrastive_loss: 2.4955 (2.1456) Loss: 2.4955 (2.1456)
655
+ 2024-01-30,19:10:32 | INFO | Train Epoch: 4 [13520896/16007168 (84%)] Data (t): 0.071 Batch (t): 0.352, 13471.8/s, 841.985/s/gpu LR: 0.000173 Logit Scale: 48.749 Contrastive_loss: 1.9658 (2.1403) Loss: 1.9658 (2.1403)
656
+ 2024-01-30,19:11:07 | INFO | Train Epoch: 4 [13930496/16007168 (87%)] Data (t): 0.065 Batch (t): 0.345, 12197.7/s, 762.357/s/gpu LR: 0.000171 Logit Scale: 48.841 Contrastive_loss: 1.9985 (2.1363) Loss: 1.9985 (2.1363)
657
+ 2024-01-30,19:11:43 | INFO | Train Epoch: 4 [14340096/16007168 (90%)] Data (t): 0.066 Batch (t): 0.359, 12880.2/s, 805.011/s/gpu LR: 0.000169 Logit Scale: 48.880 Contrastive_loss: 1.8361 (2.1279) Loss: 1.8361 (2.1279)
658
+ 2024-01-30,19:12:18 | INFO | Train Epoch: 4 [14749696/16007168 (92%)] Data (t): 0.068 Batch (t): 0.353, 12428.3/s, 776.768/s/gpu LR: 0.000166 Logit Scale: 48.961 Contrastive_loss: 1.8553 (2.1206) Loss: 1.8553 (2.1206)
659
+ 2024-01-30,19:12:54 | INFO | Train Epoch: 4 [15159296/16007168 (95%)] Data (t): 0.069 Batch (t): 0.353, 13324.9/s, 832.806/s/gpu LR: 0.000164 Logit Scale: 49.009 Contrastive_loss: 2.0204 (2.1179) Loss: 2.0204 (2.1179)
660
+ 2024-01-30,19:13:29 | INFO | Train Epoch: 4 [15568896/16007168 (97%)] Data (t): 0.071 Batch (t): 0.350, 12511.5/s, 781.969/s/gpu LR: 0.000161 Logit Scale: 49.108 Contrastive_loss: 2.3235 (2.1232) Loss: 2.3235 (2.1232)
661
+ 2024-01-30,19:14:03 | INFO | Train Epoch: 4 [15978496/16007168 (100%)] Data (t): 0.073 Batch (t): 0.350, 8761.65/s, 547.603/s/gpu LR: 0.000159 Logit Scale: 49.183 Contrastive_loss: 1.8551 (2.1165) Loss: 1.8551 (2.1165)
662
+ 2024-01-30,19:14:06 | INFO | Train Epoch: 4 [16007168/16007168 (100%)] Data (t): 0.066 Batch (t): 0.307, 13694.1/s, 855.884/s/gpu LR: 0.000159 Logit Scale: 49.184 Contrastive_loss: 1.5963 (2.1038) Loss: 1.5963 (2.1038)
663
+ 2024-01-30,19:14:09 | INFO | Start epoch 5
664
+ 2024-01-30,19:14:11 | INFO | Train Epoch: 5 [ 4096/16007168 (0%)] Data (t): 1.760 Batch (t): 1.991, 2056.83/s, 128.552/s/gpu LR: 0.000159 Logit Scale: 49.182 Contrastive_loss: 2.0975 (2.0975) Loss: 2.0975 (2.0975)
665
+ 2024-01-30,19:14:47 | INFO | Train Epoch: 5 [ 413696/16007168 (3%)] Data (t): 0.104 Batch (t): 0.365, 10269.6/s, 641.852/s/gpu LR: 0.000156 Logit Scale: 49.205 Contrastive_loss: 1.9071 (2.0023) Loss: 1.9071 (2.0023)
666
+ 2024-01-30,19:15:23 | INFO | Train Epoch: 5 [ 823296/16007168 (5%)] Data (t): 0.079 Batch (t): 0.356, 8953.43/s, 559.589/s/gpu LR: 0.000154 Logit Scale: 49.276 Contrastive_loss: 1.9597 (1.9881) Loss: 1.9597 (1.9881)
667
+ 2024-01-30,19:15:58 | INFO | Train Epoch: 5 [ 1232896/16007168 (8%)] Data (t): 0.065 Batch (t): 0.348, 12903.1/s, 806.444/s/gpu LR: 0.000152 Logit Scale: 49.364 Contrastive_loss: 1.8396 (1.9510) Loss: 1.8396 (1.9510)
668
+ 2024-01-30,19:16:33 | INFO | Train Epoch: 5 [ 1642496/16007168 (10%)] Data (t): 0.072 Batch (t): 0.354, 12324.6/s, 770.287/s/gpu LR: 0.000149 Logit Scale: 49.419 Contrastive_loss: 1.4697 (1.8547) Loss: 1.4697 (1.8547)
669
+ 2024-01-30,19:17:08 | INFO | Train Epoch: 5 [ 2052096/16007168 (13%)] Data (t): 0.073 Batch (t): 0.352, 12898.9/s, 806.184/s/gpu LR: 0.000147 Logit Scale: 49.518 Contrastive_loss: 1.7210 (1.8324) Loss: 1.7210 (1.8324)
670
+ 2024-01-30,19:17:44 | INFO | Train Epoch: 5 [ 2461696/16007168 (15%)] Data (t): 0.072 Batch (t): 0.361, 12063.0/s, 753.937/s/gpu LR: 0.000145 Logit Scale: 49.623 Contrastive_loss: 1.6402 (1.8050) Loss: 1.6402 (1.8050)
671
+ 2024-01-30,19:18:20 | INFO | Train Epoch: 5 [ 2871296/16007168 (18%)] Data (t): 0.077 Batch (t): 0.353, 13347.5/s, 834.220/s/gpu LR: 0.000142 Logit Scale: 49.686 Contrastive_loss: 2.2080 (1.8554) Loss: 2.2080 (1.8554)
672
+ 2024-01-30,19:18:55 | INFO | Train Epoch: 5 [ 3280896/16007168 (20%)] Data (t): 0.064 Batch (t): 0.357, 7901.42/s, 493.839/s/gpu LR: 0.000140 Logit Scale: 49.734 Contrastive_loss: 1.9048 (1.8608) Loss: 1.9048 (1.8608)
673
+ 2024-01-30,19:19:32 | INFO | Train Epoch: 5 [ 3690496/16007168 (23%)] Data (t): 0.070 Batch (t): 0.363, 12071.6/s, 754.478/s/gpu LR: 0.000138 Logit Scale: 49.811 Contrastive_loss: 2.2453 (1.8993) Loss: 2.2453 (1.8993)
674
+ 2024-01-30,19:20:07 | INFO | Train Epoch: 5 [ 4100096/16007168 (26%)] Data (t): 0.076 Batch (t): 0.350, 13508.2/s, 844.262/s/gpu LR: 0.000136 Logit Scale: 49.866 Contrastive_loss: 1.9411 (1.9031) Loss: 1.9411 (1.9031)
675
+ 2024-01-30,19:20:42 | INFO | Train Epoch: 5 [ 4509696/16007168 (28%)] Data (t): 0.073 Batch (t): 0.349, 11943.8/s, 746.490/s/gpu LR: 0.000133 Logit Scale: 49.948 Contrastive_loss: 1.5937 (1.8773) Loss: 1.5937 (1.8773)
676
+ 2024-01-30,19:21:17 | INFO | Train Epoch: 5 [ 4919296/16007168 (31%)] Data (t): 0.072 Batch (t): 0.355, 13392.2/s, 837.013/s/gpu LR: 0.000131 Logit Scale: 50.015 Contrastive_loss: 1.7019 (1.8638) Loss: 1.7019 (1.8638)
677
+ 2024-01-30,19:21:52 | INFO | Train Epoch: 5 [ 5328896/16007168 (33%)] Data (t): 0.071 Batch (t): 0.350, 12313.8/s, 769.611/s/gpu LR: 0.000129 Logit Scale: 50.066 Contrastive_loss: 2.0558 (1.8775) Loss: 2.0558 (1.8775)
678
+ 2024-01-30,19:22:28 | INFO | Train Epoch: 5 [ 5738496/16007168 (36%)] Data (t): 0.076 Batch (t): 0.357, 12910.8/s, 806.925/s/gpu LR: 0.000127 Logit Scale: 50.177 Contrastive_loss: 2.0593 (1.8896) Loss: 2.0593 (1.8896)
679
+ 2024-01-30,19:23:03 | INFO | Train Epoch: 5 [ 6148096/16007168 (38%)] Data (t): 0.074 Batch (t): 0.353, 13323.6/s, 832.726/s/gpu LR: 0.000124 Logit Scale: 50.284 Contrastive_loss: 1.7905 (1.8835) Loss: 1.7905 (1.8835)
680
+ 2024-01-30,19:23:39 | INFO | Train Epoch: 5 [ 6557696/16007168 (41%)] Data (t): 0.066 Batch (t): 0.353, 12856.5/s, 803.528/s/gpu LR: 0.000122 Logit Scale: 50.433 Contrastive_loss: 2.0030 (1.8905) Loss: 2.0030 (1.8905)
681
+ 2024-01-30,19:24:14 | INFO | Train Epoch: 5 [ 6967296/16007168 (44%)] Data (t): 0.068 Batch (t): 0.354, 13175.7/s, 823.481/s/gpu LR: 0.000120 Logit Scale: 50.501 Contrastive_loss: 1.6152 (1.8752) Loss: 1.6152 (1.8752)
682
+ 2024-01-30,19:24:49 | INFO | Train Epoch: 5 [ 7376896/16007168 (46%)] Data (t): 0.068 Batch (t): 0.353, 13489.9/s, 843.120/s/gpu LR: 0.000118 Logit Scale: 50.597 Contrastive_loss: 1.6464 (1.8632) Loss: 1.6464 (1.8632)
683
+ 2024-01-30,19:25:25 | INFO | Train Epoch: 5 [ 7786496/16007168 (49%)] Data (t): 0.064 Batch (t): 0.355, 12659.9/s, 791.243/s/gpu LR: 0.000116 Logit Scale: 50.658 Contrastive_loss: 1.5000 (1.8450) Loss: 1.5000 (1.8450)
684
+ 2024-01-30,19:26:00 | INFO | Train Epoch: 5 [ 8196096/16007168 (51%)] Data (t): 0.069 Batch (t): 0.356, 12594.7/s, 787.168/s/gpu LR: 0.000113 Logit Scale: 50.704 Contrastive_loss: 1.7094 (1.8385) Loss: 1.7094 (1.8385)
685
+ 2024-01-30,19:26:36 | INFO | Train Epoch: 5 [ 8605696/16007168 (54%)] Data (t): 0.076 Batch (t): 0.357, 13186.9/s, 824.181/s/gpu LR: 0.000111 Logit Scale: 50.820 Contrastive_loss: 2.0081 (1.8462) Loss: 2.0081 (1.8462)
686
+ 2024-01-30,19:27:11 | INFO | Train Epoch: 5 [ 9015296/16007168 (56%)] Data (t): 0.074 Batch (t): 0.354, 12717.5/s, 794.842/s/gpu LR: 0.000109 Logit Scale: 50.872 Contrastive_loss: 1.7084 (1.8403) Loss: 1.7084 (1.8403)
687
+ 2024-01-30,19:27:47 | INFO | Train Epoch: 5 [ 9424896/16007168 (59%)] Data (t): 0.096 Batch (t): 0.354, 12393.7/s, 774.605/s/gpu LR: 0.000107 Logit Scale: 50.873 Contrastive_loss: 1.7075 (1.8347) Loss: 1.7075 (1.8347)
688
+ 2024-01-30,19:28:22 | INFO | Train Epoch: 5 [ 9834496/16007168 (61%)] Data (t): 0.099 Batch (t): 0.356, 12925.9/s, 807.870/s/gpu LR: 0.000105 Logit Scale: 50.934 Contrastive_loss: 1.5658 (1.8240) Loss: 1.5658 (1.8240)
689
+ 2024-01-30,19:28:58 | INFO | Train Epoch: 5 [10244096/16007168 (64%)] Data (t): 0.080 Batch (t): 0.357, 12388.6/s, 774.289/s/gpu LR: 0.000103 Logit Scale: 50.995 Contrastive_loss: 1.8623 (1.8254) Loss: 1.8623 (1.8254)
690
+ 2024-01-30,19:29:33 | INFO | Train Epoch: 5 [10653696/16007168 (67%)] Data (t): 0.073 Batch (t): 0.354, 11617.0/s, 726.064/s/gpu LR: 0.000101 Logit Scale: 51.042 Contrastive_loss: 1.6210 (1.8179) Loss: 1.6210 (1.8179)
691
+ 2024-01-30,19:30:09 | INFO | Train Epoch: 5 [11063296/16007168 (69%)] Data (t): 0.066 Batch (t): 0.353, 12053.1/s, 753.320/s/gpu LR: 0.000099 Logit Scale: 51.138 Contrastive_loss: 1.3658 (1.8017) Loss: 1.3658 (1.8017)
692
+ 2024-01-30,19:30:44 | INFO | Train Epoch: 5 [11472896/16007168 (72%)] Data (t): 0.083 Batch (t): 0.348, 13582.5/s, 848.906/s/gpu LR: 0.000097 Logit Scale: 51.191 Contrastive_loss: 1.7078 (1.7985) Loss: 1.7078 (1.7985)
693
+ 2024-01-30,19:31:19 | INFO | Train Epoch: 5 [11882496/16007168 (74%)] Data (t): 0.069 Batch (t): 0.359, 12737.8/s, 796.115/s/gpu LR: 0.000095 Logit Scale: 51.228 Contrastive_loss: 1.7966 (1.7984) Loss: 1.7966 (1.7984)
694
+ 2024-01-30,19:31:55 | INFO | Train Epoch: 5 [12292096/16007168 (77%)] Data (t): 0.071 Batch (t): 0.359, 12703.7/s, 793.983/s/gpu LR: 0.000093 Logit Scale: 51.307 Contrastive_loss: 2.0864 (1.8077) Loss: 2.0864 (1.8077)
695
+ 2024-01-30,19:32:30 | INFO | Train Epoch: 5 [12701696/16007168 (79%)] Data (t): 0.064 Batch (t): 0.349, 13001.1/s, 812.569/s/gpu LR: 0.000091 Logit Scale: 51.402 Contrastive_loss: 1.5984 (1.8012) Loss: 1.5984 (1.8012)
696
+ 2024-01-30,19:33:05 | INFO | Train Epoch: 5 [13111296/16007168 (82%)] Data (t): 0.072 Batch (t): 0.353, 13475.8/s, 842.236/s/gpu LR: 0.000089 Logit Scale: 51.521 Contrastive_loss: 1.5116 (1.7924) Loss: 1.5116 (1.7924)
697
+ 2024-01-30,19:33:41 | INFO | Train Epoch: 5 [13520896/16007168 (84%)] Data (t): 0.063 Batch (t): 0.351, 9459.66/s, 591.228/s/gpu LR: 0.000087 Logit Scale: 51.540 Contrastive_loss: 1.6216 (1.7874) Loss: 1.6216 (1.7874)
698
+ 2024-01-30,19:34:16 | INFO | Train Epoch: 5 [13930496/16007168 (87%)] Data (t): 0.071 Batch (t): 0.355, 13310.4/s, 831.898/s/gpu LR: 0.000085 Logit Scale: 51.595 Contrastive_loss: 1.5775 (1.7814) Loss: 1.5775 (1.7814)
699
+ 2024-01-30,19:34:51 | INFO | Train Epoch: 5 [14340096/16007168 (90%)] Data (t): 0.072 Batch (t): 0.348, 12531.8/s, 783.238/s/gpu LR: 0.000083 Logit Scale: 51.712 Contrastive_loss: 1.4687 (1.7727) Loss: 1.4687 (1.7727)
700
+ 2024-01-30,19:35:26 | INFO | Train Epoch: 5 [14749696/16007168 (92%)] Data (t): 0.068 Batch (t): 0.352, 12926.3/s, 807.891/s/gpu LR: 0.000081 Logit Scale: 51.812 Contrastive_loss: 1.6583 (1.7696) Loss: 1.6583 (1.7696)
701
+ 2024-01-30,19:36:02 | INFO | Train Epoch: 5 [15159296/16007168 (95%)] Data (t): 0.070 Batch (t): 0.354, 13410.3/s, 838.141/s/gpu LR: 0.000079 Logit Scale: 51.873 Contrastive_loss: 1.3776 (1.7593) Loss: 1.3776 (1.7593)
702
+ 2024-01-30,19:36:37 | INFO | Train Epoch: 5 [15568896/16007168 (97%)] Data (t): 0.069 Batch (t): 0.351, 13656.8/s, 853.553/s/gpu LR: 0.000077 Logit Scale: 51.973 Contrastive_loss: 1.2441 (1.7461) Loss: 1.2441 (1.7461)
703
+ 2024-01-30,19:37:13 | INFO | Train Epoch: 5 [15978496/16007168 (100%)] Data (t): 0.070 Batch (t): 0.361, 13553.1/s, 847.071/s/gpu LR: 0.000076 Logit Scale: 52.034 Contrastive_loss: 1.4139 (1.7378) Loss: 1.4139 (1.7378)
704
+ 2024-01-30,19:37:15 | INFO | Train Epoch: 5 [16007168/16007168 (100%)] Data (t): 0.066 Batch (t): 0.330, 13647.7/s, 852.983/s/gpu LR: 0.000076 Logit Scale: 52.032 Contrastive_loss: 1.6360 (1.7353) Loss: 1.6360 (1.7353)
705
+ 2024-01-30,19:37:18 | INFO | Start epoch 6
706
+ 2024-01-30,19:37:20 | INFO | Train Epoch: 6 [ 4096/16007168 (0%)] Data (t): 1.749 Batch (t): 1.982, 2066.93/s, 129.183/s/gpu LR: 0.000075 Logit Scale: 52.033 Contrastive_loss: 1.0658 (1.0658) Loss: 1.0658 (1.0658)
707
+ 2024-01-30,19:37:57 | INFO | Train Epoch: 6 [ 413696/16007168 (3%)] Data (t): 0.077 Batch (t): 0.370, 12355.9/s, 772.242/s/gpu LR: 0.000074 Logit Scale: 52.080 Contrastive_loss: 1.0963 (1.0810) Loss: 1.0963 (1.0810)
708
+ 2024-01-30,19:38:32 | INFO | Train Epoch: 6 [ 823296/16007168 (5%)] Data (t): 0.070 Batch (t): 0.347, 12963.0/s, 810.190/s/gpu LR: 0.000072 Logit Scale: 52.173 Contrastive_loss: 1.4666 (1.2096) Loss: 1.4666 (1.2096)
709
+ 2024-01-30,19:39:08 | INFO | Train Epoch: 6 [ 1232896/16007168 (8%)] Data (t): 0.076 Batch (t): 0.361, 12727.4/s, 795.460/s/gpu LR: 0.000070 Logit Scale: 52.238 Contrastive_loss: 1.3440 (1.2432) Loss: 1.3440 (1.2432)
710
+ 2024-01-30,19:39:43 | INFO | Train Epoch: 6 [ 1642496/16007168 (10%)] Data (t): 0.073 Batch (t): 0.346, 13516.7/s, 844.791/s/gpu LR: 0.000068 Logit Scale: 52.257 Contrastive_loss: 1.4922 (1.2930) Loss: 1.4922 (1.2930)
711
+ 2024-01-30,19:40:18 | INFO | Train Epoch: 6 [ 2052096/16007168 (13%)] Data (t): 0.073 Batch (t): 0.352, 12377.9/s, 773.620/s/gpu LR: 0.000067 Logit Scale: 52.324 Contrastive_loss: 1.1666 (1.2719) Loss: 1.1666 (1.2719)
712
+ 2024-01-30,19:40:54 | INFO | Train Epoch: 6 [ 2461696/16007168 (15%)] Data (t): 0.075 Batch (t): 0.359, 12619.1/s, 788.694/s/gpu LR: 0.000065 Logit Scale: 52.375 Contrastive_loss: 1.2888 (1.2743) Loss: 1.2888 (1.2743)
713
+ 2024-01-30,19:41:28 | INFO | Train Epoch: 6 [ 2871296/16007168 (18%)] Data (t): 0.072 Batch (t): 0.348, 12388.3/s, 774.268/s/gpu LR: 0.000063 Logit Scale: 52.409 Contrastive_loss: 1.3383 (1.2823) Loss: 1.3383 (1.2823)
714
+ 2024-01-30,19:42:04 | INFO | Train Epoch: 6 [ 3280896/16007168 (20%)] Data (t): 0.075 Batch (t): 0.355, 13288.6/s, 830.535/s/gpu LR: 0.000061 Logit Scale: 52.468 Contrastive_loss: 1.5474 (1.3118) Loss: 1.5474 (1.3118)
715
+ 2024-01-30,19:42:40 | INFO | Train Epoch: 6 [ 3690496/16007168 (23%)] Data (t): 0.070 Batch (t): 0.361, 12477.3/s, 779.832/s/gpu LR: 0.000060 Logit Scale: 52.513 Contrastive_loss: 1.5795 (1.3385) Loss: 1.5795 (1.3385)
716
+ 2024-01-30,19:43:15 | INFO | Train Epoch: 6 [ 4100096/16007168 (26%)] Data (t): 0.067 Batch (t): 0.348, 12309.1/s, 769.319/s/gpu LR: 0.000058 Logit Scale: 52.528 Contrastive_loss: 1.4972 (1.3530) Loss: 1.4972 (1.3530)
717
+ 2024-01-30,19:43:50 | INFO | Train Epoch: 6 [ 4509696/16007168 (28%)] Data (t): 0.069 Batch (t): 0.355, 11887.5/s, 742.969/s/gpu LR: 0.000057 Logit Scale: 52.585 Contrastive_loss: 1.6193 (1.3752) Loss: 1.6193 (1.3752)
718
+ 2024-01-30,19:44:25 | INFO | Train Epoch: 6 [ 4919296/16007168 (31%)] Data (t): 0.058 Batch (t): 0.347, 13067.5/s, 816.718/s/gpu LR: 0.000055 Logit Scale: 52.647 Contrastive_loss: 1.8198 (1.4094) Loss: 1.8198 (1.4094)
719
+ 2024-01-30,19:45:00 | INFO | Train Epoch: 6 [ 5328896/16007168 (33%)] Data (t): 0.064 Batch (t): 0.351, 12633.8/s, 789.610/s/gpu LR: 0.000053 Logit Scale: 52.701 Contrastive_loss: 1.3740 (1.4068) Loss: 1.3740 (1.4068)
720
+ 2024-01-30,19:45:36 | INFO | Train Epoch: 6 [ 5738496/16007168 (36%)] Data (t): 0.068 Batch (t): 0.360, 13096.7/s, 818.541/s/gpu LR: 0.000052 Logit Scale: 52.728 Contrastive_loss: 1.3667 (1.4042) Loss: 1.3667 (1.4042)
721
+ 2024-01-30,19:46:11 | INFO | Train Epoch: 6 [ 6148096/16007168 (38%)] Data (t): 0.064 Batch (t): 0.352, 8366.12/s, 522.882/s/gpu LR: 0.000050 Logit Scale: 52.775 Contrastive_loss: 1.3083 (1.3982) Loss: 1.3083 (1.3982)
722
+ 2024-01-30,19:46:47 | INFO | Train Epoch: 6 [ 6557696/16007168 (41%)] Data (t): 0.075 Batch (t): 0.354, 11085.1/s, 692.821/s/gpu LR: 0.000049 Logit Scale: 52.822 Contrastive_loss: 1.5067 (1.4046) Loss: 1.5067 (1.4046)
723
+ 2024-01-30,19:47:22 | INFO | Train Epoch: 6 [ 6967296/16007168 (44%)] Data (t): 0.063 Batch (t): 0.347, 11300.8/s, 706.303/s/gpu LR: 0.000047 Logit Scale: 52.855 Contrastive_loss: 1.2395 (1.3954) Loss: 1.2395 (1.3954)
724
+ 2024-01-30,19:47:57 | INFO | Train Epoch: 6 [ 7376896/16007168 (46%)] Data (t): 0.063 Batch (t): 0.356, 12457.1/s, 778.571/s/gpu LR: 0.000046 Logit Scale: 52.888 Contrastive_loss: 1.3419 (1.3926) Loss: 1.3419 (1.3926)
725
+ 2024-01-30,19:48:32 | INFO | Train Epoch: 6 [ 7786496/16007168 (49%)] Data (t): 0.068 Batch (t): 0.347, 13299.6/s, 831.228/s/gpu LR: 0.000044 Logit Scale: 52.935 Contrastive_loss: 1.2756 (1.3867) Loss: 1.2756 (1.3867)
726
+ 2024-01-30,19:49:07 | INFO | Train Epoch: 6 [ 8196096/16007168 (51%)] Data (t): 0.071 Batch (t): 0.356, 12283.5/s, 767.717/s/gpu LR: 0.000043 Logit Scale: 52.976 Contrastive_loss: 1.2726 (1.3813) Loss: 1.2726 (1.3813)
727
+ 2024-01-30,19:49:44 | INFO | Train Epoch: 6 [ 8605696/16007168 (54%)] Data (t): 0.071 Batch (t): 0.367, 12221.9/s, 763.871/s/gpu LR: 0.000041 Logit Scale: 53.001 Contrastive_loss: 1.4887 (1.3862) Loss: 1.4887 (1.3862)
728
+ 2024-01-30,19:50:20 | INFO | Train Epoch: 6 [ 9015296/16007168 (56%)] Data (t): 0.077 Batch (t): 0.361, 11816.9/s, 738.555/s/gpu LR: 0.000040 Logit Scale: 53.034 Contrastive_loss: 1.3458 (1.3844) Loss: 1.3458 (1.3844)
729
+ 2024-01-30,19:50:56 | INFO | Train Epoch: 6 [ 9424896/16007168 (59%)] Data (t): 0.074 Batch (t): 0.357, 13287.7/s, 830.479/s/gpu LR: 0.000039 Logit Scale: 53.081 Contrastive_loss: 1.7816 (1.4010) Loss: 1.7816 (1.4010)
730
+ 2024-01-30,19:51:32 | INFO | Train Epoch: 6 [ 9834496/16007168 (61%)] Data (t): 0.073 Batch (t): 0.358, 11616.2/s, 726.012/s/gpu LR: 0.000037 Logit Scale: 53.122 Contrastive_loss: 1.4132 (1.4014) Loss: 1.4132 (1.4014)
731
+ 2024-01-30,19:52:07 | INFO | Train Epoch: 6 [10244096/16007168 (64%)] Data (t): 0.065 Batch (t): 0.349, 13550.0/s, 846.874/s/gpu LR: 0.000036 Logit Scale: 53.157 Contrastive_loss: 1.3911 (1.4011) Loss: 1.3911 (1.4011)
732
+ 2024-01-30,19:52:42 | INFO | Train Epoch: 6 [10653696/16007168 (67%)] Data (t): 0.070 Batch (t): 0.355, 13375.6/s, 835.977/s/gpu LR: 0.000035 Logit Scale: 53.180 Contrastive_loss: 1.5453 (1.4064) Loss: 1.5453 (1.4064)
733
+ 2024-01-30,19:53:17 | INFO | Train Epoch: 6 [11063296/16007168 (69%)] Data (t): 0.071 Batch (t): 0.353, 12536.7/s, 783.544/s/gpu LR: 0.000033 Logit Scale: 53.208 Contrastive_loss: 1.6831 (1.4163) Loss: 1.6831 (1.4163)
734
+ 2024-01-30,19:53:53 | INFO | Train Epoch: 6 [11472896/16007168 (72%)] Data (t): 0.067 Batch (t): 0.355, 12696.5/s, 793.529/s/gpu LR: 0.000032 Logit Scale: 53.272 Contrastive_loss: 0.85711 (1.3970) Loss: 0.85711 (1.3970)
735
+ 2024-01-30,19:54:28 | INFO | Train Epoch: 6 [11882496/16007168 (74%)] Data (t): 0.069 Batch (t): 0.355, 13627.5/s, 851.718/s/gpu LR: 0.000031 Logit Scale: 53.309 Contrastive_loss: 1.0915 (1.3868) Loss: 1.0915 (1.3868)
736
+ 2024-01-30,19:55:04 | INFO | Train Epoch: 6 [12292096/16007168 (77%)] Data (t): 0.063 Batch (t): 0.353, 12537.6/s, 783.603/s/gpu LR: 0.000030 Logit Scale: 53.319 Contrastive_loss: 1.2502 (1.3824) Loss: 1.2502 (1.3824)
737
+ 2024-01-30,19:55:39 | INFO | Train Epoch: 6 [12701696/16007168 (79%)] Data (t): 0.074 Batch (t): 0.357, 12280.3/s, 767.519/s/gpu LR: 0.000028 Logit Scale: 53.344 Contrastive_loss: 1.2768 (1.3791) Loss: 1.2768 (1.3791)
738
+ 2024-01-30,19:56:15 | INFO | Train Epoch: 6 [13111296/16007168 (82%)] Data (t): 0.069 Batch (t): 0.357, 13484.9/s, 842.806/s/gpu LR: 0.000027 Logit Scale: 53.395 Contrastive_loss: 1.4222 (1.3804) Loss: 1.4222 (1.3804)
739
+ 2024-01-30,19:56:50 | INFO | Train Epoch: 6 [13520896/16007168 (84%)] Data (t): 0.064 Batch (t): 0.350, 13345.3/s, 834.082/s/gpu LR: 0.000026 Logit Scale: 53.433 Contrastive_loss: 1.8349 (1.3938) Loss: 1.8349 (1.3938)
740
+ 2024-01-30,19:57:26 | INFO | Train Epoch: 6 [13930496/16007168 (87%)] Data (t): 0.068 Batch (t): 0.360, 13076.0/s, 817.248/s/gpu LR: 0.000025 Logit Scale: 53.455 Contrastive_loss: 1.6498 (1.4011) Loss: 1.6498 (1.4011)
741
+ 2024-01-30,19:58:02 | INFO | Train Epoch: 6 [14340096/16007168 (90%)] Data (t): 0.076 Batch (t): 0.357, 13321.8/s, 832.611/s/gpu LR: 0.000024 Logit Scale: 53.470 Contrastive_loss: 1.8709 (1.4141) Loss: 1.8709 (1.4141)
742
+ 2024-01-30,19:58:37 | INFO | Train Epoch: 6 [14749696/16007168 (92%)] Data (t): 0.075 Batch (t): 0.355, 11993.4/s, 749.589/s/gpu LR: 0.000023 Logit Scale: 53.493 Contrastive_loss: 1.5078 (1.4167) Loss: 1.5078 (1.4167)
743
+ 2024-01-30,19:59:13 | INFO | Train Epoch: 6 [15159296/16007168 (95%)] Data (t): 0.071 Batch (t): 0.354, 12332.7/s, 770.794/s/gpu LR: 0.000022 Logit Scale: 53.529 Contrastive_loss: 1.4282 (1.4170) Loss: 1.4282 (1.4170)
744
+ 2024-01-30,19:59:48 | INFO | Train Epoch: 6 [15568896/16007168 (97%)] Data (t): 0.075 Batch (t): 0.354, 13164.6/s, 822.790/s/gpu LR: 0.000021 Logit Scale: 53.557 Contrastive_loss: 1.5449 (1.4203) Loss: 1.5449 (1.4203)
745
+ 2024-01-30,20:00:24 | INFO | Train Epoch: 6 [15978496/16007168 (100%)] Data (t): 0.078 Batch (t): 0.355, 12206.0/s, 762.874/s/gpu LR: 0.000020 Logit Scale: 53.560 Contrastive_loss: 1.6639 (1.4263) Loss: 1.6639 (1.4263)
746
+ 2024-01-30,20:00:26 | INFO | Train Epoch: 6 [16007168/16007168 (100%)] Data (t): 0.070 Batch (t): 0.307, 13646.1/s, 852.879/s/gpu LR: 0.000020 Logit Scale: 53.561 Contrastive_loss: 1.3248 (1.4239) Loss: 1.3248 (1.4239)
747
+ 2024-01-30,20:00:29 | INFO | Start epoch 7
748
+ 2024-01-30,20:00:31 | INFO | Train Epoch: 7 [ 4096/16007168 (0%)] Data (t): 1.850 Batch (t): 2.083, 1966.65/s, 122.916/s/gpu LR: 0.000020 Logit Scale: 53.562 Contrastive_loss: 1.2898 (1.2898) Loss: 1.2898 (1.2898)
749
+ 2024-01-30,20:01:07 | INFO | Train Epoch: 7 [ 413696/16007168 (3%)] Data (t): 0.084 Batch (t): 0.364, 13017.1/s, 813.568/s/gpu LR: 0.000019 Logit Scale: 53.603 Contrastive_loss: 1.3532 (1.3215) Loss: 1.3532 (1.3215)
750
+ 2024-01-30,20:01:42 | INFO | Train Epoch: 7 [ 823296/16007168 (5%)] Data (t): 0.068 Batch (t): 0.348, 12916.9/s, 807.305/s/gpu LR: 0.000018 Logit Scale: 53.625 Contrastive_loss: 1.6470 (1.4300) Loss: 1.6470 (1.4300)
751
+ 2024-01-30,20:02:18 | INFO | Train Epoch: 7 [ 1232896/16007168 (8%)] Data (t): 0.071 Batch (t): 0.362, 4865.44/s, 304.090/s/gpu LR: 0.000017 Logit Scale: 53.646 Contrastive_loss: 1.8723 (1.5406) Loss: 1.8723 (1.5406)
752
+ 2024-01-30,20:02:54 | INFO | Train Epoch: 7 [ 1642496/16007168 (10%)] Data (t): 0.076 Batch (t): 0.354, 12391.6/s, 774.472/s/gpu LR: 0.000016 Logit Scale: 53.665 Contrastive_loss: 1.5476 (1.5420) Loss: 1.5476 (1.5420)
753
+ 2024-01-30,20:03:30 | INFO | Train Epoch: 7 [ 2052096/16007168 (13%)] Data (t): 0.066 Batch (t): 0.361, 12847.2/s, 802.952/s/gpu LR: 0.000015 Logit Scale: 53.691 Contrastive_loss: 1.6589 (1.5615) Loss: 1.6589 (1.5615)
754
+ 2024-01-30,20:04:05 | INFO | Train Epoch: 7 [ 2461696/16007168 (15%)] Data (t): 0.073 Batch (t): 0.353, 13127.0/s, 820.436/s/gpu LR: 0.000014 Logit Scale: 53.712 Contrastive_loss: 1.5938 (1.5661) Loss: 1.5938 (1.5661)
755
+ 2024-01-30,20:04:41 | INFO | Train Epoch: 7 [ 2871296/16007168 (18%)] Data (t): 0.082 Batch (t): 0.353, 12785.3/s, 799.082/s/gpu LR: 0.000013 Logit Scale: 53.733 Contrastive_loss: 1.3077 (1.5338) Loss: 1.3077 (1.5338)
756
+ 2024-01-30,20:05:16 | INFO | Train Epoch: 7 [ 3280896/16007168 (20%)] Data (t): 0.095 Batch (t): 0.353, 12911.9/s, 806.992/s/gpu LR: 0.000012 Logit Scale: 53.750 Contrastive_loss: 1.5013 (1.5302) Loss: 1.5013 (1.5302)
757
+ 2024-01-30,20:05:51 | INFO | Train Epoch: 7 [ 3690496/16007168 (23%)] Data (t): 0.099 Batch (t): 0.355, 12074.3/s, 754.646/s/gpu LR: 0.000012 Logit Scale: 53.762 Contrastive_loss: 1.5242 (1.5296) Loss: 1.5242 (1.5296)
758
+ 2024-01-30,20:06:27 | INFO | Train Epoch: 7 [ 4100096/16007168 (26%)] Data (t): 0.107 Batch (t): 0.360, 12426.6/s, 776.663/s/gpu LR: 0.000011 Logit Scale: 53.755 Contrastive_loss: 1.5091 (1.5277) Loss: 1.5091 (1.5277)
759
+ 2024-01-30,20:07:03 | INFO | Train Epoch: 7 [ 4509696/16007168 (28%)] Data (t): 0.081 Batch (t): 0.358, 12593.1/s, 787.068/s/gpu LR: 0.000010 Logit Scale: 53.763 Contrastive_loss: 1.3031 (1.5090) Loss: 1.3031 (1.5090)
760
+ 2024-01-30,20:07:39 | INFO | Train Epoch: 7 [ 4919296/16007168 (31%)] Data (t): 0.066 Batch (t): 0.356, 12883.0/s, 805.186/s/gpu LR: 0.000009 Logit Scale: 53.772 Contrastive_loss: 1.6905 (1.5230) Loss: 1.6905 (1.5230)
761
+ 2024-01-30,20:08:14 | INFO | Train Epoch: 7 [ 5328896/16007168 (33%)] Data (t): 0.073 Batch (t): 0.357, 12023.9/s, 751.494/s/gpu LR: 0.000009 Logit Scale: 53.783 Contrastive_loss: 1.8182 (1.5440) Loss: 1.8182 (1.5440)
762
+ 2024-01-30,20:08:50 | INFO | Train Epoch: 7 [ 5738496/16007168 (36%)] Data (t): 0.072 Batch (t): 0.354, 12913.3/s, 807.084/s/gpu LR: 0.000008 Logit Scale: 53.797 Contrastive_loss: 1.4677 (1.5390) Loss: 1.4677 (1.5390)
763
+ 2024-01-30,20:09:25 | INFO | Train Epoch: 7 [ 6148096/16007168 (38%)] Data (t): 0.071 Batch (t): 0.354, 13077.2/s, 817.324/s/gpu LR: 0.000008 Logit Scale: 53.808 Contrastive_loss: 1.5279 (1.5383) Loss: 1.5279 (1.5383)
764
+ 2024-01-30,20:10:01 | INFO | Train Epoch: 7 [ 6557696/16007168 (41%)] Data (t): 0.073 Batch (t): 0.358, 13211.7/s, 825.731/s/gpu LR: 0.000007 Logit Scale: 53.819 Contrastive_loss: 1.6676 (1.5459) Loss: 1.6676 (1.5459)
765
+ 2024-01-30,20:10:38 | INFO | Train Epoch: 7 [ 6967296/16007168 (44%)] Data (t): 0.076 Batch (t): 0.365, 12937.3/s, 808.582/s/gpu LR: 0.000006 Logit Scale: 53.828 Contrastive_loss: 1.1835 (1.5257) Loss: 1.1835 (1.5257)
766
+ 2024-01-30,20:11:13 | INFO | Train Epoch: 7 [ 7376896/16007168 (46%)] Data (t): 0.075 Batch (t): 0.357, 12368.6/s, 773.034/s/gpu LR: 0.000006 Logit Scale: 53.838 Contrastive_loss: 1.3115 (1.5145) Loss: 1.3115 (1.5145)
767
+ 2024-01-30,20:11:48 | INFO | Train Epoch: 7 [ 7786496/16007168 (49%)] Data (t): 0.075 Batch (t): 0.348, 13017.0/s, 813.565/s/gpu LR: 0.000005 Logit Scale: 53.848 Contrastive_loss: 1.5983 (1.5187) Loss: 1.5983 (1.5187)
768
+ 2024-01-30,20:12:24 | INFO | Train Epoch: 7 [ 8196096/16007168 (51%)] Data (t): 0.073 Batch (t): 0.356, 12452.8/s, 778.303/s/gpu LR: 0.000005 Logit Scale: 53.854 Contrastive_loss: 1.4691 (1.5163) Loss: 1.4691 (1.5163)
769
+ 2024-01-30,20:12:59 | INFO | Train Epoch: 7 [ 8605696/16007168 (54%)] Data (t): 0.076 Batch (t): 0.357, 12981.8/s, 811.362/s/gpu LR: 0.000004 Logit Scale: 53.864 Contrastive_loss: 1.1038 (1.4975) Loss: 1.1038 (1.4975)
770
+ 2024-01-30,20:13:35 | INFO | Train Epoch: 7 [ 9015296/16007168 (56%)] Data (t): 0.074 Batch (t): 0.353, 13244.6/s, 827.789/s/gpu LR: 0.000004 Logit Scale: 53.868 Contrastive_loss: 0.99178 (1.4756) Loss: 0.99178 (1.4756)
771
+ 2024-01-30,20:14:10 | INFO | Train Epoch: 7 [ 9424896/16007168 (59%)] Data (t): 0.074 Batch (t): 0.354, 12366.7/s, 772.916/s/gpu LR: 0.000003 Logit Scale: 53.875 Contrastive_loss: 1.3705 (1.4712) Loss: 1.3705 (1.4712)
772
+ 2024-01-30,20:14:45 | INFO | Train Epoch: 7 [ 9834496/16007168 (61%)] Data (t): 0.070 Batch (t): 0.349, 12894.2/s, 805.888/s/gpu LR: 0.000003 Logit Scale: 53.884 Contrastive_loss: 1.4722 (1.4712) Loss: 1.4722 (1.4712)
773
+ 2024-01-30,20:15:20 | INFO | Train Epoch: 7 [10244096/16007168 (64%)] Data (t): 0.060 Batch (t): 0.349, 12382.3/s, 773.896/s/gpu LR: 0.000003 Logit Scale: 53.884 Contrastive_loss: 1.7520 (1.4820) Loss: 1.7520 (1.4820)
774
+ 2024-01-30,20:15:56 | INFO | Train Epoch: 7 [10653696/16007168 (67%)] Data (t): 0.065 Batch (t): 0.366, 12929.3/s, 808.081/s/gpu LR: 0.000002 Logit Scale: 53.882 Contrastive_loss: 1.3723 (1.4780) Loss: 1.3723 (1.4780)
775
+ 2024-01-30,20:16:32 | INFO | Train Epoch: 7 [11063296/16007168 (69%)] Data (t): 0.076 Batch (t): 0.360, 12416.2/s, 776.009/s/gpu LR: 0.000002 Logit Scale: 53.884 Contrastive_loss: 1.0876 (1.4640) Loss: 1.0876 (1.4640)
776
+ 2024-01-30,20:17:08 | INFO | Train Epoch: 7 [11472896/16007168 (72%)] Data (t): 0.075 Batch (t): 0.354, 13008.4/s, 813.024/s/gpu LR: 0.000002 Logit Scale: 53.887 Contrastive_loss: 1.4124 (1.4622) Loss: 1.4124 (1.4622)
777
+ 2024-01-30,20:17:43 | INFO | Train Epoch: 7 [11882496/16007168 (74%)] Data (t): 0.071 Batch (t): 0.351, 12664.8/s, 791.548/s/gpu LR: 0.000001 Logit Scale: 53.892 Contrastive_loss: 1.4187 (1.4608) Loss: 1.4187 (1.4608)
778
+ 2024-01-30,20:18:18 | INFO | Train Epoch: 7 [12292096/16007168 (77%)] Data (t): 0.063 Batch (t): 0.355, 12457.1/s, 778.571/s/gpu LR: 0.000001 Logit Scale: 53.893 Contrastive_loss: 1.5712 (1.4643) Loss: 1.5712 (1.4643)
779
+ 2024-01-30,20:18:54 | INFO | Train Epoch: 7 [12701696/16007168 (79%)] Data (t): 0.067 Batch (t): 0.357, 13221.5/s, 826.346/s/gpu LR: 0.000001 Logit Scale: 53.893 Contrastive_loss: 1.4487 (1.4639) Loss: 1.4487 (1.4639)
780
+ 2024-01-30,20:19:30 | INFO | Train Epoch: 7 [13111296/16007168 (82%)] Data (t): 0.071 Batch (t): 0.357, 13395.8/s, 837.240/s/gpu LR: 0.000001 Logit Scale: 53.896 Contrastive_loss: 1.6786 (1.4704) Loss: 1.6786 (1.4704)
781
+ 2024-01-30,20:20:05 | INFO | Train Epoch: 7 [13520896/16007168 (84%)] Data (t): 0.071 Batch (t): 0.350, 13017.9/s, 813.621/s/gpu LR: 0.000000 Logit Scale: 53.896 Contrastive_loss: 1.3835 (1.4678) Loss: 1.3835 (1.4678)
782
+ 2024-01-30,20:20:40 | INFO | Train Epoch: 7 [13930496/16007168 (87%)] Data (t): 0.080 Batch (t): 0.347, 12232.5/s, 764.533/s/gpu LR: 0.000000 Logit Scale: 53.895 Contrastive_loss: 0.97439 (1.4537) Loss: 0.97439 (1.4537)
783
+ 2024-01-30,20:21:15 | INFO | Train Epoch: 7 [14340096/16007168 (90%)] Data (t): 0.098 Batch (t): 0.356, 13000.3/s, 812.517/s/gpu LR: 0.000000 Logit Scale: 53.895 Contrastive_loss: 1.1748 (1.4460) Loss: 1.1748 (1.4460)
784
+ 2024-01-30,20:21:51 | INFO | Train Epoch: 7 [14749696/16007168 (92%)] Data (t): 0.078 Batch (t): 0.363, 12443.8/s, 777.735/s/gpu LR: 0.000000 Logit Scale: 53.895 Contrastive_loss: 1.4320 (1.4456) Loss: 1.4320 (1.4456)
785
+ 2024-01-30,20:22:28 | INFO | Train Epoch: 7 [15159296/16007168 (95%)] Data (t): 0.074 Batch (t): 0.362, 13333.4/s, 833.339/s/gpu LR: 0.000000 Logit Scale: 53.895 Contrastive_loss: 1.4076 (1.4446) Loss: 1.4076 (1.4446)
786
+ 2024-01-30,20:23:03 | INFO | Train Epoch: 7 [15568896/16007168 (97%)] Data (t): 0.077 Batch (t): 0.357, 13036.9/s, 814.806/s/gpu LR: 0.000000 Logit Scale: 53.895 Contrastive_loss: 1.3819 (1.4430) Loss: 1.3819 (1.4430)
787
+ 2024-01-30,20:23:39 | INFO | Train Epoch: 7 [15978496/16007168 (100%)] Data (t): 0.079 Batch (t): 0.362, 13060.0/s, 816.253/s/gpu LR: 0.000000 Logit Scale: 53.895 Contrastive_loss: 1.4386 (1.4429) Loss: 1.4386 (1.4429)
788
+ 2024-01-30,20:23:42 | INFO | Train Epoch: 7 [16007168/16007168 (100%)] Data (t): 0.064 Batch (t): 0.322, 13693.9/s, 855.869/s/gpu LR: 0.000000 Logit Scale: 53.895 Contrastive_loss: 1.4389 (1.4428) Loss: 1.4389 (1.4428)
params.txt ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accum_freq: 1
2
+ aug_cfg: {}
3
+ batch_size: 256
4
+ beta1: 0.9
5
+ beta2: 0.98
6
+ checkpoint_path: /mnt/bn/datacompv6/weizhi_multimodal/datacomp/checkpoints_v5_gpt4v_cc12m/medium_object_detail_fulfillment_th_20_mutli_score_and/checkpoints
7
+ coca_caption_loss_weight: 2.0
8
+ coca_contrastive_loss_weight: 1.0
9
+ copy_codebase: False
10
+ csv_caption_key: title
11
+ csv_img_key: filepath
12
+ csv_separator:
13
+ dataset_resampled: True
14
+ dataset_type: webdataset
15
+ ddp_static_graph: True
16
+ debug: False
17
+ delete_previous_checkpoint: False
18
+ device: cuda:0
19
+ dist_backend: nccl
20
+ dist_url: env://
21
+ distill: False
22
+ distill_model: None
23
+ distill_pretrained: None
24
+ distributed: True
25
+ epochs: 8
26
+ epochs_cooldown: None
27
+ eps: 1e-06
28
+ force_custom_text: False
29
+ force_image_size: None
30
+ force_patch_dropout: None
31
+ force_quick_gelu: False
32
+ gather_with_grad: True
33
+ grad_checkpointing: True
34
+ grad_clip_norm: None
35
+ horovod: False
36
+ image_mean: None
37
+ image_std: None
38
+ imagenet_v2: None
39
+ imagenet_val: None
40
+ local_loss: True
41
+ local_rank: 0
42
+ lock_image: False
43
+ lock_image_freeze_bn_stats: False
44
+ lock_image_unlocked_groups: 0
45
+ lock_text: False
46
+ lock_text_freeze_layer_norm: False
47
+ lock_text_unlocked_layers: 0
48
+ log_every_n_steps: 100
49
+ log_level: 20
50
+ log_local: False
51
+ log_path: /mnt/bn/datacompv6/weizhi_multimodal/datacomp/checkpoints_v5_gpt4v_cc12m/medium_object_detail_fulfillment_th_20_mutli_score_and/out.log
52
+ logs: /mnt/bn/datacompv6/weizhi_multimodal/datacomp/checkpoints_v5_gpt4v_cc12m
53
+ lr: 0.0005
54
+ lr_cooldown_end: 0.0
55
+ lr_cooldown_power: 1.0
56
+ lr_scheduler: cosine
57
+ model: ViT-B-32
58
+ name: medium_object_detail_fulfillment_th_20_mutli_score_and
59
+ no_set_device_rank: False
60
+ precision: amp
61
+ pretrained:
62
+ pretrained_image: False
63
+ rank: 0
64
+ remote_sync: None
65
+ remote_sync_frequency: 300
66
+ remote_sync_protocol: s3
67
+ report_to:
68
+ resume: None
69
+ save_frequency: 0
70
+ save_most_recent: True
71
+ seed: 0
72
+ skip_scheduler: False
73
+ tensorboard: False
74
+ tensorboard_path:
75
+ torchscript: False
76
+ trace: False
77
+ train_data: /mnt/bn/datacompv6/weizhi_multimodal/datacomp/filtered_shards_v5_gpt4v_cc12m/medium_object_detail_fulfillment_th_20_mutli_score_and/{00000000..00003219}.tar
78
+ train_data_upsampling_factors: None
79
+ train_num_samples: 16000000
80
+ use_bn_sync: False
81
+ val_data: None
82
+ val_frequency: 1
83
+ val_num_samples: None
84
+ wandb: False
85
+ wandb_notes:
86
+ wandb_project_name: open-clip
87
+ warmup: 500
88
+ wd: 0.2
89
+ workers: 4
90
+ world_size: 16
91
+ zeroshot_frequency: 2