Files changed (1) hide show
  1. README.md +325 -325
README.md CHANGED
@@ -174,7 +174,7 @@ model-index:
174
  value: 0.14 [0.14, 0.15]
175
  name: IQM expert normalized total reward
176
  - type: iqm_human_normalized_total_reward
177
- value: 0.38 [0.37, 0.38]
178
  name: IQM human normalized total reward
179
  - task:
180
  type: reinforcement-learning
@@ -194,7 +194,7 @@ model-index:
194
  type: metaworld
195
  metrics:
196
  - type: iqm_expert_normalized_total_reward
197
- value: 0.68 [0.67, 0.69]
198
  name: IQM expert normalized total reward
199
  - task:
200
  type: reinforcement-learning
@@ -204,7 +204,7 @@ model-index:
204
  type: mujoco
205
  metrics:
206
  - type: iqm_expert_normalized_total_reward
207
- value: 0.81 [0.80, 0.82]
208
  name: IQM expert normalized total reward
209
  - task:
210
  type: reinforcement-learning
@@ -214,13 +214,13 @@ model-index:
214
  type: atari-alien
215
  metrics:
216
  - type: total_reward
217
- value: 1474.90 +/- 588.75
218
  name: Total reward
219
  - type: expert_normalized_total_reward
220
- value: 0.07 +/- 0.04
221
  name: Expert normalized total reward
222
  - type: human_normalized_total_reward
223
- value: 0.18 +/- 0.09
224
  name: Human normalized total reward
225
  - task:
226
  type: reinforcement-learning
@@ -230,13 +230,13 @@ model-index:
230
  type: atari-amidar
231
  metrics:
232
  - type: total_reward
233
- value: 104.89 +/- 103.52
234
  name: Total reward
235
  - type: expert_normalized_total_reward
236
- value: 0.05 +/- 0.05
237
  name: Expert normalized total reward
238
  - type: human_normalized_total_reward
239
- value: 0.06 +/- 0.06
240
  name: Human normalized total reward
241
  - task:
242
  type: reinforcement-learning
@@ -246,13 +246,13 @@ model-index:
246
  type: atari-assault
247
  metrics:
248
  - type: total_reward
249
- value: 1650.07 +/- 820.99
250
  name: Total reward
251
  - type: expert_normalized_total_reward
252
  value: 0.09 +/- 0.05
253
  name: Expert normalized total reward
254
  - type: human_normalized_total_reward
255
- value: 2.75 +/- 1.58
256
  name: Human normalized total reward
257
  - task:
258
  type: reinforcement-learning
@@ -262,13 +262,13 @@ model-index:
262
  type: atari-asterix
263
  metrics:
264
  - type: total_reward
265
- value: 800.00 +/- 584.85
266
  name: Total reward
267
  - type: expert_normalized_total_reward
268
- value: 0.17 +/- 0.17
269
  name: Expert normalized total reward
270
  - type: human_normalized_total_reward
271
- value: 0.07 +/- 0.07
272
  name: Human normalized total reward
273
  - task:
274
  type: reinforcement-learning
@@ -278,7 +278,7 @@ model-index:
278
  type: atari-asteroids
279
  metrics:
280
  - type: total_reward
281
- value: 1385.30 +/- 507.53
282
  name: Total reward
283
  - type: expert_normalized_total_reward
284
  value: 0.00 +/- 0.00
@@ -294,13 +294,13 @@ model-index:
294
  type: atari-atlantis
295
  metrics:
296
  - type: total_reward
297
- value: 66980.00 +/- 158449.73
298
  name: Total reward
299
  - type: expert_normalized_total_reward
300
- value: 0.18 +/- 0.51
301
  name: Expert normalized total reward
302
  - type: human_normalized_total_reward
303
- value: 3.35 +/- 9.79
304
  name: Human normalized total reward
305
  - task:
306
  type: reinforcement-learning
@@ -310,13 +310,13 @@ model-index:
310
  type: atari-bankheist
311
  metrics:
312
  - type: total_reward
313
- value: 948.30 +/- 199.86
314
  name: Total reward
315
  - type: expert_normalized_total_reward
316
- value: 0.71 +/- 0.15
317
  name: Expert normalized total reward
318
  - type: human_normalized_total_reward
319
- value: 1.26 +/- 0.27
320
  name: Human normalized total reward
321
  - task:
322
  type: reinforcement-learning
@@ -326,13 +326,13 @@ model-index:
326
  type: atari-battlezone
327
  metrics:
328
  - type: total_reward
329
- value: 17420.00 +/- 6071.54
330
  name: Total reward
331
  - type: expert_normalized_total_reward
332
  value: 0.06 +/- 0.02
333
  name: Expert normalized total reward
334
  - type: human_normalized_total_reward
335
- value: 0.47 +/- 0.16
336
  name: Human normalized total reward
337
  - task:
338
  type: reinforcement-learning
@@ -342,13 +342,13 @@ model-index:
342
  type: atari-beamrider
343
  metrics:
344
  - type: total_reward
345
- value: 797.32 +/- 328.31
346
  name: Total reward
347
  - type: expert_normalized_total_reward
348
  value: 0.01 +/- 0.01
349
  name: Expert normalized total reward
350
  - type: human_normalized_total_reward
351
- value: 0.03 +/- 0.02
352
  name: Human normalized total reward
353
  - task:
354
  type: reinforcement-learning
@@ -358,13 +358,13 @@ model-index:
358
  type: atari-berzerk
359
  metrics:
360
  - type: total_reward
361
- value: 687.30 +/- 331.91
362
  name: Total reward
363
  - type: expert_normalized_total_reward
364
  value: 0.01 +/- 0.01
365
  name: Expert normalized total reward
366
  - type: human_normalized_total_reward
367
- value: 0.22 +/- 0.13
368
  name: Human normalized total reward
369
  - task:
370
  type: reinforcement-learning
@@ -374,7 +374,7 @@ model-index:
374
  type: atari-bowling
375
  metrics:
376
  - type: total_reward
377
- value: 22.41 +/- 5.57
378
  name: Total reward
379
  - type: expert_normalized_total_reward
380
  value: 1.00 +/- 0.00
@@ -390,13 +390,13 @@ model-index:
390
  type: atari-boxing
391
  metrics:
392
  - type: total_reward
393
- value: 90.10 +/- 23.05
394
  name: Total reward
395
  - type: expert_normalized_total_reward
396
- value: 0.92 +/- 0.24
397
  name: Expert normalized total reward
398
  - type: human_normalized_total_reward
399
- value: 7.50 +/- 1.92
400
  name: Human normalized total reward
401
  - task:
402
  type: reinforcement-learning
@@ -406,13 +406,13 @@ model-index:
406
  type: atari-breakout
407
  metrics:
408
  - type: total_reward
409
- value: 8.82 +/- 5.63
410
  name: Total reward
411
  - type: expert_normalized_total_reward
412
  value: 0.01 +/- 0.01
413
  name: Expert normalized total reward
414
  - type: human_normalized_total_reward
415
- value: 0.25 +/- 0.20
416
  name: Human normalized total reward
417
  - task:
418
  type: reinforcement-learning
@@ -422,13 +422,13 @@ model-index:
422
  type: atari-centipede
423
  metrics:
424
  - type: total_reward
425
- value: 5589.92 +/- 2567.26
426
  name: Total reward
427
  - type: expert_normalized_total_reward
428
- value: 0.37 +/- 0.27
429
  name: Expert normalized total reward
430
  - type: human_normalized_total_reward
431
- value: 0.35 +/- 0.26
432
  name: Human normalized total reward
433
  - task:
434
  type: reinforcement-learning
@@ -438,13 +438,13 @@ model-index:
438
  type: atari-choppercommand
439
  metrics:
440
  - type: total_reward
441
- value: 2417.00 +/- 1489.90
442
  name: Total reward
443
  - type: expert_normalized_total_reward
444
- value: 0.02 +/- 0.02
445
  name: Expert normalized total reward
446
  - type: human_normalized_total_reward
447
- value: 0.24 +/- 0.23
448
  name: Human normalized total reward
449
  - task:
450
  type: reinforcement-learning
@@ -454,13 +454,13 @@ model-index:
454
  type: atari-crazyclimber
455
  metrics:
456
  - type: total_reward
457
- value: 97639.00 +/- 26184.68
458
  name: Total reward
459
  - type: expert_normalized_total_reward
460
- value: 0.52 +/- 0.16
461
  name: Expert normalized total reward
462
  - type: human_normalized_total_reward
463
- value: 3.47 +/- 1.05
464
  name: Human normalized total reward
465
  - task:
466
  type: reinforcement-learning
@@ -470,13 +470,13 @@ model-index:
470
  type: atari-defender
471
  metrics:
472
  - type: total_reward
473
- value: 39323.50 +/- 15202.98
474
  name: Total reward
475
  - type: expert_normalized_total_reward
476
- value: 0.10 +/- 0.04
477
  name: Expert normalized total reward
478
  - type: human_normalized_total_reward
479
- value: 2.30 +/- 0.96
480
  name: Human normalized total reward
481
  - task:
482
  type: reinforcement-learning
@@ -486,13 +486,13 @@ model-index:
486
  type: atari-demonattack
487
  metrics:
488
  - type: total_reward
489
- value: 815.30 +/- 989.67
490
  name: Total reward
491
  - type: expert_normalized_total_reward
492
  value: 0.01 +/- 0.01
493
  name: Expert normalized total reward
494
  - type: human_normalized_total_reward
495
- value: 0.36 +/- 0.54
496
  name: Human normalized total reward
497
  - task:
498
  type: reinforcement-learning
@@ -502,13 +502,13 @@ model-index:
502
  type: atari-doubledunk
503
  metrics:
504
  - type: total_reward
505
- value: 14.42 +/- 9.97
506
  name: Total reward
507
  - type: expert_normalized_total_reward
508
- value: 0.84 +/- 0.25
509
  name: Expert normalized total reward
510
  - type: human_normalized_total_reward
511
- value: 0.94 +/- 0.28
512
  name: Human normalized total reward
513
  - task:
514
  type: reinforcement-learning
@@ -518,13 +518,13 @@ model-index:
518
  type: atari-enduro
519
  metrics:
520
  - type: total_reward
521
- value: 108.52 +/- 42.73
522
  name: Total reward
523
  - type: expert_normalized_total_reward
524
- value: 0.05 +/- 0.02
525
  name: Expert normalized total reward
526
  - type: human_normalized_total_reward
527
- value: 0.13 +/- 0.05
528
  name: Human normalized total reward
529
  - task:
530
  type: reinforcement-learning
@@ -534,13 +534,13 @@ model-index:
534
  type: atari-fishingderby
535
  metrics:
536
  - type: total_reward
537
- value: -30.35 +/- 24.37
538
  name: Total reward
539
  - type: expert_normalized_total_reward
540
- value: 0.62 +/- 0.25
541
  name: Expert normalized total reward
542
  - type: human_normalized_total_reward
543
- value: 0.47 +/- 0.19
544
  name: Human normalized total reward
545
  - task:
546
  type: reinforcement-learning
@@ -550,10 +550,10 @@ model-index:
550
  type: atari-freeway
551
  metrics:
552
  - type: total_reward
553
- value: 27.49 +/- 1.63
554
  name: Total reward
555
  - type: expert_normalized_total_reward
556
- value: 0.81 +/- 0.05
557
  name: Expert normalized total reward
558
  - type: human_normalized_total_reward
559
  value: 0.93 +/- 0.06
@@ -566,13 +566,13 @@ model-index:
566
  type: atari-frostbite
567
  metrics:
568
  - type: total_reward
569
- value: 2769.60 +/- 1445.61
570
  name: Total reward
571
  - type: expert_normalized_total_reward
572
- value: 0.21 +/- 0.11
573
  name: Expert normalized total reward
574
  - type: human_normalized_total_reward
575
- value: 0.63 +/- 0.34
576
  name: Human normalized total reward
577
  - task:
578
  type: reinforcement-learning
@@ -582,13 +582,13 @@ model-index:
582
  type: atari-gopher
583
  metrics:
584
  - type: total_reward
585
- value: 5340.60 +/- 2547.07
586
  name: Total reward
587
  - type: expert_normalized_total_reward
588
  value: 0.06 +/- 0.03
589
  name: Expert normalized total reward
590
  - type: human_normalized_total_reward
591
- value: 2.36 +/- 1.18
592
  name: Human normalized total reward
593
  - task:
594
  type: reinforcement-learning
@@ -598,13 +598,13 @@ model-index:
598
  type: atari-gravitar
599
  metrics:
600
  - type: total_reward
601
- value: 1269.50 +/- 902.99
602
  name: Total reward
603
  - type: expert_normalized_total_reward
604
- value: 0.29 +/- 0.24
605
  name: Expert normalized total reward
606
  - type: human_normalized_total_reward
607
- value: 0.34 +/- 0.28
608
  name: Human normalized total reward
609
  - task:
610
  type: reinforcement-learning
@@ -614,13 +614,13 @@ model-index:
614
  type: atari-hero
615
  metrics:
616
  - type: total_reward
617
- value: 11709.65 +/- 3233.53
618
  name: Total reward
619
  - type: expert_normalized_total_reward
620
- value: 0.24 +/- 0.07
621
  name: Expert normalized total reward
622
  - type: human_normalized_total_reward
623
- value: 0.36 +/- 0.11
624
  name: Human normalized total reward
625
  - task:
626
  type: reinforcement-learning
@@ -630,13 +630,13 @@ model-index:
630
  type: atari-icehockey
631
  metrics:
632
  - type: total_reward
633
- value: 7.48 +/- 5.60
634
  name: Total reward
635
  - type: expert_normalized_total_reward
636
- value: 0.51 +/- 0.15
637
  name: Expert normalized total reward
638
  - type: human_normalized_total_reward
639
- value: 1.54 +/- 0.46
640
  name: Human normalized total reward
641
  - task:
642
  type: reinforcement-learning
@@ -646,13 +646,13 @@ model-index:
646
  type: atari-jamesbond
647
  metrics:
648
  - type: total_reward
649
- value: 327.50 +/- 123.16
650
  name: Total reward
651
  - type: expert_normalized_total_reward
652
- value: 0.01 +/- 0.00
653
  name: Expert normalized total reward
654
  - type: human_normalized_total_reward
655
- value: 1.09 +/- 0.45
656
  name: Human normalized total reward
657
  - task:
658
  type: reinforcement-learning
@@ -662,13 +662,13 @@ model-index:
662
  type: atari-kangaroo
663
  metrics:
664
  - type: total_reward
665
- value: 378.00 +/- 343.97
666
  name: Total reward
667
  - type: expert_normalized_total_reward
668
- value: 0.62 +/- 0.66
669
  name: Expert normalized total reward
670
  - type: human_normalized_total_reward
671
- value: 0.11 +/- 0.12
672
  name: Human normalized total reward
673
  - task:
674
  type: reinforcement-learning
@@ -678,13 +678,13 @@ model-index:
678
  type: atari-krull
679
  metrics:
680
  - type: total_reward
681
- value: 10720.50 +/- 1284.13
682
  name: Total reward
683
  - type: expert_normalized_total_reward
684
  value: 0.93 +/- 0.13
685
  name: Expert normalized total reward
686
  - type: human_normalized_total_reward
687
- value: 8.55 +/- 1.20
688
  name: Human normalized total reward
689
  - task:
690
  type: reinforcement-learning
@@ -694,13 +694,13 @@ model-index:
694
  type: atari-kungfumaster
695
  metrics:
696
  - type: total_reward
697
- value: 288.00 +/- 255.06
698
  name: Total reward
699
  - type: expert_normalized_total_reward
700
- value: 0.00 +/- 0.01
701
  name: Expert normalized total reward
702
  - type: human_normalized_total_reward
703
- value: 0.00 +/- 0.01
704
  name: Human normalized total reward
705
  - task:
706
  type: reinforcement-learning
@@ -726,13 +726,13 @@ model-index:
726
  type: atari-mspacman
727
  metrics:
728
  - type: total_reward
729
- value: 1573.10 +/- 483.96
730
  name: Total reward
731
  - type: expert_normalized_total_reward
732
- value: 0.19 +/- 0.07
733
  name: Expert normalized total reward
734
  - type: human_normalized_total_reward
735
- value: 0.19 +/- 0.07
736
  name: Human normalized total reward
737
  - task:
738
  type: reinforcement-learning
@@ -742,13 +742,13 @@ model-index:
742
  type: atari-namethisgame
743
  metrics:
744
  - type: total_reward
745
- value: 7523.30 +/- 2471.38
746
  name: Total reward
747
  - type: expert_normalized_total_reward
748
- value: 0.25 +/- 0.12
749
  name: Expert normalized total reward
750
  - type: human_normalized_total_reward
751
- value: 0.91 +/- 0.43
752
  name: Human normalized total reward
753
  - task:
754
  type: reinforcement-learning
@@ -758,13 +758,13 @@ model-index:
758
  type: atari-phoenix
759
  metrics:
760
  - type: total_reward
761
- value: 2197.90 +/- 1795.38
762
  name: Total reward
763
  - type: expert_normalized_total_reward
764
  value: 0.00 +/- 0.00
765
  name: Expert normalized total reward
766
  - type: human_normalized_total_reward
767
- value: 0.22 +/- 0.28
768
  name: Human normalized total reward
769
  - task:
770
  type: reinforcement-learning
@@ -774,10 +774,10 @@ model-index:
774
  type: atari-pitfall
775
  metrics:
776
  - type: total_reward
777
- value: -6.68 +/- 19.05
778
  name: Total reward
779
  - type: expert_normalized_total_reward
780
- value: 0.98 +/- 0.08
781
  name: Expert normalized total reward
782
  - type: human_normalized_total_reward
783
  value: 0.03 +/- 0.00
@@ -790,13 +790,13 @@ model-index:
790
  type: atari-pong
791
  metrics:
792
  - type: total_reward
793
- value: 13.69 +/- 13.35
794
  name: Total reward
795
  - type: expert_normalized_total_reward
796
- value: 0.82 +/- 0.32
797
  name: Expert normalized total reward
798
  - type: human_normalized_total_reward
799
- value: 0.97 +/- 0.38
800
  name: Human normalized total reward
801
  - task:
802
  type: reinforcement-learning
@@ -822,13 +822,13 @@ model-index:
822
  type: atari-qbert
823
  metrics:
824
  - type: total_reward
825
- value: 1951.50 +/- 2577.24
826
  name: Total reward
827
  - type: expert_normalized_total_reward
828
- value: 0.04 +/- 0.06
829
  name: Expert normalized total reward
830
  - type: human_normalized_total_reward
831
- value: 0.13 +/- 0.19
832
  name: Human normalized total reward
833
  - task:
834
  type: reinforcement-learning
@@ -838,13 +838,13 @@ model-index:
838
  type: atari-riverraid
839
  metrics:
840
  - type: total_reward
841
- value: 3758.50 +/- 1536.66
842
  name: Total reward
843
  - type: expert_normalized_total_reward
844
- value: 0.18 +/- 0.11
845
  name: Expert normalized total reward
846
  - type: human_normalized_total_reward
847
- value: 0.15 +/- 0.10
848
  name: Human normalized total reward
849
  - task:
850
  type: reinforcement-learning
@@ -854,13 +854,13 @@ model-index:
854
  type: atari-roadrunner
855
  metrics:
856
  - type: total_reward
857
- value: 6407.00 +/- 4847.36
858
  name: Total reward
859
  - type: expert_normalized_total_reward
860
- value: 0.08 +/- 0.06
861
  name: Expert normalized total reward
862
  - type: human_normalized_total_reward
863
- value: 0.82 +/- 0.62
864
  name: Human normalized total reward
865
  - task:
866
  type: reinforcement-learning
@@ -870,13 +870,13 @@ model-index:
870
  type: atari-robotank
871
  metrics:
872
  - type: total_reward
873
- value: 11.34 +/- 5.52
874
  name: Total reward
875
  - type: expert_normalized_total_reward
876
- value: 0.12 +/- 0.07
877
  name: Expert normalized total reward
878
  - type: human_normalized_total_reward
879
- value: 0.94 +/- 0.57
880
  name: Human normalized total reward
881
  - task:
882
  type: reinforcement-learning
@@ -886,10 +886,10 @@ model-index:
886
  type: atari-seaquest
887
  metrics:
888
  - type: total_reward
889
- value: 804.00 +/- 403.33
890
  name: Total reward
891
  - type: expert_normalized_total_reward
892
- value: 0.29 +/- 0.16
893
  name: Expert normalized total reward
894
  - type: human_normalized_total_reward
895
  value: 0.02 +/- 0.01
@@ -902,13 +902,13 @@ model-index:
902
  type: atari-skiing
903
  metrics:
904
  - type: total_reward
905
- value: -16231.54 +/- 6060.48
906
  name: Total reward
907
  - type: expert_normalized_total_reward
908
- value: 0.14 +/- 0.95
909
  name: Expert normalized total reward
910
  - type: human_normalized_total_reward
911
- value: 0.07 +/- 0.47
912
  name: Human normalized total reward
913
  - task:
914
  type: reinforcement-learning
@@ -918,13 +918,13 @@ model-index:
918
  type: atari-solaris
919
  metrics:
920
  - type: total_reward
921
- value: 1286.60 +/- 446.70
922
  name: Total reward
923
  - type: expert_normalized_total_reward
924
- value: 0.43 +/- 3.81
925
  name: Expert normalized total reward
926
  - type: human_normalized_total_reward
927
- value: 0.00 +/- 0.04
928
  name: Human normalized total reward
929
  - task:
930
  type: reinforcement-learning
@@ -934,13 +934,13 @@ model-index:
934
  type: atari-spaceinvaders
935
  metrics:
936
  - type: total_reward
937
- value: 325.45 +/- 163.36
938
  name: Total reward
939
  - type: expert_normalized_total_reward
940
- value: 0.01 +/- 0.01
941
  name: Expert normalized total reward
942
  - type: human_normalized_total_reward
943
- value: 0.12 +/- 0.11
944
  name: Human normalized total reward
945
  - task:
946
  type: reinforcement-learning
@@ -950,13 +950,13 @@ model-index:
950
  type: atari-stargunner
951
  metrics:
952
  - type: total_reward
953
- value: 4379.00 +/- 3027.22
954
  name: Total reward
955
  - type: expert_normalized_total_reward
956
  value: 0.01 +/- 0.01
957
  name: Expert normalized total reward
958
  - type: human_normalized_total_reward
959
- value: 0.39 +/- 0.32
960
  name: Human normalized total reward
961
  - task:
962
  type: reinforcement-learning
@@ -966,13 +966,13 @@ model-index:
966
  type: atari-surround
967
  metrics:
968
  - type: total_reward
969
- value: 2.67 +/- 4.74
970
  name: Total reward
971
  - type: expert_normalized_total_reward
972
- value: 0.65 +/- 0.24
973
  name: Expert normalized total reward
974
  - type: human_normalized_total_reward
975
- value: 0.77 +/- 0.29
976
  name: Human normalized total reward
977
  - task:
978
  type: reinforcement-learning
@@ -982,13 +982,13 @@ model-index:
982
  type: atari-tennis
983
  metrics:
984
  - type: total_reward
985
- value: -13.46 +/- 3.80
986
  name: Total reward
987
  - type: expert_normalized_total_reward
988
- value: 0.30 +/- 0.11
989
  name: Expert normalized total reward
990
  - type: human_normalized_total_reward
991
- value: 0.32 +/- 0.12
992
  name: Human normalized total reward
993
  - task:
994
  type: reinforcement-learning
@@ -998,13 +998,13 @@ model-index:
998
  type: atari-timepilot
999
  metrics:
1000
  - type: total_reward
1001
- value: 13028.00 +/- 5222.57
1002
  name: Total reward
1003
  - type: expert_normalized_total_reward
1004
- value: 0.14 +/- 0.08
1005
  name: Expert normalized total reward
1006
  - type: human_normalized_total_reward
1007
- value: 5.69 +/- 3.14
1008
  name: Human normalized total reward
1009
  - task:
1010
  type: reinforcement-learning
@@ -1014,13 +1014,13 @@ model-index:
1014
  type: atari-tutankham
1015
  metrics:
1016
  - type: total_reward
1017
- value: 85.66 +/- 61.77
1018
  name: Total reward
1019
  - type: expert_normalized_total_reward
1020
- value: 0.27 +/- 0.22
1021
  name: Expert normalized total reward
1022
  - type: human_normalized_total_reward
1023
- value: 0.48 +/- 0.40
1024
  name: Human normalized total reward
1025
  - task:
1026
  type: reinforcement-learning
@@ -1030,13 +1030,13 @@ model-index:
1030
  type: atari-upndown
1031
  metrics:
1032
  - type: total_reward
1033
- value: 17768.70 +/- 10321.95
1034
  name: Total reward
1035
  - type: expert_normalized_total_reward
1036
  value: 0.04 +/- 0.02
1037
  name: Expert normalized total reward
1038
  - type: human_normalized_total_reward
1039
- value: 1.54 +/- 0.92
1040
  name: Human normalized total reward
1041
  - task:
1042
  type: reinforcement-learning
@@ -1062,13 +1062,13 @@ model-index:
1062
  type: atari-videopinball
1063
  metrics:
1064
  - type: total_reward
1065
- value: 11917.43 +/- 8204.28
1066
  name: Total reward
1067
  - type: expert_normalized_total_reward
1068
  value: 0.03 +/- 0.02
1069
  name: Expert normalized total reward
1070
  - type: human_normalized_total_reward
1071
- value: 0.67 +/- 0.46
1072
  name: Human normalized total reward
1073
  - task:
1074
  type: reinforcement-learning
@@ -1078,13 +1078,13 @@ model-index:
1078
  type: atari-wizardofwor
1079
  metrics:
1080
  - type: total_reward
1081
- value: 2544.00 +/- 2902.42
1082
  name: Total reward
1083
  - type: expert_normalized_total_reward
1084
- value: 0.04 +/- 0.06
1085
  name: Expert normalized total reward
1086
  - type: human_normalized_total_reward
1087
- value: 0.47 +/- 0.69
1088
  name: Human normalized total reward
1089
  - task:
1090
  type: reinforcement-learning
@@ -1094,13 +1094,13 @@ model-index:
1094
  type: atari-yarsrevenge
1095
  metrics:
1096
  - type: total_reward
1097
- value: 12532.70 +/- 8062.85
1098
  name: Total reward
1099
  - type: expert_normalized_total_reward
1100
- value: 0.04 +/- 0.03
1101
  name: Expert normalized total reward
1102
  - type: human_normalized_total_reward
1103
- value: 0.18 +/- 0.16
1104
  name: Human normalized total reward
1105
  - task:
1106
  type: reinforcement-learning
@@ -1110,13 +1110,13 @@ model-index:
1110
  type: atari-zaxxon
1111
  metrics:
1112
  - type: total_reward
1113
- value: 6902.00 +/- 3206.09
1114
  name: Total reward
1115
  - type: expert_normalized_total_reward
1116
- value: 0.09 +/- 0.04
1117
  name: Expert normalized total reward
1118
  - type: human_normalized_total_reward
1119
- value: 0.75 +/- 0.35
1120
  name: Human normalized total reward
1121
  - task:
1122
  type: reinforcement-learning
@@ -1126,10 +1126,10 @@ model-index:
1126
  type: babyai-action-obj-door
1127
  metrics:
1128
  - type: total_reward
1129
- value: 0.95 +/- 0.13
1130
  name: Total reward
1131
  - type: expert_normalized_total_reward
1132
- value: 0.94 +/- 0.22
1133
  name: Expert normalized total reward
1134
  - task:
1135
  type: reinforcement-learning
@@ -1152,10 +1152,10 @@ model-index:
1152
  type: babyai-boss-level-no-unlock
1153
  metrics:
1154
  - type: total_reward
1155
- value: 0.44 +/- 0.45
1156
  name: Total reward
1157
  - type: expert_normalized_total_reward
1158
- value: 0.43 +/- 0.51
1159
  name: Expert normalized total reward
1160
  - task:
1161
  type: reinforcement-learning
@@ -1165,10 +1165,10 @@ model-index:
1165
  type: babyai-boss-level
1166
  metrics:
1167
  - type: total_reward
1168
- value: 0.48 +/- 0.45
1169
  name: Total reward
1170
  - type: expert_normalized_total_reward
1171
- value: 0.48 +/- 0.51
1172
  name: Expert normalized total reward
1173
  - task:
1174
  type: reinforcement-learning
@@ -1178,7 +1178,7 @@ model-index:
1178
  type: babyai-find-obj-s5
1179
  metrics:
1180
  - type: total_reward
1181
- value: 0.95 +/- 0.03
1182
  name: Total reward
1183
  - type: expert_normalized_total_reward
1184
  value: 1.00 +/- 0.04
@@ -1191,10 +1191,10 @@ model-index:
1191
  type: babyai-go-to-door
1192
  metrics:
1193
  - type: total_reward
1194
- value: 0.99 +/- 0.01
1195
  name: Total reward
1196
  - type: expert_normalized_total_reward
1197
- value: 1.00 +/- 0.01
1198
  name: Expert normalized total reward
1199
  - task:
1200
  type: reinforcement-learning
@@ -1204,10 +1204,10 @@ model-index:
1204
  type: babyai-go-to-imp-unlock
1205
  metrics:
1206
  - type: total_reward
1207
- value: 0.50 +/- 0.44
1208
  name: Total reward
1209
  - type: expert_normalized_total_reward
1210
- value: 0.56 +/- 0.59
1211
  name: Expert normalized total reward
1212
  - task:
1213
  type: reinforcement-learning
@@ -1217,10 +1217,10 @@ model-index:
1217
  type: babyai-go-to-local
1218
  metrics:
1219
  - type: total_reward
1220
- value: 0.88 +/- 0.14
1221
  name: Total reward
1222
  - type: expert_normalized_total_reward
1223
- value: 0.94 +/- 0.18
1224
  name: Expert normalized total reward
1225
  - task:
1226
  type: reinforcement-learning
@@ -1233,7 +1233,7 @@ model-index:
1233
  value: 0.98 +/- 0.04
1234
  name: Total reward
1235
  - type: expert_normalized_total_reward
1236
- value: 0.97 +/- 0.08
1237
  name: Expert normalized total reward
1238
  - task:
1239
  type: reinforcement-learning
@@ -1243,10 +1243,10 @@ model-index:
1243
  type: babyai-go-to-obj
1244
  metrics:
1245
  - type: total_reward
1246
- value: 0.93 +/- 0.04
1247
  name: Total reward
1248
  - type: expert_normalized_total_reward
1249
- value: 0.99 +/- 0.05
1250
  name: Expert normalized total reward
1251
  - task:
1252
  type: reinforcement-learning
@@ -1256,10 +1256,10 @@ model-index:
1256
  type: babyai-go-to-red-ball-grey
1257
  metrics:
1258
  - type: total_reward
1259
- value: 0.91 +/- 0.06
1260
  name: Total reward
1261
  - type: expert_normalized_total_reward
1262
- value: 0.99 +/- 0.08
1263
  name: Expert normalized total reward
1264
  - task:
1265
  type: reinforcement-learning
@@ -1272,7 +1272,7 @@ model-index:
1272
  value: 0.93 +/- 0.03
1273
  name: Total reward
1274
  - type: expert_normalized_total_reward
1275
- value: 1.00 +/- 0.04
1276
  name: Expert normalized total reward
1277
  - task:
1278
  type: reinforcement-learning
@@ -1282,10 +1282,10 @@ model-index:
1282
  type: babyai-go-to-red-ball
1283
  metrics:
1284
  - type: total_reward
1285
- value: 0.91 +/- 0.08
1286
  name: Total reward
1287
  - type: expert_normalized_total_reward
1288
- value: 0.98 +/- 0.11
1289
  name: Expert normalized total reward
1290
  - task:
1291
  type: reinforcement-learning
@@ -1295,10 +1295,10 @@ model-index:
1295
  type: babyai-go-to-red-blue-ball
1296
  metrics:
1297
  - type: total_reward
1298
- value: 0.88 +/- 0.11
1299
  name: Total reward
1300
  - type: expert_normalized_total_reward
1301
- value: 0.96 +/- 0.13
1302
  name: Expert normalized total reward
1303
  - task:
1304
  type: reinforcement-learning
@@ -1308,10 +1308,10 @@ model-index:
1308
  type: babyai-go-to-seq
1309
  metrics:
1310
  - type: total_reward
1311
- value: 0.73 +/- 0.34
1312
  name: Total reward
1313
  - type: expert_normalized_total_reward
1314
- value: 0.75 +/- 0.40
1315
  name: Expert normalized total reward
1316
  - task:
1317
  type: reinforcement-learning
@@ -1321,10 +1321,10 @@ model-index:
1321
  type: babyai-go-to
1322
  metrics:
1323
  - type: total_reward
1324
- value: 0.80 +/- 0.27
1325
  name: Total reward
1326
  - type: expert_normalized_total_reward
1327
- value: 0.85 +/- 0.35
1328
  name: Expert normalized total reward
1329
  - task:
1330
  type: reinforcement-learning
@@ -1334,10 +1334,10 @@ model-index:
1334
  type: babyai-key-corridor
1335
  metrics:
1336
  - type: total_reward
1337
- value: 0.88 +/- 0.10
1338
  name: Total reward
1339
  - type: expert_normalized_total_reward
1340
- value: 0.97 +/- 0.11
1341
  name: Expert normalized total reward
1342
  - task:
1343
  type: reinforcement-learning
@@ -1347,10 +1347,10 @@ model-index:
1347
  type: babyai-mini-boss-level
1348
  metrics:
1349
  - type: total_reward
1350
- value: 0.69 +/- 0.35
1351
  name: Total reward
1352
  - type: expert_normalized_total_reward
1353
- value: 0.76 +/- 0.43
1354
  name: Expert normalized total reward
1355
  - task:
1356
  type: reinforcement-learning
@@ -1360,10 +1360,10 @@ model-index:
1360
  type: babyai-move-two-across-s8n9
1361
  metrics:
1362
  - type: total_reward
1363
- value: 0.03 +/- 0.15
1364
  name: Total reward
1365
  - type: expert_normalized_total_reward
1366
- value: 0.03 +/- 0.16
1367
  name: Expert normalized total reward
1368
  - task:
1369
  type: reinforcement-learning
@@ -1373,7 +1373,7 @@ model-index:
1373
  type: babyai-one-room-s8
1374
  metrics:
1375
  - type: total_reward
1376
- value: 0.92 +/- 0.03
1377
  name: Total reward
1378
  - type: expert_normalized_total_reward
1379
  value: 1.00 +/- 0.04
@@ -1399,10 +1399,10 @@ model-index:
1399
  type: babyai-open-doors-order-n4
1400
  metrics:
1401
  - type: total_reward
1402
- value: 0.96 +/- 0.11
1403
  name: Total reward
1404
  - type: expert_normalized_total_reward
1405
- value: 0.97 +/- 0.13
1406
  name: Expert normalized total reward
1407
  - task:
1408
  type: reinforcement-learning
@@ -1412,7 +1412,7 @@ model-index:
1412
  type: babyai-open-red-door
1413
  metrics:
1414
  - type: total_reward
1415
- value: 0.92 +/- 0.02
1416
  name: Total reward
1417
  - type: expert_normalized_total_reward
1418
  value: 1.00 +/- 0.03
@@ -1438,10 +1438,10 @@ model-index:
1438
  type: babyai-open
1439
  metrics:
1440
  - type: total_reward
1441
- value: 0.93 +/- 0.11
1442
  name: Total reward
1443
  - type: expert_normalized_total_reward
1444
- value: 0.97 +/- 0.13
1445
  name: Expert normalized total reward
1446
  - task:
1447
  type: reinforcement-learning
@@ -1464,10 +1464,10 @@ model-index:
1464
  type: babyai-pickup-dist
1465
  metrics:
1466
  - type: total_reward
1467
- value: 0.88 +/- 0.13
1468
  name: Total reward
1469
  - type: expert_normalized_total_reward
1470
- value: 1.03 +/- 0.18
1471
  name: Expert normalized total reward
1472
  - task:
1473
  type: reinforcement-learning
@@ -1477,10 +1477,10 @@ model-index:
1477
  type: babyai-pickup-loc
1478
  metrics:
1479
  - type: total_reward
1480
- value: 0.84 +/- 0.20
1481
  name: Total reward
1482
  - type: expert_normalized_total_reward
1483
- value: 0.91 +/- 0.24
1484
  name: Expert normalized total reward
1485
  - task:
1486
  type: reinforcement-learning
@@ -1490,10 +1490,10 @@ model-index:
1490
  type: babyai-pickup
1491
  metrics:
1492
  - type: total_reward
1493
- value: 0.72 +/- 0.34
1494
  name: Total reward
1495
  - type: expert_normalized_total_reward
1496
- value: 0.77 +/- 0.40
1497
  name: Expert normalized total reward
1498
  - task:
1499
  type: reinforcement-learning
@@ -1503,10 +1503,10 @@ model-index:
1503
  type: babyai-put-next-local
1504
  metrics:
1505
  - type: total_reward
1506
- value: 0.60 +/- 0.36
1507
  name: Total reward
1508
  - type: expert_normalized_total_reward
1509
- value: 0.65 +/- 0.39
1510
  name: Expert normalized total reward
1511
  - task:
1512
  type: reinforcement-learning
@@ -1516,10 +1516,10 @@ model-index:
1516
  type: babyai-put-next
1517
  metrics:
1518
  - type: total_reward
1519
- value: 0.82 +/- 0.26
1520
  name: Total reward
1521
  - type: expert_normalized_total_reward
1522
- value: 0.86 +/- 0.27
1523
  name: Expert normalized total reward
1524
  - task:
1525
  type: reinforcement-learning
@@ -1529,10 +1529,10 @@ model-index:
1529
  type: babyai-synth-loc
1530
  metrics:
1531
  - type: total_reward
1532
- value: 0.82 +/- 0.31
1533
  name: Total reward
1534
  - type: expert_normalized_total_reward
1535
- value: 0.85 +/- 0.38
1536
  name: Expert normalized total reward
1537
  - task:
1538
  type: reinforcement-learning
@@ -1542,10 +1542,10 @@ model-index:
1542
  type: babyai-synth-seq
1543
  metrics:
1544
  - type: total_reward
1545
- value: 0.57 +/- 0.44
1546
  name: Total reward
1547
  - type: expert_normalized_total_reward
1548
- value: 0.57 +/- 0.50
1549
  name: Expert normalized total reward
1550
  - task:
1551
  type: reinforcement-learning
@@ -1555,10 +1555,10 @@ model-index:
1555
  type: babyai-synth
1556
  metrics:
1557
  - type: total_reward
1558
- value: 0.68 +/- 0.39
1559
  name: Total reward
1560
  - type: expert_normalized_total_reward
1561
- value: 0.69 +/- 0.47
1562
  name: Expert normalized total reward
1563
  - task:
1564
  type: reinforcement-learning
@@ -1568,10 +1568,10 @@ model-index:
1568
  type: babyai-unblock-pickup
1569
  metrics:
1570
  - type: total_reward
1571
- value: 0.76 +/- 0.33
1572
  name: Total reward
1573
  - type: expert_normalized_total_reward
1574
- value: 0.82 +/- 0.39
1575
  name: Expert normalized total reward
1576
  - task:
1577
  type: reinforcement-learning
@@ -1594,10 +1594,10 @@ model-index:
1594
  type: babyai-unlock-pickup
1595
  metrics:
1596
  - type: total_reward
1597
- value: 0.76 +/- 0.03
1598
  name: Total reward
1599
  - type: expert_normalized_total_reward
1600
- value: 1.01 +/- 0.04
1601
  name: Expert normalized total reward
1602
  - task:
1603
  type: reinforcement-learning
@@ -1607,10 +1607,10 @@ model-index:
1607
  type: babyai-unlock-to-unlock
1608
  metrics:
1609
  - type: total_reward
1610
- value: 0.86 +/- 0.29
1611
  name: Total reward
1612
  - type: expert_normalized_total_reward
1613
- value: 0.89 +/- 0.30
1614
  name: Expert normalized total reward
1615
  - task:
1616
  type: reinforcement-learning
@@ -1620,10 +1620,10 @@ model-index:
1620
  type: babyai-unlock
1621
  metrics:
1622
  - type: total_reward
1623
- value: 0.55 +/- 0.42
1624
  name: Total reward
1625
  - type: expert_normalized_total_reward
1626
- value: 0.63 +/- 0.50
1627
  name: Expert normalized total reward
1628
  - task:
1629
  type: reinforcement-learning
@@ -1633,10 +1633,10 @@ model-index:
1633
  type: metaworld-assembly
1634
  metrics:
1635
  - type: total_reward
1636
- value: 238.32 +/- 32.98
1637
  name: Total reward
1638
  - type: expert_normalized_total_reward
1639
- value: 0.96 +/- 0.16
1640
  name: Expert normalized total reward
1641
  - task:
1642
  type: reinforcement-learning
@@ -1646,7 +1646,7 @@ model-index:
1646
  type: metaworld-basketball
1647
  metrics:
1648
  - type: total_reward
1649
- value: 1.59 +/- 0.43
1650
  name: Total reward
1651
  - type: expert_normalized_total_reward
1652
  value: -0.00 +/- 0.00
@@ -1659,10 +1659,10 @@ model-index:
1659
  type: metaworld-bin-picking
1660
  metrics:
1661
  - type: total_reward
1662
- value: 374.18 +/- 168.23
1663
  name: Total reward
1664
  - type: expert_normalized_total_reward
1665
- value: 0.88 +/- 0.40
1666
  name: Expert normalized total reward
1667
  - task:
1668
  type: reinforcement-learning
@@ -1672,10 +1672,10 @@ model-index:
1672
  type: metaworld-box-close
1673
  metrics:
1674
  - type: total_reward
1675
- value: 510.10 +/- 117.47
1676
  name: Total reward
1677
  - type: expert_normalized_total_reward
1678
- value: 0.99 +/- 0.27
1679
  name: Expert normalized total reward
1680
  - task:
1681
  type: reinforcement-learning
@@ -1685,10 +1685,10 @@ model-index:
1685
  type: metaworld-button-press-topdown-wall
1686
  metrics:
1687
  - type: total_reward
1688
- value: 260.07 +/- 67.75
1689
  name: Total reward
1690
  - type: expert_normalized_total_reward
1691
- value: 0.49 +/- 0.14
1692
  name: Expert normalized total reward
1693
  - task:
1694
  type: reinforcement-learning
@@ -1698,10 +1698,10 @@ model-index:
1698
  type: metaworld-button-press-topdown
1699
  metrics:
1700
  - type: total_reward
1701
- value: 265.16 +/- 77.93
1702
  name: Total reward
1703
  - type: expert_normalized_total_reward
1704
- value: 0.51 +/- 0.17
1705
  name: Expert normalized total reward
1706
  - task:
1707
  type: reinforcement-learning
@@ -1711,10 +1711,10 @@ model-index:
1711
  type: metaworld-button-press-wall
1712
  metrics:
1713
  - type: total_reward
1714
- value: 621.75 +/- 137.13
1715
  name: Total reward
1716
  - type: expert_normalized_total_reward
1717
- value: 0.92 +/- 0.21
1718
  name: Expert normalized total reward
1719
  - task:
1720
  type: reinforcement-learning
@@ -1724,10 +1724,10 @@ model-index:
1724
  type: metaworld-button-press
1725
  metrics:
1726
  - type: total_reward
1727
- value: 556.75 +/- 198.85
1728
  name: Total reward
1729
  - type: expert_normalized_total_reward
1730
- value: 0.86 +/- 0.33
1731
  name: Expert normalized total reward
1732
  - task:
1733
  type: reinforcement-learning
@@ -1737,10 +1737,10 @@ model-index:
1737
  type: metaworld-coffee-button
1738
  metrics:
1739
  - type: total_reward
1740
- value: 250.50 +/- 266.92
1741
  name: Total reward
1742
  - type: expert_normalized_total_reward
1743
- value: 0.31 +/- 0.38
1744
  name: Expert normalized total reward
1745
  - task:
1746
  type: reinforcement-learning
@@ -1750,10 +1750,10 @@ model-index:
1750
  type: metaworld-coffee-pull
1751
  metrics:
1752
  - type: total_reward
1753
- value: 55.13 +/- 96.96
1754
  name: Total reward
1755
  - type: expert_normalized_total_reward
1756
- value: 0.20 +/- 0.38
1757
  name: Expert normalized total reward
1758
  - task:
1759
  type: reinforcement-learning
@@ -1763,10 +1763,10 @@ model-index:
1763
  type: metaworld-coffee-push
1764
  metrics:
1765
  - type: total_reward
1766
- value: 269.17 +/- 237.82
1767
  name: Total reward
1768
  - type: expert_normalized_total_reward
1769
- value: 0.54 +/- 0.48
1770
  name: Expert normalized total reward
1771
  - task:
1772
  type: reinforcement-learning
@@ -1776,10 +1776,10 @@ model-index:
1776
  type: metaworld-dial-turn
1777
  metrics:
1778
  - type: total_reward
1779
- value: 738.22 +/- 168.43
1780
  name: Total reward
1781
  - type: expert_normalized_total_reward
1782
- value: 0.93 +/- 0.22
1783
  name: Expert normalized total reward
1784
  - task:
1785
  type: reinforcement-learning
@@ -1789,10 +1789,10 @@ model-index:
1789
  type: metaworld-disassemble
1790
  metrics:
1791
  - type: total_reward
1792
- value: 39.14 +/- 11.85
1793
  name: Total reward
1794
  - type: expert_normalized_total_reward
1795
- value: -0.47 +/- 4.70
1796
  name: Expert normalized total reward
1797
  - task:
1798
  type: reinforcement-learning
@@ -1802,7 +1802,7 @@ model-index:
1802
  type: metaworld-door-close
1803
  metrics:
1804
  - type: total_reward
1805
- value: 528.17 +/- 29.90
1806
  name: Total reward
1807
  - type: expert_normalized_total_reward
1808
  value: 1.00 +/- 0.06
@@ -1815,7 +1815,7 @@ model-index:
1815
  type: metaworld-door-lock
1816
  metrics:
1817
  - type: total_reward
1818
- value: 676.51 +/- 192.68
1819
  name: Total reward
1820
  - type: expert_normalized_total_reward
1821
  value: 0.81 +/- 0.28
@@ -1828,10 +1828,10 @@ model-index:
1828
  type: metaworld-door-open
1829
  metrics:
1830
  - type: total_reward
1831
- value: 572.76 +/- 57.53
1832
  name: Total reward
1833
  - type: expert_normalized_total_reward
1834
- value: 0.98 +/- 0.11
1835
  name: Expert normalized total reward
1836
  - task:
1837
  type: reinforcement-learning
@@ -1841,10 +1841,10 @@ model-index:
1841
  type: metaworld-door-unlock
1842
  metrics:
1843
  - type: total_reward
1844
- value: 654.94 +/- 260.64
1845
  name: Total reward
1846
  - type: expert_normalized_total_reward
1847
- value: 0.79 +/- 0.37
1848
  name: Expert normalized total reward
1849
  - task:
1850
  type: reinforcement-learning
@@ -1854,10 +1854,10 @@ model-index:
1854
  type: metaworld-drawer-close
1855
  metrics:
1856
  - type: total_reward
1857
- value: 663.02 +/- 214.51
1858
  name: Total reward
1859
  - type: expert_normalized_total_reward
1860
- value: 0.73 +/- 0.29
1861
  name: Expert normalized total reward
1862
  - task:
1863
  type: reinforcement-learning
@@ -1867,10 +1867,10 @@ model-index:
1867
  type: metaworld-drawer-open
1868
  metrics:
1869
  - type: total_reward
1870
- value: 489.07 +/- 21.28
1871
  name: Total reward
1872
  - type: expert_normalized_total_reward
1873
- value: 0.99 +/- 0.06
1874
  name: Expert normalized total reward
1875
  - task:
1876
  type: reinforcement-learning
@@ -1880,10 +1880,10 @@ model-index:
1880
  type: metaworld-faucet-close
1881
  metrics:
1882
  - type: total_reward
1883
- value: 361.32 +/- 72.28
1884
  name: Total reward
1885
  - type: expert_normalized_total_reward
1886
- value: 0.22 +/- 0.14
1887
  name: Expert normalized total reward
1888
  - task:
1889
  type: reinforcement-learning
@@ -1893,10 +1893,10 @@ model-index:
1893
  type: metaworld-faucet-open
1894
  metrics:
1895
  - type: total_reward
1896
- value: 637.86 +/- 134.50
1897
  name: Total reward
1898
  - type: expert_normalized_total_reward
1899
- value: 0.85 +/- 0.29
1900
  name: Expert normalized total reward
1901
  - task:
1902
  type: reinforcement-learning
@@ -1906,10 +1906,10 @@ model-index:
1906
  type: metaworld-hammer
1907
  metrics:
1908
  - type: total_reward
1909
- value: 691.72 +/- 25.25
1910
  name: Total reward
1911
  - type: expert_normalized_total_reward
1912
- value: 1.00 +/- 0.04
1913
  name: Expert normalized total reward
1914
  - task:
1915
  type: reinforcement-learning
@@ -1919,10 +1919,10 @@ model-index:
1919
  type: metaworld-hand-insert
1920
  metrics:
1921
  - type: total_reward
1922
- value: 719.57 +/- 99.26
1923
  name: Total reward
1924
  - type: expert_normalized_total_reward
1925
- value: 0.97 +/- 0.13
1926
  name: Expert normalized total reward
1927
  - task:
1928
  type: reinforcement-learning
@@ -1932,10 +1932,10 @@ model-index:
1932
  type: metaworld-handle-press-side
1933
  metrics:
1934
  - type: total_reward
1935
- value: 84.25 +/- 113.34
1936
  name: Total reward
1937
  - type: expert_normalized_total_reward
1938
- value: 0.03 +/- 0.14
1939
  name: Expert normalized total reward
1940
  - task:
1941
  type: reinforcement-learning
@@ -1945,10 +1945,10 @@ model-index:
1945
  type: metaworld-handle-press
1946
  metrics:
1947
  - type: total_reward
1948
- value: 731.94 +/- 261.90
1949
  name: Total reward
1950
  - type: expert_normalized_total_reward
1951
- value: 0.84 +/- 0.34
1952
  name: Expert normalized total reward
1953
  - task:
1954
  type: reinforcement-learning
@@ -1958,10 +1958,10 @@ model-index:
1958
  type: metaworld-handle-pull-side
1959
  metrics:
1960
  - type: total_reward
1961
- value: 233.11 +/- 199.49
1962
  name: Total reward
1963
  - type: expert_normalized_total_reward
1964
- value: 0.60 +/- 0.52
1965
  name: Expert normalized total reward
1966
  - task:
1967
  type: reinforcement-learning
@@ -1971,10 +1971,10 @@ model-index:
1971
  type: metaworld-handle-pull
1972
  metrics:
1973
  - type: total_reward
1974
- value: 501.29 +/- 209.45
1975
  name: Total reward
1976
  - type: expert_normalized_total_reward
1977
- value: 0.74 +/- 0.32
1978
  name: Expert normalized total reward
1979
  - task:
1980
  type: reinforcement-learning
@@ -1984,10 +1984,10 @@ model-index:
1984
  type: metaworld-lever-pull
1985
  metrics:
1986
  - type: total_reward
1987
- value: 250.18 +/- 228.59
1988
  name: Total reward
1989
  - type: expert_normalized_total_reward
1990
- value: 0.34 +/- 0.41
1991
  name: Expert normalized total reward
1992
  - task:
1993
  type: reinforcement-learning
@@ -1997,10 +1997,10 @@ model-index:
1997
  type: metaworld-peg-insert-side
1998
  metrics:
1999
  - type: total_reward
2000
- value: 288.02 +/- 157.87
2001
  name: Total reward
2002
  - type: expert_normalized_total_reward
2003
- value: 0.91 +/- 0.50
2004
  name: Expert normalized total reward
2005
  - task:
2006
  type: reinforcement-learning
@@ -2010,10 +2010,10 @@ model-index:
2010
  type: metaworld-peg-unplug-side
2011
  metrics:
2012
  - type: total_reward
2013
- value: 68.48 +/- 125.34
2014
  name: Total reward
2015
  - type: expert_normalized_total_reward
2016
- value: 0.14 +/- 0.28
2017
  name: Expert normalized total reward
2018
  - task:
2019
  type: reinforcement-learning
@@ -2036,10 +2036,10 @@ model-index:
2036
  type: metaworld-pick-place-wall
2037
  metrics:
2038
  - type: total_reward
2039
- value: 6.87 +/- 44.99
2040
  name: Total reward
2041
  - type: expert_normalized_total_reward
2042
- value: 0.02 +/- 0.10
2043
  name: Expert normalized total reward
2044
  - task:
2045
  type: reinforcement-learning
@@ -2049,10 +2049,10 @@ model-index:
2049
  type: metaworld-pick-place
2050
  metrics:
2051
  - type: total_reward
2052
- value: 264.18 +/- 195.69
2053
  name: Total reward
2054
  - type: expert_normalized_total_reward
2055
- value: 0.63 +/- 0.47
2056
  name: Expert normalized total reward
2057
  - task:
2058
  type: reinforcement-learning
@@ -2062,10 +2062,10 @@ model-index:
2062
  type: metaworld-plate-slide-back-side
2063
  metrics:
2064
  - type: total_reward
2065
- value: 697.54 +/- 137.79
2066
  name: Total reward
2067
  - type: expert_normalized_total_reward
2068
- value: 0.95 +/- 0.20
2069
  name: Expert normalized total reward
2070
  - task:
2071
  type: reinforcement-learning
@@ -2075,7 +2075,7 @@ model-index:
2075
  type: metaworld-plate-slide-back
2076
  metrics:
2077
  - type: total_reward
2078
- value: 196.80 +/- 1.73
2079
  name: Total reward
2080
  - type: expert_normalized_total_reward
2081
  value: 0.24 +/- 0.00
@@ -2088,7 +2088,7 @@ model-index:
2088
  type: metaworld-plate-slide-side
2089
  metrics:
2090
  - type: total_reward
2091
- value: 122.61 +/- 24.52
2092
  name: Total reward
2093
  - type: expert_normalized_total_reward
2094
  value: 0.16 +/- 0.04
@@ -2101,10 +2101,10 @@ model-index:
2101
  type: metaworld-plate-slide
2102
  metrics:
2103
  - type: total_reward
2104
- value: 497.42 +/- 168.74
2105
  name: Total reward
2106
  - type: expert_normalized_total_reward
2107
- value: 0.93 +/- 0.37
2108
  name: Expert normalized total reward
2109
  - task:
2110
  type: reinforcement-learning
@@ -2114,10 +2114,10 @@ model-index:
2114
  type: metaworld-push-back
2115
  metrics:
2116
  - type: total_reward
2117
- value: 91.41 +/- 115.05
2118
  name: Total reward
2119
  - type: expert_normalized_total_reward
2120
- value: 1.08 +/- 1.37
2121
  name: Expert normalized total reward
2122
  - task:
2123
  type: reinforcement-learning
@@ -2127,10 +2127,10 @@ model-index:
2127
  type: metaworld-push-wall
2128
  metrics:
2129
  - type: total_reward
2130
- value: 116.49 +/- 208.05
2131
  name: Total reward
2132
  - type: expert_normalized_total_reward
2133
- value: 0.15 +/- 0.28
2134
  name: Expert normalized total reward
2135
  - task:
2136
  type: reinforcement-learning
@@ -2140,10 +2140,10 @@ model-index:
2140
  type: metaworld-push
2141
  metrics:
2142
  - type: total_reward
2143
- value: 604.25 +/- 261.90
2144
  name: Total reward
2145
  - type: expert_normalized_total_reward
2146
- value: 0.80 +/- 0.35
2147
  name: Expert normalized total reward
2148
  - task:
2149
  type: reinforcement-learning
@@ -2153,10 +2153,10 @@ model-index:
2153
  type: metaworld-reach-wall
2154
  metrics:
2155
  - type: total_reward
2156
- value: 634.57 +/- 231.40
2157
  name: Total reward
2158
  - type: expert_normalized_total_reward
2159
- value: 0.81 +/- 0.38
2160
  name: Expert normalized total reward
2161
  - task:
2162
  type: reinforcement-learning
@@ -2166,10 +2166,10 @@ model-index:
2166
  type: metaworld-reach
2167
  metrics:
2168
  - type: total_reward
2169
- value: 325.27 +/- 159.21
2170
  name: Total reward
2171
  - type: expert_normalized_total_reward
2172
- value: 0.33 +/- 0.30
2173
  name: Expert normalized total reward
2174
  - task:
2175
  type: reinforcement-learning
@@ -2179,10 +2179,10 @@ model-index:
2179
  type: metaworld-shelf-place
2180
  metrics:
2181
  - type: total_reward
2182
- value: 124.60 +/- 112.83
2183
  name: Total reward
2184
  - type: expert_normalized_total_reward
2185
- value: 0.52 +/- 0.47
2186
  name: Expert normalized total reward
2187
  - task:
2188
  type: reinforcement-learning
@@ -2192,10 +2192,10 @@ model-index:
2192
  type: metaworld-soccer
2193
  metrics:
2194
  - type: total_reward
2195
- value: 364.50 +/- 175.45
2196
  name: Total reward
2197
  - type: expert_normalized_total_reward
2198
- value: 0.97 +/- 0.47
2199
  name: Expert normalized total reward
2200
  - task:
2201
  type: reinforcement-learning
@@ -2205,10 +2205,10 @@ model-index:
2205
  type: metaworld-stick-pull
2206
  metrics:
2207
  - type: total_reward
2208
- value: 398.64 +/- 205.60
2209
  name: Total reward
2210
  - type: expert_normalized_total_reward
2211
- value: 0.76 +/- 0.39
2212
  name: Expert normalized total reward
2213
  - task:
2214
  type: reinforcement-learning
@@ -2218,10 +2218,10 @@ model-index:
2218
  type: metaworld-stick-push
2219
  metrics:
2220
  - type: total_reward
2221
- value: 158.29 +/- 264.59
2222
  name: Total reward
2223
  - type: expert_normalized_total_reward
2224
- value: 0.25 +/- 0.42
2225
  name: Expert normalized total reward
2226
  - task:
2227
  type: reinforcement-learning
@@ -2231,10 +2231,10 @@ model-index:
2231
  type: metaworld-sweep-into
2232
  metrics:
2233
  - type: total_reward
2234
- value: 775.30 +/- 119.00
2235
  name: Total reward
2236
  - type: expert_normalized_total_reward
2237
- value: 0.97 +/- 0.15
2238
  name: Expert normalized total reward
2239
  - task:
2240
  type: reinforcement-learning
@@ -2244,10 +2244,10 @@ model-index:
2244
  type: metaworld-sweep
2245
  metrics:
2246
  - type: total_reward
2247
- value: 15.64 +/- 9.29
2248
  name: Total reward
2249
  - type: expert_normalized_total_reward
2250
- value: 0.01 +/- 0.02
2251
  name: Expert normalized total reward
2252
  - task:
2253
  type: reinforcement-learning
@@ -2257,10 +2257,10 @@ model-index:
2257
  type: metaworld-window-close
2258
  metrics:
2259
  - type: total_reward
2260
- value: 423.33 +/- 203.92
2261
  name: Total reward
2262
  - type: expert_normalized_total_reward
2263
- value: 0.69 +/- 0.38
2264
  name: Expert normalized total reward
2265
  - task:
2266
  type: reinforcement-learning
@@ -2270,10 +2270,10 @@ model-index:
2270
  type: metaworld-window-open
2271
  metrics:
2272
  - type: total_reward
2273
- value: 593.10 +/- 54.83
2274
  name: Total reward
2275
  - type: expert_normalized_total_reward
2276
- value: 1.00 +/- 0.10
2277
  name: Expert normalized total reward
2278
  - task:
2279
  type: reinforcement-learning
@@ -2283,10 +2283,10 @@ model-index:
2283
  type: mujoco-ant
2284
  metrics:
2285
  - type: total_reward
2286
- value: 5268.02 +/- 1495.39
2287
  name: Total reward
2288
  - type: expert_normalized_total_reward
2289
- value: 0.90 +/- 0.25
2290
  name: Expert normalized total reward
2291
  - task:
2292
  type: reinforcement-learning
@@ -2296,10 +2296,10 @@ model-index:
2296
  type: mujoco-doublependulum
2297
  metrics:
2298
  - type: total_reward
2299
- value: 4750.14 +/- 931.20
2300
  name: Total reward
2301
  - type: expert_normalized_total_reward
2302
- value: 0.51 +/- 0.10
2303
  name: Expert normalized total reward
2304
  - task:
2305
  type: reinforcement-learning
@@ -2309,10 +2309,10 @@ model-index:
2309
  type: mujoco-halfcheetah
2310
  metrics:
2311
  - type: total_reward
2312
- value: 6659.69 +/- 409.71
2313
  name: Total reward
2314
  - type: expert_normalized_total_reward
2315
- value: 0.90 +/- 0.05
2316
  name: Expert normalized total reward
2317
  - task:
2318
  type: reinforcement-learning
@@ -2322,10 +2322,10 @@ model-index:
2322
  type: mujoco-hopper
2323
  metrics:
2324
  - type: total_reward
2325
- value: 1835.93 +/- 532.21
2326
  name: Total reward
2327
  - type: expert_normalized_total_reward
2328
- value: 0.99 +/- 0.29
2329
  name: Expert normalized total reward
2330
  - task:
2331
  type: reinforcement-learning
@@ -2335,7 +2335,7 @@ model-index:
2335
  type: mujoco-humanoid
2336
  metrics:
2337
  - type: total_reward
2338
- value: 697.44 +/- 108.06
2339
  name: Total reward
2340
  - type: expert_normalized_total_reward
2341
  value: 0.09 +/- 0.02
@@ -2348,10 +2348,10 @@ model-index:
2348
  type: mujoco-pendulum
2349
  metrics:
2350
  - type: total_reward
2351
- value: 116.34 +/- 20.19
2352
  name: Total reward
2353
  - type: expert_normalized_total_reward
2354
- value: 0.23 +/- 0.04
2355
  name: Expert normalized total reward
2356
  - task:
2357
  type: reinforcement-learning
@@ -2361,10 +2361,10 @@ model-index:
2361
  type: mujoco-pusher
2362
  metrics:
2363
  - type: total_reward
2364
- value: -26.33 +/- 6.32
2365
  name: Total reward
2366
  - type: expert_normalized_total_reward
2367
- value: 0.99 +/- 0.05
2368
  name: Expert normalized total reward
2369
  - task:
2370
  type: reinforcement-learning
@@ -2374,10 +2374,10 @@ model-index:
2374
  type: mujoco-reacher
2375
  metrics:
2376
  - type: total_reward
2377
- value: -6.06 +/- 2.64
2378
  name: Total reward
2379
  - type: expert_normalized_total_reward
2380
- value: 0.99 +/- 0.07
2381
  name: Expert normalized total reward
2382
  - task:
2383
  type: reinforcement-learning
@@ -2387,10 +2387,10 @@ model-index:
2387
  type: mujoco-standup
2388
  metrics:
2389
  - type: total_reward
2390
- value: 118125.15 +/- 24880.28
2391
  name: Total reward
2392
  - type: expert_normalized_total_reward
2393
- value: 0.35 +/- 0.10
2394
  name: Expert normalized total reward
2395
  - task:
2396
  type: reinforcement-learning
@@ -2400,10 +2400,10 @@ model-index:
2400
  type: mujoco-swimmer
2401
  metrics:
2402
  - type: total_reward
2403
- value: 93.26 +/- 3.78
2404
  name: Total reward
2405
  - type: expert_normalized_total_reward
2406
- value: 1.01 +/- 0.04
2407
  name: Expert normalized total reward
2408
  - task:
2409
  type: reinforcement-learning
@@ -2413,10 +2413,10 @@ model-index:
2413
  type: mujoco-walker
2414
  metrics:
2415
  - type: total_reward
2416
- value: 4662.43 +/- 762.67
2417
  name: Total reward
2418
  - type: expert_normalized_total_reward
2419
- value: 1.01 +/- 0.16
2420
  name: Expert normalized total reward
2421
  ---
2422
 
@@ -2440,7 +2440,8 @@ This is a multi-modal and multi-task model.
2440
  ## Training
2441
 
2442
  <details>
2443
- <summary>The model was trained on the following tasks:</summary>
 
2444
  - Alien
2445
  - Amidar
2446
  - Assault
@@ -2610,4 +2611,3 @@ from transformers import AutoModelForCausalLM
2610
 
2611
  model = AutoModelForCausalLM.from_pretrained("jat-project/jat")
2612
  ```
2613
-
 
174
  value: 0.14 [0.14, 0.15]
175
  name: IQM expert normalized total reward
176
  - type: iqm_human_normalized_total_reward
177
+ value: 0.38 [0.37, 0.39]
178
  name: IQM human normalized total reward
179
  - task:
180
  type: reinforcement-learning
 
194
  type: metaworld
195
  metrics:
196
  - type: iqm_expert_normalized_total_reward
197
+ value: 0.65 [0.64, 0.67]
198
  name: IQM expert normalized total reward
199
  - task:
200
  type: reinforcement-learning
 
204
  type: mujoco
205
  metrics:
206
  - type: iqm_expert_normalized_total_reward
207
+ value: 0.85 [0.83, 0.86]
208
  name: IQM expert normalized total reward
209
  - task:
210
  type: reinforcement-learning
 
214
  type: atari-alien
215
  metrics:
216
  - type: total_reward
217
+ value: 1518.70 +/- 568.14
218
  name: Total reward
219
  - type: expert_normalized_total_reward
220
+ value: 0.08 +/- 0.03
221
  name: Expert normalized total reward
222
  - type: human_normalized_total_reward
223
+ value: 0.19 +/- 0.08
224
  name: Human normalized total reward
225
  - task:
226
  type: reinforcement-learning
 
230
  type: atari-amidar
231
  metrics:
232
  - type: total_reward
233
+ value: 89.17 +/- 78.73
234
  name: Total reward
235
  - type: expert_normalized_total_reward
236
+ value: 0.04 +/- 0.04
237
  name: Expert normalized total reward
238
  - type: human_normalized_total_reward
239
+ value: 0.05 +/- 0.05
240
  name: Human normalized total reward
241
  - task:
242
  type: reinforcement-learning
 
246
  type: atari-assault
247
  metrics:
248
  - type: total_reward
249
+ value: 1676.91 +/- 780.73
250
  name: Total reward
251
  - type: expert_normalized_total_reward
252
  value: 0.09 +/- 0.05
253
  name: Expert normalized total reward
254
  - type: human_normalized_total_reward
255
+ value: 2.80 +/- 1.50
256
  name: Human normalized total reward
257
  - task:
258
  type: reinforcement-learning
 
262
  type: atari-asterix
263
  metrics:
264
  - type: total_reward
265
+ value: 844.50 +/- 546.85
266
  name: Total reward
267
  - type: expert_normalized_total_reward
268
+ value: 0.18 +/- 0.16
269
  name: Expert normalized total reward
270
  - type: human_normalized_total_reward
271
+ value: 0.08 +/- 0.07
272
  name: Human normalized total reward
273
  - task:
274
  type: reinforcement-learning
 
278
  type: atari-asteroids
279
  metrics:
280
  - type: total_reward
281
+ value: 1357.90 +/- 453.01
282
  name: Total reward
283
  - type: expert_normalized_total_reward
284
  value: 0.00 +/- 0.00
 
294
  type: atari-atlantis
295
  metrics:
296
  - type: total_reward
297
+ value: 51843.00 +/- 123857.07
298
  name: Total reward
299
  - type: expert_normalized_total_reward
300
+ value: 0.13 +/- 0.40
301
  name: Expert normalized total reward
302
  - type: human_normalized_total_reward
303
+ value: 2.41 +/- 7.66
304
  name: Human normalized total reward
305
  - task:
306
  type: reinforcement-learning
 
310
  type: atari-bankheist
311
  metrics:
312
  - type: total_reward
313
+ value: 977.80 +/- 156.49
314
  name: Total reward
315
  - type: expert_normalized_total_reward
316
+ value: 0.74 +/- 0.12
317
  name: Expert normalized total reward
318
  - type: human_normalized_total_reward
319
+ value: 1.30 +/- 0.21
320
  name: Human normalized total reward
321
  - task:
322
  type: reinforcement-learning
 
326
  type: atari-battlezone
327
  metrics:
328
  - type: total_reward
329
+ value: 16780.00 +/- 6926.15
330
  name: Total reward
331
  - type: expert_normalized_total_reward
332
  value: 0.06 +/- 0.02
333
  name: Expert normalized total reward
334
  - type: human_normalized_total_reward
335
+ value: 0.45 +/- 0.19
336
  name: Human normalized total reward
337
  - task:
338
  type: reinforcement-learning
 
342
  type: atari-beamrider
343
  metrics:
344
  - type: total_reward
345
+ value: 768.36 +/- 364.06
346
  name: Total reward
347
  - type: expert_normalized_total_reward
348
  value: 0.01 +/- 0.01
349
  name: Expert normalized total reward
350
  - type: human_normalized_total_reward
351
+ value: 0.02 +/- 0.02
352
  name: Human normalized total reward
353
  - task:
354
  type: reinforcement-learning
 
358
  type: atari-berzerk
359
  metrics:
360
  - type: total_reward
361
+ value: 616.20 +/- 296.08
362
  name: Total reward
363
  - type: expert_normalized_total_reward
364
  value: 0.01 +/- 0.01
365
  name: Expert normalized total reward
366
  - type: human_normalized_total_reward
367
+ value: 0.20 +/- 0.12
368
  name: Human normalized total reward
369
  - task:
370
  type: reinforcement-learning
 
374
  type: atari-bowling
375
  metrics:
376
  - type: total_reward
377
+ value: 22.32 +/- 5.18
378
  name: Total reward
379
  - type: expert_normalized_total_reward
380
  value: 1.00 +/- 0.00
 
390
  type: atari-boxing
391
  metrics:
392
  - type: total_reward
393
+ value: 92.31 +/- 18.24
394
  name: Total reward
395
  - type: expert_normalized_total_reward
396
+ value: 0.94 +/- 0.19
397
  name: Expert normalized total reward
398
  - type: human_normalized_total_reward
399
+ value: 7.68 +/- 1.52
400
  name: Human normalized total reward
401
  - task:
402
  type: reinforcement-learning
 
406
  type: atari-breakout
407
  metrics:
408
  - type: total_reward
409
+ value: 7.93 +/- 5.66
410
  name: Total reward
411
  - type: expert_normalized_total_reward
412
  value: 0.01 +/- 0.01
413
  name: Expert normalized total reward
414
  - type: human_normalized_total_reward
415
+ value: 0.22 +/- 0.20
416
  name: Human normalized total reward
417
  - task:
418
  type: reinforcement-learning
 
422
  type: atari-centipede
423
  metrics:
424
  - type: total_reward
425
+ value: 5888.27 +/- 2594.62
426
  name: Total reward
427
  - type: expert_normalized_total_reward
428
+ value: 0.40 +/- 0.27
429
  name: Expert normalized total reward
430
  - type: human_normalized_total_reward
431
+ value: 0.38 +/- 0.26
432
  name: Human normalized total reward
433
  - task:
434
  type: reinforcement-learning
 
438
  type: atari-choppercommand
439
  metrics:
440
  - type: total_reward
441
+ value: 2371.00 +/- 1195.43
442
  name: Total reward
443
  - type: expert_normalized_total_reward
444
+ value: 0.02 +/- 0.01
445
  name: Expert normalized total reward
446
  - type: human_normalized_total_reward
447
+ value: 0.24 +/- 0.18
448
  name: Human normalized total reward
449
  - task:
450
  type: reinforcement-learning
 
454
  type: atari-crazyclimber
455
  metrics:
456
  - type: total_reward
457
+ value: 97145.00 +/- 30388.04
458
  name: Total reward
459
  - type: expert_normalized_total_reward
460
+ value: 0.51 +/- 0.18
461
  name: Expert normalized total reward
462
  - type: human_normalized_total_reward
463
+ value: 3.45 +/- 1.21
464
  name: Human normalized total reward
465
  - task:
466
  type: reinforcement-learning
 
470
  type: atari-defender
471
  metrics:
472
  - type: total_reward
473
+ value: 39317.50 +/- 16246.15
474
  name: Total reward
475
  - type: expert_normalized_total_reward
476
+ value: 0.10 +/- 0.05
477
  name: Expert normalized total reward
478
  - type: human_normalized_total_reward
479
+ value: 2.30 +/- 1.03
480
  name: Human normalized total reward
481
  - task:
482
  type: reinforcement-learning
 
486
  type: atari-demonattack
487
  metrics:
488
  - type: total_reward
489
+ value: 795.10 +/- 982.55
490
  name: Total reward
491
  - type: expert_normalized_total_reward
492
  value: 0.01 +/- 0.01
493
  name: Expert normalized total reward
494
  - type: human_normalized_total_reward
495
+ value: 0.35 +/- 0.54
496
  name: Human normalized total reward
497
  - task:
498
  type: reinforcement-learning
 
502
  type: atari-doubledunk
503
  metrics:
504
  - type: total_reward
505
+ value: 13.40 +/- 11.07
506
  name: Total reward
507
  - type: expert_normalized_total_reward
508
+ value: 0.81 +/- 0.28
509
  name: Expert normalized total reward
510
  - type: human_normalized_total_reward
511
+ value: 0.91 +/- 0.32
512
  name: Human normalized total reward
513
  - task:
514
  type: reinforcement-learning
 
518
  type: atari-enduro
519
  metrics:
520
  - type: total_reward
521
+ value: 103.11 +/- 28.05
522
  name: Total reward
523
  - type: expert_normalized_total_reward
524
+ value: 0.04 +/- 0.01
525
  name: Expert normalized total reward
526
  - type: human_normalized_total_reward
527
+ value: 0.12 +/- 0.03
528
  name: Human normalized total reward
529
  - task:
530
  type: reinforcement-learning
 
534
  type: atari-fishingderby
535
  metrics:
536
  - type: total_reward
537
+ value: -31.67 +/- 22.54
538
  name: Total reward
539
  - type: expert_normalized_total_reward
540
+ value: 0.61 +/- 0.23
541
  name: Expert normalized total reward
542
  - type: human_normalized_total_reward
543
+ value: 0.46 +/- 0.17
544
  name: Human normalized total reward
545
  - task:
546
  type: reinforcement-learning
 
550
  type: atari-freeway
551
  metrics:
552
  - type: total_reward
553
+ value: 27.57 +/- 1.87
554
  name: Total reward
555
  - type: expert_normalized_total_reward
556
+ value: 0.81 +/- 0.06
557
  name: Expert normalized total reward
558
  - type: human_normalized_total_reward
559
  value: 0.93 +/- 0.06
 
566
  type: atari-frostbite
567
  metrics:
568
  - type: total_reward
569
+ value: 2875.60 +/- 1679.84
570
  name: Total reward
571
  - type: expert_normalized_total_reward
572
+ value: 0.21 +/- 0.13
573
  name: Expert normalized total reward
574
  - type: human_normalized_total_reward
575
+ value: 0.66 +/- 0.39
576
  name: Human normalized total reward
577
  - task:
578
  type: reinforcement-learning
 
582
  type: atari-gopher
583
  metrics:
584
  - type: total_reward
585
+ value: 5508.80 +/- 2802.03
586
  name: Total reward
587
  - type: expert_normalized_total_reward
588
  value: 0.06 +/- 0.03
589
  name: Expert normalized total reward
590
  - type: human_normalized_total_reward
591
+ value: 2.44 +/- 1.30
592
  name: Human normalized total reward
593
  - task:
594
  type: reinforcement-learning
 
598
  type: atari-gravitar
599
  metrics:
600
  - type: total_reward
601
+ value: 1330.50 +/- 918.23
602
  name: Total reward
603
  - type: expert_normalized_total_reward
604
+ value: 0.30 +/- 0.24
605
  name: Expert normalized total reward
606
  - type: human_normalized_total_reward
607
+ value: 0.36 +/- 0.29
608
  name: Human normalized total reward
609
  - task:
610
  type: reinforcement-learning
 
614
  type: atari-hero
615
  metrics:
616
  - type: total_reward
617
+ value: 11932.00 +/- 3036.87
618
  name: Total reward
619
  - type: expert_normalized_total_reward
620
+ value: 0.25 +/- 0.07
621
  name: Expert normalized total reward
622
  - type: human_normalized_total_reward
623
+ value: 0.37 +/- 0.10
624
  name: Human normalized total reward
625
  - task:
626
  type: reinforcement-learning
 
630
  type: atari-icehockey
631
  metrics:
632
  - type: total_reward
633
+ value: 7.61 +/- 5.28
634
  name: Total reward
635
  - type: expert_normalized_total_reward
636
+ value: 0.52 +/- 0.15
637
  name: Expert normalized total reward
638
  - type: human_normalized_total_reward
639
+ value: 1.55 +/- 0.44
640
  name: Human normalized total reward
641
  - task:
642
  type: reinforcement-learning
 
646
  type: atari-jamesbond
647
  metrics:
648
  - type: total_reward
649
+ value: 425.00 +/- 632.51
650
  name: Total reward
651
  - type: expert_normalized_total_reward
652
+ value: 0.01 +/- 0.02
653
  name: Expert normalized total reward
654
  - type: human_normalized_total_reward
655
+ value: 1.45 +/- 2.31
656
  name: Human normalized total reward
657
  - task:
658
  type: reinforcement-learning
 
662
  type: atari-kangaroo
663
  metrics:
664
  - type: total_reward
665
+ value: 375.00 +/- 314.13
666
  name: Total reward
667
  - type: expert_normalized_total_reward
668
+ value: 0.62 +/- 0.60
669
  name: Expert normalized total reward
670
  - type: human_normalized_total_reward
671
+ value: 0.11 +/- 0.11
672
  name: Human normalized total reward
673
  - task:
674
  type: reinforcement-learning
 
678
  type: atari-krull
679
  metrics:
680
  - type: total_reward
681
+ value: 10743.30 +/- 1311.26
682
  name: Total reward
683
  - type: expert_normalized_total_reward
684
  value: 0.93 +/- 0.13
685
  name: Expert normalized total reward
686
  - type: human_normalized_total_reward
687
+ value: 8.57 +/- 1.23
688
  name: Human normalized total reward
689
  - task:
690
  type: reinforcement-learning
 
694
  type: atari-kungfumaster
695
  metrics:
696
  - type: total_reward
697
+ value: 253.00 +/- 233.86
698
  name: Total reward
699
  - type: expert_normalized_total_reward
700
+ value: -0.00 +/- 0.01
701
  name: Expert normalized total reward
702
  - type: human_normalized_total_reward
703
+ value: -0.00 +/- 0.01
704
  name: Human normalized total reward
705
  - task:
706
  type: reinforcement-learning
 
726
  type: atari-mspacman
727
  metrics:
728
  - type: total_reward
729
+ value: 1610.10 +/- 504.08
730
  name: Total reward
731
  - type: expert_normalized_total_reward
732
+ value: 0.20 +/- 0.08
733
  name: Expert normalized total reward
734
  - type: human_normalized_total_reward
735
+ value: 0.20 +/- 0.08
736
  name: Human normalized total reward
737
  - task:
738
  type: reinforcement-learning
 
742
  type: atari-namethisgame
743
  metrics:
744
  - type: total_reward
745
+ value: 7726.40 +/- 2166.18
746
  name: Total reward
747
  - type: expert_normalized_total_reward
748
+ value: 0.26 +/- 0.10
749
  name: Expert normalized total reward
750
  - type: human_normalized_total_reward
751
+ value: 0.94 +/- 0.38
752
  name: Human normalized total reward
753
  - task:
754
  type: reinforcement-learning
 
758
  type: atari-phoenix
759
  metrics:
760
  - type: total_reward
761
+ value: 1814.20 +/- 1275.29
762
  name: Total reward
763
  - type: expert_normalized_total_reward
764
  value: 0.00 +/- 0.00
765
  name: Expert normalized total reward
766
  - type: human_normalized_total_reward
767
+ value: 0.16 +/- 0.20
768
  name: Human normalized total reward
769
  - task:
770
  type: reinforcement-learning
 
774
  type: atari-pitfall
775
  metrics:
776
  - type: total_reward
777
+ value: -4.61 +/- 15.86
778
  name: Total reward
779
  - type: expert_normalized_total_reward
780
+ value: 0.99 +/- 0.07
781
  name: Expert normalized total reward
782
  - type: human_normalized_total_reward
783
  value: 0.03 +/- 0.00
 
790
  type: atari-pong
791
  metrics:
792
  - type: total_reward
793
+ value: 16.54 +/- 10.34
794
  name: Total reward
795
  - type: expert_normalized_total_reward
796
+ value: 0.89 +/- 0.25
797
  name: Expert normalized total reward
798
  - type: human_normalized_total_reward
799
+ value: 1.05 +/- 0.29
800
  name: Human normalized total reward
801
  - task:
802
  type: reinforcement-learning
 
822
  type: atari-qbert
823
  metrics:
824
  - type: total_reward
825
+ value: 2118.50 +/- 2764.25
826
  name: Total reward
827
  - type: expert_normalized_total_reward
828
+ value: 0.05 +/- 0.06
829
  name: Expert normalized total reward
830
  - type: human_normalized_total_reward
831
+ value: 0.15 +/- 0.21
832
  name: Human normalized total reward
833
  - task:
834
  type: reinforcement-learning
 
838
  type: atari-riverraid
839
  metrics:
840
  - type: total_reward
841
+ value: 3925.20 +/- 1530.94
842
  name: Total reward
843
  - type: expert_normalized_total_reward
844
+ value: 0.19 +/- 0.11
845
  name: Expert normalized total reward
846
  - type: human_normalized_total_reward
847
+ value: 0.16 +/- 0.10
848
  name: Human normalized total reward
849
  - task:
850
  type: reinforcement-learning
 
854
  type: atari-roadrunner
855
  metrics:
856
  - type: total_reward
857
+ value: 6929.00 +/- 5577.35
858
  name: Total reward
859
  - type: expert_normalized_total_reward
860
+ value: 0.09 +/- 0.07
861
  name: Expert normalized total reward
862
  - type: human_normalized_total_reward
863
+ value: 0.88 +/- 0.71
864
  name: Human normalized total reward
865
  - task:
866
  type: reinforcement-learning
 
870
  type: atari-robotank
871
  metrics:
872
  - type: total_reward
873
+ value: 10.22 +/- 4.71
874
  name: Total reward
875
  - type: expert_normalized_total_reward
876
+ value: 0.10 +/- 0.06
877
  name: Expert normalized total reward
878
  - type: human_normalized_total_reward
879
+ value: 0.83 +/- 0.49
880
  name: Human normalized total reward
881
  - task:
882
  type: reinforcement-learning
 
886
  type: atari-seaquest
887
  metrics:
888
  - type: total_reward
889
+ value: 859.80 +/- 407.80
890
  name: Total reward
891
  - type: expert_normalized_total_reward
892
+ value: 0.31 +/- 0.16
893
  name: Expert normalized total reward
894
  - type: human_normalized_total_reward
895
  value: 0.02 +/- 0.01
 
902
  type: atari-skiing
903
  metrics:
904
  - type: total_reward
905
+ value: -15960.04 +/- 5887.52
906
  name: Total reward
907
  - type: expert_normalized_total_reward
908
+ value: 0.18 +/- 0.93
909
  name: Expert normalized total reward
910
  - type: human_normalized_total_reward
911
+ value: 0.09 +/- 0.46
912
  name: Human normalized total reward
913
  - task:
914
  type: reinforcement-learning
 
918
  type: atari-solaris
919
  metrics:
920
  - type: total_reward
921
+ value: 1202.60 +/- 445.27
922
  name: Total reward
923
  - type: expert_normalized_total_reward
924
+ value: -0.29 +/- 3.79
925
  name: Expert normalized total reward
926
  - type: human_normalized_total_reward
927
+ value: -0.00 +/- 0.04
928
  name: Human normalized total reward
929
  - task:
930
  type: reinforcement-learning
 
934
  type: atari-spaceinvaders
935
  metrics:
936
  - type: total_reward
937
+ value: 326.85 +/- 141.89
938
  name: Total reward
939
  - type: expert_normalized_total_reward
940
+ value: 0.01 +/- 0.00
941
  name: Expert normalized total reward
942
  - type: human_normalized_total_reward
943
+ value: 0.12 +/- 0.09
944
  name: Human normalized total reward
945
  - task:
946
  type: reinforcement-learning
 
950
  type: atari-stargunner
951
  metrics:
952
  - type: total_reward
953
+ value: 5219.00 +/- 3544.03
954
  name: Total reward
955
  - type: expert_normalized_total_reward
956
  value: 0.01 +/- 0.01
957
  name: Expert normalized total reward
958
  - type: human_normalized_total_reward
959
+ value: 0.48 +/- 0.37
960
  name: Human normalized total reward
961
  - task:
962
  type: reinforcement-learning
 
966
  type: atari-surround
967
  metrics:
968
  - type: total_reward
969
+ value: 1.52 +/- 4.60
970
  name: Total reward
971
  - type: expert_normalized_total_reward
972
+ value: 0.59 +/- 0.24
973
  name: Expert normalized total reward
974
  - type: human_normalized_total_reward
975
+ value: 0.70 +/- 0.28
976
  name: Human normalized total reward
977
  - task:
978
  type: reinforcement-learning
 
982
  type: atari-tennis
983
  metrics:
984
  - type: total_reward
985
+ value: -12.80 +/- 3.70
986
  name: Total reward
987
  - type: expert_normalized_total_reward
988
+ value: 0.32 +/- 0.11
989
  name: Expert normalized total reward
990
  - type: human_normalized_total_reward
991
+ value: 0.34 +/- 0.12
992
  name: Human normalized total reward
993
  - task:
994
  type: reinforcement-learning
 
998
  type: atari-timepilot
999
  metrics:
1000
  - type: total_reward
1001
+ value: 11603.00 +/- 4323.25
1002
  name: Total reward
1003
  - type: expert_normalized_total_reward
1004
+ value: 0.12 +/- 0.07
1005
  name: Expert normalized total reward
1006
  - type: human_normalized_total_reward
1007
+ value: 4.84 +/- 2.60
1008
  name: Human normalized total reward
1009
  - task:
1010
  type: reinforcement-learning
 
1014
  type: atari-tutankham
1015
  metrics:
1016
  - type: total_reward
1017
+ value: 108.82 +/- 70.14
1018
  name: Total reward
1019
  - type: expert_normalized_total_reward
1020
+ value: 0.35 +/- 0.25
1021
  name: Expert normalized total reward
1022
  - type: human_normalized_total_reward
1023
+ value: 0.62 +/- 0.45
1024
  name: Human normalized total reward
1025
  - task:
1026
  type: reinforcement-learning
 
1030
  type: atari-upndown
1031
  metrics:
1032
  - type: total_reward
1033
+ value: 19074.60 +/- 9961.77
1034
  name: Total reward
1035
  - type: expert_normalized_total_reward
1036
  value: 0.04 +/- 0.02
1037
  name: Expert normalized total reward
1038
  - type: human_normalized_total_reward
1039
+ value: 1.66 +/- 0.89
1040
  name: Human normalized total reward
1041
  - task:
1042
  type: reinforcement-learning
 
1062
  type: atari-videopinball
1063
  metrics:
1064
  - type: total_reward
1065
+ value: 12466.69 +/- 8723.07
1066
  name: Total reward
1067
  - type: expert_normalized_total_reward
1068
  value: 0.03 +/- 0.02
1069
  name: Expert normalized total reward
1070
  - type: human_normalized_total_reward
1071
+ value: 0.71 +/- 0.49
1072
  name: Human normalized total reward
1073
  - task:
1074
  type: reinforcement-learning
 
1078
  type: atari-wizardofwor
1079
  metrics:
1080
  - type: total_reward
1081
+ value: 2231.00 +/- 2042.92
1082
  name: Total reward
1083
  - type: expert_normalized_total_reward
1084
+ value: 0.03 +/- 0.04
1085
  name: Expert normalized total reward
1086
  - type: human_normalized_total_reward
1087
+ value: 0.40 +/- 0.49
1088
  name: Human normalized total reward
1089
  - task:
1090
  type: reinforcement-learning
 
1094
  type: atari-yarsrevenge
1095
  metrics:
1096
  - type: total_reward
1097
+ value: 11190.85 +/- 7342.58
1098
  name: Total reward
1099
  - type: expert_normalized_total_reward
1100
+ value: 0.03 +/- 0.03
1101
  name: Expert normalized total reward
1102
  - type: human_normalized_total_reward
1103
+ value: 0.16 +/- 0.14
1104
  name: Human normalized total reward
1105
  - task:
1106
  type: reinforcement-learning
 
1110
  type: atari-zaxxon
1111
  metrics:
1112
  - type: total_reward
1113
+ value: 5976.00 +/- 2889.54
1114
  name: Total reward
1115
  - type: expert_normalized_total_reward
1116
+ value: 0.08 +/- 0.04
1117
  name: Expert normalized total reward
1118
  - type: human_normalized_total_reward
1119
+ value: 0.65 +/- 0.32
1120
  name: Human normalized total reward
1121
  - task:
1122
  type: reinforcement-learning
 
1126
  type: babyai-action-obj-door
1127
  metrics:
1128
  - type: total_reward
1129
+ value: 0.92 +/- 0.22
1130
  name: Total reward
1131
  - type: expert_normalized_total_reward
1132
+ value: 0.88 +/- 0.36
1133
  name: Expert normalized total reward
1134
  - task:
1135
  type: reinforcement-learning
 
1152
  type: babyai-boss-level-no-unlock
1153
  metrics:
1154
  - type: total_reward
1155
+ value: 0.49 +/- 0.43
1156
  name: Total reward
1157
  - type: expert_normalized_total_reward
1158
+ value: 0.49 +/- 0.49
1159
  name: Expert normalized total reward
1160
  - task:
1161
  type: reinforcement-learning
 
1165
  type: babyai-boss-level
1166
  metrics:
1167
  - type: total_reward
1168
+ value: 0.54 +/- 0.43
1169
  name: Total reward
1170
  - type: expert_normalized_total_reward
1171
+ value: 0.54 +/- 0.49
1172
  name: Expert normalized total reward
1173
  - task:
1174
  type: reinforcement-learning
 
1178
  type: babyai-find-obj-s5
1179
  metrics:
1180
  - type: total_reward
1181
+ value: 0.94 +/- 0.04
1182
  name: Total reward
1183
  - type: expert_normalized_total_reward
1184
  value: 1.00 +/- 0.04
 
1191
  type: babyai-go-to-door
1192
  metrics:
1193
  - type: total_reward
1194
+ value: 0.99 +/- 0.02
1195
  name: Total reward
1196
  - type: expert_normalized_total_reward
1197
+ value: 1.00 +/- 0.03
1198
  name: Expert normalized total reward
1199
  - task:
1200
  type: reinforcement-learning
 
1204
  type: babyai-go-to-imp-unlock
1205
  metrics:
1206
  - type: total_reward
1207
+ value: 0.53 +/- 0.41
1208
  name: Total reward
1209
  - type: expert_normalized_total_reward
1210
+ value: 0.60 +/- 0.55
1211
  name: Expert normalized total reward
1212
  - task:
1213
  type: reinforcement-learning
 
1217
  type: babyai-go-to-local
1218
  metrics:
1219
  - type: total_reward
1220
+ value: 0.87 +/- 0.16
1221
  name: Total reward
1222
  - type: expert_normalized_total_reward
1223
+ value: 0.93 +/- 0.22
1224
  name: Expert normalized total reward
1225
  - task:
1226
  type: reinforcement-learning
 
1233
  value: 0.98 +/- 0.04
1234
  name: Total reward
1235
  - type: expert_normalized_total_reward
1236
+ value: 0.98 +/- 0.08
1237
  name: Expert normalized total reward
1238
  - task:
1239
  type: reinforcement-learning
 
1243
  type: babyai-go-to-obj
1244
  metrics:
1245
  - type: total_reward
1246
+ value: 0.94 +/- 0.03
1247
  name: Total reward
1248
  - type: expert_normalized_total_reward
1249
+ value: 1.01 +/- 0.03
1250
  name: Expert normalized total reward
1251
  - task:
1252
  type: reinforcement-learning
 
1256
  type: babyai-go-to-red-ball-grey
1257
  metrics:
1258
  - type: total_reward
1259
+ value: 0.92 +/- 0.05
1260
  name: Total reward
1261
  - type: expert_normalized_total_reward
1262
+ value: 1.00 +/- 0.06
1263
  name: Expert normalized total reward
1264
  - task:
1265
  type: reinforcement-learning
 
1272
  value: 0.93 +/- 0.03
1273
  name: Total reward
1274
  - type: expert_normalized_total_reward
1275
+ value: 1.00 +/- 0.03
1276
  name: Expert normalized total reward
1277
  - task:
1278
  type: reinforcement-learning
 
1282
  type: babyai-go-to-red-ball
1283
  metrics:
1284
  - type: total_reward
1285
+ value: 0.91 +/- 0.09
1286
  name: Total reward
1287
  - type: expert_normalized_total_reward
1288
+ value: 0.98 +/- 0.12
1289
  name: Expert normalized total reward
1290
  - task:
1291
  type: reinforcement-learning
 
1295
  type: babyai-go-to-red-blue-ball
1296
  metrics:
1297
  - type: total_reward
1298
+ value: 0.91 +/- 0.08
1299
  name: Total reward
1300
  - type: expert_normalized_total_reward
1301
+ value: 0.99 +/- 0.10
1302
  name: Expert normalized total reward
1303
  - task:
1304
  type: reinforcement-learning
 
1308
  type: babyai-go-to-seq
1309
  metrics:
1310
  - type: total_reward
1311
+ value: 0.73 +/- 0.33
1312
  name: Total reward
1313
  - type: expert_normalized_total_reward
1314
+ value: 0.76 +/- 0.38
1315
  name: Expert normalized total reward
1316
  - task:
1317
  type: reinforcement-learning
 
1321
  type: babyai-go-to
1322
  metrics:
1323
  - type: total_reward
1324
+ value: 0.78 +/- 0.28
1325
  name: Total reward
1326
  - type: expert_normalized_total_reward
1327
+ value: 0.82 +/- 0.35
1328
  name: Expert normalized total reward
1329
  - task:
1330
  type: reinforcement-learning
 
1334
  type: babyai-key-corridor
1335
  metrics:
1336
  - type: total_reward
1337
+ value: 0.87 +/- 0.13
1338
  name: Total reward
1339
  - type: expert_normalized_total_reward
1340
+ value: 0.96 +/- 0.14
1341
  name: Expert normalized total reward
1342
  - task:
1343
  type: reinforcement-learning
 
1347
  type: babyai-mini-boss-level
1348
  metrics:
1349
  - type: total_reward
1350
+ value: 0.53 +/- 0.41
1351
  name: Total reward
1352
  - type: expert_normalized_total_reward
1353
+ value: 0.56 +/- 0.50
1354
  name: Expert normalized total reward
1355
  - task:
1356
  type: reinforcement-learning
 
1360
  type: babyai-move-two-across-s8n9
1361
  metrics:
1362
  - type: total_reward
1363
+ value: 0.05 +/- 0.19
1364
  name: Total reward
1365
  - type: expert_normalized_total_reward
1366
+ value: 0.05 +/- 0.20
1367
  name: Expert normalized total reward
1368
  - task:
1369
  type: reinforcement-learning
 
1373
  type: babyai-one-room-s8
1374
  metrics:
1375
  - type: total_reward
1376
+ value: 0.92 +/- 0.04
1377
  name: Total reward
1378
  - type: expert_normalized_total_reward
1379
  value: 1.00 +/- 0.04
 
1399
  type: babyai-open-doors-order-n4
1400
  metrics:
1401
  - type: total_reward
1402
+ value: 0.96 +/- 0.14
1403
  name: Total reward
1404
  - type: expert_normalized_total_reward
1405
+ value: 0.96 +/- 0.17
1406
  name: Expert normalized total reward
1407
  - task:
1408
  type: reinforcement-learning
 
1412
  type: babyai-open-red-door
1413
  metrics:
1414
  - type: total_reward
1415
+ value: 0.92 +/- 0.03
1416
  name: Total reward
1417
  - type: expert_normalized_total_reward
1418
  value: 1.00 +/- 0.03
 
1438
  type: babyai-open
1439
  metrics:
1440
  - type: total_reward
1441
+ value: 0.95 +/- 0.08
1442
  name: Total reward
1443
  - type: expert_normalized_total_reward
1444
+ value: 0.99 +/- 0.10
1445
  name: Expert normalized total reward
1446
  - task:
1447
  type: reinforcement-learning
 
1464
  type: babyai-pickup-dist
1465
  metrics:
1466
  - type: total_reward
1467
+ value: 0.87 +/- 0.12
1468
  name: Total reward
1469
  - type: expert_normalized_total_reward
1470
+ value: 1.02 +/- 0.16
1471
  name: Expert normalized total reward
1472
  - task:
1473
  type: reinforcement-learning
 
1477
  type: babyai-pickup-loc
1478
  metrics:
1479
  - type: total_reward
1480
+ value: 0.85 +/- 0.19
1481
  name: Total reward
1482
  - type: expert_normalized_total_reward
1483
+ value: 0.92 +/- 0.23
1484
  name: Expert normalized total reward
1485
  - task:
1486
  type: reinforcement-learning
 
1490
  type: babyai-pickup
1491
  metrics:
1492
  - type: total_reward
1493
+ value: 0.79 +/- 0.30
1494
  name: Total reward
1495
  - type: expert_normalized_total_reward
1496
+ value: 0.85 +/- 0.36
1497
  name: Expert normalized total reward
1498
  - task:
1499
  type: reinforcement-learning
 
1503
  type: babyai-put-next-local
1504
  metrics:
1505
  - type: total_reward
1506
+ value: 0.67 +/- 0.32
1507
  name: Total reward
1508
  - type: expert_normalized_total_reward
1509
+ value: 0.73 +/- 0.35
1510
  name: Expert normalized total reward
1511
  - task:
1512
  type: reinforcement-learning
 
1516
  type: babyai-put-next
1517
  metrics:
1518
  - type: total_reward
1519
+ value: 0.85 +/- 0.25
1520
  name: Total reward
1521
  - type: expert_normalized_total_reward
1522
+ value: 0.89 +/- 0.26
1523
  name: Expert normalized total reward
1524
  - task:
1525
  type: reinforcement-learning
 
1529
  type: babyai-synth-loc
1530
  metrics:
1531
  - type: total_reward
1532
+ value: 0.77 +/- 0.34
1533
  name: Total reward
1534
  - type: expert_normalized_total_reward
1535
+ value: 0.78 +/- 0.43
1536
  name: Expert normalized total reward
1537
  - task:
1538
  type: reinforcement-learning
 
1542
  type: babyai-synth-seq
1543
  metrics:
1544
  - type: total_reward
1545
+ value: 0.57 +/- 0.43
1546
  name: Total reward
1547
  - type: expert_normalized_total_reward
1548
+ value: 0.58 +/- 0.49
1549
  name: Expert normalized total reward
1550
  - task:
1551
  type: reinforcement-learning
 
1555
  type: babyai-synth
1556
  metrics:
1557
  - type: total_reward
1558
+ value: 0.75 +/- 0.35
1559
  name: Total reward
1560
  - type: expert_normalized_total_reward
1561
+ value: 0.78 +/- 0.43
1562
  name: Expert normalized total reward
1563
  - task:
1564
  type: reinforcement-learning
 
1568
  type: babyai-unblock-pickup
1569
  metrics:
1570
  - type: total_reward
1571
+ value: 0.79 +/- 0.29
1572
  name: Total reward
1573
  - type: expert_normalized_total_reward
1574
+ value: 0.86 +/- 0.35
1575
  name: Expert normalized total reward
1576
  - task:
1577
  type: reinforcement-learning
 
1594
  type: babyai-unlock-pickup
1595
  metrics:
1596
  - type: total_reward
1597
+ value: 0.75 +/- 0.03
1598
  name: Total reward
1599
  - type: expert_normalized_total_reward
1600
+ value: 1.00 +/- 0.05
1601
  name: Expert normalized total reward
1602
  - task:
1603
  type: reinforcement-learning
 
1607
  type: babyai-unlock-to-unlock
1608
  metrics:
1609
  - type: total_reward
1610
+ value: 0.85 +/- 0.31
1611
  name: Total reward
1612
  - type: expert_normalized_total_reward
1613
+ value: 0.88 +/- 0.32
1614
  name: Expert normalized total reward
1615
  - task:
1616
  type: reinforcement-learning
 
1620
  type: babyai-unlock
1621
  metrics:
1622
  - type: total_reward
1623
+ value: 0.43 +/- 0.43
1624
  name: Total reward
1625
  - type: expert_normalized_total_reward
1626
+ value: 0.48 +/- 0.52
1627
  name: Expert normalized total reward
1628
  - task:
1629
  type: reinforcement-learning
 
1633
  type: metaworld-assembly
1634
  metrics:
1635
  - type: total_reward
1636
+ value: 243.78 +/- 10.44
1637
  name: Total reward
1638
  - type: expert_normalized_total_reward
1639
+ value: 0.99 +/- 0.05
1640
  name: Expert normalized total reward
1641
  - task:
1642
  type: reinforcement-learning
 
1646
  type: metaworld-basketball
1647
  metrics:
1648
  - type: total_reward
1649
+ value: 1.71 +/- 0.63
1650
  name: Total reward
1651
  - type: expert_normalized_total_reward
1652
  value: -0.00 +/- 0.00
 
1659
  type: metaworld-bin-picking
1660
  metrics:
1661
  - type: total_reward
1662
+ value: 314.42 +/- 196.40
1663
  name: Total reward
1664
  - type: expert_normalized_total_reward
1665
+ value: 0.74 +/- 0.46
1666
  name: Expert normalized total reward
1667
  - task:
1668
  type: reinforcement-learning
 
1672
  type: metaworld-box-close
1673
  metrics:
1674
  - type: total_reward
1675
+ value: 482.86 +/- 146.37
1676
  name: Total reward
1677
  - type: expert_normalized_total_reward
1678
+ value: 0.93 +/- 0.34
1679
  name: Expert normalized total reward
1680
  - task:
1681
  type: reinforcement-learning
 
1685
  type: metaworld-button-press-topdown-wall
1686
  metrics:
1687
  - type: total_reward
1688
+ value: 268.30 +/- 82.78
1689
  name: Total reward
1690
  - type: expert_normalized_total_reward
1691
+ value: 0.51 +/- 0.18
1692
  name: Expert normalized total reward
1693
  - task:
1694
  type: reinforcement-learning
 
1698
  type: metaworld-button-press-topdown
1699
  metrics:
1700
  - type: total_reward
1701
+ value: 269.14 +/- 82.81
1702
  name: Total reward
1703
  - type: expert_normalized_total_reward
1704
+ value: 0.52 +/- 0.18
1705
  name: Expert normalized total reward
1706
  - task:
1707
  type: reinforcement-learning
 
1711
  type: metaworld-button-press-wall
1712
  metrics:
1713
  - type: total_reward
1714
+ value: 608.87 +/- 169.50
1715
  name: Total reward
1716
  - type: expert_normalized_total_reward
1717
+ value: 0.90 +/- 0.25
1718
  name: Expert normalized total reward
1719
  - task:
1720
  type: reinforcement-learning
 
1724
  type: metaworld-button-press
1725
  metrics:
1726
  - type: total_reward
1727
+ value: 624.03 +/- 73.53
1728
  name: Total reward
1729
  - type: expert_normalized_total_reward
1730
+ value: 0.97 +/- 0.12
1731
  name: Expert normalized total reward
1732
  - task:
1733
  type: reinforcement-learning
 
1737
  type: metaworld-coffee-button
1738
  metrics:
1739
  - type: total_reward
1740
+ value: 334.92 +/- 301.67
1741
  name: Total reward
1742
  - type: expert_normalized_total_reward
1743
+ value: 0.43 +/- 0.43
1744
  name: Expert normalized total reward
1745
  - task:
1746
  type: reinforcement-learning
 
1750
  type: metaworld-coffee-pull
1751
  metrics:
1752
  - type: total_reward
1753
+ value: 38.00 +/- 63.97
1754
  name: Total reward
1755
  - type: expert_normalized_total_reward
1756
+ value: 0.13 +/- 0.25
1757
  name: Expert normalized total reward
1758
  - task:
1759
  type: reinforcement-learning
 
1763
  type: metaworld-coffee-push
1764
  metrics:
1765
  - type: total_reward
1766
+ value: 151.38 +/- 207.69
1767
  name: Total reward
1768
  - type: expert_normalized_total_reward
1769
+ value: 0.30 +/- 0.42
1770
  name: Expert normalized total reward
1771
  - task:
1772
  type: reinforcement-learning
 
1776
  type: metaworld-dial-turn
1777
  metrics:
1778
  - type: total_reward
1779
+ value: 752.25 +/- 138.50
1780
  name: Total reward
1781
  - type: expert_normalized_total_reward
1782
+ value: 0.95 +/- 0.18
1783
  name: Expert normalized total reward
1784
  - task:
1785
  type: reinforcement-learning
 
1789
  type: metaworld-disassemble
1790
  metrics:
1791
  - type: total_reward
1792
+ value: 40.87 +/- 9.35
1793
  name: Total reward
1794
  - type: expert_normalized_total_reward
1795
+ value: 0.22 +/- 3.71
1796
  name: Expert normalized total reward
1797
  - task:
1798
  type: reinforcement-learning
 
1802
  type: metaworld-door-close
1803
  metrics:
1804
  - type: total_reward
1805
+ value: 530.48 +/- 29.02
1806
  name: Total reward
1807
  - type: expert_normalized_total_reward
1808
  value: 1.00 +/- 0.06
 
1815
  type: metaworld-door-lock
1816
  metrics:
1817
  - type: total_reward
1818
+ value: 678.98 +/- 194.57
1819
  name: Total reward
1820
  - type: expert_normalized_total_reward
1821
  value: 0.81 +/- 0.28
 
1828
  type: metaworld-door-open
1829
  metrics:
1830
  - type: total_reward
1831
+ value: 574.71 +/- 50.82
1832
  name: Total reward
1833
  - type: expert_normalized_total_reward
1834
+ value: 0.99 +/- 0.10
1835
  name: Expert normalized total reward
1836
  - task:
1837
  type: reinforcement-learning
 
1841
  type: metaworld-door-unlock
1842
  metrics:
1843
  - type: total_reward
1844
+ value: 761.82 +/- 114.70
1845
  name: Total reward
1846
  - type: expert_normalized_total_reward
1847
+ value: 0.94 +/- 0.16
1848
  name: Expert normalized total reward
1849
  - task:
1850
  type: reinforcement-learning
 
1854
  type: metaworld-drawer-close
1855
  metrics:
1856
  - type: total_reward
1857
+ value: 519.05 +/- 154.38
1858
  name: Total reward
1859
  - type: expert_normalized_total_reward
1860
+ value: 0.54 +/- 0.21
1861
  name: Expert normalized total reward
1862
  - task:
1863
  type: reinforcement-learning
 
1867
  type: metaworld-drawer-open
1868
  metrics:
1869
  - type: total_reward
1870
+ value: 486.02 +/- 34.17
1871
  name: Total reward
1872
  - type: expert_normalized_total_reward
1873
+ value: 0.98 +/- 0.09
1874
  name: Expert normalized total reward
1875
  - task:
1876
  type: reinforcement-learning
 
1880
  type: metaworld-faucet-close
1881
  metrics:
1882
  - type: total_reward
1883
+ value: 366.78 +/- 86.77
1884
  name: Total reward
1885
  - type: expert_normalized_total_reward
1886
+ value: 0.23 +/- 0.17
1887
  name: Expert normalized total reward
1888
  - task:
1889
  type: reinforcement-learning
 
1893
  type: metaworld-faucet-open
1894
  metrics:
1895
  - type: total_reward
1896
+ value: 685.01 +/- 65.52
1897
  name: Total reward
1898
  - type: expert_normalized_total_reward
1899
+ value: 0.96 +/- 0.14
1900
  name: Expert normalized total reward
1901
  - task:
1902
  type: reinforcement-learning
 
1906
  type: metaworld-hammer
1907
  metrics:
1908
  - type: total_reward
1909
+ value: 678.36 +/- 79.36
1910
  name: Total reward
1911
  - type: expert_normalized_total_reward
1912
+ value: 0.98 +/- 0.13
1913
  name: Expert normalized total reward
1914
  - task:
1915
  type: reinforcement-learning
 
1919
  type: metaworld-hand-insert
1920
  metrics:
1921
  - type: total_reward
1922
+ value: 695.27 +/- 134.25
1923
  name: Total reward
1924
  - type: expert_normalized_total_reward
1925
+ value: 0.94 +/- 0.18
1926
  name: Expert normalized total reward
1927
  - task:
1928
  type: reinforcement-learning
 
1932
  type: metaworld-handle-press-side
1933
  metrics:
1934
  - type: total_reward
1935
+ value: 65.07 +/- 69.65
1936
  name: Total reward
1937
  - type: expert_normalized_total_reward
1938
+ value: 0.01 +/- 0.09
1939
  name: Expert normalized total reward
1940
  - task:
1941
  type: reinforcement-learning
 
1945
  type: metaworld-handle-press
1946
  metrics:
1947
  - type: total_reward
1948
+ value: 695.97 +/- 311.48
1949
  name: Total reward
1950
  - type: expert_normalized_total_reward
1951
+ value: 0.79 +/- 0.40
1952
  name: Expert normalized total reward
1953
  - task:
1954
  type: reinforcement-learning
 
1958
  type: metaworld-handle-pull-side
1959
  metrics:
1960
  - type: total_reward
1961
+ value: 145.34 +/- 179.01
1962
  name: Total reward
1963
  - type: expert_normalized_total_reward
1964
+ value: 0.37 +/- 0.47
1965
  name: Expert normalized total reward
1966
  - task:
1967
  type: reinforcement-learning
 
1971
  type: metaworld-handle-pull
1972
  metrics:
1973
  - type: total_reward
1974
+ value: 514.56 +/- 205.75
1975
  name: Total reward
1976
  - type: expert_normalized_total_reward
1977
+ value: 0.77 +/- 0.31
1978
  name: Expert normalized total reward
1979
  - task:
1980
  type: reinforcement-learning
 
1984
  type: metaworld-lever-pull
1985
  metrics:
1986
  - type: total_reward
1987
+ value: 250.51 +/- 220.33
1988
  name: Total reward
1989
  - type: expert_normalized_total_reward
1990
+ value: 0.34 +/- 0.40
1991
  name: Expert normalized total reward
1992
  - task:
1993
  type: reinforcement-learning
 
1997
  type: metaworld-peg-insert-side
1998
  metrics:
1999
  - type: total_reward
2000
+ value: 305.94 +/- 166.53
2001
  name: Total reward
2002
  - type: expert_normalized_total_reward
2003
+ value: 0.97 +/- 0.53
2004
  name: Expert normalized total reward
2005
  - task:
2006
  type: reinforcement-learning
 
2010
  type: metaworld-peg-unplug-side
2011
  metrics:
2012
  - type: total_reward
2013
+ value: 120.73 +/- 169.26
2014
  name: Total reward
2015
  - type: expert_normalized_total_reward
2016
+ value: 0.26 +/- 0.37
2017
  name: Expert normalized total reward
2018
  - task:
2019
  type: reinforcement-learning
 
2036
  type: metaworld-pick-place-wall
2037
  metrics:
2038
  - type: total_reward
2039
+ value: 62.30 +/- 131.13
2040
  name: Total reward
2041
  - type: expert_normalized_total_reward
2042
+ value: 0.14 +/- 0.29
2043
  name: Expert normalized total reward
2044
  - task:
2045
  type: reinforcement-learning
 
2049
  type: metaworld-pick-place
2050
  metrics:
2051
  - type: total_reward
2052
+ value: 311.95 +/- 180.95
2053
  name: Total reward
2054
  - type: expert_normalized_total_reward
2055
+ value: 0.74 +/- 0.43
2056
  name: Expert normalized total reward
2057
  - task:
2058
  type: reinforcement-learning
 
2062
  type: metaworld-plate-slide-back-side
2063
  metrics:
2064
  - type: total_reward
2065
+ value: 689.54 +/- 157.90
2066
  name: Total reward
2067
  - type: expert_normalized_total_reward
2068
+ value: 0.94 +/- 0.23
2069
  name: Expert normalized total reward
2070
  - task:
2071
  type: reinforcement-learning
 
2075
  type: metaworld-plate-slide-back
2076
  metrics:
2077
  - type: total_reward
2078
+ value: 197.00 +/- 1.58
2079
  name: Total reward
2080
  - type: expert_normalized_total_reward
2081
  value: 0.24 +/- 0.00
 
2088
  type: metaworld-plate-slide-side
2089
  metrics:
2090
  - type: total_reward
2091
+ value: 122.56 +/- 24.56
2092
  name: Total reward
2093
  - type: expert_normalized_total_reward
2094
  value: 0.16 +/- 0.04
 
2101
  type: metaworld-plate-slide
2102
  metrics:
2103
  - type: total_reward
2104
+ value: 456.66 +/- 198.51
2105
  name: Total reward
2106
  - type: expert_normalized_total_reward
2107
+ value: 0.84 +/- 0.44
2108
  name: Expert normalized total reward
2109
  - task:
2110
  type: reinforcement-learning
 
2114
  type: metaworld-push-back
2115
  metrics:
2116
  - type: total_reward
2117
+ value: 71.38 +/- 100.60
2118
  name: Total reward
2119
  - type: expert_normalized_total_reward
2120
+ value: 0.84 +/- 1.20
2121
  name: Expert normalized total reward
2122
  - task:
2123
  type: reinforcement-learning
 
2127
  type: metaworld-push-wall
2128
  metrics:
2129
  - type: total_reward
2130
+ value: 216.66 +/- 256.33
2131
  name: Total reward
2132
  - type: expert_normalized_total_reward
2133
+ value: 0.28 +/- 0.35
2134
  name: Expert normalized total reward
2135
  - task:
2136
  type: reinforcement-learning
 
2140
  type: metaworld-push
2141
  metrics:
2142
  - type: total_reward
2143
+ value: 583.25 +/- 296.10
2144
  name: Total reward
2145
  - type: expert_normalized_total_reward
2146
+ value: 0.78 +/- 0.40
2147
  name: Expert normalized total reward
2148
  - task:
2149
  type: reinforcement-learning
 
2153
  type: metaworld-reach-wall
2154
  metrics:
2155
  - type: total_reward
2156
+ value: 681.90 +/- 186.63
2157
  name: Total reward
2158
  - type: expert_normalized_total_reward
2159
+ value: 0.89 +/- 0.31
2160
  name: Expert normalized total reward
2161
  - task:
2162
  type: reinforcement-learning
 
2166
  type: metaworld-reach
2167
  metrics:
2168
  - type: total_reward
2169
+ value: 347.45 +/- 190.66
2170
  name: Total reward
2171
  - type: expert_normalized_total_reward
2172
+ value: 0.37 +/- 0.36
2173
  name: Expert normalized total reward
2174
  - task:
2175
  type: reinforcement-learning
 
2179
  type: metaworld-shelf-place
2180
  metrics:
2181
  - type: total_reward
2182
+ value: 60.57 +/- 97.16
2183
  name: Total reward
2184
  - type: expert_normalized_total_reward
2185
+ value: 0.25 +/- 0.40
2186
  name: Expert normalized total reward
2187
  - task:
2188
  type: reinforcement-learning
 
2192
  type: metaworld-soccer
2193
  metrics:
2194
  - type: total_reward
2195
+ value: 309.21 +/- 172.64
2196
  name: Total reward
2197
  - type: expert_normalized_total_reward
2198
+ value: 0.82 +/- 0.47
2199
  name: Expert normalized total reward
2200
  - task:
2201
  type: reinforcement-learning
 
2205
  type: metaworld-stick-pull
2206
  metrics:
2207
  - type: total_reward
2208
+ value: 364.98 +/- 234.82
2209
  name: Total reward
2210
  - type: expert_normalized_total_reward
2211
+ value: 0.70 +/- 0.45
2212
  name: Expert normalized total reward
2213
  - task:
2214
  type: reinforcement-learning
 
2218
  type: metaworld-stick-push
2219
  metrics:
2220
  - type: total_reward
2221
+ value: 91.05 +/- 204.71
2222
  name: Total reward
2223
  - type: expert_normalized_total_reward
2224
+ value: 0.14 +/- 0.33
2225
  name: Expert normalized total reward
2226
  - task:
2227
  type: reinforcement-learning
 
2231
  type: metaworld-sweep-into
2232
  metrics:
2233
  - type: total_reward
2234
+ value: 714.98 +/- 209.19
2235
  name: Total reward
2236
  - type: expert_normalized_total_reward
2237
+ value: 0.89 +/- 0.27
2238
  name: Expert normalized total reward
2239
  - task:
2240
  type: reinforcement-learning
 
2244
  type: metaworld-sweep
2245
  metrics:
2246
  - type: total_reward
2247
+ value: 15.82 +/- 16.34
2248
  name: Total reward
2249
  - type: expert_normalized_total_reward
2250
+ value: 0.01 +/- 0.03
2251
  name: Expert normalized total reward
2252
  - task:
2253
  type: reinforcement-learning
 
2257
  type: metaworld-window-close
2258
  metrics:
2259
  - type: total_reward
2260
+ value: 347.90 +/- 222.50
2261
  name: Total reward
2262
  - type: expert_normalized_total_reward
2263
+ value: 0.54 +/- 0.42
2264
  name: Expert normalized total reward
2265
  - task:
2266
  type: reinforcement-learning
 
2270
  type: metaworld-window-open
2271
  metrics:
2272
  - type: total_reward
2273
+ value: 574.72 +/- 75.65
2274
  name: Total reward
2275
  - type: expert_normalized_total_reward
2276
+ value: 0.97 +/- 0.14
2277
  name: Expert normalized total reward
2278
  - task:
2279
  type: reinforcement-learning
 
2283
  type: mujoco-ant
2284
  metrics:
2285
  - type: total_reward
2286
+ value: 4993.13 +/- 1656.89
2287
  name: Total reward
2288
  - type: expert_normalized_total_reward
2289
+ value: 0.86 +/- 0.28
2290
  name: Expert normalized total reward
2291
  - task:
2292
  type: reinforcement-learning
 
2296
  type: mujoco-doublependulum
2297
  metrics:
2298
  - type: total_reward
2299
+ value: 8744.92 +/- 1471.45
2300
  name: Total reward
2301
  - type: expert_normalized_total_reward
2302
+ value: 0.94 +/- 0.16
2303
  name: Expert normalized total reward
2304
  - task:
2305
  type: reinforcement-learning
 
2309
  type: mujoco-halfcheetah
2310
  metrics:
2311
  - type: total_reward
2312
+ value: 6601.12 +/- 488.36
2313
  name: Total reward
2314
  - type: expert_normalized_total_reward
2315
+ value: 0.89 +/- 0.06
2316
  name: Expert normalized total reward
2317
  - task:
2318
  type: reinforcement-learning
 
2322
  type: mujoco-hopper
2323
  metrics:
2324
  - type: total_reward
2325
+ value: 1435.45 +/- 361.77
2326
  name: Total reward
2327
  - type: expert_normalized_total_reward
2328
+ value: 0.77 +/- 0.20
2329
  name: Expert normalized total reward
2330
  - task:
2331
  type: reinforcement-learning
 
2335
  type: mujoco-humanoid
2336
  metrics:
2337
  - type: total_reward
2338
+ value: 695.92 +/- 115.07
2339
  name: Total reward
2340
  - type: expert_normalized_total_reward
2341
  value: 0.09 +/- 0.02
 
2348
  type: mujoco-pendulum
2349
  metrics:
2350
  - type: total_reward
2351
+ value: 117.64 +/- 21.73
2352
  name: Total reward
2353
  - type: expert_normalized_total_reward
2354
+ value: 0.24 +/- 0.05
2355
  name: Expert normalized total reward
2356
  - task:
2357
  type: reinforcement-learning
 
2361
  type: mujoco-pusher
2362
  metrics:
2363
  - type: total_reward
2364
+ value: -24.93 +/- 6.47
2365
  name: Total reward
2366
  - type: expert_normalized_total_reward
2367
+ value: 1.00 +/- 0.05
2368
  name: Expert normalized total reward
2369
  - task:
2370
  type: reinforcement-learning
 
2374
  type: mujoco-reacher
2375
  metrics:
2376
  - type: total_reward
2377
+ value: -5.77 +/- 2.27
2378
  name: Total reward
2379
  - type: expert_normalized_total_reward
2380
+ value: 1.00 +/- 0.06
2381
  name: Expert normalized total reward
2382
  - task:
2383
  type: reinforcement-learning
 
2387
  type: mujoco-standup
2388
  metrics:
2389
  - type: total_reward
2390
+ value: 113587.22 +/- 21821.69
2391
  name: Total reward
2392
  - type: expert_normalized_total_reward
2393
+ value: 0.33 +/- 0.09
2394
  name: Expert normalized total reward
2395
  - task:
2396
  type: reinforcement-learning
 
2400
  type: mujoco-swimmer
2401
  metrics:
2402
  - type: total_reward
2403
+ value: 94.08 +/- 3.94
2404
  name: Total reward
2405
  - type: expert_normalized_total_reward
2406
+ value: 1.02 +/- 0.04
2407
  name: Expert normalized total reward
2408
  - task:
2409
  type: reinforcement-learning
 
2413
  type: mujoco-walker
2414
  metrics:
2415
  - type: total_reward
2416
+ value: 4381.69 +/- 848.39
2417
  name: Total reward
2418
  - type: expert_normalized_total_reward
2419
+ value: 0.95 +/- 0.18
2420
  name: Expert normalized total reward
2421
  ---
2422
 
 
2440
  ## Training
2441
 
2442
  <details>
2443
+ <summary>The model was trained on the following tasks:</summary>
2444
+
2445
  - Alien
2446
  - Amidar
2447
  - Assault
 
2611
 
2612
  model = AutoModelForCausalLM.from_pretrained("jat-project/jat")
2613
  ```