Commit
•
f7dbbec
1
Parent(s):
c737ca8
Update README.md
Browse files
README.md
CHANGED
@@ -171,10 +171,10 @@ model-index:
|
|
171 |
type: atari
|
172 |
metrics:
|
173 |
- type: iqm_expert_normalized_total_reward
|
174 |
-
value: 0.
|
175 |
name: IQM expert normalized total reward
|
176 |
- type: iqm_human_normalized_total_reward
|
177 |
-
value: 0.
|
178 |
name: IQM human normalized total reward
|
179 |
- task:
|
180 |
type: reinforcement-learning
|
@@ -214,13 +214,13 @@ model-index:
|
|
214 |
type: atari-alien
|
215 |
metrics:
|
216 |
- type: total_reward
|
217 |
-
value:
|
218 |
name: Total reward
|
219 |
- type: expert_normalized_total_reward
|
220 |
-
value: 0.
|
221 |
name: Expert normalized total reward
|
222 |
- type: human_normalized_total_reward
|
223 |
-
value: 0.
|
224 |
name: Human normalized total reward
|
225 |
- task:
|
226 |
type: reinforcement-learning
|
@@ -230,13 +230,13 @@ model-index:
|
|
230 |
type: atari-amidar
|
231 |
metrics:
|
232 |
- type: total_reward
|
233 |
-
value:
|
234 |
name: Total reward
|
235 |
- type: expert_normalized_total_reward
|
236 |
-
value: 0.
|
237 |
name: Expert normalized total reward
|
238 |
- type: human_normalized_total_reward
|
239 |
-
value: 0.
|
240 |
name: Human normalized total reward
|
241 |
- task:
|
242 |
type: reinforcement-learning
|
@@ -246,13 +246,13 @@ model-index:
|
|
246 |
type: atari-assault
|
247 |
metrics:
|
248 |
- type: total_reward
|
249 |
-
value:
|
250 |
name: Total reward
|
251 |
- type: expert_normalized_total_reward
|
252 |
-
value: 0.
|
253 |
name: Expert normalized total reward
|
254 |
- type: human_normalized_total_reward
|
255 |
-
value:
|
256 |
name: Human normalized total reward
|
257 |
- task:
|
258 |
type: reinforcement-learning
|
@@ -262,13 +262,13 @@ model-index:
|
|
262 |
type: atari-asterix
|
263 |
metrics:
|
264 |
- type: total_reward
|
265 |
-
value:
|
266 |
name: Total reward
|
267 |
- type: expert_normalized_total_reward
|
268 |
-
value: 0.
|
269 |
name: Expert normalized total reward
|
270 |
- type: human_normalized_total_reward
|
271 |
-
value: 0.07 +/- 0.
|
272 |
name: Human normalized total reward
|
273 |
- task:
|
274 |
type: reinforcement-learning
|
@@ -278,13 +278,13 @@ model-index:
|
|
278 |
type: atari-asteroids
|
279 |
metrics:
|
280 |
- type: total_reward
|
281 |
-
value:
|
282 |
name: Total reward
|
283 |
- type: expert_normalized_total_reward
|
284 |
value: 0.00 +/- 0.00
|
285 |
name: Expert normalized total reward
|
286 |
- type: human_normalized_total_reward
|
287 |
-
value: 0.
|
288 |
name: Human normalized total reward
|
289 |
- task:
|
290 |
type: reinforcement-learning
|
@@ -294,13 +294,13 @@ model-index:
|
|
294 |
type: atari-atlantis
|
295 |
metrics:
|
296 |
- type: total_reward
|
297 |
-
value:
|
298 |
name: Total reward
|
299 |
- type: expert_normalized_total_reward
|
300 |
-
value: 0.
|
301 |
name: Expert normalized total reward
|
302 |
- type: human_normalized_total_reward
|
303 |
-
value:
|
304 |
name: Human normalized total reward
|
305 |
- task:
|
306 |
type: reinforcement-learning
|
@@ -310,13 +310,13 @@ model-index:
|
|
310 |
type: atari-bankheist
|
311 |
metrics:
|
312 |
- type: total_reward
|
313 |
-
value:
|
314 |
name: Total reward
|
315 |
- type: expert_normalized_total_reward
|
316 |
-
value: 0.
|
317 |
name: Expert normalized total reward
|
318 |
- type: human_normalized_total_reward
|
319 |
-
value:
|
320 |
name: Human normalized total reward
|
321 |
- task:
|
322 |
type: reinforcement-learning
|
@@ -326,13 +326,13 @@ model-index:
|
|
326 |
type: atari-battlezone
|
327 |
metrics:
|
328 |
- type: total_reward
|
329 |
-
value:
|
330 |
name: Total reward
|
331 |
- type: expert_normalized_total_reward
|
332 |
-
value: 0.
|
333 |
name: Expert normalized total reward
|
334 |
- type: human_normalized_total_reward
|
335 |
-
value: 0.
|
336 |
name: Human normalized total reward
|
337 |
- task:
|
338 |
type: reinforcement-learning
|
@@ -342,13 +342,13 @@ model-index:
|
|
342 |
type: atari-beamrider
|
343 |
metrics:
|
344 |
- type: total_reward
|
345 |
-
value:
|
346 |
name: Total reward
|
347 |
- type: expert_normalized_total_reward
|
348 |
value: 0.01 +/- 0.01
|
349 |
name: Expert normalized total reward
|
350 |
- type: human_normalized_total_reward
|
351 |
-
value: 0.
|
352 |
name: Human normalized total reward
|
353 |
- task:
|
354 |
type: reinforcement-learning
|
@@ -358,13 +358,13 @@ model-index:
|
|
358 |
type: atari-berzerk
|
359 |
metrics:
|
360 |
- type: total_reward
|
361 |
-
value:
|
362 |
name: Total reward
|
363 |
- type: expert_normalized_total_reward
|
364 |
-
value: 0.01 +/- 0.
|
365 |
name: Expert normalized total reward
|
366 |
- type: human_normalized_total_reward
|
367 |
-
value: 0.
|
368 |
name: Human normalized total reward
|
369 |
- task:
|
370 |
type: reinforcement-learning
|
@@ -374,13 +374,13 @@ model-index:
|
|
374 |
type: atari-bowling
|
375 |
metrics:
|
376 |
- type: total_reward
|
377 |
-
value:
|
378 |
name: Total reward
|
379 |
- type: expert_normalized_total_reward
|
380 |
value: 1.00 +/- 0.00
|
381 |
name: Expert normalized total reward
|
382 |
- type: human_normalized_total_reward
|
383 |
-
value: 0.
|
384 |
name: Human normalized total reward
|
385 |
- task:
|
386 |
type: reinforcement-learning
|
@@ -390,13 +390,13 @@ model-index:
|
|
390 |
type: atari-boxing
|
391 |
metrics:
|
392 |
- type: total_reward
|
393 |
-
value:
|
394 |
name: Total reward
|
395 |
- type: expert_normalized_total_reward
|
396 |
-
value: 0.
|
397 |
name: Expert normalized total reward
|
398 |
- type: human_normalized_total_reward
|
399 |
-
value: 7.
|
400 |
name: Human normalized total reward
|
401 |
- task:
|
402 |
type: reinforcement-learning
|
@@ -406,13 +406,13 @@ model-index:
|
|
406 |
type: atari-breakout
|
407 |
metrics:
|
408 |
- type: total_reward
|
409 |
-
value:
|
410 |
name: Total reward
|
411 |
- type: expert_normalized_total_reward
|
412 |
value: 0.01 +/- 0.01
|
413 |
name: Expert normalized total reward
|
414 |
- type: human_normalized_total_reward
|
415 |
-
value: 0.
|
416 |
name: Human normalized total reward
|
417 |
- task:
|
418 |
type: reinforcement-learning
|
@@ -422,13 +422,13 @@ model-index:
|
|
422 |
type: atari-centipede
|
423 |
metrics:
|
424 |
- type: total_reward
|
425 |
-
value:
|
426 |
name: Total reward
|
427 |
- type: expert_normalized_total_reward
|
428 |
-
value: 0.
|
429 |
name: Expert normalized total reward
|
430 |
- type: human_normalized_total_reward
|
431 |
-
value: 0.
|
432 |
name: Human normalized total reward
|
433 |
- task:
|
434 |
type: reinforcement-learning
|
@@ -438,13 +438,13 @@ model-index:
|
|
438 |
type: atari-choppercommand
|
439 |
metrics:
|
440 |
- type: total_reward
|
441 |
-
value:
|
442 |
name: Total reward
|
443 |
- type: expert_normalized_total_reward
|
444 |
-
value: 0.
|
445 |
name: Expert normalized total reward
|
446 |
- type: human_normalized_total_reward
|
447 |
-
value: 0.
|
448 |
name: Human normalized total reward
|
449 |
- task:
|
450 |
type: reinforcement-learning
|
@@ -454,13 +454,13 @@ model-index:
|
|
454 |
type: atari-crazyclimber
|
455 |
metrics:
|
456 |
- type: total_reward
|
457 |
-
value:
|
458 |
name: Total reward
|
459 |
- type: expert_normalized_total_reward
|
460 |
-
value: 0.
|
461 |
name: Expert normalized total reward
|
462 |
- type: human_normalized_total_reward
|
463 |
-
value:
|
464 |
name: Human normalized total reward
|
465 |
- task:
|
466 |
type: reinforcement-learning
|
@@ -470,13 +470,13 @@ model-index:
|
|
470 |
type: atari-defender
|
471 |
metrics:
|
472 |
- type: total_reward
|
473 |
-
value:
|
474 |
name: Total reward
|
475 |
- type: expert_normalized_total_reward
|
476 |
-
value: 0.
|
477 |
name: Expert normalized total reward
|
478 |
- type: human_normalized_total_reward
|
479 |
-
value:
|
480 |
name: Human normalized total reward
|
481 |
- task:
|
482 |
type: reinforcement-learning
|
@@ -486,13 +486,13 @@ model-index:
|
|
486 |
type: atari-demonattack
|
487 |
metrics:
|
488 |
- type: total_reward
|
489 |
-
value:
|
490 |
name: Total reward
|
491 |
- type: expert_normalized_total_reward
|
492 |
-
value: 0.
|
493 |
name: Expert normalized total reward
|
494 |
- type: human_normalized_total_reward
|
495 |
-
value: 0.
|
496 |
name: Human normalized total reward
|
497 |
- task:
|
498 |
type: reinforcement-learning
|
@@ -502,13 +502,13 @@ model-index:
|
|
502 |
type: atari-doubledunk
|
503 |
metrics:
|
504 |
- type: total_reward
|
505 |
-
value:
|
506 |
name: Total reward
|
507 |
- type: expert_normalized_total_reward
|
508 |
-
value: 0.
|
509 |
name: Expert normalized total reward
|
510 |
- type: human_normalized_total_reward
|
511 |
-
value: 0.
|
512 |
name: Human normalized total reward
|
513 |
- task:
|
514 |
type: reinforcement-learning
|
@@ -518,13 +518,13 @@ model-index:
|
|
518 |
type: atari-enduro
|
519 |
metrics:
|
520 |
- type: total_reward
|
521 |
-
value:
|
522 |
name: Total reward
|
523 |
- type: expert_normalized_total_reward
|
524 |
-
value: 0.05 +/- 0.
|
525 |
name: Expert normalized total reward
|
526 |
- type: human_normalized_total_reward
|
527 |
-
value: 0.13 +/- 0.
|
528 |
name: Human normalized total reward
|
529 |
- task:
|
530 |
type: reinforcement-learning
|
@@ -534,13 +534,13 @@ model-index:
|
|
534 |
type: atari-fishingderby
|
535 |
metrics:
|
536 |
- type: total_reward
|
537 |
-
value: -
|
538 |
name: Total reward
|
539 |
- type: expert_normalized_total_reward
|
540 |
-
value: 0.
|
541 |
name: Expert normalized total reward
|
542 |
- type: human_normalized_total_reward
|
543 |
-
value: 0.
|
544 |
name: Human normalized total reward
|
545 |
- task:
|
546 |
type: reinforcement-learning
|
@@ -550,13 +550,13 @@ model-index:
|
|
550 |
type: atari-freeway
|
551 |
metrics:
|
552 |
- type: total_reward
|
553 |
-
value:
|
554 |
name: Total reward
|
555 |
- type: expert_normalized_total_reward
|
556 |
-
value: 0.
|
557 |
name: Expert normalized total reward
|
558 |
- type: human_normalized_total_reward
|
559 |
-
value: 0.
|
560 |
name: Human normalized total reward
|
561 |
- task:
|
562 |
type: reinforcement-learning
|
@@ -566,13 +566,13 @@ model-index:
|
|
566 |
type: atari-frostbite
|
567 |
metrics:
|
568 |
- type: total_reward
|
569 |
-
value:
|
570 |
name: Total reward
|
571 |
- type: expert_normalized_total_reward
|
572 |
-
value: 0.
|
573 |
name: Expert normalized total reward
|
574 |
- type: human_normalized_total_reward
|
575 |
-
value: 0.
|
576 |
name: Human normalized total reward
|
577 |
- task:
|
578 |
type: reinforcement-learning
|
@@ -582,13 +582,13 @@ model-index:
|
|
582 |
type: atari-gopher
|
583 |
metrics:
|
584 |
- type: total_reward
|
585 |
-
value:
|
586 |
name: Total reward
|
587 |
- type: expert_normalized_total_reward
|
588 |
-
value: 0.
|
589 |
name: Expert normalized total reward
|
590 |
- type: human_normalized_total_reward
|
591 |
-
value:
|
592 |
name: Human normalized total reward
|
593 |
- task:
|
594 |
type: reinforcement-learning
|
@@ -598,13 +598,13 @@ model-index:
|
|
598 |
type: atari-gravitar
|
599 |
metrics:
|
600 |
- type: total_reward
|
601 |
-
value:
|
602 |
name: Total reward
|
603 |
- type: expert_normalized_total_reward
|
604 |
-
value: 0.
|
605 |
name: Expert normalized total reward
|
606 |
- type: human_normalized_total_reward
|
607 |
-
value: 0.
|
608 |
name: Human normalized total reward
|
609 |
- task:
|
610 |
type: reinforcement-learning
|
@@ -614,13 +614,13 @@ model-index:
|
|
614 |
type: atari-hero
|
615 |
metrics:
|
616 |
- type: total_reward
|
617 |
-
value:
|
618 |
name: Total reward
|
619 |
- type: expert_normalized_total_reward
|
620 |
-
value: 0.
|
621 |
name: Expert normalized total reward
|
622 |
- type: human_normalized_total_reward
|
623 |
-
value: 0.
|
624 |
name: Human normalized total reward
|
625 |
- task:
|
626 |
type: reinforcement-learning
|
@@ -630,13 +630,13 @@ model-index:
|
|
630 |
type: atari-icehockey
|
631 |
metrics:
|
632 |
- type: total_reward
|
633 |
-
value:
|
634 |
name: Total reward
|
635 |
- type: expert_normalized_total_reward
|
636 |
-
value: 0.
|
637 |
name: Expert normalized total reward
|
638 |
- type: human_normalized_total_reward
|
639 |
-
value:
|
640 |
name: Human normalized total reward
|
641 |
- task:
|
642 |
type: reinforcement-learning
|
@@ -646,13 +646,13 @@ model-index:
|
|
646 |
type: atari-jamesbond
|
647 |
metrics:
|
648 |
- type: total_reward
|
649 |
-
value:
|
650 |
name: Total reward
|
651 |
- type: expert_normalized_total_reward
|
652 |
value: 0.01 +/- 0.00
|
653 |
name: Expert normalized total reward
|
654 |
- type: human_normalized_total_reward
|
655 |
-
value:
|
656 |
name: Human normalized total reward
|
657 |
- task:
|
658 |
type: reinforcement-learning
|
@@ -662,13 +662,13 @@ model-index:
|
|
662 |
type: atari-kangaroo
|
663 |
metrics:
|
664 |
- type: total_reward
|
665 |
-
value:
|
666 |
name: Total reward
|
667 |
- type: expert_normalized_total_reward
|
668 |
-
value: 0.
|
669 |
name: Expert normalized total reward
|
670 |
- type: human_normalized_total_reward
|
671 |
-
value: 0.
|
672 |
name: Human normalized total reward
|
673 |
- task:
|
674 |
type: reinforcement-learning
|
@@ -678,13 +678,13 @@ model-index:
|
|
678 |
type: atari-krull
|
679 |
metrics:
|
680 |
- type: total_reward
|
681 |
-
value:
|
682 |
name: Total reward
|
683 |
- type: expert_normalized_total_reward
|
684 |
-
value: 0.
|
685 |
name: Expert normalized total reward
|
686 |
- type: human_normalized_total_reward
|
687 |
-
value:
|
688 |
name: Human normalized total reward
|
689 |
- task:
|
690 |
type: reinforcement-learning
|
@@ -694,13 +694,13 @@ model-index:
|
|
694 |
type: atari-kungfumaster
|
695 |
metrics:
|
696 |
- type: total_reward
|
697 |
-
value:
|
698 |
name: Total reward
|
699 |
- type: expert_normalized_total_reward
|
700 |
-
value:
|
701 |
name: Expert normalized total reward
|
702 |
- type: human_normalized_total_reward
|
703 |
-
value:
|
704 |
name: Human normalized total reward
|
705 |
- task:
|
706 |
type: reinforcement-learning
|
@@ -726,13 +726,13 @@ model-index:
|
|
726 |
type: atari-mspacman
|
727 |
metrics:
|
728 |
- type: total_reward
|
729 |
-
value:
|
730 |
name: Total reward
|
731 |
- type: expert_normalized_total_reward
|
732 |
-
value: 0.
|
733 |
name: Expert normalized total reward
|
734 |
- type: human_normalized_total_reward
|
735 |
-
value: 0.
|
736 |
name: Human normalized total reward
|
737 |
- task:
|
738 |
type: reinforcement-learning
|
@@ -742,13 +742,13 @@ model-index:
|
|
742 |
type: atari-namethisgame
|
743 |
metrics:
|
744 |
- type: total_reward
|
745 |
-
value:
|
746 |
name: Total reward
|
747 |
- type: expert_normalized_total_reward
|
748 |
-
value: 0.
|
749 |
name: Expert normalized total reward
|
750 |
- type: human_normalized_total_reward
|
751 |
-
value: 0.
|
752 |
name: Human normalized total reward
|
753 |
- task:
|
754 |
type: reinforcement-learning
|
@@ -758,13 +758,13 @@ model-index:
|
|
758 |
type: atari-phoenix
|
759 |
metrics:
|
760 |
- type: total_reward
|
761 |
-
value:
|
762 |
name: Total reward
|
763 |
- type: expert_normalized_total_reward
|
764 |
value: 0.00 +/- 0.00
|
765 |
name: Expert normalized total reward
|
766 |
- type: human_normalized_total_reward
|
767 |
-
value: 0.
|
768 |
name: Human normalized total reward
|
769 |
- task:
|
770 |
type: reinforcement-learning
|
@@ -774,13 +774,13 @@ model-index:
|
|
774 |
type: atari-pitfall
|
775 |
metrics:
|
776 |
- type: total_reward
|
777 |
-
value: -
|
778 |
name: Total reward
|
779 |
- type: expert_normalized_total_reward
|
780 |
-
value:
|
781 |
name: Expert normalized total reward
|
782 |
- type: human_normalized_total_reward
|
783 |
-
value:
|
784 |
name: Human normalized total reward
|
785 |
- task:
|
786 |
type: reinforcement-learning
|
@@ -790,13 +790,13 @@ model-index:
|
|
790 |
type: atari-pong
|
791 |
metrics:
|
792 |
- type: total_reward
|
793 |
-
value:
|
794 |
name: Total reward
|
795 |
- type: expert_normalized_total_reward
|
796 |
-
value: 0.
|
797 |
name: Expert normalized total reward
|
798 |
- type: human_normalized_total_reward
|
799 |
-
value: 0.
|
800 |
name: Human normalized total reward
|
801 |
- task:
|
802 |
type: reinforcement-learning
|
@@ -806,10 +806,10 @@ model-index:
|
|
806 |
type: atari-privateeye
|
807 |
metrics:
|
808 |
- type: total_reward
|
809 |
-
value:
|
810 |
name: Total reward
|
811 |
- type: expert_normalized_total_reward
|
812 |
-
value: 0.
|
813 |
name: Expert normalized total reward
|
814 |
- type: human_normalized_total_reward
|
815 |
value: 0.00 +/- 0.00
|
@@ -822,13 +822,13 @@ model-index:
|
|
822 |
type: atari-qbert
|
823 |
metrics:
|
824 |
- type: total_reward
|
825 |
-
value:
|
826 |
name: Total reward
|
827 |
- type: expert_normalized_total_reward
|
828 |
-
value: 0.04 +/- 0.
|
829 |
name: Expert normalized total reward
|
830 |
- type: human_normalized_total_reward
|
831 |
-
value: 0.
|
832 |
name: Human normalized total reward
|
833 |
- task:
|
834 |
type: reinforcement-learning
|
@@ -838,13 +838,13 @@ model-index:
|
|
838 |
type: atari-riverraid
|
839 |
metrics:
|
840 |
- type: total_reward
|
841 |
-
value:
|
842 |
name: Total reward
|
843 |
- type: expert_normalized_total_reward
|
844 |
-
value: 0.
|
845 |
name: Expert normalized total reward
|
846 |
- type: human_normalized_total_reward
|
847 |
-
value: 0.
|
848 |
name: Human normalized total reward
|
849 |
- task:
|
850 |
type: reinforcement-learning
|
@@ -854,13 +854,13 @@ model-index:
|
|
854 |
type: atari-roadrunner
|
855 |
metrics:
|
856 |
- type: total_reward
|
857 |
-
value:
|
858 |
name: Total reward
|
859 |
- type: expert_normalized_total_reward
|
860 |
-
value: 0.
|
861 |
name: Expert normalized total reward
|
862 |
- type: human_normalized_total_reward
|
863 |
-
value: 0.
|
864 |
name: Human normalized total reward
|
865 |
- task:
|
866 |
type: reinforcement-learning
|
@@ -870,13 +870,13 @@ model-index:
|
|
870 |
type: atari-robotank
|
871 |
metrics:
|
872 |
- type: total_reward
|
873 |
-
value:
|
874 |
name: Total reward
|
875 |
- type: expert_normalized_total_reward
|
876 |
-
value: 0.
|
877 |
name: Expert normalized total reward
|
878 |
- type: human_normalized_total_reward
|
879 |
-
value:
|
880 |
name: Human normalized total reward
|
881 |
- task:
|
882 |
type: reinforcement-learning
|
@@ -886,13 +886,13 @@ model-index:
|
|
886 |
type: atari-seaquest
|
887 |
metrics:
|
888 |
- type: total_reward
|
889 |
-
value:
|
890 |
name: Total reward
|
891 |
- type: expert_normalized_total_reward
|
892 |
-
value: 0.
|
893 |
name: Expert normalized total reward
|
894 |
- type: human_normalized_total_reward
|
895 |
-
value: 0.
|
896 |
name: Human normalized total reward
|
897 |
- task:
|
898 |
type: reinforcement-learning
|
@@ -902,13 +902,13 @@ model-index:
|
|
902 |
type: atari-skiing
|
903 |
metrics:
|
904 |
- type: total_reward
|
905 |
-
value: -
|
906 |
name: Total reward
|
907 |
- type: expert_normalized_total_reward
|
908 |
-
value:
|
909 |
name: Expert normalized total reward
|
910 |
- type: human_normalized_total_reward
|
911 |
-
value:
|
912 |
name: Human normalized total reward
|
913 |
- task:
|
914 |
type: reinforcement-learning
|
@@ -918,13 +918,13 @@ model-index:
|
|
918 |
type: atari-solaris
|
919 |
metrics:
|
920 |
- type: total_reward
|
921 |
-
value:
|
922 |
name: Total reward
|
923 |
- type: expert_normalized_total_reward
|
924 |
-
value:
|
925 |
name: Expert normalized total reward
|
926 |
- type: human_normalized_total_reward
|
927 |
-
value:
|
928 |
name: Human normalized total reward
|
929 |
- task:
|
930 |
type: reinforcement-learning
|
@@ -934,13 +934,13 @@ model-index:
|
|
934 |
type: atari-spaceinvaders
|
935 |
metrics:
|
936 |
- type: total_reward
|
937 |
-
value:
|
938 |
name: Total reward
|
939 |
- type: expert_normalized_total_reward
|
940 |
value: 0.01 +/- 0.01
|
941 |
name: Expert normalized total reward
|
942 |
- type: human_normalized_total_reward
|
943 |
-
value: 0.
|
944 |
name: Human normalized total reward
|
945 |
- task:
|
946 |
type: reinforcement-learning
|
@@ -950,13 +950,13 @@ model-index:
|
|
950 |
type: atari-stargunner
|
951 |
metrics:
|
952 |
- type: total_reward
|
953 |
-
value:
|
954 |
name: Total reward
|
955 |
- type: expert_normalized_total_reward
|
956 |
-
value: 0.
|
957 |
name: Expert normalized total reward
|
958 |
- type: human_normalized_total_reward
|
959 |
-
value: 0.
|
960 |
name: Human normalized total reward
|
961 |
- task:
|
962 |
type: reinforcement-learning
|
@@ -966,13 +966,13 @@ model-index:
|
|
966 |
type: atari-surround
|
967 |
metrics:
|
968 |
- type: total_reward
|
969 |
-
value:
|
970 |
name: Total reward
|
971 |
- type: expert_normalized_total_reward
|
972 |
-
value: 0.
|
973 |
name: Expert normalized total reward
|
974 |
- type: human_normalized_total_reward
|
975 |
-
value: 0.
|
976 |
name: Human normalized total reward
|
977 |
- task:
|
978 |
type: reinforcement-learning
|
@@ -982,13 +982,13 @@ model-index:
|
|
982 |
type: atari-tennis
|
983 |
metrics:
|
984 |
- type: total_reward
|
985 |
-
value: -
|
986 |
name: Total reward
|
987 |
- type: expert_normalized_total_reward
|
988 |
-
value: 0.
|
989 |
name: Expert normalized total reward
|
990 |
- type: human_normalized_total_reward
|
991 |
-
value: 0.
|
992 |
name: Human normalized total reward
|
993 |
- task:
|
994 |
type: reinforcement-learning
|
@@ -998,13 +998,13 @@ model-index:
|
|
998 |
type: atari-timepilot
|
999 |
metrics:
|
1000 |
- type: total_reward
|
1001 |
-
value:
|
1002 |
name: Total reward
|
1003 |
- type: expert_normalized_total_reward
|
1004 |
-
value: 0.
|
1005 |
name: Expert normalized total reward
|
1006 |
- type: human_normalized_total_reward
|
1007 |
-
value:
|
1008 |
name: Human normalized total reward
|
1009 |
- task:
|
1010 |
type: reinforcement-learning
|
@@ -1014,13 +1014,13 @@ model-index:
|
|
1014 |
type: atari-tutankham
|
1015 |
metrics:
|
1016 |
- type: total_reward
|
1017 |
-
value:
|
1018 |
name: Total reward
|
1019 |
- type: expert_normalized_total_reward
|
1020 |
-
value: 0.
|
1021 |
name: Expert normalized total reward
|
1022 |
- type: human_normalized_total_reward
|
1023 |
-
value: 0.
|
1024 |
name: Human normalized total reward
|
1025 |
- task:
|
1026 |
type: reinforcement-learning
|
@@ -1030,13 +1030,13 @@ model-index:
|
|
1030 |
type: atari-upndown
|
1031 |
metrics:
|
1032 |
- type: total_reward
|
1033 |
-
value:
|
1034 |
name: Total reward
|
1035 |
- type: expert_normalized_total_reward
|
1036 |
-
value: 0.
|
1037 |
name: Expert normalized total reward
|
1038 |
- type: human_normalized_total_reward
|
1039 |
-
value:
|
1040 |
name: Human normalized total reward
|
1041 |
- task:
|
1042 |
type: reinforcement-learning
|
@@ -1062,13 +1062,13 @@ model-index:
|
|
1062 |
type: atari-videopinball
|
1063 |
metrics:
|
1064 |
- type: total_reward
|
1065 |
-
value:
|
1066 |
name: Total reward
|
1067 |
- type: expert_normalized_total_reward
|
1068 |
-
value: 0.
|
1069 |
name: Expert normalized total reward
|
1070 |
- type: human_normalized_total_reward
|
1071 |
-
value: 0.
|
1072 |
name: Human normalized total reward
|
1073 |
- task:
|
1074 |
type: reinforcement-learning
|
@@ -1078,13 +1078,13 @@ model-index:
|
|
1078 |
type: atari-wizardofwor
|
1079 |
metrics:
|
1080 |
- type: total_reward
|
1081 |
-
value:
|
1082 |
name: Total reward
|
1083 |
- type: expert_normalized_total_reward
|
1084 |
-
value: 0.
|
1085 |
name: Expert normalized total reward
|
1086 |
- type: human_normalized_total_reward
|
1087 |
-
value: 0.
|
1088 |
name: Human normalized total reward
|
1089 |
- task:
|
1090 |
type: reinforcement-learning
|
@@ -1094,13 +1094,13 @@ model-index:
|
|
1094 |
type: atari-yarsrevenge
|
1095 |
metrics:
|
1096 |
- type: total_reward
|
1097 |
-
value:
|
1098 |
name: Total reward
|
1099 |
- type: expert_normalized_total_reward
|
1100 |
-
value: 0.
|
1101 |
name: Expert normalized total reward
|
1102 |
- type: human_normalized_total_reward
|
1103 |
-
value: 0.
|
1104 |
name: Human normalized total reward
|
1105 |
- task:
|
1106 |
type: reinforcement-learning
|
@@ -1110,13 +1110,13 @@ model-index:
|
|
1110 |
type: atari-zaxxon
|
1111 |
metrics:
|
1112 |
- type: total_reward
|
1113 |
-
value:
|
1114 |
name: Total reward
|
1115 |
- type: expert_normalized_total_reward
|
1116 |
-
value: 0.
|
1117 |
name: Expert normalized total reward
|
1118 |
- type: human_normalized_total_reward
|
1119 |
-
value: 0.
|
1120 |
name: Human normalized total reward
|
1121 |
- task:
|
1122 |
type: reinforcement-learning
|
@@ -2441,7 +2441,6 @@ This is a multi-modal and multi-task model.
|
|
2441 |
|
2442 |
<details>
|
2443 |
<summary>The model was trained on the following tasks:</summary>
|
2444 |
-
|
2445 |
- Alien
|
2446 |
- Amidar
|
2447 |
- Assault
|
@@ -2599,6 +2598,7 @@ This is a multi-modal and multi-task model.
|
|
2599 |
- Humanoid Standup
|
2600 |
- Swimmer
|
2601 |
- Walker 2d
|
|
|
2602 |
</details>
|
2603 |
|
2604 |
## How to Get Started with the Model
|
@@ -2610,3 +2610,4 @@ from transformers import AutoModelForCausalLM
|
|
2610 |
|
2611 |
model = AutoModelForCausalLM.from_pretrained("jat-project/jat")
|
2612 |
```
|
|
|
|
171 |
type: atari
|
172 |
metrics:
|
173 |
- type: iqm_expert_normalized_total_reward
|
174 |
+
value: 0.14 [0.14, 0.15]
|
175 |
name: IQM expert normalized total reward
|
176 |
- type: iqm_human_normalized_total_reward
|
177 |
+
value: 0.38 [0.37, 0.38]
|
178 |
name: IQM human normalized total reward
|
179 |
- task:
|
180 |
type: reinforcement-learning
|
|
|
214 |
type: atari-alien
|
215 |
metrics:
|
216 |
- type: total_reward
|
217 |
+
value: 1474.90 +/- 588.75
|
218 |
name: Total reward
|
219 |
- type: expert_normalized_total_reward
|
220 |
+
value: 0.07 +/- 0.04
|
221 |
name: Expert normalized total reward
|
222 |
- type: human_normalized_total_reward
|
223 |
+
value: 0.18 +/- 0.09
|
224 |
name: Human normalized total reward
|
225 |
- task:
|
226 |
type: reinforcement-learning
|
|
|
230 |
type: atari-amidar
|
231 |
metrics:
|
232 |
- type: total_reward
|
233 |
+
value: 104.89 +/- 103.52
|
234 |
name: Total reward
|
235 |
- type: expert_normalized_total_reward
|
236 |
+
value: 0.05 +/- 0.05
|
237 |
name: Expert normalized total reward
|
238 |
- type: human_normalized_total_reward
|
239 |
+
value: 0.06 +/- 0.06
|
240 |
name: Human normalized total reward
|
241 |
- task:
|
242 |
type: reinforcement-learning
|
|
|
246 |
type: atari-assault
|
247 |
metrics:
|
248 |
- type: total_reward
|
249 |
+
value: 1650.07 +/- 820.99
|
250 |
name: Total reward
|
251 |
- type: expert_normalized_total_reward
|
252 |
+
value: 0.09 +/- 0.05
|
253 |
name: Expert normalized total reward
|
254 |
- type: human_normalized_total_reward
|
255 |
+
value: 2.75 +/- 1.58
|
256 |
name: Human normalized total reward
|
257 |
- task:
|
258 |
type: reinforcement-learning
|
|
|
262 |
type: atari-asterix
|
263 |
metrics:
|
264 |
- type: total_reward
|
265 |
+
value: 800.00 +/- 584.85
|
266 |
name: Total reward
|
267 |
- type: expert_normalized_total_reward
|
268 |
+
value: 0.17 +/- 0.17
|
269 |
name: Expert normalized total reward
|
270 |
- type: human_normalized_total_reward
|
271 |
+
value: 0.07 +/- 0.07
|
272 |
name: Human normalized total reward
|
273 |
- task:
|
274 |
type: reinforcement-learning
|
|
|
278 |
type: atari-asteroids
|
279 |
metrics:
|
280 |
- type: total_reward
|
281 |
+
value: 1385.30 +/- 507.53
|
282 |
name: Total reward
|
283 |
- type: expert_normalized_total_reward
|
284 |
value: 0.00 +/- 0.00
|
285 |
name: Expert normalized total reward
|
286 |
- type: human_normalized_total_reward
|
287 |
+
value: 0.01 +/- 0.01
|
288 |
name: Human normalized total reward
|
289 |
- task:
|
290 |
type: reinforcement-learning
|
|
|
294 |
type: atari-atlantis
|
295 |
metrics:
|
296 |
- type: total_reward
|
297 |
+
value: 66980.00 +/- 158449.73
|
298 |
name: Total reward
|
299 |
- type: expert_normalized_total_reward
|
300 |
+
value: 0.18 +/- 0.51
|
301 |
name: Expert normalized total reward
|
302 |
- type: human_normalized_total_reward
|
303 |
+
value: 3.35 +/- 9.79
|
304 |
name: Human normalized total reward
|
305 |
- task:
|
306 |
type: reinforcement-learning
|
|
|
310 |
type: atari-bankheist
|
311 |
metrics:
|
312 |
- type: total_reward
|
313 |
+
value: 948.30 +/- 199.86
|
314 |
name: Total reward
|
315 |
- type: expert_normalized_total_reward
|
316 |
+
value: 0.71 +/- 0.15
|
317 |
name: Expert normalized total reward
|
318 |
- type: human_normalized_total_reward
|
319 |
+
value: 1.26 +/- 0.27
|
320 |
name: Human normalized total reward
|
321 |
- task:
|
322 |
type: reinforcement-learning
|
|
|
326 |
type: atari-battlezone
|
327 |
metrics:
|
328 |
- type: total_reward
|
329 |
+
value: 17420.00 +/- 6071.54
|
330 |
name: Total reward
|
331 |
- type: expert_normalized_total_reward
|
332 |
+
value: 0.06 +/- 0.02
|
333 |
name: Expert normalized total reward
|
334 |
- type: human_normalized_total_reward
|
335 |
+
value: 0.47 +/- 0.16
|
336 |
name: Human normalized total reward
|
337 |
- task:
|
338 |
type: reinforcement-learning
|
|
|
342 |
type: atari-beamrider
|
343 |
metrics:
|
344 |
- type: total_reward
|
345 |
+
value: 797.32 +/- 328.31
|
346 |
name: Total reward
|
347 |
- type: expert_normalized_total_reward
|
348 |
value: 0.01 +/- 0.01
|
349 |
name: Expert normalized total reward
|
350 |
- type: human_normalized_total_reward
|
351 |
+
value: 0.03 +/- 0.02
|
352 |
name: Human normalized total reward
|
353 |
- task:
|
354 |
type: reinforcement-learning
|
|
|
358 |
type: atari-berzerk
|
359 |
metrics:
|
360 |
- type: total_reward
|
361 |
+
value: 687.30 +/- 331.91
|
362 |
name: Total reward
|
363 |
- type: expert_normalized_total_reward
|
364 |
+
value: 0.01 +/- 0.01
|
365 |
name: Expert normalized total reward
|
366 |
- type: human_normalized_total_reward
|
367 |
+
value: 0.22 +/- 0.13
|
368 |
name: Human normalized total reward
|
369 |
- task:
|
370 |
type: reinforcement-learning
|
|
|
374 |
type: atari-bowling
|
375 |
metrics:
|
376 |
- type: total_reward
|
377 |
+
value: 22.41 +/- 5.57
|
378 |
name: Total reward
|
379 |
- type: expert_normalized_total_reward
|
380 |
value: 1.00 +/- 0.00
|
381 |
name: Expert normalized total reward
|
382 |
- type: human_normalized_total_reward
|
383 |
+
value: -0.01 +/- 0.04
|
384 |
name: Human normalized total reward
|
385 |
- task:
|
386 |
type: reinforcement-learning
|
|
|
390 |
type: atari-boxing
|
391 |
metrics:
|
392 |
- type: total_reward
|
393 |
+
value: 90.10 +/- 23.05
|
394 |
name: Total reward
|
395 |
- type: expert_normalized_total_reward
|
396 |
+
value: 0.92 +/- 0.24
|
397 |
name: Expert normalized total reward
|
398 |
- type: human_normalized_total_reward
|
399 |
+
value: 7.50 +/- 1.92
|
400 |
name: Human normalized total reward
|
401 |
- task:
|
402 |
type: reinforcement-learning
|
|
|
406 |
type: atari-breakout
|
407 |
metrics:
|
408 |
- type: total_reward
|
409 |
+
value: 8.82 +/- 5.63
|
410 |
name: Total reward
|
411 |
- type: expert_normalized_total_reward
|
412 |
value: 0.01 +/- 0.01
|
413 |
name: Expert normalized total reward
|
414 |
- type: human_normalized_total_reward
|
415 |
+
value: 0.25 +/- 0.20
|
416 |
name: Human normalized total reward
|
417 |
- task:
|
418 |
type: reinforcement-learning
|
|
|
422 |
type: atari-centipede
|
423 |
metrics:
|
424 |
- type: total_reward
|
425 |
+
value: 5589.92 +/- 2567.26
|
426 |
name: Total reward
|
427 |
- type: expert_normalized_total_reward
|
428 |
+
value: 0.37 +/- 0.27
|
429 |
name: Expert normalized total reward
|
430 |
- type: human_normalized_total_reward
|
431 |
+
value: 0.35 +/- 0.26
|
432 |
name: Human normalized total reward
|
433 |
- task:
|
434 |
type: reinforcement-learning
|
|
|
438 |
type: atari-choppercommand
|
439 |
metrics:
|
440 |
- type: total_reward
|
441 |
+
value: 2417.00 +/- 1489.90
|
442 |
name: Total reward
|
443 |
- type: expert_normalized_total_reward
|
444 |
+
value: 0.02 +/- 0.02
|
445 |
name: Expert normalized total reward
|
446 |
- type: human_normalized_total_reward
|
447 |
+
value: 0.24 +/- 0.23
|
448 |
name: Human normalized total reward
|
449 |
- task:
|
450 |
type: reinforcement-learning
|
|
|
454 |
type: atari-crazyclimber
|
455 |
metrics:
|
456 |
- type: total_reward
|
457 |
+
value: 97639.00 +/- 26184.68
|
458 |
name: Total reward
|
459 |
- type: expert_normalized_total_reward
|
460 |
+
value: 0.52 +/- 0.16
|
461 |
name: Expert normalized total reward
|
462 |
- type: human_normalized_total_reward
|
463 |
+
value: 3.47 +/- 1.05
|
464 |
name: Human normalized total reward
|
465 |
- task:
|
466 |
type: reinforcement-learning
|
|
|
470 |
type: atari-defender
|
471 |
metrics:
|
472 |
- type: total_reward
|
473 |
+
value: 39323.50 +/- 15202.98
|
474 |
name: Total reward
|
475 |
- type: expert_normalized_total_reward
|
476 |
+
value: 0.10 +/- 0.04
|
477 |
name: Expert normalized total reward
|
478 |
- type: human_normalized_total_reward
|
479 |
+
value: 2.30 +/- 0.96
|
480 |
name: Human normalized total reward
|
481 |
- task:
|
482 |
type: reinforcement-learning
|
|
|
486 |
type: atari-demonattack
|
487 |
metrics:
|
488 |
- type: total_reward
|
489 |
+
value: 815.30 +/- 989.67
|
490 |
name: Total reward
|
491 |
- type: expert_normalized_total_reward
|
492 |
+
value: 0.01 +/- 0.01
|
493 |
name: Expert normalized total reward
|
494 |
- type: human_normalized_total_reward
|
495 |
+
value: 0.36 +/- 0.54
|
496 |
name: Human normalized total reward
|
497 |
- task:
|
498 |
type: reinforcement-learning
|
|
|
502 |
type: atari-doubledunk
|
503 |
metrics:
|
504 |
- type: total_reward
|
505 |
+
value: 14.42 +/- 9.97
|
506 |
name: Total reward
|
507 |
- type: expert_normalized_total_reward
|
508 |
+
value: 0.84 +/- 0.25
|
509 |
name: Expert normalized total reward
|
510 |
- type: human_normalized_total_reward
|
511 |
+
value: 0.94 +/- 0.28
|
512 |
name: Human normalized total reward
|
513 |
- task:
|
514 |
type: reinforcement-learning
|
|
|
518 |
type: atari-enduro
|
519 |
metrics:
|
520 |
- type: total_reward
|
521 |
+
value: 108.52 +/- 42.73
|
522 |
name: Total reward
|
523 |
- type: expert_normalized_total_reward
|
524 |
+
value: 0.05 +/- 0.02
|
525 |
name: Expert normalized total reward
|
526 |
- type: human_normalized_total_reward
|
527 |
+
value: 0.13 +/- 0.05
|
528 |
name: Human normalized total reward
|
529 |
- task:
|
530 |
type: reinforcement-learning
|
|
|
534 |
type: atari-fishingderby
|
535 |
metrics:
|
536 |
- type: total_reward
|
537 |
+
value: -30.35 +/- 24.37
|
538 |
name: Total reward
|
539 |
- type: expert_normalized_total_reward
|
540 |
+
value: 0.62 +/- 0.25
|
541 |
name: Expert normalized total reward
|
542 |
- type: human_normalized_total_reward
|
543 |
+
value: 0.47 +/- 0.19
|
544 |
name: Human normalized total reward
|
545 |
- task:
|
546 |
type: reinforcement-learning
|
|
|
550 |
type: atari-freeway
|
551 |
metrics:
|
552 |
- type: total_reward
|
553 |
+
value: 27.49 +/- 1.63
|
554 |
name: Total reward
|
555 |
- type: expert_normalized_total_reward
|
556 |
+
value: 0.81 +/- 0.05
|
557 |
name: Expert normalized total reward
|
558 |
- type: human_normalized_total_reward
|
559 |
+
value: 0.93 +/- 0.06
|
560 |
name: Human normalized total reward
|
561 |
- task:
|
562 |
type: reinforcement-learning
|
|
|
566 |
type: atari-frostbite
|
567 |
metrics:
|
568 |
- type: total_reward
|
569 |
+
value: 2769.60 +/- 1445.61
|
570 |
name: Total reward
|
571 |
- type: expert_normalized_total_reward
|
572 |
+
value: 0.21 +/- 0.11
|
573 |
name: Expert normalized total reward
|
574 |
- type: human_normalized_total_reward
|
575 |
+
value: 0.63 +/- 0.34
|
576 |
name: Human normalized total reward
|
577 |
- task:
|
578 |
type: reinforcement-learning
|
|
|
582 |
type: atari-gopher
|
583 |
metrics:
|
584 |
- type: total_reward
|
585 |
+
value: 5340.60 +/- 2547.07
|
586 |
name: Total reward
|
587 |
- type: expert_normalized_total_reward
|
588 |
+
value: 0.06 +/- 0.03
|
589 |
name: Expert normalized total reward
|
590 |
- type: human_normalized_total_reward
|
591 |
+
value: 2.36 +/- 1.18
|
592 |
name: Human normalized total reward
|
593 |
- task:
|
594 |
type: reinforcement-learning
|
|
|
598 |
type: atari-gravitar
|
599 |
metrics:
|
600 |
- type: total_reward
|
601 |
+
value: 1269.50 +/- 902.99
|
602 |
name: Total reward
|
603 |
- type: expert_normalized_total_reward
|
604 |
+
value: 0.29 +/- 0.24
|
605 |
name: Expert normalized total reward
|
606 |
- type: human_normalized_total_reward
|
607 |
+
value: 0.34 +/- 0.28
|
608 |
name: Human normalized total reward
|
609 |
- task:
|
610 |
type: reinforcement-learning
|
|
|
614 |
type: atari-hero
|
615 |
metrics:
|
616 |
- type: total_reward
|
617 |
+
value: 11709.65 +/- 3233.53
|
618 |
name: Total reward
|
619 |
- type: expert_normalized_total_reward
|
620 |
+
value: 0.24 +/- 0.07
|
621 |
name: Expert normalized total reward
|
622 |
- type: human_normalized_total_reward
|
623 |
+
value: 0.36 +/- 0.11
|
624 |
name: Human normalized total reward
|
625 |
- task:
|
626 |
type: reinforcement-learning
|
|
|
630 |
type: atari-icehockey
|
631 |
metrics:
|
632 |
- type: total_reward
|
633 |
+
value: 7.48 +/- 5.60
|
634 |
name: Total reward
|
635 |
- type: expert_normalized_total_reward
|
636 |
+
value: 0.51 +/- 0.15
|
637 |
name: Expert normalized total reward
|
638 |
- type: human_normalized_total_reward
|
639 |
+
value: 1.54 +/- 0.46
|
640 |
name: Human normalized total reward
|
641 |
- task:
|
642 |
type: reinforcement-learning
|
|
|
646 |
type: atari-jamesbond
|
647 |
metrics:
|
648 |
- type: total_reward
|
649 |
+
value: 327.50 +/- 123.16
|
650 |
name: Total reward
|
651 |
- type: expert_normalized_total_reward
|
652 |
value: 0.01 +/- 0.00
|
653 |
name: Expert normalized total reward
|
654 |
- type: human_normalized_total_reward
|
655 |
+
value: 1.09 +/- 0.45
|
656 |
name: Human normalized total reward
|
657 |
- task:
|
658 |
type: reinforcement-learning
|
|
|
662 |
type: atari-kangaroo
|
663 |
metrics:
|
664 |
- type: total_reward
|
665 |
+
value: 378.00 +/- 343.97
|
666 |
name: Total reward
|
667 |
- type: expert_normalized_total_reward
|
668 |
+
value: 0.62 +/- 0.66
|
669 |
name: Expert normalized total reward
|
670 |
- type: human_normalized_total_reward
|
671 |
+
value: 0.11 +/- 0.12
|
672 |
name: Human normalized total reward
|
673 |
- task:
|
674 |
type: reinforcement-learning
|
|
|
678 |
type: atari-krull
|
679 |
metrics:
|
680 |
- type: total_reward
|
681 |
+
value: 10720.50 +/- 1284.13
|
682 |
name: Total reward
|
683 |
- type: expert_normalized_total_reward
|
684 |
+
value: 0.93 +/- 0.13
|
685 |
name: Expert normalized total reward
|
686 |
- type: human_normalized_total_reward
|
687 |
+
value: 8.55 +/- 1.20
|
688 |
name: Human normalized total reward
|
689 |
- task:
|
690 |
type: reinforcement-learning
|
|
|
694 |
type: atari-kungfumaster
|
695 |
metrics:
|
696 |
- type: total_reward
|
697 |
+
value: 288.00 +/- 255.06
|
698 |
name: Total reward
|
699 |
- type: expert_normalized_total_reward
|
700 |
+
value: 0.00 +/- 0.01
|
701 |
name: Expert normalized total reward
|
702 |
- type: human_normalized_total_reward
|
703 |
+
value: 0.00 +/- 0.01
|
704 |
name: Human normalized total reward
|
705 |
- task:
|
706 |
type: reinforcement-learning
|
|
|
726 |
type: atari-mspacman
|
727 |
metrics:
|
728 |
- type: total_reward
|
729 |
+
value: 1573.10 +/- 483.96
|
730 |
name: Total reward
|
731 |
- type: expert_normalized_total_reward
|
732 |
+
value: 0.19 +/- 0.07
|
733 |
name: Expert normalized total reward
|
734 |
- type: human_normalized_total_reward
|
735 |
+
value: 0.19 +/- 0.07
|
736 |
name: Human normalized total reward
|
737 |
- task:
|
738 |
type: reinforcement-learning
|
|
|
742 |
type: atari-namethisgame
|
743 |
metrics:
|
744 |
- type: total_reward
|
745 |
+
value: 7523.30 +/- 2471.38
|
746 |
name: Total reward
|
747 |
- type: expert_normalized_total_reward
|
748 |
+
value: 0.25 +/- 0.12
|
749 |
name: Expert normalized total reward
|
750 |
- type: human_normalized_total_reward
|
751 |
+
value: 0.91 +/- 0.43
|
752 |
name: Human normalized total reward
|
753 |
- task:
|
754 |
type: reinforcement-learning
|
|
|
758 |
type: atari-phoenix
|
759 |
metrics:
|
760 |
- type: total_reward
|
761 |
+
value: 2197.90 +/- 1795.38
|
762 |
name: Total reward
|
763 |
- type: expert_normalized_total_reward
|
764 |
value: 0.00 +/- 0.00
|
765 |
name: Expert normalized total reward
|
766 |
- type: human_normalized_total_reward
|
767 |
+
value: 0.22 +/- 0.28
|
768 |
name: Human normalized total reward
|
769 |
- task:
|
770 |
type: reinforcement-learning
|
|
|
774 |
type: atari-pitfall
|
775 |
metrics:
|
776 |
- type: total_reward
|
777 |
+
value: -6.68 +/- 19.05
|
778 |
name: Total reward
|
779 |
- type: expert_normalized_total_reward
|
780 |
+
value: 0.98 +/- 0.08
|
781 |
name: Expert normalized total reward
|
782 |
- type: human_normalized_total_reward
|
783 |
+
value: 0.03 +/- 0.00
|
784 |
name: Human normalized total reward
|
785 |
- task:
|
786 |
type: reinforcement-learning
|
|
|
790 |
type: atari-pong
|
791 |
metrics:
|
792 |
- type: total_reward
|
793 |
+
value: 13.69 +/- 13.35
|
794 |
name: Total reward
|
795 |
- type: expert_normalized_total_reward
|
796 |
+
value: 0.82 +/- 0.32
|
797 |
name: Expert normalized total reward
|
798 |
- type: human_normalized_total_reward
|
799 |
+
value: 0.97 +/- 0.38
|
800 |
name: Human normalized total reward
|
801 |
- task:
|
802 |
type: reinforcement-learning
|
|
|
806 |
type: atari-privateeye
|
807 |
metrics:
|
808 |
- type: total_reward
|
809 |
+
value: 44.00 +/- 49.64
|
810 |
name: Total reward
|
811 |
- type: expert_normalized_total_reward
|
812 |
+
value: 0.25 +/- 0.66
|
813 |
name: Expert normalized total reward
|
814 |
- type: human_normalized_total_reward
|
815 |
value: 0.00 +/- 0.00
|
|
|
822 |
type: atari-qbert
|
823 |
metrics:
|
824 |
- type: total_reward
|
825 |
+
value: 1951.50 +/- 2577.24
|
826 |
name: Total reward
|
827 |
- type: expert_normalized_total_reward
|
828 |
+
value: 0.04 +/- 0.06
|
829 |
name: Expert normalized total reward
|
830 |
- type: human_normalized_total_reward
|
831 |
+
value: 0.13 +/- 0.19
|
832 |
name: Human normalized total reward
|
833 |
- task:
|
834 |
type: reinforcement-learning
|
|
|
838 |
type: atari-riverraid
|
839 |
metrics:
|
840 |
- type: total_reward
|
841 |
+
value: 3758.50 +/- 1536.66
|
842 |
name: Total reward
|
843 |
- type: expert_normalized_total_reward
|
844 |
+
value: 0.18 +/- 0.11
|
845 |
name: Expert normalized total reward
|
846 |
- type: human_normalized_total_reward
|
847 |
+
value: 0.15 +/- 0.10
|
848 |
name: Human normalized total reward
|
849 |
- task:
|
850 |
type: reinforcement-learning
|
|
|
854 |
type: atari-roadrunner
|
855 |
metrics:
|
856 |
- type: total_reward
|
857 |
+
value: 6407.00 +/- 4847.36
|
858 |
name: Total reward
|
859 |
- type: expert_normalized_total_reward
|
860 |
+
value: 0.08 +/- 0.06
|
861 |
name: Expert normalized total reward
|
862 |
- type: human_normalized_total_reward
|
863 |
+
value: 0.82 +/- 0.62
|
864 |
name: Human normalized total reward
|
865 |
- task:
|
866 |
type: reinforcement-learning
|
|
|
870 |
type: atari-robotank
|
871 |
metrics:
|
872 |
- type: total_reward
|
873 |
+
value: 11.34 +/- 5.52
|
874 |
name: Total reward
|
875 |
- type: expert_normalized_total_reward
|
876 |
+
value: 0.12 +/- 0.07
|
877 |
name: Expert normalized total reward
|
878 |
- type: human_normalized_total_reward
|
879 |
+
value: 0.94 +/- 0.57
|
880 |
name: Human normalized total reward
|
881 |
- task:
|
882 |
type: reinforcement-learning
|
|
|
886 |
type: atari-seaquest
|
887 |
metrics:
|
888 |
- type: total_reward
|
889 |
+
value: 804.00 +/- 403.33
|
890 |
name: Total reward
|
891 |
- type: expert_normalized_total_reward
|
892 |
+
value: 0.29 +/- 0.16
|
893 |
name: Expert normalized total reward
|
894 |
- type: human_normalized_total_reward
|
895 |
+
value: 0.02 +/- 0.01
|
896 |
name: Human normalized total reward
|
897 |
- task:
|
898 |
type: reinforcement-learning
|
|
|
902 |
type: atari-skiing
|
903 |
metrics:
|
904 |
- type: total_reward
|
905 |
+
value: -16231.54 +/- 6060.48
|
906 |
name: Total reward
|
907 |
- type: expert_normalized_total_reward
|
908 |
+
value: 0.14 +/- 0.95
|
909 |
name: Expert normalized total reward
|
910 |
- type: human_normalized_total_reward
|
911 |
+
value: 0.07 +/- 0.47
|
912 |
name: Human normalized total reward
|
913 |
- task:
|
914 |
type: reinforcement-learning
|
|
|
918 |
type: atari-solaris
|
919 |
metrics:
|
920 |
- type: total_reward
|
921 |
+
value: 1286.60 +/- 446.70
|
922 |
name: Total reward
|
923 |
- type: expert_normalized_total_reward
|
924 |
+
value: 0.43 +/- 3.81
|
925 |
name: Expert normalized total reward
|
926 |
- type: human_normalized_total_reward
|
927 |
+
value: 0.00 +/- 0.04
|
928 |
name: Human normalized total reward
|
929 |
- task:
|
930 |
type: reinforcement-learning
|
|
|
934 |
type: atari-spaceinvaders
|
935 |
metrics:
|
936 |
- type: total_reward
|
937 |
+
value: 325.45 +/- 163.36
|
938 |
name: Total reward
|
939 |
- type: expert_normalized_total_reward
|
940 |
value: 0.01 +/- 0.01
|
941 |
name: Expert normalized total reward
|
942 |
- type: human_normalized_total_reward
|
943 |
+
value: 0.12 +/- 0.11
|
944 |
name: Human normalized total reward
|
945 |
- task:
|
946 |
type: reinforcement-learning
|
|
|
950 |
type: atari-stargunner
|
951 |
metrics:
|
952 |
- type: total_reward
|
953 |
+
value: 4379.00 +/- 3027.22
|
954 |
name: Total reward
|
955 |
- type: expert_normalized_total_reward
|
956 |
+
value: 0.01 +/- 0.01
|
957 |
name: Expert normalized total reward
|
958 |
- type: human_normalized_total_reward
|
959 |
+
value: 0.39 +/- 0.32
|
960 |
name: Human normalized total reward
|
961 |
- task:
|
962 |
type: reinforcement-learning
|
|
|
966 |
type: atari-surround
|
967 |
metrics:
|
968 |
- type: total_reward
|
969 |
+
value: 2.67 +/- 4.74
|
970 |
name: Total reward
|
971 |
- type: expert_normalized_total_reward
|
972 |
+
value: 0.65 +/- 0.24
|
973 |
name: Expert normalized total reward
|
974 |
- type: human_normalized_total_reward
|
975 |
+
value: 0.77 +/- 0.29
|
976 |
name: Human normalized total reward
|
977 |
- task:
|
978 |
type: reinforcement-learning
|
|
|
982 |
type: atari-tennis
|
983 |
metrics:
|
984 |
- type: total_reward
|
985 |
+
value: -13.46 +/- 3.80
|
986 |
name: Total reward
|
987 |
- type: expert_normalized_total_reward
|
988 |
+
value: 0.30 +/- 0.11
|
989 |
name: Expert normalized total reward
|
990 |
- type: human_normalized_total_reward
|
991 |
+
value: 0.32 +/- 0.12
|
992 |
name: Human normalized total reward
|
993 |
- task:
|
994 |
type: reinforcement-learning
|
|
|
998 |
type: atari-timepilot
|
999 |
metrics:
|
1000 |
- type: total_reward
|
1001 |
+
value: 13028.00 +/- 5222.57
|
1002 |
name: Total reward
|
1003 |
- type: expert_normalized_total_reward
|
1004 |
+
value: 0.14 +/- 0.08
|
1005 |
name: Expert normalized total reward
|
1006 |
- type: human_normalized_total_reward
|
1007 |
+
value: 5.69 +/- 3.14
|
1008 |
name: Human normalized total reward
|
1009 |
- task:
|
1010 |
type: reinforcement-learning
|
|
|
1014 |
type: atari-tutankham
|
1015 |
metrics:
|
1016 |
- type: total_reward
|
1017 |
+
value: 85.66 +/- 61.77
|
1018 |
name: Total reward
|
1019 |
- type: expert_normalized_total_reward
|
1020 |
+
value: 0.27 +/- 0.22
|
1021 |
name: Expert normalized total reward
|
1022 |
- type: human_normalized_total_reward
|
1023 |
+
value: 0.48 +/- 0.40
|
1024 |
name: Human normalized total reward
|
1025 |
- task:
|
1026 |
type: reinforcement-learning
|
|
|
1030 |
type: atari-upndown
|
1031 |
metrics:
|
1032 |
- type: total_reward
|
1033 |
+
value: 17768.70 +/- 10321.95
|
1034 |
name: Total reward
|
1035 |
- type: expert_normalized_total_reward
|
1036 |
+
value: 0.04 +/- 0.02
|
1037 |
name: Expert normalized total reward
|
1038 |
- type: human_normalized_total_reward
|
1039 |
+
value: 1.54 +/- 0.92
|
1040 |
name: Human normalized total reward
|
1041 |
- task:
|
1042 |
type: reinforcement-learning
|
|
|
1062 |
type: atari-videopinball
|
1063 |
metrics:
|
1064 |
- type: total_reward
|
1065 |
+
value: 11917.43 +/- 8204.28
|
1066 |
name: Total reward
|
1067 |
- type: expert_normalized_total_reward
|
1068 |
+
value: 0.03 +/- 0.02
|
1069 |
name: Expert normalized total reward
|
1070 |
- type: human_normalized_total_reward
|
1071 |
+
value: 0.67 +/- 0.46
|
1072 |
name: Human normalized total reward
|
1073 |
- task:
|
1074 |
type: reinforcement-learning
|
|
|
1078 |
type: atari-wizardofwor
|
1079 |
metrics:
|
1080 |
- type: total_reward
|
1081 |
+
value: 2544.00 +/- 2902.42
|
1082 |
name: Total reward
|
1083 |
- type: expert_normalized_total_reward
|
1084 |
+
value: 0.04 +/- 0.06
|
1085 |
name: Expert normalized total reward
|
1086 |
- type: human_normalized_total_reward
|
1087 |
+
value: 0.47 +/- 0.69
|
1088 |
name: Human normalized total reward
|
1089 |
- task:
|
1090 |
type: reinforcement-learning
|
|
|
1094 |
type: atari-yarsrevenge
|
1095 |
metrics:
|
1096 |
- type: total_reward
|
1097 |
+
value: 12532.70 +/- 8062.85
|
1098 |
name: Total reward
|
1099 |
- type: expert_normalized_total_reward
|
1100 |
+
value: 0.04 +/- 0.03
|
1101 |
name: Expert normalized total reward
|
1102 |
- type: human_normalized_total_reward
|
1103 |
+
value: 0.18 +/- 0.16
|
1104 |
name: Human normalized total reward
|
1105 |
- task:
|
1106 |
type: reinforcement-learning
|
|
|
1110 |
type: atari-zaxxon
|
1111 |
metrics:
|
1112 |
- type: total_reward
|
1113 |
+
value: 6902.00 +/- 3206.09
|
1114 |
name: Total reward
|
1115 |
- type: expert_normalized_total_reward
|
1116 |
+
value: 0.09 +/- 0.04
|
1117 |
name: Expert normalized total reward
|
1118 |
- type: human_normalized_total_reward
|
1119 |
+
value: 0.75 +/- 0.35
|
1120 |
name: Human normalized total reward
|
1121 |
- task:
|
1122 |
type: reinforcement-learning
|
|
|
2441 |
|
2442 |
<details>
|
2443 |
<summary>The model was trained on the following tasks:</summary>
|
|
|
2444 |
- Alien
|
2445 |
- Amidar
|
2446 |
- Assault
|
|
|
2598 |
- Humanoid Standup
|
2599 |
- Swimmer
|
2600 |
- Walker 2d
|
2601 |
+
|
2602 |
</details>
|
2603 |
|
2604 |
## How to Get Started with the Model
|
|
|
2610 |
|
2611 |
model = AutoModelForCausalLM.from_pretrained("jat-project/jat")
|
2612 |
```
|
2613 |
+
|