diff --git "a/all_data_tasks/0/default.jsonl" "b/all_data_tasks/0/default.jsonl" deleted file mode 100644--- "a/all_data_tasks/0/default.jsonl" +++ /dev/null @@ -1,29 +0,0 @@ -{"index":13,"Rank":1,"Model":"GritLM-7B<\/a>","Model Size (Million Parameters)":7240,"Memory Usage (GB, fp32)":26.97,"Average":35.2,"ARCChallenge":26.68,"AlphaNLI":34.0,"HellaSwag":39.45,"PIQA":44.35,"Quail":11.69,"RARbCode":84.0,"RARbMath":82.35,"SIQA":7.23,"SpartQA":9.29,"TempReasonL1":7.15,"TempReasonL2Fact":58.38,"TempReasonL2Pure":11.22,"TempReasonL3Fact":44.29,"TempReasonL3Pure":14.15,"WinoGrande":53.74} -{"index":25,"Rank":2,"Model":"text-embedding-3-large-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":31.13,"ARCChallenge":21.22,"AlphaNLI":34.23,"HellaSwag":31.4,"PIQA":37.52,"Quail":13.6,"RARbCode":89.41,"RARbMath":87.73,"SIQA":4.99,"SpartQA":7.45,"TempReasonL1":2.07,"TempReasonL2Fact":39.77,"TempReasonL2Pure":11.04,"TempReasonL3Fact":37.04,"TempReasonL3Pure":15.51,"WinoGrande":33.92} -{"index":12,"Rank":3,"Model":"GritLM-7B-noinstruct<\/a>","Model Size (Million Parameters)":7240,"Memory Usage (GB, fp32)":26.97,"Average":30.57,"ARCChallenge":16.57,"AlphaNLI":29.56,"HellaSwag":36.03,"PIQA":35.8,"Quail":8.68,"RARbCode":83.14,"RARbMath":83.01,"SIQA":5.73,"SpartQA":1.56,"TempReasonL1":2.57,"TempReasonL2Fact":48.25,"TempReasonL2Pure":8.98,"TempReasonL3Fact":34.11,"TempReasonL3Pure":12.44,"WinoGrande":52.12} -{"index":26,"Rank":4,"Model":"text-embedding-3-large<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":29.95,"ARCChallenge":23.98,"AlphaNLI":37.27,"HellaSwag":34.12,"PIQA":41.96,"Quail":10.15,"RARbCode":89.64,"RARbMath":90.08,"SIQA":3.44,"SpartQA":7.51,"TempReasonL1":2.13,"TempReasonL2Fact":28.65,"TempReasonL2Pure":10.34,"TempReasonL3Fact":25.52,"TempReasonL3Pure":15.28,"WinoGrande":29.11} -{"index":16,"Rank":5,"Model":"e5-mistral-7b-instruct<\/a>","Model Size (Million Parameters)":7111,"Memory Usage (GB, fp32)":26.49,"Average":28.41,"ARCChallenge":17.81,"AlphaNLI":26.12,"HellaSwag":34.85,"PIQA":39.37,"Quail":7.01,"RARbCode":78.46,"RARbMath":72.16,"SIQA":5.42,"SpartQA":9.92,"TempReasonL1":3.31,"TempReasonL2Fact":36.9,"TempReasonL2Pure":9.18,"TempReasonL3Fact":30.18,"TempReasonL3Pure":14.31,"WinoGrande":41.21} -{"index":17,"Rank":6,"Model":"e5-mistral-7b-instruct-noinstruct<\/a>","Model Size (Million Parameters)":7111,"Memory Usage (GB, fp32)":26.49,"Average":28.04,"ARCChallenge":20.48,"AlphaNLI":18.88,"HellaSwag":32.25,"PIQA":32.8,"Quail":6.25,"RARbCode":79.84,"RARbMath":76.19,"SIQA":5.08,"SpartQA":10.87,"TempReasonL1":3.04,"TempReasonL2Fact":35.63,"TempReasonL2Pure":9.32,"TempReasonL3Fact":30.41,"TempReasonL3Pure":14.39,"WinoGrande":45.18} -{"index":10,"Rank":7,"Model":"Cohere-embed-english-v3.0-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":25.41,"ARCChallenge":10.1,"AlphaNLI":18.75,"HellaSwag":29.02,"PIQA":27.89,"Quail":7.77,"RARbCode":56.56,"RARbMath":72.05,"SIQA":5.03,"SpartQA":3.33,"TempReasonL1":1.43,"TempReasonL2Fact":40.46,"TempReasonL2Pure":2.39,"TempReasonL3Fact":33.87,"TempReasonL3Pure":7.52,"WinoGrande":65.02} -{"index":28,"Rank":8,"Model":"text-embedding-3-small<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":24.2,"ARCChallenge":14.63,"AlphaNLI":30.61,"HellaSwag":30.94,"PIQA":33.69,"Quail":6.11,"RARbCode":72.03,"RARbMath":71.07,"SIQA":3.03,"SpartQA":6.63,"TempReasonL1":2.35,"TempReasonL2Fact":25.68,"TempReasonL2Pure":2.76,"TempReasonL3Fact":22.09,"TempReasonL3Pure":9.79,"WinoGrande":31.53} -{"index":11,"Rank":9,"Model":"Cohere-embed-english-v3.0<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":23.65,"ARCChallenge":9.89,"AlphaNLI":15.1,"HellaSwag":26.35,"PIQA":28.49,"Quail":4.1,"RARbCode":57.19,"RARbMath":72.26,"SIQA":4.26,"SpartQA":3.75,"TempReasonL1":1.5,"TempReasonL2Fact":35.91,"TempReasonL2Pure":1.89,"TempReasonL3Fact":27.51,"TempReasonL3Pure":8.53,"WinoGrande":58.01} -{"index":24,"Rank":10,"Model":"text-embedding-ada-002<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":22.57,"ARCChallenge":13.3,"AlphaNLI":25.65,"HellaSwag":29.29,"PIQA":31.02,"Quail":5.83,"RARbCode":83.39,"RARbMath":73.21,"SIQA":3.14,"SpartQA":4.23,"TempReasonL1":1.68,"TempReasonL2Fact":19.93,"TempReasonL2Pure":2.6,"TempReasonL3Fact":18.02,"TempReasonL3Pure":7.58,"WinoGrande":19.65} -{"index":27,"Rank":11,"Model":"text-embedding-3-small-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":22.09,"ARCChallenge":13.76,"AlphaNLI":21.14,"HellaSwag":27.2,"PIQA":29.59,"Quail":6.64,"RARbCode":72.14,"RARbMath":64.31,"SIQA":2.98,"SpartQA":3.58,"TempReasonL1":2.29,"TempReasonL2Fact":26.34,"TempReasonL2Pure":3.17,"TempReasonL3Fact":22.72,"TempReasonL3Pure":9.98,"WinoGrande":25.49} -{"index":7,"Rank":12,"Model":"bge-m3<\/a>","Model Size (Million Parameters)":2270,"Memory Usage (GB, fp32)":8.46,"Average":21.48,"ARCChallenge":9.02,"AlphaNLI":24.73,"HellaSwag":25.67,"PIQA":22.93,"Quail":7.51,"RARbCode":38.8,"RARbMath":69.19,"SIQA":4.89,"SpartQA":7.49,"TempReasonL1":0.99,"TempReasonL2Fact":33.23,"TempReasonL2Pure":0.68,"TempReasonL3Fact":30.05,"TempReasonL3Pure":5.28,"WinoGrande":41.72} -{"index":6,"Rank":13,"Model":"bge-m3-instruct<\/a>","Model Size (Million Parameters)":2270,"Memory Usage (GB, fp32)":8.46,"Average":20.83,"ARCChallenge":9.03,"AlphaNLI":24.69,"HellaSwag":25.55,"PIQA":19.03,"Quail":7.08,"RARbCode":39.58,"RARbMath":64.51,"SIQA":4.77,"SpartQA":7.0,"TempReasonL1":0.8,"TempReasonL2Fact":34.99,"TempReasonL2Pure":0.62,"TempReasonL3Fact":32.47,"TempReasonL3Pure":7.01,"WinoGrande":35.33} -{"index":20,"Rank":14,"Model":"all-MiniLM-L6-v2<\/a>","Model Size (Million Parameters)":23,"Memory Usage (GB, fp32)":0.09,"Average":19.61,"ARCChallenge":9.48,"AlphaNLI":28.19,"HellaSwag":24.21,"PIQA":25.28,"Quail":3.92,"RARbCode":44.27,"RARbMath":68.19,"SIQA":1.56,"SpartQA":1.65,"TempReasonL1":1.53,"TempReasonL2Fact":17.65,"TempReasonL2Pure":0.46,"TempReasonL3Fact":14.16,"TempReasonL3Pure":6.33,"WinoGrande":47.33} -{"index":23,"Rank":15,"Model":"text-embedding-ada-002-instruct<\/a>","Model Size (Million Parameters)":"","Memory Usage (GB, fp32)":"","Average":19.56,"ARCChallenge":11.85,"AlphaNLI":10.62,"HellaSwag":24.8,"PIQA":23.87,"Quail":5.79,"RARbCode":82.36,"RARbMath":67.26,"SIQA":2.64,"SpartQA":4.75,"TempReasonL1":1.44,"TempReasonL2Fact":19.38,"TempReasonL2Pure":2.43,"TempReasonL3Fact":17.58,"TempReasonL3Pure":7.31,"WinoGrande":11.36} -{"index":1,"Rank":16,"Model":"dragon-plus<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":19.1,"ARCChallenge":8.91,"AlphaNLI":32.1,"HellaSwag":27.69,"PIQA":28.01,"Quail":4.09,"RARbCode":17.58,"RARbMath":45.09,"SIQA":2.0,"SpartQA":10.34,"TempReasonL1":1.82,"TempReasonL2Fact":17.45,"TempReasonL2Pure":0.55,"TempReasonL3Fact":15.71,"TempReasonL3Pure":7.97,"WinoGrande":67.18} -{"index":22,"Rank":17,"Model":"all-mpnet-base-v2<\/a>","Model Size (Million Parameters)":110,"Memory Usage (GB, fp32)":0.41,"Average":18.03,"ARCChallenge":11.8,"AlphaNLI":22.41,"HellaSwag":26.27,"PIQA":29.03,"Quail":3.41,"RARbCode":53.21,"RARbMath":71.85,"SIQA":2.38,"SpartQA":0.22,"TempReasonL1":1.77,"TempReasonL2Fact":11.2,"TempReasonL2Pure":1.15,"TempReasonL3Fact":9.42,"TempReasonL3Pure":5.59,"WinoGrande":20.8} -{"index":5,"Rank":18,"Model":"bge-large-en-v1.5<\/a>","Model Size (Million Parameters)":1340,"Memory Usage (GB, fp32)":4.99,"Average":17.7,"ARCChallenge":9.99,"AlphaNLI":13.13,"HellaSwag":28.5,"PIQA":27.99,"Quail":1.83,"RARbCode":48.12,"RARbMath":57.36,"SIQA":1.04,"SpartQA":2.99,"TempReasonL1":1.46,"TempReasonL2Fact":24.25,"TempReasonL2Pure":2.35,"TempReasonL3Fact":20.64,"TempReasonL3Pure":6.67,"WinoGrande":19.18} -{"index":18,"Rank":19,"Model":"all-MiniLM-L12-v2<\/a>","Model Size (Million Parameters)":33,"Memory Usage (GB, fp32)":0.12,"Average":17.35,"ARCChallenge":10.23,"AlphaNLI":25.35,"HellaSwag":24.08,"PIQA":26.44,"Quail":3.08,"RARbCode":42.44,"RARbMath":66.36,"SIQA":2.09,"SpartQA":2.67,"TempReasonL1":1.66,"TempReasonL2Fact":10.31,"TempReasonL2Pure":0.63,"TempReasonL3Fact":11.11,"TempReasonL3Pure":6.63,"WinoGrande":27.2} -{"index":0,"Rank":20,"Model":"dragon-plus-instruct<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":16.73,"ARCChallenge":8.24,"AlphaNLI":25.18,"HellaSwag":24.06,"PIQA":26.35,"Quail":4.2,"RARbCode":12.84,"RARbMath":36.15,"SIQA":1.75,"SpartQA":10.82,"TempReasonL1":1.54,"TempReasonL2Fact":16.11,"TempReasonL2Pure":0.57,"TempReasonL3Fact":14.81,"TempReasonL3Pure":7.46,"WinoGrande":60.84} -{"index":19,"Rank":21,"Model":"all-MiniLM-L6-v2-instruct<\/a>","Model Size (Million Parameters)":23,"Memory Usage (GB, fp32)":0.09,"Average":15.95,"ARCChallenge":9.4,"AlphaNLI":15.09,"HellaSwag":20.51,"PIQA":24.68,"Quail":3.46,"RARbCode":42.47,"RARbMath":62.39,"SIQA":1.53,"SpartQA":0.57,"TempReasonL1":1.05,"TempReasonL2Fact":16.57,"TempReasonL2Pure":0.49,"TempReasonL3Fact":14.01,"TempReasonL3Pure":6.27,"WinoGrande":20.73} -{"index":15,"Rank":22,"Model":"contriever<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":15.86,"ARCChallenge":8.62,"AlphaNLI":31.77,"HellaSwag":14.42,"PIQA":24.64,"Quail":4.97,"RARbCode":9.28,"RARbMath":30.76,"SIQA":1.27,"SpartQA":10.94,"TempReasonL1":1.93,"TempReasonL2Fact":22.68,"TempReasonL2Pure":1.12,"TempReasonL3Fact":20.62,"TempReasonL3Pure":7.8,"WinoGrande":47.15} -{"index":3,"Rank":23,"Model":"bge-base-en-v1.5<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":14.93,"ARCChallenge":9.66,"AlphaNLI":10.99,"HellaSwag":26.64,"PIQA":25.69,"Quail":1.42,"RARbCode":46.47,"RARbMath":46.86,"SIQA":0.94,"SpartQA":3.37,"TempReasonL1":1.07,"TempReasonL2Fact":17.23,"TempReasonL2Pure":1.29,"TempReasonL3Fact":13.36,"TempReasonL3Pure":5.2,"WinoGrande":13.76} -{"index":4,"Rank":24,"Model":"bge-large-en-v1.5-instruct<\/a>","Model Size (Million Parameters)":1340,"Memory Usage (GB, fp32)":4.99,"Average":14.55,"ARCChallenge":8.86,"AlphaNLI":0.86,"HellaSwag":26.24,"PIQA":23.26,"Quail":2.72,"RARbCode":45.25,"RARbMath":49.82,"SIQA":0.59,"SpartQA":2.34,"TempReasonL1":1.17,"TempReasonL2Fact":21.19,"TempReasonL2Pure":2.1,"TempReasonL3Fact":17.59,"TempReasonL3Pure":5.99,"WinoGrande":10.31} -{"index":9,"Rank":25,"Model":"bge-small-en-v1.5<\/a>","Model Size (Million Parameters)":24,"Memory Usage (GB, fp32)":0.09,"Average":14.15,"ARCChallenge":8.95,"AlphaNLI":11.64,"HellaSwag":25.44,"PIQA":23.92,"Quail":1.75,"RARbCode":42.36,"RARbMath":44.98,"SIQA":0.77,"SpartQA":3.55,"TempReasonL1":1.41,"TempReasonL2Fact":17.56,"TempReasonL2Pure":1.05,"TempReasonL3Fact":13.88,"TempReasonL3Pure":4.76,"WinoGrande":10.28} -{"index":21,"Rank":26,"Model":"all-mpnet-base-v2-instruct<\/a>","Model Size (Million Parameters)":110,"Memory Usage (GB, fp32)":0.41,"Average":13.84,"ARCChallenge":10.35,"AlphaNLI":1.96,"HellaSwag":13.01,"PIQA":27.18,"Quail":3.02,"RARbCode":48.95,"RARbMath":69.21,"SIQA":1.29,"SpartQA":1.01,"TempReasonL1":1.52,"TempReasonL2Fact":7.28,"TempReasonL2Pure":1.03,"TempReasonL3Fact":7.03,"TempReasonL3Pure":5.16,"WinoGrande":9.66} -{"index":2,"Rank":27,"Model":"bge-base-en-v1.5-instruct<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":13.52,"ARCChallenge":8.85,"AlphaNLI":4.13,"HellaSwag":24.03,"PIQA":23.03,"Quail":1.25,"RARbCode":46.32,"RARbMath":45.62,"SIQA":0.24,"SpartQA":2.67,"TempReasonL1":0.8,"TempReasonL2Fact":16.56,"TempReasonL2Pure":1.33,"TempReasonL3Fact":12.68,"TempReasonL3Pure":5.08,"WinoGrande":10.27} -{"index":8,"Rank":28,"Model":"bge-small-en-v1.5-instruct<\/a>","Model Size (Million Parameters)":24,"Memory Usage (GB, fp32)":0.09,"Average":12.6,"ARCChallenge":7.72,"AlphaNLI":1.26,"HellaSwag":23.41,"PIQA":20.79,"Quail":2.01,"RARbCode":41.52,"RARbMath":46.5,"SIQA":0.98,"SpartQA":2.86,"TempReasonL1":1.27,"TempReasonL2Fact":16.72,"TempReasonL2Pure":1.1,"TempReasonL3Fact":12.81,"TempReasonL3Pure":4.63,"WinoGrande":5.35} -{"index":14,"Rank":29,"Model":"contriever-instruct<\/a>","Model Size (Million Parameters)":438,"Memory Usage (GB, fp32)":1.63,"Average":"","ARCChallenge":7.63,"AlphaNLI":27.09,"HellaSwag":"","PIQA":21.73,"Quail":4.92,"RARbCode":7.12,"RARbMath":21.83,"SIQA":0.88,"SpartQA":10.56,"TempReasonL1":1.8,"TempReasonL2Fact":22.03,"TempReasonL2Pure":0.94,"TempReasonL3Fact":20.82,"TempReasonL3Pure":7.15,"WinoGrande":26.3}