{"model": "gpt-4", "Turn 1": 8.95625, "Turn 2": 9.025, "Overall": 8.990625, "coding": 8.55, "extraction": 9.375, "humanities": 9.95, "math": 6.8, "reasoning": 9.0, "roleplay": 8.9, "stem": 9.7, "writing": 9.65}
{"model": "gpt-3.5-turbo", "Turn 1": 8.075, "Turn 2": 7.8125, "Overall": 7.94375, "coding": 6.9, "extraction": 8.85, "humanities": 9.55, "math": 6.3, "reasoning": 5.65, "roleplay": 8.4, "stem": 8.7, "writing": 9.2}
{"model": "Llama-2-70b-hf", "Turn 1": 7.60625, "Turn 2": 6.6125, "Overall": 7.109375, "coding": 4.15, "extraction": 7.7, "humanities": 9.75, "math": 3.6, "reasoning": 6.1, "roleplay": 7.325, "stem": 8.75, "writing": 9.5}
{"model": "Mixtral-8x7B-v0.1", "Turn 1": 7.69375, "Turn 2": 6.1875, "Overall": 6.940625, "coding": 5.3, "extraction": 7.05, "humanities": 9.2, "math": 4.85, "reasoning": 5.3, "roleplay": 7.4, "stem": 8.225, "writing": 8.2}
{"model": "Mistral-7b-v0.1", "Turn 1": 7.4875, "Turn 2": 5.8625, "Overall": 6.675, "coding": 4.6, "extraction": 7.75, "humanities": 9.075, "math": 3.4, "reasoning": 4.9, "roleplay": 7.65, "stem": 8.275, "writing": 7.75}
{"model": "Yi-34B", "Turn 1": 7.19375, "Turn 2": 6.15625, "Overall": 6.675, "coding": 3.85, "extraction": 6.8, "humanities": 8.475, "math": 4.8, "reasoning": 6.0, "roleplay": 7.75, "stem": 7.825, "writing": 7.9}
{"model": "gemma-7b", "Turn 1": 6.96875, "Turn 2": 5.0375, "Overall": 6.003125, "coding": 3.95, "extraction": 6.25, "humanities": 8.825, "math": 4.35, "reasoning": 4.5, "roleplay": 6.25, "stem": 7.25, "writing": 6.65}
{"model": "phi-2", "Turn 1": 7.0375, "Turn 2": 4.6625, "Overall": 5.85, "coding": 4.25, "extraction": 4.45, "humanities": 8.85, "math": 3.8, "reasoning": 4.55, "roleplay": 7.2, "stem": 7.0, "writing": 6.7}
{"model": "Llama-2-13b-hf", "Turn 1": 6.26875, "Turn 2": 4.4125, "Overall": 5.340625, "coding": 2.8, "extraction": 4.7, "humanities": 8.3, "math": 2.85, "reasoning": 2.9, "roleplay": 6.625, "stem": 7.025, "writing": 7.525}
{"model": "Yi-6B", "Turn 1": 5.95625, "Turn 2": 3.9875, "Overall": 4.971875, "coding": 2.3, "extraction": 2.95, "humanities": 8.775, "math": 2.5, "reasoning": 3.5, "roleplay": 6.95, "stem": 7.7, "writing": 5.1}
{"model": "Llama-2-7b-hf", "Turn 1": 5.75, "Turn 2": 3.9125, "Overall": 4.83125, "coding": 1.65, "extraction": 3.4, "humanities": 8.075, "math": 1.6, "reasoning": 3.45, "roleplay": 7.475, "stem": 6.8, "writing": 6.2}
{"model": "gemma-2b", "Turn 1": 5.08125, "Turn 2": 2.8625, "Overall": 3.971875, "coding": 1.8, "extraction": 3.1, "humanities": 5.65, "math": 3.3, "reasoning": 2.55, "roleplay": 5.7, "stem": 5.725, "writing": 3.95}
{"model": "olmo", "Turn 1": 3.95, "Turn 2": 2.8625, "Overall": 3.40625, "coding": 1.65, "extraction": 2.45, "humanities": 4.9, "math": 1.25, "reasoning": 2.45, "roleplay": 5.3, "stem": 5.3, "writing": 3.95}