🏆 claude-4-sonnet-20250514 |
40.82 |
47.28 |
10.59 |
60.53 |
44.86 |
🥈 gpt-4.1 |
36.04 |
48.07 |
11.06 |
50.98 |
34.05 |
🥉 CohereForAI/c4ai-command-a-03-2025 |
29.93 |
38.64 |
10.36 |
45.55 |
25.16 |
google/gemma-3-27b-it |
28.07 |
44.44 |
8.19 |
43.20 |
16.45 |
meta-llama/Llama-3.3-70B-Instruct |
27.05 |
37.41 |
9.80 |
44.77 |
16.24 |
meta-llama/Llama-3.1-70B-Instruct |
26.57 |
35.96 |
11.15 |
43.66 |
15.51 |
deepseek-ai/DeepSeek-R1-Distill-Llama-70B |
24.42 |
34.96 |
10.91 |
39.50 |
12.31 |
deepseek-ai/DeepSeek-R1-Distill-Qwen-32B |
24.08 |
29.27 |
9.61 |
39.70 |
17.74 |
google/gemma-2-27b-it |
23.80 |
24.53 |
9.01 |
40.93 |
20.72 |
CohereForAI/c4ai-command-r-plus-08-2024 |
23.21 |
33.72 |
10.85 |
32.45 |
15.82 |
google/gemma-2-9b-it |
22.80 |
28.10 |
8.54 |
37.88 |
16.68 |
google/gemma-3-12b-it |
22.25 |
33.55 |
8.36 |
35.95 |
11.16 |
Tower-Babel/Babel-83B-Chat |
21.25 |
22.89 |
7.52 |
37.27 |
17.31 |
meta-llama/Llama-3.1-8B-Instruct |
21.08 |
31.41 |
9.92 |
33.88 |
9.11 |
Tower-Babel/Babel-9B-Chat |
19.61 |
24.63 |
9.35 |
33.73 |
10.73 |
CohereForAI/c4ai-command-r7b-12-2024 |
18.06 |
23.53 |
5.84 |
28.34 |
14.53 |
CohereForAI/aya-23-35B |
17.75 |
22.36 |
7.06 |
29.56 |
12.00 |
CohereForAI/aya-23-8B |
17.30 |
21.94 |
6.55 |
26.86 |
13.86 |
google/gemma-2-2b-it |
17.16 |
17.69 |
6.73 |
30.30 |
13.92 |
google/gemma-3-4b-it |
17.10 |
14.65 |
6.98 |
32.84 |
13.95 |
microsoft/Phi-4-mini-instruct |
16.78 |
16.50 |
5.10 |
33.73 |
11.78 |
meta-llama/Llama-3.2-3B-Instruct |
16.49 |
18.08 |
6.56 |
29.33 |
12.00 |
meta-llama/Llama-3.2-1B-Instruct |
14.64 |
16.34 |
6.39 |
27.96 |
7.87 |
microsoft/Phi-3.5-mini-instruct |
14.19 |
17.96 |
6.03 |
25.23 |
7.52 |