nehcgs committed on
Commit
2af7ff5
1 Parent(s): aa4d8e7

Upload README.md

Browse files
Files changed (1) hide show
  1. README.md +29 -31
README.md CHANGED
@@ -84,6 +84,17 @@ We evaluate Katanemo Arch-Function series on the [Berkeley Function-Calling Lead
84
  <td>63.41%</td>
85
  <td>82.93%</td>
86
  </tr>
 
 
 
 
 
 
 
 
 
 
 
87
  <tr style="text-align: center; vertical-align: middle;">
88
  <td>6</td>
89
  <td>o1-preview-2024-09-12 (Prompt)</td>
@@ -95,17 +106,6 @@ We evaluate Katanemo Arch-Function series on the [Berkeley Function-Calling Lead
95
  <td>73.17%</td>
96
  <td>74.60%</td>
97
  </tr>
98
- <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
99
- <td> </td>
100
- <td>Arch-Function-7B</td>
101
- <td>58.44%</td>
102
- <td>85.58%</td>
103
- <td>88.14%</td>
104
- <td>69.08%</td>
105
- <td>20.50%</td>
106
- <td>92.68%</td>
107
- <td>74.05%</td>
108
- </tr>
109
  <tr style="text-align: center; vertical-align: middle; ">
110
  <td>9</td>
111
  <td>Gemini-1.5-Flash-002 (Prompt)</td>
@@ -117,6 +117,17 @@ We evaluate Katanemo Arch-Function series on the [Berkeley Function-Calling Lead
117
  <td>85.37%</td>
118
  <td>78.54%</td>
119
  </tr>
 
 
 
 
 
 
 
 
 
 
 
120
  <tr style="text-align: center; vertical-align: middle; ">
121
  <td>12</td>
122
  <td>Claude-3.5-Sonnet-20240620 (FC)</td>
@@ -139,30 +150,17 @@ We evaluate Katanemo Arch-Function series on the [Berkeley Function-Calling Lead
139
  <td>75.61%</td>
140
  <td>49.44%</td>
141
  </tr>
142
- <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
143
- <td> </td>
144
- <td>Arch-Function-3B</td>
145
- <td>56.57%</td>
146
- <td>83.62%</td>
147
- <td>85.36%</td>
148
- <td>66.90%</td>
149
- <td>19.50%</td>
150
- <td>97.56%</td>
151
- <td>70.99%</td>
152
- </tr>
153
- </tr>
154
  <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
155
  <td> </td>
156
  <td>Arch-Function-1.5B</td>
157
- <td>54.52%</td>
158
- <td>80.31%</td>
159
- <td>82.04%</td>
160
- <td>66.19%</td>
161
- <td>17.25%</td>
162
- <td>97.56%</td>
163
- <td>69.95%</td>
164
  </tr>
165
-
166
  <tr style="text-align: center; vertical-align: middle; ">
167
  <td>21</td>
168
  <td>Llama-3.1-70B-Instruct (Prompt)</td>
 
84
  <td>63.41%</td>
85
  <td>82.93%</td>
86
  </tr>
87
+ <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
88
+ <td> </td>
89
+ <td>Arch-Function-7B</td>
90
+ <td>59.62%</td>
91
+ <td>86.83%</td>
92
+ <td>88.07%</td>
93
+ <td>71.57%</td>
94
+ <td>21.00%</td>
95
+ <td>95.12%</td>
96
+ <td>73.63%</td>
97
+ </tr>
98
  <tr style="text-align: center; vertical-align: middle;">
99
  <td>6</td>
100
  <td>o1-preview-2024-09-12 (Prompt)</td>
 
106
  <td>73.17%</td>
107
  <td>74.60%</td>
108
  </tr>
 
 
 
 
 
 
 
 
 
 
 
109
  <tr style="text-align: center; vertical-align: middle; ">
110
  <td>9</td>
111
  <td>Gemini-1.5-Flash-002 (Prompt)</td>
 
117
  <td>85.37%</td>
118
  <td>78.54%</td>
119
  </tr>
120
+ <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
121
+ <td> </td>
122
+ <td>Arch-Function-3B</td>
123
+ <td>57.69%</td>
124
+ <td>85.19%</td>
125
+ <td>86.18%</td>
126
+ <td>71.21%</td>
127
+ <td>17.50%</td>
128
+ <td>90.24%</td>
129
+ <td>72.88%</td>
130
+ </tr>
131
  <tr style="text-align: center; vertical-align: middle; ">
132
  <td>12</td>
133
  <td>Claude-3.5-Sonnet-20240620 (FC)</td>
 
150
  <td>75.61%</td>
151
  <td>49.44%</td>
152
  </tr>
 
 
 
 
 
 
 
 
 
 
 
 
153
  <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
154
  <td> </td>
155
  <td>Arch-Function-1.5B</td>
156
+ <td>56.20%</td>
157
+ <td>84.40%</td>
158
+ <td>83.96%</td>
159
+ <td>69.36%</td>
160
+ <td>15.88%</td>
161
+ <td>87.80%</td>
162
+ <td>74.39%</td>
163
  </tr>
 
164
  <tr style="text-align: center; vertical-align: middle; ">
165
  <td>21</td>
166
  <td>Llama-3.1-70B-Instruct (Prompt)</td>