<!DOCTYPE html>
<!-- The page text (title, table headers) is English, so the document language is declared for assistive technology. -->
<html lang="en">
<head>
  <!-- The encoding declaration comes first so it is seen before any other head content. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>ARCH: Audio Representation benCHmark</title>
  <!-- Fetched over HTTPS: an http:// stylesheet is blocked as mixed content when the page itself is served over HTTPS. -->
  <link href="https://fonts.googleapis.com/css?family=Roboto" rel="stylesheet">
  <link rel="stylesheet" href="style.css">

  <style>
    /* Centres a block-level image horizontally. */
    .center_img {
      display: block;
      margin-left: auto;
      margin-right: auto;
    }

    /* Centres a table horizontally. */
    .center_table {
      margin-left: auto;
      margin-right: auto;
    }

    /* Monospace face used for the model identifiers. */
    .mono_text {
      font-family: 'Lucida Console', monospace;
    }

    /* Fixed 500px width (applied to the logo). */
    .width500 {
      width: 500px;
    }

    /*
      Results table.
      "vertical-align: center" is not a valid CSS value and is dropped by the
      parser; the valid keyword for vertical centring is "middle".
    */
    .tg {
      border-collapse: collapse;
      border-spacing: 0;
      text-align: center;
      vertical-align: middle;
    }

    /* Data cells: thin black border, centred content. */
    .tg td {
      border-color: black;
      border-style: solid;
      border-width: 1px;
      overflow: hidden;
      padding: 10px 5px;
      word-break: normal;
      text-align: center;
      vertical-align: middle;
    }

    /* Header cells: same box as data cells, but not bold. */
    .tg th {
      border-color: black;
      border-style: solid;
      border-width: 1px;
      font-weight: normal;
      overflow: hidden;
      padding: 10px 5px;
      word-break: normal;
      text-align: center;
      vertical-align: middle;
    }

    /* Per-cell variants: plain, bold, and bold on a light-blue background. */
    .tg .tg-c3ow { border-color: inherit; text-align: center; vertical-align: middle; }
    .tg .tg-7btt { border-color: inherit; font-weight: bold; text-align: center; vertical-align: middle; }
    .tg .tg-f0bj { background-color: #DAE8FC; border-color: inherit; font-weight: bold; text-align: center; vertical-align: middle; }
  </style>
</head>
<body>
<!-- Logo: centred and 500px wide via the classes; alt text gives screen readers the benchmark name. -->
<img class="center_img width500" src="arch_logo.png" alt="ARCH: Audio Representation benCHmark logo">
<!-- Vertical gap between the logo and the results table. -->
<br><br>
<!--
  Results table. Two header rows: the first names the Model and Size columns
  (each spanning both rows) and three groups of four datasets; the second names
  the twelve datasets. scope="col" ties every header to the column(s) it spans.
-->
<table class="tg center_table" id="arch_res">
<thead>
<tr>
  <th class="tg-c3ow" rowspan="2" scope="col">Model</th>
  <th class="tg-c3ow" rowspan="2" scope="col">Size</th>
  <th class="tg-c3ow" colspan="4" scope="col">Sound</th>
  <th class="tg-c3ow" colspan="4" scope="col">Music</th>
  <th class="tg-c3ow" colspan="4" scope="col">Speech</th>
</tr>
<tr>
  <th class="tg-c3ow" scope="col">ESC-50</th>
  <th class="tg-c3ow" scope="col">US8K</th>
  <th class="tg-c3ow" scope="col">FSD50K</th>
  <th class="tg-c3ow" scope="col">VIVAE</th>
  <th class="tg-c3ow" scope="col">FMA</th>
  <th class="tg-c3ow" scope="col">MTT</th>
  <th class="tg-c3ow" scope="col">IRMAS</th>
  <th class="tg-c3ow" scope="col">MS-DB</th>
  <th class="tg-c3ow" scope="col">RAVDESS</th>
  <th class="tg-c3ow" scope="col">A-MNIST</th>
  <th class="tg-c3ow" scope="col">SLURP</th>
  <th class="tg-c3ow" scope="col">EMOVO</th>
</tr>
</thead>
<tbody>
<!-- Model row: facebook/wav2vec2-base, size S. The 12 score cells follow the header's dataset order (ESC-50 … EMOVO). -->
<tr>
  <td class="tg-c3ow">
    <!-- rel="noopener noreferrer": the page opened in the new tab gets no window.opener handle back to this one. -->
    <a class="mono_text" href="https://huggingface.co/facebook/wav2vec2-base" target="_blank" rel="noopener noreferrer">facebook/wav2vec2-base</a>
  </td>
  <td class="tg-c3ow">S</td>
  <td class="tg-c3ow">45.73</td>
  <td class="tg-c3ow">55.48</td>
  <td class="tg-c3ow">19.39</td>
  <td class="tg-c3ow">31.47</td>
  <td class="tg-c3ow">50.54</td>
  <td class="tg-c3ow">37.56</td>
  <td class="tg-c3ow">35.14</td>
  <td class="tg-c3ow">66.06</td>
  <td class="tg-c3ow">55.32</td>
  <td class="tg-c3ow">86.38</td>
  <td class="tg-c3ow">14.37</td>
  <td class="tg-c3ow">31.80</td>
</tr>
<!-- Model row: microsoft/wavlm-base, size S. The 12 score cells follow the header's dataset order (ESC-50 … EMOVO). -->
<!-- Bold cells (tg-7btt) hold the highest value among this table's size-S rows for that column. -->
<tr>
  <td class="tg-c3ow">
    <!-- rel="noopener noreferrer": the page opened in the new tab gets no window.opener handle back to this one. -->
    <a class="mono_text" href="https://huggingface.co/microsoft/wavlm-base" target="_blank" rel="noopener noreferrer">microsoft/wavlm-base</a>
  </td>
  <td class="tg-c3ow">S</td>
  <td class="tg-c3ow">49.88</td>
  <td class="tg-c3ow">61.84</td>
  <td class="tg-c3ow">17.63</td>
  <td class="tg-c3ow">36.31</td>
  <td class="tg-c3ow">48.71</td>
  <td class="tg-c3ow">34.93</td>
  <td class="tg-c3ow">32.62</td>
  <td class="tg-c3ow">54.18</td>
  <td class="tg-7btt">67.94</td>
  <td class="tg-c3ow">99.50</td>
  <td class="tg-c3ow">30.98</td>
  <td class="tg-7btt">43.08</td>
</tr>
<!-- Model row: microsoft/wavlm-base-plus, size S. The 12 score cells follow the header's dataset order (ESC-50 … EMOVO). -->
<!-- Bold cells (tg-7btt) hold the highest value among this table's size-S rows for that column. -->
<tr>
  <td class="tg-c3ow">
    <!-- rel="noopener noreferrer": the page opened in the new tab gets no window.opener handle back to this one. -->
    <a class="mono_text" href="https://huggingface.co/microsoft/wavlm-base-plus" target="_blank" rel="noopener noreferrer">microsoft/wavlm-base-plus</a>
  </td>
  <td class="tg-c3ow">S</td>
  <td class="tg-c3ow">58.73</td>
  <td class="tg-c3ow">64.07</td>
  <td class="tg-c3ow">21.57</td>
  <td class="tg-c3ow">36.17</td>
  <td class="tg-c3ow">56.17</td>
  <td class="tg-c3ow">38.24</td>
  <td class="tg-c3ow">35.76</td>
  <td class="tg-c3ow">57.51</td>
  <td class="tg-c3ow">52.20</td>
  <td class="tg-7btt">99.63</td>
  <td class="tg-c3ow">28.06</td>
  <td class="tg-c3ow">36.73</td>
</tr>
<!-- Model row: facebook/hubert-base-ls960, size S. The 12 score cells follow the header's dataset order (ESC-50 … EMOVO). -->
<!-- Bold cells (tg-7btt) hold the highest value among this table's size-S rows for that column. -->
<tr>
  <td class="tg-c3ow">
    <!-- rel="noopener noreferrer": the page opened in the new tab gets no window.opener handle back to this one. -->
    <a class="mono_text" href="https://huggingface.co/facebook/hubert-base-ls960" target="_blank" rel="noopener noreferrer">facebook/hubert-base-ls960</a>
  </td>
  <td class="tg-c3ow">S</td>
  <td class="tg-7btt">58.90</td>
  <td class="tg-7btt">67.28</td>
  <td class="tg-7btt">24.53</td>
  <td class="tg-7btt">40.48</td>
  <td class="tg-c3ow">54.63</td>
  <td class="tg-7btt">38.78</td>
  <td class="tg-7btt">36.65</td>
  <td class="tg-c3ow">58.46</td>
  <td class="tg-c3ow">65.28</td>
  <td class="tg-c3ow">99.58</td>
  <td class="tg-c3ow">33.75</td>
  <td class="tg-c3ow">40.48</td>
</tr>
<!-- Model row: facebook/data2vec-audio-base, size S. The 12 score cells follow the header's dataset order (ESC-50 … EMOVO). -->
<!-- Bold cells (tg-7btt) hold the highest value among this table's size-S rows for that column. -->
<tr>
  <td class="tg-c3ow">
    <!-- rel="noopener noreferrer": the page opened in the new tab gets no window.opener handle back to this one. -->
    <a class="mono_text" href="https://huggingface.co/facebook/data2vec-audio-base" target="_blank" rel="noopener noreferrer">facebook/data2vec-audio-base</a>
  </td>
  <td class="tg-c3ow">S</td>
  <td class="tg-c3ow">23.63</td>
  <td class="tg-c3ow">45.63</td>
  <td class="tg-c3ow">10.06</td>
  <td class="tg-c3ow">30.19</td>
  <td class="tg-c3ow">40.58</td>
  <td class="tg-c3ow">27.60</td>
  <td class="tg-c3ow">25.87</td>
  <td class="tg-c3ow">50.74</td>
  <td class="tg-c3ow">48.03</td>
  <td class="tg-c3ow">99.06</td>
  <td class="tg-7btt">43.57</td>
  <td class="tg-c3ow">27.27</td>
</tr>
<!-- Model row: ALM/wav2vec2-base-audioset, size S. The 12 score cells follow the header's dataset order (ESC-50 … EMOVO). -->
<!-- Bold cells (tg-7btt) hold the highest value among this table's size-S rows for that column. -->
<tr>
  <td class="tg-c3ow">
    <!-- TODO: the href ends in "COMING_SOON", which looks like a placeholder rather than a published model page; replace it with the real URL once available. -->
    <!-- rel="noopener noreferrer": the page opened in the new tab gets no window.opener handle back to this one. -->
    <a class="mono_text" href="https://huggingface.co/COMING_SOON" target="_blank" rel="noopener noreferrer">ALM/wav2vec2-base-audioset</a>
  </td>
  <td class="tg-c3ow">S</td>
  <td class="tg-c3ow">49.48</td>
  <td class="tg-c3ow">62.34</td>
  <td class="tg-c3ow">21.44</td>
  <td class="tg-c3ow">34.90</td>
  <td class="tg-7btt">59.25</td>
  <td class="tg-c3ow">36.13</td>
  <td class="tg-c3ow">34.07</td>
  <td class="tg-7btt">68.74</td>
  <td class="tg-c3ow">51.50</td>
  <td class="tg-c3ow">75.13</td>
  <td class="tg-c3ow">11.01</td>
  <td class="tg-c3ow">31.01</td>
</tr>
<!-- Model row: facebook/wav2vec2-large-robust, size M. The 12 score cells follow the header's dataset order (ESC-50 … EMOVO). -->
<tr>
  <td class="tg-c3ow">
    <!-- rel="noopener noreferrer": the page opened in the new tab gets no window.opener handle back to this one. -->
    <a class="mono_text" href="https://huggingface.co/facebook/wav2vec2-large-robust" target="_blank" rel="noopener noreferrer">facebook/wav2vec2-large-robust</a>
  </td>
  <td class="tg-c3ow">M</td>
  <td class="tg-c3ow">13.13</td>
  <td class="tg-c3ow">42.70</td>
  <td class="tg-c3ow">5.80</td>
  <td class="tg-c3ow">22.01</td>
  <td class="tg-c3ow">41.71</td>
  <td class="tg-c3ow">20.95</td>
  <td class="tg-c3ow">19.91</td>
  <td class="tg-c3ow">50.23</td>
  <td class="tg-c3ow">11.57</td>
  <td class="tg-c3ow">45.74</td>
  <td class="tg-c3ow">7.33</td>
  <td class="tg-c3ow">19.27</td>
</tr>
<!-- Model row: facebook/wav2vec2-xls-r-300m, size M. The 12 score cells follow the header's dataset order (ESC-50 … EMOVO). -->
<tr>
  <td class="tg-c3ow">
    <!-- rel="noopener noreferrer": the page opened in the new tab gets no window.opener handle back to this one. -->
    <a class="mono_text" href="https://huggingface.co/facebook/wav2vec2-xls-r-300m" target="_blank" rel="noopener noreferrer">facebook/wav2vec2-xls-r-300m</a>
  </td>
  <td class="tg-c3ow">M</td>
  <td class="tg-c3ow">51.28</td>
  <td class="tg-c3ow">69.96</td>
  <td class="tg-c3ow">23.71</td>
  <td class="tg-c3ow">36.28</td>
  <td class="tg-c3ow">56.96</td>
  <td class="tg-c3ow">38.28</td>
  <td class="tg-c3ow">38.42</td>
  <td class="tg-c3ow">66.71</td>
  <td class="tg-c3ow">31.48</td>
  <td class="tg-c3ow">98.88</td>
  <td class="tg-c3ow">12.74</td>
  <td class="tg-c3ow">20.35</td>
</tr>
<!-- Model row: microsoft/wavlm-large, size M. The 12 score cells follow the header's dataset order (ESC-50 … EMOVO). -->
<!-- Bold cells (tg-7btt) hold the highest value among this table's size-M rows for that column; shaded cells (tg-f0bj) hold the highest value in the whole column. -->
<tr>
  <td class="tg-c3ow">
    <!-- rel="noopener noreferrer": the page opened in the new tab gets no window.opener handle back to this one. -->
    <a class="mono_text" href="https://huggingface.co/microsoft/wavlm-large" target="_blank" rel="noopener noreferrer">microsoft/wavlm-large</a>
  </td>
  <td class="tg-c3ow">M</td>
  <td class="tg-f0bj">67.20</td>
  <td class="tg-7btt">70.92</td>
  <td class="tg-f0bj">32.21</td>
  <td class="tg-7btt">42.51</td>
  <td class="tg-7btt">61.13</td>
  <td class="tg-7btt">41.29</td>
  <td class="tg-7btt">42.53</td>
  <td class="tg-7btt">68.00</td>
  <td class="tg-c3ow">71.76</td>
  <td class="tg-c3ow">99.75</td>
  <td class="tg-c3ow">42.34</td>
  <td class="tg-7btt">45.29</td>
</tr>
<!-- Model row: facebook/hubert-large-ll60k, size M. The 12 score cells follow the header's dataset order (ESC-50 … EMOVO). -->
<!-- Bold cells (tg-7btt) hold the highest value among this table's size-M rows for that column; shaded cells (tg-f0bj) hold the highest value in the whole column. -->
<tr>
  <td class="tg-c3ow">
    <!-- rel="noopener noreferrer": the page opened in the new tab gets no window.opener handle back to this one. -->
    <a class="mono_text" href="https://huggingface.co/facebook/hubert-large-ll60k" target="_blank" rel="noopener noreferrer">facebook/hubert-large-ll60k</a>
  </td>
  <td class="tg-c3ow">M</td>
  <td class="tg-c3ow">63.98</td>
  <td class="tg-c3ow">70.00</td>
  <td class="tg-c3ow">29.51</td>
  <td class="tg-c3ow">40.95</td>
  <td class="tg-c3ow">54.79</td>
  <td class="tg-c3ow">38.36</td>
  <td class="tg-c3ow">36.81</td>
  <td class="tg-c3ow">64.08</td>
  <td class="tg-7btt">72.57</td>
  <td class="tg-f0bj">99.95</td>
  <td class="tg-7btt">45.26</td>
  <td class="tg-c3ow">43.76</td>
</tr>
<!-- Model row: facebook/data2vec-audio-large, size M. The 12 score cells follow the header's dataset order (ESC-50 … EMOVO). -->
<tr>
  <td class="tg-c3ow">
    <!-- rel="noopener noreferrer": the page opened in the new tab gets no window.opener handle back to this one. -->
    <a class="mono_text" href="https://huggingface.co/facebook/data2vec-audio-large" target="_blank" rel="noopener noreferrer">facebook/data2vec-audio-large</a>
  </td>
  <td class="tg-c3ow">M</td>
  <td class="tg-c3ow">25.35</td>
  <td class="tg-c3ow">49.15</td>
  <td class="tg-c3ow">10.82</td>
  <td class="tg-c3ow">30.57</td>
  <td class="tg-c3ow">43.46</td>
  <td class="tg-c3ow">28.52</td>
  <td class="tg-c3ow">27.08</td>
  <td class="tg-c3ow">44.20</td>
  <td class="tg-c3ow">45.14</td>
  <td class="tg-c3ow">99.15</td>
  <td class="tg-c3ow">28.60</td>
  <td class="tg-c3ow">23.07</td>
</tr>
<!-- Model row: facebook/wav2vec2-xls-r-1b, size L. The 12 score cells follow the header's dataset order (ESC-50 … EMOVO). -->
<!-- Bold cells (tg-7btt) hold the highest value among this table's size-L rows for that column; shaded cells (tg-f0bj) hold the highest value in the whole column. -->
<tr>
  <td class="tg-c3ow">
    <!-- rel="noopener noreferrer": the page opened in the new tab gets no window.opener handle back to this one. -->
    <a class="mono_text" href="https://huggingface.co/facebook/wav2vec2-xls-r-1b" target="_blank" rel="noopener noreferrer">facebook/wav2vec2-xls-r-1b</a>
  </td>
  <td class="tg-c3ow">L</td>
  <td class="tg-7btt">66.95</td>
  <td class="tg-f0bj">75.90</td>
  <td class="tg-7btt">31.61</td>
  <td class="tg-c3ow">40.41</td>
  <td class="tg-f0bj">62.79</td>
  <td class="tg-f0bj">41.99</td>
  <td class="tg-f0bj">43.57</td>
  <td class="tg-f0bj">69.79</td>
  <td class="tg-c3ow">55.44</td>
  <td class="tg-c3ow">99.86</td>
  <td class="tg-c3ow">25.14</td>
  <td class="tg-c3ow">34.58</td>
</tr>
<!-- Model row: facebook/hubert-xlarge-ll60k, size L. The 12 score cells follow the header's dataset order (ESC-50 … EMOVO). -->
<!-- Shaded cells (tg-f0bj) hold the highest value in the whole column of this table. -->
<tr>
  <td class="tg-c3ow">
    <!-- rel="noopener noreferrer": the page opened in the new tab gets no window.opener handle back to this one. -->
    <a class="mono_text" href="https://huggingface.co/facebook/hubert-xlarge-ll60k" target="_blank" rel="noopener noreferrer">facebook/hubert-xlarge-ll60k</a>
  </td>
  <td class="tg-c3ow">L</td>
  <td class="tg-c3ow">63.40</td>
  <td class="tg-c3ow">69.66</td>
  <td class="tg-c3ow">29.32</td>
  <td class="tg-f0bj">42.72</td>
  <td class="tg-c3ow">56.25</td>
  <td class="tg-c3ow">37.76</td>
  <td class="tg-c3ow">37.30</td>
  <td class="tg-c3ow">64.71</td>
  <td class="tg-f0bj">75.69</td>
  <td class="tg-f0bj">99.95</td>
  <td class="tg-f0bj">47.81</td>
  <td class="tg-f0bj">47.17</td>
</tr>
</tbody>
</table>
<!-- End of the results table; nothing follows it on the page. -->
</body>
</html>