Update README.md
README.md
CHANGED
@@ -4,8 +4,16 @@ language:
 - ro
 base_model:
 - google/gemma-7b
+datasets:
+- OpenLLM-Ro/ro_sft_alpaca
+- OpenLLM-Ro/ro_sft_alpaca_gpt4
+- OpenLLM-Ro/ro_sft_dolly
+- OpenLLM-Ro/ro_sft_selfinstruct_gpt4
+- OpenLLM-Ro/ro_sft_norobots
+- OpenLLM-Ro/ro_sft_orca
+- OpenLLM-Ro/ro_sft_camel
 model-index:
-- name: OpenLLM-Ro/RoGemma-7b-Instruct
+- name: OpenLLM-Ro/RoGemma-7b-Instruct-2024-06-28
   results:
   - task:
       type: text-generation
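The `datasets:` block added above lists the Romanian SFT corpora by their Hub IDs. As a minimal sketch of what those IDs resolve to (assuming the standard `datasets` library; the `train` split name is an assumption, not stated in the diff):

```python
from datasets import load_dataset

# Hub IDs copied from the `datasets:` front-matter block added above.
sft_ids = [
    "OpenLLM-Ro/ro_sft_alpaca",
    "OpenLLM-Ro/ro_sft_alpaca_gpt4",
    "OpenLLM-Ro/ro_sft_dolly",
    "OpenLLM-Ro/ro_sft_selfinstruct_gpt4",
    "OpenLLM-Ro/ro_sft_norobots",
    "OpenLLM-Ro/ro_sft_orca",
    "OpenLLM-Ro/ro_sft_camel",
]

for ds_id in sft_ids:
    # split="train" is an assumption; check each dataset card for its actual splits.
    ds = load_dataset(ds_id, split="train")
    print(f"{ds_id}: {len(ds)} examples")
```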
@@ -331,13 +339,13 @@ model-index:
       name: OpenLLM-Ro/ro_gsm8k
       type: OpenLLM-Ro/ro_gsm8k
     metrics:
-    - name:
+    - name: 1-shot
       type: accuracy
       value: 24.79
-    - name:
+    - name: 3-shot
       type: accuracy
       value: 34.50
-    - name:
+    - name: 5-shot
       type: accuracy
       value: 33.89
   - task:
@@ -451,41 +459,34 @@ model-index:
   - task:
       type: text-generation
     dataset:
-      name:
-      type:
+      name: STS_Spearman
+      type: STS_Spearman
     metrics:
-    - name:
+    - name: 1-shot
       type: spearman
       value: 70.61
-    - name:
+    - name: 3-shot
       type: spearman
       value: 73.53
-    - name:
+    - name: 5-shot
       type: spearman
       value: 77.73
   - task:
       type: text-generation
     dataset:
-      name:
-      type:
+      name: STS_Pearson
+      type: STS_Pearson
     metrics:
-    - name:
+    - name: 1-shot
       type: pearson
       value: 72.28
-    - name:
+    - name: 3-shot
       type: pearson
       value: 74.46
-    - name:
+    - name: 5-shot
       type: pearson
       value: 78.75
-datasets:
-- OpenLLM-Ro/ro_sft_alpaca
-- OpenLLM-Ro/ro_sft_alpaca_gpt4
-- OpenLLM-Ro/ro_sft_dolly
-- OpenLLM-Ro/ro_sft_selfinstruct_gpt4
-- OpenLLM-Ro/ro_sft_norobots
-- OpenLLM-Ro/ro_sft_orca
-- OpenLLM-Ro/ro_sft_camel
+
 ---
 
 # Model Card for Model ID
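For context on the metric types filled in above: `spearman` and `pearson` are correlation coefficients between the model's predicted STS similarity scores and the gold annotations, reported in the model-index as percentages. A minimal sketch of the computation with `scipy.stats` (the score vectors below are illustrative, not taken from the evaluation):

```python
from scipy.stats import pearsonr, spearmanr

# Illustrative gold vs. predicted semantic-similarity scores.
gold = [4.5, 2.0, 3.8, 1.0, 5.0, 2.7]
pred = [4.1, 2.4, 3.5, 1.3, 4.8, 3.0]

# Both functions return (correlation, p-value); the card scales the
# correlation by 100, e.g. 0.7061 is reported as 70.61.
spearman, _ = spearmanr(gold, pred)
pearson, _ = pearsonr(gold, pred)
print(f"Spearman: {spearman * 100:.2f}  Pearson: {pearson * 100:.2f}")
```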
@@ -540,8 +541,8 @@ Use the code below to get started with the model.
 ```python
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-tokenizer = AutoTokenizer.from_pretrained("OpenLLM-Ro/RoGemma-7b-Instruct")
-model = AutoModelForCausalLM.from_pretrained("OpenLLM-Ro/RoGemma-7b-Instruct")
+tokenizer = AutoTokenizer.from_pretrained("OpenLLM-Ro/RoGemma-7b-Instruct-2024-06-28")
+model = AutoModelForCausalLM.from_pretrained("OpenLLM-Ro/RoGemma-7b-Instruct-2024-06-28")
 
 instruction = "Ce jocuri de societate pot juca cu prietenii mei?"
 chat = [
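The hunk shows only the renamed checkpoint IDs; the card's snippet continues past `chat = [` (the next hunk header references `print(tokenizer.decode(outputs[0]))`). A minimal end-to-end sketch of the same usage pattern, assuming the standard `transformers` chat-template API (the `max_new_tokens` value is an illustrative choice, not from the card):

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("OpenLLM-Ro/RoGemma-7b-Instruct-2024-06-28")
model = AutoModelForCausalLM.from_pretrained("OpenLLM-Ro/RoGemma-7b-Instruct-2024-06-28")

# "What board games can I play with my friends?"
instruction = "Ce jocuri de societate pot juca cu prietenii mei?"
chat = [
    {"role": "user", "content": instruction},
]

# Render the Gemma chat template and tokenize in one step.
input_ids = tokenizer.apply_chat_template(
    chat, add_generation_prompt=True, return_tensors="pt"
)

# max_new_tokens is an illustrative choice.
outputs = model.generate(input_ids=input_ids, max_new_tokens=256)
print(tokenizer.decode(outputs[0]))
```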
@@ -572,11 +573,18 @@ print(tokenizer.decode(outputs[0]))
 <td>gemma-1.1-7b-it</td><td><center>41.44</center></td><td><center>40.32</center></td><td><center>47.22</center></td><td><center>55.01</center></td><td><center>47.03</center></td><td><center>9.50</center></td><td><center>49.58</center></td>
 </tr>
 <tr>
-<td><em>RoGemma-7b-Instruct</em></td><td><center><em><strong>53.41</strong></em></center></td><td><center><em><strong>52.44</strong></em></center></td><td><center><em…
+<td><em>RoGemma-7b-Instruct-2024-06-28</em></td><td><center><em><strong>53.41</strong></em></center></td><td><center><em><strong>52.44</strong></em></center></td><td><center><em>54.44</em></center></td><td><center><em><strong>69.36</strong></em></center></td><td><center><em><strong>61.96</strong></em></center></td><td><center><em>31.06</em></center></td><td><center><em><strong>51.23</strong></em></center></td>
+</tr>
+<tr>
+<td>RoGemma-7b-Instruct-2024-10-09</td><td><center>50.48</center></td><td><center>52.01</center></td><td><center>52.37</center></td><td><center>66.97</center></td><td><center>56.34</center></td><td><center>25.98</center></td><td><center>49.18</center></td>
+</tr>
+<tr>
+<td>RoGemma-7b-Instruct-DPO-2024-10-09</td><td><center>48.27</center></td><td><center>46.66</center></td><td><center><strong>54.45</strong></center></td><td><center>63.73</center></td><td><center>49.33</center></td><td><center><strong>34.98</strong></center></td><td><center>40.45</center></td>
 </tr>
 </tbody>
 </table>
 
+
 ## Downstream tasks
 
 <table>
@@ -608,13 +616,18 @@ print(tokenizer.decode(outputs[0]))
 <td>gemma-1.1-7b-it</td><td><center>87.54</center></td><td><center>51.48</center></td><td><center>83.87</center></td><td><center>85.61</center></td><td><center>17.96</center></td><td><center><strong>27.74</strong></center></td><td><center>25.48</center></td><td><center>36.11</center></td>
 </tr>
 <tr>
-<td><em>RoGemma-7b-Instruct</em></td><td><center><em><strong>97.86</strong></em></center></td><td><center><em><strong>65.70</strong></em></center></td><td><center><em…
+<td><em>RoGemma-7b-Instruct-2024-06-28</em></td><td><center><em><strong>97.86</strong></em></center></td><td><center><em><strong>65.70</strong></em></center></td><td><center><em>98.43</em></center></td><td><center><em><strong>87.17</strong></em></center></td><td><center><em><strong>27.91</strong></em></center></td><td><center><em>23.08</em></center></td><td><center><em><strong>27.99</strong></em></center></td><td><center><em><strong>39.51</strong></em></center></td>
+</tr>
+<tr>
+<td>RoGemma-7b-Instruct-2024-10-09</td><td><center>86.96</center></td><td><center>56.72</center></td><td><center><strong>98.80</strong></center></td><td><center>85.81</center></td><td><center>24.45</center></td><td><center>14.20</center></td><td><center>25.96</center></td><td><center>39.07</center></td>
+</tr>
+<tr>
+<td>RoGemma-7b-Instruct-DPO-2024-10-09</td><td><center>96.45</center></td><td><center>63.23</center></td><td><center>-</center></td><td><center>-</center></td><td><center>20.73</center></td><td><center>7.87</center></td><td><center>-</center></td><td><center>-</center></td>
 </tr>
 </tbody>
 </table>
 
 
-
 <table>
 <tbody>
 <tr>
@@ -644,7 +657,13 @@ print(tokenizer.decode(outputs[0]))
 <td>gemma-1.1-7b-it</td><td><center><strong>42.10</strong></center></td><td><center><strong>62.30</strong></center></td><td><center><strong>60.34</strong></center></td><td><center><strong>77.40</strong></center></td><td><center>49.10</center></td><td><center>50.23</center></td><td><center>83.43</center></td><td><center>83.64</center></td>
 </tr>
 <tr>
-<td><em>RoGemma-7b-Instruct</em></td><td><center><em>17.75</em></center></td><td><center><em>28.11</em></center></td><td><center><em>52.02</em></center></td><td><center><em>68.43</em></center></td><td><center><em><strong>73.96</strong></em></center></td><td><center><em><strong>75.16</strong></em></center></td><td><center><em…
+<td><em>RoGemma-7b-Instruct-2024-06-28</em></td><td><center><em>17.75</em></center></td><td><center><em>28.11</em></center></td><td><center><em>52.02</em></center></td><td><center><em>68.43</em></center></td><td><center><em><strong>73.96</strong></em></center></td><td><center><em><strong>75.16</strong></em></center></td><td><center><em>86.45</em></center></td><td><center><em>86.31</em></center></td>
+</tr>
+<tr>
+<td>RoGemma-7b-Instruct-2024-10-09</td><td><center>26.03</center></td><td><center>41.58</center></td><td><center>46.72</center></td><td><center>60.79</center></td><td><center>73.23</center></td><td><center>71.58</center></td><td><center><strong>88.42</strong></center></td><td><center><strong>88.45</strong></center></td>
+</tr>
+<tr>
+<td>RoGemma-7b-Instruct-DPO-2024-10-09</td><td><center>19.14</center></td><td><center>38.10</center></td><td><center>-</center></td><td><center>-</center></td><td><center>69.38</center></td><td><center>69.34</center></td><td><center>-</center></td><td><center>-</center></td>
 </tr>
 </tbody>
 </table>
@@ -665,11 +684,18 @@ print(tokenizer.decode(outputs[0]))
 <td>gemma-1.1-7b-it</td><td><center>4.83</center></td><td><center>5.11</center></td><td><center>4.55</center></td><td><center><strong>160/160</strong></center></td>
 </tr>
 <tr>
-<td><em>RoGemma-7b-Instruct</em></td><td><center><em…
+<td><em>RoGemma-7b-Instruct-2024-06-28</em></td><td><center><em>5.26</em></center></td><td><center><em><strong>5.92</strong></em></center></td><td><center><em>4.60</em></center></td><td><center><em><strong>160/160</strong></em></center></td>
+</tr>
+<tr>
+<td>RoGemma-7b-Instruct-2024-10-09</td><td><center>5.24</center></td><td><center>5.55</center></td><td><center>4.94</center></td><td><center><strong>160/160</strong></center></td>
+</tr>
+<tr>
+<td>RoGemma-7b-Instruct-DPO-2024-10-09</td><td><center><strong>5.47</strong></center></td><td><center><strong>5.92</strong></center></td><td><center><strong>5.03</strong></center></td><td><center><strong>160/160</strong></center></td>
 </tr>
 </tbody>
 </table>
 
+
 ## RoCulturaBench
 
 <table>
|
@@ -680,20 +706,27 @@ print(tokenizer.decode(outputs[0]))
|
|
680 |
<td><strong><center>Answers in Ro</center></strong></td>
|
681 |
</tr>
|
682 |
<tr>
|
683 |
-
<td>gemma-1.1-7b-it</td><td><center
|
|
|
|
|
|
|
684 |
</tr>
|
685 |
<tr>
|
686 |
-
<td
|
|
|
|
|
|
|
687 |
</tr>
|
688 |
</tbody>
|
689 |
</table>
|
690 |
|
691 |
-
|
692 |
## RoGemma Model Family
|
693 |
|
694 |
| Model | Link |
|
695 |
|--------------------|:--------:|
|
696 |
-
|*RoGemma-7b-Instruct*| [link](https://huggingface.co/OpenLLM-Ro/RoGemma-7b-Instruct) |
|
|
|
|
|
697 |
|
698 |
|
699 |
## Citation