intelpen's picture
Update README.md
30e1068 verified
metadata
# Hugging Face model-card metadata (YAML front matter).
# NOTE(review): rendered model cards normally wrap this block in `---` fences;
# they appear to have been stripped in this extract — confirm before committing.
license: cc-by-nc-4.0
# Card language(s).
language:
  - ro
# NOTE(review): the card body says this model was quantized from
# RoLlama3.1-8b-Instruct-DPO, but base_model omits the -DPO suffix —
# confirm which is intended.
base_model:
  - OpenLLM-Ro/RoLlama3.1-8b-Instruct
# Dataset(s) linked to this model card.
datasets:
  - OpenLLM-Ro/ro_dpo_helpsteer
# Benchmark results for the 4-bit model: each task entry reports an average
# score plus per-shot scores; the averages match the comparison table in the
# card body below.
model-index:
  - name: OpenLLM-Ro/RoLlama3.1-8b-Instruct-DPO-4bit
    results:
      - task:
          type: text-generation
        dataset:
          name: OpenLLM-Ro/ro_arc_challenge
          type: OpenLLM-Ro/ro_arc_challenge
        metrics:
          - name: Average accuracy
            type: accuracy
            value: 42.74
          - name: 0-shot
            type: accuracy
            value: 40.79
          - name: 1-shot
            type: accuracy
            value: 40.36
          - name: 3-shot
            type: accuracy
            value: 43.36
          - name: 5-shot
            type: accuracy
            value: 44.04
          - name: 10-shot
            type: accuracy
            value: 43.87
          - name: 25-shot
            type: accuracy
            value: 44.04
      - task:
          type: text-generation
        dataset:
          name: OpenLLM-Ro/ro_mmlu
          type: OpenLLM-Ro/ro_mmlu
        metrics:
          - name: Average accuracy
            type: accuracy
            value: 42.27
          - name: 0-shot
            type: accuracy
            value: 43.23
          - name: 1-shot
            type: accuracy
            value: 42.47
          - name: 3-shot
            type: accuracy
            value: 42.19
          - name: 5-shot
            type: accuracy
            value: 41.19
      - task:
          type: text-generation
        dataset:
          name: OpenLLM-Ro/ro_winogrande
          type: OpenLLM-Ro/ro_winogrande
        metrics:
          - name: Average accuracy
            type: accuracy
            value: 64.94
          - name: 0-shot
            type: accuracy
            value: 63.14
          - name: 1-shot
            type: accuracy
            value: 64.64
          - name: 3-shot
            type: accuracy
            value: 65.43
          - name: 5-shot
            type: accuracy
            value: 66.54
      - task:
          type: text-generation
        dataset:
          name: OpenLLM-Ro/ro_hellaswag
          type: OpenLLM-Ro/ro_hellaswag
        metrics:
          - name: Average accuracy
            type: accuracy
            value: 52.39
          - name: 0-shot
            type: accuracy
            value: 52.42
          - name: 1-shot
            type: accuracy
            value: 52.3
          - name: 3-shot
            type: accuracy
            value: 52.6
          - name: 5-shot
            type: accuracy
            value: 52.2
          - name: 10-shot
            type: accuracy
            value: 52.42
      - task:
          type: text-generation
        dataset:
          name: OpenLLM-Ro/ro_gsm8k
          type: OpenLLM-Ro/ro_gsm8k
        metrics:
          - name: Average accuracy
            type: accuracy
            value: 38.87
          - name: 1-shot
            type: accuracy
            value: 28.13
          - name: 3-shot
            type: accuracy
            value: 42.23
          - name: 5-shot
            type: accuracy
            value: 46.25
      - task:
          type: text-generation
        dataset:
          name: OpenLLM-Ro/ro_truthfulqa
          type: OpenLLM-Ro/ro_truthfulqa
        metrics:
          - name: Average accuracy
            type: accuracy
            value: 48.67
          - name: 0-shot
            type: accuracy
            value: 48.67
      - task:
          type: text-generation
        dataset:
          name: LaRoSeDa_binary
          type: LaRoSeDa_binary
        metrics:
          - name: Average macro-f1
            type: macro-f1
            value: 97.47
          - name: 0-shot
            type: macro-f1
            value: 97.43
          - name: 1-shot
            type: macro-f1
            value: 97.33
          - name: 3-shot
            type: macro-f1
            value: 97.7
          - name: 5-shot
            type: macro-f1
            value: 97.43
      - task:
          type: text-generation
        dataset:
          name: LaRoSeDa_multiclass
          type: LaRoSeDa_multiclass
        metrics:
          - name: Average macro-f1
            type: macro-f1
            value: 64.05
          - name: 0-shot
            type: macro-f1
            value: 65.9
          - name: 1-shot
            type: macro-f1
            value: 64.68
          - name: 3-shot
            type: macro-f1
            value: 62.36
          - name: 5-shot
            type: macro-f1
            value: 63.27
      - task:
          type: text-generation
        dataset:
          name: WMT_EN-RO
          type: WMT_EN-RO
        metrics:
          - name: Average bleu
            type: bleu
            value: 20.54
          - name: 0-shot
            type: bleu
            value: 7.2
          - name: 1-shot
            type: bleu
            value: 25.68
          - name: 3-shot
            type: bleu
            value: 24.5
          - name: 5-shot
            type: bleu
            value: 24.78
      - task:
          type: text-generation
        dataset:
          name: WMT_RO-EN
          type: WMT_RO-EN
        metrics:
          - name: Average bleu
            type: bleu
            value: 21.16
          - name: 0-shot
            type: bleu
            value: 2.59
          - name: 1-shot
            type: bleu
            value: 17.54
          - name: 3-shot
            type: bleu
            value: 30.82
          - name: 5-shot
            type: bleu
            value: 33.67
      - task:
          type: text-generation
        dataset:
          name: XQuAD
          type: XQuAD
        metrics:
          - name: Average exact_match
            type: exact_match
            value: 21.45
          - name: Average f1
            type: f1
            value: 37.73
          - name: 0-shot exact_match
            type: exact_match
            value: 3.45
          - name: 0-shot f1
            type: f1
            value: 12.36
          - name: 1-shot exact_match
            type: exact_match
            value: 32.02
          - name: 1-shot f1
            type: f1
            value: 55.7
          - name: 3-shot exact_match
            type: exact_match
            value: 33.78
          - name: 3-shot f1
            type: f1
            value: 54.15
          - name: 5-shot exact_match
            type: exact_match
            value: 16.55
          - name: 5-shot f1
            type: f1
            value: 28.71
      - task:
          type: text-generation
        dataset:
          name: STS
          type: STS
        metrics:
          - name: Average pearson
            type: pearson
            value: 76.93
          - name: Average spearman
            type: spearman
            value: 77.08
          - name: 1-shot pearson
            type: pearson
            value: 77.02
          - name: 1-shot spearman
            type: spearman
            value: 77.8
          - name: 3-shot pearson
            type: pearson
            value: 76.93
          - name: 3-shot spearman
            type: spearman
            value: 77
          - name: 5-shot pearson
            type: pearson
            value: 76.85
          - name: 5-shot spearman
            type: spearman
            value: 76.45

Model Card for 4-bit RoLlama3.1-8b-Instruct-DPO

Built from RoLlama3.1-8b-Instruct-DPO, quantized to 4-bit.

This variant of RoLlama3.1-8b-Instruct-DPO provides a reduced footprint through 4-bit quantization, aimed at enabling usage on resource-constrained GPUs while preserving a high fraction of the model’s capabilities.

Model Details

Comparison to 16-bit

The effects of the quantization appear minimal for most tasks (the largest regression is on MMLU):

| Task | Metric | FP16 Original | 4-bit | Absolute Diff. | % Change |
|---|---|---|---|---|---|
| ARC Challenge | Avg. Accuracy | 44.84 | 42.74 | -2.10 | -4.68% |
| MMLU | Avg. Accuracy | 55.06 | 42.27 | -12.79 | -23.23% |
| Winogrande | Avg. Accuracy | 65.87 | 64.94 | -0.93 | -1.41% |
| Hellaswag | Avg. Accuracy | 58.67 | 52.39 | -6.28 | -10.70% |
| GSM8K | Avg. Accuracy | 44.17 | 38.87 | -5.30 | -11.99% |
| TruthfulQA | Avg. Accuracy | 47.82 | 48.67 | +0.85 | +1.78% |
| LaRoSeDa (binary) | Macro-F1 | 96.10 | 97.47 | +1.37 | +1.43% |
| LaRoSeDa (multiclass) | Macro-F1 | 55.37 | 64.05 | +8.68 | +15.68% |
| WMT EN-RO | BLEU | 21.29 | 20.54 | -0.75 | -3.52% |
| WMT RO-EN | BLEU | 21.86 | 21.16 | -0.70 | -3.20% |
| XQuAD (avg) | EM / F1 | 21.58 / 36.54 | 21.45 / 37.73 | ~-0.13 / +1.19 | -0.60% / +3.26% |
| STS (avg) | Spearman / Pearson | 78.01 / 77.98 | 77.08 / 76.93 | -0.93 / -1.05 | -1.19% / -1.35% |

Model Description

  • Developed by: OpenLLM-Ro
  • Language(s): Romanian
  • License: cc-by-nc-4.0
  • Quantized from model: RoLlama3.1-8b-Instruct-DPO
  • Quantization: 4-bit

Quantization reduces model size and improves inference speed but can lead to small drops in performance. The comparison table above covers the main benchmarks, contrasting the original full-precision version with the new 4-bit variant.

How to Use

# Example: load the 4-bit model and run one chat-style generation on GPU.
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "OpenLLM-Ro/RoLlama3.1-8b-Instruct-DPO-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# load_in_4bit=True loads the weights via bitsandbytes 4-bit quantization;
# device_map="auto" spreads layers across the available device(s).
model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True, device_map="auto")

instruction = "Ce jocuri de societate pot juca cu prietenii mei?"
chat = [
    {"role": "system", "content": "Ești un asistent folositor, respectuos și onest. Încearcă să ajuți cât mai mult prin informațiile oferite, excluzând răspunsuri toxice, rasiste, sexiste, periculoase și ilegale."},
    {"role": "user", "content": instruction},
]
# The system turn is already part of `chat`, so no extra template kwargs are
# needed. add_generation_prompt=True appends the assistant header so the model
# produces a reply instead of continuing the user turn.
prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt").to("cuda")
outputs = model.generate(input_ids=inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))