Tilo Michel committed
Commit 1e4c4aa
1 Parent(s): 46d1ff6

Evaluation results in README.md

Files changed (2)
  1. README.md +60 -4
  2. eval_results.json +0 -14
README.md CHANGED
@@ -20,16 +20,72 @@ tags:
 - question generation
 datasets:
 - deepset/germanquad
-- xquad
-model-index:
-- name: mT5-base-GermanQuAD-e2e-qg
-  results: []
 metrics:
 - sacrebleu
 - bleu
 - rouge-l
 - meteor
 - bertscore
+model-index:
+- name: tilomichel/mT5-base-GermanQuAD-e2e-qg
+  results:
+  - task:
+      type: question-generation
+      name: Question generation
+    dataset:
+      type: xquad
+      name: XQuAD (de)
+      split: de
+    metrics:
+    - type: sacrebleu
+      value: 1.72837804716791
+      name: BLEU Score
+      args:
+        lowercase: true
+      verified: false
+    - type: sacrebleu
+      value: 49.210584834334
+      name: BLEU-1
+      args:
+        lowercase: true
+      verified: false
+    - type: sacrebleu
+      value: 16.960300681230915
+      name: BLEU-2
+      args:
+        lowercase: true
+      verified: false
+    - type: sacrebleu
+      value: 7.144635299975106
+      name: BLEU-3
+      args:
+        lowercase: true
+      verified: false
+    - type: sacrebleu
+      value: 3.230076780513635
+      name: BLEU-4
+      args:
+        lowercase: true
+      verified: false
+    - type: rouge
+      name: ROUGE-L (f-measure)
+      value: 0.171130005590873
+      args:
+        use_aggregator: true
+        use_stemmer: false
+      verified: false
+    - type: meteor
+      value: 0.0835049103331918
+      name: METEOR
+      args:
+        language: de
+      verified: false
+    - type: bertscore
+      value: 0.331940584507538
+      name: BERTScore (F1)
+      args:
+        rescale_with_baseline: true
+      verified: false
 ---
 
 # mT5-base finetuned on the GermanQuAD dataset for answer-agnostic question generation
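For context, here is a minimal sketch of how scores of the kinds recorded in the model-index above could be computed with the Hugging Face `evaluate` library. The metric flags mirror the `args` in the YAML, but the actual evaluation script is not part of this commit, and the predictions/references below are placeholders, not the XQuAD (de) outputs.

```python
# Hypothetical reproduction sketch for the metric types in the model-index:
# sacrebleu, rouge, meteor, and bertscore via the `evaluate` library.
import evaluate

predictions = ["was ist die hauptstadt von deutschland?"]        # placeholder model output
references = [["Was ist die Hauptstadt Deutschlands?"]]          # sacrebleu expects a list of reference lists

sacrebleu = evaluate.load("sacrebleu")
bleu = sacrebleu.compute(predictions=predictions, references=references, lowercase=True)
print(bleu["score"])       # corpus BLEU ("BLEU Score" in the card)
print(bleu["precisions"])  # per-n-gram precisions, one plausible source of the BLEU-1..4 rows

flat_refs = [r[0] for r in references]  # the remaining metrics take one reference string per prediction

rouge = evaluate.load("rouge")
print(rouge.compute(predictions=predictions, references=flat_refs,
                    use_aggregator=True, use_stemmer=False)["rougeL"])

meteor = evaluate.load("meteor")  # the card additionally records `language: de` as an arg
print(meteor.compute(predictions=predictions, references=flat_refs)["meteor"])

bertscore = evaluate.load("bertscore")
print(bertscore.compute(predictions=predictions, references=flat_refs,
                        lang="de", rescale_with_baseline=True)["f1"])
```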
eval_results.json DELETED
@@ -1,14 +0,0 @@
-{
-    "epoch": 10,
-    "eval_bertscore": 0.331940584507538,
-    "eval_bleu": 1.72837804716791,
-    "eval_bleu-1": 49.210584834334,
-    "eval_bleu-2": 16.9603006812309,
-    "eval_bleu-3": 7.14463529997511,
-    "eval_bleu-4": 3.23007678051363,
-    "eval_rouge1": 0.231197704900175,
-    "eval_rouge2": 0.0746574903742914,
-    "eval_rougeL": 0.171130005590873,
-    "eval_meteor": 0.0835049103331918,
-    "eval_samples": 244
-}
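Since the checkpoint named in the model-index is a standard mT5 seq2seq model, it can be loaded with `transformers` as sketched below. This is a hypothetical usage example: the commit does not show the input format used during fine-tuning, and the generation settings are arbitrary.

```python
# Hypothetical usage sketch for the checkpoint named in the model-index.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "tilomichel/mT5-base-GermanQuAD-e2e-qg"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Example German passage; any prompt prefix the model was trained with
# is an unknown here and therefore omitted.
text = "Berlin ist die Hauptstadt der Bundesrepublik Deutschland."
inputs = tokenizer(text, return_tensors="pt", truncation=True)
outputs = model.generate(**inputs, max_length=64, num_beams=4)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```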