Update README.md
Browse files
README.md
CHANGED
@@ -1,46 +1,104 @@
|
|
1 |
---
|
2 |
license: apache-2.0
|
3 |
---
|
4 |
-
|
5 |
-
|:--------:|:------------:|:-------------:|:-----------------:|:---:|
|
6 |
-
|4.1939034461975|0.145276814699172|3.39564657211303|0.186678826808929|1|
|
7 |
-
|3.13256049156188|0.208270609378814|2.82256889343261|0.233325317502021|2|
|
8 |
-
|2.84938621520996|0.229006066918373|2.72168040275573|0.23955675959587|3|
|
9 |
-
|2.76001143455505|0.234559893608093|2.65143990516662|0.243813350796699|4|
|
10 |
-
|2.70404982566833|0.238061532378196|2.6107530593872|0.246574580669403|5|
|
11 |
-
|2.6638650894165|0.240613579750061|2.57847166061401|0.248678594827651|6|
|
12 |
-
|2.63293719291687|0.242613524198532|2.55772447586059|0.250325441360473|7|
|
13 |
-
|2.60750746726989|0.244251564145088|2.53469848632812|0.251805543899536|8|
|
14 |
-
|2.58670353889465|0.245637223124504|2.51883554458618|0.253003656864166|9|
|
15 |
-
|2.56865572929382|0.24682830274105|2.49989652633666|0.254459708929061|10|
|
16 |
-
|2.55285787582397|0.247884958982467|2.50092124938964|0.254229605197906|11|
|
17 |
-
|2.53919672966003|0.248811900615692|2.47859454154968|0.255691051483154|12|
|
18 |
-
|2.52694725990295|0.249630719423294|2.46921157836914|0.25649145245552|13|
|
19 |
-
|2.51587128639221|0.250377029180526|2.46414017677307|0.257025629281997|14|
|
20 |
-
|2.50599193572998|0.251064419746398|2.4557819366455|0.257613778114318|15|
|
21 |
-
|2.49690246582031|0.251682370901107|2.44843244552612|0.258032590150833|16|
|
22 |
-
|2.48859119415283|0.252267301082611|2.43858122825622|0.258764535188674|17|
|
23 |
-
|2.48097324371337|0.252792716026306|2.43251323699951|0.259270757436752|18|
|
24 |
-
|2.47009921073913|0.253554105758667|2.43577146530151|0.258938610553741|19|
|
25 |
-
|2.45849394798278|0.254375785589218|2.42337107658386|0.260090589523315|20|
|
26 |
-
|2.44776940345764|0.255127549171447|2.41147446632385|0.260682851076126|21|
|
27 |
-
|2.43759155273437|0.255834341049194|2.41405510902404|0.260819226503372|22|
|
28 |
-
|2.42819571495056|0.256486028432846|2.40314364433288|0.26152354478836|23|
|
29 |
-
|2.41974592208862|0.257094115018844|2.39181518554687|0.262460082769393|24|
|
30 |
-
|2.41181802749633|0.257666647434234|2.3825569152832|0.263035386800766|25|
|
31 |
-
|2.4044873714447|0.258173674345016|2.37829279899597|0.263585090637207|26|
|
32 |
-
|2.39774870872497|0.258645176887512|2.37718510627746|0.263547003269195|27|
|
33 |
-
|2.39184403419494|0.259076595306396|2.37379837036132|0.264020860195159|28|
|
34 |
-
|2.38593125343322|0.259495466947555|2.37083029747009|0.264293819665908|29|
|
35 |
-
|2.38093471527099|0.259853214025497|2.36486291885375|0.264451295137405|30|
|
36 |
-
|2.37621307373046|0.260185241699218|2.36547923088073|0.264706671237945|31|
|
37 |
-
|2.37177920341491|0.260504961013793|2.3609721660614|0.264981210231781|32|
|
38 |
-
|2.3679461479187|0.260774314403533|2.36445379257202|0.264800041913986|33|
|
39 |
-
|2.3643410205841|0.261037856340408|2.3573100566864|0.265379041433334|34|
|
40 |
-
|2.36092805862426|0.261268675327301|2.36105728149414|0.264868646860122|35|
|
41 |
-
|2.35798692703247|0.261485010385513|2.35409832000732|0.265503793954849|36|
|
42 |
-
|2.35523629188537|0.26168617606163|2.35252356529235|0.265713244676589|37|
|
43 |
-
|2.35284709930419|0.261859744787216|2.35101222991943|0.265856444835662|38|
|
44 |
-
|2.35047316551208|0.262033462524414|2.34698224067687|0.266099989414215|39|
|
45 |
-
|2.34832262992858|0.262173235416412|2.34894156455993|0.266122311353683|40|
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
license: apache-2.0
|
3 |
---
|
4 |
+
# pegasus-indonesian-base_pretrained
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
+
GitHub: [PEGASUSAnthony](https://github.com/nicholaswilven/PEGASUSAnthony/tree/master)
|
7 |
+
|
8 |
+
This model is the pretrained (not fine-tuned) counterpart of [pegasus-indonesian-base_finetune](https://huggingface.co/thonyyy/pegasus-indonesian-base_finetune). It was pretrained on [kaggle id news 2017](https://www.kaggle.com/datasets/aashari/indonesian-news-articles-published-at-2017), [CC_News_id](https://github.com/Wikidepia/indonesian_datasets/tree/master/dump/cc-news), and [OSCAR_2201](https://huggingface.co/datasets/oscar-corpus/OSCAR-2201/viewer/id/train).
|
9 |
+
|
10 |
+
It achieves the following training and validation results at the final epoch:
|
11 |
+
- Train Loss: 2.34832262992858
|
12 |
+
- Train Accuracy: 0.262173235416412
|
13 |
+
- Validation Loss: 2.34894156455993
|
14 |
+
- Validation Accuracy: 0.266122311353683
|
15 |
+
- Train Lr: 0.000136618677061051
|
16 |
+
- Epoch: 40
|
17 |
+
|
18 |
+
## Intended uses & limitations
|
19 |
+
|
20 |
+
This model is uncased and cannot read special characters other than "," and ".". It has a hard time understanding numbers, and its performance has only been tested on news article text.
|
21 |
+
|
22 |
+
## Training and evaluation data
|
23 |
+
Pretrain dataset:
|
24 |
+
1. [kaggle id news 2017](https://www.kaggle.com/datasets/aashari/indonesian-news-articles-published-at-2017)
|
25 |
+
2. [CC_News_id](https://github.com/Wikidepia/indonesian_datasets/tree/master/dump/cc-news)
|
26 |
+
3. [OSCAR_2201](https://huggingface.co/datasets/oscar-corpus/OSCAR-2201/viewer/id/train)
|
27 |
+
|
28 |
+
|
29 |
+
## Training procedure
|
30 |
+
For replication, see the [GitHub repository](https://github.com/nicholaswilven/PEGASUSAnthony/tree/master).
|
31 |
+
|
32 |
+
### Training hyperparameters
|
33 |
+
|
34 |
+
The following hyperparameters were used during training:
|
35 |
+
- optimizer: {'name': 'Adafactor', 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'jit_compile': False, 'is_legacy_optimizer': False, 'learning_rate': 0.005, 'beta_2_decay': -0.8, 'epsilon_1': 1e-30, 'epsilon_2': 0.001, 'clip_threshold': 1.0, 'relative_step': True}
|
36 |
+
- training_precision: float32
|
37 |
+
|
38 |
+
```python
|
39 |
+
configuration.vocab_size = 32103
|
40 |
+
configuration.d_model = 512
|
41 |
+
configuration.dropout = 0.15
|
42 |
+
configuration.decoder_attention_heads = 8
|
43 |
+
configuration.decoder_layers = 12
|
44 |
+
configuration.decoder_ffn_dim = 3072
|
45 |
+
configuration.encoder_attention_heads = 8
|
46 |
+
configuration.encoder_layers = 12
|
47 |
+
configuration.encoder_ffn_dim = 3072
|
48 |
+
```
|
49 |
+
### Training results
|
50 |
+
|
51 |
+
|
52 |
+
|Train Loss|Train Accuracy|Validation Loss|Validation Accuracy|Train Lr|Epoch|
|
53 |
+
|:--------:|:------------:|:-------------:|:-----------------:|:------:|:---:|
|
54 |
+
|4.1939034461975|0.145276814699172|3.39564657211303|0.186678826808929|0.00499999988824129|1|
|
55 |
+
|3.13256049156188|0.208270609378814|2.82256889343261|0.233325317502021|0.00499999988824129|2|
|
56 |
+
|2.84938621520996|0.229006066918373|2.72168040275573|0.23955675959587|0.00499999988824129|3|
|
57 |
+
|2.76001143455505|0.234559893608093|2.65143990516662|0.243813350796699|0.00499999988824129|4|
|
58 |
+
|2.70404982566833|0.238061532378196|2.6107530593872|0.246574580669403|0.00452418718487024|5|
|
59 |
+
|2.6638650894165|0.240613579750061|2.57847166061401|0.248678594827651|0.00409365398809313|6|
|
60 |
+
|2.63293719291687|0.242613524198532|2.55772447586059|0.250325441360473|0.00370409130118787|7|
|
61 |
+
|2.60750746726989|0.244251564145088|2.53469848632812|0.251805543899536|0.00335160037502646|8|
|
62 |
+
|2.58670353889465|0.245637223124504|2.51883554458618|0.253003656864166|0.00303265335969626|9|
|
63 |
+
|2.56865572929382|0.24682830274105|2.49989652633666|0.254459708929061|0.00274405837990343|10|
|
64 |
+
|2.55285787582397|0.247884958982467|2.50092124938964|0.254229605197906|0.00248292670585215|11|
|
65 |
+
|2.53919672966003|0.248811900615692|2.47859454154968|0.255691051483154|0.00224664504639804|12|
|
66 |
+
|2.52694725990295|0.249630719423294|2.46921157836914|0.25649145245552|0.00203284854069352|13|
|
67 |
+
|2.51587128639221|0.250377029180526|2.46414017677307|0.257025629281997|0.0018393974751234|14|
|
68 |
+
|2.50599193572998|0.251064419746398|2.4557819366455|0.257613778114318|0.00166435563005507|15|
|
69 |
+
|2.49690246582031|0.251682370901107|2.44843244552612|0.258032590150833|0.00150597130414098|16|
|
70 |
+
|2.48859119415283|0.252267301082611|2.43858122825622|0.258764535188674|0.00136265915352851|17|
|
71 |
+
|2.48097324371337|0.252792716026306|2.43251323699951|0.259270757436752|0.00123298505786806|18|
|
72 |
+
|2.47009921073913|0.253554105758667|2.43577146530151|0.258938610553741|0.00111565098632127|19|
|
73 |
+
|2.45849394798278|0.254375785589218|2.42337107658386|0.260090589523315|0.00100948277395218|20|
|
74 |
+
|2.44776940345764|0.255127549171447|2.41147446632385|0.260682851076126|0.000913417781703174|21|
|
75 |
+
|2.43759155273437|0.255834341049194|2.41405510902404|0.260819226503372|0.000826494593638926|22|
|
76 |
+
|2.42819571495056|0.256486028432846|2.40314364433288|0.26152354478836|0.000747843238059431|23|
|
77 |
+
|2.41974592208862|0.257094115018844|2.39181518554687|0.262460082769393|0.000676676572766155|24|
|
78 |
+
|2.41181802749633|0.257666647434234|2.3825569152832|0.263035386800766|0.000612282310612499|25|
|
79 |
+
|2.4044873714447|0.258173674345016|2.37829279899597|0.263585090637207|0.000554015976376831|26|
|
80 |
+
|2.39774870872497|0.258645176887512|2.37718510627746|0.263547003269195|0.000501294387504458|27|
|
81 |
+
|2.39184403419494|0.259076595306396|2.37379837036132|0.264020860195159|0.00045358992065303|28|
|
82 |
+
|2.38593125343322|0.259495466947555|2.37083029747009|0.264293819665908|0.000410425127483904|29|
|
83 |
+
|2.38093471527099|0.259853214025497|2.36486291885375|0.264451295137405|0.000371368019841611|30|
|
84 |
+
|2.37621307373046|0.260185241699218|2.36547923088073|0.264706671237945|0.000336027675075456|31|
|
85 |
+
|2.37177920341491|0.260504961013793|2.3609721660614|0.264981210231781|0.000304050423437729|32|
|
86 |
+
|2.3679461479187|0.260774314403533|2.36445379257202|0.264800041913986|0.000275116210104897|33|
|
87 |
+
|2.3643410205841|0.261037856340408|2.3573100566864|0.265379041433334|0.000248935451963916|34|
|
88 |
+
|2.36092805862426|0.261268675327301|2.36105728149414|0.264868646860122|0.000225246112677268|35|
|
89 |
+
|2.35798692703247|0.261485010385513|2.35409832000732|0.265503793954849|0.000203811112442053|36|
|
90 |
+
|2.35523629188537|0.26168617606163|2.35252356529235|0.265713244676589|0.000184415926923975|37|
|
91 |
+
|2.35284709930419|0.261859744787216|2.35101222991943|0.265856444835662|0.000166866433573886|38|
|
92 |
+
|2.35047316551208|0.262033462524414|2.34698224067687|0.266099989414215|0.000150986990774981|39|
|
93 |
+
|2.34832262992858|0.262173235416412|2.34894156455993|0.266122311353683|0.000136618677061051|40|
|
94 |
+
|
95 |
+
|
96 |
+
### Framework versions
|
97 |
+
|
98 |
+
- Transformers 4.30.2
|
99 |
+
- TensorFlow 2.12.0
|
100 |
+
- Datasets 2.13.1
|
101 |
+
- Tokenizers 0.13.3
|
102 |
+
|
103 |
+
### Special Thanks
|
104 |
+
This research was supported with Cloud TPUs from Google’s TPU Research Cloud (TRC).
|