fgaim committed on
Commit
4f71499
1 Parent(s): c3bf621

Update config and readme

Browse files
Files changed (2) hide show
  1. README.md +64 -0
  2. config.json +10 -10
README.md CHANGED
@@ -1,3 +1,67 @@
1
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  license: cc-by-4.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ language: ti
3
+ widget:
4
+ - text: "ድምጻዊ ኣብርሃም ኣፈወርቂ ንዘልኣለም ህያው ኮይኑ ኣብ ልብና ይነብር"
5
+ - text: "ወአመ ሳብዕት ዕለት ቦዘወፅአ እምውስተ ሕዝብ ከመ ያስተጋብእ ወኢረከበ።"
6
+ - text: "እሊ እግል ኖሱ አሳስ ተጠውር ወዐቦት ክምሰልቱ ሸክ ኢወትውዴ።"
7
+ - text: "ኣኩኽር ፡ ልሽክክ ናው ጀረቢነዅስክ ክሙኑኽር ክራውል ሕበርሲድኖ ገረሰነኵ።"
8
+ - text: "ነገ ለግማሽ ፍፃሜ ያለፉትን አሳውቀንና አስመርጠናችሁ እንሸልማለን።"
9
+ tags:
10
+ - geezlab
11
+ metrics:
12
+ - accuracy
13
+ - f1
14
+ - precision
15
+ - recall
16
+ model-index:
17
+ - name: geezswitch-tielectra
18
+ results: []
19
  license: cc-by-4.0
20
  ---
21
+
22
+ # TiELECTRA-GeezSwitch
23
+
24
+ This model is a fine-tuned version of [fgaim/tielectra-small](https://huggingface.co/fgaim/tielectra-small) on the [GeezSwitch](https://github.com/fgaim/geezswitch-data) dataset.
25
+
26
+ It achieves the following results on the test set:
27
+
28
+ - F1: 0.9844
29
+ - Recall: 0.9844
30
+ - Precision: 0.9845
31
+ - Accuracy: 0.9844
32
+ - Loss: 0.2190
33
+
34
+ ## Training
35
+
36
+ ### Hyperparameters
37
+
38
+ The following hyperparameters were used during training:
39
+
40
+ - learning_rate: 2e-05
41
+ - train_batch_size: 32
42
+ - eval_batch_size: 32
43
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
+ - lr_scheduler_type: linear
45
+ - num_epochs: 3.0
46
+ - seed: 42
47
+
48
+ ### Framework versions
49
+
50
+ - Transformers 4.19.0.dev0
51
+ PyTorch 1.11.0+cu113
52
+ - Datasets 2.1.0
53
+ - Tokenizers 0.12.1
54
+
55
+
56
+ ### Citation
57
+
58
+ If you use this model or the GeezSwitch dataset in your research, please cite as follows:
59
+
60
+ ```bibtex
61
+ @inproceedings{fgaim2022geezswitch,
62
+ title={GeezSwitch: Language Identification in Typologically Related Low-resourced East African Languages},
63
+ author={Fitsum Gaim and Wonsuk Yang and Jong C. Park},
64
+ booktitle={Proceedings of the 13th Language Resources and Evaluation Conference},
65
+ year={2022}
66
+ }
67
+ ```
config.json CHANGED
@@ -10,20 +10,20 @@
10
  "hidden_dropout_prob": 0.1,
11
  "hidden_size": 256,
12
  "id2label": {
13
- "0": "amharic",
14
- "1": "blin",
15
- "2": "geez",
16
- "3": "tigre",
17
- "4": "tigrinya"
18
  },
19
  "initializer_range": 0.02,
20
  "intermediate_size": 1024,
21
  "label2id": {
22
- "amharic": 0,
23
- "blin": 1,
24
- "geez": 2,
25
- "tigre": 3,
26
- "tigrinya": 4
27
  },
28
  "layer_norm_eps": 1e-12,
29
  "max_position_embeddings": 512,
 
10
  "hidden_dropout_prob": 0.1,
11
  "hidden_size": 256,
12
  "id2label": {
13
+ "0": "Amharic",
14
+ "1": "Blin",
15
+ "2": "Ge'ez",
16
+ "3": "Tigre",
17
+ "4": "Tigrinya"
18
  },
19
  "initializer_range": 0.02,
20
  "intermediate_size": 1024,
21
  "label2id": {
22
+ "Amharic": 0,
23
+ "Blin": 1,
24
+ "Ge'ez": 2,
25
+ "Tigre": 3,
26
+ "Tigrinya": 4
27
  },
28
  "layer_norm_eps": 1e-12,
29
  "max_position_embeddings": 512,