File size: 1,214 Bytes
69882ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
{
	"dataset_name": "mozilla-foundation/common_voice_13_0",
	"model_name_or_path": "facebook/wav2vec2-large-xlsr-53",
	"dataset_config_name": "eo",
	"output_dir": "./wav2vec2-common_voice_13_0-eo-10",
	"train_split_name": "train",
	"eval_split_name": "validation",
	"eval_metrics": ["cer", "wer"],
	"overwrite_output_dir": true,
	"preprocessing_num_workers": 1,
	"num_train_epochs": 5,
	"per_device_train_batch_size": 16,
	"gradient_accumulation_steps": 2,
	"gradient_checkpointing": true,
	"learning_rate": 3e-5,
	"warmup_steps": 500,
	"evaluation_strategy": "steps",
	"text_column_name": "sentence",
	"length_column_name": "input_length",
	"save_steps": 1000,
	"eval_steps": 1000,
	"layerdrop": 0.2,
	"save_total_limit": 3,
	"freeze_feature_encoder": true,
	"chars_to_ignore": "-!\"'(),.:;=?_`¨«¸»ʼ‑–—‘’“”„…‹›♫\uff1f",
	"chars_to_substitute": {
		"przy": "pŝe",
		"byn": "bin",
		"cx": "ĉ",
		"sx": "ŝ",
		"\ufb01": "fi",
		"\ufb02": "fl",
		"ǔ": "ŭ",
		"ñ": "nj",
		"á": "a",
		"é": "e",
		"ü": "ŭ",
		"y": "j",
		"qu": "ku"
	},
	"fp16": true,
	"group_by_length": true,
	"push_to_hub": true,
	"do_train": true,
	"do_eval": true
}