dummy / tokenizer_config.json

Training in progress, step 1000

529f88a about 1 year ago

9.55 kB

	{
	"added_tokens_decoder": {
	"0": {
	"content": "<s>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"1": {
	"content": "<pad>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"2": {
	"content": "</s>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"3": {
	"content": "<unk>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"79": {
	"content": "<mask>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"80": {
	"content": "<ctc_blank>",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"81": {
	"content": "ஃ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"82": {
	"content": "அ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"83": {
	"content": "ஆ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"84": {
	"content": "இ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"85": {
	"content": "ஈ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"86": {
	"content": "உ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"87": {
	"content": "ஊ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"88": {
	"content": "எ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"89": {
	"content": "ஏ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"90": {
	"content": "ஐ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"91": {
	"content": "ஒ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"92": {
	"content": "ஓ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"93": {
	"content": "ஔ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"94": {
	"content": "க",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"95": {
	"content": "ங",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"96": {
	"content": "ச",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"97": {
	"content": "ஜ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"98": {
	"content": "ஞ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"99": {
	"content": "ட",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"100": {
	"content": "ண",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"101": {
	"content": "த",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"102": {
	"content": "ந",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"103": {
	"content": "ன",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"104": {
	"content": "ப",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"105": {
	"content": "ம",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"106": {
	"content": "ய",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"107": {
	"content": "ர",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"108": {
	"content": "ற",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"109": {
	"content": "ல",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"110": {
	"content": "ள",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"111": {
	"content": "ழ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"112": {
	"content": "வ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"113": {
	"content": "ஶ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"114": {
	"content": "ஷ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"115": {
	"content": "ஸ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"116": {
	"content": "ஹ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"117": {
	"content": "ா",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"118": {
	"content": "ி",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"119": {
	"content": "ீ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"120": {
	"content": "ு",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"121": {
	"content": "ூ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"122": {
	"content": "ெ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"123": {
	"content": "ே",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"124": {
	"content": "ை",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"125": {
	"content": "ொ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"126": {
	"content": "ோ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"127": {
	"content": "ௌ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"128": {
	"content": "்",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"129": {
	"content": "ௗ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	}
	},
	"additional_special_tokens": [
	"<s>",
	"<pad>",
	"</s>",
	"<unk>",
	"<mask>"
	],
	"bos_token": "<s>",
	"clean_up_tokenization_spaces": true,
	"eos_token": "</s>",
	"mask_token": "<mask>",
	"model_max_length": 600,
	"normalize": false,
	"pad_token": "<pad>",
	"processor_class": "SpeechT5Processor",
	"sp_model_kwargs": {},
	"tokenizer_class": "SpeechT5Tokenizer",
	"tokenizer_file": null,
	"unk_token": "<unk>"
	}