hanasim committed on
Commit
9b479cc
·
verified ·
1 Parent(s): bc98785

Model save

Browse files
README.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-nc-4.0
3
+ base_model: facebook/mms-1b-all
4
+ tags:
5
+ - generated_from_trainer
6
+ datasets:
7
+ - fleurs
8
+ model-index:
9
+ - name: breeze-listen-w2v2-kn-GF
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # breeze-listen-w2v2-kn-GF
17
+
18
+ This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the fleurs dataset.
19
+
20
+ ## Model description
21
+
22
+ More information needed
23
+
24
+ ## Intended uses & limitations
25
+
26
+ More information needed
27
+
28
+ ## Training and evaluation data
29
+
30
+ More information needed
31
+
32
+ ## Training procedure
33
+
34
+ ### Training hyperparameters
35
+
36
+ The following hyperparameters were used during training:
37
+ - learning_rate: 0.001
38
+ - train_batch_size: 1
39
+ - eval_batch_size: 8
40
+ - seed: 42
41
+ - distributed_type: multi-GPU
42
+ - gradient_accumulation_steps: 32
43
+ - total_train_batch_size: 32
44
+ - optimizer: 8-bit AdamW (bitsandbytes, `adamw_bnb_8bit`) with betas=(0.9,0.999) and epsilon=1e-08
45
+ - lr_scheduler_type: linear
46
+ - lr_scheduler_warmup_steps: 100
47
+ - num_epochs: 4.0
48
+ - mixed_precision_training: Native AMP
49
+
50
+ ### Training results
51
+
52
+
53
+
54
+ ### Framework versions
55
+
56
+ - Transformers 4.38.0.dev0
57
+ - Pytorch 2.1.2+cu121
58
+ - Datasets 2.16.1
59
+ - Tokenizers 0.15.1
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</s>": 103,
3
+ "<s>": 102
4
+ }
breeze-listen-w2v2-kn-GF.log ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 02/04/2024 13:54:35 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, 16-bits training: True
2
+ 02/04/2024 13:54:35 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
3
+ _n_gpu=1,
4
+ adafactor=False,
5
+ adam_beta1=0.9,
6
+ adam_beta2=0.999,
7
+ adam_epsilon=1e-08,
8
+ auto_find_batch_size=False,
9
+ bf16=False,
10
+ bf16_full_eval=False,
11
+ data_seed=None,
12
+ dataloader_drop_last=False,
13
+ dataloader_num_workers=0,
14
+ dataloader_persistent_workers=False,
15
+ dataloader_pin_memory=True,
16
+ dataloader_prefetch_factor=None,
17
+ ddp_backend=None,
18
+ ddp_broadcast_buffers=None,
19
+ ddp_bucket_cap_mb=None,
20
+ ddp_find_unused_parameters=None,
21
+ ddp_timeout=1800,
22
+ debug=[],
23
+ deepspeed=None,
24
+ disable_tqdm=False,
25
+ dispatch_batches=None,
26
+ do_eval=True,
27
+ do_predict=False,
28
+ do_train=True,
29
+ eval_accumulation_steps=None,
30
+ eval_delay=0,
31
+ eval_steps=1000,
32
+ evaluation_strategy=IntervalStrategy.STEPS,
33
+ fp16=True,
34
+ fp16_backend=auto,
35
+ fp16_full_eval=False,
36
+ fp16_opt_level=O1,
37
+ fsdp=[],
38
+ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
39
+ fsdp_min_num_params=0,
40
+ fsdp_transformer_layer_cls_to_wrap=None,
41
+ full_determinism=False,
42
+ gradient_accumulation_steps=32,
43
+ gradient_checkpointing=True,
44
+ gradient_checkpointing_kwargs=None,
45
+ greater_is_better=None,
46
+ group_by_length=True,
47
+ half_precision_backend=auto,
48
+ hub_always_push=False,
49
+ hub_model_id=simpragma/breeze-listen-w2v2-kn-GF,
50
+ hub_private_repo=False,
51
+ hub_strategy=HubStrategy.EVERY_SAVE,
52
+ hub_token=<HUB_TOKEN>,
53
+ ignore_data_skip=False,
54
+ include_inputs_for_metrics=False,
55
+ include_num_input_tokens_seen=False,
56
+ include_tokens_per_second=False,
57
+ jit_mode_eval=False,
58
+ label_names=None,
59
+ label_smoothing_factor=0.0,
60
+ learning_rate=0.001,
61
+ length_column_name=input_length,
62
+ load_best_model_at_end=False,
63
+ local_rank=0,
64
+ log_level=passive,
65
+ log_level_replica=warning,
66
+ log_on_each_node=True,
67
+ logging_dir=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-kn-GF/runs/Feb04_13-54-35_knight,
68
+ logging_first_step=False,
69
+ logging_nan_inf_filter=True,
70
+ logging_steps=500,
71
+ logging_strategy=IntervalStrategy.STEPS,
72
+ lr_scheduler_kwargs={},
73
+ lr_scheduler_type=SchedulerType.LINEAR,
74
+ max_grad_norm=1.0,
75
+ max_steps=-1,
76
+ metric_for_best_model=None,
77
+ mp_parameters=,
78
+ neftune_noise_alpha=None,
79
+ no_cuda=False,
80
+ num_train_epochs=4.0,
81
+ optim=OptimizerNames.ADAMW_BNB,
82
+ optim_args=None,
83
+ output_dir=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-kn-GF,
84
+ overwrite_output_dir=True,
85
+ past_index=-1,
86
+ per_device_eval_batch_size=8,
87
+ per_device_train_batch_size=1,
88
+ prediction_loss_only=False,
89
+ push_to_hub=True,
90
+ push_to_hub_model_id=None,
91
+ push_to_hub_organization=None,
92
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
93
+ ray_scope=last,
94
+ remove_unused_columns=True,
95
+ report_to=[],
96
+ resume_from_checkpoint=None,
97
+ run_name=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-kn-GF,
98
+ save_on_each_node=False,
99
+ save_only_model=False,
100
+ save_safetensors=True,
101
+ save_steps=1000,
102
+ save_strategy=IntervalStrategy.STEPS,
103
+ save_total_limit=3,
104
+ seed=42,
105
+ skip_memory_metrics=True,
106
+ split_batches=False,
107
+ tf32=None,
108
+ torch_compile=False,
109
+ torch_compile_backend=None,
110
+ torch_compile_mode=None,
111
+ torchdynamo=None,
112
+ tpu_metrics_debug=False,
113
+ tpu_num_cores=None,
114
+ use_cpu=False,
115
+ use_ipex=False,
116
+ use_legacy_prediction_loop=False,
117
+ use_mps_device=False,
118
+ warmup_ratio=0.0,
119
+ warmup_steps=100,
120
+ weight_decay=0.0,
121
+ )
122
+ {'train_runtime': 13678.922, 'train_samples_per_second': 0.723, 'train_steps_per_second': 0.023, 'train_loss': 3.364711216517857, 'epoch': 3.99}
config.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/mms-1b-all",
3
+ "activation_dropout": 0.05,
4
+ "adapter_attn_dim": 16,
5
+ "adapter_kernel_size": 3,
6
+ "adapter_stride": 2,
7
+ "add_adapter": false,
8
+ "apply_spec_augment": true,
9
+ "architectures": [
10
+ "Wav2Vec2ForCTC"
11
+ ],
12
+ "attention_dropout": 0.05,
13
+ "bos_token_id": 1,
14
+ "classifier_proj_size": 256,
15
+ "codevector_dim": 1024,
16
+ "contrastive_logits_temperature": 0.1,
17
+ "conv_bias": true,
18
+ "conv_dim": [
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512,
25
+ 512
26
+ ],
27
+ "conv_kernel": [
28
+ 10,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 3,
33
+ 2,
34
+ 2
35
+ ],
36
+ "conv_stride": [
37
+ 5,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2,
43
+ 2
44
+ ],
45
+ "ctc_loss_reduction": "mean",
46
+ "ctc_zero_infinity": false,
47
+ "diversity_loss_weight": 0.1,
48
+ "do_stable_layer_norm": true,
49
+ "eos_token_id": 2,
50
+ "feat_extract_activation": "gelu",
51
+ "feat_extract_dropout": 0.0,
52
+ "feat_extract_norm": "layer",
53
+ "feat_proj_dropout": 0.05,
54
+ "feat_quantizer_dropout": 0.0,
55
+ "final_dropout": 0.0,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.05,
58
+ "hidden_size": 1280,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 5120,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.0,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.05,
69
+ "model_type": "wav2vec2",
70
+ "num_adapter_layers": 3,
71
+ "num_attention_heads": 16,
72
+ "num_codevector_groups": 2,
73
+ "num_codevectors_per_group": 320,
74
+ "num_conv_pos_embedding_groups": 16,
75
+ "num_conv_pos_embeddings": 128,
76
+ "num_feat_extract_layers": 7,
77
+ "num_hidden_layers": 48,
78
+ "num_negatives": 100,
79
+ "output_hidden_size": 1280,
80
+ "pad_token_id": 101,
81
+ "proj_codevector_dim": 1024,
82
+ "tdnn_dilation": [
83
+ 1,
84
+ 2,
85
+ 3,
86
+ 1,
87
+ 1
88
+ ],
89
+ "tdnn_dim": [
90
+ 512,
91
+ 512,
92
+ 512,
93
+ 512,
94
+ 1500
95
+ ],
96
+ "tdnn_kernel": [
97
+ 5,
98
+ 3,
99
+ 3,
100
+ 1,
101
+ 1
102
+ ],
103
+ "torch_dtype": "float32",
104
+ "transformers_version": "4.38.0.dev0",
105
+ "use_weighted_layer_sum": false,
106
+ "vocab_size": 104,
107
+ "xvector_output_dim": 512
108
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84fe74380883a43f5a315e535ef7668f1661b11fa85249c1f5ea0efd5b201db3
3
+ size 3859264976
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": true,
19
+ "normalized": false,
20
+ "rstrip": true,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "[UNK]",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": true,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "100": {
4
+ "content": "[UNK]",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "101": {
12
+ "content": "[PAD]",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "102": {
20
+ "content": "<s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "103": {
28
+ "content": "</s>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "bos_token": "<s>",
37
+ "clean_up_tokenization_spaces": true,
38
+ "do_lower_case": false,
39
+ "eos_token": "</s>",
40
+ "model_max_length": 1000000000000000019884624838656,
41
+ "pad_token": "[PAD]",
42
+ "processor_class": "Wav2Vec2Processor",
43
+ "replace_word_delimiter_char": " ",
44
+ "target_lang": "kan",
45
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
46
+ "unk_token": "[UNK]",
47
+ "word_delimiter_token": "|"
48
+ }
train-ctc-model.sh ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #! /usr/bin/bash
2
+
3
+ #
4
+ # This script runs the speech recognition training using DeepSpeed
5
+ #
6
+
7
+ # CHANGE THESE AS PER YOUR REQUIREMENTS
8
+
9
+ # LANG as it is referred in the dataset
10
+ #LANG=te # 2 letter ISO code for the language
11
+ LANG=kn_in # Dataset config name: 2-letter ISO language code plus locale suffix (some datasets, e.g. google/fleurs, require the locale)
12
+ LANG_ISO_3=kan # 3 letter ISO code for the language
13
+ LANGUAGE=Kannada # Full language name as per Whisper convention
14
+
15
+ # For Mozilla Common Voice datasets, uncomment the following
16
+ #DATASET="mozilla-foundation/common_voice_16_0"
17
+ #TEXT_COLUMN="sentence"
18
+
19
+ # For Google Fleurs datasets, uncomment the following
20
+ DATASET="google/fleurs"
21
+ TEXT_COLUMN="transcription"
22
+
23
+ # Custom datasets
24
+ #DATASET="parambharat/kannada_asr_corpus"
25
+ #TEXT_COLUMN=${TEXT_COLUMN:-"sentence"}
26
+
27
+ #
28
+ # Main
29
+ #
30
+
31
+ SCRIPT_PATH=$(realpath "${BASH_SOURCE[0]}")
32
+ SCRIPT_DIR=$(realpath $(dirname "${BASH_SOURCE[0]}"))
33
+
34
+ # Port to use
35
+ export MASTER_PORT="${MASTER_PORT:-29500}"
36
+ echo "Using master_port for deepspeech: ${MASTER_PORT}"
37
+
38
+ export "MASTER_ADDR"="localhost"
39
+ export "RANK"="0"
40
+ export "LOCAL_RANK"="0"
41
+ export "WORLD_SIZE"="1"
42
+
43
+ # Base model variant
44
+ MODEL=w2v2
45
+
46
+ # Model names and other stuff
47
+ BASE_MODEL="facebook/mms-1b-all"
48
+
49
+ JUST_LANG=${LANG%%_*}
50
+ MY_MODEL="breeze-listen-${MODEL}-${JUST_LANG}-GF"
51
+
52
+ OUTDIR="/cosmos/home/sp-operator/ai/training/models/simpragma/${MY_MODEL}"
53
+ echo "OUTDIR: ${OUTDIR}"
54
+
55
+ # Training parameters you can tweak. Feel free to directly change any of the parameters below.
56
+
57
+ MAX_EPOCHS=4
58
+ TRAIN_BATCH_SIZE=2
59
+ EVAL_BATCH_SIZE=2
60
+ LEARNING_RATE="1e-3"
61
+
62
+ EVAL_STEPS="1000"
63
+ SAVE_STEPS="1000"
64
+
65
+ # Create dir
66
+ mkdir -p ${OUTDIR}
67
+
68
+ # --overwrite_output_dir \
69
+
70
+ # To resume from an existing checkpoint, also include the following argument, adjusting the checkpoint directory as needed.
71
+ # --resume_from_checkpoint="${MY_MODEL}/checkpoint-400" \
72
+
73
+ echo "================ TRAINING: START ================"
74
+
75
+ python ${SCRIPT_DIR}/run_speech_recognition_ctc_adapter.py \
76
+ --dataset_name="${DATASET}" \
77
+ --model_name_or_path="${BASE_MODEL}" \
78
+ --dataset_config_name="${LANG}" \
79
+ --target_language="${LANG_ISO_3}" \
80
+ --output_dir="${OUTDIR}" \
81
+ --num_train_epochs="${MAX_EPOCHS}" \
82
+ --per_device_train_batch_size="${TRAIN_BATCH_SIZE}" \
83
+ --learning_rate="${LEARNING_RATE}" \
84
+ --warmup_steps="100" \
85
+ --evaluation_strategy="steps" \
86
+ --text_column_name="${TEXT_COLUMN}" \
87
+ --length_column_name="input_length" \
88
+ --save_steps="${SAVE_STEPS}" \
89
+ --eval_steps="${EVAL_STEPS}" \
90
+ --save_total_limit="3" \
91
+ --optim="adamw_bnb_8bit" \
92
+ --hub_model_id "simpragma/${MY_MODEL}" \
93
+ --gradient_checkpointing \
94
+ --chars_to_ignore , ? . ! - \; \: \" “ % ‘ ” � \
95
+ --fp16 \
96
+ --group_by_length \
97
+ --do_train \
98
+ --do_eval \
99
+ --push_to_hub \
100
+ --overwrite_output_dir \
101
+ | tee ${OUTDIR}/${MY_MODEL}.log
102
+
103
+ # Copy the script to the output directory so that we can recreate the model
104
+ cp ${SCRIPT_PATH} ${OUTDIR}
105
+
106
+ echo "================ TRAINING: DONE ================"
107
+
108
+ exit 0
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:889214cde0e59492b6da27c312fdf4c9aa1ba5439a639a7b5f6e3a9dc0f91c73
3
+ size 4856
vocab.json ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "kan": {
3
+ "[": 1,
4
+ "[PAD]": 101,
5
+ "[UNK]": 100,
6
+ "]": 2,
7
+ "a": 3,
8
+ "b": 4,
9
+ "c": 5,
10
+ "d": 6,
11
+ "e": 7,
12
+ "f": 8,
13
+ "g": 9,
14
+ "h": 10,
15
+ "i": 11,
16
+ "j": 12,
17
+ "k": 13,
18
+ "l": 14,
19
+ "m": 15,
20
+ "n": 16,
21
+ "o": 17,
22
+ "p": 18,
23
+ "q": 19,
24
+ "r": 20,
25
+ "s": 21,
26
+ "t": 22,
27
+ "u": 23,
28
+ "v": 24,
29
+ "w": 25,
30
+ "x": 26,
31
+ "y": 27,
32
+ "z": 28,
33
+ "|": 0,
34
+ "°": 29,
35
+ "²": 30,
36
+ "½": 31,
37
+ "¾": 32,
38
+ "õ": 33,
39
+ "ಂ": 34,
40
+ "ಃ": 35,
41
+ "ಅ": 36,
42
+ "ಆ": 37,
43
+ "ಇ": 38,
44
+ "ಈ": 39,
45
+ "ಉ": 40,
46
+ "ಊ": 41,
47
+ "ಋ": 42,
48
+ "ಎ": 43,
49
+ "ಏ": 44,
50
+ "ಐ": 45,
51
+ "ಒ": 46,
52
+ "ಓ": 47,
53
+ "ಔ": 48,
54
+ "ಕ": 49,
55
+ "ಖ": 50,
56
+ "ಗ": 51,
57
+ "ಘ": 52,
58
+ "ಚ": 53,
59
+ "ಛ": 54,
60
+ "ಜ": 55,
61
+ "ಝ": 56,
62
+ "ಞ": 57,
63
+ "ಟ": 58,
64
+ "ಠ": 59,
65
+ "ಡ": 60,
66
+ "ಢ": 61,
67
+ "ಣ": 62,
68
+ "ತ": 63,
69
+ "ಥ": 64,
70
+ "ದ": 65,
71
+ "ಧ": 66,
72
+ "ನ": 67,
73
+ "ಪ": 68,
74
+ "ಫ": 69,
75
+ "ಬ": 70,
76
+ "ಭ": 71,
77
+ "ಮ": 72,
78
+ "ಯ": 73,
79
+ "ರ": 74,
80
+ "ಲ": 75,
81
+ "ಳ": 76,
82
+ "ವ": 77,
83
+ "ಶ": 78,
84
+ "ಷ": 79,
85
+ "ಸ": 80,
86
+ "ಹ": 81,
87
+ "ಾ": 82,
88
+ "ಿ": 83,
89
+ "ೀ": 84,
90
+ "ು": 85,
91
+ "ೂ": 86,
92
+ "ೃ": 87,
93
+ "ೆ": 88,
94
+ "ೇ": 89,
95
+ "ೈ": 90,
96
+ "ೊ": 91,
97
+ "ೋ": 92,
98
+ "ೌ": 93,
99
+ "್": 94,
100
+ "೪": 95,
101
+ "​": 96,
102
+ "‌": 97,
103
+ "‍": 98,
104
+ "–": 99
105
+ }
106
+ }