Ransaka committed on
Commit
1cdb70e
1 Parent(s): f3c57a9

End of training

Browse files
README.md CHANGED
@@ -1,8 +1,7 @@
1
  ---
2
- base_model: microsoft/trocr-small-stage1
3
  tags:
4
- - trocr
5
- - image-to-text
6
  model-index:
7
  - name: trocr-IAM
8
  results: []
@@ -13,10 +12,10 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # trocr-IAM
15
 
16
- This model is a fine-tuned version of [microsoft/trocr-small-stage1](https://huggingface.co/microsoft/trocr-small-stage1) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 6.9227
19
- - Cer: 0.8191
20
 
21
  ## Model description
22
 
@@ -36,7 +35,7 @@ More information needed
36
 
37
  The following hyperparameters were used during training:
38
  - learning_rate: 5e-05
39
- - train_batch_size: 16
40
  - eval_batch_size: 8
41
  - seed: 42
42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
@@ -47,7 +46,8 @@ The following hyperparameters were used during training:
47
 
48
  | Training Loss | Epoch | Step | Validation Loss | Cer |
49
  |:-------------:|:-----:|:----:|:---------------:|:------:|
50
- | 7.0738 | 0.79 | 100 | 6.9227 | 0.8191 |
 
51
 
52
 
53
  ### Framework versions
@@ -55,4 +55,4 @@ The following hyperparameters were used during training:
55
  - Transformers 4.35.2
56
  - Pytorch 2.1.0+cu121
57
  - Datasets 2.16.0
58
- - Tokenizers 0.15.0
 
1
  ---
2
+ base_model: microsoft/trocr-base-stage1
3
  tags:
4
+ - generated_from_trainer
 
5
  model-index:
6
  - name: trocr-IAM
7
  results: []
 
12
 
13
  # trocr-IAM
14
 
15
+ This model is a fine-tuned version of [microsoft/trocr-base-stage1](https://huggingface.co/microsoft/trocr-base-stage1) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Loss: 0.9888
18
+ - Cer: 0.1284
19
 
20
  ## Model description
21
 
 
35
 
36
  The following hyperparameters were used during training:
37
  - learning_rate: 5e-05
38
+ - train_batch_size: 8
39
  - eval_batch_size: 8
40
  - seed: 42
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 
46
 
47
  | Training Loss | Epoch | Step | Validation Loss | Cer |
48
  |:-------------:|:-----:|:----:|:---------------:|:------:|
49
+ | 1.7207 | 0.2 | 50 | 1.5524 | 0.2112 |
50
+ | 0.7594 | 0.4 | 100 | 0.9888 | 0.1284 |
51
 
52
 
53
  ### Framework versions
 
55
  - Transformers 4.35.2
56
  - Pytorch 2.1.0+cu121
57
  - Datasets 2.16.0
58
+ - Tokenizers 0.15.0
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "microsoft/trocr-small-stage1",
3
  "architectures": [
4
  "VisionEncoderDecoderModel"
5
  ],
@@ -15,12 +15,12 @@
15
  "bos_token_id": 0,
16
  "chunk_size_feed_forward": 0,
17
  "classifier_dropout": 0.0,
18
- "cross_attention_hidden_size": 384,
19
- "d_model": 256,
20
- "decoder_attention_heads": 8,
21
- "decoder_ffn_dim": 1024,
22
  "decoder_layerdrop": 0.0,
23
- "decoder_layers": 6,
24
  "decoder_start_token_id": 2,
25
  "diversity_penalty": 0.0,
26
  "do_sample": false,
@@ -43,10 +43,10 @@
43
  "LABEL_0": 0,
44
  "LABEL_1": 1
45
  },
46
- "layernorm_embedding": true,
47
  "length_penalty": 1.0,
48
  "max_length": 20,
49
- "max_position_embeddings": 512,
50
  "min_length": 0,
51
  "model_type": "trocr",
52
  "no_repeat_ngram_size": 0,
@@ -80,8 +80,8 @@
80
  "typical_p": 1.0,
81
  "use_bfloat16": false,
82
  "use_cache": false,
83
- "use_learned_position_embeddings": true,
84
- "vocab_size": 64044
85
  },
86
  "decoder_start_token_id": 0,
87
  "early_stopping": true,
@@ -108,14 +108,14 @@
108
  "forced_eos_token_id": null,
109
  "hidden_act": "gelu",
110
  "hidden_dropout_prob": 0.0,
111
- "hidden_size": 384,
112
  "id2label": {
113
  "0": "LABEL_0",
114
  "1": "LABEL_1"
115
  },
116
  "image_size": 384,
117
  "initializer_range": 0.02,
118
- "intermediate_size": 1536,
119
  "is_decoder": false,
120
  "is_encoder_decoder": false,
121
  "label2id": {
@@ -126,9 +126,9 @@
126
  "length_penalty": 1.0,
127
  "max_length": 20,
128
  "min_length": 0,
129
- "model_type": "deit",
130
  "no_repeat_ngram_size": 0,
131
- "num_attention_heads": 6,
132
  "num_beam_groups": 1,
133
  "num_beams": 1,
134
  "num_channels": 3,
@@ -142,7 +142,7 @@
142
  "prefix": null,
143
  "problem_type": null,
144
  "pruned_heads": {},
145
- "qkv_bias": true,
146
  "remove_invalid_values": false,
147
  "repetition_penalty": 1.0,
148
  "return_dict": true,
@@ -173,5 +173,5 @@
173
  "tie_word_embeddings": false,
174
  "torch_dtype": "float32",
175
  "transformers_version": "4.35.2",
176
- "vocab_size": 64044
177
  }
 
1
  {
2
+ "_name_or_path": "microsoft/trocr-base-stage1",
3
  "architectures": [
4
  "VisionEncoderDecoderModel"
5
  ],
 
15
  "bos_token_id": 0,
16
  "chunk_size_feed_forward": 0,
17
  "classifier_dropout": 0.0,
18
+ "cross_attention_hidden_size": 768,
19
+ "d_model": 1024,
20
+ "decoder_attention_heads": 16,
21
+ "decoder_ffn_dim": 4096,
22
  "decoder_layerdrop": 0.0,
23
+ "decoder_layers": 12,
24
  "decoder_start_token_id": 2,
25
  "diversity_penalty": 0.0,
26
  "do_sample": false,
 
43
  "LABEL_0": 0,
44
  "LABEL_1": 1
45
  },
46
+ "layernorm_embedding": false,
47
  "length_penalty": 1.0,
48
  "max_length": 20,
49
+ "max_position_embeddings": 1024,
50
  "min_length": 0,
51
  "model_type": "trocr",
52
  "no_repeat_ngram_size": 0,
 
80
  "typical_p": 1.0,
81
  "use_bfloat16": false,
82
  "use_cache": false,
83
+ "use_learned_position_embeddings": false,
84
+ "vocab_size": 50265
85
  },
86
  "decoder_start_token_id": 0,
87
  "early_stopping": true,
 
108
  "forced_eos_token_id": null,
109
  "hidden_act": "gelu",
110
  "hidden_dropout_prob": 0.0,
111
+ "hidden_size": 768,
112
  "id2label": {
113
  "0": "LABEL_0",
114
  "1": "LABEL_1"
115
  },
116
  "image_size": 384,
117
  "initializer_range": 0.02,
118
+ "intermediate_size": 3072,
119
  "is_decoder": false,
120
  "is_encoder_decoder": false,
121
  "label2id": {
 
126
  "length_penalty": 1.0,
127
  "max_length": 20,
128
  "min_length": 0,
129
+ "model_type": "vit",
130
  "no_repeat_ngram_size": 0,
131
+ "num_attention_heads": 12,
132
  "num_beam_groups": 1,
133
  "num_beams": 1,
134
  "num_channels": 3,
 
142
  "prefix": null,
143
  "problem_type": null,
144
  "pruned_heads": {},
145
+ "qkv_bias": false,
146
  "remove_invalid_values": false,
147
  "repetition_penalty": 1.0,
148
  "return_dict": true,
 
173
  "tie_word_embeddings": false,
174
  "torch_dtype": "float32",
175
  "transformers_version": "4.35.2",
176
+ "vocab_size": 50265
177
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4fa644f6b5ad795584c617bdbb348a8aaedcd16fd89465cf8dac887c1c471d1
3
- size 246430696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e7f883ff584596682f50f2bfe0d85276883b8173d4eb7567e628b1ff9e19e68
3
+ size 1539518812
runs/Dec30_09-31-11_56e0cc75e629/events.out.tfevents.1703928680.56e0cc75e629.18680.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b38b5ea1f00feeb9b65638ab4eea843635aa4f739db801143adf767b8e589cb5
3
+ size 17123
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8e7729de998ceff24acd971e78c5e1a7129bb20dd9312b1c5860bbc885139ee
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bf3ee739dd2a91fa6b4784a4db8c78960318c061662a4ae67ebbdf66f768b5d
3
  size 4728