shaojieli commited on
Commit
fc345f5
·
1 Parent(s): f58997a

Upload 34 files

Browse files
Files changed (35) hide show
  1. .gitattributes +8 -0
  2. data/lang_bpe_500/HLG.pt +3 -0
  3. data/lang_bpe_500/L.fst +3 -0
  4. data/lang_bpe_500/L.pt +3 -0
  5. data/lang_bpe_500/LG.pt +3 -0
  6. data/lang_bpe_500/L_disambig.fst +3 -0
  7. data/lang_bpe_500/L_disambig.pt +3 -0
  8. data/lang_bpe_500/Linv.pt +3 -0
  9. data/lang_bpe_500/bpe.model +3 -0
  10. data/lang_bpe_500/lexicon.txt +0 -0
  11. data/lang_bpe_500/lexicon_disambig.txt +0 -0
  12. data/lang_bpe_500/tokens.txt +502 -0
  13. data/lang_bpe_500/train.txt +3 -0
  14. data/lang_bpe_500/train_orig.txt +3 -0
  15. data/lang_bpe_500/unigram_500.model +3 -0
  16. data/lang_bpe_500/unigram_500.vocab +500 -0
  17. data/lang_bpe_500/words.txt +0 -0
  18. data/lang_bpe_500/words_no_ids.txt +0 -0
  19. data/lm/3gram.arpa +3 -0
  20. data/lm/4gram.arpa +3 -0
  21. data/lm/G_3_gram.fst.txt +3 -0
  22. data/lm/G_3_gram.pt +3 -0
  23. data/lm/G_4_gram.fst.txt +3 -0
  24. decoding_results/fast_beam_search/errs-test-cv-beam_20.0_max_contexts_8_max_states_64-epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model.txt +0 -0
  25. decoding_results/fast_beam_search/log-decode-epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model-2023-04-03-17-31-21 +82 -0
  26. decoding_results/fast_beam_search/recogs-test-cv-beam_20.0_max_contexts_8_max_states_64-epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model.txt +0 -0
  27. decoding_results/fast_beam_search/wer-summary-test-cv-beam_20.0_max_contexts_8_max_states_64.txt +2 -0
  28. decoding_results/greedy_search/errs-test-cv-greedy_search-epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  29. decoding_results/greedy_search/log-decode-epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model-2023-04-03-17-20-40 +52 -0
  30. decoding_results/greedy_search/recogs-test-cv-greedy_search-epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  31. decoding_results/greedy_search/wer-summary-test-cv-greedy_search.txt +2 -0
  32. decoding_results/modified_beam_search/errs-test-cv-beam_size_4-epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model.txt +0 -0
  33. decoding_results/modified_beam_search/log-decode-epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model-2023-04-03-17-22-38 +82 -0
  34. decoding_results/modified_beam_search/recogs-test-cv-beam_size_4-epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model.txt +0 -0
  35. decoding_results/modified_beam_search/wer-summary-test-cv-beam_size_4.txt +2 -0
.gitattributes CHANGED
@@ -32,3 +32,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ data/lang_bpe_500/L_disambig.fst filter=lfs diff=lfs merge=lfs -text
36
+ data/lang_bpe_500/L.fst filter=lfs diff=lfs merge=lfs -text
37
+ data/lang_bpe_500/train_orig.txt filter=lfs diff=lfs merge=lfs -text
38
+ data/lang_bpe_500/train.txt filter=lfs diff=lfs merge=lfs -text
39
+ data/lm/3gram.arpa filter=lfs diff=lfs merge=lfs -text
40
+ data/lm/4gram.arpa filter=lfs diff=lfs merge=lfs -text
41
+ data/lm/G_3_gram.fst.txt filter=lfs diff=lfs merge=lfs -text
42
+ data/lm/G_4_gram.fst.txt filter=lfs diff=lfs merge=lfs -text
data/lang_bpe_500/HLG.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1431e9712ce1bf45a3e8ad9775eb148306298a96ed9a642c75be50ab0566da55
3
+ size 1091845447
data/lang_bpe_500/L.fst ADDED

Git LFS Details

  • SHA256: f03085473735a96ac8555eeca81106f5af6d63161899f4f6f02a5bef423cefc6
  • Pointer size: 133 Bytes
  • Size of remote file: 26.5 MB
data/lang_bpe_500/L.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70de241a0a4b31867f12d84a2c7f61920df2cd3a09c321da1367c8abd95a820e
3
+ size 20698447
data/lang_bpe_500/LG.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79f0f73fcd23cd1c17650c87bb70bc4da56f98977105bed69fe1deb91989b37b
3
+ size 306392522
data/lang_bpe_500/L_disambig.fst ADDED

Git LFS Details

  • SHA256: 5a8ac51e41c8cc8cfbb00555475c34c1b0a3aeeb65b392407542c455d1f51550
  • Pointer size: 133 Bytes
  • Size of remote file: 27.4 MB
data/lang_bpe_500/L_disambig.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6787b0bc4a08f369d364e4b475a29c7f6d49a0d81e881daaf1468c832bd01cd1
3
+ size 21392435
data/lang_bpe_500/Linv.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e6e66b8d4f763b913ebafd0f21dfe374549df05d704e6587cfb27050bcfc82c
3
+ size 20698459
data/lang_bpe_500/bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:787932caf5c0adf93e850c18742279142ab33cdff5a6bd1234ad3aca2fc0b998
3
+ size 244624
data/lang_bpe_500/lexicon.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/lang_bpe_500/lexicon_disambig.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/lang_bpe_500/tokens.txt ADDED
@@ -0,0 +1,502 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <blk> 0
2
+ <sos/eos> 1
3
+ <unk> 2
4
+ S 3
5
+ ▁ 4
6
+ E 5
7
+ ▁DE 6
8
+ ' 7
9
+ T 8
10
+ R 9
11
+ É 10
12
+ ▁L 11
13
+ ▁LA 12
14
+ C 13
15
+ ▁LE 14
16
+ A 15
17
+ O 16
18
+ P 17
19
+ ▁D 18
20
+ U 19
21
+ I 20
22
+ ▁EST 21
23
+ ▁IL 22
24
+ ON 23
25
+ N 24
26
+ ▁À 25
27
+ ▁S 26
28
+ ▁A 27
29
+ ▁ET 28
30
+ ▁C 29
31
+ IN 30
32
+ D 31
33
+ G 32
34
+ ER 33
35
+ ES 34
36
+ ▁EN 35
37
+ ▁LES 36
38
+ Y 37
39
+ IS 38
40
+ IT 39
41
+ L 40
42
+ ▁DU 41
43
+ AL 42
44
+ ENT 43
45
+ ▁F 44
46
+ RE 45
47
+ ▁PAR 46
48
+ ▁DES 47
49
+ F 48
50
+ LE 49
51
+ ▁UN 50
52
+ ▁B 51
53
+ ▁SE 52
54
+ V 53
55
+ ▁AU 54
56
+ AN 55
57
+ B 56
58
+ OR 57
59
+ ▁G 58
60
+ RA 59
61
+ ANT 60
62
+ OU 61
63
+ H 62
64
+ UR 63
65
+ ▁DANS 64
66
+ ▁T 65
67
+ RI 66
68
+ ▁M 67
69
+ TE 68
70
+ ▁RE 69
71
+ AR 70
72
+ ▁UNE 71
73
+ ▁ELLE 72
74
+ ▁V 73
75
+ M 74
76
+ ATION 75
77
+ UL 76
78
+ ÉE 77
79
+ Z 78
80
+ ▁N 79
81
+ NE 80
82
+ ME 81
83
+ IL 82
84
+ ▁P 83
85
+ MENT 84
86
+ IE 85
87
+ EN 86
88
+ ▁H 87
89
+ ▁CON 88
90
+ IR 89
91
+ EST 90
92
+ LI 91
93
+ ▁SON 92
94
+ CH 93
95
+ ▁POUR 94
96
+ È 95
97
+ LA 96
98
+ CE 97
99
+ ▁RÉ 98
100
+ ▁MA 99
101
+ IQUE 100
102
+ ▁CE 101
103
+ EUR 102
104
+ ▁SA 103
105
+ NT 104
106
+ ▁SONT 105
107
+ US 106
108
+ EMENT 107
109
+ ▁IN 108
110
+ ▁PRO 109
111
+ ▁SUR 110
112
+ RÉ 111
113
+ RO 112
114
+ ▁DÉ 113
115
+ AIT 114
116
+ DE 115
117
+ ▁QU 116
118
+ ▁É 117
119
+ EL 118
120
+ OL 119
121
+ AIRE 120
122
+ ION 121
123
+ UN 122
124
+ CHE 123
125
+ K 124
126
+ ▁CH 125
127
+ AT 126
128
+ DI 127
129
+ AG 128
130
+ TRE 129
131
+ TI 130
132
+ OM 131
133
+ ELLE 132
134
+ ▁SU 133
135
+ AM 134
136
+ ▁PO 135
137
+ ▁MO 136
138
+ IM 137
139
+ ▁PAS 138
140
+ VI 139
141
+ TÉ 140
142
+ FF 141
143
+ ▁CA 142
144
+ TER 143
145
+ END 144
146
+ CETTE 145
147
+ QUE 146
148
+ TA 147
149
+ LO 148
150
+ ▁PLUS 149
151
+ ILLE 150
152
+ QU 151
153
+ ▁NE 152
154
+ ▁RO 153
155
+ ▁JE 154
156
+ ▁QUE 155
157
+ ▁DEUX 156
158
+ UT 157
159
+ CI 158
160
+ ALE 159
161
+ ▁AVEC 160
162
+ UNE 161
163
+ AB 162
164
+ ITÉ 163
165
+ IC 164
166
+ GE 165
167
+ MA 166
168
+ AGE 167
169
+ AND 168
170
+ AC 169
171
+ OIS 170
172
+ ▁CO 171
173
+ ▁COMME 172
174
+ PH 173
175
+ VER 174
176
+ SSE 175
177
+ AV 176
178
+ ▁QUI 177
179
+ TU 178
180
+ ▁BA 179
181
+ NÉ 180
182
+ ID 181
183
+ PORT 182
184
+ ▁VO 183
185
+ IER 184
186
+ ▁ÉTÉ 185
187
+ MI 186
188
+ IV 187
189
+ ÈRE 188
190
+ ARD 189
191
+ AU 190
192
+ OC 191
193
+ ▁EX 192
194
+ ▁DI 193
195
+ ▁CHA 194
196
+ ÉRI 195
197
+ MB 196
198
+ IGN 197
199
+ ▁RA 198
200
+ DU 199
201
+ ISTE 200
202
+ TH 201
203
+ AIS 202
204
+ INE 203
205
+ ANG 204
206
+ ▁COMP 205
207
+ ▁OU 206
208
+ AUX 207
209
+ IÈRE 208
210
+ ORD 209
211
+ X 210
212
+ ▁PR 211
213
+ ▁ÉGALEMENT 212
214
+ ▁CONS 213
215
+ LU 214
216
+ ▁SES 215
217
+ ▁SOU 216
218
+ POS 217
219
+ CTION 218
220
+ NA 219
221
+ TO 220
222
+ OUR 221
223
+ ART 222
224
+ Ô 223
225
+ UE 224
226
+ MÉ 225
227
+ TION 226
228
+ ▁NOM 227
229
+ ▁MAR 228
230
+ AS 229
231
+ MAN 230
232
+ LÉ 231
233
+ ILL 232
234
+ ▁COM 233
235
+ ▁PA 234
236
+ W 235
237
+ TURE 236
238
+ MIN 237
239
+ ▁MAIS 238
240
+ VE 239
241
+ ITE 240
242
+ IX 241
243
+ ANCE 242
244
+ ENCE 243
245
+ OP 244
246
+ EMP 245
247
+ ALL 246
248
+ EUX 247
249
+ ▁K 248
250
+ ▁ÉTAIT 249
251
+ ▁FAIT 250
252
+ J 251
253
+ ▁TOUT 252
254
+ TIQUE 253
255
+ PE 254
256
+ UV 255
257
+ ▁PLA 256
258
+ IRE 257
259
+ ENS 258
260
+ ▁Y 259
261
+ IG 260
262
+ VO 261
263
+ VEN 262
264
+ ABLE 263
265
+ ▁AUX 264
266
+ ▁MON 265
267
+ ÊT 266
268
+ ▁AUSSI 267
269
+ ▁FOR 268
270
+ TRA 269
271
+ LES 270
272
+ NI 271
273
+ TRI 272
274
+ ▁DIS 273
275
+ ▁MI 274
276
+ ▁TRA 275
277
+ ▁CENT 276
278
+ ▁TO 277
279
+ ÉS 278
280
+ TTE 279
281
+ EX 280
282
+ ▁APP 281
283
+ ▁GRAND 282
284
+ ▁AR 283
285
+ INS 284
286
+ ▁NO 285
287
+ ▁DÉC 286
288
+ MO 287
289
+ ▁BR 288
290
+ ▁AN 289
291
+ ÉES 290
292
+ ONNE 291
293
+ ▁PRÉ 292
294
+ ▁ME 293
295
+ ▁LUI 294
296
+ ▁FA 295
297
+ TEN 296
298
+ AUT 297
299
+ BL 298
300
+ IVE 299
301
+ Ç 300
302
+ Â 301
303
+ ▁FUT 302
304
+ ▁SO 303
305
+ ▁TROIS 304
306
+ EAU 305
307
+ ▁ALORS 306
308
+ TEUR 307
309
+ ▁MÉ 308
310
+ DA 309
311
+ ▁J 310
312
+ ▁ON 311
313
+ ▁JA 312
314
+ GUE 313
315
+ ▁LEUR 314
316
+ ÈME 315
317
+ ▁ONT 316
318
+ ▁W 317
319
+ ▁MÊME 318
320
+ ACC 319
321
+ IEN 320
322
+ ▁SAINT 321
323
+ AINE 322
324
+ À 323
325
+ ▁VA 324
326
+ ▁FIN 325
327
+ ICI 326
328
+ ITION 327
329
+ ▁CES 328
330
+ ▁COUR 329
331
+ UM 330
332
+ BRE 331
333
+ ▁PEU 332
334
+ Î 333
335
+ TRO 334
336
+ IENNE 335
337
+ IDE 336
338
+ ▁REP 337
339
+ ▁JU 338
340
+ ▁VILLE 339
341
+ ▁APRÈS 340
342
+ ▁ÊTRE 341
343
+ ▁RI 342
344
+ ▁VOUS 343
345
+ ▁PLUSIEURS 344
346
+ ▁NA 345
347
+ ▁TH 346
348
+ ANTE 347
349
+ TRÈS 348
350
+ ▁SITUÉ 349
351
+ ▁TOUR 350
352
+ ▁PARTIE 351
353
+ ▁FRANC 352
354
+ ▁QUATRE 353
355
+ ▁PER 354
356
+ VIENT 355
357
+ ▁ENTRE 356
358
+ ING 357
359
+ ▁PREMIER 358
360
+ ▁CAR 359
361
+ LON 360
362
+ FORM 361
363
+ BA 362
364
+ VÉ 363
365
+ IFI 364
366
+ AIENT 365
367
+ HI 366
368
+ ▁JO 367
369
+ TIF 368
370
+ TANT 369
371
+ ▁PEUT 370
372
+ STRU 371
373
+ ▁PARTI 372
374
+ ▁COMMUNE 373
375
+ Ê 374
376
+ ▁REN 375
377
+ ATEUR 376
378
+ ▁BIEN 377
379
+ ▁PRI 378
380
+ ▁RUE 379
381
+ ▁MONT 380
382
+ ▁PI 381
383
+ ▁JOUR 382
384
+ AUTRES 383
385
+ IBLE 384
386
+ APP 385
387
+ ▁CINQ 386
388
+ ▁PREMIÈRE 387
389
+ ▁MARI 388
390
+ CK 389
391
+ ▁TROUVE 390
392
+ ISSE 391
393
+ ▁DONC 392
394
+ ▁GROUPE 393
395
+ ▁JOUE 394
396
+ ▁VERS 395
397
+ JO 396
398
+ ÎT 397
399
+ ÉTAT 398
400
+ ▁FRANÇAIS 399
401
+ ▁LORS 400
402
+ ▁ENSUITE 401
403
+ ▁PUIS 402
404
+ ▁PETIT 403
405
+ EUSE 404
406
+ ▁AINSI 405
407
+ ▁TRAVAIL 406
408
+ ▁NOUS 407
409
+ ▁QUELQUE 408
410
+ ▁COLL 409
411
+ ▁CERTAIN 410
412
+ ▁LIEU 411
413
+ ▁PRÉSENT 412
414
+ ÉQUIPE 413
415
+ ▁VINGT 414
416
+ ▁PÈRE 415
417
+ ▁FAMILLE 416
418
+ ▁RU 417
419
+ ▁RENCONTRE 418
420
+ ▁ESPÈCE 419
421
+ ▁FILS 420
422
+ ▁GÉNÉRAL 421
423
+ ▁OB 422
424
+ ▁ÉV 423
425
+ Ï 424
426
+ ▁PAYS 425
427
+ ▁ANNÉE 426
428
+ ÉGLISE 427
429
+ ▁PRINCIPAL 428
430
+ ▁GUERRE 429
431
+ ▁SANS 430
432
+ ANCIEN 431
433
+ ▁CEPENDANT 432
434
+ ▁RESTE 433
435
+ Œ 434
436
+ ▁PENDANT 435
437
+ ▁TEMPS 436
438
+ ▁FOND 437
439
+ HUI 438
440
+ ▁RÉGION 439
441
+ MM 440
442
+ ▁MEMBRE 441
443
+ Û 442
444
+ ▁TRANS 443
445
+ ▁CLUB 444
446
+ BERT 445
447
+ ▁DEPUIS 446
448
+ ▁IMP 447
449
+ PRÈS 448
450
+ ▁SEPT 449
451
+ ▁NATIONAL 450
452
+ ▁CONNU 451
453
+ ▁VILLAGE 452
454
+ ▁MORT 453
455
+ ▁ENCORE 454
456
+ ORGANIS 455
457
+ HISTOIRE 456
458
+ Ù 457
459
+ ▁SECOND 458
460
+ ▁NOUVELLE 459
461
+ ŒUVRE 460
462
+ ORIGINE 461
463
+ UNIVERSITÉ 462
464
+ ▁UTILISÉ 463
465
+ ▁AUJOURD 464
466
+ ▁COMMUN 465
467
+ ▁FILM 466
468
+ ▁FRÈRE 467
469
+ ▁TITRE 468
470
+ ▁DIRECT 469
471
+ ÉLECT 470
472
+ ▁FEMME 471
473
+ ▁HUIT 472
474
+ ÉTAIENT 473
475
+ ÉCOLE 474
476
+ ▁DERNIER 475
477
+ ▁MONSIEUR 476
478
+ ▁IMPORTANT 477
479
+ ▁PERMET 478
480
+ ▁JUSQU 479
481
+ ▁DROIT 480
482
+ ▁CARRIÈRE 481
483
+ ARCHI 482
484
+ ▁NOMBREUX 483
485
+ ▁SAISON 484
486
+ ▁NOTAMMENT 485
487
+ ▁PIERRE 486
488
+ BOURG 487
489
+ ▁DIFFÉRENT 488
490
+ ▁NOUVEAU 489
491
+ ▁TOUJOURS 490
492
+ ▁SIÈGE 491
493
+ ÉDIT 492
494
+ ▁PERSONNE 493
495
+ ▁SUIVANT 494
496
+ ▁CELUI 495
497
+ Ë 496
498
+ Ü 497
499
+ Q 498
500
+ Æ 499
501
+ #0 500
502
+ #1 501
data/lang_bpe_500/train.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:279dd4ab74b7c19c20543f31660564cb4dc2adddc4097356f22782928fd76ecd
3
+ size 31024469
data/lang_bpe_500/train_orig.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8868efaf3b191da9546ec193cbb0788a847b52b07d7a0c6ecedf0e07dda19c2b
3
+ size 32593161
data/lang_bpe_500/unigram_500.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:787932caf5c0adf93e850c18742279142ab33cdff5a6bd1234ad3aca2fc0b998
3
+ size 244624
data/lang_bpe_500/unigram_500.vocab ADDED
@@ -0,0 +1,500 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <blk> 0
2
+ <sos/eos> 0
3
+ <unk> 0
4
+ S -2.95044
5
+ ▁ -3.52819
6
+ E -3.61069
7
+ ▁DE -3.77593
8
+ ' -3.80476
9
+ T -3.98568
10
+ R -4.21426
11
+ É -4.21489
12
+ ▁L -4.24855
13
+ ▁LA -4.30356
14
+ C -4.37675
15
+ ▁LE -4.50794
16
+ A -4.51183
17
+ O -4.52633
18
+ P -4.53401
19
+ ▁D -4.54343
20
+ U -4.57523
21
+ I -4.57714
22
+ ▁EST -4.70374
23
+ ▁IL -4.72016
24
+ ON -4.78236
25
+ N -4.81209
26
+ ▁À -4.8331
27
+ ▁S -4.86694
28
+ ▁A -4.86931
29
+ ▁ET -4.90966
30
+ ▁C -4.93591
31
+ IN -4.96857
32
+ D -4.97671
33
+ G -5.00877
34
+ ER -5.02622
35
+ ES -5.05635
36
+ ▁EN -5.07399
37
+ ▁LES -5.12278
38
+ Y -5.12281
39
+ IS -5.12371
40
+ IT -5.1512
41
+ L -5.2095
42
+ ▁DU -5.27551
43
+ AL -5.30047
44
+ ENT -5.32252
45
+ ▁F -5.32693
46
+ RE -5.34073
47
+ ▁PAR -5.34309
48
+ ▁DES -5.37599
49
+ F -5.40391
50
+ LE -5.41817
51
+ ▁UN -5.4263
52
+ ▁B -5.42928
53
+ ▁SE -5.45833
54
+ V -5.4948
55
+ ▁AU -5.51091
56
+ AN -5.51095
57
+ B -5.52945
58
+ OR -5.57706
59
+ ▁G -5.59256
60
+ RA -5.60306
61
+ ANT -5.61866
62
+ OU -5.62585
63
+ H -5.63579
64
+ UR -5.65002
65
+ ▁DANS -5.65339
66
+ ▁T -5.65478
67
+ RI -5.66107
68
+ ▁M -5.67156
69
+ TE -5.68258
70
+ ▁RE -5.70332
71
+ AR -5.71733
72
+ ▁UNE -5.73579
73
+ ▁ELLE -5.73837
74
+ ▁V -5.77157
75
+ M -5.80446
76
+ ATION -5.80592
77
+ UL -5.82578
78
+ ÉE -5.83786
79
+ Z -5.84404
80
+ ▁N -5.85083
81
+ NE -5.85186
82
+ ME -5.85253
83
+ IL -5.85976
84
+ ▁P -5.8625
85
+ MENT -5.86255
86
+ IE -5.87153
87
+ EN -5.87781
88
+ ▁H -5.87931
89
+ ▁CON -5.91114
90
+ IR -5.92419
91
+ EST -5.9346
92
+ LI -5.93858
93
+ ▁SON -5.94037
94
+ CH -5.94997
95
+ ▁POUR -5.95025
96
+ È -5.97227
97
+ LA -5.98258
98
+ CE -6.00354
99
+ ▁RÉ -6.00832
100
+ ▁MA -6.01047
101
+ IQUE -6.01627
102
+ ▁CE -6.01653
103
+ EUR -6.0238
104
+ ▁SA -6.03806
105
+ NT -6.09711
106
+ ▁SONT -6.09794
107
+ US -6.10094
108
+ EMENT -6.1151
109
+ ▁IN -6.12361
110
+ ▁PRO -6.12491
111
+ ▁SUR -6.14045
112
+ RÉ -6.14134
113
+ RO -6.14433
114
+ ▁DÉ -6.14668
115
+ AIT -6.14836
116
+ DE -6.15261
117
+ ▁QU -6.15573
118
+ ▁É -6.15607
119
+ EL -6.15758
120
+ OL -6.15966
121
+ AIRE -6.17272
122
+ ION -6.17315
123
+ UN -6.17769
124
+ CHE -6.18594
125
+ K -6.22088
126
+ ▁CH -6.22102
127
+ AT -6.25802
128
+ DI -6.2675
129
+ AG -6.28667
130
+ TRE -6.28903
131
+ TI -6.29845
132
+ OM -6.31474
133
+ ELLE -6.32241
134
+ ▁SU -6.3278
135
+ AM -6.3377
136
+ ▁PO -6.33994
137
+ ▁MO -6.3488
138
+ IM -6.35296
139
+ ▁PAS -6.35478
140
+ VI -6.37582
141
+ TÉ -6.39092
142
+ FF -6.39812
143
+ ▁CA -6.40222
144
+ TER -6.40368
145
+ END -6.40953
146
+ CETTE -6.41276
147
+ QUE -6.41398
148
+ TA -6.41647
149
+ LO -6.42665
150
+ ▁PLUS -6.43945
151
+ ILLE -6.44629
152
+ QU -6.47038
153
+ ▁NE -6.5018
154
+ ▁RO -6.51476
155
+ ▁JE -6.52588
156
+ ▁QUE -6.53357
157
+ ▁DEUX -6.54569
158
+ UT -6.56102
159
+ CI -6.56669
160
+ ALE -6.5754
161
+ ▁AVEC -6.57591
162
+ UNE -6.58781
163
+ AB -6.59318
164
+ ITÉ -6.59338
165
+ IC -6.59991
166
+ GE -6.60459
167
+ MA -6.61775
168
+ AGE -6.61823
169
+ AND -6.62569
170
+ AC -6.63217
171
+ OIS -6.6338
172
+ ▁CO -6.645
173
+ ▁COMME -6.65494
174
+ PH -6.65508
175
+ VER -6.6556
176
+ SSE -6.66542
177
+ AV -6.67119
178
+ ▁QUI -6.67297
179
+ TU -6.68589
180
+ ▁BA -6.68726
181
+ NÉ -6.69021
182
+ ID -6.69194
183
+ PORT -6.70656
184
+ ▁VO -6.71041
185
+ IER -6.71949
186
+ ▁ÉTÉ -6.74721
187
+ MI -6.74758
188
+ IV -6.74785
189
+ ÈRE -6.75787
190
+ ARD -6.76178
191
+ AU -6.76538
192
+ OC -6.77933
193
+ ▁EX -6.7813
194
+ ▁DI -6.79024
195
+ ▁CHA -6.79452
196
+ ÉRI -6.79873
197
+ MB -6.82177
198
+ IGN -6.82265
199
+ ▁RA -6.82444
200
+ DU -6.8371
201
+ ISTE -6.83906
202
+ TH -6.84131
203
+ AIS -6.84576
204
+ INE -6.8465
205
+ ANG -6.84672
206
+ ▁COMP -6.85555
207
+ ▁OU -6.85622
208
+ AUX -6.86386
209
+ IÈRE -6.86924
210
+ ORD -6.86985
211
+ X -6.87628
212
+ ▁PR -6.87928
213
+ ▁ÉGALEMENT -6.88091
214
+ ▁CONS -6.88205
215
+ LU -6.88394
216
+ ▁SES -6.88761
217
+ ▁SOU -6.89325
218
+ POS -6.89602
219
+ CTION -6.89668
220
+ NA -6.90352
221
+ TO -6.90354
222
+ OUR -6.90604
223
+ ART -6.91586
224
+ Ô -6.91748
225
+ UE -6.92409
226
+ MÉ -6.94067
227
+ TION -6.94666
228
+ ▁NOM -6.94742
229
+ ▁MAR -6.94757
230
+ AS -6.95132
231
+ MAN -6.95299
232
+ LÉ -6.9536
233
+ ILL -6.95456
234
+ ▁COM -6.96091
235
+ ▁PA -6.96811
236
+ W -6.97204
237
+ TURE -6.97208
238
+ MIN -6.98243
239
+ ▁MAIS -6.9843
240
+ VE -6.98613
241
+ ITE -6.98699
242
+ IX -6.98924
243
+ ANCE -6.99379
244
+ ENCE -6.99452
245
+ OP -7.00439
246
+ EMP -7.01487
247
+ ALL -7.01562
248
+ EUX -7.0194
249
+ ▁K -7.02409
250
+ ▁ÉTAIT -7.02541
251
+ ▁FAIT -7.03542
252
+ J -7.03897
253
+ ▁TOUT -7.04055
254
+ TIQUE -7.04819
255
+ PE -7.0542
256
+ UV -7.06736
257
+ ▁PLA -7.06843
258
+ IRE -7.07173
259
+ ENS -7.07267
260
+ ▁Y -7.08027
261
+ IG -7.08324
262
+ VO -7.08886
263
+ VEN -7.08955
264
+ ABLE -7.09118
265
+ ▁AUX -7.09488
266
+ ▁MON -7.10007
267
+ ÊT -7.10148
268
+ ▁AUSSI -7.10337
269
+ ▁FOR -7.1046
270
+ TRA -7.10535
271
+ LES -7.10662
272
+ NI -7.11363
273
+ TRI -7.1155
274
+ ▁DIS -7.12849
275
+ ▁MI -7.13277
276
+ ▁TRA -7.13732
277
+ ▁CENT -7.14084
278
+ ▁TO -7.14595
279
+ ÉS -7.14696
280
+ TTE -7.14853
281
+ EX -7.15105
282
+ ▁APP -7.15361
283
+ ▁GRAND -7.1556
284
+ ▁AR -7.15993
285
+ INS -7.16085
286
+ ▁NO -7.16217
287
+ ▁DÉC -7.16942
288
+ MO -7.17708
289
+ ▁BR -7.17793
290
+ ▁AN -7.17923
291
+ ÉES -7.18741
292
+ ONNE -7.18794
293
+ ▁PRÉ -7.19409
294
+ ▁ME -7.20081
295
+ ▁LUI -7.20722
296
+ ▁FA -7.21393
297
+ TEN -7.22018
298
+ AUT -7.22063
299
+ BL -7.22212
300
+ IVE -7.22299
301
+ Ç -7.22345
302
+ Â -7.23831
303
+ ▁FUT -7.24785
304
+ ▁SO -7.25014
305
+ ▁TROIS -7.25306
306
+ EAU -7.25746
307
+ ▁ALORS -7.25977
308
+ TEUR -7.26764
309
+ ▁MÉ -7.27978
310
+ DA -7.28468
311
+ ▁J -7.3032
312
+ ▁ON -7.30519
313
+ ▁JA -7.30584
314
+ GUE -7.31225
315
+ ▁LEUR -7.31485
316
+ ÈME -7.33414
317
+ ▁ONT -7.33777
318
+ ▁W -7.33813
319
+ ▁MÊME -7.35801
320
+ ACC -7.36611
321
+ IEN -7.36761
322
+ ▁SAINT -7.37041
323
+ AINE -7.3713
324
+ À -7.37841
325
+ ▁VA -7.38049
326
+ ▁FIN -7.38075
327
+ ICI -7.38685
328
+ ITION -7.39214
329
+ ▁CES -7.39695
330
+ ▁COUR -7.40965
331
+ UM -7.41412
332
+ BRE -7.42144
333
+ ▁PEU -7.42661
334
+ Î -7.43206
335
+ TRO -7.44221
336
+ IENNE -7.4553
337
+ IDE -7.46487
338
+ ▁REP -7.46845
339
+ ▁JU -7.48234
340
+ ▁VILLE -7.48283
341
+ ▁APRÈS -7.4882
342
+ ▁ÊTRE -7.50185
343
+ ▁RI -7.50735
344
+ ▁VOUS -7.50844
345
+ ▁PLUSIEURS -7.50953
346
+ ▁NA -7.51404
347
+ ▁TH -7.52054
348
+ ANTE -7.52681
349
+ TRÈS -7.55566
350
+ ▁SITUÉ -7.55585
351
+ ▁TOUR -7.57672
352
+ ▁PARTIE -7.58074
353
+ ▁FRANC -7.58851
354
+ ▁QUATRE -7.60502
355
+ ▁PER -7.60579
356
+ VIENT -7.60672
357
+ ▁ENTRE -7.60832
358
+ ING -7.61424
359
+ ▁PREMIER -7.61747
360
+ ▁CAR -7.61875
361
+ LON -7.62346
362
+ FORM -7.62502
363
+ BA -7.62598
364
+ VÉ -7.62913
365
+ IFI -7.63386
366
+ AIENT -7.63434
367
+ HI -7.63915
368
+ ▁JO -7.64691
369
+ TIF -7.64803
370
+ TANT -7.64813
371
+ ▁PEUT -7.6495
372
+ STRU -7.65091
373
+ ▁PARTI -7.6818
374
+ ▁COMMUNE -7.69343
375
+ Ê -7.70573
376
+ ▁REN -7.71927
377
+ ATEUR -7.72429
378
+ ▁BIEN -7.72773
379
+ ▁PRI -7.72972
380
+ ▁RUE -7.73029
381
+ ▁MONT -7.73878
382
+ ▁PI -7.74146
383
+ ▁JOUR -7.75127
384
+ AUTRES -7.75678
385
+ IBLE -7.76348
386
+ APP -7.76413
387
+ ▁CINQ -7.77455
388
+ ▁PREMIÈRE -7.7773
389
+ ▁MARI -7.78024
390
+ CK -7.78135
391
+ ▁TROUVE -7.81049
392
+ ISSE -7.82038
393
+ ▁DONC -7.82363
394
+ ▁GROUPE -7.84207
395
+ ▁JOUE -7.84684
396
+ ▁VERS -7.84731
397
+ JO -7.84878
398
+ ÎT -7.85732
399
+ ÉTAT -7.86426
400
+ ▁FRANÇAIS -7.86688
401
+ ▁LORS -7.90172
402
+ ▁ENSUITE -7.90256
403
+ ▁PUIS -7.90451
404
+ ▁PETIT -7.90487
405
+ EUSE -7.90881
406
+ ▁AINSI -7.91868
407
+ ▁TRAVAIL -7.92917
408
+ ▁NOUS -7.93438
409
+ ▁QUELQUE -7.93802
410
+ ▁COLL -7.94043
411
+ ▁CERTAIN -7.94047
412
+ ▁LIEU -7.94101
413
+ ▁PRÉSENT -7.94744
414
+ ÉQUIPE -7.95119
415
+ ▁VINGT -7.96329
416
+ ▁PÈRE -7.96773
417
+ ▁FAMILLE -7.96931
418
+ ▁RU -7.97689
419
+ ▁RENCONTRE -7.98339
420
+ ▁ESPÈCE -7.98434
421
+ ▁FILS -7.99492
422
+ ▁GÉNÉRAL -7.99556
423
+ ▁OB -7.99882
424
+ ▁ÉV -8.00993
425
+ Ï -8.03049
426
+ ▁PAYS -8.03417
427
+ ▁ANNÉE -8.03678
428
+ ÉGLISE -8.04578
429
+ ▁PRINCIPAL -8.05694
430
+ ▁GUERRE -8.06876
431
+ ▁SANS -8.07727
432
+ ANCIEN -8.09295
433
+ ▁CEPENDANT -8.11907
434
+ ▁RESTE -8.13532
435
+ Œ -8.14688
436
+ ▁PENDANT -8.15932
437
+ ▁TEMPS -8.17676
438
+ ▁FOND -8.18238
439
+ HUI -8.18803
440
+ ▁RÉGION -8.19186
441
+ MM -8.19787
442
+ ▁MEMBRE -8.20795
443
+ Û -8.20852
444
+ ▁TRANS -8.22004
445
+ ▁CLUB -8.2233
446
+ BERT -8.23451
447
+ ▁DEPUIS -8.24704
448
+ ▁IMP -8.24718
449
+ PRÈS -8.24744
450
+ ▁SEPT -8.25346
451
+ ▁NATIONAL -8.27048
452
+ ▁CONNU -8.27896
453
+ ▁VILLAGE -8.28281
454
+ ▁MORT -8.28495
455
+ ▁ENCORE -8.2883
456
+ ORGANIS -8.29212
457
+ HISTOIRE -8.29341
458
+ Ù -8.30578
459
+ ▁SECOND -8.31309
460
+ ▁NOUVELLE -8.32129
461
+ ŒUVRE -8.32595
462
+ ORIGINE -8.33151
463
+ UNIVERSITÉ -8.334
464
+ ▁UTILISÉ -8.33568
465
+ ▁AUJOURD -8.33703
466
+ ▁COMMUN -8.34805
467
+ ▁FILM -8.35268
468
+ ▁FRÈRE -8.35616
469
+ ▁TITRE -8.35992
470
+ ▁DIRECT -8.37008
471
+ ÉLECT -8.38125
472
+ ▁FEMME -8.39511
473
+ ▁HUIT -8.39969
474
+ ÉTAIENT -8.40294
475
+ ÉCOLE -8.40896
476
+ ▁DERNIER -8.41137
477
+ ▁MONSIEUR -8.41865
478
+ ▁IMPORTANT -8.41901
479
+ ▁PERMET -8.42395
480
+ ▁JUSQU -8.43115
481
+ ▁DROIT -8.43711
482
+ ▁CARRIÈRE -8.43793
483
+ ARCHI -8.44301
484
+ ▁NOMBREUX -8.44313
485
+ ▁SAISON -8.44828
486
+ ▁NOTAMMENT -8.4518
487
+ ▁PIERRE -8.46428
488
+ BOURG -8.46772
489
+ ▁DIFFÉRENT -8.46861
490
+ ▁NOUVEAU -8.48184
491
+ ▁TOUJOURS -8.48767
492
+ ▁SIÈGE -8.50391
493
+ ÉDIT -8.51508
494
+ ▁PERSONNE -8.51771
495
+ ▁SUIVANT -8.51936
496
+ ▁CELUI -8.52301
497
+ Ë -9.3779
498
+ Ü -9.47316
499
+ Q -9.58463
500
+ Æ -11.4978
data/lang_bpe_500/words.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/lang_bpe_500/words_no_ids.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/lm/3gram.arpa ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1be4fda53d5a4b94d700114f5a5505173da3539d20931957e5c5be4ad8133a7f
3
+ size 152616228
data/lm/4gram.arpa ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fddfa46e7ebac2fa1a1ce3f8ec484a1f0732fda0dda36fb08afb38ae1c89206
3
+ size 314528045
data/lm/G_3_gram.fst.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c20bc2acc13d5f994d45c043efca5912e2de4c35c8f02b4e4f9fec3b40fd7394
3
+ size 201842208
data/lm/G_3_gram.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d020d44e201b49b73d86fab4de2e27239b65aa6964af798ff6084c663c8c0f7
3
+ size 125652395
data/lm/G_4_gram.fst.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bacb19ab623009603f2880cf406b2bc58e12e0e3fc199bb22dca06f2cc3dfc3
3
+ size 424054640
decoding_results/fast_beam_search/errs-test-cv-beam_20.0_max_contexts_8_max_states_64-epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
decoding_results/fast_beam_search/log-decode-epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model-2023-04-03-17-31-21 ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-04-03 17:31:21,345 INFO [decode.py:659] Decoding started
2
+ 2023-04-03 17:31:21,345 INFO [decode.py:665] Device: cuda:0
3
+ 2023-04-03 17:31:21,347 INFO [decode.py:675] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.23.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '62e404dd3f3a811d73e424199b3408e309c06e1a', 'k2-git-date': 'Mon Jan 30 02:26:16 2023', 'lhotse-version': '1.12.0.dev+git.3ccfeb7.clean', 'torch-version': '1.13.0', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': 'd74822d-dirty', 'icefall-git-date': 'Tue Mar 21 21:35:32 2023', 'icefall-path': '/home/lishaojie/icefall', 'k2-path': '/home/lishaojie/.conda/envs/env_lishaojie/lib/python3.8/site-packages/k2/__init__.py', 'lhotse-path': '/home/lishaojie/.conda/envs/env_lishaojie/lib/python3.8/site-packages/lhotse/__init__.py', 'hostname': 'cnc533', 'IP address': '127.0.1.1'}, 'epoch': 29, 'iter': 0, 'avg': 9, 'use_averaged_model': True, 'exp_dir': PosixPath('pruned_transducer_stateless7_streaming/exp1'), 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_bpe_500'), 'decoding_method': 'fast_beam_search', 'beam_size': 4, 'beam': 20.0, 'ngram_lm_scale': 0.01, 'max_contexts': 8, 'max_states': 64, 'context_size': 2, 'max_sym_per_frame': 1, 'num_paths': 200, 'nbest_scale': 0.5, 'num_encoder_layers': '2,4,3,2,4', 'feedforward_dims': '1024,1024,2048,2048,1024', 'nhead': '8,8,8,8,8', 'encoder_dims': '384,384,384,384,384', 'attention_dims': '192,192,192,192,192', 'encoder_unmasked_dims': '256,256,256,256,256', 'zipformer_downsampling_factors': '1,2,4,8,2', 'cnn_module_kernels': '31,31,31,31,31', 'decoder_dim': 512, 'joiner_dim': 512, 'short_chunk_size': 50, 'num_left_chunks': 4, 'decode_chunk_len': 64, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 200, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'res_dir': PosixPath('pruned_transducer_stateless7_streaming/exp1/fast_beam_search'), 'suffix': 'epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model', 'blank_id': 0, 'unk_id': 2, 'vocab_size': 500}
4
+ 2023-04-03 17:31:21,347 INFO [decode.py:677] About to create model
5
+ 2023-04-03 17:31:21,749 INFO [zipformer.py:405] At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
6
+ 2023-04-03 17:31:21,757 INFO [decode.py:748] Calculating the averaged model over epoch range from 20 (excluded) to 29
7
+ 2023-04-03 17:31:23,870 INFO [decode.py:782] Number of model parameters: 70369391
8
+ 2023-04-03 17:31:23,871 INFO [commonvoice_fr.py:406] About to get test cuts
9
+ 2023-04-03 17:31:26,743 INFO [decode.py:560] batch 0/?, cuts processed until now is 27
10
+ 2023-04-03 17:31:31,854 INFO [zipformer.py:2441] attn_weights_entropy = tensor([1.8338, 1.6836, 1.5364, 1.7643, 2.1272, 2.0399, 1.7407, 1.5925],
11
+ device='cuda:0'), covar=tensor([0.0367, 0.0349, 0.0585, 0.0342, 0.0213, 0.0459, 0.0350, 0.0414],
12
+ device='cuda:0'), in_proj_covar=tensor([0.0097, 0.0103, 0.0143, 0.0108, 0.0097, 0.0111, 0.0100, 0.0110],
13
+ device='cuda:0'), out_proj_covar=tensor([7.4944e-05, 7.9098e-05, 1.1173e-04, 8.2734e-05, 7.5248e-05, 8.1783e-05,
14
+ 7.3728e-05, 8.3511e-05], device='cuda:0')
15
+ 2023-04-03 17:31:36,035 INFO [decode.py:560] batch 20/?, cuts processed until now is 604
16
+ 2023-04-03 17:31:46,332 INFO [decode.py:560] batch 40/?, cuts processed until now is 1209
17
+ 2023-04-03 17:31:54,962 INFO [decode.py:560] batch 60/?, cuts processed until now is 1866
18
+ 2023-04-03 17:32:04,386 INFO [decode.py:560] batch 80/?, cuts processed until now is 2422
19
+ 2023-04-03 17:32:13,074 INFO [decode.py:560] batch 100/?, cuts processed until now is 3088
20
+ 2023-04-03 17:32:14,054 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.2042, 1.9050, 2.4510, 1.6668, 2.2104, 2.4274, 1.7766, 2.5439],
21
+ device='cuda:0'), covar=tensor([0.1183, 0.2019, 0.1326, 0.1735, 0.0824, 0.1156, 0.2855, 0.0688],
22
+ device='cuda:0'), in_proj_covar=tensor([0.0188, 0.0202, 0.0188, 0.0186, 0.0170, 0.0210, 0.0213, 0.0194],
23
+ device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
24
+ device='cuda:0')
25
+ 2023-04-03 17:32:22,296 INFO [decode.py:560] batch 120/?, cuts processed until now is 3672
26
+ 2023-04-03 17:32:28,822 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.4821, 2.5480, 2.1061, 1.0486, 2.2957, 1.9745, 1.9215, 2.3727],
27
+ device='cuda:0'), covar=tensor([0.0910, 0.0618, 0.1588, 0.1982, 0.1334, 0.2716, 0.2164, 0.0818],
28
+ device='cuda:0'), in_proj_covar=tensor([0.0167, 0.0187, 0.0196, 0.0178, 0.0206, 0.0207, 0.0220, 0.0192],
29
+ device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
30
+ device='cuda:0')
31
+ 2023-04-03 17:32:30,833 INFO [decode.py:560] batch 140/?, cuts processed until now is 4348
32
+ 2023-04-03 17:32:39,389 INFO [decode.py:560] batch 160/?, cuts processed until now is 5035
33
+ 2023-04-03 17:32:41,458 INFO [zipformer.py:2441] attn_weights_entropy = tensor([0.5151, 1.7409, 1.7151, 0.9089, 1.8593, 1.9981, 2.0410, 1.5445],
34
+ device='cuda:0'), covar=tensor([0.0868, 0.0573, 0.0496, 0.0555, 0.0400, 0.0600, 0.0273, 0.0678],
35
+ device='cuda:0'), in_proj_covar=tensor([0.0119, 0.0146, 0.0125, 0.0119, 0.0128, 0.0127, 0.0138, 0.0146],
36
+ device='cuda:0'), out_proj_covar=tensor([8.7160e-05, 1.0465e-04, 8.8840e-05, 8.3773e-05, 8.9721e-05, 9.0117e-05,
37
+ 9.8475e-05, 1.0448e-04], device='cuda:0')
38
+ 2023-04-03 17:32:48,122 INFO [decode.py:560] batch 180/?, cuts processed until now is 5674
39
+ 2023-04-03 17:32:56,943 INFO [decode.py:560] batch 200/?, cuts processed until now is 6301
40
+ 2023-04-03 17:33:05,928 INFO [decode.py:560] batch 220/?, cuts processed until now is 6914
41
+ 2023-04-03 17:33:14,496 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.3674, 2.1843, 2.3572, 1.7864, 2.2139, 2.4677, 2.4855, 1.9260],
42
+ device='cuda:0'), covar=tensor([0.0445, 0.0549, 0.0557, 0.0678, 0.1166, 0.0458, 0.0437, 0.0916],
43
+ device='cuda:0'), in_proj_covar=tensor([0.0128, 0.0133, 0.0136, 0.0116, 0.0123, 0.0135, 0.0136, 0.0158],
44
+ device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0001, 0.0002, 0.0002, 0.0002, 0.0002],
45
+ device='cuda:0')
46
+ 2023-04-03 17:33:14,778 INFO [decode.py:560] batch 240/?, cuts processed until now is 7540
47
+ 2023-04-03 17:33:23,635 INFO [decode.py:560] batch 260/?, cuts processed until now is 8161
48
+ 2023-04-03 17:33:27,250 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.4658, 2.5376, 2.1602, 1.2614, 2.3429, 2.0655, 1.9856, 2.4390],
49
+ device='cuda:0'), covar=tensor([0.0989, 0.0582, 0.1814, 0.1883, 0.1155, 0.2226, 0.2119, 0.0799],
50
+ device='cuda:0'), in_proj_covar=tensor([0.0167, 0.0187, 0.0196, 0.0178, 0.0206, 0.0207, 0.0220, 0.0192],
51
+ device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
52
+ device='cuda:0')
53
+ 2023-04-03 17:33:32,012 INFO [decode.py:560] batch 280/?, cuts processed until now is 8857
54
+ 2023-04-03 17:33:39,857 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.5870, 2.6057, 2.1060, 1.0188, 2.3087, 2.0434, 1.9922, 2.3943],
55
+ device='cuda:0'), covar=tensor([0.0943, 0.0679, 0.1608, 0.1959, 0.1304, 0.2489, 0.2117, 0.0787],
56
+ device='cuda:0'), in_proj_covar=tensor([0.0167, 0.0187, 0.0196, 0.0178, 0.0206, 0.0207, 0.0220, 0.0192],
57
+ device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
58
+ device='cuda:0')
59
+ 2023-04-03 17:33:40,207 INFO [decode.py:560] batch 300/?, cuts processed until now is 9574
60
+ 2023-04-03 17:33:49,272 INFO [decode.py:560] batch 320/?, cuts processed until now is 10169
61
+ 2023-04-03 17:33:57,944 INFO [decode.py:560] batch 340/?, cuts processed until now is 10810
62
+ 2023-04-03 17:34:06,479 INFO [decode.py:560] batch 360/?, cuts processed until now is 11452
63
+ 2023-04-03 17:34:14,029 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.5451, 2.5973, 2.1309, 1.0323, 2.3093, 2.0920, 1.9761, 2.4208],
64
+ device='cuda:0'), covar=tensor([0.0829, 0.0689, 0.1334, 0.1924, 0.1226, 0.2190, 0.2106, 0.0793],
65
+ device='cuda:0'), in_proj_covar=tensor([0.0167, 0.0187, 0.0196, 0.0178, 0.0206, 0.0207, 0.0220, 0.0192],
66
+ device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
67
+ device='cuda:0')
68
+ 2023-04-03 17:34:14,816 INFO [decode.py:560] batch 380/?, cuts processed until now is 12133
69
+ 2023-04-03 17:34:24,080 INFO [decode.py:560] batch 400/?, cuts processed until now is 12706
70
+ 2023-04-03 17:34:33,187 INFO [decode.py:560] batch 420/?, cuts processed until now is 13299
71
+ 2023-04-03 17:34:42,380 INFO [decode.py:560] batch 440/?, cuts processed until now is 13891
72
+ 2023-04-03 17:34:51,250 INFO [decode.py:560] batch 460/?, cuts processed until now is 14515
73
+ 2023-04-03 17:34:59,929 INFO [decode.py:560] batch 480/?, cuts processed until now is 15158
74
+ 2023-04-03 17:35:08,659 INFO [decode.py:560] batch 500/?, cuts processed until now is 15743
75
+ 2023-04-03 17:35:11,772 INFO [decode.py:576] The transcripts are stored in pruned_transducer_stateless7_streaming/exp1/fast_beam_search/recogs-test-cv-beam_20.0_max_contexts_8_max_states_64-epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model.txt
76
+ 2023-04-03 17:35:12,013 INFO [utils.py:558] [test-cv-beam_20.0_max_contexts_8_max_states_64] %WER 10.25% [16082 / 156915, 1180 ins, 1721 del, 13181 sub ]
77
+ 2023-04-03 17:35:12,601 INFO [decode.py:589] Wrote detailed error stats to pruned_transducer_stateless7_streaming/exp1/fast_beam_search/errs-test-cv-beam_20.0_max_contexts_8_max_states_64-epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model.txt
78
+ 2023-04-03 17:35:12,601 INFO [decode.py:609]
79
+ For test-cv, WER of different settings are:
80
+ beam_20.0_max_contexts_8_max_states_64 10.25 best for test-cv
81
+
82
+ 2023-04-03 17:35:12,601 INFO [decode.py:808] Done!
decoding_results/fast_beam_search/recogs-test-cv-beam_20.0_max_contexts_8_max_states_64-epoch-29-avg-9-streaming-chunk-size-64-beam-20.0-max-contexts-8-max-states-64-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
decoding_results/fast_beam_search/wer-summary-test-cv-beam_20.0_max_contexts_8_max_states_64.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ settings WER
2
+ beam_20.0_max_contexts_8_max_states_64 10.25
decoding_results/greedy_search/errs-test-cv-greedy_search-epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
decoding_results/greedy_search/log-decode-epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model-2023-04-03-17-20-40 ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-04-03 17:20:40,951 INFO [decode.py:659] Decoding started
2
+ 2023-04-03 17:20:40,952 INFO [decode.py:665] Device: cuda:0
3
+ 2023-04-03 17:20:40,953 INFO [decode.py:675] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.23.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '62e404dd3f3a811d73e424199b3408e309c06e1a', 'k2-git-date': 'Mon Jan 30 02:26:16 2023', 'lhotse-version': '1.12.0.dev+git.3ccfeb7.clean', 'torch-version': '1.13.0', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': 'd74822d-dirty', 'icefall-git-date': 'Tue Mar 21 21:35:32 2023', 'icefall-path': '/home/lishaojie/icefall', 'k2-path': '/home/lishaojie/.conda/envs/env_lishaojie/lib/python3.8/site-packages/k2/__init__.py', 'lhotse-path': '/home/lishaojie/.conda/envs/env_lishaojie/lib/python3.8/site-packages/lhotse/__init__.py', 'hostname': 'cnc533', 'IP address': '127.0.1.1'}, 'epoch': 29, 'iter': 0, 'avg': 9, 'use_averaged_model': True, 'exp_dir': PosixPath('pruned_transducer_stateless7_streaming/exp1'), 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_bpe_500'), 'decoding_method': 'greedy_search', 'beam_size': 4, 'beam': 20.0, 'ngram_lm_scale': 0.01, 'max_contexts': 8, 'max_states': 64, 'context_size': 2, 'max_sym_per_frame': 1, 'num_paths': 200, 'nbest_scale': 0.5, 'num_encoder_layers': '2,4,3,2,4', 'feedforward_dims': '1024,1024,2048,2048,1024', 'nhead': '8,8,8,8,8', 'encoder_dims': '384,384,384,384,384', 'attention_dims': '192,192,192,192,192', 'encoder_unmasked_dims': '256,256,256,256,256', 'zipformer_downsampling_factors': '1,2,4,8,2', 'cnn_module_kernels': '31,31,31,31,31', 'decoder_dim': 512, 'joiner_dim': 512, 'short_chunk_size': 50, 'num_left_chunks': 4, 'decode_chunk_len': 64, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 200, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'res_dir': PosixPath('pruned_transducer_stateless7_streaming/exp1/greedy_search'), 'suffix': 'epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model', 'blank_id': 0, 'unk_id': 2, 'vocab_size': 500}
4
+ 2023-04-03 17:20:40,954 INFO [decode.py:677] About to create model
5
+ 2023-04-03 17:20:41,325 INFO [zipformer.py:405] At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
6
+ 2023-04-03 17:20:41,332 INFO [decode.py:748] Calculating the averaged model over epoch range from 20 (excluded) to 29
7
+ 2023-04-03 17:20:43,416 INFO [decode.py:782] Number of model parameters: 70369391
8
+ 2023-04-03 17:20:43,416 INFO [commonvoice_fr.py:406] About to get test cuts
9
+ 2023-04-03 17:20:46,076 INFO [decode.py:560] batch 0/?, cuts processed until now is 27
10
+ 2023-04-03 17:20:56,934 INFO [decode.py:560] batch 50/?, cuts processed until now is 1548
11
+ 2023-04-03 17:21:07,967 INFO [decode.py:560] batch 100/?, cuts processed until now is 3088
12
+ 2023-04-03 17:21:11,846 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.1787, 1.8910, 2.4172, 1.6407, 2.1728, 2.3893, 1.7562, 2.5306],
13
+ device='cuda:0'), covar=tensor([0.1221, 0.2071, 0.1560, 0.2019, 0.0944, 0.1410, 0.2892, 0.0782],
14
+ device='cuda:0'), in_proj_covar=tensor([0.0188, 0.0202, 0.0188, 0.0186, 0.0170, 0.0210, 0.0213, 0.0194],
15
+ device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
16
+ device='cuda:0')
17
+ 2023-04-03 17:21:19,943 INFO [decode.py:560] batch 150/?, cuts processed until now is 4693
18
+ 2023-04-03 17:21:24,480 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.9368, 4.1410, 3.9511, 2.1512, 4.1722, 3.3482, 1.1814, 3.0681],
19
+ device='cuda:0'), covar=tensor([0.1757, 0.1534, 0.1609, 0.2749, 0.0958, 0.0773, 0.3330, 0.1192],
20
+ device='cuda:0'), in_proj_covar=tensor([0.0149, 0.0177, 0.0157, 0.0127, 0.0159, 0.0121, 0.0146, 0.0122],
21
+ device='cuda:0'), out_proj_covar=tensor([0.0003, 0.0003, 0.0003, 0.0002, 0.0003, 0.0002, 0.0003, 0.0002],
22
+ device='cuda:0')
23
+ 2023-04-03 17:21:30,520 INFO [decode.py:560] batch 200/?, cuts processed until now is 6301
24
+ 2023-04-03 17:21:38,318 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.2529, 2.0297, 1.8643, 2.1288, 1.9791, 1.9835, 1.9948, 2.7326],
25
+ device='cuda:0'), covar=tensor([0.3832, 0.5126, 0.3710, 0.4009, 0.4766, 0.2687, 0.4275, 0.1859],
26
+ device='cuda:0'), in_proj_covar=tensor([0.0286, 0.0261, 0.0233, 0.0273, 0.0255, 0.0225, 0.0254, 0.0234],
27
+ device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
28
+ device='cuda:0')
29
+ 2023-04-03 17:21:41,318 INFO [decode.py:560] batch 250/?, cuts processed until now is 7825
30
+ 2023-04-03 17:21:45,845 INFO [zipformer.py:2441] attn_weights_entropy = tensor([1.8310, 1.7597, 1.6839, 1.8309, 1.2796, 3.4939, 1.4962, 1.9513],
31
+ device='cuda:0'), covar=tensor([0.3100, 0.2143, 0.1930, 0.2158, 0.1619, 0.0206, 0.2399, 0.1100],
32
+ device='cuda:0'), in_proj_covar=tensor([0.0130, 0.0115, 0.0120, 0.0123, 0.0112, 0.0094, 0.0093, 0.0093],
33
+ device='cuda:0'), out_proj_covar=tensor([0.0006, 0.0005, 0.0005, 0.0006, 0.0005, 0.0004, 0.0005, 0.0004],
34
+ device='cuda:0')
35
+ 2023-04-03 17:21:51,496 INFO [decode.py:560] batch 300/?, cuts processed until now is 9574
36
+ 2023-04-03 17:21:53,631 INFO [zipformer.py:2441] attn_weights_entropy = tensor([1.9710, 1.4775, 2.1112, 2.0248, 1.8644, 1.8023, 1.9569, 1.9764],
37
+ device='cuda:0'), covar=tensor([0.4725, 0.4362, 0.3711, 0.4021, 0.5538, 0.4317, 0.5152, 0.3214],
38
+ device='cuda:0'), in_proj_covar=tensor([0.0260, 0.0243, 0.0263, 0.0289, 0.0289, 0.0265, 0.0295, 0.0247],
39
+ device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
40
+ device='cuda:0')
41
+ 2023-04-03 17:22:02,103 INFO [decode.py:560] batch 350/?, cuts processed until now is 11145
42
+ 2023-04-03 17:22:12,758 INFO [decode.py:560] batch 400/?, cuts processed until now is 12706
43
+ 2023-04-03 17:22:23,693 INFO [decode.py:560] batch 450/?, cuts processed until now is 14224
44
+ 2023-04-03 17:22:34,416 INFO [decode.py:560] batch 500/?, cuts processed until now is 15743
45
+ 2023-04-03 17:22:35,701 INFO [decode.py:576] The transcripts are stored in pruned_transducer_stateless7_streaming/exp1/greedy_search/recogs-test-cv-greedy_search-epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model.txt
46
+ 2023-04-03 17:22:35,945 INFO [utils.py:558] [test-cv-greedy_search] %WER 10.57% [16585 / 156915, 1231 ins, 1791 del, 13563 sub ]
47
+ 2023-04-03 17:22:36,536 INFO [decode.py:589] Wrote detailed error stats to pruned_transducer_stateless7_streaming/exp1/greedy_search/errs-test-cv-greedy_search-epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model.txt
48
+ 2023-04-03 17:22:36,536 INFO [decode.py:609]
49
+ For test-cv, WER of different settings are:
50
+ greedy_search 10.57 best for test-cv
51
+
52
+ 2023-04-03 17:22:36,537 INFO [decode.py:808] Done!
decoding_results/greedy_search/recogs-test-cv-greedy_search-epoch-29-avg-9-streaming-chunk-size-64-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
decoding_results/greedy_search/wer-summary-test-cv-greedy_search.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ settings WER
2
+ greedy_search 10.57
decoding_results/modified_beam_search/errs-test-cv-beam_size_4-epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
decoding_results/modified_beam_search/log-decode-epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model-2023-04-03-17-22-38 ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-04-03 17:22:38,516 INFO [decode.py:659] Decoding started
2
+ 2023-04-03 17:22:38,516 INFO [decode.py:665] Device: cuda:0
3
+ 2023-04-03 17:22:38,518 INFO [decode.py:675] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.23.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '62e404dd3f3a811d73e424199b3408e309c06e1a', 'k2-git-date': 'Mon Jan 30 02:26:16 2023', 'lhotse-version': '1.12.0.dev+git.3ccfeb7.clean', 'torch-version': '1.13.0', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': 'd74822d-dirty', 'icefall-git-date': 'Tue Mar 21 21:35:32 2023', 'icefall-path': '/home/lishaojie/icefall', 'k2-path': '/home/lishaojie/.conda/envs/env_lishaojie/lib/python3.8/site-packages/k2/__init__.py', 'lhotse-path': '/home/lishaojie/.conda/envs/env_lishaojie/lib/python3.8/site-packages/lhotse/__init__.py', 'hostname': 'cnc533', 'IP address': '127.0.1.1'}, 'epoch': 29, 'iter': 0, 'avg': 9, 'use_averaged_model': True, 'exp_dir': PosixPath('pruned_transducer_stateless7_streaming/exp1'), 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_bpe_500'), 'decoding_method': 'modified_beam_search', 'beam_size': 4, 'beam': 20.0, 'ngram_lm_scale': 0.01, 'max_contexts': 8, 'max_states': 64, 'context_size': 2, 'max_sym_per_frame': 1, 'num_paths': 200, 'nbest_scale': 0.5, 'num_encoder_layers': '2,4,3,2,4', 'feedforward_dims': '1024,1024,2048,2048,1024', 'nhead': '8,8,8,8,8', 'encoder_dims': '384,384,384,384,384', 'attention_dims': '192,192,192,192,192', 'encoder_unmasked_dims': '256,256,256,256,256', 'zipformer_downsampling_factors': '1,2,4,8,2', 'cnn_module_kernels': '31,31,31,31,31', 'decoder_dim': 512, 'joiner_dim': 512, 'short_chunk_size': 50, 'num_left_chunks': 4, 'decode_chunk_len': 64, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 200, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'res_dir': PosixPath('pruned_transducer_stateless7_streaming/exp1/modified_beam_search'), 'suffix': 'epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model', 'blank_id': 0, 'unk_id': 2, 'vocab_size': 500}
4
+ 2023-04-03 17:22:38,519 INFO [decode.py:677] About to create model
5
+ 2023-04-03 17:22:38,918 INFO [zipformer.py:405] At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
6
+ 2023-04-03 17:22:38,925 INFO [decode.py:748] Calculating the averaged model over epoch range from 20 (excluded) to 29
7
+ 2023-04-03 17:22:40,997 INFO [decode.py:782] Number of model parameters: 70369391
8
+ 2023-04-03 17:22:40,997 INFO [commonvoice_fr.py:406] About to get test cuts
9
+ 2023-04-03 17:22:44,389 INFO [decode.py:560] batch 0/?, cuts processed until now is 27
10
+ 2023-04-03 17:23:03,653 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.4396, 2.2447, 2.4559, 1.5968, 2.3244, 2.4744, 2.4945, 1.9745],
11
+ device='cuda:0'), covar=tensor([0.0486, 0.0604, 0.0566, 0.0803, 0.0805, 0.0564, 0.0533, 0.1100],
12
+ device='cuda:0'), in_proj_covar=tensor([0.0128, 0.0133, 0.0136, 0.0116, 0.0123, 0.0135, 0.0136, 0.0158],
13
+ device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0001, 0.0002, 0.0002, 0.0002, 0.0002],
14
+ device='cuda:0')
15
+ 2023-04-03 17:23:05,745 INFO [decode.py:560] batch 20/?, cuts processed until now is 604
16
+ 2023-04-03 17:23:23,592 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.0329, 1.8940, 1.7735, 2.1519, 2.5543, 2.1581, 1.8200, 1.6895],
17
+ device='cuda:0'), covar=tensor([0.2199, 0.2101, 0.1994, 0.1657, 0.1436, 0.1120, 0.2161, 0.2030],
18
+ device='cuda:0'), in_proj_covar=tensor([0.0242, 0.0208, 0.0212, 0.0195, 0.0242, 0.0187, 0.0214, 0.0202],
19
+ device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
20
+ device='cuda:0')
21
+ 2023-04-03 17:23:26,475 INFO [decode.py:560] batch 40/?, cuts processed until now is 1209
22
+ 2023-04-03 17:23:41,645 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.7772, 2.7137, 2.1413, 1.0557, 2.3674, 2.2655, 2.0341, 2.4959],
23
+ device='cuda:0'), covar=tensor([0.0948, 0.0634, 0.1572, 0.1977, 0.1178, 0.2220, 0.2033, 0.0825],
24
+ device='cuda:0'), in_proj_covar=tensor([0.0167, 0.0187, 0.0196, 0.0178, 0.0206, 0.0207, 0.0220, 0.0192],
25
+ device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
26
+ device='cuda:0')
27
+ 2023-04-03 17:23:46,667 INFO [decode.py:560] batch 60/?, cuts processed until now is 1866
28
+ 2023-04-03 17:24:07,713 INFO [decode.py:560] batch 80/?, cuts processed until now is 2422
29
+ 2023-04-03 17:24:10,930 INFO [zipformer.py:2441] attn_weights_entropy = tensor([1.5944, 1.6332, 1.4009, 1.6980, 2.0331, 1.9103, 1.6203, 1.4564],
30
+ device='cuda:0'), covar=tensor([0.0370, 0.0328, 0.0636, 0.0290, 0.0200, 0.0398, 0.0360, 0.0438],
31
+ device='cuda:0'), in_proj_covar=tensor([0.0097, 0.0103, 0.0143, 0.0108, 0.0097, 0.0111, 0.0100, 0.0110],
32
+ device='cuda:0'), out_proj_covar=tensor([7.4944e-05, 7.9098e-05, 1.1173e-04, 8.2734e-05, 7.5248e-05, 8.1783e-05,
33
+ 7.3728e-05, 8.3511e-05], device='cuda:0')
34
+ 2023-04-03 17:24:19,887 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.5455, 2.5531, 2.0976, 0.9939, 2.3244, 2.0391, 1.9380, 2.4056],
35
+ device='cuda:0'), covar=tensor([0.0949, 0.0639, 0.1516, 0.2032, 0.1282, 0.2361, 0.2408, 0.0816],
36
+ device='cuda:0'), in_proj_covar=tensor([0.0167, 0.0187, 0.0196, 0.0178, 0.0206, 0.0207, 0.0220, 0.0192],
37
+ device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002],
38
+ device='cuda:0')
39
+ 2023-04-03 17:24:27,881 INFO [decode.py:560] batch 100/?, cuts processed until now is 3088
40
+ 2023-04-03 17:24:48,430 INFO [decode.py:560] batch 120/?, cuts processed until now is 3672
41
+ 2023-04-03 17:25:08,496 INFO [decode.py:560] batch 140/?, cuts processed until now is 4348
42
+ 2023-04-03 17:25:28,540 INFO [decode.py:560] batch 160/?, cuts processed until now is 5035
43
+ 2023-04-03 17:25:40,611 INFO [zipformer.py:2441] attn_weights_entropy = tensor([2.4775, 2.3269, 2.5159, 1.6390, 2.4735, 2.5963, 2.5525, 2.0379],
44
+ device='cuda:0'), covar=tensor([0.0466, 0.0579, 0.0534, 0.0716, 0.0834, 0.0513, 0.0447, 0.1035],
45
+ device='cuda:0'), in_proj_covar=tensor([0.0128, 0.0133, 0.0136, 0.0116, 0.0123, 0.0135, 0.0136, 0.0158],
46
+ device='cuda:0'), out_proj_covar=tensor([0.0002, 0.0002, 0.0002, 0.0001, 0.0002, 0.0002, 0.0002, 0.0002],
47
+ device='cuda:0')
48
+ 2023-04-03 17:25:48,922 INFO [decode.py:560] batch 180/?, cuts processed until now is 5674
49
+ 2023-04-03 17:26:09,282 INFO [decode.py:560] batch 200/?, cuts processed until now is 6301
50
+ 2023-04-03 17:26:29,657 INFO [decode.py:560] batch 220/?, cuts processed until now is 6914
51
+ 2023-04-03 17:26:50,151 INFO [decode.py:560] batch 240/?, cuts processed until now is 7540
52
+ 2023-04-03 17:27:10,601 INFO [decode.py:560] batch 260/?, cuts processed until now is 8161
53
+ 2023-04-03 17:27:30,848 INFO [decode.py:560] batch 280/?, cuts processed until now is 8857
54
+ 2023-04-03 17:27:50,769 INFO [decode.py:560] batch 300/?, cuts processed until now is 9574
55
+ 2023-04-03 17:28:11,398 INFO [decode.py:560] batch 320/?, cuts processed until now is 10169
56
+ 2023-04-03 17:28:25,548 INFO [zipformer.py:2441] attn_weights_entropy = tensor([1.8605, 1.7340, 2.4232, 3.4970, 2.3920, 2.5277, 1.1464, 2.9314],
57
+ device='cuda:0'), covar=tensor([0.1522, 0.1223, 0.1074, 0.0445, 0.0713, 0.1245, 0.1648, 0.0431],
58
+ device='cuda:0'), in_proj_covar=tensor([0.0097, 0.0114, 0.0131, 0.0162, 0.0098, 0.0133, 0.0122, 0.0098],
59
+ device='cuda:0'), out_proj_covar=tensor([0.0003, 0.0003, 0.0004, 0.0004, 0.0003, 0.0004, 0.0003, 0.0003],
60
+ device='cuda:0')
61
+ 2023-04-03 17:28:31,696 INFO [decode.py:560] batch 340/?, cuts processed until now is 10810
62
+ 2023-04-03 17:28:52,180 INFO [decode.py:560] batch 360/?, cuts processed until now is 11452
63
+ 2023-04-03 17:29:12,247 INFO [decode.py:560] batch 380/?, cuts processed until now is 12133
64
+ 2023-04-03 17:29:33,066 INFO [decode.py:560] batch 400/?, cuts processed until now is 12706
65
+ 2023-04-03 17:29:53,675 INFO [decode.py:560] batch 420/?, cuts processed until now is 13299
66
+ 2023-04-03 17:30:07,997 INFO [zipformer.py:2441] attn_weights_entropy = tensor([1.8529, 1.7486, 1.5706, 1.8211, 2.1766, 2.0610, 1.7289, 1.5829],
67
+ device='cuda:0'), covar=tensor([0.0370, 0.0322, 0.0575, 0.0276, 0.0210, 0.0417, 0.0338, 0.0404],
68
+ device='cuda:0'), in_proj_covar=tensor([0.0097, 0.0103, 0.0143, 0.0108, 0.0097, 0.0111, 0.0100, 0.0110],
69
+ device='cuda:0'), out_proj_covar=tensor([7.4944e-05, 7.9098e-05, 1.1173e-04, 8.2734e-05, 7.5248e-05, 8.1783e-05,
70
+ 7.3728e-05, 8.3511e-05], device='cuda:0')
71
+ 2023-04-03 17:30:13,957 INFO [decode.py:560] batch 440/?, cuts processed until now is 13891
72
+ 2023-04-03 17:30:34,408 INFO [decode.py:560] batch 460/?, cuts processed until now is 14515
73
+ 2023-04-03 17:30:54,607 INFO [decode.py:560] batch 480/?, cuts processed until now is 15158
74
+ 2023-04-03 17:31:14,650 INFO [decode.py:560] batch 500/?, cuts processed until now is 15743
75
+ 2023-04-03 17:31:18,578 INFO [decode.py:576] The transcripts are stored in pruned_transducer_stateless7_streaming/exp1/modified_beam_search/recogs-test-cv-beam_size_4-epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model.txt
76
+ 2023-04-03 17:31:18,889 INFO [utils.py:558] [test-cv-beam_size_4] %WER 10.19% [15988 / 156915, 1250 ins, 1549 del, 13189 sub ]
77
+ 2023-04-03 17:31:19,408 INFO [decode.py:589] Wrote detailed error stats to pruned_transducer_stateless7_streaming/exp1/modified_beam_search/errs-test-cv-beam_size_4-epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model.txt
78
+ 2023-04-03 17:31:19,408 INFO [decode.py:609]
79
+ For test-cv, WER of different settings are:
80
+ beam_size_4 10.19 best for test-cv
81
+
82
+ 2023-04-03 17:31:19,408 INFO [decode.py:808] Done!
decoding_results/modified_beam_search/recogs-test-cv-beam_size_4-epoch-29-avg-9-streaming-chunk-size-64-modified_beam_search-beam-size-4-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
decoding_results/modified_beam_search/wer-summary-test-cv-beam_size_4.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ settings WER
2
+ beam_size_4 10.19