Upload 34 files
Browse files- .gitattributes +4 -0
- hanzi-2-man-bpe10k/checkpoint_best.pt +3 -0
- hanzi-2-man-bpe10k/codes.10000.bpe.hanzi +0 -0
- hanzi-2-man-bpe10k/data-bin/dict.hanzi.txt +0 -0
- hanzi-2-man-bpe10k/data-bin/dict.man.txt +0 -0
- hanzi-2-man-bpe10k/data-bin/preprocess.log +14 -0
- hanzi-2-man-bpe10k/data-bin/test.hanzi-man.hanzi.bin +3 -0
- hanzi-2-man-bpe10k/data-bin/test.hanzi-man.hanzi.idx +0 -0
- hanzi-2-man-bpe10k/data-bin/test.hanzi-man.man.bin +3 -0
- hanzi-2-man-bpe10k/data-bin/test.hanzi-man.man.idx +0 -0
- hanzi-2-man-bpe10k/data-bin/train.hanzi-man.hanzi.bin +3 -0
- hanzi-2-man-bpe10k/data-bin/train.hanzi-man.hanzi.idx +3 -0
- hanzi-2-man-bpe10k/data-bin/train.hanzi-man.man.bin +3 -0
- hanzi-2-man-bpe10k/data-bin/train.hanzi-man.man.idx +3 -0
- hanzi-2-man-bpe10k/data-bin/valid.hanzi-man.hanzi.bin +3 -0
- hanzi-2-man-bpe10k/data-bin/valid.hanzi-man.hanzi.idx +0 -0
- hanzi-2-man-bpe10k/data-bin/valid.hanzi-man.man.bin +3 -0
- hanzi-2-man-bpe10k/data-bin/valid.hanzi-man.man.idx +0 -0
- man-2-hanzi-bpe10k/checkpoint_best.pt +3 -0
- man-2-hanzi-bpe10k/codes.10000.bpe.man +0 -0
- man-2-hanzi-bpe10k/data-bin/dict.hanzi.txt +0 -0
- man-2-hanzi-bpe10k/data-bin/dict.man.txt +0 -0
- man-2-hanzi-bpe10k/data-bin/preprocess.log +14 -0
- man-2-hanzi-bpe10k/data-bin/test.man-hanzi.hanzi.bin +3 -0
- man-2-hanzi-bpe10k/data-bin/test.man-hanzi.hanzi.idx +0 -0
- man-2-hanzi-bpe10k/data-bin/test.man-hanzi.man.bin +3 -0
- man-2-hanzi-bpe10k/data-bin/test.man-hanzi.man.idx +0 -0
- man-2-hanzi-bpe10k/data-bin/train.man-hanzi.hanzi.bin +3 -0
- man-2-hanzi-bpe10k/data-bin/train.man-hanzi.hanzi.idx +3 -0
- man-2-hanzi-bpe10k/data-bin/train.man-hanzi.man.bin +3 -0
- man-2-hanzi-bpe10k/data-bin/train.man-hanzi.man.idx +3 -0
- man-2-hanzi-bpe10k/data-bin/valid.man-hanzi.hanzi.bin +3 -0
- man-2-hanzi-bpe10k/data-bin/valid.man-hanzi.hanzi.idx +0 -0
- man-2-hanzi-bpe10k/data-bin/valid.man-hanzi.man.bin +3 -0
- man-2-hanzi-bpe10k/data-bin/valid.man-hanzi.man.idx +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
hanzi-2-man-bpe10k/data-bin/train.hanzi-man.hanzi.idx filter=lfs diff=lfs merge=lfs -text
|
37 |
+
hanzi-2-man-bpe10k/data-bin/train.hanzi-man.man.idx filter=lfs diff=lfs merge=lfs -text
|
38 |
+
man-2-hanzi-bpe10k/data-bin/train.man-hanzi.hanzi.idx filter=lfs diff=lfs merge=lfs -text
|
39 |
+
man-2-hanzi-bpe10k/data-bin/train.man-hanzi.man.idx filter=lfs diff=lfs merge=lfs -text
|
hanzi-2-man-bpe10k/checkpoint_best.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25627a9e17c3c6f3e5ec2b30816754d1860502a32e19d264f19fa72b9db0aac5
|
3 |
+
size 860646498
|
hanzi-2-man-bpe10k/codes.10000.bpe.hanzi
ADDED
The diff for this file is too large to render.
See raw diff
|
|
hanzi-2-man-bpe10k/data-bin/dict.hanzi.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
hanzi-2-man-bpe10k/data-bin/dict.man.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
hanzi-2-man-bpe10k/data-bin/preprocess.log
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Namespace(align_suffix=None, alignfile=None, all_gather_list_size=16384, azureml_logging=False, bf16=False, bpe=None, cpu=False, criterion='cross_entropy', dataset_impl='mmap', destdir='data-bin/hanzi-2-man-bpe10k', empty_cache_freq=0, fp16=False, fp16_init_scale=128, fp16_no_flatten_grads=False, fp16_scale_tolerance=0.0, fp16_scale_window=None, joined_dictionary=True, log_format=None, log_interval=100, lr_scheduler='fixed', memory_efficient_bf16=False, memory_efficient_fp16=False, min_loss_scale=0.0001, model_parallel_size=1, no_progress_bar=False, nwordssrc=-1, nwordstgt=-1, only_source=False, optimizer=None, padding_factor=8, plasma_path='/tmp/plasma', profile=False, quantization_config_path=None, reset_logging=False, scoring='bleu', seed=1, simul_type=None, source_lang='hanzi', srcdict=None, suppress_crashes=False, target_lang='man', task='translation', tensorboard_logdir=None, testpref='YYTD/eval/eval.10000.bpe', tgtdict=None, threshold_loss_scale=None, thresholdsrc=0, thresholdtgt=0, tokenizer=None, tpu=False, trainpref='YYTD/train.10000.bpe', use_plasma_view=False, user_dir=None, validpref='YYTD/201214/eval/eval.10000.bpe', wandb_project=None, workers=20)
|
2 |
+
[hanzi] Dictionary: 23080 types
|
3 |
+
[hanzi] YYTD/train.10000.bpe.hanzi: 3227095 sents, 20946168 tokens, 0.0% replaced by <unk>
|
4 |
+
[hanzi] Dictionary: 23080 types
|
5 |
+
[hanzi] YYTD/201214/eval/eval.10000.bpe.hanzi: 6212 sents, 43206 tokens, 0.0% replaced by <unk>
|
6 |
+
[hanzi] Dictionary: 23080 types
|
7 |
+
[hanzi] YYTD/eval/eval.10000.bpe.hanzi: 6212 sents, 43206 tokens, 0.0% replaced by <unk>
|
8 |
+
[man] Dictionary: 23080 types
|
9 |
+
[man] YYTD/train.10000.bpe.man: 3227095 sents, 20701981 tokens, 0.0% replaced by <unk>
|
10 |
+
[man] Dictionary: 23080 types
|
11 |
+
[man] YYTD/201214/eval/eval.10000.bpe.man: 6212 sents, 37893 tokens, 0.00264% replaced by <unk>
|
12 |
+
[man] Dictionary: 23080 types
|
13 |
+
[man] YYTD/eval/eval.10000.bpe.man: 6212 sents, 37893 tokens, 0.00264% replaced by <unk>
|
14 |
+
Wrote preprocessed data to data-bin/hanzi-2-man-bpe10k
|
hanzi-2-man-bpe10k/data-bin/test.hanzi-man.hanzi.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e43867bd790bbb5adad0693373bdc37fdf3b0100e246ef6a39111c6514715fc6
|
3 |
+
size 86412
|
hanzi-2-man-bpe10k/data-bin/test.hanzi-man.hanzi.idx
ADDED
Binary file (74.6 kB). View file
|
|
hanzi-2-man-bpe10k/data-bin/test.hanzi-man.man.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f1387c0597258c3f20b59352c27e95a02daf7821c2ccd07026de8967d55fede
|
3 |
+
size 75786
|
hanzi-2-man-bpe10k/data-bin/test.hanzi-man.man.idx
ADDED
Binary file (74.6 kB). View file
|
|
hanzi-2-man-bpe10k/data-bin/train.hanzi-man.hanzi.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:841064c6ad9e462a388de3ad07e7919e328a9199cc563449673d798ae4f10adf
|
3 |
+
size 41892336
|
hanzi-2-man-bpe10k/data-bin/train.hanzi-man.hanzi.idx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a25baa3a5d6cd98b6c1b0771a3ae7f3662cb1460159d06709c4df6d3532e00f0
|
3 |
+
size 38725166
|
hanzi-2-man-bpe10k/data-bin/train.hanzi-man.man.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3d11cd08e608aa3b47a15b52e82ce05813383ee87221f39e37b139356d8e68b
|
3 |
+
size 41403962
|
hanzi-2-man-bpe10k/data-bin/train.hanzi-man.man.idx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c3180353dcc560cb0c1fd9d8bfdeb7309ef96ec359be74efd5077845124c4ad
|
3 |
+
size 38725166
|
hanzi-2-man-bpe10k/data-bin/valid.hanzi-man.hanzi.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba7f3139f9185156712f210459efd203af19a16625672ce214ea917cc017ae19
|
3 |
+
size 86412
|
hanzi-2-man-bpe10k/data-bin/valid.hanzi-man.hanzi.idx
ADDED
Binary file (74.6 kB). View file
|
|
hanzi-2-man-bpe10k/data-bin/valid.hanzi-man.man.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f356a4b0cb5b86821348ce5690b0b270169fd8668a62e0882fb560869073c704
|
3 |
+
size 75786
|
hanzi-2-man-bpe10k/data-bin/valid.hanzi-man.man.idx
ADDED
Binary file (74.6 kB). View file
|
|
man-2-hanzi-bpe10k/checkpoint_best.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53bb2bb18e51071ab3995ba83c8373653c9f9a92e2dda4c4371009fa77dcdb69
|
3 |
+
size 860646498
|
man-2-hanzi-bpe10k/codes.10000.bpe.man
ADDED
The diff for this file is too large to render.
See raw diff
|
|
man-2-hanzi-bpe10k/data-bin/dict.hanzi.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
man-2-hanzi-bpe10k/data-bin/dict.man.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
man-2-hanzi-bpe10k/data-bin/preprocess.log
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Namespace(align_suffix=None, alignfile=None, all_gather_list_size=16384, azureml_logging=False, bf16=False, bpe=None, cpu=False, criterion='cross_entropy', dataset_impl='mmap', destdir='data-bin/man-2-hanzi-bpe10k', empty_cache_freq=0, fp16=False, fp16_init_scale=128, fp16_no_flatten_grads=False, fp16_scale_tolerance=0.0, fp16_scale_window=None, joined_dictionary=True, log_format=None, log_interval=100, lr_scheduler='fixed', memory_efficient_bf16=False, memory_efficient_fp16=False, min_loss_scale=0.0001, model_parallel_size=1, no_progress_bar=False, nwordssrc=-1, nwordstgt=-1, only_source=False, optimizer=None, padding_factor=8, plasma_path='/tmp/plasma', profile=False, quantization_config_path=None, reset_logging=False, scoring='bleu', seed=1, simul_type=None, source_lang='man', srcdict=None, suppress_crashes=False, target_lang='hanzi', task='translation', tensorboard_logdir=None, testpref='YYTD/eval/eval.10000.bpe', tgtdict=None, threshold_loss_scale=None, thresholdsrc=0, thresholdtgt=0, tokenizer=None, tpu=False, trainpref='YYTD/train.10000.bpe', use_plasma_view=False, user_dir=None, validpref='YYTD/201214/eval/eval.10000.bpe', wandb_project=None, workers=20)
|
2 |
+
[man] Dictionary: 23080 types
|
3 |
+
[man] YYTD/train.10000.bpe.man: 3227095 sents, 20701981 tokens, 0.0% replaced by <unk>
|
4 |
+
[man] Dictionary: 23080 types
|
5 |
+
[man] YYTD/201214/eval/eval.10000.bpe.man: 6212 sents, 37893 tokens, 0.00264% replaced by <unk>
|
6 |
+
[man] Dictionary: 23080 types
|
7 |
+
[man] YYTD/eval/eval.10000.bpe.man: 6212 sents, 37893 tokens, 0.00264% replaced by <unk>
|
8 |
+
[hanzi] Dictionary: 23080 types
|
9 |
+
[hanzi] YYTD/train.10000.bpe.hanzi: 3227095 sents, 20946168 tokens, 0.0% replaced by <unk>
|
10 |
+
[hanzi] Dictionary: 23080 types
|
11 |
+
[hanzi] YYTD/201214/eval/eval.10000.bpe.hanzi: 6212 sents, 43206 tokens, 0.0% replaced by <unk>
|
12 |
+
[hanzi] Dictionary: 23080 types
|
13 |
+
[hanzi] YYTD/eval/eval.10000.bpe.hanzi: 6212 sents, 43206 tokens, 0.0% replaced by <unk>
|
14 |
+
Wrote preprocessed data to data-bin/man-2-hanzi-bpe10k
|
man-2-hanzi-bpe10k/data-bin/test.man-hanzi.hanzi.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e43867bd790bbb5adad0693373bdc37fdf3b0100e246ef6a39111c6514715fc6
|
3 |
+
size 86412
|
man-2-hanzi-bpe10k/data-bin/test.man-hanzi.hanzi.idx
ADDED
Binary file (74.6 kB). View file
|
|
man-2-hanzi-bpe10k/data-bin/test.man-hanzi.man.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f1387c0597258c3f20b59352c27e95a02daf7821c2ccd07026de8967d55fede
|
3 |
+
size 75786
|
man-2-hanzi-bpe10k/data-bin/test.man-hanzi.man.idx
ADDED
Binary file (74.6 kB). View file
|
|
man-2-hanzi-bpe10k/data-bin/train.man-hanzi.hanzi.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:841064c6ad9e462a388de3ad07e7919e328a9199cc563449673d798ae4f10adf
|
3 |
+
size 41892336
|
man-2-hanzi-bpe10k/data-bin/train.man-hanzi.hanzi.idx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a25baa3a5d6cd98b6c1b0771a3ae7f3662cb1460159d06709c4df6d3532e00f0
|
3 |
+
size 38725166
|
man-2-hanzi-bpe10k/data-bin/train.man-hanzi.man.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3d11cd08e608aa3b47a15b52e82ce05813383ee87221f39e37b139356d8e68b
|
3 |
+
size 41403962
|
man-2-hanzi-bpe10k/data-bin/train.man-hanzi.man.idx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c3180353dcc560cb0c1fd9d8bfdeb7309ef96ec359be74efd5077845124c4ad
|
3 |
+
size 38725166
|
man-2-hanzi-bpe10k/data-bin/valid.man-hanzi.hanzi.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba7f3139f9185156712f210459efd203af19a16625672ce214ea917cc017ae19
|
3 |
+
size 86412
|
man-2-hanzi-bpe10k/data-bin/valid.man-hanzi.hanzi.idx
ADDED
Binary file (74.6 kB). View file
|
|
man-2-hanzi-bpe10k/data-bin/valid.man-hanzi.man.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f356a4b0cb5b86821348ce5690b0b270169fd8668a62e0882fb560869073c704
|
3 |
+
size 75786
|
man-2-hanzi-bpe10k/data-bin/valid.man-hanzi.man.idx
ADDED
Binary file (74.6 kB). View file
|
|