umuthopeyildirim committed on
Commit
6820689
·
verified ·
1 Parent(s): be1b928

Upload folder using huggingface_hub

Browse files
dataset_dict.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"splits": ["train", "test"]}
test/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07399a8c23abeaf12ff7fd2d569fc1e6b0265da8a2a76f2bd5db714709674a96
3
+ size 10221640
test/dataset_info.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "json",
3
+ "citation": "",
4
+ "config_name": "default",
5
+ "dataset_name": "mod-rwkv-paper-text",
6
+ "dataset_size": 825848079,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/umuthopeyildirim/mod-rwkv-paper-text@7a9c8fdf95ec7d10ed79f30b4cae59c0f521c83e/mod-rwkv-OIGandCIVIL.json": {
10
+ "num_bytes": 871634184,
11
+ "checksum": null
12
+ }
13
+ },
14
+ "download_size": 871634184,
15
+ "features": {
16
+ "input_ids": {
17
+ "feature": {
18
+ "dtype": "int32",
19
+ "_type": "Value"
20
+ },
21
+ "_type": "Sequence"
22
+ },
23
+ "token_type_ids": {
24
+ "feature": {
25
+ "dtype": "int8",
26
+ "_type": "Value"
27
+ },
28
+ "_type": "Sequence"
29
+ },
30
+ "attention_mask": {
31
+ "feature": {
32
+ "dtype": "int8",
33
+ "_type": "Value"
34
+ },
35
+ "_type": "Sequence"
36
+ }
37
+ },
38
+ "homepage": "",
39
+ "license": "",
40
+ "size_in_bytes": 1697482263,
41
+ "splits": {
42
+ "train": {
43
+ "name": "train",
44
+ "num_bytes": 825848079,
45
+ "num_examples": 556607,
46
+ "dataset_name": "mod-rwkv-paper-text"
47
+ }
48
+ },
49
+ "version": {
50
+ "version_str": "0.0.0",
51
+ "major": 0,
52
+ "minor": 0,
53
+ "patch": 0
54
+ }
55
+ }
test/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "34a31ec98fe8372e",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
train/data-00000-of-00003.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac101cdd24317f45c1e6a67cf092fa35aea46103220bb985cc718f7a6a359fa6
3
+ size 389803240
train/data-00001-of-00003.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4834bb5fe79a8a967980bb418c9644f038b1124a5439503a326e424e3d4d4ca1
3
+ size 348109864
train/data-00002-of-00003.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f58c8249239cd1e81a02d907be08657e3dc1d115e9b33a64ca55f63b544690b
3
+ size 306981424
train/dataset_info.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "json",
3
+ "citation": "",
4
+ "config_name": "default",
5
+ "dataset_name": "mod-rwkv-paper-text",
6
+ "dataset_size": 825848079,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/umuthopeyildirim/mod-rwkv-paper-text@7a9c8fdf95ec7d10ed79f30b4cae59c0f521c83e/mod-rwkv-OIGandCIVIL.json": {
10
+ "num_bytes": 871634184,
11
+ "checksum": null
12
+ }
13
+ },
14
+ "download_size": 871634184,
15
+ "features": {
16
+ "input_ids": {
17
+ "feature": {
18
+ "dtype": "int32",
19
+ "_type": "Value"
20
+ },
21
+ "_type": "Sequence"
22
+ },
23
+ "token_type_ids": {
24
+ "feature": {
25
+ "dtype": "int8",
26
+ "_type": "Value"
27
+ },
28
+ "_type": "Sequence"
29
+ },
30
+ "attention_mask": {
31
+ "feature": {
32
+ "dtype": "int8",
33
+ "_type": "Value"
34
+ },
35
+ "_type": "Sequence"
36
+ },
37
+ "sample_length": {
38
+ "feature": {
39
+ "dtype": "int64",
40
+ "_type": "Value"
41
+ },
42
+ "_type": "Sequence"
43
+ }
44
+ },
45
+ "homepage": "",
46
+ "license": "",
47
+ "size_in_bytes": 1697482263,
48
+ "splits": {
49
+ "train": {
50
+ "name": "train",
51
+ "num_bytes": 825848079,
52
+ "num_examples": 556607,
53
+ "dataset_name": "mod-rwkv-paper-text"
54
+ }
55
+ },
56
+ "version": {
57
+ "version_str": "0.0.0",
58
+ "major": 0,
59
+ "minor": 0,
60
+ "patch": 0
61
+ }
62
+ }
train/state.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00003.arrow"
5
+ },
6
+ {
7
+ "filename": "data-00001-of-00003.arrow"
8
+ },
9
+ {
10
+ "filename": "data-00002-of-00003.arrow"
11
+ }
12
+ ],
13
+ "_fingerprint": "f845fc13a332a0fe",
14
+ "_format_columns": null,
15
+ "_format_kwargs": {},
16
+ "_format_type": null,
17
+ "_output_all_columns": false,
18
+ "_split": null
19
+ }