jacobthebanana committed
Commit: cc8e143
Parent(s): 544ab80
Saving weights and logs of step 143142
- create_dataset.py +23 -0
- create_dataset.sh +10 -0
- events.out.tfevents.1657772277.t1v-n-f83092c9-w-0.2379212.0.v2 +3 -0
- events.out.tfevents.1657772475.t1v-n-f83092c9-w-0.2382082.0.v2 +3 -0
- events.out.tfevents.1657772678.t1v-n-f83092c9-w-0.2384696.0.v2 +3 -0
- events.out.tfevents.1657773293.t1v-n-f83092c9-w-0.2387870.0.v2 +3 -0
- events.out.tfevents.1657773550.t1v-n-f83092c9-w-0.2390668.0.v2 +3 -0
- events.out.tfevents.1657773740.t1v-n-f83092c9-w-0.2393104.0.v2 +3 -0
- events.out.tfevents.1657774428.t1v-n-f83092c9-w-0.2397627.0.v2 +3 -0
- events.out.tfevents.1657774843.t1v-n-f83092c9-w-0.2401084.0.v2 +3 -0
- events.out.tfevents.1657775805.t1v-n-f83092c9-w-0.2405744.0.v2 +3 -0
- events.out.tfevents.1657776321.t1v-n-f83092c9-w-0.2410000.0.v2 +3 -0
- flax_model.msgpack +1 -1
- logs/2022-07-14-roberta-base.txt +286 -0
- preprocess.sh +2 -0
- process_bignews.py +112 -0
create_dataset.py
ADDED
@@ -0,0 +1,23 @@
+import argparse
+
+from datasets import load_dataset
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("path_prefix")
+parser.add_argument("output_path")
+args = parser.parse_args()
+
+path_prefix: str = args.path_prefix
+output_path: str = args.output_path
+
+dataset = load_dataset(
+    "text",
+    data_files={
+        "train": [path_prefix + "_train_text.txt"],
+        "validation": [path_prefix + "_val_text.txt"],
+    },
+    cache_dir="/dev/shm/.cache",
+)
+
+dataset.save_to_disk(output_path)
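For reference, a minimal sketch (not part of this commit; the path is illustrative) of reading back the directory that create_dataset.py writes with save_to_disk:

    # a minimal sketch, assuming the Hugging Face `datasets` API; the path is illustrative
    from datasets import load_from_disk

    dataset = load_from_disk("/dev/shm/bignews_flattened")  # directory written by save_to_disk
    print(dataset["train"][0]["text"])  # the "text" loader keeps one record per input line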
create_dataset.sh
ADDED
@@ -0,0 +1,10 @@
+# pv ~/data/bignews/processed/bignews_val_text.txt > /dev/shm/news_val_text.txt
+# pv ~/data/bignews/processed/bignews_train_text.txt > /dev/shm/news_train_text.txt
+python3 process_bignews.py ~/data/bignews/bignews_train.json /dev/shm/news_train
+python3 process_bignews.py ~/data/bignews/bignews_val.json /dev/shm/news_val
+python3 create_dataset.py /dev/shm/news /dev/shm/bignews_flattened
+
+# python3 process_bignews.py ~/data/bignews/bignews_$SPLIT.json ~/data/bignews/processed_lite/bignews_$SPLIT --ratio 0.05
+# pv ~/data/bignews/processed_lite/bignews_val_text.txt > /dev/shm/news_lite_val_text.txt
+# pv ~/data/bignews/processed_lite/bignews_train_text.txt > /dev/shm/news_lite_train_text.txt
+# python3 create_dataset.py /dev/shm/news_lite /dev/shm/bignews_lite_flattened
events.out.tfevents.1657772277.t1v-n-f83092c9-w-0.2379212.0.v2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a952348a0e1f0fc986394f978a8152f7c26dae16568f1f7b8229c39cd522c5a7
+size 3715
events.out.tfevents.1657772475.t1v-n-f83092c9-w-0.2382082.0.v2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c89523d1a2ee2f618769eebf52a71eea262277892d2a334fe9a2381a9766a4f
+size 3730
events.out.tfevents.1657772678.t1v-n-f83092c9-w-0.2384696.0.v2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc7b285bd92f29d768c0f0d75aff351f12605bd07b9c87db8b72bd74bc243029
+size 3775
events.out.tfevents.1657773293.t1v-n-f83092c9-w-0.2387870.0.v2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cb31603e6b60c47b64c00fa26be136f49a031e5579fd2e093593dc654403256
+size 3775
events.out.tfevents.1657773550.t1v-n-f83092c9-w-0.2390668.0.v2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a8f8ee607b044d11e1c8aab5c9fdc23004bdeecb51f8c216af822707f4b618b
+size 3775
events.out.tfevents.1657773740.t1v-n-f83092c9-w-0.2393104.0.v2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08e29a7ae72e281922024f33de452a1376b9b47477969996fb908fad21285c50
+size 374864
events.out.tfevents.1657774428.t1v-n-f83092c9-w-0.2397627.0.v2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a7fb8cd80114d4843c3fdb96e7d2d05bbe9b246bd0e87d688c314ebecffc8a8
+size 3715
events.out.tfevents.1657774843.t1v-n-f83092c9-w-0.2401084.0.v2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d898f5d401acac1b7e05a2ce04b2da24decd1b581547275b733d4f200378ff14
+size 3715
events.out.tfevents.1657775805.t1v-n-f83092c9-w-0.2405744.0.v2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6416c4353e641a5dc2936f46a7370bf3d435af037647b6862b0289af5f7f251
+size 78148
events.out.tfevents.1657776321.t1v-n-f83092c9-w-0.2410000.0.v2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bef9d5e29ca6d02e6ec05e2a68cac9452916dfeca96018412688fc2b86a5c01c
+size 21500557
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:96a26f41db37f8b41b9c8a764d544faa94fb9ebfd3a213225fcc265febe9010a
 size 498589677
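The updated flax_model.msgpack is a Git LFS pointer to the new weights. A minimal loading sketch, not taken from this commit: it assumes the checkout also contains config.json and that the weights correspond to transformers' Flax RoBERTa encoder.

    # a minimal sketch; the local path is illustrative and the model class is an assumption
    from transformers import FlaxRobertaModel

    model = FlaxRobertaModel.from_pretrained("./")  # reads flax_model.msgpack plus config.json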
logs/2022-07-14-roberta-base.txt
ADDED
@@ -0,0 +1,286 @@
+Step... (500/437388 | Training Loss: -0.0005138383712619543, Learning Rate: 1.9977182091679424e-05)
+Step... (1000/437388 | Training Loss: -0.0013853885466232896, Learning Rate: 1.9954319213866256e-05)
+Step... (1500/437388 | Training Loss: -0.0014231076929718256, Learning Rate: 1.993145633605309e-05)
+Step... (2000/437388 | Training Loss: -0.0015525615308433771, Learning Rate: 1.990859345823992e-05)
+Step... (2500/437388 | Training Loss: -0.004444368649274111, Learning Rate: 1.9885730580426753e-05)
+Step... (3000/437388 | Training Loss: -0.003619273891672492, Learning Rate: 1.9862867702613585e-05)
+Step... (3500/437388 | Training Loss: -9.0174020442646e-05, Learning Rate: 1.9840004824800417e-05)
+Step... (4000/437388 | Training Loss: -5.372820669435896e-05, Learning Rate: 1.9817140127997845e-05)
+Step... (4500/437388 | Training Loss: -0.0005610623047687113, Learning Rate: 1.979427906917408e-05)
+Step... (5000/437388 | Training Loss: -0.0031363563612103462, Learning Rate: 1.9771416191360913e-05)
+Step... (5500/437388 | Training Loss: -0.00013456691522151232, Learning Rate: 1.9748553313547745e-05)
+Step... (6000/437388 | Training Loss: -0.003430676180869341, Learning Rate: 1.9725688616745174e-05)
+Step... (6500/437388 | Training Loss: 2.9760311008431017e-05, Learning Rate: 1.9702825738932006e-05)
+Step... (7000/437388 | Training Loss: -0.0002961964055430144, Learning Rate: 1.9679962861118838e-05)
+Step... (7500/437388 | Training Loss: -0.00017826503608375788, Learning Rate: 1.965709998330567e-05)
+Step... (8000/437388 | Training Loss: -4.724973041447811e-05, Learning Rate: 1.9634237105492502e-05)
+Step... (8500/437388 | Training Loss: -0.0002406371058896184, Learning Rate: 1.9611374227679335e-05)
+Step... (9000/437388 | Training Loss: -8.045811409829184e-05, Learning Rate: 1.9588511349866167e-05)
+Step... (9500/437388 | Training Loss: 9.021518962981645e-06, Learning Rate: 1.9565648472053e-05)
+Step... (10000/437388 | Training Loss: -0.0004939670907333493, Learning Rate: 1.954278559423983e-05)
+Step... (10500/437388 | Training Loss: 9.438104825676419e-06, Learning Rate: 1.9519922716426663e-05)
+Step... (11000/437388 | Training Loss: 2.276919622090645e-05, Learning Rate: 1.9497059838613495e-05)
+Step... (11500/437388 | Training Loss: -1.4924948118277825e-05, Learning Rate: 1.9474195141810924e-05)
+Step... (12000/437388 | Training Loss: 8.752908797760028e-06, Learning Rate: 1.9451332263997756e-05)
+Step... (12500/437388 | Training Loss: -0.0014030194142833352, Learning Rate: 1.942847120517399e-05)
+Step... (13000/437388 | Training Loss: 7.227043170132674e-06, Learning Rate: 1.9405608327360824e-05)
+Step... (13500/437388 | Training Loss: -0.003136668587103486, Learning Rate: 1.9382743630558252e-05)
+Step... (14000/437388 | Training Loss: -4.724322479887633e-06, Learning Rate: 1.9359880752745084e-05)
+Step... (14500/437388 | Training Loss: 1.9046970919589512e-05, Learning Rate: 1.9337017874931917e-05)
+Step... (15000/437388 | Training Loss: -0.0003092987462878227, Learning Rate: 1.931415499711875e-05)
+Step... (15500/437388 | Training Loss: -0.03229433298110962, Learning Rate: 1.929129211930558e-05)
+Step... (16000/437388 | Training Loss: -1.7652382666710764e-06, Learning Rate: 1.9268429241492413e-05)
+Step... (16500/437388 | Training Loss: 1.8737622667686082e-05, Learning Rate: 1.9245566363679245e-05)
+Step... (17000/437388 | Training Loss: 9.301738828071393e-06, Learning Rate: 1.9222703485866077e-05)
+Step... (17500/437388 | Training Loss: 9.757788575370796e-06, Learning Rate: 1.919984060805291e-05)
+Step... (18000/437388 | Training Loss: 2.881894033635035e-05, Learning Rate: 1.917697773023974e-05)
+Step... (18500/437388 | Training Loss: 1.7251250028493814e-05, Learning Rate: 1.9154114852426574e-05)
+Step... (19000/437388 | Training Loss: -3.8056023186072707e-06, Learning Rate: 1.9131251974613406e-05)
+Step... (19500/437388 | Training Loss: 1.8639262634678744e-05, Learning Rate: 1.9108387277810834e-05)
+Step... (20000/437388 | Training Loss: -4.346034074842464e-07, Learning Rate: 1.908552621898707e-05)
+Step... (20500/437388 | Training Loss: 3.273967195127625e-07, Learning Rate: 1.9062663341173902e-05)
+Step... (21000/437388 | Training Loss: -0.0003723864210769534, Learning Rate: 1.9039800463360734e-05)
+Step... (21500/437388 | Training Loss: 9.924142432282679e-06, Learning Rate: 1.9016935766558163e-05)
+Step... (22000/437388 | Training Loss: 9.93290996120777e-06, Learning Rate: 1.8994072888744995e-05)
+Step... (22500/437388 | Training Loss: -0.0001418531610397622, Learning Rate: 1.8971210010931827e-05)
+Step... (23000/437388 | Training Loss: 1.0088506314787082e-05, Learning Rate: 1.894834713311866e-05)
+Step... (23500/437388 | Training Loss: 7.136069143598434e-06, Learning Rate: 1.892548425530549e-05)
+Step... (24000/437388 | Training Loss: 9.869800123851746e-06, Learning Rate: 1.8902621377492324e-05)
+Step... (24500/437388 | Training Loss: 8.915569196688011e-06, Learning Rate: 1.8879758499679156e-05)
+Step... (25000/437388 | Training Loss: 9.661911462899297e-06, Learning Rate: 1.8856895621865988e-05)
+Step... (25500/437388 | Training Loss: 9.152301572612487e-06, Learning Rate: 1.883403274405282e-05)
+Step... (26000/437388 | Training Loss: 9.94595575321e-06, Learning Rate: 1.8811169866239652e-05)
+Step... (26500/437388 | Training Loss: 9.882722224574536e-06, Learning Rate: 1.8788306988426484e-05)
+Step... (27000/437388 | Training Loss: 9.786906048248056e-06, Learning Rate: 1.8765442291623913e-05)
+Step... (27500/437388 | Training Loss: 9.950299499905668e-06, Learning Rate: 1.8742579413810745e-05)
+Step... (28000/437388 | Training Loss: 9.730408237373922e-06, Learning Rate: 1.871971835498698e-05)
+Step... (28500/437388 | Training Loss: 1.0129735528607853e-05, Learning Rate: 1.8696855477173813e-05)
+Step... (29000/437388 | Training Loss: 9.86078885034658e-06, Learning Rate: 1.867399078037124e-05)
+Step... (29500/437388 | Training Loss: 9.964114724425599e-06, Learning Rate: 1.8651127902558073e-05)
+Step... (30000/437388 | Training Loss: 9.767300070961937e-06, Learning Rate: 1.8628265024744906e-05)
+Step... (30500/437388 | Training Loss: 9.961664545699023e-06, Learning Rate: 1.8605402146931738e-05)
+Step... (31000/437388 | Training Loss: 1.0001157534134109e-05, Learning Rate: 1.858253926911857e-05)
+Step... (31500/437388 | Training Loss: 9.999861504184082e-06, Learning Rate: 1.8559676391305402e-05)
+Step... (32000/437388 | Training Loss: 9.999739631894045e-06, Learning Rate: 1.8536813513492234e-05)
+Step... (32500/437388 | Training Loss: 9.999379471992142e-06, Learning Rate: 1.8513950635679066e-05)
+Step... (33000/437388 | Training Loss: 9.577275704941712e-06, Learning Rate: 1.84910877578659e-05)
+Step... (33500/437388 | Training Loss: 9.91380602499703e-06, Learning Rate: 1.846822488005273e-05)
+Step... (34000/437388 | Training Loss: 9.999487701861653e-06, Learning Rate: 1.8445362002239563e-05)
+Step... (34500/437388 | Training Loss: 9.984047210309654e-06, Learning Rate: 1.8422499124426395e-05)
+Step... (35000/437388 | Training Loss: 9.99921940092463e-06, Learning Rate: 1.8399634427623823e-05)
+Step... (35500/437388 | Training Loss: 9.680366929387674e-06, Learning Rate: 1.8376771549810655e-05)
+Step... (36000/437388 | Training Loss: 9.998320820159279e-06, Learning Rate: 1.835391049098689e-05)
+Step... (36500/437388 | Training Loss: 9.958874215953983e-06, Learning Rate: 1.8331047613173723e-05)
+Step... (37000/437388 | Training Loss: 1.0039606422651559e-05, Learning Rate: 1.8308182916371152e-05)
+Step... (37500/437388 | Training Loss: 9.99998883344233e-06, Learning Rate: 1.8285320038557984e-05)
+Step... (38000/437388 | Training Loss: 1.0001829650718719e-05, Learning Rate: 1.8262457160744816e-05)
+Step... (38500/437388 | Training Loss: 9.997856977861375e-06, Learning Rate: 1.8239594282931648e-05)
+Step... (39000/437388 | Training Loss: 9.95433765638154e-06, Learning Rate: 1.821673140511848e-05)
+Step... (39500/437388 | Training Loss: 9.999251233239193e-06, Learning Rate: 1.8193868527305312e-05)
+Step... (40000/437388 | Training Loss: 9.996479093388189e-06, Learning Rate: 1.8171005649492145e-05)
+Step... (40500/437388 | Training Loss: 9.999072062782943e-06, Learning Rate: 1.8148142771678977e-05)
+Step... (41000/437388 | Training Loss: 9.999967005569488e-06, Learning Rate: 1.812527989386581e-05)
+Step... (41500/437388 | Training Loss: 9.775694707059301e-06, Learning Rate: 1.810241701605264e-05)
+Step... (42000/437388 | Training Loss: 9.975577995646745e-06, Learning Rate: 1.8079554138239473e-05)
+Step... (42500/437388 | Training Loss: 9.999113899539225e-06, Learning Rate: 1.80566894414369e-05)
+Step... (43000/437388 | Training Loss: 9.999290341511369e-06, Learning Rate: 1.8033828382613137e-05)
+Step... (43500/437388 | Training Loss: 9.998851055570412e-06, Learning Rate: 1.801096550479997e-05)
+Step... (44000/437388 | Training Loss: 9.995937034545932e-06, Learning Rate: 1.79881026269868e-05)
+Step... (44500/437388 | Training Loss: 9.999046596931294e-06, Learning Rate: 1.796523793018423e-05)
+Step... (45000/437388 | Training Loss: 1.0032883437816054e-05, Learning Rate: 1.7942375052371062e-05)
+Step... (45500/437388 | Training Loss: 9.680898074293509e-06, Learning Rate: 1.7919512174557894e-05)
+Step... (46000/437388 | Training Loss: 9.454144674236886e-06, Learning Rate: 1.7896649296744727e-05)
+Step... (46500/437388 | Training Loss: 9.958406735677272e-06, Learning Rate: 1.787378641893156e-05)
+Step... (47000/437388 | Training Loss: 7.78274807089474e-06, Learning Rate: 1.785092354111839e-05)
+Step... (47500/437388 | Training Loss: 9.662552656664047e-06, Learning Rate: 1.7828060663305223e-05)
+Step... (48000/437388 | Training Loss: 9.67618416325422e-06, Learning Rate: 1.7805197785492055e-05)
+Step... (48500/437388 | Training Loss: 9.7446582003613e-06, Learning Rate: 1.7782334907678887e-05)
+Step... (49000/437388 | Training Loss: 9.835666787694208e-06, Learning Rate: 1.775947202986572e-05)
+Step... (49500/437388 | Training Loss: 9.96859853330534e-06, Learning Rate: 1.773660915205255e-05)
+Step... (50000/437388 | Training Loss: 9.844617125054356e-06, Learning Rate: 1.7713746274239384e-05)
+Step... (50500/437388 | Training Loss: 9.803032298805192e-06, Learning Rate: 1.7690881577436812e-05)
+Step... (51000/437388 | Training Loss: 9.660185241955332e-06, Learning Rate: 1.7668020518613048e-05)
+Step... (51500/437388 | Training Loss: 9.900993063638452e-06, Learning Rate: 1.764515764079988e-05)
+Step... (52000/437388 | Training Loss: 9.604618753655814e-06, Learning Rate: 1.7622294762986712e-05)
+Step... (52500/437388 | Training Loss: 6.536213732033502e-06, Learning Rate: 1.759943006618414e-05)
+Step... (53000/437388 | Training Loss: 9.850440619629808e-06, Learning Rate: 1.7576567188370973e-05)
+Step... (53500/437388 | Training Loss: 9.80033291853033e-06, Learning Rate: 1.7553704310557805e-05)
+Step... (54000/437388 | Training Loss: 9.600864359526895e-06, Learning Rate: 1.7530841432744637e-05)
+Step... (54500/437388 | Training Loss: 9.353519999422133e-06, Learning Rate: 1.750797855493147e-05)
+Step... (55000/437388 | Training Loss: 9.570900147082284e-06, Learning Rate: 1.74851156771183e-05)
+Step... (55500/437388 | Training Loss: 7.14508587407181e-06, Learning Rate: 1.7462252799305134e-05)
+Step... (56000/437388 | Training Loss: 7.058968549245037e-06, Learning Rate: 1.7439389921491966e-05)
+Step... (56500/437388 | Training Loss: -2.7393669370212592e-05, Learning Rate: 1.7416527043678798e-05)
+Step... (57000/437388 | Training Loss: 7.528081368946005e-06, Learning Rate: 1.739366416586563e-05)
+Step... (57500/437388 | Training Loss: -2.508041143300943e-05, Learning Rate: 1.7370801288052462e-05)
+Step... (58000/437388 | Training Loss: -0.0005006093415431678, Learning Rate: 1.734793659124989e-05)
+Step... (58500/437388 | Training Loss: -8.784182682575192e-06, Learning Rate: 1.7325073713436723e-05)
+Step... (59000/437388 | Training Loss: -0.05110815167427063, Learning Rate: 1.730221265461296e-05)
+Step... (59500/437388 | Training Loss: -1.4655773156846408e-05, Learning Rate: 1.727934977679979e-05)
+Step... (60000/437388 | Training Loss: 9.906179911922663e-06, Learning Rate: 1.725648507999722e-05)
+Step... (60500/437388 | Training Loss: 9.99689018499339e-06, Learning Rate: 1.723362220218405e-05)
+Step... (61000/437388 | Training Loss: 9.974900422093924e-06, Learning Rate: 1.7210759324370883e-05)
+Step... (61500/437388 | Training Loss: -0.006380284670740366, Learning Rate: 1.7187896446557716e-05)
+Step... (62000/437388 | Training Loss: 9.996319931815378e-06, Learning Rate: 1.7165033568744548e-05)
+Step... (62500/437388 | Training Loss: 7.178080522862729e-06, Learning Rate: 1.714217069093138e-05)
+Step... (63000/437388 | Training Loss: 9.890783985611051e-06, Learning Rate: 1.7119307813118212e-05)
+Step... (63500/437388 | Training Loss: 9.915613190969452e-06, Learning Rate: 1.7096444935305044e-05)
+Step... (64000/437388 | Training Loss: 9.99755684460979e-06, Learning Rate: 1.7073582057491876e-05)
+Step... (64500/437388 | Training Loss: 9.984622920455877e-06, Learning Rate: 1.705071917967871e-05)
+Step... (65000/437388 | Training Loss: 8.908338713808917e-06, Learning Rate: 1.702785630186554e-05)
+Step... (65500/437388 | Training Loss: 1.002239605440991e-05, Learning Rate: 1.7004993424052373e-05)
+Step... (66000/437388 | Training Loss: 9.993064850277733e-06, Learning Rate: 1.69821287272498e-05)
+Step... (66500/437388 | Training Loss: 9.901496014208533e-06, Learning Rate: 1.6959265849436633e-05)
+Step... (67000/437388 | Training Loss: 9.920498996507376e-06, Learning Rate: 1.693640479061287e-05)
+Step... (67500/437388 | Training Loss: 8.78564333106624e-06, Learning Rate: 1.69135419127997e-05)
+Step... (68000/437388 | Training Loss: 1.0045086128229741e-05, Learning Rate: 1.689067721599713e-05)
+Step... (68500/437388 | Training Loss: 9.996912012866233e-06, Learning Rate: 1.6867814338183962e-05)
+Step... (69000/437388 | Training Loss: 1.0577703505987301e-05, Learning Rate: 1.6844951460370794e-05)
+Step... (69500/437388 | Training Loss: 6.859060704300646e-06, Learning Rate: 1.6822088582557626e-05)
+Step... (70000/437388 | Training Loss: 9.735958883538842e-06, Learning Rate: 1.6799225704744458e-05)
+Step... (70500/437388 | Training Loss: 3.69073313777335e-06, Learning Rate: 1.677636282693129e-05)
+Step... (71000/437388 | Training Loss: 1.5452194929821417e-05, Learning Rate: 1.6753499949118122e-05)
+Step... (71500/437388 | Training Loss: 7.551490853074938e-06, Learning Rate: 1.6730637071304955e-05)
+Step... (72000/437388 | Training Loss: 9.999574103858322e-06, Learning Rate: 1.6707774193491787e-05)
+Step... (72500/437388 | Training Loss: 1.0001018381444737e-05, Learning Rate: 1.668491131567862e-05)
+Step... (73000/437388 | Training Loss: 9.91267279459862e-06, Learning Rate: 1.666204843786545e-05)
+Step... (73500/437388 | Training Loss: 1.0612275218591094e-05, Learning Rate: 1.663918374106288e-05)
+Step... (74000/437388 | Training Loss: 9.93158118944848e-06, Learning Rate: 1.6616322682239115e-05)
+Step... (74500/437388 | Training Loss: 9.999830581364222e-06, Learning Rate: 1.6593457985436544e-05)
+Step... (75000/437388 | Training Loss: 9.99673648038879e-06, Learning Rate: 1.657059692661278e-05)
+Step... (75500/437388 | Training Loss: 9.667587619333062e-06, Learning Rate: 1.6547732229810208e-05)
+Step... (76000/437388 | Training Loss: 6.908721843501553e-06, Learning Rate: 1.652486935199704e-05)
+Step... (76500/437388 | Training Loss: 9.978556590795051e-06, Learning Rate: 1.6502006474183872e-05)
+Step... (77000/437388 | Training Loss: 9.992380000767298e-06, Learning Rate: 1.6479143596370704e-05)
+Step... (77500/437388 | Training Loss: 9.999144822359085e-06, Learning Rate: 1.6456280718557537e-05)
+Step... (78000/437388 | Training Loss: 9.959425369743258e-06, Learning Rate: 1.643341784074437e-05)
+Step... (78500/437388 | Training Loss: 4.806013748748228e-06, Learning Rate: 1.64105549629312e-05)
+Step... (79000/437388 | Training Loss: 9.999519534176216e-06, Learning Rate: 1.6387692085118033e-05)
+Step... (79500/437388 | Training Loss: -3.0628798413090408e-06, Learning Rate: 1.6364829207304865e-05)
+Step... (80000/437388 | Training Loss: 3.14687640639022e-05, Learning Rate: 1.6341966329491697e-05)
+Step... (80500/437388 | Training Loss: -5.9503574448172e-05, Learning Rate: 1.631910345167853e-05)
+Step... (81000/437388 | Training Loss: 9.830844646785408e-06, Learning Rate: 1.629624057386536e-05)
+Step... (81500/437388 | Training Loss: -0.0007751630619168282, Learning Rate: 1.627337587706279e-05)
+Step... (82000/437388 | Training Loss: 7.780356099829078e-06, Learning Rate: 1.6250514818239026e-05)
+Step... (82500/437388 | Training Loss: 9.727603355713654e-06, Learning Rate: 1.6227650121436454e-05)
+Step... (83000/437388 | Training Loss: 9.734287232276984e-06, Learning Rate: 1.620478906261269e-05)
+Step... (83500/437388 | Training Loss: 8.165938197635114e-06, Learning Rate: 1.618192436581012e-05)
+Step... (84000/437388 | Training Loss: -4.21609138356871e-06, Learning Rate: 1.615906148799695e-05)
+Step... (84500/437388 | Training Loss: -2.6465124392416328e-05, Learning Rate: 1.6136198610183783e-05)
+Step... (85000/437388 | Training Loss: 1.0105111869052052e-05, Learning Rate: 1.6113335732370615e-05)
+Step... (85500/437388 | Training Loss: -0.00010695862147258595, Learning Rate: 1.6090472854557447e-05)
+Step... (86000/437388 | Training Loss: -0.00014172535156831145, Learning Rate: 1.606760997674428e-05)
+Step... (86500/437388 | Training Loss: 9.377619790029712e-06, Learning Rate: 1.604474709893111e-05)
+Step... (87000/437388 | Training Loss: 1.3652050256496295e-05, Learning Rate: 1.6021884221117944e-05)
+Step... (87500/437388 | Training Loss: 0.00031775905517861247, Learning Rate: 1.5999021343304776e-05)
+Step... (88000/437388 | Training Loss: 7.545562766608782e-06, Learning Rate: 1.5976158465491608e-05)
+Step... (88500/437388 | Training Loss: -0.0010930340504273772, Learning Rate: 1.595329558767844e-05)
+Step... (89000/437388 | Training Loss: 3.0137512112560216e-06, Learning Rate: 1.593043089087587e-05)
+Step... (89500/437388 | Training Loss: 0.009223783388733864, Learning Rate: 1.59075680130627e-05)
+Step... (90000/437388 | Training Loss: -0.0023263858165591955, Learning Rate: 1.5884706954238936e-05)
+Step... (90500/437388 | Training Loss: -0.0009863653685897589, Learning Rate: 1.586184407642577e-05)
+Step... (91000/437388 | Training Loss: -8.11091304058209e-05, Learning Rate: 1.5838979379623197e-05)
+Step... (91500/437388 | Training Loss: -0.004317307844758034, Learning Rate: 1.581611650181003e-05)
+Step... (92000/437388 | Training Loss: -0.0005784975364804268, Learning Rate: 1.579325362399686e-05)
+Step... (92500/437388 | Training Loss: 9.436316759092733e-06, Learning Rate: 1.5770390746183693e-05)
+Step... (93000/437388 | Training Loss: -0.00013581309758592397, Learning Rate: 1.5747527868370526e-05)
+Step... (93500/437388 | Training Loss: 8.767711733526085e-06, Learning Rate: 1.5724664990557358e-05)
+Step... (94000/437388 | Training Loss: -0.001196634373627603, Learning Rate: 1.570180211274419e-05)
+Step... (94500/437388 | Training Loss: -4.4073633034713566e-05, Learning Rate: 1.5678939234931022e-05)
+Step... (95000/437388 | Training Loss: -0.0004681225400418043, Learning Rate: 1.5656076357117854e-05)
+Step... (95500/437388 | Training Loss: -0.03802439197897911, Learning Rate: 1.5633213479304686e-05)
+Step... (96000/437388 | Training Loss: 72.98323059082031, Learning Rate: 1.561035060149152e-05)
+Step... (96500/437388 | Training Loss: -7.45706565794535e-05, Learning Rate: 1.558748772367835e-05)
+Step... (97000/437388 | Training Loss: -1.4632112652179785e-05, Learning Rate: 1.556462302687578e-05)
+Step... (97500/437388 | Training Loss: -0.0013888446846976876, Learning Rate: 1.554176014906261e-05)
+Step... (98000/437388 | Training Loss: -0.009409919381141663, Learning Rate: 1.5518899090238847e-05)
+Step... (98500/437388 | Training Loss: -0.3626460134983063, Learning Rate: 1.549603621242568e-05)
+Step... (99000/437388 | Training Loss: 9.256023986381479e-06, Learning Rate: 1.5473171515623108e-05)
+Step... (99500/437388 | Training Loss: 1.0022363312600646e-05, Learning Rate: 1.545030863780994e-05)
+Step... (100000/437388 | Training Loss: -0.006803448777645826, Learning Rate: 1.5427445759996772e-05)
+Step... (100500/437388 | Training Loss: -0.0002515804662834853, Learning Rate: 1.5404582882183604e-05)
+Step... (101000/437388 | Training Loss: -0.0071137938648462296, Learning Rate: 1.5381720004370436e-05)
+Step... (101500/437388 | Training Loss: -0.0006095135468058288, Learning Rate: 1.5358857126557268e-05)
+Step... (102000/437388 | Training Loss: 0.0003835784154944122, Learning Rate: 1.53359942487441e-05)
+Step... (102500/437388 | Training Loss: 7.004499821050558e-06, Learning Rate: 1.5313131370930932e-05)
+Step... (103000/437388 | Training Loss: 6.723953447362874e-07, Learning Rate: 1.5290268493117765e-05)
+Step... (103500/437388 | Training Loss: -9.461044101044536e-06, Learning Rate: 1.5267405615304597e-05)
+Step... (104000/437388 | Training Loss: -3.5358179957256652e-06, Learning Rate: 1.5244541827996727e-05)
+Step... (104500/437388 | Training Loss: -95.99297332763672, Learning Rate: 1.522167895018356e-05)
+Step... (105000/437388 | Training Loss: -1.1236647878831718e-05, Learning Rate: 1.5198816981865093e-05)
+Step... (105500/437388 | Training Loss: 8.958060789154842e-06, Learning Rate: 1.5175952285062522e-05)
+Step... (106000/437388 | Training Loss: 9.924059668264817e-06, Learning Rate: 1.5153090316744056e-05)
+Step... (106500/437388 | Training Loss: 5.033837169321487e-06, Learning Rate: 1.5130227438930888e-05)
+Step... (107000/437388 | Training Loss: 9.999862413678784e-06, Learning Rate: 1.510736456111772e-05)
+Step... (107500/437388 | Training Loss: 9.653651432017796e-06, Learning Rate: 1.508450077380985e-05)
+Step... (108000/437388 | Training Loss: 9.98562973109074e-06, Learning Rate: 1.5061637895996682e-05)
+Step... (108500/437388 | Training Loss: 9.999821486417204e-06, Learning Rate: 1.5038775018183514e-05)
+Step... (109000/437388 | Training Loss: 7.144420123950113e-06, Learning Rate: 1.5015911230875645e-05)
+Step... (109500/437388 | Training Loss: 9.997345841838978e-06, Learning Rate: 1.499305017205188e-05)
+Step... (110000/437388 | Training Loss: 9.997858796850778e-06, Learning Rate: 1.4970185475249309e-05)
+Step... (110500/437388 | Training Loss: 9.991859769797884e-06, Learning Rate: 1.4947323506930843e-05)
+Step... (111000/437388 | Training Loss: -2.8533329896163195e-05, Learning Rate: 1.4924460629117675e-05)
+Step... (111500/437388 | Training Loss: -8.355713362107053e-06, Learning Rate: 1.4901596841809805e-05)
+Step... (112000/437388 | Training Loss: 8.365332178073004e-06, Learning Rate: 1.4878733963996638e-05)
+Step... (112500/437388 | Training Loss: 9.927052815328352e-06, Learning Rate: 1.485587108618347e-05)
+Step... (113000/437388 | Training Loss: -3.364314034115523e-05, Learning Rate: 1.4833009117865004e-05)
+Step... (113500/437388 | Training Loss: -20.275516510009766, Learning Rate: 1.4810144421062432e-05)
+Step... (114000/437388 | Training Loss: 9.999565008911304e-06, Learning Rate: 1.4787282452743966e-05)
+Step... (114500/437388 | Training Loss: 9.948298611561768e-06, Learning Rate: 1.4764419574930798e-05)
+Step... (115000/437388 | Training Loss: 9.998351742979139e-06, Learning Rate: 1.474155669711763e-05)
+Step... (115500/437388 | Training Loss: -0.4263361990451813, Learning Rate: 1.471869290980976e-05)
+Step... (116000/437388 | Training Loss: 9.999733265431132e-06, Learning Rate: 1.4695830031996593e-05)
+Step... (116500/437388 | Training Loss: 9.999770554713905e-06, Learning Rate: 1.4672968063678127e-05)
+Step... (117000/437388 | Training Loss: 1.01969098977861e-05, Learning Rate: 1.4650103366875555e-05)
+Step... (117500/437388 | Training Loss: 9.999991561926436e-06, Learning Rate: 1.462724139855709e-05)
+Step... (118000/437388 | Training Loss: 1.2863829397247173e-05, Learning Rate: 1.4604378520743921e-05)
+Step... (118500/437388 | Training Loss: 9.999992471421137e-06, Learning Rate: 1.4581515642930754e-05)
+Step... (119000/437388 | Training Loss: 9.999997928389348e-06, Learning Rate: 1.4558651855622884e-05)
+Step... (119500/437388 | Training Loss: 9.99999883788405e-06, Learning Rate: 1.4535788977809716e-05)
+Step... (120000/437388 | Training Loss: 9.99999883788405e-06, Learning Rate: 1.451292700949125e-05)
+Step... (120500/437388 | Training Loss: 1.0000000656873453e-05, Learning Rate: 1.449006322218338e-05)
+Step... (121000/437388 | Training Loss: 9.999996109399945e-06, Learning Rate: 1.4467201253864914e-05)
+Step... (121500/437388 | Training Loss: 9.99999883788405e-06, Learning Rate: 1.4444336557062343e-05)
+Step... (122000/437388 | Training Loss: 9.99996336759068e-06, Learning Rate: 1.4421474588743877e-05)
+Step... (122500/437388 | Training Loss: 0.21177683770656586, Learning Rate: 1.4398611710930709e-05)
+Step... (123000/437388 | Training Loss: -96.042724609375, Learning Rate: 1.4375747923622839e-05)
+Step... (123500/437388 | Training Loss: -1.1273064046690706e-05, Learning Rate: 1.4352885045809671e-05)
+Step... (124000/437388 | Training Loss: -0.0012545139761641622, Learning Rate: 1.4330022167996503e-05)
+Step... (124500/437388 | Training Loss: -2.347531795501709, Learning Rate: 1.4307160199678037e-05)
+Step... (125000/437388 | Training Loss: 9.999992471421137e-06, Learning Rate: 1.4284295502875466e-05)
+Step... (125500/437388 | Training Loss: 9.999503163271584e-06, Learning Rate: 1.4261433534557e-05)
+Step... (126000/437388 | Training Loss: 1.1671419997583143e-05, Learning Rate: 1.4238570656743832e-05)
+Step... (126500/437388 | Training Loss: 9.99999883788405e-06, Learning Rate: 1.4215707778930664e-05)
+Step... (127000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.4192843991622794e-05)
+Step... (127500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.4169981113809627e-05)
+Step... (128000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.414711914549116e-05)
+Step... (128500/437388 | Training Loss: -95.99994659423828, Learning Rate: 1.4124254448688589e-05)
+Step... (129000/437388 | Training Loss: -0.0011440961388871074, Learning Rate: 1.4101392480370123e-05)
+Step... (129500/437388 | Training Loss: 9.9998705991311e-06, Learning Rate: 1.4078528693062253e-05)
+Step... (130000/437388 | Training Loss: 9.925997801474296e-06, Learning Rate: 1.4055666724743787e-05)
+Step... (130500/437388 | Training Loss: -0.0002359842910664156, Learning Rate: 1.403280384693062e-05)
+Step... (131000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.400994005962275e-05)
+Step... (131500/437388 | Training Loss: 9.999987923947629e-06, Learning Rate: 1.3987077181809582e-05)
+Step... (132000/437388 | Training Loss: 9.998908353736624e-06, Learning Rate: 1.3964214303996414e-05)
+Step... (132500/437388 | Training Loss: 9.866323125606868e-06, Learning Rate: 1.3941352335677948e-05)
+Step... (133000/437388 | Training Loss: 9.999997928389348e-06, Learning Rate: 1.3918487638875376e-05)
+Step... (133500/437388 | Training Loss: 9.99999883788405e-06, Learning Rate: 1.389562567055691e-05)
+Step... (134000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3872762792743742e-05)
+Step... (134500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3849899005435873e-05)
+Step... (135000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3827036127622705e-05)
+Step... (135500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3804173249809537e-05)
+Step... (136000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3781311281491071e-05)
+Step... (136500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.37584465846885e-05)
+Step... (137000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3735584616370033e-05)
+Step... (137500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3712721738556866e-05)
+Step... (138000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3689858860743698e-05)
+Step... (138500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3666995073435828e-05)
+Step... (139000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.364413219562266e-05)
+Step... (139500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3621270227304194e-05)
+Step... (140000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3598405530501623e-05)
+Step... (140500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3575543562183157e-05)
+Step... (141000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3552679774875287e-05)
+Step... (141500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3529817806556821e-05)
+Step... (142000/437388 | Training Loss: 1.0000000656873453e-05, Learning Rate: 1.3506954928743653e-05)
+Step... (142500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3484091141435783e-05)
+Step... (143000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3461228263622615e-05)
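The log has one plain-text line per 500 training steps. A small parsing sketch for extracting (step, loss, learning rate) triples (the regex and file handling are an assumption, not part of this commit):

    # a minimal parsing sketch; the log path mirrors this repository's layout
    import re

    pattern = re.compile(
        r"Step\.\.\. \((\d+)/\d+ \| Training Loss: ([-\d.e]+), Learning Rate: ([-\d.e]+)\)"
    )

    records = []
    with open("logs/2022-07-14-roberta-base.txt") as log_file:
        for line in log_file:
            match = pattern.search(line)
            if match:
                # collect triples, e.g. for plotting the loss and learning-rate curves
                records.append((int(match[1]), float(match[2]), float(match[3])))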
preprocess.sh
ADDED
@@ -0,0 +1,2 @@
+python3 process_bignews.py ~/data/bignews/bignews_$SPLIT.json /dev/shm/news_$SPLIT
+# python3 process_bignews.py ~/data/bignews/bignews_$SPLIT.json ~/data/bignews/processed_lite/bignews_$SPLIT --ratio 0.05
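Note that preprocess.sh does not set SPLIT itself; it expects the split name (for example train or val) to come from the caller's environment, matching the per-split calls spelled out explicitly in create_dataset.sh above.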
process_bignews.py
ADDED
@@ -0,0 +1,112 @@
+from typing import List, Dict, Tuple
+import ujson as json
+import argparse
+from collections import defaultdict
+
+from tqdm.auto import tqdm
+
+Entry = Dict[str, str]
+BigNews = List[List[Entry]]
+
+parser = argparse.ArgumentParser()
+parser.add_argument("big_news_path")
+parser.add_argument("output_prefix")
+parser.add_argument("--ratio", type=float, default=1.0, required=False)
+args = parser.parse_args()
+
+big_news_path: str = args.big_news_path
+output_prefix: str = args.output_prefix
+ratio: float = args.ratio
+
+print("Loading input file.")
+with open(big_news_path, "r") as big_news_file:
+    big_news: BigNews = json.load(big_news_file)
+
+
+side_map_transposed = {
+    "L": ["dailykos", "hpo", "cnn", "wpo", "nyt"],
+    "R": ["wat", "fox", "breitbart"],
+}
+
+side_map = {}
+
+for key, values in side_map_transposed.items():
+    for value in values:
+        side_map[value] = key
+
+
+def get_entry_side(entry: Entry) -> str:
+    outlet = entry["source"]
+    side = side_map.get(outlet)
+    return side
+
+
+big_news_flattened: List[str] = list()
+
+# (event, side)
+big_news_event_side_lookup_keys: List[List[int]] = list()
+
+entry_index = 0
+
+event_lookup_info: List[Tuple[int, int]] = []
+if ratio < 1.0:
+    num_events = int(len(big_news) * ratio)
+else:
+    num_events = len(big_news)
+
+for event in tqdm(big_news[:num_events], desc="Flattening"):
+    event: List[Entry]
+    index_by_sides = defaultdict(list)
+    sides = set()
+
+    for entry in event:
+        entry_text = entry["text"]
+        entry_text = " ".join(entry_text) + "\n"
+        big_news_flattened.append("")
+        big_news_flattened[entry_index] = entry_text
+
+        entry_side = get_entry_side(entry)
+
+        if entry_side:
+            index_by_sides[entry_side].append(entry_index)
+            sides.add(entry_side)
+
+        entry_index += 1
+
+    sides = list(sides)
+
+    num_sides = len(index_by_sides.keys())
+
+    for side_a_index in range(num_sides):
+        for side_b_index in range(side_a_index + 1, num_sides):
+            side_a = sides[side_a_index]
+            side_b = sides[side_b_index]
+
+            for side_a_entry_index in index_by_sides[side_a]:
+                for side_b_entry_index in index_by_sides[side_b]:
+                    # Maximize distance over negative (non-matching) examples
+                    event_lookup_info.append(
+                        (-1, side_a_entry_index, side_b_entry_index)
+                    )
+
+        side_a_indices = index_by_sides[side_a]
+        num_side_a_indices = len(side_a_indices)
+        for x_index in range(num_side_a_indices):
+            # Minimize distance over positive (matching) examples
+            for y_index in range(x_index + 1, num_side_a_indices):
+                event_lookup_info.append(
+                    (1, side_a_indices[x_index], side_a_indices[y_index])
+                )
+
+del big_news
+
+big_news_flattened_path = output_prefix + "_text.txt"
+lookup_json_path = output_prefix + "_lookup.json"
+
+print("Writing flattened text.")
+with open(big_news_flattened_path, "w") as big_news_flattened_file:
+    big_news_flattened_file.writelines(big_news_flattened)
+
+print("Writing lookup json.")
+with open(lookup_json_path, "w") as lookup_json_file:
+    json.dump(event_lookup_info, lookup_json_file, indent=2)
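For orientation, a small sketch of how the two outputs of process_bignews.py could be combined downstream (the /dev/shm/news_train prefix follows create_dataset.sh; the pairing loop itself is an assumption, not code from this repository):

    # a minimal sketch; file names follow the output_prefix convention used above
    import json

    with open("/dev/shm/news_train_text.txt") as text_file:
        texts = text_file.readlines()  # one flattened article per line, indexed by entry_index

    with open("/dev/shm/news_train_lookup.json") as lookup_file:
        lookup = json.load(lookup_file)  # items of the form [label, index_a, index_b]

    for label, index_a, index_b in lookup[:5]:
        # label is 1 for same-side (matching) pairs and -1 for cross-side (non-matching) pairs
        pair = (label, texts[index_a], texts[index_b])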