KoichiYasuoka
committed on
Commit
•
92f0b14
1
Parent(s):
f5c22ed
model improved
Browse files
- maker.py +16 -0
- pytorch_model.bin +1 -1
maker.py
CHANGED
@@ -21,6 +21,7 @@ class UDgoeswithDataset(object):
|
|
21 |
if len(t)==10 and t[0].isdecimal():
|
22 |
c.append(t)
|
23 |
elif c!=[]:
|
|
|
24 |
v=tokenizer([t[1].replace(" ","_") for t in c],add_special_tokens=False)["input_ids"]
|
25 |
for i in range(len(v)-1,-1,-1):
|
26 |
for j in range(1,len(v[i])):
|
@@ -28,6 +29,21 @@ class UDgoeswithDataset(object):
|
|
28 |
y=["0"]+[t[0] for t in c]
|
29 |
h=[i if t[6]=="0" else y.index(t[6]) for i,t in enumerate(c,1)]
|
30 |
p,v=[t[3]+"|"+t[5]+"|"+t[7] for t in c],sum(v,[])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
if len(v)<tokenizer.model_max_length-3:
|
32 |
self.ids.append([cls]+v+[sep])
|
33 |
self.tags.append([dep]+p+[dep])
|
|
|
21 |
if len(t)==10 and t[0].isdecimal():
|
22 |
c.append(t)
|
23 |
elif c!=[]:
|
24 |
+
d=list(c)
|
25 |
v=tokenizer([t[1].replace(" ","_") for t in c],add_special_tokens=False)["input_ids"]
|
26 |
for i in range(len(v)-1,-1,-1):
|
27 |
for j in range(1,len(v[i])):
|
|
|
29 |
y=["0"]+[t[0] for t in c]
|
30 |
h=[i if t[6]=="0" else y.index(t[6]) for i,t in enumerate(c,1)]
|
31 |
p,v=[t[3]+"|"+t[5]+"|"+t[7] for t in c],sum(v,[])
|
32 |
+
if len(v)<tokenizer.model_max_length-3:
|
33 |
+
self.ids.append([cls]+v+[sep])
|
34 |
+
self.tags.append([dep]+p+[dep])
|
35 |
+
label=set(sum([self.tags[-1],list(label)],[]))
|
36 |
+
for i,k in enumerate(v):
|
37 |
+
self.ids.append([cls]+v[0:i]+[msk]+v[i+1:]+[sep,k])
|
38 |
+
self.tags.append([dep]+[t if h[j]==i+1 else dep for j,t in enumerate(p)]+[dep,dep])
|
39 |
+
c=d
|
40 |
+
v=tokenizer([t[1].replace("_"," ") for t in c],add_special_tokens=False)["input_ids"]
|
41 |
+
for i in range(len(v)-1,-1,-1):
|
42 |
+
for j in range(1,len(v[i])):
|
43 |
+
c.insert(i+1,[c[i][0],"_","_","X","_","_",c[i][0],"goeswith","_","_"])
|
44 |
+
y=["0"]+[t[0] for t in c]
|
45 |
+
h=[i if t[6]=="0" else y.index(t[6]) for i,t in enumerate(c,1)]
|
46 |
+
p,v=[t[3]+"|"+t[5]+"|"+t[7] for t in c],sum(v,[])
|
47 |
if len(v)<tokenizer.model_max_length-3:
|
48 |
self.ids.append([cls]+v+[sep])
|
49 |
self.tags.append([dep]+p+[dep])
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 538828593
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a53188b23df9f7933ce159ec14e1da5057afde959f352f3e17d90faf444024d
|
3 |
size 538828593
|