change
Browse files- preprocess.py +5 -1
preprocess.py
CHANGED
@@ -42,6 +42,7 @@ class DataProcessor(object):
|
|
42 |
if line:
|
43 |
labels_map[line] = index
|
44 |
|
|
|
45 |
with open(path, mode="r", encoding="UTF-8") as f:
|
46 |
for line in tqdm(f):
|
47 |
line = line.strip()
|
@@ -52,13 +53,16 @@ class DataProcessor(object):
|
|
52 |
if mapped_label is not None and isinstance(mapped_label, int) and mapped_label >= 0:
|
53 |
contents.append(content)
|
54 |
labels.append(mapped_label)
|
55 |
-
|
|
|
|
|
56 |
#random shuffle
|
57 |
index = list(range(len(labels)))
|
58 |
random.seed(self.seed)
|
59 |
random.shuffle(index)
|
60 |
contents = [contents[_] for _ in index]
|
61 |
labels = [labels[_] for _ in index]
|
|
|
62 |
return (contents, labels)
|
63 |
|
64 |
def __next__(self):
|
|
|
42 |
if line:
|
43 |
labels_map[line] = index
|
44 |
|
45 |
+
print("labels_map [{}]".format(labels_map))
|
46 |
with open(path, mode="r", encoding="UTF-8") as f:
|
47 |
for line in tqdm(f):
|
48 |
line = line.strip()
|
|
|
53 |
if mapped_label is not None and isinstance(mapped_label, int) and mapped_label >= 0:
|
54 |
contents.append(content)
|
55 |
labels.append(mapped_label)
|
56 |
+
else:
|
57 |
+
print("not match label [{}:{}]".format(content,mapped_label))
|
58 |
+
|
59 |
#random shuffle
|
60 |
index = list(range(len(labels)))
|
61 |
random.seed(self.seed)
|
62 |
random.shuffle(index)
|
63 |
contents = [contents[_] for _ in index]
|
64 |
labels = [labels[_] for _ in index]
|
65 |
+
print("load datas contents label [{}:{}]".format(len(contents),len(labels)))
|
66 |
return (contents, labels)
|
67 |
|
68 |
def __next__(self):
|