saily committed on
Commit
d51e5f3
·
1 Parent(s): 234aef7
Files changed (1) hide show
  1. preprocess.py +5 -1
preprocess.py CHANGED
@@ -42,6 +42,7 @@ class DataProcessor(object):
42
  if line:
43
  labels_map[line] = index
44
 
 
45
  with open(path, mode="r", encoding="UTF-8") as f:
46
  for line in tqdm(f):
47
  line = line.strip()
@@ -52,13 +53,16 @@ class DataProcessor(object):
52
  if mapped_label is not None and isinstance(mapped_label, int) and mapped_label >= 0:
53
  contents.append(content)
54
  labels.append(mapped_label)
55
-
 
 
56
  #random shuffle
57
  index = list(range(len(labels)))
58
  random.seed(self.seed)
59
  random.shuffle(index)
60
  contents = [contents[_] for _ in index]
61
  labels = [labels[_] for _ in index]
 
62
  return (contents, labels)
63
 
64
  def __next__(self):
 
42
  if line:
43
  labels_map[line] = index
44
 
45
+ print("labels_map [{}]".format(labels_map))
46
  with open(path, mode="r", encoding="UTF-8") as f:
47
  for line in tqdm(f):
48
  line = line.strip()
 
53
  if mapped_label is not None and isinstance(mapped_label, int) and mapped_label >= 0:
54
  contents.append(content)
55
  labels.append(mapped_label)
56
+ else:
57
+ print("not match label [{}:{}]".format(content,mapped_label))
58
+
59
  #random shuffle
60
  index = list(range(len(labels)))
61
  random.seed(self.seed)
62
  random.shuffle(index)
63
  contents = [contents[_] for _ in index]
64
  labels = [labels[_] for _ in index]
65
+ print("load datas contents label [{}:{}]".format(len(contents),len(labels)))
66
  return (contents, labels)
67
 
68
  def __next__(self):