Iker committed on
Commit
0f4f627
·
1 Parent(s): 74502a8

Current output if translation predicts new line

Browse files
Files changed (2) hide show
  1. dataset.py +3 -3
  2. translate.py +4 -1
dataset.py CHANGED
@@ -17,7 +17,7 @@ class DatasetReader(IterableDataset):
17
 
18
  def preprocess(self, text: str):
19
  self.current_line += 1
20
- text = text.rstrip().strip()
21
  if len(text) == 0:
22
  print(f"Warning: empty sentence at line {self.current_line}")
23
  return self.tokenizer(
@@ -52,8 +52,8 @@ class ParallelTextReader(IterableDataset):
52
 
53
  def preprocess(self, pred: str, gold: str):
54
  self.current_line += 1
55
- pred = pred.rstrip().strip()
56
- gold = gold.rstrip().strip()
57
  if len(pred) == 0:
58
  print(f"Warning: Pred empty sentence at line {self.current_line}")
59
  if len(gold) == 0:
 
17
 
18
  def preprocess(self, text: str):
19
  self.current_line += 1
20
+ text = text.strip()
21
  if len(text) == 0:
22
  print(f"Warning: empty sentence at line {self.current_line}")
23
  return self.tokenizer(
 
52
 
53
  def preprocess(self, pred: str, gold: str):
54
  self.current_line += 1
55
+ pred = pred.strip()
56
+ gold = gold.strip()
57
  if len(pred) == 0:
58
  print(f"Warning: Pred empty sentence at line {self.current_line}")
59
  if len(gold) == 0:
translate.py CHANGED
@@ -209,7 +209,10 @@ def main(
209
  else:
210
  samples_seen += len(tgt_text)
211
 
212
- print("\n".join(tgt_text), file=output_file)
 
 
 
213
 
214
  pbar.update(len(tgt_text) // gen_kwargs["num_return_sequences"])
215
 
 
209
  else:
210
  samples_seen += len(tgt_text)
211
 
212
+ print(
213
+ "\n".join([repr(sentence) for sentence in tgt_text]),
214
+ file=output_file,
215
+ )
216
 
217
  pbar.update(len(tgt_text) // gen_kwargs["num_return_sequences"])
218