Spaces:
Running
Running
Current output if translation predicts new line
Browse files
- dataset.py +3 -3
- translate.py +4 -1
dataset.py
CHANGED
@@ -17,7 +17,7 @@ class DatasetReader(IterableDataset):
|
|
17 |
|
18 |
def preprocess(self, text: str):
|
19 |
self.current_line += 1
|
20 |
-
text = text.
|
21 |
if len(text) == 0:
|
22 |
print(f"Warning: empty sentence at line {self.current_line}")
|
23 |
return self.tokenizer(
|
@@ -52,8 +52,8 @@ class ParallelTextReader(IterableDataset):
|
|
52 |
|
53 |
def preprocess(self, pred: str, gold: str):
|
54 |
self.current_line += 1
|
55 |
-
pred = pred.
|
56 |
-
gold = gold.
|
57 |
if len(pred) == 0:
|
58 |
print(f"Warning: Pred empty sentence at line {self.current_line}")
|
59 |
if len(gold) == 0:
|
|
|
17 |
|
18 |
def preprocess(self, text: str):
|
19 |
self.current_line += 1
|
20 |
+
text = text.strip()
|
21 |
if len(text) == 0:
|
22 |
print(f"Warning: empty sentence at line {self.current_line}")
|
23 |
return self.tokenizer(
|
|
|
52 |
|
53 |
def preprocess(self, pred: str, gold: str):
|
54 |
self.current_line += 1
|
55 |
+
pred = pred.strip()
|
56 |
+
gold = gold.strip()
|
57 |
if len(pred) == 0:
|
58 |
print(f"Warning: Pred empty sentence at line {self.current_line}")
|
59 |
if len(gold) == 0:
|
translate.py
CHANGED
@@ -209,7 +209,10 @@ def main(
|
|
209 |
else:
|
210 |
samples_seen += len(tgt_text)
|
211 |
|
212 |
-
print(
|
|
|
|
|
|
|
213 |
|
214 |
pbar.update(len(tgt_text) // gen_kwargs["num_return_sequences"])
|
215 |
|
|
|
209 |
else:
|
210 |
samples_seen += len(tgt_text)
|
211 |
|
212 |
+
print(
|
213 |
+
"\n".join([repr(sentence) for sentence in tgt_text]),
|
214 |
+
file=output_file,
|
215 |
+
)
|
216 |
|
217 |
pbar.update(len(tgt_text) // gen_kwargs["num_return_sequences"])
|
218 |
|