Spaces:

jesseplusplus
/

easy-translate

Running

Iker committed on May 29, 2022

Commit

57ffa19

•

1 Parent(s): 95c4d87

Fix bug

Files changed (2) hide show

dataset.py CHANGED Viewed

@@ -12,6 +12,8 @@ class DatasetReader(IterableDataset):
         self.tokenizer = tokenizer
         self.max_length = max_length
         self.current_line = 0
     def preprocess(self, text: str):
         self.current_line += 1
@@ -31,6 +33,9 @@ class DatasetReader(IterableDataset):
         mapped_itr = map(self.preprocess, file_itr)
         return mapped_itr
 class ParallelTextReader(IterableDataset):
     def __init__(self, pred_path: str, gold_path: str):

         self.tokenizer = tokenizer
         self.max_length = max_length
         self.current_line = 0
+        self.total_lines = count_lines(filename)
+        print(f"{self.total_lines} lines in {filename}")
     def preprocess(self, text: str):
         self.current_line += 1
         mapped_itr = map(self.preprocess, file_itr)
         return mapped_itr
+    def __len__(self):
+        return self.total_lines
 class ParallelTextReader(IterableDataset):
     def __init__(self, pred_path: str, gold_path: str):

translate.py CHANGED Viewed

@@ -99,11 +99,11 @@ def main(
         "num_return_sequences": 1,
     }
-    total_lines: int = count_lines(sentences_path)
     if accelerator.is_main_process:
         print(
-            f"** We will translate {total_lines} lines. **\n"
             f"Input file: {sentences_path}\n"
             f"Output file: {output_path}\n"
             f"Source language: {source_lang}\n"

         "num_return_sequences": 1,
     }
+    # total_lines: int = count_lines(sentences_path)
     if accelerator.is_main_process:
         print(
+            f"** Translation **\n"
             f"Input file: {sentences_path}\n"
             f"Output file: {output_path}\n"
             f"Source language: {source_lang}\n"