Spaces:
Running
Running
Fix bug
Browse files
- dataset.py +5 -0
- translate.py +2 -2
dataset.py
CHANGED
@@ -12,6 +12,8 @@ class DatasetReader(IterableDataset):
|
|
12 |
self.tokenizer = tokenizer
|
13 |
self.max_length = max_length
|
14 |
self.current_line = 0
|
|
|
|
|
15 |
|
16 |
def preprocess(self, text: str):
|
17 |
self.current_line += 1
|
@@ -31,6 +33,9 @@ class DatasetReader(IterableDataset):
|
|
31 |
mapped_itr = map(self.preprocess, file_itr)
|
32 |
return mapped_itr
|
33 |
|
|
|
|
|
|
|
34 |
|
35 |
class ParallelTextReader(IterableDataset):
|
36 |
def __init__(self, pred_path: str, gold_path: str):
|
|
|
12 |
self.tokenizer = tokenizer
|
13 |
self.max_length = max_length
|
14 |
self.current_line = 0
|
15 |
+
self.total_lines = count_lines(filename)
|
16 |
+
print(f"{self.total_lines} lines in {filename}")
|
17 |
|
18 |
def preprocess(self, text: str):
|
19 |
self.current_line += 1
|
|
|
33 |
mapped_itr = map(self.preprocess, file_itr)
|
34 |
return mapped_itr
|
35 |
|
36 |
+
def __len__(self):
|
37 |
+
return self.total_lines
|
38 |
+
|
39 |
|
40 |
class ParallelTextReader(IterableDataset):
|
41 |
def __init__(self, pred_path: str, gold_path: str):
|
translate.py
CHANGED
@@ -99,11 +99,11 @@ def main(
|
|
99 |
"num_return_sequences": 1,
|
100 |
}
|
101 |
|
102 |
-
total_lines: int = count_lines(sentences_path)
|
103 |
|
104 |
if accelerator.is_main_process:
|
105 |
print(
|
106 |
-
f"**
|
107 |
f"Input file: {sentences_path}\n"
|
108 |
f"Output file: {output_path}\n"
|
109 |
f"Source language: {source_lang}\n"
|
|
|
99 |
"num_return_sequences": 1,
|
100 |
}
|
101 |
|
102 |
+
# total_lines: int = count_lines(sentences_path)
|
103 |
|
104 |
if accelerator.is_main_process:
|
105 |
print(
|
106 |
+
f"** Translation **\n"
|
107 |
f"Input file: {sentences_path}\n"
|
108 |
f"Output file: {output_path}\n"
|
109 |
f"Source language: {source_lang}\n"
|