Iker committed on
Commit
cb568f0
1 Parent(s): ac284f7

Fix count lines

Browse files
Files changed (1) hide show
  1. dataset.py +1 -9
dataset.py CHANGED
@@ -1,17 +1,9 @@
1
  from torch.utils.data import IterableDataset
2
 
3
 
4
- def blocks(files, size=65536):
5
- while True:
6
- b = files.read(size)
7
- if not b:
8
- break
9
- yield b
10
-
11
-
12
  def count_lines(input_path: str) -> int:
13
  with open(input_path, "r", encoding="utf8") as f:
14
- return sum(bl.count("\n") for bl in blocks(f))
15
 
16
 
17
  class DatasetReader(IterableDataset):
 
1
  from torch.utils.data import IterableDataset
2
 
3
 
 
 
 
 
 
 
 
 
4
  def count_lines(input_path: str) -> int:
5
  with open(input_path, "r", encoding="utf8") as f:
6
+ return sum(1 for _ in f)
7
 
8
 
9
  class DatasetReader(IterableDataset):