Joshua Lochner committed on
Commit
eaa79a8
1 Parent(s): d34e3fe

Use logger instead of printing when loading datasets

Browse files
Files changed (1) hide show
  1. src/shared.py +12 -13
src/shared.py CHANGED
@@ -1,5 +1,4 @@
1
  from transformers.trainer_utils import get_last_checkpoint as glc
2
- from transformers import Seq2SeqTrainingArguments, TrainingArguments
3
  import os
4
  from utils import re_findall
5
  import logging
@@ -15,6 +14,17 @@ from typing import Optional
15
  from dataclasses import dataclass, field
16
  from enum import Enum
17
 
 
 
 
 
 
 
 
 
 
 
 
18
  CATEGORIES = [None, 'SPONSOR', 'SELFPROMO', 'INTERACTION']
19
 
20
  ACTION_OPTIONS = ['skip', 'mute', 'full']
@@ -234,7 +244,7 @@ def reset():
234
 
235
  def load_datasets(dataset_args: DatasetArguments):
236
 
237
- print('Reading datasets')
238
  data_files = {}
239
 
240
  if dataset_args.train_file is not None:
@@ -333,17 +343,6 @@ class CustomTrainingArguments(OutputArguments, AdditionalTrainingArguments):
333
  pass
334
 
335
 
336
- logging.basicConfig()
337
- logger = logging.getLogger(__name__)
338
-
339
- # Setup logging
340
- logging.basicConfig(
341
- format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
342
- datefmt='%m/%d/%Y %H:%M:%S',
343
- handlers=[logging.StreamHandler(sys.stdout)],
344
- )
345
-
346
-
347
  def get_last_checkpoint(training_args):
348
  last_checkpoint = None
349
  if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir:
 
1
  from transformers.trainer_utils import get_last_checkpoint as glc
 
2
  import os
3
  from utils import re_findall
4
  import logging
 
14
  from dataclasses import dataclass, field
15
  from enum import Enum
16
 
17
+
18
+ logging.basicConfig()
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # Setup logging
22
+ logging.basicConfig(
23
+ format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
24
+ datefmt='%m/%d/%Y %H:%M:%S',
25
+ handlers=[logging.StreamHandler(sys.stdout)],
26
+ )
27
+
28
  CATEGORIES = [None, 'SPONSOR', 'SELFPROMO', 'INTERACTION']
29
 
30
  ACTION_OPTIONS = ['skip', 'mute', 'full']
 
244
 
245
  def load_datasets(dataset_args: DatasetArguments):
246
 
247
+ logger.info('Reading datasets')
248
  data_files = {}
249
 
250
  if dataset_args.train_file is not None:
 
343
  pass
344
 
345
 
 
 
 
 
 
 
 
 
 
 
 
346
  def get_last_checkpoint(training_args):
347
  last_checkpoint = None
348
  if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir: