Spaces:
Running
Running
Joshua Lochner
committed on
Commit
•
eaa79a8
1
Parent(s):
d34e3fe
Use logger instead of printing when loading datasets
Browse files
- src/shared.py +12 -13
src/shared.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
from transformers.trainer_utils import get_last_checkpoint as glc
|
2 |
-
from transformers import Seq2SeqTrainingArguments, TrainingArguments
|
3 |
import os
|
4 |
from utils import re_findall
|
5 |
import logging
|
@@ -15,6 +14,17 @@ from typing import Optional
|
|
15 |
from dataclasses import dataclass, field
|
16 |
from enum import Enum
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
CATEGORIES = [None, 'SPONSOR', 'SELFPROMO', 'INTERACTION']
|
19 |
|
20 |
ACTION_OPTIONS = ['skip', 'mute', 'full']
|
@@ -234,7 +244,7 @@ def reset():
|
|
234 |
|
235 |
def load_datasets(dataset_args: DatasetArguments):
|
236 |
|
237 |
-
|
238 |
data_files = {}
|
239 |
|
240 |
if dataset_args.train_file is not None:
|
@@ -333,17 +343,6 @@ class CustomTrainingArguments(OutputArguments, AdditionalTrainingArguments):
|
|
333 |
pass
|
334 |
|
335 |
|
336 |
-
logging.basicConfig()
|
337 |
-
logger = logging.getLogger(__name__)
|
338 |
-
|
339 |
-
# Setup logging
|
340 |
-
logging.basicConfig(
|
341 |
-
format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
|
342 |
-
datefmt='%m/%d/%Y %H:%M:%S',
|
343 |
-
handlers=[logging.StreamHandler(sys.stdout)],
|
344 |
-
)
|
345 |
-
|
346 |
-
|
347 |
def get_last_checkpoint(training_args):
|
348 |
last_checkpoint = None
|
349 |
if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir:
|
|
|
1 |
from transformers.trainer_utils import get_last_checkpoint as glc
|
|
|
2 |
import os
|
3 |
from utils import re_findall
|
4 |
import logging
|
|
|
14 |
from dataclasses import dataclass, field
|
15 |
from enum import Enum
|
16 |
|
17 |
+
|
18 |
+
logging.basicConfig()
|
19 |
+
logger = logging.getLogger(__name__)
|
20 |
+
|
21 |
+
# Setup logging
|
22 |
+
logging.basicConfig(
|
23 |
+
format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
|
24 |
+
datefmt='%m/%d/%Y %H:%M:%S',
|
25 |
+
handlers=[logging.StreamHandler(sys.stdout)],
|
26 |
+
)
|
27 |
+
|
28 |
CATEGORIES = [None, 'SPONSOR', 'SELFPROMO', 'INTERACTION']
|
29 |
|
30 |
ACTION_OPTIONS = ['skip', 'mute', 'full']
|
|
|
244 |
|
245 |
def load_datasets(dataset_args: DatasetArguments):
|
246 |
|
247 |
+
logger.info('Reading datasets')
|
248 |
data_files = {}
|
249 |
|
250 |
if dataset_args.train_file is not None:
|
|
|
343 |
pass
|
344 |
|
345 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
346 |
def get_last_checkpoint(training_args):
|
347 |
last_checkpoint = None
|
348 |
if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir:
|