Iker committed on
Commit
998d5ca
·
1 Parent(s): e27083d

Test if the source language is supported by the tokenizer

Browse files
Files changed (1) hide show
  1. translate.py +8 -2
translate.py CHANGED
@@ -73,8 +73,7 @@ def main(
73
  top_p: float = 1.0,
74
  ):
75
 
76
- if not os.path.exists(os.path.abspath(os.path.dirname(output_path))):
77
- os.makedirs(os.path.abspath(os.path.dirname(output_path)))
78
 
79
  accelerator = Accelerator(
80
  mixed_precision=precision if precision != "32" else "no",
@@ -104,7 +103,14 @@ def main(
104
  else:
105
  raise ValueError("Precision not supported. Supported values: 32, fp16, bf16")
106
 
 
 
 
 
 
 
107
  tokenizer.src_lang = source_lang
 
108
  try:
109
  lang_code_to_idx = tokenizer.lang_code_to_id[target_lang]
110
  except KeyError:
 
73
  top_p: float = 1.0,
74
  ):
75
 
76
+ os.makedirs(os.path.abspath(os.path.dirname(output_path)), exist_ok=True)
 
77
 
78
  accelerator = Accelerator(
79
  mixed_precision=precision if precision != "32" else "no",
 
103
  else:
104
  raise ValueError("Precision not supported. Supported values: 32, fp16, bf16")
105
 
106
+ try:
107
+ _ = tokenizer.lang_code_to_id[source_lang]
108
+ except KeyError:
109
+ raise KeyError(
110
+ f"Language {source_lang} not found in tokenizer. Available languages: {tokenizer.lang_code_to_id.keys()}"
111
+ )
112
  tokenizer.src_lang = source_lang
113
+
114
  try:
115
  lang_code_to_idx = tokenizer.lang_code_to_id[target_lang]
116
  except KeyError: