HoneyTian committed on
Commit
dcf6c65
1 Parent(s): 085d464
Files changed (1) hide show
  1. main.py +13 -4
main.py CHANGED
@@ -9,6 +9,7 @@ import logging
9
  from pathlib import Path
10
  import platform
11
  import time
 
12
 
13
  from project_settings import project_path, log_directory
14
  import log
@@ -77,6 +78,15 @@ def process(
77
  main_logger.info("num_active_paths: {}".format(num_active_paths))
78
  main_logger.info("in_filename: {}".format(in_filename))
79
 
 
 
 
 
 
 
 
 
 
80
  m_list = models.model_map.get(language)
81
  if m_list is None:
82
  raise AssertionError("language invalid: {}".format(language))
@@ -88,11 +98,8 @@ def process(
88
  if m_dict is None:
89
  raise AssertionError("repo_id invalid: {}".format(repo_id))
90
 
 
91
  local_model_dir = pretrained_model_dir / "huggingface" / repo_id
92
-
93
- out_filename = io.BytesIO()
94
- audio_convert(in_filename, out_filename)
95
-
96
  nn_model_file = local_model_dir / m_dict["nn_model_file"]
97
  tokens_file = local_model_dir / m_dict["tokens_file"]
98
 
@@ -107,6 +114,7 @@ def process(
107
  num_active_paths=num_active_paths,
108
  )
109
 
 
110
  now = datetime.now()
111
  date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
112
  logging.info(f"Started at {date_time}")
@@ -119,6 +127,7 @@ def process(
119
  date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
120
  end = time.time()
121
 
 
122
  metadata = torchaudio.info(out_filename)
123
  duration = metadata.num_frames / 16000
124
  rtf = (end - start) / duration
 
9
  from pathlib import Path
10
  import platform
11
  import time
12
+ import tempfile
13
 
14
  from project_settings import project_path, log_directory
15
  import log
 
78
  main_logger.info("num_active_paths: {}".format(num_active_paths))
79
  main_logger.info("in_filename: {}".format(in_filename))
80
 
81
+ # audio convert
82
+ in_filename = Path(in_filename)
83
+ out_filename = Path(tempfile.gettempdir()) / "asr" / in_filename.name
84
+
85
+ audio_convert(in_filename=in_filename.as_posix(),
86
+ out_filename=out_filename.as_posix(),
87
+ )
88
+
89
+ # model settings
90
  m_list = models.model_map.get(language)
91
  if m_list is None:
92
  raise AssertionError("language invalid: {}".format(language))
 
98
  if m_dict is None:
99
  raise AssertionError("repo_id invalid: {}".format(repo_id))
100
 
101
+ # load recognizer
102
  local_model_dir = pretrained_model_dir / "huggingface" / repo_id
 
 
 
 
103
  nn_model_file = local_model_dir / m_dict["nn_model_file"]
104
  tokens_file = local_model_dir / m_dict["tokens_file"]
105
 
 
114
  num_active_paths=num_active_paths,
115
  )
116
 
117
+ # transcribe
118
  now = datetime.now()
119
  date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
120
  logging.info(f"Started at {date_time}")
 
127
  date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
128
  end = time.time()
129
 
130
+ # statistics
131
  metadata = torchaudio.info(out_filename)
132
  duration = metadata.num_frames / 16000
133
  rtf = (end - start) / duration