JacobLinCool committed on
Commit
0ae5ec9
1 Parent(s): e6d6bb1

feat: restore exp

Browse files
Files changed (2) hide show
  1. app.py +35 -1
  2. infer/modules/train/train.py +0 -13
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
 
3
- os.environ["PYTORCH_JIT"] = "0"
4
 
5
  from random import shuffle
6
  import gradio as gr
@@ -15,6 +15,22 @@ from infer.modules.train.train import train
15
  from infer.lib.train.process_ckpt import extract_small_model
16
  from zero import zero
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  def extract_audio_files(zip_file: str, target_dir: str) -> list[str]:
20
  with zipfile.ZipFile(zip_file, "r") as zip_ref:
@@ -186,6 +202,11 @@ def download_expdir(exp_dir: str) -> str:
186
  return f"{exp_dir}.zip"
187
 
188
 
 
 
 
 
 
189
  with gr.Blocks() as app:
190
  with gr.Row():
191
  with gr.Column():
@@ -198,6 +219,13 @@ with gr.Blocks() as app:
198
  with gr.Column():
199
  preprocess_output = gr.Textbox(label="Preprocessing output", lines=5)
200
 
 
 
 
 
 
 
 
201
  with gr.Row():
202
  with gr.Column():
203
  extract_features_btn = gr.Button(
@@ -260,4 +288,10 @@ with gr.Blocks() as app:
260
  outputs=[download_expdir_output],
261
  )
262
 
 
 
 
 
 
 
263
  app.launch()
 
1
  import os
2
 
3
+ os.environ["PYTORCH_JIT"] = "0v"
4
 
5
  from random import shuffle
6
  import gradio as gr
 
15
  from infer.lib.train.process_ckpt import extract_small_model
16
  from zero import zero
17
 
18
+ # patch for jit script
19
+ # if we find `def expand_2d_or_3d_tensor(x,` in /usr/local/lib/python3.10/site-packages/fairseq/models/model_utils.py
20
+ # patch it with `def expand_2d_or_3d_tensor(x: Tensor,`
21
+ FAIRSEQ_CODE = "/usr/local/lib/python3.10/site-packages/fairseq/models/model_utils.py"
22
+ if os.path.exists(FAIRSEQ_CODE):
23
+ with open(FAIRSEQ_CODE, "r") as f:
24
+ lines = f.readlines()
25
+ with open(FAIRSEQ_CODE, "w") as f:
26
+ for line in lines:
27
+ if "def expand_2d_or_3d_tensor(x, trg_dim: int, padding_idx: int):" in line:
28
+ f.write(
29
+ "def expand_2d_or_3d_tensor(x: Tensor, trg_dim: int, padding_idx: int) -> Tensor:\n"
30
+ )
31
+ else:
32
+ f.write(line)
33
+
34
 
35
  def extract_audio_files(zip_file: str, target_dir: str) -> list[str]:
36
  with zipfile.ZipFile(zip_file, "r") as zip_ref:
 
202
  return f"{exp_dir}.zip"
203
 
204
 
205
+ def restore_expdir(zip: str, exp_dir: str) -> str:
206
+ shutil.unpack_archive(zip, exp_dir)
207
+ return exp_dir
208
+
209
+
210
  with gr.Blocks() as app:
211
  with gr.Row():
212
  with gr.Column():
 
219
  with gr.Column():
220
  preprocess_output = gr.Textbox(label="Preprocessing output", lines=5)
221
 
222
+ with gr.Row():
223
+ restore_zip_file = gr.File(
224
+ label="Upload the experiment directory zip file",
225
+ file_types=["zip"],
226
+ )
227
+ restore_btn = gr.Button(value="Preprocess")
228
+
229
  with gr.Row():
230
  with gr.Column():
231
  extract_features_btn = gr.Button(
 
288
  outputs=[download_expdir_output],
289
  )
290
 
291
+ restore_btn.click(
292
+ fn=restore_expdir,
293
+ inputs=[restore_zip_file, exp_dir],
294
+ outputs=[exp_dir],
295
+ )
296
+
297
  app.launch()
infer/modules/train/train.py CHANGED
@@ -122,9 +122,6 @@ def run(rank, n_gpus, hps, logger: logging.Logger, state):
122
  writer = SummaryWriter(log_dir=hps.model_dir)
123
  writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))
124
 
125
- dist.init_process_group(
126
- backend="gloo", init_method="env://", world_size=n_gpus, rank=rank
127
- )
128
  torch.manual_seed(hps.train.seed)
129
  if torch.cuda.is_available():
130
  torch.cuda.set_device(rank)
@@ -190,16 +187,6 @@ def run(rank, n_gpus, hps, logger: logging.Logger, state):
190
  betas=hps.train.betas,
191
  eps=hps.train.eps,
192
  )
193
- # net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
194
- # net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
195
- if hasattr(torch, "xpu") and torch.xpu.is_available():
196
- pass
197
- elif torch.cuda.is_available():
198
- net_g = DDP(net_g, device_ids=[rank])
199
- net_d = DDP(net_d, device_ids=[rank])
200
- else:
201
- net_g = DDP(net_g)
202
- net_d = DDP(net_d)
203
 
204
  try: # 如果能加载自动resume
205
  _, _, _, epoch_str = utils.load_checkpoint(
 
122
  writer = SummaryWriter(log_dir=hps.model_dir)
123
  writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval"))
124
 
 
 
 
125
  torch.manual_seed(hps.train.seed)
126
  if torch.cuda.is_available():
127
  torch.cuda.set_device(rank)
 
187
  betas=hps.train.betas,
188
  eps=hps.train.eps,
189
  )
 
 
 
 
 
 
 
 
 
 
190
 
191
  try: # 如果能加载自动resume
192
  _, _, _, epoch_str = utils.load_checkpoint(