Using FP16 for inference, trying to avoid weird, stupidly long inference times.
app.py CHANGED
@@ -78,8 +78,8 @@ def infer(text, sdp_ratio, noise_scale, noise_scale_w, length_scale, sid, langua
     x_tst = phones.to(device).unsqueeze(0)
     tones = tones.to(device).unsqueeze(0)
     lang_ids = lang_ids.to(device).unsqueeze(0)
-    bert = bert.to(device).unsqueeze(0)
-    ja_bert = ja_bert.to(device).unsqueeze(0)
+    bert = bert.half().to(device).unsqueeze(0)
+    ja_bert = ja_bert.half().to(device).unsqueeze(0)
     x_tst_lengths = torch.LongTensor([phones.size(0)]).to(device)
     del phones
     speakers = torch.LongTensor([hps.data.spk2id[sid]]).to(device)
@@ -172,7 +172,7 @@ if __name__ == "__main__":
         hps.train.segment_size // hps.data.hop_length,
         n_speakers=hps.data.n_speakers,
         **hps.model,
-    ).to(device)
+    ).half().to(device)
     _ = net_g.eval()

     _ = utils.load_checkpoint(args.model, net_g, None, skip_optimizer=True)
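For reference, this is the standard PyTorch half-precision inference pattern: both the module's parameters and the incoming tensors must be cast to float16 so their dtypes match, which is why the diff touches the bert / ja_bert inputs as well as the net_g constructor call. Below is a minimal, self-contained sketch of that pattern; the Linear stand-in, the use_fp16 guard, and the tensor shapes are illustrative assumptions, not code from app.py.

# Minimal sketch of the FP16 inference pattern this commit applies, with a
# generic torch.nn.Linear standing in for net_g (illustrative only).
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
use_fp16 = device.type == "cuda"  # half precision mainly pays off on GPU

model = torch.nn.Linear(16, 4).to(device).eval()
x = torch.randn(1, 16, device=device)  # stand-in for the float32 bert / ja_bert features

if use_fp16:
    model = model.half()  # casts every parameter and buffer to torch.float16
    x = x.half()          # inputs must match the model's dtype

with torch.no_grad():  # inference only, no autograd bookkeeping
    y = model(x)
print(y.dtype)  # torch.float16 on GPU, torch.float32 otherwise

The use_fp16 guard is an extra safeguard added here for the sketch; the commit itself casts unconditionally, matching its GPU inference setup.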