fffiloni committed on
Commit
cc07fe9
1 Parent(s): ce514b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -10
app.py CHANGED
@@ -13,15 +13,15 @@ model_ids = [
13
  for model_id in model_ids:
14
  model_name = model_id.split('/')[-1]
15
  snapshot_download(model_id, local_dir=f'checkpoints/{model_name}')
16
- """
17
- #from TTS.tts.configs.bark_config import BarkConfig
18
- #from TTS.tts.models.bark import Bark
19
 
20
- #os.environ['CUDA_VISIBLE_DEVICES'] = '1'
21
- #config = BarkConfig()
22
- #model = Bark.init_from_config(config)
23
- #model.load_checkpoint(config, checkpoint_dir="checkpoints/bark", eval=True)
24
 
 
 
 
 
 
25
  from TTS.api import TTS
26
  tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
27
 
@@ -87,25 +87,34 @@ def infer(prompt, input_wav_file):
87
 
88
  # Print the contents
89
  for item in contents:
90
- print(item)
 
 
91
 
92
- return "output.wav", f"bark_voices/{file_name}/{contents[1]}"
93
 
94
 
95
  css = """
96
  #col-container {max-width: 580px; margin-left: auto; margin-right: auto;}
 
 
 
 
97
  """
98
 
99
  with gr.Blocks(css=css) as demo:
100
  with gr.Column(elem_id="col-container"):
101
 
102
- gr.HTML("""
103
  <h1 style="text-align: center;">Instant Voice Cloning</h1>
104
  <p style="text-align: center;">
105
  Clone any voice in less than 2 minutes with this <a href="https://tts.readthedocs.io/en/dev/models/bark.html" target="_blank">Coqui TSS + Bark</a> demo ! <br />
106
  Upload a clean 20 seconds WAV file of the voice you want to clone, <br />
107
  type your text-to-speech prompt and hit submit ! <br />
108
  </p>
 
 
 
109
  """)
110
 
111
  prompt = gr.Textbox(
@@ -124,6 +133,10 @@ with gr.Blocks(css=css) as demo:
124
  cloned_out = gr.Audio(
125
  label="Text to speech output"
126
  )
 
 
 
 
127
 
128
  npz_file = gr.File(
129
  label=".npz file"
@@ -137,6 +150,7 @@ with gr.Blocks(css=css) as demo:
137
  ],
138
  outputs = [
139
  cloned_out,
 
140
  npz_file
141
  ]
142
  )
 
13
  for model_id in model_ids:
14
  model_name = model_id.split('/')[-1]
15
  snapshot_download(model_id, local_dir=f'checkpoints/{model_name}')
 
 
 
16
 
17
+ from TTS.tts.configs.bark_config import BarkConfig
18
+ from TTS.tts.models.bark import Bark
 
 
19
 
20
+ #os.environ['CUDA_VISIBLE_DEVICES'] = '1'
21
+ config = BarkConfig()
22
+ model = Bark.init_from_config(config)
23
+ model.load_checkpoint(config, checkpoint_dir="checkpoints/bark", eval=True)
24
+ """
25
  from TTS.api import TTS
26
  tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
27
 
 
87
 
88
  # Print the contents
89
  for item in contents:
90
+ print(item)
91
+
92
+ tts_video = gr.make_waveform(audio="output.wav")
93
 
94
+ return "output.wav", tts_video, f"bark_voices/{file_name}/{contents[1]}"
95
 
96
 
97
  css = """
98
  #col-container {max-width: 580px; margin-left: auto; margin-right: auto;}
99
+ img[src*='#center'] {
100
+ display: block;
101
+ margin: auto;
102
+ }
103
  """
104
 
105
  with gr.Blocks(css=css) as demo:
106
  with gr.Column(elem_id="col-container"):
107
 
108
+ gr.Markdown("""
109
  <h1 style="text-align: center;">Instant Voice Cloning</h1>
110
  <p style="text-align: center;">
111
  Clone any voice in less than 2 minutes with this <a href="https://tts.readthedocs.io/en/dev/models/bark.html" target="_blank">Coqui TSS + Bark</a> demo ! <br />
112
  Upload a clean 20 seconds WAV file of the voice you want to clone, <br />
113
  type your text-to-speech prompt and hit submit ! <br />
114
  </p>
115
+
116
+ [![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg#center)](https://huggingface.co/spaces/fffiloni/instant-TTS-Bark-cloning?duplicate=true)
117
+
118
  """)
119
 
120
  prompt = gr.Textbox(
 
133
  cloned_out = gr.Audio(
134
  label="Text to speech output"
135
  )
136
+
137
+ video_out = gr.Video(
138
+ label = "Waveform video"
139
+ )
140
 
141
  npz_file = gr.File(
142
  label=".npz file"
 
150
  ],
151
  outputs = [
152
  cloned_out,
153
+ video_out,
154
  npz_file
155
  ]
156
  )