Fabrice-TIERCELIN committed on
Commit
ba53663
·
verified ·
1 Parent(s): dcb8a32

Get results earlier

Browse files
Files changed (1) hide show
  1. app.py +80 -18
app.py CHANGED
@@ -43,6 +43,21 @@ def update_output(output_number):
43
  gr.update(visible = (5 <= output_number))
44
  ]
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  def predict(
47
  prompt,
48
  language,
@@ -50,12 +65,18 @@ def predict(
50
  audio_file_pth,
51
  mic_file_path,
52
  use_mic,
 
53
  generation_number,
54
  temperature,
55
  is_randomize_seed,
56
  seed,
57
  progress = gr.Progress()
58
  ):
 
 
 
 
 
59
  start = time.time()
60
  progress(0, desc = "Preparing data...")
61
 
@@ -64,14 +85,12 @@ def predict(
64
  return (
65
  None,
66
  None,
67
- None,
68
  )
69
  if 50000 < len(prompt):
70
  gr.Warning("Text length limited to 50,000 characters for this demo, please try shorter text")
71
  return (
72
  None,
73
  None,
74
- None,
75
  )
76
 
77
  if use_mic:
@@ -80,7 +99,6 @@ def predict(
80
  return (
81
  None,
82
  None,
83
- None,
84
  )
85
  else:
86
  speaker_wav = mic_file_path
@@ -93,7 +111,7 @@ def predict(
93
  else:
94
  speaker_wav = "./examples/female.wav"
95
 
96
- output_filename = []
97
 
98
  try:
99
  if language == "fr":
@@ -102,12 +120,7 @@ def predict(
102
  if m.find("/fr/") != -1:
103
  language = None
104
 
105
- for i in range(5):
106
- if i < generation_number:
107
- output_filename.append(f"{i}_{re.sub('[^a-zA-Z0-9]', '_', language)}_{re.sub('[^a-zA-Z0-9]', '_', prompt)}"[:250] + ".wav")
108
- predict_on_gpu(i, prompt, speaker_wav, language, output_filename[i], temperature, is_randomize_seed, seed, progress)
109
- else:
110
- output_filename.append(None)
111
  except RuntimeError as e :
112
  if "device-assert" in str(e):
113
  # cannot do anything on cuda device side error, need to restart
@@ -126,17 +139,14 @@ def predict(
126
  information = ("Start again to get a different result. " if is_randomize_seed else "") + "The sound has been generated in " + ((str(hours) + " h, ") if hours != 0 else "") + ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + str(secondes) + " sec."
127
 
128
  return (
129
- output_filename[0],
130
- output_filename[1],
131
- output_filename[2],
132
- output_filename[3],
133
- output_filename[4],
134
  information,
135
  )
136
 
137
  @spaces.GPU(duration=60)
138
  def predict_on_gpu(
139
  i,
 
140
  prompt,
141
  speaker_wav,
142
  language,
@@ -146,7 +156,7 @@ def predict_on_gpu(
146
  seed,
147
  progress
148
  ):
149
- progress((i + 1) / 5, desc = "Generating the audio #" + str(i + 1) + "...")
150
  if is_randomize_seed:
151
  seed = random.randint(0, max_64_bit_int)
152
 
@@ -175,7 +185,7 @@ This is the same model that powers our creator application <a href="https://coqu
175
  <br/>
176
  Leave a star on the Github <a href="https://github.com/coqui-ai/TTS">TTS</a>, where our open-source inference and training code lives.
177
  <br/>
178
- <p>For faster inference without waiting in the queue, you should duplicate this space and upgrade to GPU via the settings.
179
  <br/>
180
  <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/Multi-language_Text-to-Speech?duplicate=true">
181
  <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
@@ -320,7 +330,7 @@ Leave a star on the Github <a href="https://github.com/coqui-ai/TTS">TTS</a>, wh
320
  synthesised_audio_3,
321
  synthesised_audio_4,
322
  synthesised_audio_5
323
- ], queue = False, show_progress = False).success(predict, inputs = [
324
  prompt,
325
  language,
326
  gender,
@@ -333,9 +343,61 @@ Leave a star on the Github <a href="https://github.com/coqui-ai/TTS">TTS</a>, wh
333
  seed
334
  ], outputs = [
335
  synthesised_audio_1,
 
 
 
 
 
 
 
 
 
 
 
 
 
336
  synthesised_audio_2,
 
 
 
 
 
 
 
 
 
 
 
 
 
337
  synthesised_audio_3,
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  synthesised_audio_4,
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  synthesised_audio_5,
340
  information
341
  ], scroll_to_output = True)
 
43
  gr.update(visible = (5 <= output_number))
44
  ]
45
 
46
+ def predict0(prompt, language, gender, audio_file_pth, mic_file_path, use_mic, generation_number, temperature, is_randomize_seed, seed, progress = gr.Progress()):
47
+ return predict(prompt, language, gender, audio_file_pth, mic_file_path, use_mic, 0, generation_number, temperature, is_randomize_seed, seed, progress)
48
+
49
+ def predict1(prompt, language, gender, audio_file_pth, mic_file_path, use_mic, generation_number, temperature, is_randomize_seed, seed, progress = gr.Progress()):
50
+ return predict(prompt, language, gender, audio_file_pth, mic_file_path, use_mic, 1, generation_number, temperature, is_randomize_seed, seed, progress)
51
+
52
+ def predict2(prompt, language, gender, audio_file_pth, mic_file_path, use_mic, generation_number, temperature, is_randomize_seed, seed, progress = gr.Progress()):
53
+ return predict(prompt, language, gender, audio_file_pth, mic_file_path, use_mic, 2, generation_number, temperature, is_randomize_seed, seed, progress)
54
+
55
+ def predict3(prompt, language, gender, audio_file_pth, mic_file_path, use_mic, generation_number, temperature, is_randomize_seed, seed, progress = gr.Progress()):
56
+ return predict(prompt, language, gender, audio_file_pth, mic_file_path, use_mic, 3, generation_number, temperature, is_randomize_seed, seed, progress)
57
+
58
+ def predict4(prompt, language, gender, audio_file_pth, mic_file_path, use_mic, generation_number, temperature, is_randomize_seed, seed, progress = gr.Progress()):
59
+ return predict(prompt, language, gender, audio_file_pth, mic_file_path, use_mic, 4, generation_number, temperature, is_randomize_seed, seed, progress)
60
+
61
  def predict(
62
  prompt,
63
  language,
 
65
  audio_file_pth,
66
  mic_file_path,
67
  use_mic,
68
+ i,
69
  generation_number,
70
  temperature,
71
  is_randomize_seed,
72
  seed,
73
  progress = gr.Progress()
74
  ):
75
+ if generation_number <= i:
76
+ return (
77
+ None,
78
+ None,
79
+ )
80
  start = time.time()
81
  progress(0, desc = "Preparing data...")
82
 
 
85
  return (
86
  None,
87
  None,
 
88
  )
89
  if 50000 < len(prompt):
90
  gr.Warning("Text length limited to 50,000 characters for this demo, please try shorter text")
91
  return (
92
  None,
93
  None,
 
94
  )
95
 
96
  if use_mic:
 
99
  return (
100
  None,
101
  None,
 
102
  )
103
  else:
104
  speaker_wav = mic_file_path
 
111
  else:
112
  speaker_wav = "./examples/female.wav"
113
 
114
+ output_filename = f"{i}_{re.sub('[^a-zA-Z0-9]', '_', language)}_{re.sub('[^a-zA-Z0-9]', '_', prompt)}"[:250] + ".wav"
115
 
116
  try:
117
  if language == "fr":
 
120
  if m.find("/fr/") != -1:
121
  language = None
122
 
123
+ predict_on_gpu(i, generation_number, prompt, speaker_wav, language, output_filename, temperature, is_randomize_seed, seed, progress)
 
 
 
 
 
124
  except RuntimeError as e :
125
  if "device-assert" in str(e):
126
  # cannot do anything on cuda device side error, need to restart
 
139
  information = ("Start again to get a different result. " if is_randomize_seed else "") + "The sound has been generated in " + ((str(hours) + " h, ") if hours != 0 else "") + ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + str(secondes) + " sec."
140
 
141
  return (
142
+ output_filename,
 
 
 
 
143
  information,
144
  )
145
 
146
  @spaces.GPU(duration=60)
147
  def predict_on_gpu(
148
  i,
149
+ generation_number,
150
  prompt,
151
  speaker_wav,
152
  language,
 
156
  seed,
157
  progress
158
  ):
159
+ progress((i + .5) / generation_number, desc = "Generating the audio #" + str(i + 1) + "...")
160
  if is_randomize_seed:
161
  seed = random.randint(0, max_64_bit_int)
162
 
 
185
  <br/>
186
  Leave a star on the Github <a href="https://github.com/coqui-ai/TTS">TTS</a>, where our open-source inference and training code lives.
187
  <br/>
188
+ <p>To avoid the queue, you can duplicate this space on CPU, GPU or ZERO space GPU:
189
  <br/>
190
  <a href="https://huggingface.co/spaces/Fabrice-TIERCELIN/Multi-language_Text-to-Speech?duplicate=true">
191
  <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
 
330
  synthesised_audio_3,
331
  synthesised_audio_4,
332
  synthesised_audio_5
333
+ ], queue = False, show_progress = False).success(predict0, inputs = [
334
  prompt,
335
  language,
336
  gender,
 
343
  seed
344
  ], outputs = [
345
  synthesised_audio_1,
346
+ information
347
+ ], scroll_to_output = True).success(predict1, inputs = [
348
+ prompt,
349
+ language,
350
+ gender,
351
+ audio_file_pth,
352
+ mic_file_path,
353
+ use_mic,
354
+ generation_number,
355
+ temperature,
356
+ randomize_seed,
357
+ seed
358
+ ], outputs = [
359
  synthesised_audio_2,
360
+ information
361
+ ], scroll_to_output = True).success(predict2, inputs = [
362
+ prompt,
363
+ language,
364
+ gender,
365
+ audio_file_pth,
366
+ mic_file_path,
367
+ use_mic,
368
+ generation_number,
369
+ temperature,
370
+ randomize_seed,
371
+ seed
372
+ ], outputs = [
373
  synthesised_audio_3,
374
+ information
375
+ ], scroll_to_output = True).success(predict3, inputs = [
376
+ prompt,
377
+ language,
378
+ gender,
379
+ audio_file_pth,
380
+ mic_file_path,
381
+ use_mic,
382
+ generation_number,
383
+ temperature,
384
+ randomize_seed,
385
+ seed
386
+ ], outputs = [
387
  synthesised_audio_4,
388
+ information
389
+ ], scroll_to_output = True).success(predict4, inputs = [
390
+ prompt,
391
+ language,
392
+ gender,
393
+ audio_file_pth,
394
+ mic_file_path,
395
+ use_mic,
396
+ generation_number,
397
+ temperature,
398
+ randomize_seed,
399
+ seed
400
+ ], outputs = [
401
  synthesised_audio_5,
402
  information
403
  ], scroll_to_output = True)