Pendrokar committed on
Commit cc70457 • 1 Parent(s): 69b2485

give newcomer a cached sample pair; changed default voice clone for TTS

Files changed (2)
  1. README.md +1 -1
  2. app.py +29 -13
README.md CHANGED
@@ -9,7 +9,7 @@ emoji: 🤗🏆
 colorFrom: red
 colorTo: red
 pinned: false
-short_description: xVA vs The World
+short_description: Vote on the top HF TTS models!
 models:
 - coqui/XTTS-v2
 - fishaudio/fish-speech-1.4
app.py CHANGED
@@ -44,6 +44,9 @@ with open('harvard_sentences.txt') as f:
     sents += f.read().strip().splitlines()
 with open('llama3_command-r_sentences.txt') as f:
     sents += f.read().strip().splitlines()
+
+# Credit: llama3_command-r sentences generated made by user KingNish
+
 ####################################
 # Constants
 ####################################
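The two sentence files are concatenated into one `sents` pool, which is what the reworded 🎲 instruction later in this commit draws from. A minimal sketch of picking a random prompt from that pool; `random_sentence` is an illustrative helper, not a function from app.py:

```python
import random

# Build the sentence pool the same way the hunk above does: one sentence per line.
sents = []
for path in ('harvard_sentences.txt', 'llama3_command-r_sentences.txt'):
    with open(path, encoding='utf-8') as f:
        sents += f.read().strip().splitlines()

def random_sentence() -> str:
    """Hypothetical helper for the 🎲 button: pick one prompt from the pool."""
    return random.choice(sents)
```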
@@ -213,8 +216,8 @@ DEFAULT_VOICE_TRANSCRIPT = "In the first half of the 20th century, science ficti
 OVERRIDE_INPUTS = {
     'coqui/xtts': {
         1: 'en',
-        2: DEFAULT_VOICE_SAMPLE_STR, # voice sample
-        3: DEFAULT_VOICE_SAMPLE_STR, # voice sample
+        2: 'https://huggingface.co/spaces/coqui/xtts/resolve/main/examples/female.wav', # voice sample
+        3: 'https://huggingface.co/spaces/coqui/xtts/resolve/main/examples/female.wav', # mic voice sample
         4: False, #use_mic
         5: False, #cleanup_reference
         6: False, #auto_detect
@@ -248,7 +251,7 @@ OVERRIDE_INPUTS = {
         1: 'LikeManyWaters', # voice
     },
     'LeeSangHoon/HierSpeech_TTS': {
-        1: DEFAULT_VOICE_SAMPLE, # voice sample
+        1: file('https://huggingface.co/spaces/LeeSangHoon/HierSpeech_TTS/resolve/main/example/female.wav'), # voice sample
         2: 0.333,
         3: 0.333,
         4: 1,
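Note the two ways a reference voice is passed above: coqui/xtts accepts the sample as a plain URL string, while the HierSpeech_TTS override wraps it with gradio_client's file() helper. On recent gradio_client releases the equivalent wrapper is handle_file(); a one-line sketch, assuming such a release is installed:

```python
from gradio_client import handle_file  # older gradio_client releases exposed file() instead

# Wrap a remote WAV so gradio_client forwards it as a file input to the Space.
ref_voice = handle_file('https://huggingface.co/spaces/LeeSangHoon/HierSpeech_TTS/resolve/main/example/female.wav')
```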
@@ -267,6 +270,13 @@ OVERRIDE_INPUTS = {
         2: 1, # speed
         3: 'EN', # language
     },
+    'mrfakename/MetaVoice-1B-v0.1': {
+        1: 5, # float (numeric value between 0.0 and 10.0) in 'Speech Stability - improves text following for a challenging speaker' Slider component
+        2: 5, # float (numeric value between 1.0 and 5.0) in 'Speaker similarity - How closely to match speaker identity and speech style.' Slider component
+        3: "Preset voices", # Literal['Preset voices', 'Upload target voice'] in 'Choose voice' Radio component
+        4: "Bria", # Literal['Bria', 'Alex', 'Jacob'] in 'Preset voices' Dropdown component
+        5: None, # filepath in 'Upload a clean sample to clone. Sample should contain 1 speaker, be between 30-90 seconds and not contain background noise.' Audio component
+    },
     'parler-tts/parler_tts': {
         1: 'Elisabeth. Elisabeth\'s clear sharp voice.', # description/prompt
     },
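Each OVERRIDE_INPUTS entry pins positional inputs of the upstream Space by index (language, reference audio, sliders), leaving the text prompt to the arena. A hedged sketch of how such a mapping could be merged into an argument list before a gradio_client call; apply_overrides, the argument layout, and the prompt index are assumptions for illustration, not the arena's actual code:

```python
from gradio_client import Client

def apply_overrides(args, overrides):
    # Replace selected positional inputs by index; index 0 (the text) is left to the caller.
    merged = list(args)
    for idx, value in overrides.items():
        merged[idx] = value
    return merged

# Example with the new coqui/xtts defaults from this commit.
xtts_overrides = {
    1: 'en',
    2: 'https://huggingface.co/spaces/coqui/xtts/resolve/main/examples/female.wav',  # voice sample
    3: 'https://huggingface.co/spaces/coqui/xtts/resolve/main/examples/female.wav',  # mic voice sample
    4: False, 5: False, 6: False,
}
args = apply_overrides(['Hello there, arena voter!'] + [None] * 6, xtts_overrides)
# client = Client('coqui/xtts')        # network call; uncomment to synthesize for real
# audio_path = client.predict(*args)
```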
@@ -438,13 +448,13 @@ INSTR = """
 ## 🗳️ Vote
 
 * Press ⚡ to get cached sample pairs you've yet to vote on. (Fast 🐇)
-* Or press 🎲 to randomly use text from a preselected list. (Slow 🐢)
+* Or press 🎲 to randomly use a sentence from the list. (Slow 🐢)
 * Or input text (🇺🇸 English only) to synthesize audio. (Slowest 🐌 due to _Toxicity_ test)
 * Listen to the two audio clips, one after the other.
-* Vote on which audio sounds more natural to you.
-* _Note: Model names are revealed after the vote is cast._
+* _Vote on which audio sounds more natural to you._
+* Model names are revealed after the vote is cast.
 
-Note: It may take up to 30 seconds to synthesize audio.
+⚠ Note: It **may take up to 30 seconds** to ***synthesize*** audio.
 """.strip()
 request = ''
 if SPACE_ID:
@@ -1391,12 +1401,17 @@ with gr.Blocks() as vote:
     # bothbad.click(both_bad, outputs=outputs, inputs=[model1, model2, useridstate])
     # bothgood.click(both_good, outputs=outputs, inputs=[model1, model2, useridstate])
 
-    vote.load(
-        None,
-        None,
-        session_hash,
-        js="() => { return getArenaCookie('session') }",
-    )
+    # get session cookie
+    vote\
+        .load(
+            None,
+            None,
+            session_hash,
+            js="() => { return getArenaCookie('session') }",
+        )
+    # give a cached sample pair to voter; .then() did not work here
+    vote\
+        .load(give_cached_sample, inputs=[session_hash], outputs=[*outputs, cachedt])
 
 with gr.Blocks() as about:
     gr.Markdown(ABOUT)
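The replaced block above registers two independent load listeners on the vote Blocks: a JS-only one that copies the session cookie into session_hash, then a server-side one that hands the newcomer a cached sample pair (the inline comment notes that chaining with .then() did not work). A self-contained sketch of that Gradio pattern; serve_cached_pair and the simplified cookie JS are stand-ins, not the Space's getArenaCookie/give_cached_sample:

```python
import gradio as gr

def serve_cached_pair(session: str):
    # Stand-in for give_cached_sample(): return whatever the UI needs on page load.
    return f"Cached sample pair for session: {session or 'anonymous'}"

with gr.Blocks() as vote:
    session_hash = gr.Textbox(visible=False)
    pair_info = gr.Markdown()

    # Load listener 1: JS only, reads a cookie client-side and fills session_hash.
    vote.load(
        None,
        None,
        session_hash,
        js="() => (document.cookie.split('; ').find(c => c.startsWith('session=')) || '')",
    )
    # Load listener 2: server-side, gives the visitor a cached pair right away.
    vote.load(serve_cached_pair, inputs=[session_hash], outputs=[pair_info])

if __name__ == '__main__':
    vote.launch()
```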
@@ -1407,6 +1422,7 @@ with gr.Blocks() as about:
     # dbtext = gr.Textbox(label="Type \"delete db\" to confirm", placeholder="delete db")
     # ddb = gr.Button("Delete DB")
     # ddb.click(del_db, inputs=dbtext, outputs=ddb)
+# Blur cached sample text so the voting user picks up mispronouncements
 with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none} .blurred-text {filter: blur(0.15em);}", js="cookie.js", title="TTS Arena") as demo:
     gr.Markdown(DESCR)
     # gr.TabbedInterface([vote, leaderboard, about, admin], ['Vote', 'Leaderboard', 'About', 'Admin (ONLY IN BETA)'])
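The added comment plus the existing .blurred-text rule in the demo's css indicate the cached sample's transcript stays blurred so voters catch mispronunciations by ear rather than by reading along. In Gradio such a class is typically attached through elem_classes; a small hypothetical sketch (the component and its label are assumptions, not app.py's layout):

```python
import gradio as gr

# Same rule the Space adds to its top-level Blocks css string.
css = ".blurred-text {filter: blur(0.15em);}"

with gr.Blocks(css=css) as demo:
    # Hypothetical transcript box for a cached sample: blurred until the vote is cast.
    transcript = gr.Textbox(
        value="A cached sample sentence.",
        label="Transcript",
        elem_classes=["blurred-text"],
    )

if __name__ == '__main__':
    demo.launch()
```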
 