import gradio as gr
from transformers import pipeline
from espnet2.bin.tts_inference import Text2Speech
def generateTextAndAudio(inputText, numGen):
    # --- Generating the Text ---
    # With the text provided by the user, generate more text until the total
    # length reaches `numGen` tokens (sub-words), prompt included
    textOutput = textGenerator(inputText, max_length=numGen)

    # The text generator returns a list of dictionaries; grab the first one,
    # then read the generated text from its `generated_text` key
    genText = textOutput[0]['generated_text']

    print("Input Text:", inputText)
    print("Generated Text:", genText)

    # --- Generating the Audio ---
    # With the newly generated text, synthesize speech
    audioOutput = audioGenerator(genText)

    # Get the wav data
    genAudio = audioOutput['wav']

    # Return two things:
    # 1) the generated text
    # 2) a (sampling rate, waveform) tuple for the Audio output, with the
    #    24 kHz waveform converted from a torch tensor to a NumPy array
    return genText, (24000, genAudio.numpy())
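
# For reference, a minimal sketch of the raw outputs the handler above relies on
# (illustrative values only, not produced by actually running the models here):
#
#   textGenerator("I won a", max_length=10)
#   # -> [{'generated_text': 'I won a ...'}]   (a list of dicts with a `generated_text` key)
#
#   audioGenerator("Hello world")
#   # -> a dict whose 'wav' entry is a 1-D torch tensor of audio samples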
# Main
textGenerator = pipeline('text-generation', model = 'gpt2')
audioGenerator = Text2Speech.from_pretrained("espnet/kan-bayashi_ljspeech_joint_finetune_conformer_fastspeech2_hifigan")
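
# Optional local smoke test (a sketch; kept commented out so it does not run
# inference on every app start). It calls the handler the same way Gradio will:
#
#   sampleText, (sampleRate, sampleWav) = generateTextAndAudio("Once upon a time", 30)
#   print(sampleText, sampleRate, sampleWav.shape)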
input1_textbox = gr.Textbox(label="Input text")
input2_slider = gr.Slider(minimum=1, maximum=100, step=1, default=30, label="Number of tokens (sub-words) to generate")
output1_textbox = gr.Textbox(label = "Generated Text")
output2_Audio = gr.Audio(label = "Generated Audio")
title = "Generate Text and it's Audio!"
description = "Provide the text, and how many subwords to generate"
examples = [
["I won a", 50],
["My name is", 30],
["I have", 60]
]
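# Each example row maps positionally onto the `inputs` list passed to gr.Interface
# below: the string fills input1_textbox and the number fills input2_slider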
article = "<p style='text-align: center'><img src='https://visitor-badge.glitch.me/badge?page_id=epoching_glide_inpaint' alt='visitor badge'></p>"
iface = gr.Interface(fn=generateTextAndAudio,
                     inputs=[input1_textbox, input2_slider],
                     outputs=[output1_textbox, output2_Audio],
                     title=title,
                     description=description,
                     examples=examples,
                     article=article)
iface.launch(debug=True)