Commit
•
abb29e6
1
Parent(s):
a8336bb
Update app.py
Browse files
app.py
CHANGED
@@ -590,26 +590,32 @@ latent_map["Pirate"] = get_latents("voices/pirate_by_coqui.wav")
|
|
590 |
|
591 |
# Define the main function for the API endpoint that takes the input text and chatbot role
|
592 |
def generate_story_and_speech(input_text, chatbot_role):
|
593 |
-
|
594 |
-
|
595 |
-
#
|
596 |
-
|
597 |
-
|
598 |
-
|
599 |
-
|
600 |
-
|
601 |
-
|
602 |
-
|
603 |
-
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
|
608 |
-
|
609 |
-
|
610 |
-
|
611 |
-
|
612 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
613 |
|
614 |
# Create a Gradio Interface using only the `generate_story_and_speech()` function and the 'json' output type
|
615 |
demo = gr.Interface(
|
|
|
590 |
|
591 |
# Define the main function for the API endpoint that takes the input text and chatbot role
|
592 |
def generate_story_and_speech(input_text, chatbot_role):
    """Generate a story from the user's prompt and synthesize speech for it.

    Parameters
    ----------
    input_text : str
        The user's prompt, used as the first chat-history entry.
    chatbot_role : str
        The chatbot role/voice passed through to the text and speech generators.

    Returns
    -------
    dict
        ``{"text": <story>, "audio": <base64-encoded audio>}`` on success, or
        ``{"text": "Failed to generate story", "audio": None}`` when either
        story generation or speech synthesis produced nothing.
    """
    # History is a list of [user, assistant] pairs; seed it with the input only.
    history = [[input_text, None]]
    # get_sentence streams (sentence, updated_history) pairs as the story is generated.
    story_sentences = get_sentence(history, chatbot_role)

    story_text = ""      # Accumulates the full story text
    last_history = None  # Most recent history state seen while iterating

    # Concatenate the sentences streamed by get_sentence.
    for sentence, updated_history in story_sentences:
        if sentence:
            story_text += sentence.strip() + " "  # Add each sentence to the story
            last_history = updated_history        # Track the latest history update

    if last_history is not None:
        # Convert the list-of-lists history back into the list-of-tuples form
        # expected by the speech generator.
        history_tuples = [tuple(entry) for entry in last_history]
        synthesized_speech = generate_speech_for_sentence(history_tuples, chatbot_role, story_text)
        if synthesized_speech:
            # Extract raw audio bytes from the synthesis result.
            # NOTE(review): `return_as_byte` is a module-level flag not defined
            # in this function — confirm it exists at module scope.
            speech_audio = synthesized_speech[1]["value"] if return_as_byte else synthesized_speech[1].data.getvalue()
            # Base64-encode the audio so it is JSON-serializable.
            speech_audio_base64 = base64.b64encode(speech_audio).decode('utf8')
            return {"text": story_text.strip(), "audio": speech_audio_base64}

    # BUGFIX: the original returned the failure dict only when last_history was
    # None; if speech synthesis itself failed it fell through and implicitly
    # returned None. Both failure paths now return the explicit failure dict.
    return {"text": "Failed to generate story", "audio": None}
|
619 |
|
620 |
# Create a Gradio Interface using only the `generate_story_and_speech()` function and the 'json' output type
|
621 |
demo = gr.Interface(
|