Spaces:

mozilla-ai
/

document-to-podcast

Running

daavoo committed 12 days ago

Commit

0c2ec7e

•

1 Parent(s): 85f1ae3

Update app

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,11 +8,11 @@ import streamlit as st
 from document_to_podcast.preprocessing import DATA_LOADERS, DATA_CLEANERS
 from document_to_podcast.inference.model_loaders import (
     load_llama_cpp_model,
-    load_tts_model,
 )
 from document_to_podcast.config import DEFAULT_PROMPT, DEFAULT_SPEAKERS, Speaker
-from document_to_podcast.inference.text_to_text import text_to_text_stream
 from document_to_podcast.inference.text_to_speech import text_to_speech
 @st.cache_resource
@@ -24,7 +24,7 @@ def load_text_to_text_model():
 @st.cache_resource
 def load_text_to_speech_model():
-    return load_tts_model("OuteAI/OuteTTS-0.2-500M-GGUF/OuteTTS-0.2-500M-FP16.gguf")
 script = "script"
@@ -150,10 +150,10 @@ if uploaded_file is not None:
                     with st.spinner("Generating Audio..."):
                         speech = text_to_speech(
                             text.split(f'"Speaker {speaker_id}":')[-1],
-                            model=speech_model,
-                            voice_profile=voice_profile,
                         )
-                    st.audio(speech, sample_rate=speech_model.sample_rate)
                     st.session_state.audio.append(speech)
                     text = ""
@@ -164,7 +164,7 @@ if uploaded_file is not None:
             sf.write(
                 "podcast.wav",
                 st.session_state.audio,
-                samplerate=speech_model.sample_rate,
             )
             st.markdown("Podcast saved to disk!")

 from document_to_podcast.preprocessing import DATA_LOADERS, DATA_CLEANERS
 from document_to_podcast.inference.model_loaders import (
     load_llama_cpp_model,
+    load_outetts_model,
 )
 from document_to_podcast.config import DEFAULT_PROMPT, DEFAULT_SPEAKERS, Speaker
 from document_to_podcast.inference.text_to_speech import text_to_speech
+from document_to_podcast.inference.text_to_text import text_to_text_stream
 @st.cache_resource
 @st.cache_resource
 def load_text_to_speech_model():
+    return load_outetts_model("OuteAI/OuteTTS-0.2-500M-GGUF/OuteTTS-0.2-500M-FP16.gguf")
 script = "script"
                     with st.spinner("Generating Audio..."):
                         speech = text_to_speech(
                             text.split(f'"Speaker {speaker_id}":')[-1],
+                            speech_model,
+                            voice_profile,
                         )
+                    st.audio(speech, sample_rate=speech_model.audio_codec.sr)
                     st.session_state.audio.append(speech)
                     text = ""
             sf.write(
                 "podcast.wav",
                 st.session_state.audio,
+                samplerate=speech_model.audio_codec.sr,
             )
             st.markdown("Podcast saved to disk!")