stefan-french committed on
Commit
39248fa
·
verified ·
1 Parent(s): 10b8572

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +177 -0
app.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from pathlib import Path
3
+
4
+ import numpy as np
5
+ import soundfile as sf
6
+ import streamlit as st
7
+ import document_to_podcast
8
+
9
+ from document_to_podcast.preprocessing import DATA_LOADERS, DATA_CLEANERS
10
+ from document_to_podcast.inference.model_loaders import (
11
+ load_llama_cpp_model,
12
+ load_outetts_model,
13
+ )
14
+ from document_to_podcast.config import DEFAULT_PROMPT, DEFAULT_SPEAKERS, Speaker
15
+ from document_to_podcast.inference.text_to_speech import text_to_speech
16
+ from document_to_podcast.inference.text_to_text import text_to_text_stream
17
+
18
+
19
@st.cache_resource
def load_text_to_text_model():
    """Load the llama.cpp text-to-text model used to write the podcast script.

    Decorated with ``st.cache_resource`` so the loaded model object is cached
    by Streamlit rather than rebuilt each time the script is executed.

    Returns:
        Whatever ``load_llama_cpp_model`` returns for the OLMoE GGUF checkpoint.
    """
    olmoe_gguf_id = (
        "allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf"
    )
    return load_llama_cpp_model(model_id=olmoe_gguf_id)
24
+
25
+
26
@st.cache_resource
def load_text_to_speech_model():
    """Load the OuteTTS text-to-speech model used to voice the podcast script.

    Decorated with ``st.cache_resource`` so the loaded model object is cached
    by Streamlit rather than rebuilt each time the script is executed.

    Returns:
        Whatever ``load_outetts_model`` returns for the OuteTTS GGUF checkpoint.
    """
    outetts_gguf_id = "OuteAI/OuteTTS-0.2-500M-GGUF/OuteTTS-0.2-500M-FP16.gguf"
    return load_outetts_model(outetts_gguf_id)
29
+
30
+
31
# Names of the session-state entries used by the rest of this script.
script = "script"
audio = "audio"
gen_button = "generate podcast button"

# Seed every entry exactly once; existing values are left alone so they
# survive Streamlit re-executing the script.
for _state_key, _initial_value in ((script, ""), (audio, []), (gen_button, False)):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
40
+
41
+
42
def gen_button_clicked():
    """Record in session state that "Generate Podcast" has been clicked.

    Used as the ``on_click`` callback of the generate button; the flag it sets
    is what later reveals the "save" buttons.
    """
    state = st.session_state
    state[gen_button] = True
44
+
45
+
46
# ---------------------------------------------------------------------------
# Page header and document upload
# ---------------------------------------------------------------------------
st.title("Document To Podcast")

st.header("Uploading Data")

uploaded_file = st.file_uploader(
    "Choose a file", type=["pdf", "html", "txt", "docx", "md"]
)


if uploaded_file is not None:
    # -----------------------------------------------------------------------
    # Step 1: load the raw document and clean it
    # -----------------------------------------------------------------------
    st.divider()
    st.header("Loading and Cleaning Data")
    st.markdown(
        "[Docs for this Step](https://mozilla-ai.github.io/document-to-podcast/step-by-step-guide/#step-1-document-pre-processing)"
    )
    st.divider()

    # Lower-case the suffix so e.g. "REPORT.PDF" resolves like "report.pdf".
    # NOTE(review): assumes DATA_LOADERS / DATA_CLEANERS are keyed by
    # lower-case suffixes such as ".pdf" -- confirm against the package.
    extension = Path(uploaded_file.name).suffix.lower()

    col1, col2 = st.columns(2)

    raw_text = DATA_LOADERS[extension](uploaded_file)
    with col1:
        st.subheader("Raw Text")
        st.text_area(
            f"Number of characters before cleaning: {len(raw_text)}",
            f"{raw_text[:500]} . . .",
        )

    clean_text = DATA_CLEANERS[extension](raw_text)
    with col2:
        st.subheader("Cleaned Text")
        st.text_area(
            f"Number of characters after cleaning: {len(clean_text)}",
            f"{clean_text[:500]} . . .",
        )

    # -----------------------------------------------------------------------
    # Step 2: load the models
    # -----------------------------------------------------------------------
    st.divider()
    st.header("Downloading and Loading models")
    st.markdown(
        "[Docs for this Step](https://mozilla-ai.github.io/document-to-podcast/step-by-step-guide/#step-2-podcast-script-generation)"
    )
    st.divider()

    st.markdown(
        "For this demo, we are using the following models: \n"
        "- [OLMoE-1B-7B-0924-Instruct](https://huggingface.co/allenai/OLMoE-1B-7B-0924-Instruct-GGUF)\n"
        "- [OuteAI/OuteTTS-0.2-500M-GGUF/OuteTTS-0.2-500M-FP16.gguf](https://huggingface.co/OuteAI/OuteTTS-0.2-500M-GGUF)"
    )
    st.markdown(
        "You can check the [Customization Guide](https://mozilla-ai.github.io/document-to-podcast/customization/)"
        " for more information on how to use different models."
    )

    text_model = load_text_to_text_model()
    speech_model = load_text_to_speech_model()

    # ~4 characters per token is considered a reasonable default.
    max_characters = text_model.n_ctx() * 4
    if len(clean_text) > max_characters:
        st.warning(
            f"Input text is too big ({len(clean_text)})."
            f" Using only a subset of it ({max_characters})."
        )
        clean_text = clean_text[:max_characters]

    # -----------------------------------------------------------------------
    # Step 3: generate the script and the audio
    # -----------------------------------------------------------------------
    st.divider()
    st.header("Podcast generation")
    st.markdown(
        "[Docs for this Step](https://mozilla-ai.github.io/document-to-podcast/step-by-step-guide/#step-3-audio-podcast-generation)"
    )
    st.divider()

    st.subheader("Speaker configuration")
    # Show copies without the "id" field (ids are re-assigned from row order
    # below) instead of popping it from the shared DEFAULT_SPEAKERS config.
    editable_speakers = [
        {key: value for key, value in default.items() if key != "id"}
        for default in DEFAULT_SPEAKERS
    ]
    speakers = st.data_editor(editable_speakers, num_rows="dynamic")

    if st.button("Generate Podcast", on_click=gen_button_clicked):
        # Start clean so a second generation does not append to the script
        # and audio of the previous one.
        st.session_state[script] = ""
        st.session_state[audio] = []

        # Ids follow the row order of the editor, counting every row.
        for n, speaker in enumerate(speakers):
            speaker["id"] = n + 1

        # Only fully filled-in rows are offered to the model and may be voiced.
        required_fields = ("name", "description", "voice_profile")
        valid_speakers = [
            speaker
            for speaker in speakers
            if all(speaker.get(field, None) for field in required_fields)
        ]
        speakers_str = "\n".join(
            str(Speaker.model_validate(speaker)) for speaker in valid_speakers
        )
        voice_profiles = {
            speaker["id"]: speaker["voice_profile"] for speaker in valid_speakers
        }

        system_prompt = DEFAULT_PROMPT.replace("{SPEAKERS}", speakers_str)
        with st.spinner("Generating Podcast..."):
            text = ""
            for chunk in text_to_text_stream(
                clean_text, text_model, system_prompt=system_prompt.strip()
            ):
                text += chunk
                # Act once a complete line mentioning a speaker has streamed in.
                if text.endswith("\n") and "Speaker" in text:
                    st.session_state[script] += text
                    st.write(text)

                    # The line may mention "Speaker" without a usable number,
                    # or name a speaker that is not configured; skip the audio
                    # for that line instead of crashing the whole run.
                    speaker_match = re.search(r"Speaker (\d+)", text)
                    voice_profile = None
                    if speaker_match is not None:
                        speaker_id = speaker_match.group(1)
                        voice_profile = voice_profiles.get(int(speaker_id))

                    if voice_profile is None:
                        st.warning(
                            "Could not match this line to a configured speaker;"
                            " skipping audio for it."
                        )
                    else:
                        with st.spinner("Generating Audio..."):
                            speech = text_to_speech(
                                text.split(f'"Speaker {speaker_id}":')[-1],
                                speech_model,
                                voice_profile,
                            )
                        st.audio(speech, sample_rate=speech_model.audio_codec.sr)

                        st.session_state[audio].append(speech)
                    text = ""

    # -----------------------------------------------------------------------
    # Saving results (only offered once generation has been requested)
    # -----------------------------------------------------------------------
    if st.session_state[gen_button]:
        if st.button("Save Podcast to audio file"):
            segments = st.session_state[audio]
            if segments:
                # Concatenate into a local array: the session entry must stay a
                # list so repeated saves and later generations keep working.
                sf.write(
                    "podcast.wav",
                    np.concatenate(segments),
                    samplerate=speech_model.audio_codec.sr,
                )
                st.markdown("Podcast saved to disk!")
            else:
                st.warning("No audio has been generated yet.")

        if st.button("Save Podcast script to text file"):
            # The closing brace is added only to the written text, so saving
            # twice does not accumulate braces in the stored script.
            # NOTE(review): presumably the streamed script is a JSON-like
            # object whose final "}" is never captured above -- confirm.
            with open("script.txt", "w", encoding="utf-8") as f:
                f.write(st.session_state[script] + "}")

            st.markdown("Script saved to disk!")