daavoo committed on
Commit
85f1ae3
1 Parent(s): fdfe90d

Add app and requirements

Browse files
Files changed (2) hide show
  1. app.py +176 -0
  2. requirements.txt +1 -0
app.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from pathlib import Path
3
+
4
+ import numpy as np
5
+ import soundfile as sf
6
+ import streamlit as st
7
+
8
+ from document_to_podcast.preprocessing import DATA_LOADERS, DATA_CLEANERS
9
+ from document_to_podcast.inference.model_loaders import (
10
+ load_llama_cpp_model,
11
+ load_tts_model,
12
+ )
13
+ from document_to_podcast.config import DEFAULT_PROMPT, DEFAULT_SPEAKERS, Speaker
14
+ from document_to_podcast.inference.text_to_text import text_to_text_stream
15
+ from document_to_podcast.inference.text_to_speech import text_to_speech
16
+
17
+
18
@st.cache_resource
def load_text_to_text_model():
    """Return the text-generation model used to write the podcast script.

    The model is obtained through ``load_llama_cpp_model`` and the function
    is wrapped in ``st.cache_resource``.
    """
    olmoe_model_id = (
        "allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf"
    )
    return load_llama_cpp_model(model_id=olmoe_model_id)
23
+
24
+
25
@st.cache_resource
def load_text_to_speech_model():
    """Return the text-to-speech model used to voice the podcast script.

    The model is obtained through ``load_tts_model`` and the function is
    wrapped in ``st.cache_resource``.
    """
    oute_tts_model_id = "OuteAI/OuteTTS-0.2-500M-GGUF/OuteTTS-0.2-500M-FP16.gguf"
    return load_tts_model(oute_tts_model_id)
28
+
29
+
30
# Names of the entries this app keeps in ``st.session_state``.
script = "script"
audio = "audio"
gen_button = "generate podcast button"

# Seed every entry with its initial value, but only when it is not already
# present in the session state.
for _state_key, _initial_value in ((script, ""), (audio, []), (gen_button, False)):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
39
+
40
+
41
def gen_button_clicked():
    """Record in the session state that "Generate Podcast" was clicked.

    Used as the ``on_click`` callback of the generate button; the flag is
    later read to decide whether the save buttons are shown.
    """
    st.session_state[gen_button] = True
43
+
44
+
45
# Page title followed by the first step of the demo: choosing a document.
st.title("Document To Podcast")
st.header("Uploading Data")

# File extensions accepted by the upload widget.
_accepted_types = ["pdf", "html", "txt", "docx", "md"]
uploaded_file = st.file_uploader("Choose a file", type=_accepted_types)
52
+
53
+
54
if uploaded_file is not None:
    # ------------------------------------------------------------------
    # Step 1: load the uploaded document and clean it.
    # ------------------------------------------------------------------
    st.divider()
    st.header("Loading and Cleaning Data")
    st.markdown(
        "[Docs for this Step](https://mozilla-ai.github.io/document-to-podcast/step-by-step-guide/#step-1-document-pre-processing)"
    )
    st.divider()

    # Lower-case the suffix so "REPORT.PDF" resolves to the same loader as
    # "report.pdf"; the uploader's type list above is lower-case as well.
    extension = Path(uploaded_file.name).suffix.lower()
    if extension not in DATA_LOADERS or extension not in DATA_CLEANERS:
        # Fail with a readable message instead of a KeyError traceback.
        st.error(f"Unsupported file type: '{extension}'.")
        st.stop()

    col1, col2 = st.columns(2)

    raw_text = DATA_LOADERS[extension](uploaded_file)
    with col1:
        st.subheader("Raw Text")
        st.text_area(
            f"Number of characters before cleaning: {len(raw_text)}",
            f"{raw_text[:500]} . . .",
        )

    clean_text = DATA_CLEANERS[extension](raw_text)
    with col2:
        st.subheader("Cleaned Text")
        st.text_area(
            f"Number of characters after cleaning: {len(clean_text)}",
            f"{clean_text[:500]} . . .",
        )

    # ------------------------------------------------------------------
    # Step 2: load the text-to-text and text-to-speech models.
    # ------------------------------------------------------------------
    st.divider()
    st.header("Downloading and Loading models")
    st.markdown(
        "[Docs for this Step](https://mozilla-ai.github.io/document-to-podcast/step-by-step-guide/#step-2-podcast-script-generation)"
    )
    st.divider()

    st.markdown(
        "For this demo, we are using the following models: \n"
        "- [OLMoE-1B-7B-0924-Instruct](https://huggingface.co/allenai/OLMoE-1B-7B-0924-Instruct-GGUF)\n"
        "- [OuteAI/OuteTTS-0.2-500M-GGUF/OuteTTS-0.2-500M-FP16.gguf](https://huggingface.co/OuteAI/OuteTTS-0.2-500M-GGUF)"
    )
    st.markdown(
        "You can check the [Customization Guide](https://mozilla-ai.github.io/document-to-podcast/customization/)"
        " for more information on how to use different models."
    )

    text_model = load_text_to_text_model()
    speech_model = load_text_to_speech_model()

    # ~4 characters per token is considered a reasonable default.
    max_characters = text_model.n_ctx() * 4
    if len(clean_text) > max_characters:
        st.warning(
            f"Input text is too big ({len(clean_text)})."
            f" Using only a subset of it ({max_characters})."
        )
        clean_text = clean_text[:max_characters]

    # ------------------------------------------------------------------
    # Step 3: configure speakers, generate the script and its audio.
    # ------------------------------------------------------------------
    st.divider()
    st.header("Podcast generation")
    st.markdown(
        "[Docs for this Step](https://mozilla-ai.github.io/document-to-podcast/step-by-step-guide/#step-3-audio-podcast-generation)"
    )
    st.divider()

    st.subheader("Speaker configuration")
    # Show the default speakers without their "id" column. Work on copies so
    # the shared DEFAULT_SPEAKERS entries are not mutated on every rerun.
    editable_speakers = [
        {key: value for key, value in default.items() if key != "id"}
        for default in DEFAULT_SPEAKERS
    ]
    speakers = st.data_editor(editable_speakers, num_rows="dynamic")

    if st.button("Generate Podcast", on_click=gen_button_clicked):
        # Start every generation from a clean slate; otherwise a second run
        # would be appended to the script/audio of the previous one.
        st.session_state[script] = ""
        st.session_state[audio] = []

        # Ids follow the row order of the editor (1-based).
        for n, speaker in enumerate(speakers):
            speaker["id"] = n + 1
        # Only rows with all required fields filled in are sent to the model.
        speakers_str = "\n".join(
            str(Speaker.model_validate(speaker))
            for speaker in speakers
            if all(
                speaker.get(x, None) for x in ["name", "description", "voice_profile"]
            )
        )
        system_prompt = DEFAULT_PROMPT.replace("{SPEAKERS}", speakers_str)
        with st.spinner("Generating Podcast..."):
            text = ""
            for chunk in text_to_text_stream(
                clean_text, text_model, system_prompt=system_prompt.strip()
            ):
                text += chunk
                # A complete script line ends with a newline and names a speaker.
                if text.endswith("\n") and "Speaker" in text:
                    st.session_state.script += text
                    st.write(text)

                    # The line may mention "Speaker" without a number, or use
                    # an id that is not configured; in both cases skip the
                    # audio for this line instead of crashing the whole run.
                    voice_profile = None
                    speaker_match = re.search(r"Speaker (\d+)", text)
                    if speaker_match is not None:
                        speaker_id = speaker_match.group(1)
                        voice_profile = next(
                            (
                                speaker["voice_profile"]
                                for speaker in speakers
                                if speaker["id"] == int(speaker_id)
                            ),
                            None,
                        )

                    if not voice_profile:
                        st.warning(
                            "Could not match this line to a configured speaker;"
                            " skipping its audio."
                        )
                    else:
                        with st.spinner("Generating Audio..."):
                            speech = text_to_speech(
                                text.split(f'"Speaker {speaker_id}":')[-1],
                                model=speech_model,
                                voice_profile=voice_profile,
                            )
                        st.audio(speech, sample_rate=speech_model.sample_rate)

                        st.session_state.audio.append(speech)
                    text = ""

    # The save buttons are shown once "Generate Podcast" has been clicked in
    # this session (flag set by the button's on_click callback).
    if st.session_state[gen_button]:
        if st.button("Save Podcast to audio file"):
            audio_segments = st.session_state.audio
            if len(audio_segments) == 0:
                st.warning("There is no generated audio to save yet.")
            else:
                # Concatenate into a local value: the session state must stay
                # a list of segments so saving twice (or generating again)
                # keeps working.
                sf.write(
                    "podcast.wav",
                    np.concatenate(audio_segments),
                    samplerate=speech_model.sample_rate,
                )
                st.markdown("Podcast saved to disk!")

        if st.button("Save Podcast script to text file"):
            # NOTE(review): the trailing "}" presumably closes the JSON-like
            # script emitted by the model -- confirm. It is added only to the
            # written text so repeated saves do not accumulate braces.
            with open("script.txt", "w", encoding="utf-8") as f:
                f.write(st.session_state.script + "}")

            st.markdown("Script saved to disk!")
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ document-to-podcast