Spaces:
Build error
Build error
Resolving merge conflicts, cleaning out repo file history of large binary files
Browse files- README.md +23 -21
- app.py +1 -1
- notebooks/parser_function_html.ipynb +91 -0
- tests/data/test_predict.pt +0 -0
- tests/data/test_predict.pt.REMOVED.git-id +0 -1
- tests/data/test_predict.txt +3 -0
- tests/test_predict.py +7 -5
README.md
CHANGED
@@ -1,27 +1,29 @@
|
|
1 |
-
|
2 |
-
|
3 |
|
4 |
-
|
5 |
-
Audiobook Gen is a tool that allows the users to generate an audio file of text (e.g. audiobook), read in the voice of the user's choice. This tool is based on the Silero text-to-speech toolkit and uses Streamlit to deliver the application.
|
6 |
|
7 |
-
##
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
-
|
|
|
|
|
|
|
11 |
|
12 |
-
|
13 |
-
1. Upload the book file to be converted.
|
14 |
-
2. Select the desired voice for the audiobook.
|
15 |
-
3. Click to run!
|
16 |
|
17 |
-
|
18 |
-
- silero
|
19 |
-
- streamlit
|
20 |
-
- ebooklib
|
21 |
-
- PyPDF2
|
22 |
-
- bs4
|
23 |
-
- nltk
|
24 |
-
- stqdm
|
25 |
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
1 |
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/mkutarna/audiobook_gen/blob/master/LICENSE)
|
2 |
+
![GitHub Workflow Status](https://img.shields.io/github/workflow/status/mkutarna/audiobook_gen/Python%20application)
|
3 |
|
4 |
+
# Audiobook Gen
|
|
|
5 |
|
6 |
+
## About
|
7 |
+
Audiobook Gen converts text to audiobook format. It allows you to choose which voice you want to listen to.
|
8 |
+
|
9 |
+
- Do you want to listen to a book that isn't available on Audible?
|
10 |
+
- Have you tried an audiobook from LibriVox and found the audio quality lacking?
|
11 |
+
- Don't have time to sit and read a document, and would prefer to listen to it instead?
|
12 |
+
|
13 |
+
You can input various text formats (`txt`, `pdf`, `epub` - more options in development) and output a `zip` archive of audio files (`wav`). This is an open-source tool based on the [Silero text-to-speech toolkit](https://github.com/snakers4/silero-models) and uses Streamlit to deliver the application.
|
14 |
|
15 |
+
# Demo
|
16 |
+
|
17 |
+
## HuggingFace Space
|
18 |
+
A demonstration of this tool is hosted at HuggingFace Spaces - see [Audiobook_Gen](https://huggingface.co/spaces/mkutarna/audiobook_gen).
|
19 |
|
20 |
+
<img style="border:1px solid grey" src="resources/audiobook_gen.png" alt="Screenshot"/>
|
|
|
|
|
|
|
21 |
|
22 |
+
# Future
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
+
Here is a list of features in development and planned for the future:
|
25 |
+
- `html` file import
|
26 |
+
- `mobi`, `azw` ebook input
|
27 |
+
- optional audio output formats (for better compression)
|
28 |
+
- improved audio file output handling
|
29 |
+
- Docker image for local use
|
app.py
CHANGED
@@ -49,7 +49,7 @@ if st.button('Click to run!'):
|
|
49 |
st.success('Reading file complete!')
|
50 |
|
51 |
with st.spinner('Generating audio...'):
|
52 |
-
|
53 |
st.success('Audio generation complete!')
|
54 |
|
55 |
with st.spinner('Building zip file...'):
|
|
|
49 |
st.success('Reading file complete!')
|
50 |
|
51 |
with st.spinner('Generating audio...'):
|
52 |
+
predict.generate_audio(text, file_title, model, config.SPEAKER_LIST.get(speaker))
|
53 |
st.success('Audio generation complete!')
|
54 |
|
55 |
with st.spinner('Building zip file...'):
|
notebooks/parser_function_html.ipynb
CHANGED
@@ -2,6 +2,7 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
|
|
5 |
"execution_count": 1,
|
6 |
"id": "27a75ece",
|
7 |
"metadata": {},
|
@@ -13,6 +14,9 @@
|
|
13 |
{
|
14 |
"cell_type": "code",
|
15 |
"execution_count": null,
|
|
|
|
|
|
|
16 |
"id": "5292a160",
|
17 |
"metadata": {},
|
18 |
"outputs": [],
|
@@ -27,18 +31,30 @@
|
|
27 |
},
|
28 |
{
|
29 |
"cell_type": "code",
|
|
|
30 |
"execution_count": null,
|
|
|
|
|
|
|
31 |
"id": "68609a77",
|
32 |
"metadata": {},
|
33 |
"outputs": [],
|
34 |
"source": [
|
35 |
"# file_path = '1232-h.htm'\n",
|
|
|
36 |
"file_path = 'test.htm'"
|
|
|
|
|
|
|
37 |
]
|
38 |
},
|
39 |
{
|
40 |
"cell_type": "code",
|
|
|
41 |
"execution_count": null,
|
|
|
|
|
|
|
42 |
"id": "5c526c9b",
|
43 |
"metadata": {},
|
44 |
"outputs": [],
|
@@ -49,7 +65,11 @@
|
|
49 |
},
|
50 |
{
|
51 |
"cell_type": "code",
|
|
|
52 |
"execution_count": null,
|
|
|
|
|
|
|
53 |
"id": "d4732304",
|
54 |
"metadata": {},
|
55 |
"outputs": [],
|
@@ -102,30 +122,86 @@
|
|
102 |
},
|
103 |
{
|
104 |
"cell_type": "code",
|
|
|
105 |
"execution_count": null,
|
106 |
"id": "ece1c7d3",
|
107 |
"metadata": {},
|
108 |
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
"source": [
|
110 |
"np.shape(corpus)"
|
111 |
]
|
112 |
},
|
113 |
{
|
114 |
"cell_type": "code",
|
|
|
115 |
"execution_count": null,
|
116 |
"id": "dc7e4010",
|
117 |
"metadata": {},
|
118 |
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
"source": [
|
120 |
"corpus[0][2]"
|
121 |
]
|
122 |
},
|
123 |
{
|
124 |
"cell_type": "code",
|
|
|
125 |
"execution_count": null,
|
126 |
"id": "6cb47a2d",
|
127 |
"metadata": {},
|
128 |
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
"source": [
|
130 |
"corpus"
|
131 |
]
|
@@ -133,6 +209,17 @@
|
|
133 |
{
|
134 |
"cell_type": "code",
|
135 |
"execution_count": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
"id": "d11031c7",
|
137 |
"metadata": {},
|
138 |
"outputs": [],
|
@@ -367,7 +454,11 @@
|
|
367 |
],
|
368 |
"metadata": {
|
369 |
"kernelspec": {
|
|
|
370 |
"display_name": "Python 3",
|
|
|
|
|
|
|
371 |
"language": "python",
|
372 |
"name": "python3"
|
373 |
},
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
<<<<<<< HEAD
|
6 |
"execution_count": 1,
|
7 |
"id": "27a75ece",
|
8 |
"metadata": {},
|
|
|
14 |
{
|
15 |
"cell_type": "code",
|
16 |
"execution_count": null,
|
17 |
+
=======
|
18 |
+
"execution_count": 2,
|
19 |
+
>>>>>>> appdev
|
20 |
"id": "5292a160",
|
21 |
"metadata": {},
|
22 |
"outputs": [],
|
|
|
31 |
},
|
32 |
{
|
33 |
"cell_type": "code",
|
34 |
+
<<<<<<< HEAD
|
35 |
"execution_count": null,
|
36 |
+
=======
|
37 |
+
"execution_count": 3,
|
38 |
+
>>>>>>> appdev
|
39 |
"id": "68609a77",
|
40 |
"metadata": {},
|
41 |
"outputs": [],
|
42 |
"source": [
|
43 |
"# file_path = '1232-h.htm'\n",
|
44 |
+
<<<<<<< HEAD
|
45 |
"file_path = 'test.htm'"
|
46 |
+
=======
|
47 |
+
"file_path = ''"
|
48 |
+
>>>>>>> appdev
|
49 |
]
|
50 |
},
|
51 |
{
|
52 |
"cell_type": "code",
|
53 |
+
<<<<<<< HEAD
|
54 |
"execution_count": null,
|
55 |
+
=======
|
56 |
+
"execution_count": 4,
|
57 |
+
>>>>>>> appdev
|
58 |
"id": "5c526c9b",
|
59 |
"metadata": {},
|
60 |
"outputs": [],
|
|
|
65 |
},
|
66 |
{
|
67 |
"cell_type": "code",
|
68 |
+
<<<<<<< HEAD
|
69 |
"execution_count": null,
|
70 |
+
=======
|
71 |
+
"execution_count": 5,
|
72 |
+
>>>>>>> appdev
|
73 |
"id": "d4732304",
|
74 |
"metadata": {},
|
75 |
"outputs": [],
|
|
|
122 |
},
|
123 |
{
|
124 |
"cell_type": "code",
|
125 |
+
<<<<<<< HEAD
|
126 |
"execution_count": null,
|
127 |
"id": "ece1c7d3",
|
128 |
"metadata": {},
|
129 |
"outputs": [],
|
130 |
+
=======
|
131 |
+
"execution_count": 11,
|
132 |
+
"id": "ece1c7d3",
|
133 |
+
"metadata": {},
|
134 |
+
"outputs": [
|
135 |
+
{
|
136 |
+
"data": {
|
137 |
+
"text/plain": [
|
138 |
+
"(1, 2)"
|
139 |
+
]
|
140 |
+
},
|
141 |
+
"execution_count": 11,
|
142 |
+
"metadata": {},
|
143 |
+
"output_type": "execute_result"
|
144 |
+
}
|
145 |
+
],
|
146 |
+
>>>>>>> appdev
|
147 |
"source": [
|
148 |
"np.shape(corpus)"
|
149 |
]
|
150 |
},
|
151 |
{
|
152 |
"cell_type": "code",
|
153 |
+
<<<<<<< HEAD
|
154 |
"execution_count": null,
|
155 |
"id": "dc7e4010",
|
156 |
"metadata": {},
|
157 |
"outputs": [],
|
158 |
+
=======
|
159 |
+
"execution_count": 12,
|
160 |
+
"id": "dc7e4010",
|
161 |
+
"metadata": {},
|
162 |
+
"outputs": [
|
163 |
+
{
|
164 |
+
"ename": "IndexError",
|
165 |
+
"evalue": "list index out of range",
|
166 |
+
"output_type": "error",
|
167 |
+
"traceback": [
|
168 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
169 |
+
"\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
|
170 |
+
"Cell \u001b[0;32mIn [12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcorpus\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m]\u001b[49m\n",
|
171 |
+
"\u001b[0;31mIndexError\u001b[0m: list index out of range"
|
172 |
+
]
|
173 |
+
}
|
174 |
+
],
|
175 |
+
>>>>>>> appdev
|
176 |
"source": [
|
177 |
"corpus[0][2]"
|
178 |
]
|
179 |
},
|
180 |
{
|
181 |
"cell_type": "code",
|
182 |
+
<<<<<<< HEAD
|
183 |
"execution_count": null,
|
184 |
"id": "6cb47a2d",
|
185 |
"metadata": {},
|
186 |
"outputs": [],
|
187 |
+
=======
|
188 |
+
"execution_count": 13,
|
189 |
+
"id": "6cb47a2d",
|
190 |
+
"metadata": {},
|
191 |
+
"outputs": [
|
192 |
+
{
|
193 |
+
"data": {
|
194 |
+
"text/plain": [
|
195 |
+
"[['Predict Testing Text File',\n",
|
196 |
+
" 'Audiobook Gen is a tool that allows the users to generate an audio file from an ebook or other document.']]"
|
197 |
+
]
|
198 |
+
},
|
199 |
+
"execution_count": 13,
|
200 |
+
"metadata": {},
|
201 |
+
"output_type": "execute_result"
|
202 |
+
}
|
203 |
+
],
|
204 |
+
>>>>>>> appdev
|
205 |
"source": [
|
206 |
"corpus"
|
207 |
]
|
|
|
209 |
{
|
210 |
"cell_type": "code",
|
211 |
"execution_count": null,
|
212 |
+
<<<<<<< HEAD
|
213 |
+
=======
|
214 |
+
"id": "8508b073",
|
215 |
+
"metadata": {},
|
216 |
+
"outputs": [],
|
217 |
+
"source": []
|
218 |
+
},
|
219 |
+
{
|
220 |
+
"cell_type": "code",
|
221 |
+
"execution_count": null,
|
222 |
+
>>>>>>> appdev
|
223 |
"id": "d11031c7",
|
224 |
"metadata": {},
|
225 |
"outputs": [],
|
|
|
454 |
],
|
455 |
"metadata": {
|
456 |
"kernelspec": {
|
457 |
+
<<<<<<< HEAD
|
458 |
"display_name": "Python 3",
|
459 |
+
=======
|
460 |
+
"display_name": "Python 3 (ipykernel)",
|
461 |
+
>>>>>>> appdev
|
462 |
"language": "python",
|
463 |
"name": "python3"
|
464 |
},
|
tests/data/test_predict.pt
ADDED
Binary file (680 kB). View file
|
|
tests/data/test_predict.pt.REMOVED.git-id
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
84cf0cd8d8bede5ff60d18475d71e26543d5d7ad
|
|
|
|
tests/data/test_predict.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
Predict Testing Text File
|
2 |
+
|
3 |
+
Audiobook Gen is a tool that allows the users to generate an audio file from an ebook or other document.
|
tests/test_predict.py
CHANGED
@@ -52,12 +52,14 @@ def test_predict():
|
|
52 |
tensor_path = test_config.data_path / "test_predict.pt"
|
53 |
test_tensor = torch.load(tensor_path)
|
54 |
|
55 |
-
|
56 |
-
|
|
|
|
|
57 |
section_index = 'part001'
|
58 |
-
speaker = '
|
59 |
|
60 |
-
audio_list, _ = predict.predict(
|
61 |
audio_tensor = torch.cat(audio_list).reshape(1, -1)
|
62 |
|
63 |
-
torch.testing.assert_close(audio_tensor, test_tensor, atol=1e-3, rtol=0.
|
|
|
52 |
tensor_path = test_config.data_path / "test_predict.pt"
|
53 |
test_tensor = torch.load(tensor_path)
|
54 |
|
55 |
+
text_path = test_config.data_path / "test_predict.txt"
|
56 |
+
with open(text_path, 'r') as file:
|
57 |
+
text = file_readers.preprocess_text(file)
|
58 |
+
title = 'test_predict'
|
59 |
section_index = 'part001'
|
60 |
+
speaker = 'en_0'
|
61 |
|
62 |
+
audio_list, _ = predict.predict(text, section_index, title, model, speaker)
|
63 |
audio_tensor = torch.cat(audio_list).reshape(1, -1)
|
64 |
|
65 |
+
torch.testing.assert_close(audio_tensor, test_tensor, atol=1e-3, rtol=0.9)
|