mkutarna commited on
Commit
fd88500
2 Parent(s): ef79b65 602a617

Resolving merge conflicts, cleaning out repo file history of large binary files

Browse files
README.md CHANGED
@@ -1,27 +1,29 @@
1
- Audiobook Gen
2
- =============
3
 
4
- ## Description
5
- Audiobook Gen is a tool that allows the users to generate an audio file of text (e.g. audiobook), read in the voice of the user's choice. This tool is based on the Silero text-to-speech toolkit and uses Streamlit to deliver the application.
6
 
7
- ## Demo
8
- A demonstration of this tool is hosted at HuggingFace Spaces - see [Audiobook_Gen](https://huggingface.co/spaces/mkutarna/audiobook_gen).
 
 
 
 
 
 
9
 
10
- ![Demo Screenshot](https://github.com/mkutarna/audiobook_gen/blob/appdev/resources/audiobook_gen.png "Screenshot")
 
 
 
11
 
12
- #### Instructions
13
- 1. Upload the book file to be converted.
14
- 2. Select the desired voice for the audiobook.
15
- 3. Click to run!
16
 
17
- ## Dependencies
18
- - silero
19
- - streamlit
20
- - ebooklib
21
- - PyPDF2
22
- - bs4
23
- - nltk
24
- - stqdm
25
 
26
- ## License
27
- See [LICENSE](https://github.com/mkutarna/audiobook_gen/blob/master/LICENSE)
 
 
 
 
 
1
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/mkutarna/audiobook_gen/blob/master/LICENSE)
2
+ ![GitHub Workflow Status](https://img.shields.io/github/workflow/status/mkutarna/audiobook_gen/Python%20application)
3
 
4
+ # Audiobook Gen
 
5
 
6
+ ## About
7
+ Audiobook Gen converts text to audiobook format. It allows you to choose which voice you want to listen to.
8
+
9
+ - Do you want to listen to a book that isn't available on Audible?
10
+ - Have you tried an audiobook from LibriVox and found the audio quality lacking?
11
+ - Don't have time to sit and read a document, but would prefer to listen to it?
12
+
13
+ You can input various text formats (`txt`, `pdf`, `epub` - more options in development) and output a `zip` archive of audio files (`wav`). This is an open-source tool based on the [Silero text-to-speech toolkit](https://github.com/snakers4/silero-models) and uses Streamlit to deliver the application.
14
 
15
+ # Demo
16
+
17
+ ## HuggingFace Space
18
+ A demonstration of this tool is hosted at HuggingFace Spaces - see [Audiobook_Gen](https://huggingface.co/spaces/mkutarna/audiobook_gen).
19
 
20
+ <img style="border:1px solid grey" src="resources/audiobook_gen.png" alt="Screenshot"/>
 
 
 
21
 
22
+ # Future
 
 
 
 
 
 
 
23
 
24
+ Here is a list of features in development and planned for the future:
25
+ - `html` file import
26
+ - `mobi`, `azw` ebook input
27
+ - optional audio output formats (for better compression)
28
+ - improved audio file output handling
29
+ - Docker image for local use
app.py CHANGED
@@ -49,7 +49,7 @@ if st.button('Click to run!'):
49
  st.success('Reading file complete!')
50
 
51
  with st.spinner('Generating audio...'):
52
- output.generate_audio(text, file_title, model, config.SPEAKER_LIST.get(speaker))
53
  st.success('Audio generation complete!')
54
 
55
  with st.spinner('Building zip file...'):
 
49
  st.success('Reading file complete!')
50
 
51
  with st.spinner('Generating audio...'):
52
+ predict.generate_audio(text, file_title, model, config.SPEAKER_LIST.get(speaker))
53
  st.success('Audio generation complete!')
54
 
55
  with st.spinner('Building zip file...'):
notebooks/parser_function_html.ipynb CHANGED
@@ -2,6 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
 
5
  "execution_count": 1,
6
  "id": "27a75ece",
7
  "metadata": {},
@@ -13,6 +14,9 @@
13
  {
14
  "cell_type": "code",
15
  "execution_count": null,
 
 
 
16
  "id": "5292a160",
17
  "metadata": {},
18
  "outputs": [],
@@ -27,18 +31,30 @@
27
  },
28
  {
29
  "cell_type": "code",
 
30
  "execution_count": null,
 
 
 
31
  "id": "68609a77",
32
  "metadata": {},
33
  "outputs": [],
34
  "source": [
35
  "# file_path = '1232-h.htm'\n",
 
36
  "file_path = 'test.htm'"
 
 
 
37
  ]
38
  },
39
  {
40
  "cell_type": "code",
 
41
  "execution_count": null,
 
 
 
42
  "id": "5c526c9b",
43
  "metadata": {},
44
  "outputs": [],
@@ -49,7 +65,11 @@
49
  },
50
  {
51
  "cell_type": "code",
 
52
  "execution_count": null,
 
 
 
53
  "id": "d4732304",
54
  "metadata": {},
55
  "outputs": [],
@@ -102,30 +122,86 @@
102
  },
103
  {
104
  "cell_type": "code",
 
105
  "execution_count": null,
106
  "id": "ece1c7d3",
107
  "metadata": {},
108
  "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  "source": [
110
  "np.shape(corpus)"
111
  ]
112
  },
113
  {
114
  "cell_type": "code",
 
115
  "execution_count": null,
116
  "id": "dc7e4010",
117
  "metadata": {},
118
  "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  "source": [
120
  "corpus[0][2]"
121
  ]
122
  },
123
  {
124
  "cell_type": "code",
 
125
  "execution_count": null,
126
  "id": "6cb47a2d",
127
  "metadata": {},
128
  "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  "source": [
130
  "corpus"
131
  ]
@@ -133,6 +209,17 @@
133
  {
134
  "cell_type": "code",
135
  "execution_count": null,
 
 
 
 
 
 
 
 
 
 
 
136
  "id": "d11031c7",
137
  "metadata": {},
138
  "outputs": [],
@@ -367,7 +454,11 @@
367
  ],
368
  "metadata": {
369
  "kernelspec": {
 
370
  "display_name": "Python 3",
 
 
 
371
  "language": "python",
372
  "name": "python3"
373
  },
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ <<<<<<< HEAD
6
  "execution_count": 1,
7
  "id": "27a75ece",
8
  "metadata": {},
 
14
  {
15
  "cell_type": "code",
16
  "execution_count": null,
17
+ =======
18
+ "execution_count": 2,
19
+ >>>>>>> appdev
20
  "id": "5292a160",
21
  "metadata": {},
22
  "outputs": [],
 
31
  },
32
  {
33
  "cell_type": "code",
34
+ <<<<<<< HEAD
35
  "execution_count": null,
36
+ =======
37
+ "execution_count": 3,
38
+ >>>>>>> appdev
39
  "id": "68609a77",
40
  "metadata": {},
41
  "outputs": [],
42
  "source": [
43
  "# file_path = '1232-h.htm'\n",
44
+ <<<<<<< HEAD
45
  "file_path = 'test.htm'"
46
+ =======
47
+ "file_path = ''"
48
+ >>>>>>> appdev
49
  ]
50
  },
51
  {
52
  "cell_type": "code",
53
+ <<<<<<< HEAD
54
  "execution_count": null,
55
+ =======
56
+ "execution_count": 4,
57
+ >>>>>>> appdev
58
  "id": "5c526c9b",
59
  "metadata": {},
60
  "outputs": [],
 
65
  },
66
  {
67
  "cell_type": "code",
68
+ <<<<<<< HEAD
69
  "execution_count": null,
70
+ =======
71
+ "execution_count": 5,
72
+ >>>>>>> appdev
73
  "id": "d4732304",
74
  "metadata": {},
75
  "outputs": [],
 
122
  },
123
  {
124
  "cell_type": "code",
125
+ <<<<<<< HEAD
126
  "execution_count": null,
127
  "id": "ece1c7d3",
128
  "metadata": {},
129
  "outputs": [],
130
+ =======
131
+ "execution_count": 11,
132
+ "id": "ece1c7d3",
133
+ "metadata": {},
134
+ "outputs": [
135
+ {
136
+ "data": {
137
+ "text/plain": [
138
+ "(1, 2)"
139
+ ]
140
+ },
141
+ "execution_count": 11,
142
+ "metadata": {},
143
+ "output_type": "execute_result"
144
+ }
145
+ ],
146
+ >>>>>>> appdev
147
  "source": [
148
  "np.shape(corpus)"
149
  ]
150
  },
151
  {
152
  "cell_type": "code",
153
+ <<<<<<< HEAD
154
  "execution_count": null,
155
  "id": "dc7e4010",
156
  "metadata": {},
157
  "outputs": [],
158
+ =======
159
+ "execution_count": 12,
160
+ "id": "dc7e4010",
161
+ "metadata": {},
162
+ "outputs": [
163
+ {
164
+ "ename": "IndexError",
165
+ "evalue": "list index out of range",
166
+ "output_type": "error",
167
+ "traceback": [
168
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
169
+ "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
170
+ "Cell \u001b[0;32mIn [12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcorpus\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m]\u001b[49m\n",
171
+ "\u001b[0;31mIndexError\u001b[0m: list index out of range"
172
+ ]
173
+ }
174
+ ],
175
+ >>>>>>> appdev
176
  "source": [
177
  "corpus[0][2]"
178
  ]
179
  },
180
  {
181
  "cell_type": "code",
182
+ <<<<<<< HEAD
183
  "execution_count": null,
184
  "id": "6cb47a2d",
185
  "metadata": {},
186
  "outputs": [],
187
+ =======
188
+ "execution_count": 13,
189
+ "id": "6cb47a2d",
190
+ "metadata": {},
191
+ "outputs": [
192
+ {
193
+ "data": {
194
+ "text/plain": [
195
+ "[['Predict Testing Text File',\n",
196
+ " 'Audiobook Gen is a tool that allows the users to generate an audio file from an ebook or other document.']]"
197
+ ]
198
+ },
199
+ "execution_count": 13,
200
+ "metadata": {},
201
+ "output_type": "execute_result"
202
+ }
203
+ ],
204
+ >>>>>>> appdev
205
  "source": [
206
  "corpus"
207
  ]
 
209
  {
210
  "cell_type": "code",
211
  "execution_count": null,
212
+ <<<<<<< HEAD
213
+ =======
214
+ "id": "8508b073",
215
+ "metadata": {},
216
+ "outputs": [],
217
+ "source": []
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": null,
222
+ >>>>>>> appdev
223
  "id": "d11031c7",
224
  "metadata": {},
225
  "outputs": [],
 
454
  ],
455
  "metadata": {
456
  "kernelspec": {
457
+ <<<<<<< HEAD
458
  "display_name": "Python 3",
459
+ =======
460
+ "display_name": "Python 3 (ipykernel)",
461
+ >>>>>>> appdev
462
  "language": "python",
463
  "name": "python3"
464
  },
tests/data/test_predict.pt ADDED
Binary file (680 kB). View file
 
tests/data/test_predict.pt.REMOVED.git-id DELETED
@@ -1 +0,0 @@
1
- 84cf0cd8d8bede5ff60d18475d71e26543d5d7ad
 
 
tests/data/test_predict.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Predict Testing Text File
2
+
3
+ Audiobook Gen is a tool that allows the users to generate an audio file from an ebook or other document.
tests/test_predict.py CHANGED
@@ -52,12 +52,14 @@ def test_predict():
52
  tensor_path = test_config.data_path / "test_predict.pt"
53
  test_tensor = torch.load(tensor_path)
54
 
55
- ebook_path = test_config.data_path / "test.epub"
56
- corpus, title = file_readers.read_epub(ebook_path)
 
 
57
  section_index = 'part001'
58
- speaker = 'en_110'
59
 
60
- audio_list, _ = predict.predict(corpus[1], section_index, title, model, speaker)
61
  audio_tensor = torch.cat(audio_list).reshape(1, -1)
62
 
63
- torch.testing.assert_close(audio_tensor, test_tensor, atol=1e-3, rtol=0.2)
 
52
  tensor_path = test_config.data_path / "test_predict.pt"
53
  test_tensor = torch.load(tensor_path)
54
 
55
+ text_path = test_config.data_path / "test_predict.txt"
56
+ with open(text_path, 'r') as file:
57
+ text = file_readers.preprocess_text(file)
58
+ title = 'test_predict'
59
  section_index = 'part001'
60
+ speaker = 'en_0'
61
 
62
+ audio_list, _ = predict.predict(text, section_index, title, model, speaker)
63
  audio_tensor = torch.cat(audio_list).reshape(1, -1)
64
 
65
+ torch.testing.assert_close(audio_tensor, test_tensor, atol=1e-3, rtol=0.9)