mkutarna commited on
Commit
f5dff19
·
2 Parent(s): 3b480a0 fd88500

Merged repo changes to main branch

Browse files
.coveragerc ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # .coveragerc for audiobook_gen
3
+
4
+ [run]
5
+ # data_file = put a coverage file name here!!!
6
+
7
+ [report]
8
+ # Regexes for lines to exclude from consideration
9
+ exclude_lines =
10
+ # Have to re-enable the standard pragma
11
+ pragma: no cover
12
+ omit =
13
+ tests/*
14
+ notebooks/*
15
+
16
+ [html]
17
+ # directory = put a directory here!!!!
18
+
19
+ [tool:pytest]
20
+ addopts = --cov=audiobook_gen --cov-report html
.flake8 ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [flake8]
2
+ max-line-length = 120
3
+ max-complexity = 18
4
+ exclude =
5
+ notebooks/*
6
+ */.ipynb_checkpoints/*
.github/workflows/check_file_size.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Check file size
2
+ on: # or directly `on: [push]` to run the action on every push on any branch
3
+ pull_request:
4
+ branches: [master]
5
+
6
+ # to run this workflow manually from the Actions tab
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ sync-to-hub:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - name: Check large files
14
+ uses: ActionsDesk/lfs-warning@v2.0
15
+ with:
16
+ filesizelimit: 10485760 # this is 10MB so we can sync to HF Spaces
.github/workflows/python-app.yml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This workflow will install Python dependencies, run tests and lint with a single version of Python
2
+ # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3
+
4
+ name: Python application
5
+
6
+ on:
7
+ push:
8
+ branches: [ master ]
9
+ pull_request:
10
+ branches: [ master ]
11
+
12
+ permissions:
13
+ contents: read
14
+
15
+ jobs:
16
+ build:
17
+
18
+ runs-on: ubuntu-latest
19
+
20
+ steps:
21
+ - uses: actions/checkout@v3
22
+ - name: Set up Python 3.9.12
23
+ uses: actions/setup-python@v3
24
+ with:
25
+ python-version: "3.9.12"
26
+ - name: Install dependencies
27
+ run: |
28
+ python -m pip install --upgrade pip
29
+ pip install flake8 pytest==7.1.3 pytest-cov==3.0.0
30
+ if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
31
+ - name: Lint with flake8
32
+ run: |
33
+ # stop the build if there are Python syntax errors or undefined names
34
+ flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
35
+ # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
36
+ flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
37
+ - name: Test and coverage with pytest and pytest-cov
38
+ run: |
39
+ pytest --cov-config=.coveragerc --cov=audiobook_gen tests/
.github/workflows/sync_to_huggingface_space.yml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face hub
2
+ on:
3
+ push:
4
+ branches: [master]
5
+
6
+ # to run this workflow manually from the Actions tab
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ sync-to-hub:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v2
14
+ with:
15
+ fetch-depth: 0
16
+ - name: Push to hub
17
+ env:
18
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
19
+ run: git push --force https://mkutarna:$HF_TOKEN@huggingface.co/spaces/mkutarna/audiobook_gen master
.gitignore CHANGED
@@ -9,7 +9,7 @@ docs/
9
  conda/
10
  tmp/
11
  notebooks/outputs/
12
- tests/__pycache__
13
  tests/.pytest_cache
14
 
15
  tags
 
9
  conda/
10
  tmp/
11
  notebooks/outputs/
12
+ tests/__pycache__/
13
  tests/.pytest_cache
14
 
15
  tags
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Matthew Kutarna / github.com/mkutarna/audiobook_gen
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -10,7 +10,29 @@ pinned: true
10
  license: mit
11
  ---
12
 
13
- Audiobook Gen
14
- =============
15
 
16
- Audiobook Gen is a tool that allows the users to generate an audio file of text (e.g. audiobook), read in the voice of the user's choice. It will take in 3 inputs: the desired text for audio generation, as well as a pair of text / audio files for the desired voice.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  license: mit
11
  ---
12
 
13
+ # Audiobook Gen
 
14
 
15
+ ## About
16
+ Audiobook Gen converts text to audiobook format. It allows you to choose which voice you want to listen to.
17
+
18
+ - Do you want to listen to a book that isn't available on Audible?
19
+ - Have you tried an audiobook from LibriVox and found the audio quality lacking?
20
+ - Don't have time to sit and read a document, but would prefer to listen to it?
21
+
22
+ You can input various text formats (`txt`, `pdf`, `epub` - more options in development) and output a `zip` archive of audio files (`wav`). This is an open-source tool based on the [Silero text-to-speech toolkit](https://github.com/snakers4/silero-models) and uses Streamlit to deliver the application.
23
+
24
+ # Demo
25
+
26
+ ## HuggingFace Space
27
+ A demonstration of this tool is hosted at HuggingFace Spaces - see [Audiobook_Gen](https://huggingface.co/spaces/mkutarna/audiobook_gen).
28
+
29
+ <img style="border:1px solid grey" src="resources/audiobook_gen.png" alt="Screenshot"/>
30
+
31
+ # Future
32
+
33
+ Here is a list of features in development and planned for the future:
34
+ - `html` file import
35
+ - `mobi`, `azw` ebook input
36
+ - optional audio output formats (for better compression)
37
+ - improved audio file output handling
38
+ - Docker image for local use
app.py CHANGED
@@ -1,31 +1,70 @@
 
 
1
  import streamlit as st
2
 
3
- from src.parser import read_epub, read_txt
4
- from src.predict import audiobook_gen, load_models
5
- from src.output import assemble_zip
 
 
 
 
6
 
7
  st.title('Audiobook Generation Tool')
8
- st.markdown("This tool generates audiobook files from an imported ebook file.")
9
 
10
- with st.sidebar:
11
- ebook_upload = st.file_uploader(
12
- label = "Upload the target ebook (.epub only)",
13
- type = ['epub'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
 
15
  if st.button('Click to run!'):
16
- ebook, title = read_epub(ebook_upload)
17
- model = load_models()
18
- st.success('Parsing complete!')
 
 
 
 
 
 
 
 
 
19
 
20
  with st.spinner('Generating audio...'):
21
- audiobook_gen(ebook, title, model)
22
- st.success('TTS generation complete!')
23
 
24
  with st.spinner('Building zip file...'):
25
- zip_file = assemble_zip(title)
26
- title_name = f'{title}.zip'
27
  st.success('Zip file prepared!')
28
 
 
 
 
 
 
 
 
 
29
  with open(zip_file, "rb") as fp:
30
  btn = st.download_button(
31
  label="Download Audiobook",
 
1
+ import logging
2
+
3
  import streamlit as st
4
 
5
+ from src import file_readers, predict, output, config
6
+
7
+ logging.basicConfig(filename='app.log',
8
+ filemode='w',
9
+ format='%(name)s - %(levelname)s - %(message)s',
10
+ level=logging.INFO,
11
+ force=True)
12
 
13
  st.title('Audiobook Generation Tool')
 
14
 
15
+ text_file = open(config.INSTRUCTIONS, "r")
16
+ readme_text = text_file.read()
17
+ text_file.close()
18
+ st.markdown(readme_text)
19
+
20
+ st.header('1. Upload your document')
21
+ uploaded_file = st.file_uploader(
22
+ label="File types accepted: epub, txt, pdf",
23
+ type=['epub', 'txt', 'pdf'])
24
+
25
+ model = predict.load_model()
26
+
27
+ st.header('2. Please select voice')
28
+ speaker = st.radio('Available voices:', config.SPEAKER_LIST.keys(), horizontal=True)
29
+
30
+ audio_path = config.resource_path / f'speaker_{config.SPEAKER_LIST.get(speaker)}.wav'
31
+ audio_file = open(audio_path, 'rb')
32
+ audio_bytes = audio_file.read()
33
+
34
+ st.audio(audio_bytes, format='audio/ogg')
35
 
36
+ st.header('3. Run the app to generate audio')
37
  if st.button('Click to run!'):
38
+ file_ext = uploaded_file.type
39
+ file_title = uploaded_file.name
40
+ if file_ext == 'application/epub+zip':
41
+ text, file_title = file_readers.read_epub(uploaded_file)
42
+ elif file_ext == 'text/plain':
43
+ file = uploaded_file.read()
44
+ text = file_readers.preprocess_text(file)
45
+ elif file_ext == 'application/pdf':
46
+ text = file_readers.read_pdf(uploaded_file)
47
+ else:
48
+ st.warning('Invalid file type', icon="⚠️")
49
+ st.success('Reading file complete!')
50
 
51
  with st.spinner('Generating audio...'):
52
+ predict.generate_audio(text, file_title, model, config.SPEAKER_LIST.get(speaker))
53
+ st.success('Audio generation complete!')
54
 
55
  with st.spinner('Building zip file...'):
56
+ zip_file = output.assemble_zip(file_title)
57
+ title_name = f'{file_title}.zip'
58
  st.success('Zip file prepared!')
59
 
60
+ with open(zip_file, "rb") as fp:
61
+ btn = st.download_button(
62
+ label="Download Audiobook",
63
+ data=fp,
64
+ file_name=title_name,
65
+ mime="application/zip"
66
+ )
67
+
68
  with open(zip_file, "rb") as fp:
69
  btn = st.download_button(
70
  label="Download Audiobook",
models/latest_silero_models.yml ADDED
@@ -0,0 +1,563 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pre-trained STT models
2
+ stt_models:
3
+ en:
4
+ latest:
5
+ meta:
6
+ name: "en_v6"
7
+ sample: "https://models.silero.ai/examples/en_sample.wav"
8
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
9
+ jit: "https://models.silero.ai/models/en/en_v6.jit"
10
+ onnx: "https://models.silero.ai/models/en/en_v5.onnx"
11
+ jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
12
+ jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
13
+ onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
14
+ v6:
15
+ meta:
16
+ name: "en_v6"
17
+ sample: "https://models.silero.ai/examples/en_sample.wav"
18
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
19
+ jit: "https://models.silero.ai/models/en/en_v6.jit"
20
+ onnx: "https://models.silero.ai/models/en/en_v5.onnx"
21
+ jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
22
+ jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
23
+ onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
24
+ v5:
25
+ meta:
26
+ name: "en_v5"
27
+ sample: "https://models.silero.ai/examples/en_sample.wav"
28
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
29
+ jit: "https://models.silero.ai/models/en/en_v5.jit"
30
+ onnx: "https://models.silero.ai/models/en/en_v5.onnx"
31
+ onnx_q: "https://models.silero.ai/models/en/en_v5_q.onnx"
32
+ jit_q: "https://models.silero.ai/models/en/en_v5_q.jit"
33
+ jit_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.jit"
34
+ onnx_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.onnx"
35
+ v4_0:
36
+ meta:
37
+ name: "en_v4_0"
38
+ sample: "https://models.silero.ai/examples/en_sample.wav"
39
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
40
+ jit_large: "https://models.silero.ai/models/en/en_v4_0_jit_large.model"
41
+ onnx_large: "https://models.silero.ai/models/en/en_v4_0_large.onnx"
42
+ v3:
43
+ meta:
44
+ name: "en_v3"
45
+ sample: "https://models.silero.ai/examples/en_sample.wav"
46
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
47
+ jit: "https://models.silero.ai/models/en/en_v3_jit.model"
48
+ onnx: "https://models.silero.ai/models/en/en_v3.onnx"
49
+ jit_q: "https://models.silero.ai/models/en/en_v3_jit_q.model"
50
+ jit_skip: "https://models.silero.ai/models/en/en_v3_jit_skips.model"
51
+ jit_large: "https://models.silero.ai/models/en/en_v3_jit_large.model"
52
+ onnx_large: "https://models.silero.ai/models/en/en_v3_large.onnx"
53
+ jit_xsmall: "https://models.silero.ai/models/en/en_v3_jit_xsmall.model"
54
+ jit_q_xsmall: "https://models.silero.ai/models/en/en_v3_jit_q_xsmall.model"
55
+ onnx_xsmall: "https://models.silero.ai/models/en/en_v3_xsmall.onnx"
56
+ v2:
57
+ meta:
58
+ name: "en_v2"
59
+ sample: "https://models.silero.ai/examples/en_sample.wav"
60
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
61
+ jit: "https://models.silero.ai/models/en/en_v2_jit.model"
62
+ onnx: "https://models.silero.ai/models/en/en_v2.onnx"
63
+ tf: "https://models.silero.ai/models/en/en_v2_tf.tar.gz"
64
+ v1:
65
+ meta:
66
+ name: "en_v1"
67
+ sample: "https://models.silero.ai/examples/en_sample.wav"
68
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
69
+ jit: "https://models.silero.ai/models/en/en_v1_jit.model"
70
+ onnx: "https://models.silero.ai/models/en/en_v1.onnx"
71
+ tf: "https://models.silero.ai/models/en/en_v1_tf.tar.gz"
72
+ de:
73
+ latest:
74
+ meta:
75
+ name: "de_v1"
76
+ sample: "https://models.silero.ai/examples/de_sample.wav"
77
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
78
+ jit: "https://models.silero.ai/models/de/de_v1_jit.model"
79
+ onnx: "https://models.silero.ai/models/de/de_v1.onnx"
80
+ tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
81
+ v1:
82
+ meta:
83
+ name: "de_v1"
84
+ sample: "https://models.silero.ai/examples/de_sample.wav"
85
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
86
+ jit_large: "https://models.silero.ai/models/de/de_v1_jit.model"
87
+ onnx: "https://models.silero.ai/models/de/de_v1.onnx"
88
+ tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
89
+ v3:
90
+ meta:
91
+ name: "de_v3"
92
+ sample: "https://models.silero.ai/examples/de_sample.wav"
93
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
94
+ jit_large: "https://models.silero.ai/models/de/de_v3_large.jit"
95
+ v4:
96
+ meta:
97
+ name: "de_v4"
98
+ sample: "https://models.silero.ai/examples/de_sample.wav"
99
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
100
+ jit_large: "https://models.silero.ai/models/de/de_v4_large.jit"
101
+ onnx_large: "https://models.silero.ai/models/de/de_v4_large.onnx"
102
+ es:
103
+ latest:
104
+ meta:
105
+ name: "es_v1"
106
+ sample: "https://models.silero.ai/examples/es_sample.wav"
107
+ labels: "https://models.silero.ai/models/es/es_v1_labels.json"
108
+ jit: "https://models.silero.ai/models/es/es_v1_jit.model"
109
+ onnx: "https://models.silero.ai/models/es/es_v1.onnx"
110
+ tf: "https://models.silero.ai/models/es/es_v1_tf.tar.gz"
111
+ ua:
112
+ latest:
113
+ meta:
114
+ name: "ua_v3"
115
+ sample: "https://models.silero.ai/examples/ua_sample.wav"
116
+ credits:
117
+ datasets:
118
+ speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
119
+ labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
120
+ jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
121
+ jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
122
+ onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
123
+ v3:
124
+ meta:
125
+ name: "ua_v3"
126
+ sample: "https://models.silero.ai/examples/ua_sample.wav"
127
+ credits:
128
+ datasets:
129
+ speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
130
+ labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
131
+ jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
132
+ jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
133
+ onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
134
+ v1:
135
+ meta:
136
+ name: "ua_v1"
137
+ sample: "https://models.silero.ai/examples/ua_sample.wav"
138
+ credits:
139
+ datasets:
140
+ speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
141
+ labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
142
+ jit: "https://models.silero.ai/models/ua/ua_v1_jit.model"
143
+ jit_q: "https://models.silero.ai/models/ua/ua_v1_jit_q.model"
144
+ tts_models:
145
+ ru:
146
+ v3_1_ru:
147
+ latest:
148
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
149
+ package: 'https://models.silero.ai/models/tts/ru/v3_1_ru.pt'
150
+ sample_rate: [8000, 24000, 48000]
151
+ ru_v3:
152
+ latest:
153
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
154
+ package: 'https://models.silero.ai/models/tts/ru/ru_v3.pt'
155
+ sample_rate: [8000, 24000, 48000]
156
+ aidar_v2:
157
+ latest:
158
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
159
+ package: 'https://models.silero.ai/models/tts/ru/v2_aidar.pt'
160
+ sample_rate: [8000, 16000]
161
+ aidar_8khz:
162
+ latest:
163
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
164
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
165
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
166
+ sample_rate: 8000
167
+ v1:
168
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
169
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
170
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
171
+ sample_rate: 8000
172
+ aidar_16khz:
173
+ latest:
174
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
175
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
176
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
177
+ sample_rate: 16000
178
+ v1:
179
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
180
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
181
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
182
+ sample_rate: 16000
183
+ baya_v2:
184
+ latest:
185
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
186
+ package: 'https://models.silero.ai/models/tts/ru/v2_baya.pt'
187
+ sample_rate: [8000, 16000]
188
+ baya_8khz:
189
+ latest:
190
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
191
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
192
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
193
+ sample_rate: 8000
194
+ v1:
195
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
196
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
197
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
198
+ sample_rate: 8000
199
+ baya_16khz:
200
+ latest:
201
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
202
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
203
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
204
+ sample_rate: 16000
205
+ v1:
206
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
207
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
208
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
209
+ sample_rate: 16000
210
+ irina_v2:
211
+ latest:
212
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
213
+ package: 'https://models.silero.ai/models/tts/ru/v2_irina.pt'
214
+ sample_rate: [8000, 16000]
215
+ irina_8khz:
216
+ latest:
217
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
218
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
219
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
220
+ sample_rate: 8000
221
+ v1:
222
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
223
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
224
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
225
+ sample_rate: 8000
226
+ irina_16khz:
227
+ latest:
228
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
229
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
230
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
231
+ sample_rate: 16000
232
+ v1:
233
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
234
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
235
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
236
+ sample_rate: 16000
237
+ kseniya_v2:
238
+ latest:
239
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
240
+ package: 'https://models.silero.ai/models/tts/ru/v2_kseniya.pt'
241
+ sample_rate: [8000, 16000]
242
+ kseniya_8khz:
243
+ latest:
244
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
245
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
246
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
247
+ sample_rate: 8000
248
+ v1:
249
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
250
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
251
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
252
+ sample_rate: 8000
253
+ kseniya_16khz:
254
+ latest:
255
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
256
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
257
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
258
+ sample_rate: 16000
259
+ v1:
260
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
261
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
262
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
263
+ sample_rate: 16000
264
+ natasha_v2:
265
+ latest:
266
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
267
+ package: 'https://models.silero.ai/models/tts/ru/v2_natasha.pt'
268
+ sample_rate: [8000, 16000]
269
+ natasha_8khz:
270
+ latest:
271
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
272
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
273
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
274
+ sample_rate: 8000
275
+ v1:
276
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
277
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
278
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
279
+ sample_rate: 8000
280
+ natasha_16khz:
281
+ latest:
282
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
283
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
284
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
285
+ sample_rate: 16000
286
+ v1:
287
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
288
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
289
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
290
+ sample_rate: 16000
291
+ ruslan_v2:
292
+ latest:
293
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
294
+ package: 'https://models.silero.ai/models/tts/ru/v2_ruslan.pt'
295
+ sample_rate: [8000, 16000]
296
+ ruslan_8khz:
297
+ latest:
298
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
299
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
300
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
301
+ sample_rate: 8000
302
+ v1:
303
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
304
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
305
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
306
+ sample_rate: 8000
307
+ ruslan_16khz:
308
+ latest:
309
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
310
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
311
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
312
+ sample_rate: 16000
313
+ v1:
314
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
315
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
316
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
317
+ sample_rate: 16000
318
+ en:
319
+ v3_en:
320
+ latest:
321
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
322
+ package: 'https://models.silero.ai/models/tts/en/v3_en.pt'
323
+ sample_rate: [8000, 24000, 48000]
324
+ v3_en_indic:
325
+ latest:
326
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
327
+ package: 'https://models.silero.ai/models/tts/en/v3_en_indic.pt'
328
+ sample_rate: [8000, 24000, 48000]
329
+ lj_v2:
330
+ latest:
331
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
332
+ package: 'https://models.silero.ai/models/tts/en/v2_lj.pt'
333
+ sample_rate: [8000, 16000]
334
+ lj_8khz:
335
+ latest:
336
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
337
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
338
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
339
+ sample_rate: 8000
340
+ v1:
341
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
342
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
343
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
344
+ sample_rate: 8000
345
+ lj_16khz:
346
+ latest:
347
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
348
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
349
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
350
+ sample_rate: 16000
351
+ v1:
352
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
353
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
354
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
355
+ sample_rate: 16000
356
+ de:
357
+ v3_de:
358
+ latest:
359
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
360
+ package: 'https://models.silero.ai/models/tts/de/v3_de.pt'
361
+ sample_rate: [8000, 24000, 48000]
362
+ thorsten_v2:
363
+ latest:
364
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
365
+ package: 'https://models.silero.ai/models/tts/de/v2_thorsten.pt'
366
+ sample_rate: [8000, 16000]
367
+ thorsten_8khz:
368
+ latest:
369
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
370
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
371
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
372
+ sample_rate: 8000
373
+ v1:
374
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
375
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
376
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
377
+ sample_rate: 8000
378
+ thorsten_16khz:
379
+ latest:
380
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
381
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
382
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
383
+ sample_rate: 16000
384
+ v1:
385
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
386
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
387
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
388
+ sample_rate: 16000
389
+ es:
390
+ v3_es:
391
+ latest:
392
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
393
+ package: 'https://models.silero.ai/models/tts/es/v3_es.pt'
394
+ sample_rate: [8000, 24000, 48000]
395
+ tux_v2:
396
+ latest:
397
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
398
+ package: 'https://models.silero.ai/models/tts/es/v2_tux.pt'
399
+ sample_rate: [8000, 16000]
400
+ tux_8khz:
401
+ latest:
402
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
403
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
404
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
405
+ sample_rate: 8000
406
+ v1:
407
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
408
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
409
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
410
+ sample_rate: 8000
411
+ tux_16khz:
412
+ latest:
413
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
414
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
415
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
416
+ sample_rate: 16000
417
+ v1:
418
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
419
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
420
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
421
+ sample_rate: 16000
422
+ fr:
423
+ v3_fr:
424
+ latest:
425
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
426
+ package: 'https://models.silero.ai/models/tts/fr/v3_fr.pt'
427
+ sample_rate: [8000, 24000, 48000]
428
+ gilles_v2:
429
+ latest:
430
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
431
+ package: 'https://models.silero.ai/models/tts/fr/v2_gilles.pt'
432
+ sample_rate: [8000, 16000]
433
+ gilles_8khz:
434
+ latest:
435
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
436
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
437
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
438
+ sample_rate: 8000
439
+ v1:
440
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
441
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
442
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
443
+ sample_rate: 8000
444
+ gilles_16khz:
445
+ latest:
446
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
447
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
448
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
449
+ sample_rate: 16000
450
+ v1:
451
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
452
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
453
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
454
+ sample_rate: 16000
455
+ ba:
456
+ aigul_v2:
457
+ latest:
458
+ example: 'Салауат Юлаевтың тормошо һәм яҙмышы хаҡындағы документтарҙың һәм шиғри әҫәрҙәренең бик аҙ өлөшө генә һаҡланған.'
459
+ package: 'https://models.silero.ai/models/tts/ba/v2_aigul.pt'
460
+ sample_rate: [8000, 16000]
461
+ language_name: 'bashkir'
462
+ xal:
463
+ v3_xal:
464
+ latest:
465
+ example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.'
466
+ package: 'https://models.silero.ai/models/tts/xal/v3_xal.pt'
467
+ sample_rate: [8000, 24000, 48000]
468
+ erdni_v2:
469
+ latest:
470
+ example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.'
471
+ package: 'https://models.silero.ai/models/tts/xal/v2_erdni.pt'
472
+ sample_rate: [8000, 16000]
473
+ language_name: 'kalmyk'
474
+ tt:
475
+ v3_tt:
476
+ latest:
477
+ example: 'Исәнмесез, саумысез, нишләп кәҗәгезне саумыйсыз, әтәчегез күкәй салган, нишләп чыгып алмыйсыз.'
478
+ package: 'https://models.silero.ai/models/tts/tt/v3_tt.pt'
479
+ sample_rate: [8000, 24000, 48000]
480
+ dilyara_v2:
481
+ latest:
482
+ example: 'Ис+әнмесез, с+аумысез, нишл+әп кәҗәгезн+е с+аумыйсыз, әтәчег+ез күк+әй салг+ан, нишл+әп чыг+ып +алмыйсыз.'
483
+ package: 'https://models.silero.ai/models/tts/tt/v2_dilyara.pt'
484
+ sample_rate: [8000, 16000]
485
+ language_name: 'tatar'
486
+ uz:
487
+ v3_uz:
488
+ latest:
489
+ example: 'Tanishganimdan xursandman.'
490
+ package: 'https://models.silero.ai/models/tts/uz/v3_uz.pt'
491
+ sample_rate: [8000, 24000, 48000]
492
+ dilnavoz_v2:
493
+ latest:
494
+ example: 'Tanishganimdan xursandman.'
495
+ package: 'https://models.silero.ai/models/tts/uz/v2_dilnavoz.pt'
496
+ sample_rate: [8000, 16000]
497
+ language_name: 'uzbek'
498
+ ua:
499
+ v3_ua:
500
+ latest:
501
+ example: 'К+отики - пухн+асті жив+отики.'
502
+ package: 'https://models.silero.ai/models/tts/ua/v3_ua.pt'
503
+ sample_rate: [8000, 24000, 48000]
504
+ mykyta_v2:
505
+ latest:
506
+ example: 'К+отики - пухн+асті жив+отики.'
507
+ package: 'https://models.silero.ai/models/tts/ua/v22_mykyta_48k.pt'
508
+ sample_rate: [8000, 24000, 48000]
509
+ language_name: 'ukrainian'
510
+ indic:
511
+ v3_indic:
512
+ latest:
513
+ example: 'prasidda kabīra adhyētā, puruṣōttama agravāla kā yaha śōdha ālēkha, usa rāmānaṁda kī khōja karatā hai'
514
+ package: 'https://models.silero.ai/models/tts/indic/v3_indic.pt'
515
+ sample_rate: [8000, 24000, 48000]
516
+ multi:
517
+ multi_v2:
518
+ latest:
519
+ package: 'https://models.silero.ai/models/tts/multi/v2_multi.pt'
520
+ sample_rate: [8000, 16000]
521
+ speakers:
522
+ aidar:
523
+ lang: 'ru'
524
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
525
+ baya:
526
+ lang: 'ru'
527
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
528
+ kseniya:
529
+ lang: 'ru'
530
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
531
+ irina:
532
+ lang: 'ru'
533
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
534
+ ruslan:
535
+ lang: 'ru'
536
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
537
+ natasha:
538
+ lang: 'ru'
539
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
540
+ thorsten:
541
+ lang: 'de'
542
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
543
+ tux:
544
+ lang: 'es'
545
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
546
+ gilles:
547
+ lang: 'fr'
548
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
549
+ lj:
550
+ lang: 'en'
551
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
552
+ dilyara:
553
+ lang: 'tt'
554
+ example: 'Пес+и пес+и песик+әй, борыннар+ы бәләк+әй.'
555
+ te_models:
556
+ latest:
557
+ package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
558
+ languages: ['en', 'de', 'ru', 'es']
559
+ punct: '.,-!?—'
560
+ v2:
561
+ package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
562
+ languages: ['en', 'de', 'ru', 'es']
563
+ punct: '.,-!?—'
notebooks/1232-h.htm ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/audiobook_gen_silero.ipynb ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "---\n",
8
+ "\n",
9
+ "# Audiobook Generator - Proof of Concept Notebook\n",
10
+ "\n",
11
+ "This notebook is intended to be a proof of concept for the end-to-end work of generating an audiobook file from an ebook. This includes converting the .epub book files into raw python text strings, splitting into items and sentences, then tokenizing and batching them to run through the Silero text-to-speech (TTS) implementation.\n",
12
+ "\n",
13
+ "*Updated: September 2, 2022*\n",
14
+ "\n",
15
+ "---\n",
16
+ "\n",
17
+ "### Overview\n",
18
+ "\n",
19
+ "1. Setup\n",
20
+ " - Needed libraries and packages\n",
21
+ " - Variables\n",
22
+ " - Silero model selection\n",
23
+ "2. Ebook Import\n",
24
+ " - Target file selection\n",
25
+ " - File (.epub) import\n",
26
+ " - String parsing\n",
27
+ " - String length wrapping\n",
28
+ "3. Text-to-Speech\n",
29
+ " - Silero implementation\n",
30
+ " - Results\n",
31
+ "\n",
32
+ "---"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "markdown",
37
+ "metadata": {},
38
+ "source": [
39
+ "### Step 1 - Setup\n",
40
+ "\n",
41
+ "This proof-of-concept relies on PyTorch and TorchAudio for its implementation. OmegaConf is used to support providing the latest model from Silero in a consistent manner. A seed is created, and used for all random functions that are needed.\n",
42
+ "\n",
43
+ "We will also use the TQDM package to provide progress bars while running the proof-of-concept within this notebook."
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": null,
49
+ "metadata": {},
50
+ "outputs": [],
51
+ "source": [
52
+ "import os\n",
53
+ "import torch\n",
54
+ "import torchaudio\n",
55
+ "from omegaconf import OmegaConf\n",
56
+ "from tqdm.notebook import tqdm\n",
57
+ "\n",
58
+ "torch.hub.download_url_to_file('https://raw.githubusercontent.com/snakers4/silero-models/master/models.yml',\n",
59
+ " 'latest_silero_models.yml',\n",
60
+ " progress=False)\n",
61
+ "models = OmegaConf.load('latest_silero_models.yml')\n",
62
+ "\n",
63
+ "seed = 1337\n",
64
+ "torch.manual_seed(seed)\n",
65
+ "torch.cuda.manual_seed(seed)\n",
66
+ "\n",
67
+ "device = 'cuda' if torch.cuda.is_available() else 'cpu'"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "markdown",
72
+ "metadata": {},
73
+ "source": [
74
+ "We also need to set some variables for later use during the text processing steps, and the audio output in the TTS step.\n",
75
+ "\n",
76
+ "- `max_char_len` is set based on the results of performance testing done by the Silero devs. Larger values enable sentence structure to be better preserved, but negatively affect performance.\n",
77
+ "- `sample_rate` is also set based on recommendations from the Silero team for performance vs. quality. Using 16k or 8k audio will improve performance, but result in lower quality audio. Silero estimates a decrease of ~0.5 MOS (from 3.7 to 3.2)."
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "code",
82
+ "execution_count": null,
83
+ "metadata": {},
84
+ "outputs": [],
85
+ "source": [
86
+ "max_char_len = 140\n",
87
+ "sample_rate = 24000"
88
+ ]
89
+ },
90
+ {
91
+ "cell_type": "markdown",
92
+ "metadata": {},
93
+ "source": [
94
+ "The Silero implementation comes with models trained on various languages, the most common being Russian, but we will use the latest English model for this proof of concept. There are also a number of English speaker choices available."
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": null,
100
+ "metadata": {},
101
+ "outputs": [],
102
+ "source": [
103
+ "language = 'en'\n",
104
+ "model_id = 'v3_en'\n",
105
+ "speaker = 'en_0'\n",
106
+ "\n",
107
+ "model, example_text = torch.hub.load(repo_or_dir='snakers4/silero-models',\n",
108
+ " model='silero_tts',\n",
109
+ " language=language,\n",
110
+ " speaker=model_id)\n",
111
+ "model.to(device) # gpu or cpu"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "markdown",
116
+ "metadata": {},
117
+ "source": [
118
+ "### Step 2 - Ebook Import\n",
119
+ "\n",
120
+ "Below is a representative ebook (`Portrait of Dorian Gray`), taken from Project Gutenberg - a free directory of public-domain works."
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": null,
126
+ "metadata": {},
127
+ "outputs": [],
128
+ "source": [
129
+ "ebook_path = 'test.epub'"
130
+ ]
131
+ },
132
+ {
133
+ "cell_type": "markdown",
134
+ "metadata": {},
135
+ "source": [
136
+ "The function below - `read_ebook()` - performs the following steps:\n",
137
+ "- Takes in the ebook, located at `ebook_path`\n",
138
+ "- Strips out any html tags\n",
139
+ "- Uses the nltk packages to download and use the `punkt` sentence-level tokenizer\n",
140
+ "- Calls the TextWrapper package to wrap sentences to the `max_char_len`, with care to fix sentence endings\n",
141
+ "- I.e. sentences are not split in the middle of a word, but rather words are preserved\n",
142
+ "- Finally sentences are appended to a chapter, and the chapters to a complete list: `corpus`"
143
+ ]
144
+ },
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": null,
148
+ "metadata": {},
149
+ "outputs": [],
150
+ "source": [
151
+ "def read_ebook(ebook_path):\n",
152
+ "\n",
153
+ " import ebooklib\n",
154
+ " from ebooklib import epub\n",
155
+ " from bs4 import BeautifulSoup\n",
156
+ " from tqdm.notebook import tqdm\n",
157
+ " from nltk import tokenize, download\n",
158
+ " from textwrap import TextWrapper\n",
159
+ "\n",
160
+ " download('punkt')\n",
161
+ " wrapper = TextWrapper(max_char_len, fix_sentence_endings=True)\n",
162
+ "\n",
163
+ " book = epub.read_epub(ebook_path)\n",
164
+ "\n",
165
+ " ebook_title = book.get_metadata('DC', 'title')[0][0]\n",
166
+ " ebook_title = ebook_title.lower().replace(' ', '_')\n",
167
+ "\n",
168
+ " corpus = []\n",
169
+ " for item in tqdm(list(book.get_items())):\n",
170
+ " if item.get_type() == ebooklib.ITEM_DOCUMENT:\n",
171
+ " input_text = BeautifulSoup(item.get_content(), \"html.parser\").text\n",
172
+ " text_list = []\n",
173
+ " for paragraph in input_text.split('\\n'):\n",
174
+ " paragraph = paragraph.replace('—', '-')\n",
175
+ " sentences = tokenize.sent_tokenize(paragraph)\n",
176
+ "\n",
177
+ " # Truncate sentences to maximum character limit\n",
178
+ " sentence_list = []\n",
179
+ " for sentence in sentences:\n",
180
+ " wrapped_sentences = wrapper.wrap(sentence)\n",
181
+ " sentence_list.append(wrapped_sentences)\n",
182
+ " # Flatten list of list of sentences\n",
183
+ " trunc_sentences = [phrase for sublist in sentence_list for phrase in sublist]\n",
184
+ "\n",
185
+ " text_list.append(trunc_sentences)\n",
186
+ " text_list = [text for sentences in text_list for text in sentences]\n",
187
+ " corpus.append(text_list)\n",
188
+ "\n",
189
+ " return corpus, ebook_title"
190
+ ]
191
+ },
192
+ {
193
+ "cell_type": "markdown",
194
+ "metadata": {},
195
+ "source": [
196
+ "Here we use the above function to read in the chosen ebook."
197
+ ]
198
+ },
199
+ {
200
+ "cell_type": "code",
201
+ "execution_count": null,
202
+ "metadata": {},
203
+ "outputs": [],
204
+ "source": [
205
+ "ebook, title = read_ebook(ebook_path)"
206
+ ]
207
+ },
208
+ {
209
+ "cell_type": "markdown",
210
+ "metadata": {},
211
+ "source": [
212
+ "And here, let us take a peek at the contents of the ebook:"
213
+ ]
214
+ },
215
+ {
216
+ "cell_type": "code",
217
+ "execution_count": null,
218
+ "metadata": {},
219
+ "outputs": [],
220
+ "source": [
221
+ "print(f'Title of ebook (path name):{title}\\n')\n",
222
+ "print(f'First line of the ebook:{ebook[0][0]}\\n')\n",
223
+ "print(f'First paragraph (truncated for display): \\n {ebook[2][0:5]}')"
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": null,
229
+ "metadata": {},
230
+ "outputs": [],
231
+ "source": [
232
+ "ebook[0][0]"
233
+ ]
234
+ },
235
+ {
236
+ "cell_type": "markdown",
237
+ "metadata": {},
238
+ "source": [
239
+ "### Step 3 - Text-to-Speech\n",
240
+ "\n",
241
+ "The ebook is fed through the Silero TTS implementation sentence by sentence. We will also check that each tensor being created is valid (i.e. non-zero).\n",
242
+ "\n",
243
+ "Finally, the output tensors are exported as `.wav` files on a chapter by chapter basis - consistent with the file structure of common audiobooks."
244
+ ]
245
+ },
246
+ {
247
+ "cell_type": "code",
248
+ "execution_count": null,
249
+ "metadata": {},
250
+ "outputs": [],
251
+ "source": [
252
+ "#os.mkdir(f'outputs/{title}')\n",
253
+ "\n",
254
+ "for chapter in tqdm(ebook[0:3]):\n",
255
+ " chapter_index = f'chapter{ebook.index(chapter):03}'\n",
256
+ " audio_list = []\n",
257
+ " for sentence in tqdm(chapter):\n",
258
+ " audio = model.apply_tts(text=sentence,\n",
259
+ " speaker=speaker,\n",
260
+ " sample_rate=sample_rate)\n",
261
+ " if len(audio) > 0 and isinstance(audio, torch.Tensor):\n",
262
+ " audio_list.append(audio)\n",
263
+ " else:\n",
264
+ " print(f'Tensor for sentence is not valid: \\n {sentence}')\n",
265
+ "\n",
266
+ " sample_path = f'outputs/{title}/{chapter_index}.wav'\n",
267
+ "\n",
268
+ " if len(audio_list) > 0:\n",
269
+ " audio_file = torch.cat(audio_list).reshape(1, -1)\n",
270
+ "# torchaudio.save(sample_path, audio_file, sample_rate)\n",
271
+ " else:\n",
272
+ " print(f'Chapter {chapter_index} is empty.')"
273
+ ]
274
+ },
275
+ {
276
+ "cell_type": "markdown",
277
+ "metadata": {},
278
+ "source": [
279
+ "### Results\n",
280
+ "\n",
281
+ "##### CPU (i7-4790k)\n",
282
+ "\n",
283
+ "Running \"Pride and Prejudice\" through the Silero model took **34m42s** to convert. This book is a good representation of the average book length: the average audiobook length on Audible is between 10 & 12 hours, while Pride and Prejudice is 11h20m.\n",
284
+ "\n",
285
+ "This is approximately a 20:1 ratio of audio length to processing time.\n",
286
+ "\n",
287
+ "Pride and Prejudice: **34m42s** - 1h39m33s on i7-4650u\n",
288
+ "\n",
289
+ "Portrait of Dorian Gray: **18m18s** - 18m50s w/output - 1h06m04s on i7-4650u\n",
290
+ "\n",
291
+ "Crime and Punishment: **Unknown** - error converting ebook at 7/50, 19/368\n",
292
+ "\n",
293
+ "##### GPU (P4000)\n",
294
+ "\n",
295
+ "Running the same book through the Silero model on GPU took **5m39s** to convert.\n",
296
+ "\n",
297
+ "This is approximately a 122:1 ratio of audio length to processing time.\n",
298
+ "\n",
299
+ "Pride and Prejudice: **5m39s**\n",
300
+ "\n",
301
+ "Portrait of Dorian Gray: **4m26s**\n",
302
+ "\n",
303
+ "Crime and Punishment: **Unknown** - error converting ebook"
304
+ ]
305
+ },
306
+ {
307
+ "cell_type": "code",
308
+ "execution_count": null,
309
+ "metadata": {},
310
+ "outputs": [],
311
+ "source": []
312
+ }
313
+ ],
314
+ "metadata": {
315
+ "kernelspec": {
316
+ "display_name": "Python 3",
317
+ "language": "python",
318
+ "name": "python3"
319
+ },
320
+ "language_info": {
321
+ "codemirror_mode": {
322
+ "name": "ipython",
323
+ "version": 3
324
+ },
325
+ "file_extension": ".py",
326
+ "mimetype": "text/x-python",
327
+ "name": "python",
328
+ "nbconvert_exporter": "python",
329
+ "pygments_lexer": "ipython3",
330
+ "version": "3.8.10"
331
+ }
332
+ },
333
+ "nbformat": 4,
334
+ "nbformat_minor": 4
335
+ }
notebooks/parser_function_html.ipynb ADDED
@@ -0,0 +1,480 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ <<<<<<< HEAD
6
+ "execution_count": 1,
7
+ "id": "27a75ece",
8
+ "metadata": {},
9
+ "outputs": [],
10
+ "source": [
11
+ "import nltk"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": null,
17
+ =======
18
+ "execution_count": 2,
19
+ >>>>>>> appdev
20
+ "id": "5292a160",
21
+ "metadata": {},
22
+ "outputs": [],
23
+ "source": [
24
+ "import re\n",
25
+ "import numpy as np\n",
26
+ "\n",
27
+ "from bs4 import BeautifulSoup\n",
28
+ "from nltk import tokenize, download\n",
29
+ "from textwrap import TextWrapper"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ <<<<<<< HEAD
35
+ "execution_count": null,
36
+ =======
37
+ "execution_count": 3,
38
+ >>>>>>> appdev
39
+ "id": "68609a77",
40
+ "metadata": {},
41
+ "outputs": [],
42
+ "source": [
43
+ "# file_path = '1232-h.htm'\n",
44
+ <<<<<<< HEAD
45
+ "file_path = 'test.htm'"
46
+ =======
47
+ "file_path = ''"
48
+ >>>>>>> appdev
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ <<<<<<< HEAD
54
+ "execution_count": null,
55
+ =======
56
+ "execution_count": 4,
57
+ >>>>>>> appdev
58
+ "id": "5c526c9b",
59
+ "metadata": {},
60
+ "outputs": [],
61
+ "source": [
62
+ "download('punkt', quiet=True)\n",
63
+ "wrapper = TextWrapper(140, fix_sentence_endings=True)"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ <<<<<<< HEAD
69
+ "execution_count": null,
70
+ =======
71
+ "execution_count": 5,
72
+ >>>>>>> appdev
73
+ "id": "d4732304",
74
+ "metadata": {},
75
+ "outputs": [],
76
+ "source": [
77
+ "def preprocess(file):\n",
78
+ " input_text = BeautifulSoup(file, \"html.parser\").text\n",
79
+ " text_list = []\n",
80
+ " for paragraph in input_text.split('\\n'):\n",
81
+ " paragraph = paragraph.replace('—', '-')\n",
82
+ " paragraph = paragraph.replace(' .', '')\n",
83
+ " paragraph = re.sub(r'[^\\x00-\\x7f]', \"\", paragraph)\n",
84
+ " paragraph = re.sub(r'x0f', \" \", paragraph)\n",
85
+ " sentences = tokenize.sent_tokenize(paragraph)\n",
86
+ "\n",
87
+ " sentence_list = []\n",
88
+ " for sentence in sentences:\n",
89
+ " if not re.search('[a-zA-Z]', sentence):\n",
90
+ " sentence = ''\n",
91
+ " wrapped_sentences = wrapper.wrap(sentence)\n",
92
+ " sentence_list.append(wrapped_sentences)\n",
93
+ " trunc_sentences = [phrase for sublist in sentence_list for phrase in sublist]\n",
94
+ " text_list.append(trunc_sentences)\n",
95
+ " text_list = [text for sentences in text_list for text in sentences]\n",
96
+ " return text_list"
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": null,
102
+ "id": "3045665a",
103
+ "metadata": {},
104
+ "outputs": [],
105
+ "source": [
106
+ "def read_html(file):\n",
107
+ " corpus = preprocess(file)\n",
108
+ " return corpus"
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "code",
113
+ "execution_count": null,
114
+ "id": "e18be118",
115
+ "metadata": {},
116
+ "outputs": [],
117
+ "source": [
118
+ "with open(file_path, 'r') as f:\n",
119
+ " ebook_upload = f.read()\n",
120
+ "corpus = read_html(ebook_upload)"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ <<<<<<< HEAD
126
+ "execution_count": null,
127
+ "id": "ece1c7d3",
128
+ "metadata": {},
129
+ "outputs": [],
130
+ =======
131
+ "execution_count": 11,
132
+ "id": "ece1c7d3",
133
+ "metadata": {},
134
+ "outputs": [
135
+ {
136
+ "data": {
137
+ "text/plain": [
138
+ "(1, 2)"
139
+ ]
140
+ },
141
+ "execution_count": 11,
142
+ "metadata": {},
143
+ "output_type": "execute_result"
144
+ }
145
+ ],
146
+ >>>>>>> appdev
147
+ "source": [
148
+ "np.shape(corpus)"
149
+ ]
150
+ },
151
+ {
152
+ "cell_type": "code",
153
+ <<<<<<< HEAD
154
+ "execution_count": null,
155
+ "id": "dc7e4010",
156
+ "metadata": {},
157
+ "outputs": [],
158
+ =======
159
+ "execution_count": 12,
160
+ "id": "dc7e4010",
161
+ "metadata": {},
162
+ "outputs": [
163
+ {
164
+ "ename": "IndexError",
165
+ "evalue": "list index out of range",
166
+ "output_type": "error",
167
+ "traceback": [
168
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
169
+ "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
170
+ "Cell \u001b[0;32mIn [12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcorpus\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m]\u001b[49m\n",
171
+ "\u001b[0;31mIndexError\u001b[0m: list index out of range"
172
+ ]
173
+ }
174
+ ],
175
+ >>>>>>> appdev
176
+ "source": [
177
+ "corpus[0][2]"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ <<<<<<< HEAD
183
+ "execution_count": null,
184
+ "id": "6cb47a2d",
185
+ "metadata": {},
186
+ "outputs": [],
187
+ =======
188
+ "execution_count": 13,
189
+ "id": "6cb47a2d",
190
+ "metadata": {},
191
+ "outputs": [
192
+ {
193
+ "data": {
194
+ "text/plain": [
195
+ "[['Predict Testing Text File',\n",
196
+ " 'Audiobook Gen is a tool that allows the users to generate an audio file from an ebook or other document.']]"
197
+ ]
198
+ },
199
+ "execution_count": 13,
200
+ "metadata": {},
201
+ "output_type": "execute_result"
202
+ }
203
+ ],
204
+ >>>>>>> appdev
205
+ "source": [
206
+ "corpus"
207
+ ]
208
+ },
209
+ {
210
+ "cell_type": "code",
211
+ "execution_count": null,
212
+ <<<<<<< HEAD
213
+ =======
214
+ "id": "8508b073",
215
+ "metadata": {},
216
+ "outputs": [],
217
+ "source": []
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": null,
222
+ >>>>>>> appdev
223
+ "id": "d11031c7",
224
+ "metadata": {},
225
+ "outputs": [],
226
+ "source": [
227
+ "assert title == \"1232-h\"\n",
228
+ "assert np.shape(corpus) == (1, 5476)\n",
229
+ "assert corpus[0][0] == 'The Project Gutenberg eBook of The Prince, by Nicolo Machiavelli'\n",
230
+ "assert corpus[0][2] == 'This eBook is for the use of anyone anywhere in the United States and'"
231
+ ]
232
+ },
233
+ {
234
+ "cell_type": "code",
235
+ "execution_count": null,
236
+ "id": "0c57eec6",
237
+ "metadata": {},
238
+ "outputs": [],
239
+ "source": []
240
+ },
241
+ {
242
+ "cell_type": "code",
243
+ "execution_count": 2,
244
+ "id": "af281267",
245
+ "metadata": {},
246
+ "outputs": [],
247
+ "source": [
248
+ "import re\n",
249
+ "\n",
250
+ "from bs4 import BeautifulSoup\n",
251
+ "from nltk import tokenize, download\n",
252
+ "from textwrap import TextWrapper\n",
253
+ "from stqdm import stqdm"
254
+ ]
255
+ },
256
+ {
257
+ "cell_type": "code",
258
+ "execution_count": 6,
259
+ "id": "676ce437",
260
+ "metadata": {},
261
+ "outputs": [],
262
+ "source": [
263
+ "download('punkt', quiet=True)\n",
264
+ "wrapper = TextWrapper(140, fix_sentence_endings=True)\n",
265
+ "file_path = 'test.txt'"
266
+ ]
267
+ },
268
+ {
269
+ "cell_type": "code",
270
+ "execution_count": 7,
271
+ "id": "4d278f8e",
272
+ "metadata": {},
273
+ "outputs": [],
274
+ "source": [
275
+ "def preprocess_text(file):\n",
276
+ " input_text = BeautifulSoup(file, \"html.parser\").text\n",
277
+ " text_list = []\n",
278
+ " for paragraph in input_text.split('\\n'):\n",
279
+ " paragraph = paragraph.replace('—', '-')\n",
280
+ " paragraph = paragraph.replace(' .', '')\n",
281
+ " paragraph = re.sub(r'[^\\x00-\\x7f]', \"\", paragraph)\n",
282
+ " paragraph = re.sub(r'x0f', \" \", paragraph)\n",
283
+ " sentences = tokenize.sent_tokenize(paragraph)\n",
284
+ "\n",
285
+ " sentence_list = []\n",
286
+ " for sentence in sentences:\n",
287
+ " if not re.search('[a-zA-Z]', sentence):\n",
288
+ " sentence = ''\n",
289
+ " wrapped_sentences = wrapper.wrap(sentence)\n",
290
+ " sentence_list.append(wrapped_sentences)\n",
291
+ " trunc_sentences = [phrase for sublist in sentence_list for phrase in sublist]\n",
292
+ " text_list.append(trunc_sentences)\n",
293
+ " text_list = [text for sentences in text_list for text in sentences]\n",
294
+ " return text_list"
295
+ ]
296
+ },
297
+ {
298
+ "cell_type": "code",
299
+ "execution_count": 8,
300
+ "id": "f67e0184",
301
+ "metadata": {},
302
+ "outputs": [],
303
+ "source": [
304
+ "with open(file_path, 'r') as uploaded_file:\n",
305
+ " file = uploaded_file.read()\n",
306
+ " text = preprocess_text(file)"
307
+ ]
308
+ },
309
+ {
310
+ "cell_type": "code",
311
+ "execution_count": 10,
312
+ "id": "0bd67797",
313
+ "metadata": {},
314
+ "outputs": [
315
+ {
316
+ "data": {
317
+ "text/plain": [
318
+ "'Testing Text File \\n\\nWith generated random Lorem Ipsum and other unexpected characters!\\n\\n<a href=\"https://github.com/mkutarna/audiobook_gen/\">Link to generator repo!</a>\\n\\n此行是对非英语字符的测试\\n\\nLorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Proin fermentum leo vel orci porta non pulvinar. Pretium lectus quam id leo in vitae turpis massa sed. Donec ac odio tempor orci dapibus. Feugiat in ante metus dictum at tempor. Elementum tempus egestas sed sed risus. Adipiscing commodo elit at imperdiet dui accumsan sit. Placerat orci nulla pellentesque dignissim enim. Posuere lorem ipsum dolor sit. Id ornare arcu odio ut sem. Purus faucibus ornare suspendisse sed nisi lacus sed. Ac turpis egestas sed tempus urna et pharetra pharetra massa. Morbi quis commodo odio aenean. Malesuada proin libero nunc consequat interdum. Ut placerat orci nulla pellentesque dignissim enim sit. Elit at imperdiet dui accumsan sit amet.\\n\\nBuilt to test various characters and other possible inputs to the silero model.\\n\\nHere are some Chinese characters: 此行是对非英语字符的测试.\\n\\nThere are 24 letters in the Greek alphabet. The vowels: are α, ε, η, ι, ο, ω, υ. All the rest are consonants.\\n\\nWe can also test for mathematical symbols: ∫, ∇, ∞, δ, ε, X̄, %, √ ,a, ±, ÷, +, = ,-.\\n\\nFinally, here are some emoticons: ☺️🙂😊😀😁☹️🙁😞😟😣😖😨😧😦😱😫😩.'"
319
+ ]
320
+ },
321
+ "execution_count": 10,
322
+ "metadata": {},
323
+ "output_type": "execute_result"
324
+ }
325
+ ],
326
+ "source": [
327
+ "file"
328
+ ]
329
+ },
330
+ {
331
+ "cell_type": "code",
332
+ "execution_count": 9,
333
+ "id": "064aa16b",
334
+ "metadata": {},
335
+ "outputs": [
336
+ {
337
+ "data": {
338
+ "text/plain": [
339
+ "['Testing Text File',\n",
340
+ " 'With generated random Lorem Ipsum and other unexpected characters!',\n",
341
+ " 'Link to generator repo!',\n",
342
+ " 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.',\n",
343
+ " 'Proin fermentum leo vel orci porta non pulvinar.',\n",
344
+ " 'Pretium lectus quam id leo in vitae turpis massa sed.',\n",
345
+ " 'Donec ac odio tempor orci dapibus.',\n",
346
+ " 'Feugiat in ante metus dictum at tempor.',\n",
347
+ " 'Elementum tempus egestas sed sed risus.',\n",
348
+ " 'Adipiscing commodo elit at imperdiet dui accumsan sit.',\n",
349
+ " 'Placerat orci nulla pellentesque dignissim enim.',\n",
350
+ " 'Posuere lorem ipsum dolor sit.',\n",
351
+ " 'Id ornare arcu odio ut sem.',\n",
352
+ " 'Purus faucibus ornare suspendisse sed nisi lacus sed.',\n",
353
+ " 'Ac turpis egestas sed tempus urna et pharetra pharetra massa.',\n",
354
+ " 'Morbi quis commodo odio aenean.',\n",
355
+ " 'Malesuada proin libero nunc consequat interdum.',\n",
356
+ " 'Ut placerat orci nulla pellentesque dignissim enim sit.',\n",
357
+ " 'Elit at imperdiet dui accumsan sit amet.',\n",
358
+ " 'Built to test various characters and other possible inputs to the silero model.',\n",
359
+ " 'Here are some Chinese characters: .',\n",
360
+ " 'There are 24 letters in the Greek alphabet.',\n",
361
+ " 'The vowels: are , , , , , , .',\n",
362
+ " 'All the rest are consonants.',\n",
363
+ " 'We can also test for mathematical symbols: , , , , , X, %, ,a, , , +, = ,-.',\n",
364
+ " 'Finally, here are some emoticons: .']"
365
+ ]
366
+ },
367
+ "execution_count": 9,
368
+ "metadata": {},
369
+ "output_type": "execute_result"
370
+ }
371
+ ],
372
+ "source": [
373
+ "text"
374
+ ]
375
+ },
376
+ {
377
+ "cell_type": "code",
378
+ "execution_count": 22,
379
+ "id": "3e8e7965",
380
+ "metadata": {},
381
+ "outputs": [],
382
+ "source": [
383
+ "with open('test_processed.txt', 'w') as output_file:\n",
384
+ " for line in text:\n",
385
+ " output_file.write(line)\n",
386
+ " output_file.write('\\n')"
387
+ ]
388
+ },
389
+ {
390
+ "cell_type": "code",
391
+ "execution_count": 26,
392
+ "id": "2aa4c8ff",
393
+ "metadata": {},
394
+ "outputs": [],
395
+ "source": [
396
+ "with open('test_processed.txt', 'r') as process_file:\n",
397
+ " out_file = [line.strip() for line in process_file.readlines()]"
398
+ ]
399
+ },
400
+ {
401
+ "cell_type": "code",
402
+ "execution_count": 27,
403
+ "id": "c483fb65",
404
+ "metadata": {},
405
+ "outputs": [
406
+ {
407
+ "data": {
408
+ "text/plain": [
409
+ "['Testing Text File',\n",
410
+ " 'With generated random Lorem Ipsum and other unexpected characters!',\n",
411
+ " 'Link to generator repo!',\n",
412
+ " 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.',\n",
413
+ " 'Proin fermentum leo vel orci porta non pulvinar.',\n",
414
+ " 'Pretium lectus quam id leo in vitae turpis massa sed.',\n",
415
+ " 'Donec ac odio tempor orci dapibus.',\n",
416
+ " 'Feugiat in ante metus dictum at tempor.',\n",
417
+ " 'Elementum tempus egestas sed sed risus.',\n",
418
+ " 'Adipiscing commodo elit at imperdiet dui accumsan sit.',\n",
419
+ " 'Placerat orci nulla pellentesque dignissim enim.',\n",
420
+ " 'Posuere lorem ipsum dolor sit.',\n",
421
+ " 'Id ornare arcu odio ut sem.',\n",
422
+ " 'Purus faucibus ornare suspendisse sed nisi lacus sed.',\n",
423
+ " 'Ac turpis egestas sed tempus urna et pharetra pharetra massa.',\n",
424
+ " 'Morbi quis commodo odio aenean.',\n",
425
+ " 'Malesuada proin libero nunc consequat interdum.',\n",
426
+ " 'Ut placerat orci nulla pellentesque dignissim enim sit.',\n",
427
+ " 'Elit at imperdiet dui accumsan sit amet.',\n",
428
+ " 'Built to test various characters and other possible inputs to the silero model.',\n",
429
+ " 'Here are some Chinese characters: .',\n",
430
+ " 'There are 24 letters in the Greek alphabet.',\n",
431
+ " 'The vowels: are , , , , , , .',\n",
432
+ " 'All the rest are consonants.',\n",
433
+ " 'We can also test for mathematical symbols: , , , , , X, %, ,a, , , +, = ,-.',\n",
434
+ " 'Finally, here are some emoticons: .']"
435
+ ]
436
+ },
437
+ "execution_count": 27,
438
+ "metadata": {},
439
+ "output_type": "execute_result"
440
+ }
441
+ ],
442
+ "source": [
443
+ "out_file"
444
+ ]
445
+ },
446
+ {
447
+ "cell_type": "code",
448
+ "execution_count": null,
449
+ "id": "65646961",
450
+ "metadata": {},
451
+ "outputs": [],
452
+ "source": []
453
+ }
454
+ ],
455
+ "metadata": {
456
+ "kernelspec": {
457
+ <<<<<<< HEAD
458
+ "display_name": "Python 3",
459
+ =======
460
+ "display_name": "Python 3 (ipykernel)",
461
+ >>>>>>> appdev
462
+ "language": "python",
463
+ "name": "python3"
464
+ },
465
+ "language_info": {
466
+ "codemirror_mode": {
467
+ "name": "ipython",
468
+ "version": 3
469
+ },
470
+ "file_extension": ".py",
471
+ "mimetype": "text/x-python",
472
+ "name": "python",
473
+ "nbconvert_exporter": "python",
474
+ "pygments_lexer": "ipython3",
475
+ "version": "3.8.10"
476
+ }
477
+ },
478
+ "nbformat": 4,
479
+ "nbformat_minor": 5
480
+ }
notebooks/test.epub ADDED
Binary file (90.4 kB). View file
 
notebooks/test.htm ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4
+ <head>
5
+ <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
6
+ <meta http-equiv="Content-Style-Type" content="text/css" />
7
+ <title>Lorem Ipsum</title>
8
+
9
+ <style type="text/css">
10
+
11
+ body { margin-left: 20%;
12
+ margin-right: 20%;
13
+ text-align: justify; }
14
+
15
+ h1, h2, h3, h4, h5 {text-align: center; font-style: normal; font-weight:
16
+ normal; line-height: 1.5; margin-top: .5em; margin-bottom: .5em;}
17
+
18
+ h1 {font-size: 300%;
19
+ margin-top: 0.6em;
20
+ margin-bottom: 0.6em;
21
+ letter-spacing: 0.12em;
22
+ word-spacing: 0.2em;
23
+ text-indent: 0em;}
24
+ h2 {font-size: 150%; margin-top: 2em; margin-bottom: 1em;}
25
+ h3 {font-size: 130%; margin-top: 1em;}
26
+ h4 {font-size: 120%;}
27
+ h5 {font-size: 110%;}
28
+
29
+ .no-break {page-break-before: avoid;} /* for epubs */
30
+
31
+ div.chapter {page-break-before: always; margin-top: 4em;}
32
+
33
+ hr {width: 80%; margin-top: 2em; margin-bottom: 2em;}
34
+
35
+ p {text-indent: 1em;
36
+ margin-top: 0.25em;
37
+ margin-bottom: 0.25em; }
38
+
39
+ .p2 {margin-top: 2em;}
40
+
41
+ p.poem {text-indent: 0%;
42
+ margin-left: 10%;
43
+ font-size: 90%;
44
+ margin-top: 1em;
45
+ margin-bottom: 1em; }
46
+
47
+ p.letter {text-indent: 0%;
48
+ margin-left: 10%;
49
+ margin-right: 10%;
50
+ margin-top: 1em;
51
+ margin-bottom: 1em; }
52
+
53
+ p.noindent {text-indent: 0% }
54
+
55
+ p.center {text-align: center;
56
+ text-indent: 0em;
57
+ margin-top: 1em;
58
+ margin-bottom: 1em; }
59
+
60
+ p.footnote {font-size: 90%;
61
+ text-indent: 0%;
62
+ margin-left: 10%;
63
+ margin-right: 10%;
64
+ margin-top: 1em;
65
+ margin-bottom: 1em; }
66
+
67
+ sup { vertical-align: top; font-size: 0.6em; }
68
+
69
+ a:link {color:blue; text-decoration:none}
70
+ a:visited {color:blue; text-decoration:none}
71
+ a:hover {color:red}
72
+
73
+ </style>
74
+
75
+ </head>
76
+
77
+ <body>
78
+
79
+ <div style='display:block; margin:1em 0'>
80
+ This eBook is a generated Lorem Ipsum for the purposes of testing the Audiobook Gen app.
81
+ </div>
82
+ <div style='display:block; margin:1em 0'>Language: English</div>
83
+ <div style='display:block; margin:1em 0'>Character set encoding: UTF-8</div>
84
+
85
+
86
+ <p class="letter">
87
+ <i>
88
+ Diam vel quam elementum pulvinar etiam non quam. At tellus at urna condimentum mattis. Nisi scelerisque eu ultrices vitae auctor eu augue ut. Integer malesuada nunc vel risus commodo viverra maecenas accumsan. Ornare suspendisse sed nisi lacus. Sapien faucibus et molestie ac feugiat sed lectus. Quam elementum pulvinar etiam non. Elementum integer enim neque volutpat ac tincidunt. Justo laoreet sit amet cursus sit. Amet venenatis urna cursus eget nunc scelerisque viverra mauris. Cras semper auctor neque vitae tempus quam pellentesque nec nam. Fermentum iaculis eu non diam phasellus vestibulum lorem sed. Non pulvinar neque laoreet suspendisse interdum consectetur libero. Nec tincidunt praesent semper feugiat nibh sed. Sed id semper risus in hendrerit gravida rutrum. Suspendisse in est ante in nibh. Dui ut ornare lectus sit amet est placerat in.
89
+ </i>
90
+ </p>
91
+
92
+ </div><!--end chapter-->
93
+
94
+ <div class="chapter">
95
+
96
+ <h2><a name="pref01"></a>A NEW LOREM</h2>
97
+
98
+ <p>
99
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Proin fermentum leo vel orci porta non pulvinar. Pretium lectus quam id leo in vitae turpis massa sed. Donec ac odio tempor orci dapibus. Feugiat in ante metus dictum at tempor. Elementum tempus egestas sed sed risus. Adipiscing commodo elit at imperdiet dui accumsan sit. Placerat orci nulla pellentesque dignissim enim. Posuere lorem ipsum dolor sit. Id ornare arcu odio ut sem. Purus faucibus ornare suspendisse sed nisi lacus sed. Ac turpis egestas sed tempus urna et pharetra pharetra massa. Morbi quis commodo odio aenean. Malesuada proin libero nunc consequat interdum. Ut placerat orci nulla pellentesque dignissim enim sit. Elit at imperdiet dui accumsan sit amet.
100
+ </p>
101
+
102
+ <p>
103
+ Nunc sed id semper risus in hendrerit gravida rutrum quisque. Augue interdum velit euismod in pellentesque. Elementum curabitur vitae nunc sed velit dignissim sodales ut eu. Mi in nulla posuere sollicitudin aliquam ultrices sagittis orci a. Quisque sagittis purus sit amet volutpat consequat mauris. Risus in hendrerit gravida rutrum. Quis vel eros donec ac odio. Eget nunc lobortis mattis aliquam faucibus. Lobortis scelerisque fermentum dui faucibus. Est velit egestas dui id ornare arcu odio. Sed ullamcorper morbi tincidunt ornare massa eget egestas purus. Nisi porta lorem mollis aliquam ut porttitor leo a. Ut morbi tincidunt augue interdum velit. Egestas diam in arcu cursus euismod. Tortor id aliquet lectus proin nibh nisl condimentum id venenatis. Lectus sit amet est placerat in egestas erat imperdiet sed. Amet tellus cras adipiscing enim eu turpis egestas pretium. Et leo duis ut diam quam.
104
+ </p>
105
+
106
+ </div><!--end chapter-->
107
+
108
+ <div class="chapter">
109
+
110
+ <h2><a name="pref02"></a>IPSUM STRIKES BACK</h2>
111
+
112
+ <p>
113
+ Egestas diam in arcu cursus euismod quis. Leo in vitae turpis massa sed elementum tempus egestas. Amet nulla facilisi morbi tempus iaculis urna id volutpat. Parturient montes nascetur ridiculus mus. Erat pellentesque adipiscing commodo elit at imperdiet. Egestas congue quisque egestas diam in arcu cursus. Diam ut venenatis tellus in metus. Ullamcorper eget nulla facilisi etiam. Blandit turpis cursus in hac habitasse platea dictumst quisque. Cursus euismod quis viverra nibh cras pulvinar. Neque viverra justo nec ultrices. Dui ut ornare lectus sit. Mauris ultrices eros in cursus turpis massa tincidunt. Lobortis elementum nibh tellus molestie nunc non blandit massa enim. Ullamcorper morbi tincidunt ornare massa eget egestas purus viverra.
114
+ </p>
115
+
116
+ <p>
117
+ Mauris in aliquam sem fringilla ut morbi. Nunc sed blandit libero volutpat. Amet venenatis urna cursus eget nunc scelerisque. Sagittis nisl rhoncus mattis rhoncus urna neque. Felis eget nunc lobortis mattis aliquam faucibus purus in massa. Fringilla ut morbi tincidunt augue interdum. Nibh mauris cursus mattis molestie a iaculis at erat. Lacus sed turpis tincidunt id aliquet risus feugiat in. Nulla facilisi etiam dignissim diam quis enim lobortis. Vitae congue eu consequat ac felis donec et. Scelerisque viverra mauris in aliquam sem fringilla ut morbi tincidunt. Blandit volutpat maecenas volutpat blandit aliquam. Ultrices tincidunt arcu non sodales neque sodales ut etiam. Sollicitudin aliquam ultrices sagittis orci a scelerisque. Id cursus metus aliquam eleifend mi. Magna eget est lorem ipsum dolor sit amet consectetur. Eleifend mi in nulla posuere sollicitudin aliquam ultrices. Neque sodales ut etiam sit amet. Enim neque volutpat ac tincidunt vitae semper quis lectus nulla.
118
+ </p>
outputs/.gitkeep ADDED
File without changes
pytest.ini ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # pytest.ini
2
+ [pytest]
3
+ pythonpath = . src
4
+ testpaths = tests
requirements.txt CHANGED
@@ -1,6 +1,7 @@
1
  silero
2
  streamlit
3
  ebooklib
 
4
  bs4
5
  nltk
6
  stqdm
 
1
  silero
2
  streamlit
3
  ebooklib
4
+ PyPDF2
5
  bs4
6
  nltk
7
  stqdm
resources/audiobook_gen.png ADDED
resources/instructions.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This tool generates custom-voiced audiobook files from an imported ebook file. Please upload an ebook to begin the conversion process. Output files will be downloaded as a .zip archive.
2
+
3
+ ### Instructions
4
+ 1. Upload the book file to be converted.
5
+ 2. Select the desired voice for the audiobook.
6
+ 3. Click to run!
7
+
8
+
9
+ ### Notes
10
+ - Currently, only epub, txt, pdf files are accepted for import.
11
+ - Max input file size: 200 MB
12
+ - Audiobook generation can take up to 1 hour, depending on the size of the file.
13
+ - Generation time also depends on compute available for the app.
resources/speaker_en_0.wav ADDED
Binary file (629 kB). View file
 
resources/speaker_en_110.wav ADDED
Binary file (580 kB). View file
 
resources/speaker_en_29.wav ADDED
Binary file (546 kB). View file
 
resources/speaker_en_41.wav ADDED
Binary file (574 kB). View file
 
src/config.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Notes
3
+ -----
4
+ This module contains the configuration entries for audiobook_gen.
5
+ """
6
+
7
+ from pathlib import Path
8
+
9
+ output_path = Path("outputs")
10
+ resource_path = Path("resources")
11
+ INSTRUCTIONS = Path("resources/instructions.md")
12
+
13
+ DEVICE = 'cpu'
14
+ LANGUAGE = 'en'
15
+ MAX_CHAR_LEN = 140
16
+ MODEL_ID = 'v3_en'
17
+ SAMPLE_RATE = 24000
18
+ SPEAKER_LIST = {
19
+ 'Voice 1 (Female)': 'en_0',
20
+ 'Voice 2 (Male)': 'en_29',
21
+ 'Voice 3 (Female)': 'en_41',
22
+ 'Voice 4 (Male)': 'en_110'
23
+ }
src/file_readers.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Notes
3
+ -----
4
+ This module contains the functions for audiobook_gen that read in the
5
+ file formats that require more parsing than plain text (pdf, html, epub),
6
+ as well as the preprocessing function for all input files.
7
+ """
8
+ import re
9
+
10
+ from bs4 import BeautifulSoup
11
+ from nltk import tokenize, download
12
+ from textwrap import TextWrapper
13
+ from stqdm import stqdm
14
+
15
+ from src import config
16
+
17
+ download('punkt', quiet=True)
18
+ wrapper = TextWrapper(config.MAX_CHAR_LEN, fix_sentence_endings=True)
19
+
20
+
21
+ def preprocess_text(file):
22
+ """
23
+ Preprocesses and tokenizes a section of text from the corpus:
24
+ 1. Removes residual HTML tags
25
+ 2. Handles un-supported characters
26
+ 3. Tokenizes text and confirms max token size
27
+
28
+ Parameters
29
+ ----------
30
+ file : file_like
31
+ list of strings,
32
+ section of corpus to be pre-processed and tokenized
33
+
34
+ Returns
35
+ -------
36
+ text_list : array_like
37
+ list of strings,
38
+ body of tokenized text from which audio is generated
39
+
40
+ """
41
+ input_text = BeautifulSoup(file, "html.parser").text
42
+ text_list = []
43
+ for paragraph in input_text.split('\n'):
44
+ paragraph = paragraph.replace('—', '-')
45
+ paragraph = paragraph.replace(' .', '')
46
+ paragraph = re.sub(r'[^\x00-\x7f]', "", paragraph)
47
+ paragraph = re.sub(r'x0f', " ", paragraph)
48
+ sentences = tokenize.sent_tokenize(paragraph)
49
+
50
+ sentence_list = []
51
+ for sentence in sentences:
52
+ if not re.search('[a-zA-Z]', sentence):
53
+ sentence = ''
54
+ wrapped_sentences = wrapper.wrap(sentence)
55
+ sentence_list.append(wrapped_sentences)
56
+ trunc_sentences = [phrase for sublist in sentence_list for phrase in sublist]
57
+ text_list.append(trunc_sentences)
58
+ text_list = [text for sentences in text_list for text in sentences]
59
+ return text_list
60
+
61
+
62
+ def read_pdf(file):
63
+ """
64
+ Invokes PyPDF2 PdfReader to extract main body text from PDF file_like input,
65
+ and preprocesses text section by section.
66
+
67
+ Parameters
68
+ ----------
69
+ file : file_like
70
+ PDF file input to be parsed and preprocessed
71
+
72
+ Returns
73
+ -------
74
+ corpus : array_like
75
+ list of list of strings,
76
+ body of tokenized text from which audio is generated
77
+
78
+ """
79
+ from PyPDF2 import PdfReader
80
+
81
+ reader = PdfReader(file)
82
+ corpus = []
83
+ for item in stqdm(list(reader.pages), desc="Pages in pdf:"):
84
+ text_list = preprocess_text(item.extract_text())
85
+ corpus.append(text_list)
86
+ return corpus
87
+
88
+
89
+ def read_epub(file):
90
+ """
91
+ Invokes ebooklib read_epub to extract main body text from epub file_like input,
92
+ and preprocesses text section by section.
93
+
94
+ Parameters
95
+ ----------
96
+ file : file_like
97
+ EPUB file input to be parsed and preprocessed
98
+
99
+ Returns
100
+ -------
101
+ corpus : array_like
102
+ list of list of strings,
103
+ body of tokenized text from which audio is generated
104
+
105
+ file_title : str
106
+ title of document, used to name output files
107
+
108
+ """
109
+ import ebooklib
110
+ from ebooklib import epub
111
+
112
+ book = epub.read_epub(file)
113
+ file_title = book.get_metadata('DC', 'title')[0][0]
114
+ file_title = file_title.lower().replace(' ', '_')
115
+ corpus = []
116
+ for item in stqdm(list(book.get_items()), desc="Chapters in ebook:"):
117
+ if item.get_type() == ebooklib.ITEM_DOCUMENT:
118
+ text_list = preprocess_text(item.get_content())
119
+ corpus.append(text_list)
120
+ return corpus, file_title
src/output.py CHANGED
@@ -1,14 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  def assemble_zip(title):
2
- import pathlib
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import zipfile
4
  from stqdm import stqdm
5
 
6
- directory = pathlib.Path("outputs/")
7
- zip_name = f"outputs/{title}.zip"
 
 
8
 
9
  with zipfile.ZipFile(zip_name, mode="w") as archive:
10
- for file_path in stqdm(directory.iterdir()):
11
- if pathlib.Path(file_path).suffix == '.wav':
12
  archive.write(file_path, arcname=file_path.name)
 
13
 
14
- return zip_name
 
1
+ """
2
+ Notes
3
+ -----
4
+ This module contains the functions for audiobook_gen that take the generated audio tensors and output to audio files,
5
+ as well as assembling the final zip archive for user download.
6
+ """
7
+ import logging
8
+
9
+ from src import config
10
+
11
+
12
+ def write_audio(audio_list, sample_path):
13
+ """
14
+ Invokes torchaudio to save generated audio tensors to a file.
15
+
16
+ Parameters
17
+ ----------
18
+ audio_list : torch.tensor
19
+ pytorch tensor containing generated audio
20
+
21
+ sample_path : str
22
+ file name and path for outputting tensor to audio file
23
+
24
+ Returns
25
+ -------
26
+ None
27
+
28
+ """
29
+ import torch
30
+ import torchaudio
31
+ from src import config as cf
32
+
33
+ if not config.output_path.exists():
34
+ config.output_path.mkdir()
35
+
36
+ if len(audio_list) > 0:
37
+ audio_file = torch.cat(audio_list).reshape(1, -1)
38
+ torchaudio.save(sample_path, audio_file, cf.SAMPLE_RATE)
39
+ logging.info(f'Audio generated at: {sample_path}')
40
+ else:
41
+ logging.info(f'Audio at: {sample_path} is empty.')
42
+
43
+
44
  def assemble_zip(title):
45
+ """
46
+ Creates a zip file and inserts all .wav files in the output directory,
47
+ and returns the name / path of the zip file.
48
+
49
+ Parameters
50
+ ----------
51
+ title : str
52
+ title of document, used to name zip directory
53
+
54
+ Returns
55
+ -------
56
+ zip_name : str
57
+ name and path of zip directory generated
58
+
59
+ """
60
  import zipfile
61
  from stqdm import stqdm
62
 
63
+ if not config.output_path.exists():
64
+ config.output_path.mkdir()
65
+
66
+ zip_name = config.output_path / f'{title}.zip'
67
 
68
  with zipfile.ZipFile(zip_name, mode="w") as archive:
69
+ for file_path in stqdm(config.output_path.iterdir()):
70
+ if file_path.suffix == '.wav':
71
  archive.write(file_path, arcname=file_path.name)
72
+ file_path.unlink()
73
 
74
+ return zip_name
src/predict.py CHANGED
@@ -1,40 +1,110 @@
1
- def load_models():
2
- import torch
3
- from silero import silero_tts
 
 
 
 
 
 
 
 
 
 
4
 
5
- language = 'en'
6
- model_id = 'v3_en'
7
- model, _ = silero_tts(language=language,
8
- speaker=model_id)
 
9
 
10
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
11
- model.to(device) # gpu or cpu
 
 
 
 
 
 
 
 
 
 
 
12
  return model
13
 
14
- def audiobook_gen(ebook, title, model):
15
- import torch
16
- import torchaudio
17
- from stqdm import stqdm
18
-
19
- sample_rate = 24000
20
- speaker = 'en_0'
21
-
22
- for chapter in stqdm(ebook, desc="Chapters in ebook:"):
23
- chapter_index = f'chapter{ebook.index(chapter):03}'
24
- audio_list = []
25
- for sentence in stqdm(chapter, desc="Sentences in chapter:"):
26
- audio = model.apply_tts(text=sentence,
27
- speaker=speaker,
28
- sample_rate=sample_rate)
29
- if len(audio) > 0 and isinstance(audio, torch.Tensor):
30
- audio_list.append(audio)
31
- else:
32
- print(f'Tensor for sentence is not valid: \n {sentence}')
33
-
34
- sample_path = f'outputs/{title}_{chapter_index}.wav'
35
-
36
- if len(audio_list) > 0:
37
- audio_file = torch.cat(audio_list).reshape(1, -1)
38
- torchaudio.save(sample_path, audio_file, sample_rate)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  else:
40
- print(f'Chapter {chapter_index} is empty.')
 
 
 
 
1
+ """
2
+ Notes
3
+ -----
4
+ This module contains the functions for audiobook_gen that handle text-to-speech generation.
5
+ The functions take in the preprocessed text and invoke the Silero package to generate audio tensors.
6
+ """
7
+ import logging
8
+
9
+ import torch
10
+ from stqdm import stqdm
11
+
12
+ from src import output, config
13
+
14
 
15
+ def load_model():
16
+ """
17
+ Load the Silero package containing the model information
18
+ for the language and speaker set in config.py
19
+ and converts it to the set device.
20
 
21
+ Parameters
22
+ ----------
23
+ None
24
+
25
+ Returns
26
+ -------
27
+ model : torch.package
28
+
29
+ """
30
+ from silero import silero_tts
31
+
32
+ model, _ = silero_tts(language=config.LANGUAGE, speaker=config.MODEL_ID)
33
+ model.to(config.DEVICE)
34
  return model
35
 
36
+
37
+ def generate_audio(corpus, title, model, speaker):
38
+ """
39
+ For each section within the corpus, calls predict() function to generate audio tensors
40
+ and then calls write_audio() to output the tensors to audio files.
41
+
42
+ Parameters
43
+ ----------
44
+ corpus : array_like
45
+ list of list of strings,
46
+ body of tokenized text from which audio is generated
47
+
48
+ title : str
49
+ title of document, used to name output files
50
+
51
+ model : torch.package
52
+ torch package containing model for language and speaker specified
53
+
54
+ speaker : str
55
+ identifier of selected speaker for audio generation
56
+
57
+ Returns
58
+ -------
59
+ None
60
+
61
+ """
62
+ for section in stqdm(corpus, desc="Sections in document:"):
63
+ section_index = f'part{corpus.index(section):03}'
64
+ audio_list, sample_path = predict(section, section_index, title, model, speaker)
65
+ output.write_audio(audio_list, sample_path)
66
+
67
+
68
+ def predict(text_section, section_index, title, model, speaker):
69
+ """
70
+ Applies Silero TTS engine for each token within the corpus section,
71
+ appending it to the output tensor array, and creates file path for output.
72
+
73
+ Parameters
74
+ ----------
75
+ text_section : array_like
76
+ list of strings,
77
+ body of tokenized text from which audio is generated
78
+
79
+ section_index : int
80
+ index of current section within corpus
81
+
82
+ title : str
83
+ title of document, used to name output files
84
+
85
+ model : torch.package
86
+ torch package containing model for language and speaker specified
87
+
88
+ speaker : str
89
+ identifier of selected speaker for audio generation
90
+
91
+ Returns
92
+ -------
93
+ audio_list : torch.tensor
94
+ pytorch tensor containing generated audio
95
+
96
+ sample_path : str
97
+ file name and path for outputting tensor to audio file
98
+
99
+ """
100
+ audio_list = []
101
+ for sentence in stqdm(text_section, desc="Sentences in section:"):
102
+ audio = model.apply_tts(text=sentence, speaker=speaker, sample_rate=config.SAMPLE_RATE)
103
+ if len(audio) > 0 and isinstance(audio, torch.Tensor):
104
+ audio_list.append(audio)
105
+ logging.info(f'Tensor generated for sentence: \n {sentence}')
106
  else:
107
+ logging.info(f'Tensor for sentence is not valid: \n {sentence}')
108
+
109
+ sample_path = config.output_path / f'{title}_{section_index}.wav'
110
+ return audio_list, sample_path
tests/data/test.epub ADDED
Binary file (90.4 kB). View file
 
tests/data/test.htm ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4
+ <head>
5
+ <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
6
+ <meta http-equiv="Content-Style-Type" content="text/css" />
7
+ <title>Lorem Ipsum</title>
8
+
9
+ <style type="text/css">
10
+
11
+ body { margin-left: 20%;
12
+ margin-right: 20%;
13
+ text-align: justify; }
14
+
15
+ h1, h2, h3, h4, h5 {text-align: center; font-style: normal; font-weight:
16
+ normal; line-height: 1.5; margin-top: .5em; margin-bottom: .5em;}
17
+
18
+ h1 {font-size: 300%;
19
+ margin-top: 0.6em;
20
+ margin-bottom: 0.6em;
21
+ letter-spacing: 0.12em;
22
+ word-spacing: 0.2em;
23
+ text-indent: 0em;}
24
+ h2 {font-size: 150%; margin-top: 2em; margin-bottom: 1em;}
25
+ h3 {font-size: 130%; margin-top: 1em;}
26
+ h4 {font-size: 120%;}
27
+ h5 {font-size: 110%;}
28
+
29
+ .no-break {page-break-before: avoid;} /* for epubs */
30
+
31
+ div.chapter {page-break-before: always; margin-top: 4em;}
32
+
33
+ hr {width: 80%; margin-top: 2em; margin-bottom: 2em;}
34
+
35
+ p {text-indent: 1em;
36
+ margin-top: 0.25em;
37
+ margin-bottom: 0.25em; }
38
+
39
+ .p2 {margin-top: 2em;}
40
+
41
+ p.poem {text-indent: 0%;
42
+ margin-left: 10%;
43
+ font-size: 90%;
44
+ margin-top: 1em;
45
+ margin-bottom: 1em; }
46
+
47
+ p.letter {text-indent: 0%;
48
+ margin-left: 10%;
49
+ margin-right: 10%;
50
+ margin-top: 1em;
51
+ margin-bottom: 1em; }
52
+
53
+ p.noindent {text-indent: 0% }
54
+
55
+ p.center {text-align: center;
56
+ text-indent: 0em;
57
+ margin-top: 1em;
58
+ margin-bottom: 1em; }
59
+
60
+ p.footnote {font-size: 90%;
61
+ text-indent: 0%;
62
+ margin-left: 10%;
63
+ margin-right: 10%;
64
+ margin-top: 1em;
65
+ margin-bottom: 1em; }
66
+
67
+ sup { vertical-align: top; font-size: 0.6em; }
68
+
69
+ a:link {color:blue; text-decoration:none}
70
+ a:visited {color:blue; text-decoration:none}
71
+ a:hover {color:red}
72
+
73
+ </style>
74
+
75
+ </head>
76
+
77
+ <body>
78
+
79
+ <div style='display:block; margin:1em 0'>
80
+ This eBook is a generated Lorem Ipsum for the purposes of testing the Audiobook Gen app.
81
+ </div>
82
+ <div style='display:block; margin:1em 0'>Language: English</div>
83
+ <div style='display:block; margin:1em 0'>Character set encoding: UTF-8</div>
84
+
85
+
86
+ <p class="letter">
87
+ <i>
88
+ Diam vel quam elementum pulvinar etiam non quam. At tellus at urna condimentum mattis. Nisi scelerisque eu ultrices vitae auctor eu augue ut. Integer malesuada nunc vel risus commodo viverra maecenas accumsan. Ornare suspendisse sed nisi lacus. Sapien faucibus et molestie ac feugiat sed lectus. Quam elementum pulvinar etiam non. Elementum integer enim neque volutpat ac tincidunt. Justo laoreet sit amet cursus sit. Amet venenatis urna cursus eget nunc scelerisque viverra mauris. Cras semper auctor neque vitae tempus quam pellentesque nec nam. Fermentum iaculis eu non diam phasellus vestibulum lorem sed. Non pulvinar neque laoreet suspendisse interdum consectetur libero. Nec tincidunt praesent semper feugiat nibh sed. Sed id semper risus in hendrerit gravida rutrum. Suspendisse in est ante in nibh. Dui ut ornare lectus sit amet est placerat in.
89
+ </i>
90
+ </p>
91
+
92
+ </div><!--end chapter-->
93
+
94
+ <div class="chapter">
95
+
96
+ <h2><a name="pref01"></a>A NEW LOREM</h2>
97
+
98
+ <p>
99
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Proin fermentum leo vel orci porta non pulvinar. Pretium lectus quam id leo in vitae turpis massa sed. Donec ac odio tempor orci dapibus. Feugiat in ante metus dictum at tempor. Elementum tempus egestas sed sed risus. Adipiscing commodo elit at imperdiet dui accumsan sit. Placerat orci nulla pellentesque dignissim enim. Posuere lorem ipsum dolor sit. Id ornare arcu odio ut sem. Purus faucibus ornare suspendisse sed nisi lacus sed. Ac turpis egestas sed tempus urna et pharetra pharetra massa. Morbi quis commodo odio aenean. Malesuada proin libero nunc consequat interdum. Ut placerat orci nulla pellentesque dignissim enim sit. Elit at imperdiet dui accumsan sit amet.
100
+ </p>
101
+
102
+ <p>
103
+ Nunc sed id semper risus in hendrerit gravida rutrum quisque. Augue interdum velit euismod in pellentesque. Elementum curabitur vitae nunc sed velit dignissim sodales ut eu. Mi in nulla posuere sollicitudin aliquam ultrices sagittis orci a. Quisque sagittis purus sit amet volutpat consequat mauris. Risus in hendrerit gravida rutrum. Quis vel eros donec ac odio. Eget nunc lobortis mattis aliquam faucibus. Lobortis scelerisque fermentum dui faucibus. Est velit egestas dui id ornare arcu odio. Sed ullamcorper morbi tincidunt ornare massa eget egestas purus. Nisi porta lorem mollis aliquam ut porttitor leo a. Ut morbi tincidunt augue interdum velit. Egestas diam in arcu cursus euismod. Tortor id aliquet lectus proin nibh nisl condimentum id venenatis. Lectus sit amet est placerat in egestas erat imperdiet sed. Amet tellus cras adipiscing enim eu turpis egestas pretium. Et leo duis ut diam quam.
104
+ </p>
105
+
106
+ </div><!--end chapter-->
107
+
108
+ <div class="chapter">
109
+
110
+ <h2><a name="pref02"></a>IPSUM STRIKES BACK</h2>
111
+
112
+ <p>
113
+ Egestas diam in arcu cursus euismod quis. Leo in vitae turpis massa sed elementum tempus egestas. Amet nulla facilisi morbi tempus iaculis urna id volutpat. Parturient montes nascetur ridiculus mus. Erat pellentesque adipiscing commodo elit at imperdiet. Egestas congue quisque egestas diam in arcu cursus. Diam ut venenatis tellus in metus. Ullamcorper eget nulla facilisi etiam. Blandit turpis cursus in hac habitasse platea dictumst quisque. Cursus euismod quis viverra nibh cras pulvinar. Neque viverra justo nec ultrices. Dui ut ornare lectus sit. Mauris ultrices eros in cursus turpis massa tincidunt. Lobortis elementum nibh tellus molestie nunc non blandit massa enim. Ullamcorper morbi tincidunt ornare massa eget egestas purus viverra.
114
+ </p>
115
+
116
+ <p>
117
+ Mauris in aliquam sem fringilla ut morbi. Nunc sed blandit libero volutpat. Amet venenatis urna cursus eget nunc scelerisque. Sagittis nisl rhoncus mattis rhoncus urna neque. Felis eget nunc lobortis mattis aliquam faucibus purus in massa. Fringilla ut morbi tincidunt augue interdum. Nibh mauris cursus mattis molestie a iaculis at erat. Lacus sed turpis tincidunt id aliquet risus feugiat in. Nulla facilisi etiam dignissim diam quis enim lobortis. Vitae congue eu consequat ac felis donec et. Scelerisque viverra mauris in aliquam sem fringilla ut morbi tincidunt. Blandit volutpat maecenas volutpat blandit aliquam. Ultrices tincidunt arcu non sodales neque sodales ut etiam. Sollicitudin aliquam ultrices sagittis orci a scelerisque. Id cursus metus aliquam eleifend mi. Magna eget est lorem ipsum dolor sit amet consectetur. Eleifend mi in nulla posuere sollicitudin aliquam ultrices. Neque sodales ut etiam sit amet. Enim neque volutpat ac tincidunt vitae semper quis lectus nulla.
118
+ </p>
tests/data/test.pdf ADDED
Binary file (99.9 kB). View file
 
tests/data/test.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Testing Text File
2
+
3
+ With generated random Lorem Ipsum and other unexpected characters!
4
+
5
+ <a href="https://github.com/mkutarna/audiobook_gen/">Link to generator repo!</a>
6
+
7
+ 此行是对非英语字符的测试
8
+
9
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Proin fermentum leo vel orci porta non pulvinar. Pretium lectus quam id leo in vitae turpis massa sed. Donec ac odio tempor orci dapibus. Feugiat in ante metus dictum at tempor. Elementum tempus egestas sed sed risus. Adipiscing commodo elit at imperdiet dui accumsan sit. Placerat orci nulla pellentesque dignissim enim. Posuere lorem ipsum dolor sit. Id ornare arcu odio ut sem. Purus faucibus ornare suspendisse sed nisi lacus sed. Ac turpis egestas sed tempus urna et pharetra pharetra massa. Morbi quis commodo odio aenean. Malesuada proin libero nunc consequat interdum. Ut placerat orci nulla pellentesque dignissim enim sit. Elit at imperdiet dui accumsan sit amet.
10
+
11
+ Built to test various characters and other possible inputs to the silero model.
12
+
13
+ Here are some Chinese characters: 此行是对非英语字符的测试.
14
+
15
+ There are 24 letters in the Greek alphabet. The vowels: are α, ε, η, ι, ο, ω, υ. All the rest are consonants.
16
+
17
+ We can also test for mathematical symbols: ∫, ∇, ∞, δ, ε, X̄, %, √ ,a, ±, ÷, +, = ,-.
18
+
19
+ Finally, here are some emoticons: ☺️🙂😊😀😁☹️🙁😞😟😣😖😨😧😦😱😫😩.
tests/data/test_audio.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae3c3cf68e220fa23fd4b9a1591523ced8cfd80459dab714a702634458a5b56f
3
+ size 593767
tests/data/test_predict.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4be523f61990d57fb7031eca43edd04be401a0ce4062f5e7aaafc5de660035b6
3
+ size 679930
tests/data/test_predict.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Predict Testing Text File
2
+
3
+ Audiobook Gen is a tool that allows the users to generate an audio file from an ebook or other document.
tests/data/test_processed.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Testing Text File
2
+ With generated random Lorem Ipsum and other unexpected characters!
3
+ Link to generator repo!
4
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
5
+ Proin fermentum leo vel orci porta non pulvinar.
6
+ Pretium lectus quam id leo in vitae turpis massa sed.
7
+ Donec ac odio tempor orci dapibus.
8
+ Feugiat in ante metus dictum at tempor.
9
+ Elementum tempus egestas sed sed risus.
10
+ Adipiscing commodo elit at imperdiet dui accumsan sit.
11
+ Placerat orci nulla pellentesque dignissim enim.
12
+ Posuere lorem ipsum dolor sit.
13
+ Id ornare arcu odio ut sem.
14
+ Purus faucibus ornare suspendisse sed nisi lacus sed.
15
+ Ac turpis egestas sed tempus urna et pharetra pharetra massa.
16
+ Morbi quis commodo odio aenean.
17
+ Malesuada proin libero nunc consequat interdum.
18
+ Ut placerat orci nulla pellentesque dignissim enim sit.
19
+ Elit at imperdiet dui accumsan sit amet.
20
+ Built to test various characters and other possible inputs to the silero model.
21
+ Here are some Chinese characters: .
22
+ There are 24 letters in the Greek alphabet.
23
+ The vowels: are , , , , , , .
24
+ All the rest are consonants.
25
+ We can also test for mathematical symbols: , , , , , X, %, ,a, , , +, = ,-.
26
+ Finally, here are some emoticons: .
tests/test_config.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Notes
3
+ -----
4
+ This module contains the configuration entries for audiobook_gen tests.
5
+ """
6
+
7
+ from pathlib import Path
8
+
9
+ data_path = Path("tests/data")
tests/test_file_readers.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ import numpy as np
3
+
4
+ from src import file_readers
5
+ import test_config
6
+
7
+
8
+ def test_preprocess_text():
9
+ """
10
+ Tests preprocess function by asserting title,
11
+ shape of corpus, and correct line reading.
12
+ """
13
+ test_path = test_config.data_path / "test.txt"
14
+ processed_path = test_config.data_path / "test_processed.txt"
15
+ with open(test_path, 'r') as file:
16
+ test_corpus = file_readers.preprocess_text(file)
17
+ with open(processed_path, 'r') as process_file:
18
+ processed_corpus = [line.strip() for line in process_file.readlines()]
19
+
20
+ assert processed_corpus == test_corpus
21
+
22
+
23
+ def test_read_pdf():
24
+ pdf_path = test_config.data_path / "test.pdf"
25
+ corpus = np.array(file_readers.read_pdf(pdf_path), dtype=object)
26
+
27
+ assert np.shape(corpus) == (4, )
28
+ assert np.shape(corpus[0]) == (3, )
29
+ assert corpus[0][0] == 'Lorem Ipsum'
30
+ assert corpus[2][0] == 'Preface'
31
+
32
+
33
+ def test_read_epub():
34
+ """
35
+ Tests read_epub function by asserting title,
36
+ shape of corpus, and correct line reading.
37
+ """
38
+ ebook_path = test_config.data_path / "test.epub"
39
+ corpus, title = file_readers.read_epub(ebook_path)
40
+ corpus_arr = np.array(corpus, dtype=object)
41
+
42
+ assert title == "the_picture_of_dorian_gray"
43
+ assert np.shape(corpus_arr) == (6,)
44
+ assert np.shape(corpus_arr[0]) == (39,)
45
+ assert corpus[0][0] == 'The Project Gutenberg eBook of The Picture of Dorian Gray, by Oscar Wilde'
46
+ assert corpus[2][0] == 'CHAPTER I.'
tests/test_output.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ from src import output, config
4
+ import test_config
5
+
6
+
7
+ def test_write_audio():
8
+ """
9
+ Tests write_audio function, takes in an audio tensor with a file path and writes the audio to a file.
10
+ """
11
+ import torch
12
+
13
+ test_path = test_config.data_path / 'test_audio.wav'
14
+ audio_path = test_config.data_path / 'test_audio.pt'
15
+ audio_list = torch.load(audio_path)
16
+
17
+ output.write_audio(audio_list, test_path)
18
+
19
+ assert test_path.is_file()
20
+ assert test_path.stat().st_size == 592858
21
+
22
+ test_path.unlink()
23
+
24
+
25
+ def test_assemble_zip():
26
+ """
27
+ Tests assemble_zip function, which collects all the audio files from the output directory,
28
+ and zips them up into a zip directory.
29
+ """
30
+ from shutil import copy2
31
+
32
+ if not config.output_path.exists():
33
+ config.output_path.mkdir()
34
+
35
+ title = "speaker_samples"
36
+ zip_path = config.output_path / 'speaker_samples.zip'
37
+ wav1_path = config.output_path / 'speaker_en_0.wav'
38
+ wav2_path = config.output_path / 'speaker_en_110.wav'
39
+
40
+ for file_path in config.resource_path.iterdir():
41
+ if file_path.suffix == '.wav':
42
+ copy2(file_path, config.output_path)
43
+
44
+ _ = output.assemble_zip(title)
45
+
46
+ assert zip_path.is_file()
47
+ assert not wav1_path.is_file()
48
+ assert not wav2_path.is_file()
49
+
50
+ zip_path.unlink()
tests/test_predict.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ import torch
3
+ import numpy as np
4
+
5
+ from src import predict, file_readers, config
6
+ import test_config
7
+
8
+
9
+ def test_load_model():
10
+ """
11
+ Tests load_model function, which loads the silero TTS model.
12
+ """
13
+ model = predict.load_model()
14
+
15
+ assert model.speakers[0] == 'en_0'
16
+ assert np.shape(model.speakers) == (119,)
17
+
18
+
19
+ def test_generate_audio():
20
+ """
21
+ Tests generate_audio function, which takes the TTS model and file input,
22
+ and uses the predict & write_audio functions to output the audio file.
23
+ """
24
+ ebook_path = test_config.data_path / "test.epub"
25
+ wav1_path = config.output_path / 'the_picture_of_dorian_gray_part000.wav'
26
+ wav2_path = config.output_path / 'the_picture_of_dorian_gray_part001.wav'
27
+ wav3_path = config.output_path / 'the_picture_of_dorian_gray_part002.wav'
28
+ corpus, title = file_readers.read_epub(ebook_path)
29
+
30
+ model = predict.load_model()
31
+ speaker = 'en_110'
32
+ predict.generate_audio(corpus[0:2], title, model, speaker)
33
+
34
+ assert wav1_path.is_file()
35
+ assert wav2_path.is_file()
36
+ assert not wav3_path.is_file()
37
+
38
+ wav1_path.unlink()
39
+ wav2_path.unlink()
40
+
41
+
42
+ def test_predict():
43
+ """
44
+ Tests predict function, generates audio tensors for each token in the text section,
45
+ and appends them together along with a generated file path for output.
46
+ """
47
+ seed = 1337
48
+ torch.manual_seed(seed)
49
+ torch.cuda.manual_seed(seed)
50
+ model = predict.load_model()
51
+
52
+ tensor_path = test_config.data_path / "test_predict.pt"
53
+ test_tensor = torch.load(tensor_path)
54
+
55
+ text_path = test_config.data_path / "test_predict.txt"
56
+ with open(text_path, 'r') as file:
57
+ text = file_readers.preprocess_text(file)
58
+ title = 'test_predict'
59
+ section_index = 'part001'
60
+ speaker = 'en_0'
61
+
62
+ audio_list, _ = predict.predict(text, section_index, title, model, speaker)
63
+ audio_tensor = torch.cat(audio_list).reshape(1, -1)
64
+
65
+ torch.testing.assert_close(audio_tensor, test_tensor, atol=1e-3, rtol=0.9)