mkutarna commited on
Commit
f5dff19
·
2 Parent(s): 3b480a0 fd88500

Merged repo changes to main branch

Browse files
.coveragerc ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # .coveragerc for audiobook_gen
3
+
4
+ [run]
5
+ # data_file = put a coverage file name here!!!
6
+
7
+ [report]
8
+ # Regexes for lines to exclude from consideration
9
+ exclude_lines =
10
+ # Have to re-enable the standard pragma
11
+ pragma: no cover
12
+ omit =
13
+ tests/*
14
+ notebooks/*
15
+
16
+ [html]
17
+ # directory = put a directory here!!!!
18
+
19
+ [tool:pytest]
20
+ addopts = --cov=audiobook_gen --cov-report html
.flake8 ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [flake8]
2
+ max-line-length = 120
3
+ max-complexity = 18
4
+ exclude =
5
+ notebooks/*
6
+ */.ipynb_checkpoints/*
.github/workflows/check_file_size.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Check file size
2
+ on: # or directly `on: [push]` to run the action on every push on any branch
3
+ pull_request:
4
+ branches: [master]
5
+
6
+ # to run this workflow manually from the Actions tab
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ sync-to-hub:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - name: Check large files
14
+ uses: ActionsDesk/lfs-warning@v2.0
15
+ with:
16
+ filesizelimit: 10485760 # this is 10MB so we can sync to HF Spaces
.github/workflows/python-app.yml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This workflow will install Python dependencies, run tests and lint with a single version of Python
2
+ # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3
+
4
+ name: Python application
5
+
6
+ on:
7
+ push:
8
+ branches: [ master ]
9
+ pull_request:
10
+ branches: [ master ]
11
+
12
+ permissions:
13
+ contents: read
14
+
15
+ jobs:
16
+ build:
17
+
18
+ runs-on: ubuntu-latest
19
+
20
+ steps:
21
+ - uses: actions/checkout@v3
22
+ - name: Set up Python 3.9.12
23
+ uses: actions/setup-python@v3
24
+ with:
25
+ python-version: "3.9.12"
26
+ - name: Install dependencies
27
+ run: |
28
+ python -m pip install --upgrade pip
29
+ pip install flake8 pytest==7.1.3 pytest-cov==3.0.0
30
+ if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
31
+ - name: Lint with flake8
32
+ run: |
33
+ # stop the build if there are Python syntax errors or undefined names
34
+ flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
35
+ # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
36
+ flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
37
+ - name: Test and coverage with pytest and pytest-cov
38
+ run: |
39
+ pytest --cov-config=.coveragerc --cov=audiobook_gen tests/
.github/workflows/sync_to_huggingface_space.yml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face hub
2
+ on:
3
+ push:
4
+ branches: [master]
5
+
6
+ # to run this workflow manually from the Actions tab
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ sync-to-hub:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v2
14
+ with:
15
+ fetch-depth: 0
16
+ - name: Push to hub
17
+ env:
18
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
19
+ run: git push --force https://mkutarna:$HF_TOKEN@huggingface.co/spaces/mkutarna/audiobook_gen master
.gitignore CHANGED
@@ -9,7 +9,7 @@ docs/
9
  conda/
10
  tmp/
11
  notebooks/outputs/
12
- tests/__pycache__
13
  tests/.pytest_cache
14
 
15
  tags
 
9
  conda/
10
  tmp/
11
  notebooks/outputs/
12
+ tests/__pycache__/
13
  tests/.pytest_cache
14
 
15
  tags
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Matthew Kutarna / github.com/mkutarna/audiobook_gen
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -10,7 +10,29 @@ pinned: true
10
  license: mit
11
  ---
12
 
13
- Audiobook Gen
14
- =============
15
 
16
- Audiobook Gen is a tool that allows the users to generate an audio file of text (e.g. audiobook), read in the voice of the user's choice. It will take in 3 inputs: the desired text for audio generation, as well as a pair of text / audio files for the desired voice.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  license: mit
11
  ---
12
 
13
+ # Audiobook Gen
 
14
 
15
+ ## About
16
+ Audiobook Gen converts text to audiobook format. It allows you to choose which voice you want to listen to.
17
+
18
+ - Do you want to listen to a book that isn't available on Audible?
19
+ - Have you tried an audiobook from LibriVox and found the audio quality lacking?
20
+ - Don't have time to sit and read a document, but would prefer to listen to it?
21
+
22
+ You can input various text formats (`txt`, `pdf`, `epub` - more options in development) and output a `zip` archive of audio files (`wav`). This is an open-source tool based on the [Silero text-to-speech toolkit](https://github.com/snakers4/silero-models) and uses Streamlit to deliver the application.
23
+
24
+ # Demo
25
+
26
+ ## HuggingFace Space
27
+ A demonstration of this tool is hosted at HuggingFace Spaces - see [Audiobook_Gen](https://huggingface.co/spaces/mkutarna/audiobook_gen).
28
+
29
+ <img style="border:1px solid grey" src="resources/audiobook_gen.png" alt="Screenshot"/>
30
+
31
+ # Future
32
+
33
+ Here is a list of features in development and planned for the future:
34
+ - `html` file import
35
+ - `mobi`, `azw` ebook input
36
+ - optional audio output formats (for better compression)
37
+ - improved audio file output handling
38
+ - Docker image for local use
app.py CHANGED
@@ -1,31 +1,70 @@
 
 
1
  import streamlit as st
2
 
3
- from src.parser import read_epub, read_txt
4
- from src.predict import audiobook_gen, load_models
5
- from src.output import assemble_zip
 
 
 
 
6
 
7
  st.title('Audiobook Generation Tool')
8
- st.markdown("This tool generates audiobook files from an imported ebook file.")
9
 
10
- with st.sidebar:
11
- ebook_upload = st.file_uploader(
12
- label = "Upload the target ebook (.epub only)",
13
- type = ['epub'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
 
15
  if st.button('Click to run!'):
16
- ebook, title = read_epub(ebook_upload)
17
- model = load_models()
18
- st.success('Parsing complete!')
 
 
 
 
 
 
 
 
 
19
 
20
  with st.spinner('Generating audio...'):
21
- audiobook_gen(ebook, title, model)
22
- st.success('TTS generation complete!')
23
 
24
  with st.spinner('Building zip file...'):
25
- zip_file = assemble_zip(title)
26
- title_name = f'{title}.zip'
27
  st.success('Zip file prepared!')
28
 
 
 
 
 
 
 
 
 
29
  with open(zip_file, "rb") as fp:
30
  btn = st.download_button(
31
  label="Download Audiobook",
 
1
+ import logging
2
+
3
  import streamlit as st
4
 
5
+ from src import file_readers, predict, output, config
6
+
7
+ logging.basicConfig(filename='app.log',
8
+ filemode='w',
9
+ format='%(name)s - %(levelname)s - %(message)s',
10
+ level=logging.INFO,
11
+ force=True)
12
 
13
  st.title('Audiobook Generation Tool')
 
14
 
15
+ text_file = open(config.INSTRUCTIONS, "r")
16
+ readme_text = text_file.read()
17
+ text_file.close()
18
+ st.markdown(readme_text)
19
+
20
+ st.header('1. Upload your document')
21
+ uploaded_file = st.file_uploader(
22
+ label="File types accepted: epub, txt, pdf",
23
+ type=['epub', 'txt', 'pdf'])
24
+
25
+ model = predict.load_model()
26
+
27
+ st.header('2. Please select voice')
28
+ speaker = st.radio('Available voices:', config.SPEAKER_LIST.keys(), horizontal=True)
29
+
30
+ audio_path = config.resource_path / f'speaker_{config.SPEAKER_LIST.get(speaker)}.wav'
31
+ audio_file = open(audio_path, 'rb')
32
+ audio_bytes = audio_file.read()
33
+
34
+ st.audio(audio_bytes, format='audio/ogg')
35
 
36
+ st.header('3. Run the app to generate audio')
37
  if st.button('Click to run!'):
38
+ file_ext = uploaded_file.type
39
+ file_title = uploaded_file.name
40
+ if file_ext == 'application/epub+zip':
41
+ text, file_title = file_readers.read_epub(uploaded_file)
42
+ elif file_ext == 'text/plain':
43
+ file = uploaded_file.read()
44
+ text = file_readers.preprocess_text(file)
45
+ elif file_ext == 'application/pdf':
46
+ text = file_readers.read_pdf(uploaded_file)
47
+ else:
48
+ st.warning('Invalid file type', icon="⚠️")
49
+ st.success('Reading file complete!')
50
 
51
  with st.spinner('Generating audio...'):
52
+ predict.generate_audio(text, file_title, model, config.SPEAKER_LIST.get(speaker))
53
+ st.success('Audio generation complete!')
54
 
55
  with st.spinner('Building zip file...'):
56
+ zip_file = output.assemble_zip(file_title)
57
+ title_name = f'{file_title}.zip'
58
  st.success('Zip file prepared!')
59
 
60
+ with open(zip_file, "rb") as fp:
61
+ btn = st.download_button(
62
+ label="Download Audiobook",
63
+ data=fp,
64
+ file_name=title_name,
65
+ mime="application/zip"
66
+ )
67
+
68
  with open(zip_file, "rb") as fp:
69
  btn = st.download_button(
70
  label="Download Audiobook",
models/latest_silero_models.yml ADDED
@@ -0,0 +1,563 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pre-trained STT models
2
+ stt_models:
3
+ en:
4
+ latest:
5
+ meta:
6
+ name: "en_v6"
7
+ sample: "https://models.silero.ai/examples/en_sample.wav"
8
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
9
+ jit: "https://models.silero.ai/models/en/en_v6.jit"
10
+ onnx: "https://models.silero.ai/models/en/en_v5.onnx"
11
+ jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
12
+ jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
13
+ onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
14
+ v6:
15
+ meta:
16
+ name: "en_v6"
17
+ sample: "https://models.silero.ai/examples/en_sample.wav"
18
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
19
+ jit: "https://models.silero.ai/models/en/en_v6.jit"
20
+ onnx: "https://models.silero.ai/models/en/en_v5.onnx"
21
+ jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
22
+ jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
23
+ onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
24
+ v5:
25
+ meta:
26
+ name: "en_v5"
27
+ sample: "https://models.silero.ai/examples/en_sample.wav"
28
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
29
+ jit: "https://models.silero.ai/models/en/en_v5.jit"
30
+ onnx: "https://models.silero.ai/models/en/en_v5.onnx"
31
+ onnx_q: "https://models.silero.ai/models/en/en_v5_q.onnx"
32
+ jit_q: "https://models.silero.ai/models/en/en_v5_q.jit"
33
+ jit_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.jit"
34
+ onnx_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.onnx"
35
+ v4_0:
36
+ meta:
37
+ name: "en_v4_0"
38
+ sample: "https://models.silero.ai/examples/en_sample.wav"
39
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
40
+ jit_large: "https://models.silero.ai/models/en/en_v4_0_jit_large.model"
41
+ onnx_large: "https://models.silero.ai/models/en/en_v4_0_large.onnx"
42
+ v3:
43
+ meta:
44
+ name: "en_v3"
45
+ sample: "https://models.silero.ai/examples/en_sample.wav"
46
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
47
+ jit: "https://models.silero.ai/models/en/en_v3_jit.model"
48
+ onnx: "https://models.silero.ai/models/en/en_v3.onnx"
49
+ jit_q: "https://models.silero.ai/models/en/en_v3_jit_q.model"
50
+ jit_skip: "https://models.silero.ai/models/en/en_v3_jit_skips.model"
51
+ jit_large: "https://models.silero.ai/models/en/en_v3_jit_large.model"
52
+ onnx_large: "https://models.silero.ai/models/en/en_v3_large.onnx"
53
+ jit_xsmall: "https://models.silero.ai/models/en/en_v3_jit_xsmall.model"
54
+ jit_q_xsmall: "https://models.silero.ai/models/en/en_v3_jit_q_xsmall.model"
55
+ onnx_xsmall: "https://models.silero.ai/models/en/en_v3_xsmall.onnx"
56
+ v2:
57
+ meta:
58
+ name: "en_v2"
59
+ sample: "https://models.silero.ai/examples/en_sample.wav"
60
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
61
+ jit: "https://models.silero.ai/models/en/en_v2_jit.model"
62
+ onnx: "https://models.silero.ai/models/en/en_v2.onnx"
63
+ tf: "https://models.silero.ai/models/en/en_v2_tf.tar.gz"
64
+ v1:
65
+ meta:
66
+ name: "en_v1"
67
+ sample: "https://models.silero.ai/examples/en_sample.wav"
68
+ labels: "https://models.silero.ai/models/en/en_v1_labels.json"
69
+ jit: "https://models.silero.ai/models/en/en_v1_jit.model"
70
+ onnx: "https://models.silero.ai/models/en/en_v1.onnx"
71
+ tf: "https://models.silero.ai/models/en/en_v1_tf.tar.gz"
72
+ de:
73
+ latest:
74
+ meta:
75
+ name: "de_v1"
76
+ sample: "https://models.silero.ai/examples/de_sample.wav"
77
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
78
+ jit: "https://models.silero.ai/models/de/de_v1_jit.model"
79
+ onnx: "https://models.silero.ai/models/de/de_v1.onnx"
80
+ tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
81
+ v1:
82
+ meta:
83
+ name: "de_v1"
84
+ sample: "https://models.silero.ai/examples/de_sample.wav"
85
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
86
+ jit_large: "https://models.silero.ai/models/de/de_v1_jit.model"
87
+ onnx: "https://models.silero.ai/models/de/de_v1.onnx"
88
+ tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
89
+ v3:
90
+ meta:
91
+ name: "de_v3"
92
+ sample: "https://models.silero.ai/examples/de_sample.wav"
93
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
94
+ jit_large: "https://models.silero.ai/models/de/de_v3_large.jit"
95
+ v4:
96
+ meta:
97
+ name: "de_v4"
98
+ sample: "https://models.silero.ai/examples/de_sample.wav"
99
+ labels: "https://models.silero.ai/models/de/de_v1_labels.json"
100
+ jit_large: "https://models.silero.ai/models/de/de_v4_large.jit"
101
+ onnx_large: "https://models.silero.ai/models/de/de_v4_large.onnx"
102
+ es:
103
+ latest:
104
+ meta:
105
+ name: "es_v1"
106
+ sample: "https://models.silero.ai/examples/es_sample.wav"
107
+ labels: "https://models.silero.ai/models/es/es_v1_labels.json"
108
+ jit: "https://models.silero.ai/models/es/es_v1_jit.model"
109
+ onnx: "https://models.silero.ai/models/es/es_v1.onnx"
110
+ tf: "https://models.silero.ai/models/es/es_v1_tf.tar.gz"
111
+ ua:
112
+ latest:
113
+ meta:
114
+ name: "ua_v3"
115
+ sample: "https://models.silero.ai/examples/ua_sample.wav"
116
+ credits:
117
+ datasets:
118
+ speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
119
+ labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
120
+ jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
121
+ jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
122
+ onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
123
+ v3:
124
+ meta:
125
+ name: "ua_v3"
126
+ sample: "https://models.silero.ai/examples/ua_sample.wav"
127
+ credits:
128
+ datasets:
129
+ speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
130
+ labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
131
+ jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
132
+ jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
133
+ onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
134
+ v1:
135
+ meta:
136
+ name: "ua_v1"
137
+ sample: "https://models.silero.ai/examples/ua_sample.wav"
138
+ credits:
139
+ datasets:
140
+ speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
141
+ labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
142
+ jit: "https://models.silero.ai/models/ua/ua_v1_jit.model"
143
+ jit_q: "https://models.silero.ai/models/ua/ua_v1_jit_q.model"
144
+ tts_models:
145
+ ru:
146
+ v3_1_ru:
147
+ latest:
148
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
149
+ package: 'https://models.silero.ai/models/tts/ru/v3_1_ru.pt'
150
+ sample_rate: [8000, 24000, 48000]
151
+ ru_v3:
152
+ latest:
153
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
154
+ package: 'https://models.silero.ai/models/tts/ru/ru_v3.pt'
155
+ sample_rate: [8000, 24000, 48000]
156
+ aidar_v2:
157
+ latest:
158
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
159
+ package: 'https://models.silero.ai/models/tts/ru/v2_aidar.pt'
160
+ sample_rate: [8000, 16000]
161
+ aidar_8khz:
162
+ latest:
163
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
164
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
165
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
166
+ sample_rate: 8000
167
+ v1:
168
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
169
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
170
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
171
+ sample_rate: 8000
172
+ aidar_16khz:
173
+ latest:
174
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
175
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
176
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
177
+ sample_rate: 16000
178
+ v1:
179
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
180
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
181
+ jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
182
+ sample_rate: 16000
183
+ baya_v2:
184
+ latest:
185
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
186
+ package: 'https://models.silero.ai/models/tts/ru/v2_baya.pt'
187
+ sample_rate: [8000, 16000]
188
+ baya_8khz:
189
+ latest:
190
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
191
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
192
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
193
+ sample_rate: 8000
194
+ v1:
195
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
196
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
197
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
198
+ sample_rate: 8000
199
+ baya_16khz:
200
+ latest:
201
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
202
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
203
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
204
+ sample_rate: 16000
205
+ v1:
206
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
207
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
208
+ jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
209
+ sample_rate: 16000
210
+ irina_v2:
211
+ latest:
212
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
213
+ package: 'https://models.silero.ai/models/tts/ru/v2_irina.pt'
214
+ sample_rate: [8000, 16000]
215
+ irina_8khz:
216
+ latest:
217
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
218
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
219
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
220
+ sample_rate: 8000
221
+ v1:
222
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
223
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
224
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
225
+ sample_rate: 8000
226
+ irina_16khz:
227
+ latest:
228
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
229
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
230
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
231
+ sample_rate: 16000
232
+ v1:
233
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
234
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
235
+ jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
236
+ sample_rate: 16000
237
+ kseniya_v2:
238
+ latest:
239
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
240
+ package: 'https://models.silero.ai/models/tts/ru/v2_kseniya.pt'
241
+ sample_rate: [8000, 16000]
242
+ kseniya_8khz:
243
+ latest:
244
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
245
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
246
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
247
+ sample_rate: 8000
248
+ v1:
249
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
250
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
251
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
252
+ sample_rate: 8000
253
+ kseniya_16khz:
254
+ latest:
255
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
256
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
257
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
258
+ sample_rate: 16000
259
+ v1:
260
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
261
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
262
+ jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
263
+ sample_rate: 16000
264
+ natasha_v2:
265
+ latest:
266
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
267
+ package: 'https://models.silero.ai/models/tts/ru/v2_natasha.pt'
268
+ sample_rate: [8000, 16000]
269
+ natasha_8khz:
270
+ latest:
271
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
272
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
273
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
274
+ sample_rate: 8000
275
+ v1:
276
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
277
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
278
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
279
+ sample_rate: 8000
280
+ natasha_16khz:
281
+ latest:
282
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
283
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
284
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
285
+ sample_rate: 16000
286
+ v1:
287
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
288
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
289
+ jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
290
+ sample_rate: 16000
291
+ ruslan_v2:
292
+ latest:
293
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
294
+ package: 'https://models.silero.ai/models/tts/ru/v2_ruslan.pt'
295
+ sample_rate: [8000, 16000]
296
+ ruslan_8khz:
297
+ latest:
298
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
299
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
300
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
301
+ sample_rate: 8000
302
+ v1:
303
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
304
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
305
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
306
+ sample_rate: 8000
307
+ ruslan_16khz:
308
+ latest:
309
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
310
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
311
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
312
+ sample_rate: 16000
313
+ v1:
314
+ tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
315
+ example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
316
+ jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
317
+ sample_rate: 16000
318
+ en:
319
+ v3_en:
320
+ latest:
321
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
322
+ package: 'https://models.silero.ai/models/tts/en/v3_en.pt'
323
+ sample_rate: [8000, 24000, 48000]
324
+ v3_en_indic:
325
+ latest:
326
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
327
+ package: 'https://models.silero.ai/models/tts/en/v3_en_indic.pt'
328
+ sample_rate: [8000, 24000, 48000]
329
+ lj_v2:
330
+ latest:
331
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
332
+ package: 'https://models.silero.ai/models/tts/en/v2_lj.pt'
333
+ sample_rate: [8000, 16000]
334
+ lj_8khz:
335
+ latest:
336
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
337
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
338
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
339
+ sample_rate: 8000
340
+ v1:
341
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
342
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
343
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
344
+ sample_rate: 8000
345
+ lj_16khz:
346
+ latest:
347
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
348
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
349
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
350
+ sample_rate: 16000
351
+ v1:
352
+ tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
353
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
354
+ jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
355
+ sample_rate: 16000
356
+ de:
357
+ v3_de:
358
+ latest:
359
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
360
+ package: 'https://models.silero.ai/models/tts/de/v3_de.pt'
361
+ sample_rate: [8000, 24000, 48000]
362
+ thorsten_v2:
363
+ latest:
364
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
365
+ package: 'https://models.silero.ai/models/tts/de/v2_thorsten.pt'
366
+ sample_rate: [8000, 16000]
367
+ thorsten_8khz:
368
+ latest:
369
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
370
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
371
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
372
+ sample_rate: 8000
373
+ v1:
374
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
375
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
376
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
377
+ sample_rate: 8000
378
+ thorsten_16khz:
379
+ latest:
380
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
381
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
382
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
383
+ sample_rate: 16000
384
+ v1:
385
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
386
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
387
+ jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
388
+ sample_rate: 16000
389
+ es:
390
+ v3_es:
391
+ latest:
392
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
393
+ package: 'https://models.silero.ai/models/tts/es/v3_es.pt'
394
+ sample_rate: [8000, 24000, 48000]
395
+ tux_v2:
396
+ latest:
397
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
398
+ package: 'https://models.silero.ai/models/tts/es/v2_tux.pt'
399
+ sample_rate: [8000, 16000]
400
+ tux_8khz:
401
+ latest:
402
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
403
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
404
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
405
+ sample_rate: 8000
406
+ v1:
407
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
408
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
409
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
410
+ sample_rate: 8000
411
+ tux_16khz:
412
+ latest:
413
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
414
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
415
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
416
+ sample_rate: 16000
417
+ v1:
418
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
419
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
420
+ jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
421
+ sample_rate: 16000
422
+ fr:
423
+ v3_fr:
424
+ latest:
425
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
426
+ package: 'https://models.silero.ai/models/tts/fr/v3_fr.pt'
427
+ sample_rate: [8000, 24000, 48000]
428
+ gilles_v2:
429
+ latest:
430
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
431
+ package: 'https://models.silero.ai/models/tts/fr/v2_gilles.pt'
432
+ sample_rate: [8000, 16000]
433
+ gilles_8khz:
434
+ latest:
435
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
436
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
437
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
438
+ sample_rate: 8000
439
+ v1:
440
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
441
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
442
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
443
+ sample_rate: 8000
444
+ gilles_16khz:
445
+ latest:
446
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
447
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
448
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
449
+ sample_rate: 16000
450
+ v1:
451
+ tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
452
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
453
+ jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
454
+ sample_rate: 16000
455
+ ba:
456
+ aigul_v2:
457
+ latest:
458
+ example: 'Салауат Юлаевтың тормошо һәм яҙмышы хаҡындағы документтарҙың һәм шиғри әҫәрҙәренең бик аҙ өлөшө генә һаҡланған.'
459
+ package: 'https://models.silero.ai/models/tts/ba/v2_aigul.pt'
460
+ sample_rate: [8000, 16000]
461
+ language_name: 'bashkir'
462
+ xal:
463
+ v3_xal:
464
+ latest:
465
+ example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.'
466
+ package: 'https://models.silero.ai/models/tts/xal/v3_xal.pt'
467
+ sample_rate: [8000, 24000, 48000]
468
+ erdni_v2:
469
+ latest:
470
+ example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.'
471
+ package: 'https://models.silero.ai/models/tts/xal/v2_erdni.pt'
472
+ sample_rate: [8000, 16000]
473
+ language_name: 'kalmyk'
474
+ tt:
475
+ v3_tt:
476
+ latest:
477
+ example: 'Исәнмесез, саумысез, нишләп кәҗәгезне саумыйсыз, әтәчегез күкәй салган, нишләп чыгып алмыйсыз.'
478
+ package: 'https://models.silero.ai/models/tts/tt/v3_tt.pt'
479
+ sample_rate: [8000, 24000, 48000]
480
+ dilyara_v2:
481
+ latest:
482
+ example: 'Ис+әнмесез, с+аумысез, нишл+әп кәҗәгезн+е с+аумыйсыз, әтәчег+ез күк+әй салг+ан, нишл+әп чыг+ып +алмыйсыз.'
483
+ package: 'https://models.silero.ai/models/tts/tt/v2_dilyara.pt'
484
+ sample_rate: [8000, 16000]
485
+ language_name: 'tatar'
486
+ uz:
487
+ v3_uz:
488
+ latest:
489
+ example: 'Tanishganimdan xursandman.'
490
+ package: 'https://models.silero.ai/models/tts/uz/v3_uz.pt'
491
+ sample_rate: [8000, 24000, 48000]
492
+ dilnavoz_v2:
493
+ latest:
494
+ example: 'Tanishganimdan xursandman.'
495
+ package: 'https://models.silero.ai/models/tts/uz/v2_dilnavoz.pt'
496
+ sample_rate: [8000, 16000]
497
+ language_name: 'uzbek'
498
+ ua:
499
+ v3_ua:
500
+ latest:
501
+ example: 'К+отики - пухн+асті жив+отики.'
502
+ package: 'https://models.silero.ai/models/tts/ua/v3_ua.pt'
503
+ sample_rate: [8000, 24000, 48000]
504
+ mykyta_v2:
505
+ latest:
506
+ example: 'К+отики - пухн+асті жив+отики.'
507
+ package: 'https://models.silero.ai/models/tts/ua/v22_mykyta_48k.pt'
508
+ sample_rate: [8000, 24000, 48000]
509
+ language_name: 'ukrainian'
510
+ indic:
511
+ v3_indic:
512
+ latest:
513
+ example: 'prasidda kabīra adhyētā, puruṣōttama agravāla kā yaha śōdha ālēkha, usa rāmānaṁda kī khōja karatā hai'
514
+ package: 'https://models.silero.ai/models/tts/indic/v3_indic.pt'
515
+ sample_rate: [8000, 24000, 48000]
516
+ multi:
517
+ multi_v2:
518
+ latest:
519
+ package: 'https://models.silero.ai/models/tts/multi/v2_multi.pt'
520
+ sample_rate: [8000, 16000]
521
+ speakers:
522
+ aidar:
523
+ lang: 'ru'
524
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
525
+ baya:
526
+ lang: 'ru'
527
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
528
+ kseniya:
529
+ lang: 'ru'
530
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
531
+ irina:
532
+ lang: 'ru'
533
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
534
+ ruslan:
535
+ lang: 'ru'
536
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
537
+ natasha:
538
+ lang: 'ru'
539
+ example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
540
+ thorsten:
541
+ lang: 'de'
542
+ example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
543
+ tux:
544
+ lang: 'es'
545
+ example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
546
+ gilles:
547
+ lang: 'fr'
548
+ example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
549
+ lj:
550
+ lang: 'en'
551
+ example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
552
+ dilyara:
553
+ lang: 'tt'
554
+ example: 'Пес+и пес+и песик+әй, борыннар+ы бәләк+әй.'
555
+ te_models:
556
+ latest:
557
+ package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
558
+ languages: ['en', 'de', 'ru', 'es']
559
+ punct: '.,-!?—'
560
+ v2:
561
+ package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
562
+ languages: ['en', 'de', 'ru', 'es']
563
+ punct: '.,-!?—'
notebooks/1232-h.htm ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/audiobook_gen_silero.ipynb ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "---\n",
8
+ "\n",
9
+ "# Audiobook Generator - Proof of Concept Notebook\n",
10
+ "\n",
11
+ "This notebook is intended to be a proof of concept for the end-to-end work of generating an audiobook file from an ebook. This includes converting the .epub book files into raw python text strings, splitting into items and sentences, then tokenizing and batching them to run through the Silero text-to-speech (TTS) implementation.\n",
12
+ "\n",
13
+ "*Updated: September 2, 2022*\n",
14
+ "\n",
15
+ "---\n",
16
+ "\n",
17
+ "### Overview\n",
18
+ "\n",
19
+ "1. Setup\n",
20
+ " - Needed libraries and packages\n",
21
+ " - Variables\n",
22
+ " - Silero model selection\n",
23
+ "2. Ebook Import\n",
24
+ " - Target file selection\n",
25
+ " - File (.epub) import\n",
26
+ " - String parsing\n",
27
+ " - String length wrapping\n",
28
+ "3. Text-to-Speech\n",
29
+ " - Silero implementation\n",
30
+ " - Results\n",
31
+ "\n",
32
+ "---"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "markdown",
37
+ "metadata": {},
38
+ "source": [
39
+ "### Step 1 - Setup\n",
40
+ "\n",
41
+ "This proof-of-concept relies on PyTorch and TorchAudio for its implementation. OmegaConf is used to support providing the latest model from Silero in a consistent manner. A seed is created, and used for all random functions that are needed.\n",
42
+ "\n",
43
+ "We will also use the TQDM package to provide progress bars while running the proof-of-concept within this notebook."
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": null,
49
+ "metadata": {},
50
+ "outputs": [],
51
+ "source": [
52
+ "import os\n",
53
+ "import torch\n",
54
+ "import torchaudio\n",
55
+ "from omegaconf import OmegaConf\n",
56
+ "from tqdm.notebook import tqdm\n",
57
+ "\n",
58
+ "torch.hub.download_url_to_file('https://raw.githubusercontent.com/snakers4/silero-models/master/models.yml',\n",
59
+ " 'latest_silero_models.yml',\n",
60
+ " progress=False)\n",
61
+ "models = OmegaConf.load('latest_silero_models.yml')\n",
62
+ "\n",
63
+ "seed = 1337\n",
64
+ "torch.manual_seed(seed)\n",
65
+ "torch.cuda.manual_seed(seed)\n",
66
+ "\n",
67
+ "device = 'cuda' if torch.cuda.is_available() else 'cpu'"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "markdown",
72
+ "metadata": {},
73
+ "source": [
74
+ "We also need to set some variables for later use during the text processing steps, and the audio output in the TTS step.\n",
75
+ "\n",
76
+ "- `max_char_len` is set based on the results of performance testing done by the Silero devs. Larger values enable sentence structure to be better preserved, but negatively affect performance.\n",
77
+ "- `sample_rate` is also set based on recommendations from the Silero team for performance vs. quality. Using 16k or 8k audio will improve performance, but result in lower quality audio. Silero estimates a decrease of ~0.5 MOS (from 3.7 to 3.2)."
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "code",
82
+ "execution_count": null,
83
+ "metadata": {},
84
+ "outputs": [],
85
+ "source": [
86
+ "max_char_len = 140\n",
87
+ "sample_rate = 24000"
88
+ ]
89
+ },
90
+ {
91
+ "cell_type": "markdown",
92
+ "metadata": {},
93
+ "source": [
94
+ "The Silero implementation comes with models trained on various languages, the most common being Russian, but we will use the latest English model for this proof of concept. There are also a number of English speaker choices available."
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": null,
100
+ "metadata": {},
101
+ "outputs": [],
102
+ "source": [
103
+ "language = 'en'\n",
104
+ "model_id = 'v3_en'\n",
105
+ "speaker = 'en_0'\n",
106
+ "\n",
107
+ "model, example_text = torch.hub.load(repo_or_dir='snakers4/silero-models',\n",
108
+ " model='silero_tts',\n",
109
+ " language=language,\n",
110
+ " speaker=model_id)\n",
111
+ "model.to(device) # gpu or cpu"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "markdown",
116
+ "metadata": {},
117
+ "source": [
118
+ "### Step 2 - Ebook Import\n",
119
+ "\n",
120
+ "Below is a representative ebook (`Portrait of Dorian Gray`), taken from Project Gutenberg - a free directory of public-domain works."
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": null,
126
+ "metadata": {},
127
+ "outputs": [],
128
+ "source": [
129
+ "ebook_path = 'test.epub'"
130
+ ]
131
+ },
132
+ {
133
+ "cell_type": "markdown",
134
+ "metadata": {},
135
+ "source": [
136
+ "The function below - `read_ebook()` - performs the following steps:\n",
137
+ "- Takes in the ebook, located at `ebook_path`\n",
138
+ "- Strips out any html tags\n",
139
+ "- Uses the nltk packages to download and use the `punkt` sentence-level tokenizer\n",
140
+ "- Calls the TextWrapper package to wrap sentences to the `max_char_len`, with care to fix sentence endings\n",
141
+ "- I.e. sentences are not split in the middle of a word, but rather words are preserved\n",
142
+ "- Finally sentences are appended to a chapter, and the chapters to a complete list: `corpus`"
143
+ ]
144
+ },
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": null,
148
+ "metadata": {},
149
+ "outputs": [],
150
+ "source": [
151
+ "def read_ebook(ebook_path):\n",
152
+ "\n",
153
+ " import ebooklib\n",
154
+ " from ebooklib import epub\n",
155
+ " from bs4 import BeautifulSoup\n",
156
+ " from tqdm.notebook import tqdm\n",
157
+ " from nltk import tokenize, download\n",
158
+ " from textwrap import TextWrapper\n",
159
+ "\n",
160
+ " download('punkt')\n",
161
+ " wrapper = TextWrapper(max_char_len, fix_sentence_endings=True)\n",
162
+ "\n",
163
+ " book = epub.read_epub(ebook_path)\n",
164
+ "\n",
165
+ " ebook_title = book.get_metadata('DC', 'title')[0][0]\n",
166
+ " ebook_title = ebook_title.lower().replace(' ', '_')\n",
167
+ "\n",
168
+ " corpus = []\n",
169
+ " for item in tqdm(list(book.get_items())):\n",
170
+ " if item.get_type() == ebooklib.ITEM_DOCUMENT:\n",
171
+ " input_text = BeautifulSoup(item.get_content(), \"html.parser\").text\n",
172
+ " text_list = []\n",
173
+ " for paragraph in input_text.split('\\n'):\n",
174
+ " paragraph = paragraph.replace('—', '-')\n",
175
+ " sentences = tokenize.sent_tokenize(paragraph)\n",
176
+ "\n",
177
+ " # Truncate sentences to maximum character limit\n",
178
+ " sentence_list = []\n",
179
+ " for sentence in sentences:\n",
180
+ " wrapped_sentences = wrapper.wrap(sentence)\n",
181
+ " sentence_list.append(wrapped_sentences)\n",
182
+ " # Flatten list of list of sentences\n",
183
+ " trunc_sentences = [phrase for sublist in sentence_list for phrase in sublist]\n",
184
+ "\n",
185
+ " text_list.append(trunc_sentences)\n",
186
+ " text_list = [text for sentences in text_list for text in sentences]\n",
187
+ " corpus.append(text_list)\n",
188
+ "\n",
189
+ " return corpus, ebook_title"
190
+ ]
191
+ },
192
+ {
193
+ "cell_type": "markdown",
194
+ "metadata": {},
195
+ "source": [
196
+ "Here we use the above function to read in the chosen ebook."
197
+ ]
198
+ },
199
+ {
200
+ "cell_type": "code",
201
+ "execution_count": null,
202
+ "metadata": {},
203
+ "outputs": [],
204
+ "source": [
205
+ "ebook, title = read_ebook(ebook_path)"
206
+ ]
207
+ },
208
+ {
209
+ "cell_type": "markdown",
210
+ "metadata": {},
211
+ "source": [
212
+ "And here, let us take a peek at the contents of the ebook:"
213
+ ]
214
+ },
215
+ {
216
+ "cell_type": "code",
217
+ "execution_count": null,
218
+ "metadata": {},
219
+ "outputs": [],
220
+ "source": [
221
+ "print(f'Title of ebook (path name):{title}\\n')\n",
222
+ "print(f'First line of the ebook:{ebook[0][0]}\\n')\n",
223
+ "print(f'First paragraph (truncated for display): \\n {ebook[2][0:5]}')"
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": null,
229
+ "metadata": {},
230
+ "outputs": [],
231
+ "source": [
232
+ "ebook[0][0]"
233
+ ]
234
+ },
235
+ {
236
+ "cell_type": "markdown",
237
+ "metadata": {},
238
+ "source": [
239
+ "### Step 3 - Text-to-Speech\n",
240
+ "\n",
241
+ "The ebook is fed through the Silero TTS implementation sentence by sentence. We will also check that each tensor being created is valid (i.e. non-zero).\n",
242
+ "\n",
243
+ "Finally, the output tensors are exported as `.wav` files on a chapter by chapter basis - consistent with the file structure of common audiobooks."
244
+ ]
245
+ },
246
+ {
247
+ "cell_type": "code",
248
+ "execution_count": null,
249
+ "metadata": {},
250
+ "outputs": [],
251
+ "source": [
252
+ "#os.mkdir(f'outputs/{title}')\n",
253
+ "\n",
254
+ "for chapter in tqdm(ebook[0:3]):\n",
255
+ " chapter_index = f'chapter{ebook.index(chapter):03}'\n",
256
+ " audio_list = []\n",
257
+ " for sentence in tqdm(chapter):\n",
258
+ " audio = model.apply_tts(text=sentence,\n",
259
+ " speaker=speaker,\n",
260
+ " sample_rate=sample_rate)\n",
261
+ " if len(audio) > 0 and isinstance(audio, torch.Tensor):\n",
262
+ " audio_list.append(audio)\n",
263
+ " else:\n",
264
+ " print(f'Tensor for sentence is not valid: \\n {sentence}')\n",
265
+ "\n",
266
+ " sample_path = f'outputs/{title}/{chapter_index}.wav'\n",
267
+ "\n",
268
+ " if len(audio_list) > 0:\n",
269
+ " audio_file = torch.cat(audio_list).reshape(1, -1)\n",
270
+ "# torchaudio.save(sample_path, audio_file, sample_rate)\n",
271
+ " else:\n",
272
+ " print(f'Chapter {chapter_index} is empty.')"
273
+ ]
274
+ },
275
+ {
276
+ "cell_type": "markdown",
277
+ "metadata": {},
278
+ "source": [
279
+ "### Results\n",
280
+ "\n",
281
+ "##### CPU (i7-4790k)\n",
282
+ "\n",
283
+ "Running \"Pride and Prejudice\" through the Silero model took **34m42s** to convert. This book is a good representation of the average book length: the average audiobook length on Audible is between 10 & 12 hours, while Pride and Prejudice is 11h20m.\n",
284
+ "\n",
285
+ "This is approximately a 20:1 ratio of audio length to processing time.\n",
286
+ "\n",
287
+ "Pride and Prejudice: **34m42s** - 1h39m33s on i7-4650u\n",
288
+ "\n",
289
+ "Portrait of Dorian Gray: **18m18s** - 18m50s w/output - 1h06m04s on i7-4650u\n",
290
+ "\n",
291
+ "Crime and Punishment: **Unknown** - error converting ebook at 7/50, 19/368\n",
292
+ "\n",
293
+ "##### GPU (P4000)\n",
294
+ "\n",
295
+ "Running the same book through the Silero model on GPU took **5m39s** to convert.\n",
296
+ "\n",
297
+ "This is approximately a 122:1 ratio of audio length to processing time.\n",
298
+ "\n",
299
+ "Pride and Prejudice: **5m39s**\n",
300
+ "\n",
301
+ "Portrait of Dorian Gray: **4m26s**\n",
302
+ "\n",
303
+ "Crime and Punishment: **Unknown** - error converting ebook"
304
+ ]
305
+ },
306
+ {
307
+ "cell_type": "code",
308
+ "execution_count": null,
309
+ "metadata": {},
310
+ "outputs": [],
311
+ "source": []
312
+ }
313
+ ],
314
+ "metadata": {
315
+ "kernelspec": {
316
+ "display_name": "Python 3",
317
+ "language": "python",
318
+ "name": "python3"
319
+ },
320
+ "language_info": {
321
+ "codemirror_mode": {
322
+ "name": "ipython",
323
+ "version": 3
324
+ },
325
+ "file_extension": ".py",
326
+ "mimetype": "text/x-python",
327
+ "name": "python",
328
+ "nbconvert_exporter": "python",
329
+ "pygments_lexer": "ipython3",
330
+ "version": "3.8.10"
331
+ }
332
+ },
333
+ "nbformat": 4,
334
+ "nbformat_minor": 4
335
+ }
notebooks/parser_function_html.ipynb ADDED
@@ -0,0 +1,480 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ <<<<<<< HEAD
6
+ "execution_count": 1,
7
+ "id": "27a75ece",
8
+ "metadata": {},
9
+ "outputs": [],
10
+ "source": [
11
+ "import nltk"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": null,
17
+ =======
18
+ "execution_count": 2,
19
+ >>>>>>> appdev
20
+ "id": "5292a160",
21
+ "metadata": {},
22
+ "outputs": [],
23
+ "source": [
24
+ "import re\n",
25
+ "import numpy as np\n",
26
+ "\n",
27
+ "from bs4 import BeautifulSoup\n",
28
+ "from nltk import tokenize, download\n",
29
+ "from textwrap import TextWrapper"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ <<<<<<< HEAD
35
+ "execution_count": null,
36
+ =======
37
+ "execution_count": 3,
38
+ >>>>>>> appdev
39
+ "id": "68609a77",
40
+ "metadata": {},
41
+ "outputs": [],
42
+ "source": [
43
+ "# file_path = '1232-h.htm'\n",
44
+ <<<<<<< HEAD
45
+ "file_path = 'test.htm'"
46
+ =======
47
+ "file_path = ''"
48
+ >>>>>>> appdev
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ <<<<<<< HEAD
54
+ "execution_count": null,
55
+ =======
56
+ "execution_count": 4,
57
+ >>>>>>> appdev
58
+ "id": "5c526c9b",
59
+ "metadata": {},
60
+ "outputs": [],
61
+ "source": [
62
+ "download('punkt', quiet=True)\n",
63
+ "wrapper = TextWrapper(140, fix_sentence_endings=True)"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ <<<<<<< HEAD
69
+ "execution_count": null,
70
+ =======
71
+ "execution_count": 5,
72
+ >>>>>>> appdev
73
+ "id": "d4732304",
74
+ "metadata": {},
75
+ "outputs": [],
76
+ "source": [
77
+ "def preprocess(file):\n",
78
+ " input_text = BeautifulSoup(file, \"html.parser\").text\n",
79
+ " text_list = []\n",
80
+ " for paragraph in input_text.split('\\n'):\n",
81
+ " paragraph = paragraph.replace('—', '-')\n",
82
+ " paragraph = paragraph.replace(' .', '')\n",
83
+ " paragraph = re.sub(r'[^\\x00-\\x7f]', \"\", paragraph)\n",
84
+ " paragraph = re.sub(r'x0f', \" \", paragraph)\n",
85
+ " sentences = tokenize.sent_tokenize(paragraph)\n",
86
+ "\n",
87
+ " sentence_list = []\n",
88
+ " for sentence in sentences:\n",
89
+ " if not re.search('[a-zA-Z]', sentence):\n",
90
+ " sentence = ''\n",
91
+ " wrapped_sentences = wrapper.wrap(sentence)\n",
92
+ " sentence_list.append(wrapped_sentences)\n",
93
+ " trunc_sentences = [phrase for sublist in sentence_list for phrase in sublist]\n",
94
+ " text_list.append(trunc_sentences)\n",
95
+ " text_list = [text for sentences in text_list for text in sentences]\n",
96
+ " return text_list"
97
+ ]
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": null,
102
+ "id": "3045665a",
103
+ "metadata": {},
104
+ "outputs": [],
105
+ "source": [
106
+ "def read_html(file):\n",
107
+ " corpus = preprocess(file)\n",
108
+ " return corpus"
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "code",
113
+ "execution_count": null,
114
+ "id": "e18be118",
115
+ "metadata": {},
116
+ "outputs": [],
117
+ "source": [
118
+ "with open(file_path, 'r') as f:\n",
119
+ " ebook_upload = f.read()\n",
120
+ "corpus = read_html(ebook_upload)"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ <<<<<<< HEAD
126
+ "execution_count": null,
127
+ "id": "ece1c7d3",
128
+ "metadata": {},
129
+ "outputs": [],
130
+ =======
131
+ "execution_count": 11,
132
+ "id": "ece1c7d3",
133
+ "metadata": {},
134
+ "outputs": [
135
+ {
136
+ "data": {
137
+ "text/plain": [
138
+ "(1, 2)"
139
+ ]
140
+ },
141
+ "execution_count": 11,
142
+ "metadata": {},
143
+ "output_type": "execute_result"
144
+ }
145
+ ],
146
+ >>>>>>> appdev
147
+ "source": [
148
+ "np.shape(corpus)"
149
+ ]
150
+ },
151
+ {
152
+ "cell_type": "code",
153
+ <<<<<<< HEAD
154
+ "execution_count": null,
155
+ "id": "dc7e4010",
156
+ "metadata": {},
157
+ "outputs": [],
158
+ =======
159
+ "execution_count": 12,
160
+ "id": "dc7e4010",
161
+ "metadata": {},
162
+ "outputs": [
163
+ {
164
+ "ename": "IndexError",
165
+ "evalue": "list index out of range",
166
+ "output_type": "error",
167
+ "traceback": [
168
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
169
+ "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
170
+ "Cell \u001b[0;32mIn [12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcorpus\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m]\u001b[49m\n",
171
+ "\u001b[0;31mIndexError\u001b[0m: list index out of range"
172
+ ]
173
+ }
174
+ ],
175
+ >>>>>>> appdev
176
+ "source": [
177
+ "corpus[0][2]"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ <<<<<<< HEAD
183
+ "execution_count": null,
184
+ "id": "6cb47a2d",
185
+ "metadata": {},
186
+ "outputs": [],
187
+ =======
188
+ "execution_count": 13,
189
+ "id": "6cb47a2d",
190
+ "metadata": {},
191
+ "outputs": [
192
+ {
193
+ "data": {
194
+ "text/plain": [
195
+ "[['Predict Testing Text File',\n",
196
+ " 'Audiobook Gen is a tool that allows the users to generate an audio file from an ebook or other document.']]"
197
+ ]
198
+ },
199
+ "execution_count": 13,
200
+ "metadata": {},
201
+ "output_type": "execute_result"
202
+ }
203
+ ],
204
+ >>>>>>> appdev
205
+ "source": [
206
+ "corpus"
207
+ ]
208
+ },
209
+ {
210
+ "cell_type": "code",
211
+ "execution_count": null,
212
+ <<<<<<< HEAD
213
+ =======
214
+ "id": "8508b073",
215
+ "metadata": {},
216
+ "outputs": [],
217
+ "source": []
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": null,
222
+ >>>>>>> appdev
223
+ "id": "d11031c7",
224
+ "metadata": {},
225
+ "outputs": [],
226
+ "source": [
227
+ "assert title == \"1232-h\"\n",
228
+ "assert np.shape(corpus) == (1, 5476)\n",
229
+ "assert corpus[0][0] == 'The Project Gutenberg eBook of The Prince, by Nicolo Machiavelli'\n",
230
+ "assert corpus[0][2] == 'This eBook is for the use of anyone anywhere in the United States and'"
231
+ ]
232
+ },
233
+ {
234
+ "cell_type": "code",
235
+ "execution_count": null,
236
+ "id": "0c57eec6",
237
+ "metadata": {},
238
+ "outputs": [],
239
+ "source": []
240
+ },
241
+ {
242
+ "cell_type": "code",
243
+ "execution_count": 2,
244
+ "id": "af281267",
245
+ "metadata": {},
246
+ "outputs": [],
247
+ "source": [
248
+ "import re\n",
249
+ "\n",
250
+ "from bs4 import BeautifulSoup\n",
251
+ "from nltk import tokenize, download\n",
252
+ "from textwrap import TextWrapper\n",
253
+ "from stqdm import stqdm"
254
+ ]
255
+ },
256
+ {
257
+ "cell_type": "code",
258
+ "execution_count": 6,
259
+ "id": "676ce437",
260
+ "metadata": {},
261
+ "outputs": [],
262
+ "source": [
263
+ "download('punkt', quiet=True)\n",
264
+ "wrapper = TextWrapper(140, fix_sentence_endings=True)\n",
265
+ "file_path = 'test.txt'"
266
+ ]
267
+ },
268
+ {
269
+ "cell_type": "code",
270
+ "execution_count": 7,
271
+ "id": "4d278f8e",
272
+ "metadata": {},
273
+ "outputs": [],
274
+ "source": [
275
+ "def preprocess_text(file):\n",
276
+ " input_text = BeautifulSoup(file, \"html.parser\").text\n",
277
+ " text_list = []\n",
278
+ " for paragraph in input_text.split('\\n'):\n",
279
+ " paragraph = paragraph.replace('—', '-')\n",
280
+ " paragraph = paragraph.replace(' .', '')\n",
281
+ " paragraph = re.sub(r'[^\\x00-\\x7f]', \"\", paragraph)\n",
282
+ " paragraph = re.sub(r'x0f', \" \", paragraph)\n",
283
+ " sentences = tokenize.sent_tokenize(paragraph)\n",
284
+ "\n",
285
+ " sentence_list = []\n",
286
+ " for sentence in sentences:\n",
287
+ " if not re.search('[a-zA-Z]', sentence):\n",
288
+ " sentence = ''\n",
289
+ " wrapped_sentences = wrapper.wrap(sentence)\n",
290
+ " sentence_list.append(wrapped_sentences)\n",
291
+ " trunc_sentences = [phrase for sublist in sentence_list for phrase in sublist]\n",
292
+ " text_list.append(trunc_sentences)\n",
293
+ " text_list = [text for sentences in text_list for text in sentences]\n",
294
+ " return text_list"
295
+ ]
296
+ },
297
+ {
298
+ "cell_type": "code",
299
+ "execution_count": 8,
300
+ "id": "f67e0184",
301
+ "metadata": {},
302
+ "outputs": [],
303
+ "source": [
304
+ "with open(file_path, 'r') as uploaded_file:\n",
305
+ " file = uploaded_file.read()\n",
306
+ " text = preprocess_text(file)"
307
+ ]
308
+ },
309
+ {
310
+ "cell_type": "code",
311
+ "execution_count": 10,
312
+ "id": "0bd67797",
313
+ "metadata": {},
314
+ "outputs": [
315
+ {
316
+ "data": {
317
+ "text/plain": [
318
+ "'Testing Text File \\n\\nWith generated random Lorem Ipsum and other unexpected characters!\\n\\n<a href=\"https://github.com/mkutarna/audiobook_gen/\">Link to generator repo!</a>\\n\\n此行是对非英语字符的测试\\n\\nLorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Proin fermentum leo vel orci porta non pulvinar. Pretium lectus quam id leo in vitae turpis massa sed. Donec ac odio tempor orci dapibus. Feugiat in ante metus dictum at tempor. Elementum tempus egestas sed sed risus. Adipiscing commodo elit at imperdiet dui accumsan sit. Placerat orci nulla pellentesque dignissim enim. Posuere lorem ipsum dolor sit. Id ornare arcu odio ut sem. Purus faucibus ornare suspendisse sed nisi lacus sed. Ac turpis egestas sed tempus urna et pharetra pharetra massa. Morbi quis commodo odio aenean. Malesuada proin libero nunc consequat interdum. Ut placerat orci nulla pellentesque dignissim enim sit. Elit at imperdiet dui accumsan sit amet.\\n\\nBuilt to test various characters and other possible inputs to the silero model.\\n\\nHere are some Chinese characters: 此行是对非英语字符的测试.\\n\\nThere are 24 letters in the Greek alphabet. The vowels: are α, ε, η, ι, ο, ω, υ. All the rest are consonants.\\n\\nWe can also test for mathematical symbols: ∫, ∇, ∞, δ, ε, X̄, %, √ ,a, ±, ÷, +, = ,-.\\n\\nFinally, here are some emoticons: ☺️🙂😊😀😁☹️🙁😞😟😣😖😨😧😦😱😫😩.'"
319
+ ]
320
+ },
321
+ "execution_count": 10,
322
+ "metadata": {},
323
+ "output_type": "execute_result"
324
+ }
325
+ ],
326
+ "source": [
327
+ "file"
328
+ ]
329
+ },
330
+ {
331
+ "cell_type": "code",
332
+ "execution_count": 9,
333
+ "id": "064aa16b",
334
+ "metadata": {},
335
+ "outputs": [
336
+ {
337
+ "data": {
338
+ "text/plain": [
339
+ "['Testing Text File',\n",
340
+ " 'With generated random Lorem Ipsum and other unexpected characters!',\n",
341
+ " 'Link to generator repo!',\n",
342
+ " 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.',\n",
343
+ " 'Proin fermentum leo vel orci porta non pulvinar.',\n",
344
+ " 'Pretium lectus quam id leo in vitae turpis massa sed.',\n",
345
+ " 'Donec ac odio tempor orci dapibus.',\n",
346
+ " 'Feugiat in ante metus dictum at tempor.',\n",
347
+ " 'Elementum tempus egestas sed sed risus.',\n",
348
+ " 'Adipiscing commodo elit at imperdiet dui accumsan sit.',\n",
349
+ " 'Placerat orci nulla pellentesque dignissim enim.',\n",
350
+ " 'Posuere lorem ipsum dolor sit.',\n",
351
+ " 'Id ornare arcu odio ut sem.',\n",
352
+ " 'Purus faucibus ornare suspendisse sed nisi lacus sed.',\n",
353
+ " 'Ac turpis egestas sed tempus urna et pharetra pharetra massa.',\n",
354
+ " 'Morbi quis commodo odio aenean.',\n",
355
+ " 'Malesuada proin libero nunc consequat interdum.',\n",
356
+ " 'Ut placerat orci nulla pellentesque dignissim enim sit.',\n",
357
+ " 'Elit at imperdiet dui accumsan sit amet.',\n",
358
+ " 'Built to test various characters and other possible inputs to the silero model.',\n",
359
+ " 'Here are some Chinese characters: .',\n",
360
+ " 'There are 24 letters in the Greek alphabet.',\n",
361
+ " 'The vowels: are , , , , , , .',\n",
362
+ " 'All the rest are consonants.',\n",
363
+ " 'We can also test for mathematical symbols: , , , , , X, %, ,a, , , +, = ,-.',\n",
364
+ " 'Finally, here are some emoticons: .']"
365
+ ]
366
+ },
367
+ "execution_count": 9,
368
+ "metadata": {},
369
+ "output_type": "execute_result"
370
+ }
371
+ ],
372
+ "source": [
373
+ "text"
374
+ ]
375
+ },
376
+ {
377
+ "cell_type": "code",
378
+ "execution_count": 22,
379
+ "id": "3e8e7965",
380
+ "metadata": {},
381
+ "outputs": [],
382
+ "source": [
383
+ "with open('test_processed.txt', 'w') as output_file:\n",
384
+ " for line in text:\n",
385
+ " output_file.write(line)\n",
386
+ " output_file.write('\\n')"
387
+ ]
388
+ },
389
+ {
390
+ "cell_type": "code",
391
+ "execution_count": 26,
392
+ "id": "2aa4c8ff",
393
+ "metadata": {},
394
+ "outputs": [],
395
+ "source": [
396
+ "with open('test_processed.txt', 'r') as process_file:\n",
397
+ " out_file = [line.strip() for line in process_file.readlines()]"
398
+ ]
399
+ },
400
+ {
401
+ "cell_type": "code",
402
+ "execution_count": 27,
403
+ "id": "c483fb65",
404
+ "metadata": {},
405
+ "outputs": [
406
+ {
407
+ "data": {
408
+ "text/plain": [
409
+ "['Testing Text File',\n",
410
+ " 'With generated random Lorem Ipsum and other unexpected characters!',\n",
411
+ " 'Link to generator repo!',\n",
412
+ " 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.',\n",
413
+ " 'Proin fermentum leo vel orci porta non pulvinar.',\n",
414
+ " 'Pretium lectus quam id leo in vitae turpis massa sed.',\n",
415
+ " 'Donec ac odio tempor orci dapibus.',\n",
416
+ " 'Feugiat in ante metus dictum at tempor.',\n",
417
+ " 'Elementum tempus egestas sed sed risus.',\n",
418
+ " 'Adipiscing commodo elit at imperdiet dui accumsan sit.',\n",
419
+ " 'Placerat orci nulla pellentesque dignissim enim.',\n",
420
+ " 'Posuere lorem ipsum dolor sit.',\n",
421
+ " 'Id ornare arcu odio ut sem.',\n",
422
+ " 'Purus faucibus ornare suspendisse sed nisi lacus sed.',\n",
423
+ " 'Ac turpis egestas sed tempus urna et pharetra pharetra massa.',\n",
424
+ " 'Morbi quis commodo odio aenean.',\n",
425
+ " 'Malesuada proin libero nunc consequat interdum.',\n",
426
+ " 'Ut placerat orci nulla pellentesque dignissim enim sit.',\n",
427
+ " 'Elit at imperdiet dui accumsan sit amet.',\n",
428
+ " 'Built to test various characters and other possible inputs to the silero model.',\n",
429
+ " 'Here are some Chinese characters: .',\n",
430
+ " 'There are 24 letters in the Greek alphabet.',\n",
431
+ " 'The vowels: are , , , , , , .',\n",
432
+ " 'All the rest are consonants.',\n",
433
+ " 'We can also test for mathematical symbols: , , , , , X, %, ,a, , , +, = ,-.',\n",
434
+ " 'Finally, here are some emoticons: .']"
435
+ ]
436
+ },
437
+ "execution_count": 27,
438
+ "metadata": {},
439
+ "output_type": "execute_result"
440
+ }
441
+ ],
442
+ "source": [
443
+ "out_file"
444
+ ]
445
+ },
446
+ {
447
+ "cell_type": "code",
448
+ "execution_count": null,
449
+ "id": "65646961",
450
+ "metadata": {},
451
+ "outputs": [],
452
+ "source": []
453
+ }
454
+ ],
455
+ "metadata": {
456
+ "kernelspec": {
457
+ <<<<<<< HEAD
458
+ "display_name": "Python 3",
459
+ =======
460
+ "display_name": "Python 3 (ipykernel)",
461
+ >>>>>>> appdev
462
+ "language": "python",
463
+ "name": "python3"
464
+ },
465
+ "language_info": {
466
+ "codemirror_mode": {
467
+ "name": "ipython",
468
+ "version": 3
469
+ },
470
+ "file_extension": ".py",
471
+ "mimetype": "text/x-python",
472
+ "name": "python",
473
+ "nbconvert_exporter": "python",
474
+ "pygments_lexer": "ipython3",
475
+ "version": "3.8.10"
476
+ }
477
+ },
478
+ "nbformat": 4,
479
+ "nbformat_minor": 5
480
+ }
notebooks/test.epub ADDED
Binary file (90.4 kB). View file
 
notebooks/test.htm ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4
+ <head>
5
+ <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
6
+ <meta http-equiv="Content-Style-Type" content="text/css" />
7
+ <title>Lorem Ipsum</title>
8
+
9
+ <style type="text/css">
10
+
11
+ body { margin-left: 20%;
12
+ margin-right: 20%;
13
+ text-align: justify; }
14
+
15
+ h1, h2, h3, h4, h5 {text-align: center; font-style: normal; font-weight:
16
+ normal; line-height: 1.5; margin-top: .5em; margin-bottom: .5em;}
17
+
18
+ h1 {font-size: 300%;
19
+ margin-top: 0.6em;
20
+ margin-bottom: 0.6em;
21
+ letter-spacing: 0.12em;
22
+ word-spacing: 0.2em;
23
+ text-indent: 0em;}
24
+ h2 {font-size: 150%; margin-top: 2em; margin-bottom: 1em;}
25
+ h3 {font-size: 130%; margin-top: 1em;}
26
+ h4 {font-size: 120%;}
27
+ h5 {font-size: 110%;}
28
+
29
+ .no-break {page-break-before: avoid;} /* for epubs */
30
+
31
+ div.chapter {page-break-before: always; margin-top: 4em;}
32
+
33
+ hr {width: 80%; margin-top: 2em; margin-bottom: 2em;}
34
+
35
+ p {text-indent: 1em;
36
+ margin-top: 0.25em;
37
+ margin-bottom: 0.25em; }
38
+
39
+ .p2 {margin-top: 2em;}
40
+
41
+ p.poem {text-indent: 0%;
42
+ margin-left: 10%;
43
+ font-size: 90%;
44
+ margin-top: 1em;
45
+ margin-bottom: 1em; }
46
+
47
+ p.letter {text-indent: 0%;
48
+ margin-left: 10%;
49
+ margin-right: 10%;
50
+ margin-top: 1em;
51
+ margin-bottom: 1em; }
52
+
53
+ p.noindent {text-indent: 0% }
54
+
55
+ p.center {text-align: center;
56
+ text-indent: 0em;
57
+ margin-top: 1em;
58
+ margin-bottom: 1em; }
59
+
60
+ p.footnote {font-size: 90%;
61
+ text-indent: 0%;
62
+ margin-left: 10%;
63
+ margin-right: 10%;
64
+ margin-top: 1em;
65
+ margin-bottom: 1em; }
66
+
67
+ sup { vertical-align: top; font-size: 0.6em; }
68
+
69
+ a:link {color:blue; text-decoration:none}
70
+ a:visited {color:blue; text-decoration:none}
71
+ a:hover {color:red}
72
+
73
+ </style>
74
+
75
+ </head>
76
+
77
+ <body>
78
+
79
+ <div style='display:block; margin:1em 0'>
80
+ This eBook is a generated Lorem Ipsum for the purposes of testing the Audiobook Gen app.
81
+ </div>
82
+ <div style='display:block; margin:1em 0'>Language: English</div>
83
+ <div style='display:block; margin:1em 0'>Character set encoding: UTF-8</div>
84
+
85
+
86
+ <p class="letter">
87
+ <i>
88
+ Diam vel quam elementum pulvinar etiam non quam. At tellus at urna condimentum mattis. Nisi scelerisque eu ultrices vitae auctor eu augue ut. Integer malesuada nunc vel risus commodo viverra maecenas accumsan. Ornare suspendisse sed nisi lacus. Sapien faucibus et molestie ac feugiat sed lectus. Quam elementum pulvinar etiam non. Elementum integer enim neque volutpat ac tincidunt. Justo laoreet sit amet cursus sit. Amet venenatis urna cursus eget nunc scelerisque viverra mauris. Cras semper auctor neque vitae tempus quam pellentesque nec nam. Fermentum iaculis eu non diam phasellus vestibulum lorem sed. Non pulvinar neque laoreet suspendisse interdum consectetur libero. Nec tincidunt praesent semper feugiat nibh sed. Sed id semper risus in hendrerit gravida rutrum. Suspendisse in est ante in nibh. Dui ut ornare lectus sit amet est placerat in.
89
+ </i>
90
+ </p>
91
+
92
+ </div><!--end chapter-->
93
+
94
+ <div class="chapter">
95
+
96
+ <h2><a name="pref01"></a>A NEW LOREM</h2>
97
+
98
+ <p>
99
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Proin fermentum leo vel orci porta non pulvinar. Pretium lectus quam id leo in vitae turpis massa sed. Donec ac odio tempor orci dapibus. Feugiat in ante metus dictum at tempor. Elementum tempus egestas sed sed risus. Adipiscing commodo elit at imperdiet dui accumsan sit. Placerat orci nulla pellentesque dignissim enim. Posuere lorem ipsum dolor sit. Id ornare arcu odio ut sem. Purus faucibus ornare suspendisse sed nisi lacus sed. Ac turpis egestas sed tempus urna et pharetra pharetra massa. Morbi quis commodo odio aenean. Malesuada proin libero nunc consequat interdum. Ut placerat orci nulla pellentesque dignissim enim sit. Elit at imperdiet dui accumsan sit amet.
100
+ </p>
101
+
102
+ <p>
103
+ Nunc sed id semper risus in hendrerit gravida rutrum quisque. Augue interdum velit euismod in pellentesque. Elementum curabitur vitae nunc sed velit dignissim sodales ut eu. Mi in nulla posuere sollicitudin aliquam ultrices sagittis orci a. Quisque sagittis purus sit amet volutpat consequat mauris. Risus in hendrerit gravida rutrum. Quis vel eros donec ac odio. Eget nunc lobortis mattis aliquam faucibus. Lobortis scelerisque fermentum dui faucibus. Est velit egestas dui id ornare arcu odio. Sed ullamcorper morbi tincidunt ornare massa eget egestas purus. Nisi porta lorem mollis aliquam ut porttitor leo a. Ut morbi tincidunt augue interdum velit. Egestas diam in arcu cursus euismod. Tortor id aliquet lectus proin nibh nisl condimentum id venenatis. Lectus sit amet est placerat in egestas erat imperdiet sed. Amet tellus cras adipiscing enim eu turpis egestas pretium. Et leo duis ut diam quam.
104
+ </p>
105
+
106
+ </div><!--end chapter-->
107
+
108
+ <div class="chapter">
109
+
110
+ <h2><a name="pref02"></a>IPSUM STRIKES BACK</h2>
111
+
112
+ <p>
113
+ Egestas diam in arcu cursus euismod quis. Leo in vitae turpis massa sed elementum tempus egestas. Amet nulla facilisi morbi tempus iaculis urna id volutpat. Parturient montes nascetur ridiculus mus. Erat pellentesque adipiscing commodo elit at imperdiet. Egestas congue quisque egestas diam in arcu cursus. Diam ut venenatis tellus in metus. Ullamcorper eget nulla facilisi etiam. Blandit turpis cursus in hac habitasse platea dictumst quisque. Cursus euismod quis viverra nibh cras pulvinar. Neque viverra justo nec ultrices. Dui ut ornare lectus sit. Mauris ultrices eros in cursus turpis massa tincidunt. Lobortis elementum nibh tellus molestie nunc non blandit massa enim. Ullamcorper morbi tincidunt ornare massa eget egestas purus viverra.
114
+ </p>
115
+
116
+ <p>
117
+ Mauris in aliquam sem fringilla ut morbi. Nunc sed blandit libero volutpat. Amet venenatis urna cursus eget nunc scelerisque. Sagittis nisl rhoncus mattis rhoncus urna neque. Felis eget nunc lobortis mattis aliquam faucibus purus in massa. Fringilla ut morbi tincidunt augue interdum. Nibh mauris cursus mattis molestie a iaculis at erat. Lacus sed turpis tincidunt id aliquet risus feugiat in. Nulla facilisi etiam dignissim diam quis enim lobortis. Vitae congue eu consequat ac felis donec et. Scelerisque viverra mauris in aliquam sem fringilla ut morbi tincidunt. Blandit volutpat maecenas volutpat blandit aliquam. Ultrices tincidunt arcu non sodales neque sodales ut etiam. Sollicitudin aliquam ultrices sagittis orci a scelerisque. Id cursus metus aliquam eleifend mi. Magna eget est lorem ipsum dolor sit amet consectetur. Eleifend mi in nulla posuere sollicitudin aliquam ultrices. Neque sodales ut etiam sit amet. Enim neque volutpat ac tincidunt vitae semper quis lectus nulla.
118
+ </p>
outputs/.gitkeep ADDED
File without changes
pytest.ini ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # pytest.ini
2
+ [pytest]
3
+ pythonpath = . src
4
+ testpaths = tests
requirements.txt CHANGED
@@ -1,6 +1,7 @@
1
  silero
2
  streamlit
3
  ebooklib
 
4
  bs4
5
  nltk
6
  stqdm
 
1
  silero
2
  streamlit
3
  ebooklib
4
+ PyPDF2
5
  bs4
6
  nltk
7
  stqdm
resources/audiobook_gen.png ADDED
resources/instructions.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This tool generates custom-voiced audiobook files from an imported ebook file. Please upload an ebook to begin the conversion process. Output files will be downloaded as a .zip archive.
2
+
3
+ ### Instructions
4
+ 1. Upload the book file to be converted.
5
+ 2. Select the desired voice for the audiobook.
6
+ 3. Click to run!
7
+
8
+
9
+ ### Notes
10
+ - Currently, only epub, txt, pdf files are accepted for import.
11
+ - Max input file size: 200 MB
12
+ - Audiobook generation can take up to 1 hour, depending on the size of the file.
13
+ - Generation time also depends on compute available for the app.
resources/speaker_en_0.wav ADDED
Binary file (629 kB). View file
 
resources/speaker_en_110.wav ADDED
Binary file (580 kB). View file
 
resources/speaker_en_29.wav ADDED
Binary file (546 kB). View file
 
resources/speaker_en_41.wav ADDED
Binary file (574 kB). View file
 
src/config.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Notes
3
+ -----
4
+ This module contains the configuration entries for audiobook_gen.
5
+ """
6
+
7
+ from pathlib import Path
8
+
9
+ output_path = Path("outputs")
10
+ resource_path = Path("resources")
11
+ INSTRUCTIONS = Path("resources/instructions.md")
12
+
13
+ DEVICE = 'cpu'
14
+ LANGUAGE = 'en'
15
+ MAX_CHAR_LEN = 140
16
+ MODEL_ID = 'v3_en'
17
+ SAMPLE_RATE = 24000
18
+ SPEAKER_LIST = {
19
+ 'Voice 1 (Female)': 'en_0',
20
+ 'Voice 2 (Male)': 'en_29',
21
+ 'Voice 3 (Female)': 'en_41',
22
+ 'Voice 4 (Male)': 'en_110'
23
+ }
src/file_readers.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Notes
3
+ -----
4
+ This module contains the functions for audiobook_gen that read in the
5
+ file formats that require more parsing than plain text (pdf, html, epub),
6
+ as well as the preprocessing function for all input files.
7
+ """
8
+ import re
9
+
10
+ from bs4 import BeautifulSoup
11
+ from nltk import tokenize, download
12
+ from textwrap import TextWrapper
13
+ from stqdm import stqdm
14
+
15
+ from src import config
16
+
17
+ download('punkt', quiet=True)
18
+ wrapper = TextWrapper(config.MAX_CHAR_LEN, fix_sentence_endings=True)
19
+
20
+
21
+ def preprocess_text(file):
22
+ """
23
+ Preprocesses and tokenizes a section of text from the corpus:
24
+ 1. Removes residual HTML tags
25
+ 2. Handles un-supported characters
26
+ 3. Tokenizes text and confirms max token size
27
+
28
+ Parameters
29
+ ----------
30
+ file : file_like
31
+ list of strings,
32
+ section of corpus to be pre-processed and tokenized
33
+
34
+ Returns
35
+ -------
36
+ text_list : array_like
37
+ list of strings,
38
+ body of tokenized text from which audio is generated
39
+
40
+ """
41
+ input_text = BeautifulSoup(file, "html.parser").text
42
+ text_list = []
43
+ for paragraph in input_text.split('\n'):
44
+ paragraph = paragraph.replace('—', '-')
45
+ paragraph = paragraph.replace(' .', '')
46
+ paragraph = re.sub(r'[^\x00-\x7f]', "", paragraph)
47
+ paragraph = re.sub(r'x0f', " ", paragraph)
48
+ sentences = tokenize.sent_tokenize(paragraph)
49
+
50
+ sentence_list = []
51
+ for sentence in sentences:
52
+ if not re.search('[a-zA-Z]', sentence):
53
+ sentence = ''
54
+ wrapped_sentences = wrapper.wrap(sentence)
55
+ sentence_list.append(wrapped_sentences)
56
+ trunc_sentences = [phrase for sublist in sentence_list for phrase in sublist]
57
+ text_list.append(trunc_sentences)
58
+ text_list = [text for sentences in text_list for text in sentences]
59
+ return text_list
60
+
61
+
62
+ def read_pdf(file):
63
+ """
64
+ Invokes PyPDF2 PdfReader to extract main body text from PDF file_like input,
65
+ and preprocesses text section by section.
66
+
67
+ Parameters
68
+ ----------
69
+ file : file_like
70
+ PDF file input to be parsed and preprocessed
71
+
72
+ Returns
73
+ -------
74
+ corpus : array_like
75
+ list of list of strings,
76
+ body of tokenized text from which audio is generated
77
+
78
+ """
79
+ from PyPDF2 import PdfReader
80
+
81
+ reader = PdfReader(file)
82
+ corpus = []
83
+ for item in stqdm(list(reader.pages), desc="Pages in pdf:"):
84
+ text_list = preprocess_text(item.extract_text())
85
+ corpus.append(text_list)
86
+ return corpus
87
+
88
+
89
+ def read_epub(file):
90
+ """
91
+ Invokes ebooklib read_epub to extract main body text from epub file_like input,
92
+ and preprocesses text section by section.
93
+
94
+ Parameters
95
+ ----------
96
+ file : file_like
97
+ EPUB file input to be parsed and preprocessed
98
+
99
+ Returns
100
+ -------
101
+ corpus : array_like
102
+ list of list of strings,
103
+ body of tokenized text from which audio is generated
104
+
105
+ file_title : str
106
+ title of document, used to name output files
107
+
108
+ """
109
+ import ebooklib
110
+ from ebooklib import epub
111
+
112
+ book = epub.read_epub(file)
113
+ file_title = book.get_metadata('DC', 'title')[0][0]
114
+ file_title = file_title.lower().replace(' ', '_')
115
+ corpus = []
116
+ for item in stqdm(list(book.get_items()), desc="Chapters in ebook:"):
117
+ if item.get_type() == ebooklib.ITEM_DOCUMENT:
118
+ text_list = preprocess_text(item.get_content())
119
+ corpus.append(text_list)
120
+ return corpus, file_title
src/output.py CHANGED
@@ -1,14 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  def assemble_zip(title):
2
- import pathlib
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import zipfile
4
  from stqdm import stqdm
5
 
6
- directory = pathlib.Path("outputs/")
7
- zip_name = f"outputs/{title}.zip"
 
 
8
 
9
  with zipfile.ZipFile(zip_name, mode="w") as archive:
10
- for file_path in stqdm(directory.iterdir()):
11
- if pathlib.Path(file_path).suffix == '.wav':
12
  archive.write(file_path, arcname=file_path.name)
 
13
 
14
- return zip_name
 
1
+ """
2
+ Notes
3
+ -----
4
+ This module contains the functions for audiobook_gen that take the generated audio tensors and output to audio files,
5
+ as well as assembling the final zip archive for user download.
6
+ """
7
+ import logging
8
+
9
+ from src import config
10
+
11
+
12
+ def write_audio(audio_list, sample_path):
13
+ """
14
+ Invokes torchaudio to save generated audio tensors to a file.
15
+
16
+ Parameters
17
+ ----------
18
+ audio_list : torch.tensor
19
+ pytorch tensor containing generated audio
20
+
21
+ sample_path : str
22
+ file name and path for outputting tensor to audio file
23
+
24
+ Returns
25
+ -------
26
+ None
27
+
28
+ """
29
+ import torch
30
+ import torchaudio
31
+ from src import config as cf
32
+
33
+ if not config.output_path.exists():
34
+ config.output_path.mkdir()
35
+
36
+ if len(audio_list) > 0:
37
+ audio_file = torch.cat(audio_list).reshape(1, -1)
38
+ torchaudio.save(sample_path, audio_file, cf.SAMPLE_RATE)
39
+ logging.info(f'Audio generated at: {sample_path}')
40
+ else:
41
+ logging.info(f'Audio at: {sample_path} is empty.')
42
+
43
+
44
  def assemble_zip(title):
45
+ """
46
+ Creates a zip file and inserts all .wav files in the output directory,
47
+ and returns the name / path of the zip file.
48
+
49
+ Parameters
50
+ ----------
51
+ title : str
52
+ title of document, used to name zip directory
53
+
54
+ Returns
55
+ -------
56
+ zip_name : str
57
+ name and path of zip directory generated
58
+
59
+ """
60
  import zipfile
61
  from stqdm import stqdm
62
 
63
+ if not config.output_path.exists():
64
+ config.output_path.mkdir()
65
+
66
+ zip_name = config.output_path / f'{title}.zip'
67
 
68
  with zipfile.ZipFile(zip_name, mode="w") as archive:
69
+ for file_path in stqdm(config.output_path.iterdir()):
70
+ if file_path.suffix == '.wav':
71
  archive.write(file_path, arcname=file_path.name)
72
+ file_path.unlink()
73
 
74
+ return zip_name
src/predict.py CHANGED
@@ -1,40 +1,110 @@
1
- def load_models():
2
- import torch
3
- from silero import silero_tts
 
 
 
 
 
 
 
 
 
 
4
 
5
- language = 'en'
6
- model_id = 'v3_en'
7
- model, _ = silero_tts(language=language,
8
- speaker=model_id)
 
9
 
10
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
11
- model.to(device) # gpu or cpu
 
 
 
 
 
 
 
 
 
 
 
12
  return model
13
 
14
- def audiobook_gen(ebook, title, model):
15
- import torch
16
- import torchaudio
17
- from stqdm import stqdm
18
-
19
- sample_rate = 24000
20
- speaker = 'en_0'
21
-
22
- for chapter in stqdm(ebook, desc="Chapters in ebook:"):
23
- chapter_index = f'chapter{ebook.index(chapter):03}'
24
- audio_list = []
25
- for sentence in stqdm(chapter, desc="Sentences in chapter:"):
26
- audio = model.apply_tts(text=sentence,
27
- speaker=speaker,
28
- sample_rate=sample_rate)
29
- if len(audio) > 0 and isinstance(audio, torch.Tensor):
30
- audio_list.append(audio)
31
- else:
32
- print(f'Tensor for sentence is not valid: \n {sentence}')
33
-
34
- sample_path = f'outputs/{title}_{chapter_index}.wav'
35
-
36
- if len(audio_list) > 0:
37
- audio_file = torch.cat(audio_list).reshape(1, -1)
38
- torchaudio.save(sample_path, audio_file, sample_rate)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  else:
40
- print(f'Chapter {chapter_index} is empty.')
 
 
 
 
1
+ """
2
+ Notes
3
+ -----
4
+ This module contains the functions for audiobook_gen that handle text-to-speech generation.
5
+ The functions take in the preprocessed text and invoke the Silero package to generate audio tensors.
6
+ """
7
+ import logging
8
+
9
+ import torch
10
+ from stqdm import stqdm
11
+
12
+ from src import output, config
13
+
14
 
15
+ def load_model():
16
+ """
17
+ Load the Silero package containing the model information
18
+ for the language and speaker set in config.py
19
+ and converts it to the set device.
20
 
21
+ Parameters
22
+ ----------
23
+ None
24
+
25
+ Returns
26
+ -------
27
+ model : torch.package
28
+
29
+ """
30
+ from silero import silero_tts
31
+
32
+ model, _ = silero_tts(language=config.LANGUAGE, speaker=config.MODEL_ID)
33
+ model.to(config.DEVICE)
34
  return model
35
 
36
+
37
+ def generate_audio(corpus, title, model, speaker):
38
+ """
39
+ For each section within the corpus, calls predict() function to generate audio tensors
40
+ and then calls write_audio() to output the tensors to audio files.
41
+
42
+ Parameters
43
+ ----------
44
+ corpus : array_like
45
+ list of list of strings,
46
+ body of tokenized text from which audio is generated
47
+
48
+ title : str
49
+ title of document, used to name output files
50
+
51
+ model : torch.package
52
+ torch package containing model for language and speaker specified
53
+
54
+ speaker : str
55
+ identifier of selected speaker for audio generation
56
+
57
+ Returns
58
+ -------
59
+ None
60
+
61
+ """
62
+ for section in stqdm(corpus, desc="Sections in document:"):
63
+ section_index = f'part{corpus.index(section):03}'
64
+ audio_list, sample_path = predict(section, section_index, title, model, speaker)
65
+ output.write_audio(audio_list, sample_path)
66
+
67
+
68
+ def predict(text_section, section_index, title, model, speaker):
69
+ """
70
+ Applies Silero TTS engine for each token within the corpus section,
71
+ appending it to the output tensor array, and creates file path for output.
72
+
73
+ Parameters
74
+ ----------
75
+ text_section : array_like
76
+ list of strings,
77
+ body of tokenized text from which audio is generated
78
+
79
+ section_index : int
80
+ index of current section within corpus
81
+
82
+ title : str
83
+ title of document, used to name output files
84
+
85
+ model : torch.package
86
+ torch package containing model for language and speaker specified
87
+
88
+ speaker : str
89
+ identifier of selected speaker for audio generation
90
+
91
+ Returns
92
+ -------
93
+ audio_list : torch.tensor
94
+ pytorch tensor containing generated audio
95
+
96
+ sample_path : str
97
+ file name and path for outputting tensor to audio file
98
+
99
+ """
100
+ audio_list = []
101
+ for sentence in stqdm(text_section, desc="Sentences in section:"):
102
+ audio = model.apply_tts(text=sentence, speaker=speaker, sample_rate=config.SAMPLE_RATE)
103
+ if len(audio) > 0 and isinstance(audio, torch.Tensor):
104
+ audio_list.append(audio)
105
+ logging.info(f'Tensor generated for sentence: \n {sentence}')
106
  else:
107
+ logging.info(f'Tensor for sentence is not valid: \n {sentence}')
108
+
109
+ sample_path = config.output_path / f'{title}_{section_index}.wav'
110
+ return audio_list, sample_path
tests/data/test.epub ADDED
Binary file (90.4 kB). View file
 
tests/data/test.htm ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4
+ <head>
5
+ <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
6
+ <meta http-equiv="Content-Style-Type" content="text/css" />
7
+ <title>Lorem Ipsum</title>
8
+
9
+ <style type="text/css">
10
+
11
+ body { margin-left: 20%;
12
+ margin-right: 20%;
13
+ text-align: justify; }
14
+
15
+ h1, h2, h3, h4, h5 {text-align: center; font-style: normal; font-weight:
16
+ normal; line-height: 1.5; margin-top: .5em; margin-bottom: .5em;}
17
+
18
+ h1 {font-size: 300%;
19
+ margin-top: 0.6em;
20
+ margin-bottom: 0.6em;
21
+ letter-spacing: 0.12em;
22
+ word-spacing: 0.2em;
23
+ text-indent: 0em;}
24
+ h2 {font-size: 150%; margin-top: 2em; margin-bottom: 1em;}
25
+ h3 {font-size: 130%; margin-top: 1em;}
26
+ h4 {font-size: 120%;}
27
+ h5 {font-size: 110%;}
28
+
29
+ .no-break {page-break-before: avoid;} /* for epubs */
30
+
31
+ div.chapter {page-break-before: always; margin-top: 4em;}
32
+
33
+ hr {width: 80%; margin-top: 2em; margin-bottom: 2em;}
34
+
35
+ p {text-indent: 1em;
36
+ margin-top: 0.25em;
37
+ margin-bottom: 0.25em; }
38
+
39
+ .p2 {margin-top: 2em;}
40
+
41
+ p.poem {text-indent: 0%;
42
+ margin-left: 10%;
43
+ font-size: 90%;
44
+ margin-top: 1em;
45
+ margin-bottom: 1em; }
46
+
47
+ p.letter {text-indent: 0%;
48
+ margin-left: 10%;
49
+ margin-right: 10%;
50
+ margin-top: 1em;
51
+ margin-bottom: 1em; }
52
+
53
+ p.noindent {text-indent: 0% }
54
+
55
+ p.center {text-align: center;
56
+ text-indent: 0em;
57
+ margin-top: 1em;
58
+ margin-bottom: 1em; }
59
+
60
+ p.footnote {font-size: 90%;
61
+ text-indent: 0%;
62
+ margin-left: 10%;
63
+ margin-right: 10%;
64
+ margin-top: 1em;
65
+ margin-bottom: 1em; }
66
+
67
+ sup { vertical-align: top; font-size: 0.6em; }
68
+
69
+ a:link {color:blue; text-decoration:none}
70
+ a:visited {color:blue; text-decoration:none}
71
+ a:hover {color:red}
72
+
73
+ </style>
74
+
75
+ </head>
76
+
77
+ <body>
78
+
79
+ <div style='display:block; margin:1em 0'>
80
+ This eBook is a generated Lorem Ipsum for the purposes of testing the Audiobook Gen app.
81
+ </div>
82
+ <div style='display:block; margin:1em 0'>Language: English</div>
83
+ <div style='display:block; margin:1em 0'>Character set encoding: UTF-8</div>
84
+
85
+
86
+ <p class="letter">
87
+ <i>
88
+ Diam vel quam elementum pulvinar etiam non quam. At tellus at urna condimentum mattis. Nisi scelerisque eu ultrices vitae auctor eu augue ut. Integer malesuada nunc vel risus commodo viverra maecenas accumsan. Ornare suspendisse sed nisi lacus. Sapien faucibus et molestie ac feugiat sed lectus. Quam elementum pulvinar etiam non. Elementum integer enim neque volutpat ac tincidunt. Justo laoreet sit amet cursus sit. Amet venenatis urna cursus eget nunc scelerisque viverra mauris. Cras semper auctor neque vitae tempus quam pellentesque nec nam. Fermentum iaculis eu non diam phasellus vestibulum lorem sed. Non pulvinar neque laoreet suspendisse interdum consectetur libero. Nec tincidunt praesent semper feugiat nibh sed. Sed id semper risus in hendrerit gravida rutrum. Suspendisse in est ante in nibh. Dui ut ornare lectus sit amet est placerat in.
89
+ </i>
90
+ </p>
91
+
92
+ </div><!--end chapter-->
93
+
94
+ <div class="chapter">
95
+
96
+ <h2><a name="pref01"></a>A NEW LOREM</h2>
97
+
98
+ <p>
99
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Proin fermentum leo vel orci porta non pulvinar. Pretium lectus quam id leo in vitae turpis massa sed. Donec ac odio tempor orci dapibus. Feugiat in ante metus dictum at tempor. Elementum tempus egestas sed sed risus. Adipiscing commodo elit at imperdiet dui accumsan sit. Placerat orci nulla pellentesque dignissim enim. Posuere lorem ipsum dolor sit. Id ornare arcu odio ut sem. Purus faucibus ornare suspendisse sed nisi lacus sed. Ac turpis egestas sed tempus urna et pharetra pharetra massa. Morbi quis commodo odio aenean. Malesuada proin libero nunc consequat interdum. Ut placerat orci nulla pellentesque dignissim enim sit. Elit at imperdiet dui accumsan sit amet.
100
+ </p>
101
+
102
+ <p>
103
+ Nunc sed id semper risus in hendrerit gravida rutrum quisque. Augue interdum velit euismod in pellentesque. Elementum curabitur vitae nunc sed velit dignissim sodales ut eu. Mi in nulla posuere sollicitudin aliquam ultrices sagittis orci a. Quisque sagittis purus sit amet volutpat consequat mauris. Risus in hendrerit gravida rutrum. Quis vel eros donec ac odio. Eget nunc lobortis mattis aliquam faucibus. Lobortis scelerisque fermentum dui faucibus. Est velit egestas dui id ornare arcu odio. Sed ullamcorper morbi tincidunt ornare massa eget egestas purus. Nisi porta lorem mollis aliquam ut porttitor leo a. Ut morbi tincidunt augue interdum velit. Egestas diam in arcu cursus euismod. Tortor id aliquet lectus proin nibh nisl condimentum id venenatis. Lectus sit amet est placerat in egestas erat imperdiet sed. Amet tellus cras adipiscing enim eu turpis egestas pretium. Et leo duis ut diam quam.
104
+ </p>
105
+
106
+ </div><!--end chapter-->
107
+
108
+ <div class="chapter">
109
+
110
+ <h2><a name="pref02"></a>IPSUM STRIKES BACK</h2>
111
+
112
+ <p>
113
+ Egestas diam in arcu cursus euismod quis. Leo in vitae turpis massa sed elementum tempus egestas. Amet nulla facilisi morbi tempus iaculis urna id volutpat. Parturient montes nascetur ridiculus mus. Erat pellentesque adipiscing commodo elit at imperdiet. Egestas congue quisque egestas diam in arcu cursus. Diam ut venenatis tellus in metus. Ullamcorper eget nulla facilisi etiam. Blandit turpis cursus in hac habitasse platea dictumst quisque. Cursus euismod quis viverra nibh cras pulvinar. Neque viverra justo nec ultrices. Dui ut ornare lectus sit. Mauris ultrices eros in cursus turpis massa tincidunt. Lobortis elementum nibh tellus molestie nunc non blandit massa enim. Ullamcorper morbi tincidunt ornare massa eget egestas purus viverra.
114
+ </p>
115
+
116
+ <p>
117
+ Mauris in aliquam sem fringilla ut morbi. Nunc sed blandit libero volutpat. Amet venenatis urna cursus eget nunc scelerisque. Sagittis nisl rhoncus mattis rhoncus urna neque. Felis eget nunc lobortis mattis aliquam faucibus purus in massa. Fringilla ut morbi tincidunt augue interdum. Nibh mauris cursus mattis molestie a iaculis at erat. Lacus sed turpis tincidunt id aliquet risus feugiat in. Nulla facilisi etiam dignissim diam quis enim lobortis. Vitae congue eu consequat ac felis donec et. Scelerisque viverra mauris in aliquam sem fringilla ut morbi tincidunt. Blandit volutpat maecenas volutpat blandit aliquam. Ultrices tincidunt arcu non sodales neque sodales ut etiam. Sollicitudin aliquam ultrices sagittis orci a scelerisque. Id cursus metus aliquam eleifend mi. Magna eget est lorem ipsum dolor sit amet consectetur. Eleifend mi in nulla posuere sollicitudin aliquam ultrices. Neque sodales ut etiam sit amet. Enim neque volutpat ac tincidunt vitae semper quis lectus nulla.
118
+ </p>
tests/data/test.pdf ADDED
Binary file (99.9 kB). View file
 
tests/data/test.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Testing Text File
2
+
3
+ With generated random Lorem Ipsum and other unexpected characters!
4
+
5
+ <a href="https://github.com/mkutarna/audiobook_gen/">Link to generator repo!</a>
6
+
7
+ 此行是对非英语字符的测试
8
+
9
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Proin fermentum leo vel orci porta non pulvinar. Pretium lectus quam id leo in vitae turpis massa sed. Donec ac odio tempor orci dapibus. Feugiat in ante metus dictum at tempor. Elementum tempus egestas sed sed risus. Adipiscing commodo elit at imperdiet dui accumsan sit. Placerat orci nulla pellentesque dignissim enim. Posuere lorem ipsum dolor sit. Id ornare arcu odio ut sem. Purus faucibus ornare suspendisse sed nisi lacus sed. Ac turpis egestas sed tempus urna et pharetra pharetra massa. Morbi quis commodo odio aenean. Malesuada proin libero nunc consequat interdum. Ut placerat orci nulla pellentesque dignissim enim sit. Elit at imperdiet dui accumsan sit amet.
10
+
11
+ Built to test various characters and other possible inputs to the silero model.
12
+
13
+ Here are some Chinese characters: 此行是对非英语字符的测试.
14
+
15
+ There are 24 letters in the Greek alphabet. The vowels: are α, ε, η, ι, ο, ω, υ. All the rest are consonants.
16
+
17
+ We can also test for mathematical symbols: ∫, ∇, ∞, δ, ε, X̄, %, √ ,a, ±, ÷, +, = ,-.
18
+
19
+ Finally, here are some emoticons: ☺️🙂😊😀😁☹️🙁😞😟😣😖😨😧😦😱😫😩.
tests/data/test_audio.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae3c3cf68e220fa23fd4b9a1591523ced8cfd80459dab714a702634458a5b56f
3
+ size 593767
tests/data/test_predict.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4be523f61990d57fb7031eca43edd04be401a0ce4062f5e7aaafc5de660035b6
3
+ size 679930
tests/data/test_predict.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Predict Testing Text File
2
+
3
+ Audiobook Gen is a tool that allows the users to generate an audio file from an ebook or other document.
tests/data/test_processed.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Testing Text File
2
+ With generated random Lorem Ipsum and other unexpected characters!
3
+ Link to generator repo!
4
+ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
5
+ Proin fermentum leo vel orci porta non pulvinar.
6
+ Pretium lectus quam id leo in vitae turpis massa sed.
7
+ Donec ac odio tempor orci dapibus.
8
+ Feugiat in ante metus dictum at tempor.
9
+ Elementum tempus egestas sed sed risus.
10
+ Adipiscing commodo elit at imperdiet dui accumsan sit.
11
+ Placerat orci nulla pellentesque dignissim enim.
12
+ Posuere lorem ipsum dolor sit.
13
+ Id ornare arcu odio ut sem.
14
+ Purus faucibus ornare suspendisse sed nisi lacus sed.
15
+ Ac turpis egestas sed tempus urna et pharetra pharetra massa.
16
+ Morbi quis commodo odio aenean.
17
+ Malesuada proin libero nunc consequat interdum.
18
+ Ut placerat orci nulla pellentesque dignissim enim sit.
19
+ Elit at imperdiet dui accumsan sit amet.
20
+ Built to test various characters and other possible inputs to the silero model.
21
+ Here are some Chinese characters: .
22
+ There are 24 letters in the Greek alphabet.
23
+ The vowels: are , , , , , , .
24
+ All the rest are consonants.
25
+ We can also test for mathematical symbols: , , , , , X, %, ,a, , , +, = ,-.
26
+ Finally, here are some emoticons: .
tests/test_config.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Notes
3
+ -----
4
+ This module contains the configuration entries for audiobook_gen tests.
5
+ """
6
+
7
+ from pathlib import Path
8
+
9
+ data_path = Path("tests/data")
tests/test_file_readers.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ import numpy as np
3
+
4
+ from src import file_readers
5
+ import test_config
6
+
7
+
8
+ def test_preprocess_text():
9
+ """
10
+ Tests preprocess function by asserting title,
11
+ shape of corpus, and correct line reading.
12
+ """
13
+ test_path = test_config.data_path / "test.txt"
14
+ processed_path = test_config.data_path / "test_processed.txt"
15
+ with open(test_path, 'r') as file:
16
+ test_corpus = file_readers.preprocess_text(file)
17
+ with open(processed_path, 'r') as process_file:
18
+ processed_corpus = [line.strip() for line in process_file.readlines()]
19
+
20
+ assert processed_corpus == test_corpus
21
+
22
+
23
+ def test_read_pdf():
24
+ pdf_path = test_config.data_path / "test.pdf"
25
+ corpus = np.array(file_readers.read_pdf(pdf_path), dtype=object)
26
+
27
+ assert np.shape(corpus) == (4, )
28
+ assert np.shape(corpus[0]) == (3, )
29
+ assert corpus[0][0] == 'Lorem Ipsum'
30
+ assert corpus[2][0] == 'Preface'
31
+
32
+
33
+ def test_read_epub():
34
+ """
35
+ Tests read_epub function by asserting title,
36
+ shape of corpus, and correct line reading.
37
+ """
38
+ ebook_path = test_config.data_path / "test.epub"
39
+ corpus, title = file_readers.read_epub(ebook_path)
40
+ corpus_arr = np.array(corpus, dtype=object)
41
+
42
+ assert title == "the_picture_of_dorian_gray"
43
+ assert np.shape(corpus_arr) == (6,)
44
+ assert np.shape(corpus_arr[0]) == (39,)
45
+ assert corpus[0][0] == 'The Project Gutenberg eBook of The Picture of Dorian Gray, by Oscar Wilde'
46
+ assert corpus[2][0] == 'CHAPTER I.'
tests/test_output.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ from src import output, config
4
+ import test_config
5
+
6
+
7
+ def test_write_audio():
8
+ """
9
+ Tests write_audio function, takes in an audio tensor with a file path and writes the audio to a file.
10
+ """
11
+ import torch
12
+
13
+ test_path = test_config.data_path / 'test_audio.wav'
14
+ audio_path = test_config.data_path / 'test_audio.pt'
15
+ audio_list = torch.load(audio_path)
16
+
17
+ output.write_audio(audio_list, test_path)
18
+
19
+ assert test_path.is_file()
20
+ assert test_path.stat().st_size == 592858
21
+
22
+ test_path.unlink()
23
+
24
+
25
+ def test_assemble_zip():
26
+ """
27
+ Tests assemble_zip function, which collects all the audio files from the output directory,
28
+ and zips them up into a zip directory.
29
+ """
30
+ from shutil import copy2
31
+
32
+ if not config.output_path.exists():
33
+ config.output_path.mkdir()
34
+
35
+ title = "speaker_samples"
36
+ zip_path = config.output_path / 'speaker_samples.zip'
37
+ wav1_path = config.output_path / 'speaker_en_0.wav'
38
+ wav2_path = config.output_path / 'speaker_en_110.wav'
39
+
40
+ for file_path in config.resource_path.iterdir():
41
+ if file_path.suffix == '.wav':
42
+ copy2(file_path, config.output_path)
43
+
44
+ _ = output.assemble_zip(title)
45
+
46
+ assert zip_path.is_file()
47
+ assert not wav1_path.is_file()
48
+ assert not wav2_path.is_file()
49
+
50
+ zip_path.unlink()
tests/test_predict.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ import torch
3
+ import numpy as np
4
+
5
+ from src import predict, file_readers, config
6
+ import test_config
7
+
8
+
9
+ def test_load_model():
10
+ """
11
+ Tests load_model function, which loads the silero TTS model.
12
+ """
13
+ model = predict.load_model()
14
+
15
+ assert model.speakers[0] == 'en_0'
16
+ assert np.shape(model.speakers) == (119,)
17
+
18
+
19
+ def test_generate_audio():
20
+ """
21
+ Tests generate_audio function, which takes the TTS model and file input,
22
+ and uses the predict & write_audio functions to output the audio file.
23
+ """
24
+ ebook_path = test_config.data_path / "test.epub"
25
+ wav1_path = config.output_path / 'the_picture_of_dorian_gray_part000.wav'
26
+ wav2_path = config.output_path / 'the_picture_of_dorian_gray_part001.wav'
27
+ wav3_path = config.output_path / 'the_picture_of_dorian_gray_part002.wav'
28
+ corpus, title = file_readers.read_epub(ebook_path)
29
+
30
+ model = predict.load_model()
31
+ speaker = 'en_110'
32
+ predict.generate_audio(corpus[0:2], title, model, speaker)
33
+
34
+ assert wav1_path.is_file()
35
+ assert wav2_path.is_file()
36
+ assert not wav3_path.is_file()
37
+
38
+ wav1_path.unlink()
39
+ wav2_path.unlink()
40
+
41
+
42
+ def test_predict():
43
+ """
44
+ Tests predict function, generates audio tensors for each token in the text section,
45
+ and appends them together along with a generated file path for output.
46
+ """
47
+ seed = 1337
48
+ torch.manual_seed(seed)
49
+ torch.cuda.manual_seed(seed)
50
+ model = predict.load_model()
51
+
52
+ tensor_path = test_config.data_path / "test_predict.pt"
53
+ test_tensor = torch.load(tensor_path)
54
+
55
+ text_path = test_config.data_path / "test_predict.txt"
56
+ with open(text_path, 'r') as file:
57
+ text = file_readers.preprocess_text(file)
58
+ title = 'test_predict'
59
+ section_index = 'part001'
60
+ speaker = 'en_0'
61
+
62
+ audio_list, _ = predict.predict(text, section_index, title, model, speaker)
63
+ audio_tensor = torch.cat(audio_list).reshape(1, -1)
64
+
65
+ torch.testing.assert_close(audio_tensor, test_tensor, atol=1e-3, rtol=0.9)