mkutarna commited on
Commit
fd88500
2 Parent(s): ef79b65 602a617

Resolving merge conflicts, cleaning out repo file history of large binary files

Browse files
README.md CHANGED
@@ -1,27 +1,29 @@
1
- Audiobook Gen
2
- =============
3
 
4
- ## Description
5
- Audiobook Gen is a tool that allows the users to generate an audio file of text (e.g. audiobook), read in the voice of the user's choice. This tool is based on the Silero text-to-speech toolkit and uses Streamlit to deliver the application.
6
 
7
- ## Demo
8
- A demonstration of this tool is hosted at HuggingFace Spaces - see [Audiobook_Gen](https://huggingface.co/spaces/mkutarna/audiobook_gen).
 
 
 
 
 
 
9
 
10
- ![Demo Screenshot](https://github.com/mkutarna/audiobook_gen/blob/appdev/resources/audiobook_gen.png "Screenshot")
 
 
 
11
 
12
- #### Instructions
13
- 1. Upload the book file to be converted.
14
- 2. Select the desired voice for the audiobook.
15
- 3. Click to run!
16
 
17
- ## Dependencies
18
- - silero
19
- - streamlit
20
- - ebooklib
21
- - PyPDF2
22
- - bs4
23
- - nltk
24
- - stqdm
25
 
26
- ## License
27
- See [LICENSE](https://github.com/mkutarna/audiobook_gen/blob/master/LICENSE)
 
 
 
 
 
1
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/mkutarna/audiobook_gen/blob/master/LICENSE)
2
+ ![GitHub Workflow Status](https://img.shields.io/github/workflow/status/mkutarna/audiobook_gen/Python%20application)
3
 
4
+ # Audiobook Gen
 
5
 
6
+ ## About
7
+ Audiobook Gen converts text to audiobook format. It allows you to choose which voice you want to listen to.
8
+
9
+ - Do you want to listen to a book that isn't available on Audible?
10
+ - Have you tried an audiobook from LibriVox and found the audio quality lacking?
11
+ - Don't have time to sit and read a document, but would prefer to listen to it?
12
+
13
+ You can input various text formats (`txt`, `pdf`, `epub` - more options in development) and output a `zip` archive of audio files (`wav`). This is an open-source tool based on the [Silero text-to-speech toolkit](https://github.com/snakers4/silero-models) and uses Streamlit to deliver the application.
14
 
15
+ # Demo
16
+
17
+ ## HuggingFace Space
18
+ A demonstration of this tool is hosted at HuggingFace Spaces - see [Audiobook_Gen](https://huggingface.co/spaces/mkutarna/audiobook_gen).
19
 
20
+ <img style="border:1px solid grey" src="resources/audiobook_gen.png" alt="Screenshot"/>
 
 
 
21
 
22
+ # Future
 
 
 
 
 
 
 
23
 
24
+ Here is a list of features in development and planned for the future:
25
+ - `html` file import
26
+ - `mobi`, `azw` ebook input
27
+ - optional audio output formats (for better compression)
28
+ - improved audio file output handling
29
+ - Docker image for local use
app.py CHANGED
@@ -49,7 +49,7 @@ if st.button('Click to run!'):
49
  st.success('Reading file complete!')
50
 
51
  with st.spinner('Generating audio...'):
52
- output.generate_audio(text, file_title, model, config.SPEAKER_LIST.get(speaker))
53
  st.success('Audio generation complete!')
54
 
55
  with st.spinner('Building zip file...'):
 
49
  st.success('Reading file complete!')
50
 
51
  with st.spinner('Generating audio...'):
52
+ predict.generate_audio(text, file_title, model, config.SPEAKER_LIST.get(speaker))
53
  st.success('Audio generation complete!')
54
 
55
  with st.spinner('Building zip file...'):
notebooks/parser_function_html.ipynb CHANGED
@@ -2,6 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
 
5
  "execution_count": 1,
6
  "id": "27a75ece",
7
  "metadata": {},
@@ -13,6 +14,9 @@
13
  {
14
  "cell_type": "code",
15
  "execution_count": null,
 
 
 
16
  "id": "5292a160",
17
  "metadata": {},
18
  "outputs": [],
@@ -27,18 +31,30 @@
27
  },
28
  {
29
  "cell_type": "code",
 
30
  "execution_count": null,
 
 
 
31
  "id": "68609a77",
32
  "metadata": {},
33
  "outputs": [],
34
  "source": [
35
  "# file_path = '1232-h.htm'\n",
 
36
  "file_path = 'test.htm'"
 
 
 
37
  ]
38
  },
39
  {
40
  "cell_type": "code",
 
41
  "execution_count": null,
 
 
 
42
  "id": "5c526c9b",
43
  "metadata": {},
44
  "outputs": [],
@@ -49,7 +65,11 @@
49
  },
50
  {
51
  "cell_type": "code",
 
52
  "execution_count": null,
 
 
 
53
  "id": "d4732304",
54
  "metadata": {},
55
  "outputs": [],
@@ -102,30 +122,86 @@
102
  },
103
  {
104
  "cell_type": "code",
 
105
  "execution_count": null,
106
  "id": "ece1c7d3",
107
  "metadata": {},
108
  "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  "source": [
110
  "np.shape(corpus)"
111
  ]
112
  },
113
  {
114
  "cell_type": "code",
 
115
  "execution_count": null,
116
  "id": "dc7e4010",
117
  "metadata": {},
118
  "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  "source": [
120
  "corpus[0][2]"
121
  ]
122
  },
123
  {
124
  "cell_type": "code",
 
125
  "execution_count": null,
126
  "id": "6cb47a2d",
127
  "metadata": {},
128
  "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  "source": [
130
  "corpus"
131
  ]
@@ -133,6 +209,17 @@
133
  {
134
  "cell_type": "code",
135
  "execution_count": null,
 
 
 
 
 
 
 
 
 
 
 
136
  "id": "d11031c7",
137
  "metadata": {},
138
  "outputs": [],
@@ -367,7 +454,11 @@
367
  ],
368
  "metadata": {
369
  "kernelspec": {
 
370
  "display_name": "Python 3",
 
 
 
371
  "language": "python",
372
  "name": "python3"
373
  },
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ <<<<<<< HEAD
6
  "execution_count": 1,
7
  "id": "27a75ece",
8
  "metadata": {},
 
14
  {
15
  "cell_type": "code",
16
  "execution_count": null,
17
+ =======
18
+ "execution_count": 2,
19
+ >>>>>>> appdev
20
  "id": "5292a160",
21
  "metadata": {},
22
  "outputs": [],
 
31
  },
32
  {
33
  "cell_type": "code",
34
+ <<<<<<< HEAD
35
  "execution_count": null,
36
+ =======
37
+ "execution_count": 3,
38
+ >>>>>>> appdev
39
  "id": "68609a77",
40
  "metadata": {},
41
  "outputs": [],
42
  "source": [
43
  "# file_path = '1232-h.htm'\n",
44
+ <<<<<<< HEAD
45
  "file_path = 'test.htm'"
46
+ =======
47
+ "file_path = ''"
48
+ >>>>>>> appdev
49
  ]
50
  },
51
  {
52
  "cell_type": "code",
53
+ <<<<<<< HEAD
54
  "execution_count": null,
55
+ =======
56
+ "execution_count": 4,
57
+ >>>>>>> appdev
58
  "id": "5c526c9b",
59
  "metadata": {},
60
  "outputs": [],
 
65
  },
66
  {
67
  "cell_type": "code",
68
+ <<<<<<< HEAD
69
  "execution_count": null,
70
+ =======
71
+ "execution_count": 5,
72
+ >>>>>>> appdev
73
  "id": "d4732304",
74
  "metadata": {},
75
  "outputs": [],
 
122
  },
123
  {
124
  "cell_type": "code",
125
+ <<<<<<< HEAD
126
  "execution_count": null,
127
  "id": "ece1c7d3",
128
  "metadata": {},
129
  "outputs": [],
130
+ =======
131
+ "execution_count": 11,
132
+ "id": "ece1c7d3",
133
+ "metadata": {},
134
+ "outputs": [
135
+ {
136
+ "data": {
137
+ "text/plain": [
138
+ "(1, 2)"
139
+ ]
140
+ },
141
+ "execution_count": 11,
142
+ "metadata": {},
143
+ "output_type": "execute_result"
144
+ }
145
+ ],
146
+ >>>>>>> appdev
147
  "source": [
148
  "np.shape(corpus)"
149
  ]
150
  },
151
  {
152
  "cell_type": "code",
153
+ <<<<<<< HEAD
154
  "execution_count": null,
155
  "id": "dc7e4010",
156
  "metadata": {},
157
  "outputs": [],
158
+ =======
159
+ "execution_count": 12,
160
+ "id": "dc7e4010",
161
+ "metadata": {},
162
+ "outputs": [
163
+ {
164
+ "ename": "IndexError",
165
+ "evalue": "list index out of range",
166
+ "output_type": "error",
167
+ "traceback": [
168
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
169
+ "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
170
+ "Cell \u001b[0;32mIn [12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcorpus\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m]\u001b[49m\n",
171
+ "\u001b[0;31mIndexError\u001b[0m: list index out of range"
172
+ ]
173
+ }
174
+ ],
175
+ >>>>>>> appdev
176
  "source": [
177
  "corpus[0][2]"
178
  ]
179
  },
180
  {
181
  "cell_type": "code",
182
+ <<<<<<< HEAD
183
  "execution_count": null,
184
  "id": "6cb47a2d",
185
  "metadata": {},
186
  "outputs": [],
187
+ =======
188
+ "execution_count": 13,
189
+ "id": "6cb47a2d",
190
+ "metadata": {},
191
+ "outputs": [
192
+ {
193
+ "data": {
194
+ "text/plain": [
195
+ "[['Predict Testing Text File',\n",
196
+ " 'Audiobook Gen is a tool that allows the users to generate an audio file from an ebook or other document.']]"
197
+ ]
198
+ },
199
+ "execution_count": 13,
200
+ "metadata": {},
201
+ "output_type": "execute_result"
202
+ }
203
+ ],
204
+ >>>>>>> appdev
205
  "source": [
206
  "corpus"
207
  ]
 
209
  {
210
  "cell_type": "code",
211
  "execution_count": null,
212
+ <<<<<<< HEAD
213
+ =======
214
+ "id": "8508b073",
215
+ "metadata": {},
216
+ "outputs": [],
217
+ "source": []
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": null,
222
+ >>>>>>> appdev
223
  "id": "d11031c7",
224
  "metadata": {},
225
  "outputs": [],
 
454
  ],
455
  "metadata": {
456
  "kernelspec": {
457
+ <<<<<<< HEAD
458
  "display_name": "Python 3",
459
+ =======
460
+ "display_name": "Python 3 (ipykernel)",
461
+ >>>>>>> appdev
462
  "language": "python",
463
  "name": "python3"
464
  },
tests/data/test_predict.pt ADDED
Binary file (680 kB). View file
 
tests/data/test_predict.pt.REMOVED.git-id DELETED
@@ -1 +0,0 @@
1
- 84cf0cd8d8bede5ff60d18475d71e26543d5d7ad
 
 
tests/data/test_predict.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Predict Testing Text File
2
+
3
+ Audiobook Gen is a tool that allows the users to generate an audio file from an ebook or other document.
tests/test_predict.py CHANGED
@@ -52,12 +52,14 @@ def test_predict():
52
  tensor_path = test_config.data_path / "test_predict.pt"
53
  test_tensor = torch.load(tensor_path)
54
 
55
- ebook_path = test_config.data_path / "test.epub"
56
- corpus, title = file_readers.read_epub(ebook_path)
 
 
57
  section_index = 'part001'
58
- speaker = 'en_110'
59
 
60
- audio_list, _ = predict.predict(corpus[1], section_index, title, model, speaker)
61
  audio_tensor = torch.cat(audio_list).reshape(1, -1)
62
 
63
- torch.testing.assert_close(audio_tensor, test_tensor, atol=1e-3, rtol=0.2)
 
52
  tensor_path = test_config.data_path / "test_predict.pt"
53
  test_tensor = torch.load(tensor_path)
54
 
55
+ text_path = test_config.data_path / "test_predict.txt"
56
+ with open(text_path, 'r') as file:
57
+ text = file_readers.preprocess_text(file)
58
+ title = 'test_predict'
59
  section_index = 'part001'
60
+ speaker = 'en_0'
61
 
62
+ audio_list, _ = predict.predict(text, section_index, title, model, speaker)
63
  audio_tensor = torch.cat(audio_list).reshape(1, -1)
64
 
65
+ torch.testing.assert_close(audio_tensor, test_tensor, atol=1e-3, rtol=0.9)