audiobook_gen / tests /test_file_readers.py
Matthew Kutarna
Streamlit app development (#5)
cbbc229
raw
history blame
1.48 kB
import pytest
import numpy as np
from src import file_readers
import test_config
def test_preprocess_text():
    """
    Checks file_readers.preprocess_text against a stored expected result.

    The open handle for test.txt is handed to preprocess_text, and the
    corpus it returns must equal the stripped lines of test_processed.txt.
    """
    data_dir = test_config.data_path
    raw_path = data_dir / "test.txt"
    expected_path = data_dir / "test_processed.txt"

    with open(raw_path, 'r') as raw_file:
        actual_lines = file_readers.preprocess_text(raw_file)

    # Strip every expected line so surrounding whitespace and newlines
    # do not take part in the comparison.
    with open(expected_path, 'r') as expected_file:
        expected_lines = [raw_line.strip() for raw_line in expected_file]

    assert expected_lines == actual_lines
def test_read_pdf():
    """
    Checks file_readers.read_pdf on the test.pdf fixture.

    The returned value is wrapped in an object array, which must hold
    4 entries with the first entry holding 3 items. The leading items of
    entries 0 and 2 are spot-checked against the expected text.
    """
    source = test_config.data_path / "test.pdf"
    parsed = file_readers.read_pdf(source)
    # dtype=object keeps each top-level entry as its own element.
    pages = np.array(parsed, dtype=object)

    assert pages.shape == (4, )
    assert np.shape(pages[0]) == (3, )

    assert pages[0][0] == 'Lorem Ipsum'
    assert pages[2][0] == 'Preface'
def test_read_epub():
    """
    Checks file_readers.read_epub on the test.epub fixture.

    read_epub is expected to return a (corpus, title) pair. The test
    verifies the title string, that the corpus wrapped in an object array
    has 6 entries with 39 items in the first, and the leading items of
    entries 0 and 2.
    """
    source = test_config.data_path / "test.epub"
    corpus, title = file_readers.read_epub(source)
    # dtype=object keeps each top-level entry as its own element.
    as_array = np.array(corpus, dtype=object)

    assert title == "the_picture_of_dorian_gray"

    assert as_array.shape == (6,)
    assert np.shape(as_array[0]) == (39,)

    # Spot checks index the raw corpus rather than the array wrapper.
    assert corpus[0][0] == 'The Project Gutenberg eBook of The Picture of Dorian Gray, by Oscar Wilde'
    assert corpus[2][0] == 'CHAPTER I.'