Spaces:

mkutarna
/

audiobook_gen

Build error

audiobook_gen / tests /test_file_readers.py

Matthew Kutarna

Streamlit app development (#5)

74f2c64 about 2 years ago

1.48 kB

	import pytest
	import numpy as np

	from src import file_readers
	import test_config


	def test_preprocess_text():
	    """
	    Runs preprocess_text on "test.txt" and asserts that the result
	    equals the whitespace-stripped lines of "test_processed.txt".
	    """
	    data_dir = test_config.data_path
	    raw_path = data_dir / "test.txt"
	    expected_path = data_dir / "test_processed.txt"

	    # The raw fixture is handed to the reader as an open file handle.
	    with open(raw_path, 'r') as raw_file:
	        actual_lines = file_readers.preprocess_text(raw_file)

	    # The reference fixture holds one expected entry per line.
	    with open(expected_path, 'r') as expected_file:
	        expected_lines = [row.strip() for row in expected_file]

	    assert expected_lines == actual_lines


	def test_read_pdf():
	    """
	    Tests read_pdf function on "test.pdf" by asserting the
	    shape of the corpus and the first line of selected entries.
	    """
	    parsed = file_readers.read_pdf(test_config.data_path / "test.pdf")
	    # dtype=object keeps each top-level entry as its own element.
	    corpus = np.array(parsed, dtype=object)

	    assert corpus.shape == (4, )
	    first_entry = corpus[0]
	    assert np.shape(first_entry) == (3, )
	    assert first_entry[0] == 'Lorem Ipsum'
	    assert corpus[2][0] == 'Preface'


	def test_read_epub():
	    """
	    Tests read_epub function on "test.epub" by asserting the
	    returned title, the shape of the corpus, and the first line
	    of selected corpus entries.
	    """
	    corpus, title = file_readers.read_epub(test_config.data_path / "test.epub")
	    # dtype=object keeps each top-level entry as its own element.
	    as_array = np.array(corpus, dtype=object)

	    assert title == "the_picture_of_dorian_gray"
	    assert as_array.shape == (6,)
	    assert np.shape(as_array[0]) == (39,)
	    # Line content is checked on the value read_epub returned directly.
	    assert corpus[0][0] == 'The Project Gutenberg eBook of The Picture of Dorian Gray, by Oscar Wilde'
	    assert corpus[2][0] == 'CHAPTER I.'