Spaces:
Build error
Build error
import pytest | |
import numpy as np | |
from src import file_readers | |
import test_config | |
def test_preprocess_text():
    """
    Tests preprocess_text by comparing its output for the raw
    fixture with the stored, already-processed fixture.

    The expected corpus is built from the lines of
    test_processed.txt with surrounding whitespace stripped.
    """
    test_path = test_config.data_path / "test.txt"
    processed_path = test_config.data_path / "test_processed.txt"

    # Explicit encoding keeps the comparison independent of the
    # platform's locale default (assumes the fixtures are UTF-8 --
    # TODO confirm against the data directory).
    with open(test_path, 'r', encoding='utf-8') as file:
        test_corpus = file_readers.preprocess_text(file)

    # Iterate the file object directly; no need to materialise
    # readlines() just to strip each line.
    with open(processed_path, 'r', encoding='utf-8') as process_file:
        processed_corpus = [line.strip() for line in process_file]

    assert processed_corpus == test_corpus
def test_read_pdf():
    """
    Tests read_pdf function by asserting shape of corpus
    and the first entry of two of its elements.
    """
    source = test_config.data_path / "test.pdf"
    raw_corpus = file_readers.read_pdf(source)

    # Wrap in an object array so the outer and inner shapes
    # can be checked separately.
    corpus = np.array(raw_corpus, dtype=object)
    leading = corpus[0]

    assert corpus.shape == (4, )
    assert np.shape(leading) == (3, )
    assert leading[0] == 'Lorem Ipsum'
    assert corpus[2][0] == 'Preface'
def test_read_epub():
    """
    Checks read_epub against the test.epub fixture: the returned
    title, the shape of the corpus, and the first entry of two
    of its elements.
    """
    expected_title = "the_picture_of_dorian_gray"
    expected_first_line = (
        'The Project Gutenberg eBook of The Picture of Dorian Gray, by Oscar Wilde'
    )

    book = test_config.data_path / "test.epub"
    corpus, title = file_readers.read_epub(book)

    # Object array is used only for the shape checks; the content
    # checks below index the returned corpus itself.
    as_array = np.array(corpus, dtype=object)

    assert title == expected_title
    assert as_array.shape == (6,)
    assert np.shape(as_array[0]) == (39,)
    assert corpus[0][0] == expected_first_line
    assert corpus[2][0] == 'CHAPTER I.'