Spaces:
Build error
Build error
File size: 1,481 Bytes
74f2c64 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import pytest
import numpy as np
from src import file_readers
import test_config
def test_preprocess_text():
    """
    Tests preprocess_text function by comparing its output for
    ``test.txt`` against the expected lines in ``test_processed.txt``.
    """
    test_path = test_config.data_path / "test.txt"
    processed_path = test_config.data_path / "test_processed.txt"

    with open(test_path, 'r') as file:
        test_corpus = file_readers.preprocess_text(file)

    # Expected lines are compared without surrounding whitespace, so the
    # trailing newline of each fixture line is stripped before comparing.
    with open(processed_path, 'r') as process_file:
        processed_corpus = [line.strip() for line in process_file]

    assert processed_corpus == test_corpus
def test_read_pdf():
    """
    Tests read_pdf function by asserting shape of corpus
    and the first line of selected corpus entries.
    """
    raw_corpus = file_readers.read_pdf(test_config.data_path / "test.pdf")
    corpus = np.array(raw_corpus, dtype=object)

    # Shape checks: the outer array, then its first entry.
    assert corpus.shape == (4, )
    assert np.shape(corpus[0]) == (3, )

    # First line of the entries at index 0 and index 2.
    first_entry, third_entry = corpus[0], corpus[2]
    assert first_entry[0] == 'Lorem Ipsum'
    assert third_entry[0] == 'Preface'
def test_read_epub():
    """
    Tests read_epub function: checks the returned title, the shape of
    the corpus, and the first line of selected corpus entries.
    """
    corpus, title = file_readers.read_epub(test_config.data_path / "test.epub")
    corpus_arr = np.array(corpus, dtype=object)

    expected_title = "the_picture_of_dorian_gray"
    assert title == expected_title

    # Shape checks run on the object array built from the corpus.
    assert corpus_arr.shape == (6,)
    assert np.shape(corpus_arr[0]) == (39,)

    # Line checks index the corpus exactly as read_epub returned it.
    opening_line = corpus[0][0]
    chapter_heading = corpus[2][0]
    assert opening_line == 'The Project Gutenberg eBook of The Picture of Dorian Gray, by Oscar Wilde'
    assert chapter_heading == 'CHAPTER I.'
|