# NOTE: scraped page header, not part of the module -- "Spaces: Build error".
"""Test text2lists."""
from pathlib import Path

from radiobee.loadtext import loadtext
from radiobee.text2lists import text2lists
def test_text2lists_dual1():
    """Check text2lists on the data/test-dual.txt fixture.

    The fixture is loaded with ``loadtext`` and passed to ``text2lists``,
    whose result is unpacked into two indexable sequences. A few known
    entries of those sequences are then pinned.
    """
    # Forward slashes resolve on Windows and POSIX alike; a backslash is a
    # Windows-only separator and would be taken as part of the file name
    # elsewhere.
    filename = "data/test-dual.txt"
    text = loadtext(filename)  # noqa
    l1, l2 = text2lists(text)

    # First entry of the second sequence is expected to be empty.
    assert l2[0] == ""
    assert "国际" in l1[0]
    assert "2021" in l2[5]
def test_shakespeare1000():
    r"""Run text2lists over data/shakespeare-zh-en-4000.txt.

    Nothing is asserted about the content: the test passes when the file
    can be read as UTF-8 and the result of ``text2lists`` unpacks into
    exactly two values.

    The scratch snippet below ("Separate first 1000") is kept for reference
    only and is never executed. It writes
    ``data/shakespeare-zh-en-{numb_lines}.txt`` from a zipped source text.

    NOTE(review): the loop nesting shown here is a best guess, and
    ``cchardet``, ``cmat2test``, ``cmat``, ``pd`` and ``plot_df`` are not
    defined in the visible part of this module -- confirm before reuse.

        from pathlib import Path
        import zipfile

        dir_loc = r""
        filename = r"莎士比亚 - 莎士比亚全集(套装共39本 英汉双语)-外语教学与研究出版社 (2016).txt.zip"
        zfile = zipfile.ZipFile(Path(dir_loc) / filename)
        res_bytes = zfile.read(zfile.infolist()[0])
        encoding = cchardet.detect(res_bytes).get("encoding")

        text1000 = []
        line = 0
        numb_lines = 4000
        for elm in res_bytes.splitlines():
            if elm.decode(encoding).strip():
                text1000.append(elm.decode(encoding))
                if line >= numb_lines - 1:
                    break
                line += 1
        Path(f"data/shakespeare-zh-en-{numb_lines}.txt").write_text("\n".join(text1000), encoding="utf8")

        tset = cmat2test(cmat)
        df = pd.DataFrame(tset).rename(columns=dict(zip(range(0, 3), ['x', 'y', 'cos'])))
        plot_df(df)
    """
    sample_path = Path("data/shakespeare-zh-en-4000.txt")
    sample_text = sample_path.read_text(encoding="utf8")

    # The two-target unpacking is the only check performed here.
    _left, _right = text2lists(sample_text)
def test_test_dual2():
    """Read data/test-dual.txt as UTF-8 and run it through text2lists.

    No values are asserted; the test passes when the file reads cleanly
    and the result unpacks into two values.
    """
    fixture = Path("data/test-dual.txt")
    test_dual = fixture.read_text(encoding="utf8")
    l_dual, r_dual = text2lists(test_dual)