File size: 1,323 Bytes
5fd26bb
55de44e
5fd26bb
 
55de44e
5fd26bb
 
 
55de44e
5fd26bb
55de44e
5fd26bb
 
55de44e
5fd26bb
 
 
55de44e
5fd26bb
 
55de44e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from bs4 import BeautifulSoup
from document_qa.grobid_processors import get_xml_nodes_body, get_xml_nodes_figures, get_xml_nodes_header


def test_get_xml_nodes_body_paragraphs():
    """Check the body node count extracted with ``use_paragraphs=True``.

    Parses the ``2312.07559.paragraphs.tei.xml`` fixture and expects
    ``get_xml_nodes_body`` to return exactly 70 nodes.
    """
    # Pin the encoding so the fixture is decoded the same way on every
    # platform instead of depending on the locale's default encoding.
    with open("resources/2312.07559.paragraphs.tei.xml", encoding="utf-8") as fo:
        soup = BeautifulSoup(fo, 'xml')

    nodes = get_xml_nodes_body(soup, use_paragraphs=True)

    assert len(nodes) == 70


def test_get_xml_nodes_body_sentences():
    """Check the body node count extracted with ``use_paragraphs=False``.

    Parses the ``2312.07559.sentences.tei.xml`` fixture and expects
    ``get_xml_nodes_body`` to return exactly 327 nodes.
    """
    # Pin the encoding so the fixture is decoded the same way on every
    # platform instead of depending on the locale's default encoding.
    with open("resources/2312.07559.sentences.tei.xml", encoding="utf-8") as fo:
        soup = BeautifulSoup(fo, 'xml')

    nodes = get_xml_nodes_body(soup, use_paragraphs=False)

    assert len(nodes) == 327


def test_get_xml_nodes_figures():
    """Check the number of nodes returned by ``get_xml_nodes_figures``.

    Parses the ``2312.07559.paragraphs.tei.xml`` fixture and expects
    exactly 13 nodes.
    """
    # Pin the encoding so the fixture is decoded the same way on every
    # platform instead of depending on the locale's default encoding.
    with open("resources/2312.07559.paragraphs.tei.xml", encoding="utf-8") as fo:
        soup = BeautifulSoup(fo, 'xml')

    nodes = get_xml_nodes_figures(soup)

    assert len(nodes) == 13


def test_get_xml_nodes_header_paragraphs():
    """Check the header node count with ``get_xml_nodes_header`` defaults.

    Parses the ``2312.07559.paragraphs.tei.xml`` fixture, calls
    ``get_xml_nodes_header`` with only the soup argument, and expects
    exactly 8 nodes.
    """
    # Pin the encoding so the fixture is decoded the same way on every
    # platform instead of depending on the locale's default encoding.
    with open("resources/2312.07559.paragraphs.tei.xml", encoding="utf-8") as fo:
        soup = BeautifulSoup(fo, 'xml')

    nodes = get_xml_nodes_header(soup)

    assert len(nodes) == 8

def test_get_xml_nodes_header_sentences():
    """Check the header node count extracted with ``use_paragraphs=False``.

    Parses the ``2312.07559.sentences.tei.xml`` fixture and expects
    ``get_xml_nodes_header`` to return exactly 15 nodes.
    """
    # Pin the encoding so the fixture is decoded the same way on every
    # platform instead of depending on the locale's default encoding.
    with open("resources/2312.07559.sentences.tei.xml", encoding="utf-8") as fo:
        soup = BeautifulSoup(fo, 'xml')

    nodes = get_xml_nodes_header(soup, use_paragraphs=False)

    assert len(nodes) == 15