File size: 3,484 Bytes
6770b66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7e8dbcd
 
6770b66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7e8dbcd
 
6770b66
 
 
 
 
 
 
 
 
 
 
 
7e8dbcd
6770b66
 
7e8dbcd
6770b66
 
 
 
7e8dbcd
6770b66
 
 
 
7e8dbcd
6770b66
 
 
7e8dbcd
6770b66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7e8dbcd
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
from fasthtml.common import *
import json


# Names of the curated data sources, in the order they are offered as
# options in the data-source drop-down built by get_data().
data_sources = [
    "Freelaw", "Wikipedia", "PhilPapers", "Arxiv",
    "S2ORC", "S2ORC Abstract", "Pubmeds", "USPTO",
    "Hackernews", "Ubuntu IRC", "StackExchange", "DM Maths",
    "PG19", "Europarl",
]


# Curated sample files per data source, as (raw_path, extracted_path).
# When both paths are identical the source has a single sample file, which is
# shown in both columns.  Sources absent from this table have no sample file
# and are rendered as empty documents.
_SAMPLE_FILES = {
    "Freelaw": (
        "data/curated_samples/freelaw_raw.json",
        "data/curated_samples/freelaw_extract.json",
    ),
    "Wikipedia": (
        "data/curated_samples/wiki.json",
        "data/curated_samples/wiki.json",
    ),
    "StackExchange": (
        "data/curated_samples/stackexchange_raw.json",
        "data/curated_samples/stackexchange_extract.json",
    ),
    "PhilPapers": (
        "data/curated_samples/philpapers_raw.json",
        "data/curated_samples/philpapers_raw.json",
    ),
    "Arxiv": (
        "data/curated_samples/arxiv_raw.json",
        "data/curated_samples/arxiv_extract.json",
    ),
    "S2ORC": (
        "data/curated_samples/s2orc_raw.json",
        "data/curated_samples/s2orc_raw.json",
    ),
    "S2ORC Abstract": (
        "data/curated_samples/s2orc_abstract_raw.json",
        "data/curated_samples/s2orc_abstract_raw.json",
    ),
}


def _load_json(path):
    """Parse the JSON file at *path*, closing the file handle afterwards."""
    # Binary mode lets the json module detect the UTF-8/16/32 encoding itself
    # instead of depending on the platform's default text encoding.
    with open(path, "rb") as fp:
        return json.load(fp)


def get_data(data_source: str = "Freelaw", doc_id: int = 3):
    """Build the curated-sample viewer for one document of one data source.

    The viewer consists of a form (data-source drop-down plus a sample
    slider) and two side-by-side columns showing the selected document
    pretty-printed as JSON in its raw and extracted formats.  Both controls
    issue an HTMX GET to ``/curated`` and swap the result into
    ``#colcontent``, which is the id of the element returned here.

    Args:
        data_source: Name of the data source to display.  Sources without a
            curated sample file are shown as empty documents.
        doc_id: Index of the sample document.  Converted with ``int()`` and
            clamped to the range 0..9.

    Returns:
        A ``Div`` with id ``colcontent`` containing the form and the
        two-column data display.

    Raises:
        OSError: If a sample file for ``data_source`` cannot be opened.
        IndexError: If a sample file holds fewer documents than ``doc_id``
            requires.
    """
    # Clamp to the slider's range so out-of-range requests still resolve.
    doc_id = max(0, min(int(doc_id), 9))

    sample_paths = _SAMPLE_FILES.get(data_source)
    if sample_paths is None:
        # No curated sample for this source: show empty placeholders.
        raw_sample_doc = extracted_sample_doc = [{} for _ in range(10)]
    else:
        raw_path, extracted_path = sample_paths
        raw_sample_doc = _load_json(raw_path)
        # Single-file sources reuse the already-parsed document list.
        if extracted_path == raw_path:
            extracted_sample_doc = raw_sample_doc
        else:
            extracted_sample_doc = _load_json(extracted_path)

    raw_json = raw_sample_doc[doc_id]
    extracted_json = extracted_sample_doc[doc_id]

    drop_down = Select(
        *[Option(ds, value=ds, selected=(ds == data_source)) for ds in data_sources],
        name="data_source",
        hx_get="/curated",
        hx_target="#colcontent",
        hx_trigger="change",
        hx_swap="innerHTML",
    )

    slider = Input(
        type="range",
        name="doc_id",
        min="0",
        max="9",
        value=str(doc_id),
        hx_get="/curated",
        hx_target="#colcontent",
        hx_trigger="change",
        hx_swap="innerHTML",
        # Send the current drop-down selection along with the slider value.
        hx_include="[name='data_source']",
    )

    form = Form(
        Div(
            Label("Data source: ", drop_down),
            style="margin-bottom: 20px;",
        ),
        Div(
            Label("Data sample: ", slider, f"{doc_id}"),
            style="margin-bottom: 20px;",
        ),
    )

    col1 = Div(
        H3("Raw format"),
        Pre(
            json.dumps(raw_json, indent=4),
            style="white-space: pre-wrap; word-break: break-all;",
        ),
        style="width: 48%; float: left; overflow-x: auto;",
    )

    col2 = Div(
        H3("Extracted format"),
        Pre(
            json.dumps(extracted_json, indent=4),
            style="white-space: pre-wrap; word-break: break-all;",
        ),
        style="width: 48%; float: right; overflow-x: auto;",
    )

    data_display = Div(
        col1,
        col2,
        style="overflow: auto; clear: both; height: 600px; border: 1px solid #ccc; padding: 20px;",
    )
    return Div(form, data_display, style="margin-top: 10px;", id="colcontent")