"""FastHTML/HTMX components for browsing JSON and JSON Lines data samples."""

# NOTE: the star imports deliberately stay first, as in the original file, so
# that the standard-library modules imported after them win any name clash.
from fasthtml.common import *
from fasthtml.components import *
import json
import string
import random
import jsonlines
from typing import Optional

# Inline style shared by every <pre> element that shows a JSON dump.
_PRE_STYLE = "white-space: pre-wrap; word-break: break-all;"


def gen_random_id() -> str:
    """Return a random identifier made of 8 lowercase ASCII letters."""
    return "".join(random.choices(string.ascii_lowercase, k=8))


def _load_json(path):
    """Parse the JSON file at ``path`` (UTF-8), closing the file afterwards."""
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


def _load_records(path):
    """Load records from ``path``.

    Paths ending in ``"jsonl"`` are read with ``jsonlines`` into a list;
    everything else is parsed as a single JSON document.
    """
    if path.endswith("jsonl"):
        with jsonlines.open(path) as reader:
            return list(reader)
    return _load_json(path)


def _clamp_doc_id(doc_id, max_doc_id):
    """Clamp ``doc_id`` into ``[0, max_doc_id]``.

    For empty data (``max_doc_id == -1``) this returns 0, so the subsequent
    index lookup still raises ``IndexError`` exactly as it did before.
    """
    return max(0, min(doc_id, max_doc_id))


def _json_pre(payload, *, ensure_ascii: bool = False):
    """Render ``payload`` as an indented JSON dump inside a wrapping ``Pre``."""
    return Pre(
        json.dumps(payload, indent=4, ensure_ascii=ensure_ascii),
        style=_PRE_STYLE,
    )


def _scroll_box(*columns, height: int):
    """Wrap ``columns`` in the bordered, scrollable box of the given height (px)."""
    return Div(
        *columns,
        style=(
            f"overflow: auto; clear: both; height: {height}px; "
            "border: 1px solid #ccc; padding: 20px;"
        ),
    )


def view_data(
    before,
    after,
    doc_id,
    data_source: Optional[str] = None,
    data_sources=None,
    target: str = "colcontent",
    max_doc_id: int = 9,
):
    """Build a side-by-side "raw" vs "extracted" viewer with HTMX controls.

    Args:
        before: JSON-serializable object shown under "Raw format".
        after: JSON-serializable object shown under "Extracted format".
        doc_id: Currently selected sample index; used as the slider value and
            shown next to the slider.
        data_source: The entry of ``data_sources`` to mark as selected.
        data_sources: Optional iterable of data-source names. When given, a
            drop-down is rendered above the slider.
        target: Id of the returned container. Also used in the control names
            and in the ``/curated/{target}`` URL both controls request.
        max_doc_id: Upper bound of the slider (previously hard-coded to 9).

    Returns:
        A ``Div`` with ``id=target`` holding the form and the two columns.
    """
    # Both controls issue the same request and swap the same element.
    endpoint = f"/curated/{target}"
    selector = f"#{target}"

    source_row = None
    if data_sources is not None:
        drop_down = Select(
            *[
                Option(ds, value=ds, selected=(ds == data_source))
                for ds in data_sources
            ],
            name=f"data_source_{target}",
            hx_get=endpoint,
            hx_target=selector,
            hx_trigger="change",
            hx_swap="innerHTML",
        )
        source_row = Div(Label("Data source: ", drop_down))

    slider = Input(
        type="range",
        name=f"doc_id_{target}",
        min="0",
        max=str(max_doc_id),
        value=str(doc_id),
        hx_get=endpoint,
        hx_target=selector,
        hx_trigger="change",
        hx_swap="innerHTML",
        # Send the drop-down's current value along with the slider value.
        hx_include=f'[name="data_source_{target}"]',
    )

    form = Form(
        source_row,
        Div(
            Label("Data sample: ", slider, f"{doc_id}", cls="plotly_slider"),
        ),
        cls="plotly_input_container",
    )

    # This view keeps json.dumps' default ASCII escaping, as it always has.
    col1 = Div(
        H3("Raw format"),
        _json_pre(before, ensure_ascii=True),
        style="width: 48%; float: left; overflow-x: auto;",
    )
    col2 = Div(
        H3("Extracted format"),
        _json_pre(after, ensure_ascii=True),
        style="width: 48%; float: right; overflow-x: auto;",
    )

    data_display = _scroll_box(col1, col2, height=600)
    return Div(form, data_display, style="margin-top: 10px;", id=target)


def DVS(
    left,
    header,
):
    """Render a static, 200px-high viewer for one JSON-serializable object.

    Args:
        left: Object to dump as JSON (non-ASCII characters are kept as-is).
        header: Content of the ``H3`` heading placed above the box.

    Returns:
        A ``Div`` containing the heading and the scrollable JSON box.
    """
    col1 = Div(
        _json_pre(left),
        style="float: left; overflow-x: auto;",
    )
    data_display = _scroll_box(col1, height=200)
    return Div(H3(header), data_display, style="margin-top: 10px;")


def DV(
    left_file,
    doc_id,
    header,
    target: Optional[str] = None,
):
    """Render a single-file sample viewer with a slider over its records.

    Args:
        left_file: Path to a ``.jsonl`` file or to a JSON file whose top-level
            value is indexable by integer.
        doc_id: Index of the record to display; values outside the available
            range are clamped to the nearest valid index.
        header: Heading shown above the slider. It is also sent back to the
            server through ``hx_vals``.
        target: Id of the returned container. A random id is generated when
            omitted.

    Returns:
        A ``Div`` with ``id=target`` holding the slider and the JSON box.

    Raises:
        IndexError: If the file contains no records.
    """
    if target is None:
        target = gen_random_id()

    left = _load_records(left_file)
    max_doc_id = len(left) - 1
    doc_id = _clamp_doc_id(doc_id, max_doc_id)

    slider = Input(
        type="range",
        name=f"doc_id_{target}",
        min="0",
        max=str(max_doc_id),
        value=str(doc_id),
        hx_get=f"/update/{target}",
        hx_target=f"#{target}",
        hx_trigger="change",
        hx_swap="innerHTML",
        # Sent with every slider request so `update` can rebuild this view.
        hx_vals=json.dumps({"left_file": f"{left_file}", "header": f"{header}"}),
    )
    form = Div(
        H3(header),
        Label(
            "Data sample: ", slider, f"{doc_id} of {max_doc_id}", cls="plotly_slider"
        ),
        cls="plotly_input_container",
        style="padding: 20px;",
    )

    col1 = Div(
        _json_pre(left[doc_id]),
        style="float: left; overflow-x: auto;",
    )
    data_display = _scroll_box(col1, height=600)
    return Div(form, data_display, style="margin-top: 10px;", id=target)


def DV2(
    left_file,
    right_file,
    doc_id,
    target: Optional[str] = None,
):
    """Render a two-file (raw vs extracted) viewer with a shared slider.

    Args:
        left_file: Path to the JSON file shown under "Raw format".
        right_file: Path to the JSON file shown under "Extracted format".
        doc_id: Index of the record to display; clamped to the valid range of
            the left file.
        target: Id of the returned container. A random id is generated when
            omitted.

    Returns:
        A ``Div`` with ``id=target`` holding the slider and both columns.

    Raises:
        IndexError: If the left file is empty, or if the right file has no
            record at the selected index (the slider range is derived from
            the left file only).
    """
    if target is None:
        target = gen_random_id()

    left = _load_json(left_file)
    right = _load_json(right_file)
    max_doc_id = len(left) - 1
    doc_id = _clamp_doc_id(doc_id, max_doc_id)

    slider = Input(
        type="range",
        name=f"doc_id_{target}",
        min="0",
        max=str(max_doc_id),
        value=str(doc_id),
        hx_get=f"/update/{target}",
        hx_target=f"#{target}",
        hx_trigger="change",
        hx_swap="innerHTML",
        # Sent with every slider request so `update` can rebuild this view.
        hx_vals=json.dumps(
            {"left_file": f"{left_file}", "right_file": f"{right_file}"}
        ),
    )
    form = Div(
        Label(
            "Data sample: ", slider, f"{doc_id} of {max_doc_id}", cls="plotly_slider"
        ),
        cls="plotly_input_container",
        style="padding: 20px;",
    )

    col1 = Div(
        H3("Raw format", style="margin-top: 0px;"),
        _json_pre(left[doc_id]),
        style="width: 48%; float: left; overflow-x: auto;",
    )
    col2 = Div(
        H3("Extracted format", style="margin-top: 0px;"),
        _json_pre(right[doc_id]),
        style="width: 48%; float: right; overflow-x: auto;",
    )

    data_display = _scroll_box(col1, col2, height=600)
    return Div(form, data_display, style="margin-top: 10px;", id=target)


def update(target: str, request):
    """Rebuild a ``DV`` or ``DV2`` viewer from the request's query parameters.

    Reads ``doc_id_{target}`` (default 3), ``left_file``, ``right_file`` and
    ``header`` from ``request.query_params``. When both file parameters are
    present a ``DV2`` viewer is returned (wrapped in a 1-tuple, as before);
    otherwise a ``DV`` viewer is returned.

    Raises:
        ValueError: If ``doc_id_{target}`` is not an integer string.
    """
    # SECURITY NOTE(review): `left_file` / `right_file` come straight from the
    # query string and are opened as-is by DV / DV2. If this endpoint can be
    # reached by untrusted clients, restrict the paths to an allow-list or a
    # fixed data directory.
    params = request.query_params
    doc_id = int(params.get(f"doc_id_{target}", 3))
    left_file = params.get("left_file")
    right_file = params.get("right_file")

    if left_file and right_file:
        return (
            DV2(
                left_file,
                right_file,
                doc_id,
                target,
            ),
        )

    # NOTE(review): a request without `left_file` still fails inside DV.
    return DV(
        left_file,
        doc_id,
        params.get("header"),
        target,
    )