File size: 4,106 Bytes
b38e27d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import streamlit as st
import os
import pandas as pd
import json
from html import escape
import difflib

def generate_diff_html_word_level(text1, text2):
    """
    Render a word-by-word diff of ``text1`` against ``text2`` as an HTML string.

    Both texts are tokenized with ``str.split()``, so any run of whitespace
    (including newlines) acts as a single separator and tokens are re-joined
    with single spaces. Tokens only present in ``text1`` are wrapped in a
    pink ``<del>``, tokens only present in ``text2`` in a green ``<ins>``;
    all token text is HTML-escaped. The result is wrapped in a ``<pre>``
    element that allows line wrapping.
    """
    del_open = '<del style="background-color: #fbb6ce;">'
    ins_open = '<ins style="background-color: #b7e4c7;">'

    old_tokens = text1.split()
    new_tokens = text2.split()

    def removed(lo, hi):
        # Markup for the slice of old tokens that disappeared.
        return del_open + escape(' '.join(old_tokens[lo:hi])) + '</del>'

    def added(lo, hi):
        # Markup for the slice of new tokens that appeared.
        return ins_open + escape(' '.join(new_tokens[lo:hi])) + '</ins>'

    fragments = []
    opcodes = difflib.SequenceMatcher(None, old_tokens, new_tokens).get_opcodes()
    for op, a_lo, a_hi, b_lo, b_hi in opcodes:
        if op == 'equal':
            fragments.append(escape(' '.join(old_tokens[a_lo:a_hi])))
            continue
        # A 'replace' emits the deletion first and the insertion right after it.
        if op in ('replace', 'delete'):
            fragments.append(removed(a_lo, a_hi))
        if op in ('replace', 'insert'):
            fragments.append(added(b_lo, b_hi))

    # Fragments are space-separated; a deletion directly followed by an
    # insertion is glued with a non-breaking space instead.
    body = ' '.join(fragments)
    body = body.replace('</del> <ins', '</del>&nbsp;<ins')
    return '<pre style="white-space: pre-wrap;">' + body + '</pre>'

# Request protobuf's pure-Python implementation via its environment switch.
# NOTE(review): this assignment runs after `import streamlit` at the top of the
# file; confirm the variable is still read late enough to take effect.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
# Configure the page to use the wide layout; this is issued before any other
# Streamlit call in the script.
st.set_page_config(layout="wide")

@st.cache_data
def convert_df(df):
    """Serialize ``df`` to CSV text (index column omitted) and return it as UTF-8 bytes."""
    csv_text = df.to_csv(index=False, quotechar='"')
    return csv_text.encode('utf-8')

@st.cache_data
def load_narratives_data():
    """Load ``narratives.jsonl`` from the working directory into a DataFrame.

    Each non-blank line of the file is parsed as one JSON document and becomes
    one row of the returned frame. The result is cached by ``st.cache_data``.

    Returns:
        pandas.DataFrame built from the list of parsed records.

    Raises:
        FileNotFoundError: if ``narratives.jsonl`` does not exist.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly; without it open() falls back to the platform's
    # locale encoding, which can mis-decode non-ASCII narrative text.
    with open("narratives.jsonl", "r", encoding="utf-8") as f:
        # Whitespace-only lines (e.g. a trailing blank line) are skipped
        # because json.loads would reject them.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)

# Load all narratives once; the loader is wrapped in st.cache_data.
narratives_df = load_narratives_data()

# Two columns with relative widths 1:3: col1 holds the navigation widgets,
# col2 holds the overview / editor view.
col1, col2 = st.columns([1, 3], gap="large")

# The sidebar currently only shows a title.
with st.sidebar:
    st.title("Options")

with col1:
    st.title("Narratives")
    narrative_ids = narratives_df["id"].tolist()
    container_for_nav = st.container()

    def sync_from_drop():
        """Propagate a selectbox change to the index state and the number input.

        "Overview" maps to index -1; any other choice maps to its position in
        ``narrative_ids``.
        """
        if st.session_state.selectbox_narrative == "Overview":
            selected_index = -1
        else:
            selected_index = narrative_ids.index(st.session_state.selectbox_narrative)
        st.session_state.narrative_index = selected_index
        # The right-hand column renders from the number input's value, so the
        # number widget's state key must follow the dropdown; otherwise picking
        # a narrative by ID would not change the displayed narrative.
        st.session_state.narrative_number = selected_index

    def sync_from_number():
        """Propagate a number-input change to the index state and the selectbox."""
        st.session_state.narrative_index = st.session_state.narrative_number
        if st.session_state.narrative_number == -1:
            st.session_state.selectbox_narrative = "Overview"
        else:
            st.session_state.selectbox_narrative = narrative_ids[st.session_state.narrative_number]

    # Index -1 stands for the overview page; valid narrative indices are
    # 0 .. len(narrative_ids) - 1.
    narrative_number = container_for_nav.number_input(
        min_value=-1, step=1, max_value=len(narrative_ids) - 1,
        on_change=sync_from_number,
        label=f"Select narrative by index (up to **{len(narrative_ids) - 1}**)",
        key="narrative_number"
    )
    selectbox_narrative = container_for_nav.selectbox(
        "Select narrative by ID",
        ["Overview"] + narrative_ids,
        on_change=sync_from_drop,
        key="selectbox_narrative"
    )
    st.divider()

with col2:
    # The number input's current value decides which view is rendered.
    narrative_index = narrative_number

    if narrative_index < 0:
        # Negative index: show the landing/overview page.
        st.title("Overview")
        st.write(f"Total number of narratives: {len(narratives_df)}")
        st.write("Select a narrative from the sidebar to view and edit its details.")
    else:
        # Non-negative index: show the editor for the selected row.
        narrative = narratives_df.iloc[narrative_index]

        editor_heading = "<h1 style='text-align: center; color: black;text-decoration: underline;'>Editor</h1>"
        st.markdown(editor_heading, unsafe_allow_html=True)

        container = st.container()

        container.subheader(f"Narrative ID: {narrative['id']}")
        container.divider()

        # Word-level diff between the two English versions, shown in a bordered box.
        container.subheader("Diff: Original English vs Altered English")
        processed_diff = generate_diff_html_word_level(
            narrative['original_english'],
            narrative['altered_english'],
        )
        with container.container(border=True):
            st.markdown(processed_diff, unsafe_allow_html=True)
        container.divider()

        # Editable copy of the original text.
        container.subheader("Original Text")
        original_input = container.text_area(
            "Edit the original text",
            value=narrative['original'],
            height=300,
        )