orionweller committed on
Commit
b38e27d
1 Parent(s): 2ae2246

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -0
app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import pandas as pd
4
+ import json
5
+ from html import escape
6
+ import difflib
7
+
8
def generate_diff_html_word_level(text1, text2):
    """Return an HTML rendering of the word-level diff between two texts.

    Both texts are split on whitespace and compared word by word. Words
    only in ``text1`` are wrapped in a pink ``<del>``, words only in
    ``text2`` in a green ``<ins>``, and shared words are emitted as-is.
    All words are HTML-escaped before being embedded in the markup.

    Because the texts are split on whitespace, runs of spaces and line
    breaks in the inputs are collapsed to single spaces in the output.

    Args:
        text1: The original text. ``None`` or NaN is treated as empty.
        text2: The altered text. ``None`` or NaN is treated as empty.

    Returns:
        A ``<pre style="white-space: pre-wrap;">`` element, as a string,
        containing the diff markup.
    """
    def _to_words(text):
        # Missing values arrive as None, or as NaN when the row comes from
        # a pandas DataFrame; NaN is the only value not equal to itself.
        if text is None or text != text:
            return []
        return text.split()

    def _mark(tag_name, color, words):
        # One highlighted, escaped run of words.
        opening = f'<{tag_name} style="background-color: {color};">'
        return opening + escape(' '.join(words)) + f'</{tag_name}>'

    words1 = _to_words(text1)
    words2 = _to_words(text2)

    matcher = difflib.SequenceMatcher(None, words1, words2)

    diff = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag in ('replace', 'delete'):
            diff.append(_mark('del', '#fbb6ce', words1[i1:i2]))
        if tag in ('replace', 'insert'):
            diff.append(_mark('ins', '#b7e4c7', words2[j1:j2]))
        if tag == 'equal':
            diff.append(escape(' '.join(words1[i1:i2])))

    # Keep a replaced pair (<del> followed by <ins>) together on one line
    # by joining the two elements with a non-breaking space.
    final_html = ' '.join(diff).replace('</del> <ins', '</del>&nbsp;<ins')
    return f'<pre style="white-space: pre-wrap;">{final_html}</pre>'
32
+
33
# Ask protobuf to use its pure-Python implementation.
# NOTE(review): this assignment runs after `import streamlit` at the top of
# the file; if protobuf has already been imported by then, the setting may
# have no effect on this process -- confirm, and move it above the
# third-party imports if so.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

# Lay the page out across the full browser width.
st.set_page_config(layout="wide")
35
+
36
@st.cache_data
def convert_df(df):
    """Serialize ``df`` to CSV (no index column) and return UTF-8 bytes.

    The result is cached by Streamlit via ``st.cache_data``.
    """
    csv_text = df.to_csv(index=False, quotechar='"')
    return csv_text.encode('utf-8')
39
+
40
@st.cache_data
def load_narratives_data():
    """Load ``narratives.jsonl`` into a DataFrame, one row per JSON line.

    The file is read from the current working directory. Blank lines are
    skipped so that stray empty lines do not abort loading. The result is
    cached by Streamlit via ``st.cache_data``.

    Returns:
        A ``pandas.DataFrame`` built from the parsed JSON objects.

    Raises:
        FileNotFoundError: If ``narratives.jsonl`` does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default.
    with open("narratives.jsonl", "r", encoding="utf-8") as f:
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)
47
+
48
narratives_df = load_narratives_data()

# Narrow navigation column on the left, wide content column on the right.
col1, col2 = st.columns([1, 3], gap="large")

with st.sidebar:
    st.title("Options")

with col1:
    st.title("Narratives")
    narrative_ids = narratives_df["id"].tolist()
    container_for_nav = st.container()

    def sync_from_drop():
        """Propagate a dropdown selection to the index-based state.

        "Overview" maps to index -1; any other entry maps to its position
        in ``narrative_ids``.
        """
        if st.session_state.selectbox_narrative == "Overview":
            selected_index = -1
        else:
            selected_index = narrative_ids.index(st.session_state.selectbox_narrative)
        st.session_state.narrative_index = selected_index
        # The page renders from the number input's value, so the number
        # widget must follow the dropdown; otherwise picking an ID here
        # would not change the narrative shown.
        st.session_state.narrative_number = selected_index

    def sync_from_number():
        """Propagate a number-input change to the dropdown and index state."""
        st.session_state.narrative_index = st.session_state.narrative_number
        if st.session_state.narrative_number == -1:
            st.session_state.selectbox_narrative = "Overview"
        else:
            st.session_state.selectbox_narrative = narrative_ids[st.session_state.narrative_number]

    # -1 selects the overview page; 0..len-1 select a narrative by position.
    narrative_number = container_for_nav.number_input(
        min_value=-1, step=1, max_value=len(narrative_ids) - 1,
        on_change=sync_from_number,
        label=f"Select narrative by index (up to **{len(narrative_ids) - 1}**)",
        key="narrative_number"
    )
    selectbox_narrative = container_for_nav.selectbox(
        "Select narrative by ID",
        ["Overview"] + narrative_ids,
        on_change=sync_from_drop,
        key="selectbox_narrative"
    )
    st.divider()

with col2:
    narrative_index = narrative_number

    if narrative_index >= 0:
        narrative = narratives_df.iloc[narrative_index]

        st.markdown("<h1 style='text-align: center; color: black;text-decoration: underline;'>Editor</h1>", unsafe_allow_html=True)

        container = st.container()

        container.subheader(f"Narrative ID: {narrative['id']}")
        container.divider()

        container.subheader("Diff: Original English vs Altered English")
        processed_diff = generate_diff_html_word_level(narrative['original_english'], narrative['altered_english'])
        with container.container(border=True):
            # The diff helper escapes the narrative text itself, so only
            # its own markup is rendered as HTML here.
            st.markdown(processed_diff, unsafe_allow_html=True)
        container.divider()

        container.subheader("Original Text")
        original_input = container.text_area("Edit the original text", value=narrative['original'], height=300)

    else:
        # Negative index: show the overview page.
        st.title("Overview")
        st.write(f"Total number of narratives: {len(narratives_df)}")
        st.write("Select a narrative from the sidebar to view and edit its details.")