Spaces:
Running
Running
File size: 5,474 Bytes
05b9456 f3fd096 05de9a6 f3fd096 3d26c4a f3fd096 3d26c4a e10ccfa 3d26c4a f3fd096 e10ccfa 05de9a6 f3fd096 05de9a6 f3fd096 05b9456 05de9a6 05b9456 05de9a6 05b9456 05de9a6 05b9456 05de9a6 05b9456 f3fd096 05de9a6 f3fd096 05de9a6 05b9456 05de9a6 f3fd096 0e6dbbe 55fbc57 ded6735 0e6dbbe e10ccfa 0e6dbbe 55fbc57 05b9456 f3fd096 0e6dbbe f3fd096 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
import base64
from collections import Counter
import graphviz
import penman
from multi_amr.data.postprocessing_graph import ParsedStatus
from utils import get_resources, LANGUAGES, translate
import streamlit as st
# Page chrome. The icon/title emoji were mis-decoded in the previous revision
# ("π©βπ»"); the surviving bytes correspond to the woman-technologist emoji.
st.set_page_config(
    page_title="Multilingual text-to-AMR demo by Bram Vanroy",
    page_icon="👩‍💻",
)
st.title("👩‍💻 Multilingual text-to-AMR")

# Seed the session state before the widgets below are created, because each
# widget is bound to one of these entries through its ``key``.
for _state_key, _state_default in (
    ("text", ""),
    ("language", "English"),
    ("use_multilingual", False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# Input row: a wide text field next to a narrow language selector (4:1).
text_col, lang_col = st.columns((4, 1))
text = text_col.text_input(label="Input text", key="text")
src_lang = lang_col.selectbox(
    label="Language",
    options=list(LANGUAGES.keys()),
    index=0,
    key="language",
)
multilingual = st.checkbox(
    "Use multilingual model",
    label_visibility="visible",
    key="use_multilingual",
    help=(
        "Whether to use a single multilingual model that was trained on English, Spanish and"
        " Dutch together, or (if not checked) language-specific models. Enabling this will"
        " result in worse performance but can be of interest for research purposes."
    ),
)

# Placeholder that the rest of the script fills with progress/warning messages.
error_ct = st.empty()
def _unique_node_labels(triples) -> dict:
    """Return a display label for every AMR variable found in ``triples``.

    Every ``(variable, ":instance", concept)`` triple maps the variable to its
    concept. A concept that is instantiated more than once gets a running
    counter appended so its nodes can be told apart, e.g.
    ``{"t": "talk-01 (1)", "t2": "talk-01 (2)"}``. Concepts that occur only
    once keep their plain name.
    """
    concept_of = {}
    concept_freq = Counter()
    for source, role, target in triples:
        if role == ":instance":
            concept_of[source] = target
            concept_freq[target] += 1

    labels = {}
    seen = Counter()
    for variable, concept in concept_of.items():
        if concept_freq[concept] > 1:
            seen[concept] += 1
            labels[variable] = f"{concept} ({seen[concept]})"
        else:
            labels[variable] = concept
    return labels


def _build_digraph(graph):
    """Build a ``graphviz.Digraph`` from ``graph.triples``.

    Variables are drawn with the labels from ``_unique_node_labels``; any other
    edge endpoint (e.g. a constant) is drawn under its own text.
    """
    digraph = graphviz.Digraph(
        node_attr={
            "color": "#3aafa9",
            "style": "rounded,filled",
            "shape": "box",
            "fontcolor": "white",
        }
    )
    labels = _unique_node_labels(graph.triples)

    # Declare every concept node explicitly. Without this, a graph that has a
    # single concept and no relations would render as an empty chart because
    # nodes were only ever created implicitly through edges.
    for label in labels.values():
        digraph.node(label)

    for source, role, target in graph.triples:
        if role != ":instance":
            digraph.edge(labels.get(source, source), labels.get(target, target), label=role)
    return digraph


def _create_download_link(img_bytes: bytes) -> str:
    """Return an HTML anchor that embeds ``img_bytes`` as a base64 PNG data URI."""
    encoded = base64.b64encode(img_bytes).decode("utf-8")
    return f'<a href="data:image/png;charset=utf-8;base64,{encoded}" download="amr-graph.png">Download graph</a>'


# Validate on the stripped value so that whitespace-only input is rejected
# instead of being sent to the model as an empty string.
_stripped_text = (st.session_state["text"] or "").strip()
if not _stripped_text:
    error_ct.warning("Text cannot be empty!", icon="⚠️")
elif st.button("Submit"):
    text = _stripped_text
    # NOTE(review): the icon was mis-decoded text ("π»") in the previous
    # revision; presumed to be the laptop emoji - confirm the intended glyph.
    error_ct.info("Generating abstract meaning representation (AMR)...", icon="💻")
    model, tokenizer = get_resources(multilingual, src_lang)
    gen_kwargs = {
        "max_new_tokens": 512,
        "num_beams": 5,
    }
    outputs = translate(text, src_lang, model, tokenizer, **gen_kwargs)
    # Generation is done: remove the progress message.
    error_ct.empty()

    if outputs["status"][0] == ParsedStatus.BACKOFF:
        st.write("The system could not generate a valid graph no matter how hard it tried.")
    else:
        graph = outputs["graph"][0]
        visualized = _build_digraph(graph)

        st.subheader("Graph visualization")
        st.graphviz_chart(visualized, use_container_width=True)

        # Download link: render the same graph to PNG and inline it in an anchor.
        img = visualized.pipe(format="png")
        st.markdown(_create_download_link(img), unsafe_allow_html=True)

        # Additional info
        st.subheader("PENMAN representation")
        st.code(penman.encode(graph))
########################
# Information, socials #
########################
# Static footer of the page: project blurb, a short AMR explanation and contact
# links. The long texts are named first so the rendering calls read as a list.
# NOTE(review): the trailing characters of both header strings look like
# mis-decoded emoji; they are reproduced unchanged - confirm the intended glyphs.
_SIGNON_BLURB_HTML = """
<div style="display: flex">
<img style="margin-right: 1em" alt="SignON logo" src="https://signon-project.eu/wp-content/uploads/2021/05/SignOn_Favicon_500x500px.png" width=64 height=64>
<p><a href="https://signon-project.eu/" target="_blank" title="SignON homepage">SignON</a> aims to bridge the
communication gap between deaf, hard-of-hearing and hearing people through an accessible translation service.
This service will translate between languages and modalities with particular attention for sign languages.</p>
</div>"""

_AMR_BLURB_MD = """[Abstract meaning representation](https://aclanthology.org/W13-2322/) (AMR)
is a semantic framework to describe meaning relations of sentences as graphs. In the SignON project, AMR is used as
an interlingua to translate between modalities and languages. To this end, I built MBART models for the task of
generating AMR representations from an input sentence, which is show-cased in this demo.
"""

_CONTACT_MD = (
    "Would you like additional functionality in the demo, do you have questions, or just want to get in touch?"
    " Give me a shout on [Twitter](https://twitter.com/BramVanroy)"
    " or add me on [LinkedIn](https://www.linkedin.com/in/bramvanroy/)!"
)

st.header("SignON π€")
# The blurb is raw HTML (logo + paragraph), hence unsafe_allow_html.
st.markdown(_SIGNON_BLURB_HTML, unsafe_allow_html=True)
st.markdown(_AMR_BLURB_MD)

st.header("Contact βοΈ")
st.markdown(_CONTACT_MD)
|