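"""Streamlit page for the "Broader Social Context" section of a data card.

Collects information on previous work on the social impact of the dataset,
its impact on under-served communities, and documented social biases, and
summarizes how many of the section's fields have been filled in.
"""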
import streamlit as st
from .streamlit_utils import (
make_multiselect,
make_selectbox,
make_text_area,
make_text_input,
make_radio,
)
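
# Number of fields in each sub-section, used by context_summary() to report completion.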
N_FIELDS_PREVIOUS = 3
N_FIELDS_UNDERSERVED_COMMUNITIES = 2
N_FIELDS_BIASES = 3
N_FIELDS = N_FIELDS_PREVIOUS + N_FIELDS_UNDERSERVED_COMMUNITIES + N_FIELDS_BIASES
def context_page():
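    """Render the three sub-sections of the questionnaire.

    Answers are stored under st.session_state.card_dict["context"]; fields
    that are hidden by a "no" answer are backfilled with "N/A".
    """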
st.session_state.card_dict["context"] = st.session_state.card_dict.get(
"context", {}
)
with st.expander("Previous Work on the Social Impact of the Dataset", expanded=False):
key_pref = ["context", "previous"]
st.session_state.card_dict["context"]["previous"] = st.session_state.card_dict[
"context"
].get("previous", {})
make_radio(
label="Are you aware of cases where models trained on the task featured in this dataset ore related tasks have been used in automated systems?",
options=["no", "yes - related tasks", "yes - other datasets featuring the same task", "yes - models trained on this dataset"],
key_list=key_pref + ["is-deployed"],
help="",
)
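        # Follow-up questions only apply when some deployment of related models is known.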
if "yes" in st.session_state.card_dict["context"]["previous"]["is-deployed"]:
make_text_area(
label="Did any of these previous uses result in observations about the social impact of the systems? " + \
"In particular, has there been work outlining the risks and limitations of the system? Provide links and descriptions here.",
key_list=key_pref + ["described-risks"],
help="",
)
if st.session_state.card_dict["context"]["previous"]["is-deployed"] == "yes - models trained on this dataset":
make_text_area(
label="Have any changes been made to the dataset as a result of these observations?",
key_list=key_pref + ["changes-from-observation"],
help="",
)
else:
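                # Question is hidden, so backfill "N/A" to keep the completion count consistent.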
st.session_state.card_dict["context"]["previous"]["changes-from-observation"] = "N/A"
else:
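            # No known deployments: mark both follow-up fields as not applicable.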
st.session_state.card_dict["context"]["previous"]["described-risks"] = "N/A"
st.session_state.card_dict["context"]["previous"]["changes-from-observation"] = "N/A"
with st.expander("Impact on Under-Served Communities", expanded=False):
key_pref = ["context", "underserved"]
st.session_state.card_dict["context"]["underserved"] = st.session_state.card_dict[
"context"
].get("underserved", {})
make_radio(
label="Does this dataset address the needs of communities that are traditionally underserved in language technology, and particularly language generation technology?" + \
"Communities may be underserved for exemple because their language, language variety, or social or geographical context is underepresented in NLP and NLG resources (datasets and models).",
options=["no", "yes"],
key_list=key_pref+["helps-underserved"],
)
if st.session_state.card_dict["context"]["underserved"]["helps-underserved"] == "yes":
make_text_area(
label="Describe how this dataset addresses the needs of underserved communities.",
key_list=key_pref+["underserved-description"],
)
else:
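            # The description is only requested when the answer is "yes"; backfill otherwise.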
st.session_state.card_dict["context"]["underserved"]["underserved-description"] = "N/A"
with st.expander("Discussion of Biases", expanded=False):
key_pref = ["context", "biases"]
st.session_state.card_dict["context"]["biases"] = st.session_state.card_dict[
"context"
].get("biases", {})
make_radio(
label="Are there documented social biases in the dataset? " + \
"Biases in this context are variations in the ways members of different social categories are represented that can have harmful downstream consequences for members of the more disadvantaged group.",
options=["yes", "unsure", "no"],
key_list=key_pref + ["has-biases"],
help="For a more extensive definition of social biases, see [Language (Technology) is Power: A Critical Survey of “Bias” in NLP ](https://aclanthology.org/2020.acl-main.485.pdf)",
)
if st.session_state.card_dict["context"]["biases"]["has-biases"] == "yes":
make_text_area(
label="Provide links to and summaries of works analyzing these biases.",
key_list=key_pref + ["bias-analyses"],
help="The analyses can take the form of academic papers or news articles, or even blog posts.",
)
else:
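            # No documented biases to link to, so the analyses field is not applicable.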
st.session_state.card_dict["context"]["biases"]["bias-analyses"] = "N/A"
make_text_area(
label="Does the distribution of language producers in the dataset accurately represent the full distribution of speakers of the language world-wide? If not, how does it differ?",
key_list=key_pref + ["speaker-distibution"],
help="For example, are most speakers in the dataset of a certain gender or located in a certain county?",
)
def context_summary():
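    """Report how many fields are filled in, overall and per sub-section.

    Each sub-section's count is the number of keys stored in its dict,
    including fields backfilled with "N/A".
    """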
total_filled = sum(
[len(dct) for dct in st.session_state.card_dict.get("context", {}).values()]
)
with st.expander(
f"Broader Social Context Completion - {total_filled} of {N_FIELDS}", expanded=False
):
completion_markdown = ""
completion_markdown += (
f"- **Overall completion:**\n - {total_filled} of {N_FIELDS} fields\n"
)
completion_markdown += f"- **Sub-section - Previous Work on the Social Impact of the Dataset:**\n - {len(st.session_state.card_dict.get('context', {}).get('previous', {}))} of {N_FIELDS_PREVIOUS} fields\n"
completion_markdown += f"- **Sub-section - Impact on Under-Served Communities:**\n - {len(st.session_state.card_dict.get('context', {}).get('underserved', {}))} of {N_FIELDS_UNDERSERVED_COMMUNITIES} fields\n"
completion_markdown += f"- **Sub-section - Discussion of Biases:**\n - {len(st.session_state.card_dict.get('context', {}).get('biases', {}))} of {N_FIELDS_BIASES} fields\n"
st.markdown(completion_markdown)