Spaces:
Runtime error
Runtime error
Sebastian Gehrmann
committed on
Commit
•
13fd677
1
Parent(s):
396d1e7
considerations
Browse files- datacards/considerations.py +88 -4
- datacards/curation.py +9 -9
- datacards/overview.py +3 -3
datacards/considerations.py
CHANGED
@@ -1,13 +1,97 @@
|
|
1 |
import streamlit as st
|
2 |
|
3 |
-
from .streamlit_utils import
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
-
|
|
|
|
|
|
|
|
|
6 |
|
7 |
|
8 |
def considerations_page():
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
|
12 |
def considerations_summary():
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
|
3 |
+
from .streamlit_utils import (
|
4 |
+
make_multiselect,
|
5 |
+
make_selectbox,
|
6 |
+
make_text_area,
|
7 |
+
make_text_input,
|
8 |
+
make_radio,
|
9 |
+
)
|
10 |
|
11 |
+
# Number of fields expected in each sub-section of the "considerations" part
# of the data card. considerations_summary() reports the number of filled
# fields against these totals.
# NOTE(review): considerations_page() currently renders no input widgets in
# the PII expander and two in the Licenses expander, so these counts appear to
# anticipate fields that are not implemented yet -- confirm.
N_FIELDS_PII = 3
N_FIELDS_LICENSES = 3
N_FIELDS_LIMITATIONS = 4

# Total across all sub-sections.
N_FIELDS = sum((N_FIELDS_PII, N_FIELDS_LICENSES, N_FIELDS_LIMITATIONS))
|
16 |
|
17 |
|
18 |
def considerations_page():
    """Render the "considerations" section of the data card form.

    Ensures ``st.session_state.card_dict["considerations"]`` exists together
    with its ``"pii"``, ``"licenses"`` and ``"limitations"`` sub-dictionaries,
    then draws one collapsed expander per sub-section. Input widgets are
    created through the ``make_*`` helpers from ``streamlit_utils``; each is
    given a ``key_list`` path (presumably the location in ``card_dict`` where
    the helper stores the answer -- confirm against ``streamlit_utils``).
    """
    # setdefault keeps any existing answers and only creates missing dicts.
    considerations = st.session_state.card_dict.setdefault("considerations", {})

    with st.expander("PII Risks and Liability", expanded=False):
        # Not used yet: this sub-section has no widgets so far.
        key_pref = ["considerations", "pii"]
        considerations.setdefault("pii", {})

        # TODO: cross-link this section with curation.

    with st.expander("Licenses", expanded=False):
        key_pref = ["considerations", "licenses"]
        considerations.setdefault("licenses", {})

        # TODO: cross-link the first question with overview.py.

        make_text_input(
            label="Can the dataset be used for research and/or commercial purposes?",
            key_list=key_pref + ["data-restrictions"],
            help="Describe any restrictions put on how the data can be used.",
        )
        make_radio(
            label="Are there restrictions on the underlying data?",
            options=["Open", "Non-Commercial", "Copyrighted", "Other"],
            key_list=key_pref + ["data-copyright"],
            help="Are there restrictions on the underlying data?",
        )

    with st.expander("Known limitations", expanded=False):
        key_pref = ["considerations", "limitations"]
        considerations.setdefault("limitations", {})

        # TODO: Form proper language

        make_text_area(
            label="Technical limitations, annotation noise, etc.",
            key_list=key_pref + ["data-technical-limitations"],
            help="",
        )

        make_text_area(
            label="Particularly unsuited for applications",
            key_list=key_pref + ["data-unsuited-applications"],
            help="",
        )

        make_text_area(
            label="What are discouraged use cases of the dataset?",
            key_list=key_pref + ["data-discouraged-use"],
            help="",
        )

        make_text_area(
            label="Citation of work identifying these limitations",
            key_list=key_pref + ["data-citations-limitations"],
            help="",
        )
|
81 |
|
82 |
|
83 |
def considerations_summary():
    """Render a collapsed expander summarising completion of the section.

    Counts the entries stored in each sub-dictionary of
    ``st.session_state.card_dict["considerations"]`` (missing dictionaries
    count as zero) and shows them against the ``N_FIELDS*`` totals, both in
    the expander title and as a Markdown list inside it.
    """
    # Look the section up once; an absent section behaves like an empty one.
    considerations = st.session_state.card_dict.get("considerations", {})
    total_filled = sum(len(dct) for dct in considerations.values())

    # Title names this section (it previously read "Dataset Overview",
    # apparently carried over from the overview page).
    with st.expander(
        f"Considerations Completion - {total_filled} of {N_FIELDS}", expanded=False
    ):
        sub_sections = (
            ("PII Risks and Liability", "pii", N_FIELDS_PII),
            ("Licenses", "licenses", N_FIELDS_LICENSES),
            ("Known limitations", "limitations", N_FIELDS_LIMITATIONS),
        )
        lines = [
            f"- **Overall completion:**\n - {total_filled} of {N_FIELDS} fields\n"
        ]
        lines.extend(
            f"- **Sub-section - {title}:**\n"
            f" - {len(considerations.get(key, {}))} of {n_fields} fields\n"
            for title, key, n_fields in sub_sections
        )
        st.markdown("".join(lines))
|
datacards/curation.py
CHANGED
@@ -72,17 +72,17 @@ def curation_page():
|
|
72 |
make_multiselect(
|
73 |
label="How was the language data obtained?",
|
74 |
options=[
|
75 |
-
"
|
76 |
-
"
|
77 |
-
"
|
78 |
-
"
|
79 |
-
"
|
80 |
],
|
81 |
key_list=key_pref + ["obtained"],
|
82 |
)
|
83 |
make_multiselect(
|
84 |
label="If found, where from?",
|
85 |
-
options=["website", "
|
86 |
key_list=key_pref + ["found"],
|
87 |
help="select N/A if none of the language data was found",
|
88 |
)
|
@@ -90,9 +90,9 @@ def curation_page():
|
|
90 |
label="If crowdsourced, where from?",
|
91 |
options=[
|
92 |
"Amazon Mechanical Turk",
|
93 |
-
"
|
94 |
-
"
|
95 |
-
"
|
96 |
"N/A",
|
97 |
],
|
98 |
key_list=key_pref + ["crowdsourced"],
|
|
|
72 |
make_multiselect(
|
73 |
label="How was the language data obtained?",
|
74 |
options=[
|
75 |
+
"Found",
|
76 |
+
"Created for the dataset",
|
77 |
+
"Crowdsourced",
|
78 |
+
"Machine-generated",
|
79 |
+
"Other",
|
80 |
],
|
81 |
key_list=key_pref + ["obtained"],
|
82 |
)
|
83 |
make_multiselect(
|
84 |
label="If found, where from?",
|
85 |
+
options=["Multiple websites", "Single website", "Offline media collection", "Other", "N/A"],
|
86 |
key_list=key_pref + ["found"],
|
87 |
help="select N/A if none of the language data was found",
|
88 |
)
|
|
|
90 |
label="If crowdsourced, where from?",
|
91 |
options=[
|
92 |
"Amazon Mechanical Turk",
|
93 |
+
"Other crowdworker platform",
|
94 |
+
"Participatory experiment",
|
95 |
+
"Other",
|
96 |
"N/A",
|
97 |
],
|
98 |
key_list=key_pref + ["crowdsourced"],
|
datacards/overview.py
CHANGED
@@ -167,9 +167,9 @@ def overview_page():
|
|
167 |
)
|
168 |
with st.expander("Structure", expanded=False):
|
169 |
key_pref = ["overview", "structure"]
|
170 |
-
st.session_state.card_dict["overview"][
|
171 |
-
"
|
172 |
-
]
|
173 |
data_fields_help = """
|
174 |
[free text; paragraphs]
|
175 |
- Mention their data type, and whether and how they are used as part of the generation pipeline.
|
|
|
167 |
)
|
168 |
with st.expander("Structure", expanded=False):
|
169 |
key_pref = ["overview", "structure"]
|
170 |
+
st.session_state.card_dict["overview"]["structure"] = st.session_state.card_dict[
|
171 |
+
"overview"
|
172 |
+
].get("structure", {})
|
173 |
data_fields_help = """
|
174 |
[free text; paragraphs]
|
175 |
- Mention their data type, and whether and how they are used as part of the generation pipeline.
|