File size: 8,269 Bytes
29cb445
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494bac0
6c9bc01
 
 
494bac0
 
29cb445
6c9bc01
 
 
29cb445
 
 
6c9bc01
 
 
29cb445
 
6c9bc01
29cb445
 
 
494bac0
6c9bc01
 
 
 
494bac0
29cb445
6c9bc01
 
 
 
29cb445
 
6c9bc01
 
 
 
29cb445
 
 
494bac0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29cb445
 
 
d9ad0d8
5d10b71
3148527
d9ad0d8
 
 
 
 
 
5d10b71
d9ad0d8
29cb445
3148527
29cb445
08a14f0
 
 
3148527
29cb445
 
08a14f0
 
 
 
 
29cb445
08a14f0
 
 
08a5a4d
9cc9f38
 
 
 
08a5a4d
 
 
 
 
 
08a14f0
 
 
 
08a5a4d
9cc9f38
 
 
 
5d10b71
494bac0
 
3148527
29cb445
08a14f0
 
 
3148527
08a14f0
29cb445
08a14f0
 
 
 
 
494bac0
08a14f0
 
08a5a4d
9cc9f38
08a5a4d
 
 
 
 
08a14f0
 
 
 
 
08a5a4d
9cc9f38
 
 
08a5a4d
 
9cc9f38
 
 
08a5a4d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import streamlit as st
import pandas as pd

def _passthrough(func):
    """Return ``func`` unchanged; used when no Streamlit resource cache exists."""
    return func


# Streamlit re-executes the whole script on every widget interaction, so
# loading the models at module level without a cache re-instantiates them on
# each rerun. ``cache_resource`` is the current caching decorator and
# ``experimental_singleton`` its predecessor; when neither attribute is
# present the loader simply runs uncached, as the original code did.
_cache_resource = (
    getattr(st, "cache_resource", None)
    or getattr(st, "experimental_singleton", None)
    or _passthrough
)


@_cache_resource
def _load_classifier(model_repo, tokenizer_repo):
    """Load a sequence-classification model and a tokenizer.

    Args:
        model_repo: Identifier passed to
            ``AutoModelForSequenceClassification.from_pretrained``.
        tokenizer_repo: Identifier passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    model = AutoModelForSequenceClassification.from_pretrained(model_repo)
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_repo)
    return model, tokenizer


# Fact-checking model: model and tokenizer come from the same repository.
model1, tokenizer1 = _load_classifier(
    "rexarski/bert-base-climate-fever-fixed",
    "rexarski/bert-base-climate-fever-fixed",
)

# Indexed by the argmax of the fact-checking model's logits.
label_mapping1 = ["SUPPORTS", "REFUTES", "NOT_ENOUGH_INFO"]

# TCFD model: the tokenizer is loaded from the base checkpoint rather than
# the fine-tuned repository (kept exactly as in the original code).
model2, tokenizer2 = _load_classifier(
    "rexarski/distilroberta-tcfd-disclosure",
    "distilroberta-base",
)

# Indexed by the argmax of the TCFD model's logits.
label_mapping2 = [
    "Governance a)",
    "Governance b)",
    "Metrics and Targets a)",
    "Metrics and Targets b)",
    "Metrics and Targets c)",
    "Risk Management a)",
    "Risk Management b)",
    "Risk Management c)",
    "Strategy a)",
    "Strategy b)",
    "Strategy c)",
]


def factcheck(text1, text2):
    """Classify a pair of texts with the fact-checking model.

    ``text1`` and ``text2`` are tokenized together as a single pair, padded
    and truncated to 512 tokens, and scored by ``model1`` with gradients
    disabled.

    Returns:
        The entry of ``label_mapping1`` at the index of the largest logit.
    """
    model1.eval()
    encoded = tokenizer1(
        [text1],
        [text2],
        max_length=512,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )

    with torch.no_grad():
        logits = model1(**encoded).logits

    # Exactly one pair was encoded, so only the first row carries a prediction.
    best_index = int(logits.argmax(dim=1)[0])
    return label_mapping1[best_index]


def tcfd_classify(text):
    """Classify a text with the TCFD disclosure model.

    ``text`` is tokenized with ``tokenizer2``, padded and truncated to 512
    tokens, and scored by ``model2`` with gradients disabled.

    Returns:
        The entry of ``label_mapping2`` at the index of the largest logit of
        the first row.
    """
    model2.eval()
    encoded = tokenizer2(
        text,
        max_length=512,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )

    with torch.no_grad():
        logits = model2(**encoded).logits

    # Only the first row's prediction is returned.
    best_index = int(logits.argmax(dim=1)[0])
    return label_mapping2[best_index]


# Claims of the preloaded fact-checking examples.
_factcheck_claims = [
    "there is no relationship between temperature and carbon dioxide emissions by ­humans[...]",
    "The late 1970s marked the end of a 30-year cooling trend.",
    "Even during a period of long term warming, there are short periods of cooling due to climate variability.",
    "Humans are too insignificant to affect global climate.",
]

# Evidence paired position-by-position with the claims above.
_factcheck_evidence = [
    "Human activities are now causing atmospheric concentrations of greenhouse gases—including carbon dioxide, methane, tropospheric ozone, and nitrous oxide—to rise well above pre-industrial levels ... Increases in greenhouse gases are causing temperatures to rise ...",
    "During the last 20-30 years, world temperature has fallen, irregularly at first but more sharply over the last decade..",
    "El Niño events cause short-term (approximately 1 year in length) spikes in global average surface temperature while La Niña events cause short term cooling.",
    "Human impact on the environment or anthropogenic impact on the environment includes changes to biophysical environments and ecosystems, biodiversity, and natural resources caused directly or indirectly by humans, including global warming, environmental degradation (such as ocean acidification), mass extinction and biodiversity loss, ecological crisis, and ecological collapse.",
]

# Reference label for each claim/evidence pair.
_factcheck_labels = ["REFUTES", "NOT_ENOUGH_INFO", "SUPPORTS", "REFUTES"]

# Preloaded fact-checking examples. Each "example" title is the claim wrapped
# as "Example <n> (<claim>)", numbered from 1.
data1 = {
    "example": [
        f"Example {number} ({claim})"
        for number, claim in enumerate(_factcheck_claims, start=1)
    ],
    "claim": _factcheck_claims,
    "evidence": _factcheck_evidence,
    "label": _factcheck_labels,
}

# Sentences of the preloaded TCFD examples.
_tcfd_texts = [
    "Should our products fail to meet energy-efficiency standards and regulations, we will risk losing sales opportunities.",
    "There are no sentences in the provided excerpts that describe the targets the company uses to manage climate-related risks or opportunities.",
    "Describe how processes for identifying, assessing, and managing climate-related risks are integrated into the organization’s overall risk management.",
    "Reporting on such risks and opportunities is provided to.",
]

# Reference label for each sentence above.
_tcfd_labels = [
    "Strategy a)",
    "Metrics and Targets c)",
    "Risk Management c)",
    "Governance a)",
]

# Preloaded TCFD examples. Each "example" title is the sentence wrapped as
# "Example <n> (<text>)", numbered from 1.
data2 = {
    "example": [
        f"Example {number} ({sentence})"
        for number, sentence in enumerate(_tcfd_texts, start=1)
    ],
    "text": _tcfd_texts,
    "label": _tcfd_labels,
}


def get_pred_emoji(str1, str2, mode="factcheck"):
    """Return an emoji describing how well two label strings agree.

    Args:
        str1: First label string.
        str2: Second label string.
        mode: ``"factcheck"`` or ``"tcfd"``.

    Returns:
        "✅" when the strings are equal. In ``"tcfd"`` mode, "🔧" when they
        differ but all whitespace-separated words except the last are the
        same. Otherwise "❌". ``None`` for any other ``mode``.
    """
    if mode not in ("factcheck", "tcfd"):
        return None
    if str1 == str2:
        return "✅"
    # Partial credit exists only for TCFD labels: everything but the final
    # word matches (e.g. "Strategy a)" vs "Strategy b)").
    if mode == "tcfd" and str1.split()[:-1] == str2.split()[:-1]:
        return "🔧"
    return "❌"


# Tabular views of the preloaded examples; the select boxes below look rows
# up by their "example" column.
df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)

# Page header. The closing sentence previously ended with a stray double
# quote that was rendered literally on the page; it has been removed.
st.markdown(
    """
# climate-plus demo 🌦️
This is a minimal example of two models we trained for `climate-plus` project:

- [bert-base-climate-fever-fixed](https://huggingface.co/rexarski/bert-base-climate-fever-fixed)
- [distilroberta-tcfd-disclosure](https://huggingface.co/rexarski/distilroberta-tcfd-disclosure)

See the [GitHub repo](https://github.com/rexarski/climate-plus) for more details.
"""
)

# --- Fact-checking section -------------------------------------------------
st.markdown("## Factchecking 🌤️")

factchecking_demo = st.radio(
    "What examples do you want to see?",
    ("Preloaded examples", "Custom examples"),
    key="factchecking_demo",
)

if factchecking_demo == "Preloaded examples":
    ex1_selected = st.selectbox(
        "Select a climate claim-evidence pair", df1["example"]
    )
    # The "example" titles identify rows, so the first match is the selection.
    selected_row1 = df1[df1["example"] == ex1_selected]

    ex_claim = selected_row1["claim"].values[0]
    ex_evidence = selected_row1["evidence"].values[0]
    ex_label = selected_row1["label"].values[0]
    if st.button("Run", key="run1"):
        # Reuse the values extracted above instead of repeating the lookups.
        ex_pred = factcheck(ex_claim, ex_evidence)
        ex_emoji = get_pred_emoji(ex_label, ex_pred, mode="factcheck")
        st.markdown(f"**Claim**: {ex_claim}")
        st.markdown(f"**Evidence**: {ex_evidence}")
        st.markdown(f"**Label**: {ex_label}")
        st.markdown(f"**Prediction**: {ex_pred} {ex_emoji}")
else:
    st.markdown("Or enter your own claim and evidence below:")
    custom_claim = st.text_input(label="Enter your claim.")
    custom_evidence = st.text_input(label="Enter your evidence.")
    if st.button("Run", key="run2"):
        if custom_claim.strip() and custom_evidence.strip():
            st.markdown(
                f"**Prediction**: {factcheck(custom_claim, custom_evidence)}"
            )
        else:
            # Previously a click with a missing field did nothing at all;
            # tell the user what is needed instead.
            st.warning("Please enter both a claim and evidence.")

st.markdown("---")

# --- TCFD disclosure classification section --------------------------------
st.markdown("## TCFD disclosure classification ⛈️")

tcfd_demo = st.radio(
    "What examples do you want to see?",
    ("Preloaded examples", "Custom examples"),
    key="tcfd_demo",
)

if tcfd_demo == "Preloaded examples":
    ex2_selected = st.selectbox(
        "Select a TCFD disclosure example", df2["example"]
    )
    # The "example" titles identify rows, so the first match is the selection.
    selected_row2 = df2[df2["example"] == ex2_selected]

    ex_text = selected_row2["text"].values[0]
    ex_label2 = selected_row2["label"].values[0]
    if st.button("Run", key="run3"):
        # Reuse the value extracted above instead of repeating the lookup.
        ex_pred2 = tcfd_classify(ex_text)
        ex_emoji2 = get_pred_emoji(ex_label2, ex_pred2, mode="tcfd")
        st.markdown(f"**Text**: {ex_text}")
        st.markdown(f"**Label**: {ex_label2}")
        st.markdown(f"**Prediction**: {ex_pred2} {ex_emoji2}")
else:
    st.markdown(
        "Or enter your own sentence to see if it belongs to any specific TCFD disclosure category:"
    )
    custom_text = st.text_input(label="Enter your text.")
    if st.button("Run", key="run4"):
        if custom_text.strip():
            st.markdown(f"**Prediction**: {tcfd_classify(custom_text)}")
        else:
            # Previously a click with an empty field did nothing at all;
            # tell the user what is needed instead.
            st.warning("Please enter some text to classify.")

# Page footer.
st.markdown(
    """
            ---

            🌦️
            """
)