g8a9 committed on
Commit
f1a6f2c
·
1 Parent(s): c4d9ff2
Files changed (3) hide show
  1. app.py +18 -4
  2. requirements.txt +1 -1
  3. single.py +33 -10
app.py CHANGED
@@ -4,14 +4,28 @@ import corpus
4
  from PIL import Image
5
 
6
  PAGES = {
7
- "Single Explanation": single,
8
- "Corpus Explanation": corpus,
9
  }
10
 
11
- # st.sidebar.title("Explore ferret!")
12
-
13
  logo = Image.open("static/logo.png")
14
  st.sidebar.image(logo)
 
15
 
 
 
 
 
 
 
 
16
  page = st.sidebar.radio("", list(PAGES.keys()))
 
 
 
 
 
 
 
 
17
  PAGES[page].body()
 
4
  from PIL import Image
5
 
6
  PAGES = {
7
+ "Instance Explanation": single,
8
+ "Dataset Explanation": corpus,
9
  }
10
 
 
 
11
  logo = Image.open("static/logo.png")
12
  st.sidebar.image(logo)
13
+ st.sidebar.title("*ferret* showcase")
14
 
15
+ st.sidebar.markdown(
16
+ """
17
+ Welcome to the *ferret* showcase!
18
+
19
+ You will find two main functionalities.
20
+ """
21
+ )
22
  page = st.sidebar.radio("", list(PAGES.keys()))
23
+
24
+ st.sidebar.markdown(
25
+ """
26
+ In the single-*instance* page, you can evaluate our built-in explainers on your favourite model.
27
+ Choosing *dataset* mode, you will evaluate explainers on state-of-the-art datasets from our Dataset API.
28
+ """
29
+ )
30
+
31
  PAGES[page].body()
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
  transformers==4.20.1
2
- ferret-xai==0.2.4
3
  sentencepiece
 
1
  transformers==4.20.1
2
+ ferret-xai==0.3.0
3
  sentencepiece
single.py CHANGED
@@ -23,14 +23,12 @@ def get_tokenizer(tokenizer_name):
23
 
24
  def body():
25
 
 
 
26
  st.markdown(
27
  """
28
- # Welcome to the *ferret* showcase
29
-
30
  You are working now on the *single instance* mode -- i.e., you will work and
31
  inspect one textual query at a time.
32
-
33
- ## Sentiment Analysis
34
 
35
  Post-hoc explanation techniques discose the rationale behind a given prediction a model
36
  makes while detecting a sentiment out of a text. In a sense the let you *poke* inside the model.
@@ -52,12 +50,15 @@ def body():
52
  col1, col2 = st.columns([3, 1])
53
  with col1:
54
  model_name = st.text_input("HF Model", DEFAULT_MODEL)
 
 
55
  with col2:
 
56
  target = st.selectbox(
57
  "Target",
58
- options=range(5),
59
  index=1,
60
- help="Positional index of your target class.",
61
  )
62
 
63
  text = st.text_input("Text", "I love your style!")
@@ -69,7 +70,6 @@ def body():
69
  with st.spinner("Preparing the magic. Hang in there..."):
70
  model = get_model(model_name)
71
  tokenizer = get_tokenizer(model_name)
72
- config = get_config(model_name)
73
  bench = Benchmark(model, tokenizer)
74
 
75
  st.markdown("### Prediction")
@@ -80,26 +80,29 @@ def body():
80
  st.text(scores_str)
81
 
82
  with st.spinner("Computing Explanations.."):
83
- explanations = bench.explain(text, target=target)
84
 
85
  st.markdown("### Explanations")
86
  st.dataframe(bench.show_table(explanations))
 
87
 
88
  with st.spinner("Evaluating Explanations..."):
89
  evaluations = bench.evaluate_explanations(
90
- explanations, target=target, apply_style=False
91
  )
92
 
93
  st.markdown("### Faithfulness Metrics")
94
  st.dataframe(bench.show_evaluation_table(evaluations))
 
95
 
96
  st.markdown(
97
  """
98
  **Legend**
99
 
100
  - **AOPC Comprehensiveness** (aopc_compr) measures *comprehensiveness*, i.e., if the explanation captures all the tokens needed to make the prediction. Higher is better.
101
- - **AOPC Sufficiency** (aopc_suff) measures *sufficiency*, i.e., if the relevant tokens in the explanation are sufficient to make the prediction. Lower is better.
102
 
 
 
103
  - **Leave-On-Out TAU Correlation** (taucorr_loo) measures the Kendall rank correlation coefficient τ between the explanation and leave-one-out importances. Closer to 1 is better.
104
 
105
  See the paper for details.
@@ -111,3 +114,23 @@ def body():
111
  # It is computed as the drop in the model probability if only the relevant tokens of the explanations are considered. The lower the sufficiency, the more faithful is the explanation since there is less change in the model prediction.
112
 
113
  # The latter are computed by omitting individual input tokens and measuring the variation on the model prediction. The closer the τ correlation is to 1, the more faithful is the explanation.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  def body():
25
 
26
+ st.title("Explain individual texts")
27
+
28
  st.markdown(
29
  """
 
 
30
  You are working now on the *single instance* mode -- i.e., you will work and
31
  inspect one textual query at a time.
 
 
32
 
33
  Post-hoc explanation techniques discose the rationale behind a given prediction a model
34
  makes while detecting a sentiment out of a text. In a sense the let you *poke* inside the model.
 
50
  col1, col2 = st.columns([3, 1])
51
  with col1:
52
  model_name = st.text_input("HF Model", DEFAULT_MODEL)
53
+ config = AutoConfig.from_pretrained(model_name)
54
+
55
  with col2:
56
+ class_labels = list(config.id2label.values())
57
  target = st.selectbox(
58
  "Target",
59
+ options=class_labels,
60
  index=1,
61
+ help="Class label you want to explain.",
62
  )
63
 
64
  text = st.text_input("Text", "I love your style!")
 
70
  with st.spinner("Preparing the magic. Hang in there..."):
71
  model = get_model(model_name)
72
  tokenizer = get_tokenizer(model_name)
 
73
  bench = Benchmark(model, tokenizer)
74
 
75
  st.markdown("### Prediction")
 
80
  st.text(scores_str)
81
 
82
  with st.spinner("Computing Explanations.."):
83
+ explanations = bench.explain(text, target=class_labels.index(target))
84
 
85
  st.markdown("### Explanations")
86
  st.dataframe(bench.show_table(explanations))
87
+ st.caption("Darker red (blue) means higher (lower) contribution.")
88
 
89
  with st.spinner("Evaluating Explanations..."):
90
  evaluations = bench.evaluate_explanations(
91
+ explanations, target=class_labels.index(target), apply_style=False
92
  )
93
 
94
  st.markdown("### Faithfulness Metrics")
95
  st.dataframe(bench.show_evaluation_table(evaluations))
96
+ st.caption("Darker colors mean better performance.")
97
 
98
  st.markdown(
99
  """
100
  **Legend**
101
 
102
  - **AOPC Comprehensiveness** (aopc_compr) measures *comprehensiveness*, i.e., if the explanation captures all the tokens needed to make the prediction. Higher is better.
 
103
 
104
+ - **AOPC Sufficiency** (aopc_suff) measures *sufficiency*, i.e., if the relevant tokens in the explanation are sufficient to make the prediction. Lower is better.
105
+
106
  - **Leave-On-Out TAU Correlation** (taucorr_loo) measures the Kendall rank correlation coefficient τ between the explanation and leave-one-out importances. Closer to 1 is better.
107
 
108
  See the paper for details.
 
114
  # It is computed as the drop in the model probability if only the relevant tokens of the explanations are considered. The lower the sufficiency, the more faithful is the explanation since there is less change in the model prediction.
115
 
116
  # The latter are computed by omitting individual input tokens and measuring the variation on the model prediction. The closer the τ correlation is to 1, the more faithful is the explanation.
117
+
118
+ st.markdown(
119
+ """
120
+ **In code, it would be as simple as**
121
+ """
122
+ )
123
+ st.code(
124
+ f"""
125
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
126
+ from ferret import Benchmark
127
+
128
+ model = AutoModelForSequenceClassification.from_pretrained("{model_name}")
129
+ tokenizer = AutoTokenizer.from_pretrained("{model_name}")
130
+
131
+ bench = Benchmark(model, tokenizer)
132
+ explanations = bench.explain("{text}")
133
+ evaluations = bench.evaluate_explanations(explanations)
134
+ """,
135
+ language="python",
136
+ )