Update app.py
app.py CHANGED
@@ -66,13 +66,13 @@ def analyze_next_token(input_text, temperature, top_p, top_k):
 
     last_token_logits = outputs.logits[0, -1, :]
     probabilities = torch.nn.functional.softmax(last_token_logits, dim=-1)
-    top_k = …
+    top_k = 5
     top_probs, top_indices = torch.topk(probabilities, top_k)
-    top_words = [tokenizer.decode(idx.item()).strip() for idx in top_indices]
+    top_words = [tokenizer.decode([idx.item()]).strip() for idx in top_indices]
     prob_data = {word: prob.item() for word, prob in zip(top_words, top_probs)}
     prob_plot = plot_probabilities(prob_data)
 
-    prob_text = "\n".join([f"{word}: {prob:.…
+    prob_text = "\n".join([f"{word}: {prob:.4f}" for word, prob in prob_data.items()])
 
     attention_heatmap = plot_attention_alternative(inputs["input_ids"][0], last_token_logits)
 
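The hunk above pins top_k to 5, decodes each candidate id as a one-element list, and formats the probabilities with four decimals. For reference, here is a minimal standalone sketch of the same top-k next-token computation; the "gpt2" checkpoint and the prompt are placeholders for illustration, not taken from this Space.

# Minimal sketch of the top-k next-token computation in analyze_next_token.
# Assumption: "gpt2" and the prompt are placeholders; the Space may load a different model.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("The capital of France is", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

last_token_logits = outputs.logits[0, -1, :]
probabilities = torch.nn.functional.softmax(last_token_logits, dim=-1)

top_k = 5
top_probs, top_indices = torch.topk(probabilities, top_k)
top_words = [tokenizer.decode([idx.item()]).strip() for idx in top_indices]  # decode a one-element id list
prob_data = {word: prob.item() for word, prob in zip(top_words, top_probs)}
prob_text = "\n".join([f"{word}: {prob:.4f}" for word, prob in prob_data.items()])
print(prob_text)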
@@ -107,21 +107,12 @@ def plot_probabilities(prob_data):
     words = list(prob_data.keys())
     probs = list(prob_data.values())
 
-    fig, ax = plt.subplots(figsize=(…
-    …
-    ax.set_title("Probabilités des …
-    ax.set_xlabel("Tokens"…
-    ax.set_ylabel("Probabilité"…
-    plt.xticks(rotation=45…
-    plt.yticks(fontsize=10)
-
-    # Ajouter les pourcentages au-dessus des barres
-    for bar in bars:
-        height = bar.get_height()
-        ax.text(bar.get_x() + bar.get_width()/2., height,
-                f'{height:.2%}',
-                ha='center', va='bottom', fontsize=10)
-
+    fig, ax = plt.subplots(figsize=(10, 5))
+    sns.barplot(x=words, y=probs, ax=ax)
+    ax.set_title("Probabilités des tokens suivants les plus probables")
+    ax.set_xlabel("Tokens")
+    ax.set_ylabel("Probabilité")
+    plt.xticks(rotation=45)
     plt.tight_layout()
     return fig
 
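Pieced together from the added lines, the updated plot_probabilities reduces to a single seaborn bar plot. The sketch below is self-contained for reference; the imports, anything before line 107, and the dummy input in the usage lines are not visible in this diff and are assumed here.

import matplotlib.pyplot as plt
import seaborn as sns

def plot_probabilities(prob_data):
    # Bar plot of next-token probabilities, as in the hunk above.
    words = list(prob_data.keys())
    probs = list(prob_data.values())

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.barplot(x=words, y=probs, ax=ax)
    ax.set_title("Probabilités des tokens suivants les plus probables")
    ax.set_xlabel("Tokens")
    ax.set_ylabel("Probabilité")
    plt.xticks(rotation=45)
    plt.tight_layout()
    return fig

# Usage with dummy probabilities:
fig = plot_probabilities({"Paris": 0.62, "Lyon": 0.11, "Marseille": 0.05})
fig.savefig("next_token_probs.png")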
@@ -131,17 +122,11 @@ def plot_attention_alternative(input_ids, last_token_logits):
     top_k = min(len(input_tokens), 10) # Limiter à 10 tokens pour la lisibilité
     top_attention_scores, _ = torch.topk(attention_scores, top_k)
 
-    fig, ax = plt.subplots(figsize=(…
-    sns.heatmap(top_attention_scores.unsqueeze(0).numpy(), annot=True, cmap="YlOrRd", cbar=…
-    ax.set_xticklabels(input_tokens[-top_k:], rotation=45, ha="right"…
-    ax.set_yticklabels(["Attention"], rotation=0…
-    ax.set_title("Scores d'attention pour les derniers tokens"…
-
-    # Ajuster la colorbar
-    cbar = ax.collections[0].colorbar
-    cbar.set_label("Score d'attention", fontsize=12)
-    cbar.ax.tick_params(labelsize=10)
-
+    fig, ax = plt.subplots(figsize=(12, 6))
+    sns.heatmap(top_attention_scores.unsqueeze(0).numpy(), annot=True, cmap="YlOrRd", cbar=False, ax=ax)
+    ax.set_xticklabels(input_tokens[-top_k:], rotation=45, ha="right")
+    ax.set_yticklabels(["Attention"], rotation=0)
+    ax.set_title("Scores d'attention pour les derniers tokens")
     plt.tight_layout()
     return fig
 
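Dropping the colorbar-adjustment block is consistent with cbar=False in the new sns.heatmap call. The diff does not show how input_tokens and attention_scores are derived earlier in plot_attention_alternative, so the sketch below takes them as arguments and mirrors only the plotting lines added above; the helper name plot_topk_scores and the dummy data are hypothetical.

import matplotlib.pyplot as plt
import seaborn as sns
import torch

def plot_topk_scores(input_tokens, attention_scores):
    top_k = min(len(input_tokens), 10)  # keep at most 10 tokens for readability
    top_attention_scores, _ = torch.topk(attention_scores, top_k)

    fig, ax = plt.subplots(figsize=(12, 6))
    # One-row heatmap of the top-k scores, without a colorbar (cbar=False).
    sns.heatmap(top_attention_scores.unsqueeze(0).numpy(), annot=True, cmap="YlOrRd", cbar=False, ax=ax)
    # As in the diff, the tick labels are the last top_k input tokens.
    ax.set_xticklabels(input_tokens[-top_k:], rotation=45, ha="right")
    ax.set_yticklabels(["Attention"], rotation=0)
    ax.set_title("Scores d'attention pour les derniers tokens")
    plt.tight_layout()
    return fig

# Usage with dummy tokens and scores:
fig = plot_topk_scores(["Le", "chat", "dort"], torch.tensor([0.2, 0.5, 0.3]))
fig.savefig("attention_scores.png")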