Woziii committed
Commit 391d3d3 · verified · 1 Parent(s): 3c28324

Update app.py

Files changed (1)
  1. app.py +5 -6
app.py CHANGED
@@ -42,8 +42,8 @@ def load_model(model_name, progress=gr.Progress()):
         elif i == 75:
             model = AutoModelForCausalLM.from_pretrained(
                 model_name,
-                torch_dtype=torch.bfloat16,
-                device_map="auto",
+                torch_dtype=torch.float32,
+                device_map="cpu",
                 attn_implementation="eager"
             )
     if tokenizer.pad_token is None:
@@ -58,7 +58,7 @@ def analyze_next_token(input_text, temperature, top_p, top_k):
     if model is None or tokenizer is None:
         return "Veuillez d'abord charger un modèle.", None, None

-    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(model.device)
+    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512)

     try:
         with torch.no_grad():
@@ -74,7 +74,6 @@ def analyze_next_token(input_text, temperature, top_p, top_k):

     prob_text = "\n".join([f"{word}: {prob:.4f}" for word, prob in prob_data.items()])

-    # Alternative pour le mécanisme d'attention
     attention_heatmap = plot_attention_alternative(inputs["input_ids"][0], last_token_logits)

     return prob_text, attention_heatmap, prob_plot
@@ -87,7 +86,7 @@ def generate_text(input_text, temperature, top_p, top_k):
     if model is None or tokenizer is None:
         return "Veuillez d'abord charger un modèle."

-    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(model.device)
+    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=512)

     try:
         with torch.no_grad():
@@ -124,7 +123,7 @@ def plot_attention_alternative(input_ids, last_token_logits):
     top_attention_scores, _ = torch.topk(attention_scores, top_k)

     fig, ax = plt.subplots(figsize=(12, 6))
-    sns.heatmap(top_attention_scores.unsqueeze(0), annot=True, cmap="YlOrRd", cbar=False, ax=ax)
+    sns.heatmap(top_attention_scores.unsqueeze(0).numpy(), annot=True, cmap="YlOrRd", cbar=False, ax=ax)
     ax.set_xticklabels(input_tokens[-top_k:], rotation=45, ha="right")
     ax.set_yticklabels(["Attention"], rotation=0)
     ax.set_title("Scores d'attention pour les derniers tokens")
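For context on the first hunk, here is a minimal, standalone sketch of the same loading pattern. It is not app.py itself: the model name is a placeholder and device_map assumes the accelerate package is installed. The point is that with every module pinned to the CPU in full precision, tokenized inputs can stay on the CPU, which is why the .to(model.device) calls disappear in the later hunks.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "gpt2"  # placeholder; app.py selects its own model_name
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,    # full precision instead of bfloat16
    device_map="cpu",             # keep every module on the CPU
    attn_implementation="eager",  # eager attention kernels, as in the diff
)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Inputs no longer need .to(model.device): the model already lives on the CPU.
inputs = tokenizer("Bonjour", return_tensors="pt", padding=True,
                   truncation=True, max_length=512)
with torch.no_grad():
    outputs = model(**inputs)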
 
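The last hunk converts the attention scores to a NumPy array before plotting. A minimal sketch of that pattern with made-up data (the variable names below are placeholders, not taken from app.py): sns.heatmap wants 2-D array-like input, so the row vector is built with unsqueeze(0) and handed over as a plain NumPy array via .numpy() rather than relying on seaborn to coerce a torch tensor.

import torch
import seaborn as sns
import matplotlib.pyplot as plt

# Stand-in for the attention_scores computed in plot_attention_alternative.
scores = torch.softmax(torch.randn(8), dim=-1)
top_scores, _ = torch.topk(scores, k=5)

fig, ax = plt.subplots(figsize=(12, 6))
# unsqueeze(0) -> shape (1, 5); .numpy() -> a plain array seaborn accepts directly.
sns.heatmap(top_scores.unsqueeze(0).numpy(), annot=True, cmap="YlOrRd", cbar=False, ax=ax)
ax.set_yticklabels(["Attention"], rotation=0)
plt.close(fig)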