ethanlshen commited on
Commit
8b0fc91
1 Parent(s): 2645fa8

Set devices

Browse files
Files changed (1) hide show
  1. superposed/llama/superpose.py +4 -4
superposed/llama/superpose.py CHANGED
@@ -50,8 +50,8 @@ class Superpose(nn.Module):
50
  self.alive_seq = initial_tokens
51
  self.fin_seq = initial_tokens
52
  self.smoothing = smoothing
53
- self.alive_log_probs = torch.zeros(self.n_prompts, self.n_drafts)
54
- self.fin_log_probs = torch.full((self.n_prompts, self.n_drafts), float("-inf"))
55
  self.alpha = alpha
56
  self.verbose = verbose
57
  self.penalty = penalty
@@ -214,7 +214,7 @@ class Superpose(nn.Module):
214
  Superposition matrix
215
  """
216
  # Create superposition matrix
217
- mixing_matrix = torch.zeros(self.n_prompts, self.vocab_size)
218
  # Convert draft log probs to probabilities
219
  weightings = log_prob_to_prob(self.alive_log_probs)
220
  # Update probabilities in superposition matrix with draft probabilities
@@ -242,7 +242,7 @@ class Superpose(nn.Module):
242
  # Start timer
243
  start_time = datetime.now()
244
  # Create distribution matrix
245
- next_token_probs = torch.zeros(self.n_prompts, self.n_drafts, 32000)
246
  if probs is not None:
247
  # Loop over all prefixes
248
  for p_idx in range(len(alive_seq)):
 
50
  self.alive_seq = initial_tokens
51
  self.fin_seq = initial_tokens
52
  self.smoothing = smoothing
53
+ self.alive_log_probs = torch.zeros(self.n_prompts, self.n_drafts, device="cuda")
54
+ self.fin_log_probs = torch.full((self.n_prompts, self.n_drafts), float("-inf"), device="cuda")
55
  self.alpha = alpha
56
  self.verbose = verbose
57
  self.penalty = penalty
 
214
  Superposition matrix
215
  """
216
  # Create superposition matrix
217
+ mixing_matrix = torch.zeros(self.n_prompts, self.vocab_size, device="cuda")
218
  # Convert draft log probs to probabilities
219
  weightings = log_prob_to_prob(self.alive_log_probs)
220
  # Update probabilities in superposition matrix with draft probabilities
 
242
  # Start timer
243
  start_time = datetime.now()
244
  # Create distribution matrix
245
+ next_token_probs = torch.zeros(self.n_prompts, self.n_drafts, 32000, device="cuda")
246
  if probs is not None:
247
  # Loop over all prefixes
248
  for p_idx in range(len(alive_seq)):