Precompute column norms to significantly speed up computation

kaselby · kaselby · commit 76059002a82d · 2025-07-18T10:53:33.000-04:00
Signed-off-by: Kira Selby <kaselby@uwaterloo.ca>
diff --git a/src/cett.py b/src/cett.py
@@ -18,151 +18,34 @@
 logger = logging.getLogger(__name__)
 
 
-import copy
-class ThresholdEvaluator():
-    def __init__(self, model, thresholds):
-        self.model = model
-        self.thresholds = thresholds
-
-        self.compute_neuron_thresholds(thresholds)
-
-        self.mlp_outputs = defaultdict(list)
-        self.handles = []
-
-    def get_layers(self):
-        return self.model.model.layers
-
-    def compute_neuron_thresholds(self, thresholds):
-        n_layers = len(self.get_layers())
-        self.neuron_thresholds = torch.zeros(n_layers, self.model.config.intermediate_size)
-        with torch.no_grad():
-            for layer_idx, layer in self.get_layers():
-                norms = layer.mlp.down_proj.weight.norm(dim=0)
-                self.neuron_thresholds[layer_idx] = thresholds[layer_idx] * norms
-
-    def _inspect_hook(self, layer_idx):
-        def hook(module, input, output):
-            # Just detach, don't clone or move to CPU yet
-            out = output.view(-1, output.size(-1)).clone().detach()
-            self.mlp_outputs[layer_idx].append(out)
-            return output
-        return hook
-    
-    def _threshold_hook(self, layer_idx):
-        def hook(module, input, output):
-            # Just detach, don't clone or move to CPU yet
-            mask = (output > self.neuron_thresholds[layer_idx]).bool()
-            return output * mask
-        return hook
-    
-    def apply_thresholds(self):
-        for layer_idx, layer in enumerate(self.get_layers()):
-            handle = layer.mlp.act_fn.register_forward_hook(
-                self._threshold_hook(layer_idx)
-            )
-            self.handles.append(handle)
-
-    def apply_hooks(self):
-        for layer_idx, layer in enumerate(self.get_layers()):
-            handle = layer.mlp.register_forward_hook(
-                self._inspect_hook(layer_idx)
-            )
-            self.handles.append(handle)
-
-    def clear_captures(self):
-        self.mlp_outputs = defaultdict(list)
-
-    def remove_hooks(self):
-        for handle in self.handles:
-            handle.remove()
-        self.handles = []
-
-    def evaluate(self, inputs):
-        self.apply_hooks()
-                
-        with torch.no_grad():
-            for inp in inputs:
-                _ = self.model(**inp)
-        
-        ground_truth_outputs = {
-            idx: torch.cat(outputs_idx, dim=0) for idx,outputs_idx in self.mlp_outputs
-        }
-        self.clear_captures()
-
-        self.apply_thresholds()
-        with torch.no_grad():
-            for inp in inputs:
-                _ = self.model(**inp)
-
-        threshold_outputs = {
-            idx: torch.cat(outputs_idx, dim=0) for idx,outputs_idx in self.mlp_outputs
-        }
-        self.clear_captures()
-
-
-
-#
-#   TODO:
-#       1. Test out precomputing down_proj norms and see if that improves performance
-#       2. Ensure that the thresholds lead to reasonable results for downstream evaluation
-#
-#
-
 
+def cett_from_threshold(activations, down_weight, threshold, norms=None, tot_norm=None):
+    if norms is None:
+        col_norms = down_weight.norm(dim=0)
+        norms = activations.abs() * col_norms
+        tot_norm = activations.matmul(down_weight.t()).norm(dim=-1)
+    masked_act = activations * (norms < threshold)
+    threshold_norm = masked_act.matmul(down_weight.t()).norm(dim=-1)
+    return threshold_norm / tot_norm
 
 
-def cett_from_threshold(neuron_outputs, threshold, norms=None, tot_norm=None):
-    if not norms:   # pass both or neither
-        norms = norms = neuron_outputs.norm(dim=-2).unsqueeze(-2)
-        tot_norm = neuron_outputs.sum(dim=-1).norm(dim=-1)
-    threshold_norm = ((norms < threshold) * neuron_outputs).sum(dim=-1).norm(dim=-1)
-    return threshold_norm / tot_norm
+def calculate_threshold(activations, down_weight, col_norms, cett_target, n_thresholds=1000):
+    norms = activations.abs() * col_norms
+    output = activations.matmul(down_weight.t())
+    tot_norm = output.norm(dim=-1)
 
-'''
-def calculate_threshold_by_token(neuron_outputs, cett_target, n_thresholds=10000):
-    neuron_outputs = neuron_outputs.view(-1, *neuron_outputs.size()[-2:])
-    norms = neuron_outputs.norm(dim=-2).unsqueeze(-2)
     min_value = norms.min()
     max_value = norms.quantile(0.99)
     threshold_grid = torch.linspace(min_value, max_value, n_thresholds)
-    tot_norm = neuron_outputs.sum(dim=-1).norm(dim=-1)
-    thresholds = torch.zeros(neuron_outputs.size(0))
-    
-    initial_cett = cett_from_threshold(neuron_outputs, max_value, norms=norms, tot_norm=tot_norm)
-    thresholds[initial_cett < cett_target] = max_value
-    
-    for j in tqdm(range(neuron_outputs.size(0))):
-        if thresholds[j] == 0:
-            left = 0
-            right = n_thresholds
-            while left < right:
-                mid = (left + right) // 2
-                cett = cett_from_threshold(neuron_outputs[j], threshold_grid[mid], norms=norms[j], tot_norm=tot_norm[j])
-                if cett <= cett_target:
-                    left = mid + 1
-                else:
-                    right = mid
-            thresholds[j] = threshold_grid[left]
-    return thresholds
-'''
-        
-def calculate_threshold(neuron_outputs, cett_target, n_thresholds=10000):
-    neuron_outputs = neuron_outputs.view(-1, *neuron_outputs.size()[-2:])
-    norms = neuron_outputs.norm(dim=-2).unsqueeze(-2)
-    tot_norm = neuron_outputs.sum(dim=-1).norm(dim=-1)
-    
-    min_value = norms.min()
-    max_value = norms.quantile(0.99)
-    threshold_grid = torch.linspace(min_value, max_value, n_thresholds)
-    max_cett = cett_from_threshold(neuron_outputs, max_value, norms=norms, tot_norm=tot_norm)
+    max_cett = cett_from_threshold(activations, down_weight, max_value, norms=norms, tot_norm=tot_norm)
     outlier_mask = max_cett > cett_target
-    
+
     left = 0
     right = n_thresholds
     while left < right:
-        print(left,right)
+        #print(left,right)
         mid = (left + right) // 2
-        cett = cett_from_threshold(neuron_outputs, threshold_grid[mid], norms=norms, tot_norm=tot_norm) # Compute CETT for each token
+        cett = cett_from_threshold(activations, down_weight, threshold_grid[mid], norms=norms, tot_norm=tot_norm) # Compute CETT for each token
         cett = cett[outlier_mask].mean()    # Remove outliers and take average
         if cett <= cett_target:
             left = mid + 1
@@ -240,17 +123,19 @@ def sample_and_tokenize(examples):
     logger.info(f"Beginning to compute thresholds using {max_samples} samples")
     thresholds = defaultdict(list)
     with torch.no_grad():
+        all_col_norms = {layer_idx: layer.mlp.down_proj.weight.norm(dim=0) \
+                         for layer_idx, layer in enumerate(model.activation_capture.get_layers())}
         for batch in tqdm(dataloader, total=max_samples):
             input_ids = batch["input_ids"].to(device)
             attention_mask = batch["attention_mask"].to(device)
         
-            _ = model(input_ids=input_ids.squeeze(0), attention_mask=attention_mask.squeeze(0))
+            _ = model(input_ids=input_ids.squeeze(1), attention_mask=attention_mask.squeeze(1))
 
             for layer_idx, layer in enumerate(model.activation_capture.get_layers()):
                 down_weight = layer.mlp.down_proj.weight
+                col_norms = all_col_norms[layer_idx]
                 activations = model.activation_capture.mlp_activations[Hook.UP][layer_idx]
-                neuron_outputs = activations.unsqueeze(-2) * down_weight
-                threshold = calculate_threshold(neuron_outputs, cett_target, n_thresholds)
+                threshold = calculate_threshold(activations, down_weight, col_norms, cett_target, n_thresholds)
                 thresholds[layer_idx].append(threshold)
             
             model.activation_capture.clear_captures()