Commit 4c6f2ca

[feat] multi-supcon
1 parent ac60700 commit 4c6f2ca

5 files changed: +361 -0 lines changed

src/pytorch_metric_learning/losses/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -35,3 +35,5 @@
 from .triplet_margin_loss import TripletMarginLoss
 from .tuplet_margin_loss import TupletMarginLoss
 from .vicreg_loss import VICRegLoss
+from .multilabel_supcon_loss import MultiSupConLoss
+from .xbm_multilabel import CrossBatchMemory4MultiLabel
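With these two lines in place, both new classes become importable from the losses subpackage; a one-line sketch (assuming the package is installed from this commit):

from pytorch_metric_learning.losses import MultiSupConLoss, CrossBatchMemory4MultiLabel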
Lines changed: 86 additions & 0 deletions

@@ -0,0 +1,86 @@
import torch

from ..distances import CosineSimilarity
from ..reducers import AvgNonZeroReducer
from ..utils import common_functions as c_f
from ..utils import multilabel_loss_and_miner_utils as mlmu
from ..utils import loss_and_miner_utils as lmu
from .generic_pair_loss import GenericPairLoss


# adapted from https://github.com/HobbitLong/SupContrast
class MultiSupConLoss(GenericPairLoss):
    def __init__(self, num_classes, temperature=0.1, **kwargs):
        super().__init__(mat_based_loss=True, **kwargs)
        self.temperature = temperature
        self.add_to_recordable_attributes(list_of_names=["temperature"], is_stat=False)
        self.num_classes = num_classes

    def _compute_loss(self, mat, pos_mask, neg_mask):
        if pos_mask.bool().any() and neg_mask.bool().any():
            # if dealing with actual distances, use negative distances
            if not self.distance.is_inverted:
                mat = -mat
            mat = mat / self.temperature
            mat_max, _ = mat.max(dim=1, keepdim=True)
            mat = mat - mat_max.detach()  # for numerical stability

            denominator = lmu.logsumexp(
                mat, keep_mask=(pos_mask + neg_mask).bool(), add_one=False, dim=1
            )
            log_prob = mat - denominator
            mean_log_prob_pos = (pos_mask * log_prob).sum(dim=1) / (
                pos_mask.sum(dim=1) + c_f.small_val(mat.dtype)
            )

            return {
                "loss": {
                    "losses": -mean_log_prob_pos,
                    "indices": c_f.torch_arange_from_size(mat),
                    "reduction_type": "element",
                }
            }
        return self.zero_losses()

    def get_default_reducer(self):
        return AvgNonZeroReducer()

    def get_default_distance(self):
        return CosineSimilarity()

    def mat_based_loss(self, mat, indices_tuple):
        a1, p, a2, n = indices_tuple
        pos_mask, neg_mask = torch.zeros_like(mat), torch.zeros_like(mat)
        pos_mask[a1, p] = 1
        neg_mask[a2, n] = 1
        return self._compute_loss(mat, pos_mask, neg_mask)

    def compute_loss(self, embeddings, labels, indices_tuple, ref_emb, ref_labels):
        c_f.labels_or_indices_tuple_required(labels, indices_tuple)
        indices_tuple = mlmu.convert_to_pairs(
            indices_tuple, labels, self.num_classes, ref_labels, device=embeddings.device
        )
        if all(len(x) <= 1 for x in indices_tuple):
            return self.zero_losses()
        mat = self.distance(embeddings, ref_emb)
        return self.loss_method(mat, indices_tuple)

    def forward(
        self, embeddings, labels=None, indices_tuple=None, ref_emb=None, ref_labels=None
    ):
        """
        Args:
            embeddings: tensor of size (batch_size, embedding_size)
            labels: multilabels; a list of length batch_size whose elements are
                    lists (or 1d tensors) of class indices
            indices_tuple: tuple of size 3 for triplets (anchors, positives, negatives)
                            or size 4 for pairs (anchor1, positives, anchor2, negatives)
                            Can also be left as None
        Returns: the loss
        """
        self.reset_stats()
        mlmu.check_shapes_multilabels(embeddings, labels)
        ref_emb, ref_labels = mlmu.set_ref_emb(embeddings, labels, ref_emb, ref_labels)
        loss_dict = self.compute_loss(
            embeddings, labels, indices_tuple, ref_emb, ref_labels
        )
        self.add_embedding_regularization_to_loss_dict(loss_dict, embeddings)
        return self.reducer(loss_dict, embeddings, labels)
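For context, a minimal usage sketch of MultiSupConLoss (not part of this commit; it assumes the package is installed with these changes, and the sizes are toy values). Labels are passed as one list of class indices per sample, and positive pairs come from Jaccard overlap between label sets:

import torch
from pytorch_metric_learning.losses import MultiSupConLoss

embeddings = torch.randn(4, 128)
labels = [[0, 2], [2], [1, 3], [0]]  # one multilabel list per embedding

loss_func = MultiSupConLoss(num_classes=4, temperature=0.1)
loss = loss_func(embeddings, labels)  # scalar loss, averaged over anchors with nonzero loss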
Lines changed: 132 additions & 0 deletions

@@ -0,0 +1,132 @@
import torch

from ..utils import common_functions as c_f
# replace the functions of loss_and_miner_utils with multisupcon's
from ..utils import multilabel_loss_and_miner_utils as mlmu
from ..utils import loss_and_miner_utils as lmu
from ..utils.module_with_records import ModuleWithRecords
from .base_loss_wrapper import BaseLossWrapper


class CrossBatchMemory4MultiLabel(BaseLossWrapper, ModuleWithRecords):
    def __init__(self, loss, embedding_size, memory_size=1024, miner=None, **kwargs):
        super().__init__(loss=loss, **kwargs)
        self.loss = loss
        self.miner = miner
        self.embedding_size = embedding_size
        self.memory_size = memory_size
        self.num_classes = loss.num_classes
        self.reset_queue()
        self.add_to_recordable_attributes(
            list_of_names=["embedding_size", "memory_size", "queue_idx"], is_stat=False
        )

    @staticmethod
    def supported_losses():
        return ["MultiSupConLoss"]

    @classmethod
    def check_loss_support(cls, loss_name):
        if loss_name not in cls.supported_losses():
            raise Exception(f"CrossBatchMemory not supported for {loss_name}")

    def forward(self, embeddings, labels, indices_tuple=None, enqueue_mask=None):
        if indices_tuple is not None and enqueue_mask is not None:
            raise ValueError("indices_tuple and enqueue_mask are mutually exclusive")
        if enqueue_mask is not None:
            assert len(enqueue_mask) == len(embeddings)
        else:
            assert len(embeddings) <= len(self.embedding_memory)
        self.reset_stats()
        device = embeddings.device
        self.embedding_memory = c_f.to_device(
            self.embedding_memory, device=device, dtype=embeddings.dtype
        )

        if enqueue_mask is not None:
            emb_for_queue = embeddings[enqueue_mask]
            # labels are per-sample multilabel lists, so filter them with the mask explicitly
            labels_for_queue = [l for l, keep in zip(labels, enqueue_mask) if keep]
            embeddings = embeddings[~enqueue_mask]
            labels = [l for l, keep in zip(labels, enqueue_mask) if not keep]
            do_remove_self_comparisons = False
        else:
            emb_for_queue = embeddings
            labels_for_queue = labels
            do_remove_self_comparisons = True

        queue_batch_size = len(emb_for_queue)
        self.add_to_memory(emb_for_queue, labels_for_queue, queue_batch_size)

        if not self.has_been_filled:
            E_mem = self.embedding_memory[: self.queue_idx]
            L_mem = self.label_memory[: self.queue_idx]
        else:
            E_mem = self.embedding_memory
            L_mem = self.label_memory

        indices_tuple = self.create_indices_tuple(
            embeddings,
            labels,
            E_mem,
            L_mem,
            indices_tuple,
            do_remove_self_comparisons,
        )
        loss = self.loss(embeddings, labels, indices_tuple, E_mem, L_mem)
        return loss

    def add_to_memory(self, embeddings, labels, batch_size):
        self.curr_batch_idx = (
            torch.arange(self.queue_idx, self.queue_idx + batch_size) % self.memory_size
        )
        self.embedding_memory[self.curr_batch_idx] = embeddings.detach()
        # label_memory is a python list of multilabels, so assign slot by slot
        for i in range(len(self.curr_batch_idx)):
            self.label_memory[self.curr_batch_idx[i]] = labels[i]
        prev_queue_idx = self.queue_idx
        self.queue_idx = (self.queue_idx + batch_size) % self.memory_size
        if (not self.has_been_filled) and (self.queue_idx <= prev_queue_idx):
            self.has_been_filled = True

    def create_indices_tuple(
        self,
        embeddings,
        labels,
        E_mem,
        L_mem,
        input_indices_tuple,
        do_remove_self_comparisons,
    ):
        if self.miner:
            indices_tuple = self.miner(embeddings, labels, E_mem, L_mem)
        else:
            indices_tuple = mlmu.get_all_pairs_indices(labels, self.num_classes, L_mem)
        if do_remove_self_comparisons:
            indices_tuple = lmu.remove_self_comparisons(
                indices_tuple, self.curr_batch_idx, self.memory_size
            )

        if input_indices_tuple is not None:
            if len(input_indices_tuple) == 3 and len(indices_tuple) == 4:
                input_indices_tuple = mlmu.convert_to_pairs(
                    input_indices_tuple, labels, self.num_classes
                )
            elif len(input_indices_tuple) == 4 and len(indices_tuple) == 3:
                input_indices_tuple = mlmu.convert_to_triplets(
                    input_indices_tuple, labels
                )
            indices_tuple = c_f.concatenate_indices_tuples(
                indices_tuple, input_indices_tuple
            )

        return indices_tuple

    def reset_queue(self):
        self.register_buffer(
            "embedding_memory", torch.zeros(self.memory_size, self.embedding_size)
        )
        self.label_memory = [[] for _ in range(self.memory_size)]
        self.has_been_filled = False
        self.queue_idx = 0
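The memory in CrossBatchMemory4MultiLabel behaves as a ring buffer: add_to_memory writes each batch at queue_idx modulo memory_size and marks the queue as filled once the write pointer wraps back to or before its previous position. A small self-contained illustration of that index arithmetic (toy numbers, not from the commit):

import torch

memory_size, batch_size = 8, 3
queue_idx, has_been_filled = 0, False

for step in range(3):
    curr_batch_idx = torch.arange(queue_idx, queue_idx + batch_size) % memory_size
    prev_queue_idx = queue_idx
    queue_idx = (queue_idx + batch_size) % memory_size
    if not has_been_filled and queue_idx <= prev_queue_idx:
        has_been_filled = True
    print(curr_batch_idx.tolist(), queue_idx, has_been_filled)
# [0, 1, 2] 3 False
# [3, 4, 5] 6 False
# [6, 7, 0] 1 True   <- the pointer wrapped, so the queue counts as filled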
Lines changed: 101 additions & 0 deletions

@@ -0,0 +1,101 @@
import torch

from . import loss_and_miner_utils as lmu


def check_shapes_multilabels(embeddings, labels):
    if labels is not None and embeddings.shape[0] != len(labels):
        raise ValueError("Number of embeddings must equal number of labels")
    if labels is not None:
        if not isinstance(labels[0], (list, torch.Tensor)):
            raise ValueError("labels must be a list of 1d tensors or a list of lists")


def set_ref_emb(embeddings, labels, ref_emb, ref_labels):
    if ref_emb is None:
        ref_emb, ref_labels = embeddings, labels
    check_shapes_multilabels(ref_emb, ref_labels)
    return ref_emb, ref_labels


def convert_to_pairs(indices_tuple, labels, num_classes, ref_labels=None, device=None):
    """
    This returns anchor-positive and anchor-negative indices,
    regardless of what the input indices_tuple is
    Args:
        indices_tuple: tuple of tensors. Each tensor is 1d and specifies indices
                        within a batch
        labels: the multilabels, one list (or 1d tensor) of class indices per
                element in the batch
    """
    if indices_tuple is None:
        return get_all_pairs_indices(labels, num_classes, ref_labels, device=device)
    elif len(indices_tuple) == 4:
        return indices_tuple
    else:
        a, p, n = indices_tuple
        return a, p, a, n


def get_matches_and_diffs(labels, num_classes, ref_labels=None, device=None):
    if ref_labels is None:
        ref_labels = labels
    matches = jaccard(num_classes, labels, ref_labels, device=device)
    diffs = matches ^ 1
    if ref_labels is labels:
        matches.fill_diagonal_(0)
    return matches, diffs


def get_all_pairs_indices(labels, num_classes, ref_labels=None, device=None):
    """
    Given the multilabels, this will return 4 tensors.
    The first 2 tensors are the indices which form all positive pairs
    The second 2 tensors are the indices which form all negative pairs
    """
    matches, diffs = get_matches_and_diffs(labels, num_classes, ref_labels, device)
    a1_idx, p_idx = torch.where(matches)
    a2_idx, n_idx = torch.where(diffs)
    return a1_idx, p_idx, a2_idx, n_idx


def jaccard(n_classes, labels, ref_labels=None, threshold=0.3, device=torch.device("cpu")):
    if ref_labels is None:
        ref_labels = labels
    # convert multilabels to multi-hot vectors
    labels1 = [
        torch.nn.functional.one_hot(torch.as_tensor(label).long(), n_classes).sum(0)
        for label in labels
    ]
    labels2 = [
        torch.nn.functional.one_hot(torch.as_tensor(label).long(), n_classes).sum(0)
        for label in ref_labels
    ]
    # stack and convert to float for calculation convenience
    labels1 = torch.stack(labels1).float()
    labels2 = torch.stack(labels2).float()

    # compute jaccard similarity = intersection / union
    labels1_sum = labels1.sum(-1)
    labels2_sum = labels2.sum(-1)
    sum_of_sizes = labels1_sum.unsqueeze(1) + labels2_sum.unsqueeze(0)
    intersection = torch.mm(labels1, labels2.T)
    jaccard_sim = intersection / (sum_of_sizes - intersection)

    # return a 0/1 matrix marking pairs whose jaccard similarity exceeds the threshold
    label_matrix = torch.where(jaccard_sim > threshold, 1, 0).to(device)
    return label_matrix


def convert_to_triplets(indices_tuple, labels, num_classes=None, ref_labels=None, t_per_anchor=100):
    """
    This returns anchor-positive-negative triplets
    regardless of what the input indices_tuple is
    """
    if indices_tuple is None:
        if t_per_anchor == "all":
            return get_all_triplets_indices(labels, num_classes, ref_labels)
        else:
            return lmu.get_random_triplet_indices(
                labels, ref_labels, t_per_anchor=t_per_anchor
            )
    elif len(indices_tuple) == 3:
        return indices_tuple
    else:
        a1, p, a2, n = indices_tuple
        p_idx, n_idx = torch.where(a1.unsqueeze(1) == a2)
        return a1[p_idx], p[p_idx], n[n_idx]


def get_all_triplets_indices(labels, num_classes, ref_labels=None):
    matches, diffs = get_matches_and_diffs(labels, num_classes, ref_labels)
    triplets = matches.unsqueeze(2) * diffs.unsqueeze(1)
    return torch.where(triplets)
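As a worked example of the jaccard matching above (illustration only, plain torch, toy label sets): with 5 classes, label sets {0, 1} and {1, 2, 3} share one class, so intersection = 1, union = 2 + 3 - 1 = 4, and the Jaccard similarity is 0.25, below the default threshold of 0.3, so the pair is treated as negative:

import torch

n_classes = 5
a = torch.nn.functional.one_hot(torch.tensor([0, 1]), n_classes).sum(0).float()     # [1, 1, 0, 0, 0]
b = torch.nn.functional.one_hot(torch.tensor([1, 2, 3]), n_classes).sum(0).float()  # [0, 1, 1, 1, 0]

intersection = (a * b).sum()               # 1.0
union = a.sum() + b.sum() - intersection   # 4.0
print((intersection / union).item())       # 0.25 -> below 0.3, so a negative pair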
Lines changed: 40 additions & 0 deletions

@@ -0,0 +1,40 @@
import random
import unittest

import numpy as np
import torch

from pytorch_metric_learning.losses import (
    MultiSupConLoss,
    CrossBatchMemory4MultiLabel,
)


class TestMultiSupConLoss(unittest.TestCase):
    def test_multi_supcon_loss(self):
        n_cls = 10
        n_samples = 16
        n_dim = 256
        loss_func = MultiSupConLoss(num_classes=10)
        xbm_loss_func = CrossBatchMemory4MultiLabel(loss_func, n_dim, memory_size=128)

        # # test float32 and float64
        # for dtype in [torch.float32, torch.float64]:
        #     embeddings = torch.randn(n_samples, n_dim, dtype=dtype)
        #     labels = [random.sample(range(n_cls), np.random.randint(1, 4)) for i in range(n_samples)]
        #     loss = loss_func(embeddings, labels)
        #     self.assertTrue(loss >= 0)

        # # test cuda and cpu
        # for device in [torch.device("cpu"), torch.device("cuda")]:
        #     embeddings = torch.randn(n_samples, n_dim, dtype=dtype, device=device)
        #     labels = [random.sample(range(n_cls), np.random.randint(1, 4)) for i in range(n_samples)]
        #     loss = loss_func(embeddings, labels)
        #     self.assertTrue(loss >= 0)

        # test xbm
        n_batches = 10
        for b in range(n_batches):
            embeddings = torch.randn(n_samples, n_dim, dtype=torch.float32)
            labels = [random.sample(range(n_cls), np.random.randint(1, 4)) for i in range(n_samples)]
            loss = xbm_loss_func(embeddings, labels)
            self.assertTrue(loss >= 0)
