@@ -596,29 +596,67 @@ delete = go
596596{- -------------------------------------------------------------------
597597 Subset
598598--------------------------------------------------------------------}
599- -- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal).
599+ -- | /O(m*log(n\/m + 1)), m <= n/.
600+ -- @(s1 \`isProperSubsetOf\` s2)@ indicates whether @s1@ is a
601+ -- proper subset of @s2@.
602+ --
603+ -- @
604+ -- s1 \`isProperSubsetOf\` s2 = s1 ``isSubsetOf`` s2 && s1 /= s2
605+ -- @
600606isProperSubsetOf :: Ord a => Set a -> Set a -> Bool
601607isProperSubsetOf s1 s2
602- = ( size s1 < size s2) && (isSubsetOf s1 s2)
608+ = size s1 < size s2 && isSubsetOfX s1 s2
603609#if __GLASGOW_HASKELL__
604610{-# INLINABLE isProperSubsetOf #-}
605611#endif
606612
607613
608- -- | /O(n+m)/. Is this a subset?
609- -- @(s1 \`isSubsetOf\` s2)@ tells whether @s1@ is a subset of @s2@.
614+ -- | /O(m*log(n\/m + 1)), m <= n/.
615+ -- @(s1 \`isSubsetOf\` s2)@ indicates whether @s1@ is a subset of @s2@.
616+ --
617+ -- @
618+ -- s1 \`isSubsetOf\` s2 = all (``member`` s2) s1
619+ -- s1 \`isSubsetOf\` s2 = null (s1 ``difference`` s2)
620+ -- s1 \`isSubsetOf\` s2 = s1 ``union`` s2 == s2
621+ -- s1 \`isSubsetOf\` s2 = s1 ``intersection`` s2 == s1
622+ -- @
610623isSubsetOf :: Ord a => Set a -> Set a -> Bool
611624isSubsetOf t1 t2
612- = ( size t1 <= size t2) && ( isSubsetOfX t1 t2)
625+ = size t1 <= size t2 && isSubsetOfX t1 t2
613626#if __GLASGOW_HASKELL__
614627{-# INLINABLE isSubsetOf #-}
615628#endif
616629
630+ -- Test whether a set is a subset of another without the *initial*
631+ -- size test.
632+ --
633+ -- This function is structured very much like `difference`, `union`,
634+ -- and `intersection`. Whereas the bounds proofs for those in Blelloch
635+ -- et al. needed to account for both "split work" and "merge work", we
636+ -- only have to worry about split work here, which is the same as in
637+ -- those functions.
617638isSubsetOfX :: Ord a => Set a -> Set a -> Bool
618639isSubsetOfX Tip _ = True
619640isSubsetOfX _ Tip = False
641+ -- Skip the final split when we hit a singleton.
642+ isSubsetOfX (Bin 1 x _ _) t = member x t
620643isSubsetOfX (Bin _ x l r) t
621- = found && isSubsetOfX l lt && isSubsetOfX r gt
644+ = found &&
645+ -- Cheap size checks can sometimes save expensive recursive calls when the
646+ -- result will be False. Suppose we check whether [1..10] (with root 4) is
647+ -- a subset of [0..9]. After the first split, we have to check if [1..3] is
648+ -- a subset of [0..3] and if [5..10] is a subset of [5..9]. But we can bail
649+ -- immediately because size [5..10] > size [5..9].
650+ --
651+ -- Why not just call `isSubsetOf` on each side to do the size checks?
652+ -- Because that could make a recursive call on the left even though the
653+ -- size check would fail on the right. In principle, we could take this to
654+ -- extremes by maintaining a queue of pairs of sets to be checked, working
655+ -- through the tree level-wise. But that would impose higher administrative
656+ -- costs without obvious benefits. It might be worth considering if we find
657+ -- a way to use it to tighten the bounds in some useful/comprehensible way.
658+ size l <= size lt && size r <= size gt &&
659+ isSubsetOfX l lt && isSubsetOfX r gt
622660 where
623661 (lt,found,gt) = splitMember x t
624662#if __GLASGOW_HASKELL__
@@ -628,19 +666,25 @@ isSubsetOfX (Bin _ x l r) t
628666{- -------------------------------------------------------------------
629667 Disjoint
630668--------------------------------------------------------------------}
631- -- | /O(n+m) /. Check whether two sets are disjoint (i.e. their intersection
632- -- is empty).
669+ -- | /O(m*log(n\/m + 1)), m <= n/. Check whether two sets are disjoint
670+ -- (i.e., their intersection is empty).
633671--
634672-- > disjoint (fromList [2,4,6]) (fromList [1,3]) == True
635673-- > disjoint (fromList [2,4,6,8]) (fromList [2,3,5,7]) == False
636674-- > disjoint (fromList [1,2]) (fromList [1,2,3,4]) == False
637675-- > disjoint (fromList []) (fromList []) == True
638676--
677+ -- @
678+ -- xs ``disjoint`` ys = null (xs ``intersection`` ys)
679+ -- @
680+ --
639681-- @since 0.5.11
640682
641683disjoint :: Ord a => Set a -> Set a -> Bool
642684disjoint Tip _ = True
643685disjoint _ Tip = True
686+ -- Avoid a split for the singleton case.
687+ disjoint (Bin 1 x _ _) t = x `notMember` t
644688disjoint (Bin _ x l r) t
645689 -- Analogous implementation to `isSubsetOfX`
646690 = not found && disjoint l lt && disjoint r gt
0 commit comments