diff --git a/pkgs/collection/CHANGELOG.md b/pkgs/collection/CHANGELOG.md
index 743fddf2..3063b309 100644
--- a/pkgs/collection/CHANGELOG.md
+++ b/pkgs/collection/CHANGELOG.md
@@ -9,6 +9,8 @@
 - Add `PriorityQueue.of` constructor and optimize adding many elements.
 - Address diagnostics from `strict_top_level_inference`.
 - Run `dart format` with the new style.
+- Replace `quickSort` implementation with a more performant and robust
+  Pattern-defeating Quicksort (pdqsort) algorithm.
 
 ## 1.19.1
 
diff --git a/pkgs/collection/benchmark/dataset_generator.dart b/pkgs/collection/benchmark/dataset_generator.dart
new file mode 100644
index 00000000..b51c07f5
--- /dev/null
+++ b/pkgs/collection/benchmark/dataset_generator.dart
@@ -0,0 +1,54 @@
+// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+/// Centralized generation of datasets for all benchmarks.
+///
+/// Ensures all algorithms are tested on the exact same data.
+library;
+
+
+import 'dart:math';
+
+const size = 50000;
+const count = 128; // Number of lists to cycle through.
+
+final List<List<int>> random = _generateRandom();
+final List<List<int>> sorted = _generateSorted();
+final List<List<int>> reverse = _generateReverse();
+final List<List<int>> fewUnique = _generateFewUnique();
+final List<List<int>> pathological = _generatePathological();
+
+List<List<int>> _generateRandom() {
+  final r = Random(12345);
+  return List.generate(
+      count, (_) => List.generate(size, (_) => r.nextInt(2000)));
+}
+
+List<List<int>> _generateSorted() {
+  final base = List.generate(size, (i) => i);
+  return List.generate(count, (_) => List.of(base));
+}
+
+List<List<int>> _generateReverse() {
+  final base = List.generate(size, (i) => size - 1 - i);
+  return List.generate(count, (_) => List.of(base));
+}
+
+List<List<int>> _generateFewUnique() {
+  final r = Random(67890);
+  return List.generate(count, (_) => List.generate(size, (_) => r.nextInt(7)));
+}
+
+List<List<int>> _generatePathological() {
+  final base = List.generate(size, (i) => i);
+  // Builds an "organ pipe" list: even values ascending, then odd values
+  // descending, a shape that can push quicksort into unbalanced partitions.
+  final pathological = [
+    for (var i = 0; i < size; i++)
+      if (i.isEven) base[i],
+    for (var i = size - 1; i > 0; i--)
+      if (i.isOdd) base[i],
+  ];
+  return List.generate(count, (_) => List.of(pathological));
+}
diff --git a/pkgs/collection/benchmark/sort_benchmark.dart b/pkgs/collection/benchmark/sort_benchmark.dart
new file mode 100644
index 00000000..b4acd587
--- /dev/null
+++ b/pkgs/collection/benchmark/sort_benchmark.dart
@@ -0,0 +1,372 @@
+// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'dart:math';
+
+import 'package:benchmark_harness/benchmark_harness.dart';
+import 'package:collection/src/algorithms.dart' show quickSort;
+import 'package:collection/src/utils.dart';
+
+import 'dataset_generator.dart' as dataset_generator;
+
+// Sink variable to prevent the compiler from optimizing away benchmark code.
+int sink = 0;
+
+/// The final aggregated result of a benchmark.
+class BenchmarkResult {
+  final double mean;
+  final int median;
+  BenchmarkResult(this.mean, this.median);
+}
+
+/// A base class for our sort benchmarks to reduce boilerplate.
+/// Note: We extend `BenchmarkBase` for its structure but will use our own
+/// timing.
+abstract class SortBenchmarkBase extends BenchmarkBase {
+  final List<List<int>> datasets;
+  int _iteration = 0;
+  int _checksum = 0;
+
+  SortBenchmarkBase(super.name, this.datasets);
+
+  List<int> getNextList() {
+    // Cloning the list is crucial so each run sorts an unsorted list.
+    return List.of(datasets[_iteration++ % datasets.length]);
+  }
+
+  void updateChecksum(List<int> list) {
+    // A simple checksum to ensure the list is used and not optimized away.
+    sink ^= list.first ^ list.last ^ list[list.length >> 1] ^ _checksum++;
+  }
+
+  /// The core operation to be benchmarked.
+  void performSort();
+
+  @override
+  void run() => performSort();
+}
+
+// --- Benchmark Classes ---
+
+// Baseline (Old SDK quickSort)
+class QuickSortBaselineRandomBenchmark extends SortBenchmarkBase {
+  QuickSortBaselineRandomBenchmark()
+      : super('Baseline.Random', dataset_generator.random);
+  @override
+  void performSort() {
+    final list = getNextList();
+    quickSortBaseline(list, (a, b) => a.compareTo(b));
+    updateChecksum(list);
+  }
+}
+
+class QuickSortBaselineSortedBenchmark extends SortBenchmarkBase {
+  QuickSortBaselineSortedBenchmark()
+      : super('Baseline.Sorted', dataset_generator.sorted);
+  @override
+  void performSort() {
+    final list = getNextList();
+    quickSortBaseline(list, (a, b) => a.compareTo(b));
+    updateChecksum(list);
+  }
+}
+
+class QuickSortBaselineReverseBenchmark extends SortBenchmarkBase {
+  QuickSortBaselineReverseBenchmark()
+      : super('Baseline.Reverse', dataset_generator.reverse);
+  @override
+  void performSort() {
+    final list = getNextList();
+    quickSortBaseline(list, (a, b) => a.compareTo(b));
+    updateChecksum(list);
+  }
+}
+
+class QuickSortBaselineFewUniqueBenchmark extends SortBenchmarkBase {
+  QuickSortBaselineFewUniqueBenchmark()
+      : super('Baseline.FewUnique', dataset_generator.fewUnique);
+  @override
+  void performSort() {
+    final list = getNextList();
+    quickSortBaseline(list, (a, b) => a.compareTo(b));
+    updateChecksum(list);
+  }
+}
+
+class QuickSortBaselinePathologicalBenchmark extends SortBenchmarkBase {
+  QuickSortBaselinePathologicalBenchmark()
+      : super('Baseline.Pathological', dataset_generator.pathological);
+  @override
+  void performSort() {
+    final list = getNextList();
+    quickSortBaseline(list, (a, b) => a.compareTo(b));
+    updateChecksum(list);
+  }
+}
+
+// Enhancement (New pdqsort)
+class PdqSortEnhancementRandomBenchmark extends SortBenchmarkBase {
+  PdqSortEnhancementRandomBenchmark()
+      : super('Enhancement.Random', dataset_generator.random);
+  @override
+  void performSort() {
+    final list = getNextList();
+    quickSort(list, (a, b) => a.compareTo(b));
+    updateChecksum(list);
+  }
+}
+
+class PdqSortEnhancementSortedBenchmark extends SortBenchmarkBase {
+  PdqSortEnhancementSortedBenchmark()
+      : super('Enhancement.Sorted', dataset_generator.sorted);
+  @override
+  void performSort() {
+    final list = getNextList();
+    quickSort(list, (a, b) => a.compareTo(b));
+    updateChecksum(list);
+  }
+}
+
+class PdqSortEnhancementReverseBenchmark extends SortBenchmarkBase {
+  PdqSortEnhancementReverseBenchmark()
+      : super('Enhancement.Reverse', dataset_generator.reverse);
+  @override
+  void performSort() {
+    final list = getNextList();
+    quickSort(list, (a, b) => a.compareTo(b));
+    updateChecksum(list);
+  }
+}
+
+class PdqSortEnhancementFewUniqueBenchmark extends SortBenchmarkBase {
+  PdqSortEnhancementFewUniqueBenchmark()
+      : super('Enhancement.FewUnique', dataset_generator.fewUnique);
+  @override
+  void performSort() {
+    final list = getNextList();
+    quickSort(list, (a, b) => a.compareTo(b));
+    updateChecksum(list);
+  }
+}
+
+class PdqSortEnhancementPathologicalBenchmark extends SortBenchmarkBase {
+  PdqSortEnhancementPathologicalBenchmark()
+      : super('Enhancement.Pathological', dataset_generator.pathological);
+  @override
+  void performSort() {
+    final list = getNextList();
+    quickSort(list, (a, b) => a.compareTo(b));
+    updateChecksum(list);
+  }
+}
+
+// --- Main Execution Logic ---
+
+void main() {
+  const samples = 12;
+
+  final benchmarks = [
+    (
+      'Random',
+      QuickSortBaselineRandomBenchmark(),
+      PdqSortEnhancementRandomBenchmark()
+    ),
+    (
+      'Sorted',
+      QuickSortBaselineSortedBenchmark(),
+      PdqSortEnhancementSortedBenchmark()
+    ),
+    (
+      'Reverse Sorted',
+      QuickSortBaselineReverseBenchmark(),
+      PdqSortEnhancementReverseBenchmark()
+    ),
+    (
+      'Few Unique',
+      QuickSortBaselineFewUniqueBenchmark(),
+      PdqSortEnhancementFewUniqueBenchmark()
+    ),
+    (
+      'Pathological',
+      QuickSortBaselinePathologicalBenchmark(),
+      PdqSortEnhancementPathologicalBenchmark()
+    ),
+  ];
+
+  final results = <String, (BenchmarkResult, BenchmarkResult)>{};
+
+  print('Running benchmarks ($samples samples each)...');
+  for (final (condition, baseline, enhancement) in benchmarks) {
+    final baselineResult = _runBenchmark(baseline, samples);
+    final enhancementResult = _runBenchmark(enhancement, samples);
+    results[condition] = (baselineResult, enhancementResult);
+  }
+
+  _printResultsAsMarkdownTable(results);
+}
+
+BenchmarkResult _runBenchmark(SortBenchmarkBase benchmark, int samples) {
+  final times = <int>[];
+  // Warmup run (not timed).
+  benchmark.run();
+  for (var i = 0; i < samples; i++) {
+    final stopwatch = Stopwatch()..start();
+    benchmark.run();
+    stopwatch.stop();
+    times.add(stopwatch.elapsedMicroseconds);
+  }
+  times.sort();
+  final mean = times.reduce((a, b) => a + b) / samples;
+  final median = times[samples >> 1];
+  return BenchmarkResult(mean, median);
+}
+
+void _printResultsAsMarkdownTable(
+    Map<String, (BenchmarkResult, BenchmarkResult)> results) {
+  final separator = '=' * 80;
+  print('\n$separator');
+  print(
+      'Benchmark Results: pdqsort (Enhancement) vs. SDK quickSort (Baseline)');
+  print(separator);
+  print(
+      '''| Data Condition | Baseline (µs) | Enhancement (µs) | Improvement | Winner |''');
+  print(
+      '''| :------------------ | :------------ | :--------------- | :---------- | :------------ |''');
+  print(
+      '''| *Mean* | | | | |''');
+
+  for (final entry in results.entries) {
+    final condition = entry.key;
+    final (baseline, enhancement) = entry.value;
+
+    final improvement =
+        (baseline.mean - enhancement.mean) / baseline.mean * 100;
+    final winner = improvement > 0 ? 'Enhancement 🚀' : 'Baseline';
+    final improvementString =
+        '${improvement > 0 ? '+' : ''}${improvement.toStringAsFixed(2)}%';
+
+    final baselineMean = baseline.mean.round();
+    final enhancementMean = enhancement.mean.round();
+
+    print(
+        '''| ${condition.padRight(19)} | ${baselineMean.toString().padLeft(13)} | ${enhancementMean.toString().padLeft(16)} | ${improvementString.padLeft(11)} | $winner |''');
+  }
+
+  print(
+      '''| *Median* | | | | |''');
+
+  for (final entry in results.entries) {
+    final condition = entry.key;
+    final (baseline, enhancement) = entry.value;
+
+    final improvement =
+        (baseline.median - enhancement.median) / baseline.median * 100;
+    final winner = improvement > 0 ? 'Enhancement 🚀' : 'Baseline';
+    final improvementString =
+        '${improvement > 0 ? '+' : ''}${improvement.toStringAsFixed(2)}%';
+
+    // No rounding needed for median as it's an int.
+    final baselineMedian = baseline.median;
+    final enhancementMedian = enhancement.median;
+
+    print(
+        '''| ${condition.padRight(19)} | ${baselineMedian.toString().padLeft(13)} | ${enhancementMedian.toString().padLeft(16)} | ${improvementString.padLeft(11)} | $winner |''');
+  }
+
+  print(separator);
+}
+
+// ===========================================================================
+// BASELINE IMPLEMENTATION
+// A direct copy of the original SDK quickSort, renamed to avoid conflicts.
+// ===========================================================================
+
+void quickSortBaseline<E>(
+  List<E> elements,
+  int Function(E a, E b) compare, [
+  int start = 0,
+  int? end,
+]) {
+  end = RangeError.checkValidRange(start, end, elements.length);
+  _quickSortBaseline<E, E>(elements, identity, compare, Random(), start, end);
+}
+
+void _quickSortBaseline<E, K>(
+  List<E> list,
+  K Function(E element) keyOf,
+  int Function(K a, K b) compare,
+  Random random,
+  int start,
+  int end,
+) {
+  const minQuickSortLength = 24; // Same cutoff as the quickSort being replaced.
+  var length = end - start;
+  while (length >= minQuickSortLength) {
+    var pivotIndex = random.nextInt(length) + start;
+    var pivot = list[pivotIndex];
+    var pivotKey = keyOf(pivot);
+    var endSmaller = start;
+    var startGreater = end;
+    var startPivots = end - 1;
+    list[pivotIndex] = list[startPivots];
+    list[startPivots] = pivot;
+    while (endSmaller < startPivots) {
+      var current = list[endSmaller];
+      var relation = compare(keyOf(current), pivotKey);
+      if (relation < 0) {
+        endSmaller++;
+      } else {
+        startPivots--;
+        var currentTarget = startPivots;
+        list[endSmaller] = list[startPivots];
+        if (relation > 0) {
+          startGreater--;
+          currentTarget = startGreater;
+          list[startPivots] = list[startGreater];
+        }
+        list[currentTarget] = current;
+      }
+    }
+    if (endSmaller - start < end - startGreater) {
+      _quickSortBaseline(list, keyOf, compare, random, start, endSmaller);
+      start = startGreater;
+    } else {
+      _quickSortBaseline(list, keyOf, compare, random, startGreater, end);
+      end = endSmaller;
+    }
+    length = end - start;
+  }
+  _movingInsertionSortBaseline<E, K>(
+      list, keyOf, compare, start, end, list, start);
+}
+
+void _movingInsertionSortBaseline<E, K>(
+  List<E> list,
+  K Function(E element) keyOf,
+  int Function(K, K) compare,
+  int start,
+  int end,
+  List<E> target,
+  int targetOffset,
+) {
+  var length = end - start;
+  if (length == 0) return;
+  target[targetOffset] = list[start];
+  for (var i = 1; i < length; i++) {
+    var element = list[start + i];
+    var elementKey = keyOf(element);
+    var min = targetOffset;
+    var max = targetOffset + i;
+    while (min < max) {
+      var mid = min + ((max - min) >> 1);
+      if (compare(elementKey, keyOf(target[mid])) < 0) {
+        max = mid;
+      } else {
+        min = mid + 1;
+      }
+    }
+    target.setRange(min + 1, targetOffset + i + 1, target, min);
+    target[min] = element;
+  }
+}
diff --git a/pkgs/collection/lib/src/algorithms.dart b/pkgs/collection/lib/src/algorithms.dart
index 88d1c4f8..652bdd4e 100644
--- a/pkgs/collection/lib/src/algorithms.dart
+++ b/pkgs/collection/lib/src/algorithms.dart
@@ -482,29 +482,42 @@ void _merge<E, K>(
   );
 }
 
-/// Sort [elements] using a quick-sort algorithm.
+// ---------------------------------------------------------------------------
+// QuickSort based on Pattern-defeating Quicksort (pdqsort).
+// ---------------------------------------------------------------------------
+
+/// Sorts a list between [start] (inclusive) and [end] (exclusive).
 ///
-/// The elements are compared using [compare] on the elements.
-/// If [start] and [end] are provided, only that range is sorted.
+/// The sorting algorithm is a Pattern-defeating Quicksort (pdqsort), a
+/// hybrid of Quicksort, Heapsort, and Insertion Sort.
+/// It is not stable, but is typically very fast.
 ///
-/// Uses insertion sort for smaller sublists.
+/// This implementation is highly efficient for common data patterns
+/// (such as sorted, reverse-sorted, or with few unique values) and has a
+/// guaranteed worst-case time complexity of O(n*log(n)).
+///
+/// For a stable sort, use [mergeSort].
 void quickSort<E>(
   List<E> elements,
   int Function(E a, E b) compare, [
   int start = 0,
   int? end,
 ]) {
-  end = RangeError.checkValidRange(start, end, elements.length);
-  _quickSort<E, E>(elements, identity, compare, Random(), start, end);
+  quickSortBy<E, E>(elements, identity, compare, start, end);
 }
 
-/// Sort [list] using a quick-sort algorithm.
+/// Sorts a list between [start] (inclusive) and [end] (exclusive) by key.
 ///
-/// The elements are compared using [compare] on the value provided by [keyOf]
-/// on the element.
-/// If [start] and [end] are provided, only that range is sorted.
+/// The sorting algorithm is a Pattern-defeating Quicksort (pdqsort), a
+/// hybrid of Quicksort, Heapsort, and Insertion Sort.
+/// It is not stable, but is typically very fast.
 ///
-/// Uses insertion sort for smaller sublists.
+/// This implementation is highly efficient for common data patterns
+/// (such as sorted, reverse-sorted, or with few unique values) and has a
+/// guaranteed worst-case time complexity of O(n*log(n)).
+///
+/// Elements are ordered by the [compare] function applied to the result of
+/// the [keyOf] function. For a stable sort, use [mergeSortBy].
 void quickSortBy<E, K>(
   List<E> list,
   K Function(E element) keyOf,
@@ -513,53 +526,169 @@ void quickSortBy<E, K>(
   int? end,
 ]) {
   end = RangeError.checkValidRange(start, end, list.length);
-  _quickSort(list, keyOf, compare, Random(), start, end);
+  final length = end - start;
+  if (length < 2) return;
+  _pdqSortByImpl(list, keyOf, compare, start, end, _log2(length));
 }
 
-void _quickSort<E, K>(
-  List<E> list,
-  K Function(E element) keyOf,
-  int Function(K a, K b) compare,
-  Random random,
-  int start,
-  int end,
-) {
-  const minQuickSortLength = 24;
-  var length = end - start;
-  while (length >= minQuickSortLength) {
-    var pivotIndex = random.nextInt(length) + start;
-    var pivot = list[pivotIndex];
-    var pivotKey = keyOf(pivot);
-    var endSmaller = start;
-    var startGreater = end;
-    var startPivots = end - 1;
-    list[pivotIndex] = list[startPivots];
-    list[startPivots] = pivot;
-    while (endSmaller < startPivots) {
-      var current = list[endSmaller];
-      var relation = compare(keyOf(current), pivotKey);
-      if (relation < 0) {
-        endSmaller++;
+/// Minimum list size below which pdqsort uses insertion sort.
+const int _pdqInsertionSortThreshold = 24;
+
+/// Computes the floor of the base-2 logarithm of [n], or 0 if [n] is < 1.
+int _log2(int n) => n < 1 ? 0 : n.bitLength - 1;
+
+/// Swaps the elements at positions [i] and [j] in [elements].
+void _pdqSwap<E>(List<E> elements, int i, int j) {
+  final temp = elements[i];
+  elements[i] = elements[j];
+  elements[j] = temp;
+}
+
+/// A simple, non-binary insertion sort for the base case of pdqsort.
+void _pdqInsertionSort<E, K>(List<E> elements, K Function(E) keyOf,
+    int Function(K, K) compare, int start, int end) {
+  for (var i = start + 1; i < end; i++) {
+    final current = elements[i];
+    final key = keyOf(current);
+    var j = i - 1;
+    while (j >= start && compare(keyOf(elements[j]), key) > 0) {
+      elements[j + 1] = elements[j];
+      j--;
+    }
+    elements[j + 1] = current;
+  }
+}
+
+/// Heapsort implementation for the fallback case of pdqsort.
+void _pdqHeapSort<E, K>(List<E> elements, K Function(E) keyOf,
+    int Function(K, K) compare, int start, int end) {
+  final n = end - start;
+  for (var i = n ~/ 2 - 1; i >= 0; i--) {
+    _pdqSiftDown(elements, keyOf, compare, i, n, start);
+  }
+  for (var i = n - 1; i > 0; i--) {
+    _pdqSwap(elements, start, start + i);
+    _pdqSiftDown(elements, keyOf, compare, 0, i, start);
+  }
+}
+
+/// Sift-down operation for the heapsort fallback.
+void _pdqSiftDown<E, K>(List<E> elements, K Function(E) keyOf,
+    int Function(K, K) compare, int i, int n, int start) {
+  var root = i;
+  while (true) {
+    final left = 2 * root + 1;
+    if (left >= n) break; // Root is a leaf.
+
+    var largest = root;
+    var largestKey = keyOf(elements[start + largest]);
+
+    // Compare with left child.
+    var child = left;
+    var childKey = keyOf(elements[start + child]);
+    if (compare(largestKey, childKey) < 0) {
+      largest = child;
+      largestKey = childKey;
+    }
+
+    // Compare with right child if it exists.
+    child = left + 1;
+    if (child < n) {
+      childKey = keyOf(elements[start + child]);
+      if (compare(largestKey, childKey) < 0) {
+        largest = child;
+        largestKey = childKey;
+      }
+    }
+
+    if (largest == root) break;
+
+    _pdqSwap(elements, start + root, start + largest);
+    root = largest;
+  }
+}
+
+/// Sorts three elements at indices [a], [b], and [c].
+void _pdqSort3<E, K>(List<E> elements, K Function(E) keyOf,
+    int Function(K, K) compare, int a, int b, int c) {
+  if (compare(keyOf(elements[a]), keyOf(elements[b])) > 0) {
+    _pdqSwap(elements, a, b);
+  }
+  if (compare(keyOf(elements[b]), keyOf(elements[c])) > 0) {
+    _pdqSwap(elements, b, c);
+    if (compare(keyOf(elements[a]), keyOf(elements[b])) > 0) {
+      _pdqSwap(elements, a, b);
+    }
+  }
+}
+
+/// The core implementation of Pattern-defeating Quicksort.
+///
+/// [badAllowed] tracks how many bad pivot selections are allowed before
+/// falling back to heap sort.
+void _pdqSortByImpl<E, K>(List<E> elements, K Function(E) keyOf,
+    int Function(K, K) compare, int start, int end, int badAllowed) {
+  while (true) {
+    final size = end - start;
+    if (size < _pdqInsertionSortThreshold) {
+      _pdqInsertionSort(elements, keyOf, compare, start, end);
+      return;
+    }
+
+    if (badAllowed == 0) {
+      _pdqHeapSort(elements, keyOf, compare, start, end);
+      return;
+    }
+
+    final mid = start + size ~/ 2;
+    if (size > 80) {
+      // Ninther pivot selection for large arrays.
+      final s = size ~/ 8;
+      _pdqSort3(elements, keyOf, compare, start, start + s, start + 2 * s);
+      _pdqSort3(elements, keyOf, compare, mid - s, mid, mid + s);
+      _pdqSort3(
+          elements, keyOf, compare, end - 1 - 2 * s, end - 1 - s, end - 1);
+      _pdqSort3(elements, keyOf, compare, start + s, mid, end - 1 - s);
+    } else {
+      // Median-of-three for smaller arrays.
+      _pdqSort3(elements, keyOf, compare, start, mid, end - 1);
+    }
+
+    // 3-Way Partitioning (Dutch National Flag).
+    _pdqSwap(elements, start, mid);
+    final pivotKey = keyOf(elements[start]);
+
+    var less = start;
+    var equal = start;
+    var greater = end;
+
+    while (equal < greater) {
+      final comparison = compare(keyOf(elements[equal]), pivotKey);
+      if (comparison < 0) {
+        _pdqSwap(elements, less++, equal++);
+      } else if (comparison > 0) {
+        greater--;
+        _pdqSwap(elements, equal, greater);
       } else {
-        startPivots--;
-        var currentTarget = startPivots;
-        list[endSmaller] = list[startPivots];
-        if (relation > 0) {
-          startGreater--;
-          currentTarget = startGreater;
-          list[startPivots] = list[startGreater];
-        }
-        list[currentTarget] = current;
+        equal++;
       }
     }
-    if (endSmaller - start < end - startGreater) {
-      _quickSort(list, keyOf, compare, random, start, endSmaller);
-      start = startGreater;
+
+    final leftSize = less - start;
+    final rightSize = end - greater;
+
+    // A side smaller than size ~/ 8 counts as a bad pivot: use one allowance.
+    if (leftSize < size ~/ 8 || rightSize < size ~/ 8) {
+      badAllowed--;
+    }
+
+    // Recurse into the smaller side; the loop continues with the larger one.
+    if (leftSize < rightSize) {
+      _pdqSortByImpl(elements, keyOf, compare, start, less, badAllowed);
+      start = greater; // Tail-call optimization on the larger partition
     } else {
-      _quickSort(list, keyOf, compare, random, startGreater, end);
-      end = endSmaller;
+      _pdqSortByImpl(elements, keyOf, compare, greater, end, badAllowed);
+      end = less; // Tail-call optimization on the larger partition
     }
-    length = end - start;
   }
-  _movingInsertionSort<E, K>(list, keyOf, compare, start, end, list, start);
 }