Skip to content

Commit ebfae6f

Browse files
committed
Fix the "too many comparisons" issue (fixes #1)
1 parent 4b6329c commit ebfae6f

File tree

3 files changed

+75
-51
lines changed

3 files changed

+75
-51
lines changed

.vscode/settings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"xorshift"
1212
],
1313
"cSpell.ignoreWords": [
14+
"Demuth",
1415
"Durstenfeld",
1516
"Marsaglia"
1617
],

src/__tests__/merge-insertion.test.ts

Lines changed: 43 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ test('_binInsertIdx', async () => {
219219
expect(log).toStrictEqual([['O','H'],['O','L'],['O','N']])
220220
})
221221

222-
function testComp<T extends Comparable>(comp :Comparator<T>, maxCalls :number, log :[T,T][]|undefined = undefined) :Comparator<T> {
222+
function testComp<T extends Comparable>(c :Comparator<T>, maxCalls :number, log :[T,T][]|undefined = undefined) :Comparator<T> {
223223
let callCount = 0
224224
const pairMap :Map<T, Map<T, null>> = new Map()
225225
return async ([a,b]) => {
@@ -230,12 +230,10 @@ function testComp<T extends Comparable>(comp :Comparator<T>, maxCalls :number, l
230230
throw new Error(`duplicate comparison of '${a}' and '${b}'`)
231231
if (pairMap.has(a)) pairMap.get(a)?.set(b, null)
232232
else pairMap.set(a, new Map([[b, null]]))
233-
if (log!==undefined) log.push([a,b])
234233
if (++callCount > maxCalls)
235-
// See GH#1: I think the correct solution is optimizing my `_binInsert`?
236-
console.warn('too many Comparator calls',callCount)
237-
//TODO: Reenable: throw new Error(`too many Comparator calls (${callCount})`)
238-
return await comp([a,b])
234+
throw new Error(`too many Comparator calls (${callCount})`)
235+
if (log!==undefined) log.push([a,b])
236+
return await c([a,b])
239237
}
240238
}
241239

@@ -248,8 +246,8 @@ test('testComp', async () => {
248246
await expect( c(['y','x']) ).rejects.toThrow('duplicate comparison')
249247
await expect( c(['x','y']) ).rejects.toThrow('duplicate comparison')
250248
await expect( c(['x','z']) ).rejects.toThrow('duplicate comparison')
249+
await expect( c(['i','j']) ).rejects.toThrow('too many')
251250
expect(log).toStrictEqual([['x','y'],['x','z']])
252-
//TODO: Reenable: await expect( comp(['i','j']) ).rejects.toThrow('too many')
253251
})
254252

255253
test('xorshift32', () => {
@@ -287,32 +285,56 @@ describe('mergeInsertionSort', () => {
287285
await expect( mergeInsertionSort(['A','B','B'], comp) ).rejects.toThrow('duplicate')
288286
})
289287

290-
const lengthTable = Array.from({ length: 101 }, (_,i) => ({ len: i }))
291-
test.each(lengthTable)('array length $len', async ({len}) => {
288+
test('detailed comparisons', async () => {
289+
const log :[string,string][] = []
290+
expect( await mergeInsertionSort(['A','B','C','D','E'], testComp(comp, 7, log)) ).toStrictEqual(['A','B','C','D','E'])
291+
expect( log ).toStrictEqual([
292+
/* According to Knuth, the following is from: Demuth, H. B. (1956). Electronic Data Sorting [PhD thesis, Stanford University].
293+
* First, make and compare the pairs:
294+
* larger: B D (main chain)
295+
* smaller: A C leftover: E */
296+
['A','B'], ['C','D'],
297+
// Next, recursively sort the main chain
298+
['B','D'],
299+
/* Next, the smaller item of the first pair on the main chain can be moved to the main chain:
300+
* main chain: A B D
301+
* smaller: C leftover: E
302+
* And E can be inserted among A B D with two comparisons: */
303+
['E','B'],['E','D'],
304+
// Finally, C can be inserted into E A B, A E B, A B E, or A B (in our case the latter) with two more comparisons:
305+
['C','A'],['C','B'],
306+
])
307+
})
308+
309+
test.each( Array.from({ length: 101 }, (_, i) => ({ len: i })) )('array length $len', async ({len}) => {
292310
const array :Readonly<string[]> = Array.from({ length: len }, (_,i) => String.fromCharCode(65 + i))
293311
const a = Array.from(array)
294-
// in order array
295-
expect( await mergeInsertionSort(a, testComp(comp, mergeInsertionMaxComparisons(len))) ).toStrictEqual(array)
296-
// reverse order
297-
a.reverse()
298-
expect( await mergeInsertionSort(a, testComp(comp, mergeInsertionMaxComparisons(len))) ).toStrictEqual(array)
299-
// a few shuffles
300-
for (let i=0;i<10;i++) {
301-
fisherYates(a)
312+
try {
313+
// in order array
314+
expect( await mergeInsertionSort(a, testComp(comp, mergeInsertionMaxComparisons(len))) ).toStrictEqual(array)
315+
// reverse order
316+
a.reverse()
302317
expect( await mergeInsertionSort(a, testComp(comp, mergeInsertionMaxComparisons(len))) ).toStrictEqual(array)
318+
// a few shuffles
319+
for (let i=0;i<10;i++) {
320+
fisherYates(a)
321+
expect( await mergeInsertionSort(a, testComp(comp, mergeInsertionMaxComparisons(len))) ).toStrictEqual(array)
322+
}
323+
} catch (ex) {
324+
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
325+
throw new Error(`'${ex}' on ${a.join('')}`)
303326
}
304327
})
305328

306-
// 6! = 720, 7! = 5040, 8! = 40320, 9! = 362880 - already takes a fair amount of time, so don't increase this!
307-
const permTable = Array.from({ length: 8 }, (_,i) => ({ len: i }))
308-
test.each(permTable)('all permutations of length $len', async ({len}) => {
329+
// 6! = 720, 7! = 5040, 8! = 40320, 9! = 362880 - Be very careful with increasing this!
330+
test.each( Array.from({ length: 8 }, (_, i) => ({ len: i })) )('all perms of length $len', async ({len}) => {
309331
const array :Readonly<string[]> = Array.from({ length: len }, (_,i) => String.fromCharCode(65 + i))
310332
for (const perm of permutations(array))
311333
try {
312334
expect( await mergeInsertionSort(perm, testComp(comp, mergeInsertionMaxComparisons(len))) ).toStrictEqual(array)
313335
} catch (ex) {
314336
// eslint-disable-next-line @typescript-eslint/restrict-template-expressions
315-
throw new Error(`${ex} on ${perm.join('')}`)
337+
throw new Error(`'${ex}' on ${perm.join('')}`)
316338
}
317339
})
318340

src/merge-insertion.ts

Lines changed: 31 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,6 @@
3737
/** Turns on debugging output. */
3838
const DEBUG :boolean = false
3939

40-
/* istanbul ignore next */
41-
function assert(condition: unknown, msg?: string): asserts condition { if (!condition) throw new Error(msg) }
42-
4340
/** A type of object that can be compared by a `Comparator` and therefore sorted by `mergeInsertionSort`.
4441
* Must have sensible support for the equality operators. */
4542
export type Comparable = NonNullable<unknown>
@@ -100,7 +97,6 @@ export async function _binInsertIdx<T extends Comparable>(array :ReadonlyArray<T
10097
if (c) r = m - 1
10198
else l = m + 1
10299
}
103-
assert(l>=0 && l<=array.length) // paranoia
104100
/* istanbul ignore next */ if (DEBUG) console.debug('binary insert',item,'into',array,
105101
// eslint-disable-next-line @typescript-eslint/no-base-to-string, @typescript-eslint/restrict-template-expressions
106102
l===0?'at start':l===array.length?'at end':`before ${array[l]}`)
@@ -145,7 +141,7 @@ export default async function mergeInsertionSort<T extends Comparable>(array :Re
145141

146142
/* 4. Insert at the start of the sorted sequence the element that was paired with
147143
* the first and smallest element of the sorted sequence. */
148-
assert(mainChain.length>0) // Known due to the special cases at the beginning of this function.
144+
// Note that we know the main chain has at least one item here due to the special cases at the beginning of this function.
149145
mainChain.unshift({ item: mainChain[0]!.smaller! })
150146
delete mainChain[1]!.smaller
151147
/* istanbul ignore next */ if (DEBUG) console.debug('step 4: first pair', mainChain)
@@ -168,7 +164,8 @@ export default async function mergeInsertionSort<T extends Comparable>(array :Re
168164
* to step 4 above, the item that would have been labeled y₁ has actually already become element x₁, and
169165
* therefore the element that would have been x₁ is now x₂ and no longer has a paired yᵢ element. It
170166
* follows that the first paired elements are x₃ and y₃, and so the first unsorted element to be inserted
171-
* into the output sequence is y₃.
167+
* into the output sequence is y₃. Also noteworthy is that if the input had an odd number of elements,
168+
* the leftover unpaired element is treated as the last yᵢ element.
172169
*
173170
* ² In my opinion, this is lacking detail, and this seems to be true for the other two sources (Ford-Johnson
174171
* and Knuth) as well. So here is my attempt at adding more details to the explanation: The "main chain" is
@@ -177,39 +174,43 @@ export default async function mergeInsertionSort<T extends Comparable>(array :Re
177174
* with the various descriptions is that they don't explicitly explain that the insertion process shifts all
178175
* the indices of the array, and due to the nonlinear insertion order, this makes it tricky to keep track of
179176
* the correct array indices over which to perform the insertion search. So instead, below, I use a linear
180-
* search to find the main chain item being operated on each time, which is expensive, but much easier.
177+
* search to find the main chain item being operated on each time, which is expensive, but much easier. It
178+
* should also be noted that the leftover unpaired element, if there is one, gets inserted across the whole
179+
* main chain as it exists at the time of its insertion - because it may not be inserted last.
181180
*/
182181

183-
/* Build the groups to be inserted (explained above), skipping the already handled first two items.
184-
* (In the current implementation we don't need the original indices.) */
185-
const groups = _makeGroups(mainChain.slice(2)).map(g=>g[1])
182+
// Build the groups to be inserted (explained above), skipping the already handled first two items.
183+
const toInsert = mainChain.slice(2)
184+
/* If there was a leftover item from an odd input length, treat it as the last "smaller" item (special handling below).
185+
* We'll use the fact that at this point, all items in `toInsert` have their `.smaller` property set, so we'll mark
186+
* the leftover item as a special case by it not having its `.smaller` set. */
187+
if (array.length%2) toInsert.push({ item: array[array.length - 1]! })
188+
// In the current implementation we don't need the original indices.
189+
const groups = _makeGroups(toInsert).map(g=>g[1])
186190
/* istanbul ignore next */ if (DEBUG) console.debug('step pre5: groups',groups)
187191

188192
for (const pair of groups) {
189-
assert(pair.smaller!=undefined)
190-
// Locate the pair we're about to insert in the main chain, to limit the extent of the binary search (see also explanation above).
191-
const pairIdx = mainChain.findIndex(v => Object.is(v, pair))
192-
// Locate the index in the main chain where the pair's smaller item needs to be inserted, and insert it.
193-
const insertIdx = await _binInsertIdx(mainChain.slice(0,pairIdx).map(p => p.item), pair.smaller, comparator)
194-
/* istanbul ignore next */ if (DEBUG) console.debug('will insert',pair,'at index',insertIdx)
195-
mainChain.splice(insertIdx, 0, { item: pair.smaller })
193+
// Determine which item to insert and where.
194+
const [insertItem, insertIdx] :[T, number] = await (async () => {
195+
if (pair.smaller===undefined) // see explanation above
196+
// This is the leftover item, it gets inserted into the whole main chain.
197+
return [pair.item, await _binInsertIdx(mainChain.map(p => p.item), pair.item, comparator)]
198+
else {
199+
// Locate the pair we're about to insert in the main chain, to limit the extent of the binary search (see also explanation above).
200+
const pairIdx = mainChain.findIndex(v => Object.is(v, pair))
201+
// Locate the index in the main chain where the pair's smaller item needs to be inserted, and insert it.
202+
return [pair.smaller, await _binInsertIdx(mainChain.slice(0,pairIdx).map(p => p.item), pair.smaller, comparator)]
203+
}
204+
})()
205+
// Actually do the insertion.
206+
mainChain.splice(insertIdx, 0, { item: insertItem })
196207
delete pair.smaller
197-
/* istanbul ignore next */ if (DEBUG) console.debug('main chain is now',mainChain)
208+
/* istanbul ignore next */ if (DEBUG) console.debug('inserted',insertItem,'at index',insertIdx,'main chain is now',mainChain)
198209
}
199210

211+
/* istanbul ignore next */ if (DEBUG) console.debug('fordJohnson done', mainChain)
200212
// Turn the "main chain" data structure back into an array of values.
201-
const results = mainChain.map( pair => { assert(pair.smaller===undefined); return pair.item } )
202-
203-
// If there was a leftover item from an odd input length, insert that last.
204-
if (array.length%2) {
205-
const item = array[array.length-1]!
206-
const idx = await _binInsertIdx(results, item, comparator)
207-
/* istanbul ignore next */ if (DEBUG) console.debug('inserting odd item',item,'at',idx)
208-
results.splice(idx, 0, item)
209-
}
210-
211-
/* istanbul ignore next */ if (DEBUG) console.debug('fordJohnson done', results)
212-
return results
213+
return mainChain.map( pair => pair.item )
213214
}
214215

215216
/** Returns the maximum number of comparisons that `mergeInsertionSort` will perform depending on the input length `n`.

0 commit comments

Comments
 (0)