@@ -5,8 +5,12 @@ open GraphBLAS.FSharp.Backend.Quotes
55open Microsoft.FSharp .Control
66open Microsoft.FSharp .Quotations
77open GraphBLAS.FSharp .Backend .Objects .ClContext
8+ open GraphBLAS.FSharp .Backend .Objects .ArraysExtensions
89
910module Reduce =
11+ /// <summary>
12+ /// Generalized reduction pattern.
13+ /// </summary>
1014 let private runGeneral ( clContext : ClContext ) workGroupSize scan scanToCell =
1115
1216 fun ( processor : MailboxProcessor < _ >) ( inputArray : ClArray < 'a >) ->
@@ -45,8 +49,8 @@ module Reduce =
4549 let result =
4650 scanToCell processor fstVertices verticesLength
4751
48- processor.Post ( Msg.CreateFreeMsg ( firstVerticesArray))
49- processor.Post ( Msg.CreateFreeMsg ( secondVerticesArray))
52+ firstVerticesArray.Free processor
53+ secondVerticesArray.Free processor
5054
5155 result
5256
@@ -127,6 +131,13 @@ module Reduce =
127131
128132 resultCell
129133
134+ /// <summary>
135+ /// Summarize array elements.
136+ /// </summary>
137+ /// <param name="clContext">ClContext.</param>
138+ /// <param name="workGroupSize">Work group size.</param>
139+ /// <param name="op">Summation operation.</param>
140+ /// <param name="zero">Neutral element for summation.</param>
130141 let sum ( clContext : ClContext ) workGroupSize op zero =
131142
132143 let scan = scanSum clContext workGroupSize op zero
@@ -224,6 +235,12 @@ module Reduce =
224235
225236 resultCell
226237
238+ /// <summary>
239+ /// Reduce an array of values.
240+ /// </summary>
241+ /// <param name="clContext">ClContext.</param>
242+ /// <param name="workGroupSize">Work group size.</param>
243+ /// <param name="op">Reduction operation.</param>
227244 let reduce ( clContext : ClContext ) workGroupSize op =
228245
229246 let scan = scanReduce clContext workGroupSize op
@@ -235,3 +252,221 @@ module Reduce =
235252 runGeneral clContext workGroupSize scan scanToCell
236253
237254 fun ( processor : MailboxProcessor < _ >) ( array : ClArray < 'a >) -> run processor array
255+
256+ /// <summary>
257+ /// Reduction of an array of values by an array of keys.
258+ /// </summary>
259+ module ByKey =
260+ /// <summary>
261+ /// Reduce an array of values by key using a single work item.
262+ /// </summary>
263+ /// <param name="clContext">ClContext.</param>
264+ /// <param name="workGroupSize">Work group size.</param>
265+ /// <param name="reduceOp">Operation for reducing values.</param>
266+ /// <remarks>
267+ /// The length of the result must be calculated in advance.
268+ /// </remarks>
269+ let sequential ( clContext : ClContext ) workGroupSize ( reduceOp : Expr < 'a -> 'a -> 'a >) =
270+
271+ let kernel =
272+ <@ fun ( ndRange : Range1D ) length ( keys : ClArray < int >) ( values : ClArray < 'a >) ( reducedValues : ClArray < 'a >) ( reducedKeys : ClArray < int >) ->
273+
274+ let gid = ndRange.GlobalID0
275+
276+ if gid = 0 then
277+ let mutable currentKey = keys.[ 0 ]
278+ let mutable segmentResult = values.[ 0 ]
279+ let mutable segmentCount = 0
280+
281+ for i in 1 .. length - 1 do
282+ if currentKey = keys.[ i] then
283+ segmentResult <- (% reduceOp) segmentResult values.[ i]
284+ else
285+ reducedValues.[ segmentCount] <- segmentResult
286+ reducedKeys.[ segmentCount] <- currentKey
287+
288+ segmentCount <- segmentCount + 1
289+ currentKey <- keys.[ i]
290+ segmentResult <- values.[ i]
291+
292+ reducedKeys.[ segmentCount] <- currentKey
293+ reducedValues.[ segmentCount] <- segmentResult @>
294+
295+ let kernel = clContext.Compile kernel
296+
297+ fun ( processor : MailboxProcessor < _ >) allocationMode ( resultLength : int ) ( keys : ClArray < int >) ( values : ClArray < 'a >) ->
298+
299+ let reducedValues =
300+ clContext.CreateClArrayWithSpecificAllocationMode( allocationMode, resultLength)
301+
302+ let reducedKeys =
303+ clContext.CreateClArrayWithSpecificAllocationMode( allocationMode, resultLength)
304+
305+ let ndRange =
306+ Range1D.CreateValid( resultLength, workGroupSize)
307+
308+ let kernel = kernel.GetKernel()
309+
310+ processor.Post(
311+ Msg.MsgSetArguments
312+ ( fun () -> kernel.KernelFunc ndRange keys.Length keys values reducedValues reducedKeys)
313+ )
314+
315+ processor.Post( Msg.CreateRunMsg<_, _>( kernel))
316+
317+ reducedKeys, reducedValues
318+
319+ /// <summary>
320+ /// Reduces values by key. Each segment is reduced by one work item.
321+ /// </summary>
322+ /// <param name="clContext">ClContext.</param>
323+ /// <param name="workGroupSize">Work group size.</param>
324+ /// <param name="reduceOp">Operation for reducing values.</param>
325+ /// <remarks>
326+ /// The length of the result must be calculated in advance.
327+ /// </remarks>
328+ let segmentSequential ( clContext : ClContext ) workGroupSize ( reduceOp : Expr < 'a -> 'a -> 'a >) =
329+
330+ let kernel =
331+ <@ fun ( ndRange : Range1D ) uniqueKeyCount keysLength ( offsets : ClArray < int >) ( keys : ClArray < int >) ( values : ClArray < 'a >) ( reducedValues : ClArray < 'a >) ( reducedKeys : ClArray < int >) ->
332+
333+ let gid = ndRange.GlobalID0
334+
335+ if gid < uniqueKeyCount then
336+ let startPosition = offsets.[ gid]
337+
338+ let sourceKey = keys.[ startPosition]
339+ let mutable sum = values.[ startPosition]
340+
341+ let mutable currentPosition = startPosition + 1
342+
343+ while currentPosition < keysLength
344+ && sourceKey = keys.[ currentPosition] do
345+
346+ sum <- (% reduceOp) sum values.[ currentPosition]
347+ currentPosition <- currentPosition + 1
348+
349+ reducedValues.[ gid] <- sum
350+ reducedKeys.[ gid] <- sourceKey @>
351+
352+ let kernel = clContext.Compile kernel
353+
354+ fun ( processor : MailboxProcessor < _ >) allocationMode ( resultLength : int ) ( offsets : ClArray < int >) ( keys : ClArray < int >) ( values : ClArray < 'a >) ->
355+
356+ let reducedValues =
357+ clContext.CreateClArrayWithSpecificAllocationMode( allocationMode, resultLength)
358+
359+ let reducedKeys =
360+ clContext.CreateClArrayWithSpecificAllocationMode( allocationMode, resultLength)
361+
362+ let ndRange =
363+ Range1D.CreateValid( resultLength, workGroupSize)
364+
365+ let kernel = kernel.GetKernel()
366+
367+ processor.Post(
368+ Msg.MsgSetArguments
369+ ( fun () ->
370+ kernel.KernelFunc
371+ ndRange
372+ resultLength
373+ keys.Length
374+ offsets
375+ keys
376+ values
377+ reducedValues
378+ reducedKeys)
379+ )
380+
381+ processor.Post( Msg.CreateRunMsg<_, _>( kernel))
382+
383+ reducedKeys, reducedValues
384+
385+ /// <summary>
386+ /// Reduces values by key. One work group participates in the reduction.
387+ /// </summary>
388+ /// <param name="clContext">ClContext.</param>
389+ /// <param name="workGroupSize">Work group size.</param>
390+ /// <param name="reduceOp">Operation for reducing values.</param>
391+ /// <remarks>
392+ /// Reduces an array of values that does not exceed the size of the workgroup.
393+ /// The length of the result must be calculated in advance.
394+ /// </remarks>
395+ let oneWorkGroupSegments ( clContext : ClContext ) workGroupSize ( reduceOp : Expr < 'a -> 'a -> 'a >) =
396+
397+ let kernel =
398+ <@ fun ( ndRange : Range1D ) length ( keys : ClArray < int >) ( values : ClArray < 'a >) ( reducedValues : ClArray < 'a >) ( reducedKeys : ClArray < int >) ->
399+
400+ let lid = ndRange.GlobalID0
401+
402+ // load values to local memory (may be without it)
403+ let localValues = localArray< 'a> workGroupSize
404+
405+ if lid < length then
406+ localValues.[ lid] <- values.[ lid]
407+
408+ // load keys to local memory (mb without it)
409+ let localKeys = localArray< int> workGroupSize
410+
411+ if lid < length then
412+ localKeys.[ lid] <- keys.[ lid]
413+
414+ // get unique keys bitmap
415+ let localBitmap = localArray< int> workGroupSize
416+ localBitmap.[ lid] <- 0
417+ (% PreparePositions.getUniqueBitmapLocal< int>) localKeys workGroupSize lid localBitmap
418+
419+ // get positions from bitmap by prefix sum
420+ // ??? get bitmap by prefix sum in another kernel ???
421+ // ??? we can restrict prefix sum for 0 .. length ???
422+ (% SubSum.localIntPrefixSum) lid workGroupSize localBitmap
423+
424+ let uniqueKeysCount = localBitmap.[ length - 1 ]
425+
426+ if lid < uniqueKeysCount then
427+ let itemKeyId = lid + 1
428+
429+ let startKeyIndex =
430+ (% Search.Bin.lowerPosition) length itemKeyId localBitmap
431+
432+ match startKeyIndex with
433+ | Some startPosition ->
434+ let sourceKeyPosition = localBitmap.[ startPosition]
435+ let mutable currentSum = localValues.[ startPosition]
436+ let mutable currentIndex = startPosition + 1
437+
438+ while currentIndex < length
439+ && localBitmap.[ currentIndex] = sourceKeyPosition do
440+
441+ currentSum <- (% reduceOp) currentSum localValues.[ currentIndex]
442+ currentIndex <- currentIndex + 1
443+
444+ reducedKeys.[ lid] <- localKeys.[ startPosition]
445+ reducedValues.[ lid] <- currentSum
446+ | None -> () @>
447+
448+ let kernel = clContext.Compile kernel
449+
450+ fun ( processor : MailboxProcessor < _ >) allocationMode ( resultLength : int ) ( keys : ClArray < int >) ( values : ClArray < 'a >) ->
451+ if keys.Length > workGroupSize then
452+ failwith " The length of the value should not exceed the size of the workgroup"
453+
454+ let reducedValues =
455+ clContext.CreateClArrayWithSpecificAllocationMode( allocationMode, resultLength)
456+
457+ let reducedKeys =
458+ clContext.CreateClArrayWithSpecificAllocationMode( allocationMode, resultLength)
459+
460+ let ndRange =
461+ Range1D.CreateValid( resultLength, workGroupSize)
462+
463+ let kernel = kernel.GetKernel()
464+
465+ processor.Post(
466+ Msg.MsgSetArguments
467+ ( fun () -> kernel.KernelFunc ndRange keys.Length keys values reducedValues reducedKeys)
468+ )
469+
470+ processor.Post( Msg.CreateRunMsg<_, _>( kernel))
471+
472+ reducedKeys, reducedValues
0 commit comments