@@ -62,7 +62,7 @@ var attrs = exports.attributes = {
6262 } ,
6363 func : {
6464 valType : 'enumerated' ,
65- values : [ 'count' , 'sum' , 'avg' , 'min' , 'max' , 'first' , 'last' ] ,
65+ values : [ 'count' , 'sum' , 'avg' , 'median' , 'mode' , 'rms' , 'stddev' , ' min', 'max' , 'first' , 'last' ] ,
6666 dflt : 'first' ,
6767 role : 'info' ,
6868 description : [
@@ -71,7 +71,16 @@ var attrs = exports.attributes = {
7171 'in the `groups` array, are collected and reduced by this function.' ,
7272 '*count* is simply the number of values in the `groups` array, so does' ,
7373 'not even require the linked array to exist. *first* (*last*) is just' ,
74- 'the first (last) linked value.'
74+ 'the first (last) linked value.' ,
75+ 'Invalid values are ignored, so for example in *avg* they do not' ,
76+ 'contribute to either the numerator or the denominator.' ,
77+ 'Any data type (numeric, date, category) may be aggregated with any' ,
78+ 'function, even though in certain cases it is unlikely to make sense,' ,
79+ 'for example a sum of dates or average of categories.' ,
80+ '*median* will return the average of the two central values if there is' ,
81+ 'an even count. *mode* will return the first value to reach the maximum' ,
82+ 'count, in case of a tie. *stddev* uses the population formula' ,
83+ '(denominator N, not N-1)'
7584 ] . join ( ' ' )
7685 } ,
7786 enabled : {
@@ -246,7 +255,7 @@ function getAggregateFunction(func, conversions) {
246255 var total = 0 ;
247256 for ( var i = 0 ; i < indices . length ; i ++ ) {
248257 var vi = d2c ( array [ indices [ i ] ] ) ;
249- if ( vi !== BADNUM ) total += + vi ;
258+ if ( vi !== BADNUM ) total += vi ;
250259 }
251260 return c2d ( total ) ;
252261 } ;
@@ -259,7 +268,7 @@ function getAggregateFunction(func, conversions) {
259268 for ( var i = 0 ; i < indices . length ; i ++ ) {
260269 var vi = d2c ( array [ indices [ i ] ] ) ;
261270 if ( vi !== BADNUM ) {
262- total += + vi ;
271+ total += vi ;
263272 cnt ++ ;
264273 }
265274 }
@@ -271,7 +280,7 @@ function getAggregateFunction(func, conversions) {
271280 var out = Infinity ;
272281 for ( var i = 0 ; i < indices . length ; i ++ ) {
273282 var vi = d2c ( array [ indices [ i ] ] ) ;
274- if ( vi !== BADNUM ) out = Math . min ( out , + vi ) ;
283+ if ( vi !== BADNUM ) out = Math . min ( out , vi ) ;
275284 }
276285 return ( out === Infinity ) ? BADNUM : c2d ( out ) ;
277286 } ;
@@ -281,10 +290,89 @@ function getAggregateFunction(func, conversions) {
281290 var out = - Infinity ;
282291 for ( var i = 0 ; i < indices . length ; i ++ ) {
283292 var vi = d2c ( array [ indices [ i ] ] ) ;
284- if ( vi !== BADNUM ) out = Math . max ( out , + vi ) ;
293+ if ( vi !== BADNUM ) out = Math . max ( out , vi ) ;
285294 }
286295 return ( out === - Infinity ) ? BADNUM : c2d ( out ) ;
287296 } ;
297+
298+ case 'median' :
299+ return function ( array , indices ) {
300+ var sortCalc = [ ] ;
301+ for ( var i = 0 ; i < indices . length ; i ++ ) {
302+ var vi = d2c ( array [ indices [ i ] ] ) ;
303+ if ( vi !== BADNUM ) sortCalc . push ( vi ) ;
304+ }
305+ if ( ! sortCalc . length ) return BADNUM ;
306+ sortCalc . sort ( ) ;
307+ var mid = ( sortCalc . length - 1 ) / 2 ;
308+ return c2d ( ( sortCalc [ Math . floor ( mid ) ] + sortCalc [ Math . ceil ( mid ) ] ) / 2 ) ;
309+ } ;
310+
311+ case 'mode' :
312+ return function ( array , indices ) {
313+ var counts = { } ;
314+ var maxCnt = 0 ;
315+ var out = BADNUM ;
316+ for ( var i = 0 ; i < indices . length ; i ++ ) {
317+ var vi = d2c ( array [ indices [ i ] ] ) ;
318+ if ( vi !== BADNUM ) {
319+ var counti = counts [ vi ] = ( counts [ vi ] || 0 ) + 1 ;
320+ if ( counti > maxCnt ) {
321+ maxCnt = counti ;
322+ out = vi ;
323+ }
324+ }
325+ }
326+ return maxCnt ? c2d ( out ) : BADNUM ;
327+ } ;
328+
329+ case 'rms' :
330+ return function ( array , indices ) {
331+ var total = 0 ;
332+ var cnt = 0 ;
333+ for ( var i = 0 ; i < indices . length ; i ++ ) {
334+ var vi = d2c ( array [ indices [ i ] ] ) ;
335+ if ( vi !== BADNUM ) {
336+ total += vi * vi ;
337+ cnt ++ ;
338+ }
339+ }
340+ return cnt ? c2d ( Math . sqrt ( total / cnt ) ) : BADNUM ;
341+ } ;
342+
343+ case 'stddev' :
344+ return function ( array , indices ) {
345+ // balance numerical stability with performance:
346+ // so that we call d2c once per element but don't need to
347+ // store them, reference all to the first element
348+ var total = 0 ;
349+ var total2 = 0 ;
350+ var cnt = 1 ;
351+ var v0 = BADNUM ;
352+ var i ;
353+ for ( i = 0 ; i < indices . length && v0 === BADNUM ; i ++ ) {
354+ v0 = d2c ( array [ indices [ i ] ] ) ;
355+ }
356+ if ( v0 === BADNUM ) return BADNUM ;
357+
358+ for ( ; i < indices . length ; i ++ ) {
359+ var vi = d2c ( array [ indices [ i ] ] ) ;
360+ if ( vi !== BADNUM ) {
361+ var dv = vi - v0 ;
362+ total += dv ;
363+ total2 += dv * dv ;
364+ cnt ++ ;
365+ }
366+ }
367+
368+ // This is population std dev, if we want sample std dev
369+ // we would need (...) / (cnt - 1)
370+ // Also note there's no c2d here - that means for dates the result
371+ // is a number of milliseconds, and for categories it's a number
372+ // of category differences, which is not generically meaningful but
373+ // as in other cases we don't forbid it.
374+ return Math . sqrt ( ( total2 - ( total * total / cnt ) ) / cnt ) ;
375+ } ;
288376 }
289377}
290378
0 commit comments