@@ -35,6 +35,7 @@ import org.jetbrains.kotlinx.dataframe.api.mean
3535import org.jetbrains.kotlinx.dataframe.api.meanFor
3636import org.jetbrains.kotlinx.dataframe.api.meanOf
3737import org.jetbrains.kotlinx.dataframe.api.median
38+ import org.jetbrains.kotlinx.dataframe.api.medianBy
3839import org.jetbrains.kotlinx.dataframe.api.medianFor
3940import org.jetbrains.kotlinx.dataframe.api.medianOf
4041import org.jetbrains.kotlinx.dataframe.api.min
@@ -43,6 +44,7 @@ import org.jetbrains.kotlinx.dataframe.api.minFor
4344import org.jetbrains.kotlinx.dataframe.api.minOf
4445import org.jetbrains.kotlinx.dataframe.api.minOrNull
4546import org.jetbrains.kotlinx.dataframe.api.percentile
47+ import org.jetbrains.kotlinx.dataframe.api.percentileBy
4648import org.jetbrains.kotlinx.dataframe.api.percentileFor
4749import org.jetbrains.kotlinx.dataframe.api.percentileOf
4850import org.jetbrains.kotlinx.dataframe.api.pivot
@@ -179,7 +181,7 @@ class Analyze : TestBase() {
179181 // SampleStart
180182 df.sum() // sum of values per every numeric column
181183 df.sum { age and weight } // sum of all values in `age` and `weight`
182- df.sumFor { age and weight } // sum of values per `age` and `weight` separately
184+ df.sumFor(skipNaN = true ) { age and weight } // sum of values per `age` and `weight` separately
183185 df.sumOf { (weight ? : 0 ) / age } // sum of expression evaluated for every row
184186 // SampleEnd
185187 }
@@ -190,7 +192,7 @@ class Analyze : TestBase() {
190192 // SampleStart
191193 df.min() // min of values per every comparable column
192194 df.min { age and weight } // min of all values in `age` and `weight`
193- df.minFor { age and weight } // min of values per `age` and `weight` separately
195+ df.minFor(skipNaN = true ) { age and weight } // min of values per `age` and `weight` separately
194196 df.minOf { (weight ? : 0 ) / age } // min of expression evaluated for every row
195197 df.minBy { age } // DataRow with minimal `age`
196198 // SampleEnd
@@ -214,8 +216,9 @@ class Analyze : TestBase() {
214216 // SampleStart
215217 df.median() // median of values per every comparable column
216218 df.median { age and weight } // median of all values in `age` and `weight`
217- df.medianFor { age and weight } // median of values per `age` and `weight` separately
219+ df.medianFor(skipNaN = true ) { age and weight } // median of values per `age` and `weight` separately
218220 df.medianOf { (weight ? : 0 ) / age } // median of expression evaluated for every row
221+ df.medianBy { age } // DataRow where the median age lies (lower-median for an even number of values)
219222 // SampleEnd
220223 }
221224
@@ -235,10 +238,11 @@ class Analyze : TestBase() {
235238 @TransformDataFrameExpressions
236239 fun percentileModes () {
237240 // SampleStart
238- df.percentile(25.0 ) // percentile of values per every comparable column
239- df.percentile(25.0 ) { age and weight } // percentile of all values in `age` and `weight`
240- df.percentileFor(25.0 ) { age and weight } // percentile of values per `age` and `weight` separately
241- df.percentileOf(25.0 ) { (weight ? : 0 ) / age } // percentile of expression evaluated for every row
241+ df.percentile(25.0 ) // 25th percentile of values per every comparable column
242+ df.percentile(75.0 ) { age and weight } // 75th percentile of all values in `age` and `weight`
243+ df.percentileFor(50.0 , skipNaN = true ) { age and weight } // 50th percentile of values per `age` and `weight` separately
244+ df.percentileOf(75.0 ) { (weight ? : 0 ) / age } // 75th percentile of expression evaluated for every row
245+ df.percentileBy(25.0 ) { age } // DataRow where the 25th percentile of `age` lies (index rounded using R3)
242246 // SampleEnd
243247 }
244248
@@ -247,9 +251,9 @@ class Analyze : TestBase() {
247251 fun percentileAggregations () {
248252 // SampleStart
249253 df.percentile(25.0 )
250- df.age.percentile(25 .0 )
251- df.groupBy { city }.percentile(25 .0 )
252- df.pivot { city }.percentile(25 .0 )
254+ df.age.percentile(75 .0 )
255+ df.groupBy { city }.percentile(50 .0 )
256+ df.pivot { city }.percentile(75 .0 )
253257 df.pivot { city }.groupBy { name.lastName }.percentile(25.0 )
254258 // SampleEnd
255259 }
@@ -259,8 +263,8 @@ class Analyze : TestBase() {
259263 fun meanModes () {
260264 // SampleStart
261265 df.mean() // mean of values per every numeric column
262- df.mean(skipNaN = true ) { age and weight } // mean of all values in `age` and `weight`, skips NA
263- df.meanFor(skipNaN = true ) { age and weight } // mean of values per `age` and `weight` separately, skips NA
266+ df.mean { age and weight } // mean of all values in `age` and `weight`
267+ df.meanFor(skipNaN = true ) { age and weight } // mean of values per `age` and `weight` separately, skips NaN
264268 df.meanOf { (weight ? : 0 ) / age } // mean of expression evaluated for every row
265269 // SampleEnd
266270 }
@@ -283,7 +287,7 @@ class Analyze : TestBase() {
283287 // SampleStart
284288 df.std() // std of values per every numeric column
285289 df.std { age and weight } // std of all values in `age` and `weight`
286- df.stdFor { age and weight } // std of values per `age` and `weight` separately, skips NA
290+ df.stdFor(skipNaN = true ) { age and weight } // std of values per `age` and `weight` separately, skips NaN
287291 df.stdOf { (weight ? : 0 ) / age } // std of expression evaluated for every row
288292 // SampleEnd
289293 }
0 commit comments