Automated commit of generated code

github-actions[bot] · github-actions[bot] · commit bde1dbac3fd9 · 2025-05-01T14:28:47.000Z
diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/NumberTypeUtils.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/NumberTypeUtils.kt
@@ -6,6 +6,7 @@ import java.math.BigDecimal
 import java.math.BigInteger
 import kotlin.reflect.KClass
 import kotlin.reflect.KType
+import kotlin.reflect.full.isSubtypeOf
 import kotlin.reflect.full.withNullability
 import kotlin.reflect.typeOf
 
@@ -307,10 +308,24 @@ internal fun Sequence<Number?>.convertToUnifiedNumberType(
                 "Cannot find unified number type of types: ${types.joinToString { renderType(it) }}",
             )
     }
-    val converter = createConverter(typeOf<Number>(), commonNumberType)!! as (Number) -> Number?
-    return map {
-        if (it == null) return@map null
-        converter(it) ?: error("Can not convert $it to $commonNumberType")
+    require(commonNumberType.isSubtypeOf(typeOf<Number?>())) {
+        "Cannot convert numbers to $commonNumberType; it is not a subtype of Number?"
+    }
+    return when (commonNumberType) {
+        nothingType -> {
+            require(null !in this) { "Cannot unify numbers to Nothing; it contains nulls" }
+            this
+        }
+
+        nullableNothingType -> this
+
+        else -> {
+            val converter = createConverter(typeOf<Number>(), commonNumberType)!! as (Number) -> Number?
+            this.map {
+                if (it == null) return@map null
+                converter(it) ?: error("Can not convert $it to $commonNumberType")
+            }
+        }
     }
 }
 
diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Analyze.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Analyze.kt
@@ -35,6 +35,7 @@ import org.jetbrains.kotlinx.dataframe.api.mean
 import org.jetbrains.kotlinx.dataframe.api.meanFor
 import org.jetbrains.kotlinx.dataframe.api.meanOf
 import org.jetbrains.kotlinx.dataframe.api.median
+import org.jetbrains.kotlinx.dataframe.api.medianBy
 import org.jetbrains.kotlinx.dataframe.api.medianFor
 import org.jetbrains.kotlinx.dataframe.api.medianOf
 import org.jetbrains.kotlinx.dataframe.api.min
@@ -43,6 +44,7 @@ import org.jetbrains.kotlinx.dataframe.api.minFor
 import org.jetbrains.kotlinx.dataframe.api.minOf
 import org.jetbrains.kotlinx.dataframe.api.minOrNull
 import org.jetbrains.kotlinx.dataframe.api.percentile
+import org.jetbrains.kotlinx.dataframe.api.percentileBy
 import org.jetbrains.kotlinx.dataframe.api.percentileFor
 import org.jetbrains.kotlinx.dataframe.api.percentileOf
 import org.jetbrains.kotlinx.dataframe.api.pivot
@@ -179,7 +181,7 @@ class Analyze : TestBase() {
         // SampleStart
         df.sum() // sum of values per every numeric column
         df.sum { age and weight } // sum of all values in `age` and `weight`
-        df.sumFor { age and weight } // sum of values per `age` and `weight` separately
+        df.sumFor(skipNaN = true) { age and weight } // sum of values per `age` and `weight` separately
         df.sumOf { (weight ?: 0) / age } // sum of expression evaluated for every row
         // SampleEnd
     }
@@ -190,7 +192,7 @@ class Analyze : TestBase() {
         // SampleStart
         df.min() // min of values per every comparable column
         df.min { age and weight } // min of all values in `age` and `weight`
-        df.minFor { age and weight } // min of values per `age` and `weight` separately
+        df.minFor(skipNaN = true) { age and weight } // min of values per `age` and `weight` separately
         df.minOf { (weight ?: 0) / age } // min of expression evaluated for every row
         df.minBy { age } // DataRow with minimal `age`
         // SampleEnd
@@ -214,8 +216,9 @@ class Analyze : TestBase() {
         // SampleStart
         df.median() // median of values per every comparable column
         df.median { age and weight } // median of all values in `age` and `weight`
-        df.medianFor { age and weight } // median of values per `age` and `weight` separately
+        df.medianFor(skipNaN = true) { age and weight } // median of values per `age` and `weight` separately
         df.medianOf { (weight ?: 0) / age } // median of expression evaluated for every row
+        df.medianBy { age } // DataRow where the median age lies (lower-median for an even number of values)
         // SampleEnd
     }
 
@@ -235,10 +238,11 @@ class Analyze : TestBase() {
     @TransformDataFrameExpressions
     fun percentileModes() {
         // SampleStart
-        df.percentile(25.0) // percentile of values per every comparable column
-        df.percentile(25.0) { age and weight } // percentile of all values in `age` and `weight`
-        df.percentileFor(25.0) { age and weight } // percentile of values per `age` and `weight` separately
-        df.percentileOf(25.0) { (weight ?: 0) / age } // percentile of expression evaluated for every row
+        df.percentile(25.0) // 25th percentile of values per every comparable column
+        df.percentile(75.0) { age and weight } // 75th percentile of all values in `age` and `weight`
+        df.percentileFor(50.0, skipNaN = true) { age and weight } // 50th percentile of values per `age` and `weight` separately
+        df.percentileOf(75.0) { (weight ?: 0) / age } // 75th percentile of expression evaluated for every row
+        df.percentileBy(25.0) { age } // DataRow where the 25th percentile of `age` lies (index rounded using R3)
         // SampleEnd
     }
 
@@ -247,9 +251,9 @@ class Analyze : TestBase() {
     fun percentileAggregations() {
         // SampleStart
         df.percentile(25.0)
-        df.age.percentile(25.0)
-        df.groupBy { city }.percentile(25.0)
-        df.pivot { city }.percentile(25.0)
+        df.age.percentile(75.0)
+        df.groupBy { city }.percentile(50.0)
+        df.pivot { city }.percentile(75.0)
         df.pivot { city }.groupBy { name.lastName }.percentile(25.0)
         // SampleEnd
     }
@@ -259,8 +263,8 @@ class Analyze : TestBase() {
     fun meanModes() {
         // SampleStart
         df.mean() // mean of values per every numeric column
-        df.mean(skipNaN = true) { age and weight } // mean of all values in `age` and `weight`, skips NA
-        df.meanFor(skipNaN = true) { age and weight } // mean of values per `age` and `weight` separately, skips NA
+        df.mean { age and weight } // mean of all values in `age` and `weight`
+        df.meanFor(skipNaN = true) { age and weight } // mean of values per `age` and `weight` separately, skips NaN
-        df.meanOf { (weight ?: 0) / age } // median of expression evaluated for every row
+        df.meanOf { (weight ?: 0) / age } // mean of expression evaluated for every row
         // SampleEnd
     }
@@ -283,7 +287,7 @@ class Analyze : TestBase() {
         // SampleStart
         df.std() // std of values per every numeric column
         df.std { age and weight } // std of all values in `age` and `weight`
-        df.stdFor { age and weight } // std of values per `age` and `weight` separately, skips NA
+        df.stdFor(skipNaN = true) { age and weight } // std of values per `age` and `weight` separately, skips NaN
         df.stdOf { (weight ?: 0) / age } // std of expression evaluated for every row
         // SampleEnd
     }
diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/sum.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/sum.kt
@@ -5,6 +5,8 @@ import io.kotest.matchers.doubles.shouldBeNaN
 import io.kotest.matchers.floats.shouldBeNaN
 import io.kotest.matchers.shouldBe
 import io.kotest.matchers.string.shouldContain
+import org.jetbrains.kotlinx.dataframe.DataColumn
+import org.jetbrains.kotlinx.dataframe.api.cast
 import org.jetbrains.kotlinx.dataframe.api.columnOf
 import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
 import org.jetbrains.kotlinx.dataframe.api.isEmpty
@@ -14,7 +16,9 @@ import org.jetbrains.kotlinx.dataframe.api.sum
 import org.jetbrains.kotlinx.dataframe.api.sumFor
 import org.jetbrains.kotlinx.dataframe.api.sumOf
 import org.jetbrains.kotlinx.dataframe.api.toDataFrame
+import org.jetbrains.kotlinx.dataframe.impl.nullableNothingType
 import org.junit.Test
+import kotlin.reflect.typeOf
 
 class SumTests {
 
@@ -46,6 +50,28 @@ class SumTests {
         df.sumOf { value() } shouldBe expected
     }
 
+    @Test
+    fun `empty column with types`() {
+        val emptyIntCol by columnOf<Int?>(null, null)
+        emptyIntCol.sum() shouldBe 0 // "empty" here means the column holds only nulls; Int? is expected to sum to 0
+
+        // only-nulls column declared as Number? (no concrete number type): sum is expected to be 0.0
+        val emptyNumberColumn = DataColumn.createValueColumn<Number?>(
+            "emptyNumberColumn",
+            listOf(null, null),
+            typeOf<Number?>(),
+        )
+        emptyNumberColumn.sum() shouldBe 0.0
+
+        // only-nulls column declared with the nullable Nothing type, then cast to Number?: sum is expected to be 0.0
+        val emptyNothingColumn = DataColumn.createValueColumn(
+            "emptyNothingColumn",
+            listOf(null, null),
+            nullableNothingType,
+        )
+        emptyNothingColumn.cast<Number?>().sum() shouldBe 0.0
+    }
+
     @Test
     fun `test multiple columns`() {
         val value1 by columnOf(1, 2, 3)