Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -5940,6 +5940,12 @@ public final class org/jetbrains/kotlinx/dataframe/impl/api/UnfoldKt {
public static final fun unfoldImpl (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/reflect/KType;Lkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
}

public final class org/jetbrains/kotlinx/dataframe/impl/api/UpdateException : java/lang/IllegalStateException, org/jetbrains/kotlinx/dataframe/exceptions/DataFrameError {
public fun <init> (Ljava/lang/String;Ljava/lang/Throwable;)V
public synthetic fun <init> (Ljava/lang/String;Ljava/lang/Throwable;ILkotlin/jvm/internal/DefaultConstructorMarker;)V
public fun getMessage ()Ljava/lang/String;
}

public final class org/jetbrains/kotlinx/dataframe/impl/api/UpdateKt {
public static final fun updateImpl (Lorg/jetbrains/kotlinx/dataframe/api/Update;Lkotlin/jvm/functions/Function3;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}
Expand Down
13 changes: 11 additions & 2 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.addPath
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind
import org.jetbrains.kotlinx.dataframe.impl.getValuesType
import org.jetbrains.kotlinx.dataframe.impl.nothingType
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.jetbrains.kotlinx.dataframe.util.CHUNKED_IMPL_IMPORT
import org.jetbrains.kotlinx.dataframe.util.CREATE
Expand Down Expand Up @@ -216,8 +217,16 @@ public interface DataColumn<out T> : BaseColumn<T> {
infer: Infer = Infer.None,
): DataColumn<T> = createByType(name, values, typeOf<T>(), infer)

/** Creates an empty [DataColumn] with given [name]. */
public fun empty(name: String = ""): AnyCol = createValueColumn(name, emptyList<Unit>(), typeOf<Unit>())
/**
* Creates an empty [DataColumn] with given [name].
* @see emptyOf
*/
public fun empty(name: String = ""): DataColumn<Nothing> =
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have two operations that use `empty`: `concatImpl` and `getColumn`. Please check whether they are affected by this change. Otherwise, it's quite neat.

Copy link
Collaborator Author

@Jolanrensen Jolanrensen Nov 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems concat fails on dataframes without rows — not because of Nothing columns, but because it uses `::class` reflection in `createColumnGuessingType()`. Interesting.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed it in the next commit :)

createValueColumn(name, emptyList<Unit>(), nothingType).cast()

/**
 * Creates a [DataColumn] named [name] that contains no values and is typed as [T].
 */
public inline fun <reified T> emptyOf(name: String = ""): DataColumn<T> {
    val noValues = emptyList<T>()
    return createValueColumn(name, noValues, typeOf<T>()).cast()
}

// region deprecated

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.TransformableColumnSet
import org.jetbrains.kotlinx.dataframe.impl.columns.addPath
import org.jetbrains.kotlinx.dataframe.impl.columns.onResolve
import org.jetbrains.kotlinx.dataframe.impl.columns.transform
import org.jetbrains.kotlinx.dataframe.impl.nullableNothingType
import org.jetbrains.kotlinx.dataframe.impl.owner
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
import kotlin.reflect.KProperty
Expand All @@ -40,7 +41,10 @@ import kotlin.reflect.KProperty
public fun <T> DataColumn<T>.all(predicate: Predicate<T>): Boolean = values.all(predicate)

/** Returns `true` if all [values] are `null` or [values] is empty. */
public fun <C> DataColumn<C>.allNulls(): Boolean = size == 0 || all { it == null }
/**
 * Returns `true` when this column is empty, when its type is `Nothing?`,
 * or when every value in it is `null`.
 */
public fun <C> DataColumn<C>.allNulls(): Boolean =
    when {
        size == 0 -> true
        // the type check is done before the values are scanned
        type() == nullableNothingType -> true
        else -> all { it == null }
    }

// endregion

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,11 @@ public interface ColsOfColumnsSelectionDsl {
*
* This function operates solely on columns at the top-level.
*
* __NOTE:__ Null-filled columns of type [Nothing?][Nothing] will be included when selecting [`colsOf`][colsOf]`<T?>()`.
* This is because [Nothing][Nothing] is considered a subtype of all other types in Kotlin.
* To exclude these columns, call `.`[filter][ColumnsSelectionDsl.filter]` { !it.`[allNulls][DataColumn.allNulls]`() }`
* after it.
*
* ### Check out: [Grammar]
*
* #### For example:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,18 @@ package org.jetbrains.kotlinx.dataframe.impl
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.AnyRow
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.api.asDataColumn
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.concat
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
import kotlin.reflect.KClass
import kotlin.reflect.KType
import kotlin.reflect.full.isSubclassOf
import kotlin.reflect.full.isSubtypeOf
import kotlin.reflect.full.withNullability
import kotlin.reflect.jvm.jvmErasure
import kotlin.reflect.typeOf

public interface DataCollector<T> {

Expand All @@ -38,17 +40,29 @@ internal abstract class DataCollectorBase<T>(initCapacity: Int) : DataCollector<
data.add(value)
}

protected fun createColumn(name: String, type: KType): DataColumn<T> {
val classifier = type.classifier as KClass<*>
if (classifier.isSubclassOf(DataFrame::class) && !hasNulls) {
return DataColumn.createFrameColumn(name, data as List<AnyFrame>) as DataColumn<T>
}
if (classifier.isSubclassOf(DataRow::class) && !hasNulls) {
val mergedDf = (data as List<AnyRow>).map { it.toDataFrame() }.concat()
return DataColumn.createColumnGroup(name, mergedDf) as DataColumn<T>
}
return DataColumn.createValueColumn(name, data, type.withNullability(hasNulls)) as DataColumn<T>
}
/**
 * Builds a column called [name] out of the collected values, picking the column kind from [type]:
 *
 * - `Nothing`: the collector must be empty; an empty column is returned.
 * - `Nothing?`: every collected value must be `null`; a value column of type `Nothing?` is returned.
 * - a data frame type without collected nulls: a frame column.
 * - a data row type without collected nulls: each row is turned into a data frame,
 *   the frames are concatenated, and the result becomes a column group.
 * - anything else: a value column whose nullability follows `hasNulls`.
 *
 * @throws IllegalArgumentException if [type] is `Nothing` but values were collected,
 * or [type] is `Nothing?` but a non-null value was collected.
 */
@Suppress("UNCHECKED_CAST")
protected fun createColumn(name: String, type: KType): DataColumn<T> {
    val column: DataColumn<*> = when {
        type == nothingType -> {
            require(values.isEmpty()) { "Cannot create non-empty DataColumn of type Nothing" }
            DataColumn.empty(name)
        }

        type == nullableNothingType -> {
            require(values.all { it == null }) { "Cannot create DataColumn of type Nothing? with non-null values" }
            DataColumn.createValueColumn(name, values, nullableNothingType)
        }

        type.isSubtypeOf(typeOf<AnyFrame?>()) && !hasNulls ->
            DataColumn.createFrameColumn(name, data as List<AnyFrame>)

        type.isSubtypeOf(typeOf<AnyRow?>()) && !hasNulls -> {
            val rowsAsFrame = (data as List<AnyRow>).map { it.toDataFrame() }.concat()
            DataColumn.createColumnGroup(name, rowsAsFrame).asDataColumn()
        }

        else -> DataColumn.createValueColumn(name, data, type.withNullability(hasNulls))
    }
    return column.cast()
}
}

internal open class ColumnDataCollector(initCapacity: Int = 0, val typeOf: (KClass<*>) -> KType) :
Expand All @@ -65,7 +79,7 @@ internal class TypedColumnDataCollector<T>(initCapacity: Int = 0, val type: KTyp
override fun add(value: T?) {
if (checkTypes && value != null && !value.javaClass.kotlin.isSubclassOf(kclass)) {
throw IllegalArgumentException(
"Can not add value of class ${value.javaClass.kotlin.qualifiedName} to column of type $type. Value = $value",
"Cannot add a value of class ${value.javaClass.kotlin.qualifiedName} to a column of type $type. Value: '$value'.",
)
}
super.add(value)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ internal fun <T> concatImpl(name: String, columns: List<DataColumn<T>>): DataCol
internal fun <T> concatImpl(name: String, columns: List<DataColumn<T>?>, columnSizes: List<Int>): DataColumn<T> {
when (columns.size) {
0 -> return DataColumn.empty(name).cast()
1 -> return columns[0] ?: DataColumn.empty(name).cast()
1 -> return columns.single() ?: DataColumn.empty(name).cast()
}

if (columns.all { it == null || it.isColumnGroup() }) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import org.jetbrains.kotlinx.dataframe.api.with
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.columns.size
import org.jetbrains.kotlinx.dataframe.exceptions.DataFrameError
import org.jetbrains.kotlinx.dataframe.impl.columns.AddDataRowImpl
import org.jetbrains.kotlinx.dataframe.impl.createDataCollector
import org.jetbrains.kotlinx.dataframe.index
Expand Down Expand Up @@ -94,28 +95,36 @@ private fun <C, R> ColumnGroup<C>.replaceRowsIf(
.asColumnGroup()
.cast()

/**
 * Signals that updating a column could not be completed.
 *
 * It is thrown by `updateImpl`, which passes the original failure along as [cause].
 *
 * @param message description of the failed update
 * @param cause the underlying failure, if any
 */
public class UpdateException(override val message: String, cause: Throwable? = null) :
IllegalStateException(message, cause),
DataFrameError
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's better to add this supertype only if you have an example of code that triggers this exception in the compiler plugin.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, that might be difficult, as it most often relies on runtime information (besides maybe `update {}.with { error() }`). I thought we now preferred to use the DataFrameError type for any new exception we create.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think of it as a way to opt in to reporting warnings for exceptions that we know for sure make sense to the user. In theory, we could report any exception; DataFrameError will help us validate them first.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

soo... DataFrameWarning? ;P

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could be


/**
 * Recomputes the values of this column for every row index of [df] and returns them
 * as a new column carrying the same name.
 *
 * @param df the data frame whose row indices are iterated
 * @param filter optional row/value predicate; when it is non-null, rows for which it returns
 * `false` keep their current value
 * @param expression computes the new value from the row, this column, and the current value
 * @return the column built from the collected values
 * @throws UpdateException if evaluating [filter] or [expression], collecting a value,
 * or building the resulting column throws an [Exception]; the original exception is the cause
 */
internal fun <T, C> DataColumn<C>.updateImpl(
    df: DataFrame<T>,
    filter: RowValueFilter<T, C>?,
    expression: (AddDataRow<T>, DataColumn<C>, C) -> C?,
): DataColumn<C> {
    val collector = createDataCollector<C>(size, type)
    val src = this
    try {
        if (filter == null) {
            df.indices().forEach { rowIndex ->
                val row = AddDataRowImpl(rowIndex, df, collector.values)
                collector.add(expression(row, src, src[rowIndex]))
            }
        } else {
            df.indices().forEach { rowIndex ->
                val row = AddDataRowImpl(rowIndex, df, collector.values)
                val currentValue = row[src]
                val newValue =
                    if (filter.invoke(row, currentValue)) expression(row, src, currentValue) else currentValue
                collector.add(newValue)
            }
        }
        return collector.toColumn(src.name).cast()
    } catch (e: Exception) {
        // Only exceptions are wrapped. JVM errors (e.g. OutOfMemoryError, StackOverflowError)
        // propagate unchanged instead of being turned into an IllegalStateException subtype.
        throw UpdateException("Could not update column '${src.name}': ${e.message}", e)
    }
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import org.jetbrains.kotlinx.dataframe.ColumnsContainer
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.Selector
import org.jetbrains.kotlinx.dataframe.api.AddDataRow
import org.jetbrains.kotlinx.dataframe.api.AddExpression
Expand Down Expand Up @@ -43,9 +42,9 @@ import org.jetbrains.kotlinx.dataframe.index
import org.jetbrains.kotlinx.dataframe.nrow
import org.jetbrains.kotlinx.dataframe.util.CREATE_COLUMN
import org.jetbrains.kotlinx.dataframe.util.GUESS_COLUMN_TYPE
import kotlin.reflect.KClass
import kotlin.reflect.KType
import kotlin.reflect.full.isSubtypeOf
import kotlin.reflect.full.starProjectedType
import kotlin.reflect.full.withNullability
import kotlin.reflect.typeOf

Expand Down Expand Up @@ -262,10 +261,10 @@ internal fun <T> createColumnGuessingType(
return { value -> if (value != null && value is Number) converter(value) else value }
}

return when (type.classifier!! as KClass<*>) {
return when (type.classifier?.starProjectedType) {
// guessValueType can only return DataRow if all values are `AnyRow?`
// or allColsMakesColGroup == true, and all values are `AnyCol`
DataRow::class ->
typeOf<AnyRow>() ->
if (allColsMakesColGroup && values.firstOrNull() is AnyCol) {
val df = dataFrameOf(values as Iterable<AnyCol>)
DataColumn.createColumnGroup(name, df)
Expand All @@ -276,7 +275,7 @@ internal fun <T> createColumnGuessingType(
DataColumn.createColumnGroup(name, df)
}.asDataColumn().cast()

DataFrame::class -> {
typeOf<AnyFrame>() -> {
val frames = values.map {
when (it) {
null -> DataFrame.empty()
Expand All @@ -289,7 +288,7 @@ internal fun <T> createColumnGuessingType(
DataColumn.createFrameColumn(name, frames).asDataColumn().cast()
}

List::class -> {
typeOf<List<*>>() -> {
val nullable = type.isMarkedNullable
var isListOfRows: Boolean? = null
val subType = type.arguments.first().type!! // List<T> -> T
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package org.jetbrains.kotlinx.dataframe.api

import io.kotest.matchers.shouldBe
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.junit.Test

class ConcatTests {
Expand All @@ -9,7 +11,7 @@ class ConcatTests {
fun `different types`() {
val a by columnOf(1, 2)
val b by columnOf(3.0, null)
a.concat(b) shouldBe columnOf(1, 2, 3.0, null).named("a")
a.concat(b) shouldBe columnOf<Number?>(1, 2, 3.0, null).named("a")
}

@Test
Expand All @@ -23,4 +25,28 @@ class ConcatTests {

dfWithCategory.columnNames() shouldBe listOf("value", "type", "category")
}

@Test
fun `concat empty DataFrames no rows`() {
    // frame created via emptyOf: concatenating it with itself must keep the frame and its schema
    val dfWithSchema = DataFrame.emptyOf<Pair<Int, String>>()
    val schemaConcat = dfWithSchema concat dfWithSchema
    schemaConcat shouldBe dfWithSchema
    schemaConcat.schema() shouldBe dfWithSchema.schema()

    // frame built from DataColumn.empty() columns: same expectation
    val dfNothingCols = dataFrameOf(
        "a" to DataColumn.empty(),
        "b" to DataColumn.empty(),
    )
    val nothingConcat = dfNothingCols concat dfNothingCols
    nothingConcat shouldBe dfNothingCols
    nothingConcat.schema() shouldBe dfNothingCols.schema()
}

@Test
fun `concat empty DataFrames no cols`() {
    // concatenating DataFrame.empty(5) with itself is expected to equal DataFrame.empty(10)
    val dfNoCols = DataFrame.empty(5)
    val doubled = dfNoCols concat dfNoCols
    doubled shouldBe DataFrame.empty(10)
}
}
26 changes: 26 additions & 0 deletions core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
package org.jetbrains.kotlinx.dataframe.api

import io.kotest.assertions.throwables.shouldThrow
import io.kotest.matchers.shouldBe
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.impl.api.UpdateException
import org.jetbrains.kotlinx.dataframe.impl.nothingType
import org.jetbrains.kotlinx.dataframe.impl.nullableNothingType
import org.jetbrains.kotlinx.dataframe.size
import org.junit.Test

Expand Down Expand Up @@ -79,4 +84,25 @@ class UpdateTests {

df.fillNA(SchemaB::i).with { 42 }
}

@Test
fun `update Nothing columns`() {
    // a column created with DataColumn.empty() has type Nothing
    val emptyDf = dataFrameOf("a" to DataColumn.empty())
    emptyDf["a"].type() shouldBe nothingType

    val updatedEmpty = emptyDf.update { "a"<Nothing>() }.with { error("should not happen") }
    updatedEmpty.schema() shouldBe emptyDf.schema()

    // a column holding only null has type Nothing?
    val nullFilledDf = dataFrameOf("a" to columnOf(null))
    nullFilledDf["a"].type() shouldBe nullableNothingType

    // can only update with null
    val updatedWithNull = nullFilledDf.update { "a"<Nothing?>() }.with { null }
    updatedWithNull.schema() shouldBe nullFilledDf.schema()

    // or 'Nothing', aka, return early/throw exception
    val thrown = shouldThrow<UpdateException> {
        nullFilledDf.update { "a"<Nothing?>() }.with { error("Nothing") }
    }
    thrown.cause!!.message shouldBe "Nothing"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ import org.jetbrains.kotlinx.dataframe.get
import org.jetbrains.kotlinx.dataframe.hasNulls
import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl
import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize
import org.jetbrains.kotlinx.dataframe.impl.api.UpdateException
import org.jetbrains.kotlinx.dataframe.impl.api.convertToImpl
import org.jetbrains.kotlinx.dataframe.impl.between
import org.jetbrains.kotlinx.dataframe.impl.columns.isMissingColumn
Expand Down Expand Up @@ -1712,7 +1713,7 @@ class DataFrameTests : BaseTest() {
df.update("name").at(0).with { "ALICE" }
}

@Test(expected = IllegalArgumentException::class)
@Test(expected = UpdateException::class)
fun `update with wrong type`() {
typed.update("age").with { "string" }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package org.jetbrains.kotlinx.dataframe.jupyter
import kotlinx.serialization.ExperimentalSerializationApi
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.api.FormattedFrame
import org.jetbrains.kotlinx.dataframe.api.allNulls
import org.jetbrains.kotlinx.dataframe.api.colsOf
import org.jetbrains.kotlinx.dataframe.api.getColumns
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions
Expand Down Expand Up @@ -101,7 +102,7 @@ internal inline fun <reified T : Any> JupyterHtmlRenderer.render(
}

internal fun AnyFrame.hasFormattedColumns() =
this.getColumns { colsAtAnyDepth().colsOf<FormattedFrame<*>?>() }.isNotEmpty()
this.getColumns { colsAtAnyDepth().colsOf<FormattedFrame<*>?> { !it.allNulls() } }.isNotEmpty()

private fun KotlinNotebookPluginUtils.IdeBuildNumber?.supportsDynamicNestedTables() =
this != null && majorVersion >= MIN_IDE_VERSION_SUPPORT_JSON_WITH_METADATA
Expand Down
Loading