Skip to content
13 changes: 11 additions & 2 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.addPath
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind
import org.jetbrains.kotlinx.dataframe.impl.getValuesType
import org.jetbrains.kotlinx.dataframe.impl.nothingType
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.jetbrains.kotlinx.dataframe.util.CHUNKED_IMPL_IMPORT
import org.jetbrains.kotlinx.dataframe.util.CREATE
Expand Down Expand Up @@ -216,8 +217,16 @@ public interface DataColumn<out T> : BaseColumn<T> {
infer: Infer = Infer.None,
): DataColumn<T> = createByType(name, values, typeOf<T>(), infer)

/** Creates an empty [DataColumn] with given [name]. */
public fun empty(name: String = ""): AnyCol = createValueColumn(name, emptyList<Unit>(), typeOf<Unit>())
/**
 * Returns a [DataColumn] called [name] that holds no values and whose type is [Nothing].
 *
 * @param name the name of the resulting column; an empty string by default.
 * @see emptyOf
 */
public fun empty(name: String = ""): DataColumn<Nothing> {
    val noValues = emptyList<Unit>()
    return createValueColumn(name, noValues, nothingType).cast()
}

/**
 * Returns a [DataColumn] called [name] that holds no values and whose type is [T].
 *
 * @param T the declared element type of the resulting column.
 * @param name the name of the resulting column; an empty string by default.
 */
public inline fun <reified T> emptyOf(name: String = ""): DataColumn<T> {
    val elementType = typeOf<T>()
    return createValueColumn(name, emptyList<T>(), elementType).cast()
}

// region deprecated

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.TransformableColumnSet
import org.jetbrains.kotlinx.dataframe.impl.columns.addPath
import org.jetbrains.kotlinx.dataframe.impl.columns.onResolve
import org.jetbrains.kotlinx.dataframe.impl.columns.transform
import org.jetbrains.kotlinx.dataframe.impl.nullableNothingType
import org.jetbrains.kotlinx.dataframe.impl.owner
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
import kotlin.reflect.KProperty
Expand All @@ -40,7 +41,10 @@ import kotlin.reflect.KProperty
public fun <T> DataColumn<T>.all(predicate: Predicate<T>): Boolean {
    return values.all { value -> predicate(value) }
}

/** Returns `true` if all [values] are `null` or [values] is empty. */
public fun <C> DataColumn<C>.allNulls(): Boolean = size == 0 || all { it == null }
public fun <C> DataColumn<C>.allNulls(): Boolean =
    when {
        // an empty column trivially contains nothing but nulls
        size == 0 -> true

        // a column typed `Nothing?` cannot hold a non-null value, so its values need not be scanned
        type() == nullableNothingType -> true

        else -> all { it == null }
    }

// endregion

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,11 @@ public interface ColsOfColumnsSelectionDsl {
*
* This function operates solely on columns at the top-level.
*
* __NOTE:__ Null-filled columns of type [Nothing?][Nothing] will be included when selecting [`colsOf`][colsOf]`<T?>()`.
* This is because [Nothing][Nothing] is considered a subtype of all other types in Kotlin.
* To exclude these columns, call `.`[filter][ColumnsSelectionDsl.filter]` { !it.`[allNulls][DataColumn.allNulls]`() }`
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is `colsOf<T?>() except colsOf<Nothing>()` not an option here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`Nothing` cannot be used as a reified type argument.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just have a look at what we have to do to achieve `typeOf<Nothing>()` in dataframe XD

internal val nothingType: KType = typeOf<List<Nothing>>().arguments.first().type!!

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We would need a shortcut for that, `nothingCols()` for instance, but I'm not sure it's common enough.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or, you know, they could type:

colsOf<T?>() except colsOf(typeOf<List<Nothing>>().arguments.first().type!!)

you know, elegant

* after it.
*
* ### Check out: [Grammar]
*
* #### For example:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,18 @@ package org.jetbrains.kotlinx.dataframe.impl
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.AnyRow
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.api.asDataColumn
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.concat
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
import kotlin.reflect.KClass
import kotlin.reflect.KType
import kotlin.reflect.full.isSubclassOf
import kotlin.reflect.full.isSubtypeOf
import kotlin.reflect.full.withNullability
import kotlin.reflect.jvm.jvmErasure
import kotlin.reflect.typeOf

public interface DataCollector<T> {

Expand All @@ -38,17 +40,29 @@ internal abstract class DataCollectorBase<T>(initCapacity: Int) : DataCollector<
data.add(value)
}

protected fun createColumn(name: String, type: KType): DataColumn<T> {
val classifier = type.classifier as KClass<*>
if (classifier.isSubclassOf(DataFrame::class) && !hasNulls) {
return DataColumn.createFrameColumn(name, data as List<AnyFrame>) as DataColumn<T>
}
if (classifier.isSubclassOf(DataRow::class) && !hasNulls) {
val mergedDf = (data as List<AnyRow>).map { it.toDataFrame() }.concat()
return DataColumn.createColumnGroup(name, mergedDf) as DataColumn<T>
}
return DataColumn.createValueColumn(name, data, type.withNullability(hasNulls)) as DataColumn<T>
}
/**
 * Turns the collected [data] into a [DataColumn] called [name], choosing the column kind from [type]:
 *
 * - [Nothing]: nothing may have been collected; an empty column is returned.
 * - `Nothing?`: every collected value must be `null`; a value column of that type is returned.
 * - a subtype of [AnyFrame]`?` with no collected nulls: a frame column.
 * - a subtype of [AnyRow]`?` with no collected nulls: each row is converted to a data frame, the frames are
 *   concatenated and the result is wrapped as a column group.
 * - anything else: a value column of [type] whose nullability follows [hasNulls].
 *
 * @param name the name of the resulting column.
 * @param type the type the collected values are expected to have.
 * @throws IllegalArgumentException if [type] is `Nothing` or `Nothing?` and the collected values contradict it.
 */
@Suppress("UNCHECKED_CAST")
protected fun createColumn(name: String, type: KType): DataColumn<T> {
    // The two Nothing cases are decided before any subtype check; presumably because Nothing,
    // being a subtype of every type, would otherwise match the frame/row branches below.
    val column = when (type) {
        nothingType -> {
            require(values.isEmpty()) { "Cannot create non-empty DataColumn of type Nothing" }
            DataColumn.empty(name)
        }

        nullableNothingType -> {
            require(values.all { it == null }) { "Cannot create DataColumn of type Nothing? with non-null values" }
            DataColumn.createValueColumn(name, values, nullableNothingType)
        }

        else -> when {
            type.isSubtypeOf(typeOf<AnyFrame?>()) && !hasNulls ->
                DataColumn.createFrameColumn(name, data as List<AnyFrame>)

            type.isSubtypeOf(typeOf<AnyRow?>()) && !hasNulls -> {
                val rowsAsFrames = (data as List<AnyRow>).map { it.toDataFrame() }
                DataColumn.createColumnGroup(name, rowsAsFrames.concat()).asDataColumn()
            }

            else -> DataColumn.createValueColumn(name, data, type.withNullability(hasNulls))
        }
    }
    return column.cast()
}
}

internal open class ColumnDataCollector(initCapacity: Int = 0, val typeOf: (KClass<*>) -> KType) :
Expand All @@ -65,7 +79,7 @@ internal class TypedColumnDataCollector<T>(initCapacity: Int = 0, val type: KTyp
override fun add(value: T?) {
if (checkTypes && value != null && !value.javaClass.kotlin.isSubclassOf(kclass)) {
throw IllegalArgumentException(
"Can not add value of class ${value.javaClass.kotlin.qualifiedName} to column of type $type. Value = $value",
"Cannot add a value of class ${value.javaClass.kotlin.qualifiedName} to a column of type $type. Value: '$value'.",
)
}
super.add(value)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import org.jetbrains.kotlinx.dataframe.api.with
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
import org.jetbrains.kotlinx.dataframe.columns.size
import org.jetbrains.kotlinx.dataframe.exceptions.DataFrameError
import org.jetbrains.kotlinx.dataframe.impl.columns.AddDataRowImpl
import org.jetbrains.kotlinx.dataframe.impl.createDataCollector
import org.jetbrains.kotlinx.dataframe.index
Expand Down Expand Up @@ -94,28 +95,36 @@ private fun <C, R> ColumnGroup<C>.replaceRowsIf(
.asColumnGroup()
.cast()

/**
 * Signals that updating a column failed.
 *
 * [updateImpl] throws it with a message naming the affected column and with the original
 * failure attached as the cause. It is both an [IllegalStateException] and a [DataFrameError].
 *
 * @param message description of the failure.
 * @param cause the underlying failure, if any.
 */
public class UpdateException(override val message: String, cause: Throwable? = null) :
IllegalStateException(message, cause),
DataFrameError

internal fun <T, C> DataColumn<C>.updateImpl(
df: DataFrame<T>,
filter: RowValueFilter<T, C>?,
expression: (AddDataRow<T>, DataColumn<C>, C) -> C?,
): DataColumn<C> {
val collector = createDataCollector<C>(size, type)
val src = this
if (filter == null) {
df.indices().forEach { rowIndex ->
val row = AddDataRowImpl(rowIndex, df, collector.values)
collector.add(expression(row, src, src[rowIndex]))
}
} else {
df.indices().forEach { rowIndex ->
val row = AddDataRowImpl(rowIndex, df, collector.values)
val currentValue = row[src]
val newValue =
if (filter.invoke(row, currentValue)) expression(row, src, currentValue) else currentValue
collector.add(newValue)
try {
if (filter == null) {
df.indices().forEach { rowIndex ->
val row = AddDataRowImpl(rowIndex, df, collector.values)
collector.add(expression(row, src, src[rowIndex]))
}
} else {
df.indices().forEach { rowIndex ->
val row = AddDataRowImpl(rowIndex, df, collector.values)
val currentValue = row[src]
val newValue =
if (filter.invoke(row, currentValue)) expression(row, src, currentValue) else currentValue
collector.add(newValue)
}
}
return collector.toColumn(src.name).cast()
} catch (e: Throwable) {
throw UpdateException("Could not update column '${src.name}': ${e.message}", e)
}
return collector.toColumn(src.name).cast()
}

/**
Expand Down
26 changes: 26 additions & 0 deletions core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
package org.jetbrains.kotlinx.dataframe.api

import io.kotest.assertions.throwables.shouldThrow
import io.kotest.matchers.shouldBe
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.impl.api.UpdateException
import org.jetbrains.kotlinx.dataframe.impl.nothingType
import org.jetbrains.kotlinx.dataframe.impl.nullableNothingType
import org.jetbrains.kotlinx.dataframe.size
import org.junit.Test

Expand Down Expand Up @@ -79,4 +84,25 @@ class UpdateTests {

df.fillNA(SchemaB::i).with { 42 }
}

@Test
fun `update Nothing columns`() {
    // a column produced by DataColumn.empty() is typed as Nothing
    val emptyDf = dataFrameOf("a" to DataColumn.empty())
    emptyDf["a"].type() shouldBe nothingType

    // updating it must leave the schema untouched; the expression is not expected to run
    val updatedEmptyDf = emptyDf.update { "a"<Nothing>() }.with { error("should not happen") }
    updatedEmptyDf.schema() shouldBe emptyDf.schema()

    // a column built from a single null literal is typed as Nothing?
    val nullFilledDf = dataFrameOf("a" to columnOf(null))
    nullFilledDf["a"].type() shouldBe nullableNothingType

    // such a column can only be updated with null...
    val updatedNullFilledDf = nullFilledDf.update { "a"<Nothing?>() }.with { null }
    updatedNullFilledDf.schema() shouldBe nullFilledDf.schema()

    // ...or with an expression of type Nothing, i.e. one that returns early or throws
    val failure = shouldThrow<UpdateException> {
        nullFilledDf.update { "a"<Nothing?>() }.with { error("Nothing") }
    }
    failure.cause!!.message shouldBe "Nothing"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ import org.jetbrains.kotlinx.dataframe.get
import org.jetbrains.kotlinx.dataframe.hasNulls
import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl
import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize
import org.jetbrains.kotlinx.dataframe.impl.api.UpdateException
import org.jetbrains.kotlinx.dataframe.impl.api.convertToImpl
import org.jetbrains.kotlinx.dataframe.impl.between
import org.jetbrains.kotlinx.dataframe.impl.columns.isMissingColumn
Expand Down Expand Up @@ -1712,7 +1713,7 @@ class DataFrameTests : BaseTest() {
df.update("name").at(0).with { "ALICE" }
}

@Test(expected = IllegalArgumentException::class)
@Test(expected = UpdateException::class)
fun `update with wrong type`() {
typed.update("age").with { "string" }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package org.jetbrains.kotlinx.dataframe.jupyter
import kotlinx.serialization.ExperimentalSerializationApi
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.api.FormattedFrame
import org.jetbrains.kotlinx.dataframe.api.allNulls
import org.jetbrains.kotlinx.dataframe.api.colsOf
import org.jetbrains.kotlinx.dataframe.api.getColumns
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions
Expand Down Expand Up @@ -101,7 +102,7 @@ internal inline fun <reified T : Any> JupyterHtmlRenderer.render(
}

internal fun AnyFrame.hasFormattedColumns() =
this.getColumns { colsAtAnyDepth().colsOf<FormattedFrame<*>?>() }.isNotEmpty()
this.getColumns { colsAtAnyDepth().colsOf<FormattedFrame<*>?> { !it.allNulls() } }.isNotEmpty()

private fun KotlinNotebookPluginUtils.IdeBuildNumber?.supportsDynamicNestedTables() =
this != null && majorVersion >= MIN_IDE_VERSION_SUPPORT_JSON_WITH_METADATA
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package org.jetbrains.kotlinx.dataframe.jupyter
import io.kotest.assertions.throwables.shouldNotThrow
import io.kotest.matchers.comparables.shouldBeGreaterThan
import io.kotest.matchers.comparables.shouldBeLessThan
import io.kotest.matchers.should
import io.kotest.matchers.shouldBe
import io.kotest.matchers.string.shouldContain
import io.kotest.matchers.string.shouldNotContain
Expand All @@ -14,6 +15,11 @@ import kotlinx.serialization.json.jsonArray
import kotlinx.serialization.json.jsonObject
import kotlinx.serialization.json.jsonPrimitive
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.api.columnOf
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.api.format
import org.jetbrains.kotlinx.dataframe.api.with
import org.jetbrains.kotlinx.dataframe.jupyter.SerializationKeys.DATA
import org.jetbrains.kotlinx.dataframe.jupyter.SerializationKeys.KOTLIN_DATAFRAME
import org.jetbrains.kotlinx.dataframe.jupyter.SerializationKeys.METADATA
Expand Down Expand Up @@ -607,6 +613,21 @@ class RenderingTests : JupyterReplTestCase() {
json.extractColumn<String>(4, "mixed") shouldBe "1"
}

// Issue #1546
@Test
fun `hasFormattedFrame false positive`() {
    // "b" contains only nulls; none of the columns holds a formatted frame
    val plainDf = dataFrameOf(
        "a" to columnOf(1, 2, 3, null),
        "b" to DataColumn.createByInference("", listOf(null, null, null, null)),
        "c" to columnOf(7, 3, 2, 65),
    )
    plainDf.hasFormattedColumns() shouldBe false

    // a column that really holds a formatted frame must still be detected
    val formattedFrame = plainDf.format { "c"() }.with { background(black) }
    val wrapperDf = dataFrameOf("a" to columnOf(formattedFrame))
    wrapperDf.hasFormattedColumns() shouldBe true
}

companion object {
/**
* Set the system property for the IDE version needed for specific serialization testing purposes.
Expand Down