-
Notifications
You must be signed in to change notification settings - Fork 76
Documentation and tests for the first and firstOrNull functions
#1547
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,8 @@ import org.jetbrains.kotlinx.dataframe.columns.values | |
| import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate | ||
| import org.jetbrains.kotlinx.dataframe.documentation.Indent | ||
| import org.jetbrains.kotlinx.dataframe.documentation.LineBreak | ||
| import org.jetbrains.kotlinx.dataframe.documentation.RowFilterDescription | ||
| import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns | ||
| import org.jetbrains.kotlinx.dataframe.impl.columns.TransformableColumnSet | ||
| import org.jetbrains.kotlinx.dataframe.impl.columns.singleOrNullWithTransformerImpl | ||
| import org.jetbrains.kotlinx.dataframe.impl.columns.transform | ||
|
|
@@ -27,32 +29,141 @@ import kotlin.reflect.KProperty | |
|
|
||
| // region DataColumn | ||
|
|
||
| /** | ||
| * Returns the first value in this [DataColumn]. | ||
| * | ||
| * @param T The type of the values in the [DataColumn]. | ||
| * | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please, add here and in all other places "See also" section with related operations. For example |
||
| * @throws [IndexOutOfBoundsException] if the [DataColumn] is empty. | ||
| */ | ||
| public fun <T> DataColumn<T>.first(): T = get(0) | ||
|
|
||
| /** | ||
| * Returns the first value in this [DataColumn], or `null` if the [DataColumn] is empty. | ||
| * | ||
| * @param T The type of the values in the [DataColumn]. | ||
| * | ||
| * @return The first value, or `null` when the column has no values (its `size` is 0). | ||
| * | ||
| * @see first | ||
| */ | ||
| public fun <T> DataColumn<T>.firstOrNull(): T? = if (size > 0) first() else null | ||
|
|
||
| /** | ||
| * Returns the first value in this [DataColumn] that matches the given [predicate]. | ||
| * | ||
| * ### Example | ||
| * ```kotlin | ||
| * // Select from the column "age" the first value where the age is greater than 17 | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "select" is confusing, as we also have the
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same applies below |
||
| * df.age.first { it > 17 } | ||
| * ``` | ||
| * | ||
| * @param T The type of the values in the [DataColumn]. | ||
| * @param predicate A lambda expression used to select a value | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. *the first value |
||
| * that satisfies a condition specified in this expression. | ||
| * This predicate takes a value from the [DataColumn] as an input | ||
| * and returns `true` if the value satisfies the condition or `false` otherwise. | ||
| * | ||
| * @throws [NoSuchElementException] if the [DataColumn] contains no element matching the [predicate] | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Helpful! We don't add these enough. Though I would add "@see firstOrNull" somewhere around here so people will know how to avoid this exception
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. holds for the other functions as well |
||
| * (including the case when the [DataColumn] is empty). | ||
| */ | ||
| public fun <T> DataColumn<T>.first(predicate: (T) -> Boolean): T = values.first(predicate) | ||
|
|
||
| /** | ||
| * Returns the first value in this [DataColumn] that matches the given [predicate]. | ||
| * Returns `null` if the [DataColumn] contains no element matching the [predicate] | ||
| * (including the case when the [DataColumn] is empty). | ||
| * | ||
| * ### Example | ||
| * ```kotlin | ||
| * // Get the first value in the column "age" that is greater than 17, | ||
| * // or null if there is no such value | ||
| * df.age.firstOrNull { it > 17 } | ||
| * ``` | ||
| * | ||
| * @param T The type of the values in the [DataColumn]. | ||
| * @param predicate A lambda expression used to find the first value | ||
| * that satisfies a condition specified in this expression. | ||
| * This predicate takes a value from the [DataColumn] as an input | ||
| * and returns `true` if the value satisfies the condition or `false` otherwise. | ||
| * | ||
| * @return The first value matching the [predicate], or `null` if there is no such value. | ||
| */ | ||
| public fun <T> DataColumn<T>.firstOrNull(predicate: (T) -> Boolean): T? = values.firstOrNull(predicate) | ||
|
|
||
| // endregion | ||
|
|
||
| // region DataFrame | ||
|
|
||
| /** | ||
| * Returns the first row in this [DataFrame]. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you could link to |
||
| * | ||
| * @param T The type of the [DataFrame]. | ||
| * | ||
| * @throws NoSuchElementException if the [DataFrame] contains no rows. | ||
| */ | ||
| public fun <T> DataFrame<T>.first(): DataRow<T> { | ||
| if (nrow == 0) { | ||
| throw NoSuchElementException("DataFrame has no rows. Use `firstOrNull`.") | ||
| } | ||
| return get(0) | ||
| } | ||
|
|
||
| /** | ||
| * Returns the first row in this [DataFrame], or `null` if the [DataFrame] does not contain any rows. | ||
| * | ||
| * @param T The type of the [DataFrame]. | ||
| * | ||
| * @return The first [DataRow], or `null` when `nrow` is 0. | ||
| * | ||
| * @see first | ||
| */ | ||
| public fun <T> DataFrame<T>.firstOrNull(): DataRow<T>? = if (nrow > 0) first() else null | ||
|
|
||
| /** | ||
| * Returns the first row in this [DataFrame] that satisfies the given [predicate]. | ||
| * | ||
| * {@include [RowFilterDescription]} | ||
| * | ||
| * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention] | ||
| * | ||
| * ### Example | ||
| * ```kotlin | ||
| * // Get the first row where the value in the "age" column is greater than 17 | ||
| * // and the "name/firstName" column starts with 'A' | ||
| * df.first { age > 17 && name.firstName.startsWith("A") } | ||
| * ``` | ||
| * | ||
| * @param T The type of the [DataFrame]. | ||
| * @param predicate A lambda expression used to find the first row | ||
| * that satisfies a condition specified in this expression. | ||
| * This predicate is evaluated against the rows of the [DataFrame] | ||
| * and returns `true` if the row satisfies the condition or `false` otherwise. | ||
| * | ||
| * @return A [DataRow] containing the first row that matches the given [predicate]. | ||
| * | ||
| * @throws [NoSuchElementException] if the [DataFrame] contains no rows matching the [predicate]. | ||
| * | ||
| * @see firstOrNull | ||
| */ | ||
| public inline fun <T> DataFrame<T>.first(predicate: RowFilter<T>): DataRow<T> = | ||
| rows().first { | ||
| predicate(it, it) | ||
| } | ||
|
|
||
| /** | ||
| * Returns the first row in this [DataFrame] that satisfies the given [predicate]. | ||
| * Returns `null` if the [DataFrame] contains no rows matching the [predicate] | ||
| * (including the case when the [DataFrame] is empty). | ||
| * | ||
| * {@include [RowFilterDescription]} | ||
| * | ||
| * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention] | ||
| * | ||
| * ### Example | ||
| * ```kotlin | ||
| * // Select the first row where the value in the "age" column is greater than 17 | ||
| * // and the "name/firstName" column starts with 'A' | ||
| * df.firstOrNull { age > 17 && name.firstName.startsWith("A") } | ||
| * ``` | ||
| * | ||
| * @param T The type of the [DataFrame]. | ||
| * @param predicate A lambda expression used to select a value | ||
| * that satisfies a condition specified in this expression. | ||
| * This predicate takes a value from the [DataFrame] as an input | ||
| * and returns `true` if the value satisfies the condition or `false` otherwise. | ||
| * | ||
| * @return A [DataRow] containing the first row that matches the given [predicate], | ||
| * or `null` if the [DataFrame] contains no rows matching the [predicate] | ||
| */ | ||
| public inline fun <T> DataFrame<T>.firstOrNull(predicate: RowFilter<T>): DataRow<T>? = | ||
| rows().firstOrNull { | ||
| predicate(it, it) | ||
|
|
@@ -62,26 +173,209 @@ public inline fun <T> DataFrame<T>.firstOrNull(predicate: RowFilter<T>): DataRow | |
|
|
||
| // region GroupBy | ||
|
|
||
| /** | ||
| * Selects the first row from each group of the given [GroupBy] | ||
| * and returns a [ReducedGroupBy] containing these rows | ||
| * (one row per group, each row is the first row in its group). | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. or null if group is empty
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have faced an issue in this case that might be unexpected behavior. I started with But now I have tried val grouped = df.groupBy { age }
grouped.updateGroups {
if (it == grouped.groups[0]) {
it.take(0)
} else it
}.first()to make the first group empty. And applying This problem does not occur if every group has at least one row, or if I remove the column grouped.updateGroups {
val new = it.remove { age }
if (it == grouped.groups[0]) {
new.take(0)
} else new
}.first()We get But is it expected behavior that we get such an error about conflicting columns? Or maybe I am just obtaining an empty group incorrectly. The val df = dataFrameOf(
"name" to columnOf("Alice", "Bob", "Charlie"),
"age" to columnOf(15, 20, 25),
)Or, to use a bit more natural example, we can make a I am reporting this just in case it is a not known issue :)
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Your first issue can also be reproduced in a notebook by running `df.groupBy { age }.updateGroups { it }.first()`. I'm not entirely sure why.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah!
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can be reproduced outside notebooks with |
||
| * | ||
| * ### Example | ||
| * ```kotlin | ||
| * // Select the first employee from each group formed by the job title | ||
| * employees.groupBy { jobTitle }.first() | ||
| * ``` | ||
| * | ||
| * @param T The type of the values in the [GroupBy]. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think these lines are redundant because they are always inferred |
||
| * @param G The type of the groups in the [GroupBy]. | ||
| * | ||
| * @return A [ReducedGroupBy] containing the first row from each group. | ||
| */ | ||
| @Interpretable("GroupByReducePredicate") | ||
| public fun <T, G> GroupBy<T, G>.first(): ReducedGroupBy<T, G> = reduce { firstOrNull() } | ||
|
|
||
| /** | ||
| * Selects from each group of the given [GroupBy] the first row satisfying the given [predicate], | ||
| * and returns a [ReducedGroupBy] containing these rows (one row per group, | ||
| * each row is the first row in its group that satisfies the [predicate]). | ||
| * | ||
| * If a group in the [GroupBy] contains no matching rows, | ||
| * the corresponding row in the [ReducedGroupBy] will contain `null` values for all columns in the group. | ||
| * | ||
| * {@include [RowFilterDescription]} | ||
| * | ||
| * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention] | ||
| * | ||
| * ### Example | ||
| * ```kotlin | ||
| * // Select the first employee older than 25 from each group formed by the job title | ||
| * employees.groupBy { jobTitle }.first { age > 25 } | ||
| * ``` | ||
| * | ||
| * @param T The type of the values in the [GroupBy]. | ||
| * @param G The type of the groups in the [GroupBy]. | ||
| * @param predicate A lambda expression used to find, in each group, the first row | ||
| * that satisfies a condition specified in this expression. | ||
| * This predicate is evaluated against the rows of a group | ||
| * and returns `true` if the row satisfies the condition or `false` otherwise. | ||
| * | ||
| * @return A [ReducedGroupBy] containing the first row matching the [predicate] | ||
| * (or a row with `null` values, except values in the column with the grouping key), from each group. | ||
| */ | ||
| @Interpretable("GroupByReducePredicate") | ||
| public fun <T, G> GroupBy<T, G>.first(predicate: RowFilter<G>): ReducedGroupBy<T, G> = reduce { firstOrNull(predicate) } | ||
|
|
||
| // endregion | ||
|
|
||
| // region Pivot | ||
|
|
||
| /** | ||
| * Reduces this [Pivot] by selecting the first row from each group. | ||
| * | ||
| * Returns a [ReducedPivot] where: | ||
| * - each column corresponds to a [pivot] group — if multiple pivot keys were used, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think text explanations of pivot make it more scary than it is. For |
||
| * the result will contain column groups for each pivot key, with columns inside | ||
| * corresponding to the values of that key; | ||
| * - each value contains the first row from that group. | ||
| * | ||
| * The original [Pivot] column structure is preserved. | ||
| * If the [Pivot] was created using multiple or nested keys | ||
| * (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]), | ||
| * the structure remains unchanged — only the contents of each group | ||
| * are replaced with the first row from that group. | ||
| * | ||
| * Equivalent to `reduce { firstOrNull() }`. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. reduce is internal function, people won't be able to use it like this |
||
| * | ||
| * See also: | ||
| * - [pivot]; | ||
| * - common [reduce][Pivot.reduce]. | ||
| * | ||
| * ### Example | ||
| * ```kotlin | ||
| * // Select the first row for each city. | ||
| * // Returns a ReducedPivot with one column per city and the first row from the group in each column. | ||
| * df.pivot { city }.first() | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please see if you can come up with representative example. Like, in what situation you'd use this function? What df typically it will be and what ideas one can draw from the result? Will be good if example can convey this |
||
| * ``` | ||
| * | ||
| * @return A [ReducedPivot] containing in each column the first row from the corresponding group. | ||
| */ | ||
| public fun <T> Pivot<T>.first(): ReducedPivot<T> = reduce { firstOrNull() } | ||
|
|
||
| /** | ||
| * Reduces this [Pivot] by selecting from each group the first row satisfying the given [predicate]. | ||
| * | ||
| * Returns a [ReducedPivot] where: | ||
| * - each column corresponds to a [pivot] group — if multiple pivot keys were used, | ||
| * the result will contain column groups for each pivot key, with columns inside | ||
| * corresponding to the values of that key; | ||
| * - each value contains the first row from that group that satisfies the [predicate], | ||
| * or a row with `null` values if no rows in this group match the [predicate]. | ||
| * | ||
| * The original [Pivot] column structure is preserved. | ||
| * If the [Pivot] was created using multiple or nested keys | ||
| * (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]), | ||
| * the structure remains unchanged — only the contents of each group | ||
| * are replaced with the first row from that group that satisfies the [predicate]. | ||
| * | ||
| * Equivalent to `reduce { firstOrNull(predicate) }`. | ||
| * | ||
| * See also: | ||
| * - [pivot]; | ||
| * - common [reduce][Pivot.reduce]. | ||
| * | ||
| * {@include [RowFilterDescription]} | ||
| * | ||
| * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention] | ||
| * | ||
| * ### Example | ||
| * ```kotlin | ||
| * // Select the first row for each city where the population is greater than 100 000. | ||
| * df.pivot { city }.first { population > 100_000 } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 100_000 is better readable ;P (and compiles!)
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. or |
||
| * ``` | ||
| * | ||
| * @param predicate A lambda expression used to select a value | ||
| * that satisfies a condition specified in this expression. | ||
| * | ||
| * @return A [ReducedPivot] containing in each column the first row | ||
| * that satisfies the [predicate], from the corresponding group (or a row with `null` values) | ||
| */ | ||
| public fun <T> Pivot<T>.first(predicate: RowFilter<T>): ReducedPivot<T> = reduce { firstOrNull(predicate) } | ||
|
|
||
| // endregion | ||
|
|
||
| // region PivotGroupBy | ||
|
|
||
| /** | ||
| * Reduces this [PivotGroupBy] by selecting the first row from each combined [pivot] + [groupBy] group. | ||
| * | ||
| * Returns a [ReducedPivotGroupBy] containing the following matrix: | ||
| * - one row per [groupBy] key (or set of keys); | ||
| * - one column group per [pivot] key, where each inner column corresponds to a value of that key; | ||
| * - each combination of a [groupBy] key and a [pivot] key contains either the first row of the corresponding | ||
| * dataframe formed by this pivot–group pair, or a row with `null` values if this dataframe is empty. | ||
| * | ||
| * The original [PivotGroupBy] column structure is preserved. | ||
| * If the [PivotGroupBy] was created using multiple or nested keys | ||
| * (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]), | ||
| * the result will contain nested column groups reflecting that key structure, | ||
| * with each group containing columns for the values of the corresponding key. | ||
| * | ||
| * Equivalent to `reduce { firstOrNull() }`. | ||
| * | ||
| * See also: | ||
| * - [pivot], [Pivot.groupBy] and [GroupBy.pivot]; | ||
| * - common [reduce][PivotGroupBy.reduce]. | ||
| * | ||
| * ### Example | ||
| * ```kotlin | ||
| * // Select the first student from each combination of faculty and enrollment year. | ||
| * students.pivot { faculty }.groupBy { enrollmentYear }.first() | ||
| * ``` | ||
| * | ||
| * @return A [ReducedPivotGroupBy] containing, for each combination of a [groupBy] key and a [pivot] key, either | ||
| * the first row of the corresponding dataframe formed by this pivot–group pair, | ||
| * or a row with `null` values if this dataframe is empty. | ||
| */ | ||
| public fun <T> PivotGroupBy<T>.first(): ReducedPivotGroupBy<T> = reduce { firstOrNull() } | ||
|
|
||
| /** | ||
| * Reduces this [PivotGroupBy] by selecting from each combined [pivot] + [groupBy] group | ||
| * the first row satisfying the given [predicate]. | ||
| * | ||
| * Returns a [ReducedPivotGroupBy] containing the following matrix: | ||
| * - one row per [groupBy] key (or keys set); | ||
| * - one column group per [pivot] key, where each inner column corresponds to a value of that key; | ||
| * - each combination of a [groupBy] key and a [pivot] key contains either the first row | ||
| * of the corresponding dataframe formed by this pivot–group pair that matches the [predicate], | ||
| * or a row with `null` values if this dataframe does not contain any rows matching the [predicate]. | ||
| * | ||
| * The original [PivotGroupBy] column structure is preserved. | ||
| * If the [PivotGroupBy] was created using multiple or nested keys | ||
| * (e.g., via [and][PivotDsl.and] or [then][PivotDsl.then]), | ||
| * the result will contain nested column groups reflecting that key structure, | ||
| * with each group containing columns for the values of the corresponding key. | ||
| * | ||
| * Equivalent to `reduce { firstOrNull(predicate) }`. | ||
| * | ||
| * See also: | ||
| * - [pivot], [Pivot.groupBy] and [GroupBy.pivot]; | ||
| * - common [reduce][PivotGroupBy.reduce]. | ||
| * | ||
| * {@include [RowFilterDescription]} | ||
| * | ||
| * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention] | ||
| * | ||
| * ### Example | ||
| * ```kotlin | ||
| * // From each combination of faculty and enrollment year select the first student older than 21. | ||
| * students.pivot { faculty }.groupBy { enrollmentYear }.first { age > 21 } | ||
| * ``` | ||
| * | ||
| * @param predicate A lambda expression used to select a value | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh you can actually also link to |
||
| * that satisfies a condition specified in this expression. | ||
| * | ||
| * @return A [ReducedPivotGroupBy] containing in each combination of a [groupBy] key and a [pivot] key either | ||
| * the first row matching the [predicate] in the corresponding dataframe formed by this pivot–group pair, | ||
| * or a row with `null` values if this dataframe does not contain any rows matching the [predicate]. | ||
| */ | ||
| public fun <T> PivotGroupBy<T>.first(predicate: RowFilter<T>): ReducedPivotGroupBy<T> = | ||
| reduce { firstOrNull(predicate) } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Line seems redundant to me because param T is always inferred from DataColumn
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I'd omit (everywhere) a `@param` with a type parameter, and add `@return`!