From 59a96309cc2c3ce60458fef801ae3229de94ee93 Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Mon, 3 Nov 2025 19:10:56 +0400 Subject: [PATCH 1/6] pivot kdocs init --- .../kotlinx/dataframe/api/groupBy.kt | 47 +++- .../jetbrains/kotlinx/dataframe/api/pivot.kt | 201 ++++++++++++++++++ .../documentation/DocumentationUrls.kt | 8 +- 3 files changed, 245 insertions(+), 11 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt index 1db11181a7..600abe30a3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt @@ -41,13 +41,14 @@ import kotlin.reflect.KProperty * defines a group consisting of all rows where the column(s) contain that value combination. * * Returns a [GroupBy] — a dataframe-like structure that contains all unique combinations of key values - * along with the corresponding groups of rows (each represented as a [DataFrame]). + * along with the corresponding groups of rows (each represented as a [DataFrame]) as rows. * * A [GroupBy] can then be: * * [transformed][Transformation] into a new [GroupBy]; * * [reduced][Reducing] into a [DataFrame], where each group is collapsed into a single representative row; * * [aggregated][Aggregation] into a [DataFrame], where each group is transformed into one or more rows of derived values; - * * [pivoted][Pivoting] into a [PivotGroupBy] structure, which combines [pivot] and [groupBy] operations. + * * [pivoted][Pivoting] into a [PivotGroupBy] structure, which combines [pivot] and [groupBy] operations + * and then reduced or aggregated into a [DataFrame]. * * Grouping keys can also be created inline: * ```kotlin @@ -66,6 +67,8 @@ import kotlin.reflect.KProperty * * Don't confuse this with [group], which groups column into * [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup]. 
+ * + * See also [pivot][DataFrame.pivot] that groups rows of [DataFrame] horizontally. */ internal interface GroupByDocs { /** @@ -183,17 +186,41 @@ internal interface GroupByDocs { /** * ### [GroupBy] aggregation statistics - * * [count][Grouped.count] - * * [max][Grouped.max]/[maxOf][Grouped.maxOf]/[maxFor][Grouped.maxFor] - * * [min][Grouped.min]/[minOf][Grouped.minOf]/[minFor][Grouped.minFor] - * * [sum][Grouped.sum]/[sumOf][Grouped.sumOf]/[sumFor][Grouped.sumFor] - * * [mean][Grouped.mean]/[meanOf][Grouped.meanOf]/[meanFor][Grouped.meanFor] - * * [std][Grouped.std]/[stdOf][Grouped.stdOf]/[stdFor][Grouped.stdFor] - * * [median][Grouped.median]/[medianOf][Grouped.medianOf]/[medianFor][Grouped.medianFor] - * * [percentile][Grouped.percentile]/[percentileOf][Grouped.percentileOf]/[percentileFor][Grouped.percentileFor] + * + * Provides predefined shortcuts for the most common statistical aggregation operations + * that can be applied to each group within a [GroupBy]. + * + * Each function computes a statistic across the rows of a group and returns the result as + * a new column (or several columns) in the resulting [DataFrame]. 
+ * + * * [count][Grouped.count] — calculate the number of rows in each group; + * * [max][Grouped.max] / [maxOf][Grouped.maxOf] / [maxFor][Grouped.maxFor] — + * calculate the maximum of all values on the selected columns / by a row expression / + * for each of the selected columns within each group; + * * [min][Grouped.min] / [minOf][Grouped.minOf] / [minFor][Grouped.minFor] — + * calculate the minimum of all values on the selected columns / by a row expression / + * for each of the selected columns within each group; + * * [sum][Grouped.sum] / [sumOf][Grouped.sumOf] / [sumFor][Grouped.sumFor] — + * calculate the sum of all values on the selected columns / by a row expression / + * for each of the selected columns within each group; + * * [mean][Grouped.mean] / [meanOf][Grouped.meanOf] / [meanFor][Grouped.meanFor] — + * calculate the mean (average) of all values on the selected columns / by a row expression / + * for each of the selected columns within each group; + * * [std][Grouped.std] / [stdOf][Grouped.stdOf] / [stdFor][Grouped.stdFor] — + * calculate the standard deviation of all values on the selected columns / by a row expression / + * for each of the selected columns within each group; + * * [median][Grouped.median] / [medianOf][Grouped.medianOf] / [medianFor][Grouped.medianFor] — + * calculate the median of all values on the selected columns / by a row expression / + * for each of the selected columns within each group; + * * [percentile][Grouped.percentile] / [percentileOf][Grouped.percentileOf] / [percentileFor][Grouped.percentileFor] — + * calculate a specified percentile of all values on the selected columns / by a row expression / + * for each of the selected columns within each group. 
+ * + * For more information: {@include [DocumentationUrls.GroupByStatistics]} */ interface AggregationStatistics + /** * ### [GroupBy] transformations * diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt index f332df563c..6ba9c601e0 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt @@ -4,15 +4,22 @@ import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.RowFilter import org.jetbrains.kotlinx.dataframe.Selector import org.jetbrains.kotlinx.dataframe.aggregation.Aggregatable import org.jetbrains.kotlinx.dataframe.aggregation.AggregateBody +import org.jetbrains.kotlinx.dataframe.aggregation.AggregateDsl import org.jetbrains.kotlinx.dataframe.aggregation.AggregateGroupedDsl import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload import org.jetbrains.kotlinx.dataframe.columns.ColumnSet import org.jetbrains.kotlinx.dataframe.columns.ColumnsResolver import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls +import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarLink +import org.jetbrains.kotlinx.dataframe.documentation.Indent +import org.jetbrains.kotlinx.dataframe.documentation.LineBreak +import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns import org.jetbrains.kotlinx.dataframe.impl.aggregation.PivotGroupByImpl import org.jetbrains.kotlinx.dataframe.impl.aggregation.PivotImpl import org.jetbrains.kotlinx.dataframe.impl.aggregation.PivotInAggregateImpl @@ -20,6 +27,200 @@ import 
org.jetbrains.kotlinx.dataframe.impl.api.PivotChainColumnSet import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API import kotlin.reflect.KProperty +/** + * Splits the rows of this [DataFrame] and groups them horizontally + * into new columns based on values from one or several provided [\columns] of the original [DataFrame]. + * + * Returns a [Pivot] — a dataframe-like structure that contains all unique combinations of key values + * as columns (or [ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup]s) with a single row + * with the corresponding groups for each key combination (each represented as a [DataFrame]). + * + * Works like [DataFrame.groupBy] but groups rows horizontally. + * + * A [Pivot] can then be: + * * [reduced][Reducing] into a [DataRow], where each group is collapsed into a single representative row; + * * [aggregated][Aggregation] into a [DataRow], where each group is transformed into a new row of derived values; + * * [grouped][Grouping] into a [PivotGroupBy] structure, which combines [pivot] and [groupBy] operations + * and then reduced or aggregated into a [DataFrame]. 
+ * + * For more information: {@include [DocumentationUrls.Pivot]} + */ +internal interface PivotDocs { + + /** + * ## [pivot][pivot] Operation Grammar + * {@include [LineBreak]} + * {@include [DslGrammarLink]} + * {@include [LineBreak]} + * + * ### Create [Pivot] + * + * [**`pivot`**][pivot]**`(`**`inward: `[`Boolean`][Boolean]**` = true) { `**`pivotColumns: `[`PivotColumnsSelector`][PivotColumnsSelector]**` }`** + * + * ### Reduce [Pivot] into [DataRow] + * + * [Pivot][Pivot]`.`[**`minBy`**][Pivot.minBy]**` { `**`column: `[`RowExpression`][RowExpression]**` }`** + * + * {@include [Indent]} + * `| `__`.`__[**`maxBy`**][Pivot.maxBy]**` { `**`column: `[`RowExpression`][RowExpression]**` }`** + * + * {@include [Indent]} + * `| `__`.`__[**`first`**][Pivot.first]` \[ `**` { `**`rowCondition: `[`RowFilter`][RowFilter]**` } `**`]` + * + * {@include [Indent]} + * `| `__`.`__[**`last`**][Pivot.last]` \[ `**`{ `**`rowCondition: `[`RowFilter`][RowFilter]**` } `**`]` + * + * {@include [Indent]} + * `| `__`.`__[**`medianBy`**][Pivot.medianBy]**` { `**`column: `[`RowExpression`][RowExpression]**` }`** + * + * {@include [Indent]} + * `| `__`.`__[**`percentileBy`**][Pivot.percentileBy]**`(`**`percentile: `[`Double`][Double]**`) { `**`column: `[`RowExpression`][RowExpression]**` }`** + * + * {@include [Indent]} + * __`.`__[**`with`**][Pivot.with]**` { `**`rowExpression: `[`RowExpression`][RowExpression]**` }`** + * + * {@include [Indent]} + * `| `__`.`__[**`values`**][Pivot.values]**` { `**`valueColumns: `[`ColumnsSelector`][ColumnsSelector]**` }`** + * + * ### Aggregate [Pivot] into [DataRow] + * + * [Pivot][Pivot]`.`[**`count`**][Pivot.count]**`() `** + * + * {@include [Indent]} + * `| `__`.`__[**`frames`**][Pivot.frames]**`() `** + * + * {@include [Indent]} + * `| `__`.`__[**`with`**][Pivot.with]**` { `**`rowExpression: `[`RowExpression`][RowExpression]**` }`** + * + * {@include [Indent]} + * `| `__`.`__[**`values`**][Pivot.values]**` { `**`valueColumns: 
`[`ColumnsSelector`][ColumnsSelector]**` }`** + * + * {@include [Indent]} + * `| `__`.`__[**`aggregate`**][Pivot.aggregate]**` { `**`aggregations: `[`AggregateDsl`][AggregateDsl]**` }`** + * + * {@include [Indent]} + * `| `__`.`__[][PivotDocs.AggregationStatistics] + * + * ### Convert [Pivot] into [PivotGroupBy] and then reduce / aggregate + * + * {@include [Indent]} + * [Pivot][Pivot]`.`[**`groupBy`**][Pivot.groupBy]**` { `**`indexColumns: `[`ColumnsSelector`][ColumnsSelector]**` }`** + * + * {@include [Indent]} + * `| `__`.`__[**`groupByOther`**][Pivot.groupByOther]**`() `** + * + * {@include [Indent]} + * ` \[ `__`.`__[**`default`**][PivotGroupBy.default]**`(`**`defaultValue`**`) `**`]` + * + * {@include [Indent]} + * `| `__`.`__[**`minBy`**][PivotGroupBy.minBy]**` { `**`column: `[`ColumnSelector`][ColumnSelector]**` }`** + * + * {@include [Indent]} + * `| `__`.`__[**`maxBy`**][PivotGroupBy.maxBy]**` { `**`column: `[`ColumnSelector`][ColumnSelector]**` }`** + * + * {@include [Indent]} + * `| `__`.`__[**`first`**][PivotGroupBy.first]` \[ `**` { `**`rowCondition: `[`RowFilter`][RowFilter]**` } `**`]` + * + * {@include [Indent]} + * `| `__`.`__[**`last`**][PivotGroupBy.last]` \[ `**`{ `**`rowCondition: `[`RowFilter`][RowFilter]**` } `**`]` + * + * {@include [Indent]} + * `| `__`.`__[**`medianBy`**][PivotGroupBy.medianBy]**` { `**`column: `[`ColumnSelector`][ColumnSelector]**` }`** + * + * {@include [Indent]} + * `| `__`.`__[**`percentileBy`**][PivotGroupBy.percentileBy]**`(`**`percentile: `[`Double`][Double]**`) { `**`column: `[`ColumnSelector`][ColumnSelector]**` }`** + * + * {@include [Indent]} + * `| `__`.`__[**`with`**][PivotGroupBy.with]**` { `**`rowExpression: `[`RowExpression`][RowExpression]**` }`** + * + * {@include [Indent]} + * `| `__`.`__[**`values`**][PivotGroupBy.values]**` { `**`valueColumns: `[`ColumnsSelector`][ColumnsSelector]**` }`** + * + * {@include [Indent]} + * `| `__`.`__[**`count`**][PivotGroupBy.count]**`() `** + * + * {@include [Indent]} 
+ * `| `__`.`__[**`matches`**][PivotGroupBy.matches]**` { `**`predicate: `[`RowFilter`][RowFilter]**` }`** + * + * {@include [Indent]} + * `| `__`.`__[**`frames`**][PivotGroupBy.frames]**`() `** + * + * {@include [Indent]} + * `| `__`.`__[**`aggregate`**][PivotGroupBy.aggregate]**` { `**`aggregations: `[`AggregateDsl`][AggregateDsl]**` }`** + * + * {@include [Indent]} + * `| `__`.`__[][PivotDocs.AggregationStatistics] + */ + interface Grammar + + /** + * ### [Pivot] common description + * + * A pivot reorganizes data **horizontally**: + * * pivot key values → **new columns**; + * * index columns (from [groupBy][Pivot.groupBy] / [groupByOther][Pivot.groupByOther]) → **rows**; + * * each cell is produced either by a **reducer** (single selected row → single value) + * or an **aggregator** (all rows in the cell → one or more values). + * + * If a [default][PivotGroupBy.default] value is set, missing cells are filled with it. + */ + interface CommonDescription + + /** + * ### [Pivot] reducing + * + * Produces a [DataFrame] with **exactly one value per pivot cell**. + * + * Available reducers (both for [Pivot] and [PivotGroupBy]): + * * [minBy] / [maxBy] — select the row with the minimum/maximum value of a column; + * * [first] / [last] — take the first/last row (optionally with a [RowFilter]); + * * [medianBy] / [percentileBy] — select the row at median / a given percentile; + * * [with] — compute the cell value via a [RowExpression] from the selected row; + * * [values] — copy one or more columns from the selected row into cells. + */ + interface Reducing + + /** + * ### [Pivot] aggregation + * + * Produces a [DataFrame] with **one or more values per pivot cell** by combining **all rows** in the cell. 
+ * + * Available aggregators (both for [Pivot] and [PivotGroupBy]): + * * [count] — number of rows in the cell; + * * [matches] — number of rows satisfying a predicate; + * * [frames] — collect rows as a [frame column][org.jetbrains.kotlinx.dataframe.columns.FrameColumn]; + * * [with] — compute a value using a [RowExpression] over all rows in the cell; + * * [values] — project one or more columns as aggregated cell values; + * * [aggregate] — custom multi-aggregation via [AggregateDsl]; + * * [Various aggregation statistics][PivotDocs.AggregationStatistics] — predefined shortcuts + * such as sum/mean/median/std/percentile, etc. + */ + interface Aggregation + + + interface Grouping + + /** + * ### [Pivot] aggregation statistics + * * [count][Pivot.count] + * * [max][Pivot.max]/[maxOf][Pivot.maxOf]/[maxFor][Pivot.maxFor] + * * [min][Pivot.min]/[minOf][Pivot.minOf]/[minFor][Pivot.minFor] + * * [sum][Pivot.sum]/[sumOf][Pivot.sumOf]/[sumFor][Pivot.sumFor] + * * [mean][Pivot.mean]/[meanOf][Pivot.meanOf]/[meanFor][Pivot.meanFor] + * * [std][Pivot.std]/[stdOf][Pivot.stdOf]/[stdFor][Pivot.stdFor] + * * [median][Pivot.median]/[medianOf][Pivot.medianOf]/[medianFor][Pivot.medianFor] + * * [percentile][Pivot.percentile]/[percentileOf][Pivot.percentileOf]/[percentileFor][Pivot.percentileFor] + */ + interface AggregationStatistics + + /** + * {@comment Version of SelectingColumns with correctly filled examples for pivot keys and index columns} + * @include [SelectingColumns] {@include [SetPivotOperationArg]} + */ + interface PivotSelectingOptions +} + + public interface PivotDsl : ColumnsSelectionDsl { public infix fun ColumnsResolver.then(other: ColumnsResolver): ColumnSet = diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt index 12a74b9bf1..6e8ae09929 100644 --- 
a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt @@ -153,6 +153,12 @@ internal interface DocumentationUrls { /** [See "`GroupBy` Aggregation" on the documentation website.]({@include [Url]}/groupby.html#aggregation) */ interface GroupByAggregation - /** [See "`pivot` + `groupBy`" on the documentation website.]({@include [Url]}/groupby.html#pivot-groupby) */ + /** [See "`groupBy` statistics" on the documentation website.]({@include [Url]}/summarystatistics.html#groupby-statistics) */ + interface GroupByStatistics + + /** [See "`pivot` + `groupBy`" on the documentation website.]({@include [Url]}/pivot.html#pivot-groupby) */ interface PivotGroupBy + + /** [See `pivot` on the documentation website.]({@include [Url]}/pivot.html) */ + interface Pivot } From 3d40ee3511bb38b50666532d0095eac162229d02 Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Wed, 5 Nov 2025 14:24:21 +0400 Subject: [PATCH 2/6] pivot kdocs fixes --- .../kotlinx/dataframe/api/groupBy.kt | 20 +- .../jetbrains/kotlinx/dataframe/api/pivot.kt | 447 ++++++++++++++---- .../documentation/DocumentationUrls.kt | 11 +- .../documentation/SelectingColumns.kt | 2 + 4 files changed, 384 insertions(+), 96 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt index 600abe30a3..f6cc05dd11 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt @@ -163,16 +163,16 @@ internal interface GroupByDocs { * ### Pivot [GroupBy] into [PivotGroupBy] and reduce / aggregate it * * {@include [Indent]} - * `| `__`.`__[**`pivot`**][GroupBy.pivot]**` { `**`columns: `[`ColumnsSelector`][ColumnsSelector]**` }`** + * [GroupBy][GroupBy]`.`[**`pivot`**][GroupBy.pivot]**` { `**`columns: 
`[`ColumnsSelector`][ColumnsSelector]**` }`** * * {@include [Indent]} * ` \[ `__`.`__[**`default`**][PivotGroupBy.default]**`(`**`defaultValue`**`) `**`]` * * {@include [Indent]} - * `| `__`.`__[][PivotGroupByDocs.Reducing] + * __`.`__[][PivotGroupByDocs.Reducing] * * {@include [Indent]} - * `| `__`.`__[][PivotGroupByDocs.Aggregation] + * `| `__`.`__[][PivotGroupByDocs.Aggregation] * * Check out [PivotGroupBy Grammar][PivotGroupByDocs.Grammar] for more information. */ @@ -262,8 +262,8 @@ internal interface GroupByDocs { * These functions return a [ReducedGroupBy], which can then be transformed into a new [DataFrame] * containing the reduced rows (either original or transformed) using one of the following methods: * * [concat][ReducedGroupBy.concat] — simply concatenates all reduced rows; - * * [values][ReducedGroupBy.values] — creates a [DataFrame] with new rows by transforming each reduced row - * using [ColumnsForAggregateSelectionDsl]; + * * [values][ReducedGroupBy.values] — creates a [DataFrame] containing the values + * from the reduced rows in the selected columns. * * [into][ReducedGroupBy.into] — creates a new column with values computed with [RowExpression] on each row, * or a new [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup] * containing each group reduced to a single row; @@ -289,14 +289,16 @@ internal interface GroupByDocs { * The following aggregation methods are available: * * [concat][GroupBy.concat] — concatenates all rows from all groups into a single [DataFrame], * without preserving grouping keys; + * * [toDataFrame][GroupBy.toDataFrame] — returns this [GroupBy] as [DataFrame] with the grouping keys and + * corresponding groups in [FrameColumn]. 
* * [concatWithKeys][GroupBy.concatWithKeys] — a variant of [concat][GroupBy.concat] that also includes * grouping keys that were not present in the original [DataFrame]; * * [into][GroupBy.into] — creates a new column containing a list of values computed with a [RowExpression] * for each group, or a new [frame column][org.jetbrains.kotlinx.dataframe.columns.FrameColumn] * containing the groups themselves; - * * [values][ReducedGroupBy.values] — creates a [DataFrame] with new rows produced by transforming - * each group using [ColumnsForAggregateSelectionDsl]; - * * [count][Grouped.count] — returns a [DataFrame] containing the grouping key columns and an additional column + * * [values][Grouped.values] — creates a [DataFrame] containing values collected into a single [List] + * from all rows of each group for the selected columns; + * * [count][Grouped.count] — creates a [DataFrame] containing the grouping key columns and an additional column * with the number of rows in each corresponding group; * * [aggregate][Grouped.aggregate] — performs a set of custom aggregations using [AggregateDsl], * allowing you to compute one or more derived values per group; @@ -375,7 +377,7 @@ public fun DataFrame.groupBy(vararg cols: AnyColumnReference, moveToTop: // endregion /** - * Groups the rows of this [Pivot] into [PivotGroupBy] + * Groups the rows within each group of this [Pivot] * based on the values in one or more specified [key columns][\columns]. * * Works like regular [DataFrame.groupBy] on pivot groups. 
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt index 6ba9c601e0..fc3eede97f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt @@ -12,11 +12,15 @@ import org.jetbrains.kotlinx.dataframe.aggregation.AggregateBody import org.jetbrains.kotlinx.dataframe.aggregation.AggregateDsl import org.jetbrains.kotlinx.dataframe.aggregation.AggregateGroupedDsl import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload +import org.jetbrains.kotlinx.dataframe.api.Select.SelectSelectingOptions +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnSet import org.jetbrains.kotlinx.dataframe.columns.ColumnsResolver +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarLink +import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources import org.jetbrains.kotlinx.dataframe.documentation.Indent import org.jetbrains.kotlinx.dataframe.documentation.LineBreak import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns @@ -32,7 +36,7 @@ import kotlin.reflect.KProperty * into new columns based on values from one or several provided [\columns] of the original [DataFrame]. * * Returns a [Pivot] — a dataframe-like structure that contains all unique combinations of key values - * as columns (or [ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup]s) with a single row + * as columns (or [column groups][ColumnGroup] for multiple keys) with a single row * with the corresponding groups for each key combination (each represented as a [DataFrame]). 
* * Works like [DataFrame.groupBy] but groups rows horizontally. @@ -43,6 +47,12 @@ import kotlin.reflect.KProperty * * [grouped][Grouping] into a [PivotGroupBy] structure, which combines [pivot] and [groupBy] operations * and then reduced or aggregated into a [DataFrame]. * + * Check out [Grammar]. + * + * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention] + * + * See [Selecting Columns][SelectingColumns]. + * * For more information: {@include [DocumentationUrls.Pivot]} */ internal interface PivotDocs { @@ -101,137 +111,178 @@ internal interface PivotDocs { * {@include [Indent]} * `| `__`.`__[][PivotDocs.AggregationStatistics] * - * ### Convert [Pivot] into [PivotGroupBy] and then reduce / aggregate + * ### Group [Pivot] into [PivotGroupBy] and reduce / aggregate it * - * {@include [Indent]} - * [Pivot][Pivot]`.`[**`groupBy`**][Pivot.groupBy]**` { `**`indexColumns: `[`ColumnsSelector`][ColumnsSelector]**` }`** + * [Pivot][Pivot]`.`[**`groupBy`**][Pivot.groupBy]**` { `**`columns: `[`ColumnsSelector`][ColumnsSelector]**` }`** * * {@include [Indent]} - * `| `__`.`__[**`groupByOther`**][Pivot.groupByOther]**`() `** + * `| `__`.`__[**`groupByOther`**][Pivot.groupByOther]**`()`** * * {@include [Indent]} * ` \[ `__`.`__[**`default`**][PivotGroupBy.default]**`(`**`defaultValue`**`) `**`]` * * {@include [Indent]} - * `| `__`.`__[**`minBy`**][PivotGroupBy.minBy]**` { `**`column: `[`ColumnSelector`][ColumnSelector]**` }`** - * - * {@include [Indent]} - * `| `__`.`__[**`maxBy`**][PivotGroupBy.maxBy]**` { `**`column: `[`ColumnSelector`][ColumnSelector]**` }`** - * - * {@include [Indent]} - * `| `__`.`__[**`first`**][PivotGroupBy.first]` \[ `**` { `**`rowCondition: `[`RowFilter`][RowFilter]**` } `**`]` - * - * {@include [Indent]} - * `| `__`.`__[**`last`**][PivotGroupBy.last]` \[ `**`{ `**`rowCondition: `[`RowFilter`][RowFilter]**` } `**`]` - * - * {@include [Indent]} - * `| `__`.`__[**`medianBy`**][PivotGroupBy.medianBy]**` { `**`column: 
`[`ColumnSelector`][ColumnSelector]**` }`** - * - * {@include [Indent]} - * `| `__`.`__[**`percentileBy`**][PivotGroupBy.percentileBy]**`(`**`percentile: `[`Double`][Double]**`) { `**`column: `[`ColumnSelector`][ColumnSelector]**` }`** - * - * {@include [Indent]} - * `| `__`.`__[**`with`**][PivotGroupBy.with]**` { `**`rowExpression: `[`RowExpression`][RowExpression]**` }`** + * `| `__`.`__[][PivotGroupByDocs.Reducing] * * {@include [Indent]} - * `| `__`.`__[**`values`**][PivotGroupBy.values]**` { `**`valueColumns: `[`ColumnsSelector`][ColumnsSelector]**` }`** + * `| `__`.`__[][PivotGroupByDocs.Aggregation] * - * {@include [Indent]} - * `| `__`.`__[**`count`**][PivotGroupBy.count]**`() `** - * - * {@include [Indent]} - * `| `__`.`__[**`matches`**][PivotGroupBy.matches]**` { `**`predicate: `[`RowFilter`][RowFilter]**` }`** - * - * {@include [Indent]} - * `| `__`.`__[**`frames`**][PivotGroupBy.frames]**`() `** - * - * {@include [Indent]} - * `| `__`.`__[**`aggregate`**][PivotGroupBy.aggregate]**` { `**`aggregations: `[`AggregateDsl`][AggregateDsl]**` }`** - * - * {@include [Indent]} - * `| `__`.`__[][PivotDocs.AggregationStatistics] + * Check out [PivotGroupBy Grammar][PivotGroupByDocs.Grammar] for more information. */ interface Grammar - /** - * ### [Pivot] common description - * - * A pivot reorganizes data **horizontally**: - * * pivot key values → **new columns**; - * * index columns (from [groupBy][Pivot.groupBy] / [groupByOther][Pivot.groupByOther]) → **rows**; - * * each cell is produced either by a **reducer** (single selected row → single value) - * or an **aggregator** (all rows in the cell → one or more values). - * - * If a [default][PivotGroupBy.default] value is set, missing cells are filled with it. - */ - interface CommonDescription - /** * ### [Pivot] reducing * - * Produces a [DataFrame] with **exactly one value per pivot cell**. 
- * - * Available reducers (both for [Pivot] and [PivotGroupBy]): - * * [minBy] / [maxBy] — select the row with the minimum/maximum value of a column; - * * [first] / [last] — take the first/last row (optionally with a [RowFilter]); - * * [medianBy] / [percentileBy] — select the row at median / a given percentile; - * * [with] — compute the cell value via a [RowExpression] from the selected row; - * * [values] — copy one or more columns from the selected row into cells. + * Each [Pivot] group can be collapsed into a single row and then concatenated + * into a new [DataRow] with these row values (or their derived representation) + * with [pivot] keys as top-level columns or as [column groups][ColumnGroup]. + * + * Reducing is a specific case of [aggregation][Aggregation]. + * + * First, choose a [Pivot] reducing method: + * * [first][Pivot.first], [last][Pivot.last] — take the first or last row + * (optionally, the first or last one that satisfies a predicate) of each group; + * * [minBy][Pivot.minBy] / [maxBy][Pivot.maxBy] — take the row with the minimum or maximum value + * of the given [RowExpression] evaluated on rows within each group; + * * [medianBy][Pivot.medianBy] / [percentileBy][Pivot.percentileBy] — take the row with + * the median or a specific percentile value of the given [RowExpression] evaluated on rows within each group. + * + * These functions return a [ReducedPivot], which can then be transformed into a single combined [DataRow] + * (either using the original reduced rows or their transformed versions) + * through one of the following methods: + * * [values][ReducedPivot.values] — creates a new row containing the values + * from the reduced rows in the selected columns and produces a [DataRow] of + * these values; + * * [with][ReducedPivot.with] — computes a new value for each reduced row using a [RowExpression], + * and produces a [DataRow] containing these computed values. 
+ * + * Each method returns a new [DataRow] with [pivot] keys as top-level columns + * (or as [column groups][ColumnGroup]) and values composed of the reduced results from each group. + * + * Check out [`Pivot grammar`][Grammar]. */ interface Reducing /** * ### [Pivot] aggregation * - * Produces a [DataFrame] with **one or more values per pivot cell** by combining **all rows** in the cell. - * - * Available aggregators (both for [Pivot] and [PivotGroupBy]): - * * [count] — number of rows in the cell; - * * [matches] — number of rows satisfying a predicate; - * * [frames] — collect rows as a [frame column][org.jetbrains.kotlinx.dataframe.columns.FrameColumn]; - * * [with] — compute a value using a [RowExpression] over all rows in the cell; - * * [values] — project one or more columns as aggregated cell values; - * * [aggregate] — custom multi-aggregation via [AggregateDsl]; - * * [Various aggregation statistics][PivotDocs.AggregationStatistics] — predefined shortcuts - * such as sum/mean/median/std/percentile, etc. + * Each [Pivot] group can be aggregated — that is, transformed into a new value, [DataRow], or [DataFrame] — + * and then concatenated into a single [DataRow] composed of these aggregated results, + * with [pivot] keys as top-level columns or as [column groups][ColumnGroup]. 
+ * + * The following aggregation methods are available: + * * [frames][Pivot.frames] — returns this [Pivot] as a [DataRow] with pivot keys as columns + * (or [column groups][ColumnGroup]) and corresponding groups stored as [FrameColumn]s; + * * [values][Pivot.values] — creates a [DataRow] containing values collected into a single [List] + * from all rows of each group for the selected columns; + * * [count][Pivot.count] — creates a [DataRow] containing the pivot key columns and an additional column + * with the number of rows in each corresponding group; + * * [with][Pivot.with] — creates a [DataRow] containing values computed using a [RowExpression] + * across all rows of each group and collected into a single [List] for every group; + * * [aggregate][Pivot.aggregate] — performs a set of custom aggregations using [AggregateDsl], + * allowing computation of one or more derived values per group; + * * [Various aggregation statistics][AggregationStatistics] — predefined shortcuts + * for common statistical aggregations such as [sum][Pivot.sum], [mean][Pivot.mean], + * [median][Pivot.median], and others. + * + * Each of these methods returns a new [DataRow] with [pivot] keys as top-level columns + * (or as [column groups][ColumnGroup]) and values representing the aggregated results of each group. + * + * Check out [`Pivot grammar`][Grammar]. */ interface Aggregation - + /** + * ### [Pivot] grouping + * + * [Pivot] can be grouped with the [groupBy][Pivot.groupBy] method. It will produce a [PivotGroupBy]. 
+ * + * @include [PivotGroupByDocs.CommonDescription] + */ interface Grouping /** * ### [Pivot] aggregation statistics - * * [count][Pivot.count] - * * [max][Pivot.max]/[maxOf][Pivot.maxOf]/[maxFor][Pivot.maxFor] - * * [min][Pivot.min]/[minOf][Pivot.minOf]/[minFor][Pivot.minFor] - * * [sum][Pivot.sum]/[sumOf][Pivot.sumOf]/[sumFor][Pivot.sumFor] - * * [mean][Pivot.mean]/[meanOf][Pivot.meanOf]/[meanFor][Pivot.meanFor] - * * [std][Pivot.std]/[stdOf][Pivot.stdOf]/[stdFor][Pivot.stdFor] - * * [median][Pivot.median]/[medianOf][Pivot.medianOf]/[medianFor][Pivot.medianFor] - * * [percentile][Pivot.percentile]/[percentileOf][Pivot.percentileOf]/[percentileFor][Pivot.percentileFor] + * + * Provides predefined shortcuts for the most common statistical aggregation operations + * that can be applied to each group within a [Pivot]. + * + * Each function computes a statistic across the rows of a group and returns the result as + * a new row of computed values in the resulting [DataFrame]. + * + * * [count][Pivot.count] — calculate the number of rows in each group; + * * [max][Pivot.max] / [maxOf][Pivot.maxOf] / [maxFor][Pivot.maxFor] — + * calculate the maximum of all values on the selected columns / by a row expression / + * for each of the selected columns within each group; + * * [min][Pivot.min] / [minOf][Pivot.minOf] / [minFor][Pivot.minFor] — + * calculate the minimum of all values on the selected columns / by a row expression / + * for each of the selected columns within each group; + * * [sum][Pivot.sum] / [sumOf][Pivot.sumOf] / [sumFor][Pivot.sumFor] — + * calculate the sum of all values on the selected columns / by a row expression / + * for each of the selected columns within each group; + * * [mean][Pivot.mean] / [meanOf][Pivot.meanOf] / [meanFor][Pivot.meanFor] — + * calculate the mean (average) of all values on the selected columns / by a row expression / + * for each of the selected columns within each group; + * * [std][Pivot.std] / [stdOf][Pivot.stdOf] / 
[stdFor][Pivot.stdFor] — + * calculate the standard deviation of all values on the selected columns / by a row expression / + * for each of the selected columns within each group; + * * [median][Pivot.median] / [medianOf][Pivot.medianOf] / [medianFor][Pivot.medianFor] — + * calculate the median of all values on the selected columns / by a row expression / + * for each of the selected columns within each group; + * * [percentile][Pivot.percentile] / [percentileOf][Pivot.percentileOf] / [percentileFor][Pivot.percentileFor] — + * calculate a specified percentile of all values on the selected columns / by a row expression / + * for each of the selected columns within each group. + * + * For more information: {@include [DocumentationUrls.PivotStatistics]} */ interface AggregationStatistics - /** - * {@comment Version of SelectingColumns with correctly filled examples for pivot keys and index columns} - * @include [SelectingColumns] {@include [SetPivotOperationArg]} - */ - interface PivotSelectingOptions } - +/** + * A specialized [ColumnsSelectionDsl] that allows specifying [pivot] key ordering + * using the [then] function. + * + * @include [PivotDslDocs] + */ public interface PivotDsl : ColumnsSelectionDsl { + /** + * @include [ThenDocs] + */ public infix fun ColumnsResolver.then(other: ColumnsResolver): ColumnSet = PivotChainColumnSet(this, other) + /** + * @include [ThenDocs] + */ public infix fun String.then(other: ColumnsResolver): ColumnSet = toColumnOf() then other + /** + * @include [ThenDocs] + */ public infix fun ColumnsResolver.then(other: String): ColumnSet = this then other.toColumnOf() + /** + * @include [ThenDocs] + */ public infix fun String.then(other: String): ColumnSet = toColumnAccessor() then other.toColumnAccessor() + /** + * Specifies the ordering of the [pivot] key columns. + * + * In the resulting [Pivot], the receiver column (or columns) will appear + * one level above the keys from columns provided by [\other]. 
+ * + * @receiver pivot key column(s) that appear **above** in the hierarchy. + * @param [\other] pivot key column(s) that appear **below** (as child keys of the receiver + * columns keys) in the hierarchy. + * @return A special [ColumnSet] representing the hierarchical pivot key ordering. + */ + @ExcludeFromSources + private interface ThenDocs + @Deprecated(DEPRECATED_ACCESS_API) @AccessApiOverload public infix fun KProperty.then(other: ColumnsResolver): ColumnSet = toColumnAccessor() then other @@ -254,13 +305,88 @@ public interface PivotDsl : ColumnsSelectionDsl { public infix fun String.then(other: KProperty): ColumnSet = toColumnOf() then other.toColumnAccessor() } +/** + * [PivotDsl] defines how key columns are selected and structured in a [pivot]: + * * [pivot] with a single key column produces a [Pivot] containing one column for each unique key + * (i.e., key column unique values) with the corresponding group; + * * [pivot] with multiple keys combined using [and] produces a [Pivot] + * with independent [column groups][ColumnGroup] for each key column, each having subcolumns + * with the keys corresponding to their unique values; + * * [pivot] with multiple keys ordered using [then] produces a [Pivot] + * with nested [column groups][ColumnGroup], representing a hierarchical structure of + * keys combinations from the pivoted columns — i.e., one group per unique key combination. + * + * See [Columns Selection via DSL][SelectingColumns.Dsl]. 
+ * + * ### Examples + * ```kotlin + * // Pivot by the "city" column + * df.pivot { city } + * + * // Independent pivot by "city" and "lastName" (from the "name" column group) + * df.pivot { city and name.lastName } + * + * // Hierarchical pivot by two columns with composite ("city", "lastName") keys + * df.pivot { city then name.lastName } + * ``` + */ +@ExcludeFromSources +private interface PivotDslDocs + // region DataFrame +/** + * {@include [PivotDocs]} + * ### This `pivot` Overload + */ +@ExcludeFromSources +private interface CommonPivotDocs + + // region pivot +/** + * @include [CommonPivotDocs] + * Select or express pivot columns using the [PivotDsl]. + * + * @include [PivotDslDocs] + * @param inward If `true` (default), the generated pivoted columns are nested inside the original column; + * otherwise, they are placed at the top level. + * @param columns The [Pivot Columns Selector][PivotColumnsSelector] that defines which columns are used + * as keys for pivoting and in which order. + * @return A new [Pivot] containing the unique values of the selected column as new columns + * (or as [column groups][ColumnGroup] for multiple key columns), + * with their corresponding groups of rows represented as [DataFrame]s. + */ public fun DataFrame.pivot(inward: Boolean? = null, columns: PivotColumnsSelector): Pivot = PivotImpl(this, columns, inward) +/** + * @include [CommonPivotDocs] + * @include [SelectingColumns.ColumnNames] + * * [pivot] with a single key column produces a [Pivot] containing one column for each unique key + * (i.e., key column unique values) with the corresponding group; + * * [pivot] with multiple keys combined using [and] produces a [Pivot] + * with independent [column groups][ColumnGroup] for each key column, each having subcolumns + * with the keys corresponding to their unique values; + * + * For pivoting by multiple keys combinations from different columns, use the [pivot] overload with [PivotDsl]. 
+ * ### Examples + * ```kotlin + * // Pivot by the "city" column + * df.pivot("city") + * + * // Independent pivot by "city" and "lastName" + * df.pivot("city", "lastName") + * ``` + * @param inward If `true` (default), the generated pivoted columns are nested inside the original column; + * otherwise, they are placed at the top level. + * @param columns The [Column Names][String] that defines which columns are used + * as keys for pivoting. + * @return A new [Pivot] containing the unique values of the selected column as new columns + * (or as [column groups][ColumnGroup] for multiple key columns), + * with their corresponding groups of rows represented as [DataFrame]s. + */ public fun DataFrame.pivot(vararg columns: String, inward: Boolean? = null): Pivot = pivot(inward) { columns.toColumnSet() } @@ -278,9 +404,79 @@ public fun DataFrame.pivot(vararg columns: KProperty<*>, inward: Boolean? // region pivotMatches +/** + * * Cell values are [Boolean] indicators showing whether matching rows exist + * for each pivoting/grouping key combination. + */ +internal interface PivotMatchesResultDescription + +/** + * Computes whether matching rows exist in this [DataFrame] for all unique values of the + * selected columns (independently) across all possible combinations + * of values in the remaining columns (all expecting selected). + * + * Performs a [pivot] operation on the specified [\columns] of this [DataFrame], + * then [groups it by][Pivot.groupByOther] the remaining columns, + * and produces a new matrix-like [DataFrame]. + * + * @include [PivotGroupByDocs.ResultingMatrixCommonDescription] + * @include [PivotMatchesResultDescription] + * + * This function combines [pivot][DataFrame.pivot], [groupByOther][Pivot.groupByOther], + * and [matches][PivotGroupBy.matches] operations into a single call. + * + * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention] + * + * See [Selecting Columns][SelectSelectingOptions]. 
+ * + * For more information: {@include [DocumentationUrls.PivotMatches]} + * + * See also: [pivotCounts], which performs a similar operation + * but counts the number of matching rows instead of checking for their presence. + * + * ### This `pivotMatches` Overload + */ +internal interface DataFramePivotMatchesCommonDocs + +/** + * @include [DataFramePivotMatchesCommonDocs] + * @include [SelectingColumns.Dsl] + * + * ### Example + * ```kotlin + * // Compute whether matching rows exist for all unique values of "city" + * // and "name" (independently) across all possible combinations + * // of values in the remaining columns. + * df.pivotMatches { city and name } + * ``` + * + * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; + * if `false`, they are placed at the top level. + * @param [columns] The [Columns Selector][ColumnsSelector] that defines which columns are used as [pivot] keys for the operation. + * @return A new [DataFrame] representing a Boolean presence matrix — with grouping key columns as rows, + * pivot key values as columns, and `true`/`false` cells indicating existing combinations. + */ public fun DataFrame.pivotMatches(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).groupByOther().matches() +/** + * @include [DataFramePivotMatchesCommonDocs] + * @include [SelectingColumns.ColumnNames] + * + * ### Example + * ```kotlin + * // Compute whether matching rows exist for all unique values of "city" + * // and "name" (independently) across all possible combinations + * // of values in the remaining columns. + * df.pivotMatches("city", "name") + * ``` + * + * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; + * if `false`, they are placed at the top level. + * @param [columns] The [Column Names][String] that defines which columns are used as [pivot] keys for the operation. 
+ * @return A new [DataFrame] representing a Boolean presence matrix — with grouping key columns as rows, + * pivot key values as columns, and `true`/`false` cells indicating existing combinations. + */ public fun DataFrame.pivotMatches(vararg columns: String, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumnSet() } @@ -298,9 +494,79 @@ public fun DataFrame.pivotMatches(vararg columns: KProperty<*>, inward: B // region pivotCounts +/** + * * Cell values represent the number of matching rows + * for each pivoting/grouping key combination. + */ +internal interface PivotCountsResultDescription + +/** + * Computes number of matching rows in this [DataFrame] for all unique values of the + * selected columns (independently) across all possible combinations + * of values in the remaining columns (all expecting selected). + * + * Performs a [pivot] operation on the specified [\columns] of this [DataFrame], + * then [groups it by][Pivot.groupByOther] the remaining columns, + * and produces a new matrix-like [DataFrame]. + * + * @include [PivotGroupByDocs.ResultingMatrixCommonDescription] + * @include [PivotCountsResultDescription] + * + * This function combines [pivot][DataFrame.pivot], [groupByOther][Pivot.groupByOther], + * and [count][PivotGroupBy.count] operations into a single call. + * + * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention] + * + * See [Selecting Columns][SelectSelectingOptions]. + * + * For more information: {@include [DocumentationUrls.PivotMatches]} + * + * See also: [pivotMatches], which performs a similar operation + * but check if there is any matching row instead of counting then. 
+ * + * ### This `pivotCounts` Overload + */ +internal interface DataFramePivotCountsCommonDocs + +/** + * @include [DataFramePivotCountsCommonDocs] + * @include [SelectingColumns.Dsl] + * + * ### Example + * ```kotlin + * // Compute number of matching rows for all unique values of "city" + * // and "name" (independently) across all possible combinations + * // of values in the remaining columns. + * df.pivotCounts { city and name } + * ``` + * + * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; + * if `false`, they are placed at the top level. + * @param [columns] The [Columns Selector][ColumnsSelector] that defines which columns are used as [pivot] keys for the operation. + * @return A new [DataFrame] representing a counting matrix — with grouping key columns as rows, + * pivot key values as columns, and the number of rows with the corresponding combinations in the cells. + */ public fun DataFrame.pivotCounts(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).groupByOther().count() +/** + * @include [DataFramePivotCountsCommonDocs] + * @include [SelectingColumns.ColumnNames] + * + * ### Example + * ```kotlin + * // Compute number of matching rows for all unique values of "city" + * // and "name" (independently) across all possible combinations + * // of values in the remaining columns. + * df.pivotCounts("city", "name") + * ``` + * + * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; + * if `false`, they are placed at the top level. + * @param [columns] The [Column Names][String] that defines which columns are used as [pivot] keys for the operation. + * @return A new [DataFrame] representing a counting matrix — with grouping key columns as rows, + * pivot key values as columns, and the number of rows with the corresponding combinations in the cells. 
+ */ public fun DataFrame.pivotCounts(vararg columns: String, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumnSet() } @@ -479,6 +745,15 @@ internal inline fun Pivot.delegate(crossinline body: PivotGroupBy.() - internal interface PivotGroupByDocs { + /** + * In the resulting [DataFrame]: + * * Pivoted columns are displayed vertically — as [column groups][ColumnGroup] for each pivoted column, + * with subcolumns corresponding to their unique values; + * * Grouping key columns are displayed horizontally — as columns representing + * unique combinations of grouping key values; + */ + interface ResultingMatrixCommonDescription + /** * [PivotGroupBy] is a dataframe-like structure, combining [Pivot] and [GroupBy] * and representing a matrix table with vertical [Pivot] groups (as columns) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt index 6e8ae09929..a8db8a0d2d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt @@ -159,6 +159,15 @@ internal interface DocumentationUrls { /** [See "`pivot` + `groupBy`" on the documentation website.]({@include [Url]}/pivot.html#pivot-groupby) */ interface PivotGroupBy - /** [See `pivot` on the documentation website.]({@include [Url]}/pivot.html) */ + /** [See `pivot` on the documentation website.]({@include [Url]}/pivot.html) */ interface Pivot + + /** [See `pivotMatches` on the documentation website.]({@include [Url]}/pivot.html#pivotmatches) */ + interface PivotMatches + + /** [See `pivotCounts` on the documentation website.]({@include [Url]}/pivot.html#pivotcounts) */ + interface PivotCounts + + /** [See "`pivot` statistics" on the documentation website.]({@include [Url]}/summarystatistics.html#pivot-statistics) */ + interface 
PivotStatistics } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt index 506d1748d1..4cc8853c3c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt @@ -68,6 +68,8 @@ internal interface SelectingColumns { * expects you to return a [SingleColumn] or [ColumnSet] (so, a [ColumnsResolver]). * This is an entity formed by calling any (combination) of the functions * in the DSL that is or can be resolved into one or more columns. + * This also allows you to use [Extension Properties API][ExtensionPropertiesAPIDocs] + * for type- and name-safe columns selection. * * #### NOTE: * While you can use the {@include [AccessApi.StringApiLink]} and {@include [AccessApi.KPropertiesApiLink]} From d19df01e88b25d4dad28c17a67195634480c19e2 Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Thu, 6 Nov 2025 15:28:32 +0400 Subject: [PATCH 3/6] pivotCount, pivotMatches kdocs --- .../kotlinx/dataframe/api/groupBy.kt | 41 ++-- .../jetbrains/kotlinx/dataframe/api/pivot.kt | 220 +++++++++++++++++- .../documentation/SelectingColumns.kt | 17 +- 3 files changed, 245 insertions(+), 33 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt index 39d81a7a3d..72f6d795bf 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt @@ -50,12 +50,7 @@ import kotlin.reflect.KProperty * * [pivoted][Pivoting] into a [PivotGroupBy] structure, which combines [pivot] and [groupBy] operations * and then reduced or aggregated into a [DataFrame]. 
* - * Grouping keys can also be created inline: - * ```kotlin - * // Create a new column "newName" based on existing "oldName" values - * // and use it as a grouping key: - * df.groupBy { expr("newName") { oldName.drop(5) } } - * ``` + * @include [GroupingKeysInline] * * Check out [Grammar]. * @@ -324,6 +319,17 @@ internal interface GroupByDocs { * @include [PivotGroupByDocs.CommonDescription] */ interface Pivoting + + /** + * Grouping key columns can also be created inline: + * ```kotlin + * // Create a new column "newName" based on existing "oldName" values + * // and use it as a grouping key: + * df.groupBy { expr("newName") { oldName.drop(5) } } + * ``` + */ + @ExcludeFromSources + interface GroupingKeysInline } /** {@set [SelectingColumns.OPERATION] [groupBy][groupBy]} */ @@ -379,17 +385,11 @@ public fun DataFrame.groupBy(vararg cols: AnyColumnReference, moveToTop: /** * Groups the rows of this [Pivot] groups * based on the values in one or more specified [key columns][\columns]. - * - * Works like regular [DataFrame.groupBy] on pivot groups. - * - * Grouping keys can also be created inline: - * ```kotlin - * // Create a new column "newName" based on existing "oldName" values - * // and use it as a grouping key: - * pivot.groupBy { expr("newName") { oldName.drop(5) } } - * ``` + * Returns a [PivotGroupBy]. 
* * @include [PivotGroupByDocs.CommonDescription] + * + * @include [GroupByDocs.GroupingKeysInline] */ @ExcludeFromSources private interface GroupByForPivotDocs @@ -405,17 +405,8 @@ private interface CommonGroupByForPivotDocs /** * {@include [CommonGroupByForPivotDocs]} - * @include [SelectingColumns.Dsl] - * - * #### For example: - * - * `pivot.`{@get [OPERATION]}` { length `[and][ColumnsSelectionDsl.and]` age }` + * @include [SelectingColumns.Dsl.WithExample] {@include [SetGroupByOperationArg] {@set [SelectingColumns.RECEIVER] `pivot`}} * - * `pivot.`{@get [OPERATION]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }` - * - * `pivot.`{@get [OPERATION]}` { `[colsOf][ColumnsSelectionDsl.colsOf]`<`[Double][Double]`>() }` - * - * {@include [SetGroupByOperationArg]} * @param moveToTop Specifies whether nested grouping columns should be moved to the top level * or kept inside a [ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup]. * Defaults to `true`. diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt index cd657c4d97..513577da73 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt @@ -47,6 +47,8 @@ import kotlin.reflect.KProperty * * [grouped][Grouping] into a [PivotGroupBy] structure, which combines [pivot] and [groupBy] operations * and then reduced or aggregated into a [DataFrame]. * + * @include [PivotedColumnsInline] + * * Check out [Grammar]. 
* * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention] @@ -238,8 +240,23 @@ internal interface PivotDocs { */ interface AggregationStatistics + /** + * Pivoted columns can also be created inline: + * ```kotlin + * // Create a new column "newName" based on existing "oldName" values + * // and pivot it: + * df.pivot { expr("newName") { oldName.drop(5) } } + * ``` + */ + @ExcludeFromSources + interface PivotedColumnsInline } +/** {@set [SelectingColumns.OPERATION] [pivot][pivot]} */ +@ExcludeFromSources +private interface SetPivotOperationArg + + /** * A specialized [ColumnsSelectionDsl] that allows specifying [pivot] key ordering * using the [then] function. @@ -412,7 +429,7 @@ internal interface PivotMatchesResultDescription /** * Computes whether matching rows exist in this [DataFrame] for all unique values of the - * selected columns (independently) across all possible combinations + * selected [\columns] (independently) across all possible combinations * of values in the remaining columns (all expecting selected). * * Performs a [pivot] operation on the specified [\columns] of this [DataFrame], @@ -431,8 +448,9 @@ internal interface PivotMatchesResultDescription * * For more information: {@include [DocumentationUrls.PivotMatches]} * - * See also: [pivotCounts], which performs a similar operation - * but counts the number of matching rows instead of checking for their presence. + * See also: + * * [pivotCounts], which performs a similar operation + * but counts the number of matching rows instead of checking for their presence. * * ### This `pivotMatches` Overload */ @@ -502,7 +520,7 @@ internal interface PivotCountsResultDescription /** * Computes number of matching rows in this [DataFrame] for all unique values of the - * selected columns (independently) across all possible combinations + * selected [\columns] (independently) across all possible combinations * of values in the remaining columns (all expecting selected). 
* * Performs a [pivot] operation on the specified [\columns] of this [DataFrame], @@ -519,7 +537,7 @@ internal interface PivotCountsResultDescription * * See [Selecting Columns][SelectSelectingOptions]. * - * For more information: {@include [DocumentationUrls.PivotMatches]} + * For more information: {@include [DocumentationUrls.PivotCounts]} * * See also: [pivotMatches], which performs a similar operation * but check if there is any matching row instead of counting then. @@ -588,6 +606,33 @@ public fun DataFrame.pivotCounts(vararg columns: KProperty<*>, inward: Bo // region pivot +/** + * Pivots the selected [\columns] of this [GroupBy] groups. + * Returns a [PivotGroupBy]. + * + * @include [PivotGroupByDocs.CommonDescription] + * + * @include [PivotDocs.PivotedColumnsInline] + */ +@ExcludeFromSources +private interface PivotForGroupByDocs + +/** + * {@include [PivotForGroupByDocs]} + * ### This `pivot` Overload + */ +@ExcludeFromSources +private interface CommonPivotForGroupByDocs + +/** + * @include [CommonPivotForGroupByDocs] + * @include [SelectingColumns.Dsl.WithExample] {@include [SetPivotOperationArg] {@set [SelectingColumns.RECEIVER] `gb`}} + * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; + * if `false`, they are placed at the top level. + * @param [columns] The [Columns Selector][ColumnsSelector] that defines which columns are pivoted. + * @return A new [PivotGroupBy] that preserves the original [groupBy] key columns + * and pivots the provided columns. 
+ */ public fun GroupBy<*, G>.pivot(inward: Boolean = true, columns: ColumnsSelector): PivotGroupBy = PivotGroupByImpl(this, columns, inward) @@ -596,6 +641,15 @@ public fun GroupBy<*, G>.pivot(inward: Boolean = true, columns: ColumnsSelec public fun GroupBy<*, G>.pivot(vararg columns: AnyColumnReference, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumnSet() } +/** + * @include [CommonPivotForGroupByDocs] + * @include [SelectingColumns.Dsl.WithExample] {@include [SetPivotOperationArg] {@set [SelectingColumns.RECEIVER] `gb`}} + * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; + * if `false`, they are placed at the top level. + * @param [columns] The [Column names][String] that defines which columns are pivoted. + * @return A new [PivotGroupBy] that preserves the original [groupBy] key columns + * and pivots the provided columns. + */ public fun GroupBy<*, G>.pivot(vararg columns: String, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumnSet() } @@ -608,9 +662,69 @@ public fun GroupBy<*, G>.pivot(vararg columns: KProperty<*>, inward: Boolean // region pivotMatches +/** + * Computes whether matching rows exist in groups of this [GroupBy] for all unique values of the + * selected columns (independently) across all [groupBy] key combinations. + * + * Performs a [pivot][GroupBy.pivot] operation on the specified [\columns] of this [GroupBy] groups, + * and produces a new matrix-like [DataFrame]. + * + * @include [PivotGroupByDocs.ResultingMatrixCommonDescription] + * @include [PivotMatchesResultDescription] + * + * This function combines [pivot][GroupBy.pivot] + * and [matches][PivotGroupBy.matches] operations into a single call. + * + * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention] + * + * See [Selecting Columns][SelectSelectingOptions]. 
+ * + * For more information: {@include [DocumentationUrls.PivotMatches]} + * + * See also: [pivotCounts][GroupBy.pivotCounts], which performs a similar operation + * but counts the number of matching rows instead of checking for their presence. + * + * ### This `pivotMatches` Overload + */ +internal interface GroupByPivotMatchesCommonDocs + +/** + * @include [GroupByPivotMatchesCommonDocs] + * @include [SelectingColumns.Dsl] + * + * ### Example + * ```kotlin + * // Compute whether matching rows exist for all unique values of "city" + * // and "name" (independently) across all grouping key combinations + * gb.pivotMatches { city and name } + * ``` + * + * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; + * if `false`, they are placed at the top level. + * @param [columns] The [Columns Selector][ColumnsSelector] that defines which columns are used as [pivot] keys for the operation. + * @return A new [DataFrame] representing a Boolean presence matrix — with grouping key columns as rows, + * pivot key values as columns, and `true`/`false` cells indicating existing combinations. + */ public fun GroupBy<*, G>.pivotMatches(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).matches() +/** + * @include [GroupByPivotMatchesCommonDocs] + * @include [SelectingColumns.ColumnNames] + * + * ### Example + * ```kotlin + * // Compute whether matching rows exist for all unique values of "city" + * // and "name" (independently) across all grouping key combinations + * gb.pivotMatches("city", "name") + * ``` + * + * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; + * if `false`, they are placed at the top level. + * @param [columns] The [Column Names][String] that define which columns are used as [pivot] keys for the operation.
+ * @return A new [DataFrame] representing a Boolean presence matrix — with grouping key columns as rows, + * pivot key values as columns, and `true`/`false` cells indicating existing combinations. + */ public fun GroupBy<*, G>.pivotMatches(vararg columns: String, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumnSet() } @@ -628,9 +742,69 @@ public fun GroupBy<*, G>.pivotMatches(vararg columns: KProperty<*>, inward: // region pivotCounts +/** + * Computes number of matching rows in groups of this [GroupBy] for all unique values of the + * selected [\columns] (independently) across all [groupBy] key combinations. + * + * Performs a [pivot][GroupBy.pivot] operation on the specified [\columns] of this [GroupBy] groups + * and produces a new matrix-like [DataFrame]. + * + * @include [PivotGroupByDocs.ResultingMatrixCommonDescription] + * @include [PivotCountsResultDescription] + * + * This function combines [pivot][GroupBy.pivot] + * and [count][PivotGroupBy.count] operations into a single call. + * + * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention] + * + * See [Selecting Columns][SelectSelectingOptions]. + * + * For more information: {@include [DocumentationUrls.PivotCounts]} + * + * See also: [pivotMatches][GroupBy.pivotMatches], which performs a similar operation + * but checks whether any matching row exists instead of counting them. + * + * ### This `pivotCounts` Overload + */ +internal interface GroupByPivotCountsCommonDocs + +/** + * @include [GroupByPivotCountsCommonDocs] + * @include [SelectingColumns.Dsl] + * + * ### Example + * ```kotlin + * // Compute number of matching rows for all unique values of "city" + * // and "name" (independently) across all grouping key combinations. + * gb.pivotCounts { city and name } + * ``` + * + * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; + * if `false`, they are placed at the top level.
+ * @param [columns] The [Columns Selector][ColumnsSelector] that defines which columns are used as [pivot] keys for the operation. + * @return A new [DataFrame] representing a counting matrix — with grouping key columns as rows, + * pivot key values as columns, and the number of rows with the corresponding combinations in the cells. + */ public fun GroupBy<*, G>.pivotCounts(inward: Boolean = true, columns: ColumnsSelector): DataFrame = pivot(inward, columns).count() +/** + * @include [GroupByPivotCountsCommonDocs] + * @include [SelectingColumns.ColumnNames] + * + * ### Example + * ```kotlin + * // Compute number of matching rows for all unique values of "city" + * // and "name" (independently) across all grouping key combinations. + * gb.pivotCounts("city", "name") + * ``` + * + * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; + * if `false`, they are placed at the top level. + * @param [columns] The [Column Names][String] that define which columns are used as [pivot] keys for the operation. + * @return A new [DataFrame] representing a counting matrix — with grouping key columns as rows, + * pivot key values as columns, and the number of rows with the corresponding combinations in the cells. + */ public fun GroupBy<*, G>.pivotCounts(vararg columns: String, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumnSet() } @@ -652,6 +826,42 @@ public fun GroupBy<*, G>.pivotCounts(vararg columns: KProperty<*>, inward: B // region pivot + +/** + * A special [pivot][GroupBy.pivot] override for usage in [aggregate][Grouped.aggregate] method + * of [GroupBy]. + * This allows combining [column pivoting aggregations][PivotGroupByDocs.Aggregation] + * with common [GroupBy] aggregations in [aggregate][Grouped.aggregate].
+ * + * This function itself doesn't affect [aggregate][Grouped.aggregate] result, but + * it allows putting results of [PivotGroupBy aggregations][PivotGroupByDocs.Aggregation] into + * [aggregate][Grouped.aggregate] resulting [DataFrame] by simply calling them. + * See [GroupBy.pivot] and [PivotGroupByDocs.Aggregation] for more information. + * + * Resulting columns added as common [aggregations][Grouped.aggregate] result columns; + * their structure depends on exact + * [PivotGroupBy aggregations][PivotGroupByDocs.Aggregation] used. + * + * ### Example + * ```kotlin + * df.groupBy { name.firstName }.aggregate { + * // Pivot "city" column of each group, resulting into + * // `PivotGroupBy` with "firstName" grouping keys and "city" values columns + * pivot { city }.aggregate { + * // Aggregate mean of "age" column values of each of + * // `groupBy` x `pivot` group into "meanAge" column + * mean { age } into "meanAge" + * // Aggregate size of each `PivotGroupBy` group into "count" column + * count() into "count" + * } + * // Shortcut for `count` aggregation in + * // "firstName" x "lastName" groups + * pivot { name.lastName }.count() + * // Common `count` aggregation + * count() into "total" + * } + * ``` + */ public fun AggregateGroupedDsl.pivot( inward: Boolean = true, columns: PivotColumnsSelector, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt index 4cc8853c3c..9a71fb273e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt @@ -53,12 +53,22 @@ internal interface SelectingColumns { @ExcludeFromSources interface OPERATION + /** + * Operation receiver variable name + */ + @ExcludeFromSources + interface RECEIVER + // Using ` notation to not create double `` when including /** {@set
[OPERATION] `operation`} */ @ExcludeFromSources interface SetDefaultOperationArg + /** {@set [RECEIVER] `df`} */ + @ExcludeFromSources + interface SetDefaultReceiverArg + /** * Select or express columns using the {@include [ColumnsSelectionDslLink]}. * (Any (combination of) {@include [AccessApiLink]}). @@ -88,13 +98,14 @@ internal interface SelectingColumns { * * #### For example: * - * `df.`{@get [OPERATION]}` { length `[and][ColumnsSelectionDsl.and]` age }` + * {@get [RECEIVER]}`.`{@get [OPERATION]}` { length `[and][ColumnsSelectionDsl.and]` age }` * - * `df.`{@get [OPERATION]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }` + * {@get [RECEIVER]}`.`{@get [OPERATION]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }` * - * `df.`{@get [OPERATION]}` { `[colsOf][ColumnsSelectionDsl.colsOf]`<`[Double][Double]`>() }` + * {@get [RECEIVER]}`.`{@get [OPERATION]}` { `[colsOf][ColumnsSelectionDsl.colsOf]`<`[Double][Double]`>() }` * * @include [SetDefaultOperationArg] + * @include [SetDefaultReceiverArg] */ interface WithExample } From 50abd6973a9e75626f3874adc239e420bc657645 Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Thu, 6 Nov 2025 17:31:06 +0400 Subject: [PATCH 4/6] pivot in aggregation kdocs --- .../jetbrains/kotlinx/dataframe/api/pivot.kt | 64 +++++++++++++------ .../documentation/DocumentationUrls.kt | 3 + 2 files changed, 47 insertions(+), 20 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt index 513577da73..35f561e3f8 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt @@ -828,40 +828,64 @@ public fun GroupBy<*, G>.pivotCounts(vararg columns: KProperty<*>, inward: B /** - * A special [pivot][GroupBy.pivot] override for usage in [aggregate][Grouped.aggregate] method - * of [GroupBy]. 
- * This allows combining [column pivoting aggregations][PivotGroupByDocs.Aggregation] - * with common [GroupBy] aggregations in [aggregate][Grouped.aggregate]. + * Pivots the selected [\columns] within each group for further + * [pivot aggregations][PivotGroupByDocs.Aggregation]. * - * This function itself doesn't affect [aggregate][Grouped.aggregate] result, but - * it allows putting results of [PivotGroupBy aggregations][PivotGroupByDocs.Aggregation] into - * [aggregate][Grouped.aggregate] resulting [DataFrame] by simply calling them. - * See [GroupBy.pivot] and [PivotGroupByDocs.Aggregation] for more information. + * This function itself does not directly affect the [aggregate][Grouped.aggregate] result, + * but instead creates an intermediate [PivotGroupBy]. + * The resulting columns of its [aggregations][PivotGroupByDocs.Aggregation] are then + * putted into the final [DataFrame] produced by [aggregate][Grouped.aggregate] + * when those aggregation functions are invoked. * - * Resulting columns added as common [aggregations][Grouped.aggregate] result columns; - * their structure depends on exact - * [PivotGroupBy aggregations][PivotGroupByDocs.Aggregation] used. + * See [GroupBy.pivot] and [PivotGroupByDocs.Aggregation] for detailed information. + * + * The resulting columns are added as standard [aggregate][Grouped.aggregate] result columns. + * Their structure depends on the specific [PivotGroupBy aggregations][PivotGroupByDocs.Aggregation] used. + * + * For more information: {@include [DocumentationUrls.PivotInsideAggregationStatistics]} + * + * Check out [`PivotGroupBy` Grammar][PivotGroupByDocs.Grammar]. 
+ * + * ### This `pivot` overload * * ### Example * ```kotlin * df.groupBy { name.firstName }.aggregate { - * // Pivot "city" column of each group, resulting into - * // `PivotGroupBy` with "firstName" groping keys and "city" values columns + * // Pivot the "city" column within each group, + * // creating a PivotGroupBy with "firstName" as grouping keys and "city" as pivoted columns * pivot { city }.aggregate { - * // Aggregate mean of "age" column values of each of - * // `groupBy` x `pivot` group into "meanAge" column + * // Aggregate the mean of "age" column values for each + * // groupBy × pivot combination into the "meanAge" column * mean { age } into "meanAge" - * // Aggregate size of each `PivotGroupBy` group into "count" column + * + * // Aggregate the size of each PivotGroupBy group into the "count" column * count() into "count" * } - * // Shortcut for `count` aggregation in - * // "firstName" x "lastName" groups - * pivot { name.lastName }.count() - * // Common `count` aggregation + * + * // Shortcut for `count` aggregation in "firstName" × "lastName" groups + * // into "namesCount" column + * pivot { name.lastName }.count() into "namesCount" + * + * // Standard `count` aggregation across all rows in each "firstName" group + * // into "total" column * count() into "total" * } * ``` */ +internal interface AggregateGroupedDslPivotDocs + +/** + * @include [AggregateGroupedDslPivotDocs] + * Select or express pivot columns using the [PivotDsl]. + * + * @param inward If `true` (default), the generated pivoted columns are nested inside the original column; + * otherwise, they are placed at the top level. + * @param columns The [Pivot Columns Selector][PivotColumnsSelector] that defines which columns are used + * as keys for pivoting and in which order. 
+ * @return A new [Pivot] containing the unique values of the selected column as new columns + * (or as [column groups][ColumnGroup] for multiple key columns), + * with their corresponding groups of rows represented as [DataFrame]s. + */ public fun AggregateGroupedDsl.pivot( inward: Boolean = true, columns: PivotColumnsSelector, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt index a8db8a0d2d..940cea672e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt @@ -170,4 +170,7 @@ internal interface DocumentationUrls { /** [See "`pivot` statistics" on the documentation website.]({@include [Url]}/summarystatistics.html#pivot-statistics) */ interface PivotStatistics + + /** [See "`pivot` inside aggregation" on the documentation website.]({@include [Url]}/pivot.html#pivot-inside-aggregate) */ + interface PivotInsideAggregationStatistics } From 8768ac0ed333208ff69e241c1a6d4cdf0b98cc2e Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Fri, 7 Nov 2025 19:11:48 +0400 Subject: [PATCH 5/6] pivot derives in aggregation kdocs --- .../jetbrains/kotlinx/dataframe/api/pivot.kt | 311 +++++++++++++++--- .../documentation/DocumentationUrls.kt | 3 + 2 files changed, 265 insertions(+), 49 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt index 35f561e3f8..1aa0da9726 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt @@ -12,6 +12,7 @@ import org.jetbrains.kotlinx.dataframe.aggregation.AggregateBody import org.jetbrains.kotlinx.dataframe.aggregation.AggregateDsl import 
org.jetbrains.kotlinx.dataframe.aggregation.AggregateGroupedDsl import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload +import org.jetbrains.kotlinx.dataframe.api.GroupByDocs.Grammar import org.jetbrains.kotlinx.dataframe.api.Select.SelectSelectingOptions import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnSet @@ -250,6 +251,27 @@ internal interface PivotDocs { */ @ExcludeFromSources interface PivotedColumnsInline + + /** + * @param [\inward] Defines whether the generated columns are nested under a supercolumn: + * - `true` — pivot key columns are nested under a supercolumn named after + * the original pivoted column (independently for multiple pivoted columns); + * - `false` — pivot key columns are not nested (i.e., placed at the top level); + * - `null` (default) — inferred automatically: `true` for multiple pivoted columns + * or when the [Pivot] has been grouped; `false` otherwise. + */ + @ExcludeFromSources + interface InwardKDocs + + /** + * @param [\inward] Defines whether the generated columns are nested under a supercolumn: + * - `true` (default) — pivot key columns are nested under a supercolumn named after + * the original pivoted column (independently for multiple pivoted columns); + * - `false` — pivot key columns are not nested (i.e., placed at the top level); + */ + @ExcludeFromSources + interface InwardKDocsForGrouped + } /** {@set [SelectingColumns.OPERATION] [pivot][pivot]} */ @@ -367,8 +389,7 @@ private interface CommonPivotDocs * Select or express pivot columns using the [PivotDsl]. * * @include [PivotDslDocs] - * @param inward If `true` (default), the generated pivoted columns are nested inside the original column; - * otherwise, they are placed at the top level. + * @include [PivotDocs.InwardKDocs] * @param columns The [Pivot Columns Selector][PivotColumnsSelector] that defines which columns are used * as keys for pivoting and in which order. 
* @return A new [Pivot] containing the unique values of the selected column as new columns @@ -396,8 +417,7 @@ public fun DataFrame.pivot(inward: Boolean? = null, columns: PivotColumns * // Independent pivot by "city" and "lastName" * df.pivot("city", "lastName") * ``` - * @param inward If `true` (default), the generated pivoted columns are nested inside the original column; - * otherwise, they are placed at the top level. + * @include [PivotDocs.InwardKDocs] * @param columns The [Column Names][String] that defines which columns are used * as keys for pivoting. * @return A new [Pivot] containing the unique values of the selected column as new columns @@ -489,8 +509,7 @@ public fun DataFrame.pivotMatches(inward: Boolean = true, columns: Column * df.pivotMatches("city", "name") * ``` * - * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; - * if `false`, they are placed at the top level. + * @include [PivotDocs.InwardKDocs] * @param [columns] The [Column Names][String] that defines which columns are used as [pivot] keys for the operation. * @return A new [DataFrame] representing a Boolean presence matrix — with grouping key columns as rows, * pivot key values as columns, and `true`/`false` cells indicating existing combinations. @@ -558,8 +577,7 @@ internal interface DataFramePivotCountsCommonDocs * df.pivotCounts { city and name } * ``` * - * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; - * if `false`, they are placed at the top level. + * @include [PivotDocs.InwardKDocs] * @param [columns] The [Columns Selector][ColumnsSelector] that defines which columns are used as [pivot] keys for the operation. * @return A new [DataFrame] representing a counting matrix — with grouping key columns as rows, * pivot key values as columns, and the number of rows with the corresponding combinations in the cells. 
@@ -579,8 +597,7 @@ public fun DataFrame.pivotCounts(inward: Boolean = true, columns: Columns * df.pivotCounts("city", "name") * ``` * - * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; - * if `false`, they are placed at the top level. + * @include [PivotDocs.InwardKDocs] * @param [columns] The [Column Names][String] that defines which columns are used as [pivot] keys for the operation. * @return A new [DataFrame] representing a counting matrix — with grouping key columns as rows, * pivot key values as columns, and the number of rows with the corresponding combinations in the cells. @@ -627,8 +644,7 @@ private interface CommonPivotForGroupByDocs /** * @include [CommonPivotForGroupByDocs] * @include [SelectingColumns.Dsl.WithExample] {@include [SetPivotOperationArg] {@set [SelectingColumns.RECEIVER] `gb`}} - * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; - * if `false`, they are placed at the top level. + * @include [PivotDocs.InwardKDocsForGrouped] * @param [columns] The [Columns Selector][ColumnsSelector] that defines which columns are pivoted. * @return A new [PivotGroupBy] that preserves the original [groupBy] key columns * and pivots the provided columns. @@ -644,8 +660,7 @@ public fun GroupBy<*, G>.pivot(vararg columns: AnyColumnReference, inward: B /** * @include [CommonPivotForGroupByDocs] * @include [SelectingColumns.Dsl.WithExample] {@include [SetPivotOperationArg] {@set [SelectingColumns.RECEIVER] `gb`}} - * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; - * if `false`, they are placed at the top level. + * @include [PivotDocs.InwardKDocsForGrouped] * @param [columns] The [Column names][String] that defines which columns are pivoted. * @return A new [PivotGroupBy] that preserves the original [groupBy] key columns * and pivots the provided columns. 
@@ -699,8 +714,7 @@ internal interface GroupByPivotMatchesCommonDocs * gb.pivotMatches { city and name } * ``` * - * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; - * if `false`, they are placed at the top level. + * @include [PivotDocs.InwardKDocsForGrouped] * @param [columns] The [Columns Selector][ColumnsSelector] that defines which columns are used as [pivot] keys for the operation. * @return A new [DataFrame] representing a Boolean presence matrix — with grouping key columns as rows, * pivot key values as columns, and `true`/`false` cells indicating existing combinations. @@ -719,8 +733,7 @@ public fun GroupBy<*, G>.pivotMatches(inward: Boolean = true, columns: Colum * df.pivotMatches("city", "name") * ``` * - * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; - * if `false`, they are placed at the top level. + * @include [PivotDocs.InwardKDocsForGrouped] * @param [columns] The [Column Names][String] that defines which columns are used as [pivot] keys for the operation. * @return A new [DataFrame] representing a Boolean presence matrix — with grouping key columns as rows, * pivot key values as columns, and `true`/`false` cells indicating existing combinations. @@ -779,8 +792,7 @@ internal interface GroupByPivotCountsCommonDocs * df.pivotCounts { city and name } * ``` * - * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; - * if `false`, they are placed at the top level. + * @include [PivotDocs.InwardKDocsForGrouped] * @param [columns] The [Columns Selector][ColumnsSelector] that defines which columns are used as [pivot] keys for the operation. * @return A new [DataFrame] representing a counting matrix — with grouping key columns as rows, * pivot key values as columns, and the number of rows with the corresponding combinations in the cells. 
@@ -799,8 +811,7 @@ public fun GroupBy<*, G>.pivotCounts(inward: Boolean = true, columns: Column * df.pivotCounts("city", "name") * ``` * - * @param [inward] If `true` (default), the generated pivoted columns are nested inside the original column; - * if `false`, they are placed at the top level. + * @include [PivotDocs.InwardKDocsForGrouped] * @param [columns] The [Column Names][String] that defines which columns are used as [pivot] keys for the operation. * @return A new [DataFrame] representing a counting matrix — with grouping key columns as rows, * pivot key values as columns, and the number of rows with the corresponding combinations in the cells. @@ -831,34 +842,44 @@ public fun GroupBy<*, G>.pivotCounts(vararg columns: KProperty<*>, inward: B * Pivots the selected [\columns] within each group for further * [pivot aggregations][PivotGroupByDocs.Aggregation]. * - * This function itself does not directly affect the [aggregate][Grouped.aggregate] result, + * This function itself does not directly modify the result of [aggregate][Grouped.aggregate], * but instead creates an intermediate [PivotGroupBy]. - * The resulting columns of its [aggregations][PivotGroupByDocs.Aggregation] are then - * putted into the final [DataFrame] produced by [aggregate][Grouped.aggregate] - * when those aggregation functions are invoked. - * - * See [GroupBy.pivot] and [PivotGroupByDocs.Aggregation] for detailed information. + * The resulting [DataFrame] columns produced by its [aggregations][PivotGroupByDocs.Aggregation] are then + * inserted into the final [DataFrame] returned by [aggregate][Grouped.aggregate] + * when those aggregation functions are executed (as usual aggregations). + * Their structure depends on the specific + * [PivotGroupBy aggregations][PivotGroupByDocs.Aggregation] used. * - * The resulting columns are added as standard [aggregate][Grouped.aggregate] result columns. 
- * Their structure depends on the specific [PivotGroupBy aggregations][PivotGroupByDocs.Aggregation] used. + * See [GroupBy.pivot] and [PivotGroupByDocs.Aggregation] for more information. * * For more information: {@include [DocumentationUrls.PivotInsideAggregationStatistics]} * * Check out [`PivotGroupBy` Grammar][PivotGroupByDocs.Grammar]. * - * ### This `pivot` overload + * See also [pivotMatches][AggregateGroupedDsl.pivotMatches] + * and [pivotCounts][AggregateGroupedDsl.pivotCounts] shortcuts. * + * ### This `pivot` overload + */ +@ExcludeFromSources +internal interface AggregateGroupedDslPivotDocs + +/** + * @include [AggregateGroupedDslPivotDocs] + * Select or express pivot columns using the [PivotDsl]. * ### Example * ```kotlin * df.groupBy { name.firstName }.aggregate { * // Pivot the "city" column within each group, - * // creating a PivotGroupBy with "firstName" as grouping keys and "city" as pivoted columns + * // creating a PivotGroupBy with "firstName" as grouping keys + * // and "city" as pivoted columns * pivot { city }.aggregate { * // Aggregate the mean of "age" column values for each - * // groupBy × pivot combination into the "meanAge" column + * // groupBy × pivot combination group into the "meanAge" column * mean { age } into "meanAge" * - * // Aggregate the size of each PivotGroupBy group into the "count" column + * // Aggregate the size of each `PivotGroupBy` group + * // into the "count" column * count() into "count" * } * @@ -866,31 +887,56 @@ public fun GroupBy<*, G>.pivotCounts(vararg columns: KProperty<*>, inward: B * // into "namesCount" column * pivot { name.lastName }.count() into "namesCount" * - * // Standard `count` aggregation across all rows in each "firstName" group + * // Common `count` aggregation * // into "total" column * count() into "total" * } * ``` - */ -internal interface AggregateGroupedDslPivotDocs - -/** - * @include [AggregateGroupedDslPivotDocs] - * Select or express pivot columns using the [PivotDsl]. 
* - * @param inward If `true` (default), the generated pivoted columns are nested inside the original column; - * otherwise, they are placed at the top level. - * @param columns The [Pivot Columns Selector][PivotColumnsSelector] that defines which columns are used + * @include [PivotDocs.InwardKDocsForGrouped] + * @param [columns] The [Pivot Columns Selector][PivotColumnsSelector] that defines which columns are used * as keys for pivoting and in which order. - * @return A new [Pivot] containing the unique values of the selected column as new columns - * (or as [column groups][ColumnGroup] for multiple key columns), - * with their corresponding groups of rows represented as [DataFrame]s. + * @return A [PivotGroupBy] for further [aggregations][PivotGroupByDocs.Aggregation]. */ public fun AggregateGroupedDsl.pivot( inward: Boolean = true, columns: PivotColumnsSelector, ): PivotGroupBy = PivotInAggregateImpl(this, columns, inward) +/** + * @include [AggregateGroupedDslPivotDocs] + * @include [SelectingColumns.ColumnNames] + * ### Example + * ```kotlin + * df.groupBy("firstName").aggregate { + * // Pivot the "city" column within each group, + * // creating a PivotGroupBy with "firstName" as grouping keys + * // and "city" as pivoted columns + * pivot("city").aggregate { + * // Aggregate the mean of "age" column values for each + * // groupBy × pivot combination group into the "meanAge" column + * mean("age") into "meanAge" + * + * // Aggregate the size of each `PivotGroupBy` group + * // into the "count" column + * count() into "count" + * } + * + * // Shortcut for `count` aggregation in "firstName" × "lastName" groups + * // into "namesCount" column + * pivot("lastName").count() into "namesCount" + * + * // Common `count` aggregation + * // into "total" column + * count() into "total" + * } + * ``` + * + * @include [PivotDocs.InwardKDocsForGrouped] + * @param columns The [Column Names][String] that defines which columns are used + * as keys for
pivoting and in which order. + * @return A [PivotGroupBy] for further [aggregations][PivotGroupByDocs.Aggregation]. + */ public fun AggregateGroupedDsl.pivot(vararg columns: String, inward: Boolean = true): PivotGroupBy = pivot(inward) { columns.toColumnSet() } @@ -910,11 +956,71 @@ public fun AggregateGroupedDsl.pivot(vararg columns: KProperty<*>, inward // region pivotMatches +/** + * Computes the [pivotMatches][DataFrame.pivotMatches] statistic for the selected [\columns] + * within each group and adds it to the [aggregate][Grouped.aggregate] result. + * + * This is a shortcut for combining [pivot][AggregateGroupedDsl.pivot] + * and [matches][PivotGroupBy.matches]. + * + * The resulting [DataFrame] columns are inserted into the final [DataFrame] + * returned by [aggregate][Grouped.aggregate]. + * The resulting column name can be specified using [into]. + * + * See [GroupBy.pivotMatches] for more details. + * + * For more information: {@include [DocumentationUrls.PivotMatches]} + * + * See also: [pivot][AggregateGroupedDsl.pivot], [pivotCounts][AggregateGroupedDsl.pivotCounts]. + * + * ### This `pivotMatches` overload + */ +@ExcludeFromSources +internal interface AggregateGroupedDslPivotMatchesDocs + +/** + * @include [AggregateGroupedDslPivotMatchesDocs] + * @include [SelectingColumns.Dsl] + * ### Example + * ```kotlin + * df.groupBy { name.firstName }.aggregate { + * // Compute whether matching rows exist for all unique values of "city" + * // across all "name.firstName" key values and adds it to the aggregation result + * pivotMatches { city } + * } + * ``` + * + * @include [PivotDocs.InwardKDocsForGrouped] + * @param columns The [Columns Selector][ColumnsSelector] that defines which columns are used + * as keys for pivoting and in which order. + * @return A new [DataFrame] representing a Boolean presence matrix — with grouping key columns as rows, + * pivot key values as columns, and `true`/`false` cells indicating existing combinations. 
+ * This [DataFrame] is added to the [aggregate][Grouped.aggregate] result. + */ public fun AggregateGroupedDsl.pivotMatches( inward: Boolean = true, columns: ColumnsSelector, ): DataFrame = pivot(inward, columns).matches() +/** + * @include [AggregateGroupedDslPivotMatchesDocs] + * @include [SelectingColumns.ColumnNames] + * ### Example + * ```kotlin + * df.groupBy("firstName").aggregate { + * // Compute whether matching rows exist for all unique values of "city" + * // across all "firstName" key values and adds it to the aggregation result + * pivotMatches("city") + * } + * ``` + * + * @include [PivotDocs.InwardKDocsForGrouped] + * @param columns The [Column Names][String] that defines which columns are used + * as keys for pivoting and in which order. + * @return A new [DataFrame] representing a Boolean presence matrix — with grouping key columns as rows, + * pivot key values as columns, and `true`/`false` cells indicating existing combinations. + * This [DataFrame] is added to the [aggregate][Grouped.aggregate] result. + */ public fun AggregateGroupedDsl.pivotMatches(vararg columns: String, inward: Boolean = true): DataFrame = pivotMatches(inward) { columns.toColumnSet() } @@ -934,11 +1040,72 @@ public fun AggregateGroupedDsl.pivotMatches(vararg columns: KProperty<*>, // region pivotCounts +/** + * Computes the [pivotCounts][DataFrame.pivotCounts] statistic for the selected [\columns] + * within each group and adds it to the [aggregate][Grouped.aggregate] result. + * + * This is a shortcut for combining [pivot][AggregateGroupedDsl.pivot] + * and [count][PivotGroupBy.count]. + * + * The resulting [DataFrame] columns are inserted into the final [DataFrame] + * returned by [aggregate][Grouped.aggregate]. + * The resulting column name can be specified using [into]. + * + * See [GroupBy.pivotCounts] for more details. 
+ * + * For more information: {@include [DocumentationUrls.PivotCounts]} + * + * See also: [pivot][AggregateGroupedDsl.pivot], [pivotMatches][AggregateGroupedDsl.pivotMatches]. + * + * ### This `pivotCounts` overload + */ +@ExcludeFromSources +internal interface AggregateGroupedDslPivotCountsDocs + +/** + * @include [AggregateGroupedDslPivotCountsDocs] + * @include [SelectingColumns.Dsl] + * ### Example + * + * ```kotlin + * df.groupBy { name.firstName }.aggregate { + * // Compute number of matching rows for all unique values of "city" + * // across all "name.firstName" key values and adds it to the aggregation result + * pivotCounts { city } + * } + * ``` + * + * @include [PivotDocs.InwardKDocsForGrouped] + * @param columns The [Columns Selector][ColumnsSelector] that defines which columns are used + * as keys for pivoting and in which order. + * @return A new [DataFrame] representing a counting matrix — with grouping key columns as rows, + * pivot key values as columns, and the number of rows with the corresponding combinations in the cells. + * This [DataFrame] is added to the [aggregate][Grouped.aggregate] result. + */ public fun AggregateGroupedDsl.pivotCounts( inward: Boolean = true, columns: ColumnsSelector, -): DataFrame = pivot(inward, columns).matches() +): DataFrame = pivot(inward, columns).count() +/** + * @include [AggregateGroupedDslPivotCountsDocs] + * @include [SelectingColumns.ColumnNames] + * ### Example + * ```kotlin + * df.groupBy("firstName").aggregate { + * // Compute number of matching rows for all unique values of "city" + * // across all "firstName" key values and adds it to the aggregation result + * pivotCounts("city") + * } + * ``` + * + * @include [PivotDocs.InwardKDocsForGrouped] + * @param columns The [Column Names][String] that defines which columns are used + * as keys for pivoting and in which order.
+ * @return A new [DataFrame] representing a counting matrix — with grouping key columns as rows, + * pivot key values as columns, and the number of rows with the corresponding combinations in the cells. + * This [DataFrame] is added to the [aggregate][Grouped.aggregate] result. + */ public fun AggregateGroupedDsl.pivotCounts(vararg columns: String, inward: Boolean = true): DataFrame = pivotCounts(inward) { columns.toColumnSet() } @@ -958,10 +1125,56 @@ public fun AggregateGroupedDsl.pivotCounts(vararg columns: KProperty<*>, // endregion +/** + * A dataframe-like structure that contains all unique combinations of key values + * as columns (or [column groups][ColumnGroup] for multiple keys) with a single row + * with the corresponding groups for each key combination (each represented as a [DataFrame]). + * + * Similar to [GroupBy] but contains horizontal groups. + * + * A [Pivot] can be: + * * [reduced][PivotDocs.Reducing] into a [DataRow], where each group is collapsed into a single representative row; + * * [aggregated][PivotDocs.Aggregation] into a [DataRow], where each group is transformed into a new row of derived values; + * * [grouped][PivotDocs.Grouping] into a [PivotGroupBy] structure, which combines [pivot] and [groupBy] operations + * and then reduced or aggregated into a [DataFrame]. + * + * Check out [`Pivot` Grammar][PivotDocs.Grammar]. + * + * For more information: {@include [DocumentationUrls.Pivot]} + */ public interface Pivot : Aggregatable +/** + * A specialized [ColumnsSelector] used for selecting columns in a [pivot] operation. + * + * Provides a [PivotDsl] both as the receiver and the lambda parameter, and expects + * a [ColumnsResolver] as the return value. + * + * Enables defining the hierarchy of pivot columns using [then][PivotDsl.then]. + */ public typealias PivotColumnsSelector = Selector, ColumnsResolver> +/** + * An intermediate class used in [`Pivot` reducing][PivotDocs.Reducing] operations. 
+ * + * Serves as a transitional step between performing a reduction on pivot groups + * and specifying how the resulting reduced rows should be represented + * in a resulting [DataFrame]. + * + * Available transformation methods: + * * [values][ReducedPivot.values] — creates a new row containing the values + * from the reduced rows in the selected columns and produces a [DataRow] of + * these values; + * * [with][ReducedPivot.with] — computes a new value for each reduced row using a [RowExpression], + * and produces a [DataRow] containing these computed values. + * + * Each method returns a new [DataRow] with [pivot] keys as top-level columns + * (or as [column groups][ColumnGroup]) and values composed of the reduced results from each group. + * + * Check out [`Pivot` Grammar][PivotDocs.Grammar]. + * + * For more information, refer to: {@include [DocumentationUrls.PivotReducing]} + */ public class ReducedPivot( @PublishedApi internal val pivot: Pivot, @PublishedApi internal val reducer: Selector, DataRow?>, @@ -1002,7 +1215,7 @@ internal interface PivotGroupByDocs { * will produce the same result. * * [PivotGroupBy] can be [reduced][PivotGroupByDocs.Reducing] - * or [aggregated][PivotGroupByDocs.Aggregation]. + * or [aggregated][PivotGroupByDocs.Aggregation] into a [DataFrame]. * * Check out [PivotGroupBy Grammar][PivotGroupByDocs.Grammar].
+ * diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt index 940cea672e..07df3408f3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt @@ -171,6 +171,9 @@ internal interface DocumentationUrls { /** [See "`pivot` statistics" on the documentation website.]({@include [Url]}/summarystatistics.html#pivot-statistics) */ interface PivotStatistics + /** [See "`Pivot` reducing" on the documentation website.]({@include [Url]}/pivot.html#reducing) */ + interface PivotReducing + /** [See "`pivot` inside aggregation" on the documentation website.]({@include [Url]}/pivot.html#pivot-inside-aggregate) */ interface PivotInsideAggregationStatistics } From 6c8da01cb30f5eacef056951944a53e26a2e4aa0 Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Fri, 7 Nov 2025 19:12:29 +0400 Subject: [PATCH 6/6] reduced pivot kdocs fix --- .../main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt index 1aa0da9726..81c566b522 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt @@ -1159,7 +1159,7 @@ public typealias PivotColumnsSelector = Selector, ColumnsResol * * Serves as a transitional step between performing a reduction on pivot groups * and specifying how the resulting reduced rows should be represented - * in a resulting [DataFrame]. + * in a resulting [DataRow]. * * Available transformation methods: * * [values][ReducedPivot.values] — creates a new row containing the values