@@ -15,69 +15,78 @@ import org.jetbrains.kotlinx.dataframe.Selector
1515import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
1616import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
1717import org.jetbrains.kotlinx.dataframe.annotations.Refine
18+ import org.jetbrains.kotlinx.dataframe.api.add
1819import org.jetbrains.kotlinx.dataframe.columns.BaseColumn
1920import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
2021import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
2122import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
23+ import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
24+ import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
2225import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException
2326import org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException
2427import org.jetbrains.kotlinx.dataframe.impl.api.insertImpl
2528import org.jetbrains.kotlinx.dataframe.impl.columns.resolveSingle
2629import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
2730import kotlin.reflect.KProperty
2831
29- /*
30- * `add` operation adds new columns to DataFrame.
31- */
32-
3332// region Add existing columns
3433
3534/* *
36- * Creates new [DataFrame] with given columns added to the end of original [DataFrame.columns] list .
35+ * Adds new [columns] to the end of this [DataFrame] (at the top level).
3736 *
38- * Original [DataFrame] is not modified .
37+ * Returns a new [DataFrame] with the new [columns] appended to the original list of [DataFrame.columns].
3938 *
40- * @param columns columns to add
41- * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names
42- * @throws [UnequalColumnSizesException] if columns in expected result have different sizes
43- * @return new [DataFrame] with added columns
39+ * For more information: {@include [DocumentationUrls.Add]}.
40+ *
41+ * @param columns columns to add.
42+ * @throws [DuplicateColumnNamesException] if columns in an expected result have repeated names.
43+ * @throws [UnequalColumnSizesException] if columns in an expected result have different sizes.
44+ * @return new [DataFrame] with added columns.
4445 */
public fun <T> DataFrame<T>.add(vararg columns: AnyBaseCol): DataFrame<T> {
    // View the vararg array as an Iterable and hand it to the Iterable-based overload.
    val columnsToAppend = columns.asIterable()
    return addAll(columnsToAppend)
}
4647
4748/* *
48- * Creates new [DataFrame] with given columns added to the end of original [DataFrame.columns] list.
49+ * Adds new [columns] to the end of this [DataFrame] (at the top level).
50+ *
51+ * Returns a new [DataFrame] with the new [columns] appended to the original list of [DataFrame.columns].
4952 *
50- * Original [DataFrame] is not modified .
53+ * For more information: {@include [DocumentationUrls.Add]}.
5154 *
52- * @param columns columns to add
53- * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names
54- * @throws [UnequalColumnSizesException] if columns in expected result have different sizes
55- * @return new [DataFrame] with added columns
55+ * @param columns columns to add.
56+ * @throws [DuplicateColumnNamesException] if columns in an expected result have repeated names.
57+ * @throws [UnequalColumnSizesException] if columns in an expected result have different sizes.
58+ * @return new [DataFrame] with added columns.
5659 */
public fun <T> DataFrame<T>.addAll(columns: Iterable<AnyBaseCol>): DataFrame<T> {
    // `this.columns()` is the receiver's current column list; the `columns` parameter is appended after it.
    val combined = this.columns() + columns
    return dataFrameOf(combined).cast()
}
5962
6063/* *
61- * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list .
64+ * Adds all columns from the given [dataFrames] to the end of this [DataFrame] (at the top level).
6265 *
63- * Original [DataFrame] is not modified.
66+ * Returns a new [DataFrame] with the columns from the specified
67+ * [dataFrames] appended to the original list of [DataFrame.columns].
6468 *
65- * @param dataFrames dataFrames to get columns from
66- * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names
67- * @throws [UnequalColumnSizesException] if columns in expected result have different sizes
68- * @return new [DataFrame] with added columns
69+ * For more information: {@include [DocumentationUrls.Add]}.
70+ *
71+ * @param dataFrames dataFrames to get columns from.
72+ * @throws [DuplicateColumnNamesException] if columns in an expected result have repeated names.
73+ * @throws [UnequalColumnSizesException] if columns in an expected result have different sizes.
74+ * @return new [DataFrame] with added columns.
6975 */
public fun <T> DataFrame<T>.add(vararg dataFrames: AnyFrame): DataFrame<T> {
    // View the vararg array as an Iterable and hand it to the Iterable-based overload.
    val framesToAppend = dataFrames.asIterable()
    return addAll(framesToAppend)
}
7177
7278/* *
73- * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list.
79+ * Adds all columns from the given [dataFrames] to the end of this [DataFrame] (at the top level).
80+ *
81+ * Returns a new [DataFrame] with the columns from the specified
82+ * [dataFrames] appended to the original list of [DataFrame.columns].
7483 *
75- * Original [DataFrame] is not modified .
84+ * For more information: {@include [DocumentationUrls.Add]}.
7685 *
77- * @param dataFrames dataFrames to get columns from
78- * @throws [DuplicateColumnNamesException] if columns in expected result have repeated names
79- * @throws [UnequalColumnSizesException] if columns in expected result have different sizes
80- * @return new [DataFrame] with added columns
86+ * @param dataFrames dataFrames to get columns from.
87+ * @throws [DuplicateColumnNamesException] if columns in an expected result have repeated names.
88+ * @throws [UnequalColumnSizesException] if columns in an expected result have different sizes.
89+ * @return new [DataFrame] with added columns.
8190 */
8291@JvmName(" addAllFrames" )
8392public fun <T > DataFrame<T>.addAll (dataFrames : Iterable <AnyFrame >): DataFrame <T > =
@@ -115,14 +124,50 @@ public interface AddDataRow<out T> : DataRow<T> {
115124public typealias AddExpression <T , R > = Selector <AddDataRow <T >, R >
116125
117126/* *
118- * Creates new column using row [expression] and adds it to the end of [DataFrame]
127+ * With an [AddExpression], you define the value that each row in the new column should have.
128+ * This can be based on values from the same row in the original [DataFrame].
119129 *
120- * Original [DataFrame] is not modified.
130+ * You can also use functions like [prev] and [next] to access other rows, and combine them with
131+ * [newValue][AddDataRow.newValue] to reference values already computed in the new column.
132+ * For example, use `prev()?.newValue()` to access the new column value from the previous row.
133+ */
134+ @ExcludeFromSources
135+ internal interface AddExpressionDocs
136+
137+ /* *
138+ * Creates a new column using an [AddExpression] and
139+ * adds it to the end of this [DataFrame] (at the top level).
140+ *
141+ * {@include [AddExpressionDocs]}
142+ *
143+ * Returns a new [DataFrame] with the new column appended to the original list of [DataFrame.columns].
144+ *
145+ * ## Example
146+ *
147+ * ```kotlin
148+ * // Add a new column "sum" that contains the sum of values from the "firstValue"
149+ * // and "secondValue" columns for each row.
150+ * val dfWithSum = df.add("sum") { firstValue + secondValue }
151+ *
152+ * // Add a "fibonacci" column with the Fibonacci sequence:
153+ * // for the first two rows, the value is 1;
154+ * // for subsequent rows, it's the sum of the two previous Fibonacci values.
155+ * val dfWithFibonacci = df.add("fibonacci") {
156+ * if (index() < 2) 1
157+ * else prev()!!.newValue<Int>() + prev()!!.prev()!!.newValue<Int>()
158+ * }
159+ * ```
160+ *
161+ * For more information: {@include [DocumentationUrls.Add]}.
162+ *
163+ * @param name name for a new column.
164+ * If it is empty, a unique column name will be generated.
165+ * Otherwise, it should be unique for original [DataFrame].
166+ * @param infer a value of [Infer] that specifies how to compute column [type][BaseColumn.type] for a new column.
167+ * Defaults to [Infer.Nulls].
168+ * @param expression [AddExpression] that computes column value for every [DataRow] of a new column.
169+ * @return new [DataFrame] with added column.
121170 *
122- * @param name name for a new column. If it is empty, a unique column name will be generated. Otherwise, it should be unique for original [DataFrame].
123- * @param infer a value of [Infer] that specifies how to compute column [type][BaseColumn.type] for a new column
124- * @param expression [AddExpression] that computes column value for every [DataRow]
125- * @return new [DataFrame] with added column
126171 * @throws DuplicateColumnNamesException if the [DataFrame] already contains a column with the given [name].
127172 */
128173@Refine
@@ -149,6 +194,34 @@ public inline fun <reified R, T> DataFrame<T>.add(
149194 noinline expression : AddExpression <T , R >,
150195): DataFrame <T > = add(column.path(), infer, expression)
151196
197+ /* *
198+ * Creates a new column using [AddExpression] and inserts it at the specified [ColumnPath].
199+ *
200+ * {@include [AddExpressionDocs]}
201+ *
202+ * For more information: {@include [DocumentationUrls.Add]}.
203+ *
204+ * Returns a new [DataFrame] with the new column inserted at the given [path].
205+ * {@include [org.jetbrains.kotlinx.dataframe.documentation.ColumnPathCreation]}
206+ *
207+ * ## Example
208+ *
209+ * ```kotlin
210+ * // Add a new column "sum" inside the "info" column group (which will be created if it doesn't exist).
211+ * // The column contains the sum of values from the "firstValue" and "secondValue" columns for each row.
212+ * val dfWithSum = df.add(pathOf("info", "sum")) { firstValue + secondValue }
213+ * ```
214+ *
215+ * @param path Target [ColumnPath] for the new column.
216+ * If it points to a nested location,
217+ * intermediate columns will be created if necessary.
218+ * @param infer A value of [Infer] that specifies how to compute the column [type][BaseColumn.type] for the new column.
219+ * Defaults to [Infer.Nulls].
220+ * @param expression An [AddExpression] that computes the column value for every [DataRow] of the new column.
221+ * @return A new [DataFrame] with the added column.
222+ *
223+ * @throws DuplicateColumnNamesException If the [DataFrame] already contains a column at the specified [path].
224+ */
152225public inline fun <reified R , T > DataFrame<T>.add (
153226 path : ColumnPath ,
154227 infer : Infer = Infer .Nulls ,
@@ -163,6 +236,10 @@ public inline fun <reified R, T> DataFrame<T>.add(
163236
164237// region Create and add several columns
165238
239+ /* *
240+ * Receiver that is used by the [add] and [mapToFrame] operations
241+ * for adding new columns and column groups based on [DataFrame] columns and row values.
242+ */
166243public class AddDsl <T >(
167244 @PublishedApi internal val df : DataFrame <T >,
168245) : ColumnsContainer<T> by df,
@@ -253,6 +330,43 @@ public class AddDsl<T>(
253330 public infix fun AddGroup<T>.into (column : AnyColumnGroupAccessor ): Unit = into(column.name())
254331}
255332
333+ /* *
334+ * Creates new columns using the [AddDsl] builder.
335+ *
336+ * An [AddDsl] allows adding multiple new columns and column groups to a [DataFrame]
337+ * using concise syntax based on `from`, `into` operations and [AddExpression]s.
338+ *
339+ * Returns a new [DataFrame] with the newly added columns.
340+ *
341+ * ## Example
342+ *
343+ * ```kotlin
344+ * val dfWithAdded = df.add {
345+ * // Add new column "yearOfBirth" computed as 2021 minus value in "age" column
346+ * "yearOfBirth" from { 2021 - age }
347+ *
348+ * // Add column "is adult" with result of age > 18
349+ * age > 18 into "is adult"
350+ *
351+ * // Add new column "role" using expression
352+ * expr { if (department == "IT") "developer" else "analyst" } into "role"
353+ *
354+ * // Add column group "details"
355+ * group("details") {
356+ * // Add column "last name length" with length of lastName
357+ * name.lastName.length() into "last name length"
358+ *
359+ * // Add column "full name" by combining firstName and lastName
360+ * "full name" from { name.firstName + " " + name.lastName }
361+ * }
362+ * }
363+ * ```
364+ *
365+ * For more information: {@include [DocumentationUrls.Add]}.
366+ *
367+ * @param body An [AddDsl] expression used to define new columns and column groups.
368+ * @return A new [DataFrame] with the added columns.
369+ */
256370@Refine
257371@Interpretable(" AddWithDsl" )
258372public fun <T > DataFrame<T>.add (body : AddDsl <T >.() -> Unit ): DataFrame <T > {
@@ -261,6 +375,43 @@ public fun <T> DataFrame<T>.add(body: AddDsl<T>.() -> Unit): DataFrame<T> {
261375 return dataFrameOf(this @add.columns() + dsl.columns).cast()
262376}
263377
378+ /* *
379+ * Creates a new column using [AddExpression] and
380+ * adds it to the end of each group (i.e., each [DataFrame]) of this [GroupBy] (at the top level).
381+ *
382+ * {@include [AddExpressionDocs]}
383+ *
384+ * Returns a new [GroupBy] with the new column
385+ * appended to the original list of [DataFrame.columns] of each group [DataFrame].
386+ *
387+ * ## Example
388+ *
389+ * ```kotlin
390+ * // Add a new column "sum" that contains the sum of values from the "firstValue"
391+ * // and "secondValue" columns for each row.
392+ * val gbWithSum = gb.add("sum") { firstValue + secondValue }
393+ *
394+ * // Add a "fibonacci" column with the Fibonacci sequence:
395+ * // for the first two rows, the value is 1;
396+ * // for subsequent rows, it's the sum of the two previous Fibonacci values.
397+ * val gbWithFibonacci = gb.add("fibonacci") {
398+ * if (index() < 2) 1
399+ * else prev()!!.newValue<Int>() + prev()!!.prev()!!.newValue<Int>()
400+ * }
401+ * ```
402+ *
403+ * For more information: {@include [DocumentationUrls.Add]}.
404+ *
405+ * @param name name for a new column.
406+ * If it is empty, a unique column name will be generated.
407+ * Otherwise, it should be unique for original group [DataFrame]s.
408+ * @param infer a value of [Infer] that specifies how to compute column [type][BaseColumn.type] for a new column.
409+ * Defaults to [Infer.Nulls].
410+ * @param expression [AddExpression] that computes column value for every [DataRow] of a new column.
411+ * @return new [GroupBy] with added column.
412+ *
413+ * @throws DuplicateColumnNamesException if group [DataFrame]s already contain a column with the given [name].
414+ */
264415@Refine
265416@Interpretable(" GroupByAdd" )
266417public inline fun <reified R , T , G > GroupBy <T , G >.add (
0 commit comments