11package org.jetbrains.kotlinx.dataframe.api
22
3- import kotlinx.datetime.Instant
4- import kotlinx.datetime.LocalDate
5- import kotlinx.datetime.LocalDateTime
6- import kotlinx.datetime.LocalTime
73import org.jetbrains.kotlinx.dataframe.AnyCol
84import org.jetbrains.kotlinx.dataframe.ColumnsSelector
95import org.jetbrains.kotlinx.dataframe.DataFrame
10- import org.jetbrains.kotlinx.dataframe.RowColumnExpression
11- import org.jetbrains.kotlinx.dataframe.RowValueExpression
126import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
7+ import org.jetbrains.kotlinx.dataframe.api.CorrDocs.Grammar
8+ import org.jetbrains.kotlinx.dataframe.api.CorrDocs.SelectingOptions
139import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
1410import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
15- import org.jetbrains.kotlinx.dataframe.dataTypes.IFRAME
16- import org.jetbrains.kotlinx.dataframe.dataTypes.IMG
1711import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
18- import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls.Convert
1912import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarLink
2013import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
2114import org.jetbrains.kotlinx.dataframe.documentation.Indent
2215import org.jetbrains.kotlinx.dataframe.documentation.LineBreak
2316import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
24- import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnGroupsAndNestedColumnsMention
2517import org.jetbrains.kotlinx.dataframe.impl.api.corrImpl
2618import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
27- import java.math.BigDecimal
28- import java.math.BigInteger
29- import java.net.URL
3019import kotlin.reflect.KProperty
31- import kotlin.reflect.KType
3220import kotlin.reflect.typeOf
3321
3422/* *
3523 * Calculates the correlation between values in the specified [columns\].
3624 *
37- * This function does not perform the calculation immediately. Instead, it selects a primary set of columns
38- * and returns a [Corr] object, which serves as an intermediate step in the correlation analysis.
25+ * This function does not compute the correlation immediately.
26+ * Instead, it defines the primary set of columns
27+ * and returns a [Corr] instance that allows configuring how the correlation should be computed.
3928 *
40- * The [Corr] object provides two methods to perform correlation computations:
41- * - [with][Corr.with] — allows you to specify a second set of columns and computes correlations between
42- * the initially selected columns and this second set.
43- * - [withItself][Corr.withItself] — computes correlations within the initially selected columns.
29+ * The [Corr] object provides two methods to perform correlation calculations:
30+ * - [with][Corr.with] — computes correlations between the initially selected columns and a second set of columns.
31+ * - [withItself][Corr.withItself] — computes pairwise correlations within the initially selected columns.
4432 *
45- * Each of these methods returns a [DataFrame] where rows correspond to one set of columns, columns to the other set,
33+ * Each method returns a square or rectangular correlation matrix represented as a [DataFrame],
34+ * where rows and columns correspond to the selected column sets,
4635 * and each cell contains the correlation coefficient between the corresponding pair of columns.
4736 *
48- * If you need to compute correlations between all columns in a DataFrame, use [DataFrame.corr()][DataFrame.corr].
37+ * To compute correlations between all suitable columns in the [DataFrame], use [DataFrame.corr()][DataFrame.corr].
4938 *
5039 * Check out [Grammar].
5140 *
5241 * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
5342 *
54- * See [Selecting Columns][ConvertSelectingOptions ].
43+ * See also: [Selecting Columns][SelectingOptions].
5544 *
56- * For more information: {@include [DocumentationUrls.Corr]}
45+ * For more information, see: {@include [DocumentationUrls.Corr]}
5746 */
5847internal interface CorrDocs {
5948
6049 /* *
6150 * {@comment Version of [SelectingColumns] with correctly filled in examples}
6251 * @include [SelectingColumns] {@include [SetCorrOperationArg]}
6352 */
64- interface ConvertSelectingOptions
53+ interface SelectingOptions
6554
6655 /* *
6756 * ## Corr Operation Grammar
@@ -98,30 +87,76 @@ internal fun AnyCol.isSuitableForCorr() = isSubtypeOf<Number>() || type() == typ
9887/* *
9988 * An intermediate class used in the [corr] operation.
10089 *
101- * This class itself does not perform any computations — it is a transitional step
102- * before specifying how to compute correlation.
103- * It must be followed by one of the methods specifying correlation
104- * computation to produce a new correlation [DataFrame].
90+ * This class does not perform any computation by itself — it serves as a transitional step
91+ * before specifying how the correlation should be calculated.
92+ * It must be followed by one of the computation methods to produce a correlation [DataFrame].
10593 *
106- * Each of these methods returns a [DataFrame] where rows correspond to one set of columns, columns to the other set,
107- * and each cell contains the correlation coefficient between the corresponding pair of columns.
94+ * The resulting [DataFrame] is a correlation matrix where rows correspond to one set of columns,
95+ * columns to the other set, and each cell contains the correlation coefficient
96+ * between the respective pair of columns.
10897 *
10998 * Use the following methods to perform the computation:
110- * - [with { columnsSelector }][with] – selects a second set of columns and computes correlations between
111- * the initially selected columns and this second set.
112- * - [withItself()][withItself] - computes correlations within the initially selected columns.
99+ * - [with] — selects a second set of columns and computes correlations between
100+ * the initially selected columns and this second set.
101+ * - [withItself] — computes pairwise correlations within the initially selected columns.
113102 *
114103 * See [Grammar][CorrDocs.Grammar] for more details.
115104 */
116105public data class Corr <T , C >(internal val df : DataFrame <T >, internal val columns : ColumnsSelector <T , C >)
117106
107+ /* *
108+ * Computes the correlation matrix between all suitable columns in this [DataFrame],
109+ * including nested columns at any depth.
110+ *
111+ * The result is a square correlation matrix represented as a [DataFrame],
112+ * where both rows and columns correspond to the original columns,
113+ * and each cell contains the correlation coefficient between the respective pair of columns.
114+ *
115+ * Only columns suitable for correlation (e.g., numeric types) are included in the result.
116+ *
117+ * For more information, see: {@include [DocumentationUrls.Corr]}
118+ *
119+ * @return A square correlation matrix as a [DataFrame], where both rows and columns correspond to the original columns.
120+ */
118121public fun <T > DataFrame<T>.corr (): DataFrame <T > =
119122 corr {
120123 colsAtAnyDepth().filter { it.isSuitableForCorr() }
121124 }.withItself()
122125
126+ /* *
127+ * {@include [CommonCorrDocs]}
128+ * @include [SelectingColumns.Dsl] {@include [SetCorrOperationArg]}
129+ *
130+ * ### Examples
131+ * ```kotlin
132+ * // Compute correlations between the "age" column and the "weight" and "height" columns
133+ * df.corr { age }.with { weight and height }
134+ *
135+ * // Compute pairwise correlations between all columns of type `Number`
136+ * df.corr { colsOf<Number>() }.withItself()
137+ * ```
138+ * @param [columns\] The [Columns Selector][ColumnsSelector] used to select the columns
139+ * of this [DataFrame] to compute a correlation.
140+ * @return A [Corr] intermediate object with the selected columns.
141+ */
123142public fun <T , C > DataFrame<T>.corr (columns : ColumnsSelector <T , C >): Corr <T , C > = Corr (this , columns)
124143
144+ /* *
145+ * {@include [CommonCorrDocs]}
146+ * @include [SelectingColumns.ColumnNames] {@include [SetCorrOperationArg]}
147+ *
148+ * ### Examples
149+ * ```kotlin
150+ * // Compute correlations between the "age" column and the "weight" and "height" columns
151+ * df.corr("age").with("weight", "height")
152+ *
153+ * // Compute pairwise correlations between the "age", "weight", and "height" columns
154+ * df.corr("age", "weight", "height").withItself()
155+ * ```
156+ * @param [columns\] The [Column Names][String] used to select the columns
157+ * of this [DataFrame] to compute a correlation.
158+ * @return A [Corr] intermediate object with the selected columns.
159+ */
125160public fun <T > DataFrame<T>.corr (vararg columns : String ): Corr <T , Any ?> = corr { columns.toColumnSet() }
126161
127162@Deprecated(DEPRECATED_ACCESS_API )
@@ -132,8 +167,67 @@ public fun <T, C> DataFrame<T>.corr(vararg columns: KProperty<C>): Corr<T, C> =
132167@AccessApiOverload
133168public fun <T , C > DataFrame<T>.corr (vararg columns : ColumnReference <C >): Corr <T , C > = corr { columns.toColumnSet() }
134169
170+ /* *
171+ * Calculates the correlation of specified [columns][otherColumns]
172+ * with values in the columns previously selected with [corr].
173+ *
174+ * Returns a correlation matrix represented as a [DataFrame],
175+ * where rows and columns correspond to the selected column sets,
176+ * and each cell contains the correlation coefficient between the corresponding pair of columns.
177+ *
178+ * Check out [Grammar].
179+ *
180+ * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
181+ *
182+ * See also: [Selecting Columns][SelectingOptions].
183+ *
184+ * For more information, see: {@include [DocumentationUrls.Corr]}
185+ */
186+ internal interface CorrWithDocs
187+
188+ /* *
189+ * {@include [CorrWithDocs]}
190+ * ### This Corr With Overload
191+ */
192+ @ExcludeFromSources
193+ private interface CommonCorrWithDocs
194+
195+ /* *
196+ * {@include [CommonCorrWithDocs]}
197+ * @include [SelectingColumns.Dsl] {@include [SetCorrOperationArg]}
198+ *
199+ * ### Examples
200+ * ```kotlin
201+ * // Compute correlations between the "age" column and the "weight" and "height" columns
202+ * df.corr { age }.with { weight and height }
203+ *
204+ * // Compute correlations between the "speed" column and all columns of type `Double` (excluding itself)
205+ * df.corr { speed }.with { colsOf<Double>() except speed }
206+ * ```
207+ *
208+ * @param otherColumns The [ColumnsSelector] used to select the second set of columns
209+ * from this [DataFrame] to compute correlations against the initially selected columns.
210+ * @return A [DataFrame] containing the resulting correlation matrix.
211+ */
135212public fun <T , C , R > Corr <T , C >.with (otherColumns : ColumnsSelector <T , R >): DataFrame <T > = corrImpl(otherColumns)
136213
214+ /* *
215+ * {@include [CommonCorrWithDocs]}
216+ * @include [SelectingColumns.ColumnNames] {@include [SetCorrOperationArg]}
217+ *
218+ * ### Examples
219+ * ```kotlin
220+ * // Compute correlations between the "age" column and the "weight" and "height" columns
221+ * df.corr("age").with("weight", "height")
222+ *
223+ * // Compute correlations between the "speed" column and all columns of type `Number`
224+ * df.corr { colsOf<Number>() }.with("speed")
225+ * ```
226+ *
227+ * @param otherColumns The [Column Names][String] used to select the second set of columns
228+ * from this [DataFrame] to compute correlations against the initially selected columns.
229+ * @return A [DataFrame] containing the resulting correlation matrix.
230+ */
137231public fun <T , C > Corr <T , C >.with (vararg otherColumns : String ): DataFrame <T > = with { otherColumns.toColumnSet() }
138232
139233@Deprecated(DEPRECATED_ACCESS_API )
@@ -146,6 +240,20 @@ public fun <T, C, R> Corr<T, C>.with(vararg otherColumns: KProperty<R>): DataFra
146240public fun <T , C , R > Corr <T , C >.with (vararg otherColumns : ColumnReference <R >): DataFrame <T > =
147241 with { otherColumns.toColumnSet() }
148242
243+ /* *
244+ * Calculates pairwise correlations between the columns
245+ * previously selected with [corr].
246+ *
247+ * Returns a square correlation matrix represented as a [DataFrame],
248+ * where both rows and columns correspond to the selected columns,
249+ * and each cell contains the correlation coefficient between the respective pair of columns.
250+ *
251+ * Check out [Grammar].
252+ *
253+ * For more information, see: {@include [DocumentationUrls.Corr]}
254+ *
255+ * @return A [DataFrame] containing the pairwise correlation matrix.
256+ */
149257public fun <T , C > Corr <T , C >.withItself (): DataFrame <T > = with (columns)
150258
151259// endregion
0 commit comments