@@ -4,26 +4,180 @@ import org.jetbrains.kotlinx.dataframe.AnyCol
44import org.jetbrains.kotlinx.dataframe.ColumnsSelector
55import org.jetbrains.kotlinx.dataframe.DataFrame
66import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
7+ import org.jetbrains.kotlinx.dataframe.api.CorrDocs.Grammar
8+ import org.jetbrains.kotlinx.dataframe.api.CorrDocs.SelectingOptions
9+ import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
710import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
811import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
12+ import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
13+ import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarLink
14+ import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
15+ import org.jetbrains.kotlinx.dataframe.documentation.Indent
16+ import org.jetbrains.kotlinx.dataframe.documentation.LineBreak
17+ import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
918import org.jetbrains.kotlinx.dataframe.impl.api.corrImpl
1019import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
1120import kotlin.reflect.KProperty
1221import kotlin.reflect.typeOf
1322
/**
 * Calculates the Pearson pairwise correlation between values in the specified [columns\].
 *
 * This function does not compute the correlation immediately.
 * Instead, it defines the primary set of columns
 * and returns a [Corr] instance that allows configuring how the correlation should be computed.
 *
 * The function is available for numeric and [Boolean] columns.
 * [Boolean] values are converted into 1 for true and 0 for false.
 * All other columns are ignored.
 * If a [ColumnGroup] instance is passed as the target column for correlation,
 * it will be unpacked into suitable nested columns.
 *
 * The [Corr] object provides two methods to perform correlation calculations:
 * - [with][Corr.with] — computes correlations between the initially selected columns and a second set of columns.
 * - [withItself][Corr.withItself] — computes pairwise correlations within the initially selected columns.
 *
 * Each method returns a square or rectangular correlation matrix represented by a [DataFrame],
 * where rows and columns correspond to the selected column sets,
 * and each cell contains the Pearson correlation coefficient between the corresponding pair of columns.
 *
 * To compute correlations between all suitable columns in the [DataFrame], use [DataFrame.corr()][DataFrame.corr].
 *
 * Check out [Grammar].
 *
 * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
 *
 * See also: [Selecting Columns][SelectingOptions].
 *
 * For more information, see: {@include [DocumentationUrls.Corr]}
 */
internal interface CorrDocs {

    /**
     * {@comment Version of [SelectingColumns] with correctly filled in examples}
     * @include [SelectingColumns] {@include [SetCorrOperationArg]}
     */
    interface SelectingOptions

    /**
     * ## Corr Operation Grammar
     * {@include [LineBreak]}
     * {@include [DslGrammarLink]}
     * {@include [LineBreak]}
     *
     * **[`corr`][corr]**` { columnsSelector: `[`ColumnsSelector`][ColumnsSelector]` }`
     *
     * {@include [Indent]}
     * __`.`__[**`with`**][Corr.with]` { columnsSelector: `[`ColumnsSelector`][ColumnsSelector]` }`
     *
     * {@include [Indent]}
     *`| `__`.`__[**`withItself`**][Corr.withItself]`()`
     */
    interface Grammar
}
78+
// Documentation-only holder: other KDocs in this file pull it in with `{@include [SetCorrOperationArg]}`.
// Its single directive sets `SelectingColumns.OPERATION` to a link to `corr`.
// NOTE(review): presumably any extra text in this KDoc would be inlined at every include site,
// so explanatory prose is kept in these line comments instead — confirm against the doc preprocessor.

/** {@set [SelectingColumns.OPERATION] [corr][corr]} */
@ExcludeFromSources
private interface SetCorrOperationArg
82+
// Documentation-only holder shared by the `corr` overloads in this file
// (they reference it with `{@include [CommonCorrDocs]}`): the general [CorrDocs] text
// followed by the heading that introduces the overload-specific part.

/**
 * {@include [CorrDocs]}
 * ### This Corr Overload
 */
@ExcludeFromSources
private interface CommonCorrDocs
89+
/**
 * Tells whether this column can take part in a correlation computation.
 *
 * A column qualifies when `isSubtypeOf<Number>()` holds for it,
 * or when its [type] is equal to `typeOf<Boolean>()`.
 *
 * NOTE(review): the [Boolean] branch is an equality check against the non-nullable `Boolean` type,
 * so a column typed `Boolean?` presumably does not qualify — confirm against `type()`.
 *
 * @return `true` if the column is numeric or [Boolean] as described above, `false` otherwise.
 */
internal fun AnyCol.isSuitableForCorr(): Boolean =
    when {
        isSubtypeOf<Number>() -> true
        else -> type() == typeOf<Boolean>()
    }
1591
1692// region DataFrame
1793
/**
 * Intermediate result of the [corr] operation.
 *
 * An instance only carries the source [DataFrame] and the selector of the first set of columns.
 * It performs no computation by itself and has to be completed with one of the methods listed below
 * to produce a correlation [DataFrame].
 *
 * That [DataFrame] is a correlation matrix: rows correspond to one set of columns,
 * columns to the other set, and each cell contains the Pearson correlation coefficient
 * between the respective pair of columns.
 *
 * Complete the operation with one of:
 * - [with] — selects a second set of columns and computes correlations between
 * the initially selected columns and this second set.
 * - [withItself] — computes pairwise correlations within the initially selected columns.
 *
 * See [Grammar][CorrDocs.Grammar] for more details.
 */
public data class Corr<T, C>(
    internal val df: DataFrame<T>,
    internal val columns: ColumnsSelector<T, C>,
)
19113
/**
 * Computes the Pearson correlation between all suitable columns in this [DataFrame],
 * including nested columns at any depth.
 *
 * The outcome is a square correlation matrix represented by a [DataFrame]:
 * both its rows and its columns correspond to the original columns,
 * and each cell contains the Pearson correlation coefficient between the respective pair of columns.
 *
 * Only numeric and [Boolean] columns take part in the computation.
 * [Boolean] values are converted into 1 for true and 0 for false.
 * All other columns are ignored.
 *
 * For more information, see: {@include [DocumentationUrls.Corr]}
 *
 * @return A square correlation matrix as a [DataFrame], where both rows and columns correspond to the original columns.
 */
public fun <T> DataFrame<T>.corr(): DataFrame<T> {
    // Select every column, at any nesting level, that passes the suitability check.
    val allSuitable = corr { colsAtAnyDepth().filter { column -> column.isSuitableForCorr() } }
    // Correlate that selection with itself to obtain the square matrix.
    return allSuitable.withItself()
}
24134
/**
 * {@include [CommonCorrDocs]}
 * @include [SelectingColumns.Dsl] {@include [SetCorrOperationArg]}
 *
 * Only numeric and [Boolean] columns take part in the computation.
 * [Boolean] values are converted into 1 for true and 0 for false.
 * All other columns are ignored.
 * If a [ColumnGroup] instance is passed as the target column for correlation,
 * it will be unpacked into suitable nested columns.
 *
 * ### Examples
 * ```kotlin
 * // Compute correlations between the "age" column and the "weight" and "height" columns
 * df.corr { age }.with { weight and height }
 *
 * // Compute pairwise correlations between all columns of type `Number`
 * df.corr { colsOf<Number>() }.withItself()
 * ```
 * @param [columns\] The [Columns Selector][ColumnsSelector] used to select the columns
 * of this [DataFrame] to compute a correlation.
 * @return A [Corr] intermediate object with the selected columns.
 */
public fun <T, C> DataFrame<T>.corr(columns: ColumnsSelector<T, C>): Corr<T, C> {
    // Nothing is computed here: the frame and the selector are only captured for a later step.
    val source = this
    return Corr(source, columns)
}
26158
/**
 * {@include [CommonCorrDocs]}
 * @include [SelectingColumns.ColumnNames] {@include [SetCorrOperationArg]}
 *
 * The function is available for numeric and [Boolean] columns.
 * [Boolean] values are converted into 1 for true and 0 for false.
 * All other columns are ignored.
 * If a [ColumnGroup] instance is passed as the target column for correlation,
 * it will be unpacked into suitable nested columns.
 *
 * ### Examples
 * ```kotlin
 * // Compute correlations between the "age" column and the "weight" and "height" columns
 * df.corr("age").with("weight", "height")
 *
 * // Compute pairwise correlations between the "age", "weight" and "height" columns
 * df.corr("age", "weight", "height").withItself()
 * ```
 * @param [columns\] The [Column Names][String] used to select the columns
 * of this [DataFrame] to compute a correlation.
 * @return A [Corr] intermediate object with the selected columns.
 */
public fun <T> DataFrame<T>.corr(vararg columns: String): Corr<T, Any?> =
    // Delegate to the selector-based overload, turning the names into a column set.
    corr { columns.toColumnSet() }
28182
29183@Deprecated(DEPRECATED_ACCESS_API )
@@ -34,8 +188,67 @@ public fun <T, C> DataFrame<T>.corr(vararg columns: KProperty<C>): Corr<T, C> =
34188@AccessApiOverload
35189public fun <T , C > DataFrame<T>.corr (vararg columns : ColumnReference <C >): Corr <T , C > = corr { columns.toColumnSet() }
36190
/**
 * Calculates the correlation of the specified [columns][otherColumns]
 * with the values in the columns previously selected with [corr].
 *
 * The result is a correlation matrix represented by a [DataFrame]:
 * its rows and columns correspond to the selected column sets,
 * and each cell contains the Pearson correlation coefficient between the corresponding pair of columns.
 *
 * Check out [Grammar].
 *
 * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
 *
 * See also: [Selecting Columns][SelectingOptions].
 *
 * For more information, see: {@include [DocumentationUrls.Corr]}
 */
internal interface CorrWithDocs
208+
// Documentation-only holder shared by the `Corr.with` overloads in this file
// (they reference it with `{@include [CommonCorrWithDocs]}`): the general [CorrWithDocs] text
// followed by the heading that introduces the overload-specific part.

/**
 * {@include [CorrWithDocs]}
 * ### This Corr With Overload
 */
@ExcludeFromSources
private interface CommonCorrWithDocs
215+
/**
 * {@include [CommonCorrWithDocs]}
 * @include [SelectingColumns.Dsl] {@include [SetCorrOperationArg]}
 *
 * ### Examples
 * ```kotlin
 * // Compute correlations between the "age" column and the "weight" and "height" columns
 * df.corr { age }.with { weight and height }
 *
 * // Compute correlations between the "speed" column and all columns of type `Double` (excluding itself)
 * df.corr { speed }.with { colsOf<Double>() except speed }
 * ```
 *
 * @param otherColumns The [ColumnsSelector] used to select the second set of columns
 * from this [DataFrame] to compute correlations against the initially selected columns.
 * @return A [DataFrame] containing the resulting correlation matrix.
 */
public fun <T, C, R> Corr<T, C>.with(otherColumns: ColumnsSelector<T, R>): DataFrame<T> {
    // The actual computation is delegated to the implementation function.
    return this.corrImpl(otherColumns)
}
38234
/**
 * {@include [CommonCorrWithDocs]}
 * @include [SelectingColumns.ColumnNames] {@include [SetCorrOperationArg]}
 *
 * ### Examples
 * ```kotlin
 * // Compute correlations between the "age" column and the "weight" and "height" columns
 * df.corr("age").with("weight", "height")
 *
 * // Compute correlations between the "speed" column and all columns of type `Number`
 * df.corr { colsOf<Number>() }.with("speed")
 * ```
 *
 * @param otherColumns The [Column Names][String] used to select the second set of columns
 * from this [DataFrame] to compute correlations against the initially selected columns.
 * @return A [DataFrame] containing the resulting correlation matrix.
 */
public fun <T, C> Corr<T, C>.with(vararg otherColumns: String): DataFrame<T> {
    // Delegate to the selector-based overload, turning the names into a column set.
    return this.with { otherColumns.toColumnSet() }
}
40253
41254@Deprecated(DEPRECATED_ACCESS_API )
@@ -48,6 +261,20 @@ public fun <T, C, R> Corr<T, C>.with(vararg otherColumns: KProperty<R>): DataFra
48261public fun <T , C , R > Corr <T , C >.with (vararg otherColumns : ColumnReference <R >): DataFrame <T > =
49262 with { otherColumns.toColumnSet() }
50263
/**
 * Calculates Pearson pairwise correlations between the columns
 * previously selected with [corr].
 *
 * The result is a square correlation matrix represented by a [DataFrame]:
 * both its rows and its columns correspond to the selected columns,
 * and each cell contains the Pearson correlation coefficient between the respective pair of columns.
 *
 * Check out [Grammar].
 *
 * For more information, see: {@include [DocumentationUrls.Corr]}
 *
 * @return A [DataFrame] containing the pairwise correlation matrix.
 */
public fun <T, C> Corr<T, C>.withItself(): DataFrame<T> {
    // The selector stored in this Corr is reused as the second set of columns.
    return this.with(this.columns)
}
52279
53280// endregion
0 commit comments