@@ -16,23 +16,30 @@ import org.jetbrains.kotlinx.dataframe.documentation.LineBreak
1616import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
1717import org.jetbrains.kotlinx.dataframe.impl.api.corrImpl
1818import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
19+ import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
1920import kotlin.reflect.KProperty
2021import kotlin.reflect.typeOf
2122
2223/* *
23- * Calculates the correlation between values in the specified [columns\].
24+ * Calculates the Pearson pairwise correlation between values in the specified [columns\].
2425 *
2526 * This function does not compute the correlation immediately.
2627 * Instead, it defines the primary set of columns
2728 * and returns a [Corr] instance that allows configuring how the correlation should be computed.
2829 *
30+ * The function is available for numeric and [Boolean] columns.
31+ * [Boolean] values are converted to 1 for `true` and 0 for `false`.
32+ * All other columns are ignored.
33+ * If a [ColumnGroup] instance is passed as the target column for correlation,
34+ * it will be unpacked into suitable nested columns.
35+ *
2936 * The [Corr] object provides two methods to perform correlation calculations:
3037 * - [with][Corr.with] — computes correlations between the initially selected columns and a second set of columns.
3138 * - [withItself][Corr.withItself] — computes pairwise correlations within the initially selected columns.
3239 *
33- * Each method returns a square or rectangular correlation matrix represented as a [DataFrame],
40+ * Each method returns a square or rectangular correlation matrix represented by a [DataFrame],
3441 * where rows and columns correspond to the selected column sets,
35- * and each cell contains the correlation coefficient between the corresponding pair of columns.
42+ * and each cell contains the Pearson correlation coefficient between the corresponding pair of columns.
3643 *
3744 * To compute correlations between all suitable columns in the [DataFrame], use [DataFrame.corr()][DataFrame.corr].
3845 *
@@ -92,7 +99,7 @@ internal fun AnyCol.isSuitableForCorr() = isSubtypeOf<Number>() || type() == typ
9299 * It must be followed by one of the computation methods to produce a correlation [DataFrame].
93100 *
94101 * The resulting [DataFrame] is a correlation matrix where rows correspond to one set of columns,
95- * columns to the other set, and each cell contains the correlation coefficient
102+ * columns to the other set, and each cell contains the Pearson correlation coefficient
96103 * between the respective pair of columns.
97104 *
98105 * Use the following methods to perform the computation:
@@ -108,11 +115,13 @@ public data class Corr<T, C>(internal val df: DataFrame<T>, internal val columns
108115 * Computes the correlation matrix between all suitable columns in this [DataFrame],
109116 * including nested columns at any depth.
110117 *
111- * The result is a square correlation matrix represented as a [DataFrame],
118+ * The result is a square correlation matrix represented by a [DataFrame],
112119 * where both rows and columns correspond to the original columns,
113- * and each cell contains the correlation coefficient between the respective pair of columns.
120+ * and each cell contains the Pearson correlation coefficient between the respective pair of columns.
114121 *
115- * Only columns suitable for correlation (e.g., numeric types) are included in the result.
122+ * The function is available for numeric and [Boolean] columns.
123+ * [Boolean] values are converted to 1 for `true` and 0 for `false`.
124+ * All other columns are ignored.
116125 *
117126 * For more information, see: {@include [DocumentationUrls.Corr]}
118127 *
@@ -127,6 +136,12 @@ public fun <T> DataFrame<T>.corr(): DataFrame<T> =
127136 * {@include [CommonCorrDocs]}
128137 * @include [SelectingColumns.Dsl] {@include [SetCorrOperationArg]}
129138 *
139+ * The function is available for numeric and [Boolean] columns.
140+ * [Boolean] values are converted to 1 for `true` and 0 for `false`.
141+ * All other columns are ignored.
142+ * If a [ColumnGroup] instance is passed as the target column for correlation,
143+ * it will be unpacked into suitable nested columns.
144+ *
130145 * ### Examples
131146 * ```kotlin
132147 * // Compute correlations between the "age" column and the "weight" and "height" columns
@@ -145,6 +160,12 @@ public fun <T, C> DataFrame<T>.corr(columns: ColumnsSelector<T, C>): Corr<T, C>
145160 * {@include [CommonCorrDocs]}
146161 * @include [SelectingColumns.ColumnNames] {@include [SetCorrOperationArg]}
147162 *
163+ * The function is available for numeric and [Boolean] columns.
164+ * [Boolean] values are converted to 1 for `true` and 0 for `false`.
165+ * All other columns are ignored.
166+ * If a [ColumnGroup] instance is passed as the target column for correlation,
167+ * it will be unpacked into suitable nested columns.
168+ *
148169 * ### Examples
149170 * ```kotlin
150171 * // Compute correlations between the "age" column and the "weight" and "height" columns
@@ -171,9 +192,9 @@ public fun <T, C> DataFrame<T>.corr(vararg columns: ColumnReference<C>): Corr<T,
171192 * Calculates the correlation of specified [columns][otherColumns]
172193 * with values in the columns previously selected with [corr].
173194 *
174- * Returns a correlation matrix represented as a [DataFrame],
195+ * Returns a correlation matrix represented by a [DataFrame],
175196 * where rows and columns correspond to the selected column sets,
176- * and each cell contains the correlation coefficient between the corresponding pair of columns.
197+ * and each cell contains the Pearson correlation coefficient between the corresponding pair of columns.
177198 *
178199 * Check out [Grammar].
179200 *
@@ -244,9 +265,9 @@ public fun <T, C, R> Corr<T, C>.with(vararg otherColumns: ColumnReference<R>): D
244265 * Calculates pairwise correlations between the columns
245266 * previously selected with [corr].
246267 *
247- * Returns a square correlation matrix represented as a [DataFrame],
268+ * Returns a square correlation matrix represented by a [DataFrame],
248269 * where both rows and columns correspond to the selected columns,
249- * and each cell contains the correlation coefficient between the respective pair of columns.
270+ * and each cell contains the Pearson correlation coefficient between the respective pair of columns.
250271 *
251272 * Check out [Grammar].
252273 *
0 commit comments