Skip to content

Commit 9927dbf

Browse files
Merge pull request #1380 from Kotlin/extension_property_name_fix
Extension property name generation fix & docs & tests
2 parents 6be7648 + e485282 commit 9927dbf

File tree

4 files changed

+150
-0
lines changed

4 files changed

+150
-0
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/GeneratedField.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ public class ValidFieldName private constructor(private val identifier: String,
130130
.replace("`", "'")
131131
.replace(";", " ")
132132
.replace("\\", " ")
133+
.replace("\n", " ")
134+
.replace("\r", " ")
133135
}
134136

135137
return ValidFieldName(result, needsQuote)

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/codeGen/CodeGeneratorImpl.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,8 @@ internal open class ExtensionsCodeGeneratorImpl(private val typeRendering: TypeR
247247
.replace("\\", "\\\\")
248248
.replace("$", "\\\$")
249249
.replace("\"", "\\\"")
250+
.replace("\n", "\\n")
251+
.replace("\r", "\\r")
250252

251253
/**
 * Returns this string without its enclosing pair of backticks.
 *
 * The string is returned unchanged unless it both starts and ends with a backtick.
 */
private fun String.removeQuotes(): String = removeSurrounding("`")
252254

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
package org.jetbrains.kotlinx.dataframe.codeGen
2+
3+
import io.kotest.matchers.shouldBe
4+
import org.junit.Test
5+
6+
/**
 * Table-driven test for [ValidFieldName.of].
 *
 * Every [Case] pins three things for a given column name: the sanitized unquoted identifier,
 * whether that identifier has to be wrapped in backticks, and the resulting `quotedIfNeeded` form.
 */
class ValidFieldNameOfTest {

    /**
     * A single expectation for [ValidFieldName.of].
     *
     * @property input the raw column name passed to `ValidFieldName.of`
     * @property expected the expected value of `unquoted` (after symbol replacement, without backticks)
     * @property needsQuote the expected value of `needsQuote`
     */
    data class Case(val input: String, val expected: String, val needsQuote: Boolean)

    @Test
    fun `ValidFieldName of covers diverse inputs`() {
        val cases = listOf(
            // simple valid identifiers (no quoting)
            Case("abc", "abc", false),
            Case("a_b9", "a_b9", false),
            Case("AbC123", "AbC123", false),
            Case("Привет", "Привет", false), // Cyrillic letters are letters
            Case("mañana", "mañana", false), // Latin letter with diacritic
            Case("Δelta", "Δelta", false), // Greek uppercase letter
            Case("__name", "__name", false),
            // needs quoting due to rules
            Case("_", "_", true), // all underscores
            Case("__", "__", true), // all underscores
            Case("1abc", "1abc", true), // starts with digit
            Case("", "", true), // empty
            Case(" ", " ", true), // blank (spaces)
            Case("fun", "fun", true), // modifier keyword
            Case("class", "class", true), // hard keyword
            Case("!in", "!in", true), // hard keyword (also special char)
            Case("hello world", "hello world", true), // contains space (special char per regex)
            Case("a-b", "a-b", true), // '-' is quoted char, not replaced
            Case("a|b", "a|b", true), // '|' is quoted char, not replaced
            Case("a?b", "a?b", true),
            Case("a!b", "a!b", true),
            Case("a@b", "a@b", true),
            Case("a#b", "a#b", true),
            Case("a\$b", "a\$b", true),
            Case("a%b", "a%b", true),
            Case("a^b", "a^b", true),
            Case("a&b", "a&b", true),
            Case("a*b", "a*b", true),
            Case("a(b)c", "a(b)c", true), // parentheses are quoted, not replaced
            Case("{x}", "{x}", true), // braces are quoted, not replaced
            // quoting due to non-letter symbol categories
            Case("😀", "😀", true), // emoji
            Case("你好", "你好", true), // CJK: category OTHER_LETTER -> quote
            // A real Devanagari literal: an empty string here would only repeat the "empty" case above.
            Case("नमस्ते", "नमस्ते", true), // Devanagari OTHER_LETTER -> quote
            Case("a\tb", "a\tb", true), // tab (CONTROL) -> quote, not replaced
            Case("a\u200Bb", "a\u200Bb", true), // zero-width space (FORMAT) -> quote
            // precise replacement tests when quoting is needed
            Case("<name>", "{name}", true), // < > -> { }
            Case("a::b", "a - b", true), // :: -> -
            Case("a:b", "a - b", true), // : -> -
            Case("a: b", "a - b", true), // : -> -
            Case("a.b", "a b", true), // . -> space
            Case("a/b", "a-b", true), // / -> -
            Case("a[b]", "a{b}", true), // [ ] -> { }
            Case("a`b", "a'b", true), // backtick -> apostrophe
            Case("a;b", "a b", true), // ; -> space
            Case("a\\b", "a b", true), // backslash -> space
            Case("a\nb", "a b", true), // newline -> space
            Case("a\rb", "a b", true), // carriage return -> space
            Case(
                "a.b/c[d]`e;f\\g\nh\ri",
                "a b-c{d}'e f g h i",
                true,
            ),
            Case(": leading colon", " - leading colon", true), // ": " -> " - "
            Case("a:bc", "a - bc", true), // ":" -> " - "
            Case("a: bc", "a - bc", true), // ": " -> " - "
            Case("<tag>", "{tag}", true),
            Case("name<generic>", "name{generic}", true),
            Case("x>y", "x}y", true),
            Case("a.b.c", "a b c", true),
            Case("a/b/c", "a-b-c", true),
            // extra heavy special-symbol cases
            Case("::", " - ", true),
            Case(":::", " -  - ", true),
            Case("...---", "   ---", true),
            Case("//\\", "-- ", true),
            Case("[[]]<>", "{{}}{}", true),
            Case("x`;;`y", "x'  'y", true),
            Case("a:::b", "a -  - b", true),
            Case("..a..", "  a  ", true),
            // already quoted stays as-is and does not need quoting
            Case("`already quoted`", "`already quoted`", false),
        )

        cases.forEach { (input, expected, needsQuote) ->
            val vf = ValidFieldName.of(input)
            vf.unquoted shouldBe expected
            vf.needsQuote shouldBe needsQuote
            // quotedIfNeeded wraps the sanitized name in backticks only when quoting is required
            val expectedQuotedIfNeeded = if (needsQuote) "`$expected`" else expected
            vf.quotedIfNeeded shouldBe expectedQuotedIfNeeded
        }
    }
}

docs/StardustDocs/topics/extensionPropertiesApi.md

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,3 +163,52 @@ See [Compiler Plugin Example](https://github.com/Kotlin/dataframe/tree/plugin_ex
163163
IDEA project with basic Extension Properties API examples.
164164
</tab>
165165
</tabs>
166+
167+
## Property name generation
168+
169+
By default, each extension property is generated with a name equal to the original column name.
170+
171+
```kotlin
172+
val df = dataFrameOf("size_in_inches" to listOf(..))
173+
df.size_in_inches
174+
```
175+
176+
If the original column name cannot be used as a property name (for example, if it contains spaces
177+
or is a Kotlin keyword),
178+
it will be enclosed in backticks.
179+
180+
```kotlin
181+
val df = dataFrameOf("size in inches" to listOf(..))
182+
df.`size in inches`
183+
```
184+
185+
However, sometimes the original column name contains special symbols
186+
and can't be used as a property name even when enclosed in backticks.
187+
In such cases, special symbols in the auto-generated property name will be replaced.
188+
189+
```kotlin
190+
val df = dataFrameOf("size\nin:inches" to listOf(..))
191+
df.`size in - inches`
192+
```
193+
194+
> In such cases, use [**`rename`**](rename.md) to update column names,
195+
> or [**`renameToCamelCase`**](rename.md#renametocamelcase) to convert all column names
196+
> in a `DataFrame` to `camelCase`, which is the idiomatic and widely preferred naming style in Kotlin.
197+
198+
If you don't want to change the actual column name, but you need a convenient accessor for this column,
199+
you can use the `@ColumnName` annotation in a manually declared [data schema](schemas.md).
200+
It allows you to use a property name different
201+
from the original column name without changing the column's actual name:
202+
203+
```kotlin
204+
@DataSchema
205+
interface Info {
206+
@ColumnName("size\nin:inches")
207+
val sizeInInches: Double
208+
}
209+
```
210+
211+
```kotlin
212+
val df = dataFrameOf("size\nin:inches" to listOf(..)).cast<Info>()
213+
df.sizeInInches
214+
```

0 commit comments

Comments
 (0)