@@ -122,6 +122,9 @@ internal fun String.utf8Size(startIndex: Int = 0, endIndex: Int = length): Long
122122/* *
123123 * Encodes [codePoint] in UTF-8 and writes it to this sink.
124124 *
125+ * [codePoint] should represent a valid Unicode code point, meaning that its value should be within the Unicode codespace
126+ * (`U+000000` .. `U+10ffff`), otherwise [IllegalArgumentException] will be thrown.
127+ *
125128 * Note that in general, a value retrieved from [Char.code] cannot be written directly
126129 * as it may be a part of a [surrogate pair](https://www.unicode.org/faq/utf_bom.html#utf16-2) (that could be
127130 * detected using [Char.isSurrogate], or [Char.isHighSurrogate] and [Char.isLowSurrogate]).
@@ -136,6 +139,7 @@ internal fun String.utf8Size(startIndex: Int = 0, endIndex: Int = length): Long
136139 * @param codePoint the codePoint to be written.
137140 *
138141 * @throws IllegalStateException when the sink is closed.
142+ * @throws IllegalArgumentException when [codePoint] value is negative, or greater than `U+10ffff`.
139143 *
140144 * @sample kotlinx.io.samples.KotlinxIoCoreCommonSamples.writeUtf8CodePointSample
141145 * @sample kotlinx.io.samples.KotlinxIoCoreCommonSamples.writeSurrogatePair
@@ -510,6 +514,12 @@ private fun Buffer.commonWriteUtf8(string: String, beginIndex: Int, endIndex: In
510514
511515private fun Buffer.commonWriteUtf8CodePoint (codePoint : Int ) {
512516 when {
517+ codePoint < 0 || codePoint > 0x10ffff -> {
518+ throw IllegalArgumentException (
519+ " Code point value is out of Unicode codespace 0..0x10ffff: 0x${codePoint.toHexString()} ($codePoint )"
520+ )
521+ }
522+
513523 codePoint < 0x80 -> {
514524 // Emit a 7-bit code point with 1 byte.
515525 writeByte(codePoint.toByte())
@@ -539,7 +549,7 @@ private fun Buffer.commonWriteUtf8CodePoint(codePoint: Int) {
539549 size + = 3L
540550 }
541551
542- codePoint <= 0x10ffff -> {
552+ else -> { // [0x10000, 0x10ffff]
543553 // Emit a 21-bit code point with 4 bytes.
544554 val tail = writableSegment(4 )
545555 tail.data[tail.limit] = (codePoint shr 18 or 0xf0 ).toByte() // 11110xxx
@@ -549,10 +559,6 @@ private fun Buffer.commonWriteUtf8CodePoint(codePoint: Int) {
549559 tail.limit + = 4
550560 size + = 4L
551561 }
552-
553- else -> {
554- throw IllegalArgumentException (" Unexpected code point: 0x${codePoint.toHexString()} " )
555- }
556562 }
557563}
558564
0 commit comments