@@ -279,16 +279,44 @@ mod prim_never {}
279279///
280280/// The `char` type represents a single character. More specifically, since
281281/// 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
282- /// scalar value]', which is similar to, but not the same as, a '[Unicode code
283- /// point]'.
284- ///
285- /// [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
286- /// [Unicode code point]: https://www.unicode.org/glossary/#code_point
282+ /// scalar value]'.
287283///
288284/// This documentation describes a number of methods and trait implementations on the
289285/// `char` type. For technical reasons, there is additional, separate
290286/// documentation in [the `std::char` module](char/index.html) as well.
291287///
288+ /// # Validity
289+ ///
290+ /// A `char` is a '[Unicode scalar value]', which is any '[Unicode code point]'
291+ /// other than a [surrogate code point]. This has a fixed numerical definition:
292+ /// code points are in the range `'\0'` to `char::MAX` (`'\u{10FFFF}'`), inclusive.
293+ /// Surrogate code points, used by UTF-16, are in the range U+D800 to U+DFFF, inclusive.
294+ ///
295+ /// No `char` may be constructed, whether as a literal or at runtime, that is not a
296+ /// Unicode scalar value:
297+ ///
298+ /// ```text
299+ /// let forbidden_chars = [
300+ /// // Each of these is a compiler error
301+ /// '\u{D800}', '\u{DFFF}', '\u{110000}',
302+ ///
303+ /// // Panics; from_u32 returns None.
304+ /// char::from_u32(0xDE01).unwrap(),
305+ ///
306+ /// // Undefined behaviour
307+ /// unsafe { char::from_u32_unchecked(0x110000) },
308+ /// ];
309+ /// ```
310+ ///
311+ /// Unicode is regularly updated. Many Unicode scalar values are not currently
312+ /// assigned to a character, but may be in the future ("reserved"); some will never
313+ /// be a character ("noncharacters"); and some may be given different meanings by
314+ /// different users ("private use").
315+ ///
316+ /// [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
317+ /// [Unicode code point]: https://www.unicode.org/glossary/#code_point
318+ /// [surrogate code point]: https://www.unicode.org/glossary/#surrogate_code_point
319+ ///
292320/// # Representation
293321///
294322/// `char` is always four bytes in size. This is a different representation than
0 commit comments