@@ -1128,45 +1128,28 @@ point's representation.
11281128/* Largest code point we accept from external sources */
11291129#define MAX_LEGAL_CP ((UV)IV_MAX)
11301130
1131+ #define UTF8_ALLOW_OVERFLOW 0x0001
1132+ #define UTF8_GOT_OVERFLOW UTF8_ALLOW_OVERFLOW
1133+
11311134#define UTF8_ALLOW_EMPTY 0x0002 /* Allow a zero length string */
11321135#define UTF8_GOT_EMPTY UTF8_ALLOW_EMPTY
11331136
11341137/* Allow first byte to be a continuation byte */
11351138#define UTF8_ALLOW_CONTINUATION 0x0004
11361139#define UTF8_GOT_CONTINUATION UTF8_ALLOW_CONTINUATION
11371140
1138- /* Unexpected non-continuation byte */
1139- #define UTF8_ALLOW_NON_CONTINUATION 0x0010
1140- #define UTF8_GOT_NON_CONTINUATION UTF8_ALLOW_NON_CONTINUATION
1141-
11421141/* expecting more bytes than were available in the string */
11431142#define UTF8_ALLOW_SHORT 0x0008
11441143#define UTF8_GOT_SHORT UTF8_ALLOW_SHORT
11451144
1146- /* Overlong sequence; i.e., the code point can be specified in fewer bytes.
1147- * First one will convert the overlong to the REPLACEMENT CHARACTER; second
1148- * will return what the overlong evaluates to */
1149- #define UTF8_ALLOW_LONG 0x2000
1150- #define UTF8_ALLOW_LONG_AND_ITS_VALUE (UTF8_ALLOW_LONG|0x4000)
1151- #define UTF8_GOT_LONG UTF8_ALLOW_LONG
1152-
1153- #define UTF8_ALLOW_OVERFLOW 0x0001
1154- #define UTF8_GOT_OVERFLOW UTF8_ALLOW_OVERFLOW
1145+ /* Unexpected non-continuation byte */
1146+ #define UTF8_ALLOW_NON_CONTINUATION 0x0010
1147+ #define UTF8_GOT_NON_CONTINUATION UTF8_ALLOW_NON_CONTINUATION
11551148
11561149#define UTF8_DISALLOW_SURROGATE 0x0020 /* Unicode surrogates */
11571150#define UTF8_GOT_SURROGATE UTF8_DISALLOW_SURROGATE
11581151#define UTF8_WARN_SURROGATE 0x0040
11591152
1160- /* Unicode non-character code points */
1161- #define UTF8_DISALLOW_NONCHAR 0x0800
1162- #define UTF8_GOT_NONCHAR UTF8_DISALLOW_NONCHAR
1163- #define UTF8_WARN_NONCHAR 0x1000
1164-
1165- /* Super-set of Unicode: code points above the legal max */
1166- #define UTF8_DISALLOW_SUPER 0x0200
1167- #define UTF8_GOT_SUPER UTF8_DISALLOW_SUPER
1168- #define UTF8_WARN_SUPER 0x0400
1169-
11701153/* The original UTF-8 standard did not define UTF-8 with start bytes of 0xFE or
11711154 * 0xFF, though UTF-EBCDIC did. This allowed both versions to represent code
11721155 * points up to 2 ** 31 - 1. Perl extends UTF-8 so that 0xFE and 0xFF are
@@ -1180,6 +1163,23 @@ point's representation.
11801163#define UTF8_GOT_PERL_EXTENDED UTF8_DISALLOW_PERL_EXTENDED
11811164#define UTF8_WARN_PERL_EXTENDED 0x0100
11821165
1166+ /* Super-set of Unicode: code points above the legal max */
1167+ #define UTF8_DISALLOW_SUPER 0x0200
1168+ #define UTF8_GOT_SUPER UTF8_DISALLOW_SUPER
1169+ #define UTF8_WARN_SUPER 0x0400
1170+
1171+ /* Unicode non-character code points */
1172+ #define UTF8_DISALLOW_NONCHAR 0x0800
1173+ #define UTF8_GOT_NONCHAR UTF8_DISALLOW_NONCHAR
1174+ #define UTF8_WARN_NONCHAR 0x1000
1175+
1176+ /* Overlong sequence; i.e., the code point can be specified in fewer bytes.
1177+ * First one will convert the overlong to the REPLACEMENT CHARACTER; second
1178+ * will return what the overlong evaluates to */
1179+ #define UTF8_ALLOW_LONG 0x2000
1180+ #define UTF8_ALLOW_LONG_AND_ITS_VALUE (UTF8_ALLOW_LONG|0x4000)
1181+ #define UTF8_GOT_LONG UTF8_ALLOW_LONG
1182+
11831183/* For back compat, these old names are misleading for overlongs and
11841184 * UTF-EBCDIC. */
11851185#define UTF8_DISALLOW_ABOVE_31_BIT UTF8_DISALLOW_PERL_EXTENDED
0 commit comments