Skip to content

Commit 1519a32

Browse files
committed
utf8.h Reorder some bit flags
This changes the ordering of these bits (whose names are in the public API, but whose values are perl's to set). This enables a future commit which will replace a series of if ... else if ... else clauses with a switch() statement, by placing the bit positions in the order in which those existing clauses should be executed. The new ordering will cause the switch() to preserve the current if-clause execution order.
1 parent 080baaf commit 1519a32

File tree

3 files changed

+42
-41
lines changed

3 files changed

+42
-41
lines changed

ext/XS-APItest/t/utf8_setup.pl

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -64,33 +64,33 @@ ($)
6464
# representing a single code point
6565

6666
# Copied from utf8.h
67-
$::UTF8_ALLOW_EMPTY = 0x0001;
67+
$::UTF8_ALLOW_EMPTY = 0x0002;
6868
$::UTF8_GOT_EMPTY = $UTF8_ALLOW_EMPTY;
69-
$::UTF8_ALLOW_CONTINUATION = 0x0002;
69+
$::UTF8_ALLOW_CONTINUATION = 0x0004;
7070
$::UTF8_GOT_CONTINUATION = $UTF8_ALLOW_CONTINUATION;
71-
$::UTF8_ALLOW_NON_CONTINUATION = 0x0004;
71+
$::UTF8_ALLOW_NON_CONTINUATION = 0x0010;
7272
$::UTF8_GOT_NON_CONTINUATION = $UTF8_ALLOW_NON_CONTINUATION;
7373
$::UTF8_ALLOW_SHORT = 0x0008;
7474
$::UTF8_GOT_SHORT = $UTF8_ALLOW_SHORT;
75-
$::UTF8_ALLOW_LONG = 0x0010;
76-
$::UTF8_ALLOW_LONG_AND_ITS_VALUE = $UTF8_ALLOW_LONG|0x0020;
75+
$::UTF8_ALLOW_LONG = 0x2000;
76+
$::UTF8_ALLOW_LONG_AND_ITS_VALUE = $UTF8_ALLOW_LONG|0x4000;
7777
$::UTF8_GOT_LONG = $UTF8_ALLOW_LONG;
78-
$::UTF8_ALLOW_OVERFLOW = 0x0080;
78+
$::UTF8_ALLOW_OVERFLOW = 0x0001;
7979
$::UTF8_GOT_OVERFLOW = $UTF8_ALLOW_OVERFLOW;
80-
$::UTF8_DISALLOW_SURROGATE = 0x0100;
80+
$::UTF8_DISALLOW_SURROGATE = 0x0020;
8181
$::UTF8_GOT_SURROGATE = $UTF8_DISALLOW_SURROGATE;
82-
$::UTF8_WARN_SURROGATE = 0x0200;
83-
$::UTF8_DISALLOW_NONCHAR = 0x0400;
82+
$::UTF8_WARN_SURROGATE = 0x0040;
83+
$::UTF8_DISALLOW_NONCHAR = 0x0800;
8484
$::UTF8_GOT_NONCHAR = $UTF8_DISALLOW_NONCHAR;
85-
$::UTF8_WARN_NONCHAR = 0x0800;
86-
$::UTF8_DISALLOW_SUPER = 0x1000;
85+
$::UTF8_WARN_NONCHAR = 0x1000;
86+
$::UTF8_DISALLOW_SUPER = 0x0200;
8787
$::UTF8_GOT_SUPER = $UTF8_DISALLOW_SUPER;
88-
$::UTF8_WARN_SUPER = 0x2000;
89-
$::UTF8_DISALLOW_PERL_EXTENDED = 0x4000;
88+
$::UTF8_WARN_SUPER = 0x0400;
89+
$::UTF8_DISALLOW_PERL_EXTENDED = 0x0080;
9090
$::UTF8_GOT_PERL_EXTENDED = $UTF8_DISALLOW_PERL_EXTENDED;
91-
$::UTF8_WARN_PERL_EXTENDED = 0x8000;
92-
$::UTF8_CHECK_ONLY = 0x10000;
93-
$::UTF8_NO_CONFIDENCE_IN_CURLEN_ = 0x20000;
91+
$::UTF8_WARN_PERL_EXTENDED = 0x0100;
92+
$::UTF8_CHECK_ONLY = 0x8000;
93+
$::UTF8_NO_CONFIDENCE_IN_CURLEN_ = 0x10000;
9494

9595
$::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE
9696
= $UTF8_DISALLOW_SUPER|$UTF8_DISALLOW_SURROGATE;

ext/XS-APItest/t/utf8_warn_base.pl

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -585,23 +585,24 @@ ($$)
585585
# Possible flag returns from utf8n_to_uvchr_error(). These should have G_,
586586
# instead of A_, D_, but the prefixes will be used in a later commit, so
587587
# minimize churn by having them here.
588+
# The ordering is important, sorted by lowest bit position first taken from
589+
# utf8.h
588590
my @utf8n_flags_to_text = ( qw(
591+
A_OVERFLOW
589592
A_EMPTY
590593
A_CONTINUATION
591-
A_NON_CONTINUATION
592594
A_SHORT
593-
A_LONG
594-
A_LONG_AND_ITS_VALUE
595-
PLACEHOLDER
596-
A_OVERFLOW
595+
A_NON_CONTINUATION
597596
D_SURROGATE
598597
W_SURROGATE
599-
D_NONCHAR
600-
W_NONCHAR
601-
D_SUPER
602-
W_SUPER
603598
D_PERL_EXTENDED
604599
W_PERL_EXTENDED
600+
D_SUPER
601+
W_SUPER
602+
D_NONCHAR
603+
W_NONCHAR
604+
A_LONG
605+
A_LONG_AND_ITS_VALUE
605606
CHECK_ONLY
606607
NO_CONFIDENCE_IN_CURLEN_
607608
) );

utf8.h

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1128,15 +1128,15 @@ point's representation.
11281128
/* Largest code point we accept from external sources */
11291129
#define MAX_LEGAL_CP ((UV)IV_MAX)
11301130

1131-
#define UTF8_ALLOW_EMPTY 0x0001 /* Allow a zero length string */
1131+
#define UTF8_ALLOW_EMPTY 0x0002 /* Allow a zero length string */
11321132
#define UTF8_GOT_EMPTY UTF8_ALLOW_EMPTY
11331133

11341134
/* Allow first byte to be a continuation byte */
1135-
#define UTF8_ALLOW_CONTINUATION 0x0002
1135+
#define UTF8_ALLOW_CONTINUATION 0x0004
11361136
#define UTF8_GOT_CONTINUATION UTF8_ALLOW_CONTINUATION
11371137

11381138
/* Unexpected non-continuation byte */
1139-
#define UTF8_ALLOW_NON_CONTINUATION 0x0004
1139+
#define UTF8_ALLOW_NON_CONTINUATION 0x0010
11401140
#define UTF8_GOT_NON_CONTINUATION UTF8_ALLOW_NON_CONTINUATION
11411141

11421142
/* expecting more bytes than were available in the string */
@@ -1146,26 +1146,26 @@ point's representation.
11461146
/* Overlong sequence; i.e., the code point can be specified in fewer bytes.
11471147
* First one will convert the overlong to the REPLACEMENT CHARACTER; second
11481148
* will return what the overlong evaluates to */
1149-
#define UTF8_ALLOW_LONG 0x0010
1150-
#define UTF8_ALLOW_LONG_AND_ITS_VALUE (UTF8_ALLOW_LONG|0x0020)
1149+
#define UTF8_ALLOW_LONG 0x2000
1150+
#define UTF8_ALLOW_LONG_AND_ITS_VALUE (UTF8_ALLOW_LONG|0x4000)
11511151
#define UTF8_GOT_LONG UTF8_ALLOW_LONG
11521152

1153-
#define UTF8_ALLOW_OVERFLOW 0x0080
1153+
#define UTF8_ALLOW_OVERFLOW 0x0001
11541154
#define UTF8_GOT_OVERFLOW UTF8_ALLOW_OVERFLOW
11551155

1156-
#define UTF8_DISALLOW_SURROGATE 0x0100 /* Unicode surrogates */
1156+
#define UTF8_DISALLOW_SURROGATE 0x0020 /* Unicode surrogates */
11571157
#define UTF8_GOT_SURROGATE UTF8_DISALLOW_SURROGATE
1158-
#define UTF8_WARN_SURROGATE 0x0200
1158+
#define UTF8_WARN_SURROGATE 0x0040
11591159

11601160
/* Unicode non-character code points */
1161-
#define UTF8_DISALLOW_NONCHAR 0x0400
1161+
#define UTF8_DISALLOW_NONCHAR 0x0800
11621162
#define UTF8_GOT_NONCHAR UTF8_DISALLOW_NONCHAR
1163-
#define UTF8_WARN_NONCHAR 0x0800
1163+
#define UTF8_WARN_NONCHAR 0x1000
11641164

11651165
/* Super-set of Unicode: code points above the legal max */
1166-
#define UTF8_DISALLOW_SUPER 0x1000
1166+
#define UTF8_DISALLOW_SUPER 0x0200
11671167
#define UTF8_GOT_SUPER UTF8_DISALLOW_SUPER
1168-
#define UTF8_WARN_SUPER 0x2000
1168+
#define UTF8_WARN_SUPER 0x0400
11691169

11701170
/* The original UTF-8 standard did not define UTF-8 with start bytes of 0xFE or
11711171
* 0xFF, though UTF-EBCDIC did. This allowed both versions to represent code
@@ -1176,9 +1176,9 @@ point's representation.
11761176
* extensions, and not likely to be interchangeable with other languages. Note
11771177
* that on ASCII platforms, FE overflows a signed 32-bit word, and FF an
11781178
* unsigned one. */
1179-
#define UTF8_DISALLOW_PERL_EXTENDED 0x4000
1179+
#define UTF8_DISALLOW_PERL_EXTENDED 0x0080
11801180
#define UTF8_GOT_PERL_EXTENDED UTF8_DISALLOW_PERL_EXTENDED
1181-
#define UTF8_WARN_PERL_EXTENDED 0x8000
1181+
#define UTF8_WARN_PERL_EXTENDED 0x0100
11821182

11831183
/* For back compat, these old names are misleading for overlongs and
11841184
* UTF_EBCDIC. */
@@ -1188,8 +1188,8 @@ point's representation.
11881188
#define UTF8_DISALLOW_FE_FF UTF8_DISALLOW_PERL_EXTENDED
11891189
#define UTF8_WARN_FE_FF UTF8_WARN_PERL_EXTENDED
11901190

1191-
#define UTF8_CHECK_ONLY 0x10000
1192-
#define _UTF8_NO_CONFIDENCE_IN_CURLEN 0x20000 /* Internal core use only */
1191+
#define UTF8_CHECK_ONLY 0x8000
1192+
#define _UTF8_NO_CONFIDENCE_IN_CURLEN 0x10000 /* Internal core use only */
11931193

11941194
/* For backwards source compatibility. They do nothing, as the default now
11951195
* includes what they used to mean. The first one's meaning was to allow the

0 commit comments

Comments
 (0)