@@ -42,7 +42,7 @@ fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F)
4242 }
4343
4444 // Perform decomposition for Hangul
45- if is_hangul ( c) {
45+ if is_hangul_syllable ( c) {
4646 decompose_hangul ( c, emit_char) ;
4747 return ;
4848 }
@@ -77,26 +77,34 @@ const T_COUNT: u32 = 28;
7777const N_COUNT : u32 = ( V_COUNT * T_COUNT ) ;
7878const S_COUNT : u32 = ( L_COUNT * N_COUNT ) ;
7979
80- pub ( crate ) fn is_hangul ( c : char ) -> bool {
80+ const S_LAST : u32 = S_BASE + S_COUNT - 1 ;
81+ const L_LAST : u32 = L_BASE + L_COUNT - 1 ;
82+ const V_LAST : u32 = V_BASE + V_COUNT - 1 ;
83+ const T_LAST : u32 = T_BASE + T_COUNT - 1 ;
84+
85+ // Composition only occurs for `TPart`s in `U+11A8 ... U+11C2`,
86+ // i.e. `T_BASE + 1 ... T_LAST`.
87+ const T_FIRST : u32 = T_BASE + 1 ;
88+
89+ pub ( crate ) fn is_hangul_syllable ( c : char ) -> bool {
8190 ( c as u32 ) >= S_BASE && ( c as u32 ) < ( S_BASE + S_COUNT )
8291}
8392
8493// Decompose a precomposed Hangul syllable
8594#[ allow( unsafe_code) ]
8695#[ inline( always) ]
8796fn decompose_hangul < F > ( s : char , mut emit_char : F ) where F : FnMut ( char ) {
88- let si = s as u32 - S_BASE ;
89-
90- let li = si / N_COUNT ;
97+ let s_index = s as u32 - S_BASE ;
98+ let l_index = s_index / N_COUNT ;
9199 unsafe {
92- emit_char ( char:: from_u32_unchecked ( L_BASE + li ) ) ;
100+ emit_char ( char:: from_u32_unchecked ( L_BASE + l_index ) ) ;
93101
94- let vi = ( si % N_COUNT ) / T_COUNT ;
95- emit_char ( char:: from_u32_unchecked ( V_BASE + vi ) ) ;
102+ let v_index = ( s_index % N_COUNT ) / T_COUNT ;
103+ emit_char ( char:: from_u32_unchecked ( V_BASE + v_index ) ) ;
96104
97- let ti = si % T_COUNT ;
98- if ti > 0 {
99- emit_char ( char:: from_u32_unchecked ( T_BASE + ti ) ) ;
105+ let t_index = s_index % T_COUNT ;
106+ if t_index > 0 {
107+ emit_char ( char:: from_u32_unchecked ( T_BASE + t_index ) ) ;
100108 }
101109 }
102110}
@@ -112,20 +120,33 @@ pub(crate) fn hangul_decomposition_length(s: char) -> usize {
112120#[ allow( unsafe_code) ]
113121#[ inline( always) ]
114122fn compose_hangul ( a : char , b : char ) -> Option < char > {
115- let l = a as u32 ;
116- let v = b as u32 ;
117- // Compose an LPart and a VPart
118- if L_BASE <= l && l < ( L_BASE + L_COUNT ) // l should be an L choseong jamo
119- && V_BASE <= v && v < ( V_BASE + V_COUNT ) { // v should be a V jungseong jamo
120- let r = S_BASE + ( l - L_BASE ) * N_COUNT + ( v - V_BASE ) * T_COUNT ;
121- return unsafe { Some ( char:: from_u32_unchecked ( r) ) } ;
123+ let ( a, b) = ( a as u32 , b as u32 ) ;
124+ match ( a, b) {
125+ // Compose a leading consonant and a vowel together into an LV_Syllable
126+ ( L_BASE ... L_LAST , V_BASE ... V_LAST ) => {
127+ let l_index = a - L_BASE ;
128+ let v_index = b - V_BASE ;
129+ let lv_index = l_index * N_COUNT + v_index * T_COUNT ;
130+ let s = S_BASE + lv_index;
131+ Some ( unsafe { char:: from_u32_unchecked ( s) } )
132+ } ,
133+ // Compose an LV_Syllable and a trailing consonant into an LVT_Syllable
134+ ( S_BASE ... S_LAST , T_FIRST ... T_LAST ) if ( a - S_BASE ) % T_COUNT == 0 => {
135+ Some ( unsafe { char:: from_u32_unchecked ( a + ( b - T_BASE ) ) } )
136+ } ,
137+ _ => None ,
122138 }
123- // Compose an LVPart and a TPart
124- if S_BASE <= l && l <= ( S_BASE +S_COUNT -T_COUNT ) // l should be a syllable block
125- && T_BASE <= v && v < ( T_BASE +T_COUNT ) // v should be a T jongseong jamo
126- && ( l - S_BASE ) % T_COUNT == 0 { // l should be an LV syllable block (not LVT)
127- let r = l + ( v - T_BASE ) ;
128- return unsafe { Some ( char:: from_u32_unchecked ( r) ) } ;
139+ }
140+
#[cfg(test)]
mod tests {
    use super::compose_hangul;

    // Regression test for a bug where an LV_Syllable was composed with
    // T_BASE directly. Only characters in `T_BASE + 1 ..= T_LAST` may be
    // composed onto an LV_Syllable, so this pair must stay uncomposed.
    #[test]
    fn test_hangul_composition() {
        let lv_syllable = '\u{c8e0}';
        let t_base = '\u{11a7}';
        assert_eq!(compose_hangul(lv_syllable, t_base), None);
    }
}
0 commit comments