@@ -42,7 +42,7 @@ fn decompose<D, F>(c: char, decompose_char: D, mut emit_char: F)
4242 }
4343
4444 // Perform decomposition for Hangul
45- if is_hangul ( c) {
45+ if is_hangul_syllable ( c) {
4646 decompose_hangul ( c, emit_char) ;
4747 return ;
4848 }
@@ -77,26 +77,30 @@ const T_COUNT: u32 = 28;
7777const N_COUNT : u32 = ( V_COUNT * T_COUNT ) ;
7878const S_COUNT : u32 = ( L_COUNT * N_COUNT ) ;
7979
80- pub ( crate ) fn is_hangul ( c : char ) -> bool {
80+ const S_END : u32 = S_BASE + S_COUNT - 1 ;
81+ const L_END : u32 = L_BASE + L_COUNT - 1 ;
82+ const V_END : u32 = V_BASE + V_COUNT - 1 ;
83+ const T_END : u32 = T_BASE + T_COUNT - 1 ;
84+
85+ pub ( crate ) fn is_hangul_syllable ( c : char ) -> bool {
8186 ( c as u32 ) >= S_BASE && ( c as u32 ) < ( S_BASE + S_COUNT )
8287}
8388
8489// Decompose a precomposed Hangul syllable
8590#[ allow( unsafe_code) ]
8691#[ inline( always) ]
8792fn decompose_hangul < F > ( s : char , mut emit_char : F ) where F : FnMut ( char ) {
88- let si = s as u32 - S_BASE ;
89-
90- let li = si / N_COUNT ;
93+ let s_index = s as u32 - S_BASE ;
94+ let l_index = s_index / N_COUNT ;
9195 unsafe {
92- emit_char ( char:: from_u32_unchecked ( L_BASE + li ) ) ;
96+ emit_char ( char:: from_u32_unchecked ( L_BASE + l_index ) ) ;
9397
94- let vi = ( si % N_COUNT ) / T_COUNT ;
95- emit_char ( char:: from_u32_unchecked ( V_BASE + vi ) ) ;
98+ let v_index = ( s_index % N_COUNT ) / T_COUNT ;
99+ emit_char ( char:: from_u32_unchecked ( V_BASE + v_index ) ) ;
96100
97- let ti = si % T_COUNT ;
98- if ti > 0 {
99- emit_char ( char:: from_u32_unchecked ( T_BASE + ti ) ) ;
101+ let t_index = s_index % T_COUNT ;
102+ if t_index > 0 {
103+ emit_char ( char:: from_u32_unchecked ( T_BASE + t_index ) ) ;
100104 }
101105 }
102106}
@@ -112,20 +116,33 @@ pub(crate) fn hangul_decomposition_length(s: char) -> usize {
112116#[ allow( unsafe_code) ]
113117#[ inline( always) ]
114118fn compose_hangul ( a : char , b : char ) -> Option < char > {
115- let l = a as u32 ;
116- let v = b as u32 ;
117- // Compose an LPart and a VPart
118- if L_BASE <= l && l < ( L_BASE + L_COUNT ) // l should be an L choseong jamo
119- && V_BASE <= v && v < ( V_BASE + V_COUNT ) { // v should be a V jungseong jamo
120- let r = S_BASE + ( l - L_BASE ) * N_COUNT + ( v - V_BASE ) * T_COUNT ;
121- return unsafe { Some ( char:: from_u32_unchecked ( r) ) } ;
119+ let ( a, b) = ( a as u32 , b as u32 ) ;
120+ match ( a, b) {
121+ // Compose a leading consonant and a vowel together into an LV_Syllable
122+ ( L_BASE ... L_END , V_BASE ... V_END ) => {
123+ let l_index = a - L_BASE ;
124+ let v_index = b - V_BASE ;
125+ let lv_index = l_index * N_COUNT + v_index * T_COUNT ;
126+ let s = S_BASE + lv_index;
127+ Some ( unsafe { char:: from_u32_unchecked ( s) } )
128+ } ,
129+ // Compose an LV_Syllable and a trailing consonant into an LVT_Syllable
130+ ( S_BASE ... S_END , T_BASE ... T_END ) if ( a - S_BASE ) % T_COUNT == 0 && ( b - T_BASE ) > 0 => {
131+ Some ( unsafe { char:: from_u32_unchecked ( a + ( b - T_BASE ) ) } )
132+ } ,
133+ _ => None ,
122134 }
123- // Compose an LVPart and a TPart
124- if S_BASE <= l && l <= ( S_BASE +S_COUNT -T_COUNT ) // l should be a syllable block
125- && T_BASE <= v && v < ( T_BASE +T_COUNT ) // v should be a T jongseong jamo
126- && ( l - S_BASE ) % T_COUNT == 0 { // l should be an LV syllable block (not LVT)
127- let r = l + ( v - T_BASE ) ;
128- return unsafe { Some ( char:: from_u32_unchecked ( r) ) } ;
135+ }
136+
#[cfg(test)]
mod tests {
    use super::compose_hangul;

    // Regression guard for a bug where an LV_Syllable was composed with
    // `T_BASE` itself. Only characters from `T_BASE + 1` through `T_END` may
    // be combined with an LV_Syllable, so this pair must stay uncomposed.
    #[test]
    fn test_hangul_composition() {
        let lv_syllable = '\u{c8e0}';
        let t_base = '\u{11a7}';
        assert_eq!(compose_hangul(lv_syllable, t_base), None);
    }
}
0 commit comments