@@ -412,41 +412,13 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
412412 Some ( ( width, signed) ) => match name {
413413 sym:: ctlz => self . count_leading_zeroes ( width, args[ 0 ] . immediate ( ) ) ,
414414
415- sym:: cttz => {
416- let func = self . current_func ( ) ;
417- let then_block = func. new_block ( "then" ) ;
418- let else_block = func. new_block ( "else" ) ;
419- let after_block = func. new_block ( "after" ) ;
420-
421- let arg = args[ 0 ] . immediate ( ) ;
422- let result = func. new_local ( None , self . u32_type , "zeros" ) ;
423- let zero = self . cx . gcc_zero ( arg. get_type ( ) ) ;
424- let cond = self . gcc_icmp ( IntPredicate :: IntEQ , arg, zero) ;
425- self . llbb ( ) . end_with_conditional ( None , cond, then_block, else_block) ;
426-
427- let zero_result = self . cx . gcc_uint ( self . u32_type , width) ;
428- then_block. add_assignment ( None , result, zero_result) ;
429- then_block. end_with_jump ( None , after_block) ;
430-
431- // NOTE: since jumps were added in a place
432- // count_leading_zeroes() does not expect, the current block
433- // in the state need to be updated.
434- self . switch_to_block ( else_block) ;
435-
436- let zeros = self . count_trailing_zeroes ( width, arg) ;
437- self . llbb ( ) . add_assignment ( None , result, zeros) ;
438- self . llbb ( ) . end_with_jump ( None , after_block) ;
439-
440- // NOTE: since jumps were added in a place rustc does not
441- // expect, the current block in the state need to be updated.
442- self . switch_to_block ( after_block) ;
443-
444- result. to_rvalue ( )
445- }
446415 sym:: ctlz_nonzero => {
447416 self . count_leading_zeroes_nonzero ( width, args[ 0 ] . immediate ( ) )
448417 }
449- sym:: cttz_nonzero => self . count_trailing_zeroes ( width, args[ 0 ] . immediate ( ) ) ,
418+ sym:: cttz => self . count_trailing_zeroes ( width, args[ 0 ] . immediate ( ) ) ,
419+ sym:: cttz_nonzero => {
420+ self . count_trailing_zeroes_nonzero ( width, args[ 0 ] . immediate ( ) )
421+ }
450422 sym:: ctpop => self . pop_count ( args[ 0 ] . immediate ( ) ) ,
451423 sym:: bswap => {
452424 if width == 8 {
@@ -984,7 +956,37 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
984956 self . context . new_cast ( self . location , res, result_type)
985957 }
986958
987- fn count_trailing_zeroes ( & mut self , _width : u64 , arg : RValue < ' gcc > ) -> RValue < ' gcc > {
959+ fn count_trailing_zeroes ( & mut self , width : u64 , arg : RValue < ' gcc > ) -> RValue < ' gcc > {
960+ let func = self . current_func ( ) ;
961+ let then_block = func. new_block ( "then" ) ;
962+ let else_block = func. new_block ( "else" ) ;
963+ let after_block = func. new_block ( "after" ) ;
964+
965+ let result = func. new_local ( None , self . u32_type , "zeros" ) ;
966+ let zero = self . cx . gcc_zero ( arg. get_type ( ) ) ;
967+ let cond = self . gcc_icmp ( IntPredicate :: IntEQ , arg, zero) ;
968+ self . llbb ( ) . end_with_conditional ( None , cond, then_block, else_block) ;
969+
970+ let zero_result = self . cx . gcc_uint ( self . u32_type , width) ;
971+ then_block. add_assignment ( None , result, zero_result) ;
972+ then_block. end_with_jump ( None , after_block) ;
973+
974+ // NOTE: since jumps were added in a place count_trailing_zeroes_nonzero() does not expect,
975+ // the current block in the state needs to be updated.
976+ self . switch_to_block ( else_block) ;
977+
978+ let zeros = self . count_trailing_zeroes_nonzero ( width, arg) ;
979+ self . llbb ( ) . add_assignment ( None , result, zeros) ;
980+ self . llbb ( ) . end_with_jump ( None , after_block) ;
981+
982+ // NOTE: since jumps were added in a place rustc does not
983+ // expect, the current block in the state needs to be updated.
984+ self . switch_to_block ( after_block) ;
985+
986+ result. to_rvalue ( )
987+ }
988+
989+ fn count_trailing_zeroes_nonzero ( & mut self , _width : u64 , arg : RValue < ' gcc > ) -> RValue < ' gcc > {
988990 let arg_type = arg. get_type ( ) ;
989991 let result_type = self . u32_type ;
990992 let arg = if arg_type. is_signed ( self . cx ) {
@@ -1008,50 +1010,43 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
10081010 ( "__builtin_ctzll" , self . cx . ulonglong_type )
10091011 }
10101012 else if arg_type. is_u128 ( self . cx ) {
1011- // Adapted from the algorithm to count leading zeroes from: https://stackoverflow.com/a/28433850/389119
1012- let array_type = self . context . new_array_type ( None , arg_type, 3 ) ;
1013- let result = self . current_func ( )
1014- . new_local ( None , array_type, "count_loading_zeroes_results" ) ;
1015-
1016- let sixty_four = self . gcc_int ( arg_type, 64 ) ;
1017- let shift = self . gcc_lshr ( arg, sixty_four) ;
1018- let high = self . gcc_int_cast ( shift, self . u64_type ) ;
1019- let low = self . gcc_int_cast ( arg, self . u64_type ) ;
1020-
1021- let zero = self . context . new_rvalue_zero ( self . usize_type ) ;
1022- let one = self . context . new_rvalue_one ( self . usize_type ) ;
1023- let two = self . context . new_rvalue_from_long ( self . usize_type , 2 ) ;
1013+ // __builtin_ctzll is UB when called with 0, so call it on the 64 low bits if they are not 0,
1014+ // else call it on the 64 high bits and add 64. In the else case, the 64 high bits can't be 0
1015+ // because arg is not 0.
10241016
1025- let ctzll = self . context . get_builtin_function ( "__builtin_ctzll" ) ;
1017+ let result = self . current_func ( )
1018+ . new_local ( None , result_type, "count_trailing_zeroes_results" ) ;
10261019
1027- let first_elem = self . context . new_array_access ( self . location , result , zero ) ;
1028- let first_value = self . gcc_int_cast ( self . context . new_call ( self . location , ctzll , & [ low ] ) , arg_type ) ;
1029- self . llbb ( )
1030- . add_assignment ( self . location , first_elem , first_value ) ;
1020+ let ctlz_then_block = self . current_func ( ) . new_block ( "ctlz_then" ) ;
1021+ let ctlz_else_block = self . current_func ( ) . new_block ( "ctlz_else" ) ;
1022+ let ctlz_after_block = self . current_func ( ) . new_block ( "ctlz_after" ) ;
1023+ let clzll = self . context . get_builtin_function ( "__builtin_ctzll" ) ;
10311024
1032- let second_elem = self . context . new_array_access ( self . location , result, one) ;
1033- let second_value = self . gcc_add ( self . gcc_int_cast ( self . context . new_call ( self . location , ctzll, & [ high] ) , arg_type) , sixty_four) ;
1034- self . llbb ( )
1035- . add_assignment ( self . location , second_elem, second_value) ;
1025+ let low = self . gcc_int_cast ( arg, self . u64_type ) ;
1026+ let zero_low = self . const_uint ( low. get_type ( ) , 0 ) ;
1027+ let cond = self . gcc_icmp ( IntPredicate :: IntNE , low, zero_low) ;
1028+ self . llbb ( ) . end_with_conditional ( self . location , cond, ctlz_then_block, ctlz_else_block) ;
1029+ self . switch_to_block ( ctlz_then_block) ;
10361030
1037- let third_elem = self . context . new_array_access ( self . location , result, two) ;
1038- let third_value = self . gcc_int ( arg_type, 128 ) ;
1039- self . llbb ( )
1040- . add_assignment ( self . location , third_elem, third_value) ;
1031+ let result_128 =
1032+ self . gcc_int_cast ( self . context . new_call ( None , clzll, & [ low] ) , result_type) ;
10411033
1042- let not_low = self . context . new_unary_op ( self . location , UnaryOp :: LogicalNegate , self . u64_type , low) ;
1043- let not_high = self . context . new_unary_op ( self . location , UnaryOp :: LogicalNegate , self . u64_type , high) ;
1044- let not_low_and_not_high = not_low & not_high;
1045- let index = not_low + not_low_and_not_high;
1046- // NOTE: the following cast is necessary to avoid a GIMPLE verification failure in
1047- // gcc.
1048- // TODO(antoyo): do the correct verification in libgccjit to avoid an error at the
1049- // compilation stage.
1050- let index = self . context . new_cast ( self . location , index, self . i32_type ) ;
1034+ ctlz_then_block. add_assignment ( self . location , result, result_128) ;
1035+ ctlz_then_block. end_with_jump ( self . location , ctlz_after_block) ;
10511036
1052- let res = self . context . new_array_access ( self . location , result, index) ;
1037+ self . switch_to_block ( ctlz_else_block) ;
1038+ let sixty_four = self . const_uint ( arg_type, 64 ) ;
1039+ let shift = self . lshr ( arg, sixty_four) ;
1040+ let high = self . gcc_int_cast ( shift, self . u64_type ) ;
1041+ let high_leading_zeroes =
1042+ self . gcc_int_cast ( self . context . new_call ( None , clzll, & [ high] ) , result_type) ;
10531043
1054- return self . gcc_int_cast ( res. to_rvalue ( ) , result_type) ;
1044+ let sixty_four_result_type = self . const_uint ( result_type, 64 ) ;
1045+ let result_128 = self . add ( high_leading_zeroes, sixty_four_result_type) ;
1046+ ctlz_else_block. add_assignment ( self . location , result, result_128) ;
1047+ ctlz_else_block. end_with_jump ( self . location , ctlz_after_block) ;
1048+ self . switch_to_block ( ctlz_after_block) ;
1049+ return result. to_rvalue ( ) ;
10551050 }
10561051 else {
10571052 let count_trailing_zeroes = self . context . get_builtin_function ( "__builtin_ctzll" ) ;
0 commit comments