@@ -372,6 +372,97 @@ getSomeReg expr = do
372372 Fixed rep reg code ->
373373 return (reg, rep, code)
374374
375+ {- Note [Aarch64 immediates]
376+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
377+ Aarch64 with it's fixed width instruction encoding uses leftover space for
378+ immediates.
379+ If you want the full rundown consult the arch reference document:
380+ "Arm® Architecture Reference Manual" - "C3.4 Data processing - immediate"
381+
382+ The gist of it is that different instructions allow for different immediate encodings.
383+ The ones we care about for better code generation are:
384+
385+ * Simple but potentially repeated bit-patterns for logic instructions.
386+ * 16bit numbers shifted by multiples of 16.
387+ * 12 bit numbers optionally shifted by 12 bits.
388+
389+ It might seem like the ISA allows for 64bit immediates but this isn't the case.
390+ Rather there are some instruction aliases which allow for large unencoded immediates
391+ which will then be transalted to one of the immediate encodings implicitly.
392+
393+ For example mov x1, #0x10000 is allowed but will be assembled to movz x1, #0x1, lsl #16
394+ -}
395+
396+ -- | Move (wide immediate)
397+ -- Allows for 16bit immediate which can be shifted by 0/16/32/48 bits.
398+ -- Used with MOVZ,MOVN, MOVK
399+ -- See Note [Aarch64 immediates]
400+ getMovWideImm :: Integer -> Width -> Maybe Operand
401+ getMovWideImm n w
402+ -- TODO: Handle sign extension/negatives
403+ | n <= 0
404+ = Nothing
405+ -- Fits in 16 bits
406+ | sized_n < 2 ^ (16 :: Int )
407+ = Just $ OpImm (ImmInteger truncated)
408+
409+ -- 0x0000 0000 xxxx 0000
410+ | trailing_zeros >= 16 && sized_n < 2 ^ (32 :: Int )
411+ = Just $ OpImmShift (ImmInteger $ truncated `shiftR` 16 ) SLSL 16
412+
413+ -- 0x 0000 xxxx 0000 0000
414+ | trailing_zeros >= 32 && sized_n < 2 ^ (48 :: Int )
415+ = Just $ OpImmShift (ImmInteger $ truncated `shiftR` 32 ) SLSL 32
416+
417+ -- 0x xxxx 0000 0000 0000
418+ | trailing_zeros >= 48
419+ = Just $ OpImmShift (ImmInteger $ truncated `shiftR` 48 ) SLSL 48
420+
421+ | otherwise
422+ = Nothing
423+ where
424+ truncated = narrowU w n
425+ sized_n = fromIntegral truncated :: Word64
426+ trailing_zeros = countTrailingZeros sized_n
427+
428+ -- | Arithmetic(immediate)
429+ -- Allows for 12bit immediates which can be shifted by 0 or 12 bits.
430+ -- Used with ADD, ADDS, SUB, SUBS, CMP, CMN
431+ -- See Note [Aarch64 immediates]
432+ getArithImm :: Integer -> Width -> Maybe Operand
433+ getArithImm n w
434+ -- TODO: Handle sign extension
435+ | n <= 0
436+ = Nothing
437+ -- Fits in 16 bits
438+ -- Fits in 12 bits
439+ | sized_n < 2 ^ (12 :: Int )
440+ = Just $ OpImm (ImmInteger truncated)
441+
442+ -- 12 bits shifted by 12 places.
443+ | trailing_zeros >= 12 && sized_n < 2 ^ (24 :: Int )
444+ = Just $ OpImmShift (ImmInteger $ truncated `shiftR` 12 ) SLSL 12
445+
446+ | otherwise
447+ = Nothing
448+ where
449+ sized_n = fromIntegral truncated :: Word64
450+ truncated = narrowU w n
451+ trailing_zeros = countTrailingZeros sized_n
452+
453+ -- | Logical (immediate)
454+ -- Allows encoding of some repeated bitpatterns
455+ -- Used with AND, ANDS, EOR, ORR, TST
456+ -- and their aliases which includes at least MOV (bitmask immediate)
457+ -- See Note [Aarch64 immediates]
458+ getBitmaskImm :: Integer -> Width -> Maybe Operand
459+ getBitmaskImm n w
460+ | isAArch64Bitmask truncated = Just $ OpImm (ImmInteger truncated)
461+ | otherwise = Nothing
462+ where
463+ truncated = narrowU w n
464+
465+
375466-- TODO OPT: we might be able give getRegister
376467-- a hint, what kind of register we want.
377468getFloatReg :: HasCallStack => CmmExpr -> NatM (Reg , Format , InstrBlock )
@@ -494,8 +585,14 @@ getRegister' config plat expr
494585 CmmLit lit
495586 -> case lit of
496587
497- -- TODO handle CmmInt 0 specially, use wzr or xzr.
498-
588+ -- Use wzr xzr for CmmInt 0 if the width matches up, otherwise do a move.
589+ -- TODO: Reenable after https://gitlab.haskell.org/ghc/ghc/-/issues/23632 is fixed.
590+ -- CmmInt 0 W32 -> do
591+ -- let format = intFormat W32
592+ -- return (Fixed format reg_zero (unitOL $ (COMMENT ((text . show $ expr))) ))
593+ -- CmmInt 0 W64 -> do
594+ -- let format = intFormat W64
595+ -- return (Fixed format reg_zero (unitOL $ (COMMENT ((text . show $ expr))) ))
499596 CmmInt i W8 | i >= 0 -> do
500597 return (Any (intFormat W8 ) (\ dst -> unitOL $ annExpr expr (MOV (OpReg W8 dst) (OpImm (ImmInteger (narrowU W8 i))))))
501598 CmmInt i W16 | i >= 0 -> do
@@ -510,8 +607,13 @@ getRegister' config plat expr
510607 -- Those need the upper bits set. We'd either have to explicitly sign
511608 -- or figure out something smarter. Lowered to
512609 -- `MOV dst XZR`
610+ CmmInt i w | i >= 0
611+ , Just imm_op <- getMovWideImm i w -> do
612+ return (Any (intFormat w) (\ dst -> unitOL $ annExpr expr (MOVZ (OpReg w dst) imm_op)))
613+
513614 CmmInt i w | isNbitEncodeable 16 i, i >= 0 -> do
514615 return (Any (intFormat w) (\ dst -> unitOL $ annExpr expr (MOV (OpReg W16 dst) (OpImm (ImmInteger i)))))
616+
515617 CmmInt i w | isNbitEncodeable 32 i, i >= 0 -> do
516618 let half0 = fromIntegral (fromIntegral i :: Word16 )
517619 half1 = fromIntegral (fromIntegral (i `shiftR` 16 ) :: Word16 )
@@ -586,7 +688,6 @@ getRegister' config plat expr
586688 (op, imm_code) <- litToImm' lit
587689 let rep = cmmLitType plat lit
588690 format = cmmTypeFormat rep
589- -- width = typeWidth rep
590691 return (Any format (\ dst -> imm_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op))
591692
592693 CmmLabelOff lbl off -> do
@@ -791,17 +892,51 @@ getRegister' config plat expr
791892 -- withTempFloatReg w op = OpReg w <$> getNewRegNat (floatFormat w) >>= op
792893
793894 -- A "plain" operation.
794- bitOp w op = do
895+ bitOpImm w op encode_imm = do
795896 -- compute x<m> <- x
796897 -- compute x<o> <- y
797898 -- <OP> x<n>, x<m>, x<o>
798899 (reg_x, format_x, code_x) <- getSomeReg x
799- (reg_y, format_y, code_y) <- getSomeReg y
800- massertPpr (isIntFormat format_x == isIntFormat format_y) $ text " bitOp: incompatible"
900+ (op_y, format_y, code_y) <- case y of
901+ CmmLit (CmmInt n w)
902+ | Just imm_operand_y <- encode_imm n w
903+ -> return (imm_operand_y, intFormat w, nilOL)
904+ _ -> do
905+ (reg_y, format_y, code_y) <- getSomeReg y
906+ return (OpReg w reg_y, format_y, code_y)
907+ massertPpr (isIntFormat format_x == isIntFormat format_y) $ text " bitOpImm: incompatible"
801908 return $ Any (intFormat w) (\ dst ->
802909 code_x `appOL`
803910 code_y `appOL`
804- op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
911+ op (OpReg w dst) (OpReg w reg_x) op_y)
912+
913+ -- A (potentially signed) integer operation.
914+ -- In the case of 8- and 16-bit signed arithmetic we must first
915+ -- sign-extend both arguments to 32-bits.
916+ -- See Note [Signed arithmetic on AArch64].
917+ intOpImm :: Bool -> Width -> (Operand -> Operand -> Operand -> OrdList Instr ) -> (Integer -> Width -> Maybe Operand ) -> NatM (Register )
918+ intOpImm {- is signed -} True w op _encode_imm = intOp True w op
919+ intOpImm False w op encode_imm = do
920+ -- compute x<m> <- x
921+ -- compute x<o> <- y
922+ -- <OP> x<n>, x<m>, x<o>
923+ (reg_x, format_x, code_x) <- getSomeReg x
924+ (op_y, format_y, code_y) <- case y of
925+ CmmLit (CmmInt n w)
926+ | Just imm_operand_y <- encode_imm n w
927+ -> return (imm_operand_y, intFormat w, nilOL)
928+ _ -> do
929+ (reg_y, format_y, code_y) <- getSomeReg y
930+ return (OpReg w reg_y, format_y, code_y)
931+ massertPpr (isIntFormat format_x && isIntFormat format_y) $ text " intOp: non-int"
932+ -- This is the width of the registers on which the operation
933+ -- should be performed.
934+ let w' = opRegWidth w
935+ return $ Any (intFormat w) $ \ dst ->
936+ code_x `appOL`
937+ code_y `appOL`
938+ op (OpReg w' dst) (OpReg w' reg_x) (op_y) `appOL`
939+ truncateReg w' w dst -- truncate back to the operand's original width
805940
806941 -- A (potentially signed) integer operation.
807942 -- In the case of 8- and 16-bit signed arithmetic we must first
@@ -847,9 +982,9 @@ getRegister' config plat expr
847982 case op of
848983 -- Integer operations
849984 -- Add/Sub should only be Integer Options.
850- MO_Add w -> intOp False w (\ d x y -> unitOL $ annExpr expr (ADD d x y))
985+ MO_Add w -> intOpImm False w (\ d x y -> unitOL $ annExpr expr (ADD d x y)) getArithImm
851986 -- TODO: Handle sub-word case
852- MO_Sub w -> intOp False w (\ d x y -> unitOL $ annExpr expr (SUB d x y))
987+ MO_Sub w -> intOpImm False w (\ d x y -> unitOL $ annExpr expr (SUB d x y)) getArithImm
853988
854989 -- Note [CSET]
855990 -- ~~~~~~~~~~~
@@ -891,8 +1026,8 @@ getRegister' config plat expr
8911026
8921027 -- N.B. We needn't sign-extend sub-word size (in)equality comparisons
8931028 -- since we don't care about ordering.
894- MO_Eq w -> bitOp w (\ d x y -> toOL [ CMP x y, CSET d EQ ])
895- MO_Ne w -> bitOp w (\ d x y -> toOL [ CMP x y, CSET d NE ])
1029+ MO_Eq w -> bitOpImm w (\ d x y -> toOL [ CMP x y, CSET d EQ ]) getArithImm
1030+ MO_Ne w -> bitOpImm w (\ d x y -> toOL [ CMP x y, CSET d NE ]) getArithImm
8961031
8971032 -- Signed multiply/divide
8981033 MO_Mul w -> intOp True w (\ d x y -> unitOL $ MUL d x y)
@@ -921,10 +1056,10 @@ getRegister' config plat expr
9211056 MO_S_Lt w -> intOp True w (\ d x y -> toOL [ CMP x y, CSET d SLT ])
9221057
9231058 -- Unsigned comparisons
924- MO_U_Ge w -> intOp False w (\ d x y -> toOL [ CMP x y, CSET d UGE ])
925- MO_U_Le w -> intOp False w (\ d x y -> toOL [ CMP x y, CSET d ULE ])
926- MO_U_Gt w -> intOp False w (\ d x y -> toOL [ CMP x y, CSET d UGT ])
927- MO_U_Lt w -> intOp False w (\ d x y -> toOL [ CMP x y, CSET d ULT ])
1059+ MO_U_Ge w -> intOpImm False w (\ d x y -> toOL [ CMP x y, CSET d UGE ]) getArithImm
1060+ MO_U_Le w -> intOpImm False w (\ d x y -> toOL [ CMP x y, CSET d ULE ]) getArithImm
1061+ MO_U_Gt w -> intOpImm False w (\ d x y -> toOL [ CMP x y, CSET d UGT ]) getArithImm
1062+ MO_U_Lt w -> intOpImm False w (\ d x y -> toOL [ CMP x y, CSET d ULT ]) getArithImm
9281063
9291064 -- Floating point arithmetic
9301065 MO_F_Add w -> floatOp w (\ d x y -> unitOL $ ADD d x y)
@@ -947,9 +1082,9 @@ getRegister' config plat expr
9471082 MO_F_Lt w -> floatCond w (\ d x y -> toOL [ CMP x y, CSET d OLT ]) -- x < y <=> y >= x
9481083
9491084 -- Bitwise operations
950- MO_And w -> bitOp w (\ d x y -> unitOL $ AND d x y)
951- MO_Or w -> bitOp w (\ d x y -> unitOL $ ORR d x y)
952- MO_Xor w -> bitOp w (\ d x y -> unitOL $ EOR d x y)
1085+ MO_And w -> bitOpImm w (\ d x y -> unitOL $ AND d x y) getBitmaskImm
1086+ MO_Or w -> bitOpImm w (\ d x y -> unitOL $ ORR d x y) getBitmaskImm
1087+ MO_Xor w -> bitOpImm w (\ d x y -> unitOL $ EOR d x y) getBitmaskImm
9531088 MO_Shl w -> intOp False w (\ d x y -> unitOL $ LSL d x y)
9541089 MO_U_Shr w -> intOp False w (\ d x y -> unitOL $ LSR d x y)
9551090 MO_S_Shr w -> intOp True w (\ d x y -> unitOL $ ASR d x y)
@@ -999,7 +1134,7 @@ getRegister' config plat expr
9991134
10001135 where
10011136 isNbitEncodeable :: Int -> Integer -> Bool
1002- isNbitEncodeable n i = let shift = n - 1 in (- 1 `shiftL` shift) <= i && i < (1 `shiftL` shift)
1137+ isNbitEncodeable n_bits i = let shift = n_bits - 1 in (- 1 `shiftL` shift) <= i && i < (1 `shiftL` shift)
10031138
10041139 -- N.B. MUL does not set the overflow flag.
10051140 do_mul_may_oflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
0 commit comments