Skip to content

Commit 3b3d6b9

Browse files
randall77 authored and gopherbot committed
cmd/internal/obj/arm64: shorten constant integer loads
Large integer constants can take up to 4 instructions to encode. We can encode some large constants with a single instruction, namely those which are bit patterns (repetitions of certain runs of 0s and 1s). Often the constants we want to encode are *close* to those bit patterns, but don't exactly match. For those, we can use 2 instructions, one to load the close-by bit pattern and one to fix up any mismatches. The constants we use to strength reduce divides often fit this pattern. For unsigned divides by 1 through 15, this CL applies to the constant for N=3,5,6,10,12,15.

Triggers 17 times in hello world.

Change-Id: I623abf32961fb3e74d0a163f6822f0647cd94499
Reviewed-on: https://go-review.googlesource.com/c/go/+/717900
Auto-Submit: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
1 parent 5f4b5f1 commit 3b3d6b9

File tree

4 files changed

+61
-1
lines changed

4 files changed

+61
-1
lines changed

src/cmd/asm/internal/asm/testdata/arm64.s

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,8 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
400400
MOVD $0x11110000, R1 // MOVD $286326784, R1 // 2122a2d2
401401
MOVD $0xaaaa0000aaaa1111, R1 // MOVD $-6149102338357718767, R1 // 212282d24155b5f24155f5f2
402402
MOVD $0x1111ffff1111aaaa, R1 // MOVD $1230045644216969898, R1 // a1aa8a922122a2f22122e2f2
403+
MOVD $0xaaaaaaaaaaaaaaab, R1 // MOVD $-6148914691236517205, R1 // e1f301b2615595f2
404+
MOVD $0x0ff019940ff00ff0, R1 // MOVD $1148446028692721648, R1 // e19f0cb28132c3f2
403405
MOVD $0, R1 // e1031faa
404406
MOVD $-1, R1 // 01008092
405407
MOVD $0x210000, R0 // MOVD $2162688, R0 // 2004a0d2

src/cmd/internal/obj/arm64/asm7.go

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import (
3838
"fmt"
3939
"log"
4040
"math"
41+
"math/bits"
4142
"slices"
4243
"strings"
4344
)
@@ -1976,7 +1977,18 @@ func (c *ctxt7) con64class(a *obj.Addr) int {
19761977
return C_MOVCON
19771978
} else if zeroCount == 2 || negCount == 2 {
19781979
return C_MOVCON2
1979-
} else if zeroCount == 1 || negCount == 1 {
1980+
}
1981+
// See omovlconst for description of this loop.
1982+
for i := 0; i < 4; i++ {
1983+
mask := uint64(0xffff) << (i * 16)
1984+
for period := 2; period <= 32; period *= 2 {
1985+
x := uint64(a.Offset)&^mask | bits.RotateLeft64(uint64(a.Offset), max(period, 16))&mask
1986+
if isbitcon(x) {
1987+
return C_MOVCON2
1988+
}
1989+
}
1990+
}
1991+
if zeroCount == 1 || negCount == 1 {
19801992
return C_MOVCON3
19811993
} else {
19821994
return C_VCON
@@ -7555,6 +7567,31 @@ func (c *ctxt7) omovlconst(as obj.As, p *obj.Prog, a *obj.Addr, rt int, os []uin
75557567
}
75567568
}
75577569
return 2
7570+
}
7571+
7572+
// Look for a two instruction pair, a bit pattern encodeable
7573+
// as a bitcon immediate plus a fixup MOVK instruction.
7574+
// Constants like this often occur from strength reduction of divides.
7575+
for i = 0; i < 4; i++ {
7576+
mask := uint64(0xffff) << (i * 16)
7577+
for period := 2; period <= 32; period *= 2 { // TODO: handle period==64 somehow?
7578+
// Copy in bits from outside of the masked region
7579+
x := uint64(d)&^mask | bits.RotateLeft64(uint64(d), max(period, 16))&mask
7580+
if isbitcon(x) {
7581+
// ORR $c1, ZR, rt
7582+
os[0] = c.opirr(p, AORR)
7583+
os[0] |= bitconEncode(x, 64) | uint32(REGZERO&31)<<5 | uint32(rt&31)
7584+
// MOVK $c2<<(i*16), rt
7585+
os[1] = c.opirr(p, AMOVK)
7586+
os[1] |= MOVCONST(d, i, rt)
7587+
return 2
7588+
}
7589+
}
7590+
}
7591+
// TODO: other fixups, like ADD or SUB?
7592+
// TODO: 3-instruction variant, instead of the full MOVD+3*MOVK version below?
7593+
7594+
switch {
75587595

75597596
case zeroCount == 1:
75607597
// one MOVZ and two MOVKs

src/cmd/internal/obj/arm64/asm_arm64_test.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,16 @@ func TestMOVK(t *testing.T) {
3838
t.Errorf("Got %x want %x\n", x, want)
3939
}
4040
}
41+
42+
// testCombined is implemented in assembly (see ·testCombined in asm_arm64_test.s).
// It loads two 64-bit constants with MOVD and returns them as a and b.
func testCombined() (a uint64, b uint64)
43+
func TestCombined(t *testing.T) { // checks that both constants loaded by the assembly come back unchanged
44+
got1, got2 := testCombined()
45+
want1 := uint64(0xaaaaaaaaaaaaaaab) // same literal as the first MOVD in ·testCombined
46+
want2 := uint64(0x0ff019940ff00ff0) // same literal as the second MOVD in ·testCombined
47+
if got1 != want1 {
48+
t.Errorf("First result, got %x want %x", got1, want1)
49+
}
50+
if got2 != want2 {
51+
t.Errorf("Second result, got %x want %x", got2, want2) // label the second value so a failure names the right constant
52+
}
53+
}

src/cmd/internal/obj/arm64/asm_arm64_test.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,11 @@ TEXT ·testmovk(SB), NOSPLIT, $0-8
3737
MOVK $(40000<<48), R0
3838
MOVD R0, ret+0(FP)
3939
RET
40+
41+
// testCombined() (uint64, uint64) — loads two 64-bit constants with MOVD and returns them as results a and b.
42+
TEXT ·testCombined(SB), NOSPLIT, $0-16 // $0-16: no local frame, 16 bytes of arguments/results (two uint64 results)
43+
MOVD $0xaaaaaaaaaaaaaaab, R0 // first constant; compared against want1 in TestCombined
44+
MOVD $0x0ff019940ff00ff0, R1 // second constant; compared against want2 in TestCombined
45+
MOVD R0, a+0(FP) // store first result
46+
MOVD R1, b+8(FP) // store second result
47+
RET

0 commit comments

Comments
 (0)