Skip to content

Commit 6eaad82

Browse files
committed
cmd/internal/obj/arm64: improve splitting of 24 bit unsigned scaled immediates
The previous implementation limited the maximum offset it would split to 0xfff000 | 0xfff << shift, while the maximum value that can actually be represented is 0xfff000 + 0xfff << shift. In practical terms, this means that an additional ((1 << shift) - 1) * 0x1000 of offset is reachable for operations that use this splitting format. In the case of an 8 byte load/store, this is an additional 0x7000 that can be reached without needing to use the literal pool.

Updates #59615

Change-Id: Ice7023104042d31c115eafb9398c2b999bdd6583
Reviewed-on: https://go-review.googlesource.com/c/go/+/512540
Reviewed-by: Cherry Mui <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: David Chase <[email protected]>
Run-TryBot: Joel Sing <[email protected]>
1 parent 03bec7d commit 6eaad82

File tree

3 files changed

+41
-40
lines changed

3 files changed

+41
-40
lines changed

src/cmd/asm/internal/asm/testdata/arm64.s

+15-15
Original file line numberDiff line numberDiff line change
@@ -592,43 +592,43 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
592592
MOVB R1, 0x1001(R2) // MOVB R1, 4097(R2) // 5b04409161070039
593593
MOVB R1, 0xffffff(R2) // MOVB R1, 16777215(R2) // 5bfc7f9161ff3f39
594594
MOVH R1, 0x2002(R2) // MOVH R1, 8194(R2) // 5b08409161070079
595-
MOVH R1, 0xfffffe(R2) // MOVH R1, 16777214(R2) // 5bf87f9161ff3f79
595+
MOVH R1, 0x1000ffe(R2) // MOVH R1, 16781310(R2) // 5bfc7f9161ff3f79
596596
MOVW R1, 0x4004(R2) // MOVW R1, 16388(R2) // 5b104091610700b9
597-
MOVW R1, 0xfffffc(R2) // MOVW R1, 16777212(R2) // 5bf07f9161ff3fb9
597+
MOVW R1, 0x1002ffc(R2) // MOVW R1, 16789500(R2) // 5bfc7f9161ff3fb9
598598
MOVD R1, 0x8008(R2) // MOVD R1, 32776(R2) // 5b204091610700f9
599-
MOVD R1, 0xfffff8(R2) // MOVD R1, 16777208(R2) // 5be07f9161ff3ff9
599+
MOVD R1, 0x1006ff8(R2) // MOVD R1, 16805880(R2) // 5bfc7f9161ff3ff9
600600
FMOVS F1, 0x4004(R2) // FMOVS F1, 16388(R2) // 5b104091610700bd
601-
FMOVS F1, 0xfffffc(R2) // FMOVS F1, 16777212(R2) // 5bf07f9161ff3fbd
601+
FMOVS F1, 0x1002ffc(R2) // FMOVS F1, 16789500(R2) // 5bfc7f9161ff3fbd
602602
FMOVD F1, 0x8008(R2) // FMOVD F1, 32776(R2) // 5b204091610700fd
603-
FMOVD F1, 0xfffff8(R2) // FMOVD F1, 16777208(R2) // 5be07f9161ff3ffd
603+
FMOVD F1, 0x1006ff8(R2) // FMOVD F1, 16805880(R2) // 5bfc7f9161ff3ffd
604604

605605
MOVB 0x1001(R1), R2 // MOVB 4097(R1), R2 // 3b04409162078039
606606
MOVB 0xffffff(R1), R2 // MOVB 16777215(R1), R2 // 3bfc7f9162ffbf39
607607
MOVH 0x2002(R1), R2 // MOVH 8194(R1), R2 // 3b08409162078079
608-
MOVH 0xfffffe(R1), R2 // MOVH 16777214(R1), R2 // 3bf87f9162ffbf79
608+
MOVH 0x1000ffe(R1), R2 // MOVH 16781310(R1), R2 // 3bfc7f9162ffbf79
609609
MOVW 0x4004(R1), R2 // MOVW 16388(R1), R2 // 3b104091620780b9
610-
MOVW 0xfffffc(R1), R2 // MOVW 16777212(R1), R2 // 3bf07f9162ffbfb9
610+
MOVW 0x1002ffc(R1), R2 // MOVW 16789500(R1), R2 // 3bfc7f9162ffbfb9
611611
MOVD 0x8008(R1), R2 // MOVD 32776(R1), R2 // 3b204091620740f9
612-
MOVD 0xfffff8(R1), R2 // MOVD 16777208(R1), R2 // 3be07f9162ff7ff9
612+
MOVD 0x1006ff8(R1), R2 // MOVD 16805880(R1), R2 // 3bfc7f9162ff7ff9
613613
FMOVS 0x4004(R1), F2 // FMOVS 16388(R1), F2 // 3b104091620740bd
614-
FMOVS 0xfffffc(R1), F2 // FMOVS 16777212(R1), F2 // 3bf07f9162ff7fbd
614+
FMOVS 0x1002ffc(R1), F2 // FMOVS 16789500(R1), F2 // 3bfc7f9162ff7fbd
615615
FMOVD 0x8008(R1), F2 // FMOVD 32776(R1), F2 // 3b204091620740fd
616-
FMOVD 0xfffff8(R1), F2 // FMOVD 16777208(R1), F2 // 3be07f9162ff7ffd
616+
FMOVD 0x1006ff8(R1), F2 // FMOVD 16805880(R1), F2 // 3bfc7f9162ff7ffd
617617

618618
// very large or unaligned offset uses constant pool.
619619
// the encoding cannot be checked as the address of the constant pool is unknown.
620620
// here we only test that they can be assembled.
621621
MOVB R1, 0x1000000(R2) // MOVB R1, 16777216(R2)
622622
MOVB R1, 0x44332211(R2) // MOVB R1, 1144201745(R2)
623-
MOVH R1, 0x1000000(R2) // MOVH R1, 16777216(R2)
623+
MOVH R1, 0x1001000(R2) // MOVH R1, 16781312(R2)
624624
MOVH R1, 0x44332211(R2) // MOVH R1, 1144201745(R2)
625-
MOVW R1, 0x1000000(R2) // MOVW R1, 16777216(R2)
625+
MOVW R1, 0x1003000(R2) // MOVW R1, 16789504(R2)
626626
MOVW R1, 0x44332211(R2) // MOVW R1, 1144201745(R2)
627-
MOVD R1, 0x1000000(R2) // MOVD R1, 16777216(R2)
627+
MOVD R1, 0x1007000(R2) // MOVD R1, 16805888(R2)
628628
MOVD R1, 0x44332211(R2) // MOVD R1, 1144201745(R2)
629-
FMOVS F1, 0x1000000(R2) // FMOVS F1, 16777216(R2)
629+
FMOVS F1, 0x1003000(R2) // FMOVS F1, 16789504(R2)
630630
FMOVS F1, 0x44332211(R2) // FMOVS F1, 1144201745(R2)
631-
FMOVD F1, 0x1000000(R2) // FMOVD F1, 16777216(R2)
631+
FMOVD F1, 0x1007000(R2) // FMOVD F1, 16805888(R2)
632632
FMOVD F1, 0x44332211(R2) // FMOVD F1, 1144201745(R2)
633633

634634
MOVB 0x1000000(R1), R2 // MOVB 16777216(R1), R2

src/cmd/internal/obj/arm64/asm7.go

+13-6
Original file line numberDiff line numberDiff line change
@@ -1420,13 +1420,20 @@ func splitImm24uScaled(v int32, shift int) (int32, int32, error) {
14201420
if v < 0 {
14211421
return 0, 0, fmt.Errorf("%d is not a 24 bit unsigned immediate", v)
14221422
}
1423+
if v > 0xfff000+0xfff<<shift {
1424+
return 0, 0, fmt.Errorf("%d is too large for a scaled 24 bit unsigned immediate", v)
1425+
}
14231426
if v&((1<<shift)-1) != 0 {
14241427
return 0, 0, fmt.Errorf("%d is not a multiple of %d", v, 1<<shift)
14251428
}
14261429
lo := (v >> shift) & 0xfff
14271430
hi := v - (lo << shift)
1428-
if hi&^0xfff000 != 0 {
1429-
return 0, 0, fmt.Errorf("%d is too large for a scaled 24 bit unsigned immediate %x %x", v, lo, hi)
1431+
if hi > 0xfff000 {
1432+
hi = 0xfff000
1433+
lo = (v - hi) >> shift
1434+
}
1435+
if hi & ^0xfff000 != 0 {
1436+
panic(fmt.Sprintf("bad split for %x with shift %v (%x, %x)", v, shift, hi, lo))
14301437
}
14311438
return hi, lo, nil
14321439
}
@@ -1975,28 +1982,28 @@ func (c *ctxt7) loadStoreClass(p *obj.Prog, lsc int, v int64) int {
19751982
if cmp(C_UAUTO8K, lsc) || cmp(C_UOREG8K, lsc) {
19761983
return lsc
19771984
}
1978-
if v >= 0 && v <= 0xfffffe && v&1 == 0 {
1985+
if v >= 0 && v <= 0xfff000+0xfff<<1 && v&1 == 0 {
19791986
needsPool = false
19801987
}
19811988
case AMOVW, AMOVWU, AFMOVS:
19821989
if cmp(C_UAUTO16K, lsc) || cmp(C_UOREG16K, lsc) {
19831990
return lsc
19841991
}
1985-
if v >= 0 && v <= 0xfffffc && v&3 == 0 {
1992+
if v >= 0 && v <= 0xfff000+0xfff<<2 && v&3 == 0 {
19861993
needsPool = false
19871994
}
19881995
case AMOVD, AFMOVD:
19891996
if cmp(C_UAUTO32K, lsc) || cmp(C_UOREG32K, lsc) {
19901997
return lsc
19911998
}
1992-
if v >= 0 && v <= 0xfffff8 && v&7 == 0 {
1999+
if v >= 0 && v <= 0xfff000+0xfff<<3 && v&7 == 0 {
19932000
needsPool = false
19942001
}
19952002
case AFMOVQ:
19962003
if cmp(C_UAUTO64K, lsc) || cmp(C_UOREG64K, lsc) {
19972004
return lsc
19982005
}
1999-
if v >= 0 && v <= 0xfffff0 && v&15 == 0 {
2006+
if v >= 0 && v <= 0xfff000+0xfff<<4 && v&15 == 0 {
20002007
needsPool = false
20012008
}
20022009
}

src/cmd/internal/obj/arm64/asm_arm64_test.go

+13-19
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,10 @@ func TestSplitImm24uScaled(t *testing.T) {
7070
wantLo: 0xfff,
7171
},
7272
{
73-
// TODO(jsing): Fix splitting to make this fit.
74-
v: 0x1000ffe,
75-
shift: 1,
76-
wantErr: true,
77-
wantHi: 0xfff000,
78-
wantLo: 0xfff,
73+
v: 0x1000ffe,
74+
shift: 1,
75+
wantHi: 0xfff000,
76+
wantLo: 0xfff,
7977
},
8078
{
8179
v: 0x1001000,
@@ -100,12 +98,10 @@ func TestSplitImm24uScaled(t *testing.T) {
10098
wantLo: 0xfff,
10199
},
102100
{
103-
// TODO(jsing): Fix splitting to make this fit.
104-
v: 0x1002ffc,
105-
shift: 2,
106-
wantErr: true,
107-
wantHi: 0xfff000,
108-
wantLo: 0xfff,
101+
v: 0x1002ffc,
102+
shift: 2,
103+
wantHi: 0xfff000,
104+
wantLo: 0xfff,
109105
},
110106
{
111107
v: 0x1003000,
@@ -130,12 +126,10 @@ func TestSplitImm24uScaled(t *testing.T) {
130126
wantLo: 0xfff,
131127
},
132128
{
133-
// TODO(jsing): Fix splitting to make this fit.
134-
v: 0x1006ff8,
135-
shift: 3,
136-
wantErr: true,
137-
wantHi: 0xfff000,
138-
wantLo: 0xfff,
129+
v: 0x1006ff8,
130+
shift: 3,
131+
wantHi: 0xfff000,
132+
wantLo: 0xfff,
139133
},
140134
{
141135
v: 0x1007000,
@@ -160,7 +154,7 @@ func TestSplitImm24uScaled(t *testing.T) {
160154
}
161155
}
162156
for shift := 0; shift <= 3; shift++ {
163-
for v := int32(0); v < 0xfff000|0xfff<<shift; v = v + 1<<shift {
157+
for v := int32(0); v < 0xfff000+0xfff<<shift; v = v + 1<<shift {
164158
hi, lo, err := splitImm24uScaled(v, shift)
165159
if err != nil {
166160
t.Fatalf("splitImm24uScaled(%x, %x) failed: %v", v, shift, err)

0 commit comments

Comments
 (0)