Skip to content

Commit e01b1eb

Browse files
committed
cmd/compile/internal: stack slot merging region formation enhancements
This patch revises the algorithm/strategy used for overlapping the stack slots of disjointly accessed local variables. The main change here is to allow merging the stack slot of B into the slot for A if B's size is less than A's (prior to this the sizes had to be identical), and to also allow merging non-pointer variables into pointer-variable slots. The new algorithm sorts the candidate list first by pointerness (pointer variables first), then by alignment, then by size, and finally by name. We no longer check that two variables have the same GC shape before merging: since it should never be the case that we have two vars X and Y both live across a given callsite where X and Y share a stack slot, their GC shape doesn't matter. Doing things this new way increases the total number of bytes saved (across all functions) from 91256 to 124336 for the sweet benchmarks. Updates #62737. Updates #65532. Updates #65495. Change-Id: I1daaac1b1240aa47a6975e98ccd24e03304ab602 Reviewed-on: https://go-review.googlesource.com/c/go/+/577615 LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Cherry Mui <[email protected]>
1 parent a973b42 commit e01b1eb

File tree

3 files changed

+173
-141
lines changed

3 files changed

+173
-141
lines changed

src/cmd/compile/internal/liveness/mergelocals.go

Lines changed: 110 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,7 @@ import (
88
"cmd/compile/internal/base"
99
"cmd/compile/internal/bitvec"
1010
"cmd/compile/internal/ir"
11-
"cmd/compile/internal/reflectdata"
1211
"cmd/compile/internal/ssa"
13-
"cmd/internal/obj"
1412
"cmd/internal/src"
1513
"fmt"
1614
"os"
@@ -23,12 +21,14 @@ import (
2321
// (stack-allocated) variables within a function can be safely
2422
// merged/overlapped, e.g. share a stack slot with some other auto).
2523
// An instance of MergeLocalsState is produced by MergeLocals() below
26-
// and then consumed in ssagen.AllocFrame. The map 'partition' contains
27-
// entries of the form <N,SL> where N is an *ir.Name and SL is a slice
28-
// holding the indices (within 'vars') of other variables that share the
29-
// same slot. For example, if a function contains five variables where
30-
// v1/v2/v3 are safe to overlap and v4/v5 are safe to overlap, the
31-
// MergeLocalsState content might look like
24+
// and then consumed in ssagen.AllocFrame. The map 'partition'
25+
// contains entries of the form <N,SL> where N is an *ir.Name and SL
26+
// is a slice holding the indices (within 'vars') of other variables
27+
// that share the same slot, specifically the slot of the first
28+
// element in the partition, which we'll call the "leader". For
29+
// example, if a function contains five variables where v1/v2/v3 are
30+
// safe to overlap and v4/v5 are safe to overlap, the MergeLocalsState
31+
// content might look like
3232
//
3333
// vars: [v1, v2, v3, v4, v5]
3434
// partition: v1 -> [1, 0, 2], v2 -> [1, 0, 2], v3 -> [1, 0, 2]
@@ -49,6 +49,22 @@ type candRegion struct {
4949
st, en int
5050
}
5151

52+
// cstate holds state information we'll need during the analysis
53+
// phase of stack slot merging but can be discarded when the analysis
54+
// is done.
55+
type cstate struct {
56+
fn *ir.Func
57+
f *ssa.Func
58+
lv *liveness
59+
cands []*ir.Name
60+
nameToSlot map[*ir.Name]int32
61+
regions []candRegion
62+
indirectUE map[ssa.ID][]*ir.Name
63+
ivs []Intervals
64+
hashDeselected map[*ir.Name]bool
65+
trace int // debug trace level
66+
}
67+
5268
// MergeLocals analyzes the specified ssa function f to determine which
5369
// of its auto variables can safely share the same stack slot, returning
5470
// a state object that describes how the overlap should be done.
@@ -223,6 +239,19 @@ func (mls *MergeLocalsState) check() error {
223239
if !foundk {
224240
return fmt.Errorf("k=%s v=+%v slice value missing k", k.Sym().Name, sl)
225241
}
242+
vl := mls.vars[sl[0]]
243+
for _, v := range sl[1:] {
244+
vv := mls.vars[v]
245+
if vv.Type().Size() > vl.Type().Size() {
246+
return fmt.Errorf("k=%s v=+%v follower %s size %d larger than leader %s size %d", k.Sym().Name, sl, vv.Sym().Name, vv.Type().Size(), vl.Sym().Name, vl.Type().Size())
247+
}
248+
if vv.Type().HasPointers() && !vl.Type().HasPointers() {
249+
return fmt.Errorf("k=%s v=+%v follower %s hasptr=true but leader %s hasptr=false", k.Sym().Name, sl, vv.Sym().Name, vl.Sym().Name)
250+
}
251+
if vv.Type().Alignment() > vl.Type().Alignment() {
252+
return fmt.Errorf("k=%s v=+%v follower %s align %d greater than leader %s align %d", k.Sym().Name, sl, vv.Sym().Name, vv.Type().Alignment(), vl.Sym().Name, vl.Type().Alignment())
253+
}
254+
}
226255
}
227256
for i := range used {
228257
if !used[i] {
@@ -296,14 +325,13 @@ func (cs *cstate) collectMergeCandidates() {
296325

297326
// Now generate an initial pruned candidate list and regions list.
298327
// This may be empty if we don't have enough compatible candidates.
299-
initial, _ := genRegions(cands)
328+
initial, _ := cs.genRegions(cands)
300329
if len(initial) < 2 {
301330
return
302331
}
303332

304-
// When bisecting it can be handy to see debug trace output for
305-
// only those functions that hashdebug selects; set this up here.
306-
cs.setupHashTrace(initial)
333+
// Set up for hash bisection if enabled.
334+
cs.setupHashBisection(initial)
307335

308336
// Create and populate an indirect use table that we'll use
309337
// during interval construction. As part of this process we may
@@ -330,7 +358,9 @@ func (cs *cstate) collectMergeCandidates() {
330358
}
331359
}
332360

333-
func genRegions(cands []*ir.Name) ([]*ir.Name, []candRegion) {
361+
// genRegions generates a set of regions within cands corresponding
362+
// to potentially overlappable/mergeable variables.
363+
func (cs *cstate) genRegions(cands []*ir.Name) ([]*ir.Name, []candRegion) {
334364
var pruned []*ir.Name
335365
var regions []candRegion
336366
st := 0
@@ -346,8 +376,8 @@ func genRegions(cands []*ir.Name) ([]*ir.Name, []candRegion) {
346376
}
347377
pst := len(pruned)
348378
pen := pst + (en - st)
349-
if base.Debug.MergeLocalsTrace > 1 {
350-
fmt.Fprintf(os.Stderr, "=-= add part %d -> %d\n", pst, pen)
379+
if cs.trace > 1 {
380+
fmt.Fprintf(os.Stderr, "=-= addregion st=%d en=%d: add part %d -> %d\n", st, en, pst, pen)
351381
}
352382

353383
// non-empty region, add to pruned
@@ -385,27 +415,29 @@ func (cs *cstate) dumpFuncIfSelected() {
385415
cs.dumpFunc()
386416
}
387417

388-
func (cs *cstate) setupHashTrace(cands []*ir.Name) {
389-
if base.Debug.MergeLocalsHTrace == 0 || base.Debug.MergeLocalsHash == "" {
418+
// setupHashBisection checks to see if any of the candidate
419+
// variables have been de-selected by our hash debug. Here
420+
// we also implement the -d=mergelocalshtrace flag, which turns
421+
// on debug tracing only if we have at least two candidates
422+
// selected by the hash debug for this function.
423+
func (cs *cstate) setupHashBisection(cands []*ir.Name) {
424+
if base.Debug.MergeLocalsHash == "" {
390425
return
391426
}
392-
393-
// With this trace variant, check to see whether any of the
394-
// candidates are selected-- if yes then enable tracing. Hack:
395-
// create a new hashdebug with verbosity turned off and use that
396-
// to test, so as not to confuse bisect.
397-
modified := strings.ReplaceAll(base.Debug.MergeLocalsHash, "v", "q")
398-
quiethd := base.NewHashDebug("qmergelocals", modified, nil)
399-
found := false
427+
deselected := make(map[*ir.Name]bool)
428+
selCount := 0
400429
for _, cand := range cands {
401-
if !quiethd.MatchPosWithInfo(cand.Pos(), "quiet", nil) {
402-
found = true
403-
fmt.Fprintf(os.Stderr, "=-= MergeLocalsHTrace fn=%v n=%s match\n",
404-
cs.fn, cand.Sym().Name)
405-
break
430+
if !base.MergeLocalsHash.MatchPosWithInfo(cand.Pos(), "mergelocals", nil) {
431+
deselected[cand] = true
432+
} else {
433+
deselected[cand] = false
434+
selCount++
406435
}
407436
}
408-
if found {
437+
if selCount < len(cands) {
438+
cs.hashDeselected = deselected
439+
}
440+
if base.Debug.MergeLocalsHTrace != 0 && selCount >= 2 {
409441
cs.trace = base.Debug.MergeLocalsHTrace
410442
}
411443
}
@@ -566,7 +598,7 @@ func (cs *cstate) populateIndirectUseTable(cands []*ir.Name) ([]*ir.Name, []cand
566598
return nameLess(pruned[i], pruned[j])
567599
})
568600
var regions []candRegion
569-
pruned, regions = genRegions(pruned)
601+
pruned, regions = cs.genRegions(pruned)
570602
if len(pruned) < 2 {
571603
return nil, nil
572604
}
@@ -586,29 +618,30 @@ type nameCount struct {
586618
count int32
587619
}
588620

589-
// nameLess compares ci with cj to see if ci should be less than cj
590-
// in a relative ordering of candidate variables. This is used to
591-
// sort vars by size, pointerness, and GC shape.
621+
// nameLess compares ci with cj to see if ci should be less than cj in
622+
// a relative ordering of candidate variables. This is used to sort
623+
// vars by pointerness (variables with pointers first), then in order
624+
// of decreasing alignment, then by decreasing size. We are assuming a
625+
// merging algorithm that merges later entries in the list into
626+
// earlier entries. An example ordered candidate list produced by
627+
// nameLess:
628+
//
629+
// idx name type align size
630+
// 0: abc [10]*int 8 80
631+
// 1: xyz [9]*int 8 72
632+
// 2: qrs [2]*int 8 16
633+
// 3: tuv [9]int 8 72
634+
// 4: wxy [9]int32 4 36
635+
// 5: jkl [8]int32 4 32
592636
func nameLess(ci, cj *ir.Name) bool {
593-
ihp, jhp := 0, 0
594-
var ilsym, jlsym *obj.LSym
595-
if ci.Type().HasPointers() {
596-
ihp = 1
597-
ilsym, _, _ = reflectdata.GCSym(ci.Type())
637+
if ci.Type().HasPointers() != cj.Type().HasPointers() {
638+
return ci.Type().HasPointers()
598639
}
599-
if cj.Type().HasPointers() {
600-
jhp = 1
601-
jlsym, _, _ = reflectdata.GCSym(cj.Type())
602-
}
603-
if ihp != jhp {
604-
return ihp < jhp
640+
if ci.Type().Alignment() != cj.Type().Alignment() {
641+
return cj.Type().Alignment() < ci.Type().Alignment()
605642
}
606643
if ci.Type().Size() != cj.Type().Size() {
607-
return ci.Type().Size() < cj.Type().Size()
608-
}
609-
if ihp != 0 && jhp != 0 && ilsym != jlsym {
610-
// FIXME: find less clunky way to do this
611-
return fmt.Sprintf("%v", ilsym) < fmt.Sprintf("%v", jlsym)
644+
return cj.Type().Size() < ci.Type().Size()
612645
}
613646
if ci.Sym().Name != cj.Sym().Name {
614647
return ci.Sym().Name < cj.Sym().Name
@@ -617,63 +650,48 @@ func nameLess(ci, cj *ir.Name) bool {
617650
}
618651

619652
// nextRegion starts at location idx and walks forward in the cands
620-
// slice looking for variables that are "compatible" (overlappable)
621-
// with the variable at position idx; it returns the end of the new
622-
// region (range of compatible variables starting at idx).
653+
// slice looking for variables that are "compatible" (potentially
654+
// overlappable, in the sense that they could potentially share the
655+
// stack slot of cands[idx]); it returns the end of the new region
656+
// (range of compatible variables starting at idx).
623657
func nextRegion(cands []*ir.Name, idx int) int {
624658
n := len(cands)
625659
if idx >= n {
626660
return -1
627661
}
628662
c0 := cands[idx]
629-
hp0 := c0.Type().HasPointers()
663+
szprev := c0.Type().Size()
664+
alnprev := c0.Type().Alignment()
630665
for j := idx + 1; j < n; j++ {
631666
cj := cands[j]
632-
hpj := cj.Type().HasPointers()
633-
ok := true
634-
if hp0 {
635-
if !hpj || c0.Type().Size() != cj.Type().Size() {
636-
return j - 1
637-
}
638-
// GC shape must match if both types have pointers.
639-
gcsym0, _, _ := reflectdata.GCSym(c0.Type())
640-
gcsymj, _, _ := reflectdata.GCSym(cj.Type())
641-
if gcsym0 != gcsymj {
642-
return j - 1
643-
}
644-
} else {
645-
// If no pointers, match size only.
646-
if !ok || hp0 != hpj || c0.Type().Size() != cj.Type().Size() {
647-
return j - 1
648-
}
667+
szj := cj.Type().Size()
668+
if szj > szprev {
669+
return j - 1
649670
}
671+
alnj := cj.Type().Alignment()
672+
if alnj > alnprev {
673+
return j - 1
674+
}
675+
szprev = szj
676+
alnprev = alnj
650677
}
651678
return n - 1
652679
}
653680

654-
// cstate holds state information we'll need during the analysis
655-
// phase of stack slot merging but can be discarded when the analysis
656-
// is done.
657-
type cstate struct {
658-
fn *ir.Func
659-
f *ssa.Func
660-
lv *liveness
661-
cands []*ir.Name
662-
nameToSlot map[*ir.Name]int32
663-
regions []candRegion
664-
indirectUE map[ssa.ID][]*ir.Name
665-
ivs []Intervals
666-
trace int // debug trace level
667-
}
668-
669681
// mergeVisitRegion tries to perform overlapping of variables with a
670682
// given subrange of cands described by st and en (indices into our
671683
// candidate var list), where the variables within this range have
672684
// already been determined to be compatible with respect to type,
673685
// size, etc. Overlapping is done in a greedy fashion: we select the
674686
// first element in the st->en range, then walk the rest of the
675687
// elements adding in vars whose lifetimes don't overlap with the
676-
// first element, then repeat the process until we run out of work to do.
688+
// first element, then repeat the process until we run out of work.
689+
// Ordering of the candidates within the region [st,en] is important;
690+
// within the list the assumption is that if we overlap two variables
691+
// X and Y where X precedes Y in the list, we need to make X the
692+
// "leader" (keep X's slot and set Y's frame offset to X's) as opposed
693+
// to the other way around, since it's possible that Y is smaller in
694+
// size than X.
677695
func (cs *cstate) mergeVisitRegion(mls *MergeLocalsState, st, en int) {
678696
if cs.trace > 1 {
679697
fmt.Fprintf(os.Stderr, "=-= mergeVisitRegion(st=%d, en=%d)\n", st, en)
@@ -712,10 +730,8 @@ func (cs *cstate) mergeVisitRegion(mls *MergeLocalsState, st, en int) {
712730
for succ := nxt(leader + 1); succ != -1; succ = nxt(succ + 1) {
713731

714732
// Skip if de-selected by merge locals hash.
715-
if base.Debug.MergeLocalsHash != "" {
716-
if !base.MergeLocalsHash.MatchPosWithInfo(cands[succ].Pos(), "mergelocals", nil) {
717-
continue
718-
}
733+
if cs.hashDeselected != nil && cs.hashDeselected[cands[succ]] {
734+
continue
719735
}
720736
// Skip if already used.
721737
if used.Get(int32(succ - st)) {
@@ -1004,9 +1020,9 @@ func fmtFullPos(p src.XPos) string {
10041020
}
10051021

10061022
func dumpCand(c *ir.Name, i int) {
1007-
fmt.Fprintf(os.Stderr, " %d: %s %q sz=%d hp=%v t=%v\n",
1023+
fmt.Fprintf(os.Stderr, " %d: %s %q sz=%d hp=%v align=%d t=%v\n",
10081024
i, fmtFullPos(c.Pos()), c.Sym().Name, c.Type().Size(),
1009-
c.Type().HasPointers(), c.Type())
1025+
c.Type().HasPointers(), c.Type().Alignment(), c.Type())
10101026
}
10111027

10121028
// for unit testing only.

0 commit comments

Comments
 (0)