8
8
"cmd/compile/internal/base"
9
9
"cmd/compile/internal/bitvec"
10
10
"cmd/compile/internal/ir"
11
- "cmd/compile/internal/reflectdata"
12
11
"cmd/compile/internal/ssa"
13
- "cmd/internal/obj"
14
12
"cmd/internal/src"
15
13
"fmt"
16
14
"os"
@@ -23,12 +21,14 @@ import (
23
21
// (stack-allocated) variables within a function can be safely
24
22
// merged/overlapped, e.g. share a stack slot with some other auto).
25
23
// An instance of MergeLocalsState is produced by MergeLocals() below
26
- // and then consumed in ssagen.AllocFrame. The map 'partition' contains
27
- // entries of the form <N,SL> where N is an *ir.Name and SL is a slice
28
- // holding the indices (within 'vars') of other variables that share the
29
- // same slot. For example, if a function contains five variables where
30
- // v1/v2/v3 are safe to overlap and v4/v5 are safe to overlap, the
31
- // MergeLocalsState content might look like
24
+ // and then consumed in ssagen.AllocFrame. The map 'partition'
25
+ // contains entries of the form <N,SL> where N is an *ir.Name and SL
26
+ // is a slice holding the indices (within 'vars') of other variables
27
+ // that share the same slot, specifically the slot of the first
28
+ // element in the partition, which we'll call the "leader". For
29
+ // example, if a function contains five variables where v1/v2/v3 are
30
+ // safe to overlap and v4/v5 are safe to overlap, the MergeLocalsState
31
+ // content might look like
32
32
//
33
33
// vars: [v1, v2, v3, v4, v5]
34
34
// partition: v1 -> [1, 0, 2], v2 -> [1, 0, 2], v3 -> [1, 0, 2]
@@ -49,6 +49,22 @@ type candRegion struct {
49
49
st , en int
50
50
}
51
51
52
+ // cstate holds state information we'll need during the analysis
53
+ // phase of stack slot merging but can be discarded when the analysis
54
+ // is done.
55
+ type cstate struct {
56
+ fn * ir.Func
57
+ f * ssa.Func
58
+ lv * liveness
59
+ cands []* ir.Name
60
+ nameToSlot map [* ir.Name ]int32
61
+ regions []candRegion
62
+ indirectUE map [ssa.ID ][]* ir.Name
63
+ ivs []Intervals
64
+ hashDeselected map [* ir.Name ]bool
65
+ trace int // debug trace level
66
+ }
67
+
52
68
// MergeLocals analyzes the specified ssa function f to determine which
53
69
// of its auto variables can safely share the same stack slot, returning
54
70
// a state object that describes how the overlap should be done.
@@ -223,6 +239,19 @@ func (mls *MergeLocalsState) check() error {
223
239
if ! foundk {
224
240
return fmt .Errorf ("k=%s v=+%v slice value missing k" , k .Sym ().Name , sl )
225
241
}
242
+ vl := mls .vars [sl [0 ]]
243
+ for _ , v := range sl [1 :] {
244
+ vv := mls .vars [v ]
245
+ if vv .Type ().Size () > vl .Type ().Size () {
246
+ return fmt .Errorf ("k=%s v=+%v follower %s size %d larger than leader %s size %d" , k .Sym ().Name , sl , vv .Sym ().Name , vv .Type ().Size (), vl .Sym ().Name , vl .Type ().Size ())
247
+ }
248
+ if vv .Type ().HasPointers () && ! vl .Type ().HasPointers () {
249
+ return fmt .Errorf ("k=%s v=+%v follower %s hasptr=true but leader %s hasptr=false" , k .Sym ().Name , sl , vv .Sym ().Name , vl .Sym ().Name )
250
+ }
251
+ if vv .Type ().Alignment () > vl .Type ().Alignment () {
252
+ return fmt .Errorf ("k=%s v=+%v follower %s align %d greater than leader %s align %d" , k .Sym ().Name , sl , vv .Sym ().Name , vv .Type ().Alignment (), vl .Sym ().Name , vl .Type ().Alignment ())
253
+ }
254
+ }
226
255
}
227
256
for i := range used {
228
257
if ! used [i ] {
@@ -296,14 +325,13 @@ func (cs *cstate) collectMergeCandidates() {
296
325
297
326
// Now generate an initial pruned candidate list and regions list.
298
327
// This may be empty if we don't have enough compatible candidates.
299
- initial , _ := genRegions (cands )
328
+ initial , _ := cs . genRegions (cands )
300
329
if len (initial ) < 2 {
301
330
return
302
331
}
303
332
304
- // When bisecting it can be handy to see debug trace output for
305
- // only those functions that hashdebug selects; set this up here.
306
- cs .setupHashTrace (initial )
333
+ // Set up for hash bisection if enabled.
334
+ cs .setupHashBisection (initial )
307
335
308
336
// Create and populate an indirect use table that we'll use
309
337
// during interval construction. As part of this process we may
@@ -330,7 +358,9 @@ func (cs *cstate) collectMergeCandidates() {
330
358
}
331
359
}
332
360
333
- func genRegions (cands []* ir.Name ) ([]* ir.Name , []candRegion ) {
361
+ // genRegions generates a set of regions within cands corresponding
362
+ // to potentially overlappable/mergeable variables.
363
+ func (cs * cstate ) genRegions (cands []* ir.Name ) ([]* ir.Name , []candRegion ) {
334
364
var pruned []* ir.Name
335
365
var regions []candRegion
336
366
st := 0
@@ -346,8 +376,8 @@ func genRegions(cands []*ir.Name) ([]*ir.Name, []candRegion) {
346
376
}
347
377
pst := len (pruned )
348
378
pen := pst + (en - st )
349
- if base . Debug . MergeLocalsTrace > 1 {
350
- fmt .Fprintf (os .Stderr , "=-= add part %d -> %d\n " , pst , pen )
379
+ if cs . trace > 1 {
380
+ fmt .Fprintf (os .Stderr , "=-= addregion st=%d en=%d: add part %d -> %d\n " , st , en , pst , pen )
351
381
}
352
382
353
383
// non-empty region, add to pruned
@@ -385,27 +415,29 @@ func (cs *cstate) dumpFuncIfSelected() {
385
415
cs .dumpFunc ()
386
416
}
387
417
388
- func (cs * cstate ) setupHashTrace (cands []* ir.Name ) {
389
- if base .Debug .MergeLocalsHTrace == 0 || base .Debug .MergeLocalsHash == "" {
418
+ // setupHashBisection checks to see if any of the candidate
419
+ // variables have been de-selected by our hash debug. Here
420
+ // we also implement the -d=mergelocalshtrace flag, which turns
421
+ // on debug tracing only if we have at least two candidates
422
+ // selected by the hash debug for this function.
423
+ func (cs * cstate ) setupHashBisection (cands []* ir.Name ) {
424
+ if base .Debug .MergeLocalsHash == "" {
390
425
return
391
426
}
392
-
393
- // With this trace variant, check to see whether any of the
394
- // candidates are selected-- if yes then enable tracing. Hack:
395
- // create a new hashdebug with verbosity turned off and use that
396
- // to test, so as not to confuse bisect.
397
- modified := strings .ReplaceAll (base .Debug .MergeLocalsHash , "v" , "q" )
398
- quiethd := base .NewHashDebug ("qmergelocals" , modified , nil )
399
- found := false
427
+ deselected := make (map [* ir.Name ]bool )
428
+ selCount := 0
400
429
for _ , cand := range cands {
401
- if ! quiethd . MatchPosWithInfo (cand .Pos (), "quiet " , nil ) {
402
- found = true
403
- fmt . Fprintf ( os . Stderr , "=-= MergeLocalsHTrace fn=%v n=%s match \n " ,
404
- cs . fn , cand . Sym (). Name )
405
- break
430
+ if ! base . MergeLocalsHash . MatchPosWithInfo (cand .Pos (), "mergelocals " , nil ) {
431
+ deselected [ cand ] = true
432
+ } else {
433
+ deselected [ cand ] = false
434
+ selCount ++
406
435
}
407
436
}
408
- if found {
437
+ if selCount < len (cands ) {
438
+ cs .hashDeselected = deselected
439
+ }
440
+ if base .Debug .MergeLocalsHTrace != 0 && selCount >= 2 {
409
441
cs .trace = base .Debug .MergeLocalsHTrace
410
442
}
411
443
}
@@ -566,7 +598,7 @@ func (cs *cstate) populateIndirectUseTable(cands []*ir.Name) ([]*ir.Name, []cand
566
598
return nameLess (pruned [i ], pruned [j ])
567
599
})
568
600
var regions []candRegion
569
- pruned , regions = genRegions (pruned )
601
+ pruned , regions = cs . genRegions (pruned )
570
602
if len (pruned ) < 2 {
571
603
return nil , nil
572
604
}
@@ -586,29 +618,30 @@ type nameCount struct {
586
618
count int32
587
619
}
588
620
589
- // nameLess compares ci with cj to see if ci should be less than cj
590
- // in a relative ordering of candidate variables. This is used to
591
- // sort vars by size, pointerness, and GC shape.
621
+ // nameLess compares ci with cj to see if ci should be less than cj in
622
+ // a relative ordering of candidate variables. This is used to sort
623
+ // vars by pointerness (variables with pointers first), then in order
624
+ // of decreasing alignment, then by decreasing size. We are assuming a
625
+ // merging algorithm that merges later entries in the list into
626
+ // earlier entries. An example ordered candidate list produced by
627
+ // nameLess:
628
+ //
629
+ // idx name type align size
630
+ // 0: abc [10]*int 8 80
631
+ // 1: xyz [9]*int 8 72
632
+ // 2: qrs [2]*int 8 16
633
+ // 3: tuv [9]int 8 72
634
+ // 4: wxy [9]int32 4 36
635
+ // 5: jkl [8]int32 4 32
592
636
func nameLess (ci , cj * ir.Name ) bool {
593
- ihp , jhp := 0 , 0
594
- var ilsym , jlsym * obj.LSym
595
- if ci .Type ().HasPointers () {
596
- ihp = 1
597
- ilsym , _ , _ = reflectdata .GCSym (ci .Type ())
637
+ if ci .Type ().HasPointers () != cj .Type ().HasPointers () {
638
+ return ci .Type ().HasPointers ()
598
639
}
599
- if cj .Type ().HasPointers () {
600
- jhp = 1
601
- jlsym , _ , _ = reflectdata .GCSym (cj .Type ())
602
- }
603
- if ihp != jhp {
604
- return ihp < jhp
640
+ if ci .Type ().Alignment () != cj .Type ().Alignment () {
641
+ return cj .Type ().Alignment () < ci .Type ().Alignment ()
605
642
}
606
643
if ci .Type ().Size () != cj .Type ().Size () {
607
- return ci .Type ().Size () < cj .Type ().Size ()
608
- }
609
- if ihp != 0 && jhp != 0 && ilsym != jlsym {
610
- // FIXME: find less clunky way to do this
611
- return fmt .Sprintf ("%v" , ilsym ) < fmt .Sprintf ("%v" , jlsym )
644
+ return cj .Type ().Size () < ci .Type ().Size ()
612
645
}
613
646
if ci .Sym ().Name != cj .Sym ().Name {
614
647
return ci .Sym ().Name < cj .Sym ().Name
@@ -617,63 +650,48 @@ func nameLess(ci, cj *ir.Name) bool {
617
650
}
618
651
619
652
// nextRegion starts at location idx and walks forward in the cands
620
- // slice looking for variables that are "compatible" (overlappable)
621
- // with the variable at position idx; it returns the end of the new
622
- // region (range of compatible variables starting at idx).
653
+ // slice looking for variables that are "compatible" (potentially
654
+ // overlappable, in the sense that they could potentially share the
655
+ // stack slot of cands[idx]); it returns the end of the new region
656
+ // (range of compatible variables starting at idx).
623
657
func nextRegion (cands []* ir.Name , idx int ) int {
624
658
n := len (cands )
625
659
if idx >= n {
626
660
return - 1
627
661
}
628
662
c0 := cands [idx ]
629
- hp0 := c0 .Type ().HasPointers ()
663
+ szprev := c0 .Type ().Size ()
664
+ alnprev := c0 .Type ().Alignment ()
630
665
for j := idx + 1 ; j < n ; j ++ {
631
666
cj := cands [j ]
632
- hpj := cj .Type ().HasPointers ()
633
- ok := true
634
- if hp0 {
635
- if ! hpj || c0 .Type ().Size () != cj .Type ().Size () {
636
- return j - 1
637
- }
638
- // GC shape must match if both types have pointers.
639
- gcsym0 , _ , _ := reflectdata .GCSym (c0 .Type ())
640
- gcsymj , _ , _ := reflectdata .GCSym (cj .Type ())
641
- if gcsym0 != gcsymj {
642
- return j - 1
643
- }
644
- } else {
645
- // If no pointers, match size only.
646
- if ! ok || hp0 != hpj || c0 .Type ().Size () != cj .Type ().Size () {
647
- return j - 1
648
- }
667
+ szj := cj .Type ().Size ()
668
+ if szj > szprev {
669
+ return j - 1
649
670
}
671
+ alnj := cj .Type ().Alignment ()
672
+ if alnj > alnprev {
673
+ return j - 1
674
+ }
675
+ szprev = szj
676
+ alnprev = alnj
650
677
}
651
678
return n - 1
652
679
}
653
680
654
- // cstate holds state information we'll need during the analysis
655
- // phase of stack slot merging but can be discarded when the analysis
656
- // is done.
657
- type cstate struct {
658
- fn * ir.Func
659
- f * ssa.Func
660
- lv * liveness
661
- cands []* ir.Name
662
- nameToSlot map [* ir.Name ]int32
663
- regions []candRegion
664
- indirectUE map [ssa.ID ][]* ir.Name
665
- ivs []Intervals
666
- trace int // debug trace level
667
- }
668
-
669
681
// mergeVisitRegion tries to perform overlapping of variables with a
670
682
// given subrange of cands described by st and en (indices into our
671
683
// candidate var list), where the variables within this range have
672
684
// already been determined to be compatible with respect to type,
673
685
// size, etc. Overlapping is done in a greedy fashion: we select the
674
686
// first element in the st->en range, then walk the rest of the
675
687
// elements adding in vars whose lifetimes don't overlap with the
676
- // first element, then repeat the process until we run out of work to do.
688
+ // first element, then repeat the process until we run out of work.
689
+ // Ordering of the candidates within the region [st,en] is important;
690
+ // within the list the assumption is that if we overlap two variables
691
+ // X and Y where X precedes Y in the list, we need to make X the
692
+ // "leader" (keep X's slot and set Y's frame offset to X's) as opposed
693
+ // to the other way around, since it's possible that Y is smaller in
694
+ // size than X.
677
695
func (cs * cstate ) mergeVisitRegion (mls * MergeLocalsState , st , en int ) {
678
696
if cs .trace > 1 {
679
697
fmt .Fprintf (os .Stderr , "=-= mergeVisitRegion(st=%d, en=%d)\n " , st , en )
@@ -712,10 +730,8 @@ func (cs *cstate) mergeVisitRegion(mls *MergeLocalsState, st, en int) {
712
730
for succ := nxt (leader + 1 ); succ != - 1 ; succ = nxt (succ + 1 ) {
713
731
714
732
// Skip if de-selected by merge locals hash.
715
- if base .Debug .MergeLocalsHash != "" {
716
- if ! base .MergeLocalsHash .MatchPosWithInfo (cands [succ ].Pos (), "mergelocals" , nil ) {
717
- continue
718
- }
733
+ if cs .hashDeselected != nil && cs.hashDeselected [cands [succ ]] {
734
+ continue
719
735
}
720
736
// Skip if already used.
721
737
if used .Get (int32 (succ - st )) {
@@ -1004,9 +1020,9 @@ func fmtFullPos(p src.XPos) string {
1004
1020
}
1005
1021
1006
1022
func dumpCand (c * ir.Name , i int ) {
1007
- fmt .Fprintf (os .Stderr , " %d: %s %q sz=%d hp=%v t=%v\n " ,
1023
+ fmt .Fprintf (os .Stderr , " %d: %s %q sz=%d hp=%v align=%d t=%v\n " ,
1008
1024
i , fmtFullPos (c .Pos ()), c .Sym ().Name , c .Type ().Size (),
1009
- c .Type ().HasPointers (), c .Type ())
1025
+ c .Type ().HasPointers (), c .Type (). Alignment (), c . Type () )
1010
1026
}
1011
1027
1012
1028
// for unit testing only.
0 commit comments