Skip to content

Commit 7d2123e

Browse files
committed
[SimplifyCFG] Improve range reducing for switches
1 parent d7afafd commit 7d2123e

File tree

2 files changed

+210
-3
lines changed

2 files changed

+210
-3
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
#include <cstddef>
8383
#include <cstdint>
8484
#include <iterator>
85+
#include <limits>
8586
#include <map>
8687
#include <optional>
8788
#include <set>
@@ -7158,6 +7159,71 @@ static bool switchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
71587159
return true;
71597160
}
71607161

7162+
/// Try to reduce the range of cases with an unreachable default.
7163+
static bool
7164+
ReduceSwitchRangeWithUnreachableDefault(SwitchInst *SI,
7165+
const SmallVectorImpl<int64_t> &Values,
7166+
uint64_t Base, IRBuilder<> &Builder) {
7167+
bool HasDefault =
7168+
!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
7169+
if (HasDefault)
7170+
return false;
7171+
7172+
// Try reducing the range to (idx + offset) & mask
7173+
// Mask out common high bits
7174+
uint64_t CommonOnes = std::numeric_limits<uint64_t>::max();
7175+
uint64_t CommonZeros = std::numeric_limits<uint64_t>::max();
7176+
for (auto &V : Values) {
7177+
CommonOnes &= (uint64_t)V;
7178+
CommonZeros &= ~(uint64_t)V;
7179+
}
7180+
uint64_t CommonBits = countl_one(CommonOnes | CommonZeros);
7181+
unsigned LowBits = 64 - CommonBits;
7182+
uint64_t Mask = (1ULL << LowBits) - 1;
7183+
if (Mask == std::numeric_limits<uint64_t>::max())
7184+
return false;
7185+
// Now we have some case values in the additive group Z/(2**k)Z.
7186+
// Find the largest hole in the group and move it to back.
7187+
uint64_t MaxHole = 0;
7188+
uint64_t BestOffset = 0;
7189+
for (unsigned I = 0; I < Values.size(); ++I) {
7190+
uint64_t Hole = ((uint64_t)Values[I] -
7191+
(uint64_t)(I == 0 ? Values.back() : Values[I - 1])) &
7192+
Mask;
7193+
if (Hole > MaxHole) {
7194+
MaxHole = Hole;
7195+
BestOffset = Mask - (uint64_t)Values[I] + 1;
7196+
}
7197+
}
7198+
7199+
SmallVector<int64_t, 4> NewValues;
7200+
for (auto &V : Values)
7201+
NewValues.push_back(
7202+
(((int64_t)(((uint64_t)V + BestOffset) & Mask)) << CommonBits) >>
7203+
CommonBits);
7204+
7205+
llvm::sort(NewValues);
7206+
if (!isSwitchDense(NewValues))
7207+
// Transform didn't create a dense switch.
7208+
return false;
7209+
7210+
auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7211+
APInt Offset(Ty->getBitWidth(), BestOffset - Base);
7212+
auto *Index = Builder.CreateAnd(
7213+
Builder.CreateAdd(SI->getCondition(), ConstantInt::get(Ty, Offset)),
7214+
Mask);
7215+
SI->replaceUsesOfWith(SI->getCondition(), Index);
7216+
7217+
for (auto Case : SI->cases()) {
7218+
auto *Orig = Case.getCaseValue();
7219+
auto CaseVal =
7220+
(Orig->getValue() + Offset).trunc(LowBits).sext(Ty->getBitWidth());
7221+
Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, CaseVal)));
7222+
}
7223+
7224+
return true;
7225+
}
7226+
71617227
/// Try to transform a switch that has "holes" in it to a contiguous sequence
71627228
/// of cases.
71637229
///
@@ -7173,9 +7239,8 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
71737239
if (CondTy->getIntegerBitWidth() > 64 ||
71747240
!DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
71757241
return false;
7176-
// Only bother with this optimization if there are more than 3 switch cases;
7177-
// SDAG will only bother creating jump tables for 4 or more cases.
7178-
if (SI->getNumCases() < 4)
7242+
// Ignore switches with fewer than three cases.
7243+
if (SI->getNumCases() < 3)
71797244
return false;
71807245

71817246
// This transform is agnostic to the signedness of the input or case values. We
@@ -7196,6 +7261,9 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
71967261
for (auto &V : Values)
71977262
V -= (uint64_t)(Base);
71987263

7264+
if (ReduceSwitchRangeWithUnreachableDefault(SI, Values, Base, Builder))
7265+
return true;
7266+
71997267
// Now we have signed numbers that have been shifted so that, given enough
72007268
// precision, there are no negative values. Since the rest of the transform
72017269
// is bitwise only, we switch now to an unsigned representation.

llvm/test/Transforms/SimplifyCFG/rangereduce.ll

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,3 +305,142 @@ three:
305305
ret i32 99783
306306
}
307307

308+
; Switch over cases {0, 1, 255} whose default block (%bb2) is unreachable.
; Per the CHECK lines below, the switch is expected to disappear entirely:
; the condition becomes (%0 + 1) & 255, which is truncated to i8 and offset
; by -1 to produce the returned value.
; NOTE(review): the name suggests this mirrors issue/PR 67842 - confirm.
define i8 @pr67842(i32 %0) {
309+
; CHECK-LABEL: @pr67842(
310+
; CHECK-NEXT: start:
311+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0:%.*]], 1
312+
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 255
313+
; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[TMP2]] to i8
314+
; CHECK-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i8 [[SWITCH_IDX_CAST]], -1
315+
; CHECK-NEXT: ret i8 [[SWITCH_OFFSET]]
316+
;
317+
start:
318+
switch i32 %0, label %bb2 [
319+
i32 0, label %bb5
320+
i32 1, label %bb4
321+
i32 255, label %bb1
322+
]
323+
324+
bb2: ; preds = %start
325+
unreachable
326+
327+
bb4: ; preds = %start
328+
br label %bb5
329+
330+
bb1: ; preds = %start
331+
br label %bb5
332+
333+
bb5: ; preds = %start, %bb1, %bb4
334+
%.0 = phi i8 [ -1, %bb1 ], [ 1, %bb4 ], [ 0, %start ]
335+
ret i8 %.0
336+
}
337+
338+
; Switch over cases {128, 129, 255} with an unreachable default (%bb2).
; All three values share bit 7, so only the low seven bits distinguish them.
; Per the CHECK lines below, the condition is expected to become
; (%0 + -127) & 127 and the switch is expected to be replaced by a
; trunc/add sequence.
define i8 @reduce_masked_common_high_bits(i32 %0) {
339+
; CHECK-LABEL: @reduce_masked_common_high_bits(
340+
; CHECK-NEXT: start:
341+
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0:%.*]], -127
342+
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 127
343+
; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = trunc i32 [[TMP2]] to i8
344+
; CHECK-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i8 [[SWITCH_IDX_CAST]], -1
345+
; CHECK-NEXT: ret i8 [[SWITCH_OFFSET]]
346+
;
347+
start:
348+
switch i32 %0, label %bb2 [
349+
i32 128, label %bb5
350+
i32 129, label %bb4
351+
i32 255, label %bb1
352+
]
353+
354+
bb2: ; preds = %start
355+
unreachable
356+
357+
bb4: ; preds = %start
358+
br label %bb5
359+
360+
bb1: ; preds = %start
361+
br label %bb5
362+
363+
bb5: ; preds = %start, %bb1, %bb4
364+
%.0 = phi i8 [ -1, %bb1 ], [ 1, %bb4 ], [ 0, %start ]
365+
ret i8 %.0
366+
}
367+
368+
; Negative test: cases {128, 129, 511} with an unreachable default (%bb2).
; Per the CHECK lines below, the switch is expected to be left unchanged.
; NOTE(review): presumably the values stay too sparse after masking and
; rotating for the result to count as dense - confirm against the pass.
define i8 @reduce_masked_common_high_bits_fail(i32 %0) {
369+
; CHECK-LABEL: @reduce_masked_common_high_bits_fail(
370+
; CHECK-NEXT: start:
371+
; CHECK-NEXT: switch i32 [[TMP0:%.*]], label [[BB2:%.*]] [
372+
; CHECK-NEXT: i32 128, label [[BB5:%.*]]
373+
; CHECK-NEXT: i32 129, label [[BB4:%.*]]
374+
; CHECK-NEXT: i32 511, label [[BB1:%.*]]
375+
; CHECK-NEXT: ]
376+
; CHECK: bb2:
377+
; CHECK-NEXT: unreachable
378+
; CHECK: bb4:
379+
; CHECK-NEXT: br label [[BB5]]
380+
; CHECK: bb1:
381+
; CHECK-NEXT: br label [[BB5]]
382+
; CHECK: bb5:
383+
; CHECK-NEXT: [[DOT0:%.*]] = phi i8 [ -1, [[BB1]] ], [ 1, [[BB4]] ], [ 0, [[START:%.*]] ]
384+
; CHECK-NEXT: ret i8 [[DOT0]]
385+
;
386+
start:
387+
switch i32 %0, label %bb2 [
388+
i32 128, label %bb5
389+
i32 129, label %bb4
390+
i32 511, label %bb1
391+
]
392+
393+
bb2: ; preds = %start
394+
unreachable
395+
396+
bb4: ; preds = %start
397+
br label %bb5
398+
399+
bb1: ; preds = %start
400+
br label %bb5
401+
402+
bb5: ; preds = %start, %bb1, %bb4
403+
%.0 = phi i8 [ -1, %bb1 ], [ 1, %bb4 ], [ 0, %start ]
404+
ret i8 %.0
405+
}
406+
407+
; Negative test: the optimization must not trigger because the default block
; is reachable. Cases are {0, 1, 255}, but the default block (%bb2) returns 24
; instead of being unreachable, so the CHECK lines below expect the switch to
; be kept with its original case values.
408+
define i8 @reduce_masked_default_reachable(i32 %0) {
409+
; CHECK-LABEL: @reduce_masked_default_reachable(
410+
; CHECK-NEXT: start:
411+
; CHECK-NEXT: switch i32 [[TMP0:%.*]], label [[COMMON_RET:%.*]] [
412+
; CHECK-NEXT: i32 0, label [[BB5:%.*]]
413+
; CHECK-NEXT: i32 1, label [[BB4:%.*]]
414+
; CHECK-NEXT: i32 255, label [[BB1:%.*]]
415+
; CHECK-NEXT: ]
416+
; CHECK: common.ret:
417+
; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i8 [ [[DOT0:%.*]], [[BB5]] ], [ 24, [[START:%.*]] ]
418+
; CHECK-NEXT: ret i8 [[COMMON_RET_OP]]
419+
; CHECK: bb4:
420+
; CHECK-NEXT: br label [[BB5]]
421+
; CHECK: bb1:
422+
; CHECK-NEXT: br label [[BB5]]
423+
; CHECK: bb5:
424+
; CHECK-NEXT: [[DOT0]] = phi i8 [ -1, [[BB1]] ], [ 1, [[BB4]] ], [ 0, [[START]] ]
425+
; CHECK-NEXT: br label [[COMMON_RET]]
426+
;
427+
start:
428+
switch i32 %0, label %bb2 [
429+
i32 0, label %bb5
430+
i32 1, label %bb4
431+
i32 255, label %bb1
432+
]
433+
434+
bb2: ; preds = %start
435+
ret i8 24
436+
437+
bb4: ; preds = %start
438+
br label %bb5
439+
440+
bb1: ; preds = %start
441+
br label %bb5
442+
443+
bb5: ; preds = %start, %bb1, %bb4
444+
%.0 = phi i8 [ -1, %bb1 ], [ 1, %bb4 ], [ 0, %start ]
445+
ret i8 %.0
446+
}

0 commit comments

Comments
 (0)