Skip to content

Commit 37ed2e6

Browse files
authored
[Scalarizer] Make *_with_overflow intrinsics scalarizable (#126815)
Addresses issue #126809.
- Made `uadd_with_overflow`, `sadd_with_overflow`, `usub_with_overflow`, `ssub_with_overflow`, `umul_with_overflow`, and `smul_with_overflow` trivially scalarizable in `isTriviallyScalarizable()` in `VectorUtils.cpp`.
- Renamed the test `Scalarizer/uadd_overflow.ll` to `Scalarizer/uadd_with_overflow.ll` and updated it to check that `uadd_with_overflow` gets scalarized.
- Added a test `Scalarizer/sincos.ll` to ensure the bug fix for #113625 still works.
1 parent 424fcc5 commit 37ed2e6

9 files changed

+167
-16
lines changed

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,12 @@ bool llvm::isTriviallyScalarizable(Intrinsic::ID ID,
125125
// https://github.com/llvm/llvm-project/issues/112408
126126
switch (ID) {
127127
case Intrinsic::frexp:
128+
case Intrinsic::uadd_with_overflow:
129+
case Intrinsic::sadd_with_overflow:
130+
case Intrinsic::ssub_with_overflow:
131+
case Intrinsic::usub_with_overflow:
132+
case Intrinsic::umul_with_overflow:
133+
case Intrinsic::smul_with_overflow:
128134
return true;
129135
}
130136
return false;
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt %s -passes="function(scalarizer)" -S | FileCheck %s
3+
4+
define <3 x i32> @test_(<3 x i32> %a, <3 x i32> %b) {
5+
; CHECK-LABEL: define <3 x i32> @test_(
6+
; CHECK-SAME: <3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]]) {
7+
; CHECK-NEXT: [[B_I0:%.*]] = extractelement <3 x i32> [[B]], i64 0
8+
; CHECK-NEXT: [[R_I0:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[B_I0]], i32 [[B_I0]])
9+
; CHECK-NEXT: [[B_I1:%.*]] = extractelement <3 x i32> [[B]], i64 1
10+
; CHECK-NEXT: [[R_I1:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[B_I1]], i32 [[B_I1]])
11+
; CHECK-NEXT: [[B_I2:%.*]] = extractelement <3 x i32> [[B]], i64 2
12+
; CHECK-NEXT: [[R_I2:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[B_I2]], i32 [[B_I2]])
13+
; CHECK-NEXT: [[EL_ELEM0:%.*]] = extractvalue { i32, i1 } [[R_I0]], 0
14+
; CHECK-NEXT: [[EL_ELEM01:%.*]] = extractvalue { i32, i1 } [[R_I1]], 0
15+
; CHECK-NEXT: [[EL_ELEM02:%.*]] = extractvalue { i32, i1 } [[R_I2]], 0
16+
; CHECK-NEXT: [[EL_UPTO0:%.*]] = insertelement <3 x i32> poison, i32 [[EL_ELEM0]], i64 0
17+
; CHECK-NEXT: [[EL_UPTO1:%.*]] = insertelement <3 x i32> [[EL_UPTO0]], i32 [[EL_ELEM01]], i64 1
18+
; CHECK-NEXT: [[EL:%.*]] = insertelement <3 x i32> [[EL_UPTO1]], i32 [[EL_ELEM02]], i64 2
19+
; CHECK-NEXT: ret <3 x i32> [[EL]]
20+
;
21+
%r = call { <3 x i32>, <3 x i1> } @llvm.sadd.with.overflow.v3i32(<3 x i32> %b, <3 x i32> %b)
22+
%el = extractvalue { <3 x i32>, <3 x i1> } %r, 0
23+
ret <3 x i32> %el
24+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt %s -passes="function(scalarizer)" -S | FileCheck %s
3+
4+
; Test to make sure that struct-returning intrinsics for which `isTriviallyScalarizable` returns false do not get scalarized.
5+
6+
define <4 x float> @test_(<4 x float> %Val) {
7+
; CHECK-LABEL: define <4 x float> @test_(
8+
; CHECK-SAME: <4 x float> [[VAL:%.*]]) {
9+
; CHECK-NEXT: [[R:%.*]] = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> [[VAL]])
10+
; CHECK-NEXT: [[EL:%.*]] = extractvalue { <4 x float>, <4 x float> } [[R]], 0
11+
; CHECK-NEXT: ret <4 x float> [[EL]]
12+
;
13+
%r = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %Val)
14+
%el = extractvalue { <4 x float>, <4 x float> } %r, 0
15+
ret <4 x float> %el
16+
}
17+
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt %s -passes="function(scalarizer)" -S | FileCheck %s
3+
4+
define <3 x i32> @test_(<3 x i32> %a, <3 x i32> %b) {
5+
; CHECK-LABEL: define <3 x i32> @test_(
6+
; CHECK-SAME: <3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]]) {
7+
; CHECK-NEXT: [[B_I0:%.*]] = extractelement <3 x i32> [[B]], i64 0
8+
; CHECK-NEXT: [[R_I0:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[B_I0]], i32 [[B_I0]])
9+
; CHECK-NEXT: [[B_I1:%.*]] = extractelement <3 x i32> [[B]], i64 1
10+
; CHECK-NEXT: [[R_I1:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[B_I1]], i32 [[B_I1]])
11+
; CHECK-NEXT: [[B_I2:%.*]] = extractelement <3 x i32> [[B]], i64 2
12+
; CHECK-NEXT: [[R_I2:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[B_I2]], i32 [[B_I2]])
13+
; CHECK-NEXT: [[EL_ELEM0:%.*]] = extractvalue { i32, i1 } [[R_I0]], 0
14+
; CHECK-NEXT: [[EL_ELEM01:%.*]] = extractvalue { i32, i1 } [[R_I1]], 0
15+
; CHECK-NEXT: [[EL_ELEM02:%.*]] = extractvalue { i32, i1 } [[R_I2]], 0
16+
; CHECK-NEXT: [[EL_UPTO0:%.*]] = insertelement <3 x i32> poison, i32 [[EL_ELEM0]], i64 0
17+
; CHECK-NEXT: [[EL_UPTO1:%.*]] = insertelement <3 x i32> [[EL_UPTO0]], i32 [[EL_ELEM01]], i64 1
18+
; CHECK-NEXT: [[EL:%.*]] = insertelement <3 x i32> [[EL_UPTO1]], i32 [[EL_ELEM02]], i64 2
19+
; CHECK-NEXT: ret <3 x i32> [[EL]]
20+
;
21+
%r = call { <3 x i32>, <3 x i1> } @llvm.smul.with.overflow.v3i32(<3 x i32> %b, <3 x i32> %b)
22+
%el = extractvalue { <3 x i32>, <3 x i1> } %r, 0
23+
ret <3 x i32> %el
24+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt %s -passes="function(scalarizer)" -S | FileCheck %s
3+
4+
define <3 x i32> @test_(<3 x i32> %a, <3 x i32> %b) {
5+
; CHECK-LABEL: define <3 x i32> @test_(
6+
; CHECK-SAME: <3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]]) {
7+
; CHECK-NEXT: [[B_I0:%.*]] = extractelement <3 x i32> [[B]], i64 0
8+
; CHECK-NEXT: [[R_I0:%.*]] = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[B_I0]], i32 [[B_I0]])
9+
; CHECK-NEXT: [[B_I1:%.*]] = extractelement <3 x i32> [[B]], i64 1
10+
; CHECK-NEXT: [[R_I1:%.*]] = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[B_I1]], i32 [[B_I1]])
11+
; CHECK-NEXT: [[B_I2:%.*]] = extractelement <3 x i32> [[B]], i64 2
12+
; CHECK-NEXT: [[R_I2:%.*]] = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[B_I2]], i32 [[B_I2]])
13+
; CHECK-NEXT: [[EL_ELEM0:%.*]] = extractvalue { i32, i1 } [[R_I0]], 0
14+
; CHECK-NEXT: [[EL_ELEM01:%.*]] = extractvalue { i32, i1 } [[R_I1]], 0
15+
; CHECK-NEXT: [[EL_ELEM02:%.*]] = extractvalue { i32, i1 } [[R_I2]], 0
16+
; CHECK-NEXT: [[EL_UPTO0:%.*]] = insertelement <3 x i32> poison, i32 [[EL_ELEM0]], i64 0
17+
; CHECK-NEXT: [[EL_UPTO1:%.*]] = insertelement <3 x i32> [[EL_UPTO0]], i32 [[EL_ELEM01]], i64 1
18+
; CHECK-NEXT: [[EL:%.*]] = insertelement <3 x i32> [[EL_UPTO1]], i32 [[EL_ELEM02]], i64 2
19+
; CHECK-NEXT: ret <3 x i32> [[EL]]
20+
;
21+
%r = call { <3 x i32>, <3 x i1> } @llvm.ssub.with.overflow.v3i32(<3 x i32> %b, <3 x i32> %b)
22+
%el = extractvalue { <3 x i32>, <3 x i1> } %r, 0
23+
ret <3 x i32> %el
24+
}

llvm/test/Transforms/Scalarizer/uadd_overflow.ll

Lines changed: 0 additions & 16 deletions
This file was deleted.
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt %s -passes="function(scalarizer)" -S | FileCheck %s
3+
4+
define <3 x i32> @test_(<3 x i32> %a, <3 x i32> %b) {
5+
; CHECK-LABEL: define <3 x i32> @test_(
6+
; CHECK-SAME: <3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]]) {
7+
; CHECK-NEXT: [[B_I0:%.*]] = extractelement <3 x i32> [[B]], i64 0
8+
; CHECK-NEXT: [[R_I0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[B_I0]], i32 [[B_I0]])
9+
; CHECK-NEXT: [[B_I1:%.*]] = extractelement <3 x i32> [[B]], i64 1
10+
; CHECK-NEXT: [[R_I1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[B_I1]], i32 [[B_I1]])
11+
; CHECK-NEXT: [[B_I2:%.*]] = extractelement <3 x i32> [[B]], i64 2
12+
; CHECK-NEXT: [[R_I2:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[B_I2]], i32 [[B_I2]])
13+
; CHECK-NEXT: [[EL_ELEM0:%.*]] = extractvalue { i32, i1 } [[R_I0]], 0
14+
; CHECK-NEXT: [[EL_ELEM01:%.*]] = extractvalue { i32, i1 } [[R_I1]], 0
15+
; CHECK-NEXT: [[EL_ELEM02:%.*]] = extractvalue { i32, i1 } [[R_I2]], 0
16+
; CHECK-NEXT: [[EL_UPTO0:%.*]] = insertelement <3 x i32> poison, i32 [[EL_ELEM0]], i64 0
17+
; CHECK-NEXT: [[EL_UPTO1:%.*]] = insertelement <3 x i32> [[EL_UPTO0]], i32 [[EL_ELEM01]], i64 1
18+
; CHECK-NEXT: [[EL:%.*]] = insertelement <3 x i32> [[EL_UPTO1]], i32 [[EL_ELEM02]], i64 2
19+
; CHECK-NEXT: ret <3 x i32> [[EL]]
20+
;
21+
%r = call { <3 x i32>, <3 x i1> } @llvm.uadd.with.overflow.v3i32(<3 x i32> %b, <3 x i32> %b)
22+
%el = extractvalue { <3 x i32>, <3 x i1> } %r, 0
23+
ret <3 x i32> %el
24+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt %s -passes="function(scalarizer)" -S | FileCheck %s
3+
4+
define <3 x i32> @test_(<3 x i32> %a, <3 x i32> %b) {
5+
; CHECK-LABEL: define <3 x i32> @test_(
6+
; CHECK-SAME: <3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]]) {
7+
; CHECK-NEXT: [[B_I0:%.*]] = extractelement <3 x i32> [[B]], i64 0
8+
; CHECK-NEXT: [[R_I0:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[B_I0]], i32 [[B_I0]])
9+
; CHECK-NEXT: [[B_I1:%.*]] = extractelement <3 x i32> [[B]], i64 1
10+
; CHECK-NEXT: [[R_I1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[B_I1]], i32 [[B_I1]])
11+
; CHECK-NEXT: [[B_I2:%.*]] = extractelement <3 x i32> [[B]], i64 2
12+
; CHECK-NEXT: [[R_I2:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[B_I2]], i32 [[B_I2]])
13+
; CHECK-NEXT: [[EL_ELEM0:%.*]] = extractvalue { i32, i1 } [[R_I0]], 0
14+
; CHECK-NEXT: [[EL_ELEM01:%.*]] = extractvalue { i32, i1 } [[R_I1]], 0
15+
; CHECK-NEXT: [[EL_ELEM02:%.*]] = extractvalue { i32, i1 } [[R_I2]], 0
16+
; CHECK-NEXT: [[EL_UPTO0:%.*]] = insertelement <3 x i32> poison, i32 [[EL_ELEM0]], i64 0
17+
; CHECK-NEXT: [[EL_UPTO1:%.*]] = insertelement <3 x i32> [[EL_UPTO0]], i32 [[EL_ELEM01]], i64 1
18+
; CHECK-NEXT: [[EL:%.*]] = insertelement <3 x i32> [[EL_UPTO1]], i32 [[EL_ELEM02]], i64 2
19+
; CHECK-NEXT: ret <3 x i32> [[EL]]
20+
;
21+
%r = call { <3 x i32>, <3 x i1> } @llvm.umul.with.overflow.v3i32(<3 x i32> %b, <3 x i32> %b)
22+
%el = extractvalue { <3 x i32>, <3 x i1> } %r, 0
23+
ret <3 x i32> %el
24+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt %s -passes="function(scalarizer)" -S | FileCheck %s
3+
4+
define <3 x i32> @test_(<3 x i32> %a, <3 x i32> %b) {
5+
; CHECK-LABEL: define <3 x i32> @test_(
6+
; CHECK-SAME: <3 x i32> [[A:%.*]], <3 x i32> [[B:%.*]]) {
7+
; CHECK-NEXT: [[B_I0:%.*]] = extractelement <3 x i32> [[B]], i64 0
8+
; CHECK-NEXT: [[R_I0:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[B_I0]], i32 [[B_I0]])
9+
; CHECK-NEXT: [[B_I1:%.*]] = extractelement <3 x i32> [[B]], i64 1
10+
; CHECK-NEXT: [[R_I1:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[B_I1]], i32 [[B_I1]])
11+
; CHECK-NEXT: [[B_I2:%.*]] = extractelement <3 x i32> [[B]], i64 2
12+
; CHECK-NEXT: [[R_I2:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[B_I2]], i32 [[B_I2]])
13+
; CHECK-NEXT: [[EL_ELEM0:%.*]] = extractvalue { i32, i1 } [[R_I0]], 0
14+
; CHECK-NEXT: [[EL_ELEM01:%.*]] = extractvalue { i32, i1 } [[R_I1]], 0
15+
; CHECK-NEXT: [[EL_ELEM02:%.*]] = extractvalue { i32, i1 } [[R_I2]], 0
16+
; CHECK-NEXT: [[EL_UPTO0:%.*]] = insertelement <3 x i32> poison, i32 [[EL_ELEM0]], i64 0
17+
; CHECK-NEXT: [[EL_UPTO1:%.*]] = insertelement <3 x i32> [[EL_UPTO0]], i32 [[EL_ELEM01]], i64 1
18+
; CHECK-NEXT: [[EL:%.*]] = insertelement <3 x i32> [[EL_UPTO1]], i32 [[EL_ELEM02]], i64 2
19+
; CHECK-NEXT: ret <3 x i32> [[EL]]
20+
;
21+
%r = call { <3 x i32>, <3 x i1> } @llvm.usub.with.overflow.v3i32(<3 x i32> %b, <3 x i32> %b)
22+
%el = extractvalue { <3 x i32>, <3 x i1> } %r, 0
23+
ret <3 x i32> %el
24+
}

0 commit comments

Comments
 (0)