Skip to content

Commit 2df9ed1

Browse files
committed
[LoopVectorize] Pre-commit tests for D157631
Differential Revision: https://reviews.llvm.org/D157630
1 parent 6697afe commit 2df9ed1

File tree

1 file changed

+121
-0
lines changed

1 file changed

+121
-0
lines changed

llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,3 +556,124 @@ exit: ; preds = %for.body
556556
%add.lcssa = phi i32 [ %add, %for.body ]
557557
ret i32 %add.lcssa
558558
}
559+
560+
; Make sure that if there are several reductions in the loop, the order of the invariant stores sunk out of the loop is preserved.
561+
; FIXME: This test currently shows incorrect behavior; it will be fixed in the following patch.
562+
; See https://github.com/llvm/llvm-project/issues/64047
563+
; Two reductions (an add and a mul over the values loaded from %src) whose
; running results are both stored to the same loop-invariant pointer %dst on
; every iteration. In the scalar loop the sum is stored first and the product
; second, so the product is the value left in %dst when the loop exits.
; The CHECK lines below expect the opposite order in middle.block (mul store,
; then add store), i.e. the sum would be the last value written.
define void @reduc_add_mul_store_same_ptr(ptr %dst, ptr readonly %src) {
564+
; CHECK-LABEL: define void @reduc_add_mul_store_same_ptr
565+
; CHECK: middle.block:
566+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1:%.*]])
567+
; CHECK-NEXT: store i32 [[TMP2]], ptr %dst, align 4
568+
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
569+
; CHECK-NEXT: store i32 [[TMP4]], ptr %dst, align 4
570+
;
571+
entry:
572+
br label %for.body
573+
574+
for.body:
575+
; %sum starts at 0 and %mul at 1 (the identities of add and mul).
%sum = phi i32 [ 0, %entry ], [ %sum.next, %for.body ]
576+
%mul = phi i32 [ 1, %entry ], [ %mul.next, %for.body ]
577+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
578+
; Load src[iv]; the same loaded value feeds both reductions.
%gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
579+
%0 = load i32, ptr %gep.src, align 4
580+
; First invariant store: the running sum.
%sum.next = add nsw i32 %sum, %0
581+
store i32 %sum.next, ptr %dst, align 4
582+
; Second invariant store: the running product, overwriting the sum in %dst.
%mul.next = mul nsw i32 %mul, %0
583+
store i32 %mul.next, ptr %dst, align 4
584+
; The loop exits once %iv.next reaches 1000, i.e. after 1000 iterations.
%iv.next = add nuw nsw i64 %iv, 1
585+
%exitcond = icmp eq i64 %iv.next, 1000
586+
br i1 %exitcond, label %exit, label %for.body
587+
588+
exit:
589+
ret void
590+
}
591+
592+
; Counterpart of @reduc_add_mul_store_same_ptr with the two reductions swapped:
; the product is stored to %dst first and the sum second, so the sum is the
; value left in %dst when the scalar loop exits.
; Here the order expected in middle.block (mul store, then add store) matches
; the order of the stores in the scalar loop.
define void @reduc_mul_add_store_same_ptr(ptr %dst, ptr readonly %src) {
593+
; CHECK-LABEL: define void @reduc_mul_add_store_same_ptr
594+
; CHECK: middle.block:
595+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1:%.*]])
596+
; CHECK-NEXT: store i32 [[TMP2]], ptr %dst, align 4
597+
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
598+
; CHECK-NEXT: store i32 [[TMP4]], ptr %dst, align 4
599+
;
600+
entry:
601+
br label %for.body
602+
603+
for.body:
604+
; %sum starts at 0 and %mul at 1 (the identities of add and mul).
%sum = phi i32 [ 0, %entry ], [ %sum.next, %for.body ]
605+
%mul = phi i32 [ 1, %entry ], [ %mul.next, %for.body ]
606+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
607+
; Load src[iv]; the same loaded value feeds both reductions.
%gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
608+
%0 = load i32, ptr %gep.src, align 4
609+
; First invariant store: the running product.
%mul.next = mul nsw i32 %mul, %0
610+
store i32 %mul.next, ptr %dst, align 4
611+
; Second invariant store: the running sum, overwriting the product in %dst.
%sum.next = add nsw i32 %sum, %0
612+
store i32 %sum.next, ptr %dst, align 4
613+
; The loop exits once %iv.next reaches 1000, i.e. after 1000 iterations.
%iv.next = add nuw nsw i64 %iv, 1
614+
%exitcond = icmp eq i64 %iv.next, 1000
615+
br i1 %exitcond, label %exit, label %for.body
616+
617+
exit:
618+
ret void
619+
}
620+
621+
; Same as above, but the stores go to two different pointers, which may alias.
622+
; FIXME: This test currently shows incorrect behavior; it will be fixed in the following patch.
623+
; Add and mul reductions over the values loaded from %src, stored to two
; separate pointer arguments: the sum goes to %dst1 first, then the product
; goes to %dst2. Neither %dst1 nor %dst2 carries a noalias attribute.
; The CHECK lines below expect the opposite order in middle.block: the mul
; result stored to %dst2 first, then the add result stored to %dst1.
define void @reduc_add_mul_store_different_ptr(ptr %dst1, ptr %dst2, ptr readonly %src) {
624+
; CHECK-LABEL: define void @reduc_add_mul_store_different_ptr
625+
; CHECK: middle.block:
626+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1:%.*]])
627+
; CHECK-NEXT: store i32 [[TMP2]], ptr %dst2, align 4
628+
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
629+
; CHECK-NEXT: store i32 [[TMP4]], ptr %dst1, align 4
630+
;
631+
entry:
632+
br label %for.body
633+
634+
for.body:
635+
; %sum starts at 0 and %mul at 1 (the identities of add and mul).
%sum = phi i32 [ 0, %entry ], [ %sum.next, %for.body ]
636+
%mul = phi i32 [ 1, %entry ], [ %mul.next, %for.body ]
637+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
638+
; Load src[iv]; the same loaded value feeds both reductions.
%gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
639+
%0 = load i32, ptr %gep.src, align 4
640+
; First invariant store: the running sum, to %dst1.
%sum.next = add nsw i32 %sum, %0
641+
store i32 %sum.next, ptr %dst1, align 4
642+
; Second invariant store: the running product, to %dst2.
%mul.next = mul nsw i32 %mul, %0
643+
store i32 %mul.next, ptr %dst2, align 4
644+
; The loop exits once %iv.next reaches 1000, i.e. after 1000 iterations.
%iv.next = add nuw nsw i64 %iv, 1
645+
%exitcond = icmp eq i64 %iv.next, 1000
646+
br i1 %exitcond, label %exit, label %for.body
647+
648+
exit:
649+
ret void
650+
}
651+
652+
; Counterpart of @reduc_add_mul_store_different_ptr with the two reductions
; swapped: the product is stored to %dst1 first, then the sum is stored to
; %dst2. Neither %dst1 nor %dst2 carries a noalias attribute.
; Here the order expected in middle.block (mul result to %dst1, then add
; result to %dst2) matches the order of the stores in the scalar loop.
define void @reduc_mul_add_store_different_ptr(ptr %dst1, ptr %dst2, ptr readonly %src) {
653+
; CHECK-LABEL: define void @reduc_mul_add_store_different_ptr
654+
; CHECK: middle.block:
655+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1:%.*]])
656+
; CHECK-NEXT: store i32 [[TMP2]], ptr %dst1, align 4
657+
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
658+
; CHECK-NEXT: store i32 [[TMP4]], ptr %dst2, align 4
659+
;
660+
entry:
661+
br label %for.body
662+
663+
for.body:
664+
; %sum starts at 0 and %mul at 1 (the identities of add and mul).
%sum = phi i32 [ 0, %entry ], [ %sum.next, %for.body ]
665+
%mul = phi i32 [ 1, %entry ], [ %mul.next, %for.body ]
666+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
667+
; Load src[iv]; the same loaded value feeds both reductions.
%gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
668+
%0 = load i32, ptr %gep.src, align 4
669+
; First invariant store: the running product, to %dst1.
%mul.next = mul nsw i32 %mul, %0
670+
store i32 %mul.next, ptr %dst1, align 4
671+
; Second invariant store: the running sum, to %dst2.
%sum.next = add nsw i32 %sum, %0
672+
store i32 %sum.next, ptr %dst2, align 4
673+
; The loop exits once %iv.next reaches 1000, i.e. after 1000 iterations.
%iv.next = add nuw nsw i64 %iv, 1
674+
%exitcond = icmp eq i64 %iv.next, 1000
675+
br i1 %exitcond, label %exit, label %for.body
676+
677+
exit:
678+
ret void
679+
}

0 commit comments

Comments
 (0)