Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 95185c8

Browse files
authored
Merge pull request #132 from nikic/cherry-picks
Backports
2 parents a784eca + 85ec369 commit 95185c8

File tree

4 files changed

+101
-6
lines changed

4 files changed

+101
-6
lines changed

lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -586,7 +586,7 @@ void X86AvoidSFBPass::breakBlockedCopies(
586 586
StDisp2 += OverlapDelta;
587 587
Size2 -= OverlapDelta;
588 588
}
589-
Size1 = std::abs(std::abs(LdDisp2) - std::abs(LdDisp1));
589+
Size1 = LdDisp2 - LdDisp1;
590 590

591 591
// Build a copy for the point until the current blocking store's
592 592
// displacement.

lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -522,11 +522,9 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
522 522
}
523 523

524 524
// Otherwise, there is an index. The computation we will do will be modulo
525-
// the pointer size, so get it.
526-
uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
527-
528-
Offset &= PtrSizeMask;
529-
VariableScale &= PtrSizeMask;
525+
// the pointer size.
526+
Offset = SignExtend64(Offset, IntPtrWidth);
527+
VariableScale = SignExtend64(VariableScale, IntPtrWidth);
530 528

531 529
// To do this transformation, any constant index must be a multiple of the
532 530
// variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",

test/CodeGen/X86/pr39926.ll

Lines changed: 48 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,48 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx | FileCheck %s
3+
define i8 @test_offset(i8* %base) {
4+
; CHECK-LABEL: test_offset:
5+
; CHECK: # %bb.0: # %entry
6+
; CHECK-NEXT: pushq %rax
7+
; CHECK-NEXT: .cfi_def_cfa_offset 16
8+
; CHECK-NEXT: movb $0, 7(%rdi)
9+
; CHECK-NEXT: movw $0, 5(%rdi)
10+
; CHECK-NEXT: movl $0, 1(%rdi)
11+
; CHECK-NEXT: movl -4(%rdi), %eax
12+
; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
13+
; CHECK-NEXT: movb (%rdi), %al
14+
; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp)
15+
; CHECK-NEXT: movl 1(%rdi), %eax
16+
; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
17+
; CHECK-NEXT: movzwl 5(%rdi), %eax
18+
; CHECK-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
19+
; CHECK-NEXT: movb 7(%rdi), %al
20+
; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp)
21+
; CHECK-NEXT: movl 8(%rdi), %eax
22+
; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
23+
; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %al
24+
; CHECK-NEXT: popq %rcx
25+
; CHECK-NEXT: .cfi_def_cfa_offset 8
26+
; CHECK-NEXT: retq
27+
entry:
28+
%z = alloca [128 x i8], align 16
29+
%gep0 = getelementptr inbounds i8, i8* %base, i64 7
30+
store volatile i8 0, i8* %gep0
31+
%gep1 = getelementptr inbounds i8, i8* %base, i64 5
32+
%bc1 = bitcast i8* %gep1 to i16*
33+
store volatile i16 0, i16* %bc1
34+
%gep2 = getelementptr inbounds i8, i8* %base, i64 1
35+
%bc2 = bitcast i8* %gep2 to i32*
36+
store volatile i32 0, i32* %bc2
37+
38+
%y1 = getelementptr inbounds i8, i8* %base, i64 -4
39+
%y2 = bitcast [128 x i8]* %z to i8*
40+
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %y2, i8* %y1, i64 16, i1 false)
41+
42+
%gep4 = getelementptr inbounds [128 x i8], [128 x i8]* %z, i64 0, i64 4
43+
%ret = load i8, i8* %gep4
44+
ret i8 %ret
45+
}
46+
47+
; Function Attrs: argmemonly nounwind
48+
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,49 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -instcombine -S | FileCheck %s
3+
4+
target datalayout = "p:32:32"
5+
6+
%S = type { [2 x i32] }
7+
8+
define i1 @test([0 x %S]* %p, i32 %n) {
9+
; CHECK-LABEL: @test(
10+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 1
11+
; CHECK-NEXT: ret i1 [[CMP]]
12+
;
13+
%start.cast = bitcast [0 x %S]* %p to %S*
14+
%end = getelementptr inbounds [0 x %S], [0 x %S]* %p, i32 0, i32 %n, i32 0, i32 0
15+
%end.cast = bitcast i32* %end to %S*
16+
%last = getelementptr inbounds %S, %S* %end.cast, i32 -1
17+
%cmp = icmp eq %S* %last, %start.cast
18+
ret i1 %cmp
19+
}
20+
21+
; Same test using 64-bit indices.
22+
define i1 @test64([0 x %S]* %p, i64 %n) {
23+
; CHECK-LABEL: @test64(
24+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N:%.*]] to i32
25+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
26+
; CHECK-NEXT: ret i1 [[CMP]]
27+
;
28+
%start.cast = bitcast [0 x %S]* %p to %S*
29+
%end = getelementptr inbounds [0 x %S], [0 x %S]* %p, i64 0, i64 %n, i32 0, i64 0
30+
%end.cast = bitcast i32* %end to %S*
31+
%last = getelementptr inbounds %S, %S* %end.cast, i64 -1
32+
%cmp = icmp eq %S* %last, %start.cast
33+
ret i1 %cmp
34+
}
35+
36+
; Here the offset overflows and is treated modulo 2^32. This is UB.
37+
define i1 @test64_overflow([0 x %S]* %p, i64 %n) {
38+
; CHECK-LABEL: @test64_overflow(
39+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[N:%.*]] to i32
40+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
41+
; CHECK-NEXT: ret i1 [[CMP]]
42+
;
43+
%start.cast = bitcast [0 x %S]* %p to %S*
44+
%end = getelementptr inbounds [0 x %S], [0 x %S]* %p, i64 0, i64 %n, i32 0, i64 8589934592
45+
%end.cast = bitcast i32* %end to %S*
46+
%last = getelementptr inbounds %S, %S* %end.cast, i64 -1
47+
%cmp = icmp eq %S* %last, %start.cast
48+
ret i1 %cmp
49+
}

0 commit comments

Comments
 (0)