Closed
Description
Bugzilla Link | 49885 |
Version | unspecified |
OS | All |
CC | @comex,@fhahn,@jrmuizel,@rotateright |
Extended Description
(All content available as godbolt: https://godbolt.org/z/s6deqs71h)
Given the following code, llvm should be able to prove that the bounds checks within the loop are never necessary:
#include <cassert>
#include <cstdint>
#include <cstddef>
#include <vector>
uint64_t f(std::vector<uint64_t>& data, size_t start, size_t end){
assert(start < end && start < data.size() && end <= data.size());
uint64_t total = 0;
for (size_t i = start; i < end; i++) {
total += data.at(i);
}
return total;
}
However the following code is generated:
f(std::vector<unsigned long, std::allocator<unsigned long> >&, unsigned long, unsigned long): # @​f(std::vector<unsigned long, std::allocator<unsigned long> >&, unsigned long, unsigned long)
push rax
cmp rsi, rdx
jae .LBB0_6
mov r9, qword ptr [rdi]
mov r10, qword ptr [rdi + 8]
mov rax, r10
sub rax, r9
mov r8, rax
sar r8, 3
cmp r8, rsi
jbe .LBB0_6
cmp r8, rdx
jb .LBB0_6
test rax, rax
mov rcx, -1
cmovns rcx, rax
test rcx, rcx
mov edi, 1
cmovle rdi, rcx
mov rcx, r9
sub rcx, r10
cmp rcx, rax
cmovle rcx, rax
shr rcx, 3
imul rcx, rdi
cmp rsi, rcx
cmova rcx, rsi
xor eax, eax
.LBB0_4: # =>This Inner Loop Header: Depth=1
cmp rcx, rsi
je .LBB0_5
add rax, qword ptr [r9 + 8*rsi]
add rsi, 1
cmp rdx, rsi
jne .LBB0_4
pop rcx
ret
.LBB0_5:
mov edi, offset .L.str.2
mov rsi, rcx
mov rdx, r8
xor eax, eax
call std::__throw_out_of_range_fmt(char const*, ...)
.LBB0_6:
mov edi, offset .L.str
mov esi, offset .L.str.1
mov ecx, offset .L__PRETTY_FUNCTION__.f(std::vector<unsigned long, std::allocator<unsigned long> >&, unsigned long, unsigned long)
mov edx, 7
call __assert_fail
.L.str:
.asciz "start < end && start < data.size() && end <= data.size()"
.L.str.1:
.asciz "/app/example.cpp"
.L__PRETTY_FUNCTION__.f(std::vector<unsigned long, std::allocator<unsigned long> >&, unsigned long, unsigned long):
.asciz "uint64_t f(std::vector<uint64_t> &, size_t, size_t)"
.L.str.2:
.asciz "vector::_M_range_check: __n (which is %zu) >= this->size() (which is %zu)"
The throw_out_of_range exception code should be able to be optimized out.