Skip to content

Missed optimization leading to inability to remove bounds check #49229

Closed
@alex

Description

@alex
Bugzilla Link 49885
Version unspecified
OS All
CC @comex,@fhahn,@jrmuizel,@rotateright

Extended Description

(All content available as godbolt: https://godbolt.org/z/s6deqs71h)

Given the following code, llvm should be able to prove that the bounds checks within the loop are never necessary:

#include <cassert>
#include <cstdint>
#include <cstddef>
#include <vector>

uint64_t f(std::vector<uint64_t>& data, size_t start, size_t end){
    assert(start < end && start < data.size() && end <= data.size());

    uint64_t total = 0;
    for (size_t i = start; i < end; i++) {
        total += data.at(i);
    }
    return total;
}

However the following code is generated:

f(std::vector<unsigned long, std::allocator<unsigned long> >&, unsigned long, unsigned long):               # @&#8203;f(std::vector<unsigned long, std::allocator<unsigned long> >&, unsigned long, unsigned long)
        push    rax
        cmp     rsi, rdx
        jae     .LBB0_6
        mov     r9, qword ptr [rdi]
        mov     r10, qword ptr [rdi + 8]
        mov     rax, r10
        sub     rax, r9
        mov     r8, rax
        sar     r8, 3
        cmp     r8, rsi
        jbe     .LBB0_6
        cmp     r8, rdx
        jb      .LBB0_6
        test    rax, rax
        mov     rcx, -1
        cmovns  rcx, rax
        test    rcx, rcx
        mov     edi, 1
        cmovle  rdi, rcx
        mov     rcx, r9
        sub     rcx, r10
        cmp     rcx, rax
        cmovle  rcx, rax
        shr     rcx, 3
        imul    rcx, rdi
        cmp     rsi, rcx
        cmova   rcx, rsi
        xor     eax, eax
.LBB0_4:                                # =>This Inner Loop Header: Depth=1
        cmp     rcx, rsi
        je      .LBB0_5
        add     rax, qword ptr [r9 + 8*rsi]
        add     rsi, 1
        cmp     rdx, rsi
        jne     .LBB0_4
        pop     rcx
        ret
.LBB0_5:
        mov     edi, offset .L.str.2
        mov     rsi, rcx
        mov     rdx, r8
        xor     eax, eax
        call    std::__throw_out_of_range_fmt(char const*, ...)
.LBB0_6:
        mov     edi, offset .L.str
        mov     esi, offset .L.str.1
        mov     ecx, offset .L__PRETTY_FUNCTION__.f(std::vector<unsigned long, std::allocator<unsigned long> >&, unsigned long, unsigned long)
        mov     edx, 7
        call    __assert_fail
.L.str:
        .asciz  "start < end && start < data.size() && end <= data.size()"

.L.str.1:
        .asciz  "/app/example.cpp"

.L__PRETTY_FUNCTION__.f(std::vector<unsigned long, std::allocator<unsigned long> >&, unsigned long, unsigned long):
        .asciz  "uint64_t f(std::vector<uint64_t> &, size_t, size_t)"

.L.str.2:
        .asciz  "vector::_M_range_check: __n (which is %zu) >= this->size() (which is %zu)"

The throw_out_of_range exception code should be able to be optimized out.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions