Skip to content

Unnecessary memcpy when using array initialization shorthand #56882

Closed
@isegal

Description

@isegal

There is a performance regression with shorthand array initialization (`[value; N]`) that causes a temporary stack allocation and a copy. It appears to have been introduced between Rust 1.11.0 and 1.12.0 and is still present all the way up to the current beta.

(All examples below are compiled with -C opt-level=3.)

Example:

/// Reproduction type: a struct holding a single 128-element `u32` array
/// (128 * 4 = 512 bytes).
pub struct BigTest {
    /// The array whose initialization is being examined.
    arr: [u32; 128]
}

impl BigTest {
    /// Creates a `BigTest` with all 128 elements set to `123`.
    ///
    /// This variant uses the array repeat ("shorthand") expression
    /// `[123; 128]`, which is the form this report is about.
    pub fn new() -> BigTest {
        BigTest {
            // Repeat expression: 128 copies of the value 123.
            arr: [123; 128],
        }
    }
}

/// Free function that returns a freshly constructed `BigTest` by calling
/// `BigTest::new`.
pub fn test() -> BigTest {
    BigTest::new()
}

It appears that in this case a temporary is allocated on the stack, initialized, and then copied to the destination with `memcpy`.
This could cause performance issues when initializing large arrays.

# 16-byte constant: four 32-bit copies of 123, loaded into xmm0 below.
.LCPI0_0:
        .long   123
        .long   123
        .long   123
        .long   123
# BigTest::new compiled from the `[123; 128]` shorthand: the array is first
# written to a 512-byte stack buffer and then copied out with memcpy.
example::BigTest::new:
        push    rbx
        # Reserve 512 bytes of stack for the temporary array.
        sub     rsp, 512
        # Keep the incoming rdi in rbx so it can be returned in rax after the call.
        # NOTE(review): presumably rdi is the hidden return-slot pointer for the
        # returned BigTest (System V AMD64 ABI) -- confirm.
        mov     rbx, rdi
        movaps  xmm0, xmmword ptr [rip + .LCPI0_0]
        # 32 stores of 16 bytes each fill the stack temporary at [rsp, rsp + 512).
        movaps  xmmword ptr [rsp], xmm0
        movaps  xmmword ptr [rsp + 16], xmm0
        movaps  xmmword ptr [rsp + 32], xmm0
        movaps  xmmword ptr [rsp + 48], xmm0
        movaps  xmmword ptr [rsp + 64], xmm0
        movaps  xmmword ptr [rsp + 80], xmm0
        movaps  xmmword ptr [rsp + 96], xmm0
        movaps  xmmword ptr [rsp + 112], xmm0
        movaps  xmmword ptr [rsp + 128], xmm0
        movaps  xmmword ptr [rsp + 144], xmm0
        movaps  xmmword ptr [rsp + 160], xmm0
        movaps  xmmword ptr [rsp + 176], xmm0
        movaps  xmmword ptr [rsp + 192], xmm0
        movaps  xmmword ptr [rsp + 208], xmm0
        movaps  xmmword ptr [rsp + 224], xmm0
        movaps  xmmword ptr [rsp + 240], xmm0
        movaps  xmmword ptr [rsp + 256], xmm0
        movaps  xmmword ptr [rsp + 272], xmm0
        movaps  xmmword ptr [rsp + 288], xmm0
        movaps  xmmword ptr [rsp + 304], xmm0
        movaps  xmmword ptr [rsp + 320], xmm0
        movaps  xmmword ptr [rsp + 336], xmm0
        movaps  xmmword ptr [rsp + 352], xmm0
        movaps  xmmword ptr [rsp + 368], xmm0
        movaps  xmmword ptr [rsp + 384], xmm0
        movaps  xmmword ptr [rsp + 400], xmm0
        movaps  xmmword ptr [rsp + 416], xmm0
        movaps  xmmword ptr [rsp + 432], xmm0
        movaps  xmmword ptr [rsp + 448], xmm0
        movaps  xmmword ptr [rsp + 464], xmm0
        movaps  xmmword ptr [rsp + 480], xmm0
        movaps  xmmword ptr [rsp + 496], xmm0
        # The unnecessary copy: source = stack temporary (rsi), length = 512 (edx),
        # destination = rdi, which is unchanged since function entry.
        mov     rsi, rsp
        mov     edx, 512
        call    qword ptr [rip + memcpy@GOTPCREL]
        mov     rax, rbx
        add     rsp, 512
        pop     rbx
        ret

# test(): calls BigTest::new out of line; rdi is passed through unchanged and
# its original value is returned in rax.
example::test:
        push    rbx
        mov     rbx, rdi
        call    qword ptr [rip + example::BigTest::new@GOTPCREL]
        mov     rax, rbx
        pop     rbx
        ret

Without the shorthand (i.e. with every element written out explicitly), there is no temporary allocation:

/// Reproduction type: a struct holding a single 128-element `u32` array
/// (128 * 4 = 512 bytes).
pub struct BigTest {

    /// The array whose initialization is being examined.
    arr: [u32; 128]
}

impl BigTest {
    /// Creates a `BigTest` with all 128 elements set to `123`.
    ///
    /// This variant spells out every element in an array literal instead of
    /// using the repeat expression `[123; 128]`.
    pub fn new() -> BigTest {
        BigTest {
    
            // 8 rows of 16 literals = 128 elements, all equal to 123.
            arr: [
            123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
            123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
            123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
            123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
            123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
            123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
            123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
            123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
            ], 
        }
    }
}

/// Free function that returns a freshly constructed `BigTest` by calling
/// `BigTest::new`.
pub fn test() -> BigTest {
    BigTest::new()
}
# 16-byte constant: four 32-bit copies of 123, loaded into xmm0 below.
.LCPI0_0:
        .long   123
        .long   123
        .long   123
        .long   123
# BigTest::new compiled from the explicit 128-element literal: no stack space
# is reserved and memcpy is not called.
example::BigTest::new:
        mov     rax, rdi
        movaps  xmm0, xmmword ptr [rip + .LCPI0_0]
        # 32 stores of 16 bytes each write the 512 bytes directly at [rdi, rdi + 512).
        movups  xmmword ptr [rdi], xmm0
        movups  xmmword ptr [rdi + 16], xmm0
        movups  xmmword ptr [rdi + 32], xmm0
        movups  xmmword ptr [rdi + 48], xmm0
        movups  xmmword ptr [rdi + 64], xmm0
        movups  xmmword ptr [rdi + 80], xmm0
        movups  xmmword ptr [rdi + 96], xmm0
        movups  xmmword ptr [rdi + 112], xmm0
        movups  xmmword ptr [rdi + 128], xmm0
        movups  xmmword ptr [rdi + 144], xmm0
        movups  xmmword ptr [rdi + 160], xmm0
        movups  xmmword ptr [rdi + 176], xmm0
        movups  xmmword ptr [rdi + 192], xmm0
        movups  xmmword ptr [rdi + 208], xmm0
        movups  xmmword ptr [rdi + 224], xmm0
        movups  xmmword ptr [rdi + 240], xmm0
        movups  xmmword ptr [rdi + 256], xmm0
        movups  xmmword ptr [rdi + 272], xmm0
        movups  xmmword ptr [rdi + 288], xmm0
        movups  xmmword ptr [rdi + 304], xmm0
        movups  xmmword ptr [rdi + 320], xmm0
        movups  xmmword ptr [rdi + 336], xmm0
        movups  xmmword ptr [rdi + 352], xmm0
        movups  xmmword ptr [rdi + 368], xmm0
        movups  xmmword ptr [rdi + 384], xmm0
        movups  xmmword ptr [rdi + 400], xmm0
        movups  xmmword ptr [rdi + 416], xmm0
        movups  xmmword ptr [rdi + 432], xmm0
        movups  xmmword ptr [rdi + 448], xmm0
        movups  xmmword ptr [rdi + 464], xmm0
        movups  xmmword ptr [rdi + 480], xmm0
        movups  xmmword ptr [rdi + 496], xmm0
        ret

# test(): calls BigTest::new out of line; rdi is passed through unchanged and
# its original value is returned in rax.
example::test:
        push    rbx
        mov     rbx, rdi
        call    qword ptr [rip + example::BigTest::new@GOTPCREL]
        mov     rax, rbx
        pop     rbx
        ret

With Rust 1.11.0, using the shorthand produces no extra allocation:

# 16-byte constant: four 32-bit copies of 123, loaded into xmm0 below.
.LCPI0_0:
        .long   123
        .long   123
        .long   123
        .long   123
# BigTest::new as emitted by Rust 1.11.0 for the shorthand form: a frame
# pointer is set up, but no stack buffer is reserved and memcpy is not called.
example::BigTest::new:
        push    rbp
        mov     rbp, rsp
        movaps  xmm0, xmmword ptr [rip + .LCPI0_0]
        # 32 stores of 16 bytes each write the 512 bytes directly at [rdi, rdi + 512).
        movups  xmmword ptr [rdi], xmm0
        movups  xmmword ptr [rdi + 16], xmm0
        movups  xmmword ptr [rdi + 32], xmm0
        movups  xmmword ptr [rdi + 48], xmm0
        movups  xmmword ptr [rdi + 64], xmm0
        movups  xmmword ptr [rdi + 80], xmm0
        movups  xmmword ptr [rdi + 96], xmm0
        movups  xmmword ptr [rdi + 112], xmm0
        movups  xmmword ptr [rdi + 128], xmm0
        movups  xmmword ptr [rdi + 144], xmm0
        movups  xmmword ptr [rdi + 160], xmm0
        movups  xmmword ptr [rdi + 176], xmm0
        movups  xmmword ptr [rdi + 192], xmm0
        movups  xmmword ptr [rdi + 208], xmm0
        movups  xmmword ptr [rdi + 224], xmm0
        movups  xmmword ptr [rdi + 240], xmm0
        movups  xmmword ptr [rdi + 256], xmm0
        movups  xmmword ptr [rdi + 272], xmm0
        movups  xmmword ptr [rdi + 288], xmm0
        movups  xmmword ptr [rdi + 304], xmm0
        movups  xmmword ptr [rdi + 320], xmm0
        movups  xmmword ptr [rdi + 336], xmm0
        movups  xmmword ptr [rdi + 352], xmm0
        movups  xmmword ptr [rdi + 368], xmm0
        movups  xmmword ptr [rdi + 384], xmm0
        movups  xmmword ptr [rdi + 400], xmm0
        movups  xmmword ptr [rdi + 416], xmm0
        movups  xmmword ptr [rdi + 432], xmm0
        movups  xmmword ptr [rdi + 448], xmm0
        movups  xmmword ptr [rdi + 464], xmm0
        movups  xmmword ptr [rdi + 480], xmm0
        movups  xmmword ptr [rdi + 496], xmm0
        mov     rax, rdi
        pop     rbp
        ret

# 16-byte constant: four 32-bit copies of 123, loaded into xmm0 below.
.LCPI1_0:
        .long   123
        .long   123
        .long   123
        .long   123
# test() as emitted by Rust 1.11.0: there is no call instruction here; the
# same 32 stores appear inline, writing directly through rdi.
example::test:
        push    rbp
        mov     rbp, rsp
        movaps  xmm0, xmmword ptr [rip + .LCPI1_0]
        # 32 stores of 16 bytes each write the 512 bytes directly at [rdi, rdi + 512).
        movups  xmmword ptr [rdi], xmm0
        movups  xmmword ptr [rdi + 16], xmm0
        movups  xmmword ptr [rdi + 32], xmm0
        movups  xmmword ptr [rdi + 48], xmm0
        movups  xmmword ptr [rdi + 64], xmm0
        movups  xmmword ptr [rdi + 80], xmm0
        movups  xmmword ptr [rdi + 96], xmm0
        movups  xmmword ptr [rdi + 112], xmm0
        movups  xmmword ptr [rdi + 128], xmm0
        movups  xmmword ptr [rdi + 144], xmm0
        movups  xmmword ptr [rdi + 160], xmm0
        movups  xmmword ptr [rdi + 176], xmm0
        movups  xmmword ptr [rdi + 192], xmm0
        movups  xmmword ptr [rdi + 208], xmm0
        movups  xmmword ptr [rdi + 224], xmm0
        movups  xmmword ptr [rdi + 240], xmm0
        movups  xmmword ptr [rdi + 256], xmm0
        movups  xmmword ptr [rdi + 272], xmm0
        movups  xmmword ptr [rdi + 288], xmm0
        movups  xmmword ptr [rdi + 304], xmm0
        movups  xmmword ptr [rdi + 320], xmm0
        movups  xmmword ptr [rdi + 336], xmm0
        movups  xmmword ptr [rdi + 352], xmm0
        movups  xmmword ptr [rdi + 368], xmm0
        movups  xmmword ptr [rdi + 384], xmm0
        movups  xmmword ptr [rdi + 400], xmm0
        movups  xmmword ptr [rdi + 416], xmm0
        movups  xmmword ptr [rdi + 432], xmm0
        movups  xmmword ptr [rdi + 448], xmm0
        movups  xmmword ptr [rdi + 464], xmm0
        movups  xmmword ptr [rdi + 480], xmm0
        movups  xmmword ptr [rdi + 496], xmm0
        mov     rax, rdi
        pop     rbp
        ret

Source: https://rust.godbolt.org/z/isxu3Y

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions