Closed
Description
There is a performance regression with shorthand array initialization (the repeat expression `[value; N]`) that causes a temporary stack allocation and a copy. It appears to have been introduced between Rust 1.11.0 and 1.12.0 and is still present in the current beta.
(All examples below are compiled with `-C opt-level=3`.)
Example:
/// Wrapper around a 128-element `u32` array (128 * 4 = 512 bytes).
pub struct BigTest {
// Private field; in this example it is only initialized by `BigTest::new`.
arr: [u32; 128]
}
impl BigTest {
/// Returns a `BigTest` whose 128 elements are all `123`.
///
/// The array is written with the repeat-expression shorthand `[123; 128]`,
/// which is the form this report is about.
pub fn new() -> BigTest {
BigTest {
// Shorthand initialization: one value repeated 128 times.
arr: [123; 128],
}
}
}
/// Public entry point that returns the result of `BigTest::new()` unchanged.
pub fn test() -> BigTest {
BigTest::new()
}
It appears that in this case a 512-byte temporary is allocated on the stack, initialized, and then copied to its final destination with `memcpy`.
This could cause performance issues when initializing large arrays.
# Constant pool entry: four 32-bit values of 123 (16 bytes), loaded into xmm0 below.
.LCPI0_0:
.long 123
.long 123
.long 123
.long 123
# example::BigTest::new (shorthand `[123; 128]` version).
# Fills a 512-byte buffer on the stack with the constant, then memcpy's that
# buffer to the address that was in rdi on entry, and returns that address in rax.
# NOTE(review): rdi is presumably the caller-provided return slot (SysV AMD64
# struct return) -- confirm against the ABI.
example::BigTest::new:
# rbx is used to keep the destination pointer alive across the memcpy call.
push rbx
# Reserve the 512-byte temporary.
sub rsp, 512
mov rbx, rdi
# xmm0 = {123, 123, 123, 123}
movaps xmm0, xmmword ptr [rip + .LCPI0_0]
# Thirty-two 16-byte stores covering offsets 0..496 of the stack temporary.
movaps xmmword ptr [rsp], xmm0
movaps xmmword ptr [rsp + 16], xmm0
movaps xmmword ptr [rsp + 32], xmm0
movaps xmmword ptr [rsp + 48], xmm0
movaps xmmword ptr [rsp + 64], xmm0
movaps xmmword ptr [rsp + 80], xmm0
movaps xmmword ptr [rsp + 96], xmm0
movaps xmmword ptr [rsp + 112], xmm0
movaps xmmword ptr [rsp + 128], xmm0
movaps xmmword ptr [rsp + 144], xmm0
movaps xmmword ptr [rsp + 160], xmm0
movaps xmmword ptr [rsp + 176], xmm0
movaps xmmword ptr [rsp + 192], xmm0
movaps xmmword ptr [rsp + 208], xmm0
movaps xmmword ptr [rsp + 224], xmm0
movaps xmmword ptr [rsp + 240], xmm0
movaps xmmword ptr [rsp + 256], xmm0
movaps xmmword ptr [rsp + 272], xmm0
movaps xmmword ptr [rsp + 288], xmm0
movaps xmmword ptr [rsp + 304], xmm0
movaps xmmword ptr [rsp + 320], xmm0
movaps xmmword ptr [rsp + 336], xmm0
movaps xmmword ptr [rsp + 352], xmm0
movaps xmmword ptr [rsp + 368], xmm0
movaps xmmword ptr [rsp + 384], xmm0
movaps xmmword ptr [rsp + 400], xmm0
movaps xmmword ptr [rsp + 416], xmm0
movaps xmmword ptr [rsp + 432], xmm0
movaps xmmword ptr [rsp + 448], xmm0
movaps xmmword ptr [rsp + 464], xmm0
movaps xmmword ptr [rsp + 480], xmm0
movaps xmmword ptr [rsp + 496], xmm0
# memcpy arguments: rdi is unchanged since entry (destination),
# rsi = stack temporary (source), edx = 512 (byte count).
mov rsi, rsp
mov edx, 512
call qword ptr [rip + memcpy@GOTPCREL]
# Return the destination pointer saved in rbx.
mov rax, rbx
# Release the temporary and restore rbx.
add rsp, 512
pop rbx
ret
# example::test: calls example::BigTest::new with rdi left unchanged and
# returns the original rdi value in rax.
example::test:
# rbx keeps the incoming rdi across the call.
push rbx
mov rbx, rdi
# Indirect call through the GOT entry for BigTest::new.
call qword ptr [rip + example::BigTest::new@GOTPCREL]
mov rax, rbx
pop rbx
ret
Without the shorthand (writing out all 128 elements explicitly), there is no temporary allocation:
/// Wrapper around a 128-element `u32` array (128 * 4 = 512 bytes).
pub struct BigTest {
// Private field; in this example it is only initialized by `BigTest::new`.
arr: [u32; 128]
}
impl BigTest {
/// Returns a `BigTest` whose 128 elements are all `123`.
///
/// Unlike the shorthand variant, every element is listed explicitly.
pub fn new() -> BigTest {
BigTest {
// 8 rows of 16 literals = 128 elements, all equal to 123.
arr: [
123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
],
}
}
}
/// Public entry point that returns the result of `BigTest::new()` unchanged.
pub fn test() -> BigTest {
BigTest::new()
}
# Constant pool entry: four 32-bit values of 123 (16 bytes), loaded into xmm0 below.
.LCPI0_0:
.long 123
.long 123
.long 123
.long 123
# example::BigTest::new (explicit element list version).
# Writes the constant directly to the 512 bytes starting at the address in rdi;
# no stack space is reserved and memcpy is not called.
# NOTE(review): rdi is presumably the caller-provided return slot (SysV AMD64
# struct return) -- confirm against the ABI.
example::BigTest::new:
# Return value: the destination pointer itself.
mov rax, rdi
# xmm0 = {123, 123, 123, 123}
movaps xmm0, xmmword ptr [rip + .LCPI0_0]
# Thirty-two 16-byte unaligned stores covering offsets 0..496 of the destination.
movups xmmword ptr [rdi], xmm0
movups xmmword ptr [rdi + 16], xmm0
movups xmmword ptr [rdi + 32], xmm0
movups xmmword ptr [rdi + 48], xmm0
movups xmmword ptr [rdi + 64], xmm0
movups xmmword ptr [rdi + 80], xmm0
movups xmmword ptr [rdi + 96], xmm0
movups xmmword ptr [rdi + 112], xmm0
movups xmmword ptr [rdi + 128], xmm0
movups xmmword ptr [rdi + 144], xmm0
movups xmmword ptr [rdi + 160], xmm0
movups xmmword ptr [rdi + 176], xmm0
movups xmmword ptr [rdi + 192], xmm0
movups xmmword ptr [rdi + 208], xmm0
movups xmmword ptr [rdi + 224], xmm0
movups xmmword ptr [rdi + 240], xmm0
movups xmmword ptr [rdi + 256], xmm0
movups xmmword ptr [rdi + 272], xmm0
movups xmmword ptr [rdi + 288], xmm0
movups xmmword ptr [rdi + 304], xmm0
movups xmmword ptr [rdi + 320], xmm0
movups xmmword ptr [rdi + 336], xmm0
movups xmmword ptr [rdi + 352], xmm0
movups xmmword ptr [rdi + 368], xmm0
movups xmmword ptr [rdi + 384], xmm0
movups xmmword ptr [rdi + 400], xmm0
movups xmmword ptr [rdi + 416], xmm0
movups xmmword ptr [rdi + 432], xmm0
movups xmmword ptr [rdi + 448], xmm0
movups xmmword ptr [rdi + 464], xmm0
movups xmmword ptr [rdi + 480], xmm0
movups xmmword ptr [rdi + 496], xmm0
ret
# example::test: calls example::BigTest::new with rdi left unchanged and
# returns the original rdi value in rax.
example::test:
# rbx keeps the incoming rdi across the call.
push rbx
mov rbx, rdi
# Indirect call through the GOT entry for BigTest::new.
call qword ptr [rip + example::BigTest::new@GOTPCREL]
mov rax, rbx
pop rbx
ret
With Rust 1.11.0, the shorthand version has no extra allocation (and `test` contains the stores directly instead of calling `BigTest::new`):
# Constant pool entry: four 32-bit values of 123 (16 bytes), loaded into xmm0 below.
.LCPI0_0:
.long 123
.long 123
.long 123
.long 123
# example::BigTest::new as emitted by Rust 1.11.0 for the shorthand version.
# Writes the constant directly to the 512 bytes starting at the address in rdi;
# apart from the saved rbp, no stack space is reserved and memcpy is not called.
# NOTE(review): rdi is presumably the caller-provided return slot (SysV AMD64
# struct return) -- confirm against the ABI.
example::BigTest::new:
# Frame-pointer prologue.
push rbp
mov rbp, rsp
# xmm0 = {123, 123, 123, 123}
movaps xmm0, xmmword ptr [rip + .LCPI0_0]
# Thirty-two 16-byte unaligned stores covering offsets 0..496 of the destination.
movups xmmword ptr [rdi], xmm0
movups xmmword ptr [rdi + 16], xmm0
movups xmmword ptr [rdi + 32], xmm0
movups xmmword ptr [rdi + 48], xmm0
movups xmmword ptr [rdi + 64], xmm0
movups xmmword ptr [rdi + 80], xmm0
movups xmmword ptr [rdi + 96], xmm0
movups xmmword ptr [rdi + 112], xmm0
movups xmmword ptr [rdi + 128], xmm0
movups xmmword ptr [rdi + 144], xmm0
movups xmmword ptr [rdi + 160], xmm0
movups xmmword ptr [rdi + 176], xmm0
movups xmmword ptr [rdi + 192], xmm0
movups xmmword ptr [rdi + 208], xmm0
movups xmmword ptr [rdi + 224], xmm0
movups xmmword ptr [rdi + 240], xmm0
movups xmmword ptr [rdi + 256], xmm0
movups xmmword ptr [rdi + 272], xmm0
movups xmmword ptr [rdi + 288], xmm0
movups xmmword ptr [rdi + 304], xmm0
movups xmmword ptr [rdi + 320], xmm0
movups xmmword ptr [rdi + 336], xmm0
movups xmmword ptr [rdi + 352], xmm0
movups xmmword ptr [rdi + 368], xmm0
movups xmmword ptr [rdi + 384], xmm0
movups xmmword ptr [rdi + 400], xmm0
movups xmmword ptr [rdi + 416], xmm0
movups xmmword ptr [rdi + 432], xmm0
movups xmmword ptr [rdi + 448], xmm0
movups xmmword ptr [rdi + 464], xmm0
movups xmmword ptr [rdi + 480], xmm0
movups xmmword ptr [rdi + 496], xmm0
# Return value: the destination pointer itself.
mov rax, rdi
pop rbp
ret
# Constant pool entry for example::test: four 32-bit values of 123 (16 bytes).
.LCPI1_0:
.long 123
.long 123
.long 123
.long 123
# example::test as emitted by Rust 1.11.0.
# Contains no call instruction: the same 32 stores as in BigTest::new are
# emitted here directly, writing 512 bytes at the address in rdi.
example::test:
# Frame-pointer prologue.
push rbp
mov rbp, rsp
# xmm0 = {123, 123, 123, 123}
movaps xmm0, xmmword ptr [rip + .LCPI1_0]
# Thirty-two 16-byte unaligned stores covering offsets 0..496 of the destination.
movups xmmword ptr [rdi], xmm0
movups xmmword ptr [rdi + 16], xmm0
movups xmmword ptr [rdi + 32], xmm0
movups xmmword ptr [rdi + 48], xmm0
movups xmmword ptr [rdi + 64], xmm0
movups xmmword ptr [rdi + 80], xmm0
movups xmmword ptr [rdi + 96], xmm0
movups xmmword ptr [rdi + 112], xmm0
movups xmmword ptr [rdi + 128], xmm0
movups xmmword ptr [rdi + 144], xmm0
movups xmmword ptr [rdi + 160], xmm0
movups xmmword ptr [rdi + 176], xmm0
movups xmmword ptr [rdi + 192], xmm0
movups xmmword ptr [rdi + 208], xmm0
movups xmmword ptr [rdi + 224], xmm0
movups xmmword ptr [rdi + 240], xmm0
movups xmmword ptr [rdi + 256], xmm0
movups xmmword ptr [rdi + 272], xmm0
movups xmmword ptr [rdi + 288], xmm0
movups xmmword ptr [rdi + 304], xmm0
movups xmmword ptr [rdi + 320], xmm0
movups xmmword ptr [rdi + 336], xmm0
movups xmmword ptr [rdi + 352], xmm0
movups xmmword ptr [rdi + 368], xmm0
movups xmmword ptr [rdi + 384], xmm0
movups xmmword ptr [rdi + 400], xmm0
movups xmmword ptr [rdi + 416], xmm0
movups xmmword ptr [rdi + 432], xmm0
movups xmmword ptr [rdi + 448], xmm0
movups xmmword ptr [rdi + 464], xmm0
movups xmmword ptr [rdi + 480], xmm0
movups xmmword ptr [rdi + 496], xmm0
# Return value: the destination pointer itself.
mov rax, rdi
pop rbp
ret
Metadata
Metadata
Assignees
Labels
No labels