Closed
Description
Code
I tried this code:
use core::iter;
/// Returns `a` with each byte OR-ed with the byte at the same index in `b`.
///
/// `b` is handed to `iter::zip` by value, so the closure receives each
/// element of `b` as an owned `u8` (paired with a `&mut u8` into `a`).
/// This is the variant the report identifies as having the worse codegen.
pub fn array(mut a: [u8; 32], b: [u8; 32]) -> [u8; 32] {
// Pair every mutable byte of `a` with the by-value byte of `b` and OR in place.
iter::zip(&mut a, b).for_each(|(a, b)| *a |= b);
a
}
/// Returns `a` with each byte OR-ed with the byte at the same index in `b`.
///
/// Unlike `array`, `b` is handed to `iter::zip` by reference (`&b`), so the
/// closure receives `&u8` items and dereferences them. This is the variant
/// the report uses as the well-optimized baseline.
pub fn slice(mut a: [u8; 32], b: [u8; 32]) -> [u8; 32] {
// Pair every mutable byte of `a` with a shared reference into `b` and OR in place.
iter::zip(&mut a, &b).for_each(|(a, b)| *a |= *b);
a
}
I expected to see this happen: both functions compile to the same code, or at least to code with similar performance.
Instead, this happened: `array` has far worse codegen. It ORs the 32 bytes one at a time, whereas `slice` uses a single 32-byte vector OR.
Assembly
# Codegen for `array`: the OR is scalarized into 32 separate byte-sized
# read-modify-write operations instead of one vector OR.
# NOTE(review): presumably rdi = return slot, rsi = pointer to `a`,
# rdx = pointer to `b` — confirm against the calling convention in use.
example::array:
# Load the 32 bytes at [rdx] into two 16-byte registers.
vmovdqu xmm1, xmmword ptr [rdx]
vmovdqu xmm0, xmmword ptr [rdx + 16]
# rax receives the pointer held in rdi.
mov rax, rdi
# Bytes 0-15: move each byte of xmm1 into cl and OR it into [rsi + i].
vmovd ecx, xmm1
or byte ptr [rsi], cl
vpextrb ecx, xmm1, 1
or byte ptr [rsi + 1], cl
vpextrb ecx, xmm1, 2
or byte ptr [rsi + 2], cl
vpextrb ecx, xmm1, 3
or byte ptr [rsi + 3], cl
vpextrb ecx, xmm1, 4
or byte ptr [rsi + 4], cl
vpextrb ecx, xmm1, 5
or byte ptr [rsi + 5], cl
vpextrb ecx, xmm1, 6
or byte ptr [rsi + 6], cl
vpextrb ecx, xmm1, 7
or byte ptr [rsi + 7], cl
vpextrb ecx, xmm1, 8
or byte ptr [rsi + 8], cl
vpextrb ecx, xmm1, 9
or byte ptr [rsi + 9], cl
vpextrb ecx, xmm1, 10
or byte ptr [rsi + 10], cl
vpextrb ecx, xmm1, 11
or byte ptr [rsi + 11], cl
vpextrb ecx, xmm1, 12
or byte ptr [rsi + 12], cl
vpextrb ecx, xmm1, 13
or byte ptr [rsi + 13], cl
vpextrb ecx, xmm1, 14
or byte ptr [rsi + 14], cl
vpextrb ecx, xmm1, 15
or byte ptr [rsi + 15], cl
# Bytes 16-31: the same extract-and-OR pattern, now reading from xmm0.
vmovd ecx, xmm0
or byte ptr [rsi + 16], cl
vpextrb ecx, xmm0, 1
or byte ptr [rsi + 17], cl
vpextrb ecx, xmm0, 2
or byte ptr [rsi + 18], cl
vpextrb ecx, xmm0, 3
or byte ptr [rsi + 19], cl
vpextrb ecx, xmm0, 4
or byte ptr [rsi + 20], cl
vpextrb ecx, xmm0, 5
or byte ptr [rsi + 21], cl
vpextrb ecx, xmm0, 6
or byte ptr [rsi + 22], cl
vpextrb ecx, xmm0, 7
or byte ptr [rsi + 23], cl
vpextrb ecx, xmm0, 8
or byte ptr [rsi + 24], cl
vpextrb ecx, xmm0, 9
or byte ptr [rsi + 25], cl
vpextrb ecx, xmm0, 10
or byte ptr [rsi + 26], cl
vpextrb ecx, xmm0, 11
or byte ptr [rsi + 27], cl
vpextrb ecx, xmm0, 12
or byte ptr [rsi + 28], cl
vpextrb ecx, xmm0, 13
or byte ptr [rsi + 29], cl
vpextrb ecx, xmm0, 14
or byte ptr [rsi + 30], cl
vpextrb ecx, xmm0, 15
or byte ptr [rsi + 31], cl
# Copy the 32 updated bytes from [rsi] to [rdi].
vmovups ymm0, ymmword ptr [rsi]
vmovups ymmword ptr [rdi], ymm0
vzeroupper
ret
# Codegen for `slice`: the whole 32-byte OR is a single vector instruction.
# NOTE(review): presumably rdi = return slot, rsi = pointer to `a`,
# rdx = pointer to `b` — confirm against the calling convention in use.
example::slice:
# Load 32 bytes from [rsi].
vmovups ymm0, ymmword ptr [rsi]
# rax receives the pointer held in rdi.
mov rax, rdi
# OR all 32 bytes with the 32 bytes at [rdx] at once.
vorps ymm0, ymm0, ymmword ptr [rdx]
# Store the result to both [rsi] and [rdi].
vmovups ymmword ptr [rsi], ymm0
vmovups ymmword ptr [rdi], ymm0
vzeroupper
ret
Version it worked on
It most recently worked on: 1.64
Version with regression
1.65 through the current nightly.
Regressed in nightly-2022-08-13, possibly caused by the LLVM 15 upgrade (#99464). Bisection output:
fetching (via local git) commits from 20ffea6938b5839c390252e07940b99e3b6a889a to f22819bcce4abaff7d1246a56eec493418f9f4ee
found 8 bors merge commits in the specified range
commit[0] 2022-08-11: Auto merge of #100416 - Dylan-DPC:rollup-m344lh1, r=Dylan-DPC
commit[1] 2022-08-11: Auto merge of #100426 - matthiaskrgr:rollup-0ks4dou, r=matthiaskrgr
commit[2] 2022-08-12: Auto merge of #100419 - flip1995:clippyup, r=Manishearth
commit[3] 2022-08-12: Auto merge of #99464 - nikic:llvm-15, r=cuviper
commit[4] 2022-08-12: Auto merge of #100435 - ehuss:update-cargo, r=ehuss
commit[5] 2022-08-12: Auto merge of #99624 - vincenzopalazzo:macros/unix_error, r=Amanieu
commit[6] 2022-08-12: Auto merge of #100328 - davidtwco:perf-implications, r=nnethercote
commit[7] 2022-08-12: Auto merge of #100456 - Dylan-DPC:rollup-fn17z9f, r=Dylan-DPC
rustc --version --verbose:
rustc 1.74.0-nightly (8550f15e1 2023-08-27)
binary: rustc
commit-hash: 8550f15e148407159af401e02b1d9259762b3496
commit-date: 2023-08-27
host: x86_64-unknown-linux-gnu
release: 1.74.0-nightly
LLVM version: 17.0.0
Backtrace
Backtrace
N/A
@rustbot modify labels: +regression-from-stable-to-stable -regression-untriaged
Metadata
Metadata
Assignees
Labels
Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.
Category: This is a bug.
Category: An issue highlighting optimization opportunities or PRs implementing such
Issue: Problems and improvements with respect to performance of generated code.
Medium priority
Relevant to the compiler team, which will review and decide on the PR/issue.
Issue expected to be fixed by the next major LLVM upgrade, or backported fixes
Performance or correctness regression from one stable version to another.