Skip to content

[X86] Bad mulhi codegen #109790

Closed
Closed
@nikic

Description

@nikic

From rust-lang/rust#130782, consider these two functions, which differ only in the multiplier constant (-1000 vs. 1000) and in the `nuw`/`nsw` flags on the `mul` and `trunc` that follow from it:

; Reproducer, negative multiplier: for each of the 16 i16 lanes loaded from %a,
; clear the top bit, widen to i32, multiply by -1000, and store bits 16..31 of
; the product to %ret.
define void @ok(ptr sret([32 x i8]) %ret, ptr %a) {
entry:
  %0 = load <16 x i16>, ptr %a, align 32
  ; Mask every lane to 0..32767, so the value is non-negative as a signed i16.
  %1 = and <16 x i16> %0, <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>
  %2 = zext nneg <16 x i16> %1 to <16 x i32>
  ; Only `nsw` here: the product with -1000 is negative for any non-zero lane.
  %3 = mul nsw <16 x i32> %2, <i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000>
  ; Logical shift by 16 moves the high half of each product into the low half
  ; and zeroes the upper 16 bits, so the truncation below loses no set bits.
  %4 = lshr <16 x i32> %3, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %5 = trunc nuw <16 x i32> %4 to <16 x i16>
  store <16 x i16> %5, ptr %ret, align 32
  ret void
}

; Reproducer, positive multiplier: for each of the 16 i16 lanes loaded from %a,
; clear the top bit, widen to i32, multiply by 1000, and store bits 16..31 of
; the product to %ret.
define void @bad(ptr sret([32 x i8]) %ret, ptr %a) {
entry:
  %0 = load <16 x i16>, ptr %a, align 32
  ; Mask every lane to 0..32767, so the value is non-negative as a signed i16.
  %1 = and <16 x i16> %0, <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>
  %2 = zext nneg <16 x i16> %1 to <16 x i32>
  ; Both `nuw` and `nsw`: the largest product is 32767 * 1000 = 32767000, which
  ; fits in a non-negative i32.
  %3 = mul nuw nsw <16 x i32> %2, <i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000>
  ; After the shift each lane is at most 32767000 >> 16 = 499, so the truncation
  ; to i16 below is lossless both as unsigned and as signed.
  %4 = lshr <16 x i32> %3, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %5 = trunc nuw nsw <16 x i32> %4 to <16 x i16>
  store <16 x i16> %5, ptr %ret, align 32
  ret void
}

These generate the following x86-64 assembly; note that `ok` uses a single `vpmulhw`, while `bad` does not:

# Compiler output for @ok: the widen / multiply / shift / truncate sequence
# is emitted as one signed high-half multiply (vpmulhw).
ok:                                     # @ok
        # rax = rdi, the sret destination that is written below.
        mov     rax, rdi
        vmovdqa ymm0, ymmword ptr [rsi]
        # Mask with the constant-pool vector .LCPI0_0 (contents not shown in
        # this listing; presumably the 0x7FFF splat from the IR).
        vpand   ymm0, ymm0, ymmword ptr [rip + .LCPI0_0]
        # 64536 is -1000 printed as an unsigned 16-bit value (65536 - 1000).
        vpmulhw ymm0, ymm0, ymmword ptr [rip + .LCPI0_1] # [64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536]
        vmovdqa ymmword ptr [rdi], ymm0
        vzeroupper
        ret

# Compiler output for @bad: the input is split into two halves, each is
# zero-extended to 32-bit lanes and multiplied separately, and the results
# are then packed back to 16-bit lanes.
bad:                                    # @bad
        # rax = rdi, the sret destination that is written below.
        mov     rax, rdi
        vmovdqa ymm0, ymmword ptr [rsi]
        # Mask with the constant-pool vector .LCPI1_0 (contents not shown in
        # this listing; presumably the 0x7FFF splat from the IR).
        vpand   ymm0, ymm0, ymmword ptr [rip + .LCPI1_0]
        # Zero-extend the low eight 16-bit lanes into ymm1 ...
        vpmovzxwd       ymm1, xmm0              # ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
        # ... and the high eight 16-bit lanes into ymm0.
        vextracti128    xmm0, ymm0, 1
        vpmovzxwd       ymm0, xmm0              # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
        # The multiplier is 1000 in the low word of each dword and 0 in the
        # high word, matching the zero-extended layout of ymm0/ymm1.
        vpbroadcastd    ymm2, dword ptr [rip + .LCPI1_1] # ymm2 = [1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0]
        # Unsigned high-half multiply per 16-bit lane: the low word of each
        # dword becomes (x * 1000) >> 16, the high word stays 0.
        vpmulhuw        ymm0, ymm0, ymm2
        vpmulhuw        ymm1, ymm1, ymm2
        # Narrow the dwords back to words; the pack works within each 128-bit
        # half, so the following vpermq puts the quadwords back in order.
        vpackusdw       ymm0, ymm1, ymm0
        vpermq  ymm0, ymm0, 216                 # ymm0 = ymm0[0,2,1,3]
        vmovdqa ymmword ptr [rdi], ymm0
        vzeroupper
        ret

Original rust functions:

use std::arch::x86_64::*;

/// Masks every 16-bit lane of `a` with `0x7FFF`, then returns the high 16 bits
/// of each lane's signed product with `-1000`.
///
/// This is the variant reported to compile to a single `vpmulhw`.
///
/// # Safety
///
/// Calls AVX2 intrinsics; the caller must ensure the CPU supports AVX2.
pub unsafe fn ok(a: __m256i) -> __m256i {
    // Clear the top bit of each lane so every lane is in 0..=0x7FFF.
    let a = _mm256_and_si256(a, _mm256_set1_epi16(0x7FFF));
    _mm256_mulhi_epi16(a, _mm256_set1_epi16(-1000))
}

/// Masks every 16-bit lane of `a` with `0x7FFF`, then returns the high 16 bits
/// of each lane's signed product with `1000`.
///
/// This is the variant reported to compile to a longer widen / multiply / pack
/// sequence instead of a single `vpmulhw`.
///
/// # Safety
///
/// Calls AVX2 intrinsics; the caller must ensure the CPU supports AVX2.
pub unsafe fn bad(a: __m256i) -> __m256i {
    // Clear the top bit of each lane so every lane is in 0..=0x7FFF.
    let a = _mm256_and_si256(a, _mm256_set1_epi16(0x7FFF));
    _mm256_mulhi_epi16(a, _mm256_set1_epi16(1000))
}

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions