Description
Reduced IR:
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "amd64-pc-windows-msvc19.41.34123"
%struct.anon = type { [2 x %"struct.(anonymous namespace)::mt_shared_object"], %"class.vsm::atomic_intrusive_ptr", [48 x i8], %"struct.std::atomic.3", [56 x i8], %"struct.std::atomic_flag", [60 x i8] }
%"struct.(anonymous namespace)::mt_shared_object" = type { %"class.vsm::detail::basic_intrusive_refcount", %"struct.std::atomic_flag", ptr, [40 x i8] }
%"class.vsm::detail::basic_intrusive_refcount" = type { %"struct.vsm::detail::intrusive_refcount_base" }
%"struct.vsm::detail::intrusive_refcount_base" = type { %"class.vsm::atomic" }
%"class.vsm::atomic" = type { i64 }
%"class.vsm::atomic_intrusive_ptr" = type { %"class.vsm::atomic.2" }
%"class.vsm::atomic.2" = type { %"struct.vsm::atomic_intrusive_ptr<(anonymous namespace)::mt_shared_object>::atom" }
%"struct.vsm::atomic_intrusive_ptr<(anonymous namespace)::mt_shared_object>::atom" = type { ptr, i64 }
%"struct.std::atomic.3" = type { %"struct.std::_Atomic_integral_facade.4" }
%"struct.std::_Atomic_integral_facade.4" = type { %"struct.std::_Atomic_integral.5" }
%"struct.std::_Atomic_integral.5" = type { %"struct.std::_Atomic_storage.6" }
%"struct.std::_Atomic_storage.6" = type { %"struct.std::_Atomic_padded.7" }
%"struct.std::_Atomic_padded.7" = type { i64 }
%"struct.std::atomic_flag" = type { %"struct.std::atomic" }
%"struct.std::atomic" = type { %"struct.std::_Atomic_integral_facade" }
%"struct.std::_Atomic_integral_facade" = type { %"struct.std::_Atomic_integral" }
%"struct.std::_Atomic_integral" = type { %"struct.std::_Atomic_storage" }
%"struct.std::_Atomic_storage" = type { %"struct.std::_Atomic_padded" }
%"struct.std::_Atomic_padded" = type { i32 }
; Reduced reproducer: a 128-bit cmpxchg on the address of a 64-byte-aligned
; alloca, inside a function that uses the __CxxFrameHandler3 personality and
; contains an invoke with a cleanup pad.
define fastcc void @"?test_case@?A0x7E1854EA@@YAXXZ"() #0 personality ptr @__CxxFrameHandler3 {
; Zero-sized alloca with align 64. NOTE(review): the over-alignment is presumably
; what forces the stack realignment and the rbx-based frame addressing visible in
; the generated code -- confirm.
%1 = alloca [0 x [0 x %struct.anon]], i32 0, align 64
; 16-byte compare-exchange (expected = 0, replacement = 0) on that address; this
; is the operation that shows up as `lock cmpxchg16b` in the object code.
%2 = cmpxchg ptr %1, i128 0, i128 0 monotonic monotonic, align 16
; Call with an unwind edge to the cleanup pad in block %4; the normal
; destination %3 is unreachable.
invoke void @"?_Throw_Cpp_error@std@@YAXH@Z"(i32 0)
to label %3 unwind label %4
3: ; preds = %0
unreachable
4: ; preds = %0
%5 = cleanuppad within none []
ret void
}
declare i32 @__CxxFrameHandler3(...)
declare void @"?_Throw_Cpp_error@std@@YAXH@Z"()
; uselistorder directives
uselistorder i32 0, { 1, 0 }
attributes #0 = { "target-cpu"="nehalem" }
Here is the resulting object code, produced with `clang-19 -cc1 -emit-obj -triple "amd64-pc-windows-msvc19.41.34123" -O3 reduced.ll -o - | llvm-objdump-19 -M intel -d -`:
0000000000000000 <?test_case@?A0x7E1854EA@@YAXXZ>:
0: 55 push rbp
1: 53 push rbx
2: 48 83 ec 68 sub rsp, 0x68
6: 48 8d 6c 24 60 lea rbp, [rsp + 0x60]
b: 48 83 e4 c0 and rsp, -0x40
f: 48 89 e3 mov rbx, rsp
12: 48 89 6b 58 mov qword ptr [rbx + 0x58], rbp
16: 48 c7 45 00 fe ff ff ff mov qword ptr [rbp], -0x2
1e: 49 89 d8 mov r8, rbx
21: 45 31 c9 xor r9d, r9d
24: 31 c0 xor eax, eax
26: 31 d2 xor edx, edx
28: 31 c9 xor ecx, ecx
2a: 4c 89 cb mov rbx, r9
2d: f0 lock
2e: 48 0f c7 4b 40 cmpxchg16b xmmword ptr [rbx + 0x40]
33: 4c 89 c3 mov rbx, r8
36: 31 c9 xor ecx, ecx
38: e8 00 00 00 00 call 0x3d <?test_case@?A0x7E1854EA@@YAXXZ+0x3d>
3d: cc int3
3e: 66 90 nop
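Note the `mov rbx, r9` at offset 0x2a, followed by `cmpxchg16b xmmword ptr [rbx + 0x40]` at offset 0x2e. Here `rbx` is used as the base of the memory operand after having just been overwritten for the purposes of `cmpxchg16b`, which uses it as an implicit input register (`rcx:rbx` holds the replacement value). The previous value of `rbx` (the realigned stack pointer) was saved in `r8` at offset 0x1e and is only restored at offset 0x33, after the instruction, so the memory operand is computed from the wrong base.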
The original unreduced input produces slightly different object code but has the same problem:
00007FF786689459 lea r8,[rbx+100h]
00007FF786689460 mov rax,qword ptr [rbx+140h]
00007FF786689467 mov rdx,qword ptr [rbx+148h]
00007FF78668946E nop
00007FF786689470 mov r9,rbx
00007FF786689473 xor ecx,ecx
00007FF786689475 mov rbx,r8
00007FF786689478 lock cmpxchg16b oword ptr [rbx+140h]