Skip to content

[Win/X86] Make _m_prefetch[w] builtins to avoid winnt.h conflicts #115099

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}
}

let Features = "sse", Header = "xmmintrin.h", Attributes = [NoThrow, Const] in {
def _mm_prefetch : X86LibBuiltin<"void(void const *, int)">;
}

// AVX
let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in {
foreach Op = ["addsub", "hadd", "hsub", "max", "min"] in {
Expand All @@ -138,6 +142,12 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in
}
}

// PRFCHW
let Features = "prfchw", Header = "x86intrin.h", Attributes = [NoThrow, Const] in {
def _m_prefetch : X86LibBuiltin<"void(void *)">;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only _m_prefetchw requires "prfchw". _m_prefetch can be put together with _mm_prefetch.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PTAL at the new patch: #138360

At least based on the header structure before this, they were both in prftchwintrin.h , so I haven't made this change.

def _m_prefetchw : X86LibBuiltin<"void(void volatile const *)">;
}


// Mechanically ported builtins from the original `.def` file.
//
Expand All @@ -146,10 +156,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in
// current formulation is based on what was easiest to recognize from the
// pre-TableGen version.

let Features = "mmx", Attributes = [NoThrow, Const] in {
def _mm_prefetch : X86NoPrefixBuiltin<"void(char const *, int)">;
}

let Features = "sse", Attributes = [NoThrow] in {
def ldmxcsr : X86Builtin<"void(unsigned int)">;
}
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15254,6 +15254,17 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
return Builder.CreateCall(F, {Address, RW, Locality, Data});
}
case X86::BI_m_prefetch:
case X86::BI_m_prefetchw: {
Value *Address = Ops[0];
// The 'w' suffix implies write.
Value *RW =
ConstantInt::get(Int32Ty, BuiltinID == X86::BI_m_prefetchw ? 1 : 0);
Value *Locality = ConstantInt::get(Int32Ty, 0x3);
Value *Data = ConstantInt::get(Int32Ty, 1);
Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
return Builder.CreateCall(F, {Address, RW, Locality, Data});
}
case X86::BI_mm_clflush: {
return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
Ops[0]);
Expand Down
23 changes: 10 additions & 13 deletions clang/lib/Headers/prfchwintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
#ifndef __PRFCHWINTRIN_H
#define __PRFCHWINTRIN_H

#if defined(__cplusplus)
extern "C" {
#endif

/// Loads a memory sequence containing the specified memory address into
/// all data cache levels.
///
Expand All @@ -26,11 +30,7 @@
///
/// \param __P
/// A pointer specifying the memory address to be prefetched.
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_m_prefetch(void *__P)
{
__builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);
}
void _m_prefetch(void *__P);

/// Loads a memory sequence containing the specified memory address into
/// the L1 data cache and sets the cache-coherency state to modified.
Expand All @@ -48,13 +48,10 @@ _m_prefetch(void *__P)
///
/// \param __P
/// A pointer specifying the memory address to be prefetched.
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_m_prefetchw(volatile const void *__P)
{
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-qual"
__builtin_prefetch ((const void*)__P, 1, 3 /* _MM_HINT_T0 */);
#pragma clang diagnostic pop
}
void _m_prefetchw(volatile const void *__P);

#if defined(__cplusplus)
} // extern "C"
#endif

#endif /* __PRFCHWINTRIN_H */
9 changes: 5 additions & 4 deletions clang/lib/Headers/xmmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -2197,10 +2197,7 @@ _mm_storer_ps(float *__p, __m128 __a)
#define _MM_HINT_T2 1
#define _MM_HINT_NTA 0

#ifndef _MSC_VER
/* FIXME: We have to #define this because "sel" must be a constant integer, and
Sema doesn't do any form of constant propagation yet. */

#if 0
/// Loads one cache line of data from the specified address to a location
/// closer to the processor.
///
Expand All @@ -2225,6 +2222,10 @@ _mm_storer_ps(float *__p, __m128 __a)
/// be generated. \n
/// _MM_HINT_T2: Move data using the T2 hint. The PREFETCHT2 instruction will
/// be generated.
///
/// _mm_prefetch is implemented as a "library builtin" directly in Clang,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably not well. I can put this inside an #if 0 block or some other macro construct so that Doxygen sees it but it never interacts with user code, but I don't have a working doxygen install to confirm if it will work.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use /// \fn _mm_prefetch(const void *a, const int sel)?

/// similar to how it is done in MSVC. Clang will warn if the user doesn't
/// include xmmintrin.h or immintrin.h.
#define _mm_prefetch(a, sel) (__builtin_prefetch((const void *)(a), \
((sel) >> 2) & 1, (sel) & 0x3))
#endif
Expand Down