Skip to content

Commit 1c4e4e0

Browse files
committed
[libc][NFC] Split bcmp implementations per platform
This is a follow up on D154800 and D154770 to make the code structure more principled and avoid too many nested #ifdef/#endif. Reviewed By: courbet Differential Revision: https://reviews.llvm.org/D155076
1 parent e8a5df7 commit 1c4e4e0

File tree

6 files changed

+221
-149
lines changed

6 files changed

+221
-149
lines changed

libc/src/string/memory_utils/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
#TODO(michaelrj): split out the implementations from memory_utils
1+
# TODO(michaelrj): split out the implementations from memory_utils
22
add_header_library(
33
memory_utils
44
HDRS
5+
aarch64/bcmp_implementations.h
56
aarch64/memcmp_implementations.h
67
aarch64/memcpy_implementations.h
78
bcmp_implementations.h
@@ -16,7 +17,9 @@ add_header_library(
1617
op_builtin.h
1718
op_generic.h
1819
op_x86.h
20+
riscv/bcmp_implementations.h
1921
utils.h
22+
x86_64/bcmp_implementations.h
2023
x86_64/memcmp_implementations.h
2124
x86_64/memcpy_implementations.h
2225
DEPS
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
//===-- Bcmp implementation for aarch64 -------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
#ifndef LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_BCMP_IMPLEMENTATIONS_H
9+
#define LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_BCMP_IMPLEMENTATIONS_H
10+
11+
#include "src/__support/macros/attributes.h" // LIBC_INLINE
12+
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
13+
#include "src/string/memory_utils/op_aarch64.h"
14+
#include "src/string/memory_utils/op_generic.h"
15+
#include "src/string/memory_utils/utils.h" // Ptr, CPtr
16+
17+
#include <stddef.h> // size_t
18+
19+
namespace __llvm_libc {
20+
21+
[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1,
22+
CPtr p2,
23+
size_t count) {
24+
if (LIBC_LIKELY(count <= 32)) {
25+
if (LIBC_UNLIKELY(count >= 16)) {
26+
return aarch64::Bcmp<16>::head_tail(p1, p2, count);
27+
}
28+
switch (count) {
29+
case 0:
30+
return BcmpReturnType::ZERO();
31+
case 1:
32+
return generic::Bcmp<uint8_t>::block(p1, p2);
33+
case 2:
34+
return generic::Bcmp<uint16_t>::block(p1, p2);
35+
case 3:
36+
return generic::Bcmp<uint16_t>::head_tail(p1, p2, count);
37+
case 4:
38+
return generic::Bcmp<uint32_t>::block(p1, p2);
39+
case 5:
40+
case 6:
41+
case 7:
42+
return generic::Bcmp<uint32_t>::head_tail(p1, p2, count);
43+
case 8:
44+
return generic::Bcmp<uint64_t>::block(p1, p2);
45+
case 9:
46+
case 10:
47+
case 11:
48+
case 12:
49+
case 13:
50+
case 14:
51+
case 15:
52+
return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
53+
}
54+
}
55+
56+
if (count <= 64)
57+
return aarch64::Bcmp<32>::head_tail(p1, p2, count);
58+
59+
// Aligned loop if > 256, otherwise normal loop
60+
if (LIBC_UNLIKELY(count > 256)) {
61+
if (auto value = aarch64::Bcmp<32>::block(p1, p2))
62+
return value;
63+
align_to_next_boundary<16, Arg::P1>(p1, p2, count);
64+
}
65+
return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
66+
}
67+
68+
} // namespace __llvm_libc
69+
70+
#endif // LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_BCMP_IMPLEMENTATIONS_H
Lines changed: 18 additions & 148 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//===-- Implementation of bcmp --------------------------------------------===//
1+
//===-- Dispatch logic for bcmp -------------------------------------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,164 +10,34 @@
1010
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BCMP_IMPLEMENTATIONS_H
1111

1212
#include "src/__support/common.h"
13-
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY LIBC_LOOP_NOUNROLL
14-
#include "src/__support/macros/properties/architectures.h"
15-
#include "src/string/memory_utils/generic/aligned_access.h"
16-
#include "src/string/memory_utils/generic/byte_per_byte.h"
17-
#include "src/string/memory_utils/op_aarch64.h"
18-
#include "src/string/memory_utils/op_builtin.h"
19-
#include "src/string/memory_utils/op_generic.h"
20-
#include "src/string/memory_utils/op_riscv.h"
21-
#include "src/string/memory_utils/op_x86.h"
13+
#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_
2214

2315
#include <stddef.h> // size_t
2416

25-
namespace __llvm_libc {
26-
27-
#if defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
28-
[[maybe_unused]] LIBC_INLINE BcmpReturnType
29-
inline_bcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
30-
return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count);
31-
}
32-
#endif // defined(LIBC_TARGET_ARCH_IS_X86) ||
33-
// defined(LIBC_TARGET_ARCH_IS_AARCH64)
34-
3517
#if defined(LIBC_TARGET_ARCH_IS_X86)
36-
#if defined(__SSE4_1__)
37-
[[maybe_unused]] LIBC_INLINE BcmpReturnType
38-
inline_bcmp_x86_sse41_gt16(CPtr p1, CPtr p2, size_t count) {
39-
if (count <= 32)
40-
return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
41-
return generic::Bcmp<__m128i>::loop_and_tail_align_above(256, p1, p2, count);
42-
}
43-
#endif // __SSE4_1__
44-
45-
#if defined(__AVX__)
46-
[[maybe_unused]] LIBC_INLINE BcmpReturnType
47-
inline_bcmp_x86_avx_gt16(CPtr p1, CPtr p2, size_t count) {
48-
if (count <= 32)
49-
return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
50-
if (count <= 64)
51-
return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
52-
return generic::Bcmp<__m256i>::loop_and_tail_align_above(256, p1, p2, count);
53-
}
54-
#endif // __AVX__
55-
56-
#if defined(__AVX512BW__)
57-
[[maybe_unused]] LIBC_INLINE BcmpReturnType
58-
inline_bcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
59-
if (count <= 32)
60-
return generic::Bcmp<__m128i>::head_tail(p1, p2, count);
61-
if (count <= 64)
62-
return generic::Bcmp<__m256i>::head_tail(p1, p2, count);
63-
if (count <= 128)
64-
return generic::Bcmp<__m512i>::head_tail(p1, p2, count);
65-
return generic::Bcmp<__m512i>::loop_and_tail_align_above(256, p1, p2, count);
66-
}
67-
#endif // __AVX512BW__
68-
69-
[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_x86(CPtr p1, CPtr p2,
70-
size_t count) {
71-
if (count == 0)
72-
return BcmpReturnType::ZERO();
73-
if (count == 1)
74-
return generic::Bcmp<uint8_t>::block(p1, p2);
75-
if (count == 2)
76-
return generic::Bcmp<uint16_t>::block(p1, p2);
77-
if (count == 3)
78-
return generic::BcmpSequence<uint16_t, uint8_t>::block(p1, p2);
79-
if (count == 4)
80-
return generic::Bcmp<uint32_t>::block(p1, p2);
81-
if (count == 5)
82-
return generic::BcmpSequence<uint32_t, uint8_t>::block(p1, p2);
83-
if (count == 6)
84-
return generic::BcmpSequence<uint32_t, uint16_t>::block(p1, p2);
85-
if (count == 7)
86-
return generic::BcmpSequence<uint32_t, uint16_t, uint8_t>::block(p1, p2);
87-
if (count == 8)
88-
return generic::Bcmp<uint64_t>::block(p1, p2);
89-
if (count <= 16)
90-
return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
91-
#if defined(__AVX512BW__)
92-
return inline_bcmp_x86_avx512bw_gt16(p1, p2, count);
93-
#elif defined(__AVX__)
94-
return inline_bcmp_x86_avx_gt16(p1, p2, count);
95-
#elif defined(__SSE4_1__)
96-
return inline_bcmp_x86_sse41_gt16(p1, p2, count);
97-
#else
98-
return inline_bcmp_generic_gt16(p1, p2, count);
99-
#endif
100-
}
101-
#endif // defined(LIBC_TARGET_ARCH_IS_X86)
102-
103-
#if defined(LIBC_TARGET_ARCH_IS_AARCH64)
104-
[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1,
105-
CPtr p2,
106-
size_t count) {
107-
if (LIBC_LIKELY(count <= 32)) {
108-
if (LIBC_UNLIKELY(count >= 16)) {
109-
return aarch64::Bcmp<16>::head_tail(p1, p2, count);
110-
}
111-
switch (count) {
112-
case 0:
113-
return BcmpReturnType::ZERO();
114-
case 1:
115-
return generic::Bcmp<uint8_t>::block(p1, p2);
116-
case 2:
117-
return generic::Bcmp<uint16_t>::block(p1, p2);
118-
case 3:
119-
return generic::Bcmp<uint16_t>::head_tail(p1, p2, count);
120-
case 4:
121-
return generic::Bcmp<uint32_t>::block(p1, p2);
122-
case 5:
123-
case 6:
124-
case 7:
125-
return generic::Bcmp<uint32_t>::head_tail(p1, p2, count);
126-
case 8:
127-
return generic::Bcmp<uint64_t>::block(p1, p2);
128-
case 9:
129-
case 10:
130-
case 11:
131-
case 12:
132-
case 13:
133-
case 14:
134-
case 15:
135-
return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
136-
}
137-
}
138-
139-
if (count <= 64)
140-
return aarch64::Bcmp<32>::head_tail(p1, p2, count);
141-
142-
// Aligned loop if > 256, otherwise normal loop
143-
if (LIBC_UNLIKELY(count > 256)) {
144-
if (auto value = aarch64::Bcmp<32>::block(p1, p2))
145-
return value;
146-
align_to_next_boundary<16, Arg::P1>(p1, p2, count);
147-
}
148-
return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
149-
}
150-
#endif // defined(LIBC_TARGET_ARCH_IS_AARCH64)
151-
152-
LIBC_INLINE BcmpReturnType inline_bcmp(CPtr p1, CPtr p2, size_t count) {
153-
#if defined(LIBC_TARGET_ARCH_IS_X86)
154-
return inline_bcmp_x86(p1, p2, count);
18+
#include "src/string/memory_utils/x86_64/bcmp_implementations.h"
19+
#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_x86
15520
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
156-
return inline_bcmp_aarch64(p1, p2, count);
157-
#elif defined(LIBC_TARGET_ARCH_IS_RISCV64)
158-
return inline_bcmp_aligned_access_64bit(p1, p2, count);
159-
#elif defined(LIBC_TARGET_ARCH_IS_RISCV32)
160-
return inline_bcmp_aligned_access_32bit(p1, p2, count);
21+
#include "src/string/memory_utils/aarch64/bcmp_implementations.h"
22+
#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_aarch64
23+
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
24+
#include "src/string/memory_utils/riscv/bcmp_implementations.h"
25+
#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_riscv
16126
#else
162-
return inline_bcmp_byte_per_byte(p1, p2, count);
27+
// We may want to error instead of defaulting to suboptimal implementation.
28+
#include "src/string/memory_utils/generic/byte_per_byte.h"
29+
#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_byte_per_byte
16330
#endif
164-
}
31+
32+
namespace __llvm_libc {
16533

16634
LIBC_INLINE int inline_bcmp(const void *p1, const void *p2, size_t count) {
167-
return static_cast<int>(inline_bcmp(reinterpret_cast<CPtr>(p1),
168-
reinterpret_cast<CPtr>(p2), count));
35+
return static_cast<int>(LIBC_SRC_STRING_MEMORY_UTILS_BCMP(
36+
reinterpret_cast<CPtr>(p1), reinterpret_cast<CPtr>(p2), count));
16937
}
17038

17139
} // namespace __llvm_libc
17240

41+
#undef LIBC_SRC_STRING_MEMORY_UTILS_BCMP
42+
17343
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BCMP_IMPLEMENTATIONS_H
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
//===-- Bcmp implementation for riscv ---------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
#ifndef LIBC_SRC_STRING_MEMORY_UTILS_RISCV_BCMP_IMPLEMENTATIONS_H
9+
#define LIBC_SRC_STRING_MEMORY_UTILS_RISCV_BCMP_IMPLEMENTATIONS_H
10+
11+
#include "src/__support/macros/attributes.h" // LIBC_INLINE
12+
#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_RISCV64
13+
#include "src/string/memory_utils/generic/aligned_access.h"
14+
#include "src/string/memory_utils/utils.h" // Ptr, CPtr
15+
16+
#include <stddef.h> // size_t
17+
18+
namespace __llvm_libc {
19+
20+
[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_riscv(CPtr p1, CPtr p2,
21+
size_t count) {
22+
#if defined(LIBC_TARGET_ARCH_IS_RISCV64)
23+
return inline_bcmp_aligned_access_64bit(p1, p2, count);
24+
#elif defined(LIBC_TARGET_ARCH_IS_RISCV32)
25+
return inline_bcmp_aligned_access_32bit(p1, p2, count);
26+
#else
27+
#error "Unimplemented"
28+
#endif
29+
}
30+
31+
} // namespace __llvm_libc
32+
33+
#endif // LIBC_SRC_STRING_MEMORY_UTILS_RISCV_BCMP_IMPLEMENTATIONS_H

0 commit comments

Comments
 (0)