Skip to content

Commit b6c723f

Browse files
committed
[libc] Implement (v|f)printf on the GPU
Summary: This patch implements the `printf` family of functions on the GPU using the new variadic support. This patch adapts the old handling in the `rpc_fprintf` placeholder, but adds an extra RPC call to get the size of the buffer to copy. This prevents the GPU from needing to parse the string. While it's theoretically possible for the pass to know the size of the struct, it's prohibitively difficult to do while maintaining ABI compatibility with NVIDIA's varargs. Depends on llvm#96015.
1 parent 3693c51 commit b6c723f

File tree

17 files changed

+388
-85
lines changed

17 files changed

+388
-85
lines changed

libc/config/gpu/entrypoints.txt

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,3 @@
1-
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
2-
set(extra_entrypoints
3-
# stdio.h entrypoints
4-
libc.src.stdio.snprintf
5-
libc.src.stdio.sprintf
6-
libc.src.stdio.vsnprintf
7-
libc.src.stdio.vsprintf
8-
)
9-
endif()
10-
111
set(TARGET_LIBC_ENTRYPOINTS
122
# assert.h entrypoints
133
libc.src.assert.__assert_fail
@@ -185,9 +175,16 @@ set(TARGET_LIBC_ENTRYPOINTS
185175
libc.src.errno.errno
186176

187177
# stdio.h entrypoints
188-
${extra_entrypoints}
189178
libc.src.stdio.clearerr
190179
libc.src.stdio.fclose
180+
libc.src.stdio.printf
181+
libc.src.stdio.vprintf
182+
libc.src.stdio.fprintf
183+
libc.src.stdio.vfprintf
184+
libc.src.stdio.sprintf
185+
libc.src.stdio.snprintf
186+
libc.src.stdio.vsprintf
187+
libc.src.stdio.vsnprintf
191188
libc.src.stdio.feof
192189
libc.src.stdio.ferror
193190
libc.src.stdio.fflush

libc/include/llvm-libc-types/rpc_opcodes_t.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ typedef enum {
3434
RPC_PRINTF_TO_STDOUT,
3535
RPC_PRINTF_TO_STDERR,
3636
RPC_PRINTF_TO_STREAM,
37+
RPC_PRINTF_TO_STDOUT_PACKED,
38+
RPC_PRINTF_TO_STDERR_PACKED,
39+
RPC_PRINTF_TO_STREAM_PACKED,
3740
RPC_REMOVE,
3841
RPC_LAST = 0xFFFF,
3942
} rpc_opcode_t;

libc/src/__support/arg_list.h

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@
1818
namespace LIBC_NAMESPACE {
1919
namespace internal {
2020

21+
template <typename V, typename A>
22+
LIBC_INLINE constexpr V align_up(V val, A align) {
23+
return ((val + V(align) - 1) / V(align)) * V(align);
24+
}
25+
2126
class ArgList {
2227
va_list vlist;
2328

@@ -54,7 +59,34 @@ class MockArgList {
5459
}
5560

5661
template <class T> LIBC_INLINE T next_var() {
57-
++arg_counter;
62+
arg_counter++;
63+
return T(arg_counter);
64+
}
65+
66+
size_t read_count() const { return arg_counter; }
67+
};
68+
69+
// Used by the GPU implementation to parse how many bytes need to be read from
70+
// the variadic argument buffer.
71+
template <bool packed> class DummyArgList {
72+
size_t arg_counter = 0;
73+
74+
public:
75+
LIBC_INLINE DummyArgList() = default;
76+
LIBC_INLINE DummyArgList(va_list) { ; }
77+
// Copy constructor: starts this counter at the byte count already accumulated
// by `other`.
LIBC_INLINE DummyArgList(DummyArgList &other)
    : arg_counter(other.arg_counter) {}
80+
LIBC_INLINE ~DummyArgList() = default;
81+
82+
// Copy assignment: takes over the byte count accumulated by `rhs` and returns
// this object so assignments can be chained.
LIBC_INLINE DummyArgList &operator=(DummyArgList &rhs) {
  this->arg_counter = rhs.arg_counter;
  return *this;
}
86+
87+
// Accounts for one argument of type T without reading any data. In the
// non-packed layout the running byte count is first rounded up to alignof(T);
// in both layouts sizeof(T) is then added. Returns the updated count converted
// to T.
template <class T> LIBC_INLINE T next_var() {
  size_t offset = arg_counter;
  if (!packed)
    offset = align_up(offset, alignof(T));
  arg_counter = offset + sizeof(T);
  return T(arg_counter);
}
6092

@@ -63,7 +95,7 @@ class MockArgList {
6395

6496
// Used for the GPU implementation of `printf`. This models a variadic list as a
6597
// buffer of argument values (packed or naturally aligned) read in sequence.
66-
class StructArgList {
98+
template <bool packed> class StructArgList {
6799
void *ptr;
68100
void *end;
69101

@@ -85,15 +117,18 @@ class StructArgList {
85117
LIBC_INLINE void *get_ptr() const { return ptr; }
86118

87119
// Reads the next argument of type T from the buffer and advances past it.
//
// When `packed` is false the read position is first rounded up to alignof(T);
// when it is true values are assumed to be stored back to back.
//
// Returns T(-1), without advancing past the value, if the buffer does not
// contain a complete T at the read position.
template <class T> LIBC_INLINE T next_var() {
  if (!packed)
    ptr = reinterpret_cast<void *>(
        align_up(reinterpret_cast<uintptr_t>(ptr), alignof(T)));

  // Require all sizeof(T) bytes to lie inside [ptr, end). Checking only
  // `ptr >= end` would let a truncated trailing argument be copied from
  // memory past `end`.
  const uintptr_t cur = reinterpret_cast<uintptr_t>(ptr);
  const uintptr_t limit = reinterpret_cast<uintptr_t>(end);
  if (cur >= limit || limit - cur < sizeof(T))
    return T(-1);

  // Memcpy because pointer alignment may be illegal given a packed struct.
  T val;
  __builtin_memcpy(&val, ptr, sizeof(T));

  ptr = reinterpret_cast<void *>(cur + sizeof(T));
  return val;
}
99134
};

libc/src/gpu/rpc_fprintf.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ int fprintf_impl(::FILE *__restrict file, const char *__restrict format,
2929
}
3030

3131
port.send_n(format, format_size);
32+
port.recv([&](rpc::Buffer *buffer) {
33+
args_size = static_cast<size_t>(buffer->data[0]);
34+
});
3235
port.send_n(args, args_size);
3336

3437
uint32_t ret = 0;
@@ -50,7 +53,7 @@ int fprintf_impl(::FILE *__restrict file, const char *__restrict format,
5053
return ret;
5154
}
5255

53-
// TODO: This is a stand-in function that uses a struct pointer and size in
56+
// TODO: Delete this and port OpenMP to use `printf`. This is a stand-in
5457
// function that uses a struct pointer and size in place of varargs; it
5558
// predates varargs support on the GPU.
5659
LLVM_LIBC_FUNCTION(int, rpc_fprintf,

libc/src/stdio/CMakeLists.txt

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -159,17 +159,6 @@ add_entrypoint_object(
159159
libc.src.stdio.printf_core.writer
160160
)
161161

162-
add_entrypoint_object(
163-
fprintf
164-
SRCS
165-
fprintf.cpp
166-
HDRS
167-
fprintf.h
168-
DEPENDS
169-
libc.src.__support.arg_list
170-
libc.src.stdio.printf_core.vfprintf_internal
171-
)
172-
173162
add_entrypoint_object(
174163
vsprintf
175164
SRCS
@@ -192,17 +181,6 @@ add_entrypoint_object(
192181
libc.src.stdio.printf_core.writer
193182
)
194183

195-
add_entrypoint_object(
196-
vfprintf
197-
SRCS
198-
vfprintf.cpp
199-
HDRS
200-
vfprintf.h
201-
DEPENDS
202-
libc.src.__support.arg_list
203-
libc.src.stdio.printf_core.vfprintf_internal
204-
)
205-
206184
add_stdio_entrypoint_object(
207185
fileno
208186
SRCS
@@ -261,6 +239,7 @@ add_stdio_entrypoint_object(fputc)
261239
add_stdio_entrypoint_object(putc)
262240
add_stdio_entrypoint_object(putchar)
263241
add_stdio_entrypoint_object(printf)
242+
add_stdio_entrypoint_object(fprintf)
264243
add_stdio_entrypoint_object(fgetc)
265244
add_stdio_entrypoint_object(fgetc_unlocked)
266245
add_stdio_entrypoint_object(getc)
@@ -273,3 +252,4 @@ add_stdio_entrypoint_object(stdin)
273252
add_stdio_entrypoint_object(stdout)
274253
add_stdio_entrypoint_object(stderr)
275254
add_stdio_entrypoint_object(vprintf)
255+
add_stdio_entrypoint_object(vfprintf)

libc/src/stdio/generic/CMakeLists.txt

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,31 @@ add_entrypoint_object(
396396
${printf_deps}
397397
)
398398

399+
add_entrypoint_object(
400+
fprintf
401+
SRCS
402+
fprintf.cpp
403+
HDRS
404+
../fprintf.h
405+
DEPENDS
406+
libc.src.__support.arg_list
407+
libc.src.stdio.printf_core.vfprintf_internal
408+
${printf_deps}
409+
)
410+
411+
add_entrypoint_object(
412+
vfprintf
413+
SRCS
414+
vfprintf.cpp
415+
HDRS
416+
../vfprintf.h
417+
DEPENDS
418+
libc.src.__support.arg_list
419+
libc.src.stdio.printf_core.vfprintf_internal
420+
${printf_deps}
421+
)
422+
423+
399424
add_entrypoint_object(
400425
fgets
401426
SRCS
File renamed without changes.
File renamed without changes.

libc/src/stdio/gpu/CMakeLists.txt

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,14 @@ add_header_library(
1010
.stderr
1111
)
1212

13+
add_header_library(
14+
vfprintf_utils
15+
HDRS
16+
vfprintf_utils.h
17+
DEPENDS
18+
.gpu_file
19+
)
20+
1321
add_entrypoint_object(
1422
feof
1523
SRCS
@@ -273,6 +281,46 @@ add_entrypoint_object(
273281
.gpu_file
274282
)
275283

284+
add_entrypoint_object(
285+
printf
286+
SRCS
287+
printf.cpp
288+
HDRS
289+
../printf.h
290+
DEPENDS
291+
.vfprintf_utils
292+
)
293+
294+
add_entrypoint_object(
295+
vprintf
296+
SRCS
297+
vprintf.cpp
298+
HDRS
299+
../vprintf.h
300+
DEPENDS
301+
.vfprintf_utils
302+
)
303+
304+
add_entrypoint_object(
305+
fprintf
306+
SRCS
307+
fprintf.cpp
308+
HDRS
309+
../fprintf.h
310+
DEPENDS
311+
.vfprintf_utils
312+
)
313+
314+
add_entrypoint_object(
315+
vfprintf
316+
SRCS
317+
vfprintf.cpp
318+
HDRS
319+
../vfprintf.h
320+
DEPENDS
321+
.vfprintf_utils
322+
)
323+
276324
add_entrypoint_object(
277325
stdin
278326
SRCS

libc/src/stdio/gpu/fprintf.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
//===-- GPU Implementation of fprintf -------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/stdio/fprintf.h"
10+
11+
#include "src/__support/CPP/string_view.h"
12+
#include "src/__support/arg_list.h"
13+
#include "src/errno/libc_errno.h"
14+
#include "src/stdio/gpu/vfprintf_utils.h"
15+
16+
#include <stdio.h>
17+
18+
namespace LIBC_NAMESPACE {
19+
20+
LLVM_LIBC_FUNCTION(int, fprintf,
21+
(::FILE *__restrict stream, const char *__restrict format,
22+
...)) {
23+
va_list vlist;
24+
va_start(vlist, format);
25+
cpp::string_view str_view(format);
26+
int ret_val = vfprintf_internal(stream, format, str_view.size() + 1, vlist);
27+
va_end(vlist);
28+
return ret_val;
29+
}
30+
31+
} // namespace LIBC_NAMESPACE

libc/src/stdio/gpu/printf.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//===-- GPU Implementation of printf --------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/stdio/printf.h"
10+
11+
#include "src/__support/CPP/string_view.h"
12+
#include "src/__support/arg_list.h"
13+
#include "src/errno/libc_errno.h"
14+
#include "src/stdio/gpu/vfprintf_utils.h"
15+
16+
#include <stdio.h>
17+
18+
namespace LIBC_NAMESPACE {
19+
20+
LLVM_LIBC_FUNCTION(int, printf, (const char *__restrict format, ...)) {
21+
va_list vlist;
22+
va_start(vlist, format);
23+
cpp::string_view str_view(format);
24+
int ret_val = vfprintf_internal(stdout, format, str_view.size() + 1, vlist);
25+
va_end(vlist);
26+
return ret_val;
27+
}
28+
29+
} // namespace LIBC_NAMESPACE

libc/src/stdio/gpu/vfprintf.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
//===-- GPU Implementation of vfprintf ------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/stdio/vfprintf.h"
10+
11+
#include "src/__support/CPP/string_view.h"
12+
#include "src/__support/arg_list.h"
13+
#include "src/errno/libc_errno.h"
14+
#include "src/stdio/gpu/vfprintf_utils.h"
15+
16+
#include <stdio.h>
17+
18+
namespace LIBC_NAMESPACE {
19+
20+
LLVM_LIBC_FUNCTION(int, vfprintf,
21+
(::FILE *__restrict stream, const char *__restrict format,
22+
va_list vlist)) {
23+
cpp::string_view str_view(format);
24+
int ret_val = vfprintf_internal(stream, format, str_view.size() + 1, vlist);
25+
return ret_val;
26+
}
27+
28+
} // namespace LIBC_NAMESPACE

0 commit comments

Comments
 (0)