Skip to content

Commit 8e6a550

Browse files
committed
[libc] Implement (v|f)printf on the GPU
Summary: This patch implements the `printf` family of functions on the GPU using the new variadic function support. It adapts the existing handling from the `rpc_fprintf` placeholder, but adds an extra RPC round trip in which the GPU receives the size of the argument buffer it needs to copy. This saves the GPU from having to parse the format string itself. While it is theoretically possible for the compiler pass that lowers the variadic call to know the size of the argument struct, that is prohibitively difficult to do while maintaining ABI compatibility with NVIDIA's varargs. Depends on llvm#96015.
1 parent 486d00e commit 8e6a550

File tree

18 files changed

+387
-89
lines changed

18 files changed

+387
-89
lines changed

libc/config/gpu/entrypoints.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,10 @@ set(TARGET_LIBC_ENTRYPOINTS
177177
# stdio.h entrypoints
178178
libc.src.stdio.clearerr
179179
libc.src.stdio.fclose
180+
libc.src.stdio.printf
181+
libc.src.stdio.vprintf
182+
libc.src.stdio.fprintf
183+
libc.src.stdio.vfprintf
180184
libc.src.stdio.sprintf
181185
libc.src.stdio.snprintf
182186
libc.src.stdio.vsprintf

libc/include/llvm-libc-types/rpc_opcodes_t.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ typedef enum {
3434
RPC_PRINTF_TO_STDOUT,
3535
RPC_PRINTF_TO_STDERR,
3636
RPC_PRINTF_TO_STREAM,
37+
RPC_PRINTF_TO_STDOUT_PACKED,
38+
RPC_PRINTF_TO_STDERR_PACKED,
39+
RPC_PRINTF_TO_STREAM_PACKED,
3740
RPC_REMOVE,
3841
RPC_LAST = 0xFFFF,
3942
} rpc_opcode_t;

libc/src/__support/arg_list.h

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@
1919
namespace LIBC_NAMESPACE_DECL {
2020
namespace internal {
2121

22+
// Rounds `val` up to a multiple of `align` and returns it as a `V`; a value
// that is already a multiple is returned unchanged. `align` is converted to
// `V` before any arithmetic is done.
// NOTE(review): relies on truncating integer division, so this assumes `V` is
// an unsigned integral type (the visible callers pass `size_t` and
// `uintptr_t`), that `align` is nonzero, and that `val + align - 1` does not
// overflow -- confirm for any new caller.
template <typename V, typename A>
23+
LIBC_INLINE constexpr V align_up(V val, A align) {
24+
// Bias by (align - 1), then divide and multiply back to drop the remainder.
return ((val + V(align) - 1) / V(align)) * V(align);
25+
}
26+
2227
class ArgList {
2328
va_list vlist;
2429

@@ -55,7 +60,34 @@ class MockArgList {
5560
}
5661

5762
template <class T> LIBC_INLINE T next_var() {
58-
++arg_counter;
63+
arg_counter++;
64+
return T(arg_counter);
65+
}
66+
67+
size_t read_count() const { return arg_counter; }
68+
};
69+
70+
// Used by the GPU implementation to parse how many bytes need to be read from
71+
// the variadic argument buffer.
72+
template <bool packed> class DummyArgList {
73+
size_t arg_counter = 0;
74+
75+
public:
76+
LIBC_INLINE DummyArgList() = default;
77+
LIBC_INLINE DummyArgList(va_list) { ; }
78+
LIBC_INLINE DummyArgList(DummyArgList &other) {
79+
arg_counter = other.arg_counter;
80+
}
81+
LIBC_INLINE ~DummyArgList() = default;
82+
83+
LIBC_INLINE DummyArgList &operator=(DummyArgList &rhs) {
84+
arg_counter = rhs.arg_counter;
85+
return *this;
86+
}
87+
88+
template <class T> LIBC_INLINE T next_var() {
89+
arg_counter = packed ? arg_counter + sizeof(T)
90+
: align_up(arg_counter, alignof(T)) + sizeof(T);
5991
return T(arg_counter);
6092
}
6193

@@ -64,7 +96,7 @@ class MockArgList {
6496

6597
// Used for the GPU implementation of `printf`. This models a variadic list as a
6698
// simple array of pointers that are built manually by the implementation.
67-
class StructArgList {
99+
template <bool packed> class StructArgList {
68100
void *ptr;
69101
void *end;
70102

@@ -86,15 +118,18 @@ class StructArgList {
86118
LIBC_INLINE void *get_ptr() const { return ptr; }
87119

88120
template <class T> LIBC_INLINE T next_var() {
89-
ptr = reinterpret_cast<void *>(
90-
((reinterpret_cast<uintptr_t>(ptr) + alignof(T) - 1) / alignof(T)) *
91-
alignof(T));
92-
121+
if (!packed)
122+
ptr = reinterpret_cast<void *>(
123+
align_up(reinterpret_cast<uintptr_t>(ptr), alignof(T)));
93124
if (ptr >= end)
94125
return T(-1);
95126

96-
T val = *reinterpret_cast<T *>(ptr);
97-
ptr = reinterpret_cast<unsigned char *>(ptr) + sizeof(T);
127+
// Memcpy because pointer alignment may be illegal given a packed struct.
128+
T val;
129+
__builtin_memcpy(&val, ptr, sizeof(T));
130+
131+
ptr =
132+
reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(ptr) + sizeof(T));
98133
return val;
99134
}
100135
};

libc/src/gpu/rpc_fprintf.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ int fprintf_impl(::FILE *__restrict file, const char *__restrict format,
3030
}
3131

3232
port.send_n(format, format_size);
33+
port.recv([&](rpc::Buffer *buffer) {
34+
args_size = static_cast<size_t>(buffer->data[0]);
35+
});
3336
port.send_n(args, args_size);
3437

3538
uint32_t ret = 0;
@@ -51,7 +54,7 @@ int fprintf_impl(::FILE *__restrict file, const char *__restrict format,
5154
return ret;
5255
}
5356

54-
// TODO: This is a stand-in function that uses a struct pointer and size in
57+
// TODO: Delete this and port OpenMP to use `printf`.
5558
// place of varargs. Once varargs support is added we will use that to
5659
// implement the real version.
5760
LLVM_LIBC_FUNCTION(int, rpc_fprintf,

libc/src/stdio/CMakeLists.txt

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -163,18 +163,6 @@ add_entrypoint_object(
163163
libc.src.stdio.printf_core.writer
164164
)
165165

166-
add_entrypoint_object(
167-
fprintf
168-
SRCS
169-
fprintf.cpp
170-
HDRS
171-
fprintf.h
172-
DEPENDS
173-
libc.hdr.types.FILE
174-
libc.src.__support.arg_list
175-
libc.src.stdio.printf_core.vfprintf_internal
176-
)
177-
178166
add_entrypoint_object(
179167
vsprintf
180168
SRCS
@@ -197,18 +185,6 @@ add_entrypoint_object(
197185
libc.src.stdio.printf_core.writer
198186
)
199187

200-
add_entrypoint_object(
201-
vfprintf
202-
SRCS
203-
vfprintf.cpp
204-
HDRS
205-
vfprintf.h
206-
DEPENDS
207-
libc.hdr.types.FILE
208-
libc.src.__support.arg_list
209-
libc.src.stdio.printf_core.vfprintf_internal
210-
)
211-
212188
add_subdirectory(printf_core)
213189
add_subdirectory(scanf_core)
214190

@@ -258,6 +234,7 @@ add_stdio_entrypoint_object(fputc)
258234
add_stdio_entrypoint_object(putc)
259235
add_stdio_entrypoint_object(putchar)
260236
add_stdio_entrypoint_object(printf)
237+
add_stdio_entrypoint_object(fprintf)
261238
add_stdio_entrypoint_object(fgetc)
262239
add_stdio_entrypoint_object(fgetc_unlocked)
263240
add_stdio_entrypoint_object(getc)
@@ -270,3 +247,4 @@ add_stdio_entrypoint_object(stdin)
270247
add_stdio_entrypoint_object(stdout)
271248
add_stdio_entrypoint_object(stderr)
272249
add_stdio_entrypoint_object(vprintf)
250+
add_stdio_entrypoint_object(vfprintf)

libc/src/stdio/generic/CMakeLists.txt

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -363,19 +363,6 @@ add_entrypoint_object(
363363
libc.src.__support.File.platform_file
364364
)
365365

366-
list(APPEND printf_deps
367-
libc.src.__support.arg_list
368-
libc.src.stdio.printf_core.vfprintf_internal
369-
)
370-
371-
if(LLVM_LIBC_FULL_BUILD)
372-
list(APPEND printf_deps
373-
libc.src.__support.File.file
374-
libc.src.__support.File.platform_file
375-
libc.src.__support.File.platform_stdout
376-
)
377-
endif()
378-
379366
add_entrypoint_object(
380367
printf
381368
SRCS
@@ -396,6 +383,32 @@ add_entrypoint_object(
396383
${printf_deps}
397384
)
398385

386+
add_entrypoint_object(
387+
fprintf
388+
SRCS
389+
fprintf.cpp
390+
HDRS
391+
../fprintf.h
392+
DEPENDS
393+
libc.hdr.types.FILE
394+
libc.src.__support.arg_list
395+
libc.src.stdio.printf_core.vfprintf_internal
396+
${printf_deps}
397+
)
398+
399+
add_entrypoint_object(
400+
vfprintf
401+
SRCS
402+
vfprintf.cpp
403+
HDRS
404+
../vfprintf.h
405+
DEPENDS
406+
libc.hdr.types.FILE
407+
libc.src.__support.arg_list
408+
libc.src.stdio.printf_core.vfprintf_internal
409+
${printf_deps}
410+
)
411+
399412
add_entrypoint_object(
400413
fgets
401414
SRCS
File renamed without changes.
File renamed without changes.

libc/src/stdio/gpu/CMakeLists.txt

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,14 @@ add_header_library(
1111
.stderr
1212
)
1313

14+
add_header_library(
15+
vfprintf_utils
16+
HDRS
17+
vfprintf_utils.h
18+
DEPENDS
19+
.gpu_file
20+
)
21+
1422
add_entrypoint_object(
1523
feof
1624
SRCS
@@ -246,6 +254,46 @@ add_entrypoint_object(
246254
.gpu_file
247255
)
248256

257+
add_entrypoint_object(
258+
printf
259+
SRCS
260+
printf.cpp
261+
HDRS
262+
../printf.h
263+
DEPENDS
264+
.vfprintf_utils
265+
)
266+
267+
add_entrypoint_object(
268+
vprintf
269+
SRCS
270+
vprintf.cpp
271+
HDRS
272+
../vprintf.h
273+
DEPENDS
274+
.vfprintf_utils
275+
)
276+
277+
add_entrypoint_object(
278+
fprintf
279+
SRCS
280+
fprintf.cpp
281+
HDRS
282+
../fprintf.h
283+
DEPENDS
284+
.vfprintf_utils
285+
)
286+
287+
add_entrypoint_object(
288+
vfprintf
289+
SRCS
290+
vfprintf.cpp
291+
HDRS
292+
../vfprintf.h
293+
DEPENDS
294+
.vfprintf_utils
295+
)
296+
249297
add_entrypoint_object(
250298
stdin
251299
SRCS

libc/src/stdio/gpu/fprintf.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
//===-- GPU Implementation of fprintf -------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/stdio/fprintf.h"
10+
11+
#include "src/__support/CPP/string_view.h"
12+
#include "src/__support/arg_list.h"
13+
#include "src/errno/libc_errno.h"
14+
#include "src/stdio/gpu/vfprintf_utils.h"
15+
16+
#include <stdio.h>
17+
18+
namespace LIBC_NAMESPACE {
19+
20+
// GPU entry point for `fprintf`. Gathers the variadic arguments into a
// `va_list` and forwards them, together with `stream`, the format string and
// the format string's size, to `vfprintf_internal`. Returns the value produced
// by `vfprintf_internal` unchanged.
LLVM_LIBC_FUNCTION(int, fprintf,
21+
(::FILE *__restrict stream, const char *__restrict format,
22+
...)) {
23+
va_list vlist;
24+
va_start(vlist, format);
25+
// Wrapping the format in a string_view is only used to obtain its length.
cpp::string_view str_view(format);
26+
// The `+ 1` presumably makes the size include the null terminator;
// `vfprintf_internal` is defined outside this file -- TODO confirm.
int ret_val = vfprintf_internal(stream, format, str_view.size() + 1, vlist);
27+
va_end(vlist);
28+
return ret_val;
29+
}
30+
31+
} // namespace LIBC_NAMESPACE

libc/src/stdio/gpu/printf.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//===-- GPU Implementation of printf --------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/stdio/printf.h"
10+
11+
#include "src/__support/CPP/string_view.h"
12+
#include "src/__support/arg_list.h"
13+
#include "src/errno/libc_errno.h"
14+
#include "src/stdio/gpu/vfprintf_utils.h"
15+
16+
#include <stdio.h>
17+
18+
namespace LIBC_NAMESPACE {
19+
20+
// GPU entry point for `printf`. Gathers the variadic arguments into a
// `va_list` and forwards them to `vfprintf_internal` with `stdout` as the
// destination stream. Returns the value produced by `vfprintf_internal`
// unchanged.
LLVM_LIBC_FUNCTION(int, printf, (const char *__restrict format, ...)) {
21+
va_list vlist;
22+
va_start(vlist, format);
23+
// Wrapping the format in a string_view is only used to obtain its length.
cpp::string_view str_view(format);
24+
// The `+ 1` presumably makes the size include the null terminator;
// `vfprintf_internal` is defined outside this file -- TODO confirm.
int ret_val = vfprintf_internal(stdout, format, str_view.size() + 1, vlist);
25+
va_end(vlist);
26+
return ret_val;
27+
}
28+
29+
} // namespace LIBC_NAMESPACE

libc/src/stdio/gpu/vfprintf.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
//===-- GPU Implementation of vfprintf ------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/stdio/vfprintf.h"
10+
11+
#include "src/__support/CPP/string_view.h"
12+
#include "src/__support/arg_list.h"
13+
#include "src/errno/libc_errno.h"
14+
#include "src/stdio/gpu/vfprintf_utils.h"
15+
16+
#include <stdio.h>
17+
18+
namespace LIBC_NAMESPACE {
19+
20+
// GPU entry point for `vfprintf`. Forwards the caller-supplied `va_list`,
// together with `stream`, the format string and the format string's size, to
// `vfprintf_internal`, and returns its result unchanged. This function does
// not call `va_start`/`va_end` itself; `vlist` is passed through as received.
LLVM_LIBC_FUNCTION(int, vfprintf,
21+
(::FILE *__restrict stream, const char *__restrict format,
22+
va_list vlist)) {
23+
// Wrapping the format in a string_view is only used to obtain its length.
cpp::string_view str_view(format);
24+
// The `+ 1` presumably makes the size include the null terminator;
// `vfprintf_internal` is defined outside this file -- TODO confirm.
int ret_val = vfprintf_internal(stream, format, str_view.size() + 1, vlist);
25+
return ret_val;
26+
}
27+
28+
} // namespace LIBC_NAMESPACE

0 commit comments

Comments
 (0)