-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[libc] NVPTX Profiling #92009
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc] NVPTX Profiling #92009
Conversation
@llvm/pr-subscribers-libc Author: None (jameshu15869) Changes: Draft PR for adding microbenchmarking infrastructure for NVPTX. Patch is 38.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92009.diff 16 Files Affected:
diff --git a/libc/benchmarks/CMakeLists.txt b/libc/benchmarks/CMakeLists.txt
index 4978da65850cc..a802e653a091e 100644
--- a/libc/benchmarks/CMakeLists.txt
+++ b/libc/benchmarks/CMakeLists.txt
@@ -1,205 +1,211 @@
-find_package(Threads)
-
-set(LLVM_LINK_COMPONENTS
- Support
- TargetParser
- )
-
-#==============================================================================
-# Add Unit Testing Support
-#==============================================================================
-
-function(add_libc_benchmark_unittest target_name)
- if(NOT LLVM_INCLUDE_TESTS)
- return()
- endif()
-
- cmake_parse_arguments(
- "LIBC_BENCHMARKS_UNITTEST"
- "" # No optional arguments
- "SUITE" # Single value arguments
- "SRCS;DEPENDS" # Multi-value arguments
- ${ARGN}
- )
-
- add_executable(${target_name}
- EXCLUDE_FROM_ALL
- ${LIBC_BENCHMARKS_UNITTEST_SRCS}
- )
- target_link_libraries(${target_name}
- PRIVATE
- llvm_gtest_main
- llvm_gtest
- ${LIBC_BENCHMARKS_UNITTEST_DEPENDS}
- )
- llvm_update_compile_flags(${target_name})
-
- add_custom_command(
- TARGET ${target_name}
- POST_BUILD
- COMMAND $<TARGET_FILE:${target_name}>
- )
- add_dependencies(libc-benchmark-util-tests ${target_name})
-endfunction()
-
-#==============================================================================
-# Build Google Benchmark for libc
-#==============================================================================
-
-include(ExternalProject)
-ExternalProject_Add(google-benchmark-libc
- EXCLUDE_FROM_ALL ON
- PREFIX google-benchmark-libc
- SOURCE_DIR ${LLVM_THIRD_PARTY_DIR}/benchmark
- INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/google-benchmark-libc
- CMAKE_CACHE_ARGS
- -DBENCHMARK_ENABLE_EXCEPTIONS:BOOL=OFF
- -DBENCHMARK_ENABLE_LTO:BOOL=OFF
- -DBENCHMARK_ENABLE_TESTING:BOOL=OFF
- -DBENCHMARK_ENABLE_WERROR:BOOL=${LLVM_ENABLE_WERROR}
- -DBENCHMARK_FORCE_WERROR:BOOL=OFF
- -DBENCHMARK_USE_LIBCXX:BOOL=OFF
- -DCMAKE_BUILD_TYPE:STRING=Release
-
- -DCMAKE_SYSTEM_NAME:STRING=${CMAKE_SYSTEM_NAME}
- -DCMAKE_SYSTEM_PROCESSOR:STRING=${CMAKE_SYSTEM_PROCESSOR}
- -DCMAKE_C_COMPILER:STRING=${CMAKE_C_COMPILER}
- -DCMAKE_CXX_COMPILER:STRING=${CMAKE_CXX_COMPILER}
- -DCMAKE_CXX_FLAGS:STRING=${CMAKE_CXX_FLAGS}
- -DCMAKE_FIND_ROOT_PATH:STRING=${CMAKE_FIND_ROOT_PATH}
-
- -DBUILD_SHARED_LIBS:BOOL=OFF
- -DCMAKE_EXE_LINKER_FLAGS:STRING=-static
-
- -DCMAKE_CXX_STANDARD:STRING=14
- -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>
- )
-
-add_custom_target(libc-benchmark-util-tests)
-
-# libc-benchmark
-add_library(libc-benchmark
- STATIC
- EXCLUDE_FROM_ALL
- LibcBenchmark.cpp
- LibcBenchmark.h
-)
-
-target_include_directories(libc-benchmark
- PUBLIC ${LLVM_INCLUDE_DIR} ${LLVM_MAIN_INCLUDE_DIR}
-)
-target_link_libraries(libc-benchmark
- PUBLIC
- benchmark::benchmark
- LLVMSupport
- LLVMTargetParser
- Threads::Threads
-)
-add_dependencies(libc-benchmark google-benchmark-libc)
-llvm_update_compile_flags(libc-benchmark)
-
-add_libc_benchmark_unittest(libc-benchmark-test
- SRCS LibcBenchmarkTest.cpp
- DEPENDS libc-benchmark
-)
-
-# libc-memory-benchmark
-add_library(libc-memory-benchmark
- STATIC
- EXCLUDE_FROM_ALL
- LibcMemoryBenchmark.cpp
- LibcMemoryBenchmark.h
- LibcFunctionPrototypes.h
- MemorySizeDistributions.cpp
- MemorySizeDistributions.h
-)
-target_include_directories(libc-memory-benchmark
- PUBLIC
- ${CMAKE_CURRENT_SOURCE_DIR}
-)
-target_link_libraries(libc-memory-benchmark
- PUBLIC
- libc-benchmark
-)
-llvm_update_compile_flags(libc-memory-benchmark)
-
-add_libc_benchmark_unittest(libc-memory-benchmark-test
- SRCS LibcMemoryBenchmarkTest.cpp
- DEPENDS libc-memory-benchmark
-)
-
-# json
-add_library(json
- STATIC
- EXCLUDE_FROM_ALL
- JSON.cpp
- JSON.h
-)
-target_link_libraries(json PUBLIC libc-memory-benchmark)
-llvm_update_compile_flags(json)
-
-add_libc_benchmark_unittest(json-test
- SRCS JSONTest.cpp
- DEPENDS json
-)
-
-#==============================================================================
-# Benchmarking tool
-#==============================================================================
-
-# Benchmark all implementations that can run on the target CPU.
-function(add_libc_multi_impl_benchmark name)
- get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations)
- foreach(fq_config_name IN LISTS fq_implementations)
- get_target_property(required_cpu_features ${fq_config_name} REQUIRE_CPU_FEATURES)
- cpu_supports(can_run "${required_cpu_features}")
- if(can_run)
- set(benchmark_name ${fq_config_name}_benchmark)
- add_executable(${benchmark_name}
- EXCLUDE_FROM_ALL
- LibcMemoryBenchmarkMain.cpp
- )
- get_target_property(entrypoint_object_file ${fq_config_name} "OBJECT_FILE_RAW")
- target_link_libraries(${benchmark_name} PUBLIC json ${entrypoint_object_file})
- string(TOUPPER ${name} name_upper)
- target_compile_definitions(${benchmark_name} PRIVATE "-DLIBC_BENCHMARK_FUNCTION_${name_upper}=LIBC_NAMESPACE::${name}" "-DLIBC_BENCHMARK_FUNCTION_NAME=\"${fq_config_name}\"")
- llvm_update_compile_flags(${benchmark_name})
- else()
- message(STATUS "Skipping benchmark for '${fq_config_name}' insufficient host cpu features '${required_cpu_features}'")
- endif()
- endforeach()
-endfunction()
-
-add_libc_multi_impl_benchmark(bcmp)
-add_libc_multi_impl_benchmark(bzero)
-add_libc_multi_impl_benchmark(memcmp)
-add_libc_multi_impl_benchmark(memcpy)
-add_libc_multi_impl_benchmark(memmove)
-add_libc_multi_impl_benchmark(memset)
-
-#==============================================================================
-# Google Benchmarking tool
-#==============================================================================
-
-# This target uses the Google Benchmark facility to report throughput for llvm
-# libc memory functions compiled for the host machine. This is useful to
-# continuously monitor the performance of the memory functions.
-add_executable(libc.benchmarks.memory_functions.opt_host
- EXCLUDE_FROM_ALL
- LibcMemoryGoogleBenchmarkMain.cpp
- LibcDefaultImplementations.cpp
-)
-target_link_libraries(libc.benchmarks.memory_functions.opt_host
- PRIVATE
- libc-memory-benchmark
- libc.src.string.memcmp_opt_host.__internal__
- libc.src.string.bcmp_opt_host.__internal__
- libc.src.string.memcpy_opt_host.__internal__
- libc.src.string.memset_opt_host.__internal__
- libc.src.string.bzero_opt_host.__internal__
- libc.src.string.memmove_opt_host.__internal__
- benchmark_main
-)
-llvm_update_compile_flags(libc.benchmarks.memory_functions.opt_host)
-
-add_subdirectory(automemcpy)
+if(NOT LIBC_TARGET_OS_IS_GPU)
+ find_package(Threads)
+
+ set(LLVM_LINK_COMPONENTS
+ Support
+ TargetParser
+ )
+
+ #==============================================================================
+ # Add Unit Testing Support
+ #==============================================================================
+
+ function(add_libc_benchmark_unittest target_name)
+ if(NOT LLVM_INCLUDE_TESTS)
+ return()
+ endif()
+
+ cmake_parse_arguments(
+ "LIBC_BENCHMARKS_UNITTEST"
+ "" # No optional arguments
+ "SUITE" # Single value arguments
+ "SRCS;DEPENDS" # Multi-value arguments
+ ${ARGN}
+ )
+
+ add_executable(${target_name}
+ EXCLUDE_FROM_ALL
+ ${LIBC_BENCHMARKS_UNITTEST_SRCS}
+ )
+ target_link_libraries(${target_name}
+ PRIVATE
+ llvm_gtest_main
+ llvm_gtest
+ ${LIBC_BENCHMARKS_UNITTEST_DEPENDS}
+ )
+ llvm_update_compile_flags(${target_name})
+
+ add_custom_command(
+ TARGET ${target_name}
+ POST_BUILD
+ COMMAND $<TARGET_FILE:${target_name}>
+ )
+ add_dependencies(libc-benchmark-util-tests ${target_name})
+ endfunction()
+
+ #==============================================================================
+ # Build Google Benchmark for libc
+ #==============================================================================
+
+ include(ExternalProject)
+ ExternalProject_Add(google-benchmark-libc
+ EXCLUDE_FROM_ALL ON
+ PREFIX google-benchmark-libc
+ SOURCE_DIR ${LLVM_THIRD_PARTY_DIR}/benchmark
+ INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/google-benchmark-libc
+ CMAKE_CACHE_ARGS
+ -DBENCHMARK_ENABLE_EXCEPTIONS:BOOL=OFF
+ -DBENCHMARK_ENABLE_LTO:BOOL=OFF
+ -DBENCHMARK_ENABLE_TESTING:BOOL=OFF
+ -DBENCHMARK_ENABLE_WERROR:BOOL=${LLVM_ENABLE_WERROR}
+ -DBENCHMARK_FORCE_WERROR:BOOL=OFF
+ -DBENCHMARK_USE_LIBCXX:BOOL=OFF
+ -DCMAKE_BUILD_TYPE:STRING=Release
+
+ -DCMAKE_SYSTEM_NAME:STRING=${CMAKE_SYSTEM_NAME}
+ -DCMAKE_SYSTEM_PROCESSOR:STRING=${CMAKE_SYSTEM_PROCESSOR}
+ -DCMAKE_C_COMPILER:STRING=${CMAKE_C_COMPILER}
+ -DCMAKE_CXX_COMPILER:STRING=${CMAKE_CXX_COMPILER}
+ -DCMAKE_CXX_FLAGS:STRING=${CMAKE_CXX_FLAGS}
+ -DCMAKE_FIND_ROOT_PATH:STRING=${CMAKE_FIND_ROOT_PATH}
+
+ -DBUILD_SHARED_LIBS:BOOL=OFF
+ -DCMAKE_EXE_LINKER_FLAGS:STRING=-static
+
+ -DCMAKE_CXX_STANDARD:STRING=14
+ -DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>
+ )
+
+ add_custom_target(libc-benchmark-util-tests)
+
+ # libc-benchmark
+ add_library(libc-benchmark
+ STATIC
+ EXCLUDE_FROM_ALL
+ LibcBenchmark.cpp
+ LibcBenchmark.h
+ )
+
+ target_include_directories(libc-benchmark
+ PUBLIC ${LLVM_INCLUDE_DIR} ${LLVM_MAIN_INCLUDE_DIR}
+ )
+ target_link_libraries(libc-benchmark
+ PUBLIC
+ benchmark::benchmark
+ LLVMSupport
+ LLVMTargetParser
+ Threads::Threads
+ )
+ add_dependencies(libc-benchmark google-benchmark-libc)
+ llvm_update_compile_flags(libc-benchmark)
+
+ add_libc_benchmark_unittest(libc-benchmark-test
+ SRCS LibcBenchmarkTest.cpp
+ DEPENDS libc-benchmark
+ )
+
+ # libc-memory-benchmark
+ add_library(libc-memory-benchmark
+ STATIC
+ EXCLUDE_FROM_ALL
+ LibcMemoryBenchmark.cpp
+ LibcMemoryBenchmark.h
+ LibcFunctionPrototypes.h
+ MemorySizeDistributions.cpp
+ MemorySizeDistributions.h
+ )
+ target_include_directories(libc-memory-benchmark
+ PUBLIC
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ )
+ target_link_libraries(libc-memory-benchmark
+ PUBLIC
+ libc-benchmark
+ )
+ llvm_update_compile_flags(libc-memory-benchmark)
+
+ add_libc_benchmark_unittest(libc-memory-benchmark-test
+ SRCS LibcMemoryBenchmarkTest.cpp
+ DEPENDS libc-memory-benchmark
+ )
+
+ # json
+ add_library(json
+ STATIC
+ EXCLUDE_FROM_ALL
+ JSON.cpp
+ JSON.h
+ )
+ target_link_libraries(json PUBLIC libc-memory-benchmark)
+ llvm_update_compile_flags(json)
+
+ add_libc_benchmark_unittest(json-test
+ SRCS JSONTest.cpp
+ DEPENDS json
+ )
+
+ #==============================================================================
+ # Benchmarking tool
+ #==============================================================================
+
+ # Benchmark all implementations that can run on the target CPU.
+ function(add_libc_multi_impl_benchmark name)
+ get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations)
+ foreach(fq_config_name IN LISTS fq_implementations)
+ get_target_property(required_cpu_features ${fq_config_name} REQUIRE_CPU_FEATURES)
+ cpu_supports(can_run "${required_cpu_features}")
+ if(can_run)
+ set(benchmark_name ${fq_config_name}_benchmark)
+ add_executable(${benchmark_name}
+ EXCLUDE_FROM_ALL
+ LibcMemoryBenchmarkMain.cpp
+ )
+ get_target_property(entrypoint_object_file ${fq_config_name} "OBJECT_FILE_RAW")
+ target_link_libraries(${benchmark_name} PUBLIC json ${entrypoint_object_file})
+ string(TOUPPER ${name} name_upper)
+ target_compile_definitions(${benchmark_name} PRIVATE "-DLIBC_BENCHMARK_FUNCTION_${name_upper}=LIBC_NAMESPACE::${name}" "-DLIBC_BENCHMARK_FUNCTION_NAME=\"${fq_config_name}\"")
+ llvm_update_compile_flags(${benchmark_name})
+ else()
+ message(STATUS "Skipping benchmark for '${fq_config_name}' insufficient host cpu features '${required_cpu_features}'")
+ endif()
+ endforeach()
+ endfunction()
+
+ add_libc_multi_impl_benchmark(bcmp)
+ add_libc_multi_impl_benchmark(bzero)
+ add_libc_multi_impl_benchmark(memcmp)
+ add_libc_multi_impl_benchmark(memcpy)
+ add_libc_multi_impl_benchmark(memmove)
+ add_libc_multi_impl_benchmark(memset)
+
+ #==============================================================================
+ # Google Benchmarking tool
+ #==============================================================================
+
+ # This target uses the Google Benchmark facility to report throughput for llvm
+ # libc memory functions compiled for the host machine. This is useful to
+ # continuously monitor the performance of the memory functions.
+ add_executable(libc.benchmarks.memory_functions.opt_host
+ EXCLUDE_FROM_ALL
+ LibcMemoryGoogleBenchmarkMain.cpp
+ LibcDefaultImplementations.cpp
+ )
+ target_link_libraries(libc.benchmarks.memory_functions.opt_host
+ PRIVATE
+ libc-memory-benchmark
+ libc.src.string.memcmp_opt_host.__internal__
+ libc.src.string.bcmp_opt_host.__internal__
+ libc.src.string.memcpy_opt_host.__internal__
+ libc.src.string.memset_opt_host.__internal__
+ libc.src.string.bzero_opt_host.__internal__
+ libc.src.string.memmove_opt_host.__internal__
+ benchmark_main
+ )
+ llvm_update_compile_flags(libc.benchmarks.memory_functions.opt_host)
+
+ add_subdirectory(automemcpy)
+endif()
+
+if(LIBC_TARGET_OS_IS_GPU)
+ add_subdirectory(gpu)
+endif()
diff --git a/libc/benchmarks/gpu/BenchmarkLogger.cpp b/libc/benchmarks/gpu/BenchmarkLogger.cpp
new file mode 100644
index 0000000000000..94a0d897c9585
--- /dev/null
+++ b/libc/benchmarks/gpu/BenchmarkLogger.cpp
@@ -0,0 +1,89 @@
+#include "benchmarks/gpu/BenchmarkLogger.h"
+#include "src/__support/CPP/string.h"
+#include "src/__support/CPP/string_view.h"
+#include "src/__support/OSUtil/io.h" // write_to_stderr
+#include "src/__support/big_int.h" // is_big_int
+#include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT128
+#include "src/__support/uint128.h"
+
+#include <stdint.h>
+
+namespace LIBC_NAMESPACE {
+namespace libc_gpu_benchmarks {
+
+// cpp::string_view specialization
+template <>
+BenchmarkLogger &BenchmarkLogger::operator<< <cpp::string_view>(cpp::string_view str) {
+ LIBC_NAMESPACE::write_to_stderr(str);
+ return *this;
+}
+
+// cpp::string specialization
+template <> BenchmarkLogger &BenchmarkLogger::operator<< <cpp::string>(cpp::string str) {
+ return *this << static_cast<cpp::string_view>(str);
+}
+
+// const char* specialization
+template <> BenchmarkLogger &BenchmarkLogger::operator<< <const char *>(const char *str) {
+ return *this << cpp::string_view(str);
+}
+
+// char* specialization
+template <> BenchmarkLogger &BenchmarkLogger::operator<< <char *>(char *str) {
+ return *this << cpp::string_view(str);
+}
+
+// char specialization
+template <> BenchmarkLogger &BenchmarkLogger::operator<<(char ch) {
+ return *this << cpp::string_view(&ch, 1);
+}
+
+// bool specialization
+template <> BenchmarkLogger &BenchmarkLogger::operator<<(bool cond) {
+ return *this << (cond ? "true" : "false");
+}
+
+// void * specialization: prints the address in hexadecimal with a 0x prefix.
+template <> BenchmarkLogger &BenchmarkLogger::operator<<(void *addr) {
+ return *this << IntegerToString<uintptr_t, radix::Hex::WithPrefix>(reinterpret_cast<uintptr_t>(addr)).view();
+}
+
+template <typename T> BenchmarkLogger &BenchmarkLogger::operator<<(T t) {
+ if constexpr (is_big_int_v<T> ||
+ (cpp::is_integral_v<T> && cpp::is_unsigned_v<T> &&
+ (sizeof(T) > sizeof(uint64_t)))) {
+ static_assert(sizeof(T) % 8 == 0, "Unsupported size of UInt");
+ const IntegerToString<T, radix::Hex::WithPrefix> buffer(t);
+ return *this << buffer.view();
+ } else {
+ return *this << cpp::to_string(t);
+ }
+}
+
+// Explicit instantiations for the integral types.
+// char is already specialized above to print a single character.
+template BenchmarkLogger &BenchmarkLogger::operator<< <short>(short);
+template BenchmarkLogger &BenchmarkLogger::operator<< <int>(int);
+template BenchmarkLogger &BenchmarkLogger::operator<< <long>(long);
+template BenchmarkLogger &BenchmarkLogger::operator<< <long long>(long long);
+template BenchmarkLogger &BenchmarkLogger::operator<< <unsigned char>(unsigned char);
+template BenchmarkLogger &BenchmarkLogger::operator<< <unsigned short>(unsigned short);
+template BenchmarkLogger &BenchmarkLogger::operator<< <unsigned int>(unsigned int);
+template BenchmarkLogger &BenchmarkLogger::operator<< <unsigned long>(unsigned long);
+template BenchmarkLogger &
+ BenchmarkLogger::operator<< <unsigned long long>(unsigned long long);
+
+#ifdef LIBC_TYPES_HAS_INT128
+template BenchmarkLogger &BenchmarkLogger::operator<< <__uint128_t>(__uint128_t);
+#endif // LIBC_TYPES_HAS_INT128
+template BenchmarkLogger &BenchmarkLogger::operator<< <UInt<128>>(UInt<128>);
+template BenchmarkLogger &BenchmarkLogger::operator<< <UInt<192>>(UInt<192>);
+template BenchmarkLogger &BenchmarkLogger::operator<< <UInt<256>>(UInt<256>);
+template BenchmarkLogger &BenchmarkLogger::operator<< <UInt<320>>(UInt<320>);
+
+// TODO: Add floating point formatting once it's supported by StringStream.
+
+BenchmarkLogger blog;
+
+} // namespace libc_gpu_benchmarks
+} // namespace LIBC_NAMESPACE
diff --git a/libc/benchmarks/gpu/BenchmarkLogger.h b/libc/benchmarks/gpu/BenchmarkLogger.h
new file mode 100644
index 0000000000000..ed3cc97e59c6d
--- /dev/null
+++ b/libc/benchmarks/gpu/BenchmarkLogger.h
@@ -0,0 +1,27 @@
+//===-- Utilities to log to stderr during GPU benchmarks --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_BENCHMARKS_GPU_BENCHMARKLOGGER_H
+#define LLVM_LIBC_BENCHMARKS_GPU_BENCHMARKLOGGER_H
+
+namespace LIBC_NAMESPACE {
+namespace libc_gpu_benchmarks {
+
+// Streams values to stderr; operator<< is defined in BenchmarkLogger.cpp.
+struct BenchmarkLogger {
+ constexpr BenchmarkLogger() = default;
+ template <typename T> BenchmarkLogger &operator<<(T);
+};
+
+// A global BenchmarkLogger instance to be used in benchmarks.
+extern BenchmarkLogger blog;
+
+} // namespace libc_gpu_benchmarks
+} // namespace LIBC_NAMESPACE
+
+#endif /* LLVM_LIBC_BENCHMARKS_GPU_BENCHMARKLOGGER_H */
diff --git a/libc/benchmarks/gpu/CMakeLists.txt b/libc/benchmarks/gpu/CMakeLists.txt
new file mode 100644
index 0000000000000..a18be27e33573
--- /dev/null
+++ b/libc/benchmarks/gpu/CMakeLists.txt
@@ -0,0 +1,183 @@
+add_subdirectory(timing)
+
+add_custom_target(gpu-benchmark)
+
+function (add_gpu_benchmark test_name)
+ if(NOT TARGET libc.startup.${LIBC_TARGET_OS}.crt1)
+ message(VERBOSE "Skipping ${fq_target_name} as it is not available on ${LIBC_TARGET_OS}.")
+ return()
+ endif()
+
+ cmake_parse_arguments(
+ "GPU_BENCHMARK"
+ "" # No optional arguments
+ "SUITE" # Single value arguments
+ "SRCS;HDRS;DEPENDS;ARGS;ENV;COMPILE_OPTIONS;LINK_LIBRARIES;LOADER_ARGS" # Multi-value arguments
+ ${ARGN}
+ )
+
+ if(NOT GPU_BENCHMARK_SUITE)
+ message(FATAL_ERROR "SUITE not specified for ${fq_target_name}")
+ endif()
+ if(NOT GPU_BENCHMARK_SRCS)
+ message(FATAL_ERROR "The SRCS list for add_gpu_benchmark is missing.")
+ endif()
+
+ get_fq_target_name(${test_name} fq_target_name)
+ get_fq_target_name(${test_name}.libc fq_libc_target_name) # Stores the compiled libc + infrastructure archive to link in
+ get_fq_deps_list(fq_deps_list ${GPU_BENCHMARK_DEPENDS})
+ list(APPEND fq_deps_list
+ # Hermetic tests use the platform's startup object. So, their deps also
+ # have to be collected.
+ libc.startup.${LIBC_TARGET_OS}.crt1
+ # We always add the memory functions objects. This is because the
+ # compiler's codegen can emit calls to the C memory functions.
+ libc.src.string.bcmp
+ libc.src.string.bzero
+ libc.src.string.memcmp
+ libc.src.string.memcpy
+ libc.src.string.memmove
+ libc.src.string.memset
+ libc.src.__support.StringUtil.error_to_string
+ )
+
+ list(REMOVE_DUPLICATES fq_deps_list)
+
+ # TODO: Instead of gathering internal object files from entrypoints,
+ # collect ...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
79772aa
to
e55e9b6
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! A few passing comments, I'll look into the benchmark in more detail later.
void Run() override { | ||
BenchmarkOptions Options; | ||
auto result = benchmark(Options, Func); | ||
constexpr auto GREEN = "\033[32m"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Keep in mind that this will need to handle multiple threads in the future, but I think we can safely restrict this to a single block just to make it easier to manage.
c87222c
to
87a5002
Compare
87a5002
to
087290b
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some final nits, looking pretty good overall.
break; | ||
} | ||
|
||
iterations *= options.scaling_factor; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit, could probably move this to the for loop.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Which for loop are you referring to? I think my original intent was to increase the number of iterations executed per sample after each sample (i.e. group of iterations) executes. Do you mean moving it to the for loop on line 47?
8951ce6
to
4e6fdfc
Compare
I'm pretty sure the CI fail is caused by my wrong implementation of iterators. Would you be able to review the iterator implementation for anything that seems blatantly wrong? I was a little confused by the implementation of the |
Can you make the iterators a separate patch? In any case, the iterators should be present in the Also might be worth rebasing your branch on main. |
Make a new PR and send it to me and @gchatelet to review? |
37f1ad5
to
9c47039
Compare
9c47039
to
e777a40
Compare
- Implements forward iterators for `cpp::fixed_vector` to use in #92009
This reverts commit a5ebf57f198cd79be132854b036f904c3983341d.
e777a40
to
4aa5e8b
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is looking pretty good. I'll try some local tests later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this looks good as an initial step. Other libc
contributors PTAL if you have any concerns.
@jameshu15869 change the title to reflect its non-draft status.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor nit about `asm volatile`, but LGTM otherwise.
PR for adding microbenchmarking infrastructure for NVPTX. `nvlink` cannot perform LTO, so we cannot inline `libc` functions and this function call overhead is not adjusted for during microbenchmarking.
PR for adding microbenchmarking infrastructure for NVPTX. `nvlink` cannot perform LTO, so we cannot inline `libc` functions and this function call overhead is not adjusted for during microbenchmarking.
PR for adding microbenchmarking infrastructure for NVPTX. `nvlink` cannot perform LTO, so we cannot inline `libc` functions and this function call overhead is not adjusted for during microbenchmarking.