7
7
#include " src/__support/GPU/utils.h"
8
8
#include " src/__support/fixedvector.h"
9
9
#include " src/__support/macros/config.h"
10
+ #include " src/stdio/printf.h"
10
11
#include " src/time/gpu/time_utils.h"
11
12
12
13
namespace LIBC_NAMESPACE_DECL {
@@ -73,10 +74,16 @@ struct AtomicBenchmarkSums {
73
74
};
74
75
75
76
AtomicBenchmarkSums all_results;
77
+ const char *header_format_string =
78
+ " Benchmark | Cycles | Min | Max | Iterations | "
79
+ " Time | Stddev | Threads |\n " ;
80
+ const char *output_format_string =
81
+ " %-20s |%8ld |%8ld |%8ld |%11ld |%9ld %2s |%9ld |%9d |\n " ;
82
+
83
+ constexpr auto GREEN = " \033 [32m" ;
84
+ constexpr auto RESET = " \033 [0m" ;
76
85
77
86
void print_results (Benchmark *b) {
78
- constexpr auto GREEN = " \033 [32m" ;
79
- constexpr auto RESET = " \033 [0m" ;
80
87
81
88
BenchmarkResult result;
82
89
cpp::atomic_thread_fence (cpp::MemoryOrder::RELEASE);
@@ -92,21 +99,51 @@ void print_results(Benchmark *b) {
92
99
all_results.samples_sum .load (cpp::MemoryOrder::RELAXED) / num_threads;
93
100
result.total_iterations =
94
101
all_results.iterations_sum .load (cpp::MemoryOrder::RELAXED) / num_threads;
95
- result. total_time =
102
+ const uint64_t duration_ns =
96
103
all_results.time_sum .load (cpp::MemoryOrder::RELAXED) / num_threads;
104
+ const uint64_t duration_us = duration_ns / 1000 ;
105
+ const uint64_t duration_ms = duration_ns / (1000 * 1000 );
106
+ uint64_t converted_duration = duration_ns;
107
+ cpp::string time_unit;
108
+ if (duration_ms != 0 ) {
109
+ converted_duration = duration_ms;
110
+ time_unit = cpp::string (" ms" );
111
+ } else if (duration_us != 0 ) {
112
+ converted_duration = duration_us;
113
+ time_unit = cpp::string (" us" );
114
+ } else {
115
+ converted_duration = duration_ns;
116
+ time_unit = cpp::string (" ns" );
117
+ }
118
+ result.total_time = converted_duration;
119
+ // result.total_time =
120
+ // all_results.time_sum.load(cpp::MemoryOrder::RELAXED) / num_threads;
97
121
cpp::atomic_thread_fence (cpp::MemoryOrder::RELEASE);
98
122
99
- log << GREEN << " [ RUN ] " << RESET << b->get_name () << ' \n ' ;
100
- log << GREEN << " [ OK ] " << RESET << b->get_name () << " : "
101
- << result.cycles << " cycles, " << result.min << " min, " << result.max
102
- << " max, " << result.total_iterations << " iterations, "
103
- << result.total_time << " ns, "
104
- << static_cast <uint64_t >(result.standard_deviation )
105
- << " stddev (num threads: " << num_threads << " )\n " ;
123
+ LIBC_NAMESPACE::printf (
124
+ output_format_string, b->get_test_name ().data (), result.cycles ,
125
+ result.min , result.max , result.total_iterations , result.total_time ,
126
+ time_unit.data (), static_cast <uint64_t >(result.standard_deviation ),
127
+ num_threads);
128
+ }
129
+
130
+ void print_header () {
131
+ LIBC_NAMESPACE::printf (" %s" , GREEN);
132
+ LIBC_NAMESPACE::printf (" Running Suite: %-10s\n " ,
133
+ benchmarks[0 ]->get_suite_name ().data ());
134
+ LIBC_NAMESPACE::printf (" %s" , RESET);
135
+ LIBC_NAMESPACE::printf (header_format_string);
136
+ LIBC_NAMESPACE::printf (
137
+ " ---------------------------------------------------------------------"
138
+ " --------------------------------\n " );
106
139
}
107
140
108
141
void Benchmark::run_benchmarks () {
109
142
uint64_t id = gpu::get_thread_id ();
143
+
144
+ if (id == 0 )
145
+ print_header ();
146
+
110
147
gpu::sync_threads ();
111
148
112
149
for (Benchmark *b : benchmarks) {
0 commit comments