tests: benchmarks: wait_queues: Record metrics

Enhance the wait_queues benchmark to output its summary
metrics as records, so that when it runs under Twister the results
are parsed and saved into twister.json and recording.csv files
for further analysis.
Also apply minor documentation edits and make ClangFormat happy.

Signed-off-by: Dmitrii Golovanov <dmitrii.golovanov@intel.com>
commit 33a91daf3b
Author: Dmitrii Golovanov <dmitrii.golovanov@intel.com>
Date:   2024-12-17 17:56:01 +01:00
Committed by: Benjamin Cabé

4 changed files with 64 additions and 36 deletions


@@ -21,6 +21,13 @@ config BENCHMARK_NUM_THREADS
 	  stress on the wait queues and better highlight the performance
 	  differences as the number of threads in the wait queue changes.
 
+config BENCHMARK_RECORDING
+	bool "Log statistics as records"
+	default n
+	help
+	  Log summary statistics as records to pass results
+	  to the Twister JSON report and recording.csv file(s).
+
 config BENCHMARK_VERBOSE
 	bool "Display detailed results"
 	default n
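When enabled, the summary statistics are emitted as "REC:" records instead of
the human-readable table. One way to switch the option on from the command
line (the board name is a placeholder, and the benchmark path is assumed to be
tests/benchmarks/wait_queues):

    west build -p -b <board> tests/benchmarks/wait_queues -- -DCONFIG_BENCHMARK_RECORDING=y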


@@ -2,11 +2,12 @@ Wait Queue Measurements
 #######################
 
 A Zephyr application developer may choose between two different wait queue
-implementations--dumb and scalable. These two queue implementations perform
+implementations: dumb and scalable. These two queue implementations perform
 differently under different loads. This benchmark can be used to showcase how
 the performance of these two implementations varies under varying conditions.
 These conditions include:
 
 * Time to add threads of increasing priority to a wait queue
 * Time to add threads of decreasing priority to a wait queue
 * Time to remove highest priority thread from a wait queue
@@ -16,4 +17,12 @@ By default, these tests show the minimum, maximum, and averages of the measured
 times. However, if the verbose option is enabled then the raw timings will also
 be displayed. The following will build this project with verbose support:
 
 .. code-block:: shell
 
    EXTRA_CONF_FILE="prj.verbose.conf" west build -p -b <board> <path to project>
+
+With ``CONFIG_BENCHMARK_RECORDING=y``, the measured summary statistics are
+instead shown as records, allowing Twister to parse the log and save that data
+into ``recording.csv`` files and the ``twister.json`` report.
+
+This output mode can be used together with the verbose output; however, only
+the summary statistics will be parsed as data records.
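For illustration, the record output produced by the printk() format in main.c
below looks roughly like this (spacing approximate, cycle and nanosecond
values made up):

    REC: sched.add.thread.WaitQ_tail.min          - Add threads of decreasing priority, min.           :    1318 cycles ,     329 ns :
    REC: sched.add.thread.WaitQ_tail.max          - Add threads of decreasing priority, max.           :    2710 cycles ,     677 ns :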


@@ -138,11 +138,8 @@ static uint64_t sqrt_u64(uint64_t square)
 	return square;
 }
 
-static void compute_and_report_stats(unsigned int num_threads,
-				      unsigned int num_iterations,
-				      uint64_t *cycles,
-				      const char *str)
+static void compute_and_report_stats(unsigned int num_threads, unsigned int num_iterations,
+				      uint64_t *cycles, const char *tag, const char *str)
 {
 	uint64_t minimum = cycles[0];
 	uint64_t maximum = cycles[0];
@@ -180,17 +177,38 @@ static void compute_and_report_stats(unsigned int num_threads,
 	std_dev /= num_threads;
 	std_dev = sqrt_u64(std_dev);
 
+#ifdef CONFIG_BENCHMARK_RECORDING
+	int tag_len = strlen(tag);
+	int descr_len = strlen(str);
+	int stag_len = strlen(".stddev");
+	int sdescr_len = strlen(", stddev.");
+
+	stag_len = (tag_len + stag_len < 40) ? 40 - tag_len : stag_len;
+	sdescr_len = (descr_len + sdescr_len < 50) ? 50 - descr_len : sdescr_len;
+
+	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".min", str,
+	       sdescr_len, ", min.", minimum, (uint32_t)timing_cycles_to_ns(minimum));
+	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".max", str,
+	       sdescr_len, ", max.", maximum, (uint32_t)timing_cycles_to_ns(maximum));
+	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".avg", str,
+	       sdescr_len, ", avg.", average, (uint32_t)timing_cycles_to_ns(average));
+	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".stddev", str,
+	       sdescr_len, ", stddev.", std_dev, (uint32_t)timing_cycles_to_ns(std_dev));
+#else
+	ARG_UNUSED(tag);
+
 	printk("------------------------------------\n");
 	printk("%s\n", str);
-	printk("  Minimum : %7llu cycles (%7u nsec)\n",
-	       minimum, (uint32_t)timing_cycles_to_ns(minimum));
-	printk("  Maximum : %7llu cycles (%7u nsec)\n",
-	       maximum, (uint32_t)timing_cycles_to_ns(maximum));
-	printk("  Average : %7llu cycles (%7u nsec)\n",
-	       average, (uint32_t)timing_cycles_to_ns(average));
-	printk("  Std Deviation: %7llu cycles (%7u nsec)\n",
-	       std_dev, (uint32_t)timing_cycles_to_ns(std_dev));
+	printk("  Minimum : %7llu cycles (%7u nsec)\n", minimum,
+	       (uint32_t)timing_cycles_to_ns(minimum));
+	printk("  Maximum : %7llu cycles (%7u nsec)\n", maximum,
+	       (uint32_t)timing_cycles_to_ns(maximum));
+	printk("  Average : %7llu cycles (%7u nsec)\n", average,
+	       (uint32_t)timing_cycles_to_ns(average));
+	printk("  Std Deviation: %7llu cycles (%7u nsec)\n", std_dev,
+	       (uint32_t)timing_cycles_to_ns(std_dev));
+#endif
 }
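The fixed 40- and 50-column minimum widths above keep every REC field aligned
no matter how long the tag and description are, so a single regex can parse
them all. A minimal standalone sketch of that alignment trick, using plain
printf() and an illustrative tag (not part of the commit):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        const char *tag = "sched.add.thread.WaitQ_tail";
        const char *suffixes[] = {".min", ".max", ".avg", ".stddev"};
        int tag_len = strlen(tag);
        int stag_len = strlen(".stddev"); /* widest suffix */

        /* Pad the suffix so tag + suffix spans at least 40 columns,
         * as compute_and_report_stats() does for its REC records.
         */
        stag_len = (tag_len + stag_len < 40) ? 40 - tag_len : stag_len;

        for (int i = 0; i < 4; i++) {
            printf("REC: %s%-*s -\n", tag, stag_len, suffixes[i]);
        }
        return 0;
    }

Each printed line reaches the "-" separator at the same column, which is what
makes the Twister regex in testcase.yaml below reliable.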
@@ -225,28 +243,23 @@ int main(void)
 		test_decreasing_priority(&wait_q, CONFIG_BENCHMARK_NUM_THREADS);
 	}
 
-	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS,
-				 CONFIG_BENCHMARK_NUM_ITERATIONS,
-				 add_cycles,
+	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
+				 add_cycles, "sched.add.thread.WaitQ_tail",
 				 "Add threads of decreasing priority");
 
 #ifdef CONFIG_BENCHMARK_VERBOSE
 	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
 		snprintf(tag, sizeof(tag),
 			 "WaitQ.add.to.tail.%04u.waiters", i);
-		snprintf(description, sizeof(description),
-			 "%-40s - Add thread of priority %u",
-			 tag, dummy_thread[i].prio);
+		snprintf(description, sizeof(description), "%-40s - Add thread of priority %u", tag,
+			 dummy_thread[i].prio);
 		PRINT_STATS_AVG(description, (uint32_t)add_cycles[i],
 				CONFIG_BENCHMARK_NUM_ITERATIONS);
 	}
 #endif
 
 	printk("------------------------------------\n");
 
-	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS,
-				 CONFIG_BENCHMARK_NUM_ITERATIONS,
-				 remove_cycles,
+	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
+				 remove_cycles, "sched.remove.thread.WaitQ_head",
 				 "Remove threads of decreasing priority");
 
 #ifdef CONFIG_BENCHMARK_VERBOSE
@@ -262,17 +275,14 @@ int main(void)
 	}
 #endif
 
 	printk("------------------------------------\n");
 
 	cycles_reset(CONFIG_BENCHMARK_NUM_THREADS);
 
 	for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) {
 		test_increasing_priority(&wait_q, CONFIG_BENCHMARK_NUM_THREADS);
 	}
 
-	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS,
-				 CONFIG_BENCHMARK_NUM_ITERATIONS,
-				 add_cycles,
+	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
+				 add_cycles, "sched.add.thread.WaitQ_head",
 				 "Add threads of increasing priority");
 
 #ifdef CONFIG_BENCHMARK_VERBOSE
@@ -289,11 +299,8 @@ int main(void)
 	}
 #endif
 
 	printk("------------------------------------\n");
 
-	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS,
-				 CONFIG_BENCHMARK_NUM_ITERATIONS,
-				 remove_cycles,
+	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
+				 remove_cycles, "sched.remove.thread.WaitQ_tail",
 				 "Remove threads of increasing priority");
 
 #ifdef CONFIG_BENCHMARK_VERBOSE


@@ -13,6 +13,11 @@ common:
     type: one_line
     regex:
       - "PROJECT EXECUTION SUCCESSFUL"
+    record:
+      regex:
+        "REC: (?P<metric>.*) - (?P<description>.*):(?P<cycles>.*) cycles ,(?P<nanoseconds>.*) ns"
+  extra_configs:
+    - CONFIG_BENCHMARK_RECORDING=y
 tests:
   benchmark.wait_queues.dumb:
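Twister matches this regex against each console line; every named capture
group ("metric", "description", "cycles", "nanoseconds") becomes a field of
the recorded data in twister.json and a column in recording.csv. An
illustrative row (values made up, captured whitespace trimmed for
readability; the actual column order may differ):

    metric,description,cycles,nanoseconds
    sched.add.thread.WaitQ_tail.min,"Add threads of decreasing priority, min.",1318,329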