tests: benchmarks: sched_queues: Record metrics

Enhance the sched_queue benchmark to output resulting summary
metrics as records, so when it runs with Twister the results
are parsed and saved into twister.json and recording.csv files
for further analysis.
Also includes minor documentation edits and ClangFormat formatting fixes.

Signed-off-by: Dmitrii Golovanov <dmitrii.golovanov@intel.com>
This commit is contained in:
Dmitrii Golovanov 2024-12-17 17:24:35 +01:00 committed by Benjamin Cabé
commit f3358259d6
4 changed files with 70 additions and 40 deletions

View file

@ -21,6 +21,13 @@ config BENCHMARK_NUM_THREADS
stress on the ready queue and better highlight the performance
differences as the number of threads in the ready queue changes.
config BENCHMARK_RECORDING
bool "Log statistics as records"
default n
help
Log summary statistics as records to pass results
to the Twister JSON report and recording.csv file(s).
config BENCHMARK_VERBOSE
bool "Display detailed results"
default n

View file

@ -2,20 +2,29 @@ Scheduling Queue Measurements
#############################
A Zephyr application developer may choose between three different scheduling
algorithms--dumb, scalable and multiq. These different algorithms have
different performance characteristics--characteristics that vary as the
algorithms: dumb, scalable and multiq. These different algorithms have
different performance characteristics that vary as the
number of ready threads increases. This benchmark can be used to help
determine which scheduling algorithm may best suit the developer's application.
This benchmark measures the ...
* Time to add a threads of increasing priority to the ready queue
* Time to add threads of decreasing priority to the ready queue
* Time to remove highest priority thread from a wait queue
* Time to remove lowest priority thread from a wait queue
This benchmark measures:
* Time to add threads of increasing priority to the ready queue.
* Time to add threads of decreasing priority to the ready queue.
* Time to remove highest priority thread from a wait queue.
* Time to remove lowest priority thread from a wait queue.
By default, these tests show the minimum, maximum, and averages of the measured
times. However, if the verbose option is enabled then the set of measured
times will be displayed. The following will build this project with verbose
support:
.. code-block:: shell
EXTRA_CONF_FILE="prj.verbose.conf" west build -p -b <board> <path to project>
Alternative output with ``CONFIG_BENCHMARK_RECORDING=y`` is to show the measured
summary statistics as records to allow Twister to parse the log and save that data
into ``recording.csv`` files and ``twister.json`` report.
This output mode can be used together with the verbose output; however, only
the summary statistics will be parsed as data records.

View file

@ -151,10 +151,8 @@ static uint64_t sqrt_u64(uint64_t square)
return square;
}
static void compute_and_report_stats(unsigned int num_threads,
unsigned int num_iterations,
uint64_t *cycles,
const char *str)
static void compute_and_report_stats(unsigned int num_threads, unsigned int num_iterations,
uint64_t *cycles, const char *tag, const char *str)
{
uint64_t minimum = cycles[0];
uint64_t maximum = cycles[0];
@ -190,16 +188,38 @@ static void compute_and_report_stats(unsigned int num_threads,
std_dev /= num_threads;
std_dev = sqrt_u64(std_dev);
#ifdef CONFIG_BENCHMARK_RECORDING
int tag_len = strlen(tag);
int descr_len = strlen(str);
int stag_len = strlen(".stddev");
int sdescr_len = strlen(", stddev.");
stag_len = (tag_len + stag_len < 40) ? 40 - tag_len : stag_len;
sdescr_len = (descr_len + sdescr_len < 50) ? 50 - descr_len : sdescr_len;
printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".min", str,
sdescr_len, ", min.", minimum, (uint32_t)timing_cycles_to_ns(minimum));
printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".max", str,
sdescr_len, ", max.", maximum, (uint32_t)timing_cycles_to_ns(maximum));
printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".avg", str,
sdescr_len, ", avg.", average, (uint32_t)timing_cycles_to_ns(average));
printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".stddev", str,
sdescr_len, ", stddev.", std_dev, (uint32_t)timing_cycles_to_ns(std_dev));
#else
ARG_UNUSED(tag);
printk("------------------------------------\n");
printk("%s\n", str);
printk(" Minimum : %7llu cycles (%7u nsec)\n",
minimum, (uint32_t)timing_cycles_to_ns(minimum));
printk(" Maximum : %7llu cycles (%7u nsec)\n",
maximum, (uint32_t)timing_cycles_to_ns(maximum));
printk(" Average : %7llu cycles (%7u nsec)\n",
average, (uint32_t)timing_cycles_to_ns(average));
printk(" Std Deviation: %7llu cycles (%7u nsec)\n",
std_dev, (uint32_t)timing_cycles_to_ns(std_dev));
printk(" Minimum : %7llu cycles (%7u nsec)\n", minimum,
(uint32_t)timing_cycles_to_ns(minimum));
printk(" Maximum : %7llu cycles (%7u nsec)\n", maximum,
(uint32_t)timing_cycles_to_ns(maximum));
printk(" Average : %7llu cycles (%7u nsec)\n", average,
(uint32_t)timing_cycles_to_ns(average));
printk(" Std Deviation: %7llu cycles (%7u nsec)\n", std_dev,
(uint32_t)timing_cycles_to_ns(std_dev));
#endif
}
int main(void)
@ -233,28 +253,23 @@ int main(void)
test_decreasing_priority(CONFIG_BENCHMARK_NUM_THREADS);
}
compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS,
CONFIG_BENCHMARK_NUM_ITERATIONS,
add_cycles,
compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
add_cycles, "sched.add.thread.ReadyQ_tail",
"Add threads of decreasing priority");
#ifdef CONFIG_BENCHMARK_VERBOSE
for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
snprintf(tag, sizeof(tag),
"ReadyQ.add.to.tail.%04u.waiters", i);
snprintf(description, sizeof(description),
"%-40s - Add thread of priority (%u)",
snprintf(description, sizeof(description), "%-40s - Add thread of priority (%u)",
tag, test_thread[i].base.prio);
PRINT_STATS_AVG(description, (uint32_t)add_cycles[i],
CONFIG_BENCHMARK_NUM_ITERATIONS);
}
#endif
printk("------------------------------------\n");
compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS,
CONFIG_BENCHMARK_NUM_ITERATIONS,
remove_cycles,
compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
remove_cycles, "sched.remove.thread.ReadyQ_head",
"Remove threads of decreasing priority");
#ifdef CONFIG_BENCHMARK_VERBOSE
@ -269,17 +284,14 @@ int main(void)
}
#endif
printk("------------------------------------\n");
cycles_reset(CONFIG_BENCHMARK_NUM_THREADS);
for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) {
test_increasing_priority(CONFIG_BENCHMARK_NUM_THREADS);
}
compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS,
CONFIG_BENCHMARK_NUM_ITERATIONS,
add_cycles,
compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
add_cycles, "sched.add.thread.ReadyQ_head",
"Add threads of increasing priority");
#ifdef CONFIG_BENCHMARK_VERBOSE
@ -295,12 +307,9 @@ int main(void)
}
#endif
printk("------------------------------------\n");
compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS,
CONFIG_BENCHMARK_NUM_ITERATIONS,
remove_cycles,
"Remove threads or increasing priority");
compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
remove_cycles, "sched.remove.thread.ReadyQ_tail",
"Remove threads of increasing priority");
#ifdef CONFIG_BENCHMARK_VERBOSE
for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {

View file

@ -14,6 +14,11 @@ common:
type: one_line
regex:
- "PROJECT EXECUTION SUCCESSFUL"
record:
regex:
"REC: (?P<metric>.*) - (?P<description>.*):(?P<cycles>.*) cycles ,(?P<nanoseconds>.*) ns"
extra_configs:
- CONFIG_BENCHMARK_RECORDING=y
tests:
benchmark.sched_queues.dumb: