tests: Add benchmark for IPI performance

Adds tests to better gauge IPI performance on SMP. In each case, one
CPU is used as the source of IPIs while the remaining CPUs are kept busy
doing "work". Every 30 seconds the benchmark reports the amount of
"work" done by the busy CPUs along with the activity (preemptions
performed or IPIs issued) of the CPU generating the IPIs.

This can be used to ...
 1. Show how enabling IPI optimization affects system performance.
 2. Show the cost of spinlock contention as the number of CPUs increases.
 3. Measure the relative performance of scheduler changes on SMP.
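
The suite can be built and run under QEMU in the usual way. The commands
below assume the benchmark lands under tests/benchmarks/ipi_metric (an
assumed path; adjust it to wherever these files live). The test variant is
selected via the IPI_METRIC_TEST Kconfig choice and defaults to the
preemptive test:

  west build -b qemu_x86_64 -p auto tests/benchmarks/ipi_metric
  west build -t run

All four variants can also be collected through twister:

  ./scripts/twister -p qemu_x86_64 -T tests/benchmarks/ipi_metric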

Signed-off-by: Peter Mitsis <peter.mitsis@intel.com>

Commit 5c36567c56 by Peter Mitsis, 2025-02-13 14:53:20 -08:00
(committed by Benjamin Cabé)
10 changed files with 588 additions and 0 deletions

@@ -0,0 +1,26 @@
# SPDX-License-Identifier: Apache-2.0
cmake_minimum_required(VERSION 3.20.0)
find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})
project(ipi_metric)
#FILE(GLOB app_sources src/*.c)
#target_sources(app PRIVATE ${app_sources})
target_sources_ifdef(
CONFIG_IPI_METRIC_PREEMPTIVE
app
PRIVATE
src/ipi_metric_preemptive.c
)
target_sources_ifdef(
CONFIG_IPI_METRIC_PRIMITIVE_BROADCAST
app
PRIVATE
src/ipi_metric_primitive.c
)
target_sources_ifdef(
CONFIG_IPI_METRIC_PRIMITIVE_DIRECTED
app
PRIVATE
src/ipi_metric_primitive.c
)

@@ -0,0 +1,37 @@
# Copyright (c) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
mainmenu "IPI-Metric RTOS Test Suite"
choice IPI_METRIC_TEST
prompt "Select an IPI-Metric test to execute"
default IPI_METRIC_PREEMPTIVE
help
The IPI-Metric benchmark suite has a single CPU in an SMP system
dedicated to generating IPIs under varying conditions while the
remaining CPUs perform their "work" and process IPIs. These tests
track the amount of "work" and the number of IPIs processed
during 30-second intervals.
config IPI_METRIC_PREEMPTIVE
bool "IPIs are generated due threads preempting one another"
help
The CPU generating the IPIs does so as a byproduct of resuming and
suspending a series of preemptible threads.
config IPI_METRIC_PRIMITIVE_BROADCAST
bool "IPIs are generated using primitive arch_sched_broadcast_ipi()"
help
The CPU generating the IPIs does so by directly calling
arch_sched_broadcast_ipi() to broadcast them to all CPUs.
config IPI_METRIC_PRIMITIVE_DIRECTED
bool "IPIs are generated using primitive arch_sched_directed_ipi()"
depends on ARCH_HAS_DIRECTED_IPIS
help
The CPU generating the IPIs does so by directly calling
arch_sched_directed_ipi() to direct them to a single CPU.
endchoice
source "Kconfig.zephyr"

@@ -0,0 +1,4 @@
# Copyright (c) 2022 Carlo Caione <ccaione@baylibre.com>
# SPDX-License-Identifier: Apache-2.0
CONFIG_MP_MAX_NUM_CPUS=4

@@ -0,0 +1,19 @@
/* Copyright 2022 Carlo Caione <ccaione@baylibre.com>
* SPDX-License-Identifier: Apache-2.0
*/
/ {
cpus {
cpu@2 {
device_type = "cpu";
compatible = "arm,cortex-a53";
reg = <2>;
};
cpu@3 {
device_type = "cpu";
compatible = "arm,cortex-a53";
reg = <3>;
};
};
};

@@ -0,0 +1 @@
CONFIG_MP_MAX_NUM_CPUS=4

@@ -0,0 +1,15 @@
/ {
cpus {
cpu@2 {
device_type = "cpu";
compatible = "intel,x86_64";
reg = <2>;
};
cpu@3 {
device_type = "cpu";
compatible = "intel,x86_64";
reg = <3>;
};
};
};

@@ -0,0 +1,31 @@
# Default base configuration file
# Use a tickless kernel to minimize the number of timer interrupts
CONFIG_TICKLESS_KERNEL=y
CONFIG_SYS_CLOCK_TICKS_PER_SEC=100
# Optimize for speed
CONFIG_SPEED_OPTIMIZATIONS=y
# Disable time slicing
CONFIG_TIMESLICING=n
# Disabling hardware stack protection can greatly
# improve system performance.
CONFIG_HW_STACK_PROTECTION=n
# Picolibc's memcpy is faster than Zephyr's minimal libc memcpy
CONFIG_PICOLIBC_SPEED_OPTIMIZATIONS=y
CONFIG_PICOLIBC_USE_MODULE=y
# Disable Thread Local Storage for better context switching times
CONFIG_THREAD_LOCAL_STORAGE=n
# Disable memory slab pointer validation
CONFIG_MEM_SLAB_POINTER_VALIDATE=n
# Allow for the number of scheduling IPIs to be tracked
CONFIG_TRACE_SCHED_IPI=y
# Enable smarter delivery of scheduling IPIs
CONFIG_IPI_OPTIMIZE=y

@@ -0,0 +1,173 @@
/*
* Copyright (c) 2023,2024 Intel Corporation.
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdio.h>
#include <zephyr/kernel.h>
#if CONFIG_MP_MAX_NUM_CPUS == 1
#error "Test requires a system with more than 1 CPU"
#endif
#define IPI_TEST_INTERVAL_DURATION 30
#define NUM_WORK_THREADS (CONFIG_MP_MAX_NUM_CPUS - 1)
#define WORK_STACK_SIZE 4096
#define NUM_PREEMPTIVE_THREADS 5
#define PREEMPTIVE_STACK_SIZE 4096
static K_THREAD_STACK_ARRAY_DEFINE(work_stack, NUM_WORK_THREADS, WORK_STACK_SIZE);
static K_THREAD_STACK_ARRAY_DEFINE(preemptive_stack, NUM_PREEMPTIVE_THREADS, PREEMPTIVE_STACK_SIZE);
static struct k_thread work_thread[NUM_WORK_THREADS];
static unsigned long work_array[NUM_WORK_THREADS][1024];
static volatile unsigned long work_counter[NUM_WORK_THREADS];
static struct k_thread preemptive_thread[NUM_PREEMPTIVE_THREADS];
static unsigned int preemptive_counter[NUM_PREEMPTIVE_THREADS];
static atomic_t ipi_counter;
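/*
 * With CONFIG_TRACE_SCHED_IPI=y the kernel calls z_trace_sched_ipi() on
 * every CPU that services a scheduling IPI; the benchmark supplies the
 * hook's definition so that each serviced IPI increments ipi_counter.
 */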
void z_trace_sched_ipi(void)
{
atomic_inc(&ipi_counter);
}
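/*
 * "Work" loop run on each busy CPU: every pass bumps that CPU's work
 * counter and mixes it into a 1024-entry array so the CPU performs real
 * memory traffic between the scheduling IPIs it services.
 */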
void work_entry(void *p1, void *p2, void *p3)
{
unsigned int index = POINTER_TO_UINT(p1);
unsigned long *array = p2;
unsigned long counter;
while (1) {
for (unsigned int i = 0; i < 1024; i++) {
counter = work_counter[index]++;
array[i] = (array[i] + counter) ^ array[i];
}
}
}
void preemptive_entry(void *p1, void *p2, void *p3)
{
unsigned int index = POINTER_TO_UINT(p1);
ARG_UNUSED(p2);
ARG_UNUSED(p3);
struct k_thread *suspend = NULL;
struct k_thread *resume = NULL;
if (index != (NUM_PREEMPTIVE_THREADS - 1)) {
resume = &preemptive_thread[index + 1];
}
if (index != 0) {
suspend = k_current_get();
}
while (1) {
if (resume != NULL) {
k_thread_resume(resume);
}
preemptive_counter[index]++;
if (suspend != NULL) {
k_thread_suspend(suspend);
}
}
}
void report(void)
{
unsigned int elapsed_time = IPI_TEST_INTERVAL_DURATION;
unsigned long total_preempt;
unsigned long total_work;
unsigned long last_work_counter[NUM_WORK_THREADS] = {};
unsigned long last_preempt[NUM_PREEMPTIVE_THREADS] = {};
unsigned long tmp_work_counter[NUM_WORK_THREADS] = {};
unsigned long tmp_preempt[NUM_PREEMPTIVE_THREADS] = {};
unsigned int i;
unsigned int tmp_ipi_counter;
atomic_set(&ipi_counter, 0);
while (1) {
k_sleep(K_SECONDS(IPI_TEST_INTERVAL_DURATION));
/*
* Get local copies of the counters to minimize
* the impacts of delays from printf().
*/
total_work = 0;
for (i = 0; i < NUM_WORK_THREADS; i++) {
tmp_work_counter[i] = work_counter[i];
total_work += (tmp_work_counter[i] - last_work_counter[i]);
}
/* Sum the preemptive counters. */
total_preempt = 0;
for (i = 0; i < NUM_PREEMPTIVE_THREADS; i++) {
tmp_preempt[i] = preemptive_counter[i];
total_preempt += (tmp_preempt[i] - last_preempt[i]);
}
tmp_ipi_counter = (unsigned int)atomic_set(&ipi_counter, 0);
printf("**** IPI-Metric Basic Scheduling Test **** Elapsed Time: %u\n",
elapsed_time);
printf(" Preemptive Counter Total: %lu\n", total_preempt);
for (i = 0; i < NUM_PREEMPTIVE_THREADS; i++) {
printf(" - Counter #%u: %lu\n",
i, tmp_preempt[i] - last_preempt[i]);
last_preempt[i] = tmp_preempt[i];
}
printf(" IPI Count: %u\n", tmp_ipi_counter);
printf(" Total Work: %lu\n", total_work);
for (i = 0; i < NUM_WORK_THREADS; i++) {
printf(" - Work Counter #%u: %lu\n",
i, tmp_work_counter[i] - last_work_counter[i]);
last_work_counter[i] = tmp_work_counter[i];
}
elapsed_time += IPI_TEST_INTERVAL_DURATION;
}
}
int main(void)
{
unsigned int i;
for (i = 0; i < NUM_WORK_THREADS; i++) {
k_thread_create(&work_thread[i], work_stack[i],
WORK_STACK_SIZE, work_entry,
UINT_TO_POINTER(i), work_array[i], NULL,
-1, 0, K_NO_WAIT);
}
/*
* Create the preemptive threads and switch them to
* the suspended state.
*/
for (i = 0; i < NUM_PREEMPTIVE_THREADS; i++) {
k_thread_create(&preemptive_thread[i], preemptive_stack[i],
PREEMPTIVE_STACK_SIZE, preemptive_entry,
UINT_TO_POINTER(i), NULL, NULL,
10 - i, 0, K_FOREVER);
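/*
 * The thread was created with a K_FOREVER start delay. Suspending it and
 * then waking it cancels that delay while leaving the thread suspended,
 * so a later k_thread_resume() (from main() or preemptive_entry()) is
 * all that is needed to make it runnable.
 */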
k_thread_suspend(&preemptive_thread[i]);
k_wakeup(&preemptive_thread[i]);
}
k_thread_resume(&preemptive_thread[0]);
report();
}

@@ -0,0 +1,176 @@
/*
* Copyright (c) 2025 Intel Corporation.
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdio.h>
#include <zephyr/kernel.h>
#if CONFIG_MP_MAX_NUM_CPUS <= 1
#error "Test requires a system with more than 1 CPU"
#endif
#define IPI_TEST_INTERVAL_DURATION 30
#define NUM_WORK_THREADS (CONFIG_MP_MAX_NUM_CPUS - 1)
#define WORK_STACK_SIZE 4096
#define PRIMITIVE_STACK_SIZE 4096
static K_THREAD_STACK_ARRAY_DEFINE(work_stack, NUM_WORK_THREADS, WORK_STACK_SIZE);
static K_THREAD_STACK_DEFINE(primitive_stack, PRIMITIVE_STACK_SIZE);
static struct k_thread work_thread[NUM_WORK_THREADS];
static unsigned long work_array[NUM_WORK_THREADS][1024];
static volatile unsigned long work_counter[NUM_WORK_THREADS];
static struct k_thread primitive_thread;
static volatile unsigned long primitives_issued;
static atomic_t ipi_cpu_bitmap;
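/*
 * Hook invoked by the kernel (CONFIG_TRACE_SCHED_IPI=y) on every CPU that
 * services a scheduling IPI. Each CPU sets its bit in ipi_cpu_bitmap so the
 * issuing thread can tell which CPUs actually received an IPI.
 */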
void z_trace_sched_ipi(void)
{
atomic_or(&ipi_cpu_bitmap, BIT(_current_cpu->id));
}
void work_entry(void *p1, void *p2, void *p3)
{
unsigned int index = POINTER_TO_UINT(p1);
unsigned long *array = p2;
unsigned long counter;
while (1) {
for (unsigned int i = 0; i < 1024; i++) {
counter = work_counter[index]++;
array[i] = (array[i] + counter) ^ array[i];
}
}
}
void primitive_entry(void *p1, void *p2, void *p3)
{
unsigned int desired_ipi_set;
unsigned int value;
int key;
ARG_UNUSED(p1);
ARG_UNUSED(p2);
ARG_UNUSED(p3);
/*
* All other CPUs are executing cooperative threads and are not
* expected to switch in a new thread. Select a CPU targeted for IPIs.
*/
#ifdef CONFIG_IPI_METRIC_PRIMITIVE_DIRECTED
key = arch_irq_lock();
desired_ipi_set = (_current_cpu->id == 0) ? BIT(1) : BIT(0);
arch_irq_unlock(key);
#else
desired_ipi_set = (1 << arch_num_cpus()) - 1;
key = arch_irq_lock();
desired_ipi_set ^= BIT(_current_cpu->id);
arch_irq_unlock(key);
#endif
while (1) {
atomic_set(&ipi_cpu_bitmap, 0);
#ifdef CONFIG_IPI_METRIC_PRIMITIVE_DIRECTED
arch_sched_directed_ipi(desired_ipi_set);
#else
arch_sched_broadcast_ipi();
#endif
primitives_issued++;
/*
* Loop until all the expected CPUs have flagged that they
* have processed the schedule IPI from above.
*/
while (1) {
value = (unsigned int)atomic_get(&ipi_cpu_bitmap);
/*
* Note: z_trace_sched_ipi(), which is used to track
* which CPUs processed an IPI, is not just called as a
* result of the primitives arch_sched_directed_ipi()
* or arch_sched_broadcast_ipi() above. Schedule IPIs
* will also be sent when ticks are announced such as
* when the k_sleep() in report() expires and this
* benchmark can not control which CPUs will receive
* those IPIs. To account for this, a mask is applied.
*/
if ((value & desired_ipi_set) == desired_ipi_set) {
break;
}
key = arch_irq_lock();
arch_spin_relax();
arch_irq_unlock(key);
}
}
}
void report(void)
{
unsigned int elapsed_time = IPI_TEST_INTERVAL_DURATION;
unsigned int i;
unsigned long total;
unsigned long counter[NUM_WORK_THREADS];
unsigned long last_counter[NUM_WORK_THREADS] = {};
unsigned long last_issued = 0;
unsigned long interval_issued;
while (1) {
k_sleep(K_SECONDS(IPI_TEST_INTERVAL_DURATION));
total = 0;
for (i = 0; i < NUM_WORK_THREADS; i++) {
counter[i] = work_counter[i] - last_counter[i];
total += counter[i];
last_counter[i] = work_counter[i];
}
interval_issued = primitives_issued - last_issued;
printf("**** IPI-Metric %s IPI Test **** Elapsed Time: %u\n",
IS_ENABLED(CONFIG_IPI_METRIC_PRIMITIVE_DIRECTED) ?
"Directed" : "Broadcast", elapsed_time);
printf(" Schedule IPIs Issued: %lu\n", interval_issued);
last_issued = primitives_issued;
printf(" Total Work: %lu\n", total);
for (i = 0; i < NUM_WORK_THREADS; i++) {
printf(" - Work Counter #%u: %lu\n",
i, counter[i]);
}
elapsed_time += IPI_TEST_INTERVAL_DURATION;
}
}
int main(void)
{
unsigned int i;
for (i = 0; i < NUM_WORK_THREADS; i++) {
k_thread_create(&work_thread[i], work_stack[i],
WORK_STACK_SIZE, work_entry,
UINT_TO_POINTER(i), work_array[i], NULL,
-1, 0, K_NO_WAIT);
}
/* Create the primitive thread. */
k_thread_create(&primitive_thread, primitive_stack,
PRIMITIVE_STACK_SIZE, primitive_entry,
UINT_TO_POINTER(i), NULL, NULL,
10, 0, K_NO_WAIT);
report();
}

@@ -0,0 +1,106 @@
common:
platform_key:
- arch
tags:
- kernel
- benchmark
# Native platforms excluded as they are not relevant: These benchmarks run some kernel primitives
# in a loop during a predefined time counting how many times they execute. But in the POSIX arch,
# time does not pass while the CPU executes. So the benchmark just appears as if hung.
arch_exclude:
- posix
# some slow qemu_* excluded
platform_exclude:
- qemu_malta/qemu_malta
- qemu_malta/qemu_malta/be
- qemu_nios2
integration_platforms:
- qemu_x86_64
- qemu_cortex_a53/qemu_cortex_a53/smp
timeout: 300
filter: CONFIG_SMP and CONFIG_MP_MAX_NUM_CPUS > 1
harness: console
tests:
benchmark.ipi_metric.preemptive.broadcast:
extra_configs:
- CONFIG_IPI_METRIC_PREEMPTIVE=y
- CONFIG_IPI_OPTIMIZE=n
harness_config:
type: multi_line
ordered: true
regex:
# Collect at least 3 measurements for each benchmark:
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Preemptive Counter Total:[ ]*[0-9]+(.*)"
- "(.*)IPI Count:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Preemptive Counter Total:[ ]*[0-9]+(.*)"
- "(.*)IPI Count:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Preemptive Counter Total:[ ]*[0-9]+(.*)"
- "(.*)IPI Count:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
benchmark.ipi_metric.preemptive.optimize:
extra_configs:
- CONFIG_IPI_METRIC_PREEMPTIVE=y
- CONFIG_IPI_OPTIMIZE=y
filter: CONFIG_ARCH_HAS_DIRECTED_IPIS
harness_config:
type: multi_line
ordered: true
regex:
# Collect at least 3 measurements for each benchmark:
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Preemptive Counter Total:[ ]*[0-9]+(.*)"
- "(.*)IPI Count:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Preemptive Counter Total:[ ]*[0-9]+(.*)"
- "(.*)IPI Count:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Preemptive Counter Total:[ ]*[0-9]+(.*)"
- "(.*)IPI Count:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
benchmark.ipi_metric.primitive.broadcast:
extra_configs:
- CONFIG_IPI_METRIC_PRIMITIVE_BROADCAST=y
harness_config:
type: multi_line
ordered: true
regex:
# Collect at least 3 measurements for each benchmark:
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Schedule IPIs Issued:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Schedule IPIs Issued:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Schedule IPIs Issued:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
benchmark.ipi_metric.primitive.directed:
extra_configs:
- CONFIG_IPI_METRIC_PRIMITIVE_DIRECTED=y
filter: CONFIG_ARCH_HAS_DIRECTED_IPIS
harness_config:
type: multi_line
ordered: true
regex:
# Collect at least 3 measurements for each benchmark:
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Schedule IPIs Issued:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Schedule IPIs Issued:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"
- "(.*) IPI-Metric(.+) Elapsed Time:[ ]*[0-9]+(.*)"
- "(.*)Schedule IPIs Issued:[ ]*[0-9]+(.*)"
- "(.*)Total Work:[ ]*[0-9]+(.*)"