diff --git a/tests/benchmarks/latency_measure/README.rst b/tests/benchmarks/latency_measure/README.rst index 8ba99d05171..a4119c636c2 100644 --- a/tests/benchmarks/latency_measure/README.rst +++ b/tests/benchmarks/latency_measure/README.rst @@ -74,6 +74,8 @@ Sample output of the benchmark (without userspace enabled):: semaphore.take.immediate.kernel - Take a semaphore (no blocking) : 69 cycles , 575 ns : semaphore.take.blocking.k_to_k - Take a semaphore (context switch) : 494 cycles , 4116 ns : semaphore.give.wake+ctx.k_to_k - Give a semaphore (context switch) : 599 cycles , 4992 ns : + condvar.wait.blocking.k_to_k - Wait for a condvar (context switch) : 692 cycles , 5767 ns : + condvar.signal.wake+ctx.k_to_k - Signal a condvar (context switch) : 715 cycles , 5958 ns : mutex.lock.immediate.recursive.kernel - Lock a mutex : 100 cycles , 833 ns : mutex.unlock.immediate.recursive.kernel - Unlock a mutex : 40 cycles , 333 ns : heap.malloc.immediate - Average time for heap malloc : 627 cycles , 5225 ns : @@ -183,6 +185,14 @@ Sample output of the benchmark (with userspace enabled):: semaphore.give.wake+ctx.k_to_u - Give a semaphore (context switch) : 1434 cycles , 11957 ns : semaphore.take.blocking.u_to_u - Take a semaphore (context switch) : 1690 cycles , 14090 ns : semaphore.give.wake+ctx.u_to_u - Give a semaphore (context switch) : 1800 cycles , 15000 ns : + condvar.wait.blocking.k_to_k - Wait for a condvar (context switch) : 1385 cycles , 11542 ns : + condvar.signal.wake+ctx.k_to_k - Signal a condvar (context switch) : 1420 cycles , 11833 ns : + condvar.wait.blocking.k_to_u - Wait for a condvar (context switch) : 1537 cycles , 12815 ns : + condvar.signal.wake+ctx.u_to_k - Signal a condvar (context switch) : 1950 cycles , 16250 ns : + condvar.wait.blocking.u_to_k - Wait for a condvar (context switch) : 2025 cycles , 16875 ns : + condvar.signal.wake+ctx.k_to_u - Signal a condvar (context switch) : 1715 cycles , 14298 ns : + condvar.wait.blocking.u_to_u - Wait for a 
condvar (context switch) : 2313 cycles , 19279 ns : + condvar.signal.wake+ctx.u_to_u - Signal a condvar (context switch) : 2225 cycles , 18541 ns : mutex.lock.immediate.recursive.kernel - Lock a mutex : 155 cycles , 1291 ns : mutex.unlock.immediate.recursive.kernel - Unlock a mutex : 57 cycles , 475 ns : mutex.lock.immediate.recursive.user - Lock a mutex : 665 cycles , 5541 ns :
diff --git a/tests/benchmarks/latency_measure/src/condvar.c b/tests/benchmarks/latency_measure/src/condvar.c
new file mode 100644
index 00000000000..5356d5d2410
--- /dev/null
+++ b/tests/benchmarks/latency_measure/src/condvar.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2024 Intel Corporation
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * @file measure time for various condition variable operations
+ * 1. Block waiting for a condition variable
+ * 2. Signal a condition variable (with context switch)
+ */
+
+#include <zephyr/kernel.h>
+#include <zephyr/timing/timing.h>
+#include "utils.h"
+#include "timing_sc.h"
+
+static K_CONDVAR_DEFINE(condvar);
+static K_MUTEX_DEFINE(mutex);
+
+/**
+ * @brief Waiter side of the condvar benchmark
+ *
+ * Locks the mutex, starts alt_thread, then blocks on the condvar once per
+ * iteration. Cycles from the pre-wait timestamp to alt_thread's midpoint
+ * sample accumulate in sum[0]; cycles from that sample to the post-wakeup
+ * timestamp accumulate in sum[1]. The two totals are handed over one at a
+ * time through timestamp.cycles, with pause_sem separating them.
+ *
+ * @param p1 Number of iterations, passed as an integer cast to a pointer
+ * @param p2 Unused
+ * @param p3 Unused
+ */
+static void start_thread_entry(void *p1, void *p2, void *p3)
+{
+	uint32_t num_iterations = (uint32_t)(uintptr_t)p1;
+	uint32_t i;
+	timing_t start;
+	timing_t finish;
+	uint64_t sum[2] = {0ull, 0ull};
+
+	k_mutex_lock(&mutex, K_FOREVER);
+
+	k_thread_start(&alt_thread);
+
+	for (i = 0; i < num_iterations; i++) {
+		/* 1. Get the first timestamp and block on condvar */
+
+		start = timing_timestamp_get();
+		k_condvar_wait(&condvar, &mutex, K_FOREVER);
+
+		/* 3. Get the final timestamp */
+
+		finish = timing_timestamp_get();
+
+		sum[0] += timing_cycles_get(&start, &timestamp.sample);
+		sum[1] += timing_cycles_get(&timestamp.sample, &finish);
+	}
+
+	/*
+	 * Drop the mutex taken above now that the measurements are done.
+	 * The start_thread object is re-created for every run, so make sure
+	 * the lock is not left held when this thread exits.
+	 */
+
+	k_mutex_unlock(&mutex);
+
+	/* Wait for alt_thread to finish */
+
+	k_thread_join(&alt_thread, K_FOREVER);
+
+	/* Store the first total, then block until pause_sem is given */
+
+	timestamp.cycles = sum[0];
+	k_sem_take(&pause_sem, K_FOREVER);
+
+	timestamp.cycles = sum[1];
+}
+
+/**
+ * @brief Signaller side of the condvar benchmark
+ *
+ * Once per iteration, records the midpoint timestamp in timestamp.sample
+ * and signals the condvar.
+ *
+ * @param p1 Number of iterations, passed as an integer cast to a pointer
+ * @param p2 Unused
+ * @param p3 Unused
+ */
+static void alt_thread_entry(void *p1, void *p2, void *p3)
+{
+	uint32_t num_iterations = (uint32_t)(uintptr_t)p1;
+	uint32_t i;
+
+	for (i = 0; i < num_iterations; i++) {
+
+		/* 2. Get midpoint timestamp and signal the condvar */
+
+		timestamp.sample = timing_timestamp_get();
+		k_condvar_signal(&condvar);
+	}
+}
+
+/**
+ * @brief Measure and report condvar wait and signal latency
+ *
+ * Creates start_thread (waiter) and alt_thread (signaller) at the caller's
+ * priority minus 2 and minus 1 respectively, starts the waiter, and hands
+ * each cycle total it stores in timestamp.cycles to PRINT_STATS_AVG.
+ *
+ * @param num_iterations Number of wait/signal pairs to measure
+ * @param start_options Thread options for start_thread (K_USER is tested)
+ * @param alt_options Thread options for alt_thread (K_USER is tested)
+ *
+ * @return 0 always
+ */
+int condvar_blocking_ops(uint32_t num_iterations, uint32_t start_options,
+			 uint32_t alt_options)
+{
+	int priority;
+	char tag[50];
+	char description[120];
+	uint64_t cycles;
+
+	priority = k_thread_priority_get(k_current_get());
+
+	timing_start();
+
+	k_thread_create(&start_thread, start_stack,
+			K_THREAD_STACK_SIZEOF(start_stack),
+			start_thread_entry,
+			(void *)(uintptr_t)num_iterations,
+			NULL, NULL,
+			priority - 2, start_options, K_FOREVER);
+
+	k_thread_create(&alt_thread, alt_stack,
+			K_THREAD_STACK_SIZEOF(alt_stack),
+			alt_thread_entry,
+			(void *)(uintptr_t)num_iterations,
+			NULL, NULL,
+			priority - 1, alt_options, K_FOREVER);
+
+	k_thread_access_grant(&start_thread, &alt_thread,
+			      &condvar, &mutex, &pause_sem);
+	k_thread_access_grant(&alt_thread, &condvar);
+
+	/* Start test thread */
+
+	k_thread_start(&start_thread);
+
+	/* Stats gathered. Display them. */
+
+	snprintf(tag, sizeof(tag), "condvar.wait.blocking.%c_to_%c",
+		 (start_options & K_USER) ? 'u' : 'k',
+		 (alt_options & K_USER) ? 'u' : 'k');
+	snprintf(description, sizeof(description),
+		 "%-40s - Wait for a condvar (context switch)", tag);
+
+	cycles = timestamp.cycles;
+	PRINT_STATS_AVG(description, (uint32_t)cycles,
+			num_iterations, false, "");
+
+	/* Release start_thread so it stores the second total */
+
+	k_sem_give(&pause_sem);
+
+	snprintf(tag, sizeof(tag), "condvar.signal.wake+ctx.%c_to_%c",
+		 (alt_options & K_USER) ? 'u' : 'k',
+		 (start_options & K_USER) ? 'u' : 'k');
+	snprintf(description, sizeof(description),
+		 "%-40s - Signal a condvar (context switch)", tag);
+	cycles = timestamp.cycles;
+	PRINT_STATS_AVG(description, (uint32_t)cycles,
+			num_iterations, false, "");
+
+	k_thread_join(&start_thread, K_FOREVER);
+
+	timing_stop();
+
+	return 0;
+}
diff --git a/tests/benchmarks/latency_measure/src/main.c b/tests/benchmarks/latency_measure/src/main.c
index 7a46c77bc99..b21c0c6b3ea 100644
--- a/tests/benchmarks/latency_measure/src/main.c
+++ b/tests/benchmarks/latency_measure/src/main.c
@@ -53,6 +53,8 @@ extern int lifo_blocking_ops(uint32_t num_iterations, uint32_t start_options,
 extern int event_ops(uint32_t num_iterations, uint32_t options);
 extern int event_blocking_ops(uint32_t num_iterations, uint32_t start_options,
 			      uint32_t alt_options);
+extern int condvar_blocking_ops(uint32_t num_iterations, uint32_t start_options,
+				uint32_t alt_options);
 extern void heap_malloc_free(void);
 
 static void test_thread(void *arg1, void *arg2, void *arg3)
@@ -145,6 +147,13 @@ static void test_thread(void *arg1, void *arg2, void *arg3)
 	sema_context_switch(CONFIG_BENCHMARK_NUM_ITERATIONS, K_USER, K_USER);
 #endif
 
+	condvar_blocking_ops(CONFIG_BENCHMARK_NUM_ITERATIONS, 0, 0);
+#ifdef CONFIG_USERSPACE
+	condvar_blocking_ops(CONFIG_BENCHMARK_NUM_ITERATIONS, 0, K_USER);
+	condvar_blocking_ops(CONFIG_BENCHMARK_NUM_ITERATIONS, K_USER, 0);
+	condvar_blocking_ops(CONFIG_BENCHMARK_NUM_ITERATIONS, K_USER, K_USER);
+#endif
+
 	mutex_lock_unlock(CONFIG_BENCHMARK_NUM_ITERATIONS, 0);
 #ifdef CONFIG_USERSPACE
 	mutex_lock_unlock(CONFIG_BENCHMARK_NUM_ITERATIONS, K_USER);