tests: latency_measure: Add k_stack object support

Updates the latency_measure test to add support for benchmarking
k_stack_push() and k_stack_pop().

Signed-off-by: Peter Mitsis <peter.mitsis@intel.com>
This commit is contained in:
Peter Mitsis 2024-01-24 15:12:03 -05:00 committed by Fabio Baltieri
commit 590e4f3a82
3 changed files with 255 additions and 0 deletions

View file

@@ -23,6 +23,7 @@ including:
* Time it takes to send and receive events
* Time it takes to wait for events (and context switch)
* Time it takes to wake and switch to a thread waiting for events
* Time it takes to push and pop to/from a k_stack
* Measure average time to alloc memory from heap then free that memory
When userspace is enabled using the prj_user.conf configuration file, this benchmark will
@@ -76,6 +77,10 @@ Sample output of the benchmark (without userspace enabled)::
semaphore.give.wake+ctx.k_to_k - Give a semaphore (context switch) : 599 cycles , 4992 ns :
condvar.wait.blocking.k_to_k - Wait for a condvar (context switch) : 692 cycles , 5767 ns :
condvar.signal.wake+ctx.k_to_k - Signal a condvar (context switch) : 715 cycles , 5958 ns :
stack.push.immediate.kernel - Add data to k_stack (no ctx switch) : 166 cycles , 1391 ns :
stack.pop.immediate.kernel - Get data from k_stack (no ctx switch) : 82 cycles , 691 ns :
stack.pop.blocking.k_to_k - Get data from k_stack (w/ ctx switch) : 499 cycles , 4166 ns :
stack.push.wake+ctx.k_to_k - Add data to k_stack (w/ ctx switch) : 645 cycles , 5375 ns :
mutex.lock.immediate.recursive.kernel - Lock a mutex : 100 cycles , 833 ns :
mutex.unlock.immediate.recursive.kernel - Unlock a mutex : 40 cycles , 333 ns :
heap.malloc.immediate - Average time for heap malloc : 627 cycles , 5225 ns :
@@ -193,6 +198,18 @@ Sample output of the benchmark (with userspace enabled)::
condvar.signal.wake+ctx.k_to_u - Signal a condvar (context switch) : 1715 cycles , 14298 ns :
condvar.wait.blocking.u_to_u - Wait for a condvar (context switch) : 2313 cycles , 19279 ns :
condvar.signal.wake+ctx.u_to_u - Signal a condvar (context switch) : 2225 cycles , 18541 ns :
stack.push.immediate.kernel - Add data to k_stack (no ctx switch) : 244 cycles , 2041 ns :
stack.pop.immediate.kernel - Get data from k_stack (no ctx switch) : 195 cycles , 1630 ns :
stack.push.immediate.user - Add data to k_stack (no ctx switch) : 714 cycles , 5956 ns :
stack.pop.immediate.user - Get data from k_stack (no ctx switch) : 1009 cycles , 8414 ns :
stack.pop.blocking.k_to_k - Get data from k_stack (w/ ctx switch) : 1234 cycles , 10291 ns :
stack.push.wake+ctx.k_to_k - Add data to k_stack (w/ ctx switch) : 1360 cycles , 11333 ns :
stack.pop.blocking.u_to_k - Get data from k_stack (w/ ctx switch) : 2084 cycles , 17374 ns :
stack.push.wake+ctx.k_to_u - Add data to k_stack (w/ ctx switch) : 1665 cycles , 13875 ns :
stack.pop.blocking.k_to_u - Get data from k_stack (w/ ctx switch) : 1544 cycles , 12874 ns :
stack.push.wake+ctx.u_to_k - Add data to k_stack (w/ ctx switch) : 1850 cycles , 15422 ns :
stack.pop.blocking.u_to_u - Get data from k_stack (w/ ctx switch) : 2394 cycles , 19958 ns :
stack.push.wake+ctx.u_to_u - Add data to k_stack (w/ ctx switch) : 2155 cycles , 17958 ns :
mutex.lock.immediate.recursive.kernel - Lock a mutex : 155 cycles , 1291 ns :
mutex.unlock.immediate.recursive.kernel - Unlock a mutex : 57 cycles , 475 ns :
mutex.lock.immediate.recursive.user - Lock a mutex : 665 cycles , 5541 ns :

View file

@@ -55,6 +55,9 @@ extern int event_blocking_ops(uint32_t num_iterations, uint32_t start_options,
uint32_t alt_options);
extern int condvar_blocking_ops(uint32_t num_iterations, uint32_t start_options,
uint32_t alt_options);
extern int stack_ops(uint32_t num_iterations, uint32_t options);
extern int stack_blocking_ops(uint32_t num_iterations, uint32_t start_options,
uint32_t alt_options);
extern void heap_malloc_free(void);
static void test_thread(void *arg1, void *arg2, void *arg3)
@@ -154,6 +157,18 @@ static void test_thread(void *arg1, void *arg2, void *arg3)
condvar_blocking_ops(CONFIG_BENCHMARK_NUM_ITERATIONS, K_USER, K_USER);
#endif
stack_ops(CONFIG_BENCHMARK_NUM_ITERATIONS, 0);
#ifdef CONFIG_USERSPACE
stack_ops(CONFIG_BENCHMARK_NUM_ITERATIONS, K_USER);
#endif
stack_blocking_ops(CONFIG_BENCHMARK_NUM_ITERATIONS, 0, 0);
#ifdef CONFIG_USERSPACE
stack_blocking_ops(CONFIG_BENCHMARK_NUM_ITERATIONS, 0, K_USER);
stack_blocking_ops(CONFIG_BENCHMARK_NUM_ITERATIONS, K_USER, 0);
stack_blocking_ops(CONFIG_BENCHMARK_NUM_ITERATIONS, K_USER, K_USER);
#endif
mutex_lock_unlock(CONFIG_BENCHMARK_NUM_ITERATIONS, 0);
#ifdef CONFIG_USERSPACE
mutex_lock_unlock(CONFIG_BENCHMARK_NUM_ITERATIONS, K_USER);

View file

@@ -0,0 +1,223 @@
/*
* Copyright (c) 2024 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
/*
* @file measure time for various k_stack operations
*
* This file contains the tests that measures the times for the following
* k_stack operations from both kernel threads and user threads:
* 1. Immediately adding a data item to a k_stack
* 2. Immediately removing a data item from a k_stack
* 3. Blocking on removing a data item from a k_stack
* 4. Waking (and context switching to) a thread blocked on a k_stack
*/
#include <zephyr/kernel.h>
#include <zephyr/timing/timing.h>
#include "utils.h"
#include "timing_sc.h"
#define MAX_ITEMS 16
static BENCH_BMEM stack_data_t stack_array[MAX_ITEMS];
static struct k_stack stack;
/**
 * @brief Measure uncontested k_stack push and pop times
 *
 * Repeatedly pushes one word onto the k_stack and immediately pops it
 * back, accumulating the cycle counts of each half of the round trip.
 * The push total is published via timestamp.cycles, then the thread
 * blocks on pause_sem until the main thread has consumed that value,
 * after which the pop total is published the same way.
 */
static void stack_push_pop_thread_entry(void *p1, void *p2, void *p3)
{
	uint32_t n_loops = (uint32_t)(uintptr_t)p1;
	uint64_t push_cycles = 0ULL;
	uint64_t pop_cycles = 0ULL;
	timing_t t0;
	timing_t t1;
	timing_t t2;
	stack_data_t item;

	for (uint32_t i = 0U; i < n_loops; i++) {
		t0 = timing_timestamp_get();

		(void) k_stack_push(&stack, 1234);

		t1 = timing_timestamp_get();

		(void) k_stack_pop(&stack, &item, K_NO_WAIT);

		t2 = timing_timestamp_get();

		push_cycles += timing_cycles_get(&t0, &t1);
		pop_cycles += timing_cycles_get(&t1, &t2);
	}

	/* Report the push total; block until the main thread reads it */
	timestamp.cycles = push_cycles;
	k_sem_take(&pause_sem, K_FOREVER);

	/* Report the pop total */
	timestamp.cycles = pop_cycles;
}
/**
 * @brief Benchmark uncontested k_stack push/pop operations
 *
 * Spawns a helper thread (kernel or user mode depending on @a options,
 * at a priority above the current thread) that times @a num_iterations
 * push/pop pairs, then prints the averaged cycle counts for the push
 * and pop halves separately.
 *
 * @param num_iterations Number of push/pop pairs to time
 * @param options        Helper thread options (0 or K_USER)
 *
 * @return 0 on success
 */
int stack_ops(uint32_t num_iterations, uint32_t options)
{
	char tag[50];
	char description[120];
	uint64_t total;
	int prio = k_thread_priority_get(k_current_get());

	timing_start();

	k_stack_init(&stack, stack_array, MAX_ITEMS);

	k_thread_create(&start_thread, start_stack,
			K_THREAD_STACK_SIZEOF(start_stack),
			stack_push_pop_thread_entry,
			(void *)(uintptr_t)num_iterations,
			NULL, NULL,
			prio - 1, options, K_FOREVER);

	k_thread_access_grant(&start_thread, &pause_sem, &stack);

	/* Helper runs at higher priority: it fills in timestamp.cycles
	 * and then blocks on pause_sem before we resume here.
	 */
	k_thread_start(&start_thread);

	/* Report the average push time */
	snprintf(tag, sizeof(tag),
		 "stack.push.immediate.%s",
		 options & K_USER ? "user" : "kernel");
	snprintf(description, sizeof(description),
		 "%-40s - Add data to k_stack (no ctx switch)", tag);

	total = timestamp.cycles - timestamp_overhead_adjustment(options, options);
	PRINT_STATS_AVG(description, (uint32_t)total,
			num_iterations, false, "");

	/* Unblock the helper so it can publish the pop total */
	k_sem_give(&pause_sem);

	/* Report the average pop time */
	snprintf(tag, sizeof(tag),
		 "stack.pop.immediate.%s",
		 options & K_USER ? "user" : "kernel");
	snprintf(description, sizeof(description),
		 "%-40s - Get data from k_stack (no ctx switch)", tag);

	total = timestamp.cycles - timestamp_overhead_adjustment(options, options);
	PRINT_STATS_AVG(description, (uint32_t)total,
			num_iterations, false, "");

	k_thread_join(&start_thread, K_FOREVER);

	timing_stop();

	return 0;
}
/**
 * @brief Consumer side of the blocking k_stack benchmark
 *
 * Each iteration blocks in k_stack_pop() until start_thread_entry()
 * pushes an item. The producer records the timestamp of its push in
 * timestamp.sample just before the push wakes this thread, so:
 *   sum[0] = cycles spent entering the blocking pop (start -> push)
 *   sum[1] = cycles from the push until this thread resumes
 *            (push -> finish, i.e. wake + context switch)
 * The first total is published via timestamp.cycles; the thread then
 * waits on pause_sem until the main thread has read it before
 * publishing the second.
 */
static void alt_thread_entry(void *p1, void *p2, void *p3)
{
	uint32_t num_iterations = (uint32_t)(uintptr_t)p1;
	timing_t start;
	timing_t mid;
	timing_t finish;
	uint64_t sum[2] = {0ULL, 0ULL};
	uint32_t i;
	stack_data_t data;

	for (i = 0; i < num_iterations; i++) {
		/* 1. Block waiting for data on k_stack */
		start = timing_timestamp_get();

		k_stack_pop(&stack, &data, K_FOREVER);

		/* 3. Data obtained. */
		finish = timing_timestamp_get();

		/* Timestamp captured by the producer at step 2 (the push) */
		mid = timestamp.sample;

		sum[0] += timing_cycles_get(&start, &mid);
		sum[1] += timing_cycles_get(&mid, &finish);
	}

	/* Publish the blocking-pop total; wait until it has been read */
	timestamp.cycles = sum[0];
	k_sem_take(&pause_sem, K_FOREVER);

	/* Publish the wake + context switch total */
	timestamp.cycles = sum[1];
}
/**
 * @brief Producer side of the blocking k_stack benchmark
 *
 * Starts the (higher priority) alt thread and then repeatedly pushes a
 * data item onto the k_stack, timestamping immediately before each
 * push so the consumer can split its measurement into "blocking" and
 * "wake + context switch" components.
 */
static void start_thread_entry(void *p1, void *p2, void *p3)
{
	uint32_t loops = (uint32_t)(uintptr_t)p1;

	k_thread_start(&alt_thread);

	for (uint32_t j = 0; j < loops; j++) {
		/* 2. Push data, waking the alt thread blocked in the pop */
		timestamp.sample = timing_timestamp_get();

		k_stack_push(&stack, (stack_data_t)123);
	}

	k_thread_join(&alt_thread, K_FOREVER);
}
/**
 * @brief Benchmark k_stack operations that involve a context switch
 *
 * Creates a producer (start) thread and a higher-priority consumer
 * (alt) thread, each kernel or user mode per its options word, and
 * reports the average time for a blocking pop and for a push that
 * wakes (and context switches to) the blocked consumer.
 *
 * NOTE(review): the k_stack itself is not re-initialized here — this
 * presumably relies on stack_ops() having run first; verify call order
 * in main.c.
 *
 * @param num_iterations Number of push/pop pairs to time
 * @param start_options  Producer thread options (0 or K_USER)
 * @param alt_options    Consumer thread options (0 or K_USER)
 *
 * @return 0 on success
 */
int stack_blocking_ops(uint32_t num_iterations, uint32_t start_options,
		       uint32_t alt_options)
{
	char tag[50];
	char description[120];
	uint64_t measured;
	int prio = k_thread_priority_get(k_current_get());

	timing_start();

	k_thread_create(&start_thread, start_stack,
			K_THREAD_STACK_SIZEOF(start_stack),
			start_thread_entry,
			(void *)(uintptr_t)num_iterations,
			NULL, NULL,
			prio - 1, start_options, K_FOREVER);

	k_thread_create(&alt_thread, alt_stack,
			K_THREAD_STACK_SIZEOF(alt_stack),
			alt_thread_entry,
			(void *)(uintptr_t)num_iterations,
			NULL, NULL,
			prio - 2, alt_options, K_FOREVER);

	k_thread_access_grant(&start_thread, &alt_thread, &pause_sem, &stack);
	k_thread_access_grant(&alt_thread, &pause_sem, &stack);

	/* Both benchmark threads outrank us; by the time we resume the
	 * alt thread has published its first total and blocked on
	 * pause_sem.
	 */
	k_thread_start(&start_thread);

	/* Report the average blocking pop time */
	snprintf(tag, sizeof(tag),
		 "stack.pop.blocking.%s_to_%s",
		 alt_options & K_USER ? "u" : "k",
		 start_options & K_USER ? "u" : "k");
	snprintf(description, sizeof(description),
		 "%-40s - Get data from k_stack (w/ ctx switch)", tag);

	measured = timestamp.cycles;
	PRINT_STATS_AVG(description, (uint32_t)measured,
			num_iterations, false, "");

	/* Unblock the alt thread so it can publish its second total */
	k_sem_give(&pause_sem);

	/* Report the average wake + context switch push time */
	snprintf(tag, sizeof(tag),
		 "stack.push.wake+ctx.%s_to_%s",
		 start_options & K_USER ? "u" : "k",
		 alt_options & K_USER ? "u" : "k");
	snprintf(description, sizeof(description),
		 "%-40s - Add data to k_stack (w/ ctx switch)", tag);

	measured = timestamp.cycles;
	PRINT_STATS_AVG(description, (uint32_t)measured,
			num_iterations, false, "");

	k_thread_join(&start_thread, K_FOREVER);

	timing_stop();

	return 0;
}