From 1a7e5c6c8d25665c77fc150f70476fd5cb59e97f Mon Sep 17 00:00:00 2001 From: Peter Mitsis Date: Thu, 5 Oct 2023 10:30:43 -0400 Subject: [PATCH] tests: latency_measure: Use syscall to get timestamp Many architectures and platforms can not directly read a timestamp from userspace as the relevant MMIO registers are inaccessible. This necessitates that the timestamp be obtained by a system call. The additional overhead from these system calls can be taken into account and the recorded times adjusted depending upon whether the test occurred entirely within kernel space, entirely within user space, or a mix between the two. It is worth noting that when the test requires a mix of both user and kernel space threads, the overhead is estimated as the mean average of purely kernel threads and purely user threads overhead times. This might not be technically correct, but it ought to provide a reasonable enough approximation. Signed-off-by: Peter Mitsis --- tests/benchmarks/latency_measure/prj.conf | 1 + .../latency_measure/src/int_to_thread.c | 13 +- tests/benchmarks/latency_measure/src/main.c | 9 +- .../latency_measure/src/mutex_lock_unlock.c | 11 +- .../src/sema_test_signal_release.c | 19 +-- tests/benchmarks/latency_measure/src/thread.c | 26 ++-- .../latency_measure/src/thread_switch_yield.c | 7 +- .../latency_measure/src/timing_sc.c | 112 ++++++++++++++++++ .../latency_measure/src/timing_sc.h | 21 ++++ tests/benchmarks/latency_measure/src/utils.h | 10 ++ 10 files changed, 197 insertions(+), 32 deletions(-) create mode 100644 tests/benchmarks/latency_measure/src/timing_sc.c create mode 100644 tests/benchmarks/latency_measure/src/timing_sc.h diff --git a/tests/benchmarks/latency_measure/prj.conf b/tests/benchmarks/latency_measure/prj.conf index 4a786177bbc..03621afb14e 100644 --- a/tests/benchmarks/latency_measure/prj.conf +++ b/tests/benchmarks/latency_measure/prj.conf @@ -24,3 +24,4 @@ CONFIG_MP_MAX_NUM_CPUS=1 CONFIG_TIMING_FUNCTIONS=y CONFIG_HEAP_MEM_POOL_SIZE=2048 +CONFIG_APPLICATION_DEFINED_SYSCALL=y diff --git a/tests/benchmarks/latency_measure/src/int_to_thread.c b/tests/benchmarks/latency_measure/src/int_to_thread.c index 608d3b30893..ad3cf70fad7 100644 --- a/tests/benchmarks/latency_measure/src/int_to_thread.c +++ b/tests/benchmarks/latency_measure/src/int_to_thread.c @@ -24,6 +24,7 @@ #include #include "utils.h" +#include "timing_sc.h" #include @@ -43,7 +44,7 @@ static void test_isr(const void *arg) k_sem_give(sem); } - timestamp.sample = timing_counter_get(); + timestamp.sample = timing_timestamp_get(); } /** @@ -61,7 +62,7 @@ static void int_to_interrupted_thread(uint32_t num_iterations, uint64_t *sum) for (uint32_t i = 0; i < num_iterations; i++) { irq_offload(test_isr, NULL); - finish = timing_counter_get(); + finish = timing_timestamp_get(); start = timestamp.sample; *sum += timing_cycles_get(&start, &finish); @@ -92,7 +93,7 @@ static void start_thread_entry(void *p1, void *p2, void *p3) /* 3. Obtain the start and finish timestamps */ - finish = timing_counter_get(); + finish = timing_timestamp_get(); start = timestamp.sample; sum += timing_cycles_get(&start, &finish); @@ -171,6 +172,8 @@ int int_to_thread(uint32_t num_iterations) int_to_interrupted_thread(num_iterations, &sum); + sum -= timestamp_overhead_adjustment(0, 0); + PRINT_STATS_AVG("Switch from ISR back to interrupted thread", (uint32_t)sum, num_iterations, false, ""); @@ -178,6 +181,8 @@ int int_to_thread(uint32_t num_iterations) int_to_another_thread(num_iterations, &sum, 0); + sum -= timestamp_overhead_adjustment(0, 0); + PRINT_STATS_AVG("Switch from ISR to another thread (kernel)", (uint32_t)sum, num_iterations, false, ""); @@ -186,6 +191,8 @@ int int_to_thread(uint32_t num_iterations) #if CONFIG_USERSPACE int_to_another_thread(num_iterations, &sum, K_USER); + sum -= timestamp_overhead_adjustment(0, K_USER); + PRINT_STATS_AVG("Switch from ISR to another thread (user)", (uint32_t)sum, num_iterations, false, ""); #endif diff --git a/tests/benchmarks/latency_measure/src/main.c b/tests/benchmarks/latency_measure/src/main.c index ca44f164bad..499ce1273f1 100644 --- a/tests/benchmarks/latency_measure/src/main.c +++ b/tests/benchmarks/latency_measure/src/main.c @@ -14,18 +14,13 @@ #include #include #include "utils.h" +#include "timing_sc.h" #include #define NUM_ITERATIONS 10000 #define STACK_SIZE (1024 + CONFIG_TEST_EXTRA_STACK_SIZE) -#ifdef CONFIG_USERSPACE -#define BENCH_BMEM K_APP_BMEM(bench_mem_partition) -#else -#define BENCH_BMEM -#endif - uint32_t tm_off; BENCH_BMEM struct timestamp_data timestamp; @@ -76,6 +71,8 @@ static void test_thread(void *arg1, void *arg2, void *arg3) TC_START("Time Measurement"); TC_PRINT("Timing results: Clock frequency: %u MHz\n", freq); + timestamp_overhead_init(NUM_ITERATIONS); + /* Preemptive threads context switching */ thread_switch_yield(NUM_ITERATIONS, false); diff --git a/tests/benchmarks/latency_measure/src/mutex_lock_unlock.c b/tests/benchmarks/latency_measure/src/mutex_lock_unlock.c index 4b74a1cfd35..5ab52b43dcd 100644 --- a/tests/benchmarks/latency_measure/src/mutex_lock_unlock.c +++ b/tests/benchmarks/latency_measure/src/mutex_lock_unlock.c @@ -15,6 +15,7 @@ #include #include #include "utils.h" +#include "timing_sc.h" static K_MUTEX_DEFINE(test_mutex); @@ -30,7 +31,7 @@ static void start_lock_unlock(void *p1, void *p2, void *p3) ARG_UNUSED(p2); ARG_UNUSED(p3); - start = timing_counter_get(); + start = timing_timestamp_get(); /* Recursively lock take the mutex */ @@ -38,11 +39,11 @@ static void start_lock_unlock(void *p1, void *p2, void *p3) k_mutex_lock(&test_mutex, K_NO_WAIT); } - finish = timing_counter_get(); + finish = timing_timestamp_get(); lock_cycles = timing_cycles_get(&start, &finish); - start = timing_counter_get(); + start = timing_timestamp_get(); /* Recursively unlock the mutex */ @@ -50,7 +51,7 @@ static void start_lock_unlock(void *p1, void *p2, void *p3) k_mutex_unlock(&test_mutex); } - finish = timing_counter_get(); + finish = timing_timestamp_get(); unlock_cycles = timing_cycles_get(&start, &finish); @@ -90,6 +91,7 @@ int mutex_lock_unlock(uint32_t num_iterations, uint32_t options) k_thread_start(&start_thread); cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(options, options); k_sem_give(&pause_sem); snprintf(description, sizeof(description), @@ -99,6 +101,7 @@ int mutex_lock_unlock(uint32_t num_iterations, uint32_t options) false, ""); cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(options, options); snprintf(description, sizeof(description), "Unlock a mutex from %s thread", diff --git a/tests/benchmarks/latency_measure/src/sema_test_signal_release.c b/tests/benchmarks/latency_measure/src/sema_test_signal_release.c index 4e36f534ba1..e5ea2b72992 100644 --- a/tests/benchmarks/latency_measure/src/sema_test_signal_release.c +++ b/tests/benchmarks/latency_measure/src/sema_test_signal_release.c @@ -15,6 +15,7 @@ #include #include #include "utils.h" +#include "timing_sc.h" static struct k_sem sem; @@ -33,7 +34,7 @@ static void alt_thread_entry(void *p1, void *p2, void *p3) * to . */ - mid = timing_counter_get(); + mid = timing_timestamp_get(); k_sem_give(&sem); /* 5. Share the timestamp. */ @@ -68,12 +69,12 @@ static void start_thread_entry(void *p1, void *p2, void *p3) * to . */ - start = timing_counter_get(); + start = timing_timestamp_get(); k_sem_take(&sem, K_FOREVER); /* 3. Get the timestamp. */ - finish = timing_counter_get(); + finish = timing_timestamp_get(); /* * 4. Let run so it can share its @@ -135,6 +136,7 @@ void sema_context_switch(uint32_t num_iterations, /* Retrieve the number of cycles spent taking the semaphore */ cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(start_options, alt_options); snprintf(description, sizeof(description), "Take a semaphore (context switch %c -> %c)", @@ -150,6 +152,7 @@ void sema_context_switch(uint32_t num_iterations, /* Retrieve the number of cycles spent taking the semaphore */ cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(start_options, alt_options); snprintf(description, sizeof(description), "Give a semaphore (context switch %c -> %c)", @@ -183,24 +186,24 @@ static void immediate_give_take(void *p1, void *p2, void *p3) /* 1. Give a semaphore. No threads are waiting on it */ - start = timing_counter_get(); + start = timing_timestamp_get(); for (uint32_t i = 0; i < num_iterations; i++) { k_sem_give(&sem); } - finish = timing_counter_get(); + finish = timing_timestamp_get(); give_cycles = timing_cycles_get(&start, &finish); /* 2. Take a semaphore--no contention */ - start = timing_counter_get(); + start = timing_timestamp_get(); for (uint32_t i = 0; i < num_iterations; i++) { k_sem_take(&sem, K_NO_WAIT); } - finish = timing_counter_get(); + finish = timing_timestamp_get(); take_cycles = timing_cycles_get(&start, &finish); /* 3. Post the number of cycles spent giving the semaphore */ @@ -250,6 +253,7 @@ int sema_test_signal(uint32_t num_iterations, uint32_t options) /* 5. Retrieve the number of cycles spent giving the semaphore */ cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(options, options); snprintf(description, sizeof(description), "Give a semaphore (no waiters) from %s thread", @@ -269,6 +273,7 @@ int sema_test_signal(uint32_t num_iterations, uint32_t options) /* 9. Retrieve the number of cycles spent taking the semaphore */ cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(options, options); snprintf(description, sizeof(description), "Take a semaphore (no blocking) from %s thread", diff --git a/tests/benchmarks/latency_measure/src/thread.c b/tests/benchmarks/latency_measure/src/thread.c index db4da3e685f..4448b0e2304 100644 --- a/tests/benchmarks/latency_measure/src/thread.c +++ b/tests/benchmarks/latency_measure/src/thread.c @@ -22,6 +22,7 @@ #include #include #include "utils.h" +#include "timing_sc.h" #define STACK_SIZE (512 + CONFIG_TEST_EXTRA_STACK_SIZE) @@ -38,7 +39,7 @@ static void alt_thread_entry(void *p1, void *p2, void *p3) /* 3. Finish measuring time to start */ - timestamp.sample = timing_counter_get(); + timestamp.sample = timing_timestamp_get(); /* 4. Let process the time measurement. */ @@ -46,12 +47,12 @@ static void alt_thread_entry(void *p1, void *p2, void *p3) /* 7. Begin measuring time to suspend active thread (self/alt_thread) */ - timestamp.sample = timing_counter_get(); + timestamp.sample = timing_timestamp_get(); k_thread_suspend(&alt_thread); /* 10. Finish measuring time to resume (self) */ - timestamp.sample = timing_counter_get(); + timestamp.sample = timing_timestamp_get(); /* 11. Lower the priority so can terminate us. */ @@ -81,14 +82,14 @@ static void start_thread_entry(void *p1, void *p2, void *p3) /* 1. Measure time to create, but not start */ if ((bit_options & START_ALT) == START_ALT) { - start = timing_counter_get(); + start = timing_timestamp_get(); k_thread_create(&alt_thread, alt_stack, K_THREAD_STACK_SIZEOF(alt_stack), alt_thread_entry, NULL, NULL, NULL, priority, (bit_options & ALT_USER) == ALT_USER ? K_USER : 0, K_FOREVER); - finish = timing_counter_get(); + finish = timing_timestamp_get(); thread_create_sum += timing_cycles_get(&start, &finish); } else { @@ -116,7 +117,7 @@ static void start_thread_entry(void *p1, void *p2, void *p3) /* 2. Begin measuring time to start */ - start = timing_counter_get(); + start = timing_timestamp_get(); k_thread_start(&alt_thread); /* 5. Process the time to start */ @@ -131,12 +132,12 @@ static void start_thread_entry(void *p1, void *p2, void *p3) /* 8. Finish measuring time to suspend */ start = timestamp.sample; - finish = timing_counter_get(); + finish = timing_timestamp_get(); thread_suspend_sum += timing_cycles_get(&start, &finish); /* 9. Being measuring time to resume */ - start = timing_counter_get(); + start = timing_timestamp_get(); k_thread_resume(&alt_thread); /* 12. Process the time it took to resume */ @@ -146,9 +147,9 @@ static void start_thread_entry(void *p1, void *p2, void *p3) /* 13. Process the time to terminate */ - start = timing_counter_get(); + start = timing_timestamp_get(); k_thread_abort(&alt_thread); - finish = timing_counter_get(); + finish = timing_timestamp_get(); thread_abort_sum += timing_cycles_get(&start, &finish); } @@ -239,6 +240,7 @@ int thread_ops(uint32_t num_iterations, uint32_t start_options, uint32_t alt_opt } cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(start_options, alt_options); k_sem_give(&pause_sem); if ((bit_options & START_ALT) == START_ALT) { @@ -255,6 +257,7 @@ int thread_ops(uint32_t num_iterations, uint32_t start_options, uint32_t alt_opt } cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(start_options, alt_options); k_sem_give(&pause_sem); snprintf(description, sizeof(description), @@ -266,6 +269,7 @@ int thread_ops(uint32_t num_iterations, uint32_t start_options, uint32_t alt_opt num_iterations, false, ""); cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(start_options, alt_options); k_sem_give(&pause_sem); snprintf(description, sizeof(description), @@ -277,6 +281,7 @@ int thread_ops(uint32_t num_iterations, uint32_t start_options, uint32_t alt_opt num_iterations, false, ""); cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(start_options, alt_options); k_sem_give(&pause_sem); snprintf(description, sizeof(description), @@ -288,6 +293,7 @@ int thread_ops(uint32_t num_iterations, uint32_t start_options, uint32_t alt_opt num_iterations, false, ""); cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(start_options, alt_options); k_sem_give(&pause_sem); snprintf(description, sizeof(description), diff --git a/tests/benchmarks/latency_measure/src/thread_switch_yield.c b/tests/benchmarks/latency_measure/src/thread_switch_yield.c index 14bd157d720..44ccc5932c5 100644 --- a/tests/benchmarks/latency_measure/src/thread_switch_yield.c +++ b/tests/benchmarks/latency_measure/src/thread_switch_yield.c @@ -23,6 +23,7 @@ #include #include "utils.h" +#include "timing_sc.h" static void alt_thread_entry(void *p1, void *p2, void *p3) { @@ -37,7 +38,7 @@ static void alt_thread_entry(void *p1, void *p2, void *p3) /* 3. Obtain the 'finish' timestamp */ - timestamp.sample = timing_counter_get(); + timestamp.sample = timing_timestamp_get(); /* 4. Switch to */ @@ -64,7 +65,7 @@ static void start_thread_entry(void *p1, void *p2, void *p3) /* 1. Get 'start' timestamp */ - start = timing_counter_get(); + start = timing_timestamp_get(); /* 2. Switch to */ @@ -127,6 +128,8 @@ static void thread_switch_yield_common(const char *description, sum = timestamp.cycles; + sum -= timestamp_overhead_adjustment(start_options, alt_options); + snprintf(summary, sizeof(summary), "%s (%c -> %c)", description, diff --git a/tests/benchmarks/latency_measure/src/timing_sc.c b/tests/benchmarks/latency_measure/src/timing_sc.c new file mode 100644 index 00000000000..051b9d1810b --- /dev/null +++ b/tests/benchmarks/latency_measure/src/timing_sc.c @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2023 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * @file + * This file contains routines for implementing a timestamp system call + * as well as routines for determining the overhead associated with it. + */ + +#include +#include +#include "utils.h" +#include "timing_sc.h" + +BENCH_BMEM uint64_t timestamp_overhead; +#ifdef CONFIG_USERSPACE +BENCH_BMEM uint64_t user_timestamp_overhead; +#endif + +timing_t z_impl_timing_timestamp_get(void) +{ + return timing_counter_get(); +} + +#ifdef CONFIG_USERSPACE +timing_t z_vrfy_timing_timestamp_get(void) +{ + return z_impl_timing_timestamp_get(); +} +#include +#endif + +static void start_thread_entry(void *p1, void *p2, void *p3) +{ + uint32_t num_iterations = (uint32_t)(uintptr_t)p1; + timing_t start; + timing_t finish; + + ARG_UNUSED(p2); + ARG_UNUSED(p3); + + start = timing_timestamp_get(); + for (uint32_t i = 0; i < num_iterations; i++) { + timing_timestamp_get(); + } + finish = timing_timestamp_get(); + + timestamp.cycles = timing_cycles_get(&start, &finish); +} + +void timestamp_overhead_init(uint32_t num_iterations) +{ + int priority; + + priority = k_thread_priority_get(k_current_get()); + + k_thread_create(&start_thread, start_stack, + K_THREAD_STACK_SIZEOF(start_stack), + start_thread_entry, + (void *)(uintptr_t)num_iterations, NULL, NULL, + priority - 1, 0, K_FOREVER); + + k_thread_start(&start_thread); + + k_thread_join(&start_thread, K_FOREVER); + + timestamp_overhead = timestamp.cycles; + +#ifdef CONFIG_USERSPACE + k_thread_create(&start_thread, start_stack, + K_THREAD_STACK_SIZEOF(start_stack), + start_thread_entry, + (void *)(uintptr_t)num_iterations, NULL, NULL, + priority - 1, K_USER, K_FOREVER); + + k_thread_start(&start_thread); + + k_thread_join(&start_thread, K_FOREVER); + + user_timestamp_overhead = timestamp.cycles; +#endif +} + +uint64_t timestamp_overhead_adjustment(uint32_t options1, uint32_t options2) +{ +#ifdef CONFIG_USERSPACE + if (((options1 | options2) & K_USER) == K_USER) { + if (((options1 & options2) & K_USER) == K_USER) { + /* + * Both start and finish timestamps were obtained + * from userspace. + */ + return user_timestamp_overhead; + } + /* + * One timestamp came from userspace, and the other came + * from kernel space. Estimate the overhead as the mean + * between the two. + */ + return (timestamp_overhead + user_timestamp_overhead) / 2; + } +#endif + + /* + * Both start and finish timestamps were obtained + * from kernel space. + */ + return timestamp_overhead; +} diff --git a/tests/benchmarks/latency_measure/src/timing_sc.h b/tests/benchmarks/latency_measure/src/timing_sc.h new file mode 100644 index 00000000000..265d71f5048 --- /dev/null +++ b/tests/benchmarks/latency_measure/src/timing_sc.h @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2023 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _LATENCY_MEASURE_TIMING_SC_H +#define _LATENCY_MEASURE_TIMING_SC_H + +#include +#include +#include + +__syscall timing_t timing_timestamp_get(void); + +void timestamp_overhead_init(uint32_t num_iterations); +uint64_t timestamp_overhead_adjustment(uint32_t options1, uint32_t options2); + +#include + +#endif diff --git a/tests/benchmarks/latency_measure/src/utils.h b/tests/benchmarks/latency_measure/src/utils.h index a946e38a38b..62d2fb7259e 100644 --- a/tests/benchmarks/latency_measure/src/utils.h +++ b/tests/benchmarks/latency_measure/src/utils.h @@ -19,6 +19,12 @@ #define START_STACK_SIZE (512 + CONFIG_TEST_EXTRA_STACK_SIZE) #define ALT_STACK_SIZE (512 + CONFIG_TEST_EXTRA_STACK_SIZE) +#ifdef CONFIG_USERSPACE +#define BENCH_BMEM K_APP_BMEM(bench_mem_partition) +#else +#define BENCH_BMEM +#endif + struct timestamp_data { uint64_t cycles; timing_t sample; @@ -33,6 +39,10 @@ extern struct k_thread alt_thread; extern struct k_sem pause_sem; extern struct timestamp_data timestamp; +#ifdef CONFIG_USERSPACE +extern uint64_t user_timestamp_overhead; +#endif +extern uint64_t timestamp_overhead; extern int error_count;