diff --git a/tests/benchmarks/latency_measure/prj.conf b/tests/benchmarks/latency_measure/prj.conf index 4a786177bbc..03621afb14e 100644 --- a/tests/benchmarks/latency_measure/prj.conf +++ b/tests/benchmarks/latency_measure/prj.conf @@ -24,3 +24,4 @@ CONFIG_MP_MAX_NUM_CPUS=1 CONFIG_TIMING_FUNCTIONS=y CONFIG_HEAP_MEM_POOL_SIZE=2048 +CONFIG_APPLICATION_DEFINED_SYSCALL=y diff --git a/tests/benchmarks/latency_measure/src/int_to_thread.c b/tests/benchmarks/latency_measure/src/int_to_thread.c index 608d3b30893..ad3cf70fad7 100644 --- a/tests/benchmarks/latency_measure/src/int_to_thread.c +++ b/tests/benchmarks/latency_measure/src/int_to_thread.c @@ -24,6 +24,7 @@ #include #include "utils.h" +#include "timing_sc.h" #include @@ -43,7 +44,7 @@ static void test_isr(const void *arg) k_sem_give(sem); } - timestamp.sample = timing_counter_get(); + timestamp.sample = timing_timestamp_get(); } /** @@ -61,7 +62,7 @@ static void int_to_interrupted_thread(uint32_t num_iterations, uint64_t *sum) for (uint32_t i = 0; i < num_iterations; i++) { irq_offload(test_isr, NULL); - finish = timing_counter_get(); + finish = timing_timestamp_get(); start = timestamp.sample; *sum += timing_cycles_get(&start, &finish); @@ -92,7 +93,7 @@ static void start_thread_entry(void *p1, void *p2, void *p3) /* 3. Obtain the start and finish timestamps */ - finish = timing_counter_get(); + finish = timing_timestamp_get(); start = timestamp.sample; sum += timing_cycles_get(&start, &finish); @@ -171,6 +172,8 @@ int int_to_thread(uint32_t num_iterations) int_to_interrupted_thread(num_iterations, &sum); + sum -= timestamp_overhead_adjustment(0, 0); + PRINT_STATS_AVG("Switch from ISR back to interrupted thread", (uint32_t)sum, num_iterations, false, ""); @@ -178,6 +181,8 @@ int int_to_thread(uint32_t num_iterations) int_to_another_thread(num_iterations, &sum, 0); + sum -= timestamp_overhead_adjustment(0, 0); + PRINT_STATS_AVG("Switch from ISR to another thread (kernel)", (uint32_t)sum, num_iterations, false, ""); @@ -186,6 +191,8 @@ int int_to_thread(uint32_t num_iterations) #if CONFIG_USERSPACE int_to_another_thread(num_iterations, &sum, K_USER); + sum -= timestamp_overhead_adjustment(0, K_USER); + PRINT_STATS_AVG("Switch from ISR to another thread (user)", (uint32_t)sum, num_iterations, false, ""); #endif diff --git a/tests/benchmarks/latency_measure/src/main.c b/tests/benchmarks/latency_measure/src/main.c index ca44f164bad..499ce1273f1 100644 --- a/tests/benchmarks/latency_measure/src/main.c +++ b/tests/benchmarks/latency_measure/src/main.c @@ -14,18 +14,13 @@ #include #include #include "utils.h" +#include "timing_sc.h" #include #define NUM_ITERATIONS 10000 #define STACK_SIZE (1024 + CONFIG_TEST_EXTRA_STACK_SIZE) -#ifdef CONFIG_USERSPACE -#define BENCH_BMEM K_APP_BMEM(bench_mem_partition) -#else -#define BENCH_BMEM -#endif - uint32_t tm_off; BENCH_BMEM struct timestamp_data timestamp; @@ -76,6 +71,8 @@ static void test_thread(void *arg1, void *arg2, void *arg3) TC_START("Time Measurement"); TC_PRINT("Timing results: Clock frequency: %u MHz\n", freq); + timestamp_overhead_init(NUM_ITERATIONS); + /* Preemptive threads context switching */ thread_switch_yield(NUM_ITERATIONS, false); diff --git a/tests/benchmarks/latency_measure/src/mutex_lock_unlock.c b/tests/benchmarks/latency_measure/src/mutex_lock_unlock.c index 4b74a1cfd35..5ab52b43dcd 100644 --- a/tests/benchmarks/latency_measure/src/mutex_lock_unlock.c +++ b/tests/benchmarks/latency_measure/src/mutex_lock_unlock.c @@ -15,6 +15,7 @@ #include #include #include "utils.h" +#include "timing_sc.h" static K_MUTEX_DEFINE(test_mutex); @@ -30,7 +31,7 @@ static void start_lock_unlock(void *p1, void *p2, void *p3) ARG_UNUSED(p2); ARG_UNUSED(p3); - start = timing_counter_get(); + start = timing_timestamp_get(); /* Recursively lock take the mutex */ @@ -38,11 +39,11 @@ static void start_lock_unlock(void *p1, void *p2, void *p3) k_mutex_lock(&test_mutex, K_NO_WAIT); } - finish = timing_counter_get(); + finish = timing_timestamp_get(); lock_cycles = timing_cycles_get(&start, &finish); - start = timing_counter_get(); + start = timing_timestamp_get(); /* Recursively unlock the mutex */ @@ -50,7 +51,7 @@ static void start_lock_unlock(void *p1, void *p2, void *p3) k_mutex_unlock(&test_mutex); } - finish = timing_counter_get(); + finish = timing_timestamp_get(); unlock_cycles = timing_cycles_get(&start, &finish); @@ -90,6 +91,7 @@ int mutex_lock_unlock(uint32_t num_iterations, uint32_t options) k_thread_start(&start_thread); cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(options, options); k_sem_give(&pause_sem); snprintf(description, sizeof(description), @@ -99,6 +101,7 @@ int mutex_lock_unlock(uint32_t num_iterations, uint32_t options) false, ""); cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(options, options); snprintf(description, sizeof(description), "Unlock a mutex from %s thread", diff --git a/tests/benchmarks/latency_measure/src/sema_test_signal_release.c b/tests/benchmarks/latency_measure/src/sema_test_signal_release.c index 4e36f534ba1..e5ea2b72992 100644 --- a/tests/benchmarks/latency_measure/src/sema_test_signal_release.c +++ b/tests/benchmarks/latency_measure/src/sema_test_signal_release.c @@ -15,6 +15,7 @@ #include #include #include "utils.h" +#include "timing_sc.h" static struct k_sem sem; @@ -33,7 +34,7 @@ static void alt_thread_entry(void *p1, void *p2, void *p3) * to . */ - mid = timing_counter_get(); + mid = timing_timestamp_get(); k_sem_give(&sem); /* 5. Share the timestamp. */ @@ -68,12 +69,12 @@ static void start_thread_entry(void *p1, void *p2, void *p3) * to . */ - start = timing_counter_get(); + start = timing_timestamp_get(); k_sem_take(&sem, K_FOREVER); /* 3. Get the timestamp. */ - finish = timing_counter_get(); + finish = timing_timestamp_get(); /* * 4. Let run so it can share its @@ -135,6 +136,7 @@ void sema_context_switch(uint32_t num_iterations, /* Retrieve the number of cycles spent taking the semaphore */ cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(start_options, alt_options); snprintf(description, sizeof(description), "Take a semaphore (context switch %c -> %c)", @@ -150,6 +152,7 @@ void sema_context_switch(uint32_t num_iterations, /* Retrieve the number of cycles spent taking the semaphore */ cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(start_options, alt_options); snprintf(description, sizeof(description), "Give a semaphore (context switch %c -> %c)", @@ -183,24 +186,24 @@ static void immediate_give_take(void *p1, void *p2, void *p3) /* 1. Give a semaphore. No threads are waiting on it */ - start = timing_counter_get(); + start = timing_timestamp_get(); for (uint32_t i = 0; i < num_iterations; i++) { k_sem_give(&sem); } - finish = timing_counter_get(); + finish = timing_timestamp_get(); give_cycles = timing_cycles_get(&start, &finish); /* 2. Take a semaphore--no contention */ - start = timing_counter_get(); + start = timing_timestamp_get(); for (uint32_t i = 0; i < num_iterations; i++) { k_sem_take(&sem, K_NO_WAIT); } - finish = timing_counter_get(); + finish = timing_timestamp_get(); take_cycles = timing_cycles_get(&start, &finish); /* 3. Post the number of cycles spent giving the semaphore */ @@ -250,6 +253,7 @@ int sema_test_signal(uint32_t num_iterations, uint32_t options) /* 5. Retrieve the number of cycles spent giving the semaphore */ cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(options, options); snprintf(description, sizeof(description), "Give a semaphore (no waiters) from %s thread", @@ -269,6 +273,7 @@ int sema_test_signal(uint32_t num_iterations, uint32_t options) /* 9. Retrieve the number of cycles spent taking the semaphore */ cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(options, options); snprintf(description, sizeof(description), "Take a semaphore (no blocking) from %s thread", diff --git a/tests/benchmarks/latency_measure/src/thread.c b/tests/benchmarks/latency_measure/src/thread.c index db4da3e685f..4448b0e2304 100644 --- a/tests/benchmarks/latency_measure/src/thread.c +++ b/tests/benchmarks/latency_measure/src/thread.c @@ -22,6 +22,7 @@ #include #include #include "utils.h" +#include "timing_sc.h" #define STACK_SIZE (512 + CONFIG_TEST_EXTRA_STACK_SIZE) @@ -38,7 +39,7 @@ static void alt_thread_entry(void *p1, void *p2, void *p3) /* 3. Finish measuring time to start */ - timestamp.sample = timing_counter_get(); + timestamp.sample = timing_timestamp_get(); /* 4. Let process the time measurement. */ @@ -46,12 +47,12 @@ static void alt_thread_entry(void *p1, void *p2, void *p3) /* 7. Begin measuring time to suspend active thread (self/alt_thread) */ - timestamp.sample = timing_counter_get(); + timestamp.sample = timing_timestamp_get(); k_thread_suspend(&alt_thread); /* 10. Finish measuring time to resume (self) */ - timestamp.sample = timing_counter_get(); + timestamp.sample = timing_timestamp_get(); /* 11. Lower the priority so can terminate us. */ @@ -81,14 +82,14 @@ static void start_thread_entry(void *p1, void *p2, void *p3) /* 1. Measure time to create, but not start */ if ((bit_options & START_ALT) == START_ALT) { - start = timing_counter_get(); + start = timing_timestamp_get(); k_thread_create(&alt_thread, alt_stack, K_THREAD_STACK_SIZEOF(alt_stack), alt_thread_entry, NULL, NULL, NULL, priority, (bit_options & ALT_USER) == ALT_USER ? K_USER : 0, K_FOREVER); - finish = timing_counter_get(); + finish = timing_timestamp_get(); thread_create_sum += timing_cycles_get(&start, &finish); } else { @@ -116,7 +117,7 @@ static void start_thread_entry(void *p1, void *p2, void *p3) /* 2. Begin measuring time to start */ - start = timing_counter_get(); + start = timing_timestamp_get(); k_thread_start(&alt_thread); /* 5. Process the time to start */ @@ -131,12 +132,12 @@ static void start_thread_entry(void *p1, void *p2, void *p3) /* 8. Finish measuring time to suspend */ start = timestamp.sample; - finish = timing_counter_get(); + finish = timing_timestamp_get(); thread_suspend_sum += timing_cycles_get(&start, &finish); /* 9. Being measuring time to resume */ - start = timing_counter_get(); + start = timing_timestamp_get(); k_thread_resume(&alt_thread); /* 12. Process the time it took to resume */ @@ -146,9 +147,9 @@ static void start_thread_entry(void *p1, void *p2, void *p3) /* 13. Process the time to terminate */ - start = timing_counter_get(); + start = timing_timestamp_get(); k_thread_abort(&alt_thread); - finish = timing_counter_get(); + finish = timing_timestamp_get(); thread_abort_sum += timing_cycles_get(&start, &finish); } @@ -239,6 +240,7 @@ int thread_ops(uint32_t num_iterations, uint32_t start_options, uint32_t alt_opt } cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(start_options, alt_options); k_sem_give(&pause_sem); if ((bit_options & START_ALT) == START_ALT) { @@ -255,6 +257,7 @@ int thread_ops(uint32_t num_iterations, uint32_t start_options, uint32_t alt_opt } cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(start_options, alt_options); k_sem_give(&pause_sem); snprintf(description, sizeof(description), @@ -266,6 +269,7 @@ int thread_ops(uint32_t num_iterations, uint32_t start_options, uint32_t alt_opt num_iterations, false, ""); cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(start_options, alt_options); k_sem_give(&pause_sem); snprintf(description, sizeof(description), @@ -277,6 +281,7 @@ int thread_ops(uint32_t num_iterations, uint32_t start_options, uint32_t alt_opt num_iterations, false, ""); cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(start_options, alt_options); k_sem_give(&pause_sem); snprintf(description, sizeof(description), @@ -288,6 +293,7 @@ int thread_ops(uint32_t num_iterations, uint32_t start_options, uint32_t alt_opt num_iterations, false, ""); cycles = timestamp.cycles; + cycles -= timestamp_overhead_adjustment(start_options, alt_options); k_sem_give(&pause_sem); snprintf(description, sizeof(description), diff --git a/tests/benchmarks/latency_measure/src/thread_switch_yield.c b/tests/benchmarks/latency_measure/src/thread_switch_yield.c index 14bd157d720..44ccc5932c5 100644 --- a/tests/benchmarks/latency_measure/src/thread_switch_yield.c +++ b/tests/benchmarks/latency_measure/src/thread_switch_yield.c @@ -23,6 +23,7 @@ #include #include "utils.h" +#include "timing_sc.h" static void alt_thread_entry(void *p1, void *p2, void *p3) { @@ -37,7 +38,7 @@ static void alt_thread_entry(void *p1, void *p2, void *p3) /* 3. Obtain the 'finish' timestamp */ - timestamp.sample = timing_counter_get(); + timestamp.sample = timing_timestamp_get(); /* 4. Switch to */ @@ -64,7 +65,7 @@ static void start_thread_entry(void *p1, void *p2, void *p3) /* 1. Get 'start' timestamp */ - start = timing_counter_get(); + start = timing_timestamp_get(); /* 2. Switch to */ @@ -127,6 +128,8 @@ static void thread_switch_yield_common(const char *description, sum = timestamp.cycles; + sum -= timestamp_overhead_adjustment(start_options, alt_options); + snprintf(summary, sizeof(summary), "%s (%c -> %c)", description, diff --git a/tests/benchmarks/latency_measure/src/timing_sc.c b/tests/benchmarks/latency_measure/src/timing_sc.c new file mode 100644 index 00000000000..051b9d1810b --- /dev/null +++ b/tests/benchmarks/latency_measure/src/timing_sc.c @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2023 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * @file + * This file contains routines for implementing a timestamp system call + * as well as routines for determining the overhead associated with it. + */ + +#include +#include +#include "utils.h" +#include "timing_sc.h" + +BENCH_BMEM uint64_t timestamp_overhead; +#ifdef CONFIG_USERSPACE +BENCH_BMEM uint64_t user_timestamp_overhead; +#endif + +timing_t z_impl_timing_timestamp_get(void) +{ + return timing_counter_get(); +} + +#ifdef CONFIG_USERSPACE +timing_t z_vrfy_timing_timestamp_get(void) +{ + return z_impl_timing_timestamp_get(); +} +#include +#endif + +static void start_thread_entry(void *p1, void *p2, void *p3) +{ + uint32_t num_iterations = (uint32_t)(uintptr_t)p1; + timing_t start; + timing_t finish; + + ARG_UNUSED(p2); + ARG_UNUSED(p3); + + start = timing_timestamp_get(); + for (uint32_t i = 0; i < num_iterations; i++) { + timing_timestamp_get(); + } + finish = timing_timestamp_get(); + + timestamp.cycles = timing_cycles_get(&start, &finish); +} + +void timestamp_overhead_init(uint32_t num_iterations) +{ + int priority; + + priority = k_thread_priority_get(k_current_get()); + + k_thread_create(&start_thread, start_stack, + K_THREAD_STACK_SIZEOF(start_stack), + start_thread_entry, + (void *)(uintptr_t)num_iterations, NULL, NULL, + priority - 1, 0, K_FOREVER); + + k_thread_start(&start_thread); + + k_thread_join(&start_thread, K_FOREVER); + + timestamp_overhead = timestamp.cycles; + +#ifdef CONFIG_USERSPACE + k_thread_create(&start_thread, start_stack, + K_THREAD_STACK_SIZEOF(start_stack), + start_thread_entry, + (void *)(uintptr_t)num_iterations, NULL, NULL, + priority - 1, K_USER, K_FOREVER); + + k_thread_start(&start_thread); + + k_thread_join(&start_thread, K_FOREVER); + + user_timestamp_overhead = timestamp.cycles; +#endif +} + +uint64_t timestamp_overhead_adjustment(uint32_t options1, uint32_t options2) +{ +#ifdef CONFIG_USERSPACE + if (((options1 | options2) & K_USER) == K_USER) { + if (((options1 & options2) & K_USER) == K_USER) { + /* + * Both start and finish timestamps were obtained + * from userspace. + */ + return user_timestamp_overhead; + } + /* + * One timestamp came from userspace, and the other came + * from kernel space. Estimate the overhead as the mean + * between the two. + */ + return (timestamp_overhead + user_timestamp_overhead) / 2; + } +#endif + + /* + * Both start and finish timestamps were obtained + * from kernel space. + */ + return timestamp_overhead; +} diff --git a/tests/benchmarks/latency_measure/src/timing_sc.h b/tests/benchmarks/latency_measure/src/timing_sc.h new file mode 100644 index 00000000000..265d71f5048 --- /dev/null +++ b/tests/benchmarks/latency_measure/src/timing_sc.h @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2023 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef _LATENCY_MEASURE_TIMING_SC_H +#define _LATENCY_MEASURE_TIMING_SC_H + +#include +#include +#include + +__syscall timing_t timing_timestamp_get(void); + +void timestamp_overhead_init(uint32_t num_iterations); +uint64_t timestamp_overhead_adjustment(uint32_t options1, uint32_t options2); + +#include + +#endif diff --git a/tests/benchmarks/latency_measure/src/utils.h b/tests/benchmarks/latency_measure/src/utils.h index a946e38a38b..62d2fb7259e 100644 --- a/tests/benchmarks/latency_measure/src/utils.h +++ b/tests/benchmarks/latency_measure/src/utils.h @@ -19,6 +19,12 @@ #define START_STACK_SIZE (512 + CONFIG_TEST_EXTRA_STACK_SIZE) #define ALT_STACK_SIZE (512 + CONFIG_TEST_EXTRA_STACK_SIZE) +#ifdef CONFIG_USERSPACE +#define BENCH_BMEM K_APP_BMEM(bench_mem_partition) +#else +#define BENCH_BMEM +#endif + struct timestamp_data { uint64_t cycles; timing_t sample; @@ -33,6 +39,10 @@ extern struct k_thread alt_thread; extern struct k_sem pause_sem; extern struct timestamp_data timestamp; +#ifdef CONFIG_USERSPACE +extern uint64_t user_timestamp_overhead; +#endif +extern uint64_t timestamp_overhead; extern int error_count;