From d03b2496cd3c40d75e5bacdd15adf496948b665c Mon Sep 17 00:00:00 2001 From: Adithya Baglody Date: Wed, 3 May 2017 13:11:51 +0530 Subject: [PATCH] test: benchmarking: Timing metrics for the kernel JIRA: ZEP-1822, ZEP-1823, ZEP-1825 Signed-off-by: Adithya Baglody --- arch/arm/core/isr_wrapper.S | 18 + arch/arm/core/swap.S | 31 ++ arch/x86/core/intstub.S | 22 + arch/x86/core/swap.S | 32 ++ drivers/timer/cortex_m_systick.c | 10 + drivers/timer/loapic_timer.c | 21 +- kernel/Kconfig | 10 + kernel/init.c | 8 + tests/benchmarks/timing_info/Makefile | 4 + tests/benchmarks/timing_info/README.txt | 111 +++++ tests/benchmarks/timing_info/prj.conf | 4 + tests/benchmarks/timing_info/src/Makefile | 7 + .../timing_info/src/main_benchmark.c | 57 +++ .../timing_info/src/msg_passing_bench.c | 439 ++++++++++++++++++ .../timing_info/src/semaphore_bench.c | 183 ++++++++ .../benchmarks/timing_info/src/thread_bench.c | 347 ++++++++++++++ .../benchmarks/timing_info/src/timing_info.h | 95 ++++ .../benchmarks/timing_info/src/yield_bench.c | 85 ++++ tests/benchmarks/timing_info/testcase.ini | 3 + 19 files changed, 1486 insertions(+), 1 deletion(-) create mode 100644 tests/benchmarks/timing_info/Makefile create mode 100644 tests/benchmarks/timing_info/README.txt create mode 100644 tests/benchmarks/timing_info/prj.conf create mode 100644 tests/benchmarks/timing_info/src/Makefile create mode 100644 tests/benchmarks/timing_info/src/main_benchmark.c create mode 100644 tests/benchmarks/timing_info/src/msg_passing_bench.c create mode 100644 tests/benchmarks/timing_info/src/semaphore_bench.c create mode 100644 tests/benchmarks/timing_info/src/thread_bench.c create mode 100644 tests/benchmarks/timing_info/src/timing_info.h create mode 100644 tests/benchmarks/timing_info/src/yield_bench.c create mode 100644 tests/benchmarks/timing_info/testcase.ini diff --git a/arch/arm/core/isr_wrapper.S b/arch/arm/core/isr_wrapper.S index 28b3f6ead94..04bee7060fd 100644 --- a/arch/arm/core/isr_wrapper.S +++ b/arch/arm/core/isr_wrapper.S @@ -43,6 +43,10 @@ SECTION_FUNC(TEXT, _isr_wrapper) push {lr} /* lr is now the first item on the stack */ +#ifdef CONFIG_EXECUTION_BENCHMARKING + bl read_systick_start_of_isr +#endif + #ifdef CONFIG_KERNEL_EVENT_LOGGER_INTERRUPT bl _sys_k_event_logger_interrupt #endif @@ -105,6 +109,20 @@ _idle_state_cleared: * in thumb mode */ ldm r1!,{r0,r3} /* arg in r0, ISR in r3 */ +#ifdef CONFIG_EXECUTION_BENCHMARKING +#if defined(CONFIG_ARMV6_M) + push {r3} +#endif + push {lr} + bl read_systick_end_of_isr +#if defined(CONFIG_ARMV6_M) + pop {r3} + mov lr,r3 + pop {r3} +#else + pop {lr} +#endif +#endif blx r3 /* call ISR */ #if defined(CONFIG_ARMV6_M) diff --git a/arch/arm/core/swap.S b/arch/arm/core/swap.S index e24a53d6b57..b0ff7634b78 100644 --- a/arch/arm/core/swap.S +++ b/arch/arm/core/swap.S @@ -177,6 +177,23 @@ _thread_irq_disabled: msr PSP, ip +#ifdef CONFIG_EXECUTION_BENCHMARKING +#if defined(CONFIG_ARMV6_M) + push {r3} +#endif + push {lr} + + bl read_systick_end_of_swap + +#if defined(CONFIG_ARMV6_M) + pop {r3} + mov lr,r3 + pop {r3} +#else + pop {lr} +#endif +#endif + /* exc return */ bx lr @@ -292,6 +309,20 @@ _oops: SECTION_FUNC(TEXT, __swap) +#ifdef CONFIG_EXECUTION_BENCHMARKING +#if defined(CONFIG_ARMV6_M) + push {r3} +#endif + push {lr} + bl read_systick_start_of_swap +#if defined(CONFIG_ARMV6_M) + pop {r3} + mov lr,r3 + pop {r3} +#else + pop {lr} +#endif +#endif ldr r1, =_kernel ldr r2, [r1, #_kernel_offset_to_current] str r0, [r2, #_thread_offset_to_basepri] diff --git a/arch/x86/core/intstub.S b/arch/x86/core/intstub.S index 55bede1f743..9c6edbfa28b 100644 --- a/arch/x86/core/intstub.S +++ b/arch/x86/core/intstub.S @@ -78,6 +78,15 @@ */ SECTION_FUNC(TEXT, _interrupt_enter) +#ifdef CONFIG_EXECUTION_BENCHMARKING + pushl %eax + pushl %edx + rdtsc + mov %eax, __start_intr_tsc + mov %edx, __start_intr_tsc+4 + pop %edx + pop %eax +#endif /* * The gen_idt tool creates an interrupt-gate descriptor for * all connections. The processor will automatically clear the IF @@ -246,6 +255,19 @@ alreadyOnIntStack: */ push %eax #endif +#ifdef CONFIG_EXECUTION_BENCHMARKING + /* Save the eax and edx registers before reading the time stamp + * once done pop the values + */ + pushl %eax + pushl %edx + rdtsc + mov %eax,__end_intr_tsc + mov %edx,__end_intr_tsc+4 + pop %edx + pop %eax +#endif + #ifdef CONFIG_NESTED_INTERRUPTS sti /* re-enable interrupts */ #endif diff --git a/arch/x86/core/swap.S b/arch/x86/core/swap.S index 6c931637382..8687ebec49a 100644 --- a/arch/x86/core/swap.S +++ b/arch/x86/core/swap.S @@ -77,7 +77,28 @@ * */ +.macro read_tsc var_name + push %eax + push %edx + rdtsc + mov %eax,\var_name + mov %edx,\var_name+4 + pop %edx + pop %eax +.endm SECTION_FUNC(TEXT, __swap) +#ifdef CONFIG_EXECUTION_BENCHMARKING + /* Save the eax and edx registers before reading the time stamp + * once done pop the values. + */ + push %eax + push %edx + rdtsc + mov %eax,__start_swap_tsc + mov %edx,__start_swap_tsc+4 + pop %edx + pop %eax +#endif #ifdef CONFIG_X86_IAMCU /* save EFLAGS on stack right before return address, just as SYSV would * have done @@ -335,5 +356,16 @@ skipIntLatencyStop: */ popl %edx movl %edx, (%esp) +#endif + +#ifdef CONFIG_EXECUTION_BENCHMARKING + /* Save the eax and edx registers before reading the time stamp + * once done pop the values. + */ + cmp $0x1,__read_swap_end_tsc_value + jne time_read_not_needed + movw $0x2,__read_swap_end_tsc_value + read_tsc __common_var_swap_end_tsc +time_read_not_needed: #endif ret diff --git a/drivers/timer/cortex_m_systick.c b/drivers/timer/cortex_m_systick.c index 12656724225..fe98e17f7e8 100644 --- a/drivers/timer/cortex_m_systick.c +++ b/drivers/timer/cortex_m_systick.c @@ -211,6 +211,11 @@ void _timer_int_handler(void *unused) { ARG_UNUSED(unused); +#ifdef CONFIG_EXECUTION_BENCHMARKING + extern void read_systick_start_of_tick_handler(void); + read_systick_start_of_tick_handler(); +#endif + #ifdef CONFIG_KERNEL_EVENT_LOGGER_INTERRUPT extern void _sys_k_event_logger_interrupt(void); _sys_k_event_logger_interrupt(); @@ -337,6 +342,11 @@ void _timer_int_handler(void *unused) #endif /* CONFIG_SYS_POWER_MANAGEMENT */ +#ifdef CONFIG_EXECUTION_BENCHMARKING + extern void read_systick_end_of_tick_handler(void); + read_systick_end_of_tick_handler(); +#endif + extern void _ExcExit(void); _ExcExit(); } diff --git a/drivers/timer/loapic_timer.c b/drivers/timer/loapic_timer.c index 2f7d100b278..15c008d8c5a 100644 --- a/drivers/timer/loapic_timer.c +++ b/drivers/timer/loapic_timer.c @@ -252,6 +252,16 @@ static inline void program_max_cycles(void) void _timer_int_handler(void *unused /* parameter is not used */ ) { +#ifdef CONFIG_EXECUTION_BENCHMARKING + __asm__ __volatile__ ( + "pushl %eax\n\t" + "pushl %edx\n\t" + "rdtsc\n\t" + "mov %eax, __start_tick_tsc\n\t" + "mov %edx, __start_tick_tsc+4\n\t" + "pop %edx\n\t" + "pop %eax\n\t"); +#endif ARG_UNUSED(unused); #if defined(CONFIG_TICKLESS_KERNEL) @@ -324,7 +334,16 @@ void _timer_int_handler(void *unused /* parameter is not used */ _sys_clock_tick_announce(); #endif /*CONFIG_TICKLESS_IDLE*/ #endif - +#ifdef CONFIG_EXECUTION_BENCHMARKING + __asm__ __volatile__ ( + "pushl %eax\n\t" + "pushl %edx\n\t" + "rdtsc\n\t" + "mov %eax, __end_tick_tsc\n\t" + "mov %edx, __end_tick_tsc+4\n\t" + "pop %edx\n\t" + "pop %eax\n\t"); +#endif /* CONFIG_EXECUTION_BENCHMARKING */ } #ifdef CONFIG_TICKLESS_KERNEL diff --git a/kernel/Kconfig b/kernel/Kconfig index 03e586bbc46..ec0bc471fec 100644 --- a/kernel/Kconfig +++ b/kernel/Kconfig @@ -204,6 +204,16 @@ config INT_LATENCY_BENCHMARK The metrics are displayed (and a new sampling interval is started) each time int_latency_show() is called thereafter. +config EXECUTION_BENCHMARKING + bool + prompt "Timing metrics " + default n + help + This option enables the tracking of various times inside the kernel + the exact set of metrics being tracked is board-dependent. + All timing measurements are enabled for X86 and ARM based architectures. + In other architectures only a subset are enabled. + config THREAD_MONITOR bool prompt "Thread monitoring [EXPERIMENTAL]" diff --git a/kernel/init.c b/kernel/init.c index 4037db246c3..63d27af1a0d 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -56,6 +56,14 @@ u64_t __noinit __main_tsc; /* timestamp when main task starts */ u64_t __noinit __idle_tsc; /* timestamp when CPU goes idle */ #endif +#ifdef CONFIG_EXECUTION_BENCHMARKING +u64_t __noinit __start_swap_tsc; +u64_t __noinit __end_swap_tsc; +u64_t __noinit __start_intr_tsc; +u64_t __noinit __end_intr_tsc; +u64_t __noinit __start_tick_tsc; +u64_t __noinit __end_tick_tsc; +#endif /* init/main and idle threads */ #define IDLE_STACK_SIZE CONFIG_IDLE_STACK_SIZE diff --git a/tests/benchmarks/timing_info/Makefile b/tests/benchmarks/timing_info/Makefile new file mode 100644 index 00000000000..4de50f93d4d --- /dev/null +++ b/tests/benchmarks/timing_info/Makefile @@ -0,0 +1,4 @@ +BOARD ?= qemu_x86 +CONF_FILE = prj.conf + +include ${ZEPHYR_BASE}/Makefile.inc diff --git a/tests/benchmarks/timing_info/README.txt b/tests/benchmarks/timing_info/README.txt new file mode 100644 index 00000000000..1586b1a9ea2 --- /dev/null +++ b/tests/benchmarks/timing_info/README.txt @@ -0,0 +1,111 @@ +Title: Timing Information + +Description: + +Timing measurements for the following features of the OS. +1. Context switch + Time taken to compete the context switch, i.e time spent in _Swap function. +2. Interrupt latency + Time taken from the start of the common interrupt handler till the + actual ISR handler being called. +3. Tick overhead + Time spent by the cpu in the tick handler. +4. Thread Creation + Time spent in creating a thread. +5. Thread cancel + Time taken to cancel the thread which is not yet started execution. + So the time taken to complete the function call is measured. +6. Thread abort + Time taken to abort the thread which has already started execution. + So the time measured is from the start of the function call until the + another thread is swapped in. +7. Thread Suspend + The time measured is from the start of the function call until the current + thread is suspended and another thread is swapped in. +8. Thread Resume + The time measured is from the start of the function call until the required + thread is resumed by swap operation. +9. Thread Yield + The time measured is from the start of the function call until the higher priority + thread is swapped in. +10. Thread Sleep + The time measured is from the start of the function call until the current + thread is put on the timeout queue and another thread is swapped in. +11. Heap Malloc + The time to allocate heap memory in fixed size chunks. Continuously allocate + the memory from the pool. Average time taken to complete the function call + is measured. +12. Heap Free + Time to free heap memory in fixed size chunks. Continuously free + the memory that was allocated. Average time taken to complete the function call + is measured. +13. Semaphore Take with context switch + Taking a semaphore causes a thread waiting on the semaphore to be swapped in. + Thus Time measured is the time taken from the function call till the waiting + thread is swapped in. +14. Semaphore Give with context switch + Giving a semaphore causes a thread waiting on the semaphore to be swapped + in (higher priority). + Thus Time measured is the time taken from the function call till the waiting + thread is swapped in. +15. Semaphore Take without context switch + Time taken to take the semaphore. Thus time to complete the function + call is measured. +16. Semaphore Give without context switch + Time taken to give the semaphore. Thus time to complete the function + call is measured. +17. Mutex lock + Time taken to lock the mutex. Thus time to complete the function + call is measured. +18. Mutex unlock + Time taken to unlock the mutex. Thus time to complete the function + call is measured. +19. Message Queue Put with context switch + A thread is waiting for a message to arrive. The time taken from the start + of the function call till the waiting thread is swapped in is measured. +20. Message Queue Put without context switch + The time taken to complete the function call is measured. +21. Message Queue get with context switch + A thread has gone into waiting because the message queue is full. + When a get occurs this thread gets free to execute. The time taken from + the start of the function call till the waiting thread is + swapped in is measured. +22. Message Queue get without context switch + The time taken to complete the function call is measured. +23. MailBox synchronous put + The time taken from the start of the function call till the waiting thread + is swapped in is measured. +24. MailBox synchronous get + The time taken from the start of the function call till the waiting thread + is swapped in is measured. +25. MailBox asynchronous put + The time taken to complete the function call is measured. +26. MailBox get without context switch + The time taken to complete the function call is measured. + + +-------------------------------------------------------------------------------- + +Building and Running Project: + +This benchmark outputs to the console. It can be built and executed +on QEMU as follows: + + make qemu + +-------------------------------------------------------------------------------- + +Troubleshooting: + +Problems caused by out-dated project information can be addressed by +issuing one of the following commands then rebuilding the project: + + make clean # discard results of previous builds + # but keep existing configuration info +or + make pristine # discard results of previous builds + # and restore pre-defined configuration info + +-------------------------------------------------------------------------------- + +Sample Output: diff --git a/tests/benchmarks/timing_info/prj.conf b/tests/benchmarks/timing_info/prj.conf new file mode 100644 index 00000000000..4cd820b63ee --- /dev/null +++ b/tests/benchmarks/timing_info/prj.conf @@ -0,0 +1,4 @@ +CONFIG_EXECUTION_BENCHMARKING=y +CONFIG_TEST_RANDOM_GENERATOR=y +CONFIG_HEAP_MEM_POOL_SIZE=256 +CONFIG_MAIN_STACK_SIZE=2048 \ No newline at end of file diff --git a/tests/benchmarks/timing_info/src/Makefile b/tests/benchmarks/timing_info/src/Makefile new file mode 100644 index 00000000000..5219ad793c3 --- /dev/null +++ b/tests/benchmarks/timing_info/src/Makefile @@ -0,0 +1,7 @@ +ccflags-y += -I$(ZEPHYR_BASE)/tests/include + +obj-$(CONFIG_EXECUTION_BENCHMARKING) += main_benchmark.o +obj-$(CONFIG_EXECUTION_BENCHMARKING) += thread_bench.o +obj-$(CONFIG_EXECUTION_BENCHMARKING) += yield_bench.o +obj-$(CONFIG_EXECUTION_BENCHMARKING) += semaphore_bench.o +obj-$(CONFIG_EXECUTION_BENCHMARKING) += msg_passing_bench.o diff --git a/tests/benchmarks/timing_info/src/main_benchmark.c b/tests/benchmarks/timing_info/src/main_benchmark.c new file mode 100644 index 00000000000..2dec6071427 --- /dev/null +++ b/tests/benchmarks/timing_info/src/main_benchmark.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013-2015 Wind River Systems, Inc. + * Copyright (c) 2016 Intel Corporation. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @file + * @brief Measure time + * + */ +#include +#include +#include +#include +#include "timing_info.h" + +void main(void) +{ + u32_t freq = CONFIG_SYS_CLOCK_HW_CYCLES_PER_SEC / 1000000; + + TC_START("Time Measurement"); + TC_PRINT("Timing Results: Clock Frequency: %d MHz\n", freq); + + /*******************************************************************/ + /* System parameters and thread Benchmarking*/ + system_thread_bench(); + + /*******************************************************************/ + /* Thread yield*/ + yield_bench(); + + /*******************************************************************/ + /* heap Memory benchmarking*/ + heap_malloc_free_bench(); + + /*******************************************************************/ + /* Semaphore take and get*/ + semaphore_bench(); + + /*******************************************************************/ + /* mutex lock and unlock*/ + mutex_bench(); + + /*******************************************************************/ + /* mutex lock and unlock*/ + msg_passing_bench(); + + + TC_PRINT("Timing Measurement finished\n"); + + /* for sanity regression test utility. */ + TC_END_RESULT(TC_PASS); + TC_END_REPORT(TC_PASS); + +} \ No newline at end of file diff --git a/tests/benchmarks/timing_info/src/msg_passing_bench.c b/tests/benchmarks/timing_info/src/msg_passing_bench.c new file mode 100644 index 00000000000..91bc447bbb7 --- /dev/null +++ b/tests/benchmarks/timing_info/src/msg_passing_bench.c @@ -0,0 +1,439 @@ +/* + * Copyright (c) 2013-2015 Wind River Systems, Inc. + * Copyright (c) 2016 Intel Corporation. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include "timing_info.h" + +extern char sline[]; + +/* mailbox*/ +/* K_MBOX_DEFINE(test_msg_queue) */ +K_MSGQ_DEFINE(benchmark_q, sizeof(int), 10, 4); +K_MSGQ_DEFINE(benchmark_q_get, sizeof(int), 3, 4); +K_MBOX_DEFINE(benchmark_mbox); + +/* Declare a semaphore for the msgq*/ +K_SEM_DEFINE(mbox_sem, 1, 1); + +/* common location for the swap to write the tsc data*/ +extern u32_t __read_swap_end_tsc_value; +extern u64_t __common_var_swap_end_tsc; + +/* location of the time stamps*/ +u64_t __msg_q_put_state; +u64_t __msg_q_get_state; + +u64_t __msg_q_put_w_cxt_start_tsc; +u64_t __msg_q_put_w_cxt_end_tsc; + +u64_t __msg_q_put_wo_cxt_start_tsc; /* without context switch */ +u64_t __msg_q_put_wo_cxt_end_tsc; + +u64_t __msg_q_get_w_cxt_start_tsc; +u64_t __msg_q_get_w_cxt_end_tsc; + +u64_t msg_q_get_wo_cxt_start_tsc; +u64_t msg_q_get_wo_cxt_end_tsc; + +u32_t __mbox_sync_put_state; +u64_t mbox_sync_put_start_tsc; +u64_t mbox_sync_put_end_tsc; + +u32_t __mbox_sync_get_state; +u64_t mbox_sync_get_start_tsc; +u64_t mbox_sync_get_end_tsc; + +u64_t mbox_async_put_start_tsc; +u64_t mbox_async_put_end_tsc; + +u64_t mbox_get_w_cxt_start_tsc; +u64_t mbox_get_w_cxt_end_tsc; + +/*For benchmarking msg queues*/ +k_tid_t producer_w_cxt_switch_tid; +k_tid_t producer_wo_cxt_switch_tid; +k_tid_t producer_get_w_cxt_switch_tid; +k_tid_t consumer_get_w_cxt_switch_tid; +k_tid_t consumer_tid; +k_tid_t thread_mbox_sync_put_send_tid; +k_tid_t thread_mbox_sync_put_receive_tid; +k_tid_t thread_mbox_sync_get_send_tid; +k_tid_t thread_mbox_sync_get_receive_tid; +k_tid_t thread_mbox_async_put_send_tid; +k_tid_t thread_mbox_async_put_receive_tid; + +/* To time thread creation*/ +#define STACK_SIZE 500 +extern char __noinit __stack my_stack_area[STACK_SIZE]; +extern char __noinit __stack my_stack_area_0[STACK_SIZE]; + +/* thread functions*/ +void thread_producer_msgq_w_cxt_switch(void *p1, void *p2, void *p3); +void thread_producer_msgq_wo_cxt_switch(void *p1, void *p2, void *p3); + +void thread_producer_get_msgq_w_cxt_switch(void *p1, void *p2, void *p3); +void thread_consumer_get_msgq_w_cxt_switch(void *p1, void *p2, void *p3); + +void thread_mbox_sync_put_send(void *p1, void *p2, void *p3); +void thread_mbox_sync_put_receive(void *p1, void *p2, void *p3); + +void thread_mbox_sync_get_send(void *p1, void *p2, void *p3); +void thread_mbox_sync_get_receive(void *p1, void *p2, void *p3); + +void thread_mbox_async_put_send(void *p1, void *p2, void *p3); +void thread_mbox_async_put_receive(void *p1, void *p2, void *p3); + +volatile u64_t time_check; +int received_data_get; +int received_data_consumer; +int data_to_send; + +void msg_passing_bench(void) +{ + DECLARE_VAR(msg_q, put_w_cxt) + DECLARE_VAR(msg_q, put_wo_cxt) + + DECLARE_VAR(msg_q, get_w_cxt) + DECLARE_VAR(msg_q, get_wo_cxt) + + DECLARE_VAR(mbox, sync_put) + DECLARE_VAR(mbox, sync_get) + DECLARE_VAR(mbox, async_put) + + DECLARE_VAR(mbox, get_w_cxt) + + + /*******************************************************************/ + /* Msg queue for put*/ + int received_data = 0; + + producer_w_cxt_switch_tid = + k_thread_spawn(my_stack_area, STACK_SIZE, + thread_producer_msgq_w_cxt_switch, NULL, + NULL, NULL, 2 /*priority*/, 0, 50); + + u32_t msg_status = k_msgq_get(&benchmark_q, &received_data, 300); + + producer_wo_cxt_switch_tid = + k_thread_spawn(my_stack_area_0, STACK_SIZE, + thread_producer_msgq_wo_cxt_switch, + NULL, NULL, NULL, -2 /*priority*/, 0, 0); + + k_thread_abort(producer_w_cxt_switch_tid); + k_thread_abort(producer_wo_cxt_switch_tid); + __msg_q_put_w_cxt_end_tsc = ((u32_t)__common_var_swap_end_tsc); + ARG_UNUSED(msg_status); + + /*******************************************************************/ + + /* Msg queue for get*/ + + producer_get_w_cxt_switch_tid = + k_thread_spawn(my_stack_area, + STACK_SIZE, + thread_producer_get_msgq_w_cxt_switch, NULL, + NULL, NULL, 1 /*priority*/, 0, 50); + consumer_get_w_cxt_switch_tid = + k_thread_spawn(my_stack_area_0, + STACK_SIZE, + thread_consumer_get_msgq_w_cxt_switch, + NULL, NULL, NULL, + 2 /*priority*/, 0, 50); + k_sleep(2000); /* make the main thread sleep */ + k_thread_abort(producer_get_w_cxt_switch_tid); + __msg_q_get_w_cxt_end_tsc = (__common_var_swap_end_tsc); + + /*******************************************************************/ + + /* Msg queue for get*/ + /* from previous step got the msg_q full now just do a simple read*/ + msg_q_get_wo_cxt_start_tsc = OS_GET_TIME(); + + received_data_get = k_msgq_get(&benchmark_q_get, + &received_data_consumer, + K_NO_WAIT); + + msg_q_get_wo_cxt_end_tsc = OS_GET_TIME(); + + + /*******************************************************************/ + + /* Msg box to benchmark sync put */ + + thread_mbox_sync_put_send_tid = + k_thread_spawn(my_stack_area, + STACK_SIZE, + thread_mbox_sync_put_send, + NULL, NULL, NULL, + 2 /*priority*/, 0, 0); + thread_mbox_sync_put_receive_tid = + k_thread_spawn(my_stack_area_0, + STACK_SIZE, + thread_mbox_sync_put_receive, + NULL, NULL, NULL, + 1 /*priority*/, 0, 0); + k_sleep(1000); /* make the main thread sleep */ + mbox_sync_put_end_tsc = (__common_var_swap_end_tsc); + + /*******************************************************************/ + + /* Msg box to benchmark sync get */ + + thread_mbox_sync_get_send_tid = + k_thread_spawn(my_stack_area, + STACK_SIZE, + thread_mbox_sync_get_send, + NULL, NULL, NULL, + 1 /*prio*/, 0, 0); + thread_mbox_sync_get_receive_tid = + k_thread_spawn(my_stack_area_0, + STACK_SIZE, + thread_mbox_sync_get_receive, NULL, + NULL, NULL, 2 /*priority*/, 0, 0); + k_sleep(1000); /* make the main thread sleep */ + mbox_sync_get_end_tsc = (__common_var_swap_end_tsc); + + /*******************************************************************/ + + /* Msg box to benchmark async put */ + + thread_mbox_async_put_send_tid = + k_thread_spawn(my_stack_area, + STACK_SIZE, + thread_mbox_async_put_send, + NULL, NULL, NULL, + 2 /*prio*/, 0, 0); + thread_mbox_async_put_receive_tid = + k_thread_spawn(my_stack_area_0, + STACK_SIZE, + thread_mbox_async_put_receive, + NULL, NULL, NULL, + 3 /*priority*/, 0, 0); + k_sleep(1000); /* make the main thread sleep */ + + /*******************************************************************/ + int single_element_buffer = 0; + struct k_mbox_msg rx_msg = { + .size = sizeof(int), + .rx_source_thread = K_ANY, + .tx_target_thread = K_ANY + }; + mbox_get_w_cxt_start_tsc = OS_GET_TIME(); + + k_mbox_get(&benchmark_mbox, &rx_msg, &single_element_buffer, 300); + + mbox_get_w_cxt_end_tsc = OS_GET_TIME(); + + /*******************************************************************/ + + /* calculation for msg put with context switch */ + CALCULATE_TIME(__, msg_q, put_w_cxt) + + /* calculation for msg put without context switch */ + CALCULATE_TIME(__, msg_q, put_wo_cxt) + + /* calculation for msg get */ + CALCULATE_TIME(__, msg_q, get_w_cxt) + + /* calculation for msg get without context switch */ + CALCULATE_TIME(, msg_q, get_wo_cxt) + + /*calculation for msg box for sync put + * (i.e with a context to make the pending rx ready) + */ + CALCULATE_TIME(, mbox, sync_put) + + /*calculation for msg box for sync get + * (i.e with a context to make the pending tx ready) + */ + CALCULATE_TIME(, mbox, sync_get) + + /* calculation for msg box for async put */ + CALCULATE_TIME(, mbox, async_put) + + /* calculation for msg box for get without any context switch */ + CALCULATE_TIME(, mbox, get_w_cxt) + + /*******************************************************************/ + + /* Only print lower 32bit of time result */ + PRINT_F("Message Queue Put with context switch", + (u32_t)((__msg_q_put_w_cxt_end_tsc - + __msg_q_put_w_cxt_start_tsc) & 0xFFFFFFFFULL), + (u32_t) (total_msg_q_put_w_cxt_time & 0xFFFFFFFFULL)); + + PRINT_F("Message Queue Put without context switch", + (u32_t)((__msg_q_put_wo_cxt_end_tsc - + __msg_q_put_wo_cxt_start_tsc) & 0xFFFFFFFFULL), + (u32_t) (total_msg_q_put_wo_cxt_time & 0xFFFFFFFFULL)); + + PRINT_F("Message Queue get with context switch", + (u32_t)((__msg_q_get_w_cxt_end_tsc - + __msg_q_get_w_cxt_start_tsc) & 0xFFFFFFFFULL), + (u32_t) (total_msg_q_get_w_cxt_time & 0xFFFFFFFFULL)); + + PRINT_F("Message Queue get without context switch", + (u32_t)((msg_q_get_wo_cxt_end_tsc - + msg_q_get_wo_cxt_start_tsc) & 0xFFFFFFFFULL), + (u32_t) (total_msg_q_get_wo_cxt_time & 0xFFFFFFFFULL)); + + PRINT_F("MailBox synchronous put", + (u32_t)((mbox_sync_put_end_tsc - mbox_sync_put_start_tsc) + & 0xFFFFFFFFULL), + (u32_t) (total_mbox_sync_put_time & 0xFFFFFFFFULL)); + + PRINT_F("MailBox synchronous get", + (u32_t)((mbox_sync_get_end_tsc - mbox_sync_get_start_tsc) + & 0xFFFFFFFFULL), + (u32_t) (total_mbox_sync_get_time & 0xFFFFFFFFULL)); + + PRINT_F("MailBox asynchronous put", + (u32_t)((mbox_async_put_end_tsc - mbox_async_put_start_tsc) + & 0xFFFFFFFFULL), + (u32_t) (total_mbox_async_put_time & 0xFFFFFFFFULL)); + + PRINT_F("MailBox get without context switch", + (u32_t)((mbox_get_w_cxt_end_tsc - mbox_get_w_cxt_start_tsc) + & 0xFFFFFFFFULL), + (u32_t) (total_mbox_get_w_cxt_time & 0xFFFFFFFFULL)); + +} + +void thread_producer_msgq_w_cxt_switch(void *p1, void *p2, void *p3) +{ + int data_to_send = 5050; + + __read_swap_end_tsc_value = 1; + __msg_q_put_w_cxt_start_tsc = (u32_t) OS_GET_TIME(); + k_msgq_put(&benchmark_q, &data_to_send, K_NO_WAIT); +} + + +void thread_producer_msgq_wo_cxt_switch(void *p1, void *p2, void *p3) +{ + int data_to_send = 5050; + + __msg_q_put_wo_cxt_start_tsc = OS_GET_TIME(); + k_msgq_put(&benchmark_q, &data_to_send, K_NO_WAIT); + __msg_q_put_wo_cxt_end_tsc = OS_GET_TIME(); +} + + +void thread_producer_get_msgq_w_cxt_switch(void *p1, void *p2, void *p3) +{ + int status = 0; + + while (1) { + if (status == 0) { + data_to_send++; + } + status = k_msgq_put(&benchmark_q_get, &data_to_send, 20); + } +} + +void thread_consumer_get_msgq_w_cxt_switch(void *p1, void *p2, void *p3) +{ + producer_get_w_cxt_switch_tid->base.timeout.delta_ticks_from_prev = + _EXPIRED; + __read_swap_end_tsc_value = 1; + __msg_q_get_w_cxt_start_tsc = OS_GET_TIME(); + received_data_get = k_msgq_get(&benchmark_q_get, + &received_data_consumer, + 300); + time_check = OS_GET_TIME(); +} + + +void thread_mbox_sync_put_send(void *p1, void *p2, void *p3) +{ + int single_element_buffer = 1234; + struct k_mbox_msg tx_msg = { + .size = sizeof(int), + .info = 5050, + .tx_data = &single_element_buffer, + .rx_source_thread = K_ANY, + .tx_target_thread = K_ANY, + }; + + mbox_sync_put_start_tsc = OS_GET_TIME(); + __read_swap_end_tsc_value = 1; + k_mbox_put(&benchmark_mbox, &tx_msg, 300); + time_check = OS_GET_TIME(); +} + +void thread_mbox_sync_put_receive(void *p1, void *p2, void *p3) +{ + int single_element_buffer = 1234; + struct k_mbox_msg rx_msg = { + .size = sizeof(int), + .rx_source_thread = K_ANY, + .tx_target_thread = K_ANY + }; + + k_mbox_get(&benchmark_mbox, &rx_msg, &single_element_buffer, 300); +} + +void thread_mbox_sync_get_send(void *p1, void *p2, void *p3) +{ + int single_element_buffer = 1234; + struct k_mbox_msg tx_msg = { + .size = sizeof(int), + .info = 5050, + .tx_data = &single_element_buffer, + .rx_source_thread = K_ANY, + .tx_target_thread = K_ANY, + }; + + k_mbox_put(&benchmark_mbox, &tx_msg, 300); +} + +void thread_mbox_sync_get_receive(void *p1, void *p2, void *p3) +{ + int single_element_buffer; + struct k_mbox_msg rx_msg = { + .size = sizeof(int), + .rx_source_thread = K_ANY, + .tx_target_thread = K_ANY + }; + + __read_swap_end_tsc_value = 1; + mbox_sync_get_start_tsc = OS_GET_TIME(); + k_mbox_get(&benchmark_mbox, &rx_msg, &single_element_buffer, 300); +} + +void thread_mbox_async_put_send(void *p1, void *p2, void *p3) +{ + int single_element_buffer = 1234; + struct k_mbox_msg tx_msg = { + .size = sizeof(int), + .info = 5050, + .tx_data = &single_element_buffer, + .rx_source_thread = K_ANY, + .tx_target_thread = K_ANY, + }; + + mbox_async_put_start_tsc = OS_GET_TIME(); + k_mbox_async_put(&benchmark_mbox, &tx_msg, &mbox_sem); + mbox_async_put_end_tsc = OS_GET_TIME(); + k_mbox_async_put(&benchmark_mbox, &tx_msg, &mbox_sem); +} + +void thread_mbox_async_put_receive(void *p1, void *p2, void *p3) +{ + int single_element_buffer; + struct k_mbox_msg rx_msg = { + .size = sizeof(int), + .rx_source_thread = K_ANY, + .tx_target_thread = K_ANY + }; + + k_mbox_get(&benchmark_mbox, &rx_msg, &single_element_buffer, 300); + +} \ No newline at end of file diff --git a/tests/benchmarks/timing_info/src/semaphore_bench.c b/tests/benchmarks/timing_info/src/semaphore_bench.c new file mode 100644 index 00000000000..dda7febcc7f --- /dev/null +++ b/tests/benchmarks/timing_info/src/semaphore_bench.c @@ -0,0 +1,183 @@ +#include +#include +#include +#include +#include "timing_info.h" + + +extern char sline[]; + +K_SEM_DEFINE(sem_bench, 0, 1); +K_SEM_DEFINE(sem_bench_1, 0, 1); + +/* To time thread creation*/ +#define STACK_SIZE 500 +extern char __noinit __stack my_stack_area[STACK_SIZE]; +extern char __noinit __stack my_stack_area_0[STACK_SIZE]; + +/* u64_t thread_yield_start_tsc[1000]; */ +/* u64_t thread_yield_end_tsc[1000]; */ +u64_t thread_start_time; +u64_t thread_end_time; +u64_t sem_start_time; +u64_t sem_end_time; +u64_t sem_give_start_time; +u64_t sem_give_end_time; + +u32_t swap_called; +u64_t test_time2; +u64_t test_time1; + +void thread_sem0_test(void *p1, void *p2, void *p3); +void thread_sem1_test(void *p1, void *p2, void *p3); +void thread_sem0_give_test(void *p1, void *p2, void *p3); +void thread_sem1_give_test(void *p1, void *p2, void *p3); + +k_tid_t sem0_tid; +k_tid_t sem1_tid; + +extern u64_t __common_var_swap_end_tsc; +extern u32_t __read_swap_end_tsc_value; + +void semaphore_bench(void) +{ + + /* Thread yield*/ + + sem0_tid = k_thread_spawn(my_stack_area, + STACK_SIZE, + thread_sem0_test, NULL, NULL, NULL, + 2 /*priority*/, 0, 0); + sem1_tid = k_thread_spawn(my_stack_area_0, + STACK_SIZE, thread_sem1_test, + NULL, NULL, NULL, + 2 /*priority*/, 0, 0); + + k_sleep(1000); + + + /* u64_t test_time1 = _tsc_read(); */ + sem_end_time = (__common_var_swap_end_tsc); + u32_t sem_cycles = sem_end_time - sem_start_time; + + sem0_tid = k_thread_spawn(my_stack_area, + STACK_SIZE, thread_sem0_give_test, + NULL, NULL, NULL, + 2 /*priority*/, 0, 0); + sem1_tid = k_thread_spawn(my_stack_area_0, + STACK_SIZE, thread_sem1_give_test, + NULL, NULL, NULL, + 2 /*priority*/, 0, 0); + + k_sleep(1000); + sem_give_end_time = (__common_var_swap_end_tsc); + u32_t sem_give_cycles = sem_give_end_time - sem_give_start_time; + + + /* Semaphore without context switch*/ + u32_t sem_give_wo_cxt_start = OS_GET_TIME(); + + k_sem_give(&sem_bench); + u32_t sem_give_wo_cxt_end = OS_GET_TIME(); + u32_t sem_give_wo_cxt_cycles = sem_give_wo_cxt_end - + sem_give_wo_cxt_start; + + u32_t sem_take_wo_cxt_start = OS_GET_TIME(); + + k_sem_take(&sem_bench, 10); + u32_t sem_take_wo_cxt_end = OS_GET_TIME(); + u32_t sem_take_wo_cxt_cycles = sem_take_wo_cxt_end - + sem_take_wo_cxt_start; + + /* TC_PRINT("test_time1 , %d cycles\n", (u32_t)test_time1); */ + /* TC_PRINT("test_time2 , %d cycles\n", (u32_t)test_time2); */ + + PRINT_F("Semaphore Take with context switch", + sem_cycles, SYS_CLOCK_HW_CYCLES_TO_NS(sem_cycles)); + PRINT_F("Semaphore Give with context switch", + sem_give_cycles, SYS_CLOCK_HW_CYCLES_TO_NS(sem_give_cycles)); + + PRINT_F("Semaphore Take without context switch", + sem_take_wo_cxt_cycles, + SYS_CLOCK_HW_CYCLES_TO_NS(sem_take_wo_cxt_cycles)); + PRINT_F("Semaphore Give without context switch", + sem_give_wo_cxt_cycles, + SYS_CLOCK_HW_CYCLES_TO_NS(sem_give_wo_cxt_cycles)); + +} +/******************************************************************************/ +K_MUTEX_DEFINE(mutex0); +void mutex_bench(void) +{ + u32_t mutex_lock_start_tsc; + u32_t mutex_lock_end_tsc; + u32_t mutex_lock_diff = 0; + + u32_t mutex_unlock_start_tsc; + u32_t mutex_unlock_end_tsc; + u32_t mutex_unlock_diff = 0; + + for (int i = 0; i < 1000; i++) { + mutex_lock_start_tsc = OS_GET_TIME(); + k_mutex_lock(&mutex0, 100); + mutex_lock_end_tsc = OS_GET_TIME(); + + mutex_unlock_start_tsc = OS_GET_TIME(); + k_mutex_unlock(&mutex0); + mutex_unlock_end_tsc = OS_GET_TIME(); + + mutex_lock_diff += (mutex_lock_end_tsc - mutex_lock_start_tsc); + mutex_unlock_diff += (mutex_unlock_end_tsc - + mutex_unlock_start_tsc); + } + + PRINT_F("Mutex lock", mutex_lock_diff / 1000, + SYS_CLOCK_HW_CYCLES_TO_NS(mutex_lock_diff / 1000)); + + PRINT_F("Mutex unlock", mutex_unlock_diff / 1000, + SYS_CLOCK_HW_CYCLES_TO_NS(mutex_unlock_diff / 1000)); + +} + +/******************************************************************************/ +void thread_sem1_test(void *p1, void *p2, void *p3) +{ + + k_sem_give(&sem_bench); /* sync the 2 threads*/ + + __read_swap_end_tsc_value = 1; + sem_start_time = OS_GET_TIME(); + k_sem_take(&sem_bench, 10); +} + +u32_t sem_count; +void thread_sem0_test(void *p1, void *p2, void *p3) +{ + k_sem_take(&sem_bench, 10);/* To sync threads */ + + k_sem_give(&sem_bench); + sem_count++; + k_thread_abort(sem0_tid); +} +/******************************************************************************/ +void thread_sem1_give_test(void *p1, void *p2, void *p3) +{ + k_sem_give(&sem_bench); /* sync the 2 threads*/ + + k_sem_take(&sem_bench_1, 1000); /* clear the previous sem_give*/ + /* test_time1 = OS_GET_TIME(); */ +} + +void thread_sem0_give_test(void *p1, void *p2, void *p3) +{ + k_sem_take(&sem_bench, 10);/* To sync threads */ + /* test_time2 = OS_GET_TIME(); */ + + /* To make sure that the sem give will cause a swap to occur */ + k_thread_priority_set(sem1_tid, 1); + + __read_swap_end_tsc_value = 1; + sem_give_start_time = OS_GET_TIME(); + k_sem_give(&sem_bench_1); + +} \ No newline at end of file diff --git a/tests/benchmarks/timing_info/src/thread_bench.c b/tests/benchmarks/timing_info/src/thread_bench.c new file mode 100644 index 00000000000..d99dc1a69d1 --- /dev/null +++ b/tests/benchmarks/timing_info/src/thread_bench.c @@ -0,0 +1,347 @@ +/* + * Copyright (c) 2013-2015 Wind River Systems, Inc. + * Copyright (c) 2016 Intel Corporation. + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @file + * @brief Measure time + * + */ +#include +#include +#include +#include +#include "timing_info.h" + +char sline[256]; +/* FILE *output_file = stdout; */ + +/* location of the time stamps*/ +u32_t __read_swap_end_tsc_value; +u64_t __common_var_swap_end_tsc; + +volatile u64_t thread_abort_end_tsc; +volatile u64_t thread_abort_start_tsc; + +/* Thread suspend*/ +volatile u64_t thread_suspend_start_tsc; +volatile u64_t thread_suspend_end_tsc; + +/* Thread resume*/ +volatile u64_t thread_resume_start_tsc; +volatile u64_t thread_resume_end_tsc; + +/* Thread sleep*/ +volatile u64_t thread_sleep_start_tsc; +volatile u64_t thread_sleep_end_tsc; + +/*For benchmarking msg queues*/ +k_tid_t producer_tid; +k_tid_t consumer_tid; + +/* To time thread creation*/ +#define STACK_SIZE 500 +char __noinit __stack my_stack_area[STACK_SIZE]; +char __noinit __stack my_stack_area_0[STACK_SIZE]; + +u32_t __read_swap_end_tsc_value_test = 1; +u64_t dummy_time; +u64_t start_time; +u64_t test_end_time; + +#if CONFIG_X86 +u32_t benchmarking_overhead_swap(void) +{ + + __asm__ __volatile__ ( + "pushl %eax\n\t" + "pushl %edx\n\t" + "rdtsc\n\t" + "mov %eax,start_time\n\t" + "mov %edx,start_time+4\n\t" + "cmp $0x1,__read_swap_end_tsc_value_test\n\t" + "jne time_read_not_needed_test\n\t" + "movw $0x2,__read_swap_end_tsc_value\n\t" + "pushl %eax\n\t" + "pushl %edx\n\t" + "rdtsc\n\t" + "mov %eax,dummy_time\n\t" + "mov %edx,dummy_time+4\n\t" + "pop %edx\n\t" + "pop %eax\n\t" + "time_read_not_needed_test:\n\t" + "rdtsc\n\t" + "mov %eax,test_end_time\n\t" + "mov %edx,test_end_time+4\n\t" + "pop %edx\n\t" + "pop %eax\n\t"); + + /* u64_t end_time = OS_GET_TIME(); */ + return(test_end_time - start_time); +} +#endif + +#if CONFIG_ARM +void read_systick_start_of_swap(void) +{ + __start_swap_tsc = (u32_t)SysTick->VAL; +} + +void read_systick_end_of_swap(void) +{ + if (__read_swap_end_tsc_value == 1) { + __read_swap_end_tsc_value = 2; + __common_var_swap_end_tsc = OS_GET_TIME(); + } +} + +void read_systick_start_of_isr(void) +{ + __start_intr_tsc = (u32_t)SysTick->VAL; +} + +void read_systick_end_of_isr(void) +{ + __end_intr_tsc = (u32_t)SysTick->VAL; +} + +void read_systick_start_of_tick_handler(void) +{ + __start_tick_tsc = (u32_t)SysTick->VAL; +} + +void read_systick_end_of_tick_handler(void) +{ + __end_tick_tsc = (u32_t)SysTick->VAL; +} + +#endif + + + +void test_thread_entry(void *p, void *p1, void *p2) +{ + static int i; + + i++; +} + + +void thread_swap_test(void *p1, void *p2, void *p3) +{ + __read_swap_end_tsc_value = 1; + thread_abort_start_tsc = OS_GET_TIME(); + k_thread_abort(_current); +} + +void thread_suspend_test(void *p1, void *p2, void *p3); +void yield_bench(void); +void heap_malloc_free_bench(void); +void main_sem_bench(void); +void main_mutex_bench(void); +void main_msg_bench(void); + +void system_thread_bench(void) +{ + u64_t total_intr_time; + u64_t total_tick_time; + + /*Thread create*/ + u64_t thread_create_start_tsc; + u64_t thread_create_end_tsc; + + DECLARE_VAR(thread, create) + + /*Thread cancel*/ + u64_t thread_cancel_start_tsc; + u64_t thread_cancel_end_tsc; + DECLARE_VAR(thread, cancel) + + /* Thread Abort*/ + DECLARE_VAR(thread, abort) + + /* Thread Suspend*/ + DECLARE_VAR(thread, suspend) + + /* Thread Resume*/ + DECLARE_VAR(thread, resume) + + + /* to measure context switch time */ + k_thread_spawn(my_stack_area_0, + STACK_SIZE, + thread_swap_test, + NULL, NULL, NULL, + -1 /*priority*/, 0, 0); + + thread_abort_end_tsc = (__common_var_swap_end_tsc); + __end_swap_tsc = __common_var_swap_end_tsc; + + + u32_t total_swap_cycles = __end_swap_tsc - + SUBTRACT_CLOCK_CYCLES(__start_swap_tsc); + + /* Interrupt latency*/ + total_intr_time = SYS_CLOCK_HW_CYCLES_TO_NS(__end_intr_tsc - + __start_intr_tsc); + + /* tick overhead*/ + total_tick_time = SYS_CLOCK_HW_CYCLES_TO_NS(__end_tick_tsc - + __start_tick_tsc); + + /*******************************************************************/ + /* thread create*/ + thread_create_start_tsc = OS_GET_TIME(); + + k_tid_t my_tid = k_thread_spawn(my_stack_area, + STACK_SIZE, + thread_swap_test, + NULL, NULL, NULL, + 5 /*priority*/, 0, 10); + + thread_create_end_tsc = OS_GET_TIME(); + + /* thread Termination*/ + thread_cancel_start_tsc = OS_GET_TIME(); + s32_t ret_value_thread_cancel = k_thread_cancel(my_tid); + + thread_cancel_end_tsc = OS_GET_TIME(); + ARG_UNUSED(ret_value_thread_cancel); + + /* Thread suspend*/ + k_tid_t sus_res_tid = k_thread_spawn(my_stack_area, + STACK_SIZE, + thread_suspend_test, + NULL, NULL, NULL, + -1 /*priority*/, 0, 0); + + thread_suspend_end_tsc = OS_GET_TIME(); + /* At this point test for resume*/ + k_thread_resume(sus_res_tid); + + /* calculation for creation */ + CALCULATE_TIME(, thread, create) + + /* calculation for cancel */ + CALCULATE_TIME(, thread, cancel) + + /* calculation for abort */ + CALCULATE_TIME(, thread, abort) + + /* calculation for suspend */ + CALCULATE_TIME(, thread, suspend) + + /* calculation for resume */ + thread_resume_start_tsc = thread_suspend_end_tsc; + CALCULATE_TIME(, thread, resume) + + /*******************************************************************/ + + /* Only print lower 32bit of time result */ + + PRINT_F("Context switch", + (u32_t)(total_swap_cycles & 0xFFFFFFFFULL), + (u32_t)SYS_CLOCK_HW_CYCLES_TO_NS(total_swap_cycles)); + + /*TC_PRINT("Swap Overhead:%d cycles\n", benchmarking_overhead_swap());*/ + + /*Interrupt latency */ + u32_t intr_latency_cycles = SUBTRACT_CLOCK_CYCLES(__end_intr_tsc) - + SUBTRACT_CLOCK_CYCLES(__start_intr_tsc); + + PRINT_F("Interrupt latency", + (u32_t)(intr_latency_cycles), + (u32_t) (SYS_CLOCK_HW_CYCLES_TO_NS(intr_latency_cycles))); + + /*tick overhead*/ + u32_t tick_overhead_cycles = SUBTRACT_CLOCK_CYCLES(__end_tick_tsc) - + SUBTRACT_CLOCK_CYCLES(__start_tick_tsc); + PRINT_F("Tick overhead", + (u32_t)(tick_overhead_cycles), + (u32_t) (SYS_CLOCK_HW_CYCLES_TO_NS(tick_overhead_cycles))); + + /*thread creation*/ + PRINT_F("Thread Creation", + (u32_t)((thread_create_end_tsc - thread_create_start_tsc) & + 0xFFFFFFFFULL), + (u32_t) (total_thread_create_time & 0xFFFFFFFFULL)); + + /*thread cancel*/ + PRINT_F("Thread cancel", + (u32_t)((thread_cancel_end_tsc - thread_cancel_start_tsc) & + 0xFFFFFFFFULL), + (u32_t) (total_thread_cancel_time & 0xFFFFFFFFULL)); + + /*thread abort*/ + PRINT_F("Thread abort", + (u32_t)((thread_abort_end_tsc - thread_abort_start_tsc) & + 0xFFFFFFFFULL), + (u32_t) (total_thread_abort_time & 0xFFFFFFFFULL)); + + /*thread suspend*/ + PRINT_F("Thread Suspend", + (u32_t)((thread_suspend_end_tsc - thread_suspend_start_tsc) & + 0xFFFFFFFFULL), + (u32_t) (total_thread_suspend_time & 0xFFFFFFFFULL)); + + /*thread resume*/ + PRINT_F("Thread Resume", + (u32_t)((thread_resume_end_tsc - thread_suspend_end_tsc) + & 0xFFFFFFFFULL), + (u32_t) (total_thread_resume_time & 0xFFFFFFFFULL)); + + +} + +void thread_suspend_test(void *p1, void *p2, void *p3) +{ + thread_suspend_start_tsc = OS_GET_TIME(); + k_thread_suspend(_current); + + /* comes to this line once its resumed*/ + thread_resume_end_tsc = OS_GET_TIME(); + + /* k_thread_suspend(_current); */ +} + +void heap_malloc_free_bench(void) +{ + /* heap malloc*/ + u64_t heap_malloc_start_tsc = 0; + u64_t heap_malloc_end_tsc = 0; + + /* heap free*/ + u64_t heap_free_start_tsc = 0; + u64_t heap_free_end_tsc = 0; + + s32_t count = 0; + u32_t sum_malloc = 0; + u32_t sum_free = 0; + + while (count++ != 100) { + heap_malloc_start_tsc = OS_GET_TIME(); + void *allocated_mem = k_malloc(10); + + heap_malloc_end_tsc = OS_GET_TIME(); + if (allocated_mem == NULL) { + TC_PRINT("\n Malloc failed at count %d\n", count); + break; + } + heap_free_start_tsc = OS_GET_TIME(); + k_free(allocated_mem); + heap_free_end_tsc = OS_GET_TIME(); + sum_malloc += heap_malloc_end_tsc - heap_malloc_start_tsc; + sum_free += heap_free_end_tsc - heap_free_start_tsc; + } + + PRINT_F("Heap Malloc", + (u32_t)((sum_malloc / count) & 0xFFFFFFFFULL), + (u32_t)(SYS_CLOCK_HW_CYCLES_TO_NS(sum_malloc / count))); + PRINT_F("Heap Free", + (u32_t)((sum_free / count) & 0xFFFFFFFFULL), + (u32_t)(SYS_CLOCK_HW_CYCLES_TO_NS(sum_free / count))); + +} \ No newline at end of file diff --git a/tests/benchmarks/timing_info/src/timing_info.h b/tests/benchmarks/timing_info/src/timing_info.h new file mode 100644 index 00000000000..3f69b16ecfb --- /dev/null +++ b/tests/benchmarks/timing_info/src/timing_info.h @@ -0,0 +1,95 @@ +#include + + +#define CALCULATE_TIME(special_char, profile, name) \ + { \ + total_##profile##_##name##_time = SYS_CLOCK_HW_CYCLES_TO_NS( \ + special_char##profile##_##name##_end_tsc - \ + special_char##profile##_##name##_start_tsc); \ + } + +/*total_##profile##_##name##_time = + * profile##_##name##_end_us - profile##_##name##_start_us; + */ + +#define DECLARE_VAR(profile, name) \ + u64_t total_##profile##_##name##_time; + +extern u64_t __start_swap_tsc; +extern u64_t __end_swap_tsc; +extern u64_t __start_intr_tsc; +extern u64_t __end_intr_tsc; +extern u64_t __start_tick_tsc; +extern u64_t __end_tick_tsc; + + + +/* Function prototypes */ +void system_thread_bench(void); +void yield_bench(void); +void heap_malloc_free_bench(void); +void semaphore_bench(void); +void mutex_bench(void); +void msg_passing_bench(void); + +/* PRINT_F + * Macro to print a formatted output string. fprintf is used when + * Assumed that sline character array of SLINE_LEN + 1 characters + * is defined in the main file + */ + +/* #define CSV_FORMAT_OUTPUT */ +/* printf format defines. */ +#ifdef CSV_FORMAT_OUTPUT +#define FORMAT "%-45s,%4u,%5u\n" +#else +#define FORMAT "%-45s:%4u cycles , %5u ns\n" +#endif +#include + +#define GET_2ND_ARG(first, second, ...) (second) +#define GET_3ND_ARG(first, second, third, ...) (third) + +/* Enable this macro to print all the measurements. + * Note: Some measurements in few architectures are not valid + */ +/* #define PRINT_ALL_MEASUREMENTS */ +#ifndef PRINT_ALL_MEASUREMENTS +/*If the measured cycles is greater than 10000 then one of the following is + * possible. + * 1. the selected measurement is not supported in the architecture + * 2. The measurement went wrong somewhere.(less likely to happen) + */ +#define PRINT_F(...) \ + { \ + if ((GET_2ND_ARG(__VA_ARGS__) <= 20000) && \ + (GET_2ND_ARG(__VA_ARGS__) != 0)) { \ + snprintf(sline, 254, FORMAT, ##__VA_ARGS__); \ + TC_PRINT("%s", sline); \ + } \ + } +#else +/* Prints all outputs*/ +#define PRINT_F(...) \ + { \ + snprintf(sline, 254, FORMAT, ##__VA_ARGS__); \ + TC_PRINT("%s", sline); \ + } + +#endif + + +/* If we are using x86 based controller we tend to read the tsc value which is + * always incrementing i.e count up counter. + * If we are using the ARM based controllers the systick is a + * count down counter. + * Hence to calculate the cycles taken up by the code we need to adjust the + * values accordingly. + * + * NOTE: Needed only when reading value from end of swap operation + */ +#if CONFIG_ARM +#define SUBTRACT_CLOCK_CYCLES(val) (SysTick->LOAD - (u32_t)val) +#else +#define SUBTRACT_CLOCK_CYCLES(val) (val) +#endif diff --git a/tests/benchmarks/timing_info/src/yield_bench.c b/tests/benchmarks/timing_info/src/yield_bench.c new file mode 100644 index 00000000000..8da053cbe41 --- /dev/null +++ b/tests/benchmarks/timing_info/src/yield_bench.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include +#include "timing_info.h" + +K_SEM_DEFINE(yield_sem, 0, 1); + +/* To time thread creation*/ +#define STACK_SIZE 500 +extern char __noinit __stack my_stack_area[STACK_SIZE]; +extern char __noinit __stack my_stack_area_0[STACK_SIZE]; + +/* u64_t thread_yield_start_tsc[1000]; */ +/* u64_t thread_yield_end_tsc[1000]; */ +/* location of the time stamps*/ +extern u32_t __read_swap_end_tsc_value; +extern u64_t __common_var_swap_end_tsc; +extern char sline[]; + +u64_t thread_sleep_start_tsc; +u64_t thread_sleep_end_tsc; +u64_t thread_start_time; +u64_t thread_end_time; +static u32_t count; + +void thread_yield0_test(void *p1, void *p2, void *p3); +void thread_yield1_test(void *p1, void *p2, void *p3); + +k_tid_t yield0_tid; +k_tid_t yield1_tid; +void yield_bench(void) +{ + + /* Thread yield*/ + + yield0_tid = k_thread_spawn(my_stack_area, + STACK_SIZE, + thread_yield0_test, + NULL, NULL, NULL, + 0 /*priority*/, 0, 0); + + yield1_tid = k_thread_spawn(my_stack_area_0, + STACK_SIZE, + thread_yield1_test, + NULL, NULL, NULL, + 0 /*priority*/, 0, 0); + + /*read the time of start of the sleep till the swap happens */ + __read_swap_end_tsc_value = 1; + + thread_sleep_start_tsc = OS_GET_TIME(); + k_sleep(1000); + thread_sleep_end_tsc = ((u32_t)__common_var_swap_end_tsc); + + u32_t yield_cycles = (thread_end_time - thread_start_time) / 2000; + u32_t sleep_cycles = thread_sleep_end_tsc - thread_sleep_start_tsc; + + PRINT_F("Thread Yield", yield_cycles, + SYS_CLOCK_HW_CYCLES_TO_NS(yield_cycles)); + PRINT_F("Thread Sleep", sleep_cycles, + SYS_CLOCK_HW_CYCLES_TO_NS(sleep_cycles)); + +} + + +void thread_yield0_test(void *p1, void *p2, void *p3) +{ + k_sem_take(&yield_sem, 10); + thread_start_time = OS_GET_TIME(); + while (count != 1000) { + count++; + k_yield(); + } + thread_end_time = OS_GET_TIME(); + k_thread_abort(yield1_tid); +} + +void thread_yield1_test(void *p1, void *p2, void *p3) +{ + k_sem_give(&yield_sem); + while (1) { + k_yield(); + } +} \ No newline at end of file diff --git a/tests/benchmarks/timing_info/testcase.ini b/tests/benchmarks/timing_info/testcase.ini new file mode 100644 index 00000000000..717188e0d5d --- /dev/null +++ b/tests/benchmarks/timing_info/testcase.ini @@ -0,0 +1,3 @@ +[test] +tags = benchmark +arch_whitelist = x86 arm