test: benchmarking: Timing metrics for the kernel
JIRA: ZEP-1822, ZEP-1823, ZEP-1825 Signed-off-by: Adithya Baglody <adithya.nagaraj.baglody@intel.com>
This commit is contained in:
parent
e0a643591d
commit
d03b2496cd
19 changed files with 1486 additions and 1 deletions
|
@ -43,6 +43,10 @@ SECTION_FUNC(TEXT, _isr_wrapper)
|
|||
|
||||
push {lr} /* lr is now the first item on the stack */
|
||||
|
||||
#ifdef CONFIG_EXECUTION_BENCHMARKING
|
||||
bl read_systick_start_of_isr
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KERNEL_EVENT_LOGGER_INTERRUPT
|
||||
bl _sys_k_event_logger_interrupt
|
||||
#endif
|
||||
|
@ -105,6 +109,20 @@ _idle_state_cleared:
|
|||
* in thumb mode */
|
||||
|
||||
ldm r1!,{r0,r3} /* arg in r0, ISR in r3 */
|
||||
#ifdef CONFIG_EXECUTION_BENCHMARKING
|
||||
#if defined(CONFIG_ARMV6_M)
|
||||
push {r3}
|
||||
#endif
|
||||
push {lr}
|
||||
bl read_systick_end_of_isr
|
||||
#if defined(CONFIG_ARMV6_M)
|
||||
pop {r3}
|
||||
mov lr,r3
|
||||
pop {r3}
|
||||
#else
|
||||
pop {lr}
|
||||
#endif
|
||||
#endif
|
||||
blx r3 /* call ISR */
|
||||
|
||||
#if defined(CONFIG_ARMV6_M)
|
||||
|
|
|
@ -177,6 +177,23 @@ _thread_irq_disabled:
|
|||
|
||||
msr PSP, ip
|
||||
|
||||
#ifdef CONFIG_EXECUTION_BENCHMARKING
|
||||
#if defined(CONFIG_ARMV6_M)
|
||||
push {r3}
|
||||
#endif
|
||||
push {lr}
|
||||
|
||||
bl read_systick_end_of_swap
|
||||
|
||||
#if defined(CONFIG_ARMV6_M)
|
||||
pop {r3}
|
||||
mov lr,r3
|
||||
pop {r3}
|
||||
#else
|
||||
pop {lr}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* exc return */
|
||||
bx lr
|
||||
|
||||
|
@ -292,6 +309,20 @@ _oops:
|
|||
|
||||
SECTION_FUNC(TEXT, __swap)
|
||||
|
||||
#ifdef CONFIG_EXECUTION_BENCHMARKING
|
||||
#if defined(CONFIG_ARMV6_M)
|
||||
push {r3}
|
||||
#endif
|
||||
push {lr}
|
||||
bl read_systick_start_of_swap
|
||||
#if defined(CONFIG_ARMV6_M)
|
||||
pop {r3}
|
||||
mov lr,r3
|
||||
pop {r3}
|
||||
#else
|
||||
pop {lr}
|
||||
#endif
|
||||
#endif
|
||||
ldr r1, =_kernel
|
||||
ldr r2, [r1, #_kernel_offset_to_current]
|
||||
str r0, [r2, #_thread_offset_to_basepri]
|
||||
|
|
|
@ -78,6 +78,15 @@
|
|||
*/
|
||||
SECTION_FUNC(TEXT, _interrupt_enter)
|
||||
|
||||
#ifdef CONFIG_EXECUTION_BENCHMARKING
|
||||
pushl %eax
|
||||
pushl %edx
|
||||
rdtsc
|
||||
mov %eax, __start_intr_tsc
|
||||
mov %edx, __start_intr_tsc+4
|
||||
pop %edx
|
||||
pop %eax
|
||||
#endif
|
||||
/*
|
||||
* The gen_idt tool creates an interrupt-gate descriptor for
|
||||
* all connections. The processor will automatically clear the IF
|
||||
|
@ -246,6 +255,19 @@ alreadyOnIntStack:
|
|||
*/
|
||||
push %eax
|
||||
#endif
|
||||
#ifdef CONFIG_EXECUTION_BENCHMARKING
|
||||
/* Save the eax and edx registers before reading the time stamp
|
||||
* once done pop the values
|
||||
*/
|
||||
pushl %eax
|
||||
pushl %edx
|
||||
rdtsc
|
||||
mov %eax,__end_intr_tsc
|
||||
mov %edx,__end_intr_tsc+4
|
||||
pop %edx
|
||||
pop %eax
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NESTED_INTERRUPTS
|
||||
sti /* re-enable interrupts */
|
||||
#endif
|
||||
|
|
|
@ -77,7 +77,28 @@
|
|||
*
|
||||
*/
|
||||
|
||||
.macro read_tsc var_name
|
||||
push %eax
|
||||
push %edx
|
||||
rdtsc
|
||||
mov %eax,\var_name
|
||||
mov %edx,\var_name+4
|
||||
pop %edx
|
||||
pop %eax
|
||||
.endm
|
||||
SECTION_FUNC(TEXT, __swap)
|
||||
#ifdef CONFIG_EXECUTION_BENCHMARKING
|
||||
/* Save the eax and edx registers before reading the time stamp
|
||||
* once done pop the values.
|
||||
*/
|
||||
push %eax
|
||||
push %edx
|
||||
rdtsc
|
||||
mov %eax,__start_swap_tsc
|
||||
mov %edx,__start_swap_tsc+4
|
||||
pop %edx
|
||||
pop %eax
|
||||
#endif
|
||||
#ifdef CONFIG_X86_IAMCU
|
||||
/* save EFLAGS on stack right before return address, just as SYSV would
|
||||
* have done
|
||||
|
@ -335,5 +356,16 @@ skipIntLatencyStop:
|
|||
*/
|
||||
popl %edx
|
||||
movl %edx, (%esp)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_EXECUTION_BENCHMARKING
|
||||
/* Save the eax and edx registers before reading the time stamp
|
||||
* once done pop the values.
|
||||
*/
|
||||
cmp $0x1,__read_swap_end_tsc_value
|
||||
jne time_read_not_needed
|
||||
movw $0x2,__read_swap_end_tsc_value
|
||||
read_tsc __common_var_swap_end_tsc
|
||||
time_read_not_needed:
|
||||
#endif
|
||||
ret
|
||||
|
|
|
@ -211,6 +211,11 @@ void _timer_int_handler(void *unused)
|
|||
{
|
||||
ARG_UNUSED(unused);
|
||||
|
||||
#ifdef CONFIG_EXECUTION_BENCHMARKING
|
||||
extern void read_systick_start_of_tick_handler(void);
|
||||
read_systick_start_of_tick_handler();
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KERNEL_EVENT_LOGGER_INTERRUPT
|
||||
extern void _sys_k_event_logger_interrupt(void);
|
||||
_sys_k_event_logger_interrupt();
|
||||
|
@ -337,6 +342,11 @@ void _timer_int_handler(void *unused)
|
|||
|
||||
#endif /* CONFIG_SYS_POWER_MANAGEMENT */
|
||||
|
||||
#ifdef CONFIG_EXECUTION_BENCHMARKING
|
||||
extern void read_systick_end_of_tick_handler(void);
|
||||
read_systick_end_of_tick_handler();
|
||||
#endif
|
||||
|
||||
extern void _ExcExit(void);
|
||||
_ExcExit();
|
||||
}
|
||||
|
|
|
@ -252,6 +252,16 @@ static inline void program_max_cycles(void)
|
|||
void _timer_int_handler(void *unused /* parameter is not used */
|
||||
)
|
||||
{
|
||||
#ifdef CONFIG_EXECUTION_BENCHMARKING
|
||||
__asm__ __volatile__ (
|
||||
"pushl %eax\n\t"
|
||||
"pushl %edx\n\t"
|
||||
"rdtsc\n\t"
|
||||
"mov %eax, __start_tick_tsc\n\t"
|
||||
"mov %edx, __start_tick_tsc+4\n\t"
|
||||
"pop %edx\n\t"
|
||||
"pop %eax\n\t");
|
||||
#endif
|
||||
ARG_UNUSED(unused);
|
||||
|
||||
#if defined(CONFIG_TICKLESS_KERNEL)
|
||||
|
@ -324,7 +334,16 @@ void _timer_int_handler(void *unused /* parameter is not used */
|
|||
_sys_clock_tick_announce();
|
||||
#endif /*CONFIG_TICKLESS_IDLE*/
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_EXECUTION_BENCHMARKING
|
||||
__asm__ __volatile__ (
|
||||
"pushl %eax\n\t"
|
||||
"pushl %edx\n\t"
|
||||
"rdtsc\n\t"
|
||||
"mov %eax, __end_tick_tsc\n\t"
|
||||
"mov %edx, __end_tick_tsc+4\n\t"
|
||||
"pop %edx\n\t"
|
||||
"pop %eax\n\t");
|
||||
#endif /* CONFIG_EXECUTION_BENCHMARKING */
|
||||
}
|
||||
|
||||
#ifdef CONFIG_TICKLESS_KERNEL
|
||||
|
|
|
@ -204,6 +204,16 @@ config INT_LATENCY_BENCHMARK
|
|||
The metrics are displayed (and a new sampling interval is started)
|
||||
each time int_latency_show() is called thereafter.
|
||||
|
||||
config EXECUTION_BENCHMARKING
|
||||
bool
|
||||
prompt "Timing metrics "
|
||||
default n
|
||||
help
|
||||
This option enables the tracking of various times inside the kernel
|
||||
the exact set of metrics being tracked is board-dependent.
|
||||
All timing measurements are enabled for X86 and ARM based architectures.
|
||||
In other architectures only a subset are enabled.
|
||||
|
||||
config THREAD_MONITOR
|
||||
bool
|
||||
prompt "Thread monitoring [EXPERIMENTAL]"
|
||||
|
|
|
@ -56,6 +56,14 @@ u64_t __noinit __main_tsc; /* timestamp when main task starts */
|
|||
u64_t __noinit __idle_tsc; /* timestamp when CPU goes idle */
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_EXECUTION_BENCHMARKING
|
||||
u64_t __noinit __start_swap_tsc;
|
||||
u64_t __noinit __end_swap_tsc;
|
||||
u64_t __noinit __start_intr_tsc;
|
||||
u64_t __noinit __end_intr_tsc;
|
||||
u64_t __noinit __start_tick_tsc;
|
||||
u64_t __noinit __end_tick_tsc;
|
||||
#endif
|
||||
/* init/main and idle threads */
|
||||
|
||||
#define IDLE_STACK_SIZE CONFIG_IDLE_STACK_SIZE
|
||||
|
|
4
tests/benchmarks/timing_info/Makefile
Normal file
4
tests/benchmarks/timing_info/Makefile
Normal file
|
@ -0,0 +1,4 @@
|
|||
BOARD ?= qemu_x86
|
||||
CONF_FILE = prj.conf
|
||||
|
||||
include ${ZEPHYR_BASE}/Makefile.inc
|
111
tests/benchmarks/timing_info/README.txt
Normal file
111
tests/benchmarks/timing_info/README.txt
Normal file
|
@ -0,0 +1,111 @@
|
|||
Title: Timing Information
|
||||
|
||||
Description:
|
||||
|
||||
Timing measurements for the following features of the OS.
|
||||
1. Context switch
|
||||
Time taken to compete the context switch, i.e time spent in _Swap function.
|
||||
2. Interrupt latency
|
||||
Time taken from the start of the common interrupt handler till the
|
||||
actual ISR handler being called.
|
||||
3. Tick overhead
|
||||
Time spent by the cpu in the tick handler.
|
||||
4. Thread Creation
|
||||
Time spent in creating a thread.
|
||||
5. Thread cancel
|
||||
Time taken to cancel the thread which is not yet started execution.
|
||||
So the time taken to complete the function call is measured.
|
||||
6. Thread abort
|
||||
Time taken to abort the thread which has already started execution.
|
||||
So the time measured is from the start of the function call until the
|
||||
another thread is swapped in.
|
||||
7. Thread Suspend
|
||||
The time measured is from the start of the function call until the current
|
||||
thread is suspended and another thread is swapped in.
|
||||
8. Thread Resume
|
||||
The time measured is from the start of the function call until the required
|
||||
thread is resumed by swap operation.
|
||||
9. Thread Yield
|
||||
The time measured is from the start of the function call until the higher priority
|
||||
thread is swapped in.
|
||||
10. Thread Sleep
|
||||
The time measured is from the start of the function call until the current
|
||||
thread is put on the timeout queue and another thread is swapped in.
|
||||
11. Heap Malloc
|
||||
The time to allocate heap memory in fixed size chunks. Continuously allocate
|
||||
the memory from the pool. Average time taken to complete the function call
|
||||
is measured.
|
||||
12. Heap Free
|
||||
Time to free heap memory in fixed size chunks. Continuously free
|
||||
the memory that was allocated. Average time taken to complete the function call
|
||||
is measured.
|
||||
13. Semaphore Take with context switch
|
||||
Taking a semaphore causes a thread waiting on the semaphore to be swapped in.
|
||||
Thus Time measured is the time taken from the function call till the waiting
|
||||
thread is swapped in.
|
||||
14. Semaphore Give with context switch
|
||||
Giving a semaphore causes a thread waiting on the semaphore to be swapped
|
||||
in (higher priority).
|
||||
Thus Time measured is the time taken from the function call till the waiting
|
||||
thread is swapped in.
|
||||
15. Semaphore Take without context switch
|
||||
Time taken to take the semaphore. Thus time to complete the function
|
||||
call is measured.
|
||||
16. Semaphore Give without context switch
|
||||
Time taken to give the semaphore. Thus time to complete the function
|
||||
call is measured.
|
||||
17. Mutex lock
|
||||
Time taken to lock the mutex. Thus time to complete the function
|
||||
call is measured.
|
||||
18. Mutex unlock
|
||||
Time taken to unlock the mutex. Thus time to complete the function
|
||||
call is measured.
|
||||
19. Message Queue Put with context switch
|
||||
A thread is waiting for a message to arrive. The time taken from the start
|
||||
of the function call till the waiting thread is swapped in is measured.
|
||||
20. Message Queue Put without context switch
|
||||
The time taken to complete the function call is measured.
|
||||
21. Message Queue get with context switch
|
||||
A thread has gone into waiting because the message queue is full.
|
||||
When a get occurs this thread gets free to execute. The time taken from
|
||||
the start of the function call till the waiting thread is
|
||||
swapped in is measured.
|
||||
22. Message Queue get without context switch
|
||||
The time taken to complete the function call is measured.
|
||||
23. MailBox synchronous put
|
||||
The time taken from the start of the function call till the waiting thread
|
||||
is swapped in is measured.
|
||||
24. MailBox synchronous get
|
||||
The time taken from the start of the function call till the waiting thread
|
||||
is swapped in is measured.
|
||||
25. MailBox asynchronous put
|
||||
The time taken to complete the function call is measured.
|
||||
26. MailBox get without context switch
|
||||
The time taken to complete the function call is measured.
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Building and Running Project:
|
||||
|
||||
This benchmark outputs to the console. It can be built and executed
|
||||
on QEMU as follows:
|
||||
|
||||
make qemu
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Troubleshooting:
|
||||
|
||||
Problems caused by out-dated project information can be addressed by
|
||||
issuing one of the following commands then rebuilding the project:
|
||||
|
||||
make clean # discard results of previous builds
|
||||
# but keep existing configuration info
|
||||
or
|
||||
make pristine # discard results of previous builds
|
||||
# and restore pre-defined configuration info
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Sample Output:
|
4
tests/benchmarks/timing_info/prj.conf
Normal file
4
tests/benchmarks/timing_info/prj.conf
Normal file
|
@ -0,0 +1,4 @@
|
|||
CONFIG_EXECUTION_BENCHMARKING=y
|
||||
CONFIG_TEST_RANDOM_GENERATOR=y
|
||||
CONFIG_HEAP_MEM_POOL_SIZE=256
|
||||
CONFIG_MAIN_STACK_SIZE=2048
|
7
tests/benchmarks/timing_info/src/Makefile
Normal file
7
tests/benchmarks/timing_info/src/Makefile
Normal file
|
@ -0,0 +1,7 @@
|
|||
ccflags-y += -I$(ZEPHYR_BASE)/tests/include
|
||||
|
||||
obj-$(CONFIG_EXECUTION_BENCHMARKING) += main_benchmark.o
|
||||
obj-$(CONFIG_EXECUTION_BENCHMARKING) += thread_bench.o
|
||||
obj-$(CONFIG_EXECUTION_BENCHMARKING) += yield_bench.o
|
||||
obj-$(CONFIG_EXECUTION_BENCHMARKING) += semaphore_bench.o
|
||||
obj-$(CONFIG_EXECUTION_BENCHMARKING) += msg_passing_bench.o
|
57
tests/benchmarks/timing_info/src/main_benchmark.c
Normal file
57
tests/benchmarks/timing_info/src/main_benchmark.c
Normal file
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Copyright (c) 2013-2015 Wind River Systems, Inc.
|
||||
* Copyright (c) 2016 Intel Corporation.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief Measure time
|
||||
*
|
||||
*/
|
||||
#include <kernel.h>
|
||||
#include <zephyr.h>
|
||||
#include <tc_util.h>
|
||||
#include <ksched.h>
|
||||
#include "timing_info.h"
|
||||
|
||||
void main(void)
|
||||
{
|
||||
u32_t freq = CONFIG_SYS_CLOCK_HW_CYCLES_PER_SEC / 1000000;
|
||||
|
||||
TC_START("Time Measurement");
|
||||
TC_PRINT("Timing Results: Clock Frequency: %d MHz\n", freq);
|
||||
|
||||
/*******************************************************************/
|
||||
/* System parameters and thread Benchmarking*/
|
||||
system_thread_bench();
|
||||
|
||||
/*******************************************************************/
|
||||
/* Thread yield*/
|
||||
yield_bench();
|
||||
|
||||
/*******************************************************************/
|
||||
/* heap Memory benchmarking*/
|
||||
heap_malloc_free_bench();
|
||||
|
||||
/*******************************************************************/
|
||||
/* Semaphore take and get*/
|
||||
semaphore_bench();
|
||||
|
||||
/*******************************************************************/
|
||||
/* mutex lock and unlock*/
|
||||
mutex_bench();
|
||||
|
||||
/*******************************************************************/
|
||||
/* mutex lock and unlock*/
|
||||
msg_passing_bench();
|
||||
|
||||
|
||||
TC_PRINT("Timing Measurement finished\n");
|
||||
|
||||
/* for sanity regression test utility. */
|
||||
TC_END_RESULT(TC_PASS);
|
||||
TC_END_REPORT(TC_PASS);
|
||||
|
||||
}
|
439
tests/benchmarks/timing_info/src/msg_passing_bench.c
Normal file
439
tests/benchmarks/timing_info/src/msg_passing_bench.c
Normal file
|
@ -0,0 +1,439 @@
|
|||
/*
|
||||
* Copyright (c) 2013-2015 Wind River Systems, Inc.
|
||||
* Copyright (c) 2016 Intel Corporation.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <kernel.h>
|
||||
#include <zephyr.h>
|
||||
#include <tc_util.h>
|
||||
#include <ksched.h>
|
||||
#include "timing_info.h"
|
||||
|
||||
extern char sline[];
|
||||
|
||||
/* mailbox*/
|
||||
/* K_MBOX_DEFINE(test_msg_queue) */
|
||||
K_MSGQ_DEFINE(benchmark_q, sizeof(int), 10, 4);
|
||||
K_MSGQ_DEFINE(benchmark_q_get, sizeof(int), 3, 4);
|
||||
K_MBOX_DEFINE(benchmark_mbox);
|
||||
|
||||
/* Declare a semaphore for the msgq*/
|
||||
K_SEM_DEFINE(mbox_sem, 1, 1);
|
||||
|
||||
/* common location for the swap to write the tsc data*/
|
||||
extern u32_t __read_swap_end_tsc_value;
|
||||
extern u64_t __common_var_swap_end_tsc;
|
||||
|
||||
/* location of the time stamps*/
|
||||
u64_t __msg_q_put_state;
|
||||
u64_t __msg_q_get_state;
|
||||
|
||||
u64_t __msg_q_put_w_cxt_start_tsc;
|
||||
u64_t __msg_q_put_w_cxt_end_tsc;
|
||||
|
||||
u64_t __msg_q_put_wo_cxt_start_tsc; /* without context switch */
|
||||
u64_t __msg_q_put_wo_cxt_end_tsc;
|
||||
|
||||
u64_t __msg_q_get_w_cxt_start_tsc;
|
||||
u64_t __msg_q_get_w_cxt_end_tsc;
|
||||
|
||||
u64_t msg_q_get_wo_cxt_start_tsc;
|
||||
u64_t msg_q_get_wo_cxt_end_tsc;
|
||||
|
||||
u32_t __mbox_sync_put_state;
|
||||
u64_t mbox_sync_put_start_tsc;
|
||||
u64_t mbox_sync_put_end_tsc;
|
||||
|
||||
u32_t __mbox_sync_get_state;
|
||||
u64_t mbox_sync_get_start_tsc;
|
||||
u64_t mbox_sync_get_end_tsc;
|
||||
|
||||
u64_t mbox_async_put_start_tsc;
|
||||
u64_t mbox_async_put_end_tsc;
|
||||
|
||||
u64_t mbox_get_w_cxt_start_tsc;
|
||||
u64_t mbox_get_w_cxt_end_tsc;
|
||||
|
||||
/*For benchmarking msg queues*/
|
||||
k_tid_t producer_w_cxt_switch_tid;
|
||||
k_tid_t producer_wo_cxt_switch_tid;
|
||||
k_tid_t producer_get_w_cxt_switch_tid;
|
||||
k_tid_t consumer_get_w_cxt_switch_tid;
|
||||
k_tid_t consumer_tid;
|
||||
k_tid_t thread_mbox_sync_put_send_tid;
|
||||
k_tid_t thread_mbox_sync_put_receive_tid;
|
||||
k_tid_t thread_mbox_sync_get_send_tid;
|
||||
k_tid_t thread_mbox_sync_get_receive_tid;
|
||||
k_tid_t thread_mbox_async_put_send_tid;
|
||||
k_tid_t thread_mbox_async_put_receive_tid;
|
||||
|
||||
/* To time thread creation*/
|
||||
#define STACK_SIZE 500
|
||||
extern char __noinit __stack my_stack_area[STACK_SIZE];
|
||||
extern char __noinit __stack my_stack_area_0[STACK_SIZE];
|
||||
|
||||
/* thread functions*/
|
||||
void thread_producer_msgq_w_cxt_switch(void *p1, void *p2, void *p3);
|
||||
void thread_producer_msgq_wo_cxt_switch(void *p1, void *p2, void *p3);
|
||||
|
||||
void thread_producer_get_msgq_w_cxt_switch(void *p1, void *p2, void *p3);
|
||||
void thread_consumer_get_msgq_w_cxt_switch(void *p1, void *p2, void *p3);
|
||||
|
||||
void thread_mbox_sync_put_send(void *p1, void *p2, void *p3);
|
||||
void thread_mbox_sync_put_receive(void *p1, void *p2, void *p3);
|
||||
|
||||
void thread_mbox_sync_get_send(void *p1, void *p2, void *p3);
|
||||
void thread_mbox_sync_get_receive(void *p1, void *p2, void *p3);
|
||||
|
||||
void thread_mbox_async_put_send(void *p1, void *p2, void *p3);
|
||||
void thread_mbox_async_put_receive(void *p1, void *p2, void *p3);
|
||||
|
||||
volatile u64_t time_check;
|
||||
int received_data_get;
|
||||
int received_data_consumer;
|
||||
int data_to_send;
|
||||
|
||||
void msg_passing_bench(void)
|
||||
{
|
||||
DECLARE_VAR(msg_q, put_w_cxt)
|
||||
DECLARE_VAR(msg_q, put_wo_cxt)
|
||||
|
||||
DECLARE_VAR(msg_q, get_w_cxt)
|
||||
DECLARE_VAR(msg_q, get_wo_cxt)
|
||||
|
||||
DECLARE_VAR(mbox, sync_put)
|
||||
DECLARE_VAR(mbox, sync_get)
|
||||
DECLARE_VAR(mbox, async_put)
|
||||
|
||||
DECLARE_VAR(mbox, get_w_cxt)
|
||||
|
||||
|
||||
/*******************************************************************/
|
||||
/* Msg queue for put*/
|
||||
int received_data = 0;
|
||||
|
||||
producer_w_cxt_switch_tid =
|
||||
k_thread_spawn(my_stack_area, STACK_SIZE,
|
||||
thread_producer_msgq_w_cxt_switch, NULL,
|
||||
NULL, NULL, 2 /*priority*/, 0, 50);
|
||||
|
||||
u32_t msg_status = k_msgq_get(&benchmark_q, &received_data, 300);
|
||||
|
||||
producer_wo_cxt_switch_tid =
|
||||
k_thread_spawn(my_stack_area_0, STACK_SIZE,
|
||||
thread_producer_msgq_wo_cxt_switch,
|
||||
NULL, NULL, NULL, -2 /*priority*/, 0, 0);
|
||||
|
||||
k_thread_abort(producer_w_cxt_switch_tid);
|
||||
k_thread_abort(producer_wo_cxt_switch_tid);
|
||||
__msg_q_put_w_cxt_end_tsc = ((u32_t)__common_var_swap_end_tsc);
|
||||
ARG_UNUSED(msg_status);
|
||||
|
||||
/*******************************************************************/
|
||||
|
||||
/* Msg queue for get*/
|
||||
|
||||
producer_get_w_cxt_switch_tid =
|
||||
k_thread_spawn(my_stack_area,
|
||||
STACK_SIZE,
|
||||
thread_producer_get_msgq_w_cxt_switch, NULL,
|
||||
NULL, NULL, 1 /*priority*/, 0, 50);
|
||||
consumer_get_w_cxt_switch_tid =
|
||||
k_thread_spawn(my_stack_area_0,
|
||||
STACK_SIZE,
|
||||
thread_consumer_get_msgq_w_cxt_switch,
|
||||
NULL, NULL, NULL,
|
||||
2 /*priority*/, 0, 50);
|
||||
k_sleep(2000); /* make the main thread sleep */
|
||||
k_thread_abort(producer_get_w_cxt_switch_tid);
|
||||
__msg_q_get_w_cxt_end_tsc = (__common_var_swap_end_tsc);
|
||||
|
||||
/*******************************************************************/
|
||||
|
||||
/* Msg queue for get*/
|
||||
/* from previous step got the msg_q full now just do a simple read*/
|
||||
msg_q_get_wo_cxt_start_tsc = OS_GET_TIME();
|
||||
|
||||
received_data_get = k_msgq_get(&benchmark_q_get,
|
||||
&received_data_consumer,
|
||||
K_NO_WAIT);
|
||||
|
||||
msg_q_get_wo_cxt_end_tsc = OS_GET_TIME();
|
||||
|
||||
|
||||
/*******************************************************************/
|
||||
|
||||
/* Msg box to benchmark sync put */
|
||||
|
||||
thread_mbox_sync_put_send_tid =
|
||||
k_thread_spawn(my_stack_area,
|
||||
STACK_SIZE,
|
||||
thread_mbox_sync_put_send,
|
||||
NULL, NULL, NULL,
|
||||
2 /*priority*/, 0, 0);
|
||||
thread_mbox_sync_put_receive_tid =
|
||||
k_thread_spawn(my_stack_area_0,
|
||||
STACK_SIZE,
|
||||
thread_mbox_sync_put_receive,
|
||||
NULL, NULL, NULL,
|
||||
1 /*priority*/, 0, 0);
|
||||
k_sleep(1000); /* make the main thread sleep */
|
||||
mbox_sync_put_end_tsc = (__common_var_swap_end_tsc);
|
||||
|
||||
/*******************************************************************/
|
||||
|
||||
/* Msg box to benchmark sync get */
|
||||
|
||||
thread_mbox_sync_get_send_tid =
|
||||
k_thread_spawn(my_stack_area,
|
||||
STACK_SIZE,
|
||||
thread_mbox_sync_get_send,
|
||||
NULL, NULL, NULL,
|
||||
1 /*prio*/, 0, 0);
|
||||
thread_mbox_sync_get_receive_tid =
|
||||
k_thread_spawn(my_stack_area_0,
|
||||
STACK_SIZE,
|
||||
thread_mbox_sync_get_receive, NULL,
|
||||
NULL, NULL, 2 /*priority*/, 0, 0);
|
||||
k_sleep(1000); /* make the main thread sleep */
|
||||
mbox_sync_get_end_tsc = (__common_var_swap_end_tsc);
|
||||
|
||||
/*******************************************************************/
|
||||
|
||||
/* Msg box to benchmark async put */
|
||||
|
||||
thread_mbox_async_put_send_tid =
|
||||
k_thread_spawn(my_stack_area,
|
||||
STACK_SIZE,
|
||||
thread_mbox_async_put_send,
|
||||
NULL, NULL, NULL,
|
||||
2 /*prio*/, 0, 0);
|
||||
thread_mbox_async_put_receive_tid =
|
||||
k_thread_spawn(my_stack_area_0,
|
||||
STACK_SIZE,
|
||||
thread_mbox_async_put_receive,
|
||||
NULL, NULL, NULL,
|
||||
3 /*priority*/, 0, 0);
|
||||
k_sleep(1000); /* make the main thread sleep */
|
||||
|
||||
/*******************************************************************/
|
||||
int single_element_buffer = 0;
|
||||
struct k_mbox_msg rx_msg = {
|
||||
.size = sizeof(int),
|
||||
.rx_source_thread = K_ANY,
|
||||
.tx_target_thread = K_ANY
|
||||
};
|
||||
mbox_get_w_cxt_start_tsc = OS_GET_TIME();
|
||||
|
||||
k_mbox_get(&benchmark_mbox, &rx_msg, &single_element_buffer, 300);
|
||||
|
||||
mbox_get_w_cxt_end_tsc = OS_GET_TIME();
|
||||
|
||||
/*******************************************************************/
|
||||
|
||||
/* calculation for msg put with context switch */
|
||||
CALCULATE_TIME(__, msg_q, put_w_cxt)
|
||||
|
||||
/* calculation for msg put without context switch */
|
||||
CALCULATE_TIME(__, msg_q, put_wo_cxt)
|
||||
|
||||
/* calculation for msg get */
|
||||
CALCULATE_TIME(__, msg_q, get_w_cxt)
|
||||
|
||||
/* calculation for msg get without context switch */
|
||||
CALCULATE_TIME(, msg_q, get_wo_cxt)
|
||||
|
||||
/*calculation for msg box for sync put
|
||||
* (i.e with a context to make the pending rx ready)
|
||||
*/
|
||||
CALCULATE_TIME(, mbox, sync_put)
|
||||
|
||||
/*calculation for msg box for sync get
|
||||
* (i.e with a context to make the pending tx ready)
|
||||
*/
|
||||
CALCULATE_TIME(, mbox, sync_get)
|
||||
|
||||
/* calculation for msg box for async put */
|
||||
CALCULATE_TIME(, mbox, async_put)
|
||||
|
||||
/* calculation for msg box for get without any context switch */
|
||||
CALCULATE_TIME(, mbox, get_w_cxt)
|
||||
|
||||
/*******************************************************************/
|
||||
|
||||
/* Only print lower 32bit of time result */
|
||||
PRINT_F("Message Queue Put with context switch",
|
||||
(u32_t)((__msg_q_put_w_cxt_end_tsc -
|
||||
__msg_q_put_w_cxt_start_tsc) & 0xFFFFFFFFULL),
|
||||
(u32_t) (total_msg_q_put_w_cxt_time & 0xFFFFFFFFULL));
|
||||
|
||||
PRINT_F("Message Queue Put without context switch",
|
||||
(u32_t)((__msg_q_put_wo_cxt_end_tsc -
|
||||
__msg_q_put_wo_cxt_start_tsc) & 0xFFFFFFFFULL),
|
||||
(u32_t) (total_msg_q_put_wo_cxt_time & 0xFFFFFFFFULL));
|
||||
|
||||
PRINT_F("Message Queue get with context switch",
|
||||
(u32_t)((__msg_q_get_w_cxt_end_tsc -
|
||||
__msg_q_get_w_cxt_start_tsc) & 0xFFFFFFFFULL),
|
||||
(u32_t) (total_msg_q_get_w_cxt_time & 0xFFFFFFFFULL));
|
||||
|
||||
PRINT_F("Message Queue get without context switch",
|
||||
(u32_t)((msg_q_get_wo_cxt_end_tsc -
|
||||
msg_q_get_wo_cxt_start_tsc) & 0xFFFFFFFFULL),
|
||||
(u32_t) (total_msg_q_get_wo_cxt_time & 0xFFFFFFFFULL));
|
||||
|
||||
PRINT_F("MailBox synchronous put",
|
||||
(u32_t)((mbox_sync_put_end_tsc - mbox_sync_put_start_tsc)
|
||||
& 0xFFFFFFFFULL),
|
||||
(u32_t) (total_mbox_sync_put_time & 0xFFFFFFFFULL));
|
||||
|
||||
PRINT_F("MailBox synchronous get",
|
||||
(u32_t)((mbox_sync_get_end_tsc - mbox_sync_get_start_tsc)
|
||||
& 0xFFFFFFFFULL),
|
||||
(u32_t) (total_mbox_sync_get_time & 0xFFFFFFFFULL));
|
||||
|
||||
PRINT_F("MailBox asynchronous put",
|
||||
(u32_t)((mbox_async_put_end_tsc - mbox_async_put_start_tsc)
|
||||
& 0xFFFFFFFFULL),
|
||||
(u32_t) (total_mbox_async_put_time & 0xFFFFFFFFULL));
|
||||
|
||||
PRINT_F("MailBox get without context switch",
|
||||
(u32_t)((mbox_get_w_cxt_end_tsc - mbox_get_w_cxt_start_tsc)
|
||||
& 0xFFFFFFFFULL),
|
||||
(u32_t) (total_mbox_get_w_cxt_time & 0xFFFFFFFFULL));
|
||||
|
||||
}
|
||||
|
||||
void thread_producer_msgq_w_cxt_switch(void *p1, void *p2, void *p3)
|
||||
{
|
||||
int data_to_send = 5050;
|
||||
|
||||
__read_swap_end_tsc_value = 1;
|
||||
__msg_q_put_w_cxt_start_tsc = (u32_t) OS_GET_TIME();
|
||||
k_msgq_put(&benchmark_q, &data_to_send, K_NO_WAIT);
|
||||
}
|
||||
|
||||
|
||||
void thread_producer_msgq_wo_cxt_switch(void *p1, void *p2, void *p3)
|
||||
{
|
||||
int data_to_send = 5050;
|
||||
|
||||
__msg_q_put_wo_cxt_start_tsc = OS_GET_TIME();
|
||||
k_msgq_put(&benchmark_q, &data_to_send, K_NO_WAIT);
|
||||
__msg_q_put_wo_cxt_end_tsc = OS_GET_TIME();
|
||||
}
|
||||
|
||||
|
||||
void thread_producer_get_msgq_w_cxt_switch(void *p1, void *p2, void *p3)
|
||||
{
|
||||
int status = 0;
|
||||
|
||||
while (1) {
|
||||
if (status == 0) {
|
||||
data_to_send++;
|
||||
}
|
||||
status = k_msgq_put(&benchmark_q_get, &data_to_send, 20);
|
||||
}
|
||||
}
|
||||
|
||||
void thread_consumer_get_msgq_w_cxt_switch(void *p1, void *p2, void *p3)
|
||||
{
|
||||
producer_get_w_cxt_switch_tid->base.timeout.delta_ticks_from_prev =
|
||||
_EXPIRED;
|
||||
__read_swap_end_tsc_value = 1;
|
||||
__msg_q_get_w_cxt_start_tsc = OS_GET_TIME();
|
||||
received_data_get = k_msgq_get(&benchmark_q_get,
|
||||
&received_data_consumer,
|
||||
300);
|
||||
time_check = OS_GET_TIME();
|
||||
}
|
||||
|
||||
|
||||
void thread_mbox_sync_put_send(void *p1, void *p2, void *p3)
|
||||
{
|
||||
int single_element_buffer = 1234;
|
||||
struct k_mbox_msg tx_msg = {
|
||||
.size = sizeof(int),
|
||||
.info = 5050,
|
||||
.tx_data = &single_element_buffer,
|
||||
.rx_source_thread = K_ANY,
|
||||
.tx_target_thread = K_ANY,
|
||||
};
|
||||
|
||||
mbox_sync_put_start_tsc = OS_GET_TIME();
|
||||
__read_swap_end_tsc_value = 1;
|
||||
k_mbox_put(&benchmark_mbox, &tx_msg, 300);
|
||||
time_check = OS_GET_TIME();
|
||||
}
|
||||
|
||||
void thread_mbox_sync_put_receive(void *p1, void *p2, void *p3)
|
||||
{
|
||||
int single_element_buffer = 1234;
|
||||
struct k_mbox_msg rx_msg = {
|
||||
.size = sizeof(int),
|
||||
.rx_source_thread = K_ANY,
|
||||
.tx_target_thread = K_ANY
|
||||
};
|
||||
|
||||
k_mbox_get(&benchmark_mbox, &rx_msg, &single_element_buffer, 300);
|
||||
}
|
||||
|
||||
void thread_mbox_sync_get_send(void *p1, void *p2, void *p3)
|
||||
{
|
||||
int single_element_buffer = 1234;
|
||||
struct k_mbox_msg tx_msg = {
|
||||
.size = sizeof(int),
|
||||
.info = 5050,
|
||||
.tx_data = &single_element_buffer,
|
||||
.rx_source_thread = K_ANY,
|
||||
.tx_target_thread = K_ANY,
|
||||
};
|
||||
|
||||
k_mbox_put(&benchmark_mbox, &tx_msg, 300);
|
||||
}
|
||||
|
||||
void thread_mbox_sync_get_receive(void *p1, void *p2, void *p3)
|
||||
{
|
||||
int single_element_buffer;
|
||||
struct k_mbox_msg rx_msg = {
|
||||
.size = sizeof(int),
|
||||
.rx_source_thread = K_ANY,
|
||||
.tx_target_thread = K_ANY
|
||||
};
|
||||
|
||||
__read_swap_end_tsc_value = 1;
|
||||
mbox_sync_get_start_tsc = OS_GET_TIME();
|
||||
k_mbox_get(&benchmark_mbox, &rx_msg, &single_element_buffer, 300);
|
||||
}
|
||||
|
||||
void thread_mbox_async_put_send(void *p1, void *p2, void *p3)
|
||||
{
|
||||
int single_element_buffer = 1234;
|
||||
struct k_mbox_msg tx_msg = {
|
||||
.size = sizeof(int),
|
||||
.info = 5050,
|
||||
.tx_data = &single_element_buffer,
|
||||
.rx_source_thread = K_ANY,
|
||||
.tx_target_thread = K_ANY,
|
||||
};
|
||||
|
||||
mbox_async_put_start_tsc = OS_GET_TIME();
|
||||
k_mbox_async_put(&benchmark_mbox, &tx_msg, &mbox_sem);
|
||||
mbox_async_put_end_tsc = OS_GET_TIME();
|
||||
k_mbox_async_put(&benchmark_mbox, &tx_msg, &mbox_sem);
|
||||
}
|
||||
|
||||
void thread_mbox_async_put_receive(void *p1, void *p2, void *p3)
|
||||
{
|
||||
int single_element_buffer;
|
||||
struct k_mbox_msg rx_msg = {
|
||||
.size = sizeof(int),
|
||||
.rx_source_thread = K_ANY,
|
||||
.tx_target_thread = K_ANY
|
||||
};
|
||||
|
||||
k_mbox_get(&benchmark_mbox, &rx_msg, &single_element_buffer, 300);
|
||||
|
||||
}
|
183
tests/benchmarks/timing_info/src/semaphore_bench.c
Normal file
183
tests/benchmarks/timing_info/src/semaphore_bench.c
Normal file
|
@ -0,0 +1,183 @@
|
|||
#include <kernel.h>
|
||||
#include <zephyr.h>
|
||||
#include <tc_util.h>
|
||||
#include <ksched.h>
|
||||
#include "timing_info.h"
|
||||
|
||||
|
||||
extern char sline[];
|
||||
|
||||
K_SEM_DEFINE(sem_bench, 0, 1);
|
||||
K_SEM_DEFINE(sem_bench_1, 0, 1);
|
||||
|
||||
/* To time thread creation*/
|
||||
#define STACK_SIZE 500
|
||||
extern char __noinit __stack my_stack_area[STACK_SIZE];
|
||||
extern char __noinit __stack my_stack_area_0[STACK_SIZE];
|
||||
|
||||
/* u64_t thread_yield_start_tsc[1000]; */
|
||||
/* u64_t thread_yield_end_tsc[1000]; */
|
||||
u64_t thread_start_time;
|
||||
u64_t thread_end_time;
|
||||
u64_t sem_start_time;
|
||||
u64_t sem_end_time;
|
||||
u64_t sem_give_start_time;
|
||||
u64_t sem_give_end_time;
|
||||
|
||||
u32_t swap_called;
|
||||
u64_t test_time2;
|
||||
u64_t test_time1;
|
||||
|
||||
void thread_sem0_test(void *p1, void *p2, void *p3);
|
||||
void thread_sem1_test(void *p1, void *p2, void *p3);
|
||||
void thread_sem0_give_test(void *p1, void *p2, void *p3);
|
||||
void thread_sem1_give_test(void *p1, void *p2, void *p3);
|
||||
|
||||
k_tid_t sem0_tid;
|
||||
k_tid_t sem1_tid;
|
||||
|
||||
extern u64_t __common_var_swap_end_tsc;
|
||||
extern u32_t __read_swap_end_tsc_value;
|
||||
|
||||
void semaphore_bench(void)
|
||||
{
|
||||
|
||||
/* Thread yield*/
|
||||
|
||||
sem0_tid = k_thread_spawn(my_stack_area,
|
||||
STACK_SIZE,
|
||||
thread_sem0_test, NULL, NULL, NULL,
|
||||
2 /*priority*/, 0, 0);
|
||||
sem1_tid = k_thread_spawn(my_stack_area_0,
|
||||
STACK_SIZE, thread_sem1_test,
|
||||
NULL, NULL, NULL,
|
||||
2 /*priority*/, 0, 0);
|
||||
|
||||
k_sleep(1000);
|
||||
|
||||
|
||||
/* u64_t test_time1 = _tsc_read(); */
|
||||
sem_end_time = (__common_var_swap_end_tsc);
|
||||
u32_t sem_cycles = sem_end_time - sem_start_time;
|
||||
|
||||
sem0_tid = k_thread_spawn(my_stack_area,
|
||||
STACK_SIZE, thread_sem0_give_test,
|
||||
NULL, NULL, NULL,
|
||||
2 /*priority*/, 0, 0);
|
||||
sem1_tid = k_thread_spawn(my_stack_area_0,
|
||||
STACK_SIZE, thread_sem1_give_test,
|
||||
NULL, NULL, NULL,
|
||||
2 /*priority*/, 0, 0);
|
||||
|
||||
k_sleep(1000);
|
||||
sem_give_end_time = (__common_var_swap_end_tsc);
|
||||
u32_t sem_give_cycles = sem_give_end_time - sem_give_start_time;
|
||||
|
||||
|
||||
/* Semaphore without context switch*/
|
||||
u32_t sem_give_wo_cxt_start = OS_GET_TIME();
|
||||
|
||||
k_sem_give(&sem_bench);
|
||||
u32_t sem_give_wo_cxt_end = OS_GET_TIME();
|
||||
u32_t sem_give_wo_cxt_cycles = sem_give_wo_cxt_end -
|
||||
sem_give_wo_cxt_start;
|
||||
|
||||
u32_t sem_take_wo_cxt_start = OS_GET_TIME();
|
||||
|
||||
k_sem_take(&sem_bench, 10);
|
||||
u32_t sem_take_wo_cxt_end = OS_GET_TIME();
|
||||
u32_t sem_take_wo_cxt_cycles = sem_take_wo_cxt_end -
|
||||
sem_take_wo_cxt_start;
|
||||
|
||||
/* TC_PRINT("test_time1 , %d cycles\n", (u32_t)test_time1); */
|
||||
/* TC_PRINT("test_time2 , %d cycles\n", (u32_t)test_time2); */
|
||||
|
||||
PRINT_F("Semaphore Take with context switch",
|
||||
sem_cycles, SYS_CLOCK_HW_CYCLES_TO_NS(sem_cycles));
|
||||
PRINT_F("Semaphore Give with context switch",
|
||||
sem_give_cycles, SYS_CLOCK_HW_CYCLES_TO_NS(sem_give_cycles));
|
||||
|
||||
PRINT_F("Semaphore Take without context switch",
|
||||
sem_take_wo_cxt_cycles,
|
||||
SYS_CLOCK_HW_CYCLES_TO_NS(sem_take_wo_cxt_cycles));
|
||||
PRINT_F("Semaphore Give without context switch",
|
||||
sem_give_wo_cxt_cycles,
|
||||
SYS_CLOCK_HW_CYCLES_TO_NS(sem_give_wo_cxt_cycles));
|
||||
|
||||
}
|
||||
/******************************************************************************/
|
||||
K_MUTEX_DEFINE(mutex0);
|
||||
void mutex_bench(void)
|
||||
{
|
||||
u32_t mutex_lock_start_tsc;
|
||||
u32_t mutex_lock_end_tsc;
|
||||
u32_t mutex_lock_diff = 0;
|
||||
|
||||
u32_t mutex_unlock_start_tsc;
|
||||
u32_t mutex_unlock_end_tsc;
|
||||
u32_t mutex_unlock_diff = 0;
|
||||
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
mutex_lock_start_tsc = OS_GET_TIME();
|
||||
k_mutex_lock(&mutex0, 100);
|
||||
mutex_lock_end_tsc = OS_GET_TIME();
|
||||
|
||||
mutex_unlock_start_tsc = OS_GET_TIME();
|
||||
k_mutex_unlock(&mutex0);
|
||||
mutex_unlock_end_tsc = OS_GET_TIME();
|
||||
|
||||
mutex_lock_diff += (mutex_lock_end_tsc - mutex_lock_start_tsc);
|
||||
mutex_unlock_diff += (mutex_unlock_end_tsc -
|
||||
mutex_unlock_start_tsc);
|
||||
}
|
||||
|
||||
PRINT_F("Mutex lock", mutex_lock_diff / 1000,
|
||||
SYS_CLOCK_HW_CYCLES_TO_NS(mutex_lock_diff / 1000));
|
||||
|
||||
PRINT_F("Mutex unlock", mutex_unlock_diff / 1000,
|
||||
SYS_CLOCK_HW_CYCLES_TO_NS(mutex_unlock_diff / 1000));
|
||||
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
void thread_sem1_test(void *p1, void *p2, void *p3)
|
||||
{
|
||||
|
||||
k_sem_give(&sem_bench); /* sync the 2 threads*/
|
||||
|
||||
__read_swap_end_tsc_value = 1;
|
||||
sem_start_time = OS_GET_TIME();
|
||||
k_sem_take(&sem_bench, 10);
|
||||
}
|
||||
|
||||
u32_t sem_count;
|
||||
void thread_sem0_test(void *p1, void *p2, void *p3)
|
||||
{
|
||||
k_sem_take(&sem_bench, 10);/* To sync threads */
|
||||
|
||||
k_sem_give(&sem_bench);
|
||||
sem_count++;
|
||||
k_thread_abort(sem0_tid);
|
||||
}
|
||||
/******************************************************************************/
|
||||
void thread_sem1_give_test(void *p1, void *p2, void *p3)
|
||||
{
|
||||
k_sem_give(&sem_bench); /* sync the 2 threads*/
|
||||
|
||||
k_sem_take(&sem_bench_1, 1000); /* clear the previous sem_give*/
|
||||
/* test_time1 = OS_GET_TIME(); */
|
||||
}
|
||||
|
||||
void thread_sem0_give_test(void *p1, void *p2, void *p3)
|
||||
{
|
||||
k_sem_take(&sem_bench, 10);/* To sync threads */
|
||||
/* test_time2 = OS_GET_TIME(); */
|
||||
|
||||
/* To make sure that the sem give will cause a swap to occur */
|
||||
k_thread_priority_set(sem1_tid, 1);
|
||||
|
||||
__read_swap_end_tsc_value = 1;
|
||||
sem_give_start_time = OS_GET_TIME();
|
||||
k_sem_give(&sem_bench_1);
|
||||
|
||||
}
|
347
tests/benchmarks/timing_info/src/thread_bench.c
Normal file
347
tests/benchmarks/timing_info/src/thread_bench.c
Normal file
|
@ -0,0 +1,347 @@
|
|||
/*
|
||||
* Copyright (c) 2013-2015 Wind River Systems, Inc.
|
||||
* Copyright (c) 2016 Intel Corporation.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief Measure time
|
||||
*
|
||||
*/
|
||||
#include <kernel.h>
|
||||
#include <zephyr.h>
|
||||
#include <tc_util.h>
|
||||
#include <ksched.h>
|
||||
#include "timing_info.h"
|
||||
|
||||
char sline[256];
|
||||
/* FILE *output_file = stdout; */
|
||||
|
||||
/* location of the time stamps*/
|
||||
u32_t __read_swap_end_tsc_value;
|
||||
u64_t __common_var_swap_end_tsc;
|
||||
|
||||
volatile u64_t thread_abort_end_tsc;
|
||||
volatile u64_t thread_abort_start_tsc;
|
||||
|
||||
/* Thread suspend*/
|
||||
volatile u64_t thread_suspend_start_tsc;
|
||||
volatile u64_t thread_suspend_end_tsc;
|
||||
|
||||
/* Thread resume*/
|
||||
volatile u64_t thread_resume_start_tsc;
|
||||
volatile u64_t thread_resume_end_tsc;
|
||||
|
||||
/* Thread sleep*/
|
||||
volatile u64_t thread_sleep_start_tsc;
|
||||
volatile u64_t thread_sleep_end_tsc;
|
||||
|
||||
/*For benchmarking msg queues*/
|
||||
k_tid_t producer_tid;
|
||||
k_tid_t consumer_tid;
|
||||
|
||||
/* To time thread creation*/
|
||||
#define STACK_SIZE 500
|
||||
char __noinit __stack my_stack_area[STACK_SIZE];
|
||||
char __noinit __stack my_stack_area_0[STACK_SIZE];
|
||||
|
||||
u32_t __read_swap_end_tsc_value_test = 1;
|
||||
u64_t dummy_time;
|
||||
u64_t start_time;
|
||||
u64_t test_end_time;
|
||||
|
||||
#if CONFIG_X86
|
||||
u32_t benchmarking_overhead_swap(void)
|
||||
{
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"pushl %eax\n\t"
|
||||
"pushl %edx\n\t"
|
||||
"rdtsc\n\t"
|
||||
"mov %eax,start_time\n\t"
|
||||
"mov %edx,start_time+4\n\t"
|
||||
"cmp $0x1,__read_swap_end_tsc_value_test\n\t"
|
||||
"jne time_read_not_needed_test\n\t"
|
||||
"movw $0x2,__read_swap_end_tsc_value\n\t"
|
||||
"pushl %eax\n\t"
|
||||
"pushl %edx\n\t"
|
||||
"rdtsc\n\t"
|
||||
"mov %eax,dummy_time\n\t"
|
||||
"mov %edx,dummy_time+4\n\t"
|
||||
"pop %edx\n\t"
|
||||
"pop %eax\n\t"
|
||||
"time_read_not_needed_test:\n\t"
|
||||
"rdtsc\n\t"
|
||||
"mov %eax,test_end_time\n\t"
|
||||
"mov %edx,test_end_time+4\n\t"
|
||||
"pop %edx\n\t"
|
||||
"pop %eax\n\t");
|
||||
|
||||
/* u64_t end_time = OS_GET_TIME(); */
|
||||
return(test_end_time - start_time);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if CONFIG_ARM
|
||||
void read_systick_start_of_swap(void)
|
||||
{
|
||||
__start_swap_tsc = (u32_t)SysTick->VAL;
|
||||
}
|
||||
|
||||
void read_systick_end_of_swap(void)
|
||||
{
|
||||
if (__read_swap_end_tsc_value == 1) {
|
||||
__read_swap_end_tsc_value = 2;
|
||||
__common_var_swap_end_tsc = OS_GET_TIME();
|
||||
}
|
||||
}
|
||||
|
||||
void read_systick_start_of_isr(void)
|
||||
{
|
||||
__start_intr_tsc = (u32_t)SysTick->VAL;
|
||||
}
|
||||
|
||||
void read_systick_end_of_isr(void)
|
||||
{
|
||||
__end_intr_tsc = (u32_t)SysTick->VAL;
|
||||
}
|
||||
|
||||
void read_systick_start_of_tick_handler(void)
|
||||
{
|
||||
__start_tick_tsc = (u32_t)SysTick->VAL;
|
||||
}
|
||||
|
||||
void read_systick_end_of_tick_handler(void)
|
||||
{
|
||||
__end_tick_tsc = (u32_t)SysTick->VAL;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
void test_thread_entry(void *p, void *p1, void *p2)
|
||||
{
|
||||
static int i;
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
|
||||
void thread_swap_test(void *p1, void *p2, void *p3)
|
||||
{
|
||||
__read_swap_end_tsc_value = 1;
|
||||
thread_abort_start_tsc = OS_GET_TIME();
|
||||
k_thread_abort(_current);
|
||||
}
|
||||
|
||||
void thread_suspend_test(void *p1, void *p2, void *p3);
|
||||
void yield_bench(void);
|
||||
void heap_malloc_free_bench(void);
|
||||
void main_sem_bench(void);
|
||||
void main_mutex_bench(void);
|
||||
void main_msg_bench(void);
|
||||
|
||||
void system_thread_bench(void)
|
||||
{
|
||||
u64_t total_intr_time;
|
||||
u64_t total_tick_time;
|
||||
|
||||
/*Thread create*/
|
||||
u64_t thread_create_start_tsc;
|
||||
u64_t thread_create_end_tsc;
|
||||
|
||||
DECLARE_VAR(thread, create)
|
||||
|
||||
/*Thread cancel*/
|
||||
u64_t thread_cancel_start_tsc;
|
||||
u64_t thread_cancel_end_tsc;
|
||||
DECLARE_VAR(thread, cancel)
|
||||
|
||||
/* Thread Abort*/
|
||||
DECLARE_VAR(thread, abort)
|
||||
|
||||
/* Thread Suspend*/
|
||||
DECLARE_VAR(thread, suspend)
|
||||
|
||||
/* Thread Resume*/
|
||||
DECLARE_VAR(thread, resume)
|
||||
|
||||
|
||||
/* to measure context switch time */
|
||||
k_thread_spawn(my_stack_area_0,
|
||||
STACK_SIZE,
|
||||
thread_swap_test,
|
||||
NULL, NULL, NULL,
|
||||
-1 /*priority*/, 0, 0);
|
||||
|
||||
thread_abort_end_tsc = (__common_var_swap_end_tsc);
|
||||
__end_swap_tsc = __common_var_swap_end_tsc;
|
||||
|
||||
|
||||
u32_t total_swap_cycles = __end_swap_tsc -
|
||||
SUBTRACT_CLOCK_CYCLES(__start_swap_tsc);
|
||||
|
||||
/* Interrupt latency*/
|
||||
total_intr_time = SYS_CLOCK_HW_CYCLES_TO_NS(__end_intr_tsc -
|
||||
__start_intr_tsc);
|
||||
|
||||
/* tick overhead*/
|
||||
total_tick_time = SYS_CLOCK_HW_CYCLES_TO_NS(__end_tick_tsc -
|
||||
__start_tick_tsc);
|
||||
|
||||
/*******************************************************************/
|
||||
/* thread create*/
|
||||
thread_create_start_tsc = OS_GET_TIME();
|
||||
|
||||
k_tid_t my_tid = k_thread_spawn(my_stack_area,
|
||||
STACK_SIZE,
|
||||
thread_swap_test,
|
||||
NULL, NULL, NULL,
|
||||
5 /*priority*/, 0, 10);
|
||||
|
||||
thread_create_end_tsc = OS_GET_TIME();
|
||||
|
||||
/* thread Termination*/
|
||||
thread_cancel_start_tsc = OS_GET_TIME();
|
||||
s32_t ret_value_thread_cancel = k_thread_cancel(my_tid);
|
||||
|
||||
thread_cancel_end_tsc = OS_GET_TIME();
|
||||
ARG_UNUSED(ret_value_thread_cancel);
|
||||
|
||||
/* Thread suspend*/
|
||||
k_tid_t sus_res_tid = k_thread_spawn(my_stack_area,
|
||||
STACK_SIZE,
|
||||
thread_suspend_test,
|
||||
NULL, NULL, NULL,
|
||||
-1 /*priority*/, 0, 0);
|
||||
|
||||
thread_suspend_end_tsc = OS_GET_TIME();
|
||||
/* At this point test for resume*/
|
||||
k_thread_resume(sus_res_tid);
|
||||
|
||||
/* calculation for creation */
|
||||
CALCULATE_TIME(, thread, create)
|
||||
|
||||
/* calculation for cancel */
|
||||
CALCULATE_TIME(, thread, cancel)
|
||||
|
||||
/* calculation for abort */
|
||||
CALCULATE_TIME(, thread, abort)
|
||||
|
||||
/* calculation for suspend */
|
||||
CALCULATE_TIME(, thread, suspend)
|
||||
|
||||
/* calculation for resume */
|
||||
thread_resume_start_tsc = thread_suspend_end_tsc;
|
||||
CALCULATE_TIME(, thread, resume)
|
||||
|
||||
/*******************************************************************/
|
||||
|
||||
/* Only print lower 32bit of time result */
|
||||
|
||||
PRINT_F("Context switch",
|
||||
(u32_t)(total_swap_cycles & 0xFFFFFFFFULL),
|
||||
(u32_t)SYS_CLOCK_HW_CYCLES_TO_NS(total_swap_cycles));
|
||||
|
||||
/*TC_PRINT("Swap Overhead:%d cycles\n", benchmarking_overhead_swap());*/
|
||||
|
||||
/*Interrupt latency */
|
||||
u32_t intr_latency_cycles = SUBTRACT_CLOCK_CYCLES(__end_intr_tsc) -
|
||||
SUBTRACT_CLOCK_CYCLES(__start_intr_tsc);
|
||||
|
||||
PRINT_F("Interrupt latency",
|
||||
(u32_t)(intr_latency_cycles),
|
||||
(u32_t) (SYS_CLOCK_HW_CYCLES_TO_NS(intr_latency_cycles)));
|
||||
|
||||
/*tick overhead*/
|
||||
u32_t tick_overhead_cycles = SUBTRACT_CLOCK_CYCLES(__end_tick_tsc) -
|
||||
SUBTRACT_CLOCK_CYCLES(__start_tick_tsc);
|
||||
PRINT_F("Tick overhead",
|
||||
(u32_t)(tick_overhead_cycles),
|
||||
(u32_t) (SYS_CLOCK_HW_CYCLES_TO_NS(tick_overhead_cycles)));
|
||||
|
||||
/*thread creation*/
|
||||
PRINT_F("Thread Creation",
|
||||
(u32_t)((thread_create_end_tsc - thread_create_start_tsc) &
|
||||
0xFFFFFFFFULL),
|
||||
(u32_t) (total_thread_create_time & 0xFFFFFFFFULL));
|
||||
|
||||
/*thread cancel*/
|
||||
PRINT_F("Thread cancel",
|
||||
(u32_t)((thread_cancel_end_tsc - thread_cancel_start_tsc) &
|
||||
0xFFFFFFFFULL),
|
||||
(u32_t) (total_thread_cancel_time & 0xFFFFFFFFULL));
|
||||
|
||||
/*thread abort*/
|
||||
PRINT_F("Thread abort",
|
||||
(u32_t)((thread_abort_end_tsc - thread_abort_start_tsc) &
|
||||
0xFFFFFFFFULL),
|
||||
(u32_t) (total_thread_abort_time & 0xFFFFFFFFULL));
|
||||
|
||||
/*thread suspend*/
|
||||
PRINT_F("Thread Suspend",
|
||||
(u32_t)((thread_suspend_end_tsc - thread_suspend_start_tsc) &
|
||||
0xFFFFFFFFULL),
|
||||
(u32_t) (total_thread_suspend_time & 0xFFFFFFFFULL));
|
||||
|
||||
/*thread resume*/
|
||||
PRINT_F("Thread Resume",
|
||||
(u32_t)((thread_resume_end_tsc - thread_suspend_end_tsc)
|
||||
& 0xFFFFFFFFULL),
|
||||
(u32_t) (total_thread_resume_time & 0xFFFFFFFFULL));
|
||||
|
||||
|
||||
}
|
||||
|
||||
void thread_suspend_test(void *p1, void *p2, void *p3)
|
||||
{
|
||||
thread_suspend_start_tsc = OS_GET_TIME();
|
||||
k_thread_suspend(_current);
|
||||
|
||||
/* comes to this line once its resumed*/
|
||||
thread_resume_end_tsc = OS_GET_TIME();
|
||||
|
||||
/* k_thread_suspend(_current); */
|
||||
}
|
||||
|
||||
void heap_malloc_free_bench(void)
|
||||
{
|
||||
/* heap malloc*/
|
||||
u64_t heap_malloc_start_tsc = 0;
|
||||
u64_t heap_malloc_end_tsc = 0;
|
||||
|
||||
/* heap free*/
|
||||
u64_t heap_free_start_tsc = 0;
|
||||
u64_t heap_free_end_tsc = 0;
|
||||
|
||||
s32_t count = 0;
|
||||
u32_t sum_malloc = 0;
|
||||
u32_t sum_free = 0;
|
||||
|
||||
while (count++ != 100) {
|
||||
heap_malloc_start_tsc = OS_GET_TIME();
|
||||
void *allocated_mem = k_malloc(10);
|
||||
|
||||
heap_malloc_end_tsc = OS_GET_TIME();
|
||||
if (allocated_mem == NULL) {
|
||||
TC_PRINT("\n Malloc failed at count %d\n", count);
|
||||
break;
|
||||
}
|
||||
heap_free_start_tsc = OS_GET_TIME();
|
||||
k_free(allocated_mem);
|
||||
heap_free_end_tsc = OS_GET_TIME();
|
||||
sum_malloc += heap_malloc_end_tsc - heap_malloc_start_tsc;
|
||||
sum_free += heap_free_end_tsc - heap_free_start_tsc;
|
||||
}
|
||||
|
||||
PRINT_F("Heap Malloc",
|
||||
(u32_t)((sum_malloc / count) & 0xFFFFFFFFULL),
|
||||
(u32_t)(SYS_CLOCK_HW_CYCLES_TO_NS(sum_malloc / count)));
|
||||
PRINT_F("Heap Free",
|
||||
(u32_t)((sum_free / count) & 0xFFFFFFFFULL),
|
||||
(u32_t)(SYS_CLOCK_HW_CYCLES_TO_NS(sum_free / count)));
|
||||
|
||||
}
|
95
tests/benchmarks/timing_info/src/timing_info.h
Normal file
95
tests/benchmarks/timing_info/src/timing_info.h
Normal file
|
@ -0,0 +1,95 @@
|
|||
#include <timestamp.h>
|
||||
|
||||
|
||||
#define CALCULATE_TIME(special_char, profile, name) \
|
||||
{ \
|
||||
total_##profile##_##name##_time = SYS_CLOCK_HW_CYCLES_TO_NS( \
|
||||
special_char##profile##_##name##_end_tsc - \
|
||||
special_char##profile##_##name##_start_tsc); \
|
||||
}
|
||||
|
||||
/*total_##profile##_##name##_time =
|
||||
* profile##_##name##_end_us - profile##_##name##_start_us;
|
||||
*/
|
||||
|
||||
#define DECLARE_VAR(profile, name) \
|
||||
u64_t total_##profile##_##name##_time;
|
||||
|
||||
extern u64_t __start_swap_tsc;
|
||||
extern u64_t __end_swap_tsc;
|
||||
extern u64_t __start_intr_tsc;
|
||||
extern u64_t __end_intr_tsc;
|
||||
extern u64_t __start_tick_tsc;
|
||||
extern u64_t __end_tick_tsc;
|
||||
|
||||
|
||||
|
||||
/* Function prototypes */
|
||||
void system_thread_bench(void);
|
||||
void yield_bench(void);
|
||||
void heap_malloc_free_bench(void);
|
||||
void semaphore_bench(void);
|
||||
void mutex_bench(void);
|
||||
void msg_passing_bench(void);
|
||||
|
||||
/* PRINT_F
|
||||
* Macro to print a formatted output string. fprintf is used when
|
||||
* Assumed that sline character array of SLINE_LEN + 1 characters
|
||||
* is defined in the main file
|
||||
*/
|
||||
|
||||
/* #define CSV_FORMAT_OUTPUT */
|
||||
/* printf format defines. */
|
||||
#ifdef CSV_FORMAT_OUTPUT
|
||||
#define FORMAT "%-45s,%4u,%5u\n"
|
||||
#else
|
||||
#define FORMAT "%-45s:%4u cycles , %5u ns\n"
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
|
||||
#define GET_2ND_ARG(first, second, ...) (second)
|
||||
#define GET_3ND_ARG(first, second, third, ...) (third)
|
||||
|
||||
/* Enable this macro to print all the measurements.
|
||||
* Note: Some measurements in few architectures are not valid
|
||||
*/
|
||||
/* #define PRINT_ALL_MEASUREMENTS */
|
||||
#ifndef PRINT_ALL_MEASUREMENTS
|
||||
/*If the measured cycles is greater than 10000 then one of the following is
|
||||
* possible.
|
||||
* 1. the selected measurement is not supported in the architecture
|
||||
* 2. The measurement went wrong somewhere.(less likely to happen)
|
||||
*/
|
||||
#define PRINT_F(...) \
|
||||
{ \
|
||||
if ((GET_2ND_ARG(__VA_ARGS__) <= 20000) && \
|
||||
(GET_2ND_ARG(__VA_ARGS__) != 0)) { \
|
||||
snprintf(sline, 254, FORMAT, ##__VA_ARGS__); \
|
||||
TC_PRINT("%s", sline); \
|
||||
} \
|
||||
}
|
||||
#else
|
||||
/* Prints all outputs*/
|
||||
#define PRINT_F(...) \
|
||||
{ \
|
||||
snprintf(sline, 254, FORMAT, ##__VA_ARGS__); \
|
||||
TC_PRINT("%s", sline); \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* If we are using x86 based controller we tend to read the tsc value which is
|
||||
* always incrementing i.e count up counter.
|
||||
* If we are using the ARM based controllers the systick is a
|
||||
* count down counter.
|
||||
* Hence to calculate the cycles taken up by the code we need to adjust the
|
||||
* values accordingly.
|
||||
*
|
||||
* NOTE: Needed only when reading value from end of swap operation
|
||||
*/
|
||||
#if CONFIG_ARM
|
||||
#define SUBTRACT_CLOCK_CYCLES(val) (SysTick->LOAD - (u32_t)val)
|
||||
#else
|
||||
#define SUBTRACT_CLOCK_CYCLES(val) (val)
|
||||
#endif
|
85
tests/benchmarks/timing_info/src/yield_bench.c
Normal file
85
tests/benchmarks/timing_info/src/yield_bench.c
Normal file
|
@ -0,0 +1,85 @@
|
|||
#include <kernel.h>
|
||||
#include <zephyr.h>
|
||||
#include <tc_util.h>
|
||||
#include <ksched.h>
|
||||
#include "timing_info.h"
|
||||
|
||||
K_SEM_DEFINE(yield_sem, 0, 1);
|
||||
|
||||
/* To time thread creation*/
|
||||
#define STACK_SIZE 500
|
||||
extern char __noinit __stack my_stack_area[STACK_SIZE];
|
||||
extern char __noinit __stack my_stack_area_0[STACK_SIZE];
|
||||
|
||||
/* u64_t thread_yield_start_tsc[1000]; */
|
||||
/* u64_t thread_yield_end_tsc[1000]; */
|
||||
/* location of the time stamps*/
|
||||
extern u32_t __read_swap_end_tsc_value;
|
||||
extern u64_t __common_var_swap_end_tsc;
|
||||
extern char sline[];
|
||||
|
||||
u64_t thread_sleep_start_tsc;
|
||||
u64_t thread_sleep_end_tsc;
|
||||
u64_t thread_start_time;
|
||||
u64_t thread_end_time;
|
||||
static u32_t count;
|
||||
|
||||
void thread_yield0_test(void *p1, void *p2, void *p3);
|
||||
void thread_yield1_test(void *p1, void *p2, void *p3);
|
||||
|
||||
k_tid_t yield0_tid;
|
||||
k_tid_t yield1_tid;
|
||||
void yield_bench(void)
|
||||
{
|
||||
|
||||
/* Thread yield*/
|
||||
|
||||
yield0_tid = k_thread_spawn(my_stack_area,
|
||||
STACK_SIZE,
|
||||
thread_yield0_test,
|
||||
NULL, NULL, NULL,
|
||||
0 /*priority*/, 0, 0);
|
||||
|
||||
yield1_tid = k_thread_spawn(my_stack_area_0,
|
||||
STACK_SIZE,
|
||||
thread_yield1_test,
|
||||
NULL, NULL, NULL,
|
||||
0 /*priority*/, 0, 0);
|
||||
|
||||
/*read the time of start of the sleep till the swap happens */
|
||||
__read_swap_end_tsc_value = 1;
|
||||
|
||||
thread_sleep_start_tsc = OS_GET_TIME();
|
||||
k_sleep(1000);
|
||||
thread_sleep_end_tsc = ((u32_t)__common_var_swap_end_tsc);
|
||||
|
||||
u32_t yield_cycles = (thread_end_time - thread_start_time) / 2000;
|
||||
u32_t sleep_cycles = thread_sleep_end_tsc - thread_sleep_start_tsc;
|
||||
|
||||
PRINT_F("Thread Yield", yield_cycles,
|
||||
SYS_CLOCK_HW_CYCLES_TO_NS(yield_cycles));
|
||||
PRINT_F("Thread Sleep", sleep_cycles,
|
||||
SYS_CLOCK_HW_CYCLES_TO_NS(sleep_cycles));
|
||||
|
||||
}
|
||||
|
||||
|
||||
void thread_yield0_test(void *p1, void *p2, void *p3)
|
||||
{
|
||||
k_sem_take(&yield_sem, 10);
|
||||
thread_start_time = OS_GET_TIME();
|
||||
while (count != 1000) {
|
||||
count++;
|
||||
k_yield();
|
||||
}
|
||||
thread_end_time = OS_GET_TIME();
|
||||
k_thread_abort(yield1_tid);
|
||||
}
|
||||
|
||||
void thread_yield1_test(void *p1, void *p2, void *p3)
|
||||
{
|
||||
k_sem_give(&yield_sem);
|
||||
while (1) {
|
||||
k_yield();
|
||||
}
|
||||
}
|
3
tests/benchmarks/timing_info/testcase.ini
Normal file
3
tests/benchmarks/timing_info/testcase.ini
Normal file
|
@ -0,0 +1,3 @@
|
|||
[test]
|
||||
tags = benchmark
|
||||
arch_whitelist = x86 arm
|
Loading…
Add table
Add a link
Reference in a new issue