test: benchmarking: Timing metrics for the kernel

JIRA: ZEP-1822, ZEP-1823, ZEP-1825

Signed-off-by: Adithya Baglody <adithya.nagaraj.baglody@intel.com>
This commit is contained in:
Adithya Baglody 2017-05-03 13:11:51 +05:30 committed by Anas Nashif
commit d03b2496cd
19 changed files with 1486 additions and 1 deletions

View file

@ -43,6 +43,10 @@ SECTION_FUNC(TEXT, _isr_wrapper)
push {lr} /* lr is now the first item on the stack */ push {lr} /* lr is now the first item on the stack */
#ifdef CONFIG_EXECUTION_BENCHMARKING
bl read_systick_start_of_isr
#endif
#ifdef CONFIG_KERNEL_EVENT_LOGGER_INTERRUPT #ifdef CONFIG_KERNEL_EVENT_LOGGER_INTERRUPT
bl _sys_k_event_logger_interrupt bl _sys_k_event_logger_interrupt
#endif #endif
@ -105,6 +109,20 @@ _idle_state_cleared:
* in thumb mode */ * in thumb mode */
ldm r1!,{r0,r3} /* arg in r0, ISR in r3 */ ldm r1!,{r0,r3} /* arg in r0, ISR in r3 */
#ifdef CONFIG_EXECUTION_BENCHMARKING
#if defined(CONFIG_ARMV6_M)
push {r3}
#endif
push {lr}
bl read_systick_end_of_isr
#if defined(CONFIG_ARMV6_M)
pop {r3}
mov lr,r3
pop {r3}
#else
pop {lr}
#endif
#endif
blx r3 /* call ISR */ blx r3 /* call ISR */
#if defined(CONFIG_ARMV6_M) #if defined(CONFIG_ARMV6_M)

View file

@ -177,6 +177,23 @@ _thread_irq_disabled:
msr PSP, ip msr PSP, ip
#ifdef CONFIG_EXECUTION_BENCHMARKING
#if defined(CONFIG_ARMV6_M)
push {r3}
#endif
push {lr}
bl read_systick_end_of_swap
#if defined(CONFIG_ARMV6_M)
pop {r3}
mov lr,r3
pop {r3}
#else
pop {lr}
#endif
#endif
/* exc return */ /* exc return */
bx lr bx lr
@ -292,6 +309,20 @@ _oops:
SECTION_FUNC(TEXT, __swap) SECTION_FUNC(TEXT, __swap)
#ifdef CONFIG_EXECUTION_BENCHMARKING
#if defined(CONFIG_ARMV6_M)
push {r3}
#endif
push {lr}
bl read_systick_start_of_swap
#if defined(CONFIG_ARMV6_M)
pop {r3}
mov lr,r3
pop {r3}
#else
pop {lr}
#endif
#endif
ldr r1, =_kernel ldr r1, =_kernel
ldr r2, [r1, #_kernel_offset_to_current] ldr r2, [r1, #_kernel_offset_to_current]
str r0, [r2, #_thread_offset_to_basepri] str r0, [r2, #_thread_offset_to_basepri]

View file

@ -78,6 +78,15 @@
*/ */
SECTION_FUNC(TEXT, _interrupt_enter) SECTION_FUNC(TEXT, _interrupt_enter)
#ifdef CONFIG_EXECUTION_BENCHMARKING
pushl %eax
pushl %edx
rdtsc
mov %eax, __start_intr_tsc
mov %edx, __start_intr_tsc+4
pop %edx
pop %eax
#endif
/* /*
* The gen_idt tool creates an interrupt-gate descriptor for * The gen_idt tool creates an interrupt-gate descriptor for
* all connections. The processor will automatically clear the IF * all connections. The processor will automatically clear the IF
@ -246,6 +255,19 @@ alreadyOnIntStack:
*/ */
push %eax push %eax
#endif #endif
#ifdef CONFIG_EXECUTION_BENCHMARKING
/* Save the eax and edx registers before reading the time stamp
* once done pop the values
*/
pushl %eax
pushl %edx
rdtsc
mov %eax,__end_intr_tsc
mov %edx,__end_intr_tsc+4
pop %edx
pop %eax
#endif
#ifdef CONFIG_NESTED_INTERRUPTS #ifdef CONFIG_NESTED_INTERRUPTS
sti /* re-enable interrupts */ sti /* re-enable interrupts */
#endif #endif

View file

@ -77,7 +77,28 @@
* *
*/ */
.macro read_tsc var_name
push %eax
push %edx
rdtsc
mov %eax,\var_name
mov %edx,\var_name+4
pop %edx
pop %eax
.endm
SECTION_FUNC(TEXT, __swap) SECTION_FUNC(TEXT, __swap)
#ifdef CONFIG_EXECUTION_BENCHMARKING
/* Save the eax and edx registers before reading the time stamp
* once done pop the values.
*/
push %eax
push %edx
rdtsc
mov %eax,__start_swap_tsc
mov %edx,__start_swap_tsc+4
pop %edx
pop %eax
#endif
#ifdef CONFIG_X86_IAMCU #ifdef CONFIG_X86_IAMCU
/* save EFLAGS on stack right before return address, just as SYSV would /* save EFLAGS on stack right before return address, just as SYSV would
* have done * have done
@ -335,5 +356,16 @@ skipIntLatencyStop:
*/ */
popl %edx popl %edx
movl %edx, (%esp) movl %edx, (%esp)
#endif
#ifdef CONFIG_EXECUTION_BENCHMARKING
/* Save the eax and edx registers before reading the time stamp
* once done pop the values.
*/
cmp $0x1,__read_swap_end_tsc_value
jne time_read_not_needed
movw $0x2,__read_swap_end_tsc_value
read_tsc __common_var_swap_end_tsc
time_read_not_needed:
#endif #endif
ret ret

View file

@ -211,6 +211,11 @@ void _timer_int_handler(void *unused)
{ {
ARG_UNUSED(unused); ARG_UNUSED(unused);
#ifdef CONFIG_EXECUTION_BENCHMARKING
extern void read_systick_start_of_tick_handler(void);
read_systick_start_of_tick_handler();
#endif
#ifdef CONFIG_KERNEL_EVENT_LOGGER_INTERRUPT #ifdef CONFIG_KERNEL_EVENT_LOGGER_INTERRUPT
extern void _sys_k_event_logger_interrupt(void); extern void _sys_k_event_logger_interrupt(void);
_sys_k_event_logger_interrupt(); _sys_k_event_logger_interrupt();
@ -337,6 +342,11 @@ void _timer_int_handler(void *unused)
#endif /* CONFIG_SYS_POWER_MANAGEMENT */ #endif /* CONFIG_SYS_POWER_MANAGEMENT */
#ifdef CONFIG_EXECUTION_BENCHMARKING
extern void read_systick_end_of_tick_handler(void);
read_systick_end_of_tick_handler();
#endif
extern void _ExcExit(void); extern void _ExcExit(void);
_ExcExit(); _ExcExit();
} }

View file

@ -252,6 +252,16 @@ static inline void program_max_cycles(void)
void _timer_int_handler(void *unused /* parameter is not used */ void _timer_int_handler(void *unused /* parameter is not used */
) )
{ {
#ifdef CONFIG_EXECUTION_BENCHMARKING
__asm__ __volatile__ (
"pushl %eax\n\t"
"pushl %edx\n\t"
"rdtsc\n\t"
"mov %eax, __start_tick_tsc\n\t"
"mov %edx, __start_tick_tsc+4\n\t"
"pop %edx\n\t"
"pop %eax\n\t");
#endif
ARG_UNUSED(unused); ARG_UNUSED(unused);
#if defined(CONFIG_TICKLESS_KERNEL) #if defined(CONFIG_TICKLESS_KERNEL)
@ -324,7 +334,16 @@ void _timer_int_handler(void *unused /* parameter is not used */
_sys_clock_tick_announce(); _sys_clock_tick_announce();
#endif /*CONFIG_TICKLESS_IDLE*/ #endif /*CONFIG_TICKLESS_IDLE*/
#endif #endif
#ifdef CONFIG_EXECUTION_BENCHMARKING
__asm__ __volatile__ (
"pushl %eax\n\t"
"pushl %edx\n\t"
"rdtsc\n\t"
"mov %eax, __end_tick_tsc\n\t"
"mov %edx, __end_tick_tsc+4\n\t"
"pop %edx\n\t"
"pop %eax\n\t");
#endif /* CONFIG_EXECUTION_BENCHMARKING */
} }
#ifdef CONFIG_TICKLESS_KERNEL #ifdef CONFIG_TICKLESS_KERNEL

View file

@ -204,6 +204,16 @@ config INT_LATENCY_BENCHMARK
The metrics are displayed (and a new sampling interval is started) The metrics are displayed (and a new sampling interval is started)
each time int_latency_show() is called thereafter. each time int_latency_show() is called thereafter.
config EXECUTION_BENCHMARKING
bool
prompt "Timing metrics "
default n
help
This option enables the tracking of various times inside the kernel
the exact set of metrics being tracked is board-dependent.
All timing measurements are enabled for X86 and ARM based architectures.
In other architectures only a subset are enabled.
config THREAD_MONITOR config THREAD_MONITOR
bool bool
prompt "Thread monitoring [EXPERIMENTAL]" prompt "Thread monitoring [EXPERIMENTAL]"

View file

@ -56,6 +56,14 @@ u64_t __noinit __main_tsc; /* timestamp when main task starts */
u64_t __noinit __idle_tsc; /* timestamp when CPU goes idle */ u64_t __noinit __idle_tsc; /* timestamp when CPU goes idle */
#endif #endif
#ifdef CONFIG_EXECUTION_BENCHMARKING
u64_t __noinit __start_swap_tsc;
u64_t __noinit __end_swap_tsc;
u64_t __noinit __start_intr_tsc;
u64_t __noinit __end_intr_tsc;
u64_t __noinit __start_tick_tsc;
u64_t __noinit __end_tick_tsc;
#endif
/* init/main and idle threads */ /* init/main and idle threads */
#define IDLE_STACK_SIZE CONFIG_IDLE_STACK_SIZE #define IDLE_STACK_SIZE CONFIG_IDLE_STACK_SIZE

View file

@ -0,0 +1,4 @@
BOARD ?= qemu_x86
CONF_FILE = prj.conf
include ${ZEPHYR_BASE}/Makefile.inc

View file

@ -0,0 +1,111 @@
Title: Timing Information
Description:
Timing measurements for the following features of the OS.
1. Context switch
Time taken to compete the context switch, i.e time spent in _Swap function.
2. Interrupt latency
Time taken from the start of the common interrupt handler till the
actual ISR handler being called.
3. Tick overhead
Time spent by the cpu in the tick handler.
4. Thread Creation
Time spent in creating a thread.
5. Thread cancel
Time taken to cancel the thread which is not yet started execution.
So the time taken to complete the function call is measured.
6. Thread abort
Time taken to abort the thread which has already started execution.
So the time measured is from the start of the function call until the
another thread is swapped in.
7. Thread Suspend
The time measured is from the start of the function call until the current
thread is suspended and another thread is swapped in.
8. Thread Resume
The time measured is from the start of the function call until the required
thread is resumed by swap operation.
9. Thread Yield
The time measured is from the start of the function call until the higher priority
thread is swapped in.
10. Thread Sleep
The time measured is from the start of the function call until the current
thread is put on the timeout queue and another thread is swapped in.
11. Heap Malloc
The time to allocate heap memory in fixed size chunks. Continuously allocate
the memory from the pool. Average time taken to complete the function call
is measured.
12. Heap Free
Time to free heap memory in fixed size chunks. Continuously free
the memory that was allocated. Average time taken to complete the function call
is measured.
13. Semaphore Take with context switch
Taking a semaphore causes a thread waiting on the semaphore to be swapped in.
Thus Time measured is the time taken from the function call till the waiting
thread is swapped in.
14. Semaphore Give with context switch
Giving a semaphore causes a thread waiting on the semaphore to be swapped
in (higher priority).
Thus Time measured is the time taken from the function call till the waiting
thread is swapped in.
15. Semaphore Take without context switch
Time taken to take the semaphore. Thus time to complete the function
call is measured.
16. Semaphore Give without context switch
Time taken to give the semaphore. Thus time to complete the function
call is measured.
17. Mutex lock
Time taken to lock the mutex. Thus time to complete the function
call is measured.
18. Mutex unlock
Time taken to unlock the mutex. Thus time to complete the function
call is measured.
19. Message Queue Put with context switch
A thread is waiting for a message to arrive. The time taken from the start
of the function call till the waiting thread is swapped in is measured.
20. Message Queue Put without context switch
The time taken to complete the function call is measured.
21. Message Queue get with context switch
A thread has gone into waiting because the message queue is full.
When a get occurs this thread gets free to execute. The time taken from
the start of the function call till the waiting thread is
swapped in is measured.
22. Message Queue get without context switch
The time taken to complete the function call is measured.
23. MailBox synchronous put
The time taken from the start of the function call till the waiting thread
is swapped in is measured.
24. MailBox synchronous get
The time taken from the start of the function call till the waiting thread
is swapped in is measured.
25. MailBox asynchronous put
The time taken to complete the function call is measured.
26. MailBox get without context switch
The time taken to complete the function call is measured.
--------------------------------------------------------------------------------
Building and Running Project:
This benchmark outputs to the console. It can be built and executed
on QEMU as follows:
make qemu
--------------------------------------------------------------------------------
Troubleshooting:
Problems caused by out-dated project information can be addressed by
issuing one of the following commands then rebuilding the project:
make clean # discard results of previous builds
# but keep existing configuration info
or
make pristine # discard results of previous builds
# and restore pre-defined configuration info
--------------------------------------------------------------------------------
Sample Output:

View file

@ -0,0 +1,4 @@
CONFIG_EXECUTION_BENCHMARKING=y
CONFIG_TEST_RANDOM_GENERATOR=y
CONFIG_HEAP_MEM_POOL_SIZE=256
CONFIG_MAIN_STACK_SIZE=2048

View file

@ -0,0 +1,7 @@
ccflags-y += -I$(ZEPHYR_BASE)/tests/include
obj-$(CONFIG_EXECUTION_BENCHMARKING) += main_benchmark.o
obj-$(CONFIG_EXECUTION_BENCHMARKING) += thread_bench.o
obj-$(CONFIG_EXECUTION_BENCHMARKING) += yield_bench.o
obj-$(CONFIG_EXECUTION_BENCHMARKING) += semaphore_bench.o
obj-$(CONFIG_EXECUTION_BENCHMARKING) += msg_passing_bench.o

View file

@ -0,0 +1,57 @@
/*
* Copyright (c) 2013-2015 Wind River Systems, Inc.
* Copyright (c) 2016 Intel Corporation.
*
* SPDX-License-Identifier: Apache-2.0
*/
/**
* @file
* @brief Measure time
*
*/
#include <kernel.h>
#include <zephyr.h>
#include <tc_util.h>
#include <ksched.h>
#include "timing_info.h"
void main(void)
{
u32_t freq = CONFIG_SYS_CLOCK_HW_CYCLES_PER_SEC / 1000000;
TC_START("Time Measurement");
TC_PRINT("Timing Results: Clock Frequency: %d MHz\n", freq);
/*******************************************************************/
/* System parameters and thread Benchmarking*/
system_thread_bench();
/*******************************************************************/
/* Thread yield*/
yield_bench();
/*******************************************************************/
/* heap Memory benchmarking*/
heap_malloc_free_bench();
/*******************************************************************/
/* Semaphore take and get*/
semaphore_bench();
/*******************************************************************/
/* mutex lock and unlock*/
mutex_bench();
/*******************************************************************/
/* mutex lock and unlock*/
msg_passing_bench();
TC_PRINT("Timing Measurement finished\n");
/* for sanity regression test utility. */
TC_END_RESULT(TC_PASS);
TC_END_REPORT(TC_PASS);
}

View file

@ -0,0 +1,439 @@
/*
* Copyright (c) 2013-2015 Wind River Systems, Inc.
* Copyright (c) 2016 Intel Corporation.
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <kernel.h>
#include <zephyr.h>
#include <tc_util.h>
#include <ksched.h>
#include "timing_info.h"
extern char sline[];
/* mailbox*/
/* K_MBOX_DEFINE(test_msg_queue) */
K_MSGQ_DEFINE(benchmark_q, sizeof(int), 10, 4);
K_MSGQ_DEFINE(benchmark_q_get, sizeof(int), 3, 4);
K_MBOX_DEFINE(benchmark_mbox);
/* Declare a semaphore for the msgq*/
K_SEM_DEFINE(mbox_sem, 1, 1);
/* common location for the swap to write the tsc data*/
extern u32_t __read_swap_end_tsc_value;
extern u64_t __common_var_swap_end_tsc;
/* location of the time stamps*/
u64_t __msg_q_put_state;
u64_t __msg_q_get_state;
u64_t __msg_q_put_w_cxt_start_tsc;
u64_t __msg_q_put_w_cxt_end_tsc;
u64_t __msg_q_put_wo_cxt_start_tsc; /* without context switch */
u64_t __msg_q_put_wo_cxt_end_tsc;
u64_t __msg_q_get_w_cxt_start_tsc;
u64_t __msg_q_get_w_cxt_end_tsc;
u64_t msg_q_get_wo_cxt_start_tsc;
u64_t msg_q_get_wo_cxt_end_tsc;
u32_t __mbox_sync_put_state;
u64_t mbox_sync_put_start_tsc;
u64_t mbox_sync_put_end_tsc;
u32_t __mbox_sync_get_state;
u64_t mbox_sync_get_start_tsc;
u64_t mbox_sync_get_end_tsc;
u64_t mbox_async_put_start_tsc;
u64_t mbox_async_put_end_tsc;
u64_t mbox_get_w_cxt_start_tsc;
u64_t mbox_get_w_cxt_end_tsc;
/*For benchmarking msg queues*/
k_tid_t producer_w_cxt_switch_tid;
k_tid_t producer_wo_cxt_switch_tid;
k_tid_t producer_get_w_cxt_switch_tid;
k_tid_t consumer_get_w_cxt_switch_tid;
k_tid_t consumer_tid;
k_tid_t thread_mbox_sync_put_send_tid;
k_tid_t thread_mbox_sync_put_receive_tid;
k_tid_t thread_mbox_sync_get_send_tid;
k_tid_t thread_mbox_sync_get_receive_tid;
k_tid_t thread_mbox_async_put_send_tid;
k_tid_t thread_mbox_async_put_receive_tid;
/* To time thread creation*/
#define STACK_SIZE 500
extern char __noinit __stack my_stack_area[STACK_SIZE];
extern char __noinit __stack my_stack_area_0[STACK_SIZE];
/* thread functions*/
void thread_producer_msgq_w_cxt_switch(void *p1, void *p2, void *p3);
void thread_producer_msgq_wo_cxt_switch(void *p1, void *p2, void *p3);
void thread_producer_get_msgq_w_cxt_switch(void *p1, void *p2, void *p3);
void thread_consumer_get_msgq_w_cxt_switch(void *p1, void *p2, void *p3);
void thread_mbox_sync_put_send(void *p1, void *p2, void *p3);
void thread_mbox_sync_put_receive(void *p1, void *p2, void *p3);
void thread_mbox_sync_get_send(void *p1, void *p2, void *p3);
void thread_mbox_sync_get_receive(void *p1, void *p2, void *p3);
void thread_mbox_async_put_send(void *p1, void *p2, void *p3);
void thread_mbox_async_put_receive(void *p1, void *p2, void *p3);
volatile u64_t time_check;
int received_data_get;
int received_data_consumer;
int data_to_send;
void msg_passing_bench(void)
{
DECLARE_VAR(msg_q, put_w_cxt)
DECLARE_VAR(msg_q, put_wo_cxt)
DECLARE_VAR(msg_q, get_w_cxt)
DECLARE_VAR(msg_q, get_wo_cxt)
DECLARE_VAR(mbox, sync_put)
DECLARE_VAR(mbox, sync_get)
DECLARE_VAR(mbox, async_put)
DECLARE_VAR(mbox, get_w_cxt)
/*******************************************************************/
/* Msg queue for put*/
int received_data = 0;
producer_w_cxt_switch_tid =
k_thread_spawn(my_stack_area, STACK_SIZE,
thread_producer_msgq_w_cxt_switch, NULL,
NULL, NULL, 2 /*priority*/, 0, 50);
u32_t msg_status = k_msgq_get(&benchmark_q, &received_data, 300);
producer_wo_cxt_switch_tid =
k_thread_spawn(my_stack_area_0, STACK_SIZE,
thread_producer_msgq_wo_cxt_switch,
NULL, NULL, NULL, -2 /*priority*/, 0, 0);
k_thread_abort(producer_w_cxt_switch_tid);
k_thread_abort(producer_wo_cxt_switch_tid);
__msg_q_put_w_cxt_end_tsc = ((u32_t)__common_var_swap_end_tsc);
ARG_UNUSED(msg_status);
/*******************************************************************/
/* Msg queue for get*/
producer_get_w_cxt_switch_tid =
k_thread_spawn(my_stack_area,
STACK_SIZE,
thread_producer_get_msgq_w_cxt_switch, NULL,
NULL, NULL, 1 /*priority*/, 0, 50);
consumer_get_w_cxt_switch_tid =
k_thread_spawn(my_stack_area_0,
STACK_SIZE,
thread_consumer_get_msgq_w_cxt_switch,
NULL, NULL, NULL,
2 /*priority*/, 0, 50);
k_sleep(2000); /* make the main thread sleep */
k_thread_abort(producer_get_w_cxt_switch_tid);
__msg_q_get_w_cxt_end_tsc = (__common_var_swap_end_tsc);
/*******************************************************************/
/* Msg queue for get*/
/* from previous step got the msg_q full now just do a simple read*/
msg_q_get_wo_cxt_start_tsc = OS_GET_TIME();
received_data_get = k_msgq_get(&benchmark_q_get,
&received_data_consumer,
K_NO_WAIT);
msg_q_get_wo_cxt_end_tsc = OS_GET_TIME();
/*******************************************************************/
/* Msg box to benchmark sync put */
thread_mbox_sync_put_send_tid =
k_thread_spawn(my_stack_area,
STACK_SIZE,
thread_mbox_sync_put_send,
NULL, NULL, NULL,
2 /*priority*/, 0, 0);
thread_mbox_sync_put_receive_tid =
k_thread_spawn(my_stack_area_0,
STACK_SIZE,
thread_mbox_sync_put_receive,
NULL, NULL, NULL,
1 /*priority*/, 0, 0);
k_sleep(1000); /* make the main thread sleep */
mbox_sync_put_end_tsc = (__common_var_swap_end_tsc);
/*******************************************************************/
/* Msg box to benchmark sync get */
thread_mbox_sync_get_send_tid =
k_thread_spawn(my_stack_area,
STACK_SIZE,
thread_mbox_sync_get_send,
NULL, NULL, NULL,
1 /*prio*/, 0, 0);
thread_mbox_sync_get_receive_tid =
k_thread_spawn(my_stack_area_0,
STACK_SIZE,
thread_mbox_sync_get_receive, NULL,
NULL, NULL, 2 /*priority*/, 0, 0);
k_sleep(1000); /* make the main thread sleep */
mbox_sync_get_end_tsc = (__common_var_swap_end_tsc);
/*******************************************************************/
/* Msg box to benchmark async put */
thread_mbox_async_put_send_tid =
k_thread_spawn(my_stack_area,
STACK_SIZE,
thread_mbox_async_put_send,
NULL, NULL, NULL,
2 /*prio*/, 0, 0);
thread_mbox_async_put_receive_tid =
k_thread_spawn(my_stack_area_0,
STACK_SIZE,
thread_mbox_async_put_receive,
NULL, NULL, NULL,
3 /*priority*/, 0, 0);
k_sleep(1000); /* make the main thread sleep */
/*******************************************************************/
int single_element_buffer = 0;
struct k_mbox_msg rx_msg = {
.size = sizeof(int),
.rx_source_thread = K_ANY,
.tx_target_thread = K_ANY
};
mbox_get_w_cxt_start_tsc = OS_GET_TIME();
k_mbox_get(&benchmark_mbox, &rx_msg, &single_element_buffer, 300);
mbox_get_w_cxt_end_tsc = OS_GET_TIME();
/*******************************************************************/
/* calculation for msg put with context switch */
CALCULATE_TIME(__, msg_q, put_w_cxt)
/* calculation for msg put without context switch */
CALCULATE_TIME(__, msg_q, put_wo_cxt)
/* calculation for msg get */
CALCULATE_TIME(__, msg_q, get_w_cxt)
/* calculation for msg get without context switch */
CALCULATE_TIME(, msg_q, get_wo_cxt)
/*calculation for msg box for sync put
* (i.e with a context to make the pending rx ready)
*/
CALCULATE_TIME(, mbox, sync_put)
/*calculation for msg box for sync get
* (i.e with a context to make the pending tx ready)
*/
CALCULATE_TIME(, mbox, sync_get)
/* calculation for msg box for async put */
CALCULATE_TIME(, mbox, async_put)
/* calculation for msg box for get without any context switch */
CALCULATE_TIME(, mbox, get_w_cxt)
/*******************************************************************/
/* Only print lower 32bit of time result */
PRINT_F("Message Queue Put with context switch",
(u32_t)((__msg_q_put_w_cxt_end_tsc -
__msg_q_put_w_cxt_start_tsc) & 0xFFFFFFFFULL),
(u32_t) (total_msg_q_put_w_cxt_time & 0xFFFFFFFFULL));
PRINT_F("Message Queue Put without context switch",
(u32_t)((__msg_q_put_wo_cxt_end_tsc -
__msg_q_put_wo_cxt_start_tsc) & 0xFFFFFFFFULL),
(u32_t) (total_msg_q_put_wo_cxt_time & 0xFFFFFFFFULL));
PRINT_F("Message Queue get with context switch",
(u32_t)((__msg_q_get_w_cxt_end_tsc -
__msg_q_get_w_cxt_start_tsc) & 0xFFFFFFFFULL),
(u32_t) (total_msg_q_get_w_cxt_time & 0xFFFFFFFFULL));
PRINT_F("Message Queue get without context switch",
(u32_t)((msg_q_get_wo_cxt_end_tsc -
msg_q_get_wo_cxt_start_tsc) & 0xFFFFFFFFULL),
(u32_t) (total_msg_q_get_wo_cxt_time & 0xFFFFFFFFULL));
PRINT_F("MailBox synchronous put",
(u32_t)((mbox_sync_put_end_tsc - mbox_sync_put_start_tsc)
& 0xFFFFFFFFULL),
(u32_t) (total_mbox_sync_put_time & 0xFFFFFFFFULL));
PRINT_F("MailBox synchronous get",
(u32_t)((mbox_sync_get_end_tsc - mbox_sync_get_start_tsc)
& 0xFFFFFFFFULL),
(u32_t) (total_mbox_sync_get_time & 0xFFFFFFFFULL));
PRINT_F("MailBox asynchronous put",
(u32_t)((mbox_async_put_end_tsc - mbox_async_put_start_tsc)
& 0xFFFFFFFFULL),
(u32_t) (total_mbox_async_put_time & 0xFFFFFFFFULL));
PRINT_F("MailBox get without context switch",
(u32_t)((mbox_get_w_cxt_end_tsc - mbox_get_w_cxt_start_tsc)
& 0xFFFFFFFFULL),
(u32_t) (total_mbox_get_w_cxt_time & 0xFFFFFFFFULL));
}
void thread_producer_msgq_w_cxt_switch(void *p1, void *p2, void *p3)
{
int data_to_send = 5050;
__read_swap_end_tsc_value = 1;
__msg_q_put_w_cxt_start_tsc = (u32_t) OS_GET_TIME();
k_msgq_put(&benchmark_q, &data_to_send, K_NO_WAIT);
}
void thread_producer_msgq_wo_cxt_switch(void *p1, void *p2, void *p3)
{
int data_to_send = 5050;
__msg_q_put_wo_cxt_start_tsc = OS_GET_TIME();
k_msgq_put(&benchmark_q, &data_to_send, K_NO_WAIT);
__msg_q_put_wo_cxt_end_tsc = OS_GET_TIME();
}
void thread_producer_get_msgq_w_cxt_switch(void *p1, void *p2, void *p3)
{
int status = 0;
while (1) {
if (status == 0) {
data_to_send++;
}
status = k_msgq_put(&benchmark_q_get, &data_to_send, 20);
}
}
void thread_consumer_get_msgq_w_cxt_switch(void *p1, void *p2, void *p3)
{
producer_get_w_cxt_switch_tid->base.timeout.delta_ticks_from_prev =
_EXPIRED;
__read_swap_end_tsc_value = 1;
__msg_q_get_w_cxt_start_tsc = OS_GET_TIME();
received_data_get = k_msgq_get(&benchmark_q_get,
&received_data_consumer,
300);
time_check = OS_GET_TIME();
}
void thread_mbox_sync_put_send(void *p1, void *p2, void *p3)
{
int single_element_buffer = 1234;
struct k_mbox_msg tx_msg = {
.size = sizeof(int),
.info = 5050,
.tx_data = &single_element_buffer,
.rx_source_thread = K_ANY,
.tx_target_thread = K_ANY,
};
mbox_sync_put_start_tsc = OS_GET_TIME();
__read_swap_end_tsc_value = 1;
k_mbox_put(&benchmark_mbox, &tx_msg, 300);
time_check = OS_GET_TIME();
}
void thread_mbox_sync_put_receive(void *p1, void *p2, void *p3)
{
int single_element_buffer = 1234;
struct k_mbox_msg rx_msg = {
.size = sizeof(int),
.rx_source_thread = K_ANY,
.tx_target_thread = K_ANY
};
k_mbox_get(&benchmark_mbox, &rx_msg, &single_element_buffer, 300);
}
void thread_mbox_sync_get_send(void *p1, void *p2, void *p3)
{
int single_element_buffer = 1234;
struct k_mbox_msg tx_msg = {
.size = sizeof(int),
.info = 5050,
.tx_data = &single_element_buffer,
.rx_source_thread = K_ANY,
.tx_target_thread = K_ANY,
};
k_mbox_put(&benchmark_mbox, &tx_msg, 300);
}
void thread_mbox_sync_get_receive(void *p1, void *p2, void *p3)
{
int single_element_buffer;
struct k_mbox_msg rx_msg = {
.size = sizeof(int),
.rx_source_thread = K_ANY,
.tx_target_thread = K_ANY
};
__read_swap_end_tsc_value = 1;
mbox_sync_get_start_tsc = OS_GET_TIME();
k_mbox_get(&benchmark_mbox, &rx_msg, &single_element_buffer, 300);
}
void thread_mbox_async_put_send(void *p1, void *p2, void *p3)
{
int single_element_buffer = 1234;
struct k_mbox_msg tx_msg = {
.size = sizeof(int),
.info = 5050,
.tx_data = &single_element_buffer,
.rx_source_thread = K_ANY,
.tx_target_thread = K_ANY,
};
mbox_async_put_start_tsc = OS_GET_TIME();
k_mbox_async_put(&benchmark_mbox, &tx_msg, &mbox_sem);
mbox_async_put_end_tsc = OS_GET_TIME();
k_mbox_async_put(&benchmark_mbox, &tx_msg, &mbox_sem);
}
void thread_mbox_async_put_receive(void *p1, void *p2, void *p3)
{
int single_element_buffer;
struct k_mbox_msg rx_msg = {
.size = sizeof(int),
.rx_source_thread = K_ANY,
.tx_target_thread = K_ANY
};
k_mbox_get(&benchmark_mbox, &rx_msg, &single_element_buffer, 300);
}

View file

@ -0,0 +1,183 @@
#include <kernel.h>
#include <zephyr.h>
#include <tc_util.h>
#include <ksched.h>
#include "timing_info.h"
extern char sline[];
K_SEM_DEFINE(sem_bench, 0, 1);
K_SEM_DEFINE(sem_bench_1, 0, 1);
/* To time thread creation*/
#define STACK_SIZE 500
extern char __noinit __stack my_stack_area[STACK_SIZE];
extern char __noinit __stack my_stack_area_0[STACK_SIZE];
/* u64_t thread_yield_start_tsc[1000]; */
/* u64_t thread_yield_end_tsc[1000]; */
u64_t thread_start_time;
u64_t thread_end_time;
u64_t sem_start_time;
u64_t sem_end_time;
u64_t sem_give_start_time;
u64_t sem_give_end_time;
u32_t swap_called;
u64_t test_time2;
u64_t test_time1;
void thread_sem0_test(void *p1, void *p2, void *p3);
void thread_sem1_test(void *p1, void *p2, void *p3);
void thread_sem0_give_test(void *p1, void *p2, void *p3);
void thread_sem1_give_test(void *p1, void *p2, void *p3);
k_tid_t sem0_tid;
k_tid_t sem1_tid;
extern u64_t __common_var_swap_end_tsc;
extern u32_t __read_swap_end_tsc_value;
void semaphore_bench(void)
{
/* Thread yield*/
sem0_tid = k_thread_spawn(my_stack_area,
STACK_SIZE,
thread_sem0_test, NULL, NULL, NULL,
2 /*priority*/, 0, 0);
sem1_tid = k_thread_spawn(my_stack_area_0,
STACK_SIZE, thread_sem1_test,
NULL, NULL, NULL,
2 /*priority*/, 0, 0);
k_sleep(1000);
/* u64_t test_time1 = _tsc_read(); */
sem_end_time = (__common_var_swap_end_tsc);
u32_t sem_cycles = sem_end_time - sem_start_time;
sem0_tid = k_thread_spawn(my_stack_area,
STACK_SIZE, thread_sem0_give_test,
NULL, NULL, NULL,
2 /*priority*/, 0, 0);
sem1_tid = k_thread_spawn(my_stack_area_0,
STACK_SIZE, thread_sem1_give_test,
NULL, NULL, NULL,
2 /*priority*/, 0, 0);
k_sleep(1000);
sem_give_end_time = (__common_var_swap_end_tsc);
u32_t sem_give_cycles = sem_give_end_time - sem_give_start_time;
/* Semaphore without context switch*/
u32_t sem_give_wo_cxt_start = OS_GET_TIME();
k_sem_give(&sem_bench);
u32_t sem_give_wo_cxt_end = OS_GET_TIME();
u32_t sem_give_wo_cxt_cycles = sem_give_wo_cxt_end -
sem_give_wo_cxt_start;
u32_t sem_take_wo_cxt_start = OS_GET_TIME();
k_sem_take(&sem_bench, 10);
u32_t sem_take_wo_cxt_end = OS_GET_TIME();
u32_t sem_take_wo_cxt_cycles = sem_take_wo_cxt_end -
sem_take_wo_cxt_start;
/* TC_PRINT("test_time1 , %d cycles\n", (u32_t)test_time1); */
/* TC_PRINT("test_time2 , %d cycles\n", (u32_t)test_time2); */
PRINT_F("Semaphore Take with context switch",
sem_cycles, SYS_CLOCK_HW_CYCLES_TO_NS(sem_cycles));
PRINT_F("Semaphore Give with context switch",
sem_give_cycles, SYS_CLOCK_HW_CYCLES_TO_NS(sem_give_cycles));
PRINT_F("Semaphore Take without context switch",
sem_take_wo_cxt_cycles,
SYS_CLOCK_HW_CYCLES_TO_NS(sem_take_wo_cxt_cycles));
PRINT_F("Semaphore Give without context switch",
sem_give_wo_cxt_cycles,
SYS_CLOCK_HW_CYCLES_TO_NS(sem_give_wo_cxt_cycles));
}
/******************************************************************************/
K_MUTEX_DEFINE(mutex0);
void mutex_bench(void)
{
u32_t mutex_lock_start_tsc;
u32_t mutex_lock_end_tsc;
u32_t mutex_lock_diff = 0;
u32_t mutex_unlock_start_tsc;
u32_t mutex_unlock_end_tsc;
u32_t mutex_unlock_diff = 0;
for (int i = 0; i < 1000; i++) {
mutex_lock_start_tsc = OS_GET_TIME();
k_mutex_lock(&mutex0, 100);
mutex_lock_end_tsc = OS_GET_TIME();
mutex_unlock_start_tsc = OS_GET_TIME();
k_mutex_unlock(&mutex0);
mutex_unlock_end_tsc = OS_GET_TIME();
mutex_lock_diff += (mutex_lock_end_tsc - mutex_lock_start_tsc);
mutex_unlock_diff += (mutex_unlock_end_tsc -
mutex_unlock_start_tsc);
}
PRINT_F("Mutex lock", mutex_lock_diff / 1000,
SYS_CLOCK_HW_CYCLES_TO_NS(mutex_lock_diff / 1000));
PRINT_F("Mutex unlock", mutex_unlock_diff / 1000,
SYS_CLOCK_HW_CYCLES_TO_NS(mutex_unlock_diff / 1000));
}
/******************************************************************************/
void thread_sem1_test(void *p1, void *p2, void *p3)
{
k_sem_give(&sem_bench); /* sync the 2 threads*/
__read_swap_end_tsc_value = 1;
sem_start_time = OS_GET_TIME();
k_sem_take(&sem_bench, 10);
}
u32_t sem_count;
void thread_sem0_test(void *p1, void *p2, void *p3)
{
k_sem_take(&sem_bench, 10);/* To sync threads */
k_sem_give(&sem_bench);
sem_count++;
k_thread_abort(sem0_tid);
}
/******************************************************************************/
void thread_sem1_give_test(void *p1, void *p2, void *p3)
{
k_sem_give(&sem_bench); /* sync the 2 threads*/
k_sem_take(&sem_bench_1, 1000); /* clear the previous sem_give*/
/* test_time1 = OS_GET_TIME(); */
}
void thread_sem0_give_test(void *p1, void *p2, void *p3)
{
k_sem_take(&sem_bench, 10);/* To sync threads */
/* test_time2 = OS_GET_TIME(); */
/* To make sure that the sem give will cause a swap to occur */
k_thread_priority_set(sem1_tid, 1);
__read_swap_end_tsc_value = 1;
sem_give_start_time = OS_GET_TIME();
k_sem_give(&sem_bench_1);
}

View file

@ -0,0 +1,347 @@
/*
* Copyright (c) 2013-2015 Wind River Systems, Inc.
* Copyright (c) 2016 Intel Corporation.
*
* SPDX-License-Identifier: Apache-2.0
*/
/**
* @file
* @brief Measure time
*
*/
#include <kernel.h>
#include <zephyr.h>
#include <tc_util.h>
#include <ksched.h>
#include "timing_info.h"
char sline[256];
/* FILE *output_file = stdout; */
/* location of the time stamps*/
u32_t __read_swap_end_tsc_value;
u64_t __common_var_swap_end_tsc;
volatile u64_t thread_abort_end_tsc;
volatile u64_t thread_abort_start_tsc;
/* Thread suspend*/
volatile u64_t thread_suspend_start_tsc;
volatile u64_t thread_suspend_end_tsc;
/* Thread resume*/
volatile u64_t thread_resume_start_tsc;
volatile u64_t thread_resume_end_tsc;
/* Thread sleep*/
volatile u64_t thread_sleep_start_tsc;
volatile u64_t thread_sleep_end_tsc;
/*For benchmarking msg queues*/
k_tid_t producer_tid;
k_tid_t consumer_tid;
/* To time thread creation*/
#define STACK_SIZE 500
char __noinit __stack my_stack_area[STACK_SIZE];
char __noinit __stack my_stack_area_0[STACK_SIZE];
u32_t __read_swap_end_tsc_value_test = 1;
u64_t dummy_time;
u64_t start_time;
u64_t test_end_time;
#if CONFIG_X86
u32_t benchmarking_overhead_swap(void)
{
__asm__ __volatile__ (
"pushl %eax\n\t"
"pushl %edx\n\t"
"rdtsc\n\t"
"mov %eax,start_time\n\t"
"mov %edx,start_time+4\n\t"
"cmp $0x1,__read_swap_end_tsc_value_test\n\t"
"jne time_read_not_needed_test\n\t"
"movw $0x2,__read_swap_end_tsc_value\n\t"
"pushl %eax\n\t"
"pushl %edx\n\t"
"rdtsc\n\t"
"mov %eax,dummy_time\n\t"
"mov %edx,dummy_time+4\n\t"
"pop %edx\n\t"
"pop %eax\n\t"
"time_read_not_needed_test:\n\t"
"rdtsc\n\t"
"mov %eax,test_end_time\n\t"
"mov %edx,test_end_time+4\n\t"
"pop %edx\n\t"
"pop %eax\n\t");
/* u64_t end_time = OS_GET_TIME(); */
return(test_end_time - start_time);
}
#endif
#if CONFIG_ARM
void read_systick_start_of_swap(void)
{
__start_swap_tsc = (u32_t)SysTick->VAL;
}
void read_systick_end_of_swap(void)
{
if (__read_swap_end_tsc_value == 1) {
__read_swap_end_tsc_value = 2;
__common_var_swap_end_tsc = OS_GET_TIME();
}
}
void read_systick_start_of_isr(void)
{
__start_intr_tsc = (u32_t)SysTick->VAL;
}
void read_systick_end_of_isr(void)
{
__end_intr_tsc = (u32_t)SysTick->VAL;
}
void read_systick_start_of_tick_handler(void)
{
__start_tick_tsc = (u32_t)SysTick->VAL;
}
void read_systick_end_of_tick_handler(void)
{
__end_tick_tsc = (u32_t)SysTick->VAL;
}
#endif
void test_thread_entry(void *p, void *p1, void *p2)
{
static int i;
i++;
}
void thread_swap_test(void *p1, void *p2, void *p3)
{
__read_swap_end_tsc_value = 1;
thread_abort_start_tsc = OS_GET_TIME();
k_thread_abort(_current);
}
void thread_suspend_test(void *p1, void *p2, void *p3);
void yield_bench(void);
void heap_malloc_free_bench(void);
void main_sem_bench(void);
void main_mutex_bench(void);
void main_msg_bench(void);
void system_thread_bench(void)
{
u64_t total_intr_time;
u64_t total_tick_time;
/*Thread create*/
u64_t thread_create_start_tsc;
u64_t thread_create_end_tsc;
DECLARE_VAR(thread, create)
/*Thread cancel*/
u64_t thread_cancel_start_tsc;
u64_t thread_cancel_end_tsc;
DECLARE_VAR(thread, cancel)
/* Thread Abort*/
DECLARE_VAR(thread, abort)
/* Thread Suspend*/
DECLARE_VAR(thread, suspend)
/* Thread Resume*/
DECLARE_VAR(thread, resume)
/* to measure context switch time */
k_thread_spawn(my_stack_area_0,
STACK_SIZE,
thread_swap_test,
NULL, NULL, NULL,
-1 /*priority*/, 0, 0);
thread_abort_end_tsc = (__common_var_swap_end_tsc);
__end_swap_tsc = __common_var_swap_end_tsc;
u32_t total_swap_cycles = __end_swap_tsc -
SUBTRACT_CLOCK_CYCLES(__start_swap_tsc);
/* Interrupt latency*/
total_intr_time = SYS_CLOCK_HW_CYCLES_TO_NS(__end_intr_tsc -
__start_intr_tsc);
/* tick overhead*/
total_tick_time = SYS_CLOCK_HW_CYCLES_TO_NS(__end_tick_tsc -
__start_tick_tsc);
/*******************************************************************/
/* thread create*/
thread_create_start_tsc = OS_GET_TIME();
k_tid_t my_tid = k_thread_spawn(my_stack_area,
STACK_SIZE,
thread_swap_test,
NULL, NULL, NULL,
5 /*priority*/, 0, 10);
thread_create_end_tsc = OS_GET_TIME();
/* thread Termination*/
thread_cancel_start_tsc = OS_GET_TIME();
s32_t ret_value_thread_cancel = k_thread_cancel(my_tid);
thread_cancel_end_tsc = OS_GET_TIME();
ARG_UNUSED(ret_value_thread_cancel);
/* Thread suspend*/
k_tid_t sus_res_tid = k_thread_spawn(my_stack_area,
STACK_SIZE,
thread_suspend_test,
NULL, NULL, NULL,
-1 /*priority*/, 0, 0);
thread_suspend_end_tsc = OS_GET_TIME();
/* At this point test for resume*/
k_thread_resume(sus_res_tid);
/* calculation for creation */
CALCULATE_TIME(, thread, create)
/* calculation for cancel */
CALCULATE_TIME(, thread, cancel)
/* calculation for abort */
CALCULATE_TIME(, thread, abort)
/* calculation for suspend */
CALCULATE_TIME(, thread, suspend)
/* calculation for resume */
thread_resume_start_tsc = thread_suspend_end_tsc;
CALCULATE_TIME(, thread, resume)
/*******************************************************************/
/* Only print lower 32bit of time result */
PRINT_F("Context switch",
(u32_t)(total_swap_cycles & 0xFFFFFFFFULL),
(u32_t)SYS_CLOCK_HW_CYCLES_TO_NS(total_swap_cycles));
/*TC_PRINT("Swap Overhead:%d cycles\n", benchmarking_overhead_swap());*/
/*Interrupt latency */
u32_t intr_latency_cycles = SUBTRACT_CLOCK_CYCLES(__end_intr_tsc) -
SUBTRACT_CLOCK_CYCLES(__start_intr_tsc);
PRINT_F("Interrupt latency",
(u32_t)(intr_latency_cycles),
(u32_t) (SYS_CLOCK_HW_CYCLES_TO_NS(intr_latency_cycles)));
/*tick overhead*/
u32_t tick_overhead_cycles = SUBTRACT_CLOCK_CYCLES(__end_tick_tsc) -
SUBTRACT_CLOCK_CYCLES(__start_tick_tsc);
PRINT_F("Tick overhead",
(u32_t)(tick_overhead_cycles),
(u32_t) (SYS_CLOCK_HW_CYCLES_TO_NS(tick_overhead_cycles)));
/*thread creation*/
PRINT_F("Thread Creation",
(u32_t)((thread_create_end_tsc - thread_create_start_tsc) &
0xFFFFFFFFULL),
(u32_t) (total_thread_create_time & 0xFFFFFFFFULL));
/*thread cancel*/
PRINT_F("Thread cancel",
(u32_t)((thread_cancel_end_tsc - thread_cancel_start_tsc) &
0xFFFFFFFFULL),
(u32_t) (total_thread_cancel_time & 0xFFFFFFFFULL));
/*thread abort*/
PRINT_F("Thread abort",
(u32_t)((thread_abort_end_tsc - thread_abort_start_tsc) &
0xFFFFFFFFULL),
(u32_t) (total_thread_abort_time & 0xFFFFFFFFULL));
/*thread suspend*/
PRINT_F("Thread Suspend",
(u32_t)((thread_suspend_end_tsc - thread_suspend_start_tsc) &
0xFFFFFFFFULL),
(u32_t) (total_thread_suspend_time & 0xFFFFFFFFULL));
/*thread resume*/
PRINT_F("Thread Resume",
(u32_t)((thread_resume_end_tsc - thread_suspend_end_tsc)
& 0xFFFFFFFFULL),
(u32_t) (total_thread_resume_time & 0xFFFFFFFFULL));
}
void thread_suspend_test(void *p1, void *p2, void *p3)
{
thread_suspend_start_tsc = OS_GET_TIME();
k_thread_suspend(_current);
/* comes to this line once its resumed*/
thread_resume_end_tsc = OS_GET_TIME();
/* k_thread_suspend(_current); */
}
void heap_malloc_free_bench(void)
{
/* heap malloc*/
u64_t heap_malloc_start_tsc = 0;
u64_t heap_malloc_end_tsc = 0;
/* heap free*/
u64_t heap_free_start_tsc = 0;
u64_t heap_free_end_tsc = 0;
s32_t count = 0;
u32_t sum_malloc = 0;
u32_t sum_free = 0;
while (count++ != 100) {
heap_malloc_start_tsc = OS_GET_TIME();
void *allocated_mem = k_malloc(10);
heap_malloc_end_tsc = OS_GET_TIME();
if (allocated_mem == NULL) {
TC_PRINT("\n Malloc failed at count %d\n", count);
break;
}
heap_free_start_tsc = OS_GET_TIME();
k_free(allocated_mem);
heap_free_end_tsc = OS_GET_TIME();
sum_malloc += heap_malloc_end_tsc - heap_malloc_start_tsc;
sum_free += heap_free_end_tsc - heap_free_start_tsc;
}
PRINT_F("Heap Malloc",
(u32_t)((sum_malloc / count) & 0xFFFFFFFFULL),
(u32_t)(SYS_CLOCK_HW_CYCLES_TO_NS(sum_malloc / count)));
PRINT_F("Heap Free",
(u32_t)((sum_free / count) & 0xFFFFFFFFULL),
(u32_t)(SYS_CLOCK_HW_CYCLES_TO_NS(sum_free / count)));
}

View file

@ -0,0 +1,95 @@
#include <timestamp.h>
#define CALCULATE_TIME(special_char, profile, name) \
{ \
total_##profile##_##name##_time = SYS_CLOCK_HW_CYCLES_TO_NS( \
special_char##profile##_##name##_end_tsc - \
special_char##profile##_##name##_start_tsc); \
}
/*total_##profile##_##name##_time =
* profile##_##name##_end_us - profile##_##name##_start_us;
*/
#define DECLARE_VAR(profile, name) \
u64_t total_##profile##_##name##_time;
extern u64_t __start_swap_tsc;
extern u64_t __end_swap_tsc;
extern u64_t __start_intr_tsc;
extern u64_t __end_intr_tsc;
extern u64_t __start_tick_tsc;
extern u64_t __end_tick_tsc;
/* Function prototypes */
void system_thread_bench(void);
void yield_bench(void);
void heap_malloc_free_bench(void);
void semaphore_bench(void);
void mutex_bench(void);
void msg_passing_bench(void);
/* PRINT_F
* Macro to print a formatted output string. fprintf is used when
* Assumed that sline character array of SLINE_LEN + 1 characters
* is defined in the main file
*/
/* #define CSV_FORMAT_OUTPUT */
/* printf format defines. */
#ifdef CSV_FORMAT_OUTPUT
#define FORMAT "%-45s,%4u,%5u\n"
#else
#define FORMAT "%-45s:%4u cycles , %5u ns\n"
#endif
#include <stdio.h>
#define GET_2ND_ARG(first, second, ...) (second)
#define GET_3ND_ARG(first, second, third, ...) (third)
/* Enable this macro to print all the measurements.
* Note: Some measurements in few architectures are not valid
*/
/* #define PRINT_ALL_MEASUREMENTS */
#ifndef PRINT_ALL_MEASUREMENTS
/*If the measured cycles is greater than 10000 then one of the following is
* possible.
* 1. the selected measurement is not supported in the architecture
* 2. The measurement went wrong somewhere.(less likely to happen)
*/
#define PRINT_F(...) \
{ \
if ((GET_2ND_ARG(__VA_ARGS__) <= 20000) && \
(GET_2ND_ARG(__VA_ARGS__) != 0)) { \
snprintf(sline, 254, FORMAT, ##__VA_ARGS__); \
TC_PRINT("%s", sline); \
} \
}
#else
/* Prints all outputs*/
#define PRINT_F(...) \
{ \
snprintf(sline, 254, FORMAT, ##__VA_ARGS__); \
TC_PRINT("%s", sline); \
}
#endif
/* If we are using x86 based controller we tend to read the tsc value which is
* always incrementing i.e count up counter.
* If we are using the ARM based controllers the systick is a
* count down counter.
* Hence to calculate the cycles taken up by the code we need to adjust the
* values accordingly.
*
* NOTE: Needed only when reading value from end of swap operation
*/
#if CONFIG_ARM
#define SUBTRACT_CLOCK_CYCLES(val) (SysTick->LOAD - (u32_t)val)
#else
#define SUBTRACT_CLOCK_CYCLES(val) (val)
#endif

View file

@ -0,0 +1,85 @@
#include <kernel.h>
#include <zephyr.h>
#include <tc_util.h>
#include <ksched.h>
#include "timing_info.h"
K_SEM_DEFINE(yield_sem, 0, 1);
/* To time thread creation*/
#define STACK_SIZE 500
extern char __noinit __stack my_stack_area[STACK_SIZE];
extern char __noinit __stack my_stack_area_0[STACK_SIZE];
/* u64_t thread_yield_start_tsc[1000]; */
/* u64_t thread_yield_end_tsc[1000]; */
/* location of the time stamps*/
extern u32_t __read_swap_end_tsc_value;
extern u64_t __common_var_swap_end_tsc;
extern char sline[];
u64_t thread_sleep_start_tsc;
u64_t thread_sleep_end_tsc;
u64_t thread_start_time;
u64_t thread_end_time;
static u32_t count;
void thread_yield0_test(void *p1, void *p2, void *p3);
void thread_yield1_test(void *p1, void *p2, void *p3);
k_tid_t yield0_tid;
k_tid_t yield1_tid;
void yield_bench(void)
{
/* Thread yield*/
yield0_tid = k_thread_spawn(my_stack_area,
STACK_SIZE,
thread_yield0_test,
NULL, NULL, NULL,
0 /*priority*/, 0, 0);
yield1_tid = k_thread_spawn(my_stack_area_0,
STACK_SIZE,
thread_yield1_test,
NULL, NULL, NULL,
0 /*priority*/, 0, 0);
/*read the time of start of the sleep till the swap happens */
__read_swap_end_tsc_value = 1;
thread_sleep_start_tsc = OS_GET_TIME();
k_sleep(1000);
thread_sleep_end_tsc = ((u32_t)__common_var_swap_end_tsc);
u32_t yield_cycles = (thread_end_time - thread_start_time) / 2000;
u32_t sleep_cycles = thread_sleep_end_tsc - thread_sleep_start_tsc;
PRINT_F("Thread Yield", yield_cycles,
SYS_CLOCK_HW_CYCLES_TO_NS(yield_cycles));
PRINT_F("Thread Sleep", sleep_cycles,
SYS_CLOCK_HW_CYCLES_TO_NS(sleep_cycles));
}
void thread_yield0_test(void *p1, void *p2, void *p3)
{
k_sem_take(&yield_sem, 10);
thread_start_time = OS_GET_TIME();
while (count != 1000) {
count++;
k_yield();
}
thread_end_time = OS_GET_TIME();
k_thread_abort(yield1_tid);
}
void thread_yield1_test(void *p1, void *p2, void *p3)
{
k_sem_give(&yield_sem);
while (1) {
k_yield();
}
}

View file

@ -0,0 +1,3 @@
[test]
tags = benchmark
arch_whitelist = x86 arm