kernel: optimize ms-to-ticks for certain tick frequencies

Some tick frequencies lend themselves to optimized conversions from ms
to ticks and vice-versa.

- 1000Hz which does not need any conversion
- 500Hz, 250Hz, 125Hz where the division/multiplication are a straight
  shift since they are power-of-two factors of 1000.

In addition, some more generally used values are made to use optimized
conversion equations rather than the generic one that uses 64-bit math,
and often results in calling compiler intrinsics.

These values are: 100Hz, 50Hz, 25Hz, 20Hz, 10Hz, 1Hz (the last one used
in some testing).

Avoiding the 64-bit math intrinsics has the further benefit, besides
increased performance, of using a significantly lower amount of stack
space: 52 bytes on ARM Cortex-M and 80 bytes on x86.

Change-Id: I080eb338a2637d6b1c6838c119af1a9fa37fe869
Signed-off-by: Benjamin Walsh <benjamin.walsh@windriver.com>
This commit is contained in:
Benjamin Walsh 2016-12-20 14:39:08 -05:00 committed by Anas Nashif
commit 6209218f40
6 changed files with 73 additions and 10 deletions

View file

@ -32,6 +32,7 @@
#include <misc/__assert.h>
#include <misc/dlist.h>
#include <misc/slist.h>
#include <misc/util.h>
#ifdef __cplusplus
extern "C" {
@ -634,13 +635,47 @@ extern void *k_thread_custom_data_get(void);
* @cond INTERNAL_HIDDEN
*/
/* kernel clocks */
/*
 * Select the ms<->ticks conversion strategy at compile time.
 *
 * For the tick rates listed below, _ms_per_tick is defined as the constant
 * (MSEC_PER_SEC / sys_clock_ticks_per_sec) and the conversions use it
 * directly. For any other rate, _NON_OPTIMIZED_TICKS_PER_SEC is defined
 * instead and _ms_per_tick is left undefined.
 *
 * NOTE(review): this relies on sys_clock_ticks_per_sec expanding to an
 * integer constant in the preprocessor; an identifier that is not a macro
 * evaluates to 0 in #if and would always select the non-optimized path.
 * Confirm against its definition.
 */
#if (sys_clock_ticks_per_sec == 1000) || \
(sys_clock_ticks_per_sec == 500) || \
(sys_clock_ticks_per_sec == 250) || \
(sys_clock_ticks_per_sec == 125) || \
(sys_clock_ticks_per_sec == 100) || \
(sys_clock_ticks_per_sec == 50) || \
(sys_clock_ticks_per_sec == 25) || \
(sys_clock_ticks_per_sec == 20) || \
(sys_clock_ticks_per_sec == 10) || \
(sys_clock_ticks_per_sec == 1)
/* whole number of milliseconds per tick, assuming MSEC_PER_SEC is 1000 */
#define _ms_per_tick (MSEC_PER_SEC / sys_clock_ticks_per_sec)
#else
/* yields horrible 64-bit math on many architectures: try to avoid */
#define _NON_OPTIMIZED_TICKS_PER_SEC
#endif
/*
 * Convert a duration in milliseconds to a number of ticks.
 *
 * When _NON_OPTIMIZED_TICKS_PER_SEC is defined, only the prototype is
 * provided here and the function is defined out-of-line elsewhere.
 * Otherwise an always-inline version is used that divides by the
 * compile-time constant _ms_per_tick.
 *
 * @param ms Duration in milliseconds.
 *
 * @return Equivalent number of ticks, as an int32_t.
 */
#ifdef _NON_OPTIMIZED_TICKS_PER_SEC
extern int32_t _ms_to_ticks(int32_t ms);
#else
static ALWAYS_INLINE int32_t _ms_to_ticks(int32_t ms)
{
/*
 * ms is converted to uint32_t before the division.
 * NOTE(review): ceiling_fraction() presumably rounds the quotient up
 * (it comes from misc/util.h, not shown here) -- confirm.
 */
return (int32_t)ceiling_fraction((uint32_t)ms, _ms_per_tick);
}
#endif
/* added tick needed to account for tick in progress */
#define _TICK_ALIGN 1
static int64_t __ticks_to_ms(int64_t ticks)
static inline int64_t __ticks_to_ms(int64_t ticks)
{
#if CONFIG_SYS_CLOCK_EXISTS
#ifdef CONFIG_SYS_CLOCK_EXISTS
#ifdef _NON_OPTIMIZED_TICKS_PER_SEC
return (MSEC_PER_SEC * (uint64_t)ticks) / sys_clock_ticks_per_sec;
#else
return (uint64_t)ticks * _ms_per_tick;
#endif
#else
__ASSERT(ticks == 0, "");
return 0;

View file

@ -2892,10 +2892,6 @@ nano_stack_pop(struct nano_stack *stack, uint32_t *data,
*/
#define nano_task_stack_pop nano_stack_pop
/* kernel clocks */
extern int32_t _ms_to_ticks(int32_t ms);
/**
* @brief Return the current system tick count.
*

View file

@ -399,6 +399,30 @@ config SYS_CLOCK_TICKS_PER_SEC
help
This option specifies the frequency of the system clock in Hz.
Depending on the choice made, an amount of possibly expensive math must
occur when converting ticks to milliseconds and vice-versa. Some values
are optimized, and yield significantly less math.
The optimal values from a computational point-of-view are 1000, 500,
250 and 125, since in these cases there is either no computation
required, or it is all done via bit-shifting. These also give a
granularity from 1ms to 8ms.
Other good values are 100, 50, 25, 20 and 10. In this case, some math
is required but is minimized. These are also values that necessitate a
reduced number of clock interrupts per second, at the cost of
granularity (10ms to 100ms).
All other values require some extensive 64-bit math, and in some
configurations even require calls to compiler built-in functions, and
can require a non-trivial extra amount of stack space (e.g. around 80
bytes on x86).
Using the legacy API also incurs an extra penalty, since when asking
for a timeout, a translation is made from ticks to milliseconds to call
the native kernel APIs, and then another translation is made back to
ticks, since the kernel is tick-based.
config SYS_CLOCK_HW_CYCLES_PER_SEC
int "System clock's h/w timer frequency"
help

View file

@ -31,7 +31,9 @@ extern void _pend_thread(struct k_thread *thread,
extern void _pend_current_thread(_wait_q_t *wait_q, int32_t timeout);
extern void _move_thread_to_end_of_prio_q(struct k_thread *thread);
extern int __must_switch_threads(void);
#ifdef _NON_OPTIMIZED_TICKS_PER_SEC
extern int32_t _ms_to_ticks(int32_t ms);
#endif
extern void idle(void *, void *, void *);
/* find which one is the next thread to run */

View file

@ -19,6 +19,7 @@
#include <atomic.h>
#include <ksched.h>
#include <wait_q.h>
#include <misc/util.h>
/* the only struct _kernel instance */
struct _kernel _kernel = {0};
@ -191,15 +192,14 @@ static int _is_wait_q_insert_point(sys_dnode_t *node, void *insert_prio)
/* convert milliseconds to ticks */
#define ceiling(numerator, divider) \
(((numerator) + ((divider) - 1)) / (divider))
/*
 * Out-of-line millisecond-to-tick conversion, compiled only when
 * _NON_OPTIMIZED_TICKS_PER_SEC is defined.
 *
 * @param ms Duration in milliseconds.
 *
 * @return Tick count, truncated to int32_t.
 */
#ifdef _NON_OPTIMIZED_TICKS_PER_SEC
int32_t _ms_to_ticks(int32_t ms)
{
/* widen to 64 bits first so the multiplication is done in int64_t */
int64_t ms_ticks_per_sec = (int64_t)ms * sys_clock_ticks_per_sec;
/*
 * NOTE(review): two return statements appear back to back, so the second
 * one is unreachable as written. This looks like the old (ceiling) and
 * new (ceiling_fraction) lines of the change shown together -- confirm
 * which one is intended and drop the other.
 */
return (int32_t)ceiling(ms_ticks_per_sec, MSEC_PER_SEC);
return (int32_t)ceiling_fraction(ms_ticks_per_sec, MSEC_PER_SEC);
}
#endif
/* pend the specified thread: it must *not* be in the ready queue */
/* must be called with interrupts locked */

View file

@ -23,6 +23,12 @@
#include <wait_q.h>
#include <drivers/system_timer.h>
#ifdef CONFIG_SYS_CLOCK_EXISTS
#ifdef _NON_OPTIMIZED_TICKS_PER_SEC
#warning "non-optimized system clock frequency chosen: performance may suffer"
#endif
#endif
#ifdef CONFIG_SYS_CLOCK_EXISTS
int sys_clock_us_per_tick = 1000000 / sys_clock_ticks_per_sec;
int sys_clock_hw_cycles_per_tick =