tests: kernel: import floating point sharing tests to unified
This is the microkernel version of the FPU sharing test from legacy modified to use unified APIs directly. Jira: ZEP-932 Change-Id: I133a1466ea75201a97c2f8b83c3586fea0a19447 Signed-off-by: Jithu Joseph <jithu.joseph@intel.com> Signed-off-by: Anas Nashif <anas.nashif@intel.com>
This commit is contained in:
parent
d33c42a19d
commit
5800f16484
10 changed files with 981 additions and 0 deletions
4
tests/kernel/fp_sharing/Makefile
Normal file
4
tests/kernel/fp_sharing/Makefile
Normal file
|
@ -0,0 +1,4 @@
|
|||
BOARD ?= qemu_x86
|
||||
CONF_FILE = prj.conf
|
||||
|
||||
include ${ZEPHYR_BASE}/Makefile.test
|
66
tests/kernel/fp_sharing/README.txt
Normal file
66
tests/kernel/fp_sharing/README.txt
Normal file
|
@ -0,0 +1,66 @@
|
|||
Title: Shared Floating Point Support
|
||||
|
||||
Description:
|
||||
|
||||
This test uses two tasks to independently compute pi, while two other tasks
|
||||
load and store floating point registers and check for corruption. This tests
|
||||
the ability of tasks to safely share floating point hardware resources, even
|
||||
when switching occurs preemptively. (Note that both sets of tests run
|
||||
concurrently even though they report their progress at different times.)
|
||||
|
||||
The demonstration utilizes mutex APIs, timers, semaphores,
|
||||
round robin scheduling, and floating point support.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Building and Running Project:
|
||||
|
||||
This project outputs to the console. It can be built and executed
|
||||
on QEMU as follows:
|
||||
|
||||
make qemu
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Troubleshooting:
|
||||
|
||||
Problems caused by out-dated project information can be addressed by
|
||||
issuing one of the following commands then rebuilding the project:
|
||||
|
||||
make clean # discard results of previous builds
|
||||
# but keep existing configuration info
|
||||
or
|
||||
make pristine # discard results of previous builds
|
||||
# and restore pre-defined configuration info
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Advanced:
|
||||
|
||||
Depending upon the board's speed, the frequency of test output may range from
|
||||
every few seconds to every few minutes. The speed of the test can be controlled
|
||||
through the variable PI_NUM_ITERATIONS (default 700000). Lowering this value
|
||||
will increase the test's speed, but at the expense of the calculation's
|
||||
precision.
|
||||
|
||||
make qemu PI_NUM_ITERATIONS=100000
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Sample Output:
|
||||
|
||||
***** BOOTING ZEPHYR OS vxxx - BUILD: Jan xxxx *****
|
||||
Floating point sharing tests started
|
||||
===================================================================
|
||||
Load and store OK after 100 (high) + 47119 (low) tests
|
||||
Pi calculation OK after 50 (high) + 2 (low) tests (computed 3.141594)
|
||||
Load and store OK after 200 (high) + 94186 (low) tests
|
||||
Load and store OK after 300 (high) + 142416 (low) tests
|
||||
Pi calculation OK after 150 (high) + 7 (low) tests (computed 3.141594)
|
||||
Load and store OK after 400 (high) + 190736 (low) tests
|
||||
Load and store OK after 500 (high) + 238618 (low) tests
|
||||
===================================================================
|
||||
PASS - load_store_high.
|
||||
===================================================================
|
||||
PROJECT EXECUTION SUCCESSFUL
|
||||
|
5
tests/kernel/fp_sharing/prj.conf
Normal file
5
tests/kernel/fp_sharing/prj.conf
Normal file
|
@ -0,0 +1,5 @@
|
|||
CONFIG_FLOAT=y
|
||||
CONFIG_SSE=y
|
||||
CONFIG_FP_SHARING=y
|
||||
CONFIG_SSE_FP_MATH=y
|
||||
CONFIG_STDOUT_CONSOLE=y
|
11
tests/kernel/fp_sharing/src/Makefile
Normal file
11
tests/kernel/fp_sharing/src/Makefile
Normal file
|
@ -0,0 +1,11 @@
|
|||
ccflags-y += -I${ZEPHYR_BASE}/tests/include
|
||||
|
||||
obj-y += main.o pi.o
|
||||
|
||||
# Some boards are significantly slower than others resulting in the test
|
||||
# output being in the range of every few seconds to every few minutes. To
|
||||
# compensate for this, one can control the number of iterations in the PI
|
||||
# calculation through PI_NUM_ITERATIONS. Lowering this value will increase
|
||||
# the speed of the test but it will come at the expense of precision.
|
||||
PI_NUM_ITERATIONS ?= 700000
|
||||
ccflags-y += "-DPI_NUM_ITERATIONS=${PI_NUM_ITERATIONS}"
|
120
tests/kernel/fp_sharing/src/float_context.h
Normal file
120
tests/kernel/fp_sharing/src/float_context.h
Normal file
|
@ -0,0 +1,120 @@
|
|||
/**
|
||||
* @file
|
||||
* @brief common definitions for the FPU sharing test application
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2011-2014 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#ifndef _FLOATCONTEXT_H
|
||||
#define _FLOATCONTEXT_H
|
||||
|
||||
/*
|
||||
* Each architecture must define the following structures (which may be empty):
|
||||
* 'struct fp_volatile_register_set'
|
||||
* 'struct fp_non_volatile_register_set'
|
||||
*
|
||||
* Each architecture must also define the following macros:
|
||||
* SIZEOF_FP_VOLATILE_REGISTER_SET
|
||||
* SIZEOF_FP_NON_VOLATILE_REGISTER_SET
|
||||
* Those macros are used as sizeof(<an empty structure>) is compiler specific;
|
||||
* that is, it may evaluate to a non-zero value.
|
||||
*
|
||||
* Each architecture shall also have custom implementations of:
|
||||
* _load_all_float_registers()
|
||||
* _load_then_store_all_float_registers()
|
||||
* _store_all_float_registers()
|
||||
*/
|
||||
|
||||
#if defined(CONFIG_ISA_IA32)
|
||||
|
||||
#define FP_OPTION 0
|
||||
|
||||
/*
|
||||
* In the future, the struct definitions may need to be refined based on the
|
||||
* specific IA-32 processor, but for now only the Pentium4 is supported:
|
||||
*
|
||||
* 8 x 80 bit floating point registers (ST[0] -> ST[7])
|
||||
* 8 x 128 bit XMM registers (XMM[0] -> XMM[7])
|
||||
*
|
||||
* All these registers are considered volatile across a function invocation.
|
||||
*/
|
||||
|
||||
/* Raw byte image of one 80-bit x87 floating point register (ST[n]). */
struct fp_register {
	unsigned char reg[10];
};

/* Raw byte image of one 128-bit XMM register (XMM[n]). */
struct xmm_register {
	unsigned char reg[16];
};

/*
 * Volatile register images: the eight XMM registers come first, followed
 * by the eight x87 registers.
 */
struct fp_volatile_register_set {
	struct xmm_register xmm[8];	/* XMM[0] -> XMM[7] */
	struct fp_register st[8];	/* ST[0] -> ST[7] */
};

/* IA-32 has no non-volatile floating point registers, so this is empty. */
struct fp_non_volatile_register_set {
};

/*
 * The non-volatile size is spelled out as 0 instead of using sizeof(),
 * because sizeof(<an empty structure>) is compiler specific.
 */
#define SIZEOF_FP_VOLATILE_REGISTER_SET sizeof(struct fp_volatile_register_set)
#define SIZEOF_FP_NON_VOLATILE_REGISTER_SET 0
|
||||
|
||||
#elif defined(CONFIG_CPU_CORTEX_M4)
|
||||
|
||||
#define FP_OPTION 0
|
||||
|
||||
/* Registers s0..s15: volatile, need not be preserved across function calls. */
struct fp_volatile_register_set {
	float s[16];
};

/* Registers s16..s31: non-volatile, must be preserved across function calls. */
struct fp_non_volatile_register_set {
	float s[16];
};

/* Neither structure is empty here, so sizeof() can be used for both sizes. */
#define SIZEOF_FP_VOLATILE_REGISTER_SET \
	sizeof(struct fp_volatile_register_set)
#define SIZEOF_FP_NON_VOLATILE_REGISTER_SET \
	sizeof(struct fp_non_volatile_register_set)
|
||||
|
||||
#else
|
||||
|
||||
#error "Architecture must provide the following definitions:\n"
|
||||
"\t'struct fp_volatile_registers'\n"
|
||||
"\t'struct fp_non_volatile_registers'\n"
|
||||
"\t'SIZEOF_FP_VOLATILE_REGISTER_SET'\n"
|
||||
"\t'SIZEOF_FP_NON_VOLATILE_REGISTER_SET'\n"
|
||||
#endif /* CONFIG_ISA_IA32 */
|
||||
|
||||
/* the set of ALL floating point registers */
|
||||
|
||||
struct fp_register_set {
|
||||
struct fp_volatile_register_set fp_volatile;
|
||||
struct fp_non_volatile_register_set fp_non_volatile;
|
||||
};
|
||||
|
||||
#define SIZEOF_FP_REGISTER_SET \
|
||||
(SIZEOF_FP_VOLATILE_REGISTER_SET + SIZEOF_FP_NON_VOLATILE_REGISTER_SET)
|
||||
|
||||
/*
|
||||
* The following constants define the initial byte value used by the background
|
||||
* task, and the fiber when loading up the floating point registers.
|
||||
*/
|
||||
|
||||
#define MAIN_FLOAT_REG_CHECK_BYTE ((unsigned char)0xe5)
|
||||
#define FIBER_FLOAT_REG_CHECK_BYTE ((unsigned char)0xf9)
|
||||
|
||||
extern int fpu_sharing_error;
|
||||
|
||||
#endif /* _FLOATCONTEXT_H */
|
91
tests/kernel/fp_sharing/src/float_regs_arm_gcc.h
Normal file
91
tests/kernel/fp_sharing/src/float_regs_arm_gcc.h
Normal file
|
@ -0,0 +1,91 @@
|
|||
/**
|
||||
* @file
|
||||
* @brief ARM Cortex-M4 GCC specific floating point register macros
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2016, Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#ifndef _FLOAT_REGS_ARM_GCC_H
|
||||
#define _FLOAT_REGS_ARM_GCC_H
|
||||
|
||||
#if !defined(__GNUC__) || !defined(CONFIG_CPU_CORTEX_M4)
|
||||
#error __FILE__ goes only with Cortex-M4 GCC
|
||||
#endif
|
||||
|
||||
#include <toolchain.h>
|
||||
#include "float_context.h"
|
||||
|
||||
/**
|
||||
*
|
||||
* @brief Load all floating point registers
|
||||
*
|
||||
* This function loads ALL floating point registers pointed to by @a regs.
|
||||
* It is expected that a subsequent call to _store_all_float_registers()
|
||||
* will be issued to dump the floating point registers to memory.
|
||||
*
|
||||
* The format/organization of 'struct fp_register_set'; the generic C test
|
||||
* code (main.c) merely treat the register set as an array of bytes.
|
||||
*
|
||||
* The only requirement is that the arch specific implementations of
|
||||
* _load_all_float_registers() and _store_all_float_registers() agree
|
||||
* on the format.
|
||||
*
|
||||
* @return N/A
|
||||
*/
|
||||
|
||||
static inline void _load_all_float_registers(struct fp_register_set *regs)
|
||||
{
|
||||
__asm__ volatile (
|
||||
"vldmia %0, {s0-s15};\n\t"
|
||||
"vldmia %1, {s16-s31};\n\t"
|
||||
:: "r" (®s->fp_volatile), "r" (®s->fp_non_volatile)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @brief Dump all floating point registers to memory
|
||||
*
|
||||
* This function stores ALL floating point registers to the memory buffer
|
||||
* specified by @a regs. It is expected that a previous invocation of
|
||||
* _load_all_float_registers() occurred to load all the floating point
|
||||
* registers from a memory buffer.
|
||||
*
|
||||
* @return N/A
|
||||
*/
|
||||
|
||||
static inline void _store_all_float_registers(struct fp_register_set *regs)
|
||||
{
|
||||
__asm__ volatile (
|
||||
"vstmia %0, {s0-s15};\n\t"
|
||||
"vstmia %1, {s16-s31};\n\t"
|
||||
:: "r" (®s->fp_volatile), "r" (®s->fp_non_volatile)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
/**
 *
 * @brief Load then dump all float registers to memory
 *
 * Round-trips the buffer given by @a regs through the floating point
 * registers: every register is first loaded from the buffer and then
 * written back to the same buffer.
 *
 * This routine is called by a high priority thread prior to calling a
 * primitive that pends and triggers a co-operative context switch to a low
 * priority thread.
 *
 * @param regs buffer used as both the source and the destination
 *
 * @return N/A
 */

static inline void
_load_then_store_all_float_registers(struct fp_register_set *regs)
{
	_load_all_float_registers(regs);
	_store_all_float_registers(regs);
}
|
||||
#endif /* _FLOAT_REGS_ARM_GCC_H */
|
158
tests/kernel/fp_sharing/src/float_regs_x86_gcc.h
Normal file
158
tests/kernel/fp_sharing/src/float_regs_x86_gcc.h
Normal file
|
@ -0,0 +1,158 @@
|
|||
/**
|
||||
* @file
|
||||
* @brief Intel x86 GCC specific floating point register macros
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2015, Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#ifndef _FLOAT_REGS_X86_GCC_H
|
||||
#define _FLOAT_REGS_X86_GCC_H
|
||||
|
||||
#if !defined(__GNUC__) || !defined(CONFIG_ISA_IA32)
|
||||
#error __FILE__ goes only with x86 GCC
|
||||
#endif
|
||||
|
||||
#include <toolchain.h>
|
||||
#include "float_context.h"
|
||||
|
||||
/**
 *
 * @brief Load all floating point registers
 *
 * This function loads ALL floating point registers from the buffer pointed
 * to by @a regs: XMM0-XMM7 from byte offsets 0..127, then eight 10-byte
 * values from offsets 128..207 which are pushed onto the x87 register stack.
 * It is expected that a subsequent call to _store_all_float_registers()
 * will be issued to dump the floating point registers to memory.
 *
 * The format/organization of 'struct fp_register_set' is not important; the
 * generic C test code (main.c) merely treats the register set as an array
 * of bytes.
 *
 * The only requirement is that the arch specific implementations of
 * _load_all_float_registers(), _store_all_float_registers() and
 * _load_then_store_all_float_registers() agree on the format.
 *
 * @param regs buffer holding the values to load into the registers
 *
 * @return N/A
 */

static inline void _load_all_float_registers(struct fp_register_set *regs)
{
	/*
	 * The buffer is only reachable through the pointer operand, so the
	 * "memory" clobber is needed to make the compiler complete any
	 * pending stores to it before the registers are loaded.
	 */
	__asm__ volatile (
		"movdqu  0(%0), %%xmm0\n\t;"
		"movdqu 16(%0), %%xmm1\n\t;"
		"movdqu 32(%0), %%xmm2\n\t;"
		"movdqu 48(%0), %%xmm3\n\t;"
		"movdqu 64(%0), %%xmm4\n\t;"
		"movdqu 80(%0), %%xmm5\n\t;"
		"movdqu 96(%0), %%xmm6\n\t;"
		"movdqu 112(%0), %%xmm7\n\t;"

		"fldt   128(%0)\n\t;"
		"fldt   138(%0)\n\t;"
		"fldt   148(%0)\n\t;"
		"fldt   158(%0)\n\t;"
		"fldt   168(%0)\n\t;"
		"fldt   178(%0)\n\t;"
		"fldt   188(%0)\n\t;"
		"fldt   198(%0)\n\t;"

		:: "r" (regs) : "memory"
		);
}
|
||||
|
||||
|
||||
/**
 *
 * @brief Load then dump all float registers to memory
 *
 * This function loads ALL floating point registers from the memory buffer
 * specified by @a regs, and then stores the x87 registers back to that
 * buffer (the XMM registers are loaded but not written back).
 *
 * This routine is called by a high priority thread prior to calling a primitive
 * that pends and triggers a co-operative context switch to a low priority
 * thread. Because the kernel doesn't save floating point context for
 * co-operative context switches, the x87 FPU register stack must be put back
 * in an empty state before the switch occurs in case the next task to perform
 * floating point operations was also co-operatively switched out and simply
 * inherits the existing x87 FPU state (expecting the stack to be empty).
 *
 * @param regs buffer used as both the source and the destination
 *
 * @return N/A
 */

static inline void
_load_then_store_all_float_registers(struct fp_register_set *regs)
{
	/*
	 * The asm both reads and writes the buffer behind the pointer
	 * operand, hence the "memory" clobber.
	 */
	__asm__ volatile (
		"movdqu  0(%0), %%xmm0\n\t;"
		"movdqu 16(%0), %%xmm1\n\t;"
		"movdqu 32(%0), %%xmm2\n\t;"
		"movdqu 48(%0), %%xmm3\n\t;"
		"movdqu 64(%0), %%xmm4\n\t;"
		"movdqu 80(%0), %%xmm5\n\t;"
		"movdqu 96(%0), %%xmm6\n\t;"
		"movdqu 112(%0), %%xmm7\n\t;"

		"fldt   128(%0)\n\t;"
		"fldt   138(%0)\n\t;"
		"fldt   148(%0)\n\t;"
		"fldt   158(%0)\n\t;"
		"fldt   168(%0)\n\t;"
		"fldt   178(%0)\n\t;"
		"fldt   188(%0)\n\t;"
		"fldt   198(%0)\n\t;"

		/*
		 * pop the x87 FPU registers back to memory, in the reverse
		 * of the order in which they were pushed
		 */

		"fstpt  198(%0)\n\t;"
		"fstpt  188(%0)\n\t;"
		"fstpt  178(%0)\n\t;"
		"fstpt  168(%0)\n\t;"
		"fstpt  158(%0)\n\t;"
		"fstpt  148(%0)\n\t;"
		"fstpt  138(%0)\n\t;"
		"fstpt  128(%0)\n\t;"

		:: "r" (regs) : "memory"
		);
}
|
||||
|
||||
|
||||
/**
 *
 * @brief Dump all floating point registers to memory
 *
 * Writes XMM0-XMM7 to byte offsets 0..127 of the buffer given by @a regs,
 * then pops the eight x87 registers into offsets 198 down to 128. A prior
 * call to _load_all_float_registers() is expected to have filled the
 * registers from a memory buffer.
 *
 * @param regs buffer that receives the register contents
 *
 * @return N/A
 */

static inline void _store_all_float_registers(struct fp_register_set *regs)
{
	__asm__ volatile (
		/* XMM registers, lowest offset first */
		"movdqu %%xmm0, 0(%0)\n\t;"
		"movdqu %%xmm1, 16(%0)\n\t;"
		"movdqu %%xmm2, 32(%0)\n\t;"
		"movdqu %%xmm3, 48(%0)\n\t;"
		"movdqu %%xmm4, 64(%0)\n\t;"
		"movdqu %%xmm5, 80(%0)\n\t;"
		"movdqu %%xmm6, 96(%0)\n\t;"
		"movdqu %%xmm7, 112(%0)\n\t;"

		/* x87 stack, popped highest offset first */
		"fstpt  198(%0)\n\t;"
		"fstpt  188(%0)\n\t;"
		"fstpt  178(%0)\n\t;"
		"fstpt  168(%0)\n\t;"
		"fstpt  158(%0)\n\t;"
		"fstpt  148(%0)\n\t;"
		"fstpt  138(%0)\n\t;"
		"fstpt  128(%0)\n\t;"

		:: "r" (regs) : "memory"
		);
}
|
||||
#endif /* _FLOAT_REGS_X86_GCC_H */
|
353
tests/kernel/fp_sharing/src/main.c
Normal file
353
tests/kernel/fp_sharing/src/main.c
Normal file
|
@ -0,0 +1,353 @@
|
|||
/* main.c - load/store portion of FPU sharing test */
|
||||
|
||||
/*
|
||||
* Copyright (c) 2011-2014 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/*
|
||||
* DESCRIPTION
|
||||
* This module implements the load/store portion of the FPU sharing test. This
|
||||
* version of this test utilizes a pair of tasks.
|
||||
*
|
||||
* The load/store test validates the floating point unit context
|
||||
* save/restore mechanism. This test utilizes a pair of threads of different
|
||||
* priorities that each use the floating point registers. The context
|
||||
* switching that occurs exercises the kernel's ability to properly preserve the
|
||||
* floating point registers. The test also exercises the kernel's ability to
|
||||
* automatically enable floating point support for a task, if supported.
|
||||
*
|
||||
* FUTURE IMPROVEMENTS
|
||||
* On architectures where the non-integer capabilities are provided in a
|
||||
* hierarchy, for example on IA-32 the USE_FP and USE_SSE options are provided,
|
||||
* this test should be enhanced to ensure that the architectures' _Swap()
|
||||
* routine doesn't context switch more registers than it needs to (which would
|
||||
* represent a performance issue). For example, on the IA-32, the test should
|
||||
* issue a k_float_disable() from main(), and then indicate that only x87 FPU
|
||||
* registers will be utilized (k_float_enable()). The thread should continue
|
||||
* to load ALL non-integer registers, but main() should validate that only the
|
||||
* x87 FPU registers are being saved/restored.
|
||||
*/
|
||||
|
||||
#ifndef CONFIG_FLOAT
|
||||
#error Rebuild with the FLOAT config option enabled
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_FP_SHARING
|
||||
#error Rebuild with the FP_SHARING config option enabled
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_ISA_IA32)
|
||||
#ifndef CONFIG_SSE
|
||||
#error Rebuild with the SSE config option enabled
|
||||
#endif
|
||||
#endif /* CONFIG_ISA_IA32 */
|
||||
|
||||
#include <zephyr.h>
|
||||
|
||||
#if defined(CONFIG_ISA_IA32)
|
||||
#if defined(__GNUC__)
|
||||
#include <float_regs_x86_gcc.h>
|
||||
#else
|
||||
#include <float_regs_x86_other.h>
|
||||
#endif /* __GNUC__ */
|
||||
#elif defined(CONFIG_CPU_CORTEX_M4)
|
||||
#if defined(__GNUC__)
|
||||
#include <float_regs_arm_gcc.h>
|
||||
#else
|
||||
#include <float_regs_arm_other.h>
|
||||
#endif /* __GNUC__ */
|
||||
#endif
|
||||
|
||||
#include <arch/cpu.h>
|
||||
#include <tc_util.h>
|
||||
#include "float_context.h"
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#define MAX_TESTS 500
|
||||
#define STACKSIZE 2048
|
||||
#define HI_PRI 5
|
||||
#define LO_PRI 10
|
||||
|
||||
/* space for float register load/store area used by low priority task */
|
||||
|
||||
static struct fp_register_set float_reg_set_load;
|
||||
static struct fp_register_set float_reg_set_store;
|
||||
|
||||
/* space for float register load/store area used by high priority thread */
|
||||
|
||||
static struct fp_register_set float_reg_set;
|
||||
|
||||
|
||||
/* flag indicating that an error has occurred */
|
||||
|
||||
int fpu_sharing_error;
|
||||
|
||||
/*
|
||||
* Test counters are "volatile" because GCC may not update them properly
|
||||
* otherwise. (See description of pi calculation test for more details.)
|
||||
*/
|
||||
|
||||
static volatile unsigned int load_store_low_count;
|
||||
static volatile unsigned int load_store_high_count;
|
||||
|
||||
extern uint32_t _tick_get_32(void);
|
||||
extern void calculate_pi_low(void);
|
||||
extern void calculate_pi_high(void);
|
||||
|
||||
/**
|
||||
*
|
||||
* @brief Low priority FPU load/store thread
|
||||
*
|
||||
* @return N/A
|
||||
*/
|
||||
|
||||
void load_store_low(void)
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned char init_byte;
|
||||
unsigned char *store_ptr = (unsigned char *)&float_reg_set_store;
|
||||
unsigned char *load_ptr = (unsigned char *)&float_reg_set_load;
|
||||
|
||||
volatile char volatile_stack_var = 0;
|
||||
|
||||
PRINT_DATA("Floating point sharing tests started\n");
|
||||
PRINT_LINE;
|
||||
|
||||
/*
|
||||
* The high priority thread has a sleep to get this (low pri) thread
|
||||
* running and here (low priority) we enable slicing and waste cycles
|
||||
* to run hi pri thread in between fp ops.
|
||||
*
|
||||
* Enable round robin scheduling to allow both the low priority pi
|
||||
* computation and load/store tasks to execute. The high priority pi
|
||||
* computation and load/store tasks will preempt the low priority tasks
|
||||
* periodically.
|
||||
*/
|
||||
|
||||
k_sched_time_slice_set(10, LO_PRI);
|
||||
|
||||
/*
|
||||
* Initialize floating point load buffer to known values;
|
||||
* these values must be different than the value used in other threads.
|
||||
*/
|
||||
|
||||
init_byte = MAIN_FLOAT_REG_CHECK_BYTE;
|
||||
for (i = 0; i < SIZEOF_FP_REGISTER_SET; i++) {
|
||||
load_ptr[i] = init_byte++;
|
||||
}
|
||||
|
||||
/* Keep cranking forever, or until an error is detected. */
|
||||
|
||||
for (load_store_low_count = 0; ; load_store_low_count++) {
|
||||
|
||||
/*
|
||||
* Clear store buffer to erase all traces of any previous
|
||||
* floating point values that have been saved.
|
||||
*/
|
||||
|
||||
memset(&float_reg_set_store, 0, SIZEOF_FP_REGISTER_SET);
|
||||
|
||||
/*
|
||||
* Utilize an architecture specific function to load all the
|
||||
* floating point registers with known values.
|
||||
*/
|
||||
|
||||
_load_all_float_registers(&float_reg_set_load);
|
||||
|
||||
/*
|
||||
* Waste some cycles to give the high priority load/store
|
||||
* thread an opportunity to run when the low priority thread is
|
||||
* using the floating point registers.
|
||||
*
|
||||
* IMPORTANT: This logic requires that sys_tick_get_32() not
|
||||
* perform any floating point operations!
|
||||
*/
|
||||
|
||||
while ((_tick_get_32() % 5) != 0) {
|
||||
/*
|
||||
* Use a volatile variable to prevent compiler
|
||||
* optimizing out the spin loop.
|
||||
*/
|
||||
++volatile_stack_var;
|
||||
}
|
||||
|
||||
/*
|
||||
* Utilize an architecture specific function to dump the
|
||||
* contents of all floating point registers to memory.
|
||||
*/
|
||||
|
||||
_store_all_float_registers(&float_reg_set_store);
|
||||
|
||||
/*
|
||||
* Compare each byte of buffer to ensure the expected value is
|
||||
* present, indicating that the floating point registers weren't
|
||||
* impacted by the operation of the high priority thread(s).
|
||||
*
|
||||
* Display error message and terminate if discrepancies are
|
||||
* detected.
|
||||
*/
|
||||
|
||||
init_byte = MAIN_FLOAT_REG_CHECK_BYTE;
|
||||
|
||||
for (i = 0; i < SIZEOF_FP_REGISTER_SET; i++) {
|
||||
if (store_ptr[i] != init_byte) {
|
||||
TC_ERROR("load_store_low found 0x%x instead "
|
||||
"of 0x%x @ offset 0x%x\n",
|
||||
store_ptr[i],
|
||||
init_byte, i);
|
||||
TC_ERROR("Discrepancy found during "
|
||||
"iteration %d\n",
|
||||
load_store_low_count);
|
||||
fpu_sharing_error = 1;
|
||||
}
|
||||
init_byte++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Terminate if a test error has been reported.
|
||||
*/
|
||||
|
||||
if (fpu_sharing_error) {
|
||||
TC_END_RESULT(TC_FAIL);
|
||||
TC_END_REPORT(TC_FAIL);
|
||||
return;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_ISA_IA32)
|
||||
/*
|
||||
* After every 1000 iterations (arbitrarily chosen), explicitly
|
||||
* disable floating point operations for the task. The
|
||||
* subsequent execution of _load_all_float_registers() will
|
||||
* result in an exception to automatically re-enable
|
||||
* floating point support for the task.
|
||||
*
|
||||
* The purpose of this part of the test is to exercise the
|
||||
* k_float_disable() API, and to also continue exercising
|
||||
* the (exception based) floating enabling mechanism.
|
||||
*/
|
||||
if ((load_store_low_count % 1000) == 0) {
|
||||
k_float_disable(k_current_get());
|
||||
}
|
||||
#elif defined(CONFIG_CPU_CORTEX_M4)
|
||||
/*
|
||||
* The routine k_float_disable() allows for thread-level
|
||||
* granularity for disabling floating point. Furthermore, it
|
||||
* is useful for testing on the fly thread enabling of floating
|
||||
* point. Neither of these capabilities are currently supported
|
||||
* for ARM.
|
||||
*/
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @brief High priority FPU load/store thread
|
||||
*
|
||||
* @return N/A
|
||||
*/
|
||||
|
||||
void load_store_high(void)
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned char init_byte;
|
||||
unsigned char *reg_set_ptr = (unsigned char *)&float_reg_set;
|
||||
|
||||
/* test until the specified time limit, or until an error is detected */
|
||||
|
||||
while (1) {
|
||||
/*
|
||||
* Initialize the float_reg_set structure by treating it as
|
||||
* a simple array of bytes (the arrangement and actual number
|
||||
* of registers is not important for this generic C code). The
|
||||
* structure is initialized by using the byte value specified
|
||||
* by the constant FIBER_FLOAT_REG_CHECK_BYTE, and then
|
||||
* incrementing the value for each successive location in the
|
||||
* float_reg_set structure.
|
||||
*
|
||||
* The initial byte value, and thus the contents of the entire
|
||||
* float_reg_set structure, must be different for each
|
||||
* thread to effectively test the nanokernel's ability to
|
||||
* properly save/restore the floating point values during a
|
||||
* context switch.
|
||||
*/
|
||||
|
||||
init_byte = FIBER_FLOAT_REG_CHECK_BYTE;
|
||||
|
||||
for (i = 0; i < SIZEOF_FP_REGISTER_SET; i++) {
|
||||
reg_set_ptr[i] = init_byte++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Utilize an architecture specific function to load all the
|
||||
* floating point registers with the contents of the
|
||||
* float_reg_set structure.
|
||||
*
|
||||
* The goal of the loading all floating point registers with
|
||||
* values that differ from the values used in other threads is
|
||||
* to help determine whether the floating point register
|
||||
* save/restore mechanism in the nanokernel's context switcher
|
||||
* is operating correctly.
|
||||
*
|
||||
* When a subsequent nano_fiber_timer_test() invocation is
|
||||
* performed, a (cooperative) context switch back to the
|
||||
* preempted task will occur. This context switch should result
|
||||
* in restoring the state of the task's floating point
|
||||
* registers when the task was swapped out due to the
|
||||
* occurrence of the timer tick.
|
||||
*/
|
||||
|
||||
_load_then_store_all_float_registers(&float_reg_set);
|
||||
|
||||
/*
|
||||
* Relinquish the processor for the remainder of the current
|
||||
* system clock tick, so that lower priority threads get a
|
||||
* chance to run.
|
||||
*
|
||||
* This exercises the ability of the nanokernel to restore the
|
||||
* FPU state of a low priority thread _and_ the ability of the
|
||||
* nanokernel to provide a "clean" FPU state to this thread
|
||||
* once the sleep ends.
|
||||
*/
|
||||
|
||||
k_sleep(1);
|
||||
|
||||
/* periodically issue progress report */
|
||||
|
||||
if ((++load_store_high_count % 100) == 0) {
|
||||
PRINT_DATA("Load and store OK after %u (high) "
|
||||
"+ %u (low) tests\n",
|
||||
load_store_high_count,
|
||||
load_store_low_count);
|
||||
}
|
||||
|
||||
/* terminate testing if specified limit has been reached */
|
||||
|
||||
if (load_store_high_count == MAX_TESTS) {
|
||||
TC_END_RESULT(TC_PASS);
|
||||
TC_END_REPORT(TC_PASS);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(CONFIG_ISA_IA32)
|
||||
#define THREAD_FP_FLAGS (K_FP_REGS | K_SSE_REGS)
|
||||
#else
|
||||
#define THREAD_FP_FLAGS (K_FP_REGS)
|
||||
#endif
|
||||
|
||||
K_THREAD_DEFINE(load_low, STACKSIZE, load_store_low, NULL, NULL, NULL,
|
||||
LO_PRI, THREAD_FP_FLAGS, K_NO_WAIT);
|
||||
|
||||
K_THREAD_DEFINE(load_high, STACKSIZE, load_store_high, NULL, NULL, NULL,
|
||||
HI_PRI, THREAD_FP_FLAGS, K_NO_WAIT);
|
||||
|
||||
K_THREAD_DEFINE(pi_low, STACKSIZE, calculate_pi_low, NULL, NULL, NULL,
|
||||
LO_PRI, THREAD_FP_FLAGS, K_NO_WAIT);
|
||||
|
||||
K_THREAD_DEFINE(pi_high, STACKSIZE, calculate_pi_high, NULL, NULL, NULL,
|
||||
HI_PRI, THREAD_FP_FLAGS, K_NO_WAIT);
|
158
tests/kernel/fp_sharing/src/pi.c
Normal file
158
tests/kernel/fp_sharing/src/pi.c
Normal file
|
@ -0,0 +1,158 @@
|
|||
/* pi.c - pi computation portion of FPU sharing test */
|
||||
|
||||
/*
|
||||
* Copyright (c) 2011-2014 Wind River Systems, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/*
|
||||
* DESCRIPTION
|
||||
* This module is used for the FPU sharing test, and supplements the basic
|
||||
* load/store test by incorporating two additional threads that utilize the
|
||||
* floating point unit.
|
||||
*
|
||||
* Testing utilizes a pair of tasks that independently compute pi. The lower
|
||||
* priority task is regularly preempted by the higher priority task, thereby
|
||||
* testing whether floating point context information is properly preserved.
|
||||
*
|
||||
* The following formula is used to compute pi:
|
||||
*
|
||||
* pi = 4 * (1 - 1/3 + 1/5 - 1/7 + 1/9 - ... )
|
||||
*
|
||||
* This series converges to pi very slowly. For example, performing 50,000
|
||||
* iterations results in an accuracy of 3 decimal places.
|
||||
*
|
||||
* A reference value of pi is computed once at the start of the test. All
|
||||
* subsequent computations must produce the same value, otherwise an error
|
||||
* has occurred.
|
||||
*/
|
||||
|
||||
#include <zephyr.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <tc_util.h>
|
||||
|
||||
#include <float_context.h>
|
||||
|
||||
/*
|
||||
* PI_NUM_ITERATIONS: This macro is defined in the project's Makefile and
|
||||
* is configurable from the command line.
|
||||
*/
|
||||
|
||||
static double reference_pi = 0.0f;
|
||||
|
||||
/*
|
||||
* Test counters are "volatile" because GCC wasn't properly updating
|
||||
* calc_pi_low_count when calculate_pi_low() contained a "return"
|
||||
* in its error handling logic -- the value was incremented in a register,
|
||||
* but never written back to memory. (Seems to be a compiler bug!)
|
||||
*/
|
||||
|
||||
static volatile unsigned int calc_pi_low_count;
|
||||
static volatile unsigned int calc_pi_high_count;
|
||||
|
||||
/**
|
||||
*
|
||||
* @brief Entry point for the low priority pi compute task
|
||||
*
|
||||
* @return N/A
|
||||
*/
|
||||
|
||||
void calculate_pi_low(void)
|
||||
{
|
||||
volatile double pi; /* volatile to avoid optimizing out of loop */
|
||||
double divisor = 3.0;
|
||||
double sign = -1.0;
|
||||
unsigned int ix;
|
||||
|
||||
/* loop forever, unless an error is detected */
|
||||
|
||||
while (1) {
|
||||
|
||||
sign = -1.0;
|
||||
pi = 1.0;
|
||||
divisor = 3.0;
|
||||
|
||||
for (ix = 0; ix < PI_NUM_ITERATIONS; ix++) {
|
||||
pi += sign / divisor;
|
||||
divisor += 2.0;
|
||||
sign *= -1.0;
|
||||
}
|
||||
|
||||
pi *= 4;
|
||||
|
||||
if (reference_pi == 0.0f) {
|
||||
reference_pi = pi;
|
||||
} else if (reference_pi != pi) {
|
||||
TC_ERROR("Computed pi %1.6f, reference pi %1.6f\n",
|
||||
pi, reference_pi);
|
||||
fpu_sharing_error = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
++calc_pi_low_count;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @brief Entry point for the high priority pi compute task
|
||||
*
|
||||
* @return N/A
|
||||
*/
|
||||
|
||||
void calculate_pi_high(void)
|
||||
{
|
||||
volatile double pi; /* volatile to avoid optimizing out of loop */
|
||||
double divisor = 3.0;
|
||||
double sign = -1.0;
|
||||
unsigned int ix;
|
||||
|
||||
/* loop forever, unless an error is detected */
|
||||
|
||||
while (1) {
|
||||
|
||||
sign = -1.0;
|
||||
pi = 1.0;
|
||||
divisor = 3.0;
|
||||
|
||||
for (ix = 0; ix < PI_NUM_ITERATIONS; ix++) {
|
||||
pi += sign / divisor;
|
||||
divisor += 2.0;
|
||||
sign *= -1.0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Relinquish the processor for the remainder of the current
|
||||
* system clock tick, so that lower priority threads get a
|
||||
* chance to run.
|
||||
*
|
||||
* This exercises the ability of the nanokernel to restore the
|
||||
* FPU state of a low priority thread _and_ the ability of the
|
||||
* nanokernel to provide a "clean" FPU state to this thread
|
||||
* once the sleep ends.
|
||||
*/
|
||||
|
||||
k_sleep(10);
|
||||
|
||||
pi *= 4;
|
||||
|
||||
if (reference_pi == 0.0f) {
|
||||
reference_pi = pi;
|
||||
} else if (reference_pi != pi) {
|
||||
TC_ERROR("Computed pi %1.6f, reference pi %1.6f\n",
|
||||
pi, reference_pi);
|
||||
fpu_sharing_error = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
/* periodically issue progress report */
|
||||
|
||||
if ((++calc_pi_high_count % 100) == 50) {
|
||||
PRINT_DATA("Pi calculation OK after %u (high) +"
|
||||
" %u (low) tests (computed %1.6f)\n",
|
||||
calc_pi_high_count, calc_pi_low_count, pi);
|
||||
}
|
||||
}
|
||||
}
|
15
tests/kernel/fp_sharing/testcase.ini
Normal file
15
tests/kernel/fp_sharing/testcase.ini
Normal file
|
@ -0,0 +1,15 @@
|
|||
[test_x86]
|
||||
tags = core
|
||||
platform_whitelist = qemu_x86
|
||||
slow = true
|
||||
# One may expect this test to take about two or three minutes to finish
|
||||
# under normal circumstances. On a heavily loaded machine, extra time
|
||||
# may be required--hence the 10 minute timeout.
|
||||
timeout = 600
|
||||
|
||||
[test_arm]
|
||||
tags = core
|
||||
platform_whitelist = frdm_k64f
|
||||
slow = true
|
||||
extra_args = PI_NUM_ITERATIONS=70000
|
||||
timeout = 600
|
Loading…
Add table
Add a link
Reference in a new issue