kernel: Clean up of x86 floating point code

Updates x86 floating point support to reflect changes that have been made in recent months.

* Many, many, many cosmetic changes (mostly revisions to comments).
* Elimination of unnecessary function aliases that were needed to support the task and fiber versions of certain APIs.
* Elimination of run-time code to enable a thread's "FP regs" option bit if the "SSE regs" option bit was set. The kernel now recognizes that the thread is using the FPU as long as either option bit is set. (If the thread has both option bits enabled this is the same as if only the "SSE regs" bit is set.)

Change-Id: Ic12abc54b6fa78921749b546d8debf23e7ad232d
Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
2016-11-07 08:55:13 -06:00 · 2016-11-07 08:55:13 -06:00 · bce8fbb61e
commit bce8fbb61e
parent d3e2b0255a
7 changed files with 211 additions and 297 deletions
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@ -88,12 +88,15 @@ config FLOAT
 	prompt "Floating point registers"
 	default n
 	help
-	This option allows tasks and fibers to use the floating point registers.
+	This option allows threads to use the x87 FPU/MMX registers. The
-	By default, only a single task or fiber may use the registers, and only
+	registers may be used by any number of cooperative threads or by
-	the x87 FPU/MMX registers may be used.
+	a single preemptible thread, but not both, since the kernel does not
 	preserve FPU context information when switching between threads.
 	Additional options must be enabled to permit the use of SSE registers or
 	to permit floating point register use by multiple preemptible threads.
-	Disabling this option means that any task or fiber that uses a
+	Disabling this option means that any thread that uses the floating
-	floating point register will get a fatal exception.
+	point registers will get a fatal exception.
 config FP_SHARING
 	bool
@ -101,12 +104,12 @@ config FP_SHARING
 	depends on FLOAT
 	default n
 	help
-	This option allows multiple tasks and fibers to use the floating point
+	This option allows multiple preemptible threads to use the floating
-	registers. Any task that uses the floating point registers must provide
+	point registers. Any preemptible thread that uses the registers must
-	stack space where the kernel can save these registers during context
+	provide stack space where the kernel can save FPU context info during
-	switches; a task that uses only the x87 FPU/MMX registers must provide
+	a preemptive context switch. A thread that uses only the x87 FPU/MMX
-	108 bytes of added stack space, while a task the uses the SSE registers
+	registers must provide 108 bytes of added stack space, while a thread
-	must provide 464 bytes of added stack space.
+	that uses the SSE registers must provide 464 bytes of added stack space.
 config SSE
 	bool
@ -114,7 +117,7 @@ config SSE
 	depends on FLOAT
 	default n
 	help
-	This option enables the use of SSE registers by tasks and fibers.
+	This option enables the use of SSE registers by threads.
 config SSE_FP_MATH
 	bool
--- a/arch/x86/core/float.c
+++ b/arch/x86/core/float.c
@ -16,64 +16,37 @@
 /**
 * @file
- * @brief Floating point resource sharing routines
+ * @brief Floating point register sharing routines
 *
- * This module allows multiple tasks and fibers to safely share the system's
+ * This module allows multiple preemptible threads to safely share the system's
- * floating point resources, by allowing the system to save FPU state
+ * floating point registers, by allowing the system to save FPU state info
- * information in a task or fiber's stack region when a pre-emptive context
+ * in a thread's stack region when a preemptive context switch occurs.
 * switch occurs.
 *
- * The floating point resource sharing mechanism is designed for minimal
+ * Note: If the kernel has been built without floating point register sharing
- * intrusiveness.  Floating point thread saving is only performed for tasks and
+ * support (CONFIG_FP_SHARING), the floating point registers can still be used
- * fibers that explicitly enable FP resource sharing, to avoid impacting the
+ * safely by one or more cooperative threads OR by a single preemptive thread,
- * stack size requirements of all other tasks and fibers.  For those tasks and
+ * but not by both.
- * fibers that do require FP resource sharing, a "lazy save/restore" mechanism
+ *
 * The floating point register sharing mechanism is designed for minimal
 * intrusiveness.  Floating point state saving is only performed for threads
 * that explicitly indicate they are using FPU registers, to avoid impacting
 * the stack size requirements of all other threads. Also, the SSE registers
 * are only saved for threads that actually used them. For those threads that
 * do require floating point state saving, a "lazy save/restore" mechanism
 * is employed so that the FPU's register sets are only switched in and out
 * when absolutely necessary; this avoids wasting effort preserving them when
 * there is no risk that they will be altered, or when there is no need to
 * preserve their contents.
 *
 * The following APIs are provided to allow floating point resource sharing to
 * be enabled or disabled at run-time:
 *
 * void fiber_float_enable  (nano_thread_id_t thread_id, unsigned int options)
 * void task_float_enable   (nano_thread_id_t thread_id, unsigned int options)
 * void fiber_float_disable (nano_thread_id_t thread_id)
 * void task_float_disable  (nano_thread_id_t thread_id)
 *
 * The 'options' parameter is used to specify what non-integer capabilities are
 * being used.  The same options accepted by fiber_fiber_start() are used in the
 * aforementioned APIs, namely K_FP_REGS and K_SSE_REGS.
 *
 * If the nanokernel has been built without SSE instruction support
 * (CONFIG_SSE), the system treats K_SSE_REGS as if it was K_FP_REGS.
 *
 * If the nanokernel has been built without floating point resource sharing
 * support (CONFIG_FP_SHARING), the aforementioned APIs and capabilities do not
 * exist.
 *
 * NOTE
 * It is possible for a single task or fiber to utilize floating instructions
 * _without_ enabling the FP resource sharing feature.  Since no other task or
 * fiber uses the FPU the FP registers won't change when the FP-capable task or
 * fiber isn't executing, meaning there is no need to save the registers.
 *
 * WARNING
 * The use of floating point instructions by ISRs is not supported by the
 * kernel.
 *
 * INTERNAL
- * If automatic enabling of floating point resource sharing _is not_ configured
+ * The kernel sets CR0[TS] to 0 only for threads that require FP register
- * the system leaves CR0[TS] = 0 for all tasks and fibers.  This means that any
+ * sharing. All other threads have CR0[TS] set to 1 so that an attempt
- * task or fiber can perform floating point operations at any time without
+ * to perform an FP operation will cause an exception, allowing the kernel
- * causing an exception, and the system won't stop a task or fiber that
+ * to enable FP register sharing on its behalf.
 * shouldn't be doing FP stuff from doing it.
 *
 * If automatic enabling of floating point resource sharing _is_ configured
 * the system leaves CR0[TS] = 0 only for tasks and fibers that are allowed to
 * perform FP operations.  All other tasks and fibers have CR0[TS] = 1 so that
 * an attempt to perform an FP operation will cause an exception, allowing the
 * system to enable FP resource sharing on its behalf.
 */
 #include <nano_private.h>
@ -84,110 +57,115 @@
 #ifdef CONFIG_FP_SHARING
-#if defined(CONFIG_SSE)
+/* SSE control/status register default value (used by assembler code) */
-extern uint32_t _sse_mxcsr_default_value; /* SSE control/status register default value */
+extern uint32_t _sse_mxcsr_default_value;
 #endif			/* CONFIG_SSE */
 /**
 *
- * @brief Save non-integer context information
+ * @brief Save a thread's floating point context information.
 *
- * This routine saves the system's "live" non-integer context into the
+ * This routine saves the system's "live" floating point context into the
- * specified TCS.  If the specified task or fiber supports SSE then
+ * specified thread control block. The SSE registers are saved only if the
- * x87/MMX/SSEx thread info is saved, otherwise only x87/MMX thread is saved.
+ * thread is actually using them.
 *
- * @param tcs TBD
+ * @param tcs Pointer to thread control block.
 *
 * @return N/A
 */
 static void _FpCtxSave(struct tcs *tcs)
 {
-	_do_fp_ctx_save(tcs->flags & K_SSE_REGS, &tcs->preempFloatReg);
+#ifdef CONFIG_SSE
 	if (tcs->flags & K_SSE_REGS) {
 		_do_fp_and_sse_regs_save(&tcs->preempFloatReg);
 		return;
 	}
 #endif
 	_do_fp_regs_save(&tcs->preempFloatReg);
 }
 /**
 *
- * @brief Initialize non-integer context information
+ * @brief Initialize a thread's floating point context information.
 *
- * This routine initializes the system's "live" non-integer context.
+ * This routine initializes the system's "live" floating point context.
 * The SSE registers are initialized only if the thread is actually using them.
 *
- * @param tcs TBD
+ * @param tcs Pointer to thread control block.
 *
 * @return N/A
 */
 static inline void _FpCtxInit(struct tcs *tcs)
 {
-	_do_fp_ctx_init(tcs->flags & K_SSE_REGS);
+	_do_fp_regs_init();
 #ifdef CONFIG_SSE
 	if (tcs->flags & K_SSE_REGS) {
 		_do_sse_regs_init();
 	}
 #endif
 }
 /**
 *
- * @brief Enable preservation of non-integer context information
+ * @brief Enable preservation of floating point context information.
 *
- * This routine allows the specified task/fiber (which may be the active
+ * This routine informs the kernel that the specified thread (which may be
- * task/fiber) to safely share the system's floating point registers with
+ * the current thread) will be using the floating point registers.
- * other tasks/fibers.  The <options> parameter indicates which floating point
+ * The @a options parameter indicates which floating point register sets
- * register sets will be used by the specified task/fiber:
+ * will be used by the specified thread:
 *
 *  a) K_FP_REGS  indicates x87 FPU and MMX registers only
- *  b) K_SSE_REGS indicates x87 FPU and MMX and SSEx registers
+ *  b) K_SSE_REGS indicates SSE registers (and also x87 FPU and MMX registers)
 *
- * Invoking this routine creates a floating point thread for the task/fiber
+ * Invoking this routine initializes the thread's floating point context info
- * that corresponds to an FPU that has been reset.  The system will thereafter
+ * to that of an FPU that has been reset. The next time the thread is scheduled
- * protect the task/fiber's FP context so that it is not altered during
+ * by _Swap() it will either inherit an FPU that is guaranteed to be in a "sane"
- * a pre-emptive context switch.
+ * state (if the most recent user of the FPU was cooperatively swapped out)
 * or the thread's own floating point context will be loaded (if the most
 * recent user of the FPU was pre-empted, or if this thread is the first user
 * of the FPU). Thereafter, the kernel will protect the thread's FP context
 * so that it is not altered during a preemptive context switch.
 *
- * WARNING
+ * @warning
 * This routine should only be used to enable floating point support for a
- * task/fiber that does not currently have such support enabled already.
+ * thread that does not currently have such support enabled already.
 *
- * @param tcs  TDB
+ * @param tcs Pointer to thread control block.
- * @param options set to either K_FP_REGS or K_SSE_REGS
+ * @param options Registers to be preserved (K_FP_REGS or K_SSE_REGS).
 *
 * @return N/A
 *
- * INTERNAL
+ * @internal
- * Since the transition from "non-FP supporting" to "FP supporting" must be done
+ * The transition from "non-FP supporting" to "FP supporting" must be done
- * atomically to avoid confusing the floating point logic used by _Swap(),
+ * atomically to avoid confusing the floating point logic used by _Swap(), so
- * this routine locks interrupts to ensure that a context switch does not occur,
+ * this routine locks interrupts to ensure that a context switch does not occur.
- * The locking isn't really needed when the routine is called by a fiber
+ * The locking isn't really needed when the routine is called by a cooperative
- * (since context switching can't occur), but it is harmless and allows a single
+ * thread (since context switching can't occur), but it is harmless.
 * routine to be called by both tasks and fibers (thus saving code space).
 *
 * If necessary, the interrupt latency impact of calling this routine from a
 * fiber could be lessened by re-designing things so that only task-type callers
 * locked interrupts (i.e. move the locking to task_float_enable()). However,
 * all calls to fiber_float_enable() would need to be reviewed to ensure they
 * are only used from a fiber, rather than from "generic" code used by both
 * tasks and fibers.
 */
-void _FpEnable(struct tcs *tcs, unsigned int options)
+void k_float_enable(struct tcs *tcs, unsigned int options)
 {
 	unsigned int imask;
 	struct tcs *fp_owner;
-	/* Lock interrupts to prevent a pre-emptive context switch from occuring
+	/* Ensure a preemptive context switch does not occur */
 	 */
 	imask = irq_lock();
-	/* Indicate task/fiber requires non-integer context saving */
+	/* Indicate thread requires floating point context saving */
-	tcs->flags |= options | K_FP_REGS;
+	tcs->flags |= options;
 	/*
-	 * Current task/fiber might not allow FP instructions, so clear CR0[TS]
+	 * The current thread might not allow FP instructions, so clear CR0[TS]
 	 * so we can use them. (CR0[TS] gets restored later on, if necessary.)
 	 */
 	__asm__ volatile("clts\n\t");
 	/*
-	 * Save the existing non-integer context (since it is about to change),
+	 * Save existing floating point context (since it is about to change),
 	 * but only if the FPU is "owned" by an FP-capable task that is
-	 * currently
+	 * currently handling an interrupt or exception (meaning its FP context
-	 * handling an interrupt or exception (meaning it's FP context must be
+	 * must be preserved).
 	 * preserved).
 	 */
 	fp_owner = _nanokernel.current_fp;
@ -201,15 +179,14 @@ void _FpEnable(struct tcs *tcs, unsigned int options)
 	_FpCtxInit(tcs);
-	/* Associate the new FP context with the specified task/fiber */
+	/* Associate the new FP context with the specified thread */
 	if (tcs == _nanokernel.current) {
 		/*
-		 * When enabling FP support for self, just claim ownership of
+		 * When enabling FP support for the current thread, just claim
-		 *the FPU
+		 * ownership of the FPU and leave CR0[TS] unset.
 		 * and leave CR0[TS] unset.
 		 *
-		 * (Note: the FP context is "live" in hardware, not saved in TCS.)
+		 * (The FP context is "live" in hardware, not saved in TCS.)
 		 */
 		_nanokernel.current_fp = tcs;
@ -219,13 +196,12 @@ void _FpEnable(struct tcs *tcs, unsigned int options)
 		 * of the FPU to them (unless we need it ourselves).
 		 */
-		if ((_nanokernel.current->flags & K_FP_REGS) != K_FP_REGS) {
+		if ((_nanokernel.current->flags & _FP_USER_MASK) == 0) {
 			/*
 			 * We are not FP-capable, so mark FPU as owned by the
-			 * thread
+			 * thread we've just enabled FP support for, then
-			 * we've just enabled FP support for, then disable our
+			 * disable our own FP access by setting CR0[TS] back
-			 * own
+			 * to its original state.
 			 * FP access by setting CR0[TS] to its original state.
 			 */
 			_nanokernel.current_fp = tcs;
@ -233,24 +209,19 @@ void _FpEnable(struct tcs *tcs, unsigned int options)
 		} else {
 			/*
 			 * We are FP-capable (and thus had FPU ownership on
-			 *entry), so save
+			 * entry), so save the new FP context in their TCS,
-			 * the new FP context in their TCS, leave FPU ownership
+			 * leave FPU ownership with self, and leave CR0[TS]
-			 *with self,
+			 * unset.
 			 * and leave CR0[TS] unset.
 			 *
-			 * Note: The saved FP context is needed in case the task
+			 * The saved FP context is needed in case the thread
 			 *or fiber
 			 * we enabled FP support for is currently pre-empted,
-			 *since _Swap()
+			 * since _Swap() uses it to restore FP context when
-			 * uses it to restore FP context when the task/fiber
+			 * the thread re-activates.
 			 *re-activates.
 			 *
-			 * Note: Saving the FP context reinits the FPU, and thus
+			 * Saving the FP context reinits the FPU, and thus
-			 *our own
+			 * our own FP context, but that's OK since it didn't
-			 * FP context, but that's OK since it didn't need to be
+			 * need to be preserved. (i.e. We aren't currently
-			 *preserved.
+			 * handling an interrupt or exception.)
 			 * (i.e. We aren't currently handling an interrupt or
 			 *exception.)
 			 */
 			_FpCtxSave(tcs);
@ -262,64 +233,37 @@ void _FpEnable(struct tcs *tcs, unsigned int options)
 /**
 *
- * @brief Enable preservation of non-integer context information
+ * @brief Disable preservation of floating point context information.
 *
- * This routine allows a thread to permit any thread (including itself) to
+ * This routine informs the kernel that the specified thread (which may be
- * safely share the system's floating point registers with other threads.
+ * the current thread) will no longer be using the floating point registers.
 *
- * See the description of _FpEnable() for further details.
+ * @warning
 *
 * @return N/A
 */
 FUNC_ALIAS(_FpEnable, k_float_enable, void);
 /**
 *
 * @brief Disable preservation of non-integer context information
 *
 * This routine prevents the specified task/fiber (which may be the active
 * task/fiber) from safely sharing any of the system's floating point registers
 * with other tasks/fibers.
 *
 * WARNING
 * This routine should only be used to disable floating point support for
- * a task/fiber that currently has such support enabled.
+ * a thread that currently has such support enabled.
 *
- * @param tcs TBD
+ * @param tcs Pointer to thread control block.
 *
 * @return N/A
 *
- * INTERNAL
+ * @internal
- * Since the transition from "FP supporting" to "non-FP supporting" must be done
+ * The transition from "FP supporting" to "non-FP supporting" must be done
- * atomically to avoid confusing the floating point logic used by _Swap(),
+ * atomically to avoid confusing the floating point logic used by _Swap(), so
- * this routine locks interrupts to ensure that a context switch does not occur,
+ * this routine locks interrupts to ensure that a context switch does not occur.
- * The locking isn't really needed when the routine is called by a fiber
+ * The locking isn't really needed when the routine is called by a cooperative
- * (since context switching can't occur), but it is harmless and allows a single
+ * thread (since context switching can't occur), but it is harmless.
 * routine to be called by both tasks and fibers (thus saving code space).
 *
 * If necessary, the interrupt latency impact of calling this routine from a
 * fiber could be lessened by re-designing things so that only task-type callers
 * locked interrupts (i.e. move the locking to task_float_disable()). However,
 * all calls to fiber_float_disable() would need to be reviewed to ensure they
 * are only used from a fiber, rather than from "generic" code used by both
 * tasks and fibers.
 */
-void _FpDisable(struct tcs *tcs)
+void k_float_disable(struct tcs *tcs)
 {
 	unsigned int imask;
-	/* Lock interrupts to prevent a pre-emptive context switch from occuring
+	/* Ensure a preemptive context switch does not occur */
 	 */
 	imask = irq_lock();
-	/*
+	/* Disable all floating point capabilities for the thread */
 	 * Disable _all_ floating point capabilities for the task/fiber,
 	 * regardless
 	 * of the options specified at the time support was enabled.
 	 */
-	tcs->flags &= ~(K_FP_REGS | K_SSE_REGS);
+	tcs->flags &= ~_FP_USER_MASK;
 	if (tcs == _nanokernel.current) {
 		_FpAccessDisable();
@ -334,40 +278,21 @@ void _FpDisable(struct tcs *tcs)
 /**
 *
- * @brief Disable preservation of non-integer context information
+ * @brief Handler for "device not available" exception.
 *
 * This routine allows a thread to disallow any thread (including itself) from
 * safely sharing any of the system's floating point registers with other
 * threads.
 *
 * WARNING
 * This routine should only be used to disable floating point support for
 * a thread that currently has such support enabled.
 *
 * @return N/A
 */
 FUNC_ALIAS(_FpDisable, k_float_disable, void);
 /**
 *
 * @brief Handler for "device not available" exception
 *
 * This routine is registered to handle the "device not available" exception
- * (vector = 7)
+ * (vector = 7).
 *
 * The processor will generate this exception if any x87 FPU, MMX, or SSEx
- * instruction is executed while CR0[TS]=1.  The handler then enables the
+ * instruction is executed while CR0[TS]=1. The handler then enables the
- * current task or fiber with the K_FP_REGS option (or the K_SSE_REGS option
+ * current thread to use all supported floating point registers.
 * if the SSE configuration option has been enabled).
 *
- * @param pEsf this value is not used for this architecture
+ * @param pEsf This value is not used.
 *
 * @return N/A
 */
-void _FpNotAvailableExcHandler(NANO_ESF * pEsf)
+void _FpNotAvailableExcHandler(NANO_ESF *pEsf)
 {
 	unsigned int enableOption;
 	ARG_UNUSED(pEsf);
 	/*
@ -381,13 +306,7 @@ void _FpNotAvailableExcHandler(NANO_ESF * pEsf)
 	/* Enable highest level of FP capability configured into the kernel */
-#ifdef CONFIG_SSE
+	k_float_enable(_nanokernel.current, _FP_USER_MASK);
 	enableOption = K_SSE_REGS;
 #else
 	enableOption = K_FP_REGS;
 #endif
 	_FpEnable(_nanokernel.current, enableOption);
 }
 _EXCEPTION_CONNECT_NOCODE(_FpNotAvailableExcHandler, IV_DEVICE_NOT_AVAILABLE);
--- a/arch/x86/core/swap.S
+++ b/arch/x86/core/swap.S
@ -162,19 +162,18 @@ SECTION_FUNC(TEXT, _Swap)
 	/*
-	 * Determine whether the incoming thread utilizes non-integer
+	 * Determine whether the incoming thread utilizes floating point registers
-	 * capabilities _and_ whether the thread was context switched
+	 * _and_ whether the thread was context switched out preemptively.
 	 * out preemptively.
 	 */
-	testl	$K_FP_REGS, __tTCS_flags_OFFSET (%eax)
+	testl	$_FP_USER_MASK, __tTCS_flags_OFFSET (%eax)
 	je 	restoreContext_NoFloatSwap
 	/*
-	 * The incoming thread uses non-integer capabilities (x87 FPU and/or
+	 * The incoming thread uses floating point registers:
-	 * XMM regs): Was it the last thread to use non-integer capabilities?
+	 * Was it the last thread to use floating point registers?
-	 * If so, there there is no need to restore the non-integer context.
+	 * If so, there is no need to restore the floating point context.
 	 */
 	movl	__tNANO_current_fp_OFFSET (%edi), %ebx
@ -183,10 +182,10 @@ SECTION_FUNC(TEXT, _Swap)
 	/*
-	 * The incoming thread uses non-integer capabilities (x87 FPU and/or
+	 * The incoming thread uses floating point registers and it was _not_
-	 * XMM regs) and it was _not_ the last thread to use the non-integer
+	 * the last thread to use those registers:
-	 * capabilities: Check whether the current FP context actually needs
+	 * Check whether the current FP context actually needs to be saved
-	 * to be saved before swapping in the context of the incoming thread
+	 * before swapping in the context of the incoming thread.
 	 */
 	testl	%ebx, %ebx
@ -194,9 +193,9 @@ SECTION_FUNC(TEXT, _Swap)
 	/*
-	 * The incoming thread uses non-integer capabilities (x87 FPU and/or
+	 * The incoming thread uses floating point registers and it was _not_
-	 * XMM regs) and it was _not_ the last thread to use the non-integer
+	 * the last thread to use those registers _and_ the current FP context
-	 * capabilities _and_ the current FP context needs to be saved.
+	 * needs to be saved.
 	 *
 	 * Given that the ST[0] -> ST[7] and XMM0 -> XMM7 registers are all
 	 * 'volatile', only save the registers if the "current FP context"
@ -237,10 +236,10 @@ restoreContext_NoFloatSave:
 	 * Restore floating point context of the incoming thread.
 	 *********************************************************/
-        /*
+	/*
 	 * Again, given that the ST[0] -> ST[7] and XMM0 -> XMM7 registers are
-	 * all 'volatile', only restore the registers if the incoming
+	 * all 'volatile', only restore the registers if the incoming thread
-	 * thread was previously preemptively context switched out.
+	 * was previously preemptively context switched out.
 	 */
 	testl   $INT_OR_EXC_MASK, __tTCS_flags_OFFSET (%eax)
@ -264,31 +263,30 @@ x87FloatRestore:
 floatRestoreDone:
 restoreContext_NoFloatRestore:
-	/* record that the incoming thread "owns" the non-integer registers */
+	/* record that the incoming thread "owns" the floating point registers */
 	movl	%eax, __tNANO_current_fp_OFFSET (%edi)
 	/*
-	 * Branch point when none of the non-integer registers need to be
+	 * Branch point when none of the floating point registers need to be
-	 * swapped either due to a) the incoming thread does not
+	 * swapped because: a) the incoming thread does not use them OR
-	 * K_FP_REGS | K_SSE_REGS, or b) the incoming thread is the same as
+	 * b) the incoming thread is the last thread that used those registers.
 	 * the last thread that utilized the non-integer registers.
 	 */
 restoreContext_NoFloatSwap:
 	/*
-	 * Leave CR0[TS] clear if incoming thread utilizes "floating point"
+	 * Leave CR0[TS] clear if incoming thread utilizes the floating point
-	 * instructions
+	 * registers
 	 */
-	testl	$K_FP_REGS, __tTCS_flags_OFFSET (%eax)
+	testl	$_FP_USER_MASK, __tTCS_flags_OFFSET (%eax)
 	jne	CROHandlingDone
 	/*
-	 * The incoming thread does NOT currently utilize "floating point"
+	 * The incoming thread does NOT currently utilize the floating point
-	 * instructions, so set CR0[TS] to ensure the "device not available"
+	 * registers, so set CR0[TS] to ensure the "device not available"
 	 * exception occurs on the first attempt to access a x87 FPU, MMX,
 	 * or XMM register.
 	 */
@ -301,9 +299,6 @@ CROHandlingDone:
 #endif /* CONFIG_FP_SHARING */
 	/* update _nanokernel.current to reflect incoming thread */
 	movl    %eax, __tNANO_current_OFFSET (%edi)
--- a/arch/x86/core/thread.c
+++ b/arch/x86/core/thread.c
@ -93,12 +93,7 @@ static void _new_thread_internal(char *pStackMem, unsigned stackSize,
 #endif /* CONFIG_FP_SHARING || CONFIG_GDB_INFO */
 	/* k_q_node initialized upon first insertion in a list */
-#ifdef CONFIG_FP_SHARING
+
 	/* ensure K_FP_REGS is set when K_SSE_REGS is set */
 	if (options & K_SSE_REGS) {
 		options |= K_FP_REGS;
 	}
 #endif
 	tcs->flags = options | K_PRESTART;
 	tcs->sched_locked = 0;
--- a/arch/x86/include/asm_inline_gcc.h
+++ b/arch/x86/include/asm_inline_gcc.h
@ -86,51 +86,62 @@ static inline void _FpAccessDisable(void)
 *
 * @return N/A
 */
-static inline void _do_fp_ctx_save(int flags, void *preemp_float_reg)
+static inline void _do_fp_regs_save(void *preemp_float_reg)
 {
-#ifdef CONFIG_SSE
+	__asm__ volatile("fnsave (%0);\n\t"
-	if (flags) {
+			 :
-		__asm__ volatile("fxsave (%0);\n\t"
+			 : "r"(preemp_float_reg)
-				 :
+			 : "memory");
 				 : "r"(preemp_float_reg)
 				 : "memory");
 	} else
 #else
 	ARG_UNUSED(flags);
 #endif /* CONFIG_SSE */
 	{
 		__asm__ volatile("fnsave (%0);\n\t"
 				 :
 				 : "r"(preemp_float_reg)
 				 : "memory");
 	}
 }
 #ifdef CONFIG_SSE
 /**
 *
- * @brief Initialize non-integer context information
+ * @brief Save non-integer context information
 *
- * This routine initializes the system's "live" non-integer context.
+ * This routine saves the system's "live" non-integer context into the
- * Function is invoked by _FpCtxInit(struct tcs *tcs)
+ * specified area.  If the specified task or fiber supports SSE then
 * x87/MMX/SSEx thread info is saved, otherwise only x87/MMX thread is saved.
 * Function is invoked by _FpCtxSave(struct tcs *tcs)
 *
 * @return N/A
 */
-static inline void _do_fp_ctx_init(int flags)
+static inline void _do_fp_and_sse_regs_save(void *preemp_float_reg)
 {
-	/* initialize x87 FPU */
+	__asm__ volatile("fxsave (%0);\n\t"
-	__asm__ volatile("fninit\n\t");
+			 :
 			 : "r"(preemp_float_reg)
 			 : "memory");
 }
 #endif /* CONFIG_SSE */
 /**
 *
 * @brief Initialize floating point register context information.
 *
 * This routine initializes the system's "live" floating point registers.
 *
 * @return N/A
 */
 static inline void _do_fp_regs_init(void)
 {
 	__asm__ volatile("fninit\n\t");
 }
 #ifdef CONFIG_SSE
-	if (flags) {
+/**
-		/* initialize SSE (since thread uses it) */
+ *
-		__asm__ volatile("ldmxcsr _sse_mxcsr_default_value\n\t");
+ * @brief Initialize SSE register context information.
-
+ *
-	}
+ * This routine initializes the system's "live" SSE registers.
-#else
+ *
-	ARG_UNUSED(flags);
+ * @return N/A
-#endif /* CONFIG_SSE */
+ */
 static inline void _do_sse_regs_init(void)
 {
 	__asm__ volatile("ldmxcsr _sse_mxcsr_default_value\n\t");
 }
 #endif /* CONFIG_SSE */
 #endif /* CONFIG_FP_SHARING */
--- a/arch/x86/include/nano_private.h
+++ b/arch/x86/include/nano_private.h
@ -52,14 +52,6 @@
 /*
 * Bitmask definitions for the struct tcs->flags bit field
 *
 * The K_FP_REGS flag bit will be set whenever a thread uses any non-integer
 * capability, whether it's just the x87 FPU capability, SSE instructions, or
 * a combination of both. The K_SSE_REGS flag bit will only be set if a thread
 * uses SSE instructions.
 *
 * Note: Any change to the definitions K_FP_REGS and K_SSE_REGS must also
 * be made to nanokernel/x86/arch.h.
 */
 #define K_STATIC  0x00000800
@ -76,14 +68,24 @@
 #define INT_ACTIVE 0x2     /* 1 = executing context is interrupt handler */
 #define EXC_ACTIVE 0x4     /* 1 = executing context is exception handler */
-#define K_FP_REGS 0x10	   /* 1 = thread uses floating point registers */
+#if defined(CONFIG_FP_SHARING)
-#define K_SSE_REGS 0x20    /* 1 = thread uses SSEx registers */
+#define K_FP_REGS  0x10    /* 1 = thread uses floating point registers */
 #endif
 #if defined(CONFIG_FP_SHARING) && defined(CONFIG_SSE)
 #define K_SSE_REGS 0x20    /* 1 = thread uses SSEx (and also FP) registers */
 #endif
 #define K_ESSENTIAL 0x200  /* 1 = system thread that must not abort */
 #define NO_METRICS 0x400   /* 1 = _Swap() not to update task metrics */
 #define NO_METRICS_BIT_OFFSET 0xa /* Bit position of NO_METRICS */
 #define INT_OR_EXC_MASK (INT_ACTIVE | EXC_ACTIVE)
 #if defined(CONFIG_FP_SHARING) && defined(CONFIG_SSE)
 #define _FP_USER_MASK (K_FP_REGS | K_SSE_REGS)
 #elif defined(CONFIG_FP_SHARING)
 #define _FP_USER_MASK (K_FP_REGS)
 #endif
 /*
 * Exception/interrupt vector definitions: vectors 20 to 31 are reserved for
 * Intel; vectors 32 to 255 are user defined interrupt vectors.
--- a/include/arch/x86/arch.h
+++ b/include/arch/x86/arch.h
@ -386,17 +386,6 @@ static ALWAYS_INLINE void _arch_irq_unlock(unsigned int key)
 */
 #define NANO_SOFT_IRQ	((unsigned int) (-1))
 #ifdef CONFIG_FP_SHARING
 /* Definitions for the 'options' parameter to the fiber_fiber_start() API */
 /** thread uses floating point registers */
 #define K_FP_REGS		0x10
 #ifdef CONFIG_SSE
 /** thread uses SSEx registers */
 #define K_SSE_REGS		0x20
 #endif /* CONFIG_SSE */
 #endif /* CONFIG_FP_SHARING */
 /**
 * @brief Enable a specific IRQ
 * @param irq IRQ