summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSamuel Thibault <samuel.thibault@ens-lyon.org>2020-11-29 16:20:21 +0100
committerSamuel Thibault <samuel.thibault@ens-lyon.org>2020-11-29 16:20:21 +0100
commit261eb7f1ade265bb359609a796715fa06f4d32fd (patch)
tree283adca5d013fa7c9d0efdb430ac7167f5750bae
parent5dbc6990b16a969e470392abe48e8ff9552780b3 (diff)
x86: Fix initialization of new threads
fninit does not clear MMX/SSE/AVX registers, so we have to use rstor to clear them when starting a new thread. Along the way, we might as well keep a default state that is loaded into each new thread. * i386/include/mach/i386/fp_reg.h (XSAVE_XCOMP_BV_COMPACT): New macro. * i386/i386/fpu.h (fp_default_state): New variable declaration. * i386/i386/fpu.c (fp_default_state): New variable. (MXCSR_DEFAULT, CWD_DEFAULT): New macros. (fpu_module_init): Allocate and initialize fp_default_state. (fpinit): rstor from fp_default_state instead of setting FPU state by hand. (fp_load): Copy initial state from fp_default_state instead of setting it to 0. This is more future-proof since this is the exact state that we are loading in fpinit. (fp_state_alloc): Copy initial state from fp_default_state.
-rw-r--r--i386/i386/fpu.c94
-rw-r--r--i386/i386/fpu.h1
-rw-r--r--i386/include/mach/i386/fp_reg.h1
3 files changed, 47 insertions, 49 deletions
diff --git a/i386/i386/fpu.c b/i386/i386/fpu.c
index 03f43f9d..365187ee 100644
--- a/i386/i386/fpu.c
+++ b/i386/i386/fpu.c
@@ -69,13 +69,20 @@
#define ASSERT_IPL(L)
#endif
-int fp_kind = FP_387; /* 80387 present */
+int fp_kind = FP_387; /* 80387 present */
enum fp_save_kind fp_save_kind = FP_FNSAVE; /* Which instruction we use to save/restore FPU state */
-uint64_t fp_xsave_support; /* Bitmap of supported XSAVE save areas */
-unsigned fp_xsave_size = sizeof(struct i386_fpsave_state);
+uint64_t fp_xsave_support; /* Bitmap of supported XSAVE save areas */
+unsigned fp_xsave_size = sizeof(struct i386_fpsave_state);
+struct i386_fpsave_state *fp_default_state;
struct kmem_cache ifps_cache; /* cache for FPU save area */
static unsigned long mxcsr_feature_mask = 0xffffffff; /* Always AND user-provided mxcsr with this security mask */
+/* Default FPU configuration */
+#define MXCSR_DEFAULT 0x1f80
+#define CWD_DEFAULT ((0x037f \
+ & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC)) \
+ | (FPC_PC_64|FPC_IC_AFF))
+
#if NCPUS == 1
volatile thread_t fp_thread = THREAD_NULL;
/* thread whose state is in FPU */
@@ -239,6 +246,33 @@ fpu_module_init(void)
fp_xsave_size,
alignof(struct i386_fpsave_state),
NULL, 0);
+
+ fp_default_state = (struct i386_fpsave_state *) kmem_cache_alloc(&ifps_cache);
+ memset(fp_default_state, 0, fp_xsave_size);
+
+ switch (fp_save_kind) {
+ case FP_XSAVEC:
+ case FP_XSAVES:
+ /* XRSTORS requires compact format, a bit faster anyway */
+ fp_default_state->xfp_save_state.header.xcomp_bv = XSAVE_XCOMP_BV_COMPACT;
+ /* Fallthrough */
+ case FP_XSAVE:
+ case FP_XSAVEOPT:
+ case FP_FXSAVE:
+ fp_default_state->xfp_save_state.fp_control = CWD_DEFAULT;
+ fp_default_state->xfp_save_state.fp_status = 0;
+ fp_default_state->xfp_save_state.fp_tag = 0xffff; /* all empty */
+ if (CPU_HAS_FEATURE(CPU_FEATURE_SSE))
+ fp_default_state->xfp_save_state.fp_mxcsr = MXCSR_DEFAULT;
+ break;
+ case FP_FNSAVE:
+ fp_default_state->fp_save_state.fp_control = CWD_DEFAULT;
+ fp_default_state->fp_save_state.fp_status = 0;
+ fp_default_state->fp_save_state.fp_tag = 0xffff; /* all empty */
+ break;
+ }
+
+ fp_default_state->fp_valid = TRUE;
}
/*
@@ -534,14 +568,7 @@ ASSERT_IPL(SPL0);
}
/*
- * Initialize FPU.
- *
- * Raise exceptions for:
- * invalid operation
- * divide by zero
- * overflow
- *
- * Use 64-bit precision.
+ * Initialize FPU for an already-running thread.
*/
static void fpinit(thread_t thread)
{
@@ -549,24 +576,11 @@ static void fpinit(thread_t thread)
ASSERT_IPL(SPL0);
clear_ts();
- fninit();
- if (thread->pcb->init_control) {
- control = thread->pcb->init_control;
- }
- else
- {
- fnstcw(&control);
- control &= ~(FPC_PC|FPC_RC); /* Clear precision & rounding control */
- control |= (FPC_PC_64 | /* Set precision */
- FPC_RC_RN | /* round-to-nearest */
- FPC_ZE | /* Suppress zero-divide */
- FPC_OE | /* and overflow */
- FPC_UE | /* underflow */
- FPC_IE | /* Allow NaNQs and +-INF */
- FPC_DE | /* Allow denorms as operands */
- FPC_PE); /* No trap for precision loss */
- }
- fldcw(control);
+ fpu_rstor(fp_default_state);
+
+ control = thread->pcb->init_control;
+ if (control)
+ fldcw(control);
}
/*
@@ -845,7 +859,7 @@ ASSERT_IPL(SPL0);
ifps = pcb->ims.ifps;
if (ifps == 0) {
ifps = (struct i386_fpsave_state *) kmem_cache_alloc(&ifps_cache);
- memset(ifps, 0, fp_xsave_size);
+ memcpy(ifps, fp_default_state, fp_xsave_size);
pcb->ims.ifps = ifps;
fpinit(thread);
#if 1
@@ -893,26 +907,8 @@ fp_state_alloc(void)
struct i386_fpsave_state *ifps;
ifps = (struct i386_fpsave_state *)kmem_cache_alloc(&ifps_cache);
- memset(ifps, 0, fp_xsave_size);
+ memcpy(ifps, fp_default_state, fp_xsave_size);
pcb->ims.ifps = ifps;
-
- ifps->fp_valid = TRUE;
-
- if (fp_save_kind != FP_FNSAVE) {
- ifps->xfp_save_state.fp_control = (0x037f
- & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC))
- | (FPC_PC_64|FPC_IC_AFF);
- ifps->xfp_save_state.fp_status = 0;
- ifps->xfp_save_state.fp_tag = 0xffff; /* all empty */
- if (CPU_HAS_FEATURE(CPU_FEATURE_SSE))
- ifps->xfp_save_state.fp_mxcsr = 0x1f80;
- } else {
- ifps->fp_save_state.fp_control = (0x037f
- & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC))
- | (FPC_PC_64|FPC_IC_AFF);
- ifps->fp_save_state.fp_status = 0;
- ifps->fp_save_state.fp_tag = 0xffff; /* all empty */
- }
}
#if (defined(AT386) || defined(ATX86_64)) && !defined(MACH_XEN)
diff --git a/i386/i386/fpu.h b/i386/i386/fpu.h
index 33838cc0..a7d8bade 100644
--- a/i386/i386/fpu.h
+++ b/i386/i386/fpu.h
@@ -223,6 +223,7 @@ enum fp_save_kind {
};
extern int fp_kind;
extern enum fp_save_kind fp_save_kind;
+extern struct i386_fpsave_state *fp_default_state;
extern uint64_t fp_xsave_support;
extern void fp_save(thread_t thread);
extern void fp_load(thread_t thread);
diff --git a/i386/include/mach/i386/fp_reg.h b/i386/include/mach/i386/fp_reg.h
index 41301ec4..7a3735ae 100644
--- a/i386/include/mach/i386/fp_reg.h
+++ b/i386/include/mach/i386/fp_reg.h
@@ -53,6 +53,7 @@ struct i386_fp_regs {
/* space for 8 80-bit FP registers */
};
+#define XSAVE_XCOMP_BV_COMPACT (((uint64_t)1) << 63)
struct i386_xfp_xstate_header {
uint64_t xfp_features;
uint64_t xcomp_bv;