1/*-
  2 * Copyright (c) 1990 The Regents of the University of California.
  3 * All rights reserved.
  4 *
  5 * This code is derived from software contributed to Berkeley by
  6 * William Jolitz.
  7 *
  8 * Redistribution and use in source and binary forms, with or without
  9 * modification, are permitted provided that the following conditions
 10 * are met:
 11 * 1. Redistributions of source code must retain the above copyright
 12 *    notice, this list of conditions and the following disclaimer.
 13 * 2. Redistributions in binary form must reproduce the above copyright
 14 *    notice, this list of conditions and the following disclaimer in the
 15 *    documentation and/or other materials provided with the distribution.
 16 * 3. Neither the name of the University nor the names of its contributors
 17 *    may be used to endorse or promote products derived from this software
 18 *    without specific prior written permission.
 19 *
 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 30 * SUCH DAMAGE.
 31 *
 32 *	from: @(#)npx.h	5.3 (Berkeley) 1/18/91
 33 */
 34
 35/*
 36 * Floating Point Data Structures and Constants
 37 * W. Jolitz 1/90
 38 */
 39
 40#ifndef _X86_FPU_H_
 41#define	_X86_FPU_H_
 42
 43/* Environment information of floating point unit. */
 44struct env87 {
 45	int32_t		en_cw;		/* control word (16bits) */
 46	int32_t		en_sw;		/* status word (16bits) */
 47	int32_t		en_tw;		/* tag word (16bits) */
 48	int32_t		en_fip;		/* fp instruction pointer */
 49	uint16_t	en_fcs;		/* fp code segment selector */
 50	uint16_t	en_opcode;	/* opcode last executed (11 bits) */
 51	int32_t		en_foo;		/* fp operand offset */
 52	int32_t		en_fos;		/* fp operand segment selector */
 53};
 54
 55/* Contents of each x87 floating point accumulator. */
 56struct fpacc87 {
 57	uint8_t		fp_bytes[10];
 58};
 59
 60/* Floating point context. (i386 fnsave/frstor) */
 61struct save87 {
 62	struct env87	sv_env;		/* floating point control/status */
 63	struct fpacc87	sv_ac[8];	/* accumulator contents, 0-7 */
 64	uint8_t		sv_pad0[4];	/* saved status word (now unused) */
 65	uint8_t		sv_pad[64];
 66};
 67
 68/* Contents of each SSE extended accumulator. */
 69struct xmmacc {
 70	uint8_t		xmm_bytes[16];
 71};
 72
 73/* Contents of the upper 16 bytes of each AVX extended accumulator. */
 74struct ymmacc {
 75	uint8_t		ymm_bytes[16];
 76};
 77
 78/* Rename structs below depending on machine architecture. */
 79#ifdef __i386__
 80#define	__envxmm32	envxmm
 81#else
 82#define	__envxmm32	envxmm32
 83#define	__envxmm64	envxmm
 84#endif
 85
 86struct __envxmm32 {
 87	uint16_t	en_cw;		/* control word (16bits) */
 88	uint16_t	en_sw;		/* status word (16bits) */
 89	uint16_t	en_tw;		/* tag word (16bits) */
 90	uint16_t	en_opcode;	/* opcode last executed (11 bits) */
 91	uint32_t	en_fip;		/* fp instruction pointer */
 92	uint16_t	en_fcs;		/* fp code segment selector */
 93	uint16_t	en_pad0;	/* padding */
 94	uint32_t	en_foo;		/* fp operand offset */
 95	uint16_t	en_fos;		/* fp operand segment selector */
 96	uint16_t	en_pad1;	/* padding */
 97	uint32_t	en_mxcsr;	/* SSE control/status register */
 98	uint32_t	en_mxcsr_mask;	/* valid bits in mxcsr */
 99};
100
/*
 * FPU/SSE environment in the 64-bit layout.  Compared with the 32-bit
 * layout, the tag word is 8 bits wide (followed by the en_zero byte) and
 * the instruction and operand pointers are single 64-bit fields.  The
 * fields add up to 32 bytes.  Used as sv_env in the non-i386
 * struct savefpu and in struct savefpu_ymm.
 */
struct __envxmm64 {
	uint16_t	en_cw;		/* control word (16bits) */
	uint16_t	en_sw;		/* status word (16bits) */
	uint8_t		en_tw;		/* tag word (8bits) */
	uint8_t		en_zero;
	uint16_t	en_opcode;	/* opcode last executed (11 bits) */
	uint64_t	en_rip;		/* fp instruction pointer */
	uint64_t	en_rdp;		/* fp operand pointer */
	uint32_t	en_mxcsr;	/* SSE control/status register */
	uint32_t	en_mxcsr_mask;	/* valid bits in mxcsr */
};
112
113/* Floating point context. (i386 fxsave/fxrstor) */
114struct savexmm {
115	struct __envxmm32	sv_env;
116	struct {
117		struct fpacc87	fp_acc;
118		uint8_t		fp_pad[6];      /* padding */
119	} sv_fp[8];
120	struct xmmacc		sv_xmm[8];
121	uint8_t			sv_pad[224];
122} __aligned(16);
123
124#ifdef __i386__
125union savefpu {
126	struct save87	sv_87;
127	struct savexmm	sv_xmm;
128};
129#else
130/* Floating point context. (amd64 fxsave/fxrstor) */
131struct savefpu {
132	struct __envxmm64	sv_env;
133	struct {
134		struct fpacc87	fp_acc;
135		uint8_t		fp_pad[6];	/* padding */
136	} sv_fp[8];
137	struct xmmacc		sv_xmm[16];
138	uint8_t			sv_pad[96];
139} __aligned(16);
140#endif
141
/*
 * Extended state (xstate) header; the fields add up to 64 bytes.  It is
 * the first member of struct savexmm_xstate and struct savefpu_xstate,
 * where it precedes the YMM upper halves.
 */
struct xstate_hdr {
	uint64_t	xstate_bv;
	uint64_t	xstate_xcomp_bv;
	uint8_t		xstate_rsrv0[8];
	uint8_t		xstate_rsrv[40];
};
/* Bit 63.  NOTE(review): presumably a flag in xstate_xcomp_bv - confirm. */
#define	XSTATE_XCOMP_BV_COMPACT	(1ULL << 63)
149
150struct savexmm_xstate {
151	struct xstate_hdr	sx_hd;
152	struct ymmacc		sx_ymm[16];
153};
154
155struct savexmm_ymm {
156	struct __envxmm32	sv_env;
157	struct {
158		struct fpacc87	fp_acc;
159		int8_t		fp_pad[6];	/* padding */
160	} sv_fp[8];
161	struct xmmacc		sv_xmm[16];
162	uint8_t			sv_pad[96];
163	struct savexmm_xstate	sv_xstate;
164} __aligned(64);
165
166struct savefpu_xstate {
167	struct xstate_hdr	sx_hd;
168	struct ymmacc		sx_ymm[16];
169};
170
171struct savefpu_ymm {
172	struct __envxmm64	sv_env;
173	struct {
174		struct fpacc87	fp_acc;
175		int8_t		fp_pad[6];	/* padding */
176	} sv_fp[8];
177	struct xmmacc		sv_xmm[16];
178	uint8_t			sv_pad[96];
179	struct savefpu_xstate	sv_xstate;
180} __aligned(64);
181
/* Drop the struct-tag rename macros so they do not leak to includers. */
#undef __envxmm32
#undef __envxmm64
184
/*
 * The hardware default control word for i387's and later coprocessors is
 * 0x37F, giving:
 *
 *	round to nearest
 *	64-bit precision
 *	all exceptions masked.
 *
 * FreeBSD/i386 uses 53 bit precision for things like fadd/fsub/fsqrt etc
 * because of the difference between memory and fpu register stack arguments.
 * If it's using an intermediate fpu register, it has 80/64 bits to work
 * with.  If it uses memory, it has 64/53 bits to work with.  However,
 * gcc is aware of this and goes to a fair bit of trouble to make the
 * best use of it.
 *
 * This is mostly academic for AMD64, because the ABI prefers the use of
 * SSE2 based math.  For FreeBSD/amd64, we go with the default settings.
 */
/* Hardware default x87 control word. */
#define	__INITIAL_FPUCW__	0x037F
/* x87 control word used on FreeBSD/i386. */
#define	__INITIAL_FPUCW_I386__	0x127F
/* Alias for the i386 control word. */
#define	__INITIAL_NPXCW__	__INITIAL_FPUCW_I386__
/* Initial value of the SSE control/status register (MXCSR). */
#define	__INITIAL_MXCSR__	0x1F80
/* Initial mask of valid bits in MXCSR. */
#define	__INITIAL_MXCSR_MASK__	0xFFBF
208
/*
 * The current value of %xcr0 is saved in the sv_pad[] field of the FPU
 * state in the NT_X86_XSTATE note in core dumps.  This offset is chosen
 * to match the offset used by NT_X86_XSTATE in other systems.
 *
 * Byte 464 falls inside sv_pad[] for both fxsave layouts in this file:
 * sv_pad[] starts at byte 288 in struct savexmm (32 + 128 + 128) and at
 * byte 416 in the non-i386 struct savefpu (32 + 128 + 256); both areas
 * end at byte 511.
 */
#define	X86_XSTATE_XCR0_OFFSET	464
215
#ifdef _KERNEL
/*
 * CR0_MP and CR0_EM are always set.  Use CR0_TS to force traps when
 * FPU access is disabled.
 *
 * fpu_enable() expands to clts().  fpu_disable() reads the current value
 * with rcr0(), ORs in CR0_TS and writes the result back with load_cr0().
 * Both are only defined when _KERNEL is defined.
 */
#define	fpu_enable()	clts()
#define	fpu_disable()	load_cr0(rcr0() | CR0_TS)
#endif
224
225#endif /* !_X86_FPU_H_ */