master
1/**
2 * This file has no copyright assigned and is placed in the Public Domain.
3 * This file is part of the mingw-w64 runtime package.
4 * No warranty is given; refer to the file DISCLAIMER.PD within this package.
5 */
6
7#include "internal.h"
8
9#if defined(__i386__) || defined(__x86_64__)
10
/* Return the current contents of the SSE MXCSR register.
 *
 * Regular x86 builds read it with the stmxcsr instruction.  ARM64EC builds
 * call the __os_arm64x_get_x64_information helper instead, passing 0 as the
 * first argument (presumably the selector for MXCSR -- confirm against the
 * helper's definition); its NTSTATUS result is not checked. */
static unsigned int get_mxcsr(void)
{
    unsigned int mxcsr;

#ifndef __arm64ec__
    __asm__ __volatile__( "stmxcsr %0" : "=m" (mxcsr) );
#else
    extern NTSTATUS (*__os_arm64x_get_x64_information)(ULONG,void*,void*);
    __os_arm64x_get_x64_information( 0, &mxcsr, NULL );
#endif
    return mxcsr;
}
22
/* Load `val` into the SSE MXCSR register.
 *
 * Regular x86 builds use the ldmxcsr instruction.  ARM64EC builds call the
 * __os_arm64x_set_x64_information helper instead, passing 0 as the first
 * argument (presumably the selector for MXCSR -- confirm against the
 * helper's definition); its NTSTATUS result is not checked. */
static void set_mxcsr( unsigned int val )
{
#if !defined(__arm64ec__)
    __asm__ __volatile__( "ldmxcsr %0" : : "m" (val) );
#else
    extern NTSTATUS (*__os_arm64x_set_x64_information)(ULONG,ULONG_PTR,void*);
    __os_arm64x_set_x64_information( 0, val, NULL );
#endif
}
32
33void __mingw_setfp_sse( unsigned int *cw, unsigned int cw_mask, unsigned int *sw, unsigned int sw_mask )
34{
35 unsigned int old_fpword, fpword = get_mxcsr();
36 unsigned int flags;
37
38 old_fpword = fpword;
39
40 cw_mask &= _MCW_EM | _MCW_RC | _MCW_DN;
41 sw_mask &= _MCW_EM;
42
43 if (sw)
44 {
45 flags = 0;
46 if (fpword & 0x1) flags |= _SW_INVALID;
47 if (fpword & 0x2) flags |= _SW_DENORMAL;
48 if (fpword & 0x4) flags |= _SW_ZERODIVIDE;
49 if (fpword & 0x8) flags |= _SW_OVERFLOW;
50 if (fpword & 0x10) flags |= _SW_UNDERFLOW;
51 if (fpword & 0x20) flags |= _SW_INEXACT;
52
53 *sw = (flags & ~sw_mask) | (*sw & sw_mask);
54 fpword &= ~0x3f;
55 if (*sw & _SW_INVALID) fpword |= 0x1;
56 if (*sw & _SW_DENORMAL) fpword |= 0x2;
57 if (*sw & _SW_ZERODIVIDE) fpword |= 0x4;
58 if (*sw & _SW_OVERFLOW) fpword |= 0x8;
59 if (*sw & _SW_UNDERFLOW) fpword |= 0x10;
60 if (*sw & _SW_INEXACT) fpword |= 0x20;
61 *sw = flags;
62 }
63
64 if (cw)
65 {
66 flags = 0;
67 if (fpword & 0x80) flags |= _EM_INVALID;
68 if (fpword & 0x100) flags |= _EM_DENORMAL;
69 if (fpword & 0x200) flags |= _EM_ZERODIVIDE;
70 if (fpword & 0x400) flags |= _EM_OVERFLOW;
71 if (fpword & 0x800) flags |= _EM_UNDERFLOW;
72 if (fpword & 0x1000) flags |= _EM_INEXACT;
73 switch (fpword & 0x6000)
74 {
75 case 0x6000: flags |= _RC_UP|_RC_DOWN; break;
76 case 0x4000: flags |= _RC_UP; break;
77 case 0x2000: flags |= _RC_DOWN; break;
78 }
79 switch (fpword & 0x8040)
80 {
81 case 0x0040: flags |= _DN_FLUSH_OPERANDS_SAVE_RESULTS; break;
82 case 0x8000: flags |= _DN_SAVE_OPERANDS_FLUSH_RESULTS; break;
83 case 0x8040: flags |= _DN_FLUSH; break;
84 }
85
86 *cw = (flags & ~cw_mask) | (*cw & cw_mask);
87 fpword &= ~0xffc0;
88 if (*cw & _EM_INVALID) fpword |= 0x80;
89 if (*cw & _EM_DENORMAL) fpword |= 0x100;
90 if (*cw & _EM_ZERODIVIDE) fpword |= 0x200;
91 if (*cw & _EM_OVERFLOW) fpword |= 0x400;
92 if (*cw & _EM_UNDERFLOW) fpword |= 0x800;
93 if (*cw & _EM_INEXACT) fpword |= 0x1000;
94 switch (*cw & _MCW_RC)
95 {
96 case _RC_UP|_RC_DOWN: fpword |= 0x6000; break;
97 case _RC_UP: fpword |= 0x4000; break;
98 case _RC_DOWN: fpword |= 0x2000; break;
99 }
100 switch (*cw & _MCW_DN)
101 {
102 case _DN_FLUSH_OPERANDS_SAVE_RESULTS: fpword |= 0x0040; break;
103 case _DN_SAVE_OPERANDS_FLUSH_RESULTS: fpword |= 0x8000; break;
104 case _DN_FLUSH: fpword |= 0x8040; break;
105 }
106
107 /* clear status word if anything changes */
108 if (fpword != old_fpword && !sw) fpword &= ~0x3f;
109 }
110
111 if (fpword != old_fpword) set_mxcsr( fpword );
112}
113#endif
114
115void __mingw_setfp( unsigned int *cw, unsigned int cw_mask,
116 unsigned int *sw, unsigned int sw_mask )
117{
118#if defined(__arm64ec__)
119 __mingw_setfp_sse(cw, cw_mask, sw, sw_mask);
120#elif defined(__i386__) || defined(__x86_64__)
121 unsigned long oldcw = 0, newcw = 0;
122 unsigned long oldsw = 0, newsw = 0;
123 unsigned int flags;
124
125 cw_mask &= _MCW_EM | _MCW_IC | _MCW_RC | _MCW_PC;
126 sw_mask &= _MCW_EM;
127
128 if (sw)
129 {
130 __asm__ __volatile__( "fstsw %0" : "=m" (newsw) );
131 oldsw = newsw;
132
133 flags = 0;
134 if (newsw & 0x1) flags |= _SW_INVALID;
135 if (newsw & 0x2) flags |= _SW_DENORMAL;
136 if (newsw & 0x4) flags |= _SW_ZERODIVIDE;
137 if (newsw & 0x8) flags |= _SW_OVERFLOW;
138 if (newsw & 0x10) flags |= _SW_UNDERFLOW;
139 if (newsw & 0x20) flags |= _SW_INEXACT;
140
141 *sw = (flags & ~sw_mask) | (*sw & sw_mask);
142 newsw &= ~0x3f;
143 if (*sw & _SW_INVALID) newsw |= 0x1;
144 if (*sw & _SW_DENORMAL) newsw |= 0x2;
145 if (*sw & _SW_ZERODIVIDE) newsw |= 0x4;
146 if (*sw & _SW_OVERFLOW) newsw |= 0x8;
147 if (*sw & _SW_UNDERFLOW) newsw |= 0x10;
148 if (*sw & _SW_INEXACT) newsw |= 0x20;
149 *sw = flags;
150 }
151
152 if (cw)
153 {
154 __asm__ __volatile__( "fstcw %0" : "=m" (newcw) );
155 oldcw = newcw;
156
157 flags = 0;
158 if (newcw & 0x1) flags |= _EM_INVALID;
159 if (newcw & 0x2) flags |= _EM_DENORMAL;
160 if (newcw & 0x4) flags |= _EM_ZERODIVIDE;
161 if (newcw & 0x8) flags |= _EM_OVERFLOW;
162 if (newcw & 0x10) flags |= _EM_UNDERFLOW;
163 if (newcw & 0x20) flags |= _EM_INEXACT;
164 switch (newcw & 0xc00)
165 {
166 case 0xc00: flags |= _RC_UP|_RC_DOWN; break;
167 case 0x800: flags |= _RC_UP; break;
168 case 0x400: flags |= _RC_DOWN; break;
169 }
170 switch (newcw & 0x300)
171 {
172 case 0x0: flags |= _PC_24; break;
173 case 0x200: flags |= _PC_53; break;
174 case 0x300: flags |= _PC_64; break;
175 }
176 if (newcw & 0x1000) flags |= _IC_AFFINE;
177
178 *cw = (flags & ~cw_mask) | (*cw & cw_mask);
179 newcw &= ~0x1f3f;
180 if (*cw & _EM_INVALID) newcw |= 0x1;
181 if (*cw & _EM_DENORMAL) newcw |= 0x2;
182 if (*cw & _EM_ZERODIVIDE) newcw |= 0x4;
183 if (*cw & _EM_OVERFLOW) newcw |= 0x8;
184 if (*cw & _EM_UNDERFLOW) newcw |= 0x10;
185 if (*cw & _EM_INEXACT) newcw |= 0x20;
186 switch (*cw & _MCW_RC)
187 {
188 case _RC_UP|_RC_DOWN: newcw |= 0xc00; break;
189 case _RC_UP: newcw |= 0x800; break;
190 case _RC_DOWN: newcw |= 0x400; break;
191 }
192 switch (*cw & _MCW_PC)
193 {
194 case _PC_64: newcw |= 0x300; break;
195 case _PC_53: newcw |= 0x200; break;
196 case _PC_24: newcw |= 0x0; break;
197 }
198 if (*cw & _IC_AFFINE) newcw |= 0x1000;
199 }
200
201 if (oldsw != newsw && (newsw & 0x3f))
202 {
203 struct {
204 WORD control_word;
205 WORD unused1;
206 WORD status_word;
207 WORD unused2;
208 WORD tag_word;
209 WORD unused3;
210 DWORD instruction_pointer;
211 WORD code_segment;
212 WORD unused4;
213 DWORD operand_addr;
214 WORD data_segment;
215 WORD unused5;
216 } fenv;
217
218 __asm__ __volatile__( "fnstenv %0" : "=m" (fenv) );
219 fenv.control_word = newcw;
220 fenv.status_word = newsw;
221 __asm__ __volatile__( "fldenv %0" : : "m" (fenv) : "st", "st(1)",
222 "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" );
223 return;
224 }
225
226 if (oldsw != newsw)
227 __asm__ __volatile__( "fnclex" );
228 if (oldcw != newcw)
229 __asm__ __volatile__( "fldcw %0" : : "m" (newcw) );
230#elif defined(__aarch64__)
231 ULONG_PTR old_fpsr = 0, fpsr = 0, old_fpcr = 0, fpcr = 0;
232 unsigned int flags;
233
234 cw_mask &= _MCW_EM | _MCW_RC;
235 sw_mask &= _MCW_EM;
236
237 if (sw)
238 {
239 __asm__ __volatile__( "mrs %0, fpsr" : "=r" (fpsr) );
240 old_fpsr = fpsr;
241
242 flags = 0;
243 if (fpsr & 0x1) flags |= _SW_INVALID;
244 if (fpsr & 0x2) flags |= _SW_ZERODIVIDE;
245 if (fpsr & 0x4) flags |= _SW_OVERFLOW;
246 if (fpsr & 0x8) flags |= _SW_UNDERFLOW;
247 if (fpsr & 0x10) flags |= _SW_INEXACT;
248 if (fpsr & 0x80) flags |= _SW_DENORMAL;
249
250 *sw = (flags & ~sw_mask) | (*sw & sw_mask);
251 fpsr &= ~0x9f;
252 if (*sw & _SW_INVALID) fpsr |= 0x1;
253 if (*sw & _SW_ZERODIVIDE) fpsr |= 0x2;
254 if (*sw & _SW_OVERFLOW) fpsr |= 0x4;
255 if (*sw & _SW_UNDERFLOW) fpsr |= 0x8;
256 if (*sw & _SW_INEXACT) fpsr |= 0x10;
257 if (*sw & _SW_DENORMAL) fpsr |= 0x80;
258 *sw = flags;
259 }
260
261 if (cw)
262 {
263 __asm__ __volatile__( "mrs %0, fpcr" : "=r" (fpcr) );
264 old_fpcr = fpcr;
265
266 flags = 0;
267 if (!(fpcr & 0x100)) flags |= _EM_INVALID;
268 if (!(fpcr & 0x200)) flags |= _EM_ZERODIVIDE;
269 if (!(fpcr & 0x400)) flags |= _EM_OVERFLOW;
270 if (!(fpcr & 0x800)) flags |= _EM_UNDERFLOW;
271 if (!(fpcr & 0x1000)) flags |= _EM_INEXACT;
272 if (!(fpcr & 0x8000)) flags |= _EM_DENORMAL;
273 switch (fpcr & 0xc00000)
274 {
275 case 0x400000: flags |= _RC_UP; break;
276 case 0x800000: flags |= _RC_DOWN; break;
277 case 0xc00000: flags |= _RC_CHOP; break;
278 }
279
280 *cw = (flags & ~cw_mask) | (*cw & cw_mask);
281 fpcr &= ~0xc09f00ul;
282 if (!(*cw & _EM_INVALID)) fpcr |= 0x100;
283 if (!(*cw & _EM_ZERODIVIDE)) fpcr |= 0x200;
284 if (!(*cw & _EM_OVERFLOW)) fpcr |= 0x400;
285 if (!(*cw & _EM_UNDERFLOW)) fpcr |= 0x800;
286 if (!(*cw & _EM_INEXACT)) fpcr |= 0x1000;
287 if (!(*cw & _EM_DENORMAL)) fpcr |= 0x8000;
288 switch (*cw & _MCW_RC)
289 {
290 case _RC_CHOP: fpcr |= 0xc00000; break;
291 case _RC_UP: fpcr |= 0x400000; break;
292 case _RC_DOWN: fpcr |= 0x800000; break;
293 }
294 }
295
296 /* mask exceptions if needed */
297 if (old_fpcr != fpcr && ~(old_fpcr >> 8) & fpsr & 0x9f != fpsr & 0x9f)
298 {
299 ULONG_PTR mask = fpcr & ~0x9f00;
300 __asm__ __volatile__( "msr fpcr, %0" :: "r" (mask) );
301 }
302
303 if (old_fpsr != fpsr)
304 __asm__ __volatile__( "msr fpsr, %0" :: "r" (fpsr) );
305 if (old_fpcr != fpcr)
306 __asm__ __volatile__( "msr fpcr, %0" :: "r" (fpcr) );
307#elif defined(__arm__)
308 DWORD old_fpscr, fpscr;
309 unsigned int flags;
310
311 __asm__ __volatile__( "vmrs %0, fpscr" : "=r" (fpscr) );
312 old_fpscr = fpscr;
313
314 cw_mask &= _MCW_EM | _MCW_RC;
315 sw_mask &= _MCW_EM;
316
317 if (sw)
318 {
319 flags = 0;
320 if (fpscr & 0x1) flags |= _SW_INVALID;
321 if (fpscr & 0x2) flags |= _SW_ZERODIVIDE;
322 if (fpscr & 0x4) flags |= _SW_OVERFLOW;
323 if (fpscr & 0x8) flags |= _SW_UNDERFLOW;
324 if (fpscr & 0x10) flags |= _SW_INEXACT;
325 if (fpscr & 0x80) flags |= _SW_DENORMAL;
326
327 *sw = (flags & ~sw_mask) | (*sw & sw_mask);
328 fpscr &= ~0x9f;
329 if (*sw & _SW_INVALID) fpscr |= 0x1;
330 if (*sw & _SW_ZERODIVIDE) fpscr |= 0x2;
331 if (*sw & _SW_OVERFLOW) fpscr |= 0x4;
332 if (*sw & _SW_UNDERFLOW) fpscr |= 0x8;
333 if (*sw & _SW_INEXACT) fpscr |= 0x10;
334 if (*sw & _SW_DENORMAL) fpscr |= 0x80;
335 *sw = flags;
336 }
337
338 if (cw)
339 {
340 flags = 0;
341 if (!(fpscr & 0x100)) flags |= _EM_INVALID;
342 if (!(fpscr & 0x200)) flags |= _EM_ZERODIVIDE;
343 if (!(fpscr & 0x400)) flags |= _EM_OVERFLOW;
344 if (!(fpscr & 0x800)) flags |= _EM_UNDERFLOW;
345 if (!(fpscr & 0x1000)) flags |= _EM_INEXACT;
346 if (!(fpscr & 0x8000)) flags |= _EM_DENORMAL;
347 switch (fpscr & 0xc00000)
348 {
349 case 0x400000: flags |= _RC_UP; break;
350 case 0x800000: flags |= _RC_DOWN; break;
351 case 0xc00000: flags |= _RC_CHOP; break;
352 }
353
354 *cw = (flags & ~cw_mask) | (*cw & cw_mask);
355 fpscr &= ~0xc09f00ul;
356 if (!(*cw & _EM_INVALID)) fpscr |= 0x100;
357 if (!(*cw & _EM_ZERODIVIDE)) fpscr |= 0x200;
358 if (!(*cw & _EM_OVERFLOW)) fpscr |= 0x400;
359 if (!(*cw & _EM_UNDERFLOW)) fpscr |= 0x800;
360 if (!(*cw & _EM_INEXACT)) fpscr |= 0x1000;
361 if (!(*cw & _EM_DENORMAL)) fpscr |= 0x8000;
362 switch (*cw & _MCW_RC)
363 {
364 case _RC_CHOP: fpscr |= 0xc00000; break;
365 case _RC_UP: fpscr |= 0x400000; break;
366 case _RC_DOWN: fpscr |= 0x800000; break;
367 }
368 }
369
370 if (old_fpscr != fpscr)
371 __asm__ __volatile__( "vmsr fpscr, %0" :: "r" (fpscr) );
372#endif
373}