/* Assembler macros for x86.
   Copyright (C) 2017-2025 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
 18
 19#ifndef _X86_SYSDEP_H
 20#define _X86_SYSDEP_H 1
 21
 22#include <sysdeps/generic/sysdep.h>
 23
 24/* The extended state feature IDs in the state component bitmap.  */
 25#define X86_XSTATE_X87_ID	0
 26#define X86_XSTATE_SSE_ID	1
 27#define X86_XSTATE_AVX_ID	2
 28#define X86_XSTATE_BNDREGS_ID	3
 29#define X86_XSTATE_BNDCFG_ID	4
 30#define X86_XSTATE_K_ID		5
 31#define X86_XSTATE_ZMM_H_ID	6
 32#define X86_XSTATE_ZMM_ID	7
 33#define X86_XSTATE_PKRU_ID	9
 34#define X86_XSTATE_TILECFG_ID	17
 35#define X86_XSTATE_TILEDATA_ID	18
 36#define X86_XSTATE_APX_F_ID	19
 37
 38#ifdef __x86_64__
 39/* Offset for fxsave/xsave area used by _dl_runtime_resolve.  Also need
 40   space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX.  It must be
 41   aligned to 16 bytes for fxsave and 64 bytes for xsave.  It is non-zero
 42   because MOV, instead of PUSH, is used to save registers onto stack.
 43
 44   +==================+<- stack frame start aligned at 8 or 16 bytes
 45   |                  |<- paddings for stack realignment of 64 bytes
 46   |------------------|<- xsave buffer end aligned at 64 bytes
 47   |                  |<-
 48   |                  |<-
 49   |                  |<-
 50   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
 51   |                  |<- 8-byte padding for 64-byte alignment
 52   |                  |<- R9
 53   |                  |<- R8
 54   |                  |<- RDI
 55   |                  |<- RSI
 56   |                  |<- RDX
 57   |                  |<- RCX
 58   |                  |<- RAX
 59   +==================+<- RSP aligned at 64 bytes
 60
 61 */
 62# define STATE_SAVE_OFFSET (8 * 7 + 8)
 63
 64/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
 65   stack.  After realigning stack, it saves RCX, RDX, R8, R9, R10 and
 66   R11.  Allocate space for RDI, RSI and RBX to avoid clobbering saved
 67   RDI, RSI and RBX values on stack by xsave.
 68
 69   +==================+<- stack frame start aligned at 8 or 16 bytes
 70   |                  |<- RDI saved in the red zone
 71   |                  |<- RSI saved in the red zone
 72   |                  |<- RBX saved in the red zone
 73   |                  |<- paddings for stack realignment of 64 bytes
 74   |------------------|<- xsave buffer end aligned at 64 bytes
 75   |                  |<-
 76   |                  |<-
 77   |                  |<-
 78   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
 79   |                  |<- 8-byte padding for 64-byte alignment
 80   |                  |<- 8-byte padding for 64-byte alignment
 81   |                  |<- R11
 82   |                  |<- R10
 83   |                  |<- R9
 84   |                  |<- R8
 85   |                  |<- RDX
 86   |                  |<- RCX
 87   +==================+<- RSP aligned at 64 bytes
 88
 89   Define the total register save area size for all integer registers by
 90   adding 24 to STATE_SAVE_OFFSET since RDI, RSI and RBX are saved onto
 91   stack without adjusting stack pointer first, using the red-zone.  */
 92# define TLSDESC_CALL_REGISTER_SAVE_AREA (STATE_SAVE_OFFSET + 24)
 93
 94/* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
 95   registers are mutually exclusive.  */
 96# define STATE_SAVE_MASK		\
 97  ((1 << X86_XSTATE_SSE_ID)		\
 98   | (1 << X86_XSTATE_AVX_ID)		\
 99   | (1 << X86_XSTATE_BNDREGS_ID)	\
100   | (1 << X86_XSTATE_K_ID)		\
101   | (1 << X86_XSTATE_ZMM_H_ID) 	\
102   | (1 << X86_XSTATE_ZMM_ID)		\
103   | (1 << X86_XSTATE_APX_F_ID))
104
105/* The maximum supported xstate ID.  */
106# define X86_XSTATE_MAX_ID	X86_XSTATE_APX_F_ID
107
108/* AMX state mask.  */
109# define AMX_STATE_SAVE_MASK		\
110  ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID))
111
112/* States to be included in xsave_state_full_size.  */
113# define FULL_STATE_SAVE_MASK		\
114  (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
115#else
/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
   uses PUSH to save registers onto stack, use 0 here.  The TLSDESC call
   register save area is likewise 0.  */
# define STATE_SAVE_OFFSET 0
# define TLSDESC_CALL_REGISTER_SAVE_AREA 0
120
/* Save SSE, AVX, AVX512, mask and bound registers.  Unlike the x86-64
   mask, X86_XSTATE_ZMM_ID and X86_XSTATE_APX_F_ID are not included.  */
# define STATE_SAVE_MASK		\
  ((1 << X86_XSTATE_SSE_ID)		\
   | (1 << X86_XSTATE_AVX_ID)		\
   | (1 << X86_XSTATE_BNDREGS_ID)	\
   | (1 << X86_XSTATE_K_ID)		\
   | (1 << X86_XSTATE_ZMM_H_ID))

/* The maximum supported xstate ID.  */
# define X86_XSTATE_MAX_ID	X86_XSTATE_ZMM_H_ID

/* States to be included in xsave_state_size.  No AMX mask is defined
   in this branch, so this is just STATE_SAVE_MASK.  */
# define FULL_STATE_SAVE_MASK		STATE_SAVE_MASK
134#endif
135
/* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL.
   Compiler assumes that all registers, including AMX and x87 FPU
   stack registers, are unchanged after CALL, except for EFLAGS and
   RAX/EAX.  The mask is therefore FULL_STATE_SAVE_MASK with the x87
   component bit added.  */
#define TLSDESC_CALL_STATE_SAVE_MASK	\
  (FULL_STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
142
/* Constants for bits in __x86_string_control:  */

/* Avoid short distance REP MOVSB.  This is bit 0.  */
#define X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB	(1 << 0)
147
148#ifdef	__ASSEMBLER__
149
/* Syntactic details of assembler.  */

/* ELF uses byte-counts for .align, most others use log2 of count of bytes.
   ALIGNARG converts a log2 alignment into the byte count.  Both the
   argument and the whole expansion are parenthesized so the result is
   the same inside a larger expression or with a compound argument.  */
#define ALIGNARG(log2) (1 << (log2))

/* Emit a .size directive giving NAME the size from its label to the
   current location.  */
#define ASM_SIZE_DIRECTIVE(name) .size name,.-name;
155
/* Common entry point: ENTRY_P2ALIGN with a power-of-two exponent of 4,
   i.e. 16-byte alignment.  */
#define ENTRY(name) ENTRY_P2ALIGN (name, 4)

/* Close a function opened with ENTRY: end the CFI procedure and record
   the symbol size.  Any earlier definition of END is replaced.  */
#undef	END
#define END(name)							      \
  cfi_endproc;								      \
  ASM_SIZE_DIRECTIVE(name)

/* The _CHK variants are plain aliases of ENTRY and END.  */
#define ENTRY_CHK(name) ENTRY (name)
#define END_CHK(name) END (name)
166
/* Since C identifiers are not normally prefixed with an underscore
   on this system, the asm identifier `syscall_error' intrudes on the
   C name space.  Make sure we use an innocuous name.  `mcount' is
   likewise redirected to `_mcount'.  */
#define	syscall_error	__syscall_error
#define mcount		_mcount
172
/* PSEUDO_END is identical to END.  Any earlier definition of
   PSEUDO_END is replaced.  */
#undef	PSEUDO_END
#define	PSEUDO_END(name)						      \
  END (name)
176
/* Local label name for asm code.  L (name) expands to `.Lname'.  The
   definitions are skipped when L is already defined.  */
#ifndef L
/* ELF-like local names start with `.L'.  */
# define LOCAL_LABEL(name) .L##name
# define L(name)	LOCAL_LABEL(name)
#endif

/* Directive selecting the section ".text.atom" with flags "ax".  */
#define atom_text_section .section ".text.atom", "ax"
185
#ifndef DL_STACK_ALIGNMENT
/* Due to GCC bug:

   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066

   __tls_get_addr may be called with 8-byte/4-byte stack alignment.
   Although this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't
   assume that stack will be always aligned at 16 bytes.

   NOTE(review): MINIMUM_ALIGNMENT is defined here only for x86-64, and
   only when DL_STACK_ALIGNMENT was not already defined.  On the other
   paths it must come from elsewhere; otherwise it evaluates to 0 inside
   #if -- confirm against the users of DL_RUNTIME_RESOLVE_REALIGN_STACK.  */
# ifdef __x86_64__
#  define DL_STACK_ALIGNMENT 8
#  define MINIMUM_ALIGNMENT 16
# else
#  define DL_STACK_ALIGNMENT 4
# endif
#endif
201
/* True if _dl_runtime_resolve/_dl_tlsdesc_dynamic should align stack for
   STATE_SAVE or align stack to MINIMUM_ALIGNMENT bytes before calling
   _dl_fixup/__tls_get_addr.  That is the case when either
   STATE_SAVE_ALIGNMENT or MINIMUM_ALIGNMENT is larger than
   DL_STACK_ALIGNMENT.  */
#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
   || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT)
208
209#endif	/* __ASSEMBLER__ */
210
211#endif	/* _X86_SYSDEP_H */