/*-
 * SPDX-License-Identifier: Beerware
 *
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.org> wrote this file.  As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 */

#ifndef _SYS_SMP_H_
#define _SYS_SMP_H_

#ifdef _KERNEL

#ifndef LOCORE

#include <sys/cpuset.h>
#include <sys/queue.h>

/*
 * Types of nodes in the topological tree.
 */
typedef enum {
	/* No node has this type; can be used in topo API calls. */
	TOPO_TYPE_DUMMY,
	/* Processing unit aka computing unit aka logical CPU. */
	TOPO_TYPE_PU,
	/* Physical subdivision of a package. */
	TOPO_TYPE_CORE,
	/* CPU L1/L2/L3 cache. */
	TOPO_TYPE_CACHE,
	/* Package aka chip, equivalent to socket. */
	TOPO_TYPE_PKG,
	/* NUMA node. */
	TOPO_TYPE_NODE,
	/* Other logical or physical grouping of PUs. */
	/* E.g. PUs on the same die, or PUs sharing an FPU. */
	TOPO_TYPE_GROUP,
	/* The whole system. */
	TOPO_TYPE_SYSTEM
} topo_node_type;

/* Hardware identifier of a topology component. */
typedef unsigned int hwid_t;
/* Logical CPU identifier. */
typedef int cpuid_t;

/* A node in the topology. */
struct topo_node {
	struct topo_node			*parent;
	TAILQ_HEAD(topo_children, topo_node)	children;
	TAILQ_ENTRY(topo_node)			siblings;
	cpuset_t				cpuset;
	topo_node_type				type;
	uintptr_t				subtype;
	hwid_t					hwid;
	cpuid_t					id;
	int					nchildren;
	int					cpu_count;
};

/*
 * Scheduling topology of a NUMA or SMP system.
 *
 * The top-level topology is an array of pointers to groups.  Each group
 * contains a bitmask of cpus in its group or subgroups.  It may also
 * contain a pointer to an array of child groups.
 *
 * The bitmasks at non-leaf groups may be used by consumers who support
 * a smaller depth than the hardware provides.
 *
 * The topology may be omitted by systems where all CPUs are equal.
 */

struct cpu_group {
	struct cpu_group *cg_parent;	/* Our parent group. */
	struct cpu_group *cg_child;	/* Optional children groups. */
	cpuset_t	cg_mask;	/* Mask of cpus in this group. */
	int32_t		cg_count;	/* Count of cpus in this group. */
	int32_t		cg_first;	/* First cpu in this group. */
	int32_t		cg_last;	/* Last cpu in this group. */
	int16_t		cg_children;	/* Number of children groups. */
	int8_t		cg_level;	/* Shared cache level. */
	int8_t		cg_flags;	/* Traversal modifiers. */
};

typedef struct cpu_group *cpu_group_t;

/*
 * Defines common resources for CPUs in the group.  The highest level
 * resource should be used when multiple are shared.
 */
#define	CG_SHARE_NONE	0
#define	CG_SHARE_L1	1
#define	CG_SHARE_L2	2
#define	CG_SHARE_L3	3

#define	MAX_CACHE_LEVELS	CG_SHARE_L3
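
/*
 * Example: a sketch of a consumer walking upward from the leaf group
 * that holds a CPU to the smallest enclosing group that shares an L2
 * cache.  smp_topo() and smp_topo_find() are declared further below;
 * the function name here is hypothetical.
 *
 *	static struct cpu_group *
 *	example_find_l2_group(int cpu)
 *	{
 *		struct cpu_group *cg;
 *
 *		for (cg = smp_topo_find(smp_topo(), cpu); cg != NULL;
 *		    cg = cg->cg_parent) {
 *			if (cg->cg_level == CG_SHARE_L2)
 *				return (cg);
 *		}
 *		return (NULL);
 *	}
 */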

/*
 * Behavior modifiers for load balancing and affinity.
 */
#define	CG_FLAG_HTT	0x01	/* Schedule the alternate core last. */
#define	CG_FLAG_SMT	0x02	/* Newer-generation SMT, less crippled than HTT. */
#define	CG_FLAG_THREAD	(CG_FLAG_HTT | CG_FLAG_SMT)	/* Any threading. */
#define	CG_FLAG_NODE	0x04	/* NUMA node. */

/*
 * Convenience routines for building and traversing topologies.
 */
#ifdef SMP
void topo_init_node(struct topo_node *node);
void topo_init_root(struct topo_node *root);
struct topo_node * topo_add_node_by_hwid(struct topo_node *parent, int hwid,
    topo_node_type type, uintptr_t subtype);
struct topo_node * topo_find_node_by_hwid(struct topo_node *parent, int hwid,
    topo_node_type type, uintptr_t subtype);
void topo_promote_child(struct topo_node *child);
struct topo_node * topo_next_node(struct topo_node *top,
    struct topo_node *node);
struct topo_node * topo_next_nonchild_node(struct topo_node *top,
    struct topo_node *node);
void topo_set_pu_id(struct topo_node *node, cpuid_t id);
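
/*
 * Example: a sketch of how MD enumeration code might build the tree
 * for one package containing two cores with two PUs each.  The hwid
 * values and the hwid-to-cpuid mapping are hypothetical; real code
 * derives them from firmware tables or CPUID.
 *
 *	static struct topo_node topo_root;
 *
 *	static void
 *	example_build_topo(void)
 *	{
 *		struct topo_node *pkg, *core, *pu;
 *		int coreid, puid;
 *
 *		topo_init_root(&topo_root);
 *		pkg = topo_add_node_by_hwid(&topo_root, 0, TOPO_TYPE_PKG, 0);
 *		for (coreid = 0; coreid < 2; coreid++) {
 *			core = topo_add_node_by_hwid(pkg, coreid,
 *			    TOPO_TYPE_CORE, 0);
 *			for (puid = 0; puid < 2; puid++) {
 *				pu = topo_add_node_by_hwid(core,
 *				    coreid * 2 + puid, TOPO_TYPE_PU, 0);
 *				topo_set_pu_id(pu, coreid * 2 + puid);
 *			}
 *		}
 *	}
 */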

enum topo_level {
	TOPO_LEVEL_PKG = 0,
	/*
	 * Some systems have useful sub-package core organizations.  On these,
	 * a package has one or more subgroups.  Each subgroup contains one or
	 * more cache groups (cores that share a last level cache).
	 */
	TOPO_LEVEL_GROUP,
	TOPO_LEVEL_CACHEGROUP,
	TOPO_LEVEL_CORE,
	TOPO_LEVEL_THREAD,
	TOPO_LEVEL_COUNT	/* Must be last */
};
struct topo_analysis {
	int entities[TOPO_LEVEL_COUNT];
};
int topo_analyze(struct topo_node *topo_root, int all,
    struct topo_analysis *results);

#define	TOPO_FOREACH(i, root)	\
	for (i = root; i != NULL; i = topo_next_node(root, i))
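
/*
 * Example: counting the logical CPUs in a subtree with TOPO_FOREACH().
 * This is a sketch; "root" is any node built with the routines above,
 * and the function name is hypothetical.
 *
 *	static int
 *	example_count_pus(struct topo_node *root)
 *	{
 *		struct topo_node *node;
 *		int npus;
 *
 *		npus = 0;
 *		TOPO_FOREACH(node, root) {
 *			if (node->type == TOPO_TYPE_PU)
 *				npus++;
 *		}
 *		return (npus);
 *	}
 */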

struct cpu_group *smp_topo(void);
struct cpu_group *smp_topo_alloc(u_int count);
struct cpu_group *smp_topo_none(void);
struct cpu_group *smp_topo_1level(int l1share, int l1count, int l1flags);
struct cpu_group *smp_topo_2level(int l2share, int l2count, int l1share,
    int l1count, int l1flags);
struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu);
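
/*
 * Example: a hypothetical MD cpu_topo() implementation (the hook is
 * declared below) describing a machine where every two SMT threads
 * share an L1 cache and every four cores share an L2.  The counts are
 * illustrative assumptions only.
 *
 *	struct cpu_group *
 *	cpu_topo(void)
 *	{
 *
 *		return (smp_topo_2level(CG_SHARE_L2, 4, CG_SHARE_L1, 2,
 *		    CG_FLAG_SMT));
 *	}
 */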

extern void (*cpustop_restartfunc)(void);
/* The suspend/resume cpusets are x86 only, but minimize ifdefs. */
extern volatile cpuset_t resuming_cpus;		/* woken up cpus in suspend pen */
extern volatile cpuset_t started_cpus;		/* cpus to let out of stop pen */
extern volatile cpuset_t stopped_cpus;		/* cpus in stop pen */
extern volatile cpuset_t suspended_cpus;	/* cpus [near] sleeping in susp pen */
extern volatile cpuset_t toresume_cpus;		/* cpus to let out of suspend pen */
extern cpuset_t hlt_cpus_mask;			/* XXX 'mask' is detail in old impl */
extern cpuset_t logical_cpus_mask;
#endif /* SMP */

extern u_int mp_maxid;
extern int mp_maxcpus;
extern int mp_ncores;
extern int mp_ncpus;
extern int smp_cpus;
extern volatile int smp_started;
extern int smp_threads_per_core;

extern cpuset_t all_cpus;
extern cpuset_t cpuset_domain[MAXMEMDOM];	/* CPUs in each NUMA domain. */

struct pcb;
extern struct pcb *stoppcbs;

/*
 * Macro allowing us to determine whether a CPU is absent at any given
 * time, thus permitting us to configure sparse maps of cpuid-dependent
 * (per-CPU) structures.
 */
#define	CPU_ABSENT(x_cpu)	(!CPU_ISSET(x_cpu, &all_cpus))

/*
 * Macros to iterate over non-absent CPUs.  CPU_FOREACH() takes an
 * integer iterator and iterates over the available set of CPUs.
 * CPU_FIRST() returns the id of the first non-absent CPU.  CPU_NEXT()
 * returns the id of the next non-absent CPU.  It will wrap back to
 * CPU_FIRST() once the end of the list is reached.  The iterators are
 * currently implemented via inline functions.
 */
#define	CPU_FOREACH(i)						\
	for ((i) = 0; (i) <= mp_maxid; (i)++)			\
		if (!CPU_ABSENT((i)))

static __inline int
cpu_first(void)
{
	int i;

	for (i = 0;; i++)
		if (!CPU_ABSENT(i))
			return (i);
}

static __inline int
cpu_next(int i)
{

	for (;;) {
		i++;
		if ((u_int)i > mp_maxid)
			i = 0;
		if (!CPU_ABSENT(i))
			return (i);
	}
}

#define	CPU_FIRST()	cpu_first()
#define	CPU_NEXT(i)	cpu_next((i))
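
/*
 * Example: CPU ids may be sparse, so code should iterate with
 * CPU_FOREACH() rather than assuming ids 0..mp_ncpus-1 all exist.
 * A sketch (with a hypothetical name) that tallies non-absent CPUs:
 *
 *	static int
 *	example_count_cpus(void)
 *	{
 *		int cpu, count;
 *
 *		count = 0;
 *		CPU_FOREACH(cpu)
 *			count++;
 *		return (count);
 *	}
 *
 * Once smp_started is set, the count should match mp_ncpus.
 */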

#ifdef SMP
/*
 * Machine dependent functions used to initialize MP support.
 *
 * The cpu_mp_probe() function should check to see if MP support is present
 * and return zero if it is not or non-zero if it is.  If MP support is
 * present, then cpu_mp_start() will be called so that MP can be enabled.
 * This function should do things such as start up secondary processors.
 * It should also set up mp_ncpus, all_cpus, and smp_cpus, and ensure that
 * smp_started is initialized at the appropriate time.
 * Once cpu_mp_start() returns, machine independent MP startup code will be
 * executed and a simple message will be output to the console.  Finally,
 * cpu_mp_announce() will be called so that machine dependent messages about
 * the MP support may be output to the console if desired.
 *
 * The cpu_mp_setmaxid() function is called very early during the boot
 * process so that the MD code may set mp_maxid to provide an upper bound
 * on CPU IDs that other subsystems may use.  If a platform is not able to
 * determine the exact maximum ID that early, then it may set mp_maxid to
 * MAXCPU - 1.
 */
struct thread;

struct cpu_group *cpu_topo(void);
void cpu_mp_announce(void);
int cpu_mp_probe(void);
void cpu_mp_setmaxid(void);
void cpu_mp_start(void);
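
/*
 * A minimal sketch of the MD side of this protocol, for a hypothetical
 * platform that learns its CPU count from firmware through a made-up
 * helper fw_cpu_count() and starts each AP with a made-up start_ap():
 *
 *	void
 *	cpu_mp_setmaxid(void)
 *	{
 *
 *		mp_maxid = MIN(fw_cpu_count(), MAXCPU) - 1;
 *	}
 *
 *	int
 *	cpu_mp_probe(void)
 *	{
 *
 *		return (mp_maxid > 0);
 *	}
 *
 *	void
 *	cpu_mp_start(void)
 *	{
 *		int cpu;
 *
 *		CPU_SET(0, &all_cpus);
 *		mp_ncpus = 1;
 *		for (cpu = 1; cpu <= mp_maxid; cpu++) {
 *			if (start_ap(cpu) == 0) {
 *				CPU_SET(cpu, &all_cpus);
 *				mp_ncpus++;
 *			}
 *		}
 *		smp_cpus = mp_ncpus;
 *	}
 */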

void forward_signal(struct thread *);
int restart_cpus(cpuset_t);
int stop_cpus(cpuset_t);
int stop_cpus_hard(cpuset_t);
#if defined(__amd64__) || defined(__i386__)
int suspend_cpus(cpuset_t);
int resume_cpus(cpuset_t);
#endif

void smp_rendezvous_action(void);
extern struct mtx smp_ipi_mtx;

#endif /* SMP */

int quiesce_all_cpus(const char *, int);
int quiesce_cpus(cpuset_t, const char *, int);
void quiesce_all_critical(void);
void cpus_fence_seq_cst(void);
void smp_no_rendezvous_barrier(void *);
void smp_rendezvous(void (*)(void *),
		    void (*)(void *),
		    void (*)(void *),
		    void *arg);
void smp_rendezvous_cpus(cpuset_t,
			 void (*)(void *),
			 void (*)(void *),
			 void (*)(void *),
			 void *arg);
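
/*
 * Example: running a function on every CPU with smp_rendezvous().
 * NULL is passed for the unused setup and teardown callbacks; the
 * action runs on all CPUs, including the caller's, and the call does
 * not return until every CPU has finished.  The function names here
 * are hypothetical.
 *
 *	static void
 *	example_action(void *arg)
 *	{
 *
 *		atomic_add_int(arg, 1);
 *	}
 *
 *	static void
 *	example_broadcast(void)
 *	{
 *		u_int visited = 0;
 *
 *		smp_rendezvous(NULL, example_action, NULL, &visited);
 *		KASSERT(!smp_started || visited == mp_ncpus,
 *		    ("a cpu missed the rendezvous"));
 *	}
 */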

struct smp_rendezvous_cpus_retry_arg {
	cpuset_t cpus;
};
void smp_rendezvous_cpus_retry(cpuset_t,
    void (*)(void *),
    void (*)(void *),
    void (*)(void *),
    void (*)(void *, int),
    struct smp_rendezvous_cpus_retry_arg *);

void smp_rendezvous_cpus_done(struct smp_rendezvous_cpus_retry_arg *);

#endif /* !LOCORE */
#endif /* _KERNEL */
#endif /* _SYS_SMP_H_ */