1/*-
  2 * Copyright (c) 2015 Ruslan Bukin <br@bsdpad.com>
  3 * All rights reserved.
  4 *
  5 * Portions of this software were developed by SRI International and the
  6 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
  7 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
  8 *
  9 * Portions of this software were developed by the University of Cambridge
 10 * Computer Laboratory as part of the CTSRD Project, with support from the
 11 * UK Higher Education Innovation Fund (HEIF).
 12 *
 13 * Redistribution and use in source and binary forms, with or without
 14 * modification, are permitted provided that the following conditions
 15 * are met:
 16 * 1. Redistributions of source code must retain the above copyright
 17 *    notice, this list of conditions and the following disclaimer.
 18 * 2. Redistributions in binary form must reproduce the above copyright
 19 *    notice, this list of conditions and the following disclaimer in the
 20 *    documentation and/or other materials provided with the distribution.
 21 *
 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 32 * SUCH DAMAGE.
 33 */
 34
 35#ifndef	_MACHINE_ATOMIC_H_
 36#define	_MACHINE_ATOMIC_H_
 37
 38#include <sys/atomic_common.h>
 39
 40#define	fence()	__asm __volatile("fence" ::: "memory");
 41#define	mb()	fence()
 42#define	rmb()	fence()
 43#define	wmb()	fence()
 44
 45static __inline int atomic_cmpset_8(__volatile uint8_t *, uint8_t, uint8_t);
 46static __inline int atomic_fcmpset_8(__volatile uint8_t *, uint8_t *, uint8_t);
 47static __inline int atomic_cmpset_16(__volatile uint16_t *, uint16_t, uint16_t);
 48static __inline int atomic_fcmpset_16(__volatile uint16_t *, uint16_t *,
 49    uint16_t);
 50
 51#define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
 52static __inline  void							\
 53atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
 54{									\
 55	atomic_##NAME##_##WIDTH(p, v);					\
 56	fence(); 							\
 57}									\
 58									\
 59static __inline  void							\
 60atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
 61{									\
 62	fence();							\
 63	atomic_##NAME##_##WIDTH(p, v);					\
 64}
 65
 66#define	ATOMIC_CMPSET_ACQ_REL(WIDTH)					\
 67static __inline  int							\
 68atomic_cmpset_acq_##WIDTH(__volatile uint##WIDTH##_t *p,		\
 69    uint##WIDTH##_t cmpval, uint##WIDTH##_t newval)			\
 70{									\
 71	int retval;							\
 72									\
 73	retval = atomic_cmpset_##WIDTH(p, cmpval, newval);		\
 74	fence();							\
 75	return (retval);						\
 76}									\
 77									\
 78static __inline  int							\
 79atomic_cmpset_rel_##WIDTH(__volatile uint##WIDTH##_t *p,		\
 80    uint##WIDTH##_t cmpval, uint##WIDTH##_t newval)			\
 81{									\
 82	fence();							\
 83	return (atomic_cmpset_##WIDTH(p, cmpval, newval));		\
 84}
 85
 86#define	ATOMIC_FCMPSET_ACQ_REL(WIDTH)					\
 87static __inline  int							\
 88atomic_fcmpset_acq_##WIDTH(__volatile uint##WIDTH##_t *p,		\
 89    uint##WIDTH##_t *cmpval, uint##WIDTH##_t newval)			\
 90{									\
 91	int retval;							\
 92									\
 93	retval = atomic_fcmpset_##WIDTH(p, cmpval, newval);		\
 94	fence();							\
 95	return (retval);						\
 96}									\
 97									\
 98static __inline  int							\
 99atomic_fcmpset_rel_##WIDTH(__volatile uint##WIDTH##_t *p,		\
100    uint##WIDTH##_t *cmpval, uint##WIDTH##_t newval)			\
101{									\
102	fence();							\
103	return (atomic_fcmpset_##WIDTH(p, cmpval, newval));		\
104}
105
106ATOMIC_CMPSET_ACQ_REL(8);
107ATOMIC_FCMPSET_ACQ_REL(8);
108ATOMIC_CMPSET_ACQ_REL(16);
109ATOMIC_FCMPSET_ACQ_REL(16);
110
/* "char"-named aliases for the 8-bit compare-and-set family. */
#define	atomic_cmpset_char		atomic_cmpset_8
#define	atomic_cmpset_acq_char		atomic_cmpset_acq_8
#define	atomic_cmpset_rel_char		atomic_cmpset_rel_8
#define	atomic_fcmpset_char		atomic_fcmpset_8
#define	atomic_fcmpset_acq_char		atomic_fcmpset_acq_8
#define	atomic_fcmpset_rel_char		atomic_fcmpset_rel_8

/* "short"-named aliases for the 16-bit compare-and-set family. */
#define	atomic_cmpset_short		atomic_cmpset_16
#define	atomic_cmpset_acq_short		atomic_cmpset_acq_16
#define	atomic_cmpset_rel_short		atomic_cmpset_rel_16
#define	atomic_fcmpset_short		atomic_fcmpset_16
#define	atomic_fcmpset_acq_short	atomic_fcmpset_acq_16
#define	atomic_fcmpset_rel_short	atomic_fcmpset_rel_16
124
125static __inline void
126atomic_add_32(volatile uint32_t *p, uint32_t val)
127{
128
129	__asm __volatile("amoadd.w zero, %1, %0"
130			: "+A" (*p)
131			: "r" (val)
132			: "memory");
133}
134
135static __inline void
136atomic_subtract_32(volatile uint32_t *p, uint32_t val)
137{
138
139	__asm __volatile("amoadd.w zero, %1, %0"
140			: "+A" (*p)
141			: "r" (-val)
142			: "memory");
143}
144
145static __inline void
146atomic_set_32(volatile uint32_t *p, uint32_t val)
147{
148
149	__asm __volatile("amoor.w zero, %1, %0"
150			: "+A" (*p)
151			: "r" (val)
152			: "memory");
153}
154
155static __inline void
156atomic_clear_32(volatile uint32_t *p, uint32_t val)
157{
158
159	__asm __volatile("amoand.w zero, %1, %0"
160			: "+A" (*p)
161			: "r" (~val)
162			: "memory");
163}
164
165static __inline int
166atomic_cmpset_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
167{
168	uint32_t tmp;
169	int res;
170
171	res = 0;
172
173	__asm __volatile(
174		"0:"
175			"li   %1, 1\n" /* Preset to fail */
176			"lr.w %0, %2\n"
177			"bne  %0, %z3, 1f\n"
178			"sc.w %1, %z4, %2\n"
179			"bnez %1, 0b\n"
180		"1:"
181			: "=&r" (tmp), "=&r" (res), "+A" (*p)
182			: "rJ" ((long)(int32_t)cmpval), "rJ" (newval)
183			: "memory");
184
185	return (!res);
186}
187
188static __inline int
189atomic_fcmpset_32(volatile uint32_t *p, uint32_t *cmpval, uint32_t newval)
190{
191	uint32_t tmp;
192	int res;
193
194	res = 0;
195
196	__asm __volatile(
197		"0:"
198			"li   %1, 1\n"		/* Preset to fail */
199			"lr.w %0, %2\n"		/* Load old value */
200			"bne  %0, %z4, 1f\n"	/* Compare */
201			"sc.w %1, %z5, %2\n"	/* Try to store new value */
202			"j 2f\n"
203		"1:"
204			"sw   %0, %3\n"		/* Save old value */
205		"2:"
206			: "=&r" (tmp), "=&r" (res), "+A" (*p), "+A" (*cmpval)
207			: "rJ" ((long)(int32_t)*cmpval), "rJ" (newval)
208			: "memory");
209
210	return (!res);
211}
212
213static __inline uint32_t
214atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
215{
216	uint32_t ret;
217
218	__asm __volatile("amoadd.w %0, %2, %1"
219			: "=&r" (ret), "+A" (*p)
220			: "r" (val)
221			: "memory");
222
223	return (ret);
224}
225
226static __inline uint32_t
227atomic_readandclear_32(volatile uint32_t *p)
228{
229	uint32_t ret;
230	uint32_t val;
231
232	val = 0;
233
234	__asm __volatile("amoswap.w %0, %2, %1"
235			: "=&r"(ret), "+A" (*p)
236			: "r" (val)
237			: "memory");
238
239	return (ret);
240}
241
/* "int"-named aliases for the plain (unordered) 32-bit operations. */
#define	atomic_add_int		atomic_add_32
#define	atomic_clear_int	atomic_clear_32
#define	atomic_cmpset_int	atomic_cmpset_32
#define	atomic_fcmpset_int	atomic_fcmpset_32
#define	atomic_fetchadd_int	atomic_fetchadd_32
#define	atomic_readandclear_int	atomic_readandclear_32
#define	atomic_set_int		atomic_set_32
#define	atomic_subtract_int	atomic_subtract_32
250
251ATOMIC_ACQ_REL(set, 32)
252ATOMIC_ACQ_REL(clear, 32)
253ATOMIC_ACQ_REL(add, 32)
254ATOMIC_ACQ_REL(subtract, 32)
255
256ATOMIC_CMPSET_ACQ_REL(32);
257ATOMIC_FCMPSET_ACQ_REL(32);
258
/*
 * Load the 32-bit word at p with acquire ordering: a volatile read
 * followed by fence().  Returns the value read.
 */
static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t snapshot = *p;

	fence();
	return (snapshot);
}
270
/*
 * Store val to the 32-bit word at p with release ordering: fence()
 * followed by a volatile write.
 */
static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t val)
{
	fence();
	*p = val;
}
279
/* "int"-named aliases for the acquire-ordered 32-bit operations. */
#define	atomic_add_acq_int	atomic_add_acq_32
#define	atomic_clear_acq_int	atomic_clear_acq_32
#define	atomic_cmpset_acq_int	atomic_cmpset_acq_32
#define	atomic_fcmpset_acq_int	atomic_fcmpset_acq_32
#define	atomic_load_acq_int	atomic_load_acq_32
#define	atomic_set_acq_int	atomic_set_acq_32
#define	atomic_subtract_acq_int	atomic_subtract_acq_32

/* "int"-named aliases for the release-ordered 32-bit operations. */
#define	atomic_add_rel_int	atomic_add_rel_32
#define	atomic_clear_rel_int	atomic_clear_rel_32
#define	atomic_cmpset_rel_int	atomic_cmpset_rel_32
#define	atomic_fcmpset_rel_int	atomic_fcmpset_rel_32
#define	atomic_set_rel_int	atomic_set_rel_32
#define	atomic_subtract_rel_int	atomic_subtract_rel_32
#define	atomic_store_rel_int	atomic_store_rel_32
295
296static __inline void
297atomic_add_64(volatile uint64_t *p, uint64_t val)
298{
299
300	__asm __volatile("amoadd.d zero, %1, %0"
301			: "+A" (*p)
302			: "r" (val)
303			: "memory");
304}
305
306static __inline void
307atomic_subtract_64(volatile uint64_t *p, uint64_t val)
308{
309
310	__asm __volatile("amoadd.d zero, %1, %0"
311			: "+A" (*p)
312			: "r" (-val)
313			: "memory");
314}
315
316static __inline void
317atomic_set_64(volatile uint64_t *p, uint64_t val)
318{
319
320	__asm __volatile("amoor.d zero, %1, %0"
321			: "+A" (*p)
322			: "r" (val)
323			: "memory");
324}
325
326static __inline void
327atomic_clear_64(volatile uint64_t *p, uint64_t val)
328{
329
330	__asm __volatile("amoand.d zero, %1, %0"
331			: "+A" (*p)
332			: "r" (~val)
333			: "memory");
334}
335
336static __inline int
337atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
338{
339	uint64_t tmp;
340	int res;
341
342	res = 0;
343
344	__asm __volatile(
345		"0:"
346			"li   %1, 1\n" /* Preset to fail */
347			"lr.d %0, %2\n"
348			"bne  %0, %z3, 1f\n"
349			"sc.d %1, %z4, %2\n"
350			"bnez %1, 0b\n"
351		"1:"
352			: "=&r" (tmp), "=&r" (res), "+A" (*p)
353			: "rJ" (cmpval), "rJ" (newval)
354			: "memory");
355
356	return (!res);
357}
358
359static __inline int
360atomic_fcmpset_64(volatile uint64_t *p, uint64_t *cmpval, uint64_t newval)
361{
362	uint64_t tmp;
363	int res;
364
365	res = 0;
366
367	__asm __volatile(
368		"0:"
369			"li   %1, 1\n"		/* Preset to fail */
370			"lr.d %0, %2\n"		/* Load old value */
371			"bne  %0, %z4, 1f\n"	/* Compare */
372			"sc.d %1, %z5, %2\n"	/* Try to store new value */
373			"j 2f\n"
374		"1:"
375			"sd   %0, %3\n"		/* Save old value */
376		"2:"
377			: "=&r" (tmp), "=&r" (res), "+A" (*p), "+A" (*cmpval)
378			: "rJ" (*cmpval), "rJ" (newval)
379			: "memory");
380
381	return (!res);
382}
383
384static __inline uint64_t
385atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
386{
387	uint64_t ret;
388
389	__asm __volatile("amoadd.d %0, %2, %1"
390			: "=&r" (ret), "+A" (*p)
391			: "r" (val)
392			: "memory");
393
394	return (ret);
395}
396
397static __inline uint64_t
398atomic_readandclear_64(volatile uint64_t *p)
399{
400	uint64_t ret;
401	uint64_t val;
402
403	val = 0;
404
405	__asm __volatile("amoswap.d %0, %2, %1"
406			: "=&r"(ret), "+A" (*p)
407			: "r" (val)
408			: "memory");
409
410	return (ret);
411}
412
413static __inline uint32_t
414atomic_swap_32(volatile uint32_t *p, uint32_t val)
415{
416	uint32_t old;
417
418	__asm __volatile("amoswap.w %0, %2, %1"
419			: "=&r"(old), "+A" (*p)
420			: "r" (val)
421			: "memory");
422
423	return (old);
424}
425
426static __inline uint64_t
427atomic_swap_64(volatile uint64_t *p, uint64_t val)
428{
429	uint64_t old;
430
431	__asm __volatile("amoswap.d %0, %2, %1"
432			: "=&r"(old), "+A" (*p)
433			: "r" (val)
434			: "memory");
435
436	return (old);
437}
438
/* "int"-named alias for the 32-bit swap. */
#define	atomic_swap_int			atomic_swap_32

/*
 * "long"-named aliases for the plain (unordered) 64-bit operations.
 * NOTE(review): this mapping implies long is 64 bits wide on this
 * target -- confirm.
 */
#define	atomic_add_long			atomic_add_64
#define	atomic_clear_long		atomic_clear_64
#define	atomic_cmpset_long		atomic_cmpset_64
#define	atomic_fcmpset_long		atomic_fcmpset_64
#define	atomic_fetchadd_long		atomic_fetchadd_64
#define	atomic_readandclear_long	atomic_readandclear_64
#define	atomic_set_long			atomic_set_64
#define	atomic_subtract_long		atomic_subtract_64
#define	atomic_swap_long		atomic_swap_64

/*
 * Pointer-sized aliases, likewise mapped onto the 64-bit operations.
 * NOTE(review): implies pointers are 64 bits wide on this target -- confirm.
 */
#define	atomic_add_ptr			atomic_add_64
#define	atomic_clear_ptr		atomic_clear_64
#define	atomic_cmpset_ptr		atomic_cmpset_64
#define	atomic_fcmpset_ptr		atomic_fcmpset_64
#define	atomic_fetchadd_ptr		atomic_fetchadd_64
#define	atomic_readandclear_ptr		atomic_readandclear_64
#define	atomic_set_ptr			atomic_set_64
#define	atomic_subtract_ptr		atomic_subtract_64
#define	atomic_swap_ptr			atomic_swap_64
460
461ATOMIC_ACQ_REL(set, 64)
462ATOMIC_ACQ_REL(clear, 64)
463ATOMIC_ACQ_REL(add, 64)
464ATOMIC_ACQ_REL(subtract, 64)
465
466ATOMIC_CMPSET_ACQ_REL(64);
467ATOMIC_FCMPSET_ACQ_REL(64);
468
/*
 * Load the 64-bit doubleword at p with acquire ordering: a volatile read
 * followed by fence().  Returns the value read.
 */
static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	uint64_t snapshot = *p;

	fence();
	return (snapshot);
}
480
/*
 * Store val to the 64-bit doubleword at p with release ordering: fence()
 * followed by a volatile write.
 */
static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{
	fence();
	*p = val;
}
489
/* "long"-named aliases for the acquire-ordered 64-bit operations. */
#define	atomic_add_acq_long		atomic_add_acq_64
#define	atomic_clear_acq_long		atomic_clear_acq_64
#define	atomic_cmpset_acq_long		atomic_cmpset_acq_64
#define	atomic_fcmpset_acq_long		atomic_fcmpset_acq_64
#define	atomic_load_acq_long		atomic_load_acq_64
#define	atomic_set_acq_long		atomic_set_acq_64
#define	atomic_subtract_acq_long	atomic_subtract_acq_64

/* Pointer-sized aliases for the acquire-ordered 64-bit operations. */
#define	atomic_add_acq_ptr		atomic_add_acq_64
#define	atomic_clear_acq_ptr		atomic_clear_acq_64
#define	atomic_cmpset_acq_ptr		atomic_cmpset_acq_64
#define	atomic_fcmpset_acq_ptr		atomic_fcmpset_acq_64
#define	atomic_load_acq_ptr		atomic_load_acq_64
#define	atomic_set_acq_ptr		atomic_set_acq_64
#define	atomic_subtract_acq_ptr		atomic_subtract_acq_64

/* The wrapper generator is not used past this point in this header. */
#undef ATOMIC_ACQ_REL
507
/* Acquire thread fence; implemented with fence(). */
static __inline void
atomic_thread_fence_acq(void)
{
	fence();
}
514
/* Release thread fence; implemented with fence(). */
static __inline void
atomic_thread_fence_rel(void)
{
	fence();
}
521
/* Acquire-release thread fence; implemented with fence(). */
static __inline void
atomic_thread_fence_acq_rel(void)
{
	fence();
}
528
/* Sequentially-consistent thread fence; implemented with fence(). */
static __inline void
atomic_thread_fence_seq_cst(void)
{
	fence();
}
535
/*
 * "long"-named aliases for the release-ordered 64-bit operations.
 * (Each alias is defined exactly once.)
 */
#define	atomic_add_rel_long		atomic_add_rel_64
#define	atomic_clear_rel_long		atomic_clear_rel_64
#define	atomic_cmpset_rel_long		atomic_cmpset_rel_64
#define	atomic_fcmpset_rel_long		atomic_fcmpset_rel_64
#define	atomic_set_rel_long		atomic_set_rel_64
#define	atomic_subtract_rel_long	atomic_subtract_rel_64
#define	atomic_store_rel_long		atomic_store_rel_64

/* Pointer-sized aliases for the release-ordered 64-bit operations. */
#define	atomic_add_rel_ptr		atomic_add_rel_64
#define	atomic_clear_rel_ptr		atomic_clear_rel_64
#define	atomic_cmpset_rel_ptr		atomic_cmpset_rel_64
#define	atomic_fcmpset_rel_ptr		atomic_fcmpset_rel_64
#define	atomic_set_rel_ptr		atomic_set_rel_64
#define	atomic_subtract_rel_ptr		atomic_subtract_rel_64
#define	atomic_store_rel_ptr		atomic_store_rel_64
554
555#include <sys/_atomic_subword.h>
556
557#endif /* _MACHINE_ATOMIC_H_ */