/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef	_MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#include <sys/atomic_common.h>

#if __ARM_ARCH >= 7
#define isb()  __asm __volatile("isb" : : : "memory")
#define dsb()  __asm __volatile("dsb" : : : "memory")
#define dmb()  __asm __volatile("dmb" : : : "memory")
#else
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#endif

#define mb()   dmb()
#define wmb()  dmb()
#define rmb()  dmb()
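
/*
 * Illustrative sketch (not part of the interface defined here): all three
 * barriers above expand to DMB and are typically used in producer/consumer
 * pairs, e.g.:
 *
 *	// producer
 *	buf->data = v;
 *	wmb();			// order the data write before the flag write
 *	buf->ready = 1;
 *
 *	// consumer
 *	while (buf->ready == 0)
 *		;
 *	rmb();			// order the flag read before the data read
 *	v = buf->data;
 *
 * (buf is a hypothetical shared structure used only for this example.)
 */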

#define	ARM_HAVE_ATOMIC64

#define ATOMIC_ACQ_REL_LONG(NAME)					\
static __inline void							\
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)		\
{									\
	atomic_##NAME##_long(p, v);					\
	dmb();								\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)		\
{									\
	dmb();								\
	atomic_##NAME##_long(p, v);					\
}

#define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
static __inline void							\
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	atomic_##NAME##_##WIDTH(p, v);					\
	dmb();								\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	dmb();								\
	atomic_##NAME##_##WIDTH(p, v);					\
}
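
/*
 * For illustration only: ATOMIC_ACQ_REL(add, 32), instantiated further down,
 * expands to roughly the following pair of functions; acquire variants issue
 * the barrier after the plain operation and release variants issue it before:
 *
 *	static __inline void
 *	atomic_add_acq_32(__volatile uint32_t *p, uint32_t v)
 *	{
 *		atomic_add_32(p, v);
 *		dmb();
 *	}
 *
 *	static __inline void
 *	atomic_add_rel_32(__volatile uint32_t *p, uint32_t v)
 *	{
 *		dmb();
 *		atomic_add_32(p, v);
 *	}
 */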

static __inline void
atomic_add_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   add	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
	    : : "cc", "memory");
}

static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   adds	%Q[tmp], %Q[val]			\n"
	    "   adc	%R[tmp], %R[tmp], %R[val]		\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
}

static __inline void
atomic_add_long(volatile u_long *p, u_long val)
{

	atomic_add_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL_LONG(add)

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   bic	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
	    : : "cc", "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   bic	%Q[tmp], %Q[val]			\n"
	    "   bic	%R[tmp], %R[val]			\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
}

static __inline void
atomic_clear_long(volatile u_long *address, u_long setmask)
{

	atomic_clear_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL_LONG(clear)

#define ATOMIC_FCMPSET_CODE(RET, TYPE, SUF)                   \
    {                                                         \
	TYPE tmp;                                             \
                                                              \
	__asm __volatile(                                     \
	    "1: ldrex" SUF "   %[tmp], [%[ptr]]          \n"  \
	    "   ldr" SUF "     %[ret], [%[oldv]]         \n"  \
	    "   teq            %[tmp], %[ret]            \n"  \
	    "   ittee          ne                        \n"  \
	    "   str" SUF "ne   %[tmp], [%[oldv]]         \n"  \
	    "   movne          %[ret], #0                \n"  \
	    "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n"  \
	    "   eorseq         %[ret], #1                \n"  \
	    "   beq            1b                        \n"  \
	    : [ret] "=&r" (RET),                              \
	      [tmp] "=&r" (tmp)                               \
	    : [ptr] "r"   (_ptr),                             \
	      [oldv] "r"  (_old),                             \
	      [newv] "r"  (_new)                              \
	    : "cc", "memory");                                \
    }

#define ATOMIC_FCMPSET_CODE64(RET)                                 \
    {                                                              \
	uint64_t cmp, tmp;                                         \
                                                                   \
	__asm __volatile(                                          \
	    "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n"  \
	    "   ldrd     %Q[cmp], %R[cmp], [%[oldv]]          \n"  \
	    "   teq      %Q[tmp], %Q[cmp]                     \n"  \
	    "   it       eq                                   \n"  \
	    "   teqeq    %R[tmp], %R[cmp]                     \n"  \
	    "   ittee    ne                                   \n"  \
	    "   movne    %[ret], #0                           \n"  \
	    "   strdne   %[tmp], [%[oldv]]                    \n"  \
	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n"  \
	    "   eorseq   %[ret], #1                           \n"  \
	    "   beq      1b                                   \n"  \
	    : [ret] "=&r" (RET),                                   \
	      [cmp] "=&r" (cmp),                                   \
	      [tmp] "=&r" (tmp)                                    \
	    : [ptr] "r"   (_ptr),                                  \
	      [oldv] "r"  (_old),                                  \
	      [newv] "r"  (_new)                                   \
	    : "cc", "memory");                                     \
    }

static __inline int
atomic_fcmpset_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
	return (ret);
}
#define	atomic_fcmpset_8	atomic_fcmpset_8

static __inline int
atomic_fcmpset_acq_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
	return (ret);
}

static __inline int
atomic_fcmpset_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
	return (ret);
}
#define	atomic_fcmpset_16	atomic_fcmpset_16

static __inline int
atomic_fcmpset_acq_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
	return (ret);
}

static __inline int
atomic_fcmpset_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
	return (ret);
}

static __inline int
atomic_fcmpset_acq_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
	return (ret);
}

static __inline int
atomic_fcmpset_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, u_long, "");
	return (ret);
}

static __inline int
atomic_fcmpset_acq_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, u_long, "");
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE(ret, u_long, "");
	return (ret);
}

static __inline int
atomic_fcmpset_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE64(ret);
	return (ret);
}

static __inline int
atomic_fcmpset_acq_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE64(ret);
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE64(ret);
	return (ret);
}
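
/*
 * Usage sketch (illustrative only): on failure the fcmpset functions store
 * the value actually found into *_old, so a retry loop does not have to
 * reload the target itself.  FLAG below is a hypothetical bit mask and
 * atomic_load_32() comes from <sys/atomic_common.h>:
 *
 *	uint32_t old, new;
 *
 *	old = atomic_load_32(p);
 *	do {
 *		new = old | FLAG;
 *	} while (atomic_fcmpset_32(p, &old, new) == 0);
 */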

#define ATOMIC_CMPSET_CODE(RET, SUF)                         \
    {                                                        \
	__asm __volatile(                                    \
	    "1: ldrex" SUF "   %[ret], [%[ptr]]          \n" \
	    "   teq            %[ret], %[oldv]           \n" \
	    "   itee           ne                        \n" \
	    "   movne          %[ret], #0                \n" \
	    "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n" \
	    "   eorseq         %[ret], #1                \n" \
	    "   beq            1b                        \n" \
	    : [ret] "=&r" (RET)                              \
	    : [ptr] "r"   (_ptr),                            \
	      [oldv] "r"  (_old),                            \
	      [newv] "r"  (_new)                             \
	    : "cc", "memory");                               \
    }

#define ATOMIC_CMPSET_CODE64(RET)                                 \
    {                                                             \
	uint64_t tmp;                                             \
	                                                          \
	__asm __volatile(                                         \
	    "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n" \
	    "   teq      %Q[tmp], %Q[oldv]                    \n" \
	    "   it       eq                                   \n" \
	    "   teqeq    %R[tmp], %R[oldv]                    \n" \
	    "   itee     ne                                   \n" \
	    "   movne    %[ret], #0                           \n" \
	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n" \
	    "   eorseq   %[ret], #1                           \n" \
	    "   beq      1b                                   \n" \
	    : [ret] "=&r" (RET),                                  \
	      [tmp] "=&r" (tmp)                                   \
	    : [ptr] "r"   (_ptr),                                 \
	      [oldv] "r"  (_old),                                 \
	      [newv] "r"  (_new)                                  \
	    : "cc", "memory");                                    \
    }

static __inline int
atomic_cmpset_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "b");
	return (ret);
}
#define	atomic_cmpset_8		atomic_cmpset_8

static __inline int
atomic_cmpset_acq_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "b");
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE(ret, "b");
	return (ret);
}

static __inline int
atomic_cmpset_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "h");
	return (ret);
}
#define	atomic_cmpset_16	atomic_cmpset_16

static __inline int
atomic_cmpset_acq_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "h");
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE(ret, "h");
	return (ret);
}

static __inline int
atomic_cmpset_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "");
	return (ret);
}

static __inline int
atomic_cmpset_acq_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "");
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE(ret, "");
	return (ret);
}

static __inline int
atomic_cmpset_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "");
	return (ret);
}

static __inline int
atomic_cmpset_acq_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "");
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE(ret, "");
	return (ret);
}

static __inline int
atomic_cmpset_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE64(ret);
	return (ret);
}

static __inline int
atomic_cmpset_acq_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE64(ret);
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE64(ret);
	return (ret);
}
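
/*
 * Usage sketch (illustrative only): unlike fcmpset, cmpset takes the expected
 * value by value and does not report back what it found, so a retry loop has
 * to reload the target explicitly (atomic_load_32() is from
 * <sys/atomic_common.h>):
 *
 *	uint32_t old;
 *
 *	do {
 *		old = atomic_load_32(p);
 *	} while (atomic_cmpset_32(p, old, old + 1) == 0);
 */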

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0, ret = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%3]	\n"
	    "   add	%1, %0, %4	\n"
	    "   strex	%2, %1, [%3]	\n"
	    "   cmp	%2, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "+r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
	    : : "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
	    "   adds	%Q[tmp], %Q[ret], %Q[val]		\n"
	    "   adc	%R[tmp], %R[ret], %R[val]		\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val)
{

	return (atomic_fetchadd_32((volatile uint32_t *)p, val));
}
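
/*
 * Illustrative sketch: fetchadd returns the value the target held before the
 * addition, which is what makes it usable as, e.g., a ticket dispenser
 * (next_ticket is a hypothetical counter used only for this example):
 *
 *	uint32_t my_ticket;
 *
 *	my_ticket = atomic_fetchadd_32(&next_ticket, 1);
 */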

static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t v;

	v = *p;
	dmb();
	return (v);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
	uint64_t ret;

	/*
	 * The only way to atomically load 64 bits is with LDREXD, which puts
	 * the exclusive monitor into the exclusive state, so reset it to the
	 * open state with CLREX because we don't actually need to store
	 * anything.
	 */
	__asm __volatile(
	    "ldrexd	%Q[ret], %R[ret], [%[ptr]]	\n"
	    "clrex					\n"
	    : [ret] "=&r" (ret)
	    : [ptr] "r"   (p)
	    : "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	uint64_t ret;

	ret = atomic_load_64(p);
	dmb();
	return (ret);
}

static __inline u_long
atomic_load_acq_long(volatile u_long *p)
{
	u_long v;

	v = *p;
	dmb();
	return (v);
}

static __inline uint32_t
atomic_readandclear_32(volatile uint32_t *p)
{
	uint32_t ret, tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%3]	\n"
	    "   mov	%1, #0		\n"
	    "   strex	%2, %1, [%3]	\n"
	    "   cmp	%2, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p)
	    : : "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
	    "   mov	%Q[tmp], #0				\n"
	    "   mov	%R[tmp], #0				\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p)
	    : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{

	return (atomic_readandclear_32((volatile uint32_t *)p));
}

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   orr	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
	    : : "cc", "memory");
}

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   orr	%Q[tmp], %Q[val]			\n"
	    "   orr	%R[tmp], %R[val]			\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
}

static __inline void
atomic_set_long(volatile u_long *address, u_long setmask)
{

	atomic_set_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(set)

static __inline void
atomic_subtract_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   sub	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
	    : : "cc", "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   subs	%Q[tmp], %Q[val]			\n"
	    "   sbc	%R[tmp], %R[tmp], %R[val]		\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long val)
{

	atomic_subtract_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL_LONG(subtract)

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	/*
	 * The only way to atomically store 64 bits is with STREXD, which will
	 * succeed only if paired up with a preceding LDREXD using the same
	 * address, so we read and discard the existing value before storing.
	 */
	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [tmp] "=&r" (tmp),
	      [exf] "=&r" (exflag)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{

	dmb();
	*p = v;
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

	dmb();
	atomic_store_64(p, val);
}

static __inline void
atomic_store_rel_long(volatile u_long *p, u_long v)
{

	dmb();
	*p = v;
}

static __inline int
atomic_testandclear_32(volatile uint32_t *ptr, u_int bit)
{
	int newv, oldv, result;

	__asm __volatile(
	    "   mov     ip, #1					\n"
	    "   lsl     ip, ip, %[bit]				\n"
	    /*  Done with %[bit] as input, reuse below as output. */
	    "1:							\n"
	    "   ldrex	%[oldv], [%[ptr]]			\n"
	    "   bic     %[newv], %[oldv], ip			\n"
	    "   strex	%[bit], %[newv], [%[ptr]]		\n"
	    "   teq	%[bit], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    "   ands	%[bit], %[oldv], ip			\n"
	    "   it	ne					\n"
	    "   movne   %[bit], #1                              \n"
	    : [bit]  "=&r"   (result),
	      [oldv] "=&r"   (oldv),
	      [newv] "=&r"   (newv)
	    : [ptr]  "r"     (ptr),
	             "[bit]" (bit & 0x1f)
	    : "cc", "ip", "memory");

	return (result);
}

static __inline int
atomic_testandclear_int(volatile u_int *p, u_int v)
{

	return (atomic_testandclear_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandclear_long(volatile u_long *p, u_int v)
{

	return (atomic_testandclear_32((volatile uint32_t *)p, v));
}
#define	atomic_testandclear_long	atomic_testandclear_long

static __inline int
atomic_testandclear_64(volatile uint64_t *p, u_int v)
{
	volatile uint32_t *p32;

	p32 = (volatile uint32_t *)p;
	/*
	 * Assume little-endian;
	 * atomic_testandclear_32() uses only the low 5 bits of v.
	 */
	if ((v & 0x20) != 0)
		p32++;
	return (atomic_testandclear_32(p32, v));
}

static __inline int
atomic_testandset_32(volatile uint32_t *ptr, u_int bit)
{
	int newv, oldv, result;

	__asm __volatile(
	    "   mov     ip, #1					\n"
	    "   lsl     ip, ip, %[bit]				\n"
	    /*  Done with %[bit] as input, reuse below as output. */
	    "1:							\n"
	    "   ldrex	%[oldv], [%[ptr]]			\n"
	    "   orr     %[newv], %[oldv], ip			\n"
	    "   strex	%[bit], %[newv], [%[ptr]]		\n"
	    "   teq	%[bit], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    "   ands	%[bit], %[oldv], ip			\n"
	    "   it	ne					\n"
	    "   movne   %[bit], #1                              \n"
	    : [bit]  "=&r"   (result),
	      [oldv] "=&r"   (oldv),
	      [newv] "=&r"   (newv)
	    : [ptr]  "r"     (ptr),
	             "[bit]" (bit & 0x1f)
	    : "cc", "ip", "memory");

	return (result);
}

static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{

	return (atomic_testandset_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{

	return (atomic_testandset_32((volatile uint32_t *)p, v));
}
#define	atomic_testandset_long	atomic_testandset_long

static __inline int
atomic_testandset_64(volatile uint64_t *p, u_int v)
{
	volatile uint32_t *p32;

	p32 = (volatile uint32_t *)p;
	/*
	 * Assume little-endian;
	 * atomic_testandset_32() uses only the low 5 bits of v.
	 */
	if ((v & 0x20) != 0)
		p32++;
	return (atomic_testandset_32(p32, v));
}
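
/*
 * Illustrative sketch: the test-and-set/test-and-clear functions take a bit
 * number rather than a mask, and the 64-bit variants above select the 32-bit
 * half by bit 5 of that number (little-endian layout assumed), e.g.:
 *
 *	uint64_t word = 0;
 *
 *	atomic_testandset_64(&word, 33);	// sets bit 1 of the upper half
 *	// word is now 0x0000000200000000
 */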

static __inline uint32_t
atomic_swap_32(volatile uint32_t *p, uint32_t v)
{
	uint32_t ret, exflag;

	__asm __volatile(
	    "1: ldrex	%[ret], [%[ptr]]		\n"
	    "   strex	%[exf], %[val], [%[ptr]]	\n"
	    "   teq	%[exf], #0			\n"
	    "   it	ne				\n"
	    "   bne	1b				\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag)
	    : [val] "r"  (v),
	      [ptr] "r"  (p)
	    : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_swap_long(volatile u_long *p, u_long v)
{

	return (atomic_swap_32((volatile uint32_t *)p, v));
}

static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t ret;
	uint32_t exflag;

	__asm __volatile(
	    "1: ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag)
	    : [val] "r"   (v),
	      [ptr] "r"   (p)
	    : "cc", "memory");
	return (ret);
}

#undef ATOMIC_ACQ_REL
#undef ATOMIC_ACQ_REL_LONG

static __inline void
atomic_thread_fence_acq(void)
{

	dmb();
}

static __inline void
atomic_thread_fence_rel(void)
{

	dmb();
}

static __inline void
atomic_thread_fence_acq_rel(void)
{

	dmb();
}

static __inline void
atomic_thread_fence_seq_cst(void)
{

	dmb();
}
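
/*
 * Illustrative sketch: the fences pair with plain atomic loads and stores
 * (atomic_load_int()/atomic_store_int() from <sys/atomic_common.h>), e.g. a
 * release fence before publishing a flag on one CPU and an acquire fence
 * after observing it on another:
 *
 *	// CPU 0 (publish)
 *	data = v;
 *	atomic_thread_fence_rel();
 *	atomic_store_int(&flag, 1);
 *
 *	// CPU 1 (consume)
 *	if (atomic_load_int(&flag) != 0) {
 *		atomic_thread_fence_acq();
 *		use(data);
 *	}
 *
 * (data, flag, and use() are hypothetical and exist only for this example.)
 */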

#define atomic_clear_ptr		atomic_clear_32
#define atomic_clear_acq_ptr		atomic_clear_acq_32
#define atomic_clear_rel_ptr		atomic_clear_rel_32
#define atomic_set_ptr			atomic_set_32
#define atomic_set_acq_ptr		atomic_set_acq_32
#define atomic_set_rel_ptr		atomic_set_rel_32
#define atomic_fcmpset_ptr		atomic_fcmpset_32
#define atomic_fcmpset_rel_ptr		atomic_fcmpset_rel_32
#define atomic_fcmpset_acq_ptr		atomic_fcmpset_acq_32
#define atomic_cmpset_ptr		atomic_cmpset_32
#define atomic_cmpset_acq_ptr		atomic_cmpset_acq_32
#define atomic_cmpset_rel_ptr		atomic_cmpset_rel_32
#define atomic_load_acq_ptr		atomic_load_acq_32
#define atomic_store_rel_ptr		atomic_store_rel_32
#define atomic_swap_ptr			atomic_swap_32
#define atomic_readandclear_ptr		atomic_readandclear_32

#define atomic_add_int			atomic_add_32
#define atomic_add_acq_int		atomic_add_acq_32
#define atomic_add_rel_int		atomic_add_rel_32
#define atomic_subtract_int		atomic_subtract_32
#define atomic_subtract_acq_int		atomic_subtract_acq_32
#define atomic_subtract_rel_int		atomic_subtract_rel_32
#define atomic_clear_int		atomic_clear_32
#define atomic_clear_acq_int		atomic_clear_acq_32
#define atomic_clear_rel_int		atomic_clear_rel_32
#define atomic_set_int			atomic_set_32
#define atomic_set_acq_int		atomic_set_acq_32
#define atomic_set_rel_int		atomic_set_rel_32
#define atomic_fcmpset_int		atomic_fcmpset_32
#define atomic_fcmpset_acq_int		atomic_fcmpset_acq_32
#define atomic_fcmpset_rel_int		atomic_fcmpset_rel_32
#define atomic_cmpset_int		atomic_cmpset_32
#define atomic_cmpset_acq_int		atomic_cmpset_acq_32
#define atomic_cmpset_rel_int		atomic_cmpset_rel_32
#define atomic_fetchadd_int		atomic_fetchadd_32
#define atomic_readandclear_int		atomic_readandclear_32
#define atomic_load_acq_int		atomic_load_acq_32
#define atomic_store_rel_int		atomic_store_rel_32
#define atomic_swap_int			atomic_swap_32

/*
 * For:
 *  - atomic_load_acq_8
 *  - atomic_load_acq_16
 *  - atomic_testandset_acq_long
 */
#include <sys/_atomic_subword.h>

#endif /* _MACHINE_ATOMIC_H_ */