//! Hand-written Hexagon assembly implementations of compiler-rt builtin routines.
   1const builtin = @import("builtin");
   2const common = @import("./common.zig");
   3
/// Signed 32-bit integer division: computes r0 / r1, result in r0.
/// Saves the operand signs in p0/p1, runs a shift-and-subtract loop (loop0)
/// on the absolute values, then negates the quotient when exactly one operand
/// was negative (p1 = xor(p0,p1); final `r0 = neg(r0)`).
/// NOTE(review): assumes the Hexagon scalar calling convention — arguments in
/// r0/r1, result in r0, return via `jumpr r31`.
fn __hexagon_divsi3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\   p0 = cmp.ge(r0,#0)
        \\   p1 = cmp.ge(r1,#0)
        \\   r1 = abs(r0)
        \\   r2 = abs(r1)
        \\  }
        \\  {
        \\   r3 = cl0(r1)
        \\   r4 = cl0(r2)
        \\   r5 = sub(r1,r2)
        \\   p2 = cmp.gtu(r2,r1)
        \\  }
        \\  {
        \\   r0 = #0
        \\   p1 = xor(p0,p1)
        \\   p0 = cmp.gtu(r2,r5)
        \\   if (p2) jumpr r31
        \\  }
        \\
        \\  {
        \\   r0 = mux(p1,#-1,#1)
        \\   if (p0) jumpr r31
        \\   r4 = sub(r4,r3)
        \\   r3 = #1
        \\  }
        \\  {
        \\   r0 = #0
        \\   r3:2 = vlslw(r3:2,r4)
        \\   loop0(1f,r4)
        \\  }
        \\  .falign
        \\ 1:
        \\  {
        \\   p0 = cmp.gtu(r2,r1)
        \\   if (!p0.new) r1 = sub(r1,r2)
        \\   if (!p0.new) r0 = add(r0,r3)
        \\   r3:2 = vlsrw(r3:2,#1)
        \\  }:endloop0
        \\  {
        \\   p0 = cmp.gtu(r2,r1)
        \\   if (!p0.new) r0 = add(r0,r3)
        \\   if (!p1) jumpr r31
        \\  }
        \\  {
        \\   r0 = neg(r0)
        \\   jumpr r31
        \\  }
    );
}
  55
/// Unsigned 32-bit remainder: computes r0 % r1, result in r0.
/// Left-aligns the divisor to the dividend using cl0 (count leading zeros),
/// then a hardware loop conditionally subtracts the shifted divisor, halving
/// it each iteration. Early-exits when r1 > r0 (remainder is r0 unchanged).
fn __hexagon_umodsi3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\   r2 = cl0(r0)
        \\   r3 = cl0(r1)
        \\   p0 = cmp.gtu(r1,r0)
        \\  }
        \\  {
        \\   r2 = sub(r3,r2)
        \\   if (p0) jumpr r31
        \\  }
        \\  {
        \\   loop0(1f,r2)
        \\   p1 = cmp.eq(r2,#0)
        \\   r2 = lsl(r1,r2)
        \\  }
        \\  .falign
        \\ 1:
        \\  {
        \\   p0 = cmp.gtu(r2,r0)
        \\   if (!p0.new) r0 = sub(r0,r2)
        \\   r2 = lsr(r2,#1)
        \\   if (p1) r1 = #0
        \\  }:endloop0
        \\  {
        \\   p0 = cmp.gtu(r2,r0)
        \\   if (!p0.new) r0 = sub(r0,r1)
        \\   jumpr r31
        \\  }
    );
}
  87
/// Single-precision square root of r0, result in r0.
/// Seeds with the hardware reciprocal-square-root approximation (sfinvsqrta)
/// and refines it through a fixed sequence of fused multiply steps
/// (`sfmpy ... :lib`); the final `:scale` step applies the exponent
/// adjustment predicate p0 produced by sfinvsqrta.
/// p1 = sfclass(r5,#1) detects a zero input so `if (p1) r0 = or(r0,r5)` can
/// merge the operand back in — NOTE(review): exact special-value (inf/NaN)
/// behavior is not verified here.
fn __hexagon_sqrtf() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\     r3,p0 = sfinvsqrta(r0)
        \\     r5 = sffixupr(r0)
        \\     r4 = ##0x3f000000
        \\     r1:0 = combine(#0,#0)
        \\   }
        \\   {
        \\     r0 += sfmpy(r3,r5):lib
        \\     r1 += sfmpy(r3,r4):lib
        \\     r2 = r4
        \\     r3 = r5
        \\   }
        \\   {
        \\     r2 -= sfmpy(r0,r1):lib
        \\     p1 = sfclass(r5,#1)
        \\
        \\   }
        \\   {
        \\     r0 += sfmpy(r0,r2):lib
        \\     r1 += sfmpy(r1,r2):lib
        \\     r2 = r4
        \\     r3 = r5
        \\   }
        \\   {
        \\     r2 -= sfmpy(r0,r1):lib
        \\     r3 -= sfmpy(r0,r0):lib
        \\   }
        \\   {
        \\     r0 += sfmpy(r1,r3):lib
        \\     r1 += sfmpy(r1,r2):lib
        \\     r2 = r4
        \\     r3 = r5
        \\   }
        \\   {
        \\
        \\     r3 -= sfmpy(r0,r0):lib
        \\     if (p1) r0 = or(r0,r5)
        \\   }
        \\   {
        \\     r0 += sfmpy(r1,r3,p0):scale
        \\     jumpr r31
        \\   }
    );
}
 134
/// Signed 64-bit remainder: computes (r1:0 % r3:2), result in r1:0.
/// Shift-and-subtract long division over register pairs on the absolute
/// values; the dividend's sign is captured up front (p3 = tstbit(r1,#31))
/// and at .hexagon_moddi3_return the remainder is negated when the dividend
/// was negative, so the result's sign follows the dividend.
fn __hexagon_moddi3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\   p3 = tstbit(r1,#31)
        \\  }
        \\  {
        \\   r1:0 = abs(r1:0)
        \\   r3:2 = abs(r3:2)
        \\  }
        \\  {
        \\   r6 = cl0(r1:0)
        \\   r7 = cl0(r3:2)
        \\   r5:4 = r3:2
        \\   r3:2 = r1:0
        \\  }
        \\  {
        \\   r10 = sub(r7,r6)
        \\   r1:0 = #0
        \\   r15:14 = #1
        \\  }
        \\  {
        \\   r11 = add(r10,#1)
        \\   r13:12 = lsl(r5:4,r10)
        \\   r15:14 = lsl(r15:14,r10)
        \\  }
        \\  {
        \\   p0 = cmp.gtu(r5:4,r3:2)
        \\   loop0(1f,r11)
        \\  }
        \\  {
        \\   if (p0) jump .hexagon_moddi3_return
        \\  }
        \\  .falign
        \\ 1:
        \\  {
        \\   p0 = cmp.gtu(r13:12,r3:2)
        \\  }
        \\  {
        \\   r7:6 = sub(r3:2, r13:12)
        \\   r9:8 = add(r1:0, r15:14)
        \\  }
        \\  {
        \\   r1:0 = vmux(p0, r1:0, r9:8)
        \\   r3:2 = vmux(p0, r3:2, r7:6)
        \\  }
        \\  {
        \\   r15:14 = lsr(r15:14, #1)
        \\   r13:12 = lsr(r13:12, #1)
        \\  }:endloop0
        \\
        \\ .hexagon_moddi3_return:
        \\  {
        \\   r1:0 = neg(r3:2)
        \\  }
        \\  {
        \\   r1:0 = vmux(p3,r1:0,r3:2)
        \\   jumpr r31
        \\  }
    );
}
 195
/// Signed 64-bit division: computes (r1:0 / r3:2), result in r1:0.
/// Same shift-and-subtract long division as the unsigned variant, run on
/// absolute values; p3 = xor(sign(dividend), sign(divisor)) decides whether
/// the quotient is negated at .hexagon_divdi3_return.
fn __hexagon_divdi3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\   p2 = tstbit(r1,#31)
        \\   p3 = tstbit(r3,#31)
        \\  }
        \\  {
        \\   r1:0 = abs(r1:0)
        \\   r3:2 = abs(r3:2)
        \\  }
        \\  {
        \\   r6 = cl0(r1:0)
        \\   r7 = cl0(r3:2)
        \\   r5:4 = r3:2
        \\   r3:2 = r1:0
        \\  }
        \\  {
        \\   p3 = xor(p2,p3)
        \\   r10 = sub(r7,r6)
        \\   r1:0 = #0
        \\   r15:14 = #1
        \\  }
        \\  {
        \\   r11 = add(r10,#1)
        \\   r13:12 = lsl(r5:4,r10)
        \\   r15:14 = lsl(r15:14,r10)
        \\  }
        \\  {
        \\   p0 = cmp.gtu(r5:4,r3:2)
        \\   loop0(1f,r11)
        \\  }
        \\  {
        \\   if (p0) jump .hexagon_divdi3_return
        \\  }
        \\  .falign
        \\ 1:
        \\  {
        \\   p0 = cmp.gtu(r13:12,r3:2)
        \\  }
        \\  {
        \\   r7:6 = sub(r3:2, r13:12)
        \\   r9:8 = add(r1:0, r15:14)
        \\  }
        \\  {
        \\   r1:0 = vmux(p0, r1:0, r9:8)
        \\   r3:2 = vmux(p0, r3:2, r7:6)
        \\  }
        \\  {
        \\   r15:14 = lsr(r15:14, #1)
        \\   r13:12 = lsr(r13:12, #1)
        \\  }:endloop0
        \\
        \\ .hexagon_divdi3_return:
        \\  {
        \\   r3:2 = neg(r1:0)
        \\  }
        \\  {
        \\   r1:0 = vmux(p3,r3:2,r1:0)
        \\   jumpr r31
        \\  }
    );
}
 258
/// Single-precision float division: computes r0 / r1, result in r0.
/// Uses the hardware reciprocal approximation (sfrecipa) plus operand
/// fixups (sffixupn/sffixupd), refines the reciprocal and quotient with
/// fused multiply steps (`sfmpy ... :lib`), and applies the final exponent
/// scaling via the sfrecipa predicate in the `:scale` step.
fn __hexagon_divsf3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\     r2,p0 = sfrecipa(r0,r1)
        \\     r4 = sffixupd(r0,r1)
        \\     r3 = ##0x3f800000
        \\   }
        \\   {
        \\     r5 = sffixupn(r0,r1)
        \\     r3 -= sfmpy(r4,r2):lib
        \\     r6 = ##0x80000000
        \\     r7 = r3
        \\   }
        \\   {
        \\     r2 += sfmpy(r3,r2):lib
        \\     r3 = r7
        \\     r6 = r5
        \\     r0 = and(r6,r5)
        \\   }
        \\   {
        \\     r3 -= sfmpy(r4,r2):lib
        \\     r0 += sfmpy(r5,r2):lib
        \\   }
        \\   {
        \\     r2 += sfmpy(r3,r2):lib
        \\     r6 -= sfmpy(r0,r4):lib
        \\   }
        \\   {
        \\     r0 += sfmpy(r6,r2):lib
        \\   }
        \\   {
        \\     r5 -= sfmpy(r0,r4):lib
        \\   }
        \\   {
        \\     r0 += sfmpy(r5,r2,p0):scale
        \\     jumpr r31
        \\   }
    );
}
 298
/// Unsigned 64-bit division: computes (r1:0 / r3:2), result in r1:0.
/// Classic shift-and-subtract long division over register pairs: the divisor
/// is left-aligned to the dividend (cl0 difference), then a hardware loop
/// conditionally subtracts it while accumulating quotient bits (vmux selects
/// between the updated and unchanged pair). Early-exits with quotient 0 when
/// divisor > dividend.
fn __hexagon_udivdi3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\   r6 = cl0(r1:0)
        \\   r7 = cl0(r3:2)
        \\   r5:4 = r3:2
        \\   r3:2 = r1:0
        \\  }
        \\  {
        \\   r10 = sub(r7,r6)
        \\   r1:0 = #0
        \\   r15:14 = #1
        \\  }
        \\  {
        \\   r11 = add(r10,#1)
        \\   r13:12 = lsl(r5:4,r10)
        \\   r15:14 = lsl(r15:14,r10)
        \\  }
        \\  {
        \\   p0 = cmp.gtu(r5:4,r3:2)
        \\   loop0(1f,r11)
        \\  }
        \\  {
        \\   if (p0) jumpr r31
        \\  }
        \\  .falign
        \\ 1:
        \\  {
        \\   p0 = cmp.gtu(r13:12,r3:2)
        \\  }
        \\  {
        \\   r7:6 = sub(r3:2, r13:12)
        \\   r9:8 = add(r1:0, r15:14)
        \\  }
        \\  {
        \\   r1:0 = vmux(p0, r1:0, r9:8)
        \\   r3:2 = vmux(p0, r3:2, r7:6)
        \\  }
        \\  {
        \\   r15:14 = lsr(r15:14, #1)
        \\   r13:12 = lsr(r13:12, #1)
        \\  }:endloop0
        \\  {
        \\   jumpr r31
        \\  }
    );
}
 346
/// Unsigned 64-bit remainder: computes (r1:0 % r3:2), result in r1:0.
/// Identical shift-and-subtract loop to __hexagon_udivdi3, except at
/// .hexagon_umoddi3_return the remaining dividend r3:2 (the remainder) is
/// copied into the r1:0 result pair instead of the quotient.
fn __hexagon_umoddi3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\   r6 = cl0(r1:0)
        \\   r7 = cl0(r3:2)
        \\   r5:4 = r3:2
        \\   r3:2 = r1:0
        \\  }
        \\  {
        \\   r10 = sub(r7,r6)
        \\   r1:0 = #0
        \\   r15:14 = #1
        \\  }
        \\  {
        \\   r11 = add(r10,#1)
        \\   r13:12 = lsl(r5:4,r10)
        \\   r15:14 = lsl(r15:14,r10)
        \\  }
        \\  {
        \\   p0 = cmp.gtu(r5:4,r3:2)
        \\   loop0(1f,r11)
        \\  }
        \\  {
        \\   if (p0) jump .hexagon_umoddi3_return
        \\  }
        \\  .falign
        \\ 1:
        \\  {
        \\   p0 = cmp.gtu(r13:12,r3:2)
        \\  }
        \\  {
        \\   r7:6 = sub(r3:2, r13:12)
        \\   r9:8 = add(r1:0, r15:14)
        \\  }
        \\  {
        \\   r1:0 = vmux(p0, r1:0, r9:8)
        \\   r3:2 = vmux(p0, r3:2, r7:6)
        \\  }
        \\  {
        \\   r15:14 = lsr(r15:14, #1)
        \\   r13:12 = lsr(r13:12, #1)
        \\  }:endloop0
        \\
        \\ .hexagon_umoddi3_return:
        \\  {
        \\   r1:0 = r3:2
        \\   jumpr r31
        \\  }
    );
}
 397
/// Signed 32-bit remainder: computes r0 % r1, result in r0.
/// Shift-and-subtract on absolute values; p2 records whether the dividend
/// was non-negative (cmp.ge(r0,#0)) and the remainder is negated at the end
/// when it was not — the result's sign follows the dividend.
fn __hexagon_modsi3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\   p2 = cmp.ge(r0,#0)
        \\   r2 = abs(r0)
        \\   r1 = abs(r1)
        \\  }
        \\  {
        \\   r3 = cl0(r2)
        \\   r4 = cl0(r1)
        \\   p0 = cmp.gtu(r1,r2)
        \\  }
        \\  {
        \\   r3 = sub(r4,r3)
        \\   if (p0) jumpr r31
        \\  }
        \\  {
        \\   p1 = cmp.eq(r3,#0)
        \\   loop0(1f,r3)
        \\   r0 = r2
        \\   r2 = lsl(r1,r3)
        \\  }
        \\  .falign
        \\ 1:
        \\  {
        \\   p0 = cmp.gtu(r2,r0)
        \\   if (!p0.new) r0 = sub(r0,r2)
        \\   r2 = lsr(r2,#1)
        \\   if (p1) r1 = #0
        \\  }:endloop0
        \\  {
        \\   p0 = cmp.gtu(r2,r0)
        \\   if (!p0.new) r0 = sub(r0,r1)
        \\   if (p2) jumpr r31
        \\  }
        \\  {
        \\   r0 = neg(r0)
        \\   jumpr r31
        \\  }
    );
}
 439
/// Specialized memcpy fast path: copies r2 bytes from src r1 to dst r0 with
/// 8-byte memd loads/stores when BOTH pointers are 8-byte aligned
/// (p0 = bitsclr(...,#7)); otherwise tail-jumps to the regular memcpy@PLT.
/// The loop count r3 = (r2 >> 3) - 3 relies on the contract in the name
/// (length >= 32 and a multiple of 8); three doublewords are handled in the
/// prologue/epilogue packets around the hardware loop.
/// NOTE(review): behavior for shorter or non-multiple-of-8 sizes on the
/// aligned path is not defended here — callers must honor the contract.
fn __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\   p0 = bitsclr(r1,#7)
        \\   p0 = bitsclr(r0,#7)
        \\   if (p0.new) r5:4 = memd(r1)
        \\   r3 = #-3
        \\  }
        \\  {
        \\   if (!p0) jump .Lmemcpy_call
        \\   if (p0) memd(r0++#8) = r5:4
        \\   if (p0) r5:4 = memd(r1+#8)
        \\   r3 += lsr(r2,#3)
        \\  }
        \\  {
        \\   memd(r0++#8) = r5:4
        \\   r5:4 = memd(r1+#16)
        \\   r1 = add(r1,#24)
        \\   loop0(1f,r3)
        \\  }
        \\  .falign
        \\ 1:
        \\  {
        \\   memd(r0++#8) = r5:4
        \\   r5:4 = memd(r1++#8)
        \\  }:endloop0
        \\  {
        \\   memd(r0) = r5:4
        \\   r0 -= add(r2,#-8)
        \\   jumpr r31
        \\  }
        \\ .Lmemcpy_call:
        \\      jump memcpy@PLT
    );
}
 475
/// Unsigned 32-bit division: computes r0 / r1, result in r0.
/// Left-aligns the divisor with the dividend (cl0 difference in r6), then a
/// hardware loop subtracts the shifted divisor while accumulating quotient
/// bits; returns 0 immediately when r1 > r0.
fn __hexagon_udivsi3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\   r2 = cl0(r0)
        \\   r3 = cl0(r1)
        \\   r5:4 = combine(#1,#0)
        \\   p0 = cmp.gtu(r1,r0)
        \\  }
        \\  {
        \\   r6 = sub(r3,r2)
        \\   r4 = r1
        \\   r1:0 = combine(r0,r4)
        \\   if (p0) jumpr r31
        \\  }
        \\  {
        \\   r3:2 = vlslw(r5:4,r6)
        \\   loop0(1f,r6)
        \\  }
        \\  .falign
        \\ 1:
        \\  {
        \\   p0 = cmp.gtu(r2,r1)
        \\   if (!p0.new) r1 = sub(r1,r2)
        \\   if (!p0.new) r0 = add(r0,r3)
        \\   r3:2 = vlsrw(r3:2,#1)
        \\  }:endloop0
        \\  {
        \\   p0 = cmp.gtu(r2,r1)
        \\   if (!p0.new) r0 = add(r0,r3)
        \\   jumpr r31
        \\  }
    );
}
 509
/// Double-precision addition: computes r1:0 + r3:2, result in r1:0.
/// Also the tail target of __hexagon_subdf3 (which flips the sign of b).
///
/// Fast path (both operands classify as finite normals, dfclass #2):
/// operands are swapped so the larger exponent comes first, mantissas are
/// expanded into 64-bit fixed point (insert/extractu), the smaller one is
/// aligned (ASR) with lost bits folded into a sticky bit, the values are
/// added as signed integers, and the result is converted back with
/// convert_d2df, patching the exponent with `r1 += asl(r5,#20)`.
/// Out-of-range exponents fall to .Ladd_ovf_unf; an exact integer-zero sum
/// goes to .Ladd_zero, which reads the rounding-mode bits from USR to pick
/// the sign of the zero. NaN/infinity/zero/subnormal operands are dispatched
/// from .Ladd_abnormal (.Linvalid_nan_add, .Linf_add, .LB_zero,
/// .Ladd_two_subnormal, .Lzero_plus_zero).
/// NOTE(review): packet semantics (parallel reads of pre-packet register
/// values) are essential throughout — do not reorder instructions.
fn __hexagon_adddf3() align(32) callconv(.naked) noreturn {
    asm volatile (
        \\  {
        \\   r4 = extractu(r1,#11,#20)
        \\   r5 = extractu(r3,#11,#20)
        \\   r13:12 = combine(##0x20000000,#0)
        \\  }
        \\  {
        \\   p3 = dfclass(r1:0,#2)
        \\   p3 = dfclass(r3:2,#2)
        \\   r9:8 = r13:12
        \\   p2 = cmp.gtu(r5,r4)
        \\  }
        \\  {
        \\   if (!p3) jump .Ladd_abnormal
        \\   if (p2) r1:0 = r3:2
        \\   if (p2) r3:2 = r1:0
        \\   if (p2) r5:4 = combine(r4,r5)
        \\  }
        \\  {
        \\   r13:12 = insert(r1:0,#52,#11 -2)
        \\   r9:8 = insert(r3:2,#52,#11 -2)
        \\   r15 = sub(r4,r5)
        \\   r7:6 = combine(#62,#1)
        \\  }
        \\
        \\
        \\
        \\
        \\
        \\ .Ladd_continue:
        \\  {
        \\   r15 = min(r15,r7)
        \\
        \\   r11:10 = neg(r13:12)
        \\   p2 = cmp.gt(r1,#-1)
        \\   r14 = #0
        \\  }
        \\  {
        \\   if (!p2) r13:12 = r11:10
        \\   r11:10 = extractu(r9:8,r15:14)
        \\   r9:8 = ASR(r9:8,r15)
        \\
        \\
        \\
        \\
        \\   r15:14 = #0
        \\  }
        \\  {
        \\   p1 = cmp.eq(r11:10,r15:14)
        \\   if (!p1.new) r8 = or(r8,r6)
        \\   r5 = add(r4,#-1024 -60)
        \\   p3 = cmp.gt(r3,#-1)
        \\  }
        \\  {
        \\   r13:12 = add(r13:12,r9:8)
        \\   r11:10 = sub(r13:12,r9:8)
        \\   r7:6 = combine(#54,##2045)
        \\  }
        \\  {
        \\   p0 = cmp.gtu(r4,r7)
        \\   p0 = !cmp.gtu(r4,r6)
        \\   if (!p0.new) jump:nt .Ladd_ovf_unf
        \\   if (!p3) r13:12 = r11:10
        \\  }
        \\  {
        \\   r1:0 = convert_d2df(r13:12)
        \\   p0 = cmp.eq(r13,#0)
        \\   p0 = cmp.eq(r12,#0)
        \\   if (p0.new) jump:nt .Ladd_zero
        \\  }
        \\  {
        \\   r1 += asl(r5,#20)
        \\   jumpr r31
        \\  }
        \\
        \\  .falign
        \\ .Ladd_zero:
        \\
        \\
        \\  {
        \\   r28 = USR
        \\   r1:0 = #0
        \\   r3 = #1
        \\  }
        \\  {
        \\   r28 = extractu(r28,#2,#22)
        \\   r3 = asl(r3,#31)
        \\  }
        \\  {
        \\   p0 = cmp.eq(r28,#2)
        \\   if (p0.new) r1 = xor(r1,r3)
        \\   jumpr r31
        \\  }
        \\  .falign
        \\ .Ladd_ovf_unf:
        \\  {
        \\   r1:0 = convert_d2df(r13:12)
        \\   p0 = cmp.eq(r13,#0)
        \\   p0 = cmp.eq(r12,#0)
        \\   if (p0.new) jump:nt .Ladd_zero
        \\  }
        \\  {
        \\   r28 = extractu(r1,#11,#20)
        \\   r1 += asl(r5,#20)
        \\  }
        \\  {
        \\   r5 = add(r5,r28)
        \\   r3:2 = combine(##0x00100000,#0)
        \\  }
        \\  {
        \\   p0 = cmp.gt(r5,##1024 +1024 -2)
        \\   if (p0.new) jump:nt .Ladd_ovf
        \\  }
        \\  {
        \\   p0 = cmp.gt(r5,#0)
        \\   if (p0.new) jumpr:t r31
        \\   r28 = sub(#1,r5)
        \\  }
        \\  {
        \\   r3:2 = insert(r1:0,#52,#0)
        \\   r1:0 = r13:12
        \\  }
        \\  {
        \\   r3:2 = lsr(r3:2,r28)
        \\  }
        \\  {
        \\   r1:0 = insert(r3:2,#63,#0)
        \\   jumpr r31
        \\  }
        \\  .falign
        \\ .Ladd_ovf:
        \\
        \\  {
        \\   r1:0 = r13:12
        \\   r28 = USR
        \\   r13:12 = combine(##0x7fefffff,#-1)
        \\  }
        \\  {
        \\   r5 = extractu(r28,#2,#22)
        \\   r28 = or(r28,#0x28)
        \\   r9:8 = combine(##0x7ff00000,#0)
        \\  }
        \\  {
        \\   USR = r28
        \\   r5 ^= lsr(r1,#31)
        \\   r28 = r5
        \\  }
        \\  {
        \\   p0 = !cmp.eq(r28,#1)
        \\   p0 = !cmp.eq(r5,#2)
        \\   if (p0.new) r13:12 = r9:8
        \\  }
        \\  {
        \\   r1:0 = insert(r13:12,#63,#0)
        \\  }
        \\  {
        \\   p0 = dfcmp.eq(r1:0,r1:0)
        \\   jumpr r31
        \\  }
        \\
        \\ .Ladd_abnormal:
        \\  {
        \\   r13:12 = extractu(r1:0,#63,#0)
        \\   r9:8 = extractu(r3:2,#63,#0)
        \\  }
        \\  {
        \\   p3 = cmp.gtu(r13:12,r9:8)
        \\   if (!p3.new) r1:0 = r3:2
        \\   if (!p3.new) r3:2 = r1:0
        \\  }
        \\  {
        \\
        \\   p0 = dfclass(r1:0,#0x0f)
        \\   if (!p0.new) jump:nt .Linvalid_nan_add
        \\   if (!p3) r13:12 = r9:8
        \\   if (!p3) r9:8 = r13:12
        \\  }
        \\  {
        \\
        \\
        \\   p1 = dfclass(r1:0,#0x08)
        \\   if (p1.new) jump:nt .Linf_add
        \\  }
        \\  {
        \\   p2 = dfclass(r3:2,#0x01)
        \\   if (p2.new) jump:nt .LB_zero
        \\   r13:12 = #0
        \\  }
        \\
        \\  {
        \\   p0 = dfclass(r1:0,#4)
        \\   if (p0.new) jump:nt .Ladd_two_subnormal
        \\   r13:12 = combine(##0x20000000,#0)
        \\  }
        \\  {
        \\   r4 = extractu(r1,#11,#20)
        \\   r5 = #1
        \\
        \\   r9:8 = asl(r9:8,#11 -2)
        \\  }
        \\
        \\
        \\
        \\  {
        \\   r13:12 = insert(r1:0,#52,#11 -2)
        \\   r15 = sub(r4,r5)
        \\   r7:6 = combine(#62,#1)
        \\   jump .Ladd_continue
        \\  }
        \\
        \\ .Ladd_two_subnormal:
        \\  {
        \\   r13:12 = extractu(r1:0,#63,#0)
        \\   r9:8 = extractu(r3:2,#63,#0)
        \\  }
        \\  {
        \\   r13:12 = neg(r13:12)
        \\   r9:8 = neg(r9:8)
        \\   p0 = cmp.gt(r1,#-1)
        \\   p1 = cmp.gt(r3,#-1)
        \\  }
        \\  {
        \\   if (p0) r13:12 = r1:0
        \\   if (p1) r9:8 = r3:2
        \\  }
        \\  {
        \\   r13:12 = add(r13:12,r9:8)
        \\  }
        \\  {
        \\   r9:8 = neg(r13:12)
        \\   p0 = cmp.gt(r13,#-1)
        \\   r3:2 = #0
        \\  }
        \\  {
        \\   if (!p0) r1:0 = r9:8
        \\   if (p0) r1:0 = r13:12
        \\   r3 = ##0x80000000
        \\  }
        \\  {
        \\   if (!p0) r1 = or(r1,r3)
        \\   p0 = dfcmp.eq(r1:0,r3:2)
        \\   if (p0.new) jump:nt .Lzero_plus_zero
        \\  }
        \\  {
        \\   jumpr r31
        \\  }
        \\
        \\ .Linvalid_nan_add:
        \\  {
        \\   r28 = convert_df2sf(r1:0)
        \\   p0 = dfclass(r3:2,#0x0f)
        \\   if (p0.new) r3:2 = r1:0
        \\  }
        \\  {
        \\   r2 = convert_df2sf(r3:2)
        \\   r1:0 = #-1
        \\   jumpr r31
        \\  }
        \\  .falign
        \\ .LB_zero:
        \\  {
        \\   p0 = dfcmp.eq(r13:12,r1:0)
        \\   if (!p0.new) jumpr:t r31
        \\  }
        \\
        \\
        \\
        \\
        \\ .Lzero_plus_zero:
        \\  {
        \\   p0 = cmp.eq(r1:0,r3:2)
        \\   if (p0.new) jumpr:t r31
        \\  }
        \\  {
        \\   r28 = USR
        \\  }
        \\  {
        \\   r28 = extractu(r28,#2,#22)
        \\   r1:0 = #0
        \\  }
        \\  {
        \\   p0 = cmp.eq(r28,#2)
        \\   if (p0.new) r1 = ##0x80000000
        \\   jumpr r31
        \\  }
        \\ .Linf_add:
        \\
        \\  {
        \\   p0 = !cmp.eq(r1,r3)
        \\   p0 = dfclass(r3:2,#8)
        \\   if (!p0.new) jumpr:t r31
        \\  }
        \\  {
        \\   r2 = ##0x7f800001
        \\  }
        \\  {
        \\   r1:0 = convert_sf2df(r2)
        \\   jumpr r31
        \\  }
    );
}
 812
/// Double-precision subtraction: computes a - b as a + (-b) by flipping the
/// sign bit of the second operand (togglebit(r3,#31)) and tail-jumping to
/// __hexagon_adddf3; no frame is created, so adddf3 returns to our caller.
fn __hexagon_subdf3() align(32) callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\   r3 = togglebit(r3,#31)
        \\   jump ##__hexagon_adddf3
        \\ }
    );
}
 821
/// Double-precision division: computes r1:0 / r3:2, result in r1:0.
///
/// Normal path (both operands dfclass #2): computes a single-precision
/// reciprocal seed of the divisor mantissa with sfrecipa, refines it with
/// `sfmpy ... :lib` steps, then runs a fixed-point long-division refinement
/// (the four repeated mpyu/asl packet rows) to produce the 64-bit quotient,
/// applies the quotient sign (p3 from xor of the operand sign words) and
/// the exponent difference r12, converting back with convert_d2df.
/// Exponent out of range falls to .Ldiv_ovf_unf, which rounds to max-finite
/// or infinity per the USR rounding mode (.Ldiv_ovf) or denormalizes with
/// sticky-bit handling and USR inexact/underflow flag updates.
/// Abnormal operands are classified at .Ldiv_abnormal: NaN inputs
/// (.Ldiv_nan), inf/inf and 0/0 (.Ldiv_invalid -> default NaN), 0/x and
/// x/inf (.Ldiv_zero_result, signed zero), x/0 and inf/x
/// (.Ldiv_inf_result, signed infinity with divide-by-zero flag), and
/// subnormals, which are normalized and re-enter at .Ldenorm_continue.
/// NOTE(review): packet parallel-read semantics are load-bearing here;
/// do not reorder instructions within packets.
fn __hexagon_divdf3() align(32) callconv(.naked) noreturn {
    asm volatile (
        \\  {
        \\   p2 = dfclass(r1:0,#0x02)
        \\   p2 = dfclass(r3:2,#0x02)
        \\   r13:12 = combine(r3,r1)
        \\   r28 = xor(r1,r3)
        \\  }
        \\  {
        \\   if (!p2) jump .Ldiv_abnormal
        \\   r7:6 = extractu(r3:2,#23,#52 -23)
        \\   r8 = ##0x3f800001
        \\  }
        \\  {
        \\   r9 = or(r8,r6)
        \\   r13 = extractu(r13,#11,#52 -32)
        \\   r12 = extractu(r12,#11,#52 -32)
        \\   p3 = cmp.gt(r28,#-1)
        \\  }
        \\
        \\
        \\ .Ldenorm_continue:
        \\  {
        \\   r11,p0 = sfrecipa(r8,r9)
        \\   r10 = and(r8,#-2)
        \\   r28 = #1
        \\   r12 = sub(r12,r13)
        \\  }
        \\
        \\
        \\  {
        \\   r10 -= sfmpy(r11,r9):lib
        \\   r1 = insert(r28,#11 +1,#52 -32)
        \\   r13 = ##0x00800000 << 3
        \\  }
        \\  {
        \\   r11 += sfmpy(r11,r10):lib
        \\   r3 = insert(r28,#11 +1,#52 -32)
        \\   r10 = and(r8,#-2)
        \\  }
        \\  {
        \\   r10 -= sfmpy(r11,r9):lib
        \\   r5 = #-0x3ff +1
        \\   r4 = #0x3ff -1
        \\  }
        \\  {
        \\   r11 += sfmpy(r11,r10):lib
        \\   p1 = cmp.gt(r12,r5)
        \\   p1 = !cmp.gt(r12,r4)
        \\  }
        \\  {
        \\   r13 = insert(r11,#23,#3)
        \\   r5:4 = #0
        \\   r12 = add(r12,#-61)
        \\  }
        \\
        \\
        \\
        \\
        \\  {
        \\   r13 = add(r13,#((-3) << 3))
        \\  }
        \\  { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASL(r7:6, # ( 14 )); r1:0 -= asl(r15:14, # 32); }
        \\  { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 1 )); r1:0 -= asl(r15:14, # 32); }
        \\  { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 16 )); r1:0 -= asl(r15:14, # 32); }
        \\  { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 31 )); r1:0 -= asl(r15:14, # 32); r7:6=# ( 0 ); }
        \\
        \\
        \\
        \\
        \\
        \\
        \\
        \\  {
        \\
        \\   r15:14 = sub(r1:0,r3:2)
        \\   p0 = cmp.gtu(r3:2,r1:0)
        \\
        \\   if (!p0.new) r6 = #2
        \\  }
        \\  {
        \\   r5:4 = add(r5:4,r7:6)
        \\   if (!p0) r1:0 = r15:14
        \\   r15:14 = #0
        \\  }
        \\  {
        \\   p0 = cmp.eq(r1:0,r15:14)
        \\   if (!p0.new) r4 = or(r4,r28)
        \\  }
        \\  {
        \\   r7:6 = neg(r5:4)
        \\  }
        \\  {
        \\   if (!p3) r5:4 = r7:6
        \\  }
        \\  {
        \\   r1:0 = convert_d2df(r5:4)
        \\   if (!p1) jump .Ldiv_ovf_unf
        \\  }
        \\  {
        \\   r1 += asl(r12,#52 -32)
        \\   jumpr r31
        \\  }
        \\
        \\ .Ldiv_ovf_unf:
        \\  {
        \\   r1 += asl(r12,#52 -32)
        \\   r13 = extractu(r1,#11,#52 -32)
        \\  }
        \\  {
        \\   r7:6 = abs(r5:4)
        \\   r12 = add(r12,r13)
        \\  }
        \\  {
        \\   p0 = cmp.gt(r12,##0x3ff +0x3ff)
        \\   if (p0.new) jump:nt .Ldiv_ovf
        \\  }
        \\  {
        \\   p0 = cmp.gt(r12,#0)
        \\   if (p0.new) jump:nt .Ldiv_possible_unf
        \\  }
        \\  {
        \\   r13 = add(clb(r7:6),#-1)
        \\   r12 = sub(#7,r12)
        \\   r10 = USR
        \\   r11 = #63
        \\  }
        \\  {
        \\   r13 = min(r12,r11)
        \\   r11 = or(r10,#0x030)
        \\   r7:6 = asl(r7:6,r13)
        \\   r12 = #0
        \\  }
        \\  {
        \\   r15:14 = extractu(r7:6,r13:12)
        \\   r7:6 = lsr(r7:6,r13)
        \\   r3:2 = #1
        \\  }
        \\  {
        \\   p0 = cmp.gtu(r3:2,r15:14)
        \\   if (!p0.new) r6 = or(r2,r6)
        \\   r7 = setbit(r7,#52 -32+4)
        \\  }
        \\  {
        \\   r5:4 = neg(r7:6)
        \\   p0 = bitsclr(r6,#(1<<4)-1)
        \\   if (!p0.new) r10 = r11
        \\  }
        \\  {
        \\   USR = r10
        \\   if (p3) r5:4 = r7:6
        \\   r10 = #-0x3ff -(52 +4)
        \\  }
        \\  {
        \\   r1:0 = convert_d2df(r5:4)
        \\  }
        \\  {
        \\   r1 += asl(r10,#52 -32)
        \\   jumpr r31
        \\  }
        \\
        \\
        \\ .Ldiv_possible_unf:
        \\
        \\
        \\  {
        \\   r3:2 = extractu(r1:0,#63,#0)
        \\   r15:14 = combine(##0x00100000,#0)
        \\   r10 = #0x7FFF
        \\  }
        \\  {
        \\   p0 = dfcmp.eq(r15:14,r3:2)
        \\   p0 = bitsset(r7,r10)
        \\  }
        \\
        \\
        \\
        \\
        \\
        \\
        \\  {
        \\   if (!p0) jumpr r31
        \\   r10 = USR
        \\  }
        \\
        \\  {
        \\   r10 = or(r10,#0x30)
        \\  }
        \\  {
        \\   USR = r10
        \\  }
        \\  {
        \\   p0 = dfcmp.eq(r1:0,r1:0)
        \\   jumpr r31
        \\  }
        \\
        \\ .Ldiv_ovf:
        \\
        \\
        \\
        \\  {
        \\   r10 = USR
        \\   r3:2 = combine(##0x7fefffff,#-1)
        \\   r1 = mux(p3,#0,#-1)
        \\  }
        \\  {
        \\   r7:6 = combine(##0x7ff00000,#0)
        \\   r5 = extractu(r10,#2,#22)
        \\   r10 = or(r10,#0x28)
        \\  }
        \\  {
        \\   USR = r10
        \\   r5 ^= lsr(r1,#31)
        \\   r4 = r5
        \\  }
        \\  {
        \\   p0 = !cmp.eq(r4,#1)
        \\   p0 = !cmp.eq(r5,#2)
        \\   if (p0.new) r3:2 = r7:6
        \\   p0 = dfcmp.eq(r3:2,r3:2)
        \\  }
        \\  {
        \\   r1:0 = insert(r3:2,#63,#0)
        \\   jumpr r31
        \\  }
        \\
        \\
        \\
        \\
        \\
        \\
        \\
        \\ .Ldiv_abnormal:
        \\  {
        \\   p0 = dfclass(r1:0,#0x0F)
        \\   p0 = dfclass(r3:2,#0x0F)
        \\   p3 = cmp.gt(r28,#-1)
        \\  }
        \\  {
        \\   p1 = dfclass(r1:0,#0x08)
        \\   p1 = dfclass(r3:2,#0x08)
        \\  }
        \\  {
        \\   p2 = dfclass(r1:0,#0x01)
        \\   p2 = dfclass(r3:2,#0x01)
        \\  }
        \\  {
        \\   if (!p0) jump .Ldiv_nan
        \\   if (p1) jump .Ldiv_invalid
        \\  }
        \\  {
        \\   if (p2) jump .Ldiv_invalid
        \\  }
        \\  {
        \\   p2 = dfclass(r1:0,#(0x0F ^ 0x01))
        \\   p2 = dfclass(r3:2,#(0x0F ^ 0x08))
        \\  }
        \\  {
        \\   p1 = dfclass(r1:0,#(0x0F ^ 0x08))
        \\   p1 = dfclass(r3:2,#(0x0F ^ 0x01))
        \\  }
        \\  {
        \\   if (!p2) jump .Ldiv_zero_result
        \\   if (!p1) jump .Ldiv_inf_result
        \\  }
        \\
        \\
        \\
        \\
        \\
        \\  {
        \\   p0 = dfclass(r1:0,#0x02)
        \\   p1 = dfclass(r3:2,#0x02)
        \\   r10 = ##0x00100000
        \\  }
        \\  {
        \\   r13:12 = combine(r3,r1)
        \\   r1 = insert(r10,#11 +1,#52 -32)
        \\   r3 = insert(r10,#11 +1,#52 -32)
        \\  }
        \\  {
        \\   if (p0) r1 = or(r1,r10)
        \\   if (p1) r3 = or(r3,r10)
        \\  }
        \\  {
        \\   r5 = add(clb(r1:0),#-11)
        \\   r4 = add(clb(r3:2),#-11)
        \\   r10 = #1
        \\  }
        \\  {
        \\   r12 = extractu(r12,#11,#52 -32)
        \\   r13 = extractu(r13,#11,#52 -32)
        \\  }
        \\  {
        \\   r1:0 = asl(r1:0,r5)
        \\   r3:2 = asl(r3:2,r4)
        \\   if (!p0) r12 = sub(r10,r5)
        \\   if (!p1) r13 = sub(r10,r4)
        \\  }
        \\  {
        \\   r7:6 = extractu(r3:2,#23,#52 -23)
        \\  }
        \\  {
        \\   r9 = or(r8,r6)
        \\   jump .Ldenorm_continue
        \\  }
        \\
        \\ .Ldiv_zero_result:
        \\  {
        \\   r1 = xor(r1,r3)
        \\   r3:2 = #0
        \\  }
        \\  {
        \\   r1:0 = insert(r3:2,#63,#0)
        \\   jumpr r31
        \\  }
        \\ .Ldiv_inf_result:
        \\  {
        \\   p2 = dfclass(r3:2,#0x01)
        \\   p2 = dfclass(r1:0,#(0x0F ^ 0x08))
        \\  }
        \\  {
        \\   r10 = USR
        \\   if (!p2) jump 1f
        \\   r1 = xor(r1,r3)
        \\  }
        \\  {
        \\   r10 = or(r10,#0x04)
        \\  }
        \\  {
        \\   USR = r10
        \\  }
        \\ 1:
        \\  {
        \\   r3:2 = combine(##0x7ff00000,#0)
        \\   p0 = dfcmp.uo(r3:2,r3:2)
        \\  }
        \\  {
        \\   r1:0 = insert(r3:2,#63,#0)
        \\   jumpr r31
        \\  }
        \\ .Ldiv_nan:
        \\  {
        \\   p0 = dfclass(r1:0,#0x10)
        \\   p1 = dfclass(r3:2,#0x10)
        \\   if (!p0.new) r1:0 = r3:2
        \\   if (!p1.new) r3:2 = r1:0
        \\  }
        \\  {
        \\   r5 = convert_df2sf(r1:0)
        \\   r4 = convert_df2sf(r3:2)
        \\  }
        \\  {
        \\   r1:0 = #-1
        \\   jumpr r31
        \\  }
        \\
        \\ .Ldiv_invalid:
        \\  {
        \\   r10 = ##0x7f800001
        \\  }
        \\  {
        \\   r1:0 = convert_sf2df(r10)
        \\   jumpr r31
        \\  }
    );
}
1189
/// Double-precision (f64) multiplication for Hexagon, hand-written assembly.
///
/// Naked function: no Zig prologue/epilogue; arguments and result live in
/// registers. The `dfclass(r1:0,...)`/`dfclass(r3:2,...)` checks and the final
/// `insert(...)` + `jumpr r31` sequences show the two f64 operands arrive in
/// register pairs r1:0 and r3:2 and the product is returned in r1:0.
///
/// Structure visible in the code:
///  - Fast path: both operands of class #2 (per the initial dfclass pair);
///    the 53-bit significands are multiplied via four 32x32 `mpyu` partial
///    products accumulated into r11:10, exponents are extracted/added, and
///    the result is assembled with `convert_d2df` + exponent insert.
///  - `.Lmul_ovf_unf` / `.Lmul_ovf` / `.Lmul_possible_unf`: exponent
///    out-of-range paths; they set USR sticky flags (the `r28 = or(r28,...)`
///    / `USR = r28` writes) and produce inf/max-magnitude or rounded results.
///  - `.Lmul_abnormal`: operands sorted by magnitude, then dispatched to
///    `.Linvalid_nan` (NaN input), `.Ltrue_inf`, `.Ltrue_zero`,
///    `.Linvalid_zeroinf` (0 * inf -> NaN with invalid flag), or — for a
///    denormal operand — the operand is normalized and control jumps back to
///    `__hexagon_muldf3` to retry.
///
/// NOTE(review): the register-pair calling convention and the meaning of the
/// dfclass mask constants are taken from the register usage here and the
/// matching LLVM compiler-rt Hexagon builtins — confirm against the Hexagon
/// runtime ABI documentation. The asm body is intentionally left untouched.
fn __hexagon_muldf3() align(32) callconv(.naked) noreturn {
    asm volatile (
        \\  {
        \\   p0 = dfclass(r1:0,#2)
        \\   p0 = dfclass(r3:2,#2)
        \\   r13:12 = combine(##0x40000000,#0)
        \\  }
        \\  {
        \\   r13:12 = insert(r1:0,#52,#11 -1)
        \\   r5:4 = asl(r3:2,#11 -1)
        \\   r28 = #-1024
        \\   r9:8 = #1
        \\  }
        \\  {
        \\   r7:6 = mpyu(r4,r13)
        \\   r5:4 = insert(r9:8,#2,#62)
        \\  }
        \\
        \\
        \\
        \\
        \\  {
        \\   r15:14 = mpyu(r12,r4)
        \\   r7:6 += mpyu(r12,r5)
        \\  }
        \\  {
        \\   r7:6 += lsr(r15:14,#32)
        \\   r11:10 = mpyu(r13,r5)
        \\   r5:4 = combine(##1024 +1024 -4,#0)
        \\  }
        \\  {
        \\   r11:10 += lsr(r7:6,#32)
        \\   if (!p0) jump .Lmul_abnormal
        \\   p1 = cmp.eq(r14,#0)
        \\   p1 = cmp.eq(r6,#0)
        \\  }
        \\  {
        \\   if (!p1) r10 = or(r10,r8)
        \\   r6 = extractu(r1,#11,#20)
        \\   r7 = extractu(r3,#11,#20)
        \\  }
        \\  {
        \\   r15:14 = neg(r11:10)
        \\   r6 += add(r28,r7)
        \\   r28 = xor(r1,r3)
        \\  }
        \\  {
        \\   if (!p2.new) r11:10 = r15:14
        \\   p2 = cmp.gt(r28,#-1)
        \\   p0 = !cmp.gt(r6,r5)
        \\   p0 = cmp.gt(r6,r4)
        \\   if (!p0.new) jump:nt .Lmul_ovf_unf
        \\  }
        \\  {
        \\   r1:0 = convert_d2df(r11:10)
        \\   r6 = add(r6,#-1024 -58)
        \\  }
        \\  {
        \\   r1 += asl(r6,#20)
        \\   jumpr r31
        \\  }
        \\
        \\  .falign
        \\ .Lmul_possible_unf:
        \\  {
        \\   p0 = cmp.eq(r0,#0)
        \\   p0 = bitsclr(r1,r4)
        \\   if (!p0.new) jumpr:t r31
        \\   r5 = #0x7fff
        \\  }
        \\  {
        \\   p0 = bitsset(r13,r5)
        \\   r4 = USR
        \\   r5 = #0x030
        \\  }
        \\  {
        \\   if (p0) r4 = or(r4,r5)
        \\  }
        \\  {
        \\   USR = r4
        \\  }
        \\  {
        \\   p0 = dfcmp.eq(r1:0,r1:0)
        \\   jumpr r31
        \\  }
        \\  .falign
        \\ .Lmul_ovf_unf:
        \\  {
        \\   r1:0 = convert_d2df(r11:10)
        \\   r13:12 = abs(r11:10)
        \\   r7 = add(r6,#-1024 -58)
        \\  }
        \\  {
        \\   r1 += asl(r7,#20)
        \\   r7 = extractu(r1,#11,#20)
        \\   r4 = ##0x7FEFFFFF
        \\  }
        \\  {
        \\   r7 += add(r6,##-1024 -58)
        \\
        \\   r5 = #0
        \\  }
        \\  {
        \\   p0 = cmp.gt(r7,##1024 +1024 -2)
        \\   if (p0.new) jump:nt .Lmul_ovf
        \\  }
        \\  {
        \\   p0 = cmp.gt(r7,#0)
        \\   if (p0.new) jump:nt .Lmul_possible_unf
        \\   r5 = sub(r6,r5)
        \\   r28 = #63
        \\  }
        \\  {
        \\   r4 = #0
        \\   r5 = sub(#5,r5)
        \\  }
        \\  {
        \\   p3 = cmp.gt(r11,#-1)
        \\   r5 = min(r5,r28)
        \\   r11:10 = r13:12
        \\  }
        \\  {
        \\   r28 = USR
        \\   r15:14 = extractu(r11:10,r5:4)
        \\  }
        \\  {
        \\   r11:10 = asr(r11:10,r5)
        \\   r4 = #0x0030
        \\   r1 = insert(r9,#11,#20)
        \\  }
        \\  {
        \\   p0 = cmp.gtu(r9:8,r15:14)
        \\   if (!p0.new) r10 = or(r10,r8)
        \\   r11 = setbit(r11,#20 +3)
        \\  }
        \\  {
        \\   r15:14 = neg(r11:10)
        \\   p1 = bitsclr(r10,#0x7)
        \\   if (!p1.new) r28 = or(r4,r28)
        \\  }
        \\  {
        \\   if (!p3) r11:10 = r15:14
        \\   USR = r28
        \\  }
        \\  {
        \\   r1:0 = convert_d2df(r11:10)
        \\   p0 = dfcmp.eq(r1:0,r1:0)
        \\  }
        \\  {
        \\   r1 = insert(r9,#11 -1,#20 +1)
        \\   jumpr r31
        \\  }
        \\  .falign
        \\ .Lmul_ovf:
        \\
        \\  {
        \\   r28 = USR
        \\   r13:12 = combine(##0x7fefffff,#-1)
        \\   r1:0 = r11:10
        \\  }
        \\  {
        \\   r14 = extractu(r28,#2,#22)
        \\   r28 = or(r28,#0x28)
        \\   r5:4 = combine(##0x7ff00000,#0)
        \\  }
        \\  {
        \\   USR = r28
        \\   r14 ^= lsr(r1,#31)
        \\   r28 = r14
        \\  }
        \\  {
        \\   p0 = !cmp.eq(r28,#1)
        \\   p0 = !cmp.eq(r14,#2)
        \\   if (p0.new) r13:12 = r5:4
        \\   p0 = dfcmp.eq(r1:0,r1:0)
        \\  }
        \\  {
        \\   r1:0 = insert(r13:12,#63,#0)
        \\   jumpr r31
        \\  }
        \\
        \\ .Lmul_abnormal:
        \\  {
        \\   r13:12 = extractu(r1:0,#63,#0)
        \\   r5:4 = extractu(r3:2,#63,#0)
        \\  }
        \\  {
        \\   p3 = cmp.gtu(r13:12,r5:4)
        \\   if (!p3.new) r1:0 = r3:2
        \\   if (!p3.new) r3:2 = r1:0
        \\  }
        \\  {
        \\
        \\   p0 = dfclass(r1:0,#0x0f)
        \\   if (!p0.new) jump:nt .Linvalid_nan
        \\   if (!p3) r13:12 = r5:4
        \\   if (!p3) r5:4 = r13:12
        \\  }
        \\  {
        \\
        \\   p1 = dfclass(r1:0,#0x08)
        \\   p1 = dfclass(r3:2,#0x0e)
        \\  }
        \\  {
        \\
        \\
        \\   p0 = dfclass(r1:0,#0x08)
        \\   p0 = dfclass(r3:2,#0x01)
        \\  }
        \\  {
        \\   if (p1) jump .Ltrue_inf
        \\   p2 = dfclass(r3:2,#0x01)
        \\  }
        \\  {
        \\   if (p0) jump .Linvalid_zeroinf
        \\   if (p2) jump .Ltrue_zero
        \\   r28 = ##0x7c000000
        \\  }
        \\
        \\
        \\
        \\
        \\
        \\  {
        \\   p0 = bitsclr(r1,r28)
        \\   if (p0.new) jump:nt .Lmul_tiny
        \\  }
        \\  {
        \\   r28 = cl0(r5:4)
        \\  }
        \\  {
        \\   r28 = add(r28,#-11)
        \\  }
        \\  {
        \\   r5:4 = asl(r5:4,r28)
        \\  }
        \\  {
        \\   r3:2 = insert(r5:4,#63,#0)
        \\   r1 -= asl(r28,#20)
        \\  }
        \\  jump __hexagon_muldf3
        \\ .Lmul_tiny:
        \\  {
        \\   r28 = USR
        \\   r1:0 = xor(r1:0,r3:2)
        \\  }
        \\  {
        \\   r28 = or(r28,#0x30)
        \\   r1:0 = insert(r9:8,#63,#0)
        \\   r5 = extractu(r28,#2,#22)
        \\  }
        \\  {
        \\   USR = r28
        \\   p0 = cmp.gt(r5,#1)
        \\   if (!p0.new) r0 = #0
        \\   r5 ^= lsr(r1,#31)
        \\  }
        \\  {
        \\   p0 = cmp.eq(r5,#3)
        \\   if (!p0.new) r0 = #0
        \\   jumpr r31
        \\  }
        \\ .Linvalid_zeroinf:
        \\  {
        \\   r28 = USR
        \\  }
        \\  {
        \\   r1:0 = #-1
        \\   r28 = or(r28,#2)
        \\  }
        \\  {
        \\   USR = r28
        \\  }
        \\  {
        \\   p0 = dfcmp.uo(r1:0,r1:0)
        \\   jumpr r31
        \\  }
        \\ .Linvalid_nan:
        \\  {
        \\   p0 = dfclass(r3:2,#0x0f)
        \\   r28 = convert_df2sf(r1:0)
        \\   if (p0.new) r3:2 = r1:0
        \\  }
        \\  {
        \\   r2 = convert_df2sf(r3:2)
        \\   r1:0 = #-1
        \\   jumpr r31
        \\  }
        \\  .falign
        \\ .Ltrue_zero:
        \\  {
        \\   r1:0 = r3:2
        \\   r3:2 = r1:0
        \\  }
        \\ .Ltrue_inf:
        \\  {
        \\   r3 = extract(r3,#1,#31)
        \\  }
        \\  {
        \\   r1 ^= asl(r3,#31)
        \\   jumpr r31
        \\  }
    );
}
1494
/// Double-precision (f64) square root for Hexagon, hand-written assembly.
///
/// Naked function: the argument arrives in register pair r1:0 (every dfclass
/// check operates on r1:0) and the result is left in r1:0 before each
/// `jumpr r31` return.
///
/// Structure visible in the code:
///  - Main path: `sfinvsqrta` produces a single-precision reciprocal-sqrt
///    seed, refined by the `sfmpy(...):lib` Newton steps, then widened to a
///    64-bit significand through three rounds of `mpyu` multiplies and
///    `lsr(r1:0,r9)` correction adds into r13:12. Two conditional
///    `sub(...):carry` adjustment passes (p2, then p3) fix the final
///    candidate, a sticky bit is OR-ed in when the remainder is nonzero
///    (`if (!p0.new) r12 = or(r12,r2)`), and `convert_ud2df` plus an exponent
///    add (`r28` carries biased-exponent bookkeeping: -0x3ff, asr #1, -63)
///    assembles the result.
///  - `.Lsqrt_abnormal`: class #0x01 (the dfclass mask checked first) returns
///    the input unchanged; class #0x10 goes to `.Lsqrt_nan` (returns r1:0 =
///    #-1 after `convert_df2sf` — presumably to raise flags, confirm);
///    negative input goes to `.Lsqrt_invalid_neg`, which converts the
///    ##0x7F800001 single-precision pattern via `convert_sf2df`; class #0x08
///    also returns the input unchanged. Remaining (denormal) inputs are
///    normalized in-place and re-enter at `.Ldenormal_restart`.
///
/// NOTE(review): register conventions and dfclass mask meanings inferred from
/// the register usage here and the matching LLVM compiler-rt Hexagon
/// builtins — verify against the Hexagon runtime ABI. Asm body untouched.
fn __hexagon_sqrtdf2() align(32) callconv(.naked) noreturn {
    asm volatile (
        \\  {
        \\   r15:14 = extractu(r1:0,#23 +1,#52 -23)
        \\   r28 = extractu(r1,#11,#52 -32)
        \\   r5:4 = combine(##0x3f000004,#1)
        \\  }
        \\  {
        \\   p2 = dfclass(r1:0,#0x02)
        \\   p2 = cmp.gt(r1,#-1)
        \\   if (!p2.new) jump:nt .Lsqrt_abnormal
        \\   r9 = or(r5,r14)
        \\  }
        \\
        \\ .Ldenormal_restart:
        \\  {
        \\   r11:10 = r1:0
        \\   r7,p0 = sfinvsqrta(r9)
        \\   r5 = and(r5,#-16)
        \\   r3:2 = #0
        \\  }
        \\  {
        \\   r3 += sfmpy(r7,r9):lib
        \\   r2 += sfmpy(r7,r5):lib
        \\   r6 = r5
        \\
        \\
        \\   r9 = and(r28,#1)
        \\  }
        \\  {
        \\   r6 -= sfmpy(r3,r2):lib
        \\   r11 = insert(r4,#11 +1,#52 -32)
        \\   p1 = cmp.gtu(r9,#0)
        \\  }
        \\  {
        \\   r3 += sfmpy(r3,r6):lib
        \\   r2 += sfmpy(r2,r6):lib
        \\   r6 = r5
        \\   r9 = mux(p1,#8,#9)
        \\  }
        \\  {
        \\   r6 -= sfmpy(r3,r2):lib
        \\   r11:10 = asl(r11:10,r9)
        \\   r9 = mux(p1,#3,#2)
        \\  }
        \\  {
        \\   r2 += sfmpy(r2,r6):lib
        \\
        \\   r15:14 = asl(r11:10,r9)
        \\  }
        \\  {
        \\   r2 = and(r2,##0x007fffff)
        \\  }
        \\  {
        \\   r2 = add(r2,##0x00800000 - 3)
        \\   r9 = mux(p1,#7,#8)
        \\  }
        \\  {
        \\   r8 = asl(r2,r9)
        \\   r9 = mux(p1,#15-(1+1),#15-(1+0))
        \\  }
        \\  {
        \\   r13:12 = mpyu(r8,r15)
        \\  }
        \\  {
        \\   r1:0 = asl(r11:10,#15)
        \\   r15:14 = mpyu(r13,r13)
        \\   p1 = cmp.eq(r0,r0)
        \\  }
        \\  {
        \\   r1:0 -= asl(r15:14,#15)
        \\   r15:14 = mpyu(r13,r12)
        \\   p2 = cmp.eq(r0,r0)
        \\  }
        \\  {
        \\   r1:0 -= lsr(r15:14,#16)
        \\   p3 = cmp.eq(r0,r0)
        \\  }
        \\  {
        \\   r1:0 = mpyu(r1,r8)
        \\  }
        \\  {
        \\   r13:12 += lsr(r1:0,r9)
        \\   r9 = add(r9,#16)
        \\   r1:0 = asl(r11:10,#31)
        \\  }
        \\
        \\  {
        \\   r15:14 = mpyu(r13,r13)
        \\   r1:0 -= mpyu(r13,r12)
        \\  }
        \\  {
        \\   r1:0 -= asl(r15:14,#31)
        \\   r15:14 = mpyu(r12,r12)
        \\  }
        \\  {
        \\   r1:0 -= lsr(r15:14,#33)
        \\  }
        \\  {
        \\   r1:0 = mpyu(r1,r8)
        \\  }
        \\  {
        \\   r13:12 += lsr(r1:0,r9)
        \\   r9 = add(r9,#16)
        \\   r1:0 = asl(r11:10,#47)
        \\  }
        \\
        \\  {
        \\   r15:14 = mpyu(r13,r13)
        \\  }
        \\  {
        \\   r1:0 -= asl(r15:14,#47)
        \\   r15:14 = mpyu(r13,r12)
        \\  }
        \\  {
        \\   r1:0 -= asl(r15:14,#16)
        \\   r15:14 = mpyu(r12,r12)
        \\  }
        \\  {
        \\   r1:0 -= lsr(r15:14,#17)
        \\  }
        \\  {
        \\   r1:0 = mpyu(r1,r8)
        \\  }
        \\  {
        \\   r13:12 += lsr(r1:0,r9)
        \\  }
        \\  {
        \\   r3:2 = mpyu(r13,r12)
        \\   r5:4 = mpyu(r12,r12)
        \\   r15:14 = #0
        \\   r1:0 = #0
        \\  }
        \\  {
        \\   r3:2 += lsr(r5:4,#33)
        \\   r5:4 += asl(r3:2,#33)
        \\   p1 = cmp.eq(r0,r0)
        \\  }
        \\  {
        \\   r7:6 = mpyu(r13,r13)
        \\   r1:0 = sub(r1:0,r5:4,p1):carry
        \\   r9:8 = #1
        \\  }
        \\  {
        \\   r7:6 += lsr(r3:2,#31)
        \\   r9:8 += asl(r13:12,#1)
        \\  }
        \\
        \\
        \\
        \\
        \\
        \\  {
        \\   r15:14 = sub(r11:10,r7:6,p1):carry
        \\   r5:4 = sub(r1:0,r9:8,p2):carry
        \\
        \\
        \\
        \\
        \\   r7:6 = #1
        \\   r11:10 = #0
        \\  }
        \\  {
        \\   r3:2 = sub(r15:14,r11:10,p2):carry
        \\   r7:6 = add(r13:12,r7:6)
        \\   r28 = add(r28,#-0x3ff)
        \\  }
        \\  {
        \\
        \\   if (p2) r13:12 = r7:6
        \\   if (p2) r1:0 = r5:4
        \\   if (p2) r15:14 = r3:2
        \\  }
        \\  {
        \\   r5:4 = sub(r1:0,r9:8,p3):carry
        \\   r7:6 = #1
        \\   r28 = asr(r28,#1)
        \\  }
        \\  {
        \\   r3:2 = sub(r15:14,r11:10,p3):carry
        \\   r7:6 = add(r13:12,r7:6)
        \\  }
        \\  {
        \\   if (p3) r13:12 = r7:6
        \\   if (p3) r1:0 = r5:4
        \\
        \\
        \\
        \\
        \\
        \\   r2 = #1
        \\  }
        \\  {
        \\   p0 = cmp.eq(r1:0,r11:10)
        \\   if (!p0.new) r12 = or(r12,r2)
        \\   r3 = cl0(r13:12)
        \\   r28 = add(r28,#-63)
        \\  }
        \\
        \\
        \\
        \\  {
        \\   r1:0 = convert_ud2df(r13:12)
        \\   r28 = add(r28,r3)
        \\  }
        \\  {
        \\   r1 += asl(r28,#52 -32)
        \\   jumpr r31
        \\  }
        \\ .Lsqrt_abnormal:
        \\  {
        \\   p0 = dfclass(r1:0,#0x01)
        \\   if (p0.new) jumpr:t r31
        \\  }
        \\  {
        \\   p0 = dfclass(r1:0,#0x10)
        \\   if (p0.new) jump:nt .Lsqrt_nan
        \\  }
        \\  {
        \\   p0 = cmp.gt(r1,#-1)
        \\   if (!p0.new) jump:nt .Lsqrt_invalid_neg
        \\   if (!p0.new) r28 = ##0x7F800001
        \\  }
        \\  {
        \\   p0 = dfclass(r1:0,#0x08)
        \\   if (p0.new) jumpr:nt r31
        \\  }
        \\
        \\
        \\  {
        \\   r1:0 = extractu(r1:0,#52,#0)
        \\  }
        \\  {
        \\   r28 = add(clb(r1:0),#-11)
        \\  }
        \\  {
        \\   r1:0 = asl(r1:0,r28)
        \\   r28 = sub(#1,r28)
        \\  }
        \\  {
        \\   r1 = insert(r28,#1,#52 -32)
        \\  }
        \\  {
        \\   r3:2 = extractu(r1:0,#23 +1,#52 -23)
        \\   r5 = ##0x3f000004
        \\  }
        \\  {
        \\   r9 = or(r5,r2)
        \\   r5 = and(r5,#-16)
        \\   jump .Ldenormal_restart
        \\  }
        \\ .Lsqrt_nan:
        \\  {
        \\   r28 = convert_df2sf(r1:0)
        \\   r1:0 = #-1
        \\   jumpr r31
        \\  }
        \\ .Lsqrt_invalid_neg:
        \\  {
        \\   r1:0 = convert_sf2df(r28)
        \\   jumpr r31
        \\  }
    );
}
1759
comptime {
    if (builtin.cpu.arch == .hexagon) {
        // Symbol table: each entry pairs an exported name with the function it
        // aliases. Several runtime routines are exported under more than one
        // name (e.g. the "fast" variants resolve to the same implementation).
        const export_table = .{
            .{ "__hexagon_adddf3", &__hexagon_adddf3 },
            .{ "__hexagon_fast_adddf3", &__hexagon_adddf3 },
            .{ "__hexagon_subdf3", &__hexagon_subdf3 },
            .{ "__hexagon_fast_subdf3", &__hexagon_subdf3 },
            .{ "__hexagon_divdf3", &__hexagon_divdf3 },
            .{ "__hexagon_fast_divdf3", &__hexagon_divdf3 },
            .{ "__hexagon_muldf3", &__hexagon_muldf3 },
            .{ "__hexagon_fast_muldf3", &__hexagon_muldf3 },
            .{ "__hexagon_sqrtdf2", &__hexagon_sqrtdf2 },
            .{ "__hexagon_fast2_sqrtdf2", &__hexagon_sqrtdf2 },
            .{ "__hexagon_sqrt", &__hexagon_sqrtdf2 },
            .{ "__hexagon_divsf3", &__hexagon_divsf3 },
            .{ "__hexagon_fast_divsf3", &__hexagon_divsf3 },
            .{ "__hexagon_divsi3", &__hexagon_divsi3 },
            .{ "__hexagon_umodsi3", &__hexagon_umodsi3 },
            .{ "__hexagon_sqrtf", &__hexagon_sqrtf },
            .{ "__hexagon_fast2_sqrtf", &__hexagon_sqrtf },
            .{ "__hexagon_moddi3", &__hexagon_moddi3 },
            .{ "__hexagon_divdi3", &__hexagon_divdi3 },
            .{ "__hexagon_udivdi3", &__hexagon_udivdi3 },
            .{ "__hexagon_umoddi3", &__hexagon_umoddi3 },
            .{ "__hexagon_modsi3", &__hexagon_modsi3 },
            .{ "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes", &__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes },
            .{ "__hexagon_udivsi3", &__hexagon_udivsi3 },
        };
        // Emit every export with the shared linkage/visibility settings.
        inline for (export_table) |entry| {
            @export(entry[1], .{
                .name = entry[0],
                .linkage = common.linkage,
                .visibility = common.visibility,
            });
        }
    }
}