master
1const builtin = @import("builtin");
2const common = @import("./common.zig");
3
/// Signed 32-bit integer division (compiler-rt `__divsi3` for Hexagon).
/// Register contract (visible in the code): dividend in r0, divisor in r1,
/// quotient returned in r0, return via `jumpr r31` — presumably the standard
/// Hexagon calling convention (TODO confirm against the Hexagon ABI).
/// Strategy: record the operand signs in p0/p1 (p1 = signs differ after the
/// xor), work on absolute values, align the divisor to the dividend using
/// cl0 (count leading zeros), then run a hardware-loop (`loop0`)
/// shift-and-subtract step that accumulates quotient bits from r3 into r0.
/// The early `if (p2) jumpr r31` / `if (p0) jumpr r31` exits return 0 or
/// +/-1 for the small-dividend cases; the final packet negates the quotient
/// when the signs differed.
/// NOTE(review): no explicit divide-by-zero path is visible here — behavior
/// for a zero divisor is whatever the loop produces; confirm callers never
/// rely on a specific result.
fn __hexagon_divsi3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\ p0 = cmp.ge(r0,#0)
        \\ p1 = cmp.ge(r1,#0)
        \\ r1 = abs(r0)
        \\ r2 = abs(r1)
        \\ }
        \\ {
        \\ r3 = cl0(r1)
        \\ r4 = cl0(r2)
        \\ r5 = sub(r1,r2)
        \\ p2 = cmp.gtu(r2,r1)
        \\ }
        \\ {
        \\ r0 = #0
        \\ p1 = xor(p0,p1)
        \\ p0 = cmp.gtu(r2,r5)
        \\ if (p2) jumpr r31
        \\ }
        \\
        \\ {
        \\ r0 = mux(p1,#-1,#1)
        \\ if (p0) jumpr r31
        \\ r4 = sub(r4,r3)
        \\ r3 = #1
        \\ }
        \\ {
        \\ r0 = #0
        \\ r3:2 = vlslw(r3:2,r4)
        \\ loop0(1f,r4)
        \\ }
        \\ .falign
        \\ 1:
        \\ {
        \\ p0 = cmp.gtu(r2,r1)
        \\ if (!p0.new) r1 = sub(r1,r2)
        \\ if (!p0.new) r0 = add(r0,r3)
        \\ r3:2 = vlsrw(r3:2,#1)
        \\ }:endloop0
        \\ {
        \\ p0 = cmp.gtu(r2,r1)
        \\ if (!p0.new) r0 = add(r0,r3)
        \\ if (!p1) jumpr r31
        \\ }
        \\ {
        \\ r0 = neg(r0)
        \\ jumpr r31
        \\ }
    );
}
55
/// Unsigned 32-bit remainder (compiler-rt `__umodsi3` for Hexagon).
/// Register contract: dividend in r0, divisor in r1, remainder left in r0.
/// If the divisor already exceeds the dividend (p0), r0 is the remainder and
/// we return immediately. Otherwise the divisor is shifted left so its MSB
/// lines up with the dividend's (difference of cl0 counts in r2), and a
/// hardware loop repeatedly subtracts the shifted divisor when it fits,
/// halving it each iteration; a final conditional subtract of the original
/\
/// divisor (r1) finishes the remainder.
/// `if (p1) r1 = #0` only fires when the shift count was zero — it keeps the
/// epilogue's `sub(r0,r1)` from double-subtracting in that degenerate case.
fn __hexagon_umodsi3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\ r2 = cl0(r0)
        \\ r3 = cl0(r1)
        \\ p0 = cmp.gtu(r1,r0)
        \\ }
        \\ {
        \\ r2 = sub(r3,r2)
        \\ if (p0) jumpr r31
        \\ }
        \\ {
        \\ loop0(1f,r2)
        \\ p1 = cmp.eq(r2,#0)
        \\ r2 = lsl(r1,r2)
        \\ }
        \\ .falign
        \\ 1:
        \\ {
        \\ p0 = cmp.gtu(r2,r0)
        \\ if (!p0.new) r0 = sub(r0,r2)
        \\ r2 = lsr(r2,#1)
        \\ if (p1) r1 = #0
        \\ }:endloop0
        \\ {
        \\ p0 = cmp.gtu(r2,r0)
        \\ if (!p0.new) r0 = sub(r0,r1)
        \\ jumpr r31
        \\ }
    );
}
87
/// Single-precision square root for Hexagon.
/// Input float in r0, result in r0. Seeds with the hardware reciprocal
/// square-root approximation `sfinvsqrta` (estimate in r3, scaling predicate
/// in p0) and `sffixupr`, then refines via fused multiply-accumulate steps
/// in `:lib` (library/no-round) mode — the repeated
/// "r2 -= sfmpy(r0,r1); r0/r1 += sfmpy(...)" packets are Newton–Raphson
/// style correction iterations against the constant 0.5 (##0x3f000000 in r4).
/// `p1 = sfclass(r5,#1)` tags a special operand class so the final
/// `if (p1) r0 = or(r0,r5)` can merge input bits into the result —
/// presumably the +/-0 case; TODO confirm against the Hexagon sfclass
/// encoding. The closing `sfmpy(...,p0):scale` applies the deferred exponent
/// scaling and rounds once.
fn __hexagon_sqrtf() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\ r3,p0 = sfinvsqrta(r0)
        \\ r5 = sffixupr(r0)
        \\ r4 = ##0x3f000000
        \\ r1:0 = combine(#0,#0)
        \\ }
        \\ {
        \\ r0 += sfmpy(r3,r5):lib
        \\ r1 += sfmpy(r3,r4):lib
        \\ r2 = r4
        \\ r3 = r5
        \\ }
        \\ {
        \\ r2 -= sfmpy(r0,r1):lib
        \\ p1 = sfclass(r5,#1)
        \\
        \\ }
        \\ {
        \\ r0 += sfmpy(r0,r2):lib
        \\ r1 += sfmpy(r1,r2):lib
        \\ r2 = r4
        \\ r3 = r5
        \\ }
        \\ {
        \\ r2 -= sfmpy(r0,r1):lib
        \\ r3 -= sfmpy(r0,r0):lib
        \\ }
        \\ {
        \\ r0 += sfmpy(r1,r3):lib
        \\ r1 += sfmpy(r1,r2):lib
        \\ r2 = r4
        \\ r3 = r5
        \\ }
        \\ {
        \\
        \\ r3 -= sfmpy(r0,r0):lib
        \\ if (p1) r0 = or(r0,r5)
        \\ }
        \\ {
        \\ r0 += sfmpy(r1,r3,p0):scale
        \\ jumpr r31
        \\ }
    );
}
134
/// Signed 64-bit remainder (compiler-rt `__moddi3` for Hexagon).
/// Register contract: dividend in the r1:0 pair, divisor in r3:2, remainder
/// returned in r1:0. p3 latches the dividend's sign (`tstbit(r1,#31)`)
/// before both operands are made non-negative — the remainder's sign follows
/// the dividend only, which is why the epilogue selects between r3:2 and its
/// negation with `vmux(p3,...)`.
/// Core loop: classic binary long division. The divisor copy (r13:12) is
/// pre-shifted left by the leading-zero difference (r10), the matching
/// quotient bit mask (r15:14) is tracked in parallel, and each iteration of
/// the hardware loop conditionally subtracts and then shifts both right by
/// one. The running remainder lives in r3:2; the quotient accumulated in
/// r1:0 is discarded — only the remainder is returned.
/// `if (p0) jump .hexagon_moddi3_return` short-circuits when the divisor is
/// already larger than the dividend.
fn __hexagon_moddi3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\ p3 = tstbit(r1,#31)
        \\ }
        \\ {
        \\ r1:0 = abs(r1:0)
        \\ r3:2 = abs(r3:2)
        \\ }
        \\ {
        \\ r6 = cl0(r1:0)
        \\ r7 = cl0(r3:2)
        \\ r5:4 = r3:2
        \\ r3:2 = r1:0
        \\ }
        \\ {
        \\ r10 = sub(r7,r6)
        \\ r1:0 = #0
        \\ r15:14 = #1
        \\ }
        \\ {
        \\ r11 = add(r10,#1)
        \\ r13:12 = lsl(r5:4,r10)
        \\ r15:14 = lsl(r15:14,r10)
        \\ }
        \\ {
        \\ p0 = cmp.gtu(r5:4,r3:2)
        \\ loop0(1f,r11)
        \\ }
        \\ {
        \\ if (p0) jump .hexagon_moddi3_return
        \\ }
        \\ .falign
        \\ 1:
        \\ {
        \\ p0 = cmp.gtu(r13:12,r3:2)
        \\ }
        \\ {
        \\ r7:6 = sub(r3:2, r13:12)
        \\ r9:8 = add(r1:0, r15:14)
        \\ }
        \\ {
        \\ r1:0 = vmux(p0, r1:0, r9:8)
        \\ r3:2 = vmux(p0, r3:2, r7:6)
        \\ }
        \\ {
        \\ r15:14 = lsr(r15:14, #1)
        \\ r13:12 = lsr(r13:12, #1)
        \\ }:endloop0
        \\
        \\ .hexagon_moddi3_return:
        \\ {
        \\ r1:0 = neg(r3:2)
        \\ }
        \\ {
        \\ r1:0 = vmux(p3,r1:0,r3:2)
        \\ jumpr r31
        \\ }
    );
}
195
/// Signed 64-bit division (compiler-rt `__divdi3` for Hexagon).
/// Register contract: dividend in r1:0, divisor in r3:2, quotient returned
/// in r1:0. p2/p3 capture the two operand signs and `p3 = xor(p2,p3)`
/// becomes "result is negative"; both operands then proceed as absolute
/// values.
/// Same shift-and-subtract engine as `__hexagon_moddi3` above: align the
/// divisor (r13:12) and a quotient-bit mask (r15:14) by the cl0 difference,
/// then a hardware loop conditionally subtracts the shifted divisor from the
/// remainder (r3:2) and ORs the mask bit into the quotient (r1:0), shifting
/// both right each pass. Here it is the QUOTIENT that is kept: the epilogue
/// conditionally negates r1:0 via `vmux(p3, neg, identity)`.
/// The `if (p0) jump .hexagon_divdi3_return` path returns quotient 0 when
/// the divisor exceeds the dividend.
fn __hexagon_divdi3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\ p2 = tstbit(r1,#31)
        \\ p3 = tstbit(r3,#31)
        \\ }
        \\ {
        \\ r1:0 = abs(r1:0)
        \\ r3:2 = abs(r3:2)
        \\ }
        \\ {
        \\ r6 = cl0(r1:0)
        \\ r7 = cl0(r3:2)
        \\ r5:4 = r3:2
        \\ r3:2 = r1:0
        \\ }
        \\ {
        \\ p3 = xor(p2,p3)
        \\ r10 = sub(r7,r6)
        \\ r1:0 = #0
        \\ r15:14 = #1
        \\ }
        \\ {
        \\ r11 = add(r10,#1)
        \\ r13:12 = lsl(r5:4,r10)
        \\ r15:14 = lsl(r15:14,r10)
        \\ }
        \\ {
        \\ p0 = cmp.gtu(r5:4,r3:2)
        \\ loop0(1f,r11)
        \\ }
        \\ {
        \\ if (p0) jump .hexagon_divdi3_return
        \\ }
        \\ .falign
        \\ 1:
        \\ {
        \\ p0 = cmp.gtu(r13:12,r3:2)
        \\ }
        \\ {
        \\ r7:6 = sub(r3:2, r13:12)
        \\ r9:8 = add(r1:0, r15:14)
        \\ }
        \\ {
        \\ r1:0 = vmux(p0, r1:0, r9:8)
        \\ r3:2 = vmux(p0, r3:2, r7:6)
        \\ }
        \\ {
        \\ r15:14 = lsr(r15:14, #1)
        \\ r13:12 = lsr(r13:12, #1)
        \\ }:endloop0
        \\
        \\ .hexagon_divdi3_return:
        \\ {
        \\ r3:2 = neg(r1:0)
        \\ }
        \\ {
        \\ r1:0 = vmux(p3,r3:2,r1:0)
        \\ jumpr r31
        \\ }
    );
}
258
/// Single-precision float division (compiler-rt `__divsf3` for Hexagon).
/// Numerator in r0, denominator in r1, quotient returned in r0.
/// Seeds a reciprocal estimate with the hardware `sfrecipa` (estimate r2,
/// scaling predicate p0), normalizes the operands with `sffixupd`/`sffixupn`,
/// then refines the reciprocal with Newton–Raphson steps expressed as
/// `:lib`-mode fused multiply-accumulates against 1.0 (##0x3f800000 in r3).
/// r0 accumulates numerator * reciprocal; the residual corrections in r6/r5
/// (`r6 -= sfmpy(r0,r4)`, `r5 -= sfmpy(r0,r4)`) tighten the quotient before
/// the final `sfmpy(...,p0):scale` applies the deferred exponent scaling and
/// performs the single correctly-rounded step.
/// NOTE(review): `r0 = and(r6,r5)` reads r6 in the same packet that assigns
/// `r6 = r5` — Hexagon packet semantics (reads see pre-packet values) make
/// this an AND with the OLD r6 (##0x80000000), i.e. it seeds r0 with the
/// sign bit of the fixed-up numerator.
fn __hexagon_divsf3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\ r2,p0 = sfrecipa(r0,r1)
        \\ r4 = sffixupd(r0,r1)
        \\ r3 = ##0x3f800000
        \\ }
        \\ {
        \\ r5 = sffixupn(r0,r1)
        \\ r3 -= sfmpy(r4,r2):lib
        \\ r6 = ##0x80000000
        \\ r7 = r3
        \\ }
        \\ {
        \\ r2 += sfmpy(r3,r2):lib
        \\ r3 = r7
        \\ r6 = r5
        \\ r0 = and(r6,r5)
        \\ }
        \\ {
        \\ r3 -= sfmpy(r4,r2):lib
        \\ r0 += sfmpy(r5,r2):lib
        \\ }
        \\ {
        \\ r2 += sfmpy(r3,r2):lib
        \\ r6 -= sfmpy(r0,r4):lib
        \\ }
        \\ {
        \\ r0 += sfmpy(r6,r2):lib
        \\ }
        \\ {
        \\ r5 -= sfmpy(r0,r4):lib
        \\ }
        \\ {
        \\ r0 += sfmpy(r5,r2,p0):scale
        \\ jumpr r31
        \\ }
    );
}
298
/// Unsigned 64-bit division (compiler-rt `__udivdi3` for Hexagon).
/// Dividend in r1:0, divisor in r3:2, quotient returned in r1:0.
/// Unsigned twin of `__hexagon_divdi3`: no sign bookkeeping, otherwise the
/// identical align-then-subtract scheme — shift the divisor copy (r13:12)
/// and quotient-bit mask (r15:14) left by the cl0 difference, run the
/// hardware loop conditionally subtracting from the remainder (r3:2) and
/// accumulating quotient bits into r1:0, shifting both right each pass.
/// `if (p0) jumpr r31` returns the already-zeroed quotient when the divisor
/// is larger than the dividend.
fn __hexagon_udivdi3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\ r6 = cl0(r1:0)
        \\ r7 = cl0(r3:2)
        \\ r5:4 = r3:2
        \\ r3:2 = r1:0
        \\ }
        \\ {
        \\ r10 = sub(r7,r6)
        \\ r1:0 = #0
        \\ r15:14 = #1
        \\ }
        \\ {
        \\ r11 = add(r10,#1)
        \\ r13:12 = lsl(r5:4,r10)
        \\ r15:14 = lsl(r15:14,r10)
        \\ }
        \\ {
        \\ p0 = cmp.gtu(r5:4,r3:2)
        \\ loop0(1f,r11)
        \\ }
        \\ {
        \\ if (p0) jumpr r31
        \\ }
        \\ .falign
        \\ 1:
        \\ {
        \\ p0 = cmp.gtu(r13:12,r3:2)
        \\ }
        \\ {
        \\ r7:6 = sub(r3:2, r13:12)
        \\ r9:8 = add(r1:0, r15:14)
        \\ }
        \\ {
        \\ r1:0 = vmux(p0, r1:0, r9:8)
        \\ r3:2 = vmux(p0, r3:2, r7:6)
        \\ }
        \\ {
        \\ r15:14 = lsr(r15:14, #1)
        \\ r13:12 = lsr(r13:12, #1)
        \\ }:endloop0
        \\ {
        \\ jumpr r31
        \\ }
    );
}
346
/// Unsigned 64-bit remainder (compiler-rt `__umoddi3` for Hexagon).
/// Dividend in r1:0, divisor in r3:2, remainder returned in r1:0.
/// Same engine as `__hexagon_udivdi3`, but the epilogue copies the final
/// remainder (r3:2) into the return pair instead of the quotient. When the
/// divisor already exceeds the dividend, `jump .hexagon_umoddi3_return`
/// skips the loop and the dividend itself (in r3:2 after the register swap)
/// is returned as the remainder.
fn __hexagon_umoddi3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\ r6 = cl0(r1:0)
        \\ r7 = cl0(r3:2)
        \\ r5:4 = r3:2
        \\ r3:2 = r1:0
        \\ }
        \\ {
        \\ r10 = sub(r7,r6)
        \\ r1:0 = #0
        \\ r15:14 = #1
        \\ }
        \\ {
        \\ r11 = add(r10,#1)
        \\ r13:12 = lsl(r5:4,r10)
        \\ r15:14 = lsl(r15:14,r10)
        \\ }
        \\ {
        \\ p0 = cmp.gtu(r5:4,r3:2)
        \\ loop0(1f,r11)
        \\ }
        \\ {
        \\ if (p0) jump .hexagon_umoddi3_return
        \\ }
        \\ .falign
        \\ 1:
        \\ {
        \\ p0 = cmp.gtu(r13:12,r3:2)
        \\ }
        \\ {
        \\ r7:6 = sub(r3:2, r13:12)
        \\ r9:8 = add(r1:0, r15:14)
        \\ }
        \\ {
        \\ r1:0 = vmux(p0, r1:0, r9:8)
        \\ r3:2 = vmux(p0, r3:2, r7:6)
        \\ }
        \\ {
        \\ r15:14 = lsr(r15:14, #1)
        \\ r13:12 = lsr(r13:12, #1)
        \\ }:endloop0
        \\
        \\ .hexagon_umoddi3_return:
        \\ {
        \\ r1:0 = r3:2
        \\ jumpr r31
        \\ }
    );
}
397
/// Signed 32-bit remainder (compiler-rt `__modsi3` for Hexagon).
/// Dividend in r0, divisor in r1, remainder returned in r0.
/// p2 latches "dividend is non-negative" up front; both operands are then
/// made absolute and processed by the same shift-subtract loop as
/// `__hexagon_umodsi3` (align divisor via cl0 difference, hardware loop of
/// conditional subtracts, final conditional subtract of the unshifted
/// divisor). The remainder takes the dividend's sign only: the epilogue
/// negates r0 exactly when p2 is false.
/// `if (p1) r1 = #0` neutralizes the epilogue subtract when the shift count
/// was zero, mirroring the unsigned routine.
fn __hexagon_modsi3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\ p2 = cmp.ge(r0,#0)
        \\ r2 = abs(r0)
        \\ r1 = abs(r1)
        \\ }
        \\ {
        \\ r3 = cl0(r2)
        \\ r4 = cl0(r1)
        \\ p0 = cmp.gtu(r1,r2)
        \\ }
        \\ {
        \\ r3 = sub(r4,r3)
        \\ if (p0) jumpr r31
        \\ }
        \\ {
        \\ p1 = cmp.eq(r3,#0)
        \\ loop0(1f,r3)
        \\ r0 = r2
        \\ r2 = lsl(r1,r3)
        \\ }
        \\ .falign
        \\ 1:
        \\ {
        \\ p0 = cmp.gtu(r2,r0)
        \\ if (!p0.new) r0 = sub(r0,r2)
        \\ r2 = lsr(r2,#1)
        \\ if (p1) r1 = #0
        \\ }:endloop0
        \\ {
        \\ p0 = cmp.gtu(r2,r0)
        \\ if (!p0.new) r0 = sub(r0,r1)
        \\ if (p2) jumpr r31
        \\ }
        \\ {
        \\ r0 = neg(r0)
        \\ jumpr r31
        \\ }
    );
}
439
/// Specialized memcpy for the case promised by the name: dest (r0) and src
/// (r1) both 8-byte aligned, length (r2) at least 32 and a multiple of 8.
/// The first packet verifies BOTH alignments with compound `bitsclr`
/// predicates; if either check fails, `.Lmemcpy_call` tail-jumps to the
/// regular `memcpy` via the PLT instead.
/// Fast path: software-pipelined 8-byte (`memd`) copies — two doublewords
/// are moved in the prologue packets while the next load is issued, then a
/// hardware loop streams one doubleword per iteration. r3 starts at #-3 and
/// gains len/8, accounting for the three doublewords handled outside the
/// loop. The epilogue stores the last doubleword and rewinds r0 by
/// (len - 8) so it again points at the destination base — presumably the
/// usual memcpy return-value contract; TODO confirm with callers.
fn __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\ p0 = bitsclr(r1,#7)
        \\ p0 = bitsclr(r0,#7)
        \\ if (p0.new) r5:4 = memd(r1)
        \\ r3 = #-3
        \\ }
        \\ {
        \\ if (!p0) jump .Lmemcpy_call
        \\ if (p0) memd(r0++#8) = r5:4
        \\ if (p0) r5:4 = memd(r1+#8)
        \\ r3 += lsr(r2,#3)
        \\ }
        \\ {
        \\ memd(r0++#8) = r5:4
        \\ r5:4 = memd(r1+#16)
        \\ r1 = add(r1,#24)
        \\ loop0(1f,r3)
        \\ }
        \\ .falign
        \\ 1:
        \\ {
        \\ memd(r0++#8) = r5:4
        \\ r5:4 = memd(r1++#8)
        \\ }:endloop0
        \\ {
        \\ memd(r0) = r5:4
        \\ r0 -= add(r2,#-8)
        \\ jumpr r31
        \\ }
        \\ .Lmemcpy_call:
        \\ jump memcpy@PLT
    );
}
475
/// Unsigned 32-bit division (compiler-rt `__udivsi3` for Hexagon).
/// Dividend in r0, divisor in r1, quotient returned in r0.
/// The early-out returns quotient 0 when the divisor exceeds the dividend
/// (note the same-packet swap: `r1:0 = combine(r0,r4)` reads the OLD r4,
/// i.e. 0, so r0 becomes the zero quotient accumulator and r1 the working
/// dividend). Otherwise `vlslw` pre-shifts the {quotient-bit, divisor} pair
/// r3:2 left by the cl0 difference and the hardware loop performs the usual
/// conditional subtract / quotient-bit accumulate, shifting the pair right
/// each pass, with one trailing correction step after the loop.
fn __hexagon_udivsi3() callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\ r2 = cl0(r0)
        \\ r3 = cl0(r1)
        \\ r5:4 = combine(#1,#0)
        \\ p0 = cmp.gtu(r1,r0)
        \\ }
        \\ {
        \\ r6 = sub(r3,r2)
        \\ r4 = r1
        \\ r1:0 = combine(r0,r4)
        \\ if (p0) jumpr r31
        \\ }
        \\ {
        \\ r3:2 = vlslw(r5:4,r6)
        \\ loop0(1f,r6)
        \\ }
        \\ .falign
        \\ 1:
        \\ {
        \\ p0 = cmp.gtu(r2,r1)
        \\ if (!p0.new) r1 = sub(r1,r2)
        \\ if (!p0.new) r0 = add(r0,r3)
        \\ r3:2 = vlsrw(r3:2,#1)
        \\ }:endloop0
        \\ {
        \\ p0 = cmp.gtu(r2,r1)
        \\ if (!p0.new) r0 = add(r0,r3)
        \\ jumpr r31
        \\ }
    );
}
509
/// Double-precision addition (compiler-rt `__adddf3` for Hexagon).
/// Operands in register pairs r1:0 and r3:2, sum returned in r1:0.
/// Fast path (both operands "normal" per `dfclass(...,#2)`): the operands
/// are ordered so the larger exponent is first, mantissas are expanded into
/// 64-bit fixed-point form with guard bits (`insert(...,#52,#11 -2)`), the
/// smaller is arithmetically shifted into alignment (`.Ladd_continue`) with
/// sticky-bit collection via the `extractu`/`or(r8,r6)` pair, signed
/// magnitudes are added, and the result is renormalized through
/// `convert_d2df` plus an exponent adjustment folded into the high word.
/// Label map (all reachable from the visible code):
///   .Ladd_continue      — alignment + add shared with the subnormal path.
///   .Ladd_zero          — exact-zero sum; sign chosen from the USR
///                         rounding-mode field (round-down yields -0).
///   .Ladd_ovf_unf       — result exponent out of range; builds the
///                         overflow/underflow result, denormalizing via
///                         `lsr` when the exponent is <= 0.
///   .Ladd_ovf           — overflow proper: sets USR overflow/inexact bits
///                         (#0x28) and picks +/-inf vs +/-max-finite from
///                         the rounding mode.
///   .Ladd_abnormal      — at least one operand not normal; dispatches on
///                         dfclass to the NaN/inf/zero/subnormal cases.
///   .Ladd_two_subnormal — both subnormal: exact signed-magnitude add.
///   .Linvalid_nan_add   — NaN input; converts to signal invalid and
///                         returns the all-ones NaN pattern (#-1).
///   .LB_zero            — second operand zero: return the other operand
///                         unless both are zero.
///   .Lzero_plus_zero    — 0 + 0 with possibly differing signs; consults
///                         the USR rounding mode for the result sign.
///   .Linf_add           — infinity handling: inf + (-inf) produces NaN
///                         via sf->df convert of ##0x7f800001.
/// The trailing `dfcmp.eq(r1:0,r1:0)` packets exist for their side effect —
/// presumably to raise/settle FP status flags before returning (TODO
/// confirm against the Hexagon PRM).
fn __hexagon_adddf3() align(32) callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\ r4 = extractu(r1,#11,#20)
        \\ r5 = extractu(r3,#11,#20)
        \\ r13:12 = combine(##0x20000000,#0)
        \\ }
        \\ {
        \\ p3 = dfclass(r1:0,#2)
        \\ p3 = dfclass(r3:2,#2)
        \\ r9:8 = r13:12
        \\ p2 = cmp.gtu(r5,r4)
        \\ }
        \\ {
        \\ if (!p3) jump .Ladd_abnormal
        \\ if (p2) r1:0 = r3:2
        \\ if (p2) r3:2 = r1:0
        \\ if (p2) r5:4 = combine(r4,r5)
        \\ }
        \\ {
        \\ r13:12 = insert(r1:0,#52,#11 -2)
        \\ r9:8 = insert(r3:2,#52,#11 -2)
        \\ r15 = sub(r4,r5)
        \\ r7:6 = combine(#62,#1)
        \\ }
        \\
        \\
        \\
        \\
        \\
        \\ .Ladd_continue:
        \\ {
        \\ r15 = min(r15,r7)
        \\
        \\ r11:10 = neg(r13:12)
        \\ p2 = cmp.gt(r1,#-1)
        \\ r14 = #0
        \\ }
        \\ {
        \\ if (!p2) r13:12 = r11:10
        \\ r11:10 = extractu(r9:8,r15:14)
        \\ r9:8 = ASR(r9:8,r15)
        \\
        \\
        \\
        \\
        \\ r15:14 = #0
        \\ }
        \\ {
        \\ p1 = cmp.eq(r11:10,r15:14)
        \\ if (!p1.new) r8 = or(r8,r6)
        \\ r5 = add(r4,#-1024 -60)
        \\ p3 = cmp.gt(r3,#-1)
        \\ }
        \\ {
        \\ r13:12 = add(r13:12,r9:8)
        \\ r11:10 = sub(r13:12,r9:8)
        \\ r7:6 = combine(#54,##2045)
        \\ }
        \\ {
        \\ p0 = cmp.gtu(r4,r7)
        \\ p0 = !cmp.gtu(r4,r6)
        \\ if (!p0.new) jump:nt .Ladd_ovf_unf
        \\ if (!p3) r13:12 = r11:10
        \\ }
        \\ {
        \\ r1:0 = convert_d2df(r13:12)
        \\ p0 = cmp.eq(r13,#0)
        \\ p0 = cmp.eq(r12,#0)
        \\ if (p0.new) jump:nt .Ladd_zero
        \\ }
        \\ {
        \\ r1 += asl(r5,#20)
        \\ jumpr r31
        \\ }
        \\
        \\ .falign
        \\ .Ladd_zero:
        \\
        \\
        \\ {
        \\ r28 = USR
        \\ r1:0 = #0
        \\ r3 = #1
        \\ }
        \\ {
        \\ r28 = extractu(r28,#2,#22)
        \\ r3 = asl(r3,#31)
        \\ }
        \\ {
        \\ p0 = cmp.eq(r28,#2)
        \\ if (p0.new) r1 = xor(r1,r3)
        \\ jumpr r31
        \\ }
        \\ .falign
        \\ .Ladd_ovf_unf:
        \\ {
        \\ r1:0 = convert_d2df(r13:12)
        \\ p0 = cmp.eq(r13,#0)
        \\ p0 = cmp.eq(r12,#0)
        \\ if (p0.new) jump:nt .Ladd_zero
        \\ }
        \\ {
        \\ r28 = extractu(r1,#11,#20)
        \\ r1 += asl(r5,#20)
        \\ }
        \\ {
        \\ r5 = add(r5,r28)
        \\ r3:2 = combine(##0x00100000,#0)
        \\ }
        \\ {
        \\ p0 = cmp.gt(r5,##1024 +1024 -2)
        \\ if (p0.new) jump:nt .Ladd_ovf
        \\ }
        \\ {
        \\ p0 = cmp.gt(r5,#0)
        \\ if (p0.new) jumpr:t r31
        \\ r28 = sub(#1,r5)
        \\ }
        \\ {
        \\ r3:2 = insert(r1:0,#52,#0)
        \\ r1:0 = r13:12
        \\ }
        \\ {
        \\ r3:2 = lsr(r3:2,r28)
        \\ }
        \\ {
        \\ r1:0 = insert(r3:2,#63,#0)
        \\ jumpr r31
        \\ }
        \\ .falign
        \\ .Ladd_ovf:
        \\
        \\ {
        \\ r1:0 = r13:12
        \\ r28 = USR
        \\ r13:12 = combine(##0x7fefffff,#-1)
        \\ }
        \\ {
        \\ r5 = extractu(r28,#2,#22)
        \\ r28 = or(r28,#0x28)
        \\ r9:8 = combine(##0x7ff00000,#0)
        \\ }
        \\ {
        \\ USR = r28
        \\ r5 ^= lsr(r1,#31)
        \\ r28 = r5
        \\ }
        \\ {
        \\ p0 = !cmp.eq(r28,#1)
        \\ p0 = !cmp.eq(r5,#2)
        \\ if (p0.new) r13:12 = r9:8
        \\ }
        \\ {
        \\ r1:0 = insert(r13:12,#63,#0)
        \\ }
        \\ {
        \\ p0 = dfcmp.eq(r1:0,r1:0)
        \\ jumpr r31
        \\ }
        \\
        \\ .Ladd_abnormal:
        \\ {
        \\ r13:12 = extractu(r1:0,#63,#0)
        \\ r9:8 = extractu(r3:2,#63,#0)
        \\ }
        \\ {
        \\ p3 = cmp.gtu(r13:12,r9:8)
        \\ if (!p3.new) r1:0 = r3:2
        \\ if (!p3.new) r3:2 = r1:0
        \\ }
        \\ {
        \\
        \\ p0 = dfclass(r1:0,#0x0f)
        \\ if (!p0.new) jump:nt .Linvalid_nan_add
        \\ if (!p3) r13:12 = r9:8
        \\ if (!p3) r9:8 = r13:12
        \\ }
        \\ {
        \\
        \\
        \\ p1 = dfclass(r1:0,#0x08)
        \\ if (p1.new) jump:nt .Linf_add
        \\ }
        \\ {
        \\ p2 = dfclass(r3:2,#0x01)
        \\ if (p2.new) jump:nt .LB_zero
        \\ r13:12 = #0
        \\ }
        \\
        \\ {
        \\ p0 = dfclass(r1:0,#4)
        \\ if (p0.new) jump:nt .Ladd_two_subnormal
        \\ r13:12 = combine(##0x20000000,#0)
        \\ }
        \\ {
        \\ r4 = extractu(r1,#11,#20)
        \\ r5 = #1
        \\
        \\ r9:8 = asl(r9:8,#11 -2)
        \\ }
        \\
        \\
        \\
        \\ {
        \\ r13:12 = insert(r1:0,#52,#11 -2)
        \\ r15 = sub(r4,r5)
        \\ r7:6 = combine(#62,#1)
        \\ jump .Ladd_continue
        \\ }
        \\
        \\ .Ladd_two_subnormal:
        \\ {
        \\ r13:12 = extractu(r1:0,#63,#0)
        \\ r9:8 = extractu(r3:2,#63,#0)
        \\ }
        \\ {
        \\ r13:12 = neg(r13:12)
        \\ r9:8 = neg(r9:8)
        \\ p0 = cmp.gt(r1,#-1)
        \\ p1 = cmp.gt(r3,#-1)
        \\ }
        \\ {
        \\ if (p0) r13:12 = r1:0
        \\ if (p1) r9:8 = r3:2
        \\ }
        \\ {
        \\ r13:12 = add(r13:12,r9:8)
        \\ }
        \\ {
        \\ r9:8 = neg(r13:12)
        \\ p0 = cmp.gt(r13,#-1)
        \\ r3:2 = #0
        \\ }
        \\ {
        \\ if (!p0) r1:0 = r9:8
        \\ if (p0) r1:0 = r13:12
        \\ r3 = ##0x80000000
        \\ }
        \\ {
        \\ if (!p0) r1 = or(r1,r3)
        \\ p0 = dfcmp.eq(r1:0,r3:2)
        \\ if (p0.new) jump:nt .Lzero_plus_zero
        \\ }
        \\ {
        \\ jumpr r31
        \\ }
        \\
        \\ .Linvalid_nan_add:
        \\ {
        \\ r28 = convert_df2sf(r1:0)
        \\ p0 = dfclass(r3:2,#0x0f)
        \\ if (p0.new) r3:2 = r1:0
        \\ }
        \\ {
        \\ r2 = convert_df2sf(r3:2)
        \\ r1:0 = #-1
        \\ jumpr r31
        \\ }
        \\ .falign
        \\ .LB_zero:
        \\ {
        \\ p0 = dfcmp.eq(r13:12,r1:0)
        \\ if (!p0.new) jumpr:t r31
        \\ }
        \\
        \\
        \\
        \\
        \\ .Lzero_plus_zero:
        \\ {
        \\ p0 = cmp.eq(r1:0,r3:2)
        \\ if (p0.new) jumpr:t r31
        \\ }
        \\ {
        \\ r28 = USR
        \\ }
        \\ {
        \\ r28 = extractu(r28,#2,#22)
        \\ r1:0 = #0
        \\ }
        \\ {
        \\ p0 = cmp.eq(r28,#2)
        \\ if (p0.new) r1 = ##0x80000000
        \\ jumpr r31
        \\ }
        \\ .Linf_add:
        \\
        \\ {
        \\ p0 = !cmp.eq(r1,r3)
        \\ p0 = dfclass(r3:2,#8)
        \\ if (!p0.new) jumpr:t r31
        \\ }
        \\ {
        \\ r2 = ##0x7f800001
        \\ }
        \\ {
        \\ r1:0 = convert_sf2df(r2)
        \\ jumpr r31
        \\ }
    );
}
812
/// Double-precision subtraction (compiler-rt `__subdf3` for Hexagon).
/// Implemented as a - b == a + (-b): flips the sign bit (bit 31 of r3, the
/// high word of the second operand pair r3:2) with `togglebit`, then
/// tail-jumps into `__hexagon_adddf3`, which produces the result in r1:0
/// and returns to the original caller.
fn __hexagon_subdf3() align(32) callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\ r3 = togglebit(r3,#31)
        \\ jump ##__hexagon_adddf3
        \\ }
    );
}
821
/// Double-precision division (compiler-rt `__divdf3` for Hexagon).
/// Numerator in r1:0, denominator in r3:2, quotient returned in r1:0.
/// p3 = "result is positive" (sign of r1 XOR r3 is clear); exponents are
/// peeled out of the packed high words into r12/r13.
/// Fast path (both operands normal): a single-precision reciprocal of the
/// denominator's top mantissa bits is built with `sfrecipa` + two
/// Newton–Raphson `:lib` refinements, its mantissa (r13) becomes a
/// fixed-point reciprocal estimate, and the four long packet-rows perform
/// iterative multiply/subtract long division (`mpyu` high/low pieces),
/// accumulating quotient chunks into r5:4 at descending shifts (ASL 14,
/// then ASR 1/16/31). A trailing compare against the denominator sets the
/// final quotient bit and the sticky bit (`or(r4,r28)`), the quotient is
/// conditionally negated for the sign, converted via `convert_d2df`, and
/// the biased exponent (r12) is folded into the high word.
/// Label map:
///   .Ldenorm_continue  — re-entry point once denormal inputs have been
///                        normalized (exponents fixed up in r12/r13).
///   .Ldiv_ovf_unf      — exponent out of range: denormalize with sticky
///                        collection and set USR flags, or fall to overflow.
///   .Ldiv_possible_unf — exact-at-boundary check; sets USR
///                        underflow/inexact (#0x30) only when the result is
///                        inexact (bitsset on the guard bits).
///   .Ldiv_ovf          — overflow: round-mode-dependent +/-inf vs
///                        max-finite, USR |= 0x28.
///   .Ldiv_abnormal     — dispatch for NaN/inf/zero/denormal operands via
///                        stacked dfclass predicates.
///   .Ldiv_zero_result  — finite/inf -> signed zero.
///   .Ldiv_inf_result   — x/0 or inf/finite -> signed infinity, raising
///                        divide-by-zero (USR |= 0x04) for the x/0 case.
///   .Ldiv_nan          — NaN propagation (convert_df2sf presumably raises
///                        the invalid flag — TODO confirm), returns #-1.
///   .Ldiv_invalid      — 0/0 and inf/inf: builds the default NaN from
///                        ##0x7f800001 via convert_sf2df.
fn __hexagon_divdf3() align(32) callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\ p2 = dfclass(r1:0,#0x02)
        \\ p2 = dfclass(r3:2,#0x02)
        \\ r13:12 = combine(r3,r1)
        \\ r28 = xor(r1,r3)
        \\ }
        \\ {
        \\ if (!p2) jump .Ldiv_abnormal
        \\ r7:6 = extractu(r3:2,#23,#52 -23)
        \\ r8 = ##0x3f800001
        \\ }
        \\ {
        \\ r9 = or(r8,r6)
        \\ r13 = extractu(r13,#11,#52 -32)
        \\ r12 = extractu(r12,#11,#52 -32)
        \\ p3 = cmp.gt(r28,#-1)
        \\ }
        \\
        \\
        \\ .Ldenorm_continue:
        \\ {
        \\ r11,p0 = sfrecipa(r8,r9)
        \\ r10 = and(r8,#-2)
        \\ r28 = #1
        \\ r12 = sub(r12,r13)
        \\ }
        \\
        \\
        \\ {
        \\ r10 -= sfmpy(r11,r9):lib
        \\ r1 = insert(r28,#11 +1,#52 -32)
        \\ r13 = ##0x00800000 << 3
        \\ }
        \\ {
        \\ r11 += sfmpy(r11,r10):lib
        \\ r3 = insert(r28,#11 +1,#52 -32)
        \\ r10 = and(r8,#-2)
        \\ }
        \\ {
        \\ r10 -= sfmpy(r11,r9):lib
        \\ r5 = #-0x3ff +1
        \\ r4 = #0x3ff -1
        \\ }
        \\ {
        \\ r11 += sfmpy(r11,r10):lib
        \\ p1 = cmp.gt(r12,r5)
        \\ p1 = !cmp.gt(r12,r4)
        \\ }
        \\ {
        \\ r13 = insert(r11,#23,#3)
        \\ r5:4 = #0
        \\ r12 = add(r12,#-61)
        \\ }
        \\
        \\
        \\
        \\
        \\ {
        \\ r13 = add(r13,#((-3) << 3))
        \\ }
        \\ { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASL(r7:6, # ( 14 )); r1:0 -= asl(r15:14, # 32); }
        \\ { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 1 )); r1:0 -= asl(r15:14, # 32); }
        \\ { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 16 )); r1:0 -= asl(r15:14, # 32); }
        \\ { r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 31 )); r1:0 -= asl(r15:14, # 32); r7:6=# ( 0 ); }
        \\
        \\
        \\
        \\
        \\
        \\
        \\
        \\ {
        \\
        \\ r15:14 = sub(r1:0,r3:2)
        \\ p0 = cmp.gtu(r3:2,r1:0)
        \\
        \\ if (!p0.new) r6 = #2
        \\ }
        \\ {
        \\ r5:4 = add(r5:4,r7:6)
        \\ if (!p0) r1:0 = r15:14
        \\ r15:14 = #0
        \\ }
        \\ {
        \\ p0 = cmp.eq(r1:0,r15:14)
        \\ if (!p0.new) r4 = or(r4,r28)
        \\ }
        \\ {
        \\ r7:6 = neg(r5:4)
        \\ }
        \\ {
        \\ if (!p3) r5:4 = r7:6
        \\ }
        \\ {
        \\ r1:0 = convert_d2df(r5:4)
        \\ if (!p1) jump .Ldiv_ovf_unf
        \\ }
        \\ {
        \\ r1 += asl(r12,#52 -32)
        \\ jumpr r31
        \\ }
        \\
        \\ .Ldiv_ovf_unf:
        \\ {
        \\ r1 += asl(r12,#52 -32)
        \\ r13 = extractu(r1,#11,#52 -32)
        \\ }
        \\ {
        \\ r7:6 = abs(r5:4)
        \\ r12 = add(r12,r13)
        \\ }
        \\ {
        \\ p0 = cmp.gt(r12,##0x3ff +0x3ff)
        \\ if (p0.new) jump:nt .Ldiv_ovf
        \\ }
        \\ {
        \\ p0 = cmp.gt(r12,#0)
        \\ if (p0.new) jump:nt .Ldiv_possible_unf
        \\ }
        \\ {
        \\ r13 = add(clb(r7:6),#-1)
        \\ r12 = sub(#7,r12)
        \\ r10 = USR
        \\ r11 = #63
        \\ }
        \\ {
        \\ r13 = min(r12,r11)
        \\ r11 = or(r10,#0x030)
        \\ r7:6 = asl(r7:6,r13)
        \\ r12 = #0
        \\ }
        \\ {
        \\ r15:14 = extractu(r7:6,r13:12)
        \\ r7:6 = lsr(r7:6,r13)
        \\ r3:2 = #1
        \\ }
        \\ {
        \\ p0 = cmp.gtu(r3:2,r15:14)
        \\ if (!p0.new) r6 = or(r2,r6)
        \\ r7 = setbit(r7,#52 -32+4)
        \\ }
        \\ {
        \\ r5:4 = neg(r7:6)
        \\ p0 = bitsclr(r6,#(1<<4)-1)
        \\ if (!p0.new) r10 = r11
        \\ }
        \\ {
        \\ USR = r10
        \\ if (p3) r5:4 = r7:6
        \\ r10 = #-0x3ff -(52 +4)
        \\ }
        \\ {
        \\ r1:0 = convert_d2df(r5:4)
        \\ }
        \\ {
        \\ r1 += asl(r10,#52 -32)
        \\ jumpr r31
        \\ }
        \\
        \\
        \\ .Ldiv_possible_unf:
        \\
        \\
        \\ {
        \\ r3:2 = extractu(r1:0,#63,#0)
        \\ r15:14 = combine(##0x00100000,#0)
        \\ r10 = #0x7FFF
        \\ }
        \\ {
        \\ p0 = dfcmp.eq(r15:14,r3:2)
        \\ p0 = bitsset(r7,r10)
        \\ }
        \\
        \\
        \\
        \\
        \\
        \\
        \\ {
        \\ if (!p0) jumpr r31
        \\ r10 = USR
        \\ }
        \\
        \\ {
        \\ r10 = or(r10,#0x30)
        \\ }
        \\ {
        \\ USR = r10
        \\ }
        \\ {
        \\ p0 = dfcmp.eq(r1:0,r1:0)
        \\ jumpr r31
        \\ }
        \\
        \\ .Ldiv_ovf:
        \\
        \\
        \\
        \\ {
        \\ r10 = USR
        \\ r3:2 = combine(##0x7fefffff,#-1)
        \\ r1 = mux(p3,#0,#-1)
        \\ }
        \\ {
        \\ r7:6 = combine(##0x7ff00000,#0)
        \\ r5 = extractu(r10,#2,#22)
        \\ r10 = or(r10,#0x28)
        \\ }
        \\ {
        \\ USR = r10
        \\ r5 ^= lsr(r1,#31)
        \\ r4 = r5
        \\ }
        \\ {
        \\ p0 = !cmp.eq(r4,#1)
        \\ p0 = !cmp.eq(r5,#2)
        \\ if (p0.new) r3:2 = r7:6
        \\ p0 = dfcmp.eq(r3:2,r3:2)
        \\ }
        \\ {
        \\ r1:0 = insert(r3:2,#63,#0)
        \\ jumpr r31
        \\ }
        \\
        \\
        \\
        \\
        \\
        \\
        \\
        \\ .Ldiv_abnormal:
        \\ {
        \\ p0 = dfclass(r1:0,#0x0F)
        \\ p0 = dfclass(r3:2,#0x0F)
        \\ p3 = cmp.gt(r28,#-1)
        \\ }
        \\ {
        \\ p1 = dfclass(r1:0,#0x08)
        \\ p1 = dfclass(r3:2,#0x08)
        \\ }
        \\ {
        \\ p2 = dfclass(r1:0,#0x01)
        \\ p2 = dfclass(r3:2,#0x01)
        \\ }
        \\ {
        \\ if (!p0) jump .Ldiv_nan
        \\ if (p1) jump .Ldiv_invalid
        \\ }
        \\ {
        \\ if (p2) jump .Ldiv_invalid
        \\ }
        \\ {
        \\ p2 = dfclass(r1:0,#(0x0F ^ 0x01))
        \\ p2 = dfclass(r3:2,#(0x0F ^ 0x08))
        \\ }
        \\ {
        \\ p1 = dfclass(r1:0,#(0x0F ^ 0x08))
        \\ p1 = dfclass(r3:2,#(0x0F ^ 0x01))
        \\ }
        \\ {
        \\ if (!p2) jump .Ldiv_zero_result
        \\ if (!p1) jump .Ldiv_inf_result
        \\ }
        \\
        \\
        \\
        \\
        \\
        \\ {
        \\ p0 = dfclass(r1:0,#0x02)
        \\ p1 = dfclass(r3:2,#0x02)
        \\ r10 = ##0x00100000
        \\ }
        \\ {
        \\ r13:12 = combine(r3,r1)
        \\ r1 = insert(r10,#11 +1,#52 -32)
        \\ r3 = insert(r10,#11 +1,#52 -32)
        \\ }
        \\ {
        \\ if (p0) r1 = or(r1,r10)
        \\ if (p1) r3 = or(r3,r10)
        \\ }
        \\ {
        \\ r5 = add(clb(r1:0),#-11)
        \\ r4 = add(clb(r3:2),#-11)
        \\ r10 = #1
        \\ }
        \\ {
        \\ r12 = extractu(r12,#11,#52 -32)
        \\ r13 = extractu(r13,#11,#52 -32)
        \\ }
        \\ {
        \\ r1:0 = asl(r1:0,r5)
        \\ r3:2 = asl(r3:2,r4)
        \\ if (!p0) r12 = sub(r10,r5)
        \\ if (!p1) r13 = sub(r10,r4)
        \\ }
        \\ {
        \\ r7:6 = extractu(r3:2,#23,#52 -23)
        \\ }
        \\ {
        \\ r9 = or(r8,r6)
        \\ jump .Ldenorm_continue
        \\ }
        \\
        \\ .Ldiv_zero_result:
        \\ {
        \\ r1 = xor(r1,r3)
        \\ r3:2 = #0
        \\ }
        \\ {
        \\ r1:0 = insert(r3:2,#63,#0)
        \\ jumpr r31
        \\ }
        \\ .Ldiv_inf_result:
        \\ {
        \\ p2 = dfclass(r3:2,#0x01)
        \\ p2 = dfclass(r1:0,#(0x0F ^ 0x08))
        \\ }
        \\ {
        \\ r10 = USR
        \\ if (!p2) jump 1f
        \\ r1 = xor(r1,r3)
        \\ }
        \\ {
        \\ r10 = or(r10,#0x04)
        \\ }
        \\ {
        \\ USR = r10
        \\ }
        \\ 1:
        \\ {
        \\ r3:2 = combine(##0x7ff00000,#0)
        \\ p0 = dfcmp.uo(r3:2,r3:2)
        \\ }
        \\ {
        \\ r1:0 = insert(r3:2,#63,#0)
        \\ jumpr r31
        \\ }
        \\ .Ldiv_nan:
        \\ {
        \\ p0 = dfclass(r1:0,#0x10)
        \\ p1 = dfclass(r3:2,#0x10)
        \\ if (!p0.new) r1:0 = r3:2
        \\ if (!p1.new) r3:2 = r1:0
        \\ }
        \\ {
        \\ r5 = convert_df2sf(r1:0)
        \\ r4 = convert_df2sf(r3:2)
        \\ }
        \\ {
        \\ r1:0 = #-1
        \\ jumpr r31
        \\ }
        \\
        \\ .Ldiv_invalid:
        \\ {
        \\ r10 = ##0x7f800001
        \\ }
        \\ {
        \\ r1:0 = convert_sf2df(r10)
        \\ jumpr r31
        \\ }
    );
}
1189
/// Double-precision multiplication (compiler-rt `__muldf3` for Hexagon).
/// Operands in r1:0 and r3:2, product returned in r1:0.
/// Fast path (both normal per `dfclass(...,#2)`): mantissas are expanded to
/// 64-bit fixed point (implicit-one planted by `insert`/`combine` with
/// ##0x40000000), and the 128-bit product's high part is assembled from the
/// four `mpyu` partial products (r7:6, r15:14, r11:10) with a sticky bit
/// (`or(r10,r8)`) recording any nonzero low bits. The exponent is
/// `expA + expB - bias` (r6 += add(r28,r7) with r28 = #-1024), the sign is
/// taken from xor of the high words (p2), and an in-range result goes out
/// through `convert_d2df` plus the exponent folded into the high word.
/// Label map:
///   .Lmul_ovf_unf      — exponent out of [1, 0x7fe]: recompute, then
///                        branch to overflow, possible-underflow, or the
///                        denormalizing ASR path with sticky collection and
///                        USR underflow/inexact (#0x0030) update.
///   .Lmul_possible_unf — boundary case: only sets USR flags when the
///                        result is inexact (bitsclr/bitsset checks).
///   .Lmul_ovf          — overflow: rounding-mode/sign select between
///                        +/-inf and +/-max-finite, USR |= 0x28.
///   .Lmul_abnormal     — NaN/inf/zero/denormal dispatch; orders operands
///                        by magnitude first.
///   .Lmul_tiny         — both operands so small the product underflows to
///                        the minimum: sets USR underflow/inexact and
///                        rounds to 0 or smallest-denormal by round mode.
///   .Linvalid_zeroinf  — 0 * inf: USR invalid (|= 2), returns NaN (#-1).
///   .Linvalid_nan      — NaN input propagation (via convert_df2sf side
///                        effects), returns #-1.
///   .Ltrue_zero / .Ltrue_inf — exact zero/infinity results with the sign
///                        xor folded in via `r1 ^= asl(r3,#31)`.
/// The denormal path renormalizes the smaller operand and re-enters this
/// same routine via `jump __hexagon_muldf3`.
fn __hexagon_muldf3() align(32) callconv(.naked) noreturn {
    asm volatile (
        \\ {
        \\ p0 = dfclass(r1:0,#2)
        \\ p0 = dfclass(r3:2,#2)
        \\ r13:12 = combine(##0x40000000,#0)
        \\ }
        \\ {
        \\ r13:12 = insert(r1:0,#52,#11 -1)
        \\ r5:4 = asl(r3:2,#11 -1)
        \\ r28 = #-1024
        \\ r9:8 = #1
        \\ }
        \\ {
        \\ r7:6 = mpyu(r4,r13)
        \\ r5:4 = insert(r9:8,#2,#62)
        \\ }
        \\
        \\
        \\
        \\
        \\ {
        \\ r15:14 = mpyu(r12,r4)
        \\ r7:6 += mpyu(r12,r5)
        \\ }
        \\ {
        \\ r7:6 += lsr(r15:14,#32)
        \\ r11:10 = mpyu(r13,r5)
        \\ r5:4 = combine(##1024 +1024 -4,#0)
        \\ }
        \\ {
        \\ r11:10 += lsr(r7:6,#32)
        \\ if (!p0) jump .Lmul_abnormal
        \\ p1 = cmp.eq(r14,#0)
        \\ p1 = cmp.eq(r6,#0)
        \\ }
        \\ {
        \\ if (!p1) r10 = or(r10,r8)
        \\ r6 = extractu(r1,#11,#20)
        \\ r7 = extractu(r3,#11,#20)
        \\ }
        \\ {
        \\ r15:14 = neg(r11:10)
        \\ r6 += add(r28,r7)
        \\ r28 = xor(r1,r3)
        \\ }
        \\ {
        \\ if (!p2.new) r11:10 = r15:14
        \\ p2 = cmp.gt(r28,#-1)
        \\ p0 = !cmp.gt(r6,r5)
        \\ p0 = cmp.gt(r6,r4)
        \\ if (!p0.new) jump:nt .Lmul_ovf_unf
        \\ }
        \\ {
        \\ r1:0 = convert_d2df(r11:10)
        \\ r6 = add(r6,#-1024 -58)
        \\ }
        \\ {
        \\ r1 += asl(r6,#20)
        \\ jumpr r31
        \\ }
        \\
        \\ .falign
        \\ .Lmul_possible_unf:
        \\ {
        \\ p0 = cmp.eq(r0,#0)
        \\ p0 = bitsclr(r1,r4)
        \\ if (!p0.new) jumpr:t r31
        \\ r5 = #0x7fff
        \\ }
        \\ {
        \\ p0 = bitsset(r13,r5)
        \\ r4 = USR
        \\ r5 = #0x030
        \\ }
        \\ {
        \\ if (p0) r4 = or(r4,r5)
        \\ }
        \\ {
        \\ USR = r4
        \\ }
        \\ {
        \\ p0 = dfcmp.eq(r1:0,r1:0)
        \\ jumpr r31
        \\ }
        \\ .falign
        \\ .Lmul_ovf_unf:
        \\ {
        \\ r1:0 = convert_d2df(r11:10)
        \\ r13:12 = abs(r11:10)
        \\ r7 = add(r6,#-1024 -58)
        \\ }
        \\ {
        \\ r1 += asl(r7,#20)
        \\ r7 = extractu(r1,#11,#20)
        \\ r4 = ##0x7FEFFFFF
        \\ }
        \\ {
        \\ r7 += add(r6,##-1024 -58)
        \\
        \\ r5 = #0
        \\ }
        \\ {
        \\ p0 = cmp.gt(r7,##1024 +1024 -2)
        \\ if (p0.new) jump:nt .Lmul_ovf
        \\ }
        \\ {
        \\ p0 = cmp.gt(r7,#0)
        \\ if (p0.new) jump:nt .Lmul_possible_unf
        \\ r5 = sub(r6,r5)
        \\ r28 = #63
        \\ }
        \\ {
        \\ r4 = #0
        \\ r5 = sub(#5,r5)
        \\ }
        \\ {
        \\ p3 = cmp.gt(r11,#-1)
        \\ r5 = min(r5,r28)
        \\ r11:10 = r13:12
        \\ }
        \\ {
        \\ r28 = USR
        \\ r15:14 = extractu(r11:10,r5:4)
        \\ }
        \\ {
        \\ r11:10 = asr(r11:10,r5)
        \\ r4 = #0x0030
        \\ r1 = insert(r9,#11,#20)
        \\ }
        \\ {
        \\ p0 = cmp.gtu(r9:8,r15:14)
        \\ if (!p0.new) r10 = or(r10,r8)
        \\ r11 = setbit(r11,#20 +3)
        \\ }
        \\ {
        \\ r15:14 = neg(r11:10)
        \\ p1 = bitsclr(r10,#0x7)
        \\ if (!p1.new) r28 = or(r4,r28)
        \\ }
        \\ {
        \\ if (!p3) r11:10 = r15:14
        \\ USR = r28
        \\ }
        \\ {
        \\ r1:0 = convert_d2df(r11:10)
        \\ p0 = dfcmp.eq(r1:0,r1:0)
        \\ }
        \\ {
        \\ r1 = insert(r9,#11 -1,#20 +1)
        \\ jumpr r31
        \\ }
        \\ .falign
        \\ .Lmul_ovf:
        \\
        \\ {
        \\ r28 = USR
        \\ r13:12 = combine(##0x7fefffff,#-1)
        \\ r1:0 = r11:10
        \\ }
        \\ {
        \\ r14 = extractu(r28,#2,#22)
        \\ r28 = or(r28,#0x28)
        \\ r5:4 = combine(##0x7ff00000,#0)
        \\ }
        \\ {
        \\ USR = r28
        \\ r14 ^= lsr(r1,#31)
        \\ r28 = r14
        \\ }
        \\ {
        \\ p0 = !cmp.eq(r28,#1)
        \\ p0 = !cmp.eq(r14,#2)
        \\ if (p0.new) r13:12 = r5:4
        \\ p0 = dfcmp.eq(r1:0,r1:0)
        \\ }
        \\ {
        \\ r1:0 = insert(r13:12,#63,#0)
        \\ jumpr r31
        \\ }
        \\
        \\ .Lmul_abnormal:
        \\ {
        \\ r13:12 = extractu(r1:0,#63,#0)
        \\ r5:4 = extractu(r3:2,#63,#0)
        \\ }
        \\ {
        \\ p3 = cmp.gtu(r13:12,r5:4)
        \\ if (!p3.new) r1:0 = r3:2
        \\ if (!p3.new) r3:2 = r1:0
        \\ }
        \\ {
        \\
        \\ p0 = dfclass(r1:0,#0x0f)
        \\ if (!p0.new) jump:nt .Linvalid_nan
        \\ if (!p3) r13:12 = r5:4
        \\ if (!p3) r5:4 = r13:12
        \\ }
        \\ {
        \\
        \\ p1 = dfclass(r1:0,#0x08)
        \\ p1 = dfclass(r3:2,#0x0e)
        \\ }
        \\ {
        \\
        \\
        \\ p0 = dfclass(r1:0,#0x08)
        \\ p0 = dfclass(r3:2,#0x01)
        \\ }
        \\ {
        \\ if (p1) jump .Ltrue_inf
        \\ p2 = dfclass(r3:2,#0x01)
        \\ }
        \\ {
        \\ if (p0) jump .Linvalid_zeroinf
        \\ if (p2) jump .Ltrue_zero
        \\ r28 = ##0x7c000000
        \\ }
        \\
        \\
        \\
        \\
        \\
        \\ {
        \\ p0 = bitsclr(r1,r28)
        \\ if (p0.new) jump:nt .Lmul_tiny
        \\ }
        \\ {
        \\ r28 = cl0(r5:4)
        \\ }
        \\ {
        \\ r28 = add(r28,#-11)
        \\ }
        \\ {
        \\ r5:4 = asl(r5:4,r28)
        \\ }
        \\ {
        \\ r3:2 = insert(r5:4,#63,#0)
        \\ r1 -= asl(r28,#20)
        \\ }
        \\ jump __hexagon_muldf3
        \\ .Lmul_tiny:
        \\ {
        \\ r28 = USR
        \\ r1:0 = xor(r1:0,r3:2)
        \\ }
        \\ {
        \\ r28 = or(r28,#0x30)
        \\ r1:0 = insert(r9:8,#63,#0)
        \\ r5 = extractu(r28,#2,#22)
        \\ }
        \\ {
        \\ USR = r28
        \\ p0 = cmp.gt(r5,#1)
        \\ if (!p0.new) r0 = #0
        \\ r5 ^= lsr(r1,#31)
        \\ }
        \\ {
        \\ p0 = cmp.eq(r5,#3)
        \\ if (!p0.new) r0 = #0
        \\ jumpr r31
        \\ }
        \\ .Linvalid_zeroinf:
        \\ {
        \\ r28 = USR
        \\ }
        \\ {
        \\ r1:0 = #-1
        \\ r28 = or(r28,#2)
        \\ }
        \\ {
        \\ USR = r28
        \\ }
        \\ {
        \\ p0 = dfcmp.uo(r1:0,r1:0)
        \\ jumpr r31
        \\ }
        \\ .Linvalid_nan:
        \\ {
        \\ p0 = dfclass(r3:2,#0x0f)
        \\ r28 = convert_df2sf(r1:0)
        \\ if (p0.new) r3:2 = r1:0
        \\ }
        \\ {
        \\ r2 = convert_df2sf(r3:2)
        \\ r1:0 = #-1
        \\ jumpr r31
        \\ }
        \\ .falign
        \\ .Ltrue_zero:
        \\ {
        \\ r1:0 = r3:2
        \\ r3:2 = r1:0
        \\ }
        \\ .Ltrue_inf:
        \\ {
        \\ r3 = extract(r3,#1,#31)
        \\ }
        \\ {
        \\ r1 ^= asl(r3,#31)
        \\ jumpr r31
        \\ }
    );
}
1494
1495fn __hexagon_sqrtdf2() align(32) callconv(.naked) noreturn {
1496 asm volatile (
1497 \\ {
1498 \\ r15:14 = extractu(r1:0,#23 +1,#52 -23)
1499 \\ r28 = extractu(r1,#11,#52 -32)
1500 \\ r5:4 = combine(##0x3f000004,#1)
1501 \\ }
1502 \\ {
1503 \\ p2 = dfclass(r1:0,#0x02)
1504 \\ p2 = cmp.gt(r1,#-1)
1505 \\ if (!p2.new) jump:nt .Lsqrt_abnormal
1506 \\ r9 = or(r5,r14)
1507 \\ }
1508 \\
1509 \\ .Ldenormal_restart:
1510 \\ {
1511 \\ r11:10 = r1:0
1512 \\ r7,p0 = sfinvsqrta(r9)
1513 \\ r5 = and(r5,#-16)
1514 \\ r3:2 = #0
1515 \\ }
1516 \\ {
1517 \\ r3 += sfmpy(r7,r9):lib
1518 \\ r2 += sfmpy(r7,r5):lib
1519 \\ r6 = r5
1520 \\
1521 \\
1522 \\ r9 = and(r28,#1)
1523 \\ }
1524 \\ {
1525 \\ r6 -= sfmpy(r3,r2):lib
1526 \\ r11 = insert(r4,#11 +1,#52 -32)
1527 \\ p1 = cmp.gtu(r9,#0)
1528 \\ }
1529 \\ {
1530 \\ r3 += sfmpy(r3,r6):lib
1531 \\ r2 += sfmpy(r2,r6):lib
1532 \\ r6 = r5
1533 \\ r9 = mux(p1,#8,#9)
1534 \\ }
1535 \\ {
1536 \\ r6 -= sfmpy(r3,r2):lib
1537 \\ r11:10 = asl(r11:10,r9)
1538 \\ r9 = mux(p1,#3,#2)
1539 \\ }
1540 \\ {
1541 \\ r2 += sfmpy(r2,r6):lib
1542 \\
1543 \\ r15:14 = asl(r11:10,r9)
1544 \\ }
1545 \\ {
1546 \\ r2 = and(r2,##0x007fffff)
1547 \\ }
1548 \\ {
1549 \\ r2 = add(r2,##0x00800000 - 3)
1550 \\ r9 = mux(p1,#7,#8)
1551 \\ }
1552 \\ {
1553 \\ r8 = asl(r2,r9)
1554 \\ r9 = mux(p1,#15-(1+1),#15-(1+0))
1555 \\ }
1556 \\ {
1557 \\ r13:12 = mpyu(r8,r15)
1558 \\ }
1559 \\ {
1560 \\ r1:0 = asl(r11:10,#15)
1561 \\ r15:14 = mpyu(r13,r13)
1562 \\ p1 = cmp.eq(r0,r0)
1563 \\ }
1564 \\ {
1565 \\ r1:0 -= asl(r15:14,#15)
1566 \\ r15:14 = mpyu(r13,r12)
1567 \\ p2 = cmp.eq(r0,r0)
1568 \\ }
1569 \\ {
1570 \\ r1:0 -= lsr(r15:14,#16)
1571 \\ p3 = cmp.eq(r0,r0)
1572 \\ }
1573 \\ {
1574 \\ r1:0 = mpyu(r1,r8)
1575 \\ }
1576 \\ {
1577 \\ r13:12 += lsr(r1:0,r9)
1578 \\ r9 = add(r9,#16)
1579 \\ r1:0 = asl(r11:10,#31)
1580 \\ }
1581 \\
1582 \\ {
1583 \\ r15:14 = mpyu(r13,r13)
1584 \\ r1:0 -= mpyu(r13,r12)
1585 \\ }
1586 \\ {
1587 \\ r1:0 -= asl(r15:14,#31)
1588 \\ r15:14 = mpyu(r12,r12)
1589 \\ }
1590 \\ {
1591 \\ r1:0 -= lsr(r15:14,#33)
1592 \\ }
1593 \\ {
1594 \\ r1:0 = mpyu(r1,r8)
1595 \\ }
1596 \\ {
1597 \\ r13:12 += lsr(r1:0,r9)
1598 \\ r9 = add(r9,#16)
1599 \\ r1:0 = asl(r11:10,#47)
1600 \\ }
1601 \\
1602 \\ {
1603 \\ r15:14 = mpyu(r13,r13)
1604 \\ }
1605 \\ {
1606 \\ r1:0 -= asl(r15:14,#47)
1607 \\ r15:14 = mpyu(r13,r12)
1608 \\ }
1609 \\ {
1610 \\ r1:0 -= asl(r15:14,#16)
1611 \\ r15:14 = mpyu(r12,r12)
1612 \\ }
1613 \\ {
1614 \\ r1:0 -= lsr(r15:14,#17)
1615 \\ }
1616 \\ {
1617 \\ r1:0 = mpyu(r1,r8)
1618 \\ }
1619 \\ {
1620 \\ r13:12 += lsr(r1:0,r9)
1621 \\ }
1622 \\ {
1623 \\ r3:2 = mpyu(r13,r12)
1624 \\ r5:4 = mpyu(r12,r12)
1625 \\ r15:14 = #0
1626 \\ r1:0 = #0
1627 \\ }
1628 \\ {
1629 \\ r3:2 += lsr(r5:4,#33)
1630 \\ r5:4 += asl(r3:2,#33)
1631 \\ p1 = cmp.eq(r0,r0)
1632 \\ }
1633 \\ {
1634 \\ r7:6 = mpyu(r13,r13)
1635 \\ r1:0 = sub(r1:0,r5:4,p1):carry
1636 \\ r9:8 = #1
1637 \\ }
1638 \\ {
1639 \\ r7:6 += lsr(r3:2,#31)
1640 \\ r9:8 += asl(r13:12,#1)
1641 \\ }
1642 \\
1643 \\
1644 \\
1645 \\
1646 \\
1647 \\ {
1648 \\ r15:14 = sub(r11:10,r7:6,p1):carry
1649 \\ r5:4 = sub(r1:0,r9:8,p2):carry
1650 \\
1651 \\
1652 \\
1653 \\
1654 \\ r7:6 = #1
1655 \\ r11:10 = #0
1656 \\ }
1657 \\ {
1658 \\ r3:2 = sub(r15:14,r11:10,p2):carry
1659 \\ r7:6 = add(r13:12,r7:6)
1660 \\ r28 = add(r28,#-0x3ff)
1661 \\ }
1662 \\ {
1663 \\
1664 \\ if (p2) r13:12 = r7:6
1665 \\ if (p2) r1:0 = r5:4
1666 \\ if (p2) r15:14 = r3:2
1667 \\ }
1668 \\ {
1669 \\ r5:4 = sub(r1:0,r9:8,p3):carry
1670 \\ r7:6 = #1
1671 \\ r28 = asr(r28,#1)
1672 \\ }
1673 \\ {
1674 \\ r3:2 = sub(r15:14,r11:10,p3):carry
1675 \\ r7:6 = add(r13:12,r7:6)
1676 \\ }
1677 \\ {
1678 \\ if (p3) r13:12 = r7:6
1679 \\ if (p3) r1:0 = r5:4
1680 \\
1681 \\
1682 \\
1683 \\
1684 \\
1685 \\ r2 = #1
1686 \\ }
1687 \\ {
1688 \\ p0 = cmp.eq(r1:0,r11:10)
1689 \\ if (!p0.new) r12 = or(r12,r2)
1690 \\ r3 = cl0(r13:12)
1691 \\ r28 = add(r28,#-63)
1692 \\ }
1693 \\
1694 \\
1695 \\
1696 \\ {
1697 \\ r1:0 = convert_ud2df(r13:12)
1698 \\ r28 = add(r28,r3)
1699 \\ }
1700 \\ {
1701 \\ r1 += asl(r28,#52 -32)
1702 \\ jumpr r31
1703 \\ }
1704 \\ .Lsqrt_abnormal:
1705 \\ {
1706 \\ p0 = dfclass(r1:0,#0x01)
1707 \\ if (p0.new) jumpr:t r31
1708 \\ }
1709 \\ {
1710 \\ p0 = dfclass(r1:0,#0x10)
1711 \\ if (p0.new) jump:nt .Lsqrt_nan
1712 \\ }
1713 \\ {
1714 \\ p0 = cmp.gt(r1,#-1)
1715 \\ if (!p0.new) jump:nt .Lsqrt_invalid_neg
1716 \\ if (!p0.new) r28 = ##0x7F800001
1717 \\ }
1718 \\ {
1719 \\ p0 = dfclass(r1:0,#0x08)
1720 \\ if (p0.new) jumpr:nt r31
1721 \\ }
1722 \\
1723 \\
1724 \\ {
1725 \\ r1:0 = extractu(r1:0,#52,#0)
1726 \\ }
1727 \\ {
1728 \\ r28 = add(clb(r1:0),#-11)
1729 \\ }
1730 \\ {
1731 \\ r1:0 = asl(r1:0,r28)
1732 \\ r28 = sub(#1,r28)
1733 \\ }
1734 \\ {
1735 \\ r1 = insert(r28,#1,#52 -32)
1736 \\ }
1737 \\ {
1738 \\ r3:2 = extractu(r1:0,#23 +1,#52 -23)
1739 \\ r5 = ##0x3f000004
1740 \\ }
1741 \\ {
1742 \\ r9 = or(r5,r2)
1743 \\ r5 = and(r5,#-16)
1744 \\ jump .Ldenormal_restart
1745 \\ }
1746 \\ .Lsqrt_nan:
1747 \\ {
1748 \\ r28 = convert_df2sf(r1:0)
1749 \\ r1:0 = #-1
1750 \\ jumpr r31
1751 \\ }
1752 \\ .Lsqrt_invalid_neg:
1753 \\ {
1754 \\ r1:0 = convert_sf2df(r28)
1755 \\ jumpr r31
1756 \\ }
1757 );
1758}
1759
comptime {
    // Register the Hexagon compiler-rt helper symbols (including their
    // "fast"/legacy aliases, which share an implementation) only when
    // compiling for the Hexagon architecture.
    if (builtin.cpu.arch == .hexagon) {
        // Every routine in this file has the same naked signature, so the
        // export set can be written as a (function pointer, symbol name)
        // table and walked at comptime instead of repeating @export calls.
        const Entry = struct {
            func: *const fn () callconv(.naked) noreturn,
            name: []const u8,
        };
        const table = [_]Entry{
            .{ .func = &__hexagon_adddf3, .name = "__hexagon_adddf3" },
            .{ .func = &__hexagon_adddf3, .name = "__hexagon_fast_adddf3" },
            .{ .func = &__hexagon_subdf3, .name = "__hexagon_subdf3" },
            .{ .func = &__hexagon_subdf3, .name = "__hexagon_fast_subdf3" },
            .{ .func = &__hexagon_divdf3, .name = "__hexagon_divdf3" },
            .{ .func = &__hexagon_divdf3, .name = "__hexagon_fast_divdf3" },
            .{ .func = &__hexagon_muldf3, .name = "__hexagon_muldf3" },
            .{ .func = &__hexagon_muldf3, .name = "__hexagon_fast_muldf3" },
            .{ .func = &__hexagon_sqrtdf2, .name = "__hexagon_sqrtdf2" },
            .{ .func = &__hexagon_sqrtdf2, .name = "__hexagon_fast2_sqrtdf2" },
            .{ .func = &__hexagon_sqrtdf2, .name = "__hexagon_sqrt" },
            .{ .func = &__hexagon_divsf3, .name = "__hexagon_divsf3" },
            .{ .func = &__hexagon_divsf3, .name = "__hexagon_fast_divsf3" },
            .{ .func = &__hexagon_divsi3, .name = "__hexagon_divsi3" },
            .{ .func = &__hexagon_umodsi3, .name = "__hexagon_umodsi3" },
            .{ .func = &__hexagon_sqrtf, .name = "__hexagon_sqrtf" },
            .{ .func = &__hexagon_sqrtf, .name = "__hexagon_fast2_sqrtf" },
            .{ .func = &__hexagon_moddi3, .name = "__hexagon_moddi3" },
            .{ .func = &__hexagon_divdi3, .name = "__hexagon_divdi3" },
            .{ .func = &__hexagon_udivdi3, .name = "__hexagon_udivdi3" },
            .{ .func = &__hexagon_umoddi3, .name = "__hexagon_umoddi3" },
            .{ .func = &__hexagon_modsi3, .name = "__hexagon_modsi3" },
            .{ .func = &__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes, .name = "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes" },
            .{ .func = &__hexagon_udivsi3, .name = "__hexagon_udivsi3" },
        };
        // Comptime loop: emits exactly one @export per table entry, all with
        // the linkage/visibility shared by the rest of compiler-rt.
        for (table) |entry| {
            @export(entry.func, .{
                .name = entry.name,
                .linkage = common.linkage,
                .visibility = common.visibility,
            });
        }
    }
}