;; Copyright (C) 2019-2020 Free Software Foundation, Inc.
;;
;; This file is part of LIBF7, which is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.
;;
;; Under Section 7 of GPL version 3, you are granted additional
;; permissions described in the GCC Runtime Library Exception, version
;; 3.1, as published by the Free Software Foundation.
;;
;; You should have received a copy of the GNU General Public License and
;; a copy of the GCC Runtime Library Exception along with this program;
;; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
;; <http://www.gnu.org/licenses/>.  */

#ifndef __AVR_TINY__

#define ASM_DEFS_HAVE_DEFUN

#include "asm-defs.h"
#include "libf7.h"

#define ZERO __zero_reg__
#define TMP  __tmp_reg__

#define F7(name)   F7_(name##_asm)

.macro F7call name
    .global F7(\name\())
    XCALL   F7(\name\())
.endm

.macro F7jmp name
    .global F7(\name\())
    XJMP    F7(\name\())
.endm

;; Just for visibility in disassembly.
.macro LLL name
    .global LLL.\name
    LLL.\name:
    nop
.endm

.macro DEFUN name
    .section .text.libf7.asm.\name, "ax", @progbits
    .global F7(\name\())
    .func F7(\name\())
    F7(\name\()) :
.endm

.macro ENDF name
    .size F7(\name\()), . - F7(\name\())
    .endfunc
.endm

.macro LABEL name
    .global F7(\name\())
    F7(\name\()) :
.endm

.macro _DEFUN name
    .section .text.libf7.asm.\name, "ax", @progbits
    .weak \name
    .type \name, @function
    \name :
.endm

.macro _ENDF name
    .size \name, . - \name
.endm

.macro _LABEL name
    .weak \name
    .type \name, @function
    \name :
.endm

#define F7_NAME(X)   F7_(X)

;; Make a weak alias.
.macro  ALIAS  sym
    .weak \sym
    .type \sym, @function
    \sym:
.endm

;; Make a weak alias if double is 64 bits wide.
.macro  DALIAS  sym
#if defined (WITH_LIBF7_MATH_SYMBOLS) && __SIZEOF_DOUBLE__ == 8
ALIAS \sym
#endif
.endm

;; Make a weak alias if long double is 64 bits wide.
.macro  LALIAS  sym
#if defined (WITH_LIBF7_MATH_SYMBOLS) && __SIZEOF_LONG_DOUBLE__ == 8
ALIAS \sym
#endif
.endm

#define     Off 1
#define     Expo (Off + F7_MANT_BYTES)
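
;; Layout note (illustrative, inferred from libf7.h): an f7_t starts with
;; one flags byte, followed by F7_MANT_BYTES = 7 mantissa bytes and a
;; 16-bit .expo, hence the offsets Off = 1 and Expo = 8 used below.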

#ifdef F7MOD_classify_
;;  r24 = classify (*Z)
;;  NaN  ->  F7_FLAG_nan
;;  INF  ->  F7_FLAG_inf [ | F7_FLAG_sign ]
;;  ==0  ->  F7_FLAG_zero
;;  ...  ->  0 [ | F7_FLAG_sign ]

;; Clobbers:  None (no TMP, no T).
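;;
;; Illustrative C sketch of the logic below (not an upstream comment;
;; assumes the f7_t layout and F7_FLAG_* values from libf7.h):
;;
;;    uint8_t classify (const f7_t *z)
;;    {
;;        if (z->flags >> 1)              // NaN or Inf flag set?
;;            return z->flags;
;;        if (! (z->mant[6] & 0x80))      // MSBit of .mant clear?
;;            return F7_FLAG_zero;
;;        return z->flags & F7_FLAG_sign;
;;    }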
DEFUN classify

    ld      r24,    Z
    lsr     r24
    brne .Lnan_or_inf

    ldd     r24,    Z+6+Off
    tst     r24
    brpl 0f
    sbc     r24,    r24
    andi    r24,    F7_FLAG_sign
    ret

0:  ldi     r24,    F7_FLAG_zero
    ret

.Lnan_or_inf:
    rol     r24
    ret

ENDF classify
#endif /* F7MOD_classify_ */

#ifdef F7MOD_clr_
DEFUN clr
    std     Z+0,     ZERO
    std     Z+0+Off, ZERO
    std     Z+1+Off, ZERO
    std     Z+2+Off, ZERO
    std     Z+3+Off, ZERO
    std     Z+4+Off, ZERO
    std     Z+5+Off, ZERO
    std     Z+6+Off, ZERO
    std     Z+0+Expo, ZERO
    std     Z+1+Expo, ZERO
    ret
ENDF clr

#endif /* F7MOD_clr_ */

#ifdef F7MOD_clz_
;; The libgcc CLZ implementations like __clzsi2 aka. __builtin_clzl are
;; not very well suited for our purpose, so implement our own.

#define ZBITS   r26
.macro  .test.byte  reg
    or      ZERO,   \reg
    brne    .Loop_bit
    subi    ZBITS, -8
.endm

;; R26 = CLZ (uint64_t R18);  CLZ (0) = 64.
;; Unchanged: T
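;;
;; Worked example (illustrative, not an upstream comment):  for
;; R18[] = 0x0000200000000000 the two most significant bytes are zero
;; (16 bits) and the next byte is 0x20 with two leading zeros, so the
;; result is ZBITS = 16 + 2 = 18.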
DEFUN clzdi2
    clr     ZBITS
    ;; Catch the common case of normalized .mant for speed-up.
    tst     r25
    brmi 9f
    .test.byte  r25
    .test.byte  r24
    .test.byte  r23
    .test.byte  r22
    .test.byte  r21
    .test.byte  r20
    .test.byte  r19
    .test.byte  r18
.Ldone:
    clr     ZERO
9:  ret

.Loop_bit:
    lsl     ZERO
    brcs .Ldone
    inc     ZBITS
    rjmp .Loop_bit

ENDF clzdi2
#undef  ZBITS
#endif /* F7MOD_clz_ */

#ifdef F7MOD_cmp_mant_
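;; Illustrative summary (not an upstream comment, inferred from the code):
;; compare the mantissae of *X and *Z memcmp-like and return R24 = 0 if
;; they are equal, -1 if *X < *Z, +1 if *X > *Z.  X is restored on return.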
DEFUN cmp_mant

    adiw    X,   6 + Off
    ld      r24, X      $ ldd   TMP, Z+6+Off    $ SUB   r24, TMP
    brne .Lunequal

    sbiw    X,  6
    ld      r24, X+     $ ldd   TMP, Z+0+Off    $ SUB   r24, TMP
    ld      r24, X+     $ ldd   TMP, Z+1+Off    $ sbc   r24, TMP
    ld      r24, X+     $ ldd   TMP, Z+2+Off    $ sbc   r24, TMP
    ld      r24, X+     $ ldd   TMP, Z+3+Off    $ sbc   r24, TMP
    ld      r24, X+     $ ldd   TMP, Z+4+Off    $ sbc   r24, TMP
    ld      r24, X+     $ ldd   TMP, Z+5+Off    $ sbc   r24, TMP
    ;; MSBs are already known to be equal
    breq 9f
.Lunequal:
    sbc     r24,    r24
    sbci    r24,    -1
9:  sbiw    X,      6 + Off
    ret
ENDF cmp_mant
#endif /* F7MOD_cmp_mant_ */

#define     CA      18
#define     C0      CA+1
#define     C1      C0+1
#define     C2      C0+2
#define     C3      C0+3
#define     C4      C0+4
#define     C5      C0+5
#define     C6      C0+6
#define     Carry   r16
#define     Flags   18

#ifdef F7MOD_store_
;; Z->flags = CA.
;; Z->mant  = C[7].
DEFUN store_mant.with_flags
    st      Z,      CA

;; Z->mant = C[7].
LABEL store_mant
    std     Z+0+Off, C0
    std     Z+1+Off, C1
    std     Z+2+Off, C2
    std     Z+3+Off, C3
    std     Z+4+Off, C4
    std     Z+5+Off, C5
    std     Z+6+Off, C6
    ret
ENDF store_mant.with_flags
#endif /* F7MOD_store_ */

#ifdef F7MOD_load_
;; CA   = Z->flags
;; C[7] = Z->mant
DEFUN load_mant.with_flags
    ld      CA,     Z
    skipnext

;; CA   = 0
;; C[7] = Z->mant
LABEL load_mant.clr_CA
LABEL load_mant.clr_flags
    clr     CA      ; May be skipped

;; C[7] = Z->mant
LABEL load_mant
    ldd     C0,     Z+0+Off
    ldd     C1,     Z+1+Off
    ldd     C2,     Z+2+Off
    ldd     C3,     Z+3+Off
    ldd     C4,     Z+4+Off
    ldd     C5,     Z+5+Off
    ldd     C6,     Z+6+Off
    ret
ENDF load_mant.with_flags
#endif /* F7MOD_load_ */

#ifdef F7MOD_copy_
DEFUN copy
    cp      XL,     ZL
    cpc     XH,     ZH
    breq 9f
    adiw    XL,     10
    adiw    ZL,     10
    set
    bld     ZERO,   1
    bld     ZERO,   3   ; ZERO = 0b1010 = 10.
.Loop:
    ld      TMP,    -X
    st      -Z,     TMP
    dec     ZERO
    brne .Loop
9:  ret
ENDF copy
#endif /* F7MOD_copy_ */

#ifdef F7MOD_copy_P_
DEFUN copy_P
    set
    bld     ZERO,   1
    bld     ZERO,   3   ; ZERO = 0b1010 = 10.
.Loop:
#ifdef __AVR_HAVE_LPMX__
    lpm     TMP,    Z+
#else
    lpm
    adiw    Z,      1
#endif /* Have LPMx */
    st      X+,     TMP
    dec     ZERO
    brne .Loop
    sbiw    X,      10
    sbiw    Z,      10
    ret
ENDF copy_P
#endif /* F7MOD_copy_P_ */

#ifdef F7MOD_copy_mant_
DEFUN copy_mant
    cp      XL,     ZL
    cpc     XH,     ZH
    breq 9f
    adiw    XL,     1
    adiw    ZL,     1
    set
    bld     ZERO,   3
    dec     ZERO        ; ZERO = 7
.Loop:
    ld      TMP,    X+
    st      Z+,     TMP
    dec     ZERO
    brne    .Loop
    sbiw    XL,     8
    sbiw    ZL,     8
9:  ret
ENDF copy_mant
#endif /* F7MOD_copy_mant_ */


#ifdef F7MOD_clr_mant_lsbs_
DEFUN clr_mant_lsbs
    push    r16
    mov     r16,    r20
    wmov    XL,     r24

    wmov    ZL,     r22
    F7call  load_mant

    F7call  lshrdi3

    clr     CA

    F7call   ashldi3

    pop     r16

    wmov    ZL,     XL
    F7jmp  store_mant

ENDF clr_mant_lsbs
#endif /* F7MOD_clr_mant_lsbs_ */


#ifdef F7MOD_normalize_with_carry_
;; Z = &f7_t
;; C[] = .mant, which may not be normalized
;; Carry === r16 = Addend to Z->expo in [-64, 128).
;; Normalize C[], set Flags, and adjust Z->expo.
;; Return CA (after normalization) in TMP.
;; Unchanged: T
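;;
;; Illustrative pseudo code (not an upstream comment; C[] is the 64-bit
;; mantissa in R18...R25):
;;
;;    zbits = clz64 (C[]);                  // 64 iff C[] == 0
;;    if (zbits == 64)  return zero;
;;    C[] <<= zbits;                        // now C6.7 = 1
;;    expo = Z->expo + Carry - zbits;       // signed 16-bit addition
;;    if (expo overflows positively)  return Inf resp. NaN;
;;    if (expo overflows negatively)  return zero;
;;    Z->expo = expo;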
#define Addend  r17
#define Zbits   r26
#define expL    r26
#define expH    r27
DEFUN normalize_with_carry
    mov     Addend, Carry
    tst     C6
    brmi .Lshift.0
    ;; r26 = CLZ (uint64_t R18)
    F7call  clzdi2
    cpi     Zbits,  64
    breq .Lclr
    sub     Addend, Zbits
    mov     r16,    Zbits

    F7call  ashldi3
    ;; Assert (R25.7 == 1)
.Lshift.0:
    mov     TMP,    CA
    ld      Flags,  Z

    ;; .expo += Addend
    ldd     expL,   Z+0+Expo
    ldd     expH,   Z+1+Expo
    ;; Sign-extend Addend
    clr     r16
    sbrc    Addend, 7
    com     r16

    ;; exp += (int8_t) Addend, i.e. sign-extend Addend.
    add     expL,   Addend
    adc     expH,   r16
    brvc .Lnormal
    tst     r16
    brmi .Lclr
    ;; Overflow
#if F7_HAVE_Inf == 1
    ori     Flags,  F7_FLAG_inf
#else
    ldi     Flags,  F7_FLAG_nan
#endif /* Have Inf */
    ret

.Lnormal:
    std     Z+0+Expo,   expL
    std     Z+1+Expo,   expH
    ret

.Lclr:
    ;; Underflow or Zero.
    clr     TMP
    .global __clr_8
    XJMP    __clr_8

LABEL normalize.store_with_flags
    ;; no rounding
    set
    skipnext
LABEL normalize.round.store_with_flags
    ;; with rounding
    clt     ; skipped ?
LABEL normalize.maybe_round.store_with_flags
    F7call  normalize_with_carry
    ;; We have:
    ;; Z   = &f7_t
    ;; X   = .expo
    ;; C[] = .mant
    ;; R18 = .flags
    ;; TMP = byte below .mant after normalization
    ;; T = 1  =>  no rounding.
    brts .Lstore
    lsl     TMP
    adc     C0,     ZERO
    brcc .Lstore
    adc     C1,     ZERO
    adc     C2,     ZERO
    adc     C3,     ZERO
    adc     C4,     ZERO
    adc     C5,     ZERO
    adc     C6,     ZERO
    brcc .Lstore
    ;; We only come here if C6 overflowed, i.e. C[] is 0 now.
    ;; .mant = 1.0 by restoring the MSbit.
    ror     C6
    ;; .expo += 1 and override the .expo stored during normalize.
    adiw    expL,   1
    std     Z+0+Expo,   expL
    std     Z+1+Expo,   expH

.Lstore:
    F7call  store_mant.with_flags

    ;; Return the byte below .mant after normalization.
    ;; This is only useful without rounding; the caller will know.
    mov     R24,    TMP
    ret
ENDF normalize_with_carry
#endif /* F7MOD_normalize_with_carry_ */


#ifdef F7MOD_normalize_
;; Using the above functionality from C.
;; f7_t* normalize (f7_t *cc)
;; Adjusts cc->expo
;; Clears cc->flags
DEFUN normalize
    push    r17
    push    r16
    wmov    ZL,     r24
    F7call  load_mant.clr_CA
    clr     Carry
    st      Z,      ZERO
    F7call  normalize.store_with_flags
    wmov    r24,    Z
    pop     r16
    pop     r17
    ret
ENDF normalize
#endif /* F7MOD_normalize_ */


#ifdef F7MOD_store_expo_
#define Done    r24
#define expLO   r24
#define expHI   r25
;; expo == INT16_MAX  =>  *Z = Inf,         return Done = true.
;; expo == INT16_MIN  =>  *Z = 0x0,         return Done = true.
;; else               =>  Z->expo = expo,   return Done = false.
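;;
;; Note (not an upstream comment): INT16_MAX is detected below by adding 1
;; and testing the 2's-complement overflow flag (0x7fff + 1 = 0x8000 sets
;; V), which avoids a 16-bit compare against 0x7fff.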
DEFUN store_expo
    cpi     expHI,   0x80
    cpc     expLO,  ZERO
    breq .Ltiny
    adiw    expLO,  1
    brvs .Lhuge
    sbiw    expLO,  1
    std     Z+0+Expo,   expLO
    std     Z+1+Expo,   expHI
    ldi     Done,   0
    ret

.Lhuge:
#if F7_HAVE_Inf == 1
    ld      Done,   Z
    andi    Done,   F7_FLAG_sign
    ori     Done,   F7_FLAG_inf
#else
    ldi     Done,   F7_FLAG_nan
#endif /* Have Inf */
    st      Z,      Done
    ldi     Done,   1
    ret

.Ltiny:
    ldi     Done,   1
    F7jmp   clr
ENDF store_expo
#endif /* F7MOD_store_expo_ */


#ifdef F7MOD_set_u64_
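;; Illustrative summary (not an upstream comment, inferred from the code):
;; set_s64 / set_u64 convert the 64-bit integer in C[] = R18...R25 to the
;; f7_t pointed to by R16/R17.  Worked example for set_u64 (5):
;; CLZ (5) = 61, hence .expo = 63 - 61 = 2 and the normalized mantissa
;; becomes 0xa000000000000000, i.e. 1.25 * 2^2 = 5.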
DEFUN set_s64
    set
    skipnext
    ;; ...
LABEL set_u64
    clt     ; Skipped?
    wmov    ZL,     r16
    ;; TMP holds .flags.
    clr     TMP
    brtc .Lnot.negative

    bst     C6,     7
    brtc .Lnot.negative
    bld     TMP,    F7_FLAGNO_sign
    .global __negdi2
    XCALL   __negdi2

.Lnot.negative:
    st      Z,          TMP
    std     Z+0+Expo,   ZERO
    std     Z+1+Expo,   ZERO
    ldi     Carry,      63
    F7call  normalize.round.store_with_flags
    wmov    r24,        Z
    wmov    r16,        Z   ; Unclobber r16.
    ret
ENDF set_s64
#endif /* F7MOD_set_u64_ */


#ifdef F7MOD_to_integer_
#define Mask    r26
DEFUN to_integer
    wmov    ZL,     r24
    mov     Mask,   r22

    F7call  load_mant.with_flags

    sbrc    Flags, F7_FLAGNO_nan
    rjmp .Lset_0x8000

    sbrc    Flags, F7_FLAGNO_inf
    rjmp .Lsaturate

    sbrs    C6, 7
    rjmp .Lset_0x0000

    bst     Flags, F7_FLAGNO_sign
    ldd     r27,    Z+0+Expo
    ;; Does .expo have bits outside Mask? ...
    mov     TMP,    Mask
    com     TMP
    and     TMP,    r27
    ldd     r27,    Z+1+Expo
    tst     r27
    brmi .Lset_0x0000       ; ...yes: .expo is < 0  =>  return 0
    or      TMP,    r27
    brne .Lsaturate.T       ; ...yes: .expo > Mask  =>  saturate

    ;; ...no:  Shift right to meet .expo = 0.
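    ;; Note (not an upstream comment): with Mask = 2^n - 1 and
    ;; 0 <= .expo <= Mask, the eor / and below compute
    ;; (.expo ^ Mask) & Mask == Mask - .expo, the right-shift count.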
    PUSH    r16
    ldd     r16,    Z+0+Expo
    eor     r16,    Mask
    and     r16,    Mask
    clr     CA
    F7call  lshrdi3
    POP     r16
    tst     C6
    brmi    .Lsaturate.T    ;   > INTxx_MAX  =>  saturate

    brtc 9f                 ;   >= 0         =>  return
    sbrc    Mask,   5
    .global __negdi2
    XJMP    __negdi2
    sbrc    Mask,   4
    .global __negsi2
    XJMP    __negsi2
    neg     C6
    neg     C5
    sbci    C6,     0
9:  ret

.Lsaturate:
    bst     Flags, F7_FLAGNO_sign
.Lsaturate.T:

#if F7_HAVE_Inf
    brtc .Lset_0x7fff
    ;; -Inf  =>  return 1 + INTxx_MIN
    mov     ZL,     Flags
    .global __clr_8
    XCALL   __clr_8
    ldi     C6,     0x80

    ldi     CA+0,   0x01

    sbrs    Mask,   5
    ldi     CA+4,   0x01

    sbrs    Mask,   4
    ldi     CA+6,   0x01
    ret

.Lset_0x7fff:
    ;; +Inf  =>  return INTxx_MAX
    sec
    .global __sbc_8
    XCALL   __sbc_8
    ldi     C6,     0x7f
    ret
#endif /* F7_HAVE_Inf */

.Lset_0x8000:
    ;; NaN  =>  return INTxx_MIN
    .global __clr_8
    XCALL   __clr_8
    ldi     C6,     0x80
    ret

.Lset_0x0000:
    ;; Small value  =>  return 0x0
    .global __clr_8
    XJMP    __clr_8

ENDF to_integer
#endif /* F7MOD_to_integer_ */


#ifdef F7MOD_to_unsigned_
#define Mask    r26
DEFUN to_unsigned
    wmov    ZL,     r24
    mov     Mask,   r22

    F7call  load_mant.with_flags

    sbrc    Flags, F7_FLAGNO_nan
    rjmp .Lset_0xffff

    sbrc    Flags, F7_FLAGNO_sign
    rjmp .Lset_0x0000

    sbrc    Flags, F7_FLAGNO_inf
    rjmp .Lset_0xffff

    sbrs    C6, 7
    rjmp .Lset_0x0000

    ldd     r27,    Z+0+Expo
    ;; Does .expo have bits outside Mask? ...
    mov     TMP,    Mask
    com     TMP
    and     TMP,    r27
    ldd     r27,    Z+1+Expo
    tst     r27
    brmi .Lset_0x0000       ; ...yes: .expo is < 0  =>  return 0
    or      TMP,    r27
    brne .Lset_0xffff       ; ...yes: .expo > Mask  =>  saturate

    ;; ...no:  Shift right to meet .expo = 0.
    PUSH    r16
    ldd     r16,    Z+0+Expo
    eor     r16,    Mask
    and     r16,    Mask
    clr     CA
    F7call  lshrdi3
    POP     r16
    ret

.Lset_0xffff:
    ;; return UINTxx_MAX
    sec
    .global __sbc_8
    XJMP    __sbc_8

.Lset_0x0000:
    ;; Small value  =>  return 0x0
    .global __clr_8
    XJMP    __clr_8

ENDF to_unsigned
#endif /* F7MOD_to_unsigned_ */


#ifdef F7MOD_addsub_mant_scaled_
;; int8_t f7_addsub_mant_scaled_asm (f7_t *r24, const f7_t *r22, const f7_t *r20,
;;                                   uint8_t r18);
;; R18.0 = 1 : ADD
;; R18.0 = 0 : SUB
;; R18[7..1] : Scale
;; Compute *R24 = *R22 + *R20 >> R18[7..1].

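;; Illustrative pseudo code (not an upstream comment):
;;
;;    B[] = R20->mant >> (R18 >> 1);       // scale second operand
;;    C[] = R18.0 ? R22->mant + B[]        // ADD
;;                : R22->mant - B[];       // SUB
;;    normalize and round C[], store to *R24.
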
#define     BA      10
#define     B0      BA+1
#define     B1      B0+1
#define     B2      B0+2
#define     B3      B0+3
#define     B4      B0+4
#define     B5      B0+5
#define     B6      B0+6

DEFUN addsub_mant_scaled
    do_prologue_saves  10

    bst     r18,    0  ;; ADD ?
    lsr     r18
    mov     r16,    r18

    wmov    ZL,     r20
    wmov    YL,     r22
    ;; C[] = bb >> shift
    wmov    XL,     r24

    F7call  load_mant.clr_CA
    F7call  lshrdi3

    wmov    BA,     CA
    wmov    B1,     C1
    wmov    B3,     C3
    wmov    B5,     C5
    wmov    ZL,     YL
    F7call  load_mant.clr_CA

    wmov    ZL,     XL

    brts .Ladd

    .global __subdi3
    XCALL   __subdi3

    breq .Lzero
    brcc .Lround
    ;; C = 1: Can underflow happen at all ?
.Lzero:
    F7call  clr
    rjmp .Lepilogue

.Ladd:
    .global __adddi3
    XCALL   __adddi3
    brcc .Lround
    ldi     Carry,  1
    .global __lshrdi3
    XCALL   __lshrdi3
    ori     C6, 1 << 7
    skipnext
.Lround:
    clr     Carry   ; skipped?
    F7call  normalize.round.store_with_flags

.Lepilogue:
    do_epilogue_restores 10

ENDF addsub_mant_scaled

#if !defined (__AVR_HAVE_MOVW__) || !defined (__AVR_HAVE_JMP_CALL__)
DEFUN lshrdi3
    .global __lshrdi3
    XJMP    __lshrdi3
ENDF lshrdi3
DEFUN ashldi3
    .global __ashldi3
    XJMP    __ashldi3
ENDF ashldi3
#else

;; Basically just a wrapper around libgcc's __lshrdi3.
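;; Note (not an upstream comment): shifts by 32 resp. 16 are handled
;; first by plain register-pair moves, so the libgcc call only has to
;; deal with the remaining 0...15 bit offset.  Likewise for ashldi3 below.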
DEFUN lshrdi3
    ;; Handle bit 5 of shift offset.
    sbrs    r16,    5
    rjmp 4f
    wmov    CA,     C3
    wmov    C1,     C5
    clr     C6          $   clr     C5  $   wmov    C3, C5
4:
    ;; Handle bit 4 of shift offset.
    sbrs    r16,    4
    rjmp 3f
    wmov CA, C1
    wmov C1, C3
    wmov C3, C5
    clr     C6          $   clr     C5
3:
    ;; Handle bits 3...0 of shift offset.
    push    r16
    andi    r16,    0xf
    breq 0f

    .global __lshrdi3
    XCALL   __lshrdi3
0:
    pop     r16
    ret
ENDF lshrdi3

;; Basically just a wrapper around libgcc's __ashldi3.
DEFUN ashldi3
    ;; Handle bit 5 of shift offset.
    sbrs    r16,    5
    rjmp 4f
    wmov    C5,     C1
    wmov    C3,     CA
    clr     C2          $   clr     C1  $   wmov    CA, C1
4:
    ;; Handle bit 4 of shift offset.
    sbrs    r16,    4
    rjmp 3f
    wmov C5, C3
    wmov C3, C1
    wmov C1, CA
    clr     CA          $   clr     C0
3:
    ;; Handle bits 3...0 of shift offset.
    push    r16
    andi    r16,    0xf
    breq 0f

    .global __ashldi3
    XCALL   __ashldi3
0:
    pop     r16
    ret
ENDF ashldi3
#endif /* Small device */

#endif /* F7MOD_addsub_mant_scaled_ */

#if defined F7MOD_mul_mant_ && defined (__AVR_HAVE_MUL__)
    #define     A0      11
    #define     A1      A0+1
    #define     A2      A0+2
    #define     A3      A0+3
    #define     A4      A0+4
    #define     A5      A0+5
    #define     A6      A0+6

    #define     TT0     26
    #define     TT1     TT0+1
    #define     TT2     28
    #define     TT3     TT2+1

    #define     BB      10

;; R18.0 = 1: No rounding.
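;;
;; Illustrative overview (not an upstream comment): the partial products
;; A[i] * B[j] below are accumulated into C[] = CA...C6, i.e. the upper
;; 8 bytes of the full 14-byte product.  Partial products that lie
;; entirely below CA are skipped; those straddling CA contribute their
;; high byte only (the "a:-" entries).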

DEFUN mul_mant
    do_prologue_saves 10
    bst     r18,    0
    push    r25
    push    r24
    movw    ZL,     r22
    LDD     A0,     Z+0+Off
    LDD     A1,     Z+1+Off
    LDD     A2,     Z+2+Off
    LDD     A3,     Z+3+Off
    LDD     A4,     Z+4+Off
    LDD     A5,     Z+5+Off
    LDD     A6,     Z+6+Off
    movw    ZL,     r20

    ;; 6 * 6 -> 6:5
    ;; 4 * 6 -> 4:3
    ;; 2 * 6 -> 2:1
    ;; 0 * 6 -> 0:a
    ldd     BB, Z+6+Off
    mul     A6, BB      $   movw    C5, r0
    mul     A4, BB      $   movw    C3, r0
    mul     A2, BB      $   movw    C1, r0
    mul     A0, BB      $   movw    CA, r0

    ;; 5 * 6 -> 5:4
    ;; 3 * 6 -> 3:2
    ;; 1 * 6 -> 1:0
    mul     A5, BB      $   movw    TT2, r0
    mul     A3, BB      $   movw    TT0, r0
    mul     A1, BB
    ADD     C0, r0      $   adc     C1, r1
    adc     C2, TT0     $   adc     C3, TT1
    adc     C4, TT2     $   adc     C5, TT3     $   clr ZERO
    adc     C6, ZERO
    ;; Done B6

    ;; 3 * 3 -> 0:a
    ;; 4 * 4 -> 2:1
    ;; 5 * 5 -> 4:3
    ldd     BB, Z+3+Off $   mul     A3, BB      $   movw    TT0, r0
    ldd     BB, Z+4+Off $   mul     A4, BB      $   movw    TT2, r0
    ldd     BB, Z+5+Off $   mul     A5, BB

    ADD     CA, TT0     $   adc     C0, TT1
    adc     C1, TT2     $   adc     C2, TT3
    adc     C3, r0      $   adc     C4, r1
    brcc .+2
    adiw    C5, 1

    ;; 6 * 5 -> 5:4
    ;; 4 * 5 -> 3:2
    ;; 2 * 5 -> 1:0
    ;; 0 * 5 -> a:-
    mul     A0, BB
    ;; A0 done
#define Atmp A0

    mov     Atmp, r1
    mul     A6, BB      $   movw    TT2, r0
    mul     A4, BB      $   movw    TT0, r0
    mul     A2, BB

    ADD     CA, Atmp
    adc     C0, r0      $   adc     C1, r1
    adc     C2, TT0     $   adc     C3, TT1
    adc     C4, TT2     $   adc     C5, TT3     $   clr ZERO
    adc     C6, ZERO

    ;; 1 * 5 -> 0:a
    ;; 3 * 5 -> 2:1
    ;; 6 * 4 -> 4:3
    mul     A1, BB      $   movw    TT0, r0
    mul     A3, BB      $   movw    TT2, r0
    ldd     BB, Z+4+Off
    mul     A6, BB

    ADD     CA, TT0     $   adc     C0, TT1
    adc     C1, TT2     $   adc     C2, TT3
    adc     C3, r0      $   adc     C4, r1      $   clr ZERO
    adc     C5, ZERO    $   adc     C6, ZERO
    ;; B5 done

    ;; 6 * 3 -> 3:2
    ;; 6 * 1 -> 1:0
    ;; 4 * 1 -> a:-
    mov     TT0, A6     $   ldd TMP,  Z+3+Off
    mov     BB,  A4     $   ldd Atmp, Z+1+Off
    rcall   .Lmul.help.3

    ;; 5 * 4 -> 3:2
    ;; 5 * 2 -> 1:0
    ;; 3 * 2 -> a:-
    mov     TT0, A5     $   ldd TMP,  Z+4+Off
    mov     BB,  A3     $   ldd Atmp, Z+2+Off
    rcall   .Lmul.help.3

    ;; 4 *   -> 3:2 (=0)
    ;; 4 * 3 -> 1:0
    ;; 2 * 3 -> a:-
    mov     TT0, A4     $   clr TMP
    mov     BB,  A2     $   ldd Atmp, Z+3+Off
    rcall   .Lmul.help.3

    ;; 3 * . -> 3:2 (=0)
    ;; 3 * 4 -> 1:0
    ;; 1 * 4 -> a:-
    mov     TT0, A3     $   clr TMP
    mov     BB,  A1     $   ldd Atmp, Z+4+Off
    rcall   .Lmul.help.3

    ;; . * ? -> 3:2 (=0)
    ;; . * 0 -> 1:0 (=0)
    ;; 5 * 0 -> a:-
    clr     TT0
    mov     BB,  A5     $   ldd Atmp, Z+0+Off
    rcall   .Lmul.help.3

    clr TT3  ;; Asserted by .Lmul.help.2
    ;; 6 * 2 -> 2:1
    ;; 6 * 0 -> 0:a
                        $   ldd TMP,  Z+2+Off
    mov     BB, A6     ;$   ldd Atmp, Z+0+Off
    rcall   .Lmul.help.2

    ;; 5 * 3 -> 2:1
    ;; 5 * 1 -> 0:a
                        $   ldd TMP,  Z+3+Off
    mov     BB, A5      $   ldd Atmp, Z+1+Off
    rcall   .Lmul.help.2

    ;; 4 * . -> 2:1 (=0)
    ;; 4 * 2 -> 0:a
                        $   clr TMP
    mov     BB, A4      $   ldd Atmp, Z+2+Off
    rcall   .Lmul.help.2

    ;; 2 * . -> 2:1 (=0)
    ;; 2 * 4 -> 0:a
                        $   clr TMP
    mov     BB, A2      $   ldd Atmp, Z+4+Off
    rcall   .Lmul.help.2

    ;; Finally...

    pop     ZL
    pop     ZH
    ;; The high byte is at least 0x40 and at most 0xfe.
    ;; The result has to be left-shifted by one in order to scale it
    ;; correctly.

    ldi     Carry,  1
    F7call  normalize.maybe_round.store_with_flags

    do_epilogue_restores 10

;; TT0 * Tmp  -> 3:2
;; TT0 * Atmp -> 1:0
;; BB  * Atmp -> a:-
;;
;; Clobbers : TMP, TT0...TT3.
;; Sets     : ZERO = 0.
.Lmul.help.3:
    mul     TT0, TMP    $   movw    TT2, r0
    mul     TT0, Atmp   $   movw    TT0, r0
    mul     BB,  Atmp

    ADD     CA, r1
    adc     C0, TT0     $   adc     C1, TT1
    adc     C2, TT2
.Lmul.help.3.C3:        $   adc     C3, TT3     $ clr ZERO
    adc     C4, ZERO    $   adc     C5, ZERO
    adc     C6, ZERO
    ret

;; BB * TMP  -> 2:1
;; BB * Atmp -> 0:a
;;
;; Asserts  : TT3 = 0
;; Clobbers : TMP, TT0, TT1.
;; Sets     : ZERO = 0.
.Lmul.help.2:
    mul     BB, TMP     $   movw    TT0, r0
    mul     BB, Atmp
    ADD     CA, r0      $   adc     C0, r1
    adc     C1, TT0     $   adc     C2, TT1
    rjmp .Lmul.help.3.C3

ENDF mul_mant
#endif /* F7MOD_mul_mant_ && MUL */


#if defined (F7MOD_div_)

;; Dividend is C[]

;; Divisor
#define A0       9
#define A1      10
#define A2      11
#define A3      12
#define A4      13
#define A5      14
#define A6      15

;; Quotient
#define Q0      0       /* === TMP  */
#define Q1      Q0+1    /* === ZERO */
#define Q2      26
#define Q3      Q2+1
#define Q4      28
#define Q5      Q4+1
#define Q6      16
#define Q7      Q6+1

#define Cnt     CA
#define QBits   r8

DEFUN div
    do_prologue_saves 12

    ;; Number of bits requested for the quotient.
    ;; This is usually 2 + F7_MANT_BITS.
    mov     QBits,  r20
    wmov    ZL,     r22
    LDD     A0,     Z+0+Off
    LDD     A1,     Z+1+Off
    LDD     A2,     Z+2+Off
    LDD     A3,     Z+3+Off
    LDD     A4,     Z+4+Off
    LDD     A5,     Z+5+Off
    LDD     A6,     Z+6+Off
    wmov    ZL,     r24
    F7call  load_mant

    ;; Clear quotient Q[].
    clr     Q0      ; === TMP
    ;clr    Q1      ; === ZERO
    wmov    Q2,     Q0
    wmov    Q4,     Q0
    wmov    Q6,     Q0

    ;; C[] and A[] are valid mantissae, i.e. their MSBit is set.  Therefore,
    ;; quotient Q[] will be in  [0x0.ff..., 0x0.40...]  and to adjust Q[] we
    ;; need at most 1 left-shift.  Compute F7_MANT_BITS + 2 bits of the
    ;; quotient:  One bit is used for rounding, and one bit might be consumed
    ;; by the mentioned left-shift.
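    ;;
    ;; Worked example of the restoring division below (illustrative, not
    ;; an upstream comment; 4-bit operands):  C = 0.1010, A = 0.1100.
    ;; The first compare finds 1010 < 1100, so 0 goes into Q.  Then the
    ;; dividend is shifted:  1.0100 >= 0.1100 fits, the remainder becomes
    ;; 0.1000 and 1 goes into Q, and so on.  This yields
    ;; Q = 0.01101... = (0.1010 / 0.1100) / 2, i.e. the quotient scaled
    ;; into [0.25, 1).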
    mov     Cnt,    QBits
    rjmp .Loop_start

.Loop:
    ;; Shift dividend.
    LSL     C0
    rol     C1
    rol     C2
    rol     C3
    rol     C4
    rol     C5
    rol     C6
    brcs .Lfits
    ;; Compare dividend against divisor.
.Loop_start:
    CP      C0,     A0
    cpc     C1,     A1
    cpc     C2,     A2
    cpc     C3,     A3
    cpc     C4,     A4
    cpc     C5,     A5
    cpc     C6,     A6
    ;; Shift 0 into quotient.
    brlo 1f
.Lfits:
    ;; Divisor fits into dividend.
    SUB     C0,     A0
    sbc     C1,     A1
    sbc     C2,     A2
    sbc     C3,     A3
    sbc     C4,     A4
    sbc     C5,     A5
    sbc     C6,     A6
    ;; Shift 1 into quotient.
    sec
    rol     Q0
    skipnext
1:  lsl     Q0
    rol     Q1
    rol     Q2
    rol     Q3
    rol     Q4
    rol     Q5
    rol     Q6
    rol     Q7
    dec     Cnt
    brne .Loop

    wmov    CA,     Q0
    wmov    C1,     Q2
    wmov    C3,     Q4
    wmov    C5,     Q6
    clr     ZERO

    ldi     Carry,  64
    sub     Carry,  QBits
    F7call  normalize.round.store_with_flags

    do_epilogue_restores 12
ENDF div

#endif /* F7MOD_div_ */


#if defined (F7MOD_sqrt16_) && defined (__AVR_HAVE_MUL__)

#define     Mask    C6
#define     Q0      C3      /*  = R22  */
#define     Q1      C4      /*  = R23  */

;; uint16_t R24 = sqrt16_XXX (uint16_t R24);
;; Clobbers:   R22, R23, TMP.
;;
;; XXX = floor:  Return integral part of square-root of R25:R24 with R25 = 0.
;;               Error is in [0, -1 LSB).
;; XXX = round:  Return square-root of R25:R24 rounded to nearest integer.
;;               R25 = (Q[] >= 65281) = (Q > 0xff00),  i.e. if Q[] is not
;;               bigger than 0xff00, then the result fits in 8 bits.
;;               Return C = 0 if the result is the same as for XXX = floor,
;;               error in [0, -1/2 LSB)
;;               Return C = 1 if the result is one higher than for XXX = floor,
;;               error in [1/2 LSB, 0).
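;;
;; Worked example (illustrative, not an upstream comment):
;; sqrt16_floor (99) = 9, since 9^2 = 81 <= 99 < 100 = 10^2.
;; sqrt16_round (99) = 10 with C = 1, since 99 >= 9^2 + 9 + 1/4 = 90.25,
;; i.e. sqrt (99) = 9.9499 is closer to 10.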
DEFUN sqrt16_round
    set
    skipnext
    ;; ...
LABEL sqrt16_floor
    clt ; Skipped?
    movw    Q0,     r24
    clr     C5
    ldi     Mask,   1 << 7

.Loop_mask:
    add     C5,     Mask
    mul     C5,     C5
    cp      Q0,     R0
    cpc     Q1,     R1
    brsh 1f
    sub     C5,     Mask
1:  lsr     Mask
    brne .Loop_mask

    brtc .Ldone             ; No rounding  =>  C6 will be 0.

    ;; Rounding:  (X + 1/2)^2  =  X^2 + X + 1/4,  thus probing
    ;; for bit -1 is testing Q[] against  C5^2 + C5.
    mul     C5,     C5
    add     R0,     C5
    adc     R1,     C6      ; Exploit C6 === Mask = 0.
    cp      R0,     Q0
    cpc     R1,     Q1
    brcc .Ldone
    ;; If  C5^2 + C5 + 1/4  fits into Q[], then round up and C = 1.
    adiw    C5,     1       ; Exploit C6 === Mask = 0.
    sec

.Ldone:
    clr     __zero_reg__
    ret
ENDF sqrt16_round
#undef Mask
#undef Q0
#undef Q1
#endif /* F7MOD_sqrt16_ && MUL */

#ifdef F7MOD_sqrt_approx_
DEFUN sqrt_approx
    push    r17
    push    r16
    wmov    XL,     r24
    wmov    ZL,     r22

    ;; C[] = 0.
    .global __clr_8
    XCALL   __clr_8

    ldd     C5,     Z+5+Off
    ldd     C6,     Z+6+Off

    ldd     Carry,  Z+0+Expo
    ldd     TMP,    Z+1+Expo
    wmov    ZL,     XL

    st      Z,      ZERO

    asr     TMP
    ror     Carry
    std     Z+1+Expo,   TMP
    std     Z+0+Expo,   Carry

    ;; Re-interpreting our Q-format 1.xx mantissa as Q2.yy, we have to shift
    ;; the mantissa to the right by 1.  As we need an even exponent, multiply
    ;; the mantissa by 2 for odd exponents, i.e. only right-shift if .expo
    ;; is even.

    brcs 1f
    lsr     C6
    ror     C5

1:
    F7call  sqrt16_round

    ;; sqrt16_round() returns:   C = 0:  error in [0, -1/2 LSB).
    ;;                           C = 1:  error in [1/2 LSB, 0)

    brcc 2f
    ;; Undo the round-up from sqrt16_round(); this will transform to
    ;; error in [-1/2 LSB, -1 LSB).
    sbiw    C5,     1
    ;; Together with the correct bit C4.7, the error is in  [0, -1/2 LSB).
    ori     C4,     1 << 7

2:  ;; Setting C4.6 adds 1/4 LSB and the error is now in [1/4 LSB, -1/4 LSB)
    ;; in either case.
    ori     C4,     1 << 6

    ;; ????????????
    ;; sqrt16_round() runs on integers, which means that it computes the
    ;; square root of  mant * 2^14  if we regard  mant as Q-format 2.yy,
    ;; i.e. 2 integral bits.  The result is  sqrt(mant) * 2^7,
    ;; and in order to get the same scaling as the input, .expo has to
    ;; be adjusted by 7. ???????????????

    ldi     Carry,  8
    F7call  normalize.store_with_flags

    pop     r16
    pop     r17
    ret

ENDF sqrt_approx
#endif /* F7MOD_sqrt_approx_ */


#undef CA
#undef C0
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef Carry


#ifdef F7MOD_D_fabs_
_DEFUN __fabs
    DALIAS fabs
    LALIAS fabsl
    andi    R25,    0b01111111
    ret
_ENDF __fabs
#endif /* F7MOD_D_fabs_ */


#ifdef F7MOD_D_neg_
_DEFUN __neg
_LABEL __negdf2
    subi    R25,    0b10000000
    ret
_ENDF __neg
#endif /* F7MOD_D_neg_ */


#ifdef F7MOD_D_signbit_
_DEFUN __signbit
    DALIAS signbit
    LALIAS signbitl
    bst     R25,    7
    clr     R25
    clr     R24
    bld     R24,    0
    ret
_ENDF __signbit
#endif /* F7MOD_D_signbit_ */


#ifdef F7MOD_D_copysign_
_DEFUN __copysign
    DALIAS copysign
    LALIAS copysignl
    bst     R17,    7
    bld     R25,    7
    ret
_ENDF __copysign
#endif /* F7MOD_D_copysign_ */


#ifdef F7MOD_D_isinf_
_DEFUN __isinf
    DALIAS isinf
    LALIAS isinfl
    F7call  class_D
    ;; Inf: T = Z = 1.
    brtc 0f
    ldi     R24,    1
    breq 1f
0:
    clr     R24
1:
    clr     R25
    ret
_ENDF __isinf
#endif /* F7MOD_D_isinf_ */


#ifdef F7MOD_D_isnan_
_DEFUN __isnan
    DALIAS isnan
    LALIAS isnanl
    F7call  class_D
    ;; NaN: T = 1, Z = 0.
    brtc 0f
    ldi     R24,    1
    brne 1f
0:
    clr     R24
1:
    clr     R25
    ret
_ENDF __isnan
#endif /* F7MOD_D_isnan_ */


#ifdef F7MOD_D_isfinite_
_DEFUN __isfinite
    DALIAS isfinite
    LALIAS isfinitel
    F7call  class_D
    ;; Number <=> T = 0.
    bld     R24,    0
    com     R24
    andi    R24,    1
    clr     R25
    ret
_ENDF __isfinite
#endif /* F7MOD_D_isfinite_ */


#ifdef F7MOD_D_class_
;; The encoded exponent has 11 bits.
#define MAX_BIASED_EXPO 0b0111111111110000

;; Classify a double in R18[]
;; Number: T-Flag = 0.
;; +-Inf : T-Flag = 1, Z-Flag = 1.
;; NaN   : T-Flag = 1, Z-Flag = 0.
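;;
;; Illustrative C sketch (not an upstream comment; IEEE-754 double with
;; the biased exponent in bits 62...52, high 16 bits in R25:R24):
;;
;;    if ((hi16 & 0x7ff0) != 0x7ff0)
;;        return NUMBER;                  // T = 0
;;    return mant_bits == 0 ? INF         // T = 1, Z = 1
;;                          : NAN;        // T = 1, Z = 0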
DEFUN class_D
    wmov    R26,    R24
    andi    R26,    lo8 (MAX_BIASED_EXPO)
    andi    R27,    hi8 (MAX_BIASED_EXPO)
    subi    R26,    lo8 (MAX_BIASED_EXPO)
    sbci    R27,    hi8 (MAX_BIASED_EXPO)
    clt
    brne .L.number
    set
    ;; Set sign and expo to 0.
    clr     R25
    andi    R24,    lo8 (~MAX_BIASED_EXPO)
    ;; What remains is the mantissa.
    ;; Mantissa == 0  =>  +/-Inf.
    ;; Mantissa != 0  =>  NaN.
    ;; Compare R18[] against sign_extend(R26) with R26 = 0.
    .global __cmpdi2_s8
    XJMP    __cmpdi2_s8
.L.number:
    ret

ENDF class_D
#endif /* F7MOD_D_class_ */


#ifdef F7MOD_call_dd_

;; Provide double wrappers for functions that operate on f7_t and get f7_t*.
;;
;; We set up a frame of sizeof(f7_t), convert the input double in R18[] to
;; f7_t in that frame location, then call *Z and finally convert the result f7_t
;; to double R18[] if that's requested.
;;
;; call_dd:     double func (double A)
;;              void (*Z) (f7_t *aa, const f7_t *aa)
;;
;; call_dx:     double func (type_t A)  , sizeof(type_t) <= 4
;;              void (*Z) (f7_t *aa, type_t)
;;
;; call_xd:     type_t func (double A)
;;              type_t (*Z) (const f7_t *aa)
;;
;; call_ddx:    double func (double A, word_t)  , sizeof (word_t) <= 2
;;              void (*Z) (f7_t *aa, const f7_t *aa, word_t)
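;;
;; Illustrative C model of the call_dd case (not an upstream comment;
;; f7_set_double / f7_get_double stand for the set_double_impl and
;; get_double workers used below):
;;
;;    double call_dd (double A)       // worker address arrives in Z
;;    {
;;        f7_t aa;                    // frame of sizeof (f7_t)
;;        f7_set_double (&aa, A);
;;        worker (&aa, &aa);          // (*Z) (f7_t*, const f7_t*)
;;        return f7_get_double (&aa);
;;    }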

#define WHAT    R13

DEFUN call_dd   ; WHAT = R13 = 3
    inc     ZERO
LABEL call_xd   ; WHAT = R13 = 2
    inc     ZERO
LABEL call_ddx  ; WHAT = R13 = 1
    inc     ZERO
LABEL call_dx   ; WHAT = R13 = 0
    push    WHAT
    mov     WHAT,   ZERO
    clr     ZERO
    ;; R14/R15 hold Z, the address of the f7_worker function, until we need it.
    push    r14
    push    r15
    wmov    r14,     Z

#define n_pushed    4
#define n_frame     10

    do_prologue_saves n_pushed, n_frame
    ;; Y = FramePointer + 1
    adiw    Y,      1
    dec     WHAT
    brmi .Ldx                   ; WHAT was initially 0.
    ;; FP + 1 = (f7_t) arg1
    wmov    r16,    Y
    ;; The double argument is in R18[].
    XCALL   F7_NAME (set_double_impl)
    tst     WHAT
    brne .Lno.ddx               ; WHAT was initially != 1.
    ;; call_ddx: Set R20/21 to the 2-byte scalar / pointer argument.
    ;; Fetch it from where prologue_saves put it.
    ldd     r20,    Y + n_frame + 3     ; Saved R16
    ldd     r21,    Y + n_frame + 2     ; Saved R17
.Lno.ddx:
    wmov    r22,    Y           ; &arg1 (input)
.Ldo.dx:
    wmov    r24,    Y           ; &arg1 (output)
    wmov    Z,      r14
    XICALL
    dec     WHAT
    breq .Lepilogue             ; WHAT was initially 2: Return non-double.
    wmov    r24,    Y           ; &arg1
    XCALL   F7_NAME (get_double)
.Lepilogue:
    ;; + 3 to account for R13...R15 pushed prior to do_prologue_saves.
    do_epilogue_restores n_pushed + 3, n_frame

.Ldx:
    ;; call_dx: Copy the 4-byte input scalar from R22[4] to R20[4].
    wmov    r20,    r22
    wmov    r22,    r24
    rjmp .Ldo.dx

ENDF call_dd
#endif /* F7MOD_call_dd_ */


#ifdef F7MOD_call_ddd_

;; Provide double wrappers for functions that operate on f7_t and get f7_t*.
;;
;; We set up a frame of 2 * sizeof(f7_t), convert the input doubles in R18[]
;; and R10[] to f7_t in these frame locations, then call *Z and finally
;; convert the result f7_t to double R18[] if that's requested.
;;
;; call_ddd:    double func (double A, double B)
;;              void (*Z) (f7_t *aa, const f7_t *aa, const f7_t *bb)
;;
;; call_xdd:    type_t func (double A, double B)
;;              type_t (*Z) (const f7_t *aa, const f7_t *bb)

DEFUN call_ddd
    inc     ZERO
LABEL call_xdd
    ;; R8/R9 hold Z, the address of the f7_worker function, until we need it.
    push    r9
    push    r8
    wmov    r8,     Z
    ;; This is an argument to call.2 and will be accessed by the arg pointer.
    push    ZERO
    clr     ZERO
    rcall   call.2
    pop     TMP
    pop     r8
    pop     r9
    ret

#define n_pushed    4
#define n_frame     20

call.2:
    do_prologue_saves n_pushed, n_frame
    ;; Y = FramePointer + 1
    adiw    Y,      1
    ;; FP + 1 = (f7_t) arg1
    wmov    r16,    Y
    ;; First double argument is already in R18[].
    XCALL   F7_NAME (set_double_impl)
    ;; FP + 11 = (f7_t) arg2
    wmov    r16,    Y
    subi    r16,    lo8 (-10)
    sbci    r17,    hi8 (-10)
    ;; Move second double argument to R18[].
    wmov    r18,    r10
    wmov    r20,    r12
    wmov    r22,    r14
    ;; Get high word of arg2 from where prologue_saves put it.
    ldd     r24,    Y + n_frame + 3     ; Saved R16
    ldd     r25,    Y + n_frame + 2     ; Saved R17
    XCALL   F7_NAME (set_double_impl)
    ;; Z (f7_t *arg1, const f7_t *arg1, const f7_t *arg2)
    wmov    Z,      r8
    wmov    r24,    Y                   ; &arg1
    ;; WHAT == 0  =>  call_xdd
    ;; WHAT != 0  =>  call_ddd
    ldd     TMP,    Y + n_frame + n_pushed + PC_SIZE
    tst     TMP
    breq .Lxdd
    wmov    r22,    Y                   ; &arg1
    wmov    r20,    r16                 ; &arg2
    XICALL
    wmov    r24,    Y                   ; &arg1
    XCALL   F7_NAME (get_double)
.Lepilogue:
    do_epilogue_restores n_pushed, n_frame
.Lxdd:
    wmov    r22,    r16                 ; &arg2
    XICALL
    rjmp .Lepilogue
ENDF call_ddd
#endif /* F7MOD_call_ddd_ */

#include "f7-wraps.h"

#endif /* !AVR_TINY */