xref: /netbsd-src/external/gpl3/gcc/dist/libgcc/config/avr/lib1funcs.S (revision 924795e69c8bb3f17afd8fcbb799710cc1719dc4)
1/*  -*- Mode: Asm -*-  */
2/* Copyright (C) 1998-2022 Free Software Foundation, Inc.
3   Contributed by Denis Chertykov <chertykov@gmail.com>
4
5This file is free software; you can redistribute it and/or modify it
6under the terms of the GNU General Public License as published by the
7Free Software Foundation; either version 3, or (at your option) any
8later version.
9
10This file is distributed in the hope that it will be useful, but
11WITHOUT ANY WARRANTY; without even the implied warranty of
12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13General Public License for more details.
14
15Under Section 7 of GPL version 3, you are granted additional
16permissions described in the GCC Runtime Library Exception, version
173.1, as published by the Free Software Foundation.
18
19You should have received a copy of the GNU General Public License and
20a copy of the GCC Runtime Library Exception along with this program;
21see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22<http://www.gnu.org/licenses/>.  */
23
24#if defined (__AVR_TINY__)
25#define __zero_reg__ r17
26#define __tmp_reg__ r16
27#else
28#define __zero_reg__ r1
29#define __tmp_reg__ r0
30#endif
31#define __SREG__ 0x3f
32#if defined (__AVR_HAVE_SPH__)
33#define __SP_H__ 0x3e
34#endif
35#define __SP_L__ 0x3d
36#define __RAMPZ__ 0x3B
37#define __EIND__  0x3C
38
39/* Most of the functions here are called directly from avr.md
40   patterns, instead of using the standard libcall mechanisms.
41   This can make better code because GCC knows exactly which
42   of the call-used registers (not all of them) are clobbered.  */
43
44/* FIXME:  At present, there is no SORT directive in the linker
45           script so that we must not assume that different modules
46           in the same input section like .libgcc.text.mul will be
47           located close together.  Therefore, we cannot use
48           RCALL/RJMP to call a function like __udivmodhi4 from
49           __divmodhi4 and have to use lengthy XCALL/XJMP even
50           though they are in the same input section and all same
51           input sections together are small enough to reach every
52           location with a RCALL/RJMP instruction.  */
53
54#if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
55#error device not supported
56#endif
57
;; Copy the LOW byte of a 16-bit pair.  On devices with MOVW this copies
;; both bytes at once (and mov_h then emits nothing); otherwise only the
;; low byte is moved and mov_h must be used for the high byte.
58	.macro	mov_l  r_dest, r_src
59#if defined (__AVR_HAVE_MOVW__)
60	movw	\r_dest, \r_src
61#else
62	mov	\r_dest, \r_src
63#endif
64	.endm
65
;; Copy the HIGH byte of a 16-bit pair.  Companion of mov_l: on MOVW
;; devices mov_l already copied both bytes, so this expands to nothing.
66	.macro	mov_h  r_dest, r_src
67#if defined (__AVR_HAVE_MOVW__)
68	; empty
69#else
70	mov	\r_dest, \r_src
71#endif
72	.endm
73
;; Word move: copy a 16-bit value from register pair r_src+1:r_src to
;; r_dest+1:r_dest, using a single MOVW where available.
74.macro	wmov  r_dest, r_src
75#if defined (__AVR_HAVE_MOVW__)
76    movw \r_dest,   \r_src
77#else
78    mov \r_dest,    \r_src
79    mov \r_dest+1,  \r_src+1
80#endif
81.endm
82
83#if defined (__AVR_HAVE_JMP_CALL__)
84#define XCALL call
85#define XJMP  jmp
86#else
87#define XCALL rcall
88#define XJMP  rjmp
89#endif
90
91#if defined (__AVR_HAVE_EIJMP_EICALL__)
92#define XICALL eicall
93#define XIJMP  eijmp
94#else
95#define XICALL icall
96#define XIJMP  ijmp
97#endif
98
99;; Prologue stuff
100
;; Emit a function prologue that saves \n_pushed call-saved registers and
;; allocates an \n_frame byte stack frame by jumping into the shared
;; __prologue_saves__ sequence.  X (r27:r26) carries the frame size and
;; Z (r31:r30) the gs() address to continue at after the saves.  The entry
;; offset (18 - n_pushed) * 2 presumably selects how many of the (up to 18)
;; save slots are executed — confirm against __prologue_saves__ in avr.c.
101.macro do_prologue_saves n_pushed n_frame=0
102    ldi r26, lo8(\n_frame)
103    ldi r27, hi8(\n_frame)
104    ldi r30, lo8(gs(.L_prologue_saves.\@))
105    ldi r31, hi8(gs(.L_prologue_saves.\@))
106    XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
107.L_prologue_saves.\@:
108.endm
109
110;; Epilogue stuff
111
;; Emit a function epilogue that deallocates an \n_frame byte frame and
;; restores \n_pushed call-saved registers by jumping into the shared
;; __epilogue_restores__ sequence.  Y (r29:r28) is loaded with SP and
;; advanced past the frame; r30 carries the number of pushed registers.
112.macro do_epilogue_restores n_pushed n_frame=0
113    in      r28, __SP_L__
114#ifdef __AVR_HAVE_SPH__
115    in      r29, __SP_H__
.if \n_frame > 63
    ;; Frame too big for ADIW's 6-bit immediate: add via subtract-negative.
117    subi    r28, lo8(-\n_frame)
118    sbci    r29, hi8(-\n_frame)
119.elseif \n_frame > 0
120    adiw    r28, \n_frame
121.endif
122#else
    ;; No SPH: high byte of the (8-bit) stack pointer is always zero.
123    clr     r29
124.if \n_frame > 0
125    subi    r28, lo8(-\n_frame)
126.endif
127#endif /* HAVE SPH */
128    ldi     r30, \n_pushed
129    XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
130.endm
131
132;; Support function entry and exit for convenience
133
;; Subtract a 16-bit immediate from register pair r_arg1+1:r_arg1.
;; AVR_TINY devices lack SBIW, so SUBI/SBCI is used there instead.
134.macro wsubi r_arg1, i_arg2
135#if defined (__AVR_TINY__)
136    subi \r_arg1,   lo8(\i_arg2)
137    sbci \r_arg1+1, hi8(\i_arg2)
138#else
139    sbiw \r_arg1, \i_arg2
140#endif
141.endm
142
;; Add a 16-bit immediate to register pair r_arg1+1:r_arg1.
;; AVR_TINY devices lack ADIW; adding is done by subtracting the negative.
143.macro waddi r_arg1, i_arg2
144#if defined (__AVR_TINY__)
145    subi \r_arg1,   lo8(-\i_arg2)
146    sbci \r_arg1+1, hi8(-\i_arg2)
147#else
148    adiw \r_arg1, \i_arg2
149#endif
150.endm
151
;; Open a global function definition: export the symbol, start debug-info
;; scope (.func) and emit the entry label.  Paired with ENDF.
152.macro DEFUN name
153.global \name
154.func \name
155\name:
156.endm
157
;; Close a function opened with DEFUN: record its size for the ELF symbol
;; table and end the debug-info scope.
158.macro ENDF name
159.size \name, .-\name
160.endfunc
161.endm
162
;; Define a zero-size function alias at the current location, i.e. an
;; additional exported entry point that falls through into the code below.
163.macro FALIAS name
164.global \name
165.func \name
166\name:
167.size \name, .-\name
168.endfunc
169.endm
170
171;; Skip next instruction, typically a jump target
172#define skip cpse 16,16
173
174;; Negate a 2-byte value held in consecutive registers
.macro NEG2  reg
    ;; -x = ~x + 1:  NEG on the low byte sets borrow unless the low byte
    ;; is 0, so SBCI \reg+1, -1 adds 1 to the complemented high byte
    ;; exactly when no borrow propagates.
176    com     \reg+1
177    neg     \reg
178    sbci    \reg+1, -1
179.endm
180
181;; Negate a 4-byte value held in consecutive registers
182;; Sets the V flag for signed overflow tests if REG >= 16
183.macro NEG4  reg
184    com     \reg+3
185    com     \reg+2
186    com     \reg+1
.if \reg >= 16
    ;; Upper registers can use SBCI with immediate -1.
188    neg     \reg
189    sbci    \reg+1, -1
190    sbci    \reg+2, -1
191    sbci    \reg+3, -1
.else
    ;; Lower registers have no immediate ops.  COM always sets Carry, so
    ;; the ADC chain below adds the +1 of two's complement via that carry.
193    com     \reg
194    adc     \reg,   __zero_reg__
195    adc     \reg+1, __zero_reg__
196    adc     \reg+2, __zero_reg__
197    adc     \reg+3, __zero_reg__
198.endif
199.endm
200
201#define exp_lo(N)  hlo8 ((N) << 23)
202#define exp_hi(N)  hhi8 ((N) << 23)
203
204
205.section .text.libgcc.mul, "ax", @progbits
206
207;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
208/* Note: mulqi3, mulhi3 are open-coded on the enhanced core.  */
209#if !defined (__AVR_HAVE_MUL__)
210/*******************************************************
211    Multiplication  8 x 8  without MUL
212*******************************************************/
213#if defined (L_mulqi3)
214
215#define	r_arg2	r22		/* multiplicand */
216#define	r_arg1 	r24		/* multiplier */
217#define r_res	__tmp_reg__	/* result */
218
;;; R24 = R24 * R22  (8-bit product, shift-and-add; no MUL instruction)
;;; Clobbers: __tmp_reg__, R22
219DEFUN __mulqi3
220	clr	r_res		; clear result
221__mulqi3_loop:
	sbrc	r_arg1,0	; if multiplier bit 0 set...
	add	r_res,r_arg2	; ...accumulate current multiplicand
224	add	r_arg2,r_arg2	; shift multiplicand
225	breq	__mulqi3_exit	; while multiplicand != 0
226	lsr	r_arg1		;
227	brne	__mulqi3_loop	; exit if multiplier = 0
228__mulqi3_exit:
229	mov	r_arg1,r_res	; result to return register
230	ret
231ENDF __mulqi3
232
233#undef r_arg2
234#undef r_arg1
235#undef r_res
236
237#endif 	/* defined (L_mulqi3) */
238
239
240/*******************************************************
241    Widening Multiplication  16 = 8 x 8  without MUL
242    Multiplication  16 x 16  without MUL
243*******************************************************/
244
245#define A0  22
246#define A1  23
247#define B0  24
248#define BB0 20
249#define B1  25
250;; Output overlaps input, thus expand result in CC0/1
251#define C0  24
252#define C1  25
253#define CC0  __tmp_reg__
254#define CC1  21
255
256#if defined (L_umulqihi3)
257;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
258;;; (C1:C0) = (unsigned int) A0  * (unsigned int) B0
259;;; Clobbers: __tmp_reg__, R21..R23
;; Zero-extend both 8-bit operands to 16 bits and reuse __mulhi3.
260DEFUN __umulqihi3
261    clr     A1
262    clr     B1
263    XJMP    __mulhi3
264ENDF __umulqihi3
265#endif /* L_umulqihi3 */
266
267#if defined (L_mulqihi3)
268;;; R25:R24 = (signed int) R22 * (signed int) R24
269;;; (C1:C0) = (signed int) A0  * (signed int) B0
270;;; Clobbers: __tmp_reg__, R20..R23
271DEFUN __mulqihi3
272    ;; Sign-extend B0
273    clr     B1
274    sbrc    B0, 7
275    com     B1
276    ;; The multiplication runs twice as fast if A1 is zero, thus:
277    ;; Zero-extend A0
278    clr     A1
279#ifdef __AVR_HAVE_JMP_CALL__
280    ;; Store  B0 * sign of A
281    clr     BB0
282    sbrc    A0, 7
283    mov     BB0, B0
284    call    __mulhi3
285#else /* have no CALL */
286    ;; Skip sign-extension of A if A >= 0
287    ;; Same size as with the first alternative but avoids errata skip
288    ;; and is faster if A >= 0
    sbrs    A0, 7
    rjmp    __mulhi3        ; tail-call: zero-extended A was already correct
291    ;; If  A < 0  store B
292    mov     BB0, B0
293    rcall   __mulhi3
294#endif /* HAVE_JMP_CALL */
    ;; 1-extend A after the multiplication: A was used zero-extended, so
    ;; for A < 0 the product is too large by B << 8; subtract it here
    ;; (BB0 is B0 when A < 0, else 0 / not reached).
296    sub     C1, BB0
297    ret
298ENDF __mulqihi3
299#endif /* L_mulqihi3 */
300
301#if defined (L_mulhi3)
302;;; R25:R24 = R23:R22 * R25:R24
303;;; (C1:C0) = (A1:A0) * (B1:B0)
304;;; Clobbers: __tmp_reg__, R21..R23
;; 16 x 16 shift-and-add multiply; B is shifted left while A is consumed
;; from its low end, terminating early when either operand runs out.
305DEFUN __mulhi3
306
307    ;; Clear result
308    clr     CC0
309    clr     CC1
310    rjmp 3f
3111:
312    ;; Bit n of A is 1  -->  C += B << n
313    add     CC0, B0
314    adc     CC1, B1
3152:
316    lsl     B0
317    rol     B1
3183:
319    ;; If B == 0 we are ready
320    wsubi   B0, 0
321    breq 9f
322
323    ;; Carry = n-th bit of A
324    lsr     A1
325    ror     A0
326    ;; If bit n of A is set, then go add  B * 2^n  to  C
327    brcs 1b
328
329    ;; Carry = 0  -->  The ROR above acts like  CP A0, 0
330    ;; Thus, it is sufficient to CPC the high part to test A against 0
331    cpc     A1, __zero_reg__
332    ;; Only proceed if A != 0
333    brne    2b
3349:
335    ;; Move Result into place
336    mov     C0, CC0
337    mov     C1, CC1
338    ret
339ENDF  __mulhi3
340#endif /* L_mulhi3 */
341
342#undef A0
343#undef A1
344#undef B0
345#undef BB0
346#undef B1
347#undef C0
348#undef C1
349#undef CC0
350#undef CC1
351
352
353#define A0 22
354#define A1 A0+1
355#define A2 A0+2
356#define A3 A0+3
357
358#define B0 18
359#define B1 B0+1
360#define B2 B0+2
361#define B3 B0+3
362
363#define CC0 26
364#define CC1 CC0+1
365#define CC2 30
366#define CC3 CC2+1
367
368#define C0 22
369#define C1 C0+1
370#define C2 C0+2
371#define C3 C0+3
372
373/*******************************************************
374    Widening Multiplication  32 = 16 x 16  without MUL
375*******************************************************/
376
377#if defined (L_umulhisi3)
;; Widening unsigned multiply: zero-extend both 16-bit operands to 32 bits
;; and tail-call the 32 x 32 routine.
378DEFUN __umulhisi3
    wmov    B0, 24          ; B[] = R25:R24
380    ;; Zero-extend B
381    clr     B2
382    clr     B3
    ;; Zero-extend A (B2:B3 are known zero here)
384    wmov    A2, B2
385    XJMP    __mulsi3
386ENDF __umulhisi3
387#endif /* L_umulhisi3 */
388
389#if defined (L_mulhisi3)
;; Widening signed multiply: sign-extend B; A is treated per the errata
;; situation below, then the 32 x 32 routine does the work.
390DEFUN __mulhisi3
    wmov    B0, 24          ; B[] = R25:R24
392    ;; Sign-extend B
    lsl     r25             ; Carry = sign of B
    sbc     B2, B2          ; B2 = 0x00 or 0xFF (broadcast sign)
395    mov     B3, B2
396#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
397    ;; Sign-extend A
398    clr     A2
399    sbrc    A1, 7
400    com     A2
401    mov     A3, A2
402    XJMP __mulsi3
403#else /*  no __AVR_ERRATA_SKIP_JMP_CALL__ */
404    ;; Zero-extend A and __mulsi3 will run at least twice as fast
405    ;; compared to a sign-extended A.
406    clr     A2
407    clr     A3
408    sbrs    A1, 7
409    XJMP __mulsi3
410    ;; If  A < 0  then perform the  B * 0xffff.... before the
411    ;; very multiplication by initializing the high part of the
412    ;; result CC with -B.
    wmov    CC2, A2         ; CC2:CC3 = 0
414    sub     CC2, B0
415    sbc     CC3, B1
416    XJMP __mulsi3_helper
417#endif /*  __AVR_ERRATA_SKIP_JMP_CALL__ */
418ENDF __mulhisi3
419#endif /* L_mulhisi3 */
420
421
422/*******************************************************
423    Multiplication  32 x 32  without MUL
424*******************************************************/
425
426#if defined (L_mulsi3)
;;; R25:R22 = R25:R22 * R21:R18 (no MUL; falls through to __mulsi3_helper).
;;; On AVR_TINY, B[] is passed on the caller's stack instead of registers.
427DEFUN __mulsi3
428#if defined (__AVR_TINY__)
429    in     r26, __SP_L__ ; safe to use X, as it is CC0/CC1
430    in     r27, __SP_H__
431    subi   r26, lo8(-3)   ; Add 3 to point past return address
432    sbci   r27, hi8(-3)
433    push   B0    ; save callee saved regs
434    push   B1
435    ld     B0, X+   ; load from caller stack
436    ld     B1, X+
437    ld     B2, X+
438    ld     B3, X
439#endif
440    ;; Clear result
441    clr     CC2
442    clr     CC3
443    ;; FALLTHRU
444ENDF  __mulsi3
445
;; 32 x 32 shift-and-add core.  Expects CC2:CC3 pre-initialized by the
;; caller (zero, or -B for the sign-fixup path of __mulhisi3).
446DEFUN __mulsi3_helper
447    clr     CC0
448    clr     CC1
449    rjmp 3f
450
4511:  ;; If bit n of A is set, then add  B * 2^n  to the result in CC
452    ;; CC += B
453    add  CC0,B0  $  adc  CC1,B1  $  adc  CC2,B2  $  adc  CC3,B3

4552:  ;; B <<= 1
456    lsl  B0      $  rol  B1      $  rol  B2      $  rol  B3

4573:  ;; A >>= 1:  Carry = n-th bit of A
458    lsr  A3      $  ror  A2      $  ror  A1      $  ror  A0

460    brcs 1b
    ;; Only continue if  A != 0.  Carry is 0 here, and ROR A0 above set Z
    ;; from the shifted A0; SBCI only keeps Z when its result is zero, so
    ;; after it Z == (A0 == 0 && A1 == 0).
463    sbci    A1, 0
464    brne 2b
    wsubi   A2, 0           ; test high word A3:A2
466    brne 2b
467
468    ;; All bits of A are consumed:  Copy result to return register C
469    wmov    C0, CC0
470    wmov    C2, CC2
471#if defined (__AVR_TINY__)
472    pop     B1      ; restore callee saved regs
473    pop     B0
474#endif  /* defined (__AVR_TINY__) */
475
476    ret
477ENDF __mulsi3_helper
478#endif /* L_mulsi3 */
479
480#undef A0
481#undef A1
482#undef A2
483#undef A3
484#undef B0
485#undef B1
486#undef B2
487#undef B3
488#undef C0
489#undef C1
490#undef C2
491#undef C3
492#undef CC0
493#undef CC1
494#undef CC2
495#undef CC3
496
497#endif /* !defined (__AVR_HAVE_MUL__) */
498;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
499
500;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
501#if defined (__AVR_HAVE_MUL__)
502#define A0 26
503#define B0 18
504#define C0 22
505
506#define A1 A0+1
507
508#define B1 B0+1
509#define B2 B0+2
510#define B3 B0+3
511
512#define C1 C0+1
513#define C2 C0+2
514#define C3 C0+3
515
516/*******************************************************
517    Widening Multiplication  32 = 16 x 16  with MUL
518*******************************************************/
519
520#if defined (L_mulhisi3)
521;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
522;;; C3:C0   = (signed long) A1:A0   * (signed long) B1:B0
523;;; Clobbers: __tmp_reg__
524DEFUN __mulhisi3
    XCALL   __umulhisi3     ; unsigned product first
    ;; Sign-extend B: the unsigned product treated a negative B as
    ;; B + 2^16, so compensate by subtracting A << 16.
527    tst     B1
    brpl    1f              ; B >= 0: nothing to fix up
529    sub     C2, A0
530    sbc     C3, A1
5311:  ;; Sign-extend A
532    XJMP __usmulhisi3_tail
533ENDF __mulhisi3
534#endif /* L_mulhisi3 */
535
536#if defined (L_usmulhisi3)
537;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
538;;; C3:C0   = (signed long) A1:A0   * (unsigned long) B1:B0
539;;; Clobbers: __tmp_reg__
540DEFUN __usmulhisi3
541    XCALL   __umulhisi3
542    ;; FALLTHRU
543ENDF __usmulhisi3

;; Fix up a preceding unsigned product for a signed A: if A < 0 the
;; unsigned multiply counted A as A + 2^16, so subtract B << 16.
545DEFUN __usmulhisi3_tail
546    ;; Sign-extend A
    sbrs    A1, 7
    ret                     ; A >= 0: unsigned product is already correct
549    sub     C2, B0
550    sbc     C3, B1
551    ret
552ENDF __usmulhisi3_tail
553#endif /* L_usmulhisi3 */
554
555#if defined (L_umulhisi3)
556;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
557;;; C3:C0   = (unsigned long) A1:A0   * (unsigned long) B1:B0
558;;; Clobbers: __tmp_reg__
;; Schoolbook 16 x 16 -> 32 with MUL: four byte products
;; A0*B0 + (A0*B1 + A1*B0) << 8 + (A1*B1) << 16.
559DEFUN __umulhisi3
560    mul     A0, B0
561    movw    C0, r0
562    mul     A1, B1
563    movw    C2, r0
564    mul     A0, B1
565#ifdef __AVR_HAVE_JMP_CALL__
566    ;; This function is used by many other routines, often multiple times.
567    ;; Therefore, if the flash size is not too limited, avoid the RCALL
    ;; and invest 6 Bytes to speed things up.
569    add     C1, r0
570    adc     C2, r1
571    clr     __zero_reg__
572    adc     C3, __zero_reg__
#else
    rcall   1f              ; reuse the accumulate sequence at 1: below
575#endif
576    mul     A1, B0
5771:  add     C1, r0
578    adc     C2, r1
    clr     __zero_reg__    ; MUL clobbered r1; ABI requires it zero
580    adc     C3, __zero_reg__
581    ret
582ENDF __umulhisi3
583#endif /* L_umulhisi3 */
584
585/*******************************************************
586    Widening Multiplication  32 = 16 x 32  with MUL
587*******************************************************/
588
589#if defined (L_mulshisi3)
590;;; R25:R22 = (signed long) R27:R26 * R21:R18
591;;; (C3:C0) = (signed long) A1:A0   * B3:B0
592;;; Clobbers: __tmp_reg__
593DEFUN __mulshisi3
594#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have a problem with skipping a 2-word instruction,
    ;; so branch instead of SBRS-skipping the 2-word XJMP.
596    tst     A1
597    brmi    __mulohisi3
598#else
    sbrs    A1, 7           ; A >= 0: plain unsigned-A multiply suffices
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
601    XJMP    __muluhisi3
602    ;; FALLTHRU
603ENDF __mulshisi3

605;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
606;;; (C3:C0) = (one-extended long) A1:A0   * B3:B0
607;;; Clobbers: __tmp_reg__
608DEFUN __mulohisi3
609    XCALL   __muluhisi3
    ;; One-extend R27:R26 (A1:A0): the unsigned product counted a
    ;; negative A as A + 2^16, so subtract B << 16.
611    sub     C2, B0
612    sbc     C3, B1
613    ret
614ENDF __mulohisi3
615#endif /* L_mulshisi3 */
616
617#if defined (L_muluhisi3)
618;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
619;;; (C3:C0) = (unsigned long) A1:A0   * B3:B0
620;;; Clobbers: __tmp_reg__
;; 16 x 32 -> low 32 bits: full 16x16 low product via __umulhisi3, then
;; the remaining partial products that still land below 2^32.  Products
;; A0*B3 and A1*B2 contribute only their low byte to C3 (mod 2^32).
621DEFUN __muluhisi3
622    XCALL   __umulhisi3
623    mul     A0, B3
624    add     C3, r0
625    mul     A1, B2
626    add     C3, r0
627    mul     A0, B2
628    add     C2, r0
629    adc     C3, r1
    clr     __zero_reg__    ; restore ABI zero register after MUL
631    ret
632ENDF __muluhisi3
633#endif /* L_muluhisi3 */
634
635/*******************************************************
636    Multiplication  32 x 32  with MUL
637*******************************************************/
638
639#if defined (L_mulsi3)
640;;; R25:R22 = R25:R22 * R21:R18
641;;; (C3:C0) = C3:C0   * B3:B0
642;;; Clobbers: R26, R27, __tmp_reg__
643DEFUN __mulsi3
    movw    A0, C0          ; X = low word of A (input overlaps result C)
    push    C2              ; preserve high word of A across the call
645    push    C3
646    XCALL   __muluhisi3
647    pop     A1
648    pop     A0
649    ;; A1:A0 now contains the high word of A
    ;; Add  A.hi * B.lo << 16  (only bits below 2^32 matter).
651    mul     A0, B0
652    add     C2, r0
653    adc     C3, r1
654    mul     A0, B1
655    add     C3, r0
656    mul     A1, B0
657    add     C3, r0
    clr     __zero_reg__    ; restore ABI zero register after MUL
659    ret
660ENDF __mulsi3
661#endif /* L_mulsi3 */
662
663#undef A0
664#undef A1
665
666#undef B0
667#undef B1
668#undef B2
669#undef B3
670
671#undef C0
672#undef C1
673#undef C2
674#undef C3
675
676#endif /* __AVR_HAVE_MUL__ */
677
678/*******************************************************
679       Multiplication 24 x 24 with MUL
680*******************************************************/
681
682#if defined (L_mulpsi3)
683
684;; A[0..2]: In: Multiplicand; Out: Product
685#define A0  22
686#define A1  A0+1
687#define A2  A0+2
688
689;; B[0..2]: In: Multiplier
690#define B0  18
691#define B1  B0+1
692#define B2  B0+2
693
694#if defined (__AVR_HAVE_MUL__)
695
696;; C[0..2]: Expand Result
697#define C0  22
698#define C1  C0+1
699#define C2  C0+2
700
701;; R24:R22 *= R20:R18
702;; Clobbers: r21, r25, r26, r27, __tmp_reg__
703
704#define AA0 26
705#define AA2 21
706
707DEFUN __mulpsi3
    wmov    AA0, A0         ; copy A: result C overlaps input A
708    mov     AA2, A2
    XCALL   __umulhisi3     ; C = A.lo16 * B.lo16
    ;; Add the two byte products that still reach into the 24-bit result;
    ;; only their low byte (r0) matters mod 2^24.
710    mul     AA2, B0     $  add  C2, r0
711    mul     AA0, B2     $  add  C2, r0
    clr     __zero_reg__    ; restore ABI zero register after MUL
713    ret
714ENDF __mulpsi3
716
717#undef AA2
718#undef AA0
719
720#undef C2
721#undef C1
722#undef C0
723
724#else /* !HAVE_MUL */
725;; C[0..2]: Expand Result
726#if defined (__AVR_TINY__)
727#define C0  16
728#else
729#define C0  0
730#endif /* defined (__AVR_TINY__) */
731#define C1  C0+1
732#define C2  21
733
734;; R24:R22 *= R20:R18
735;; Clobbers: __tmp_reg__, R18, R19, R20, R21
736
;; 24 x 24 shift-and-add multiply.  C0/C1 alias __tmp_reg__/__zero_reg__,
;; so __zero_reg__ serves as an accumulator and is re-cleared at the end.
737DEFUN __mulpsi3
738#if defined (__AVR_TINY__)
739    in r26,__SP_L__
740    in r27,__SP_H__
741    subi r26, lo8(-3)   ; Add 3 to point past return address
742    sbci r27, hi8(-3)
743    push B0    ; save callee saved regs
744    push B1
745    ld B0,X+   ; load from caller stack
746    ld B1,X+
747    ld B2,X+
748#endif /* defined (__AVR_TINY__) */

750    ;; C[] = 0
751    clr     __tmp_reg__
752    clr     C2

7540:  ;; Shift N-th Bit of B[] into Carry.  N = 24 - Loop
755    LSR  B2     $  ror  B1     $  ror  B0

757    ;; If the N-th Bit of B[] was set...
758    brcc    1f

760    ;; ...then add A[] * 2^N to the Result C[]
761    ADD  C0,A0  $  adc  C1,A1  $  adc  C2,A2

7631:  ;; Multiply A[] by 2
764    LSL  A0     $  rol  A1     $  rol  A2

    ;; Loop until B[] is 0.  SUBI sets Z from B0; the SBCIs keep Z only
    ;; while their results are zero too, so Z == (B[] == 0).
767    subi B0,0   $  sbci B1,0   $  sbci B2,0
768    brne    0b

770    ;; Copy C[] to the return Register A[]
771    wmov    A0, C0
772    mov     A2, C2

    clr     __zero_reg__    ; it was used as C1; ABI requires it zero
775#if defined (__AVR_TINY__)
776    pop B1
777    pop B0
778#endif /* (__AVR_TINY__) */
779    ret
780ENDF __mulpsi3
781
782#undef C2
783#undef C1
784#undef C0
785
786#endif /* HAVE_MUL */
787
788#undef B2
789#undef B1
790#undef B0
791
792#undef A2
793#undef A1
794#undef A0
795
796#endif /* L_mulpsi3 */
797
798#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
799
800;; A[0..2]: In: Multiplicand
801#define A0  22
802#define A1  A0+1
803#define A2  A0+2
804
805;; BB: In: Multiplier
806#define BB  25
807
808;; C[0..2]: Result
809#define C0  18
810#define C1  C0+1
811#define C2  C0+2
812
813;; C[] = A[] * sign_extend (BB)
;; 24-bit product of A[0..2] and the sign-extended 8-bit BB.
814DEFUN __mulsqipsi3
815    mul     A0, BB
816    movw    C0, r0
    mul     A2, BB
    mov     C2, r0          ; only the low byte reaches the 24-bit result
819    mul     A1, BB
820    add     C1, r0
821    adc     C2, r1
    clr     __zero_reg__    ; restore ABI zero register after MUL
    sbrs    BB, 7
    ret                     ; BB >= 0: unsigned product is correct
    ;; One-extend BB: compensate for BB having been used as BB + 2^8
    ;; by subtracting A << 8.
825    sub     C1, A0
826    sbc     C2, A1
827    ret
828ENDF __mulsqipsi3
830
831#undef C2
832#undef C1
833#undef C0
834
835#undef BB
836
837#undef A2
838#undef A1
839#undef A0
840
841#endif /* L_mulsqipsi3  &&  HAVE_MUL */
842
843/*******************************************************
844       Multiplication 64 x 64
845*******************************************************/
846
847;; A[] = A[] * B[]
848
849;; A[0..7]: In: Multiplicand
850;; Out: Product
851#define A0  18
852#define A1  A0+1
853#define A2  A0+2
854#define A3  A0+3
855#define A4  A0+4
856#define A5  A0+5
857#define A6  A0+6
858#define A7  A0+7
859
860;; B[0..7]: In: Multiplier
861#define B0  10
862#define B1  B0+1
863#define B2  B0+2
864#define B3  B0+3
865#define B4  B0+4
866#define B5  B0+5
867#define B6  B0+6
868#define B7  B0+7
869
870#ifndef __AVR_TINY__
871#if defined (__AVR_HAVE_MUL__)
872;; Define C[] for convenience
873;; Notice that parts of C[] overlap A[] respective B[]
874#define C0  16
875#define C1  C0+1
876#define C2  20
877#define C3  C2+1
878#define C4  28
879#define C5  C4+1
880#define C6  C4+2
881#define C7  C4+3
882
883#if defined (L_muldi3)
884
885;; A[]     *= B[]
886;; R25:R18 *= R17:R10
887;; Ordinary ABI-Function
888
;; 64 x 64 multiply with MUL, treating both operands as four 16-bit words.
;; Word products (i * j) with i+j == 3 only contribute single bytes to
;; C7:C6 and are done with raw MULs up front; the remaining word products
;; go through __umulhisi3 / __muldi3_6, accumulated into C[].
889DEFUN __muldi3
    push    r29             ; save call-saved regs overlapping C[]
890    push    r28
891    push    r17
892    push    r16

894    ;; Counting in Words, we have to perform a 4 * 4 Multiplication

896    ;; 3 * 0  +  0 * 3
897    mul  A7,B0  $             $  mov C7,r0
898    mul  A0,B7  $             $  add C7,r0
899    mul  A6,B1  $             $  add C7,r0
900    mul  A6,B0  $  mov C6,r0  $  add C7,r1
901    mul  B6,A1  $             $  add C7,r0
902    mul  B6,A0  $  add C6,r0  $  adc C7,r1

904    ;; 1 * 2
905    mul  A2,B4  $  add C6,r0  $  adc C7,r1
906    mul  A3,B4  $             $  add C7,r0
907    mul  A2,B5  $             $  add C7,r0

    ;; Save words still needed after __umulhisi3 clobbers them.
909    push    A5
910    push    A4
911    push    B1
912    push    B0
913    push    A3
914    push    A2

916    ;; 0 * 0
917    wmov    26, B0
918    XCALL   __umulhisi3
919    wmov    C0, 22
920    wmov    C2, 24

922    ;; 0 * 2
923    wmov    26, B4
924    XCALL   __umulhisi3  $  wmov C4,22            $ add C6,24 $ adc C7,25

926    wmov    26, B2
927    ;; 0 * 1
928    XCALL   __muldi3_6

930    pop     A0
931    pop     A1
932    ;; 1 * 1
933    wmov    26, B2
934    XCALL   __umulhisi3  $  add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25

936    pop     r26
937    pop     r27
938    ;; 1 * 0
939    XCALL   __muldi3_6

941    pop     A0
942    pop     A1
943    ;; 2 * 0
944    XCALL   __umulhisi3  $  add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25

946    ;; 2 * 1
947    wmov    26, B2
948    XCALL   __umulhisi3  $            $           $ add C6,22 $ adc C7,23

950    ;; A[] = C[]
951    wmov    A0, C0
952    ;; A2 = C2 already
953    wmov    A4, C4
954    wmov    A6, C6

956    pop     r16
957    pop     r17
958    pop     r28
959    pop     r29
960    ret
961ENDF __muldi3
963#endif /* L_muldi3 */
964
965#if defined (L_muldi3_6)
966;; A helper for some 64-bit multiplications with MUL available
;; Multiply X (R27:R26) by R19:R18 via __umulhisi3 and accumulate the
;; 32-bit product (returned in R25:R22) into C5:C2, propagating the final
;; carry into C7:C6.
;; Note: DEFUN already emits the  __muldi3_6:  entry label; repeating the
;; label here would be a duplicate symbol definition and fail to assemble.
967DEFUN __muldi3_6
969    XCALL   __umulhisi3
970    add     C2, 22
971    adc     C3, 23
972    adc     C4, 24
973    adc     C5, 25
974    brcc    0f
    adiw    C6, 1           ; ripple carry into the top word (C6 = r30)
9760:  ret
977ENDF __muldi3_6
978#endif /* L_muldi3_6 */
979
980#undef C7
981#undef C6
982#undef C5
983#undef C4
984#undef C3
985#undef C2
986#undef C1
987#undef C0
988
989#else /* !HAVE_MUL */
990
991#if defined (L_muldi3)
992
993#define C0  26
994#define C1  C0+1
995#define C2  C0+2
996#define C3  C0+3
997#define C4  C0+4
998#define C5  C0+5
999#define C6  0
1000#define C7  C6+1
1001
1002#define Loop 9
1003
1004;; A[]     *= B[]
1005;; R25:R18 *= R17:R10
1006;; Ordinary ABI-Function
1007
;; 64 x 64 shift-and-add multiply (no MUL), 64 iterations.  C6/C7 alias
;; __tmp_reg__/__zero_reg__, so __zero_reg__ is re-cleared before return.
1008DEFUN __muldi3
1009    push    r29
1010    push    r28
    push    Loop            ; r9 is call-saved

    ldi     C0, 64          ; 64 bits to process
1014    mov     Loop, C0

1016    ;; C[] = 0
1017    clr     __tmp_reg__
    wmov    C0, 0           ; copy r1:r0 (both zero) into each C pair
1018    wmov    C2, 0
1019    wmov    C4, 0

10220:  ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
1023    ;; where N = 64 - Loop.
1024    ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
1025    ;; B[] will have its initial Value again.
1026    LSR  B7     $  ror  B6     $  ror  B5     $  ror  B4
1027    ror  B3     $  ror  B2     $  ror  B1     $  ror  B0

1029    ;; If the N-th Bit of B[] was set then...
1030    brcc    1f
1031    ;; ...finish Rotation...
1032    ori     B7, 1 << 7

1034    ;; ...and add A[] * 2^N to the Result C[]
1035    ADD  C0,A0  $  adc  C1,A1  $  adc  C2,A2  $  adc  C3,A3
1036    adc  C4,A4  $  adc  C5,A5  $  adc  C6,A6  $  adc  C7,A7

10381:  ;; Multiply A[] by 2
1039    LSL  A0     $  rol  A1     $  rol  A2     $  rol  A3
1040    rol  A4     $  rol  A5     $  rol  A6     $  rol  A7

1042    dec     Loop
1043    brne    0b

1045    ;; We expanded the Result in C[]
1046    ;; Copy Result to the Return Register A[]
1047    wmov    A0, C0
1048    wmov    A2, C2
1049    wmov    A4, C4
1050    wmov    A6, C6

    clr     __zero_reg__    ; it was used as C7; ABI requires it zero
1053    pop     Loop
1054    pop     r28
1055    pop     r29
1056    ret
1057ENDF __muldi3
1057ENDF __muldi3
1058
1059#undef Loop
1060
1061#undef C7
1062#undef C6
1063#undef C5
1064#undef C4
1065#undef C3
1066#undef C2
1067#undef C1
1068#undef C0
1069
1070#endif /* L_muldi3 */
1071#endif /* HAVE_MUL */
1072#endif /* if not __AVR_TINY__ */
1073
1074#undef B7
1075#undef B6
1076#undef B5
1077#undef B4
1078#undef B3
1079#undef B2
1080#undef B1
1081#undef B0
1082
1083#undef A7
1084#undef A6
1085#undef A5
1086#undef A4
1087#undef A3
1088#undef A2
1089#undef A1
1090#undef A0
1091
1092/*******************************************************
1093   Widening Multiplication 64 = 32 x 32  with  MUL
1094*******************************************************/
1095
1096#if defined (__AVR_HAVE_MUL__)
1097#define A0 r22
1098#define A1 r23
1099#define A2 r24
1100#define A3 r25
1101
1102#define B0 r18
1103#define B1 r19
1104#define B2 r20
1105#define B3 r21
1106
1107#define C0  18
1108#define C1  C0+1
1109#define C2  20
1110#define C3  C2+1
1111#define C4  28
1112#define C5  C4+1
1113#define C6  C4+2
1114#define C7  C4+3
1115
1116#if defined (L_umulsidi3)
1117
1118;; Unsigned widening 64 = 32 * 32 Multiplication with MUL
1119
1120;; R18[8] = R22[4] * R18[4]
1121;;
1122;; Ordinary ABI Function, but additionally sets
1123;; X = R20[2] = B2[2]
1124;; Z = R22[2] = A0[2]
1125DEFUN __umulsidi3
    clt                     ; T = 0: no sign fixup in the helper
1127    ;; FALLTHRU
1128ENDF  __umulsidi3
1129    ;; T = sign (A)
;; Shared body: computes the unsigned 64-bit product of R25:R22 and
;; R21:R18 from four 16x16 word products; if T is set, additionally
;; subtracts B << 32 (the sign-extension correction for a negative A).
;; The register comment tables below track which word lives where.
1130DEFUN __umulsidi3_helper
1131    push    29  $  push    28 ; Y
    wmov    30, A2          ; stash A.hi in Z
1133    ;; Counting in Words, we have to perform 4 Multiplications
1134    ;; 0 * 0
1135    wmov    26, A0
1136    XCALL __umulhisi3
1137    push    23  $  push    22 ; C0
1138    wmov    28, B0
1139    wmov    18, B2
1140    wmov    C2, 24
1141    push    27  $  push    26 ; A0
1142    push    19  $  push    18 ; B2
1143    ;;
1144    ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
1145    ;;  B2  C2  --  --  --  B0  A2
1146    ;; 1 * 1
1147    wmov    26, 30      ; A2
1148    XCALL __umulhisi3
1149    ;; Sign-extend A.  T holds the sign of A
1150    brtc    0f
1151    ;; Subtract B from the high part of the result
1152    sub     22, 28
1153    sbc     23, 29
1154    sbc     24, 18
1155    sbc     25, 19
11560:  wmov    18, 28      ;; B0
1157    wmov    C4, 22
1158    wmov    C6, 24
1159    ;;
1160    ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
1161    ;;  B0  C2  --  --  A2  C4  C6
1162    ;;
1163    ;; 1 * 0
1164    XCALL __muldi3_6
1165    ;; 0 * 1
1166    pop     26  $   pop 27  ;; B2
1167    pop     18  $   pop 19  ;; A0
1168    XCALL __muldi3_6

1170    ;; Move result C into place and save A0 in Z
1171    wmov    22, C4
1172    wmov    24, C6
1173    wmov    30, 18 ; A0
1174    pop     C0  $   pop C1

1176    ;; Epilogue
1177    pop     28  $   pop 29  ;; Y
1178    ret
1179ENDF __umulsidi3_helper
1180#endif /* L_umulsidi3 */
1181
1182
1183#if defined (L_mulsidi3)
1184
1185;; Signed widening 64 = 32 * 32 Multiplication
1186;;
1187;; R18[8] = R22[4] * R18[4]
1188;; Ordinary ABI Function
1189DEFUN __mulsidi3
    bst     A3, 7           ; T = sign of A, consumed by the helper
1191    sbrs    B3, 7           ; Enhanced core has no skip bug
1192    XJMP __umulsidi3_helper

    ;; B needs sign-extension: the unsigned product counted B as
    ;; B + 2^32, so  A << 32  must be subtracted afterwards.
1195    push    A3
1196    push    A2
1197    XCALL __umulsidi3_helper
1198    ;; A0 survived in Z
1199    sub     r22, r30
1200    sbc     r23, r31
1201    pop     r26
1202    pop     r27
1203    sbc     r24, r26
1204    sbc     r25, r27
1205    ret
1206ENDF __mulsidi3
1207#endif /* L_mulsidi3 */
1208
1209#undef A0
1210#undef A1
1211#undef A2
1212#undef A3
1213#undef B0
1214#undef B1
1215#undef B2
1216#undef B3
1217#undef C0
1218#undef C1
1219#undef C2
1220#undef C3
1221#undef C4
1222#undef C5
1223#undef C6
1224#undef C7
1225#endif /* HAVE_MUL */
1226
1227/**********************************************************
1228    Widening Multiplication 64 = 32 x 32  without  MUL
1229**********************************************************/
1230#ifndef __AVR_TINY__ /* if not __AVR_TINY__ */
1231#if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
1232#define A0 18
1233#define A1 A0+1
1234#define A2 A0+2
1235#define A3 A0+3
1236#define A4 A0+4
1237#define A5 A0+5
1238#define A6 A0+6
1239#define A7 A0+7
1240
1241#define B0 10
1242#define B1 B0+1
1243#define B2 B0+2
1244#define B3 B0+3
1245#define B4 B0+4
1246#define B5 B0+5
1247#define B6 B0+6
1248#define B7 B0+7
1249
1250#define AA0 22
1251#define AA1 AA0+1
1252#define AA2 AA0+2
1253#define AA3 AA0+3
1254
1255#define BB0 18
1256#define BB1 BB0+1
1257#define BB2 BB0+2
1258#define BB3 BB0+3
1259
1260#define Mask r30
1261
1262;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
1263;;
1264;; R18[8] = R22[4] * R18[4]
1265;; Ordinary ABI Function
;; Signed entry: T = 1 requests sign-extension; SKIP jumps over the CLT.
1266DEFUN __mulsidi3
1267    set
1268    skip
1269    ;; FALLTHRU
1270ENDF  __mulsidi3

;; Unsigned entry (T = 0).  Both entries widen their 32-bit operands to
;; 64 bits — sign- or zero-extended depending on T via Mask — and defer
;; to the 64 x 64 routine.
1272DEFUN __umulsidi3
1273    clt     ; skipped
1274    ;; Save 10 Registers: R10..R17, R28, R29
1275    do_prologue_saves 10
    ldi     Mask, 0xff
    bld     Mask, 7         ; Mask = 0x7f (unsigned) or 0xff (signed)
1278    ;; Move B into place...
1279    wmov    B0, BB0
1280    wmov    B2, BB2
1281    ;; ...and extend it
    and     BB3, Mask       ; unsigned: clear the sign bit first
    lsl     BB3             ; Carry = (possibly masked) sign
    sbc     B4, B4          ; broadcast to 0x00 / 0xff
1285    mov     B5, B4
1286    wmov    B6, B4
1287    ;; Move A into place...
1288    wmov    A0, AA0
1289    wmov    A2, AA2
1290    ;; ...and extend it
1291    and     AA3, Mask
1292    lsl     AA3
1293    sbc     A4, A4
1294    mov     A5, A4
1295    wmov    A6, A4
1296    XCALL   __muldi3
1297    do_epilogue_restores 10
1298ENDF __umulsidi3
1299
1300#undef A0
1301#undef A1
1302#undef A2
1303#undef A3
1304#undef A4
1305#undef A5
1306#undef A6
1307#undef A7
1308#undef B0
1309#undef B1
1310#undef B2
1311#undef B3
1312#undef B4
1313#undef B5
1314#undef B6
1315#undef B7
1316#undef AA0
1317#undef AA1
1318#undef AA2
1319#undef AA3
1320#undef BB0
1321#undef BB1
1322#undef BB2
1323#undef BB3
1324#undef Mask
1325#endif /* L_mulsidi3 && !HAVE_MUL */
1326#endif /* if not __AVR_TINY__ */
1327;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1328
1329
1330.section .text.libgcc.div, "ax", @progbits
1331
1332/*******************************************************
1333       Division 8 / 8 => (result + remainder)
1334*******************************************************/
1335#define	r_rem	r25	/* remainder */
1336#define	r_arg1	r24	/* dividend, quotient */
1337#define	r_arg2	r22	/* divisor */
1338#define	r_cnt	r23	/* loop count */
1339
1340#if defined (L_udivmodqi4)
DEFUN __udivmodqi4
	;; Unsigned 8 / 8: restoring shift-and-subtract division.
	;; In:  r24 = dividend, r22 = divisor
	;; Out: r24 = quotient, r25 = remainder
	;; Each rol r_arg1 shifts the *inverted* quotient bit in (C = 1
	;; when the divisor did NOT fit), hence the final "com".
	sub	r_rem,r_rem	; clear remainder and carry
	ldi	r_cnt,9		; init loop counter
	rjmp	__udivmodqi4_ep	; jump to entry point
__udivmodqi4_loop:
	rol	r_rem		; shift dividend into remainder
	cp	r_rem,r_arg2	; compare remainder & divisor
	brcs	__udivmodqi4_ep	; remainder < divisor
	sub	r_rem,r_arg2	; divisor fits: subtract it (clears C)
__udivmodqi4_ep:
	rol	r_arg1		; shift dividend (with CARRY)
	dec	r_cnt		; decrement loop counter
	brne	__udivmodqi4_loop
	com	r_arg1		; complement result
				; because C flag was complemented in loop
	ret
ENDF __udivmodqi4
1358#endif /* defined (L_udivmodqi4) */
1359
1360#if defined (L_divmodqi4)
DEFUN __divmodqi4
	;; Signed 8 / 8 via __udivmodqi4 on absolute values.
	;; Remainder takes the dividend's sign (T flag);
	;; quotient takes the XOR of both signs (r0.7).
        bst     r_arg1,7	; store sign of dividend
        mov     __tmp_reg__,r_arg1
        eor     __tmp_reg__,r_arg2; r0.7 is sign of result
        sbrc	r_arg1,7
	neg     r_arg1		; dividend negative : negate
        sbrc	r_arg2,7
	neg     r_arg2		; divisor negative : negate
	XCALL	__udivmodqi4	; do the unsigned div/mod
	brtc	__divmodqi4_1
	neg	r_rem		; correct remainder sign
__divmodqi4_1:
	sbrc	__tmp_reg__,7
	neg	r_arg1		; correct result sign
__divmodqi4_exit:
	ret
ENDF __divmodqi4
1378#endif /* defined (L_divmodqi4) */
1379
1380#undef r_rem
1381#undef r_arg1
1382#undef r_arg2
1383#undef r_cnt
1384
1385
1386/*******************************************************
1387       Division 16 / 16 => (result + remainder)
1388*******************************************************/
1389#define	r_remL	r26	/* remainder Low */
1390#define	r_remH	r27	/* remainder High */
1391
1392/* return: remainder */
1393#define	r_arg1L	r24	/* dividend Low */
1394#define	r_arg1H	r25	/* dividend High */
1395
1396/* return: quotient */
1397#define	r_arg2L	r22	/* divisor Low */
1398#define	r_arg2H	r23	/* divisor High */
1399
1400#define	r_cnt	r21	/* loop count */
1401
1402#if defined (L_udivmodhi4)
DEFUN __udivmodhi4
	;; Unsigned 16 / 16: restoring shift-and-subtract division.
	;; In:  R25:R24 = dividend, R23:R22 = divisor
	;; Out: R23:R22 = quotient, R25:R24 = remainder
	sub	r_remL,r_remL
	sub	r_remH,r_remH	; clear remainder and carry
	ldi	r_cnt,17	; init loop counter
	rjmp	__udivmodhi4_ep	; jump to entry point
__udivmodhi4_loop:
        rol	r_remL		; shift dividend into remainder
	rol	r_remH
        cp	r_remL,r_arg2L	; compare remainder & divisor
	cpc	r_remH,r_arg2H
        brcs	__udivmodhi4_ep	; remainder < divisor
        sub	r_remL,r_arg2L	; divisor fits: subtract it (clears C)
        sbc	r_remH,r_arg2H
__udivmodhi4_ep:
        rol	r_arg1L		; shift dividend (with CARRY)
        rol	r_arg1H
        dec	r_cnt		; decrement loop counter
        brne	__udivmodhi4_loop
	com	r_arg1L		; complement quotient:
	com	r_arg1H		; C was inverted in the loop
; div/mod results to return registers, as for the div() function
	mov_l	r_arg2L, r_arg1L	; quotient
	mov_h	r_arg2H, r_arg1H
	mov_l	r_arg1L, r_remL		; remainder
	mov_h	r_arg1H, r_remH
	ret
ENDF __udivmodhi4
1430#endif /* defined (L_udivmodhi4) */
1431
1432#if defined (L_divmodhi4)
DEFUN __divmodhi4
    ;; Signed 16 / 16 via __udivmodhi4 on absolute values.
    ;; T = sign of dividend (-> remainder), r0.7 = sign of quotient.
    ;; Also exported as "_div" for the C library's div().
    .global _div
_div:
    bst     r_arg1H,7           ; store sign of dividend
    mov     __tmp_reg__,r_arg2H
    brtc    0f
    com     __tmp_reg__         ; r0.7 is sign of result
    rcall   __divmodhi4_neg1    ; dividend negative: negate
0:
    sbrc    r_arg2H,7
    rcall   __divmodhi4_neg2    ; divisor negative: negate
    XCALL   __udivmodhi4        ; do the unsigned div/mod
    sbrc    __tmp_reg__,7
    rcall   __divmodhi4_neg2    ; correct quotient sign (now in r_arg2)
    brtc    __divmodhi4_exit
    ;; T set: fall through to negate the remainder (in r_arg1)
__divmodhi4_neg1:
    ;; correct dividend/remainder sign
    com     r_arg1H
    neg     r_arg1L
    sbci    r_arg1H,0xff
    ret
__divmodhi4_neg2:
    ;; correct divisor/quotient sign
    com     r_arg2H
    neg     r_arg2L
    sbci    r_arg2H,0xff
__divmodhi4_exit:
    ret
ENDF __divmodhi4
1462#endif /* defined (L_divmodhi4) */
1463
1464#undef r_remH
1465#undef r_remL
1466
1467#undef r_arg1H
1468#undef r_arg1L
1469
1470#undef r_arg2H
1471#undef r_arg2L
1472
1473#undef r_cnt
1474
1475/*******************************************************
1476       Division 24 / 24 => (result + remainder)
1477*******************************************************/
1478
1479;; A[0..2]: In: Dividend; Out: Quotient
1480#define A0  22
1481#define A1  A0+1
1482#define A2  A0+2
1483
1484;; B[0..2]: In: Divisor;   Out: Remainder
1485#define B0  18
1486#define B1  B0+1
1487#define B2  B0+2
1488
1489;; C[0..2]: Expand remainder
1490#define C0  __zero_reg__
1491#define C1  26
1492#define C2  25
1493
1494;; Loop counter
1495#define r_cnt   21
1496
1497#if defined (L_udivmodpsi4)
;; R24:R22 = R24:R22  udiv  R20:R18
1499;; R20:R18 = R24:R22  umod  R20:R18
1500;; Clobbers: R21, R25, R26
1501
DEFUN __udivmodpsi4
    ;; Unsigned 24 / 24: restoring shift-and-subtract division.
    ;; Quotient stays in A[] (inverted bits, complemented at the end);
    ;; remainder is accumulated in C[] and copied to B[] on exit.
    ; init loop counter
    ldi     r_cnt, 24+1
    ; Clear remainder and carry.  C0 is already 0
    clr     C1
    sub     C2, C2
    ; jump to entry point
    rjmp    __udivmodpsi4_start
__udivmodpsi4_loop:
    ; shift dividend into remainder
    rol     C0
    rol     C1
    rol     C2
    ; compare remainder & divisor
    cp      C0, B0
    cpc     C1, B1
    cpc     C2, B2
    brcs    __udivmodpsi4_start ; remainder < divisor
    sub     C0, B0              ; divisor fits: subtract it (clears C)
    sbc     C1, B1
    sbc     C2, B2
__udivmodpsi4_start:
    ; shift dividend (with CARRY)
    rol     A0
    rol     A1
    rol     A2
    ; decrement loop counter
    dec     r_cnt
    brne    __udivmodpsi4_loop
    ; complement quotient: C was inverted in the loop
    com     A0
    com     A1
    com     A2
    ; div/mod results to return registers
    ; remainder
    mov     B0, C0
    mov     B1, C1
    mov     B2, C2
    clr     __zero_reg__ ; C0
    ret
ENDF __udivmodpsi4
1542#endif /* defined (L_udivmodpsi4) */
1543
1544#if defined (L_divmodpsi4)
1545;; R24:R22 = R24:R22  div  R20:R18
1546;; R20:R18 = R24:R22  mod  R20:R18
1547;; Clobbers: T, __tmp_reg__, R21, R25, R26
1548
DEFUN __divmodpsi4
    ;; Signed 24 / 24 via __udivmodpsi4 on absolute values.
    ;; T = sign of dividend (-> remainder), r0.7 = sign of quotient.
    ; R0.7 will contain the sign of the result:
    ; R0.7 = A.sign ^ B.sign
    mov __tmp_reg__, B2
    ; T-flag = sign of dividend
    bst     A2, 7
    brtc    0f
    com     __tmp_reg__
    ; Adjust dividend's sign
    rcall   __divmodpsi4_negA
0:
    ; Adjust divisor's sign
    sbrc    B2, 7
    rcall   __divmodpsi4_negB

    ; Do the unsigned div/mod
    XCALL   __udivmodpsi4

    ; Adjust quotient's sign
    sbrc    __tmp_reg__, 7
    rcall   __divmodpsi4_negA

    ; Adjust remainder's sign (T set: fall through into negB)
    brtc    __divmodpsi4_end

__divmodpsi4_negB:
    ; Correct divisor/remainder sign
    com     B2
    com     B1
    neg     B0
    sbci    B1, -1
    sbci    B2, -1
    ret

    ; Correct dividend/quotient sign
__divmodpsi4_negA:
    com     A2
    com     A1
    neg     A0
    sbci    A1, -1
    sbci    A2, -1
__divmodpsi4_end:
    ret

ENDF __divmodpsi4
1594#endif /* defined (L_divmodpsi4) */
1595
1596#undef A0
1597#undef A1
1598#undef A2
1599
1600#undef B0
1601#undef B1
1602#undef B2
1603
1604#undef C0
1605#undef C1
1606#undef C2
1607
1608#undef r_cnt
1609
1610/*******************************************************
1611       Division 32 / 32 => (result + remainder)
1612*******************************************************/
1613#define	r_remHH	r31	/* remainder High */
1614#define	r_remHL	r30
1615#define	r_remH	r27
1616#define	r_remL	r26	/* remainder Low */
1617
1618/* return: remainder */
1619#define	r_arg1HH r25	/* dividend High */
1620#define	r_arg1HL r24
1621#define	r_arg1H  r23
1622#define	r_arg1L  r22	/* dividend Low */
1623
1624/* return: quotient */
1625#define	r_arg2HH r21	/* divisor High */
1626#define	r_arg2HL r20
1627#define	r_arg2H  r19
1628#define	r_arg2L  r18	/* divisor Low */
1629
1630#define	r_cnt __zero_reg__  /* loop count (0 after the loop!) */
1631
1632#if defined (L_udivmodsi4)
DEFUN __udivmodsi4
	;; Unsigned 32 / 32: restoring shift-and-subtract division.
	;; In:  R25:R22 = dividend, R21:R18 = divisor
	;; Out: R21:R18 = quotient, R25:R22 = remainder
	;; Uses __zero_reg__ as counter; it is 0 again after the loop.
	ldi	r_remL, 33	; init loop counter
	mov	r_cnt, r_remL
	sub	r_remL,r_remL
	sub	r_remH,r_remH	; clear remainder and carry
	mov_l	r_remHL, r_remL
	mov_h	r_remHH, r_remH
	rjmp	__udivmodsi4_ep	; jump to entry point
__udivmodsi4_loop:
        rol	r_remL		; shift dividend into remainder
	rol	r_remH
	rol	r_remHL
	rol	r_remHH
        cp	r_remL,r_arg2L	; compare remainder & divisor
	cpc	r_remH,r_arg2H
	cpc	r_remHL,r_arg2HL
	cpc	r_remHH,r_arg2HH
	brcs	__udivmodsi4_ep	; remainder < divisor
        sub	r_remL,r_arg2L	; divisor fits: subtract it (clears C)
        sbc	r_remH,r_arg2H
        sbc	r_remHL,r_arg2HL
        sbc	r_remHH,r_arg2HH
__udivmodsi4_ep:
        rol	r_arg1L		; shift dividend (with CARRY)
        rol	r_arg1H
        rol	r_arg1HL
        rol	r_arg1HH
        dec	r_cnt		; decrement loop counter
        brne	__udivmodsi4_loop
				; __zero_reg__ now restored (r_cnt == 0)
	com	r_arg1L		; complement quotient:
	com	r_arg1H		; C was inverted in the loop
	com	r_arg1HL
	com	r_arg1HH
; div/mod results to return registers, as for the ldiv() function
	mov_l	r_arg2L,  r_arg1L	; quotient
	mov_h	r_arg2H,  r_arg1H
	mov_l	r_arg2HL, r_arg1HL
	mov_h	r_arg2HH, r_arg1HH
	mov_l	r_arg1L,  r_remL	; remainder
	mov_h	r_arg1H,  r_remH
	mov_l	r_arg1HL, r_remHL
	mov_h	r_arg1HH, r_remHH
	ret
ENDF __udivmodsi4
1678#endif /* defined (L_udivmodsi4) */
1679
1680#if defined (L_divmodsi4)
DEFUN __divmodsi4
    ;; Signed 32 / 32 via __udivmodsi4 on absolute values.
    ;; T = sign of dividend (-> remainder), r0.7 = sign of quotient.
    mov     __tmp_reg__,r_arg2HH
    bst     r_arg1HH,7          ; store sign of dividend
    brtc    0f
    com     __tmp_reg__         ; r0.7 is sign of result
    XCALL   __negsi2            ; dividend negative: negate
0:
    sbrc    r_arg2HH,7
    rcall   __divmodsi4_neg2    ; divisor negative: negate
    XCALL   __udivmodsi4        ; do the unsigned div/mod
    sbrc    __tmp_reg__, 7      ; correct quotient sign
    rcall   __divmodsi4_neg2
    brtc    __divmodsi4_exit    ; correct remainder sign
    XJMP    __negsi2            ; tail-call: remainder is in R25:R22
__divmodsi4_neg2:
    ;; correct divisor/quotient sign
    com     r_arg2HH
    com     r_arg2HL
    com     r_arg2H
    neg     r_arg2L
    sbci    r_arg2H,0xff
    sbci    r_arg2HL,0xff
    sbci    r_arg2HH,0xff
__divmodsi4_exit:
    ret
ENDF __divmodsi4
1707#endif /* defined (L_divmodsi4) */
1708
1709#if defined (L_negsi2)
1710;; (set (reg:SI 22)
1711;;      (neg:SI (reg:SI 22)))
1712;; Sets the V flag for signed overflow tests
DEFUN __negsi2
    NEG4    22      ; negate the 32-bit value in R25:R22 in place
    ret
ENDF __negsi2
1717#endif /* L_negsi2 */
1718
1719#undef r_remHH
1720#undef r_remHL
1721#undef r_remH
1722#undef r_remL
1723#undef r_arg1HH
1724#undef r_arg1HL
1725#undef r_arg1H
1726#undef r_arg1L
1727#undef r_arg2HH
1728#undef r_arg2HL
1729#undef r_arg2H
1730#undef r_arg2L
1731#undef r_cnt
1732
1733/* *di routines use registers below R19 and won't work with tiny arch
1734   right now. */
1735
1736#if !defined (__AVR_TINY__)
1737/*******************************************************
1738       Division 64 / 64
1739       Modulo   64 % 64
1740*******************************************************/
1741
1742;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1743;; at least 16k of Program Memory.  For smaller Devices, depend
;; on MOVW and SP Size.  There is a connection between SP Size and
1745;; Flash Size so that SP Size can be used to test for Flash Size.
1746
1747#if defined (__AVR_HAVE_JMP_CALL__)
1748#   define SPEED_DIV 8
1749#elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1750#   define SPEED_DIV 16
1751#else
1752#   define SPEED_DIV 0
1753#endif
1754
1755;; A[0..7]: In: Dividend;
1756;; Out: Quotient  (T = 0)
1757;; Out: Remainder (T = 1)
1758#define A0  18
1759#define A1  A0+1
1760#define A2  A0+2
1761#define A3  A0+3
1762#define A4  A0+4
1763#define A5  A0+5
1764#define A6  A0+6
1765#define A7  A0+7
1766
1767;; B[0..7]: In: Divisor;   Out: Clobber
1768#define B0  10
1769#define B1  B0+1
1770#define B2  B0+2
1771#define B3  B0+3
1772#define B4  B0+4
1773#define B5  B0+5
1774#define B6  B0+6
1775#define B7  B0+7
1776
1777;; C[0..7]: Expand remainder;  Out: Remainder (unused)
1778#define C0  8
1779#define C1  C0+1
1780#define C2  30
1781#define C3  C2+1
1782#define C4  28
1783#define C5  C4+1
1784#define C6  26
1785#define C7  C6+1
1786
1787;; Holds Signs during Division Routine
1788#define SS      __tmp_reg__
1789
1790;; Bit-Counter in Division Routine
1791#define R_cnt   __zero_reg__
1792
1793;; Scratch Register for Negation
1794#define NN      r31
1795
1796#if defined (L_udivdi3)
1797
1798;; R25:R18 = R24:R18  umod  R17:R10
1799;; Ordinary ABI-Function
1800
DEFUN __umoddi3
    set                     ; T = 1: worker returns the remainder
    rjmp __udivdi3_umoddi3
ENDF __umoddi3
1805
1806;; R25:R18 = R24:R18  udiv  R17:R10
1807;; Ordinary ABI-Function
1808
DEFUN __udivdi3
    clt                     ; T = 0: worker returns the quotient
    ;; FALLTHRU to __udivdi3_umoddi3
ENDF __udivdi3
1812
DEFUN __udivdi3_umoddi3
    ;; Save the call-saved registers (R8, R9, R28, R29) that
    ;; __udivmod64 clobbers, then run it; T selects div vs. mod.
    push    C0
    push    C1
    push    C4
    push    C5
    XCALL   __udivmod64
    pop     C5
    pop     C4
    pop     C1
    pop     C0
    ret
ENDF __udivdi3_umoddi3
1825#endif /* L_udivdi3 */
1826
1827#if defined (L_udivmod64)
1828
1829;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1830;; No Registers saved/restored; the Callers will take Care.
1831;; Preserves B[] and T-flag
1832;; T = 0: Compute Quotient  in A[]
1833;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1834
DEFUN __udivmod64
    ;; Shift-and-subtract 64 / 64 division core.
    ;; A[] = dividend, B[] = divisor (preserved), C[] = remainder.
    ;; Quotient bits are OR'ed into A0 and travel left as A[] shifts.

    ;; Clear Remainder (C6, C7 will follow)
    clr     C0
    clr     C1
    wmov    C2, C0
    wmov    C4, C0
    ldi     C7, 64

#if SPEED_DIV == 0 || SPEED_DIV == 16
    ;; Initialize Loop-Counter
    mov     R_cnt, C7
    wmov    C6, C0
#endif /* SPEED_DIV */

#if SPEED_DIV == 8

    push    A7
    clr     C6

1:  ;; Compare shifted Dividend against Divisor
    ;; If -- even after Shifting -- it is smaller...
    CP  A7,B0  $  cpc C0,B1  $  cpc C1,B2  $  cpc C2,B3
    cpc C3,B4  $  cpc C4,B5  $  cpc C5,B6  $  cpc C6,B7
    brcc    2f

    ;; ...then we can subtract it.  Thus, it is legal to shift left
               $  mov C6,C5  $  mov C5,C4  $  mov C4,C3
    mov C3,C2  $  mov C2,C1  $  mov C1,C0  $  mov C0,A7
    mov A7,A6  $  mov A6,A5  $  mov A5,A4  $  mov A4,A3
    mov A3,A2  $  mov A2,A1  $  mov A1,A0  $  clr A0

    ;; 8 Bits are done
    subi    C7, 8
    brne    1b

    ;; Shifted 64 Bits:  A7 has traveled to C7
    pop     C7
    ;; Divisor is greater than Dividend. We have:
    ;; A[] % B[] = A[]
    ;; A[] / B[] = 0
    ;; Thus, we can return immediately
    rjmp    5f

2:  ;; Initialize Bit-Counter with Number of Bits still to be performed
    mov     R_cnt, C7

    ;; Push of A7 is not needed because C7 is still 0
    pop     C7
    clr     C7

#elif  SPEED_DIV == 16

    ;; Compare shifted Dividend against Divisor
    cp      A7, B3
    cpc     C0, B4
    cpc     C1, B5
    cpc     C2, B6
    cpc     C3, B7
    brcc    2f

    ;; Divisor is greater than shifted Dividend: We can shift the Dividend
    ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
    wmov  C2,A6  $  wmov C0,A4
    wmov  A6,A2  $  wmov A4,A0
    wmov  A2,C6  $  wmov A0,C4

    ;; Set Bit Counter to 32
    lsr     R_cnt
2:
#elif SPEED_DIV
#error SPEED_DIV = ?
#endif /* SPEED_DIV */

;; The very Division + Remainder Routine

3:  ;; Left-shift Dividend...
    lsl A0     $  rol A1     $  rol A2     $  rol A3
    rol A4     $  rol A5     $  rol A6     $  rol A7

    ;; ...into Remainder
    rol C0     $  rol C1     $  rol C2     $  rol C3
    rol C4     $  rol C5     $  rol C6     $  rol C7

    ;; Compare Remainder and Divisor
    CP  C0,B0  $  cpc C1,B1  $  cpc C2,B2  $  cpc C3,B3
    cpc C4,B4  $  cpc C5,B5  $  cpc C6,B6  $  cpc C7,B7

    brcs 4f

    ;; Divisor fits into Remainder:  Subtract it from Remainder...
    SUB C0,B0  $  sbc C1,B1  $  sbc C2,B2  $  sbc C3,B3
    sbc C4,B4  $  sbc C5,B5  $  sbc C6,B6  $  sbc C7,B7

    ;; ...and set according Bit in the upcoming Quotient
    ;; The Bit will travel to its final Position
    ori A0, 1

4:  ;; This Bit is done
    dec     R_cnt
    brne    3b
    ;; __zero_reg__ is 0 again

    ;; T = 0: We are fine with the Quotient in A[]
    ;; T = 1: Copy Remainder to A[]
5:  brtc    6f
    wmov    A0, C0
    wmov    A2, C2
    wmov    A4, C4
    wmov    A6, C6
    ;; Move the Sign of the Result to SS.7
    lsl     SS

6:  ret

ENDF __udivmod64
1951#endif /* L_udivmod64 */
1952
1953
1954#if defined (L_divdi3)
1955
1956;; R25:R18 = R24:R18  mod  R17:R10
1957;; Ordinary ABI-Function
1958
DEFUN __moddi3
    set                     ; T = 1: worker returns the remainder
    rjmp    __divdi3_moddi3
ENDF __moddi3
1963
1964;; R25:R18 = R24:R18  div  R17:R10
1965;; Ordinary ABI-Function
1966
DEFUN __divdi3
    clt                     ; T = 0: worker returns the quotient
    ;; FALLTHRU to __divdi3_moddi3
ENDF __divdi3
1970
DEFUN  __divdi3_moddi3
    ;; Signed 64-bit div/mod worker: take absolute values, run
    ;; __udivmod64, then fix the result's sign from SS (see below).
#if SPEED_DIV
    mov     r31, A7
    or      r31, B7
    brmi    0f
    ;; Both Signs are 0:  the following Complexity is not needed
    XJMP    __udivdi3_umoddi3
#endif /* SPEED_DIV */

0:  ;; The Prologue
    ;; Save 12 Registers:  Y, 17...8
    ;; No Frame needed
    do_prologue_saves 12

    ;; SS.7 will contain the Sign of the Quotient  (A.sign * B.sign)
    ;; SS.6 will contain the Sign of the Remainder (A.sign)
    mov     SS, A7
    asr     SS
    ;; Adjust Dividend's Sign as needed
#if SPEED_DIV
    ;; Compiling for Speed we know that at least one Sign must be < 0
    ;; Thus, if A[] >= 0 then we know B[] < 0
    brpl    22f
#else
    brpl    21f
#endif /* SPEED_DIV */

    XCALL   __negdi2

    ;; Adjust Divisor's Sign and SS.7 as needed
21: tst     B7
    brpl    3f
22: ldi     NN, 1 << 7
    eor     SS, NN          ; flip quotient sign bit SS.7

    ;; Negate B[]: complement all bytes but B0, neg B0, ripple carry
    ldi NN, -1
    com B4     $  com B5     $  com B6     $  com B7
               $  com B1     $  com B2     $  com B3
    NEG B0
               $  sbc B1,NN  $  sbc B2,NN  $  sbc B3,NN
    sbc B4,NN  $  sbc B5,NN  $  sbc B6,NN  $  sbc B7,NN

3:  ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
    XCALL   __udivmod64

    ;; Adjust Result's Sign
    ;; (__udivmod64 shifted SS left when T = 1, so SS.7 now holds the
    ;; sign of whichever result -- quotient or remainder -- is in A[])
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    tst     SS
    brpl    4f
#else
    sbrc    SS, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XCALL   __negdi2

4:  ;; Epilogue: Restore 12 Registers and return
    do_epilogue_restores 12

ENDF __divdi3_moddi3
2029
2030#endif /* L_divdi3 */
2031
2032#undef R_cnt
2033#undef SS
2034#undef NN
2035
2036.section .text.libgcc, "ax", @progbits
2037
2038#define TT __tmp_reg__
2039
2040#if defined (L_adddi3)
2041;; (set (reg:DI 18)
2042;;      (plus:DI (reg:DI 18)
2043;;               (reg:DI 10)))
2044;; Sets the V flag for signed overflow tests
2045;; Sets the C flag for unsigned overflow tests
DEFUN __adddi3
    ;; 64-bit addition: A[] += B[], least significant byte first,
    ;; rippling the carry upwards.  Leaves C and V valid for the
    ;; unsigned/signed overflow tests documented above.
    add     A0, B0
    adc     A1, B1
    adc     A2, B2
    adc     A3, B3
    adc     A4, B4
    adc     A5, B5
    adc     A6, B6
    adc     A7, B7
    ret
ENDF __adddi3
2051#endif /* L_adddi3 */
2052
2053#if defined (L_adddi3_s8)
2054;; (set (reg:DI 18)
2055;;      (plus:DI (reg:DI 18)
2056;;               (sign_extend:SI (reg:QI 26))))
2057;; Sets the V flag for signed overflow tests
2058;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
DEFUN __adddi3_s8
    ;; A[] += sign_extend(R26): TT is the replicated sign byte.
    clr     TT
    sbrc    r26, 7
    com     TT                  ; TT = 0x00 or 0xFF per R26's sign
    ADD A0,r26 $  adc A1,TT  $  adc A2,TT  $  adc A3,TT
    adc A4,TT  $  adc A5,TT  $  adc A6,TT  $  adc A7,TT
    ret
ENDF __adddi3_s8
2067#endif /* L_adddi3_s8 */
2068
2069#if defined (L_subdi3)
2070;; (set (reg:DI 18)
2071;;      (minus:DI (reg:DI 18)
2072;;                (reg:DI 10)))
2073;; Sets the V flag for signed overflow tests
2074;; Sets the C flag for unsigned overflow tests
DEFUN __subdi3
    ;; 64-bit subtraction: A[] -= B[], least significant byte first,
    ;; rippling the borrow upwards.  Leaves C and V valid for the
    ;; unsigned/signed overflow tests documented above.
    sub     A0, B0
    sbc     A1, B1
    sbc     A2, B2
    sbc     A3, B3
    sbc     A4, B4
    sbc     A5, B5
    sbc     A6, B6
    sbc     A7, B7
    ret
ENDF __subdi3
2080#endif /* L_subdi3 */
2081
2082#if defined (L_cmpdi2)
2083;; (set (cc0)
2084;;      (compare (reg:DI 18)
2085;;               (reg:DI 10)))
DEFUN __cmpdi2
    ;; 64-bit compare A[] with B[]: a byte-wise cp/cpc chain whose
    ;; final flags describe A[] - B[] for the caller's branches.
    cp      A0, B0
    cpc     A1, B1
    cpc     A2, B2
    cpc     A3, B3
    cpc     A4, B4
    cpc     A5, B5
    cpc     A6, B6
    cpc     A7, B7
    ret
ENDF __cmpdi2
2091#endif /* L_cmpdi2 */
2092
2093#if defined (L_cmpdi2_s8)
2094;; (set (cc0)
2095;;      (compare (reg:DI 18)
2096;;               (sign_extend:SI (reg:QI 26))))
DEFUN __cmpdi2_s8
    ;; Compare A[] with sign_extend(R26): TT is the replicated sign byte.
    clr     TT
    sbrc    r26, 7
    com     TT                  ; TT = 0x00 or 0xFF per R26's sign
    CP  A0,r26 $  cpc A1,TT  $  cpc A2,TT  $  cpc A3,TT
    cpc A4,TT  $  cpc A5,TT  $  cpc A6,TT  $  cpc A7,TT
    ret
ENDF __cmpdi2_s8
2105#endif /* L_cmpdi2_s8 */
2106
2107#if defined (L_negdi2)
2108;; (set (reg:DI 18)
2109;;      (neg:DI (reg:DI 18)))
2110;; Sets the V flag for signed overflow tests
DEFUN __negdi2
    ;; A[] = -A[]: one's-complement every byte except A0, negate A0
    ;; (which sets C unless A0 was 0), then propagate the increment
    ;; through the upper bytes with "sbci Rd,-1" (Rd = Rd + 1 - C).
    com     A4
    com     A5
    com     A6
    com     A7
    com     A1
    com     A2
    com     A3
    neg     A0
    sbci    A1, -1
    sbci    A2, -1
    sbci    A3, -1
    sbci    A4, -1
    sbci    A5, -1
    sbci    A6, -1
    sbci    A7, -1
    ret
ENDF __negdi2
2121#endif /* L_negdi2 */
2122
2123#undef TT
2124
2125#undef C7
2126#undef C6
2127#undef C5
2128#undef C4
2129#undef C3
2130#undef C2
2131#undef C1
2132#undef C0
2133
2134#undef B7
2135#undef B6
2136#undef B5
2137#undef B4
2138#undef B3
2139#undef B2
2140#undef B1
2141#undef B0
2142
2143#undef A7
2144#undef A6
2145#undef A5
2146#undef A4
2147#undef A3
2148#undef A2
2149#undef A1
2150#undef A0
2151
2152#endif /* !defined (__AVR_TINY__) */
2153
2154
2155.section .text.libgcc.prologue, "ax", @progbits
2156
2157/**********************************
2158 * This is a prologue subroutine
2159 **********************************/
2160#if !defined (__AVR_TINY__)
2161#if defined (L_prologue)
2162
2163;; This function does not clobber T-flag; 64-bit division relies on it
DEFUN __prologue_saves__
	;; Out-of-line prologue: pushes R2..R17, R28, R29, then allocates
	;; a stack frame of R27:R26 bytes (Y := new SP) and jumps back to
	;; the caller through Z (EIND:Z on devices with EIJMP).
	push r2
	push r3
	push r4
	push r5
	push r6
	push r7
	push r8
	push r9
	push r10
	push r11
	push r12
	push r13
	push r14
	push r15
	push r16
	push r17
	push r28
	push r29
#if !defined (__AVR_HAVE_SPH__)
	;; 8-bit stack pointer: single-byte update, inherently atomic
	in	r28,__SP_L__
	sub	r28,r26
	out	__SP_L__,r28
	clr	r29
#elif defined (__AVR_XMEGA__)
	;; XMEGA: hardware guards the SPL/SPH update sequence
	in	r28,__SP_L__
	in	r29,__SP_H__
	sub	r28,r26
	sbc	r29,r27
	out	__SP_L__,r28
	out	__SP_H__,r29
#else
	;; Classic cores: disable interrupts around the non-atomic
	;; two-byte SP update, restoring SREG (incl. I) in between
	in	r28,__SP_L__
	in	r29,__SP_H__
	sub	r28,r26
	sbc	r29,r27
	in	__tmp_reg__,__SREG__
	cli
	out	__SP_H__,r29
	out	__SREG__,__tmp_reg__
	out	__SP_L__,r28
#endif /* #SP = 8/16 */

	XIJMP

ENDF __prologue_saves__
2210#endif /* defined (L_prologue) */
2211
2212/*
2213 * This is an epilogue subroutine
2214 */
2215#if defined (L_epilogue)
2216
DEFUN __epilogue_restores__
	;; Out-of-line epilogue, counterpart of __prologue_saves__:
	;; reloads R2..R17 and the caller's Y from the save area above
	;; the frame, sets SP = Y + R30 (frame size + offset supplied by
	;; the compiler), and returns to the function's caller.
	ldd	r2,Y+18
	ldd	r3,Y+17
	ldd	r4,Y+16
	ldd	r5,Y+15
	ldd	r6,Y+14
	ldd	r7,Y+13
	ldd	r8,Y+12
	ldd	r9,Y+11
	ldd	r10,Y+10
	ldd	r11,Y+9
	ldd	r12,Y+8
	ldd	r13,Y+7
	ldd	r14,Y+6
	ldd	r15,Y+5
	ldd	r16,Y+4
	ldd	r17,Y+3
	ldd	r26,Y+2
#if !defined (__AVR_HAVE_SPH__)
	;; 8-bit stack pointer: single-byte update, inherently atomic
	ldd	r29,Y+1
	add	r28,r30
	out	__SP_L__,r28
	mov	r28, r26
#elif defined (__AVR_XMEGA__)
	;; XMEGA: hardware guards the SPL/SPH update sequence
	ldd  r27,Y+1
	add  r28,r30
	adc  r29,__zero_reg__
	out  __SP_L__,r28
	out  __SP_H__,r29
	wmov 28, 26
#else
	;; Classic cores: disable interrupts around the non-atomic
	;; two-byte SP update, restoring SREG (incl. I) in between
	ldd	r27,Y+1
	add	r28,r30
	adc	r29,__zero_reg__
	in	__tmp_reg__,__SREG__
	cli
	out	__SP_H__,r29
	out	__SREG__,__tmp_reg__
	out	__SP_L__,r28
	mov_l	r28, r26
	mov_h	r29, r27
#endif /* #SP = 8/16 */
	ret
ENDF __epilogue_restores__
2261#endif /* defined (L_epilogue) */
2262#endif /* !defined (__AVR_TINY__) */
2263
2264#ifdef L_exit
2265	.section .fini9,"ax",@progbits
DEFUN _exit
	;; _exit doubles as a weak definition of exit; the .fini8...fini1
	;; section code (inserted by the linker script) runs next.
	.weak	exit
exit:
ENDF _exit
2270
2271	/* Code from .fini8 ... .fini1 sections inserted by ld script.  */
2272
	.section .fini0,"ax",@progbits
	cli				; disable interrupts...
__stop_program:
	rjmp	__stop_program		; ...and spin forever
2277#endif /* defined (L_exit) */
2278
2279#ifdef L_cleanup
	;; Weak no-op _cleanup stub (overridden by stdio's real cleanup).
	.weak	_cleanup
	.func	_cleanup
_cleanup:
	ret
.endfunc
2285#endif /* defined (L_cleanup) */
2286
2287
2288.section .text.libgcc, "ax", @progbits
2289
2290#ifdef L_tablejump2
DEFUN __tablejump2__
    ;; In: Z (plus R24 on EIJMP devices) = word address of a jump-table
    ;; entry in flash.  Convert it to a byte address, load the target's
    ;; word address from the table and jump there.
    lsl     r30             ; Z *= 2: word -> byte address
    rol     r31
#if defined (__AVR_HAVE_EIJMP_EICALL__)
    ;; Word address of gs() jumptable entry in R24:Z
    rol     r24
    out     __RAMPZ__, r24
#elif defined (__AVR_HAVE_ELPM__)
    ;; Word address of jumptable entry in Z
    clr     __tmp_reg__
    rol     __tmp_reg__
    out     __RAMPZ__, __tmp_reg__
#endif

    ;; Read word address from jumptable and jump

#if defined (__AVR_HAVE_ELPMX__)
    elpm    __tmp_reg__, Z+
    elpm    r31, Z
    mov     r30, __tmp_reg__
#ifdef __AVR_HAVE_RAMPD__
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out     __RAMPZ__, __zero_reg__
#endif /* RAMPD */
    XIJMP
#elif defined (__AVR_HAVE_ELPM__)
    ;; No ELPM Z+: push both target bytes and "return" to them
    elpm
    push    r0
    adiw    r30, 1
    elpm
    push    r0
    ret
#elif defined (__AVR_HAVE_LPMX__)
    lpm     __tmp_reg__, Z+
    lpm     r31, Z
    mov     r30, __tmp_reg__
    ijmp
#elif defined (__AVR_TINY__)
    wsubi 30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z
    ld __tmp_reg__, Z+
    ld r31, Z   ; Use ld instead of lpm to load Z
    mov r30, __tmp_reg__
    ijmp
#else
    ;; No LPM Z+: push both target bytes and "return" to them
    lpm
    push    r0
    adiw    r30, 1
    lpm
    push    r0
    ret
#endif
ENDF __tablejump2__
2343#endif /* L_tablejump2 */
2344
2345#if defined(__AVR_TINY__)
2346#ifdef L_copy_data
2347        .section .init4,"ax",@progbits
        ;; AVR-Tiny startup: copy .data's initializers from flash
        ;; (mapped into the data space at __AVR_TINY_PM_BASE_ADDRESS__,
        ;; so plain ld works) to RAM at __data_start..__data_end.
        .global __do_copy_data
__do_copy_data:
        ldi     r18, hi8(__data_end)
        ldi     r26, lo8(__data_start)
        ldi     r27, hi8(__data_start)
        ldi     r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
        ldi     r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
        rjmp    .L__do_copy_data_start
.L__do_copy_data_loop:
        ld      r19, z+
        st      X+, r19
.L__do_copy_data_start:
        cpi     r26, lo8(__data_end)
        cpc     r27, r18
        brne    .L__do_copy_data_loop
2363#endif
2364#else
2365#ifdef L_copy_data
2366	.section .init4,"ax",@progbits
DEFUN __do_copy_data
	;; Startup: copy .data's initializers from flash (Z, load address)
	;; to RAM (X, __data_start..__data_end).  Three variants depending
	;; on which flash-read instructions the device provides.
#if defined(__AVR_HAVE_ELPMX__)
	ldi	r17, hi8(__data_end)
	ldi	r26, lo8(__data_start)
	ldi	r27, hi8(__data_start)
	ldi	r30, lo8(__data_load_start)
	ldi	r31, hi8(__data_load_start)
	ldi	r16, hh8(__data_load_start)
	out	__RAMPZ__, r16
	rjmp	.L__do_copy_data_start
.L__do_copy_data_loop:
	elpm	r0, Z+
	st	X+, r0
.L__do_copy_data_start:
	cpi	r26, lo8(__data_end)
	cpc	r27, r17
	brne	.L__do_copy_data_loop
#elif  !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
	;; Plain ELPM: step Z by hand and bump RAMPZ on 64K carry
	ldi	r17, hi8(__data_end)
	ldi	r26, lo8(__data_start)
	ldi	r27, hi8(__data_start)
	ldi	r30, lo8(__data_load_start)
	ldi	r31, hi8(__data_load_start)
	ldi	r16, hh8(__data_load_start - 0x10000)
.L__do_copy_data_carry:
	inc	r16
	out	__RAMPZ__, r16
	rjmp	.L__do_copy_data_start
.L__do_copy_data_loop:
	elpm
	st	X+, r0
	adiw	r30, 1
	brcs	.L__do_copy_data_carry
.L__do_copy_data_start:
	cpi	r26, lo8(__data_end)
	cpc	r27, r17
	brne	.L__do_copy_data_loop
#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
	;; <= 64K flash: LPM (with Z+ when available)
	ldi	r17, hi8(__data_end)
	ldi	r26, lo8(__data_start)
	ldi	r27, hi8(__data_start)
	ldi	r30, lo8(__data_load_start)
	ldi	r31, hi8(__data_load_start)
	rjmp	.L__do_copy_data_start
.L__do_copy_data_loop:
#if defined (__AVR_HAVE_LPMX__)
	lpm	r0, Z+
#else
	lpm
	adiw	r30, 1
#endif
	st	X+, r0
.L__do_copy_data_start:
	cpi	r26, lo8(__data_end)
	cpc	r27, r17
	brne	.L__do_copy_data_loop
#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
	;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
	out	__RAMPZ__, __zero_reg__
#endif /* ELPM && RAMPD */
ENDF __do_copy_data
2429#endif /* L_copy_data */
2430#endif /* !defined (__AVR_TINY__) */
2431
2432/* __do_clear_bss is only necessary if there is anything in .bss section.  */
2433
2434#ifdef L_clear_bss
2435	.section .init4,"ax",@progbits
DEFUN __do_clear_bss
	;; Startup: zero-fill RAM from __bss_start to __bss_end via X.
	ldi	r18, hi8(__bss_end)
	ldi	r26, lo8(__bss_start)
	ldi	r27, hi8(__bss_start)
	rjmp	.do_clear_bss_start
.do_clear_bss_loop:
	st	X+, __zero_reg__
.do_clear_bss_start:
	cpi	r26, lo8(__bss_end)
	cpc	r27, r18
	brne	.do_clear_bss_loop
ENDF __do_clear_bss
2448#endif /* L_clear_bss */
2449
2450/* __do_global_ctors and __do_global_dtors are only necessary
2451   if there are any constructors/destructors.  */
2452
2453#if defined(__AVR_TINY__)
2454#define cdtors_tst_reg r18
2455#else
2456#define cdtors_tst_reg r17
2457#endif
2458
2459#ifdef L_ctors
2460	.section .init6,"ax",@progbits
DEFUN __do_global_ctors
    ;; Walk the .ctors table backwards from __ctors_end down to
    ;; __ctors_start (Y = current word address, R16 = hh8 byte on
    ;; EIJMP devices), dispatching each entry via __tablejump2__.
    ldi     cdtors_tst_reg, pm_hi8(__ctors_start)
    ldi     r28, pm_lo8(__ctors_end)
    ldi     r29, pm_hi8(__ctors_end)
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi     r16, pm_hh8(__ctors_end)
#endif /* HAVE_EIJMP */
    rjmp    .L__do_global_ctors_start
.L__do_global_ctors_loop:
    wsubi   28, 1
#ifdef __AVR_HAVE_EIJMP_EICALL__
    sbc     r16, __zero_reg__
    mov     r24, r16            ; R24 = hh8 part for __tablejump2__
#endif /* HAVE_EIJMP */
    mov_h   r31, r29
    mov_l   r30, r28
    XCALL   __tablejump2__
.L__do_global_ctors_start:
    cpi     r28, pm_lo8(__ctors_start)
    cpc     r29, cdtors_tst_reg
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi     r24, pm_hh8(__ctors_start)
    cpc     r16, r24
#endif /* HAVE_EIJMP */
    brne    .L__do_global_ctors_loop
ENDF __do_global_ctors
2487#endif /* L_ctors */
2488
2489#ifdef L_dtors
2490	.section .fini6,"ax",@progbits
DEFUN __do_global_dtors
    ;; Walk the .dtors table forwards from __dtors_start up to
    ;; __dtors_end (Y = current word address, R16 = hh8 byte on
    ;; EIJMP devices), dispatching each entry via __tablejump2__.
    ldi     cdtors_tst_reg, pm_hi8(__dtors_end)
    ldi     r28, pm_lo8(__dtors_start)
    ldi     r29, pm_hi8(__dtors_start)
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi     r16, pm_hh8(__dtors_start)
#endif /* HAVE_EIJMP */
    rjmp    .L__do_global_dtors_start
.L__do_global_dtors_loop:
#ifdef __AVR_HAVE_EIJMP_EICALL__
    mov     r24, r16            ; R24 = hh8 part for __tablejump2__
#endif /* HAVE_EIJMP */
    mov_h   r31, r29
    mov_l   r30, r28
    XCALL   __tablejump2__
    waddi   28, 1
#ifdef __AVR_HAVE_EIJMP_EICALL__
    adc     r16, __zero_reg__
#endif /* HAVE_EIJMP */
.L__do_global_dtors_start:
    cpi     r28, pm_lo8(__dtors_end)
    cpc     r29, cdtors_tst_reg
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi     r24, pm_hh8(__dtors_end)
    cpc     r16, r24
#endif /* HAVE_EIJMP */
    brne    .L__do_global_dtors_loop
ENDF __do_global_dtors
2519#endif /* L_dtors */
2520
2521#undef cdtors_tst_reg
2522
2523.section .text.libgcc, "ax", @progbits
2524
2525#if !defined (__AVR_TINY__)
2526;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2527;; Loading n bytes from Flash; n = 3,4
2528;; R22... = Flash[Z]
2529;; Clobbers: __tmp_reg__
2530
2531#if (defined (L_load_3)        \
2532     || defined (L_load_4))    \
2533    && !defined (__AVR_HAVE_LPMX__)
2534
2535;; Destination
2536#define D0  22
2537#define D1  D0+1
2538#define D2  D0+2
2539#define D3  D0+3
2540
;; Load one byte from Flash[Z] into \dest using plain LPM (device has no
;; LPM Rd,Z+).  \n is the total number of bytes the caller transfers:
;; for all but the last destination byte, Z is advanced by 1; after the
;; last byte (\dest == D0+\n-1), Z is rewound by \n-1 so the caller sees
;; Z unchanged overall.  Clobbers r0 (__tmp_reg__) and flags.
.macro  .load dest, n
    lpm
    mov     \dest, r0
.if \dest != D0+\n-1
    adiw    r30, 1
.else
    sbiw    r30, \n-1
.endif
.endm
2550
#if defined (L_load_3)
;; R24:R22 = Flash[Z] (3 bytes).
;; Implemented on top of __load_4:  preserve the 4th destination byte
;; (D3 = r25) around the 4-byte load, so only 3 bytes are effectively
;; written.  Clobbers __tmp_reg__.
DEFUN __load_3
    push  D3
    XCALL __load_4
    pop   D3
    ret
ENDF __load_3
#endif /* L_load_3 */
2559
#if defined (L_load_4)
;; R25:R22 = Flash[Z] (4 bytes).  Z is restored by the last .load
;; expansion (see the .load macro).  Clobbers __tmp_reg__.
DEFUN __load_4
    .load D0, 4
    .load D1, 4
    .load D2, 4
    .load D3, 4
    ret
ENDF __load_4
#endif /* L_load_4 */
2569
#endif /* L_load_3 || L_load_4 */
2571#endif /* !defined (__AVR_TINY__) */
2572
2573#if !defined (__AVR_TINY__)
2574;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2575;; Loading n bytes from Flash or RAM;  n = 1,2,3,4
2576;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2577;; Clobbers: __tmp_reg__, R21, R30, R31
2578
2579#if (defined (L_xload_1)            \
2580     || defined (L_xload_2)         \
2581     || defined (L_xload_3)         \
2582     || defined (L_xload_4))
2583
2584;; Destination
2585#define D0  22
2586#define D1  D0+1
2587#define D2  D0+2
2588#define D3  D0+3
2589
2590;; Register containing bits 16+ of the address
2591
2592#define HHI8  21
2593
;; Load one byte from Flash into \dest and advance the (extended)
;; address, picking the best instruction the device offers:
;;   ELPM Rd,Z+  — one instruction, RAMPZ:Z auto-increment.
;;   ELPM        — result in r0; advance Z by hand and carry into
;;                 HHI8, which is mirrored to RAMPZ for the next byte.
;;   LPM Rd,Z+   — 16-bit flash address only.
;;   LPM         — result in r0; advance Z by hand.
;; \n is the caller's total byte count; the address is only advanced
;; for bytes before the last one (\dest != D0+\n-1).
;; After the last byte, on ELPM+RAMPD devices RAMPZ is reset to 0 so
;; later RAM accesses on EBI devices don't go through a stale RAMPZ.
.macro  .xload dest, n
#if defined (__AVR_HAVE_ELPMX__)
    elpm    \dest, Z+
#elif defined (__AVR_HAVE_ELPM__)
    elpm
    mov     \dest, r0
.if \dest != D0+\n-1
    adiw    r30, 1
    adc     HHI8, __zero_reg__
    out     __RAMPZ__, HHI8
.endif
#elif defined (__AVR_HAVE_LPMX__)
    lpm     \dest, Z+
#else
    lpm
    mov     \dest, r0
.if \dest != D0+\n-1
    adiw    r30, 1
.endif
#endif
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
.if \dest == D0+\n-1
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out     __RAMPZ__, __zero_reg__
.endif
#endif
.endm ; .xload
2621
#if defined (L_xload_1)
;; R22 = *(R21:Z);  bit 7 of HHI8 (R21) selects RAM (set) or Flash
;; (clear).  On LPMX-only devices both loads are single skippable
;; instructions, so a branch-free SBRC/SBRS sequence is used.
DEFUN __xload_1
#if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
    sbrc    HHI8, 7
    ld      D0, Z       ; RAM source
    sbrs    HHI8, 7
    lpm     D0, Z       ; Flash source
    ret
#else
    sbrc    HHI8, 7
    rjmp    1f          ; bit 7 set -> RAM source
#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload  D0, 1
    ret
1:  ld      D0, Z
    ret
#endif /* LPMx && ! ELPM */
ENDF __xload_1
#endif /* L_xload_1 */
2643
#if defined (L_xload_2)
;; R23:R22 = *(R21:Z), 2 bytes;  bit 7 of HHI8 (R21) selects RAM (set)
;; or Flash (clear).  Clobbers __tmp_reg__, R21, Z (see section header).
DEFUN __xload_2
    sbrc    HHI8, 7
    rjmp    1f          ; bit 7 set -> RAM source
#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload  D0, 2
    .xload  D1, 2
    ret
1:  ld      D0, Z+
    ld      D1, Z+
    ret
ENDF __xload_2
#endif /* L_xload_2 */
2659
#if defined (L_xload_3)
;; R24:R22 = *(R21:Z), 3 bytes;  bit 7 of HHI8 (R21) selects RAM (set)
;; or Flash (clear).  Clobbers __tmp_reg__, R21, Z (see section header).
DEFUN __xload_3
    sbrc    HHI8, 7
    rjmp    1f          ; bit 7 set -> RAM source
#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload  D0, 3
    .xload  D1, 3
    .xload  D2, 3
    ret
1:  ld      D0, Z+
    ld      D1, Z+
    ld      D2, Z+
    ret
ENDF __xload_3
#endif /* L_xload_3 */
2677
#if defined (L_xload_4)
;; R25:R22 = *(R21:Z), 4 bytes;  bit 7 of HHI8 (R21) selects RAM (set)
;; or Flash (clear).  Clobbers __tmp_reg__, R21, Z (see section header).
DEFUN __xload_4
    sbrc    HHI8, 7
    rjmp    1f          ; bit 7 set -> RAM source
#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload  D0, 4
    .xload  D1, 4
    .xload  D2, 4
    .xload  D3, 4
    ret
1:  ld      D0, Z+
    ld      D1, Z+
    ld      D2, Z+
    ld      D3, Z+
    ret
ENDF __xload_4
#endif /* L_xload_4 */
2697
2698#endif /* L_xload_{1|2|3|4} */
2699#endif /* if !defined (__AVR_TINY__) */
2700
2701#if !defined (__AVR_TINY__)
2702;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2703;; memcopy from Address Space __pgmx to RAM
2704;; R23:Z = Source Address
2705;; X     = Destination Address
2706;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2707
#if defined (L_movmemx)

#define HHI8  23
#define LOOP  24

;; Copy LOOP bytes from the 24-bit address-space source R23:Z to the RAM
;; destination X.  Bit 7 of HHI8 (R23) selects the source: set = RAM,
;; clear = Flash.  __movmemx_qi is the entry for an 8-bit count in R24.
DEFUN __movmemx_qi
    ;; #Bytes to copy fit in 8 Bits (1..255)
    ;; Zero-extend Loop Counter
    clr     LOOP+1
    ;; FALLTHRU
ENDF __movmemx_qi

;; Entry for a 16-bit byte count in R25:R24 (LOOP).
DEFUN __movmemx_hi

;; Read from where?
    sbrc    HHI8, 7
    rjmp    1f

;; Read from Flash

#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8     ; bits 16+ of the flash address
#endif

0:  ;; Load 1 Byte from Flash...

#if defined (__AVR_HAVE_ELPMX__)
    elpm    r0, Z+
#elif defined (__AVR_HAVE_ELPM__)
    elpm                        ; no ELPM Z+: advance RAMPZ:Z by hand
    adiw    r30, 1
    adc     HHI8, __zero_reg__
    out     __RAMPZ__, HHI8
#elif defined (__AVR_HAVE_LPMX__)
    lpm     r0, Z+
#else
    lpm                         ; no LPM Z+: advance Z by hand
    adiw    r30, 1
#endif

    ;; ...and store that Byte to RAM Destination
    st      X+, r0
    sbiw    LOOP, 1
    brne    0b
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out	__RAMPZ__, __zero_reg__
#endif /* ELPM && RAMPD */
    ret

;; Read from RAM

1:  ;; Read 1 Byte from RAM...
    ld      r0, Z+
    ;; and store that Byte to RAM Destination
    st      X+, r0
    sbiw    LOOP, 1
    brne    1b
    ret
ENDF __movmemx_hi

#undef HHI8
#undef LOOP

#endif /* L_movmemx */
2773#endif /* !defined (__AVR_TINY__) */
2774
2775
2776.section .text.libgcc.builtins, "ax", @progbits
2777
2778/**********************************
2779 * Find first set Bit (ffs)
2780 **********************************/
2781
#if defined (L_ffssi2)
;; find first set bit
;; r25:r24 = ffs32 (r25:r22)
;; clobbers: r22, r26
;; Scan bytes from LSB to MSB:  r26 accumulates the bit offset of the
;; first non-zero byte in steps of 8; OR-folding into r22 makes later
;; bytes visible to the zero test.  A zero argument falls through and
;; returns 0 (r24 and r25 are both 0 in that case).
DEFUN __ffssi2
    clr  r26
    tst  r22
    brne 1f
    subi r26, -8        ; r26 += 8 (no ADDI on AVR)
    or   r22, r23
    brne 1f
    subi r26, -8
    or   r22, r24
    brne 1f
    subi r26, -8
    or   r22, r25
    brne 1f
    ret                 ; whole argument is 0 -> return 0
1:  mov  r24, r22       ; first non-zero byte
    XJMP __loop_ffsqi2  ; result = r26 + ffs8(r24)
ENDF __ffssi2
#endif /* defined (L_ffssi2) */
2804
#if defined (L_ffshi2)
;; find first set bit
;; r25:r24 = ffs16 (r25:r24)
;; clobbers: r26
;; If the low byte is non-zero, count within it (offset 0); otherwise
;; fold the high byte into r24 with offset 8.  ffs(0) returns 0.
DEFUN __ffshi2
    clr  r26
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have problem skipping 2-word instruction
    tst  r24
    breq 2f
#else
    cpse r24, __zero_reg__  ; skip the XJMP when low byte is 0
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1:  XJMP __loop_ffsqi2
2:  ldi  r26, 8         ; low byte was 0: bit offset starts at 8
    or   r24, r25
    brne 1b
    ret                 ; whole argument is 0 -> return 0
ENDF __ffshi2
#endif /* defined (L_ffshi2) */
2825
#if defined (L_loop_ffsqi2)
;; Helper for ffshi2, ffssi2
;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
;; r24 must be != 0
;; clobbers: r26
;; Shift r24 right until a 1 falls into carry, counting the shifts in
;; r26; since r24 != 0 the loop always terminates.
DEFUN __loop_ffsqi2
    inc  r26
    lsr  r24
    brcc __loop_ffsqi2
    mov  r24, r26       ; 1-based bit number plus caller's byte offset
    clr  r25            ; zero-extend the 16-bit result
    ret
ENDF __loop_ffsqi2
#endif /* defined (L_loop_ffsqi2) */
2840
2841
2842/**********************************
2843 * Count trailing Zeros (ctz)
2844 **********************************/
2845
#if defined (L_ctzsi2)
;; count trailing zeros
;; r25:r24 = ctz32 (r25:r22)
;; clobbers: r26, r22
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
;; ctz(x) = ffs(x) - 1;  for x == 0, ffs returns 0 and the decrement
;; wraps r24 to 255 (r25 stays 0 only for non-zero inputs).
DEFUN __ctzsi2
    XCALL __ffssi2
    dec  r24
    ret
ENDF __ctzsi2
#endif /* defined (L_ctzsi2) */
2858
#if defined (L_ctzhi2)
;; count trailing zeros
;; r25:r24 = ctz16 (r25:r24)
;; clobbers: r26
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
;; ctz(x) = ffs(x) - 1;  for x == 0, ffs returns 0 and the decrement
;; wraps r24 to 255.
DEFUN __ctzhi2
    XCALL __ffshi2
    dec  r24
    ret
ENDF __ctzhi2
#endif /* defined (L_ctzhi2) */
2871
2872
2873/**********************************
2874 * Count leading Zeros (clz)
2875 **********************************/
2876
#if defined (L_clzdi2)
;; count leading zeros
;; r25:r24 = clz64 (r25:r18)
;; clobbers: r22, r23, r26
;; clz of the high 32 bits; only if that is 32 (bit 5 set, i.e. the
;; high word was all zero) continue with the low word and add 32.
DEFUN __clzdi2
    XCALL __clzsi2
    sbrs r24, 5         ; result == 32?  (clz32 yields 0..32)
    ret                 ; no: high word had a set bit, done
    mov_l r22, r18
    mov_h r23, r19
    mov_l r24, r20
    mov_h r25, r21      ; move low 32 bits into clzsi2's argument regs
    XCALL __clzsi2
    subi r24, -32       ; r24 += 32
    ret
ENDF __clzdi2
#endif /* defined (L_clzdi2) */
2894
#if defined (L_clzsi2)
;; count leading zeros
;; r25:r24 = clz32 (r25:r22)
;; clobbers: r26
;; clz of the high 16 bits; only if that is 16 (bit 4 set, i.e. the
;; high half was all zero) continue with the low half and add 16.
DEFUN __clzsi2
    XCALL __clzhi2
    sbrs r24, 4         ; result == 16?  (clz16 yields 0..16)
    ret                 ; no: high half had a set bit, done
    mov_l r24, r22
    mov_h r25, r23      ; move low 16 bits into clzhi2's argument regs
    XCALL __clzhi2
    subi r24, -16       ; r24 += 16
    ret
ENDF __clzsi2
#endif /* defined (L_clzsi2) */
2910
#if defined (L_clzhi2)
;; count leading zeros
;; r25:r24 = clz16 (r25:r24)
;; clobbers: r26
;; r26 counts the leading zeros.  Byte step:  if the high byte is zero,
;; add 8 and fold the low byte in; a still-zero value returns 16.
;; Nibble shortcut:  if the high nibble of the remaining byte is zero
;; (value < 16), account for 3 zeros up front and SWAP so the shift
;; loop below finds the set bit within at most 4 iterations.
DEFUN __clzhi2
    clr  r26
    tst  r25
    brne 1f
    subi r26, -8        ; r26 += 8, high byte is zero
    or   r25, r24
    brne 1f
    ldi  r24, 16        ; whole argument is zero
    ret
1:  cpi  r25, 16
    brsh 3f             ; high nibble non-zero: shift directly
    subi r26, -3        ; r26 += 3, then SWAP saves up to 12 shifts
    swap r25
2:  inc  r26
3:  lsl  r25
    brcc 2b             ; shift until the set bit falls into carry
    mov  r24, r26
    clr  r25            ; zero-extend the 16-bit result
    ret
ENDF __clzhi2
#endif /* defined (L_clzhi2) */
2936
2937
2938/**********************************
2939 * Parity
2940 **********************************/
2941
#if defined (L_paritydi2)
;; r25:r24 = parity64 (r25:r18)
;; clobbers: __tmp_reg__
;; Parity is XOR of all bits:  fold the low four bytes into r24 and
;; let __paritysi2 handle the rest.
DEFUN __paritydi2
    eor  r24, r18
    eor  r24, r19
    eor  r24, r20
    eor  r24, r21
    XJMP __paritysi2
ENDF __paritydi2
#endif /* defined (L_paritydi2) */
2953
#if defined (L_paritysi2)
;; r25:r24 = parity32 (r25:r22)
;; clobbers: __tmp_reg__
;; Fold the two low bytes into r24 and defer to __parityhi2.
DEFUN __paritysi2
    eor  r24, r22
    eor  r24, r23
    XJMP __parityhi2
ENDF __paritysi2
#endif /* defined (L_paritysi2) */
2963
#if defined (L_parityhi2)
;; r25:r24 = parity16 (r25:r24)
;; clobbers: __tmp_reg__
;; Fold the high byte into the low one, then fall into __parityqi2.
DEFUN __parityhi2
    eor  r24, r25
;; FALLTHRU
ENDF __parityhi2

;; r25:r24 = parity8 (r24)
;; clobbers: __tmp_reg__
;; Nibble fold via SWAP+EOR, then reduce the 4-bit value with an
;; add/mask/add sequence that leaves the parity in bits 0 and 3.
DEFUN __parityqi2
    ;; parity is in r24[0..7]
    mov  __tmp_reg__, r24
    swap __tmp_reg__
    eor  r24, __tmp_reg__
    ;; parity is in r24[0..3]
    subi r24, -4        ; r24 += 4
    andi r24, -5        ; clear bit 2
    subi r24, -6        ; r24 += 6
    ;; parity is in r24[0,3]
    sbrc r24, 3
    inc  r24            ; combine bit 3 into bit 0
    ;; parity is in r24[0]
    andi r24, 1
    clr  r25            ; zero-extend the 16-bit result
    ret
ENDF __parityqi2
#endif /* defined (L_parityhi2) */
2992
2993
2994/**********************************
2995 * Population Count
2996 **********************************/
2997
#if defined (L_popcounthi2)
;; population count
;; r25:r24 = popcount16 (r25:r24)
;; clobbers: __tmp_reg__
;; popcount of low byte is pushed; popcount of high byte lands in r24;
;; __popcounthi2_tail pops and adds the saved count.
DEFUN __popcounthi2
    XCALL __popcountqi2
    push r24            ; save popcount of low byte
    mov  r24, r25
    XCALL __popcountqi2
    clr  r25            ; zero-extend the 16-bit result
    ;; FALLTHRU
ENDF __popcounthi2

;; r24 += popped byte;  shared tail for the popcount16/32/64 routines.
DEFUN __popcounthi2_tail
    pop   __tmp_reg__
    add   r24, __tmp_reg__
    ret
ENDF __popcounthi2_tail
#endif /* defined (L_popcounthi2) */
3017
#if defined (L_popcountsi2)
;; population count
;; r25:r24 = popcount32 (r25:r22)
;; clobbers: __tmp_reg__
;; Sum of the two 16-bit popcounts; the first is kept on the stack and
;; added back by __popcounthi2_tail.
DEFUN __popcountsi2
    XCALL __popcounthi2
    push  r24           ; save popcount of high half
    mov_l r24, r22
    mov_h r25, r23
    XCALL __popcounthi2
    XJMP  __popcounthi2_tail
ENDF __popcountsi2
#endif /* defined (L_popcountsi2) */
3031
#if defined (L_popcountdi2)
;; population count
;; r25:r24 = popcount64 (r25:r18)
;; clobbers: r22, r23, __tmp_reg__
;; Sum of the two 32-bit popcounts; the first is kept on the stack and
;; added back by __popcounthi2_tail.
DEFUN __popcountdi2
    XCALL __popcountsi2
    push  r24           ; save popcount of high word
    mov_l r22, r18
    mov_h r23, r19
    mov_l r24, r20
    mov_h r25, r21      ; move low 32 bits into popcountsi2's argument regs
    XCALL __popcountsi2
    XJMP  __popcounthi2_tail
ENDF __popcountdi2
#endif /* defined (L_popcountdi2) */
3047
#if defined (L_popcountqi2)
;; population count
;; r24 = popcount8 (r24)
;; clobbers: __tmp_reg__
;; Bit 0 is kept by the ANDI; each LSR drops the next bit into carry
;; and ADC accumulates it.  The first LSR's carry (bit 0, already
;; counted) is deliberately discarded.  After the 7th LSR, tmp holds
;; bit 7 and carry holds bit 6, so the final ADC adds both at once.
DEFUN __popcountqi2
    mov  __tmp_reg__, r24
    andi r24, 1          ; result = bit 0
    lsr  __tmp_reg__     ; carry = bit 0 (discarded)
    lsr  __tmp_reg__     ; carry = bit 1
    adc  r24, __zero_reg__
    lsr  __tmp_reg__     ; carry = bit 2
    adc  r24, __zero_reg__
    lsr  __tmp_reg__     ; carry = bit 3
    adc  r24, __zero_reg__
    lsr  __tmp_reg__     ; carry = bit 4
    adc  r24, __zero_reg__
    lsr  __tmp_reg__     ; carry = bit 5
    adc  r24, __zero_reg__
    lsr  __tmp_reg__     ; carry = bit 6, tmp = bit 7
    adc  r24, __tmp_reg__ ; add bit 6 (carry) and bit 7 (tmp) together
    ret
ENDF __popcountqi2
#endif /* defined (L_popcountqi2) */
3071
3072
3073/**********************************
3074 * Swap bytes
3075 **********************************/
3076
;; swap two registers with different register number
;; Classic XOR swap — no scratch register needed, but \a and \b must be
;; distinct registers or both end up zero.
.macro bswap a, b
    eor \a, \b
    eor \b, \a
    eor \a, \b
.endm
3083
#if defined (L_bswapsi2)
;; swap bytes
;; r25:r22 = bswap32 (r25:r22)
;; Reverse byte order by exchanging the outer and inner byte pairs.
DEFUN __bswapsi2
    bswap r22, r25
    bswap r23, r24
    ret
ENDF __bswapsi2
#endif /* defined (L_bswapsi2) */
3093
#if defined (L_bswapdi2)
;; swap bytes
;; r25:r18 = bswap64 (r25:r18)
;; Reverse byte order by exchanging the four mirrored byte pairs.
DEFUN __bswapdi2
    bswap r18, r25
    bswap r19, r24
    bswap r20, r23
    bswap r21, r22
    ret
ENDF __bswapdi2
#endif /* defined (L_bswapdi2) */
3105
3106
3107/**********************************
3108 * 64-bit shifts
3109 **********************************/
3110
#if defined (L_ashrdi3)

;; SS holds the byte shifted in from the left:  0x00 for logical /
;; non-negative shifts, 0xFF for arithmetic shifts of negative values.
;; __zero_reg__ is borrowed for this and restored before returning.
#define SS __zero_reg__

;; Arithmetic shift right
;; r25:r18 = ashr64 (r25:r18, r17:r16)
;; Set SS to 0xFF when the sign bit is set, then share __lshrdi3.
DEFUN __ashrdi3
    sbrc    r25, 7
    com     SS          ; negative: shift in 0xFF bytes/bits
    ;; FALLTHRU
ENDF  __ashrdi3

;; Logic shift right
;; r25:r18 = lshr64 (r25:r18, r17:r16)
;; First loop: shift by whole bytes while the count is >= 8.
;; Second loop: shift the remaining 0..7 bits one at a time, pulling
;; the fill bit out of SS with ASR + ROR chain.
;; r16 (shift count) is preserved via __tmp_reg__.
DEFUN __lshrdi3
    ;; Signs are in SS (zero_reg)
    mov     __tmp_reg__, r16    ; save shift count
0:  cpi     r16, 8
    brlo 2f                     ; < 8 bits left: do bitwise loop
    subi    r16, 8
    mov     r18, r19            ; move all bytes down one position,
    mov     r19, r20            ; filling the top with SS
    mov     r20, r21
    mov     r21, r22
    mov     r22, r23
    mov     r23, r24
    mov     r24, r25
    mov     r25, SS
    rjmp 0b
1:  asr     SS                  ; keep SS's value, carry = its LSB... fill bit
    ror     r25
    ror     r24
    ror     r23
    ror     r22
    ror     r21
    ror     r20
    ror     r19
    ror     r18
2:  dec     r16
    brpl 1b                     ; executes the 1: block r16 times
    clr     __zero_reg__        ; restore the zero register (was SS)
    mov     r16, __tmp_reg__    ; restore shift count for the caller
    ret
ENDF __lshrdi3

#undef SS

#endif /* defined (L_ashrdi3) */
3159
#if defined (L_ashldi3)
;; Shift left
;; r25:r18 = ashl64 (r25:r18, r17:r16)
;; This function does not clobber T.
;; First loop: shift by whole bytes while the count is >= 8, zero-
;; filling from the bottom.  Second loop: shift the remaining 0..7
;; bits one at a time with an LSL/ROL chain.
;; r16 (shift count) is preserved via __tmp_reg__.
DEFUN __ashldi3
    mov     __tmp_reg__, r16    ; save shift count
0:  cpi     r16, 8
    brlo 2f                     ; < 8 bits left: do bitwise loop
    mov     r25, r24            ; move all bytes up one position,
    mov     r24, r23            ; zero-filling the bottom
    mov     r23, r22
    mov     r22, r21
    mov     r21, r20
    mov     r20, r19
    mov     r19, r18
    clr     r18
    subi    r16, 8
    rjmp 0b
1:  lsl     r18
    rol     r19
    rol     r20
    rol     r21
    rol     r22
    rol     r23
    rol     r24
    rol     r25
2:  dec     r16
    brpl 1b                     ; executes the 1: block r16 times
    mov     r16, __tmp_reg__    ; restore shift count for the caller
    ret
ENDF __ashldi3
#endif /* defined (L_ashldi3) */
3192
#if defined (L_rotldi3)
;; Rotate left
;; r25:r18 = rotl64 (r25:r18, r17:r16)
;; First loop: rotate by whole bytes (cyclic byte move through
;; __tmp_reg__) while the count is >= 8.  Second loop: rotate the
;; remaining 0..7 bits; the bit shifted out of r25 re-enters r18 via
;; the carry (ADC).  r16 is preserved on the stack.
DEFUN __rotldi3
    push    r16                 ; save rotate count
0:  cpi     r16, 8
    brlo 2f                     ; < 8 bits left: do bitwise loop
    subi    r16, 8
    mov     __tmp_reg__, r25    ; cyclic byte rotation: top byte wraps
    mov     r25, r24            ; to the bottom
    mov     r24, r23
    mov     r23, r22
    mov     r22, r21
    mov     r21, r20
    mov     r20, r19
    mov     r19, r18
    mov     r18, __tmp_reg__
    rjmp 0b
1:  lsl     r18
    rol     r19
    rol     r20
    rol     r21
    rol     r22
    rol     r23
    rol     r24
    rol     r25
    adc     r18, __zero_reg__   ; wrap the bit shifted out of r25
2:  dec     r16
    brpl 1b                     ; executes the 1: block r16 times
    pop     r16                 ; restore rotate count for the caller
    ret
ENDF __rotldi3
#endif /* defined (L_rotldi3) */
3226
3227
3228.section .text.libgcc.fmul, "ax", @progbits
3229
3230/***********************************************************/
3231;;; Softmul versions of FMUL, FMULS and FMULSU to implement
3232;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
3233/***********************************************************/
3234
3235#define A1 24
3236#define B1 25
3237#define C0 22
3238#define C1 23
3239#define A0 __tmp_reg__
3240
#ifdef L_fmuls
;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
;;; Clobbers: r24, r25, __tmp_reg__
;;; Signed x signed:  record the result sign (XOR of the operand
;;; signs) in A0.7, take |B1|, and share the rest with __fmulsu_exit
;;; (which takes |A1| and negates the product if A0.7 is set).
DEFUN __fmuls
    ;; A0.7 = negate result?
    mov  A0, A1
    eor  A0, B1
    ;; B1 = |B1|
    sbrc B1, 7
    neg  B1
    XJMP __fmulsu_exit
ENDF __fmuls
#endif /* L_fmuls */
3254
#ifdef L_fmulsu
;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
;;; Clobbers: r24, r25, __tmp_reg__
;;; Signed x unsigned:  the result sign is A's sign alone; store it in
;;; A0.7 and fall into the shared tail.
DEFUN __fmulsu
    ;; A0.7 = negate result?
    mov  A0, A1
;; FALLTHRU
ENDF __fmulsu

;; Helper for __fmuls and __fmulsu
;; Takes |A1|, multiplies unsigned via __fmul, and negates the 16-bit
;; product when A0.7 says the result must be negative.
DEFUN __fmulsu_exit
    ;; A1 = |A1|
    sbrc A1, 7
    neg  A1
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have problem skipping 2-word instruction
    tst  A0
    brmi 1f
#else
    sbrs A0, 7          ; negative result?  skip the tail-jump if so
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XJMP  __fmul        ; positive: tail-call the unsigned multiply
1:  XCALL __fmul
    ;; C = -C iff A0.7 = 1
    NEG2 C0
    ret
ENDF __fmulsu_exit
#endif /* L_fmulsu */
3283
3284
#ifdef L_fmul
;;; r22:r23 = fmul (r24, r25) like in FMUL instruction
;;; Clobbers: r24, r25, __tmp_reg__
;;; Shift-and-add multiply:  A (A1:A0, widened to 16 bits) is shifted
;;; right while B1 is shifted left; whenever B1's MSB is set, A is
;;; added into the result.  The loop ends when B1 becomes zero.
DEFUN __fmul
    ; clear result
    clr   C0
    clr   C1
    clr   A0            ; A0 becomes A's low (fraction) byte
1:  tst   B1            ; set N flag for the first brpl below
    ;; 1.0 = 0x80, so test for bit 7 of B to see if A must be added to C.
2:  brpl  3f            ; N flag: from tst on entry, from lsl afterwards
    ;; C += A
    add   C0, A0
    adc   C1, A1
3:  ;; A >>= 1
    lsr   A1
    ror   A0
    ;; B <<= 1
    lsl   B1
    brne  2b            ; loop until no bits of B remain
    ret
ENDF __fmul
#endif /* L_fmul */
3308
3309#undef A0
3310#undef A1
3311#undef B1
3312#undef C0
3313#undef C1
3314
3315#include "lib1funcs-fixed.S"
3316