/*  -*- Mode: Asm -*-  */
/* Copyright (C) 1998-2015 Free Software Foundation, Inc.
   Contributed by Denis Chertykov <chertykov@gmail.com>

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#if defined (__AVR_TINY__)
#define __zero_reg__ r17
#define __tmp_reg__ r16
#else
#define __zero_reg__ r1
#define __tmp_reg__ r0
#endif
#define __SREG__ 0x3f
#if defined (__AVR_HAVE_SPH__)
#define __SP_H__ 0x3e
#endif
#define __SP_L__ 0x3d
#define __RAMPZ__ 0x3B
#define __EIND__  0x3C

/* Most of the functions here are called directly from avr.md
   patterns, instead of using the standard libcall mechanisms.
   This can make better code because GCC knows exactly which
   of the call-used registers (not all of them) are clobbered.  */

/* FIXME:  At present, there is no SORT directive in the linker
           script, so we must not assume that different modules
           in the same input section (like .libgcc.text.mul) will
           be located close together.  Therefore, we cannot use
           RCALL/RJMP to call a function like __udivmodhi4 from
           __divmodhi4 and have to use the lengthier XCALL/XJMP,
           even though they are in the same input section and all
           such input sections together are small enough to reach
           every location with a RCALL/RJMP instruction.  */

#if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
#error device not supported
#endif

	.macro	mov_l  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
	movw	\r_dest, \r_src
#else
	mov	\r_dest, \r_src
#endif
	.endm

	.macro	mov_h  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
	; empty
#else
	mov	\r_dest, \r_src
#endif
	.endm

.macro	wmov  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
    movw \r_dest,   \r_src
#else
    mov \r_dest,    \r_src
    mov \r_dest+1,  \r_src+1
#endif
.endm

#if defined (__AVR_HAVE_JMP_CALL__)
#define XCALL call
#define XJMP  jmp
#else
#define XCALL rcall
#define XJMP  rjmp
#endif

#if defined (__AVR_HAVE_EIJMP_EICALL__)
#define XICALL eicall
#define XIJMP  eijmp
#else
#define XICALL icall
#define XIJMP  ijmp
#endif

;; Prologue stuff

.macro do_prologue_saves n_pushed n_frame=0
    ldi r26, lo8(\n_frame)
    ldi r27, hi8(\n_frame)
    ldi r30, lo8(gs(.L_prologue_saves.\@))
    ldi r31, hi8(gs(.L_prologue_saves.\@))
    XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
.L_prologue_saves.\@:
.endm

;; Epilogue stuff

.macro do_epilogue_restores n_pushed n_frame=0
    in      r28, __SP_L__
#ifdef __AVR_HAVE_SPH__
    in      r29, __SP_H__
.if \n_frame > 63
    subi    r28, lo8(-\n_frame)
    sbci    r29, hi8(-\n_frame)
.elseif \n_frame > 0
    adiw    r28, \n_frame
.endif
#else
    clr     r29
.if \n_frame > 0
    subi    r28, lo8(-\n_frame)
.endif
#endif /* HAVE SPH */
    ldi     r30, \n_pushed
    XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
.endm
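
;; Editorial note (not part of the original sources):  __prologue_saves__
;; is a chain of 18 one-word PUSH instructions saving R2..R17, R28, R29.
;; Each PUSH occupies 2 bytes of flash, so entering the chain at
;;     __prologue_saves__ + (18 - n_pushed) * 2
;; skips the first  18 - n_pushed  pushes and saves exactly the last
;; n_pushed registers.  For example, n_pushed = 12 enters after 6 skipped
;; pushes and saves R8..R17, R28, R29.  do_epilogue_restores indexes the
;; LDD chain of __epilogue_restores__ the same way.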

;; Support function entry and exit for convenience

.macro wsubi r_arg1, i_arg2
#if defined (__AVR_TINY__)
    subi \r_arg1,   lo8(\i_arg2)
    sbci \r_arg1+1, hi8(\i_arg2)
#else
    sbiw \r_arg1, \i_arg2
#endif
.endm

.macro waddi r_arg1, i_arg2
#if defined (__AVR_TINY__)
    subi \r_arg1,   lo8(-\i_arg2)
    sbci \r_arg1+1, hi8(-\i_arg2)
#else
    adiw \r_arg1, \i_arg2
#endif
.endm
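
;; Editorial note:  AVR_TINY lacks SBIW/ADIW, hence the SUBI/SBCI pairs
;; above.  waddi relies on the identity  x + i == x - (-i):  the low byte
;; is handled by  SUBI lo8(-i)  and the borrow is propagated into the
;; high byte by  SBCI hi8(-i).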

.macro DEFUN name
.global \name
.func \name
\name:
.endm

.macro ENDF name
.size \name, .-\name
.endfunc
.endm

.macro FALIAS name
.global \name
.func \name
\name:
.size \name, .-\name
.endfunc
.endm

;; Skip next instruction, typically a jump target
#if defined(__AVR_TINY__)
#define skip cpse 0,0
#else
#define skip cpse 16,16
#endif

;; Negate a 2-byte value held in consecutive registers
.macro NEG2  reg
    com     \reg+1
    neg     \reg
    sbci    \reg+1, -1
.endm

;; Negate a 4-byte value held in consecutive registers
;; Sets the V flag for signed overflow tests if REG >= 16
.macro NEG4  reg
    com     \reg+3
    com     \reg+2
    com     \reg+1
.if \reg >= 16
    neg     \reg
    sbci    \reg+1, -1
    sbci    \reg+2, -1
    sbci    \reg+3, -1
.else
    com     \reg
    adc     \reg,   __zero_reg__
    adc     \reg+1, __zero_reg__
    adc     \reg+2, __zero_reg__
    adc     \reg+3, __zero_reg__
.endif
.endm
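
;; Editorial sketch of the identity behind NEG2/NEG4 (two's complement):
;;
;;     -x == ~x + 1
;;
;; Every byte is complemented and then 1 is added with byte-wise borrow
;; propagation:  SBCI reg,-1 computes  reg + 1 - C,  i.e. it applies the
;; +1 of the identity unless a pending borrow has already consumed it.
;; A C equivalent for the 4-byte case:
;;
;;     uint32_t neg4 (uint32_t x)
;;     {
;;         return ~x + 1;   /* == -x */
;;     }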

#define exp_lo(N)  hlo8 ((N) << 23)
#define exp_hi(N)  hhi8 ((N) << 23)
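
;; Editorial note:  exp_lo()/exp_hi() pick the two bytes that hold the
;; exponent field of an IEEE 754 single, which occupies bits 23..30.
;; (N) << 23 puts exponent value N in place;  hlo8 extracts bits 16..23
;; and hhi8 bits 24..31.  For example, exp_hi (127) = 0x3f and
;; exp_lo (127) = 0x80, the upper two bytes of 1.0f = 0x3f800000.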


.section .text.libgcc.mul, "ax", @progbits

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/* Note: mulqi3, mulhi3 are open-coded on the enhanced core.  */
#if !defined (__AVR_HAVE_MUL__)
/*******************************************************
    Multiplication  8 x 8  without MUL
*******************************************************/
#if defined (L_mulqi3)

#define	r_arg2	r22		/* multiplicand */
#define	r_arg1 	r24		/* multiplier */
#define r_res	__tmp_reg__	/* result */

DEFUN __mulqi3
	clr	r_res		; clear result
__mulqi3_loop:
	sbrc	r_arg1,0
	add	r_res,r_arg2
	add	r_arg2,r_arg2	; shift multiplicand
	breq	__mulqi3_exit	; while multiplicand != 0
	lsr	r_arg1		;
	brne	__mulqi3_loop	; exit if multiplier = 0
__mulqi3_exit:
	mov	r_arg1,r_res	; result to return register
	ret
ENDF __mulqi3

#undef r_arg2
#undef r_arg1
#undef r_res

#endif 	/* defined (L_mulqi3) */
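
;; Editorial C sketch of the shift-and-add loop used by __mulqi3 above
;; (the wider no-MUL multiply routines below follow the same scheme):
;;
;;     uint8_t mulqi3 (uint8_t a, uint8_t b)  /* a: multiplier */
;;     {
;;         uint8_t r = 0;
;;         for (;;)
;;         {
;;             if (a & 1)
;;                 r += b;     /* bit n of a set:  add b << n */
;;             b <<= 1;        /* shift multiplicand */
;;             if (b == 0)     /* nothing left to add */
;;                 break;
;;             a >>= 1;
;;             if (a == 0)     /* all multiplier bits consumed */
;;                 break;
;;         }
;;         return r;
;;     }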


/*******************************************************
    Widening Multiplication  16 = 8 x 8  without MUL
    Multiplication  16 x 16  without MUL
*******************************************************/

#define A0  22
#define A1  23
#define B0  24
#define BB0 20
#define B1  25
;; Output overlaps input, thus expand result in CC0/1
#define C0  24
#define C1  25
#define CC0  __tmp_reg__
#define CC1  21

#if defined (L_umulqihi3)
;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
;;; (C1:C0) = (unsigned int) A0  * (unsigned int) B0
;;; Clobbers: __tmp_reg__, R21..R23
DEFUN __umulqihi3
    clr     A1
    clr     B1
    XJMP    __mulhi3
ENDF __umulqihi3
#endif /* L_umulqihi3 */

#if defined (L_mulqihi3)
;;; R25:R24 = (signed int) R22 * (signed int) R24
;;; (C1:C0) = (signed int) A0  * (signed int) B0
;;; Clobbers: __tmp_reg__, R20..R23
DEFUN __mulqihi3
    ;; Sign-extend B0
    clr     B1
    sbrc    B0, 7
    com     B1
    ;; The multiplication runs twice as fast if A1 is zero, thus:
    ;; Zero-extend A0
    clr     A1
#ifdef __AVR_HAVE_JMP_CALL__
    ;; Store  B0 * sign of A
    clr     BB0
    sbrc    A0, 7
    mov     BB0, B0
    call    __mulhi3
#else /* have no CALL */
    ;; Skip sign-extension of A if A >= 0
    ;; Same size as with the first alternative but avoids errata skip
    ;; and is faster if A >= 0
    sbrs    A0, 7
    rjmp    __mulhi3
    ;; If  A < 0  store B
    mov     BB0, B0
    rcall   __mulhi3
#endif /* HAVE_JMP_CALL */
    ;; 1-extend A after the multiplication
    sub     C1, BB0
    ret
ENDF __mulqihi3
#endif /* L_mulqihi3 */
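
;; Editorial note on the "1-extension" above:  A is deliberately
;; zero-extended because __mulhi3 finishes sooner when A's high byte is 0.
;; If A is actually negative, its signed value is  A - 256,  so
;;
;;     A_signed * B  =  A * B  -  (B << 8)
;;
;; which is why B0 is remembered in BB0 for negative A and subtracted
;; from the high result byte C1 after the unsigned multiplication.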

#if defined (L_mulhi3)
;;; R25:R24 = R23:R22 * R25:R24
;;; (C1:C0) = (A1:A0) * (B1:B0)
;;; Clobbers: __tmp_reg__, R21..R23
DEFUN __mulhi3

    ;; Clear result
    clr     CC0
    clr     CC1
    rjmp 3f
1:
    ;; Bit n of A is 1  -->  C += B << n
    add     CC0, B0
    adc     CC1, B1
2:
    lsl     B0
    rol     B1
3:
    ;; If B == 0 we are ready
    wsubi   B0, 0
    breq 9f

    ;; Carry = n-th bit of A
    lsr     A1
    ror     A0
    ;; If bit n of A is set, then go add  B * 2^n  to  C
    brcs 1b

    ;; Carry = 0  -->  The ROR above acts like  CP A0, 0
    ;; Thus, it is sufficient to CPC the high part to test A against 0
    cpc     A1, __zero_reg__
    ;; Only proceed if A != 0
    brne    2b
9:
    ;; Move Result into place
    mov     C0, CC0
    mov     C1, CC1
    ret
ENDF  __mulhi3
#endif /* L_mulhi3 */

#undef A0
#undef A1
#undef B0
#undef BB0
#undef B1
#undef C0
#undef C1
#undef CC0
#undef CC1


#define A0 22
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3

#define B0 18
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3

#define CC0 26
#define CC1 CC0+1
#define CC2 30
#define CC3 CC2+1

#define C0 22
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3

/*******************************************************
    Widening Multiplication  32 = 16 x 16  without MUL
*******************************************************/

#if defined (L_umulhisi3)
DEFUN __umulhisi3
    wmov    B0, 24
    ;; Zero-extend B
    clr     B2
    clr     B3
    ;; Zero-extend A
    wmov    A2, B2
    XJMP    __mulsi3
ENDF __umulhisi3
#endif /* L_umulhisi3 */

#if defined (L_mulhisi3)
DEFUN __mulhisi3
    wmov    B0, 24
    ;; Sign-extend B
    lsl     r25
    sbc     B2, B2
    mov     B3, B2
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Sign-extend A
    clr     A2
    sbrc    A1, 7
    com     A2
    mov     A3, A2
    XJMP __mulsi3
#else /*  no __AVR_ERRATA_SKIP_JMP_CALL__ */
    ;; Zero-extend A and __mulsi3 will run at least twice as fast
    ;; compared to a sign-extended A.
    clr     A2
    clr     A3
    sbrs    A1, 7
    XJMP __mulsi3
    ;; If  A < 0  then account for the  B * 0xffff....  term up front
    ;; by initializing the high part of the result CC with -B.
    wmov    CC2, A2
    sub     CC2, B0
    sbc     CC3, B1
    XJMP __mulsi3_helper
#endif /*  __AVR_ERRATA_SKIP_JMP_CALL__ */
ENDF __mulhisi3
#endif /* L_mulhisi3 */
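
;; Editorial note:  this is the 16-bit analogue of the 1-extension in
;; __mulqihi3.  With A zero-extended, a negative A is off by 2^16:
;;
;;     A_signed * B  =  A * B  -  (B << 16)
;;
;; Pre-loading the high result half CC3:CC2 with -B folds that correction
;; into the multiplication instead of patching the result afterwards.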


/*******************************************************
    Multiplication  32 x 32  without MUL
*******************************************************/

#if defined (L_mulsi3)
DEFUN __mulsi3
#if defined (__AVR_TINY__)
    in     r26, __SP_L__ ; safe to use X, as it is CC0/CC1
    in     r27, __SP_H__
    subi   r26, lo8(-3)   ; Add 3 to point past return address
    sbci   r27, hi8(-3)
    push   B0    ; save callee saved regs
    push   B1
    ld     B0, X+   ; load from caller stack
    ld     B1, X+
    ld     B2, X+
    ld     B3, X
#endif
    ;; Clear result
    clr     CC2
    clr     CC3
    ;; FALLTHRU
ENDF  __mulsi3

DEFUN __mulsi3_helper
    clr     CC0
    clr     CC1
    rjmp 3f

1:  ;; If bit n of A is set, then add  B * 2^n  to the result in CC
    ;; CC += B
    add  CC0,B0  $  adc  CC1,B1  $  adc  CC2,B2  $  adc  CC3,B3

2:  ;; B <<= 1
    lsl  B0      $  rol  B1      $  rol  B2      $  rol  B3

3:  ;; A >>= 1:  Carry = n-th bit of A
    lsr  A3      $  ror  A2      $  ror  A1      $  ror  A0

    brcs 1b
    ;; Only continue if  A != 0
    sbci    A1, 0
    brne 2b
    wsubi   A2, 0
    brne 2b

    ;; All bits of A are consumed:  Copy result to return register C
    wmov    C0, CC0
    wmov    C2, CC2
#if defined (__AVR_TINY__)
    pop     B1      ; restore callee saved regs
    pop     B0
#endif  /* defined (__AVR_TINY__) */

    ret
ENDF __mulsi3_helper
#endif /* L_mulsi3 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#undef CC0
#undef CC1
#undef CC2
#undef CC3

#endif /* !defined (__AVR_HAVE_MUL__) */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#if defined (__AVR_HAVE_MUL__)
#define A0 26
#define B0 18
#define C0 22

#define A1 A0+1

#define B1 B0+1
#define B2 B0+2
#define B3 B0+3

#define C1 C0+1
#define C2 C0+2
#define C3 C0+3

/*******************************************************
    Widening Multiplication  32 = 16 x 16  with MUL
*******************************************************/

#if defined (L_mulhisi3)
;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
;;; C3:C0   = (signed long) A1:A0   * (signed long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulhisi3
    XCALL   __umulhisi3
    ;; Sign-extend B
    tst     B1
    brpl    1f
    sub     C2, A0
    sbc     C3, A1
1:  ;; Sign-extend A
    XJMP __usmulhisi3_tail
ENDF __mulhisi3
#endif /* L_mulhisi3 */

#if defined (L_usmulhisi3)
;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0   = (signed long) A1:A0   * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __usmulhisi3
    XCALL   __umulhisi3
    ;; FALLTHRU
ENDF __usmulhisi3

DEFUN __usmulhisi3_tail
    ;; Sign-extend A
    sbrs    A1, 7
    ret
    sub     C2, B0
    sbc     C3, B1
    ret
ENDF __usmulhisi3_tail
#endif /* L_usmulhisi3 */

#if defined (L_umulhisi3)
;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0   = (unsigned long) A1:A0   * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __umulhisi3
    mul     A0, B0
    movw    C0, r0
    mul     A1, B1
    movw    C2, r0
    mul     A0, B1
#ifdef __AVR_HAVE_JMP_CALL__
    ;; This function is used by many other routines, often multiple times.
    ;; Therefore, if the flash size is not too limited, avoid the RCALL
    ;; and invest 6 bytes to speed things up.
    add     C1, r0
    adc     C2, r1
    clr     __zero_reg__
    adc     C3, __zero_reg__
#else
    rcall   1f
#endif
    mul     A1, B0
1:  add     C1, r0
    adc     C2, r1
    clr     __zero_reg__
    adc     C3, __zero_reg__
    ret
ENDF __umulhisi3
#endif /* L_umulhisi3 */
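
;; Editorial C sketch of the schoolbook decomposition computed by
;; __umulhisi3 with four hardware MULs (byte splits a1:a0, b1:b0):
;;
;;     uint32_t umulhisi3 (uint16_t a, uint16_t b)
;;     {
;;         uint8_t a0 = a, a1 = a >> 8;
;;         uint8_t b0 = b, b1 = b >> 8;
;;         return  (uint32_t) a0 * b0              /* mul A0, B0 */
;;               + ((uint32_t) a1 * b1 << 16)      /* mul A1, B1 */
;;               + ((uint32_t) a0 * b1 << 8)       /* mul A0, B1 */
;;               + ((uint32_t) a1 * b0 << 8);      /* mul A1, B0 */
;;     }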

/*******************************************************
    Widening Multiplication  32 = 16 x 32  with MUL
*******************************************************/

#if defined (L_mulshisi3)
;;; R25:R22 = (signed long) R27:R26 * R21:R18
;;; (C3:C0) = (signed long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulshisi3
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have a problem skipping a 2-word instruction
    tst     A1
    brmi    __mulohisi3
#else
    sbrs    A1, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XJMP    __muluhisi3
    ;; FALLTHRU
ENDF __mulshisi3

;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
;;; (C3:C0) = (one-extended long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulohisi3
    XCALL   __muluhisi3
    ;; One-extend R27:R26 (A1:A0)
    sub     C2, B0
    sbc     C3, B1
    ret
ENDF __mulohisi3
#endif /* L_mulshisi3 */

#if defined (L_muluhisi3)
;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
;;; (C3:C0) = (unsigned long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __muluhisi3
    XCALL   __umulhisi3
    mul     A0, B3
    add     C3, r0
    mul     A1, B2
    add     C3, r0
    mul     A0, B2
    add     C2, r0
    adc     C3, r1
    clr     __zero_reg__
    ret
ENDF __muluhisi3
#endif /* L_muluhisi3 */

/*******************************************************
    Multiplication  32 x 32  with MUL
*******************************************************/

#if defined (L_mulsi3)
;;; R25:R22 = R25:R22 * R21:R18
;;; (C3:C0) = C3:C0   * B3:B0
;;; Clobbers: R26, R27, __tmp_reg__
DEFUN __mulsi3
    movw    A0, C0
    push    C2
    push    C3
    XCALL   __muluhisi3
    pop     A1
    pop     A0
    ;; A1:A0 now contains the high word of A
    mul     A0, B0
    add     C2, r0
    adc     C3, r1
    mul     A0, B1
    add     C3, r0
    mul     A1, B0
    add     C3, r0
    clr     __zero_reg__
    ret
ENDF __mulsi3
#endif /* L_mulsi3 */
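
;; Editorial note:  __mulsi3 needs only the low 32 bits of the product.
;; Splitting the first operand into 16-bit halves  A = AH:AL  gives
;;
;;     A * B  ==  AL * B  +  ((AH * BL) << 16)     (mod 2^32)
;;
;; AL * B  is the widening 16 x 32 __muluhisi3 call above;  the MUL/ADD
;; tail accumulates the byte products of  AH * BL  that survive the
;; 16-bit shift.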

#undef A0
#undef A1

#undef B0
#undef B1
#undef B2
#undef B3

#undef C0
#undef C1
#undef C2
#undef C3

#endif /* __AVR_HAVE_MUL__ */

/*******************************************************
       Multiplication 24 x 24 with MUL
*******************************************************/

#if defined (L_mulpsi3)

;; A[0..2]: In: Multiplicand; Out: Product
#define A0  22
#define A1  A0+1
#define A2  A0+2

;; B[0..2]: In: Multiplier
#define B0  18
#define B1  B0+1
#define B2  B0+2

#if defined (__AVR_HAVE_MUL__)

;; C[0..2]: Expand Result
#define C0  22
#define C1  C0+1
#define C2  C0+2

;; R24:R22 *= R20:R18
;; Clobbers: r21, r25, r26, r27, __tmp_reg__

#define AA0 26
#define AA2 21

DEFUN __mulpsi3
    wmov    AA0, A0
    mov     AA2, A2
    XCALL   __umulhisi3
    mul     AA2, B0     $  add  C2, r0
    mul     AA0, B2     $  add  C2, r0
    clr     __zero_reg__
    ret
ENDF __mulpsi3

#undef AA2
#undef AA0

#undef C2
#undef C1
#undef C0

#else /* !HAVE_MUL */
;; C[0..2]: Expand Result
#if defined (__AVR_TINY__)
#define C0  16
#else
#define C0  0
#endif /* defined (__AVR_TINY__) */
#define C1  C0+1
#define C2  21

;; R24:R22 *= R20:R18
;; Clobbers: __tmp_reg__, R18, R19, R20, R21

DEFUN __mulpsi3
#if defined (__AVR_TINY__)
    in r26,__SP_L__
    in r27,__SP_H__
    subi r26, lo8(-3)   ; Add 3 to point past return address
    sbci r27, hi8(-3)
    push B0    ; save callee saved regs
    push B1
    ld B0,X+   ; load from caller stack
    ld B1,X+
    ld B2,X+
#endif /* defined (__AVR_TINY__) */

    ;; C[] = 0
    clr     __tmp_reg__
    clr     C2

0:  ;; Shift N-th Bit of B[] into Carry.  N = 24 - Loop
    LSR  B2     $  ror  B1     $  ror  B0

    ;; If the N-th Bit of B[] was set...
    brcc    1f

    ;; ...then add A[] * 2^N to the Result C[]
    ADD  C0,A0  $  adc  C1,A1  $  adc  C2,A2

1:  ;; Multiply A[] by 2
    LSL  A0     $  rol  A1     $  rol  A2

    ;; Loop until B[] is 0
    subi B0,0   $  sbci B1,0   $  sbci B2,0
    brne    0b

    ;; Copy C[] to the return Register A[]
    wmov    A0, C0
    mov     A2, C2

    clr     __zero_reg__
#if defined (__AVR_TINY__)
    pop B1
    pop B0
#endif /* (__AVR_TINY__) */
    ret
ENDF __mulpsi3

#undef C2
#undef C1
#undef C0

#endif /* HAVE_MUL */

#undef B2
#undef B1
#undef B0

#undef A2
#undef A1
#undef A0

#endif /* L_mulpsi3 */

#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)

;; A[0..2]: In: Multiplicand
#define A0  22
#define A1  A0+1
#define A2  A0+2

;; BB: In: Multiplier
#define BB  25

;; C[0..2]: Result
#define C0  18
#define C1  C0+1
#define C2  C0+2

;; C[] = A[] * sign_extend (BB)
DEFUN __mulsqipsi3
    mul     A0, BB
    movw    C0, r0
    mul     A2, BB
    mov     C2, r0
    mul     A1, BB
    add     C1, r0
    adc     C2, r1
    clr     __zero_reg__
    sbrs    BB, 7
    ret
    ;; One-extend BB
    sub     C1, A0
    sbc     C2, A1
    ret
ENDF __mulsqipsi3

#undef C2
#undef C1
#undef C0

#undef BB

#undef A2
#undef A1
#undef A0

#endif /* L_mulsqipsi3  &&  HAVE_MUL */

/*******************************************************
       Multiplication 64 x 64
*******************************************************/

;; A[] = A[] * B[]

;; A[0..7]: In: Multiplicand
;; Out: Product
#define A0  18
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3
#define A4  A0+4
#define A5  A0+5
#define A6  A0+6
#define A7  A0+7

;; B[0..7]: In: Multiplier
#define B0  10
#define B1  B0+1
#define B2  B0+2
#define B3  B0+3
#define B4  B0+4
#define B5  B0+5
#define B6  B0+6
#define B7  B0+7

#ifndef __AVR_TINY__
#if defined (__AVR_HAVE_MUL__)
;; Define C[] for convenience
;; Notice that parts of C[] overlap A[] and B[], respectively
#define C0  16
#define C1  C0+1
#define C2  20
#define C3  C2+1
#define C4  28
#define C5  C4+1
#define C6  C4+2
#define C7  C4+3

#if defined (L_muldi3)

;; A[]     *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function

DEFUN __muldi3
    push    r29
    push    r28
    push    r17
    push    r16

    ;; Counting in Words, we have to perform a 4 * 4 Multiplication

    ;; 3 * 0  +  0 * 3
    mul  A7,B0  $             $  mov C7,r0
    mul  A0,B7  $             $  add C7,r0
    mul  A6,B1  $             $  add C7,r0
    mul  A6,B0  $  mov C6,r0  $  add C7,r1
    mul  B6,A1  $             $  add C7,r0
    mul  B6,A0  $  add C6,r0  $  adc C7,r1

    ;; 1 * 2
    mul  A2,B4  $  add C6,r0  $  adc C7,r1
    mul  A3,B4  $             $  add C7,r0
    mul  A2,B5  $             $  add C7,r0

    push    A5
    push    A4
    push    B1
    push    B0
    push    A3
    push    A2

    ;; 0 * 0
    wmov    26, B0
    XCALL   __umulhisi3
    wmov    C0, 22
    wmov    C2, 24

    ;; 0 * 2
    wmov    26, B4
    XCALL   __umulhisi3  $  wmov C4,22            $ add C6,24 $ adc C7,25

    wmov    26, B2
    ;; 0 * 1
    XCALL   __muldi3_6

    pop     A0
    pop     A1
    ;; 1 * 1
    wmov    26, B2
    XCALL   __umulhisi3  $  add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25

    pop     r26
    pop     r27
    ;; 1 * 0
    XCALL   __muldi3_6

    pop     A0
    pop     A1
    ;; 2 * 0
    XCALL   __umulhisi3  $  add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25

    ;; 2 * 1
    wmov    26, B2
    XCALL   __umulhisi3  $            $           $ add C6,22 $ adc C7,23

    ;; A[] = C[]
    wmov    A0, C0
    ;; A2 = C2 already
    wmov    A4, C4
    wmov    A6, C6

    clr     __zero_reg__
    pop     r16
    pop     r17
    pop     r28
    pop     r29
    ret
ENDF __muldi3
#endif /* L_muldi3 */
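
;; Editorial note on the word schedule above:  with A and B split into
;; 16-bit words A3..A0 and B3..B0, the low 64 bits of the product are
;;
;;     sum of  (Ai * Bj) << (16 * (i + j))  over  i + j <= 3   (mod 2^64)
;;
;; Word products with  i + j == 3  contribute only their low 16 bits,
;; which is why the "3 * 0  +  0 * 3" and "1 * 2" blocks get away with a
;; few byte MULs, while the lower-order word products go through
;; __umulhisi3 and __muldi3_6.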

#if defined (L_muldi3_6)
;; A helper for some 64-bit multiplications with MUL available
DEFUN __muldi3_6
__muldi3_6:
    XCALL   __umulhisi3
    add     C2, 22
    adc     C3, 23
    adc     C4, 24
    adc     C5, 25
    brcc    0f
    adiw    C6, 1
0:  ret
ENDF __muldi3_6
#endif /* L_muldi3_6 */

#undef C7
#undef C6
#undef C5
#undef C4
#undef C3
#undef C2
#undef C1
#undef C0

#else /* !HAVE_MUL */

#if defined (L_muldi3)

#define C0  26
#define C1  C0+1
#define C2  C0+2
#define C3  C0+3
#define C4  C0+4
#define C5  C0+5
#define C6  0
#define C7  C6+1

#define Loop 9

;; A[]     *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function

DEFUN __muldi3
    push    r29
    push    r28
    push    Loop

    ldi     C0, 64
    mov     Loop, C0

    ;; C[] = 0
    clr     __tmp_reg__
    wmov    C0, 0
    wmov    C2, 0
    wmov    C4, 0

0:  ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
    ;; where N = 64 - Loop.
    ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
    ;; B[] will have its initial Value again.
    LSR  B7     $  ror  B6     $  ror  B5     $  ror  B4
    ror  B3     $  ror  B2     $  ror  B1     $  ror  B0

    ;; If the N-th Bit of B[] was set then...
    brcc    1f
    ;; ...finish Rotation...
    ori     B7, 1 << 7

    ;; ...and add A[] * 2^N to the Result C[]
    ADD  C0,A0  $  adc  C1,A1  $  adc  C2,A2  $  adc  C3,A3
    adc  C4,A4  $  adc  C5,A5  $  adc  C6,A6  $  adc  C7,A7

1:  ;; Multiply A[] by 2
    LSL  A0     $  rol  A1     $  rol  A2     $  rol  A3
    rol  A4     $  rol  A5     $  rol  A6     $  rol  A7

    dec     Loop
    brne    0b

    ;; We expanded the Result in C[]
    ;; Copy Result to the Return Register A[]
    wmov    A0, C0
    wmov    A2, C2
    wmov    A4, C4
    wmov    A6, C6

    clr     __zero_reg__
    pop     Loop
    pop     r28
    pop     r29
    ret
ENDF __muldi3

#undef Loop

#undef C7
#undef C6
#undef C5
#undef C4
#undef C3
#undef C2
#undef C1
#undef C0

#endif /* L_muldi3 */
#endif /* HAVE_MUL */
#endif /* if not __AVR_TINY__ */

#undef B7
#undef B6
#undef B5
#undef B4
#undef B3
#undef B2
#undef B1
#undef B0

#undef A7
#undef A6
#undef A5
#undef A4
#undef A3
#undef A2
#undef A1
#undef A0

/*******************************************************
   Widening Multiplication 64 = 32 x 32  with  MUL
*******************************************************/

#if defined (__AVR_HAVE_MUL__)
#define A0 r22
#define A1 r23
#define A2 r24
#define A3 r25

#define B0 r18
#define B1 r19
#define B2 r20
#define B3 r21

#define C0  18
#define C1  C0+1
#define C2  20
#define C3  C2+1
#define C4  28
#define C5  C4+1
#define C6  C4+2
#define C7  C4+3

#if defined (L_umulsidi3)

;; Unsigned widening 64 = 32 * 32 Multiplication with MUL

;; R18[8] = R22[4] * R18[4]
;;
;; Ordinary ABI Function, but additionally sets
;; X = R20[2] = B2[2]
;; Z = R22[2] = A0[2]
DEFUN __umulsidi3
    clt
    ;; FALLTHRU
ENDF  __umulsidi3
    ;; T = sign (A)
DEFUN __umulsidi3_helper
    push    29  $  push    28 ; Y
    wmov    30, A2
    ;; Counting in Words, we have to perform 4 Multiplications
    ;; 0 * 0
    wmov    26, A0
    XCALL __umulhisi3
    push    23  $  push    22 ; C0
    wmov    28, B0
    wmov    18, B2
    wmov    C2, 24
    push    27  $  push    26 ; A0
    push    19  $  push    18 ; B2
    ;;
    ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
    ;;  B2  C2  --  --  --  B0  A2
    ;; 1 * 1
    wmov    26, 30      ; A2
    XCALL __umulhisi3
    ;; Sign-extend A.  T holds the sign of A
    brtc    0f
    ;; Subtract B from the high part of the result
    sub     22, 28
    sbc     23, 29
    sbc     24, 18
    sbc     25, 19
0:  wmov    18, 28      ;; B0
    wmov    C4, 22
    wmov    C6, 24
    ;;
    ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
    ;;  B0  C2  --  --  A2  C4  C6
    ;;
    ;; 1 * 0
    XCALL __muldi3_6
    ;; 0 * 1
    pop     26  $   pop 27  ;; B2
    pop     18  $   pop 19  ;; A0
    XCALL __muldi3_6

    ;; Move result C into place and save A0 in Z
    wmov    22, C4
    wmov    24, C6
    wmov    30, 18 ; A0
    pop     C0  $   pop C1

    ;; Epilogue
    pop     28  $   pop 29  ;; Y
    ret
ENDF __umulsidi3_helper
#endif /* L_umulsidi3 */


#if defined (L_mulsidi3)

;; Signed widening 64 = 32 * 32 Multiplication
;;
;; R18[8] = R22[4] * R18[4]
;; Ordinary ABI Function
DEFUN __mulsidi3
    bst     A3, 7
    sbrs    B3, 7           ; Enhanced core has no skip bug
    XJMP __umulsidi3_helper

    ;; B needs sign-extension
    push    A3
    push    A2
    XCALL __umulsidi3_helper
    ;; A0 survived in Z
    sub     r22, r30
    sbc     r23, r31
    pop     r26
    pop     r27
    sbc     r24, r26
    sbc     r25, r27
    ret
ENDF __mulsidi3
#endif /* L_mulsidi3 */
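
;; Editorial note on the sign handling above:  a 32-bit value X with sign
;; bit s denotes  X - s * 2^32  when read as signed, hence (mod 2^64)
;;
;;     A_signed * B_signed  =  A * B  -  (sign(A) * B << 32)
;;                                    -  (sign(B) * A << 32)
;;
;; __umulsidi3_helper applies the first correction (T = sign of A) while
;; summing the partial products;  __mulsidi3 applies the second one after
;; the call, using the copy of A that survived in Z and on the stack.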

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#endif /* HAVE_MUL */

/**********************************************************
    Widening Multiplication 64 = 32 x 32  without  MUL
**********************************************************/
#ifndef __AVR_TINY__ /* if not __AVR_TINY__ */
#if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
#define A0 18
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3
#define A4 A0+4
#define A5 A0+5
#define A6 A0+6
#define A7 A0+7

#define B0 10
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3
#define B4 B0+4
#define B5 B0+5
#define B6 B0+6
#define B7 B0+7

#define AA0 22
#define AA1 AA0+1
#define AA2 AA0+2
#define AA3 AA0+3

#define BB0 18
#define BB1 BB0+1
#define BB2 BB0+2
#define BB3 BB0+3

#define Mask r30

;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
;;
;; R18[8] = R22[4] * R18[4]
;; Ordinary ABI Function
DEFUN __mulsidi3
    set
    skip
    ;; FALLTHRU
ENDF  __mulsidi3

DEFUN __umulsidi3
    clt     ; skipped
    ;; Save 10 Registers: R10..R17, R28, R29
    do_prologue_saves 10
    ldi     Mask, 0xff
    bld     Mask, 7
    ;; Move B into place...
    wmov    B0, BB0
    wmov    B2, BB2
    ;; ...and extend it
    and     BB3, Mask
    lsl     BB3
    sbc     B4, B4
    mov     B5, B4
    wmov    B6, B4
    ;; Move A into place...
    wmov    A0, AA0
    wmov    A2, AA2
    ;; ...and extend it
    and     AA3, Mask
    lsl     AA3
    sbc     A4, A4
    mov     A5, A4
    wmov    A6, A4
    XCALL   __muldi3
    do_epilogue_restores 10
ENDF __umulsidi3

#undef A0
#undef A1
#undef A2
#undef A3
#undef A4
#undef A5
#undef A6
#undef A7
#undef B0
#undef B1
#undef B2
#undef B3
#undef B4
#undef B5
#undef B6
#undef B7
#undef AA0
#undef AA1
#undef AA2
#undef AA3
#undef BB0
#undef BB1
#undef BB2
#undef BB3
#undef Mask
#endif /* L_mulsidi3 && !HAVE_MUL */
#endif /* if not __AVR_TINY__ */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


.section .text.libgcc.div, "ax", @progbits

/*******************************************************
       Division 8 / 8 => (result + remainder)
*******************************************************/
#define	r_rem	r25	/* remainder */
#define	r_arg1	r24	/* dividend, quotient */
#define	r_arg2	r22	/* divisor */
#define	r_cnt	r23	/* loop count */

#if defined (L_udivmodqi4)
DEFUN __udivmodqi4
	sub	r_rem,r_rem	; clear remainder and carry
	ldi	r_cnt,9		; init loop counter
	rjmp	__udivmodqi4_ep	; jump to entry point
__udivmodqi4_loop:
	rol	r_rem		; shift dividend into remainder
	cp	r_rem,r_arg2	; compare remainder & divisor
	brcs	__udivmodqi4_ep	; remainder < divisor
	sub	r_rem,r_arg2	; subtract divisor from remainder
__udivmodqi4_ep:
	rol	r_arg1		; shift dividend (with CARRY)
	dec	r_cnt		; decrement loop counter
	brne	__udivmodqi4_loop
	com	r_arg1		; complement result
				; because C flag was complemented in loop
	ret
ENDF __udivmodqi4
#endif /* defined (L_udivmodqi4) */
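
;; Editorial C sketch of the shift-and-subtract division implemented by
;; __udivmodqi4 (the 16/24/32/64-bit variants below follow the same
;; scheme).  The asm interleaves the loop so that the complemented
;; quotient bit (the carry of the compare) is shifted into the dividend
;; and fixed up by one final COM;  the C version stores the true bit:
;;
;;     struct qr { uint8_t quot, rem; };
;;
;;     struct qr udivmodqi4 (uint8_t num, uint8_t den)
;;     {
;;         uint8_t rem = 0;
;;         for (uint8_t i = 0; i < 8; i++)
;;         {
;;             rem = (rem << 1) | (num >> 7);  /* shift dividend in */
;;             num <<= 1;
;;             if (rem >= den)                 /* divisor fits */
;;             {
;;                 rem -= den;
;;                 num |= 1;                   /* set quotient bit */
;;             }
;;         }
;;         return (struct qr) { num, rem };
;;     }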

#if defined (L_divmodqi4)
DEFUN __divmodqi4
        bst     r_arg1,7	; store sign of dividend
        mov     __tmp_reg__,r_arg1
        eor     __tmp_reg__,r_arg2 ; r0.7 is sign of result
        sbrc	r_arg1,7
	neg     r_arg1		; dividend negative : negate
        sbrc	r_arg2,7
	neg     r_arg2		; divisor negative : negate
	XCALL	__udivmodqi4	; do the unsigned div/mod
	brtc	__divmodqi4_1
	neg	r_rem		; correct remainder sign
__divmodqi4_1:
	sbrc	__tmp_reg__,7
	neg	r_arg1		; correct result sign
__divmodqi4_exit:
	ret
ENDF __divmodqi4
#endif /* defined (L_divmodqi4) */
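
;; Editorial sketch of the sign rules used above and in all wider signed
;; variants.  C division truncates toward zero, so the quotient is
;; negative iff the operand signs differ (bit 7 of r0), and the remainder
;; takes the sign of the dividend (T flag):
;;
;;     int8_t divqi3 (int8_t a, int8_t b)
;;     {
;;         uint8_t ua = a < 0 ? -a : a;
;;         uint8_t ub = b < 0 ? -b : b;
;;         uint8_t q  = ua / ub;               /* __udivmodqi4 */
;;         return (a < 0) != (b < 0) ? -q : q;
;;     }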

#undef r_rem
#undef r_arg1
#undef r_arg2
#undef r_cnt


/*******************************************************
       Division 16 / 16 => (result + remainder)
*******************************************************/
#define	r_remL	r26	/* remainder Low */
#define	r_remH	r27	/* remainder High */

/* return: remainder */
#define	r_arg1L	r24	/* dividend Low */
#define	r_arg1H	r25	/* dividend High */

/* return: quotient */
#define	r_arg2L	r22	/* divisor Low */
#define	r_arg2H	r23	/* divisor High */

#define	r_cnt	r21	/* loop count */

#if defined (L_udivmodhi4)
DEFUN __udivmodhi4
	sub	r_remL,r_remL
	sub	r_remH,r_remH	; clear remainder and carry
	ldi	r_cnt,17	; init loop counter
	rjmp	__udivmodhi4_ep	; jump to entry point
__udivmodhi4_loop:
        rol	r_remL		; shift dividend into remainder
	rol	r_remH
        cp	r_remL,r_arg2L	; compare remainder & divisor
	cpc	r_remH,r_arg2H
        brcs	__udivmodhi4_ep	; remainder < divisor
        sub	r_remL,r_arg2L	; subtract divisor from remainder
        sbc	r_remH,r_arg2H
__udivmodhi4_ep:
        rol	r_arg1L		; shift dividend (with CARRY)
        rol	r_arg1H
        dec	r_cnt		; decrement loop counter
        brne	__udivmodhi4_loop
	com	r_arg1L
	com	r_arg1H
; div/mod results to return registers, as for the div() function
	mov_l	r_arg2L, r_arg1L	; quotient
	mov_h	r_arg2H, r_arg1H
	mov_l	r_arg1L, r_remL		; remainder
	mov_h	r_arg1H, r_remH
	ret
ENDF __udivmodhi4
#endif /* defined (L_udivmodhi4) */

#if defined (L_divmodhi4)
DEFUN __divmodhi4
    .global _div
_div:
    bst     r_arg1H,7           ; store sign of dividend
    mov     __tmp_reg__,r_arg2H
    brtc    0f
    com     __tmp_reg__         ; r0.7 is sign of result
    rcall   __divmodhi4_neg1    ; dividend negative: negate
0:
    sbrc    r_arg2H,7
    rcall   __divmodhi4_neg2    ; divisor negative: negate
    XCALL   __udivmodhi4        ; do the unsigned div/mod
    sbrc    __tmp_reg__,7
    rcall   __divmodhi4_neg2    ; correct quotient sign
    brtc    __divmodhi4_exit
__divmodhi4_neg1:
    ;; correct dividend/remainder sign
    com     r_arg1H
    neg     r_arg1L
    sbci    r_arg1H,0xff
    ret
__divmodhi4_neg2:
    ;; correct divisor/quotient sign
    com     r_arg2H
    neg     r_arg2L
    sbci    r_arg2H,0xff
__divmodhi4_exit:
    ret
ENDF __divmodhi4
#endif /* defined (L_divmodhi4) */

#undef r_remH
#undef r_remL

#undef r_arg1H
#undef r_arg1L

#undef r_arg2H
#undef r_arg2L

#undef r_cnt

/*******************************************************
       Division 24 / 24 => (result + remainder)
*******************************************************/

;; A[0..2]: In: Dividend; Out: Quotient
#define A0  22
#define A1  A0+1
#define A2  A0+2

;; B[0..2]: In: Divisor;   Out: Remainder
#define B0  18
#define B1  B0+1
#define B2  B0+2

;; C[0..2]: Expand remainder
#define C0  __zero_reg__
#define C1  26
#define C2  25

;; Loop counter
#define r_cnt   21

#if defined (L_udivmodpsi4)
;; R24:R22 = R24:R22  udiv  R20:R18
;; R20:R18 = R24:R22  umod  R20:R18
;; Clobbers: R21, R25, R26

DEFUN __udivmodpsi4
    ; init loop counter
    ldi     r_cnt, 24+1
    ; Clear remainder and carry.  C0 is already 0
    clr     C1
    sub     C2, C2
    ; jump to entry point
    rjmp    __udivmodpsi4_start
__udivmodpsi4_loop:
    ; shift dividend into remainder
    rol     C0
    rol     C1
    rol     C2
    ; compare remainder & divisor
    cp      C0, B0
    cpc     C1, B1
    cpc     C2, B2
    brcs    __udivmodpsi4_start ; remainder < divisor
    sub     C0, B0              ; subtract divisor from remainder
    sbc     C1, B1
    sbc     C2, B2
__udivmodpsi4_start:
    ; shift dividend (with CARRY)
    rol     A0
    rol     A1
    rol     A2
    ; decrement loop counter
    dec     r_cnt
    brne    __udivmodpsi4_loop
    com     A0
    com     A1
    com     A2
    ; div/mod results to return registers
    ; remainder
    mov     B0, C0
    mov     B1, C1
    mov     B2, C2
    clr     __zero_reg__ ; C0
    ret
ENDF __udivmodpsi4
#endif /* defined (L_udivmodpsi4) */

#if defined (L_divmodpsi4)
;; R24:R22 = R24:R22  div  R20:R18
;; R20:R18 = R24:R22  mod  R20:R18
;; Clobbers: T, __tmp_reg__, R21, R25, R26

DEFUN __divmodpsi4
    ; R0.7 will contain the sign of the result:
    ; R0.7 = A.sign ^ B.sign
    mov __tmp_reg__, B2
    ; T-flag = sign of dividend
    bst     A2, 7
    brtc    0f
    com     __tmp_reg__
    ; Adjust dividend's sign
    rcall   __divmodpsi4_negA
0:
    ; Adjust divisor's sign
    sbrc    B2, 7
    rcall   __divmodpsi4_negB

    ; Do the unsigned div/mod
    XCALL   __udivmodpsi4

    ; Adjust quotient's sign
    sbrc    __tmp_reg__, 7
    rcall   __divmodpsi4_negA

    ; Adjust remainder's sign
    brtc    __divmodpsi4_end

__divmodpsi4_negB:
    ; Correct divisor/remainder sign
    com     B2
    com     B1
    neg     B0
    sbci    B1, -1
    sbci    B2, -1
    ret

    ; Correct dividend/quotient sign
__divmodpsi4_negA:
    com     A2
    com     A1
    neg     A0
    sbci    A1, -1
    sbci    A2, -1
__divmodpsi4_end:
    ret

ENDF __divmodpsi4
#endif /* defined (L_divmodpsi4) */

#undef A0
#undef A1
#undef A2

#undef B0
#undef B1
#undef B2

#undef C0
#undef C1
#undef C2

#undef r_cnt

/*******************************************************
       Division 32 / 32 => (result + remainder)
*******************************************************/
#define	r_remHH	r31	/* remainder High */
#define	r_remHL	r30
#define	r_remH	r27
#define	r_remL	r26	/* remainder Low */

/* return: remainder */
#define	r_arg1HH r25	/* dividend High */
#define	r_arg1HL r24
#define	r_arg1H  r23
#define	r_arg1L  r22	/* dividend Low */

/* return: quotient */
#define	r_arg2HH r21	/* divisor High */
#define	r_arg2HL r20
#define	r_arg2H  r19
#define	r_arg2L  r18	/* divisor Low */

#define	r_cnt __zero_reg__  /* loop count (0 after the loop!) */

#if defined (L_udivmodsi4)
DEFUN __udivmodsi4
	ldi	r_remL, 33	; init loop counter
	mov	r_cnt, r_remL
	sub	r_remL,r_remL
	sub	r_remH,r_remH	; clear remainder and carry
	mov_l	r_remHL, r_remL
	mov_h	r_remHH, r_remH
	rjmp	__udivmodsi4_ep	; jump to entry point
__udivmodsi4_loop:
        rol	r_remL		; shift dividend into remainder
	rol	r_remH
	rol	r_remHL
	rol	r_remHH
        cp	r_remL,r_arg2L	; compare remainder & divisor
	cpc	r_remH,r_arg2H
	cpc	r_remHL,r_arg2HL
	cpc	r_remHH,r_arg2HH
	brcs	__udivmodsi4_ep	; remainder < divisor
        sub	r_remL,r_arg2L	; subtract divisor from remainder
        sbc	r_remH,r_arg2H
        sbc	r_remHL,r_arg2HL
        sbc	r_remHH,r_arg2HH
__udivmodsi4_ep:
        rol	r_arg1L		; shift dividend (with CARRY)
        rol	r_arg1H
        rol	r_arg1HL
        rol	r_arg1HH
        dec	r_cnt		; decrement loop counter
        brne	__udivmodsi4_loop
				; __zero_reg__ now restored (r_cnt == 0)
	com	r_arg1L
	com	r_arg1H
	com	r_arg1HL
	com	r_arg1HH
; div/mod results to return registers, as for the ldiv() function
	mov_l	r_arg2L,  r_arg1L	; quotient
	mov_h	r_arg2H,  r_arg1H
	mov_l	r_arg2HL, r_arg1HL
	mov_h	r_arg2HH, r_arg1HH
	mov_l	r_arg1L,  r_remL	; remainder
	mov_h	r_arg1H,  r_remH
	mov_l	r_arg1HL, r_remHL
	mov_h	r_arg1HH, r_remHH
	ret
ENDF __udivmodsi4
#endif /* defined (L_udivmodsi4) */

#if defined (L_divmodsi4)
DEFUN __divmodsi4
    mov     __tmp_reg__,r_arg2HH
    bst     r_arg1HH,7          ; store sign of dividend
    brtc    0f
    com     __tmp_reg__         ; r0.7 is sign of result
    XCALL   __negsi2            ; dividend negative: negate
0:
    sbrc    r_arg2HH,7
    rcall   __divmodsi4_neg2    ; divisor negative: negate
    XCALL   __udivmodsi4        ; do the unsigned div/mod
    sbrc    __tmp_reg__, 7      ; correct quotient sign
    rcall   __divmodsi4_neg2
    brtc    __divmodsi4_exit    ; correct remainder sign
    XJMP    __negsi2
__divmodsi4_neg2:
    ;; correct divisor/quotient sign
    com     r_arg2HH
    com     r_arg2HL
    com     r_arg2H
    neg     r_arg2L
    sbci    r_arg2H,0xff
    sbci    r_arg2HL,0xff
    sbci    r_arg2HH,0xff
__divmodsi4_exit:
    ret
ENDF __divmodsi4
#endif /* defined (L_divmodsi4) */

#if defined (L_negsi2)
;; (set (reg:SI 22)
;;      (neg:SI (reg:SI 22)))
;; Sets the V flag for signed overflow tests
DEFUN __negsi2
    NEG4    22
    ret
ENDF __negsi2
#endif /* L_negsi2 */

#undef r_remHH
#undef r_remHL
#undef r_remH
#undef r_remL
#undef r_arg1HH
#undef r_arg1HL
#undef r_arg1H
#undef r_arg1L
#undef r_arg2HH
#undef r_arg2HL
#undef r_arg2H
#undef r_arg2L
#undef r_cnt

/* *di routines use registers below R19 and won't work with tiny arch
   right now. */

#if !defined (__AVR_TINY__)
/*******************************************************
       Division 64 / 64
       Modulo   64 % 64
*******************************************************/

;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
;; at least 16k of Program Memory.  For smaller Devices, depend
;; on MOVW and SP Size.  There is a Connexion between SP Size and
;; Flash Size so that SP Size can be used to test for Flash Size.

#if defined (__AVR_HAVE_JMP_CALL__)
#   define SPEED_DIV 8
#elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
#   define SPEED_DIV 16
#else
#   define SPEED_DIV 0
#endif

;; A[0..7]: In: Dividend;
;; Out: Quotient  (T = 0)
;; Out: Remainder (T = 1)
#define A0  18
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3
#define A4  A0+4
#define A5  A0+5
#define A6  A0+6
#define A7  A0+7

;; B[0..7]: In: Divisor;   Out: Clobber
#define B0  10
#define B1  B0+1
#define B2  B0+2
#define B3  B0+3
#define B4  B0+4
#define B5  B0+5
#define B6  B0+6
#define B7  B0+7

;; C[0..7]: Expand remainder;  Out: Remainder (unused)
#define C0  8
#define C1  C0+1
#define C2  30
#define C3  C2+1
#define C4  28
#define C5  C4+1
#define C6  26
#define C7  C6+1

;; Holds Signs during Division Routine
#define SS      __tmp_reg__

;; Bit-Counter in Division Routine
#define R_cnt   __zero_reg__

;; Scratch Register for Negation
#define NN      r31

#if defined (L_udivdi3)

;; R25:R18 = R24:R18  umod  R17:R10
;; Ordinary ABI-Function

DEFUN __umoddi3
    set
    rjmp __udivdi3_umoddi3
ENDF __umoddi3

;; R25:R18 = R24:R18  udiv  R17:R10
;; Ordinary ABI-Function

DEFUN __udivdi3
    clt
ENDF __udivdi3

DEFUN __udivdi3_umoddi3
    push    C0
    push    C1
    push    C4
    push    C5
    XCALL   __udivmod64
    pop     C5
    pop     C4
    pop     C1
    pop     C0
    ret
ENDF __udivdi3_umoddi3
#endif /* L_udivdi3 */

#if defined (L_udivmod64)

;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
;; No Registers saved/restored; the Callers will take Care.
;; Preserves B[] and T-flag
;; T = 0: Compute Quotient  in A[]
;; T = 1: Compute Remainder in A[] and shift SS one Bit left

DEFUN __udivmod64

    ;; Clear Remainder (C6, C7 will follow)
    clr     C0
    clr     C1
    wmov    C2, C0
    wmov    C4, C0
    ldi     C7, 64

#if SPEED_DIV == 0 || SPEED_DIV == 16
    ;; Initialize Loop-Counter
    mov     R_cnt, C7
    wmov    C6, C0
#endif /* SPEED_DIV */

#if SPEED_DIV == 8

    push    A7
    clr     C6

1:  ;; Compare shifted Dividend against Divisor
    ;; If -- even after Shifting -- it is smaller...
    CP  A7,B0  $  cpc C0,B1  $  cpc C1,B2  $  cpc C2,B3
    cpc C3,B4  $  cpc C4,B5  $  cpc C5,B6  $  cpc C6,B7
    brcc    2f

    ;; ...then we can subtract it.  Thus, it is legal to shift left
               $  mov C6,C5  $  mov C5,C4  $  mov C4,C3
    mov C3,C2  $  mov C2,C1  $  mov C1,C0  $  mov C0,A7
    mov A7,A6  $  mov A6,A5  $  mov A5,A4  $  mov A4,A3
    mov A3,A2  $  mov A2,A1  $  mov A1,A0  $  clr A0

    ;; 8 Bits are done
    subi    C7, 8
    brne    1b

    ;; Shifted 64 Bits:  A7 has traveled to C7
    pop     C7
    ;; Divisor is greater than Dividend. We have:
    ;; A[] % B[] = A[]
    ;; A[] / B[] = 0
    ;; Thus, we can return immediately
    rjmp    5f

2:  ;; Initialize Bit-Counter with Number of Bits still to be performed
    mov     R_cnt, C7

    ;; Push of A7 is not needed because C7 is still 0
    pop     C7
    clr     C7

#elif  SPEED_DIV == 16

    ;; Compare shifted Dividend against Divisor
    cp      A7, B3
    cpc     C0, B4
    cpc     C1, B5
    cpc     C2, B6
    cpc     C3, B7
    brcc    2f

    ;; Divisor is greater than shifted Dividend: We can shift the Dividend
    ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
    wmov  C2,A6  $  wmov C0,A4
    wmov  A6,A2  $  wmov A4,A0
    wmov  A2,C6  $  wmov A0,C4

    ;; Set Bit Counter to 32
    lsr     R_cnt
2:
#elif SPEED_DIV
#error SPEED_DIV = ?
#endif /* SPEED_DIV */

;; The actual Division + Remainder Routine

3:  ;; Left-shift Dividend...
    lsl A0     $  rol A1     $  rol A2     $  rol A3
    rol A4     $  rol A5     $  rol A6     $  rol A7

    ;; ...into Remainder
    rol C0     $  rol C1     $  rol C2     $  rol C3
    rol C4     $  rol C5     $  rol C6     $  rol C7

    ;; Compare Remainder and Divisor
    CP  C0,B0  $  cpc C1,B1  $  cpc C2,B2  $  cpc C3,B3
    cpc C4,B4  $  cpc C5,B5  $  cpc C6,B6  $  cpc C7,B7

    brcs 4f

    ;; Divisor fits into Remainder:  Subtract it from Remainder...
    SUB C0,B0  $  sbc C1,B1  $  sbc C2,B2  $  sbc C3,B3
    sbc C4,B4  $  sbc C5,B5  $  sbc C6,B6  $  sbc C7,B7

    ;; ...and set the corresponding Bit in the upcoming Quotient
    ;; The Bit will travel to its final Position
    ori A0, 1

4:  ;; This Bit is done
    dec     R_cnt
    brne    3b
    ;; __zero_reg__ is 0 again

    ;; T = 0: We are fine with the Quotient in A[]
    ;; T = 1: Copy Remainder to A[]
5:  brtc    6f
    wmov    A0, C0
    wmov    A2, C2
    wmov    A4, C4
    wmov    A6, C6
    ;; Move the Sign of the Result to SS.7
    lsl     SS

6:  ret

ENDF __udivmod64
#endif /* L_udivmod64 */


#if defined (L_divdi3)

;; R25:R18 = R24:R18  mod  R17:R10
;; Ordinary ABI-Function

DEFUN __moddi3
    set
    rjmp    __divdi3_moddi3
ENDF __moddi3

;; R25:R18 = R24:R18  div  R17:R10
;; Ordinary ABI-Function

DEFUN __divdi3
    clt
ENDF __divdi3

DEFUN  __divdi3_moddi3
#if SPEED_DIV
    mov     r31, A7
    or      r31, B7
    brmi    0f
    ;; Both Signs are 0:  the following Complexity is not needed
    XJMP    __udivdi3_umoddi3
#endif /* SPEED_DIV */

0:  ;; The Prologue
    ;; Save 12 Registers:  Y, 17...8
    ;; No Frame needed
    do_prologue_saves 12

    ;; SS.7 will contain the Sign of the Quotient  (A.sign * B.sign)
    ;; SS.6 will contain the Sign of the Remainder (A.sign)
    mov     SS, A7
    asr     SS
    ;; Adjust Dividend's Sign as needed
#if SPEED_DIV
    ;; Compiling for Speed we know that at least one Sign must be < 0
    ;; Thus, if A[] >= 0 then we know B[] < 0
    brpl    22f
#else
    brpl    21f
#endif /* SPEED_DIV */

    XCALL   __negdi2

    ;; Adjust Divisor's Sign and SS.7 as needed
21: tst     B7
    brpl    3f
22: ldi     NN, 1 << 7
    eor     SS, NN

    ldi NN, -1
    com B4     $  com B5     $  com B6     $  com B7
               $  com B1     $  com B2     $  com B3
    NEG B0
               $  sbc B1,NN  $  sbc B2,NN  $  sbc B3,NN
    sbc B4,NN  $  sbc B5,NN  $  sbc B6,NN  $  sbc B7,NN

3:  ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
    XCALL   __udivmod64

    ;; Adjust Result's Sign
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    tst     SS
    brpl    4f
#else
    sbrc    SS, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XCALL   __negdi2

4:  ;; Epilogue: Restore 12 Registers and return
    do_epilogue_restores 12

ENDF __divdi3_moddi3

#endif /* L_divdi3 */

#undef R_cnt
#undef SS
#undef NN

.section .text.libgcc, "ax", @progbits

#define TT __tmp_reg__

#if defined (L_adddi3)
;; (set (reg:DI 18)
;;      (plus:DI (reg:DI 18)
;;               (reg:DI 10)))
;; Sets the V flag for signed overflow tests
;; Sets the C flag for unsigned overflow tests
DEFUN __adddi3
    ADD A0,B0  $  adc A1,B1  $  adc A2,B2  $  adc A3,B3
    adc A4,B4  $  adc A5,B5  $  adc A6,B6  $  adc A7,B7
    ret
ENDF __adddi3
#endif /* L_adddi3 */

#if defined (L_adddi3_s8)
;; (set (reg:DI 18)
;;      (plus:DI (reg:DI 18)
;;               (sign_extend:SI (reg:QI 26))))
;; Sets the V flag for signed overflow tests
;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
DEFUN __adddi3_s8
    clr     TT
    sbrc    r26, 7
    com     TT
    ADD A0,r26 $  adc A1,TT  $  adc A2,TT  $  adc A3,TT
    adc A4,TT  $  adc A5,TT  $  adc A6,TT  $  adc A7,TT
    ret
ENDF __adddi3_s8
#endif /* L_adddi3_s8 */

#if defined (L_subdi3)
;; (set (reg:DI 18)
;;      (minus:DI (reg:DI 18)
;;                (reg:DI 10)))
;; Sets the V flag for signed overflow tests
;; Sets the C flag for unsigned overflow tests
DEFUN __subdi3
    SUB A0,B0  $  sbc A1,B1  $  sbc A2,B2  $  sbc A3,B3
    sbc A4,B4  $  sbc A5,B5  $  sbc A6,B6  $  sbc A7,B7
    ret
ENDF __subdi3
#endif /* L_subdi3 */

#if defined (L_cmpdi2)
;; (set (cc0)
;;      (compare (reg:DI 18)
;;               (reg:DI 10)))
DEFUN __cmpdi2
    CP  A0,B0  $  cpc A1,B1  $  cpc A2,B2  $  cpc A3,B3
    cpc A4,B4  $  cpc A5,B5  $  cpc A6,B6  $  cpc A7,B7
    ret
ENDF __cmpdi2
#endif /* L_cmpdi2 */

#if defined (L_cmpdi2_s8)
;; (set (cc0)
;;      (compare (reg:DI 18)
;;               (sign_extend:SI (reg:QI 26))))
DEFUN __cmpdi2_s8
    clr     TT
    sbrc    r26, 7
    com     TT
    CP  A0,r26 $  cpc A1,TT  $  cpc A2,TT  $  cpc A3,TT
    cpc A4,TT  $  cpc A5,TT  $  cpc A6,TT  $  cpc A7,TT
    ret
ENDF __cmpdi2_s8
#endif /* L_cmpdi2_s8 */

#if defined (L_negdi2)
;; (set (reg:DI 18)
;;      (neg:DI (reg:DI 18)))
;; Sets the V flag for signed overflow tests
DEFUN __negdi2

    com  A4    $  com  A5    $  com  A6    $  com  A7
               $  com  A1    $  com  A2    $  com  A3
    NEG  A0
               $  sbci A1,-1 $  sbci A2,-1 $  sbci A3,-1
    sbci A4,-1 $  sbci A5,-1 $  sbci A6,-1 $  sbci A7,-1
    ret

ENDF __negdi2
#endif /* L_negdi2 */

#undef TT

#undef C7
#undef C6
#undef C5
#undef C4
#undef C3
#undef C2
#undef C1
#undef C0

#undef B7
#undef B6
#undef B5
#undef B4
#undef B3
#undef B2
#undef B1
#undef B0

#undef A7
#undef A6
#undef A5
#undef A4
#undef A3
#undef A2
#undef A1
#undef A0

#endif /* !defined (__AVR_TINY__) */


.section .text.libgcc.prologue, "ax", @progbits

/**********************************
 * This is a prologue subroutine
 **********************************/
#if !defined (__AVR_TINY__)
#if defined (L_prologue)

;; This function does not clobber T-flag; 64-bit division relies on it
DEFUN __prologue_saves__
	push r2
	push r3
	push r4
	push r5
	push r6
	push r7
	push r8
	push r9
	push r10
	push r11
	push r12
	push r13
	push r14
	push r15
	push r16
	push r17
	push r28
	push r29
#if !defined (__AVR_HAVE_SPH__)
	in	r28,__SP_L__
	sub	r28,r26
	out	__SP_L__,r28
	clr	r29
#elif defined (__AVR_XMEGA__)
	in	r28,__SP_L__
	in	r29,__SP_H__
	sub	r28,r26
	sbc	r29,r27
	out	__SP_L__,r28
	out	__SP_H__,r29
#else
	in	r28,__SP_L__
	in	r29,__SP_H__
	sub	r28,r26
	sbc	r29,r27
	in	__tmp_reg__,__SREG__
	cli
	out	__SP_H__,r29
	out	__SREG__,__tmp_reg__
	out	__SP_L__,r28
#endif /* #SP = 8/16 */

	XIJMP

ENDF __prologue_saves__
#endif /* defined (L_prologue) */

/*
 * This is an epilogue subroutine
 */
#if defined (L_epilogue)

DEFUN __epilogue_restores__
	ldd	r2,Y+18
	ldd	r3,Y+17
	ldd	r4,Y+16
	ldd	r5,Y+15
	ldd	r6,Y+14
	ldd	r7,Y+13
	ldd	r8,Y+12
	ldd	r9,Y+11
	ldd	r10,Y+10
	ldd	r11,Y+9
	ldd	r12,Y+8
	ldd	r13,Y+7
	ldd	r14,Y+6
	ldd	r15,Y+5
	ldd	r16,Y+4
	ldd	r17,Y+3
	ldd	r26,Y+2
#if !defined (__AVR_HAVE_SPH__)
	ldd	r29,Y+1
	add	r28,r30
	out	__SP_L__,r28
	mov	r28, r26
#elif defined (__AVR_XMEGA__)
	ldd  r27,Y+1
	add  r28,r30
	adc  r29,__zero_reg__
	out  __SP_L__,r28
	out  __SP_H__,r29
	wmov 28, 26
#else
	ldd	r27,Y+1
	add	r28,r30
	adc	r29,__zero_reg__
	in	__tmp_reg__,__SREG__
	cli
	out	__SP_H__,r29
	out	__SREG__,__tmp_reg__
	out	__SP_L__,r28
	mov_l	r28, r26
	mov_h	r29, r27
#endif /* #SP = 8/16 */
	ret
ENDF __epilogue_restores__
#endif /* defined (L_epilogue) */
#endif /* !defined (__AVR_TINY__) */
2268
2269#ifdef L_exit
2270	.section .fini9,"ax",@progbits
2271DEFUN _exit
2272	.weak	exit
2273exit:
2274ENDF _exit
2275
2276	/* Code from .fini8 ... .fini1 sections inserted by ld script.  */
2277
2278	.section .fini0,"ax",@progbits
2279	cli
2280__stop_program:
2281	rjmp	__stop_program
2282#endif /* defined (L_exit) */
2283
2284#ifdef L_cleanup
2285	.weak	_cleanup
2286	.func	_cleanup
2287_cleanup:
2288	ret
2289.endfunc
2290#endif /* defined (L_cleanup) */


.section .text.libgcc, "ax", @progbits

#ifdef L_tablejump2
DEFUN __tablejump2__
    lsl     r30
    rol     r31
#if defined (__AVR_HAVE_EIJMP_EICALL__)
    ;; Word address of gs() jumptable entry in R24:Z
    rol     r24
    out     __RAMPZ__, r24
#elif defined (__AVR_HAVE_ELPM__)
    ;; Word address of jumptable entry in Z
    clr     __tmp_reg__
    rol     __tmp_reg__
    out     __RAMPZ__, __tmp_reg__
#endif

    ;; Read word address from jumptable and jump

#if defined (__AVR_HAVE_ELPMX__)
    elpm    __tmp_reg__, Z+
    elpm    r31, Z
    mov     r30, __tmp_reg__
#ifdef __AVR_HAVE_RAMPD__
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out     __RAMPZ__, __zero_reg__
#endif /* RAMPD */
    XIJMP
#elif defined (__AVR_HAVE_ELPM__)
    elpm
    push    r0
    adiw    r30, 1
    elpm
    push    r0
    ret
#elif defined (__AVR_HAVE_LPMX__)
    lpm     __tmp_reg__, Z+
    lpm     r31, Z
    mov     r30, __tmp_reg__
    ijmp
#elif defined (__AVR_TINY__)
    wsubi   30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z
    ld      __tmp_reg__, Z+
    ld      r31, Z      ; use ld, not lpm: flash is mapped into data space
    mov     r30, __tmp_reg__
    ijmp
#else
    lpm
    push    r0
    adiw    r30, 1
    lpm
    push    r0
    ret
#endif
ENDF __tablejump2__
#endif /* L_tablejump2 */
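
;; In C terms, __tablejump2__ behaves roughly like the sketch below
;; (illustrative only; "table" and "i" are hypothetical names): Z holds
;; the word address of a jump-table entry, which the routine doubles to
;; a byte address, then it fetches the target word address from flash
;; and jumps there.
;;
;;     /* extern void (*const table[]) (void);   held in flash  */
;;     /* table[i] ();                            the dispatch   */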

#if defined(__AVR_TINY__)
#ifdef L_copy_data
        .section .init4,"ax",@progbits
        .global __do_copy_data
__do_copy_data:
        ldi     r18, hi8(__data_end)
        ldi     r26, lo8(__data_start)
        ldi     r27, hi8(__data_start)
        ldi     r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
        ldi     r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
        rjmp    .L__do_copy_data_start
.L__do_copy_data_loop:
        ld      r19, Z+
        st      X+, r19
.L__do_copy_data_start:
        cpi     r26, lo8(__data_end)
        cpc     r27, r18
        brne    .L__do_copy_data_loop
#endif
#else
#ifdef L_copy_data
	.section .init4,"ax",@progbits
DEFUN __do_copy_data
#if defined(__AVR_HAVE_ELPMX__)
	ldi	r17, hi8(__data_end)
	ldi	r26, lo8(__data_start)
	ldi	r27, hi8(__data_start)
	ldi	r30, lo8(__data_load_start)
	ldi	r31, hi8(__data_load_start)
	ldi	r16, hh8(__data_load_start)
	out	__RAMPZ__, r16
	rjmp	.L__do_copy_data_start
.L__do_copy_data_loop:
	elpm	r0, Z+
	st	X+, r0
.L__do_copy_data_start:
	cpi	r26, lo8(__data_end)
	cpc	r27, r17
	brne	.L__do_copy_data_loop
#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
	ldi	r17, hi8(__data_end)
	ldi	r26, lo8(__data_start)
	ldi	r27, hi8(__data_start)
	ldi	r30, lo8(__data_load_start)
	ldi	r31, hi8(__data_load_start)
	ldi	r16, hh8(__data_load_start - 0x10000)
.L__do_copy_data_carry:
	inc	r16
	out	__RAMPZ__, r16
	rjmp	.L__do_copy_data_start
.L__do_copy_data_loop:
	elpm
	st	X+, r0
	adiw	r30, 1
	brcs	.L__do_copy_data_carry
.L__do_copy_data_start:
	cpi	r26, lo8(__data_end)
	cpc	r27, r17
	brne	.L__do_copy_data_loop
#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
	ldi	r17, hi8(__data_end)
	ldi	r26, lo8(__data_start)
	ldi	r27, hi8(__data_start)
	ldi	r30, lo8(__data_load_start)
	ldi	r31, hi8(__data_load_start)
	rjmp	.L__do_copy_data_start
.L__do_copy_data_loop:
#if defined (__AVR_HAVE_LPMX__)
	lpm	r0, Z+
#else
	lpm
	adiw	r30, 1
#endif
	st	X+, r0
.L__do_copy_data_start:
	cpi	r26, lo8(__data_end)
	cpc	r27, r17
	brne	.L__do_copy_data_loop
#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
	;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
	out	__RAMPZ__, __zero_reg__
#endif /* ELPM && RAMPD */
ENDF __do_copy_data
#endif /* L_copy_data */
#endif /* !defined (__AVR_TINY__) */
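
;; C model of the startup copy above (sketch only; the symbols come
;; from the linker script, and reading the flash image actually needs
;; lpm/elpm, so the plain pointer reads are a simplification):
;;
;;     extern char __data_start[], __data_end[], __data_load_start[];
;;     static void do_copy_data (void)
;;     {
;;         const char *src = __data_load_start; /* flash image of .data */
;;         char *dst = __data_start;            /* .data in RAM */
;;         while (dst != __data_end)
;;             *dst++ = *src++;                 /* (e)lpm + st X+ above */
;;     }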

/* __do_clear_bss is only necessary if there is anything in the .bss section.  */

#ifdef L_clear_bss
	.section .init4,"ax",@progbits
DEFUN __do_clear_bss
	ldi	r18, hi8(__bss_end)
	ldi	r26, lo8(__bss_start)
	ldi	r27, hi8(__bss_start)
	rjmp	.do_clear_bss_start
.do_clear_bss_loop:
	st	X+, __zero_reg__
.do_clear_bss_start:
	cpi	r26, lo8(__bss_end)
	cpc	r27, r18
	brne	.do_clear_bss_loop
ENDF __do_clear_bss
#endif /* L_clear_bss */
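
;; C model of the loop above (sketch only; the bounds come from the
;; linker script):
;;
;;     extern char __bss_start[], __bss_end[];
;;     static void do_clear_bss (void)
;;     {
;;         for (char *p = __bss_start; p != __bss_end; p++)
;;             *p = 0;                          /* st X+, __zero_reg__ */
;;     }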

/* __do_global_ctors and __do_global_dtors are only necessary
   if there are any constructors/destructors.  */

#if defined(__AVR_TINY__)
#define cdtors_tst_reg r18
#else
#define cdtors_tst_reg r17
#endif

#ifdef L_ctors
	.section .init6,"ax",@progbits
DEFUN __do_global_ctors
    ldi     cdtors_tst_reg, pm_hi8(__ctors_start)
    ldi     r28, pm_lo8(__ctors_end)
    ldi     r29, pm_hi8(__ctors_end)
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi     r16, pm_hh8(__ctors_end)
#endif /* HAVE_EIJMP */
    rjmp    .L__do_global_ctors_start
.L__do_global_ctors_loop:
    wsubi   28, 1
#ifdef __AVR_HAVE_EIJMP_EICALL__
    sbc     r16, __zero_reg__
    mov     r24, r16
#endif /* HAVE_EIJMP */
    mov_h   r31, r29
    mov_l   r30, r28
    XCALL   __tablejump2__
.L__do_global_ctors_start:
    cpi     r28, pm_lo8(__ctors_start)
    cpc     r29, cdtors_tst_reg
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi     r24, pm_hh8(__ctors_start)
    cpc     r16, r24
#endif /* HAVE_EIJMP */
    brne    .L__do_global_ctors_loop
ENDF __do_global_ctors
#endif /* L_ctors */

#ifdef L_dtors
	.section .fini6,"ax",@progbits
DEFUN __do_global_dtors
    ldi     cdtors_tst_reg, pm_hi8(__dtors_end)
    ldi     r28, pm_lo8(__dtors_start)
    ldi     r29, pm_hi8(__dtors_start)
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi     r16, pm_hh8(__dtors_start)
#endif /* HAVE_EIJMP */
    rjmp    .L__do_global_dtors_start
.L__do_global_dtors_loop:
#ifdef __AVR_HAVE_EIJMP_EICALL__
    mov     r24, r16
#endif /* HAVE_EIJMP */
    mov_h   r31, r29
    mov_l   r30, r28
    XCALL   __tablejump2__
    waddi   28, 1
#ifdef __AVR_HAVE_EIJMP_EICALL__
    adc     r16, __zero_reg__
#endif /* HAVE_EIJMP */
.L__do_global_dtors_start:
    cpi     r28, pm_lo8(__dtors_end)
    cpc     r29, cdtors_tst_reg
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi     r24, pm_hh8(__dtors_end)
    cpc     r16, r24
#endif /* HAVE_EIJMP */
    brne    .L__do_global_dtors_loop
ENDF __do_global_dtors
#endif /* L_dtors */
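
;; Both loops walk a table of word addresses placed by the linker and
;; dispatch each entry through __tablejump2__: constructors run from
;; the end of their table downwards, destructors from the start
;; upwards.  C sketch of the constructor loop (illustrative only):
;;
;;     typedef void (*func_t) (void);
;;     extern func_t __ctors_start[], __ctors_end[];
;;     static void do_ctors (void)
;;     {
;;         for (func_t *p = __ctors_end; p != __ctors_start; )
;;             (*--p) ();      /* highest address first */
;;     }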

#undef cdtors_tst_reg

.section .text.libgcc, "ax", @progbits

#if !defined (__AVR_TINY__)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Loading n bytes from Flash; n = 3,4
;; R22... = Flash[Z]
;; Clobbers: __tmp_reg__

#if (defined (L_load_3)        \
     || defined (L_load_4))    \
    && !defined (__AVR_HAVE_LPMX__)

;; Destination
#define D0  22
#define D1  D0+1
#define D2  D0+2
#define D3  D0+3

.macro  .load dest, n
    lpm
    mov     \dest, r0
.if \dest != D0+\n-1
    adiw    r30, 1
.else
    sbiw    r30, \n-1
.endif
.endm

#if defined (L_load_3)
DEFUN __load_3
    push  D3
    XCALL __load_4
    pop   D3
    ret
ENDF __load_3
#endif /* L_load_3 */

#if defined (L_load_4)
DEFUN __load_4
    .load D0, 4
    .load D1, 4
    .load D2, 4
    .load D3, 4
    ret
ENDF __load_4
#endif /* L_load_4 */

#endif /* L_load_3 || L_load_4 */
#endif /* !defined (__AVR_TINY__) */

#if !defined (__AVR_TINY__)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Loading n bytes from Flash or RAM;  n = 1,2,3,4
;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
;; Clobbers: __tmp_reg__, R21, R30, R31

#if (defined (L_xload_1)            \
     || defined (L_xload_2)         \
     || defined (L_xload_3)         \
     || defined (L_xload_4))

;; Destination
#define D0  22
#define D1  D0+1
#define D2  D0+2
#define D3  D0+3

;; Register containing bits 16+ of the address

#define HHI8  21

.macro  .xload dest, n
#if defined (__AVR_HAVE_ELPMX__)
    elpm    \dest, Z+
#elif defined (__AVR_HAVE_ELPM__)
    elpm
    mov     \dest, r0
.if \dest != D0+\n-1
    adiw    r30, 1
    adc     HHI8, __zero_reg__
    out     __RAMPZ__, HHI8
.endif
#elif defined (__AVR_HAVE_LPMX__)
    lpm     \dest, Z+
#else
    lpm
    mov     \dest, r0
.if \dest != D0+\n-1
    adiw    r30, 1
.endif
#endif
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
.if \dest == D0+\n-1
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out     __RAMPZ__, __zero_reg__
.endif
#endif
.endm ; .xload

#if defined (L_xload_1)
DEFUN __xload_1
#if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
    sbrc    HHI8, 7
    ld      D0, Z
    sbrs    HHI8, 7
    lpm     D0, Z
    ret
#else
    sbrc    HHI8, 7
    rjmp    1f
#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload  D0, 1
    ret
1:  ld      D0, Z
    ret
#endif /* LPMx && ! ELPM */
ENDF __xload_1
#endif /* L_xload_1 */

#if defined (L_xload_2)
DEFUN __xload_2
    sbrc    HHI8, 7
    rjmp    1f
#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload  D0, 2
    .xload  D1, 2
    ret
1:  ld      D0, Z+
    ld      D1, Z+
    ret
ENDF __xload_2
#endif /* L_xload_2 */

#if defined (L_xload_3)
DEFUN __xload_3
    sbrc    HHI8, 7
    rjmp    1f
#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload  D0, 3
    .xload  D1, 3
    .xload  D2, 3
    ret
1:  ld      D0, Z+
    ld      D1, Z+
    ld      D2, Z+
    ret
ENDF __xload_3
#endif /* L_xload_3 */

#if defined (L_xload_4)
DEFUN __xload_4
    sbrc    HHI8, 7
    rjmp    1f
#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload  D0, 4
    .xload  D1, 4
    .xload  D2, 4
    .xload  D3, 4
    ret
1:  ld      D0, Z+
    ld      D1, Z+
    ld      D2, Z+
    ld      D3, Z+
    ret
ENDF __xload_4
#endif /* L_xload_4 */

#endif /* L_xload_{1|2|3|4} */
#endif /* if !defined (__AVR_TINY__) */
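
;; The __xload_<n> routines implement loads from the 24-bit __memx
;; address space: bit 7 of R21 selects RAM (set) or flash (clear).
;; C model of the one-byte case (sketch only; flash_read is a stand-in
;; name, not a real API):
;;
;;     #include <stdint.h>
;;     static uint8_t xload_1 (uint32_t addr)   /* addr = R21:Z */
;;     {
;;         if (addr & 0x800000UL)
;;             return *(const uint8_t*) (uint16_t) addr;  /* ld      */
;;         return flash_read (addr);                      /* (e)lpm  */
;;     }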

#if !defined (__AVR_TINY__)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Memory copy from address space __pgmx to RAM
;; R23:Z = Source Address
;; X     = Destination Address
;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z

#if defined (L_movmemx)

#define HHI8  23
#define LOOP  24

DEFUN __movmemx_qi
    ;; #Bytes to copy fits in 8 Bits (1..255)
    ;; Zero-extend Loop Counter
    clr     LOOP+1
    ;; FALLTHRU
ENDF __movmemx_qi

DEFUN __movmemx_hi

;; Read from where?
    sbrc    HHI8, 7
    rjmp    1f

;; Read from Flash

#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8
#endif

0:  ;; Load 1 Byte from Flash...

#if defined (__AVR_HAVE_ELPMX__)
    elpm    r0, Z+
#elif defined (__AVR_HAVE_ELPM__)
    elpm
    adiw    r30, 1
    adc     HHI8, __zero_reg__
    out     __RAMPZ__, HHI8
#elif defined (__AVR_HAVE_LPMX__)
    lpm     r0, Z+
#else
    lpm
    adiw    r30, 1
#endif

    ;; ...and store that Byte to RAM Destination
    st      X+, r0
    sbiw    LOOP, 1
    brne    0b
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out     __RAMPZ__, __zero_reg__
#endif /* ELPM && RAMPD */
    ret

;; Read from RAM

1:  ;; Read 1 Byte from RAM...
    ld      r0, Z+
    ;; and store that Byte to RAM Destination
    st      X+, r0
    sbiw    LOOP, 1
    brne    1b
    ret
ENDF __movmemx_hi

#undef HHI8
#undef LOOP

#endif /* L_movmemx */
#endif /* !defined (__AVR_TINY__) */
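
;; C model of __movmemx_hi (sketch only): a memcpy whose source lives
;; in flash or RAM depending on bit 7 of R23; flash_read is a stand-in
;; name, not a real API.
;;
;;     #include <stddef.h>
;;     #include <stdint.h>
;;     static void movmemx (uint8_t *dst, uint32_t src, size_t len)
;;     {
;;         while (len--)
;;             *dst++ = (src & 0x800000UL)
;;                      ? *(const uint8_t*) (uint16_t) src++  /* RAM   */
;;                      : flash_read (src++);                 /* flash */
;;     }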


.section .text.libgcc.builtins, "ax", @progbits

/**********************************
 * Find first set Bit (ffs)
 **********************************/

#if defined (L_ffssi2)
;; find first set bit
;; r25:r24 = ffs32 (r25:r22)
;; clobbers: r22, r26
DEFUN __ffssi2
    clr  r26
    tst  r22
    brne 1f
    subi r26, -8
    or   r22, r23
    brne 1f
    subi r26, -8
    or   r22, r24
    brne 1f
    subi r26, -8
    or   r22, r25
    brne 1f
    ret
1:  mov  r24, r22
    XJMP __loop_ffsqi2
ENDF __ffssi2
#endif /* defined (L_ffssi2) */

#if defined (L_ffshi2)
;; find first set bit
;; r25:r24 = ffs16 (r25:r24)
;; clobbers: r26
DEFUN __ffshi2
    clr  r26
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have problems skipping a 2-word instruction
    tst  r24
    breq 2f
#else
    cpse r24, __zero_reg__
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1:  XJMP __loop_ffsqi2
2:  ldi  r26, 8
    or   r24, r25
    brne 1b
    ret
ENDF __ffshi2
#endif /* defined (L_ffshi2) */

#if defined (L_loop_ffsqi2)
;; Helper for ffshi2, ffssi2
;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
;; r24 must be != 0
;; clobbers: r26
DEFUN __loop_ffsqi2
    inc  r26
    lsr  r24
    brcc __loop_ffsqi2
    mov  r24, r26
    clr  r25
    ret
ENDF __loop_ffsqi2
#endif /* defined (L_loop_ffsqi2) */
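
;; Taken together: the 32- and 16-bit entry points locate the lowest
;; non-zero byte, accumulating a bit offset in r26, and __loop_ffsqi2
;; then scans that byte.  C model of the helper (sketch only):
;;
;;     static int ffs8_loop (unsigned char x, int base)  /* x != 0 */
;;     {
;;         int n = base, carry;
;;         do { n++; carry = x & 1; x >>= 1; } while (!carry);
;;         return n;
;;     }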


/**********************************
 * Count trailing Zeros (ctz)
 **********************************/

#if defined (L_ctzsi2)
;; count trailing zeros
;; r25:r24 = ctz32 (r25:r22)
;; clobbers: r26, r22
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzsi2
    XCALL __ffssi2
    dec  r24
    ret
ENDF __ctzsi2
#endif /* defined (L_ctzsi2) */

#if defined (L_ctzhi2)
;; count trailing zeros
;; r25:r24 = ctz16 (r25:r24)
;; clobbers: r26
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzhi2
    XCALL __ffshi2
    dec  r24
    ret
ENDF __ctzhi2
#endif /* defined (L_ctzhi2) */


/**********************************
 * Count leading Zeros (clz)
 **********************************/

#if defined (L_clzdi2)
;; count leading zeros
;; r25:r24 = clz64 (r25:r18)
;; clobbers: r22, r23, r26
DEFUN __clzdi2
    XCALL __clzsi2
    sbrs r24, 5
    ret
    mov_l r22, r18
    mov_h r23, r19
    mov_l r24, r20
    mov_h r25, r21
    XCALL __clzsi2
    subi r24, -32
    ret
ENDF __clzdi2
#endif /* defined (L_clzdi2) */

#if defined (L_clzsi2)
;; count leading zeros
;; r25:r24 = clz32 (r25:r22)
;; clobbers: r26
DEFUN __clzsi2
    XCALL __clzhi2
    sbrs r24, 4
    ret
    mov_l r24, r22
    mov_h r25, r23
    XCALL __clzhi2
    subi r24, -16
    ret
ENDF __clzsi2
#endif /* defined (L_clzsi2) */

#if defined (L_clzhi2)
;; count leading zeros
;; r25:r24 = clz16 (r25:r24)
;; clobbers: r26
DEFUN __clzhi2
    clr  r26
    tst  r25
    brne 1f
    subi r26, -8
    or   r25, r24
    brne 1f
    ldi  r24, 16
    ret
1:  cpi  r25, 16
    brsh 3f
    subi r26, -3
    swap r25
2:  inc  r26
3:  lsl  r25
    brcc 2b
    mov  r24, r26
    clr  r25
    ret
ENDF __clzhi2
#endif /* defined (L_clzhi2) */
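
;; __clzhi2 skips a zero high byte in one step (+8), and when the
;; remaining byte is below 16 it pre-counts 3 and swaps nibbles so the
;; final shift loop runs at most 4 times.  Plain C model (sketch only;
;; the asm short-cuts are an optimization of this):
;;
;;     static int clz16 (unsigned x)    /* 16-bit x */
;;     {
;;         int n = 0;
;;         if (x == 0) return 16;
;;         while (!(x & 0x8000u)) { x <<= 1; n++; }
;;         return n;
;;     }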


/**********************************
 * Parity
 **********************************/

#if defined (L_paritydi2)
;; r25:r24 = parity64 (r25:r18)
;; clobbers: __tmp_reg__
DEFUN __paritydi2
    eor  r24, r18
    eor  r24, r19
    eor  r24, r20
    eor  r24, r21
    XJMP __paritysi2
ENDF __paritydi2
#endif /* defined (L_paritydi2) */

#if defined (L_paritysi2)
;; r25:r24 = parity32 (r25:r22)
;; clobbers: __tmp_reg__
DEFUN __paritysi2
    eor  r24, r22
    eor  r24, r23
    XJMP __parityhi2
ENDF __paritysi2
#endif /* defined (L_paritysi2) */

#if defined (L_parityhi2)
;; r25:r24 = parity16 (r25:r24)
;; clobbers: __tmp_reg__
DEFUN __parityhi2
    eor  r24, r25
;; FALLTHRU
ENDF __parityhi2

;; r25:r24 = parity8 (r24)
;; clobbers: __tmp_reg__
DEFUN __parityqi2
    ;; parity is in r24[0..7]
    mov  __tmp_reg__, r24
    swap __tmp_reg__
    eor  r24, __tmp_reg__
    ;; parity is in r24[0..3]
    subi r24, -4
    andi r24, -5
    subi r24, -6
    ;; parity is in r24[0,3]
    sbrc r24, 3
    inc  r24
    ;; parity is in r24[0]
    andi r24, 1
    clr  r25
    ret
ENDF __parityqi2
#endif /* defined (L_parityhi2) */
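
;; Parity is computed by XOR-folding: the wider entry points fold
;; 64->32->16->8 bits, then __parityqi2 folds the nibbles and finishes
;; with an add/mask trick.  C model of the byte step (sketch only; the
;; asm replaces the last two folds with the subi/andi sequence):
;;
;;     static unsigned char parity8 (unsigned char x)
;;     {
;;         x ^= x >> 4;
;;         x ^= x >> 2;
;;         x ^= x >> 1;
;;         return x & 1;
;;     }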


/**********************************
 * Population Count
 **********************************/

#if defined (L_popcounthi2)
;; population count
;; r25:r24 = popcount16 (r25:r24)
;; clobbers: __tmp_reg__
DEFUN __popcounthi2
    XCALL __popcountqi2
    push r24
    mov  r24, r25
    XCALL __popcountqi2
    clr  r25
    ;; FALLTHRU
ENDF __popcounthi2

DEFUN __popcounthi2_tail
    pop   __tmp_reg__
    add   r24, __tmp_reg__
    ret
ENDF __popcounthi2_tail
#endif /* defined (L_popcounthi2) */

#if defined (L_popcountsi2)
;; population count
;; r25:r24 = popcount32 (r25:r22)
;; clobbers: __tmp_reg__
DEFUN __popcountsi2
    XCALL __popcounthi2
    push  r24
    mov_l r24, r22
    mov_h r25, r23
    XCALL __popcounthi2
    XJMP  __popcounthi2_tail
ENDF __popcountsi2
#endif /* defined (L_popcountsi2) */

#if defined (L_popcountdi2)
;; population count
;; r25:r24 = popcount64 (r25:r18)
;; clobbers: r22, r23, __tmp_reg__
DEFUN __popcountdi2
    XCALL __popcountsi2
    push  r24
    mov_l r22, r18
    mov_h r23, r19
    mov_l r24, r20
    mov_h r25, r21
    XCALL __popcountsi2
    XJMP  __popcounthi2_tail
ENDF __popcountdi2
#endif /* defined (L_popcountdi2) */

#if defined (L_popcountqi2)
;; population count
;; r24 = popcount8 (r24)
;; clobbers: __tmp_reg__
DEFUN __popcountqi2
    mov  __tmp_reg__, r24
    andi r24, 1
    lsr  __tmp_reg__
    lsr  __tmp_reg__
    adc  r24, __zero_reg__
    lsr  __tmp_reg__
    adc  r24, __zero_reg__
    lsr  __tmp_reg__
    adc  r24, __zero_reg__
    lsr  __tmp_reg__
    adc  r24, __zero_reg__
    lsr  __tmp_reg__
    adc  r24, __zero_reg__
    lsr  __tmp_reg__
    adc  r24, __tmp_reg__
    ret
ENDF __popcountqi2
#endif /* defined (L_popcountqi2) */
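
;; __popcountqi2 shifts the byte through the carry flag and accumulates
;; the set bits with adc, branch-free.  C model (sketch only):
;;
;;     static unsigned char popcount8 (unsigned char x)
;;     {
;;         unsigned char n = 0;
;;         while (x) { n += x & 1; x >>= 1; }
;;         return n;
;;     }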


/**********************************
 * Swap bytes
 **********************************/

;; swap two registers with different register numbers
.macro bswap a, b
    eor \a, \b
    eor \b, \a
    eor \a, \b
.endm

#if defined (L_bswapsi2)
;; swap bytes
;; r25:r22 = bswap32 (r25:r22)
DEFUN __bswapsi2
    bswap r22, r25
    bswap r23, r24
    ret
ENDF __bswapsi2
#endif /* defined (L_bswapsi2) */

#if defined (L_bswapdi2)
;; swap bytes
;; r25:r18 = bswap64 (r25:r18)
DEFUN __bswapdi2
    bswap r18, r25
    bswap r19, r24
    bswap r20, r23
    bswap r21, r22
    ret
ENDF __bswapdi2
#endif /* defined (L_bswapdi2) */
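
;; C model of the 32-bit swap (sketch only); the asm reverses the byte
;; order in place with the XOR-swap macro above.
;;
;;     #include <stdint.h>
;;     static uint32_t bswap32 (uint32_t x)
;;     {
;;         return (x >> 24) | ((x >> 8) & 0xff00UL)
;;                | ((x & 0xff00UL) << 8) | (x << 24);
;;     }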


/**********************************
 * 64-bit shifts
 **********************************/

#if defined (L_ashrdi3)
;; Arithmetic shift right
;; r25:r18 = ashr64 (r25:r18, r17:r16)
DEFUN __ashrdi3
    bst     r25, 7
    bld     __zero_reg__, 0
    ;; FALLTHRU
ENDF  __ashrdi3

;; Logical shift right
;; r25:r18 = lshr64 (r25:r18, r17:r16)
DEFUN __lshrdi3
    lsr     __zero_reg__
    sbc     __tmp_reg__, __tmp_reg__
    push    r16
0:  cpi     r16, 8
    brlo 2f
    subi    r16, 8
    mov     r18, r19
    mov     r19, r20
    mov     r20, r21
    mov     r21, r22
    mov     r22, r23
    mov     r23, r24
    mov     r24, r25
    mov     r25, __tmp_reg__
    rjmp 0b
1:  asr     __tmp_reg__
    ror     r25
    ror     r24
    ror     r23
    ror     r22
    ror     r21
    ror     r20
    ror     r19
    ror     r18
2:  dec     r16
    brpl 1b
    pop     r16
    ret
ENDF __lshrdi3
#endif /* defined (L_ashrdi3) */
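
;; The shifters first move whole bytes while the count is at least 8,
;; then finish bit by bit; __ashrdi3 seeds the incoming bits from the
;; sign, __lshrdi3 from zero.  C model (sketch only):
;;
;;     #include <stdint.h>
;;     static uint64_t lshr64 (uint64_t x, unsigned n)  /* n < 64 */
;;     {
;;         while (n >= 8) { x >>= 8; n -= 8; }  /* byte moves */
;;         while (n--)     x >>= 1;             /* bit shifts */
;;         return x;
;;     }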

#if defined (L_ashldi3)
;; Shift left
;; r25:r18 = ashl64 (r25:r18, r17:r16)
DEFUN __ashldi3
    push    r16
0:  cpi     r16, 8
    brlo 2f
    mov     r25, r24
    mov     r24, r23
    mov     r23, r22
    mov     r22, r21
    mov     r21, r20
    mov     r20, r19
    mov     r19, r18
    clr     r18
    subi    r16, 8
    rjmp 0b
1:  lsl     r18
    rol     r19
    rol     r20
    rol     r21
    rol     r22
    rol     r23
    rol     r24
    rol     r25
2:  dec     r16
    brpl 1b
    pop     r16
    ret
ENDF __ashldi3
#endif /* defined (L_ashldi3) */

#if defined (L_rotldi3)
;; Rotate left
;; r25:r18 = rotl64 (r25:r18, r17:r16)
DEFUN __rotldi3
    push    r16
0:  cpi     r16, 8
    brlo 2f
    subi    r16, 8
    mov     __tmp_reg__, r25
    mov     r25, r24
    mov     r24, r23
    mov     r23, r22
    mov     r22, r21
    mov     r21, r20
    mov     r20, r19
    mov     r19, r18
    mov     r18, __tmp_reg__
    rjmp 0b
1:  lsl     r18
    rol     r19
    rol     r20
    rol     r21
    rol     r22
    rol     r23
    rol     r24
    rol     r25
    adc     r18, __zero_reg__
2:  dec     r16
    brpl 1b
    pop     r16
    ret
ENDF __rotldi3
#endif /* defined (L_rotldi3) */


.section .text.libgcc.fmul, "ax", @progbits

/***********************************************************/
;;; Softmul versions of FMUL, FMULS and FMULSU to implement
;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
/***********************************************************/

#define A1 24
#define B1 25
#define C0 22
#define C1 23
#define A0 __tmp_reg__

#ifdef L_fmuls
;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmuls
    ;; A0.7 = negate result?
    mov  A0, A1
    eor  A0, B1
    ;; B1 = |B1|
    sbrc B1, 7
    neg  B1
    XJMP __fmulsu_exit
ENDF __fmuls
#endif /* L_fmuls */

#ifdef L_fmulsu
;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmulsu
    ;; A0.7 = negate result?
    mov  A0, A1
;; FALLTHRU
ENDF __fmulsu

;; Helper for __fmuls and __fmulsu
DEFUN __fmulsu_exit
    ;; A1 = |A1|
    sbrc A1, 7
    neg  A1
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have problems skipping a 2-word instruction
    tst  A0
    brmi 1f
#else
    sbrs A0, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XJMP  __fmul
1:  XCALL __fmul
    ;; C = -C iff A0.7 = 1
    NEG2 C0
    ret
ENDF __fmulsu_exit
#endif /* L_fmulsu */


#ifdef L_fmul
;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmul
    ; clear result
    clr   C0
    clr   C1
    clr   A0
1:  tst   B1
    ;; 1.0 = 0x80, so test bit 7 of B to see if A must be added to C.
2:  brpl  3f
    ;; C += A
    add   C0, A0
    adc   C1, A1
3:  ;; A >>= 1
    lsr   A1
    ror   A0
    ;; B <<= 1
    lsl   B1
    brne  2b
    ret
ENDF __fmul
#endif /* L_fmul */
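
;; __fmul forms the 1.7 x 1.7 -> 1.15 fractional product by
;; shift-and-add: each round tests the MSB of B, conditionally adds A
;; into C, halves A and doubles B.  C model of the result (sketch
;; only):
;;
;;     #include <stdint.h>
;;     static uint16_t fmul8 (uint8_t a, uint8_t b)
;;     {
;;         return (uint16_t) (((uint32_t) a * b) << 1);  /* like FMUL */
;;     }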

#undef A0
#undef A1
#undef B1
#undef C0
#undef C1

#include "lib1funcs-fixed.S"
