/* -*- Mode: Asm -*- */
/* Copyright (C) 1998-2015 Free Software Foundation, Inc.
   Contributed by Denis Chertykov <chertykov@gmail.com>

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#if defined (__AVR_TINY__)
#define __zero_reg__ r17
#define __tmp_reg__ r16
#else
#define __zero_reg__ r1
#define __tmp_reg__ r0
#endif
#define __SREG__ 0x3f
#if defined (__AVR_HAVE_SPH__)
#define __SP_H__ 0x3e
#endif
#define __SP_L__ 0x3d
#define __RAMPZ__ 0x3B
#define __EIND__ 0x3C

/* Most of the functions here are called directly from avr.md
   patterns, instead of using the standard libcall mechanisms.
   This can make better code because GCC knows exactly which
   of the call-used registers (not all of them) are clobbered.  */

/* FIXME:  At present, there is no SORT directive in the linker
           script so that we must not assume that different modules
           in the same input section like .libgcc.text.mul will be
           located close together.  Therefore, we cannot use
           RCALL/RJMP to call a function like __udivmodhi4 from
           __divmodhi4 and have to use lengthy XCALL/XJMP even
           though they are in the same input section and all same
           input sections together are small enough to reach every
           location with a RCALL/RJMP instruction.  */
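
;; Illustrative example (a sketch, not part of the original sources):
;; for a C function like
;;     int mul16 (int a, int b) { return a * b; }
;; a device without the MUL instruction gets a call to __mulhi3 with the
;; operands in R23:R22 and R25:R24 and the product returned in R25:R24
;; (see the "Clobbers:" comments below); GCC then assumes only the
;; registers listed there are destroyed, not the whole call-used set.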

#if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
#error device not supported
#endif

.macro mov_l r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
    movw \r_dest, \r_src
#else
    mov \r_dest, \r_src
#endif
.endm

.macro mov_h r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
    ; empty
#else
    mov \r_dest, \r_src
#endif
.endm

.macro wmov r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
    movw \r_dest, \r_src
#else
    mov \r_dest, \r_src
    mov \r_dest+1, \r_src+1
#endif
.endm

#if defined (__AVR_HAVE_JMP_CALL__)
#define XCALL call
#define XJMP  jmp
#else
#define XCALL rcall
#define XJMP  rjmp
#endif

#if defined (__AVR_HAVE_EIJMP_EICALL__)
#define XICALL eicall
#define XIJMP  eijmp
#else
#define XICALL icall
#define XIJMP  ijmp
#endif

;; Prologue stuff

.macro do_prologue_saves n_pushed n_frame=0
    ldi r26, lo8(\n_frame)
    ldi r27, hi8(\n_frame)
    ldi r30, lo8(gs(.L_prologue_saves.\@))
    ldi r31, hi8(gs(.L_prologue_saves.\@))
    XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
.L_prologue_saves.\@:
.endm

;; Epilogue stuff

.macro do_epilogue_restores n_pushed n_frame=0
    in r28, __SP_L__
#ifdef __AVR_HAVE_SPH__
    in r29, __SP_H__
.if \n_frame > 63
    subi r28, lo8(-\n_frame)
    sbci r29, hi8(-\n_frame)
.elseif \n_frame > 0
    adiw r28, \n_frame
.endif
#else
    clr r29
.if \n_frame > 0
    subi r28, lo8(-\n_frame)
.endif
#endif /* HAVE SPH */
    ldi r30, \n_pushed
    XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
.endm
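
;; Worked example (illustrative): "do_prologue_saves 10" jumps to
;; __prologue_saves__ + (18 - 10) * 2, i.e. it skips the first 8 PUSH
;; instructions (1 word = 2 bytes each) and thus saves only R10...R17
;; plus Y; "do_epilogue_restores 10" enters __epilogue_restores__ at
;; the analogous offset.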

;; Support function entry and exit for convenience

.macro wsubi r_arg1, i_arg2
#if defined (__AVR_TINY__)
    subi \r_arg1,   lo8(\i_arg2)
    sbci \r_arg1+1, hi8(\i_arg2)
#else
    sbiw \r_arg1, \i_arg2
#endif
.endm

.macro waddi r_arg1, i_arg2
#if defined (__AVR_TINY__)
    subi \r_arg1,   lo8(-\i_arg2)
    sbci \r_arg1+1, hi8(-\i_arg2)
#else
    adiw \r_arg1, \i_arg2
#endif
.endm

.macro DEFUN name
.global \name
.func \name
\name:
.endm

.macro ENDF name
.size \name, .-\name
.endfunc
.endm

.macro FALIAS name
.global \name
.func \name
\name:
.size \name, .-\name
.endfunc
.endm

;; Skip next instruction, typically a jump target
#if defined(__AVR_TINY__)
#define skip cpse 0,0
#else
#define skip cpse 16,16
#endif

;; Negate a 2-byte value held in consecutive registers
.macro NEG2 reg
    com \reg+1
    neg \reg
    sbci \reg+1, -1
.endm

;; Negate a 4-byte value held in consecutive registers
;; Sets the V flag for signed overflow tests if REG >= 16
.macro NEG4 reg
    com \reg+3
    com \reg+2
    com \reg+1
.if \reg >= 16
    neg \reg
    sbci \reg+1, -1
    sbci \reg+2, -1
    sbci \reg+3, -1
.else
    com \reg
    adc \reg,   __zero_reg__
    adc \reg+1, __zero_reg__
    adc \reg+2, __zero_reg__
    adc \reg+3, __zero_reg__
.endif
.endm

#define exp_lo(N)  hlo8 ((N) << 23)
#define exp_hi(N)  hhi8 ((N) << 23)


.section .text.libgcc.mul, "ax", @progbits

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/* Note: mulqi3, mulhi3 are open-coded on the enhanced core.  */
#if !defined (__AVR_HAVE_MUL__)
/*******************************************************
    Multiplication 8 x 8 without MUL
*******************************************************/
#if defined (L_mulqi3)

#define r_arg2 r22 /* multiplicand */
#define r_arg1 r24 /* multiplier */
#define r_res __tmp_reg__ /* result */

DEFUN __mulqi3
    clr r_res           ; clear result
__mulqi3_loop:
    sbrc r_arg1,0
    add r_res,r_arg2
    add r_arg2,r_arg2   ; shift multiplicand
    breq __mulqi3_exit  ; exit when multiplicand == 0
    lsr r_arg1          ;
    brne __mulqi3_loop  ; loop while multiplier != 0
__mulqi3_exit:
    mov r_arg1,r_res    ; result to return register
    ret
ENDF __mulqi3

#undef r_arg2
#undef r_arg1
#undef r_res

#endif /* defined (L_mulqi3) */


/*******************************************************
    Widening Multiplication 16 = 8 x 8 without MUL
    Multiplication 16 x 16 without MUL
*******************************************************/

#define A0  22
#define A1  23
#define B0  24
#define BB0 20
#define B1  25
;; Output overlaps input, thus expand result in CC0/1
#define C0  24
#define C1  25
#define CC0 __tmp_reg__
#define CC1 21

#if defined (L_umulqihi3)
;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0
;;; Clobbers: __tmp_reg__, R21..R23
DEFUN __umulqihi3
    clr A1
    clr B1
    XJMP __mulhi3
ENDF __umulqihi3
#endif /* L_umulqihi3 */

#if defined (L_mulqihi3)
;;; R25:R24 = (signed int) R22 * (signed int) R24
;;; (C1:C0) = (signed int) A0 * (signed int) B0
;;; Clobbers: __tmp_reg__, R20..R23
DEFUN __mulqihi3
    ;; Sign-extend B0
    clr B1
    sbrc B0, 7
    com B1
    ;; The multiplication runs twice as fast if A1 is zero, thus:
    ;; Zero-extend A0
    clr A1
#ifdef __AVR_HAVE_JMP_CALL__
    ;; Store  B0 * sign of A
    clr BB0
    sbrc A0, 7
    mov BB0, B0
    call __mulhi3
#else /* have no CALL */
    ;; Skip sign-extension of A if A >= 0
    ;; Same size as with the first alternative but avoids errata skip
    ;; and is faster if A >= 0
    sbrs A0, 7
    rjmp __mulhi3
    ;; If  A < 0  store B
    mov BB0, B0
    rcall __mulhi3
#endif /* HAVE_JMP_CALL */
    ;; 1-extend A after the multiplication
    sub C1, BB0
    ret
ENDF __mulqihi3
#endif /* L_mulqihi3 */

#if defined (L_mulhi3)
;;; R25:R24 = R23:R22 * R25:R24
;;; (C1:C0) = (A1:A0) * (B1:B0)
;;; Clobbers: __tmp_reg__, R21..R23
DEFUN __mulhi3

    ;; Clear result
    clr CC0
    clr CC1
    rjmp 3f
1:
    ;; Bit n of A is 1  -->  C += B << n
    add CC0, B0
    adc CC1, B1
2:
    lsl B0
    rol B1
3:
    ;; If B == 0 we are ready
    wsubi B0, 0
    breq 9f

    ;; Carry = n-th bit of A
    lsr A1
    ror A0
    ;; If bit n of A is set, then go add  B * 2^n  to  C
    brcs 1b

    ;; Carry = 0  -->  The ROR above acts like  CP A0, 0
    ;; Thus, it is sufficient to CPC the high part to test A against 0
    cpc A1, __zero_reg__
    ;; Only proceed if A != 0
    brne 2b
9:
    ;; Move Result into place
    mov C0, CC0
    mov C1, CC1
    ret
ENDF __mulhi3
#endif /* L_mulhi3 */

#undef A0
#undef A1
#undef B0
#undef BB0
#undef B1
#undef C0
#undef C1
#undef CC0
#undef CC1
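
;; C model of the shift-and-add loop in __mulhi3 above (illustrative
;; sketch; the asm folds the exit tests into the carry chain and leaves
;; the loop as soon as A or B becomes 0):
;;     uint16_t mul (uint16_t a, uint16_t b)
;;     {
;;         uint16_t c = 0;
;;         while (a && b)
;;         {
;;             if (a & 1)
;;                 c += b;
;;             b <<= 1;
;;             a >>= 1;
;;         }
;;         return c;
;;     }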

#define A0 22
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3

#define B0 18
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3

#define CC0 26
#define CC1 CC0+1
#define CC2 30
#define CC3 CC2+1

#define C0 22
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3

/*******************************************************
    Widening Multiplication 32 = 16 x 16 without MUL
*******************************************************/

#if defined (L_umulhisi3)
DEFUN __umulhisi3
    wmov B0, 24
    ;; Zero-extend B
    clr B2
    clr B3
    ;; Zero-extend A
    wmov A2, B2
    XJMP __mulsi3
ENDF __umulhisi3
#endif /* L_umulhisi3 */

#if defined (L_mulhisi3)
DEFUN __mulhisi3
    wmov B0, 24
    ;; Sign-extend B
    lsl r25
    sbc B2, B2
    mov B3, B2
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Sign-extend A
    clr A2
    sbrc A1, 7
    com A2
    mov A3, A2
    XJMP __mulsi3
#else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */
    ;; Zero-extend A and __mulsi3 will run at least twice as fast
    ;; compared to a sign-extended A.
    clr A2
    clr A3
    sbrs A1, 7
    XJMP __mulsi3
    ;; If A < 0, the  B * 0xffff0000  part is accounted for ahead of
    ;; the actual multiplication by initializing the high part of the
    ;; result CC with -B.
    wmov CC2, A2
    sub CC2, B0
    sbc CC3, B1
    XJMP __mulsi3_helper
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
ENDF __mulhisi3
#endif /* L_mulhisi3 */


/*******************************************************
    Multiplication 32 x 32 without MUL
*******************************************************/

#if defined (L_mulsi3)
DEFUN __mulsi3
#if defined (__AVR_TINY__)
    in r26, __SP_L__ ; safe to use X, as it is CC0/CC1
    in r27, __SP_H__
    subi r26, lo8(-3) ; Add 3 to point past return address
    sbci r27, hi8(-3)
    push B0 ; save callee saved regs
    push B1
    ld B0, X+ ; load from caller stack
    ld B1, X+
    ld B2, X+
    ld B3, X
#endif
    ;; Clear result
    clr CC2
    clr CC3
    ;; FALLTHRU
ENDF __mulsi3

DEFUN __mulsi3_helper
    clr CC0
    clr CC1
    rjmp 3f

1:  ;; If bit n of A is set, then add  B * 2^n  to the result in CC
    ;; CC += B
    add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3

2:  ;; B <<= 1
    lsl B0 $ rol B1 $ rol B2 $ rol B3

3:  ;; A >>= 1:  Carry = n-th bit of A
    lsr A3 $ ror A2 $ ror A1 $ ror A0

    brcs 1b
    ;; Only continue if  A != 0
    sbci A1, 0
    brne 2b
    wsubi A2, 0
    brne 2b

    ;; All bits of A are consumed:  Copy result to return register C
    wmov C0, CC0
    wmov C2, CC2
#if defined (__AVR_TINY__)
    pop B1 ; restore callee saved regs
    pop B0
#endif /* defined (__AVR_TINY__) */

    ret
ENDF __mulsi3_helper
#endif /* L_mulsi3 */
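
;; Why initializing CC with -B (in __mulhisi3 above) yields the signed
;; product (illustrative): if A < 0, its unsigned reading is
;; uA = A + 2^16, so  uA * B = A*B + (B << 16)  modulo 2^32; pre-loading
;; the high part of the result with -B cancels the extra (B << 16) term.
;; The same identity is behind the "one-extend" corrections used
;; elsewhere in this file.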

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#undef CC0
#undef CC1
#undef CC2
#undef CC3

#endif /* !defined (__AVR_HAVE_MUL__) */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#if defined (__AVR_HAVE_MUL__)
#define A0 26
#define B0 18
#define C0 22

#define A1 A0+1

#define B1 B0+1
#define B2 B0+2
#define B3 B0+3

#define C1 C0+1
#define C2 C0+2
#define C3 C0+3

/*******************************************************
    Widening Multiplication 32 = 16 x 16 with MUL
*******************************************************/

#if defined (L_mulhisi3)
;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
;;; C3:C0   = (signed long) A1:A0   * (signed long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulhisi3
    XCALL __umulhisi3
    ;; Sign-extend B
    tst B1
    brpl 1f
    sub C2, A0
    sbc C3, A1
1:  ;; Sign-extend A
    XJMP __usmulhisi3_tail
ENDF __mulhisi3
#endif /* L_mulhisi3 */

#if defined (L_usmulhisi3)
;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0   = (signed long) A1:A0   * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __usmulhisi3
    XCALL __umulhisi3
    ;; FALLTHRU
ENDF __usmulhisi3

DEFUN __usmulhisi3_tail
    ;; Sign-extend A
    sbrs A1, 7
    ret
    sub C2, B0
    sbc C3, B1
    ret
ENDF __usmulhisi3_tail
#endif /* L_usmulhisi3 */

#if defined (L_umulhisi3)
;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0   = (unsigned long) A1:A0   * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __umulhisi3
    mul A0, B0
    movw C0, r0
    mul A1, B1
    movw C2, r0
    mul A0, B1
#ifdef __AVR_HAVE_JMP_CALL__
    ;; This function is used by many other routines, often multiple times.
    ;; Therefore, if the flash size is not too limited, avoid the RCALL
    ;; and invest 6 Bytes to speed things up.
    add C1, r0
    adc C2, r1
    clr __zero_reg__
    adc C3, __zero_reg__
#else
    rcall 1f
#endif
    mul A1, B0
1:  add C1, r0
    adc C2, r1
    clr __zero_reg__
    adc C3, __zero_reg__
    ret
ENDF __umulhisi3
#endif /* L_umulhisi3 */

/*******************************************************
    Widening Multiplication 32 = 16 x 32 with MUL
*******************************************************/

#if defined (L_mulshisi3)
;;; R25:R22 = (signed long) R27:R26 * R21:R18
;;; (C3:C0) = (signed long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulshisi3
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have a problem skipping a 2-word instruction
    tst A1
    brmi __mulohisi3
#else
    sbrs A1, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XJMP __muluhisi3
    ;; FALLTHRU
ENDF __mulshisi3

;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
;;; (C3:C0) = (one-extended long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulohisi3
    XCALL __muluhisi3
    ;; One-extend R27:R26 (A1:A0)
    sub C2, B0
    sbc C3, B1
    ret
ENDF __mulohisi3
#endif /* L_mulshisi3 */

#if defined (L_muluhisi3)
;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
;;; (C3:C0) = (unsigned long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __muluhisi3
    XCALL __umulhisi3
    mul A0, B3
    add C3, r0
    mul A1, B2
    add C3, r0
    mul A0, B2
    add C2, r0
    adc C3, r1
    clr __zero_reg__
    ret
ENDF __muluhisi3
#endif /* L_muluhisi3 */

/*******************************************************
    Multiplication 32 x 32 with MUL
*******************************************************/

#if defined (L_mulsi3)
;;; R25:R22 = R25:R22 * R21:R18
;;; (C3:C0) = C3:C0   * B3:B0
;;; Clobbers: R26, R27, __tmp_reg__
DEFUN __mulsi3
    movw A0, C0
    push C2
    push C3
    XCALL __muluhisi3
    pop A1
    pop A0
    ;; A1:A0 now contains the high word of A
    mul A0, B0
    add C2, r0
    adc C3, r1
    mul A0, B1
    add C3, r0
    mul A1, B0
    add C3, r0
    clr __zero_reg__
    ret
ENDF __mulsi3
#endif /* L_mulsi3 */

#undef A0
#undef A1

#undef B0
#undef B1
#undef B2
#undef B3

#undef C0
#undef C1
#undef C2
#undef C3

#endif /* __AVR_HAVE_MUL__ */
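
;; Word-level decomposition used by __mulsi3 above (illustrative): with
;; 16-bit halves  A = Ahi:Alo  and  B = Bhi:Blo,
;;     A * B mod 2^32  =  Alo*B  +  ((Ahi*Blo mod 2^16) << 16)
;; i.e. __muluhisi3 supplies the Alo*B part and the three trailing MULs
;; add the truncated Ahi*Blo contribution.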

/*******************************************************
    Multiplication 24 x 24
*******************************************************/

#if defined (L_mulpsi3)

;; A[0..2]: In: Multiplicand; Out: Product
#define A0 22
#define A1 A0+1
#define A2 A0+2

;; B[0..2]: In: Multiplier
#define B0 18
#define B1 B0+1
#define B2 B0+2

#if defined (__AVR_HAVE_MUL__)

;; C[0..2]: Expand Result
#define C0 22
#define C1 C0+1
#define C2 C0+2

;; R24:R22 *= R20:R18
;; Clobbers: r21, r25, r26, r27, __tmp_reg__

#define AA0 26
#define AA2 21

DEFUN __mulpsi3
    wmov AA0, A0
    mov AA2, A2
    XCALL __umulhisi3
    mul AA2, B0 $ add C2, r0
    mul AA0, B2 $ add C2, r0
    clr __zero_reg__
    ret
ENDF __mulpsi3

#undef AA2
#undef AA0

#undef C2
#undef C1
#undef C0

#else /* !HAVE_MUL */
;; C[0..2]: Expand Result
#if defined (__AVR_TINY__)
#define C0 16
#else
#define C0 0
#endif /* defined (__AVR_TINY__) */
#define C1 C0+1
#define C2 21

;; R24:R22 *= R20:R18
;; Clobbers: __tmp_reg__, R18, R19, R20, R21

DEFUN __mulpsi3
#if defined (__AVR_TINY__)
    in r26,__SP_L__
    in r27,__SP_H__
    subi r26, lo8(-3)   ; Add 3 to point past return address
    sbci r27, hi8(-3)
    push B0             ; save callee saved regs
    push B1
    ld B0,X+            ; load from caller stack
    ld B1,X+
    ld B2,X+
#endif /* defined (__AVR_TINY__) */

    ;; C[] = 0
    clr __tmp_reg__
    clr C2

0:  ;; Shift N-th Bit of B[] into Carry.  N = 24 - Loop
    LSR B2 $ ror B1 $ ror B0

    ;; If the N-th Bit of B[] was set...
    brcc 1f

    ;; ...then add A[] * 2^N to the Result C[]
    ADD C0,A0 $ adc C1,A1 $ adc C2,A2

1:  ;; Multiply A[] by 2
    LSL A0 $ rol A1 $ rol A2

    ;; Loop until B[] is 0
    subi B0,0 $ sbci B1,0 $ sbci B2,0
    brne 0b

    ;; Copy C[] to the return Register A[]
    wmov A0, C0
    mov A2, C2

    clr __zero_reg__
#if defined (__AVR_TINY__)
    pop B1
    pop B0
#endif /* (__AVR_TINY__) */
    ret
ENDF __mulpsi3

#undef C2
#undef C1
#undef C0

#endif /* HAVE_MUL */

#undef B2
#undef B1
#undef B0

#undef A2
#undef A1
#undef A0

#endif /* L_mulpsi3 */

#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)

;; A[0..2]: In: Multiplicand
#define A0 22
#define A1 A0+1
#define A2 A0+2

;; BB: In: Multiplier
#define BB 25

;; C[0..2]: Result
#define C0 18
#define C1 C0+1
#define C2 C0+2

;; C[] = A[] * sign_extend (BB)
DEFUN __mulsqipsi3
    mul A0, BB
    movw C0, r0
    mul A2, BB
    mov C2, r0
    mul A1, BB
    add C1, r0
    adc C2, r1
    clr __zero_reg__
    sbrs BB, 7
    ret
    ;; One-extend BB
    sub C1, A0
    sbc C2, A1
    ret
ENDF __mulsqipsi3

#undef C2
#undef C1
#undef C0

#undef BB

#undef A2
#undef A1
#undef A0

#endif /* L_mulsqipsi3 && HAVE_MUL */
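
;; Decomposition used by the MUL variant of __mulpsi3 above
;; (illustrative): with  A = A2:A1:A0  and  B = B2:B1:B0,
;;     A * B mod 2^24  =  (A1:A0 * B1:B0)  +  ((A2*B0 + A0*B2) << 16)
;; where the 16 x 16 part comes from __umulhisi3 and only the low bytes
;; of the two remaining byte products survive the truncation to 24 bits.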

/*******************************************************
    Multiplication 64 x 64
*******************************************************/

;; A[] = A[] * B[]

;; A[0..7]: In: Multiplicand
;;          Out: Product
#define A0 18
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3
#define A4 A0+4
#define A5 A0+5
#define A6 A0+6
#define A7 A0+7

;; B[0..7]: In: Multiplier
#define B0 10
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3
#define B4 B0+4
#define B5 B0+5
#define B6 B0+6
#define B7 B0+7

#ifndef __AVR_TINY__
#if defined (__AVR_HAVE_MUL__)
;; Define C[] for convenience
;; Notice that parts of C[] overlap A[] and B[], respectively
#define C0 16
#define C1 C0+1
#define C2 20
#define C3 C2+1
#define C4 28
#define C5 C4+1
#define C6 C4+2
#define C7 C4+3

#if defined (L_muldi3)

;; A[] *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function

DEFUN __muldi3
    push r29
    push r28
    push r17
    push r16

    ;; Counting in Words, we have to perform a 4 * 4 Multiplication

    ;; 3 * 0  +  0 * 3
    mul A7,B0 $             $  mov C7,r0
    mul A0,B7 $             $  add C7,r0
    mul A6,B1 $             $  add C7,r0
    mul A6,B0 $  mov C6,r0  $  add C7,r1
    mul B6,A1 $             $  add C7,r0
    mul B6,A0 $  add C6,r0  $  adc C7,r1

    ;; 1 * 2
    mul A2,B4 $  add C6,r0  $  adc C7,r1
    mul A3,B4 $             $  add C7,r0
    mul A2,B5 $             $  add C7,r0

    push A5
    push A4
    push B1
    push B0
    push A3
    push A2

    ;; 0 * 0
    wmov 26, B0
    XCALL __umulhisi3
    wmov C0, 22
    wmov C2, 24

    ;; 0 * 2
    wmov 26, B4
    XCALL __umulhisi3 $  wmov C4,22  $  add C6,24  $  adc C7,25

    wmov 26, B2
    ;; 0 * 1
    XCALL __muldi3_6

    pop A0
    pop A1
    ;; 1 * 1
    wmov 26, B2
    XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25

    pop r26
    pop r27
    ;; 1 * 0
    XCALL __muldi3_6

    pop A0
    pop A1
    ;; 2 * 0
    XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25

    ;; 2 * 1
    wmov 26, B2
    XCALL __umulhisi3 $           $           $ add C6,22 $ adc C7,23

    ;; A[] = C[]
    wmov A0, C0
    ;; A2 = C2 already
    wmov A4, C4
    wmov A6, C6

    clr __zero_reg__
    pop r16
    pop r17
    pop r28
    pop r29
    ret
ENDF __muldi3
#endif /* L_muldi3 */
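
;; Scheme of the word-level multiplication above (illustrative): with
;; A and B split into 16-bit words a3..a0 and b3..b0, word k of the
;; 64-bit product sums all partial products ai*bj with i + j = k.
;; Word pairs with i + j = 3 only need their low 16 bits, while the
;; lower pairs need full 32-bit partial products and therefore go
;; through __umulhisi3 and __muldi3_6.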

#if defined (L_muldi3_6)
;; A helper for some 64-bit multiplications with MUL available
DEFUN __muldi3_6
    XCALL __umulhisi3
    add C2, 22
    adc C3, 23
    adc C4, 24
    adc C5, 25
    brcc 0f
    adiw C6, 1
0:  ret
ENDF __muldi3_6
#endif /* L_muldi3_6 */

#undef C7
#undef C6
#undef C5
#undef C4
#undef C3
#undef C2
#undef C1
#undef C0

#else /* !HAVE_MUL */

#if defined (L_muldi3)

#define C0 26
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3
#define C4 C0+4
#define C5 C0+5
#define C6 0
#define C7 C6+1

#define Loop 9

;; A[] *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function

DEFUN __muldi3
    push r29
    push r28
    push Loop

    ldi C0, 64
    mov Loop, C0

    ;; C[] = 0
    clr __tmp_reg__
    wmov C0, 0
    wmov C2, 0
    wmov C4, 0

0:  ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
    ;; where N = 64 - Loop.
    ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
    ;; B[] will have its initial Value again.
    LSR B7 $ ror B6 $ ror B5 $ ror B4
    ror B3 $ ror B2 $ ror B1 $ ror B0

    ;; If the N-th Bit of B[] was set then...
    brcc 1f
    ;; ...finish Rotation...
    ori B7, 1 << 7

    ;; ...and add A[] * 2^N to the Result C[]
    ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3
    adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7

1:  ;; Multiply A[] by 2
    LSL A0 $ rol A1 $ rol A2 $ rol A3
    rol A4 $ rol A5 $ rol A6 $ rol A7

    dec Loop
    brne 0b

    ;; We expanded the Result in C[]
    ;; Copy Result to the Return Register A[]
    wmov A0, C0
    wmov A2, C2
    wmov A4, C4
    wmov A6, C6

    clr __zero_reg__
    pop Loop
    pop r28
    pop r29
    ret
ENDF __muldi3

#undef Loop

#undef C7
#undef C6
#undef C5
#undef C4
#undef C3
#undef C2
#undef C1
#undef C0

#endif /* L_muldi3 */
#endif /* HAVE_MUL */
#endif /* if not __AVR_TINY__ */

#undef B7
#undef B6
#undef B5
#undef B4
#undef B3
#undef B2
#undef B1
#undef B0

#undef A7
#undef A6
#undef A5
#undef A4
#undef A3
#undef A2
#undef A1
#undef A0
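
;; C model of the MUL-less __muldi3 loop above (illustrative sketch):
;;     uint64_t mul (uint64_t a, uint64_t b)
;;     {
;;         uint64_t c = 0;
;;         for (int i = 0; i < 64; i++)
;;         {
;;             int bit = b & 1;
;;             b = (b >> 1) | ((uint64_t) bit << 63); // rotate: b survives
;;             if (bit)
;;                 c += a;
;;             a <<= 1;
;;         }
;;         return c;
;;     }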

/*******************************************************
    Widening Multiplication 64 = 32 x 32 with MUL
*******************************************************/

#if defined (__AVR_HAVE_MUL__)
#define A0 r22
#define A1 r23
#define A2 r24
#define A3 r25

#define B0 r18
#define B1 r19
#define B2 r20
#define B3 r21

#define C0 18
#define C1 C0+1
#define C2 20
#define C3 C2+1
#define C4 28
#define C5 C4+1
#define C6 C4+2
#define C7 C4+3

#if defined (L_umulsidi3)

;; Unsigned widening 64 = 32 * 32 Multiplication with MUL

;; R18[8] = R22[4] * R18[4]
;;
;; Ordinary ABI Function, but additionally sets
;; X = R20[2] = B2[2]
;; Z = R22[2] = A0[2]
DEFUN __umulsidi3
    clt
    ;; FALLTHRU
ENDF __umulsidi3
    ;; T = sign (A)
DEFUN __umulsidi3_helper
    push 29 $ push 28 ; Y
    wmov 30, A2
    ;; Counting in Words, we have to perform 4 Multiplications
    ;; 0 * 0
    wmov 26, A0
    XCALL __umulhisi3
    push 23 $ push 22 ; C0
    wmov 28, B0
    wmov 18, B2
    wmov C2, 24
    push 27 $ push 26 ; A0
    push 19 $ push 18 ; B2
    ;;
    ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
    ;;  B2  C2  --  --  --  B0  A2
    ;;
    ;; 1 * 1
    wmov 26, 30 ; A2
    XCALL __umulhisi3
    ;; Sign-extend A.  T holds the sign of A
    brtc 0f
    ;; Subtract B from the high part of the result
    sub 22, 28
    sbc 23, 29
    sbc 24, 18
    sbc 25, 19
0:  wmov 18, 28 ;; B0
    wmov C4, 22
    wmov C6, 24
    ;;
    ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
    ;;  B0  C2  --  --  A2  C4  C6
    ;;
    ;; 1 * 0
    XCALL __muldi3_6
    ;; 0 * 1
    pop 26 $ pop 27 ;; B2
    pop 18 $ pop 19 ;; A0
    XCALL __muldi3_6

    ;; Move result C into place and save A0 in Z
    wmov 22, C4
    wmov 24, C6
    wmov 30, 18 ; A0
    pop C0 $ pop C1

    ;; Epilogue
    pop 28 $ pop 29 ;; Y
    ret
ENDF __umulsidi3_helper
#endif /* L_umulsidi3 */


#if defined (L_mulsidi3)

;; Signed widening 64 = 32 * 32 Multiplication
;;
;; R18[8] = R22[4] * R18[4]
;; Ordinary ABI Function
DEFUN __mulsidi3
    bst A3, 7
    sbrs B3, 7  ; Enhanced core has no skip bug
    XJMP __umulsidi3_helper

    ;; B needs sign-extension
    push A3
    push A2
    XCALL __umulsidi3_helper
    ;; A0 survived in Z
    sub r22, r30
    sbc r23, r31
    pop r26
    pop r27
    sbc r24, r26
    sbc r25, r27
    ret
ENDF __mulsidi3
#endif /* L_mulsidi3 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#endif /* HAVE_MUL */
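
;; Sign corrections used by __umulsidi3_helper/__mulsidi3 above
;; (illustrative): reading the operands as unsigned uA, uB,
;;     (int64_t) A * B = uA*uB - (uB << 32)*[A < 0] - (uA << 32)*[B < 0]
;; modulo 2^64, so each negative operand costs one 32-bit subtraction
;; from the high half of the unsigned product.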

/**********************************************************
    Widening Multiplication 64 = 32 x 32 without MUL
**********************************************************/
#ifndef __AVR_TINY__ /* if not __AVR_TINY__ */
#if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
#define A0 18
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3
#define A4 A0+4
#define A5 A0+5
#define A6 A0+6
#define A7 A0+7

#define B0 10
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3
#define B4 B0+4
#define B5 B0+5
#define B6 B0+6
#define B7 B0+7

#define AA0 22
#define AA1 AA0+1
#define AA2 AA0+2
#define AA3 AA0+3

#define BB0 18
#define BB1 BB0+1
#define BB2 BB0+2
#define BB3 BB0+3

#define Mask r30

;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
;;
;; R18[8] = R22[4] * R18[4]
;; Ordinary ABI Function
DEFUN __mulsidi3
    set
    skip
    ;; FALLTHRU
ENDF __mulsidi3

DEFUN __umulsidi3
    clt ; skipped
    ;; Save 10 Registers: R10..R17, R28, R29
    do_prologue_saves 10
    ldi Mask, 0xff
    bld Mask, 7
    ;; Move B into place...
    wmov B0, BB0
    wmov B2, BB2
    ;; ...and extend it
    and BB3, Mask
    lsl BB3
    sbc B4, B4
    mov B5, B4
    wmov B6, B4
    ;; Move A into place...
    wmov A0, AA0
    wmov A2, AA2
    ;; ...and extend it
    and AA3, Mask
    lsl AA3
    sbc A4, A4
    mov A5, A4
    wmov A6, A4
    XCALL __muldi3
    do_epilogue_restores 10
ENDF __umulsidi3

#undef A0
#undef A1
#undef A2
#undef A3
#undef A4
#undef A5
#undef A6
#undef A7
#undef B0
#undef B1
#undef B2
#undef B3
#undef B4
#undef B5
#undef B6
#undef B7
#undef AA0
#undef AA1
#undef AA2
#undef AA3
#undef BB0
#undef BB1
#undef BB2
#undef BB3
#undef Mask
#endif /* L_mulsidi3 && !HAVE_MUL */
#endif /* if not __AVR_TINY__ */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


.section .text.libgcc.div, "ax", @progbits

/*******************************************************
    Division 8 / 8 => (result + remainder)
*******************************************************/
#define r_rem  r25 /* remainder */
#define r_arg1 r24 /* dividend, quotient */
#define r_arg2 r22 /* divisor */
#define r_cnt  r23 /* loop count */

#if defined (L_udivmodqi4)
DEFUN __udivmodqi4
    sub r_rem,r_rem     ; clear remainder and carry
    ldi r_cnt,9         ; init loop counter
    rjmp __udivmodqi4_ep ; jump to entry point
__udivmodqi4_loop:
    rol r_rem           ; shift dividend into remainder
    cp r_rem,r_arg2     ; compare remainder & divisor
    brcs __udivmodqi4_ep ; remainder < divisor
    sub r_rem,r_arg2    ; subtract divisor from remainder
__udivmodqi4_ep:
    rol r_arg1          ; shift dividend (with CARRY)
    dec r_cnt           ; decrement loop counter
    brne __udivmodqi4_loop
    com r_arg1          ; complement result
                        ; because C flag was complemented in loop
    ret
ENDF __udivmodqi4
#endif /* defined (L_udivmodqi4) */

#if defined (L_divmodqi4)
DEFUN __divmodqi4
    bst r_arg1,7        ; store sign of dividend
    mov __tmp_reg__,r_arg1
    eor __tmp_reg__,r_arg2 ; r0.7 is sign of result
    sbrc r_arg1,7
    neg r_arg1          ; dividend negative: negate
    sbrc r_arg2,7
    neg r_arg2          ; divisor negative: negate
    XCALL __udivmodqi4  ; do the unsigned div/mod
    brtc __divmodqi4_1
    neg r_rem           ; correct remainder sign
__divmodqi4_1:
    sbrc __tmp_reg__,7
    neg r_arg1          ; correct result sign
__divmodqi4_exit:
    ret
ENDF __divmodqi4
#endif /* defined (L_divmodqi4) */

#undef r_rem
#undef r_arg1
#undef r_arg2
#undef r_cnt
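
;; C model of the restoring division above (illustrative sketch; the
;; asm folds the quotient bit into the carry chain, runs one extra
;; iteration and complements the result at the end instead):
;;     uint8_t udivmod (uint8_t a, uint8_t d, uint8_t *rem)
;;     {
;;         uint8_t r = 0, q = 0;
;;         for (int i = 7; i >= 0; i--)
;;         {
;;             r = (r << 1) | ((a >> i) & 1);
;;             q <<= 1;
;;             if (r >= d) { r -= d; q |= 1; }
;;         }
;;         *rem = r;
;;         return q;
;;     }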

/*******************************************************
    Division 16 / 16 => (result + remainder)
*******************************************************/
#define r_remL r26 /* remainder Low */
#define r_remH r27 /* remainder High */

/* return: remainder */
#define r_arg1L r24 /* dividend Low */
#define r_arg1H r25 /* dividend High */

/* return: quotient */
#define r_arg2L r22 /* divisor Low */
#define r_arg2H r23 /* divisor High */

#define r_cnt r21 /* loop count */

#if defined (L_udivmodhi4)
DEFUN __udivmodhi4
    sub r_remL,r_remL
    sub r_remH,r_remH   ; clear remainder and carry
    ldi r_cnt,17        ; init loop counter
    rjmp __udivmodhi4_ep ; jump to entry point
__udivmodhi4_loop:
    rol r_remL          ; shift dividend into remainder
    rol r_remH
    cp r_remL,r_arg2L   ; compare remainder & divisor
    cpc r_remH,r_arg2H
    brcs __udivmodhi4_ep ; remainder < divisor
    sub r_remL,r_arg2L  ; subtract divisor from remainder
    sbc r_remH,r_arg2H
__udivmodhi4_ep:
    rol r_arg1L         ; shift dividend (with CARRY)
    rol r_arg1H
    dec r_cnt           ; decrement loop counter
    brne __udivmodhi4_loop
    com r_arg1L
    com r_arg1H
; div/mod results to return registers, as for the div() function
    mov_l r_arg2L, r_arg1L ; quotient
    mov_h r_arg2H, r_arg1H
    mov_l r_arg1L, r_remL  ; remainder
    mov_h r_arg1H, r_remH
    ret
ENDF __udivmodhi4
#endif /* defined (L_udivmodhi4) */

#if defined (L_divmodhi4)
DEFUN __divmodhi4
    .global _div
_div:
    bst r_arg1H,7       ; store sign of dividend
    mov __tmp_reg__,r_arg2H
    brtc 0f
    com __tmp_reg__     ; r0.7 is sign of result
    rcall __divmodhi4_neg1 ; dividend negative: negate
0:
    sbrc r_arg2H,7
    rcall __divmodhi4_neg2 ; divisor negative: negate
    XCALL __udivmodhi4  ; do the unsigned div/mod
    sbrc __tmp_reg__,7
    rcall __divmodhi4_neg2 ; correct quotient sign
    brtc __divmodhi4_exit
__divmodhi4_neg1:
    ;; correct dividend/remainder sign
    com r_arg1H
    neg r_arg1L
    sbci r_arg1H,0xff
    ret
__divmodhi4_neg2:
    ;; correct divisor/result sign
    com r_arg2H
    neg r_arg2L
    sbci r_arg2H,0xff
__divmodhi4_exit:
    ret
ENDF __divmodhi4
#endif /* defined (L_divmodhi4) */

#undef r_remH
#undef r_remL

#undef r_arg1H
#undef r_arg1L

#undef r_arg2H
#undef r_arg2L

#undef r_cnt
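
;; Sign rules implemented by __divmodqi4/__divmodhi4 above (C truncating
;; division semantics, illustrative):
;;     sign (quotient)  = sign (dividend) ^ sign (divisor)
;;     sign (remainder) = sign (dividend)
;; e.g.  -7 / 2 == -3  and  -7 % 2 == -1.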

/*******************************************************
    Division 24 / 24 => (result + remainder)
*******************************************************/

;; A[0..2]: In: Dividend; Out: Quotient
#define A0 22
#define A1 A0+1
#define A2 A0+2

;; B[0..2]: In: Divisor; Out: Remainder
#define B0 18
#define B1 B0+1
#define B2 B0+2

;; C[0..2]: Expand remainder
#define C0 __zero_reg__
#define C1 26
#define C2 25

;; Loop counter
#define r_cnt 21

#if defined (L_udivmodpsi4)
;; R24:R22 = R24:R22 udiv R20:R18
;; R20:R18 = R24:R22 umod R20:R18
;; Clobbers: R21, R25, R26

DEFUN __udivmodpsi4
    ; init loop counter
    ldi r_cnt, 24+1
    ; Clear remainder and carry.  C0 is already 0
    clr C1
    sub C2, C2
    ; jump to entry point
    rjmp __udivmodpsi4_start
__udivmodpsi4_loop:
    ; shift dividend into remainder
    rol C0
    rol C1
    rol C2
    ; compare remainder & divisor
    cp C0, B0
    cpc C1, B1
    cpc C2, B2
    brcs __udivmodpsi4_start ; remainder < divisor
    sub C0, B0 ; subtract divisor
    sbc C1, B1
    sbc C2, B2
__udivmodpsi4_start:
    ; shift dividend (with CARRY)
    rol A0
    rol A1
    rol A2
    ; decrement loop counter
    dec r_cnt
    brne __udivmodpsi4_loop
    com A0
    com A1
    com A2
    ; div/mod results to return registers
    ; remainder
    mov B0, C0
    mov B1, C1
    mov B2, C2
    clr __zero_reg__ ; C0
    ret
ENDF __udivmodpsi4
#endif /* defined (L_udivmodpsi4) */

#if defined (L_divmodpsi4)
;; R24:R22 = R24:R22 div R20:R18
;; R20:R18 = R24:R22 mod R20:R18
;; Clobbers: T, __tmp_reg__, R21, R25, R26

DEFUN __divmodpsi4
    ; R0.7 will contain the sign of the result:
    ; R0.7 = A.sign ^ B.sign
    mov __tmp_reg__, B2
    ; T-flag = sign of dividend
    bst A2, 7
    brtc 0f
    com __tmp_reg__
    ; Adjust dividend's sign
    rcall __divmodpsi4_negA
0:
    ; Adjust divisor's sign
    sbrc B2, 7
    rcall __divmodpsi4_negB

    ; Do the unsigned div/mod
    XCALL __udivmodpsi4

    ; Adjust quotient's sign
    sbrc __tmp_reg__, 7
    rcall __divmodpsi4_negA

    ; Adjust remainder's sign
    brtc __divmodpsi4_end

__divmodpsi4_negB:
    ; Correct divisor/remainder sign
    com B2
    com B1
    neg B0
    sbci B1, -1
    sbci B2, -1
    ret

    ; Correct dividend/quotient sign
__divmodpsi4_negA:
    com A2
    com A1
    neg A0
    sbci A1, -1
    sbci A2, -1
__divmodpsi4_end:
    ret

ENDF __divmodpsi4
#endif /* defined (L_divmodpsi4) */

#undef A0
#undef A1
#undef A2

#undef B0
#undef B1
#undef B2

#undef C0
#undef C1
#undef C2

#undef r_cnt

/*******************************************************
    Division 32 / 32 => (result + remainder)
*******************************************************/
#define r_remHH r31 /* remainder High */
#define r_remHL r30
#define r_remH  r27
#define r_remL  r26 /* remainder Low */

/* return: remainder */
#define r_arg1HH r25 /* dividend High */
#define r_arg1HL r24
#define r_arg1H  r23
#define r_arg1L  r22 /* dividend Low */

/* return: quotient */
#define r_arg2HH r21 /* divisor High */
#define r_arg2HL r20
#define r_arg2H  r19
#define r_arg2L  r18 /* divisor Low */

#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */

#if defined (L_udivmodsi4)
DEFUN __udivmodsi4
    ldi r_remL, 33      ; init loop counter
    mov r_cnt, r_remL
    sub r_remL,r_remL
    sub r_remH,r_remH   ; clear remainder and carry
    mov_l r_remHL, r_remL
    mov_h r_remHH, r_remH
    rjmp __udivmodsi4_ep ; jump to entry point
__udivmodsi4_loop:
    rol r_remL          ; shift dividend into remainder
    rol r_remH
    rol r_remHL
    rol r_remHH
    cp r_remL,r_arg2L   ; compare remainder & divisor
    cpc r_remH,r_arg2H
    cpc r_remHL,r_arg2HL
    cpc r_remHH,r_arg2HH
    brcs __udivmodsi4_ep ; remainder < divisor
    sub r_remL,r_arg2L  ; subtract divisor from remainder
    sbc r_remH,r_arg2H
    sbc r_remHL,r_arg2HL
    sbc r_remHH,r_arg2HH
__udivmodsi4_ep:
    rol r_arg1L         ; shift dividend (with CARRY)
    rol r_arg1H
    rol r_arg1HL
    rol r_arg1HH
    dec r_cnt           ; decrement loop counter
    brne __udivmodsi4_loop
    ; __zero_reg__ now restored (r_cnt == 0)
    com r_arg1L
    com r_arg1H
    com r_arg1HL
    com r_arg1HH
; div/mod results to return registers, as for the ldiv() function
    mov_l r_arg2L, r_arg1L   ; quotient
    mov_h r_arg2H, r_arg1H
    mov_l r_arg2HL, r_arg1HL
    mov_h r_arg2HH, r_arg1HH
    mov_l r_arg1L, r_remL    ; remainder
    mov_h r_arg1H, r_remH
    mov_l r_arg1HL, r_remHL
    mov_h r_arg1HH, r_remHH
    ret
ENDF __udivmodsi4
#endif /* defined (L_udivmodsi4) */

#if defined (L_divmodsi4)
DEFUN __divmodsi4
    mov __tmp_reg__,r_arg2HH
    bst r_arg1HH,7      ; store sign of dividend
    brtc 0f
    com __tmp_reg__     ; r0.7 is sign of result
    XCALL __negsi2      ; dividend negative: negate
0:
    sbrc r_arg2HH,7
    rcall __divmodsi4_neg2 ; divisor negative: negate
    XCALL __udivmodsi4  ; do the unsigned div/mod
    sbrc __tmp_reg__, 7 ; correct quotient sign
    rcall __divmodsi4_neg2
    brtc __divmodsi4_exit ; correct remainder sign
    XJMP __negsi2
__divmodsi4_neg2:
    ;; correct divisor/quotient sign
    com r_arg2HH
    com r_arg2HL
    com r_arg2H
    neg r_arg2L
    sbci r_arg2H,0xff
    sbci r_arg2HL,0xff
    sbci r_arg2HH,0xff
__divmodsi4_exit:
    ret
ENDF __divmodsi4
#endif /* defined (L_divmodsi4) */

#if defined (L_negsi2)
;; (set (reg:SI 22)
;;      (neg:SI (reg:SI 22)))
;; Sets the V flag for signed overflow tests
DEFUN __negsi2
    NEG4 22
    ret
ENDF __negsi2
#endif /* L_negsi2 */

#undef r_remHH
#undef r_remHL
#undef r_remH
#undef r_remL
#undef r_arg1HH
#undef r_arg1HL
#undef r_arg1H
#undef r_arg1L
#undef r_arg2HH
#undef r_arg2HL
#undef r_arg2H
#undef r_arg2L
#undef r_cnt

/* *di routines use registers below R19 and won't work with tiny arch
   right now.  */

#if !defined (__AVR_TINY__)
/*******************************************************
    Division 64 / 64
    Modulo   64 % 64
*******************************************************/

;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
;; at least 16k of Program Memory.  For smaller Devices, depend
;; on MOVW and SP Size.  There is a Connection between SP Size and
;; Flash Size so that SP Size can be used to test for Flash Size.

#if defined (__AVR_HAVE_JMP_CALL__)
#   define SPEED_DIV 8
#elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
#   define SPEED_DIV 16
#else
#   define SPEED_DIV 0
#endif

;; A[0..7]: In: Dividend;
;;          Out: Quotient  (T = 0)
;;          Out: Remainder (T = 1)
#define A0 18
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3
#define A4 A0+4
#define A5 A0+5
#define A6 A0+6
#define A7 A0+7

;; B[0..7]: In: Divisor; Out: Clobber
#define B0 10
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3
#define B4 B0+4
#define B5 B0+5
#define B6 B0+6
#define B7 B0+7

;; C[0..7]: Expand remainder; Out: Remainder (unused)
#define C0 8
#define C1 C0+1
#define C2 30
#define C3 C2+1
#define C4 28
#define C5 C4+1
#define C6 26
#define C7 C6+1

;; Holds Signs during Division Routine
#define SS __tmp_reg__

;; Bit-Counter in Division Routine
#define R_cnt __zero_reg__

;; Scratch Register for Negation
#define NN r31

#if defined (L_udivdi3)

;; R25:R18 = R24:R18 umod R17:R10
;; Ordinary ABI-Function

DEFUN __umoddi3
    set
    rjmp __udivdi3_umoddi3
ENDF __umoddi3

;; R25:R18 = R24:R18 udiv R17:R10
;; Ordinary ABI-Function

DEFUN __udivdi3
    clt
ENDF __udivdi3

DEFUN __udivdi3_umoddi3
    push C0
    push C1
    push C4
    push C5
    XCALL __udivmod64
    pop C5
    pop C4
    pop C1
    pop C0
    ret
ENDF __udivdi3_umoddi3
#endif /* L_udivdi3 */

#if defined (L_udivmod64)

;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
;; No Registers saved/restored; the Callers will take Care.
;; Preserves B[] and T-flag
;; T = 0: Compute Quotient  in A[]
;; T = 1: Compute Remainder in A[] and shift SS one Bit left

DEFUN __udivmod64

    ;; Clear Remainder (C6, C7 will follow)
    clr C0
    clr C1
    wmov C2, C0
    wmov C4, C0
    ldi C7, 64

#if SPEED_DIV == 0 || SPEED_DIV == 16
    ;; Initialize Loop-Counter
    mov R_cnt, C7
    wmov C6, C0
#endif /* SPEED_DIV */

#if SPEED_DIV == 8

    push A7
    clr C6

1:  ;; Compare shifted Dividend against Divisor
    ;; If -- even after Shifting -- it is smaller...
    CP  A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3
    cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7
    brcc 2f

    ;; ...then we can subtract it.  Thus, it is legal to shift left
              $ mov C6,C5 $ mov C5,C4 $ mov C4,C3
    mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7
    mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3
    mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0

    ;; 8 Bits are done
    subi C7, 8
    brne 1b

    ;; Shifted 64 Bits:  A7 has traveled to C7
    pop C7
    ;; Divisor is greater than Dividend.  We have:
    ;; A[] % B[] = A[]
    ;; A[] / B[] = 0
    ;; Thus, we can return immediately
    rjmp 5f
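
    ;; Idea of the byte-wise pre-shift above (illustrative): instead of
    ;; feeding all 64 bits to the 1-bit-per-round main loop, the
    ;; dividend is shifted left a whole byte at a time while
    ;; remainder:dividend still compares below the divisor, trading up
    ;; to 56 bit rounds for at most 8 byte shifts.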
2:  ;; Initialize Bit-Counter with Number of Bits still to be performed
    mov R_cnt, C7

    ;; Push of A7 is not needed because C7 is still 0
    pop C7
    clr C7

#elif SPEED_DIV == 16

    ;; Compare shifted Dividend against Divisor
    cp A7, B3
    cpc C0, B4
    cpc C1, B5
    cpc C2, B6
    cpc C3, B7
    brcc 2f

    ;; Divisor is greater than shifted Dividend: We can shift the Dividend
    ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
    wmov C2,A6 $ wmov C0,A4
    wmov A6,A2 $ wmov A4,A0
    wmov A2,C6 $ wmov A0,C4

    ;; Set Bit Counter to 32
    lsr R_cnt
2:
#elif SPEED_DIV
#error SPEED_DIV = ?
#endif /* SPEED_DIV */

;; The very Division + Remainder Routine

3:  ;; Left-shift Dividend...
    lsl A0 $ rol A1 $ rol A2 $ rol A3
    rol A4 $ rol A5 $ rol A6 $ rol A7

    ;; ...into Remainder
    rol C0 $ rol C1 $ rol C2 $ rol C3
    rol C4 $ rol C5 $ rol C6 $ rol C7

    ;; Compare Remainder and Divisor
    CP  C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3
    cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7

    brcs 4f

    ;; Divisor fits into Remainder:  Subtract it from Remainder...
    SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3
    sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7

    ;; ...and set according Bit in the upcoming Quotient
    ;; The Bit will travel to its final Position
    ori A0, 1

4:  ;; This Bit is done
    dec R_cnt
    brne 3b
    ;; __zero_reg__ is 0 again

    ;; T = 0: We are fine with the Quotient in A[]
    ;; T = 1: Copy Remainder to A[]
5:  brtc 6f
    wmov A0, C0
    wmov A2, C2
    wmov A4, C4
    wmov A6, C6
    ;; Move the Sign of the Result to SS.7
    lsl SS

6:  ret

ENDF __udivmod64
#endif /* L_udivmod64 */
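
;; Interface summary of __udivmod64 (as used below, illustrative):
;;     T = 0:  A[] := A[] / B[]   (quotient,  __udivdi3 / __divdi3)
;;     T = 1:  A[] := A[] % B[]   (remainder, __umoddi3 / __moddi3),
;;             and SS is shifted left once so that the sign bit that
;;             matters for the remainder ends up in SS.7
;; B[] and the T-flag itself are preserved.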

#if defined (L_divdi3)

;; R25:R18 = R24:R18 mod R17:R10
;; Ordinary ABI-Function

DEFUN __moddi3
    set
    rjmp __divdi3_moddi3
ENDF __moddi3

;; R25:R18 = R24:R18 div R17:R10
;; Ordinary ABI-Function

DEFUN __divdi3
    clt
ENDF __divdi3

DEFUN __divdi3_moddi3
#if SPEED_DIV
    mov r31, A7
    or r31, B7
    brmi 0f
    ;; Both Signs are 0:  the following Complexity is not needed
    XJMP __udivdi3_umoddi3
#endif /* SPEED_DIV */

0:  ;; The Prologue
    ;; Save 12 Registers:  Y, 17...8
    ;; No Frame needed
    do_prologue_saves 12

    ;; SS.7 will contain the Sign of the Quotient  (A.sign * B.sign)
    ;; SS.6 will contain the Sign of the Remainder (A.sign)
    mov SS, A7
    asr SS
    ;; Adjust Dividend's Sign as needed
#if SPEED_DIV
    ;; Compiling for Speed we know that at least one Sign must be < 0
    ;; Thus, if A[] >= 0 then we know B[] < 0
    brpl 22f
#else
    brpl 21f
#endif /* SPEED_DIV */

    XCALL __negdi2

    ;; Adjust Divisor's Sign and SS.7 as needed
21: tst B7
    brpl 3f
22: ldi NN, 1 << 7
    eor SS, NN

    ldi NN, -1
    com B4 $ com B5 $ com B6 $ com B7
           $ com B1 $ com B2 $ com B3
    NEG B0
           $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN
    sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN

3:  ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
    XCALL __udivmod64

    ;; Adjust Result's Sign
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    tst SS
    brpl 4f
#else
    sbrc SS, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XCALL __negdi2

4:  ;; Epilogue: Restore 12 Registers and return
    do_epilogue_restores 12

ENDF __divdi3_moddi3

#endif /* L_divdi3 */

#undef R_cnt
#undef SS
#undef NN

.section .text.libgcc, "ax", @progbits

#define TT __tmp_reg__

#if defined (L_adddi3)
;; (set (reg:DI 18)
;;      (plus:DI (reg:DI 18)
;;               (reg:DI 10)))
;; Sets the V flag for signed overflow tests
;; Sets the C flag for unsigned overflow tests
DEFUN __adddi3
    ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3
    adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7
    ret
ENDF __adddi3
#endif /* L_adddi3 */

#if defined (L_adddi3_s8)
;; (set (reg:DI 18)
;;      (plus:DI (reg:DI 18)
;;               (sign_extend:SI (reg:QI 26))))
;; Sets the V flag for signed overflow tests
;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
DEFUN __adddi3_s8
    clr TT
    sbrc r26, 7
    com TT
    ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT
    adc A4,TT  $ adc A5,TT $ adc A6,TT $ adc A7,TT
    ret
ENDF __adddi3_s8
#endif /* L_adddi3_s8 */

#if defined (L_subdi3)
;; (set (reg:DI 18)
;;      (minus:DI (reg:DI 18)
;;                (reg:DI 10)))
;; Sets the V flag for signed overflow tests
;; Sets the C flag for unsigned overflow tests
DEFUN __subdi3
    SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3
    sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7
    ret
ENDF __subdi3
#endif /* L_subdi3 */

#if defined (L_cmpdi2)
;; (set (cc0)
;;      (compare (reg:DI 18)
;;               (reg:DI 10)))
DEFUN __cmpdi2
    CP  A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3
    cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7
    ret
ENDF __cmpdi2
#endif /* L_cmpdi2 */

#if defined (L_cmpdi2_s8)
;; (set (cc0)
;;      (compare (reg:DI 18)
;;               (sign_extend:SI (reg:QI 26))))
DEFUN __cmpdi2_s8
    clr TT
    sbrc r26, 7
    com TT
    CP  A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT
    cpc A4,TT  $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT
    ret
ENDF __cmpdi2_s8
#endif /* L_cmpdi2_s8 */

#if defined (L_negdi2)
;; (set (reg:DI 18)
;;      (neg:DI (reg:DI 18)))
;; Sets the V flag for signed overflow tests
DEFUN __negdi2

    com A4 $ com A5 $ com A6 $ com A7
           $ com A1 $ com A2 $ com A3
    NEG A0
           $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1
    sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1
    ret

ENDF __negdi2
#endif /* L_negdi2 */

#undef TT

#undef C7
#undef C6
#undef C5
#undef C4
#undef C3
#undef C2
#undef C1
#undef C0

#undef B7
#undef B6
#undef B5
#undef B4
#undef B3
#undef B2
#undef B1
#undef B0

#undef A7
#undef A6
#undef A5
#undef A4
#undef A3
#undef A2
#undef A1
#undef A0

#endif /* !defined (__AVR_TINY__) */
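
;; Note on the _s8 variants above (illustrative): TT is computed as
;;     TT = (R26 & 0x80) ? 0xFF : 0x00
;; i.e. the 8-bit operand is sign-extended on the fly, byte by byte,
;; through the add/compare carry chain instead of materializing the
;; full 64-bit value.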


.section .text.libgcc.prologue, "ax", @progbits

/**********************************
 * This is a prologue subroutine
 **********************************/
#if !defined (__AVR_TINY__)
#if defined (L_prologue)

;; This function does not clobber T-flag; 64-bit division relies on it
DEFUN __prologue_saves__
    push r2
    push r3
    push r4
    push r5
    push r6
    push r7
    push r8
    push r9
    push r10
    push r11
    push r12
    push r13
    push r14
    push r15
    push r16
    push r17
    push r28
    push r29
#if !defined (__AVR_HAVE_SPH__)
    in r28,__SP_L__
    sub r28,r26
    out __SP_L__,r28
    clr r29
#elif defined (__AVR_XMEGA__)
    in r28,__SP_L__
    in r29,__SP_H__
    sub r28,r26
    sbc r29,r27
    out __SP_L__,r28
    out __SP_H__,r29
#else
    in r28,__SP_L__
    in r29,__SP_H__
    sub r28,r26
    sbc r29,r27
    in __tmp_reg__,__SREG__
    cli
    out __SP_H__,r29
    out __SREG__,__tmp_reg__
    out __SP_L__,r28
#endif /* #SP = 8/16 */

    XIJMP

ENDF __prologue_saves__
#endif /* defined (L_prologue) */

/*
 * This is an epilogue subroutine
 */
#if defined (L_epilogue)

DEFUN __epilogue_restores__
    ldd r2,Y+18
    ldd r3,Y+17
    ldd r4,Y+16
    ldd r5,Y+15
    ldd r6,Y+14
    ldd r7,Y+13
    ldd r8,Y+12
    ldd r9,Y+11
    ldd r10,Y+10
    ldd r11,Y+9
    ldd r12,Y+8
    ldd r13,Y+7
    ldd r14,Y+6
    ldd r15,Y+5
    ldd r16,Y+4
    ldd r17,Y+3
    ldd r26,Y+2
#if !defined (__AVR_HAVE_SPH__)
    ldd r29,Y+1
    add r28,r30
    out __SP_L__,r28
    mov r28, r26
#elif defined (__AVR_XMEGA__)
    ldd r27,Y+1
    add r28,r30
    adc r29,__zero_reg__
    out __SP_L__,r28
    out __SP_H__,r29
    wmov 28, 26
#else
    ldd r27,Y+1
    add r28,r30
    adc r29,__zero_reg__
    in __tmp_reg__,__SREG__
    cli
    out __SP_H__,r29
    out __SREG__,__tmp_reg__
    out __SP_L__,r28
    mov_l r28, r26
    mov_h r29, r27
#endif /* #SP = 8/16 */
    ret
ENDF __epilogue_restores__
#endif /* defined (L_epilogue) */
#endif /* !defined (__AVR_TINY__) */

#ifdef L_exit
    .section .fini9,"ax",@progbits
DEFUN _exit
    .weak exit
exit:
ENDF _exit

    /* Code from .fini8 ... .fini1 sections inserted by ld script.  */
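
    /* Illustrative note: _exit is glued together by section ordering
       alone: .fini9 above falls through to user code placed in
       .fini8 ... .fini1, which in turn falls through to .fini0 below,
       where interrupts are disabled and the CPU parks in an endless
       loop.  */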

    .section .fini0,"ax",@progbits
    cli
__stop_program:
    rjmp __stop_program
#endif /* defined (L_exit) */

#ifdef L_cleanup
    .weak _cleanup
    .func _cleanup
_cleanup:
    ret
.endfunc
#endif /* defined (L_cleanup) */


.section .text.libgcc, "ax", @progbits

#ifdef L_tablejump2
DEFUN __tablejump2__
    lsl r30
    rol r31
#if defined (__AVR_HAVE_EIJMP_EICALL__)
    ;; Word address of gs() jumptable entry in R24:Z
    rol r24
    out __RAMPZ__, r24
#elif defined (__AVR_HAVE_ELPM__)
    ;; Word address of jumptable entry in Z
    clr __tmp_reg__
    rol __tmp_reg__
    out __RAMPZ__, __tmp_reg__
#endif

    ;; Read word address from jumptable and jump

#if defined (__AVR_HAVE_ELPMX__)
    elpm __tmp_reg__, Z+
    elpm r31, Z
    mov r30, __tmp_reg__
#ifdef __AVR_HAVE_RAMPD__
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out __RAMPZ__, __zero_reg__
#endif /* RAMPD */
    XIJMP
#elif defined (__AVR_HAVE_ELPM__)
    elpm
    push r0
    adiw r30, 1
    elpm
    push r0
    ret
#elif defined (__AVR_HAVE_LPMX__)
    lpm __tmp_reg__, Z+
    lpm r31, Z
    mov r30, __tmp_reg__
    ijmp
#elif defined (__AVR_TINY__)
    wsubi 30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z
    ld __tmp_reg__, Z+
    ld r31, Z ; Use ld instead of lpm to load Z
    mov r30, __tmp_reg__
    ijmp
#else
    lpm
    push r0
    adiw r30, 1
    lpm
    push r0
    ret
#endif
ENDF __tablejump2__
#endif /* L_tablejump2 */
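
;; Usage sketch for __tablejump2__ (illustrative; see __do_global_ctors
;; below for a real caller): Z holds the word address of a flash word
;; that in turn contains the word address of the jump target.  The
;; routine doubles Z to a byte address (EIJMP/EICALL devices supply the
;; extra high bits in R24), loads the target address from the table and
;; jumps there, i.e. it performs an indirect jump through Flash[2*Z].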

#if defined(__AVR_TINY__)
#ifdef L_copy_data
    .section .init4,"ax",@progbits
    .global __do_copy_data
__do_copy_data:
    ldi r18, hi8(__data_end)
    ldi r26, lo8(__data_start)
    ldi r27, hi8(__data_start)
    ldi r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
    ldi r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
    rjmp .L__do_copy_data_start
.L__do_copy_data_loop:
    ld r19, Z+
    st X+, r19
.L__do_copy_data_start:
    cpi r26, lo8(__data_end)
    cpc r27, r18
    brne .L__do_copy_data_loop
#endif
#else
#ifdef L_copy_data
    .section .init4,"ax",@progbits
DEFUN __do_copy_data
#if defined(__AVR_HAVE_ELPMX__)
    ldi r17, hi8(__data_end)
    ldi r26, lo8(__data_start)
    ldi r27, hi8(__data_start)
    ldi r30, lo8(__data_load_start)
    ldi r31, hi8(__data_load_start)
    ldi r16, hh8(__data_load_start)
    out __RAMPZ__, r16
    rjmp .L__do_copy_data_start
.L__do_copy_data_loop:
    elpm r0, Z+
    st X+, r0
.L__do_copy_data_start:
    cpi r26, lo8(__data_end)
    cpc r27, r17
    brne .L__do_copy_data_loop
#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
    ldi r17, hi8(__data_end)
    ldi r26, lo8(__data_start)
    ldi r27, hi8(__data_start)
    ldi r30, lo8(__data_load_start)
    ldi r31, hi8(__data_load_start)
    ldi r16, hh8(__data_load_start - 0x10000)
.L__do_copy_data_carry:
    inc r16
    out __RAMPZ__, r16
    rjmp .L__do_copy_data_start
.L__do_copy_data_loop:
    elpm
    st X+, r0
    adiw r30, 1
    brcs .L__do_copy_data_carry
.L__do_copy_data_start:
    cpi r26, lo8(__data_end)
    cpc r27, r17
    brne .L__do_copy_data_loop
#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
    ldi r17, hi8(__data_end)
    ldi r26, lo8(__data_start)
    ldi r27, hi8(__data_start)
    ldi r30, lo8(__data_load_start)
    ldi r31, hi8(__data_load_start)
    rjmp .L__do_copy_data_start
.L__do_copy_data_loop:
#if defined (__AVR_HAVE_LPMX__)
    lpm r0, Z+
#else
    lpm
    adiw r30, 1
#endif
    st X+, r0
.L__do_copy_data_start:
    cpi r26, lo8(__data_end)
    cpc r27, r17
    brne .L__do_copy_data_loop
#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out __RAMPZ__, __zero_reg__
#endif /* ELPM && RAMPD */
ENDF __do_copy_data
#endif /* L_copy_data */
#endif /* !defined (__AVR_TINY__) */

/* __do_clear_bss is only necessary if there is anything in .bss section.  */

#ifdef L_clear_bss
    .section .init4,"ax",@progbits
DEFUN __do_clear_bss
    ldi r18, hi8(__bss_end)
    ldi r26, lo8(__bss_start)
    ldi r27, hi8(__bss_start)
    rjmp .do_clear_bss_start
.do_clear_bss_loop:
    st X+, __zero_reg__
.do_clear_bss_start:
    cpi r26, lo8(__bss_end)
    cpc r27, r18
    brne .do_clear_bss_loop
ENDF __do_clear_bss
#endif /* L_clear_bss */

/* __do_global_ctors and __do_global_dtors are only necessary
   if there are any constructors/destructors.  */

#if defined(__AVR_TINY__)
#define cdtors_tst_reg r18
#else
#define cdtors_tst_reg r17
#endif

#ifdef L_ctors
    .section .init6,"ax",@progbits
DEFUN __do_global_ctors
    ldi cdtors_tst_reg, pm_hi8(__ctors_start)
    ldi r28, pm_lo8(__ctors_end)
    ldi r29, pm_hi8(__ctors_end)
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi r16, pm_hh8(__ctors_end)
#endif /* HAVE_EIJMP */
    rjmp .L__do_global_ctors_start
.L__do_global_ctors_loop:
    wsubi 28, 1
#ifdef __AVR_HAVE_EIJMP_EICALL__
    sbc r16, __zero_reg__
    mov r24, r16
#endif /* HAVE_EIJMP */
    mov_h r31, r29
    mov_l r30, r28
    XCALL __tablejump2__
.L__do_global_ctors_start:
    cpi r28, pm_lo8(__ctors_start)
    cpc r29, cdtors_tst_reg
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi r24, pm_hh8(__ctors_start)
    cpc r16, r24
#endif /* HAVE_EIJMP */
    brne .L__do_global_ctors_loop
ENDF __do_global_ctors
#endif /* L_ctors */

#ifdef L_dtors
    .section .fini6,"ax",@progbits
DEFUN __do_global_dtors
    ldi cdtors_tst_reg, pm_hi8(__dtors_end)
    ldi r28, pm_lo8(__dtors_start)
    ldi r29, pm_hi8(__dtors_start)
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi r16, pm_hh8(__dtors_start)
#endif /* HAVE_EIJMP */
    rjmp .L__do_global_dtors_start
.L__do_global_dtors_loop:
#ifdef __AVR_HAVE_EIJMP_EICALL__
    mov r24, r16
#endif /* HAVE_EIJMP */
    mov_h r31, r29
    mov_l r30, r28
    XCALL __tablejump2__
    waddi 28, 1
#ifdef __AVR_HAVE_EIJMP_EICALL__
    adc r16, __zero_reg__
#endif /* HAVE_EIJMP */
.L__do_global_dtors_start:
    cpi r28, pm_lo8(__dtors_end)
    cpc r29, cdtors_tst_reg
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi r24, pm_hh8(__dtors_end)
    cpc r16, r24
#endif /* HAVE_EIJMP */
    brne .L__do_global_dtors_loop
ENDF __do_global_dtors
#endif /* L_dtors */

#undef cdtors_tst_reg
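
;; Note (illustrative): __do_global_ctors above walks from __ctors_end
;; down to __ctors_start, while __do_global_dtors walks from
;; __dtors_start up to __dtors_end, so destructors run in the reverse
;; order of the corresponding constructors.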

/* __do_global_ctors and __do_global_dtors are only necessary
   if there are any constructors/destructors.  */

#if defined(__AVR_TINY__)
#define cdtors_tst_reg r18
#else
#define cdtors_tst_reg r17
#endif

#ifdef L_ctors
    .section .init6,"ax",@progbits
DEFUN __do_global_ctors
    ldi cdtors_tst_reg, pm_hi8(__ctors_start)
    ldi r28, pm_lo8(__ctors_end)
    ldi r29, pm_hi8(__ctors_end)
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi r16, pm_hh8(__ctors_end)
#endif /* HAVE_EIJMP */
    rjmp .L__do_global_ctors_start
.L__do_global_ctors_loop:
    wsubi 28, 1
#ifdef __AVR_HAVE_EIJMP_EICALL__
    sbc r16, __zero_reg__
    mov r24, r16
#endif /* HAVE_EIJMP */
    mov_h r31, r29
    mov_l r30, r28
    XCALL __tablejump2__
.L__do_global_ctors_start:
    cpi r28, pm_lo8(__ctors_start)
    cpc r29, cdtors_tst_reg
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi r24, pm_hh8(__ctors_start)
    cpc r16, r24
#endif /* HAVE_EIJMP */
    brne .L__do_global_ctors_loop
ENDF __do_global_ctors
#endif /* L_ctors */

#ifdef L_dtors
    .section .fini6,"ax",@progbits
DEFUN __do_global_dtors
    ldi cdtors_tst_reg, pm_hi8(__dtors_end)
    ldi r28, pm_lo8(__dtors_start)
    ldi r29, pm_hi8(__dtors_start)
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi r16, pm_hh8(__dtors_start)
#endif /* HAVE_EIJMP */
    rjmp .L__do_global_dtors_start
.L__do_global_dtors_loop:
#ifdef __AVR_HAVE_EIJMP_EICALL__
    mov r24, r16
#endif /* HAVE_EIJMP */
    mov_h r31, r29
    mov_l r30, r28
    XCALL __tablejump2__
    waddi 28, 1
#ifdef __AVR_HAVE_EIJMP_EICALL__
    adc r16, __zero_reg__
#endif /* HAVE_EIJMP */
.L__do_global_dtors_start:
    cpi r28, pm_lo8(__dtors_end)
    cpc r29, cdtors_tst_reg
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi r24, pm_hh8(__dtors_end)
    cpc r16, r24
#endif /* HAVE_EIJMP */
    brne .L__do_global_dtors_loop
ENDF __do_global_dtors
#endif /* L_dtors */

#undef cdtors_tst_reg
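
/* Illustration only, not part of the build: constructors are walked
   from __ctors_end down to __ctors_start, destructors from
   __dtors_start up to __dtors_end; both tables hold word addresses
   that are dispatched through __tablejump2__.  Roughly, in C:

       typedef void (*func_ptr) (void);
       extern func_ptr __ctors_start[], __ctors_end[];
       extern func_ptr __dtors_start[], __dtors_end[];

       static void
       do_global_ctors (void)
       {
           // Walk the table backwards, as the loop above does.
           for (func_ptr *f = __ctors_end; f != __ctors_start; )
               (*--f) ();
       }

       static void
       do_global_dtors (void)
       {
           // Destructors run in the opposite order.
           for (func_ptr *f = __dtors_start; f != __dtors_end; )
               (*f++) ();
       }
*/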

.section .text.libgcc, "ax", @progbits

#if !defined (__AVR_TINY__)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Loading n bytes from Flash; n = 3,4
;; R22... = Flash[Z]
;; Clobbers: __tmp_reg__

#if (defined (L_load_3)            \
     || defined (L_load_4))        \
    && !defined (__AVR_HAVE_LPMX__)

;; Destination
#define D0 22
#define D1 D0+1
#define D2 D0+2
#define D3 D0+3

.macro .load dest, n
    lpm
    mov \dest, r0
.if \dest != D0+\n-1
    adiw r30, 1
.else
    sbiw r30, \n-1
.endif
.endm

#if defined (L_load_3)
DEFUN __load_3
    push D3
    XCALL __load_4
    pop D3
    ret
ENDF __load_3
#endif /* L_load_3 */

#if defined (L_load_4)
DEFUN __load_4
    .load D0, 4
    .load D1, 4
    .load D2, 4
    .load D3, 4
    ret
ENDF __load_4
#endif /* L_load_4 */

#endif /* L_load_3 || L_load_4 */
#endif /* !defined (__AVR_TINY__) */

#if !defined (__AVR_TINY__)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Loading n bytes from Flash or RAM; n = 1,2,3,4
;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
;; Clobbers: __tmp_reg__, R21, R30, R31

#if (defined (L_xload_1)       \
     || defined (L_xload_2)    \
     || defined (L_xload_3)    \
     || defined (L_xload_4))

;; Destination
#define D0 22
#define D1 D0+1
#define D2 D0+2
#define D3 D0+3

;; Register containing bits 16+ of the address

#define HHI8 21

.macro .xload dest, n
#if defined (__AVR_HAVE_ELPMX__)
    elpm \dest, Z+
#elif defined (__AVR_HAVE_ELPM__)
    elpm
    mov \dest, r0
.if \dest != D0+\n-1
    adiw r30, 1
    adc HHI8, __zero_reg__
    out __RAMPZ__, HHI8
.endif
#elif defined (__AVR_HAVE_LPMX__)
    lpm \dest, Z+
#else
    lpm
    mov \dest, r0
.if \dest != D0+\n-1
    adiw r30, 1
.endif
#endif
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
.if \dest == D0+\n-1
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out __RAMPZ__, __zero_reg__
.endif
#endif
.endm ; .xload

#if defined (L_xload_1)
DEFUN __xload_1
#if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
    sbrc HHI8, 7
    ld D0, Z
    sbrs HHI8, 7
    lpm D0, Z
    ret
#else
    sbrc HHI8, 7
    rjmp 1f
#if defined (__AVR_HAVE_ELPM__)
    out __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload D0, 1
    ret
1:  ld D0, Z
    ret
#endif /* LPMx && !ELPM */
ENDF __xload_1
#endif /* L_xload_1 */

#if defined (L_xload_2)
DEFUN __xload_2
    sbrc HHI8, 7
    rjmp 1f
#if defined (__AVR_HAVE_ELPM__)
    out __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload D0, 2
    .xload D1, 2
    ret
1:  ld D0, Z+
    ld D1, Z+
    ret
ENDF __xload_2
#endif /* L_xload_2 */

#if defined (L_xload_3)
DEFUN __xload_3
    sbrc HHI8, 7
    rjmp 1f
#if defined (__AVR_HAVE_ELPM__)
    out __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload D0, 3
    .xload D1, 3
    .xload D2, 3
    ret
1:  ld D0, Z+
    ld D1, Z+
    ld D2, Z+
    ret
ENDF __xload_3
#endif /* L_xload_3 */

#if defined (L_xload_4)
DEFUN __xload_4
    sbrc HHI8, 7
    rjmp 1f
#if defined (__AVR_HAVE_ELPM__)
    out __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload D0, 4
    .xload D1, 4
    .xload D2, 4
    .xload D3, 4
    ret
1:  ld D0, Z+
    ld D1, Z+
    ld D2, Z+
    ld D3, Z+
    ret
ENDF __xload_4
#endif /* L_xload_4 */

#endif /* L_xload_{1|2|3|4} */
#endif /* !defined (__AVR_TINY__) */
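
/* Illustration only, not part of the build: __xload_<n> treats bit 7
   of R21 (bit 23 of the 24-bit address) as an address-space selector,
   as in this C model with a hypothetical read_flash_byte() helper.
   The __movmemx_* routines below loop the very same dispatch.

       #include <stdint.h>

       extern uint8_t read_flash_byte (uint32_t address);

       static void
       xload (uint32_t address, uint8_t *dest, uint8_t n)
       {
           if (address & 0x800000UL)      // R21.7 set: plain RAM access
             {
               const uint8_t *p = (const uint8_t *) (uint16_t) address;
               while (n--)
                   *dest++ = *p++;
             }
           else                           // R21.7 clear: LPM/ELPM access
             {
               while (n--)
                   *dest++ = read_flash_byte (address++);
             }
       }
*/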

#if !defined (__AVR_TINY__)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; memcopy from Address Space __pgmx to RAM
;; R23:Z = Source Address
;; X = Destination Address
;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z

#if defined (L_movmemx)

#define HHI8 23
#define LOOP 24

DEFUN __movmemx_qi
    ;; #Bytes to copy fit in 8 Bits (1..255)
    ;; Zero-extend Loop Counter
    clr LOOP+1
    ;; FALLTHRU
ENDF __movmemx_qi

DEFUN __movmemx_hi

;; Read from where?
    sbrc HHI8, 7
    rjmp 1f

;; Read from Flash

#if defined (__AVR_HAVE_ELPM__)
    out __RAMPZ__, HHI8
#endif

0:  ;; Load 1 Byte from Flash...

#if defined (__AVR_HAVE_ELPMX__)
    elpm r0, Z+
#elif defined (__AVR_HAVE_ELPM__)
    elpm
    adiw r30, 1
    adc HHI8, __zero_reg__
    out __RAMPZ__, HHI8
#elif defined (__AVR_HAVE_LPMX__)
    lpm r0, Z+
#else
    lpm
    adiw r30, 1
#endif

    ;; ...and store that Byte to RAM Destination
    st X+, r0
    sbiw LOOP, 1
    brne 0b
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out __RAMPZ__, __zero_reg__
#endif /* ELPM && RAMPD */
    ret

;; Read from RAM

1:  ;; Read 1 Byte from RAM...
    ld r0, Z+
    ;; ...and store that Byte to RAM Destination
    st X+, r0
    sbiw LOOP, 1
    brne 1b
    ret
ENDF __movmemx_hi

#undef HHI8
#undef LOOP

#endif /* L_movmemx */
#endif /* !defined (__AVR_TINY__) */


.section .text.libgcc.builtins, "ax", @progbits

/**********************************
 * Find first set Bit (ffs)
 **********************************/

#if defined (L_ffssi2)
;; find first set bit
;; r25:r24 = ffs32 (r25:r22)
;; clobbers: r22, r26
DEFUN __ffssi2
    clr r26
    tst r22
    brne 1f
    subi r26, -8
    or r22, r23
    brne 1f
    subi r26, -8
    or r22, r24
    brne 1f
    subi r26, -8
    or r22, r25
    brne 1f
    ret
1:  mov r24, r22
    XJMP __loop_ffsqi2
ENDF __ffssi2
#endif /* defined (L_ffssi2) */

#if defined (L_ffshi2)
;; find first set bit
;; r25:r24 = ffs16 (r25:r24)
;; clobbers: r26
DEFUN __ffshi2
    clr r26
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have a problem skipping 2-word instructions
    tst r24
    breq 2f
#else
    cpse r24, __zero_reg__
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1:  XJMP __loop_ffsqi2
2:  ldi r26, 8
    or r24, r25
    brne 1b
    ret
ENDF __ffshi2
#endif /* defined (L_ffshi2) */

#if defined (L_loop_ffsqi2)
;; Helper for ffshi2, ffssi2
;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
;; r24 must be != 0
;; clobbers: r26
DEFUN __loop_ffsqi2
    inc r26
    lsr r24
    brcc __loop_ffsqi2
    mov r24, r26
    clr r25
    ret
ENDF __loop_ffsqi2
#endif /* defined (L_loop_ffsqi2) */
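
/* Illustration only, not part of the build: a C model of the ffs
   helpers above.  __ffssi2 skips whole zero bytes in steps of 8, then
   __loop_ffsqi2 shifts the first non-zero byte right until the set
   bit falls out into the carry, giving the usual 1-based result.

       #include <stdint.h>

       static uint16_t
       ffs32 (uint32_t x)
       {
           if (x == 0)
               return 0;                  // all-zero input yields 0
           uint16_t n = 0;
           while ((x & 0xff) == 0)        // skip zero bytes: +8 each
             {
               n += 8;
               x >>= 8;
             }
           uint8_t b = x & 0xff;          // b != 0 here
           for (;;)                       // __loop_ffsqi2
             {
               n++;
               if (b & 1)                 // lsr: bit 0 -> carry
                   break;
               b >>= 1;
             }
           return n;
       }
*/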

/**********************************
 * Count trailing Zeros (ctz)
 **********************************/

#if defined (L_ctzsi2)
;; count trailing zeros
;; r25:r24 = ctz32 (r25:r22)
;; clobbers: r26, r22
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzsi2
    XCALL __ffssi2
    dec r24
    ret
ENDF __ctzsi2
#endif /* defined (L_ctzsi2) */

#if defined (L_ctzhi2)
;; count trailing zeros
;; r25:r24 = ctz16 (r25:r24)
;; clobbers: r26
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzhi2
    XCALL __ffshi2
    dec r24
    ret
ENDF __ctzhi2
#endif /* defined (L_ctzhi2) */


/**********************************
 * Count leading Zeros (clz)
 **********************************/

#if defined (L_clzdi2)
;; count leading zeros
;; r25:r24 = clz64 (r25:r18)
;; clobbers: r22, r23, r26
DEFUN __clzdi2
    XCALL __clzsi2
    sbrs r24, 5
    ret
    mov_l r22, r18
    mov_h r23, r19
    mov_l r24, r20
    mov_h r25, r21
    XCALL __clzsi2
    subi r24, -32
    ret
ENDF __clzdi2
#endif /* defined (L_clzdi2) */

#if defined (L_clzsi2)
;; count leading zeros
;; r25:r24 = clz32 (r25:r22)
;; clobbers: r26
DEFUN __clzsi2
    XCALL __clzhi2
    sbrs r24, 4
    ret
    mov_l r24, r22
    mov_h r25, r23
    XCALL __clzhi2
    subi r24, -16
    ret
ENDF __clzsi2
#endif /* defined (L_clzsi2) */

#if defined (L_clzhi2)
;; count leading zeros
;; r25:r24 = clz16 (r25:r24)
;; clobbers: r26
DEFUN __clzhi2
    clr r26
    tst r25
    brne 1f
    subi r26, -8
    or r25, r24
    brne 1f
    ldi r24, 16
    ret
1:  cpi r25, 16
    brsh 3f
    subi r26, -3
    swap r25
2:  inc r26
3:  lsl r25
    brcc 2b
    mov r24, r26
    clr r25
    ret
ENDF __clzhi2
#endif /* defined (L_clzhi2) */


/**********************************
 * Parity
 **********************************/

#if defined (L_paritydi2)
;; r25:r24 = parity64 (r25:r18)
;; clobbers: __tmp_reg__
DEFUN __paritydi2
    eor r24, r18
    eor r24, r19
    eor r24, r20
    eor r24, r21
    XJMP __paritysi2
ENDF __paritydi2
#endif /* defined (L_paritydi2) */

#if defined (L_paritysi2)
;; r25:r24 = parity32 (r25:r22)
;; clobbers: __tmp_reg__
DEFUN __paritysi2
    eor r24, r22
    eor r24, r23
    XJMP __parityhi2
ENDF __paritysi2
#endif /* defined (L_paritysi2) */

#if defined (L_parityhi2)
;; r25:r24 = parity16 (r25:r24)
;; clobbers: __tmp_reg__
DEFUN __parityhi2
    eor r24, r25
;; FALLTHRU
ENDF __parityhi2

;; r25:r24 = parity8 (r24)
;; clobbers: __tmp_reg__
DEFUN __parityqi2
    ;; parity is in r24[0..7]
    mov __tmp_reg__, r24
    swap __tmp_reg__
    eor r24, __tmp_reg__
    ;; parity is in r24[0..3]
    subi r24, -4
    andi r24, -5
    subi r24, -6
    ;; parity is in r24[0,3]
    sbrc r24, 3
    inc r24
    ;; parity is in r24[0]
    andi r24, 1
    clr r25
    ret
ENDF __parityqi2
#endif /* defined (L_parityhi2) */


/**********************************
 * Population Count
 **********************************/

#if defined (L_popcounthi2)
;; population count
;; r25:r24 = popcount16 (r25:r24)
;; clobbers: __tmp_reg__
DEFUN __popcounthi2
    XCALL __popcountqi2
    push r24
    mov r24, r25
    XCALL __popcountqi2
    clr r25
    ;; FALLTHRU
ENDF __popcounthi2

DEFUN __popcounthi2_tail
    pop __tmp_reg__
    add r24, __tmp_reg__
    ret
ENDF __popcounthi2_tail
#endif /* defined (L_popcounthi2) */

#if defined (L_popcountsi2)
;; population count
;; r25:r24 = popcount32 (r25:r22)
;; clobbers: __tmp_reg__
DEFUN __popcountsi2
    XCALL __popcounthi2
    push r24
    mov_l r24, r22
    mov_h r25, r23
    XCALL __popcounthi2
    XJMP __popcounthi2_tail
ENDF __popcountsi2
#endif /* defined (L_popcountsi2) */

#if defined (L_popcountdi2)
;; population count
;; r25:r24 = popcount64 (r25:r18)
;; clobbers: r22, r23, __tmp_reg__
DEFUN __popcountdi2
    XCALL __popcountsi2
    push r24
    mov_l r22, r18
    mov_h r23, r19
    mov_l r24, r20
    mov_h r25, r21
    XCALL __popcountsi2
    XJMP __popcounthi2_tail
ENDF __popcountdi2
#endif /* defined (L_popcountdi2) */

#if defined (L_popcountqi2)
;; population count
;; r24 = popcount8 (r24)
;; clobbers: __tmp_reg__
DEFUN __popcountqi2
    mov __tmp_reg__, r24
    andi r24, 1
    lsr __tmp_reg__
    lsr __tmp_reg__
    adc r24, __zero_reg__
    lsr __tmp_reg__
    adc r24, __zero_reg__
    lsr __tmp_reg__
    adc r24, __zero_reg__
    lsr __tmp_reg__
    adc r24, __zero_reg__
    lsr __tmp_reg__
    adc r24, __zero_reg__
    lsr __tmp_reg__
    adc r24, __tmp_reg__
    ret
ENDF __popcountqi2
#endif /* defined (L_popcountqi2) */
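
/* Illustration only, not part of the build: __popcountqi2 above
   isolates bit 0 with andi, then shifts the remaining bits into the
   carry one by one and accumulates them with adc; the final
   "adc r24, __tmp_reg__" folds in both bit 6 (carry) and bit 7 (the
   last remaining byte value) at once.  The wider versions just sum
   per-byte counts.  A plain C model:

       #include <stdint.h>

       static uint8_t
       popcount8 (uint8_t x)
       {
           uint8_t n = x & 1;             // bit 0
           for (uint8_t i = 1; i < 8; i++)
               n += (x >> i) & 1;         // bits 1..7, one at a time
           return n;
       }

       static uint8_t
       popcount16 (uint16_t x)
       {
           // __popcounthi2: low-byte count plus high-byte count.
           return popcount8 (x & 0xff) + popcount8 (x >> 8);
       }
*/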

/**********************************
 * Swap bytes
 **********************************/

;; swap two registers with different register number
.macro bswap a, b
    eor \a, \b
    eor \b, \a
    eor \a, \b
.endm

#if defined (L_bswapsi2)
;; swap bytes
;; r25:r22 = bswap32 (r25:r22)
DEFUN __bswapsi2
    bswap r22, r25
    bswap r23, r24
    ret
ENDF __bswapsi2
#endif /* defined (L_bswapsi2) */

#if defined (L_bswapdi2)
;; swap bytes
;; r25:r18 = bswap64 (r25:r18)
DEFUN __bswapdi2
    bswap r18, r25
    bswap r19, r24
    bswap r20, r23
    bswap r21, r22
    ret
ENDF __bswapdi2
#endif /* defined (L_bswapdi2) */


/**********************************
 * 64-bit shifts
 **********************************/

#if defined (L_ashrdi3)
;; Arithmetic shift right
;; r25:r18 = ashr64 (r25:r18, r17:r16)
DEFUN __ashrdi3
    bst r25, 7
    bld __zero_reg__, 0
    ;; FALLTHRU
ENDF __ashrdi3

;; Logical shift right
;; r25:r18 = lshr64 (r25:r18, r17:r16)
DEFUN __lshrdi3
    lsr __zero_reg__
    sbc __tmp_reg__, __tmp_reg__
    push r16
0:  cpi r16, 8
    brlo 2f
    subi r16, 8
    mov r18, r19
    mov r19, r20
    mov r20, r21
    mov r21, r22
    mov r22, r23
    mov r23, r24
    mov r24, r25
    mov r25, __tmp_reg__
    rjmp 0b
1:  asr __tmp_reg__
    ror r25
    ror r24
    ror r23
    ror r22
    ror r21
    ror r20
    ror r19
    ror r18
2:  dec r16
    brpl 1b
    pop r16
    ret
ENDF __lshrdi3
#endif /* defined (L_ashrdi3) */

#if defined (L_ashldi3)
;; Shift left
;; r25:r18 = ashl64 (r25:r18, r17:r16)
DEFUN __ashldi3
    push r16
0:  cpi r16, 8
    brlo 2f
    mov r25, r24
    mov r24, r23
    mov r23, r22
    mov r22, r21
    mov r21, r20
    mov r20, r19
    mov r19, r18
    clr r18
    subi r16, 8
    rjmp 0b
1:  lsl r18
    rol r19
    rol r20
    rol r21
    rol r22
    rol r23
    rol r24
    rol r25
2:  dec r16
    brpl 1b
    pop r16
    ret
ENDF __ashldi3
#endif /* defined (L_ashldi3) */

#if defined (L_rotldi3)
;; Rotate left
;; r25:r18 = rotl64 (r25:r18, r17:r16)
DEFUN __rotldi3
    push r16
0:  cpi r16, 8
    brlo 2f
    subi r16, 8
    mov __tmp_reg__, r25
    mov r25, r24
    mov r24, r23
    mov r23, r22
    mov r22, r21
    mov r21, r20
    mov r20, r19
    mov r19, r18
    mov r18, __tmp_reg__
    rjmp 0b
1:  lsl r18
    rol r19
    rol r20
    rol r21
    rol r22
    rol r23
    rol r24
    rol r25
    adc r18, __zero_reg__
2:  dec r16
    brpl 1b
    pop r16
    ret
ENDF __rotldi3
#endif /* defined (L_rotldi3) */


.section .text.libgcc.fmul, "ax", @progbits

/***********************************************************/
;;; Softmul versions of FMUL, FMULS and FMULSU to implement
;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
/***********************************************************/

#define A1 24
#define B1 25
#define C0 22
#define C1 23
#define A0 __tmp_reg__

#ifdef L_fmuls
;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmuls
    ;; A0.7 = negate result?
    mov A0, A1
    eor A0, B1
    ;; B1 = |B1|
    sbrc B1, 7
    neg B1
    XJMP __fmulsu_exit
ENDF __fmuls
#endif /* L_fmuls */

#ifdef L_fmulsu
;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmulsu
    ;; A0.7 = negate result?
    mov A0, A1
;; FALLTHRU
ENDF __fmulsu

;; Helper for __fmuls and __fmulsu
DEFUN __fmulsu_exit
    ;; A1 = |A1|
    sbrc A1, 7
    neg A1
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have a problem skipping 2-word instructions
    tst A0
    brmi 1f
#else
    sbrs A0, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XJMP __fmul
1:  XCALL __fmul
    ;; C = -C iff A0.7 = 1
    NEG2 C0
    ret
ENDF __fmulsu_exit
#endif /* L_fmulsu */


#ifdef L_fmul
;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmul
    ; clear result
    clr C0
    clr C1
    clr A0
1:  tst B1
    ;; 1.0 = 0x80, so test bit 7 of B to see if A must be added to C.
2:  brpl 3f
    ;; C += A
    add C0, A0
    adc C1, A1
3:  ;; A >>= 1
    lsr A1
    ror A0
    ;; B <<= 1
    lsl B1
    brne 2b
    ret
ENDF __fmul
#endif /* L_fmul */
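
/* Illustration only, not part of the build: a C model of the
   shift-and-add loop in __fmul above.  B's most significant bit is
   weight 1.0 in the 1.7 fixed-point format, so A (widened into A1:A0)
   is accumulated whenever the current top bit of B is set, then
   halved as B is shifted on to its next bit.  __fmuls/__fmulsu merely
   take absolute values first and negate the result (NEG2) when the
   operand signs differ.

       #include <stdint.h>

       static uint16_t
       fmul (uint8_t a, uint8_t b)
       {
           uint16_t acc = (uint16_t) a << 8;  // A1:A0 with A0 = 0
           uint16_t c = 0;
           while (b)
             {
               if (b & 0x80)                  // bit 7 of B: weight 1.0
                   c += acc;                  // C += A
               acc >>= 1;                     // A >>= 1
               b <<= 1;                       // B <<= 1
             }
           return c;   // == (a * b) << 1 mod 2^16, as FMUL computes
       }
*/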

#undef A0
#undef A1
#undef B1
#undef C0
#undef C1

#include "lib1funcs-fixed.S"