1/* -*- Mode: Asm -*- */ 2/* Copyright (C) 1998-2022 Free Software Foundation, Inc. 3 Contributed by Denis Chertykov <chertykov@gmail.com> 4 5This file is free software; you can redistribute it and/or modify it 6under the terms of the GNU General Public License as published by the 7Free Software Foundation; either version 3, or (at your option) any 8later version. 9 10This file is distributed in the hope that it will be useful, but 11WITHOUT ANY WARRANTY; without even the implied warranty of 12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13General Public License for more details. 14 15Under Section 7 of GPL version 3, you are granted additional 16permissions described in the GCC Runtime Library Exception, version 173.1, as published by the Free Software Foundation. 18 19You should have received a copy of the GNU General Public License and 20a copy of the GCC Runtime Library Exception along with this program; 21see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22<http://www.gnu.org/licenses/>. */ 23 24#if defined (__AVR_TINY__) 25#define __zero_reg__ r17 26#define __tmp_reg__ r16 27#else 28#define __zero_reg__ r1 29#define __tmp_reg__ r0 30#endif 31#define __SREG__ 0x3f 32#if defined (__AVR_HAVE_SPH__) 33#define __SP_H__ 0x3e 34#endif 35#define __SP_L__ 0x3d 36#define __RAMPZ__ 0x3B 37#define __EIND__ 0x3C 38 39/* Most of the functions here are called directly from avr.md 40 patterns, instead of using the standard libcall mechanisms. 41 This can make better code because GCC knows exactly which 42 of the call-used registers (not all of them) are clobbered. */ 43 44/* FIXME: At present, there is no SORT directive in the linker 45 script so that we must not assume that different modules 46 in the same input section like .libgcc.text.mul will be 47 located close together. Therefore, we cannot use 48 RCALL/RJMP to call a function like __udivmodhi4 from 49 __divmodhi4 and have to use lengthy XCALL/XJMP even 50 though they are in the same input section and all same 51 input sections together are small enough to reach every 52 location with a RCALL/RJMP instruction. */ 53 54#if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__) 55#error device not supported 56#endif 57 58 .macro mov_l r_dest, r_src 59#if defined (__AVR_HAVE_MOVW__) 60 movw \r_dest, \r_src 61#else 62 mov \r_dest, \r_src 63#endif 64 .endm 65 66 .macro mov_h r_dest, r_src 67#if defined (__AVR_HAVE_MOVW__) 68 ; empty 69#else 70 mov \r_dest, \r_src 71#endif 72 .endm 73 74.macro wmov r_dest, r_src 75#if defined (__AVR_HAVE_MOVW__) 76 movw \r_dest, \r_src 77#else 78 mov \r_dest, \r_src 79 mov \r_dest+1, \r_src+1 80#endif 81.endm 82 83#if defined (__AVR_HAVE_JMP_CALL__) 84#define XCALL call 85#define XJMP jmp 86#else 87#define XCALL rcall 88#define XJMP rjmp 89#endif 90 91#if defined (__AVR_HAVE_EIJMP_EICALL__) 92#define XICALL eicall 93#define XIJMP eijmp 94#else 95#define XICALL icall 96#define XIJMP ijmp 97#endif 98 99;; Prologue stuff 100 101.macro do_prologue_saves n_pushed n_frame=0 102 ldi r26, lo8(\n_frame) 103 ldi r27, hi8(\n_frame) 104 ldi r30, lo8(gs(.L_prologue_saves.\@)) 105 ldi r31, hi8(gs(.L_prologue_saves.\@)) 106 XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2) 107.L_prologue_saves.\@: 108.endm 109 110;; Epilogue stuff 111 112.macro do_epilogue_restores n_pushed n_frame=0 113 in r28, __SP_L__ 114#ifdef __AVR_HAVE_SPH__ 115 in r29, __SP_H__ 116.if \n_frame > 63 117 subi r28, lo8(-\n_frame) 118 sbci r29, hi8(-\n_frame) 119.elseif \n_frame > 0 120 adiw r28, \n_frame 121.endif 122#else 123 clr r29 124.if \n_frame > 0 125 subi r28, lo8(-\n_frame) 126.endif 127#endif /* HAVE SPH */ 128 ldi r30, \n_pushed 129 XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2) 130.endm 131 132;; Support function entry and exit for convenience 133 134.macro wsubi r_arg1, i_arg2 135#if defined (__AVR_TINY__) 136 subi \r_arg1, lo8(\i_arg2) 137 sbci \r_arg1+1, hi8(\i_arg2) 138#else 139 sbiw \r_arg1, \i_arg2 140#endif 141.endm 142 143.macro waddi r_arg1, i_arg2 144#if defined (__AVR_TINY__) 145 subi \r_arg1, lo8(-\i_arg2) 146 sbci \r_arg1+1, hi8(-\i_arg2) 147#else 148 adiw \r_arg1, \i_arg2 149#endif 150.endm 151 152.macro DEFUN name 153.global \name 154.func \name 155\name: 156.endm 157 158.macro ENDF name 159.size \name, .-\name 160.endfunc 161.endm 162 163.macro FALIAS name 164.global \name 165.func \name 166\name: 167.size \name, .-\name 168.endfunc 169.endm 170 171;; Skip next instruction, typically a jump target 172#define skip cpse 16,16 173 174;; Negate a 2-byte value held in consecutive registers 175.macro NEG2 reg 176 com \reg+1 177 neg \reg 178 sbci \reg+1, -1 179.endm 180 181;; Negate a 4-byte value held in consecutive registers 182;; Sets the V flag for signed overflow tests if REG >= 16 183.macro NEG4 reg 184 com \reg+3 185 com \reg+2 186 com \reg+1 187.if \reg >= 16 188 neg \reg 189 sbci \reg+1, -1 190 sbci \reg+2, -1 191 sbci \reg+3, -1 192.else 193 com \reg 194 adc \reg, __zero_reg__ 195 adc \reg+1, __zero_reg__ 196 adc \reg+2, __zero_reg__ 197 adc \reg+3, __zero_reg__ 198.endif 199.endm 200 201#define exp_lo(N) hlo8 ((N) << 23) 202#define exp_hi(N) hhi8 ((N) << 23) 203 204 205.section .text.libgcc.mul, "ax", @progbits 206 207;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 208/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */ 209#if !defined (__AVR_HAVE_MUL__) 210/******************************************************* 211 Multiplication 8 x 8 without MUL 212*******************************************************/ 213#if defined (L_mulqi3) 214 215#define r_arg2 r22 /* multiplicand */ 216#define r_arg1 r24 /* multiplier */ 217#define r_res __tmp_reg__ /* result */ 218 219DEFUN __mulqi3 220 clr r_res ; clear result 221__mulqi3_loop: 222 sbrc r_arg1,0 223 add r_res,r_arg2 224 add r_arg2,r_arg2 ; shift multiplicand 225 breq __mulqi3_exit ; while multiplicand != 0 226 lsr r_arg1 ; 227 brne __mulqi3_loop ; exit if multiplier = 0 228__mulqi3_exit: 229 mov r_arg1,r_res ; result to return register 230 ret 231ENDF __mulqi3 232 233#undef r_arg2 234#undef r_arg1 235#undef r_res 236 237#endif /* defined (L_mulqi3) */ 238 239 240/******************************************************* 241 Widening Multiplication 16 = 8 x 8 without MUL 242 Multiplication 16 x 16 without MUL 243*******************************************************/ 244 245#define A0 22 246#define A1 23 247#define B0 24 248#define BB0 20 249#define B1 25 250;; Output overlaps input, thus expand result in CC0/1 251#define C0 24 252#define C1 25 253#define CC0 __tmp_reg__ 254#define CC1 21 255 256#if defined (L_umulqihi3) 257;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24 258;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0 259;;; Clobbers: __tmp_reg__, R21..R23 260DEFUN __umulqihi3 261 clr A1 262 clr B1 263 XJMP __mulhi3 264ENDF __umulqihi3 265#endif /* L_umulqihi3 */ 266 267#if defined (L_mulqihi3) 268;;; R25:R24 = (signed int) R22 * (signed int) R24 269;;; (C1:C0) = (signed int) A0 * (signed int) B0 270;;; Clobbers: __tmp_reg__, R20..R23 271DEFUN __mulqihi3 272 ;; Sign-extend B0 273 clr B1 274 sbrc B0, 7 275 com B1 276 ;; The multiplication runs twice as fast if A1 is zero, thus: 277 ;; Zero-extend A0 278 clr A1 279#ifdef __AVR_HAVE_JMP_CALL__ 280 ;; Store B0 * sign of A 281 clr BB0 282 sbrc A0, 7 283 mov BB0, B0 284 call __mulhi3 285#else /* have no CALL */ 286 ;; Skip sign-extension of A if A >= 0 287 ;; Same size as with the first alternative but avoids errata skip 288 ;; and is faster if A >= 0 289 sbrs A0, 7 290 rjmp __mulhi3 291 ;; If A < 0 store B 292 mov BB0, B0 293 rcall __mulhi3 294#endif /* HAVE_JMP_CALL */ 295 ;; 1-extend A after the multiplication 296 sub C1, BB0 297 ret 298ENDF __mulqihi3 299#endif /* L_mulqihi3 */ 300 301#if defined (L_mulhi3) 302;;; R25:R24 = R23:R22 * R25:R24 303;;; (C1:C0) = (A1:A0) * (B1:B0) 304;;; Clobbers: __tmp_reg__, R21..R23 305DEFUN __mulhi3 306 307 ;; Clear result 308 clr CC0 309 clr CC1 310 rjmp 3f 3111: 312 ;; Bit n of A is 1 --> C += B << n 313 add CC0, B0 314 adc CC1, B1 3152: 316 lsl B0 317 rol B1 3183: 319 ;; If B == 0 we are ready 320 wsubi B0, 0 321 breq 9f 322 323 ;; Carry = n-th bit of A 324 lsr A1 325 ror A0 326 ;; If bit n of A is set, then go add B * 2^n to C 327 brcs 1b 328 329 ;; Carry = 0 --> The ROR above acts like CP A0, 0 330 ;; Thus, it is sufficient to CPC the high part to test A against 0 331 cpc A1, __zero_reg__ 332 ;; Only proceed if A != 0 333 brne 2b 3349: 335 ;; Move Result into place 336 mov C0, CC0 337 mov C1, CC1 338 ret 339ENDF __mulhi3 340#endif /* L_mulhi3 */ 341 342#undef A0 343#undef A1 344#undef B0 345#undef BB0 346#undef B1 347#undef C0 348#undef C1 349#undef CC0 350#undef CC1 351 352 353#define A0 22 354#define A1 A0+1 355#define A2 A0+2 356#define A3 A0+3 357 358#define B0 18 359#define B1 B0+1 360#define B2 B0+2 361#define B3 B0+3 362 363#define CC0 26 364#define CC1 CC0+1 365#define CC2 30 366#define CC3 CC2+1 367 368#define C0 22 369#define C1 C0+1 370#define C2 C0+2 371#define C3 C0+3 372 373/******************************************************* 374 Widening Multiplication 32 = 16 x 16 without MUL 375*******************************************************/ 376 377#if defined (L_umulhisi3) 378DEFUN __umulhisi3 379 wmov B0, 24 380 ;; Zero-extend B 381 clr B2 382 clr B3 383 ;; Zero-extend A 384 wmov A2, B2 385 XJMP __mulsi3 386ENDF __umulhisi3 387#endif /* L_umulhisi3 */ 388 389#if defined (L_mulhisi3) 390DEFUN __mulhisi3 391 wmov B0, 24 392 ;; Sign-extend B 393 lsl r25 394 sbc B2, B2 395 mov B3, B2 396#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ 397 ;; Sign-extend A 398 clr A2 399 sbrc A1, 7 400 com A2 401 mov A3, A2 402 XJMP __mulsi3 403#else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */ 404 ;; Zero-extend A and __mulsi3 will run at least twice as fast 405 ;; compared to a sign-extended A. 406 clr A2 407 clr A3 408 sbrs A1, 7 409 XJMP __mulsi3 410 ;; If A < 0 then perform the B * 0xffff.... before the 411 ;; very multiplication by initializing the high part of the 412 ;; result CC with -B. 413 wmov CC2, A2 414 sub CC2, B0 415 sbc CC3, B1 416 XJMP __mulsi3_helper 417#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */ 418ENDF __mulhisi3 419#endif /* L_mulhisi3 */ 420 421 422/******************************************************* 423 Multiplication 32 x 32 without MUL 424*******************************************************/ 425 426#if defined (L_mulsi3) 427DEFUN __mulsi3 428#if defined (__AVR_TINY__) 429 in r26, __SP_L__ ; safe to use X, as it is CC0/CC1 430 in r27, __SP_H__ 431 subi r26, lo8(-3) ; Add 3 to point past return address 432 sbci r27, hi8(-3) 433 push B0 ; save callee saved regs 434 push B1 435 ld B0, X+ ; load from caller stack 436 ld B1, X+ 437 ld B2, X+ 438 ld B3, X 439#endif 440 ;; Clear result 441 clr CC2 442 clr CC3 443 ;; FALLTHRU 444ENDF __mulsi3 445 446DEFUN __mulsi3_helper 447 clr CC0 448 clr CC1 449 rjmp 3f 450 4511: ;; If bit n of A is set, then add B * 2^n to the result in CC 452 ;; CC += B 453 add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3 454 4552: ;; B <<= 1 456 lsl B0 $ rol B1 $ rol B2 $ rol B3 457 4583: ;; A >>= 1: Carry = n-th bit of A 459 lsr A3 $ ror A2 $ ror A1 $ ror A0 460 461 brcs 1b 462 ;; Only continue if A != 0 463 sbci A1, 0 464 brne 2b 465 wsubi A2, 0 466 brne 2b 467 468 ;; All bits of A are consumed: Copy result to return register C 469 wmov C0, CC0 470 wmov C2, CC2 471#if defined (__AVR_TINY__) 472 pop B1 ; restore callee saved regs 473 pop B0 474#endif /* defined (__AVR_TINY__) */ 475 476 ret 477ENDF __mulsi3_helper 478#endif /* L_mulsi3 */ 479 480#undef A0 481#undef A1 482#undef A2 483#undef A3 484#undef B0 485#undef B1 486#undef B2 487#undef B3 488#undef C0 489#undef C1 490#undef C2 491#undef C3 492#undef CC0 493#undef CC1 494#undef CC2 495#undef CC3 496 497#endif /* !defined (__AVR_HAVE_MUL__) */ 498;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 499 500;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 501#if defined (__AVR_HAVE_MUL__) 502#define A0 26 503#define B0 18 504#define C0 22 505 506#define A1 A0+1 507 508#define B1 B0+1 509#define B2 B0+2 510#define B3 B0+3 511 512#define C1 C0+1 513#define C2 C0+2 514#define C3 C0+3 515 516/******************************************************* 517 Widening Multiplication 32 = 16 x 16 with MUL 518*******************************************************/ 519 520#if defined (L_mulhisi3) 521;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18 522;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0 523;;; Clobbers: __tmp_reg__ 524DEFUN __mulhisi3 525 XCALL __umulhisi3 526 ;; Sign-extend B 527 tst B1 528 brpl 1f 529 sub C2, A0 530 sbc C3, A1 5311: ;; Sign-extend A 532 XJMP __usmulhisi3_tail 533ENDF __mulhisi3 534#endif /* L_mulhisi3 */ 535 536#if defined (L_usmulhisi3) 537;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18 538;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0 539;;; Clobbers: __tmp_reg__ 540DEFUN __usmulhisi3 541 XCALL __umulhisi3 542 ;; FALLTHRU 543ENDF __usmulhisi3 544 545DEFUN __usmulhisi3_tail 546 ;; Sign-extend A 547 sbrs A1, 7 548 ret 549 sub C2, B0 550 sbc C3, B1 551 ret 552ENDF __usmulhisi3_tail 553#endif /* L_usmulhisi3 */ 554 555#if defined (L_umulhisi3) 556;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18 557;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0 558;;; Clobbers: __tmp_reg__ 559DEFUN __umulhisi3 560 mul A0, B0 561 movw C0, r0 562 mul A1, B1 563 movw C2, r0 564 mul A0, B1 565#ifdef __AVR_HAVE_JMP_CALL__ 566 ;; This function is used by many other routines, often multiple times. 567 ;; Therefore, if the flash size is not too limited, avoid the RCALL 568 ;; and inverst 6 Bytes to speed things up. 569 add C1, r0 570 adc C2, r1 571 clr __zero_reg__ 572 adc C3, __zero_reg__ 573#else 574 rcall 1f 575#endif 576 mul A1, B0 5771: add C1, r0 578 adc C2, r1 579 clr __zero_reg__ 580 adc C3, __zero_reg__ 581 ret 582ENDF __umulhisi3 583#endif /* L_umulhisi3 */ 584 585/******************************************************* 586 Widening Multiplication 32 = 16 x 32 with MUL 587*******************************************************/ 588 589#if defined (L_mulshisi3) 590;;; R25:R22 = (signed long) R27:R26 * R21:R18 591;;; (C3:C0) = (signed long) A1:A0 * B3:B0 592;;; Clobbers: __tmp_reg__ 593DEFUN __mulshisi3 594#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ 595 ;; Some cores have problem skipping 2-word instruction 596 tst A1 597 brmi __mulohisi3 598#else 599 sbrs A1, 7 600#endif /* __AVR_HAVE_JMP_CALL__ */ 601 XJMP __muluhisi3 602 ;; FALLTHRU 603ENDF __mulshisi3 604 605;;; R25:R22 = (one-extended long) R27:R26 * R21:R18 606;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0 607;;; Clobbers: __tmp_reg__ 608DEFUN __mulohisi3 609 XCALL __muluhisi3 610 ;; One-extend R27:R26 (A1:A0) 611 sub C2, B0 612 sbc C3, B1 613 ret 614ENDF __mulohisi3 615#endif /* L_mulshisi3 */ 616 617#if defined (L_muluhisi3) 618;;; R25:R22 = (unsigned long) R27:R26 * R21:R18 619;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0 620;;; Clobbers: __tmp_reg__ 621DEFUN __muluhisi3 622 XCALL __umulhisi3 623 mul A0, B3 624 add C3, r0 625 mul A1, B2 626 add C3, r0 627 mul A0, B2 628 add C2, r0 629 adc C3, r1 630 clr __zero_reg__ 631 ret 632ENDF __muluhisi3 633#endif /* L_muluhisi3 */ 634 635/******************************************************* 636 Multiplication 32 x 32 with MUL 637*******************************************************/ 638 639#if defined (L_mulsi3) 640;;; R25:R22 = R25:R22 * R21:R18 641;;; (C3:C0) = C3:C0 * B3:B0 642;;; Clobbers: R26, R27, __tmp_reg__ 643DEFUN __mulsi3 644 movw A0, C0 645 push C2 646 push C3 647 XCALL __muluhisi3 648 pop A1 649 pop A0 650 ;; A1:A0 now contains the high word of A 651 mul A0, B0 652 add C2, r0 653 adc C3, r1 654 mul A0, B1 655 add C3, r0 656 mul A1, B0 657 add C3, r0 658 clr __zero_reg__ 659 ret 660ENDF __mulsi3 661#endif /* L_mulsi3 */ 662 663#undef A0 664#undef A1 665 666#undef B0 667#undef B1 668#undef B2 669#undef B3 670 671#undef C0 672#undef C1 673#undef C2 674#undef C3 675 676#endif /* __AVR_HAVE_MUL__ */ 677 678/******************************************************* 679 Multiplication 24 x 24 with MUL 680*******************************************************/ 681 682#if defined (L_mulpsi3) 683 684;; A[0..2]: In: Multiplicand; Out: Product 685#define A0 22 686#define A1 A0+1 687#define A2 A0+2 688 689;; B[0..2]: In: Multiplier 690#define B0 18 691#define B1 B0+1 692#define B2 B0+2 693 694#if defined (__AVR_HAVE_MUL__) 695 696;; C[0..2]: Expand Result 697#define C0 22 698#define C1 C0+1 699#define C2 C0+2 700 701;; R24:R22 *= R20:R18 702;; Clobbers: r21, r25, r26, r27, __tmp_reg__ 703 704#define AA0 26 705#define AA2 21 706 707DEFUN __mulpsi3 708 wmov AA0, A0 709 mov AA2, A2 710 XCALL __umulhisi3 711 mul AA2, B0 $ add C2, r0 712 mul AA0, B2 $ add C2, r0 713 clr __zero_reg__ 714 ret 715ENDF __mulpsi3 716 717#undef AA2 718#undef AA0 719 720#undef C2 721#undef C1 722#undef C0 723 724#else /* !HAVE_MUL */ 725;; C[0..2]: Expand Result 726#if defined (__AVR_TINY__) 727#define C0 16 728#else 729#define C0 0 730#endif /* defined (__AVR_TINY__) */ 731#define C1 C0+1 732#define C2 21 733 734;; R24:R22 *= R20:R18 735;; Clobbers: __tmp_reg__, R18, R19, R20, R21 736 737DEFUN __mulpsi3 738#if defined (__AVR_TINY__) 739 in r26,__SP_L__ 740 in r27,__SP_H__ 741 subi r26, lo8(-3) ; Add 3 to point past return address 742 sbci r27, hi8(-3) 743 push B0 ; save callee saved regs 744 push B1 745 ld B0,X+ ; load from caller stack 746 ld B1,X+ 747 ld B2,X+ 748#endif /* defined (__AVR_TINY__) */ 749 750 ;; C[] = 0 751 clr __tmp_reg__ 752 clr C2 753 7540: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop 755 LSR B2 $ ror B1 $ ror B0 756 757 ;; If the N-th Bit of B[] was set... 758 brcc 1f 759 760 ;; ...then add A[] * 2^N to the Result C[] 761 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 762 7631: ;; Multiply A[] by 2 764 LSL A0 $ rol A1 $ rol A2 765 766 ;; Loop until B[] is 0 767 subi B0,0 $ sbci B1,0 $ sbci B2,0 768 brne 0b 769 770 ;; Copy C[] to the return Register A[] 771 wmov A0, C0 772 mov A2, C2 773 774 clr __zero_reg__ 775#if defined (__AVR_TINY__) 776 pop B1 777 pop B0 778#endif /* (__AVR_TINY__) */ 779 ret 780ENDF __mulpsi3 781 782#undef C2 783#undef C1 784#undef C0 785 786#endif /* HAVE_MUL */ 787 788#undef B2 789#undef B1 790#undef B0 791 792#undef A2 793#undef A1 794#undef A0 795 796#endif /* L_mulpsi3 */ 797 798#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__) 799 800;; A[0..2]: In: Multiplicand 801#define A0 22 802#define A1 A0+1 803#define A2 A0+2 804 805;; BB: In: Multiplier 806#define BB 25 807 808;; C[0..2]: Result 809#define C0 18 810#define C1 C0+1 811#define C2 C0+2 812 813;; C[] = A[] * sign_extend (BB) 814DEFUN __mulsqipsi3 815 mul A0, BB 816 movw C0, r0 817 mul A2, BB 818 mov C2, r0 819 mul A1, BB 820 add C1, r0 821 adc C2, r1 822 clr __zero_reg__ 823 sbrs BB, 7 824 ret 825 ;; One-extend BB 826 sub C1, A0 827 sbc C2, A1 828 ret 829ENDF __mulsqipsi3 830 831#undef C2 832#undef C1 833#undef C0 834 835#undef BB 836 837#undef A2 838#undef A1 839#undef A0 840 841#endif /* L_mulsqipsi3 && HAVE_MUL */ 842 843/******************************************************* 844 Multiplication 64 x 64 845*******************************************************/ 846 847;; A[] = A[] * B[] 848 849;; A[0..7]: In: Multiplicand 850;; Out: Product 851#define A0 18 852#define A1 A0+1 853#define A2 A0+2 854#define A3 A0+3 855#define A4 A0+4 856#define A5 A0+5 857#define A6 A0+6 858#define A7 A0+7 859 860;; B[0..7]: In: Multiplier 861#define B0 10 862#define B1 B0+1 863#define B2 B0+2 864#define B3 B0+3 865#define B4 B0+4 866#define B5 B0+5 867#define B6 B0+6 868#define B7 B0+7 869 870#ifndef __AVR_TINY__ 871#if defined (__AVR_HAVE_MUL__) 872;; Define C[] for convenience 873;; Notice that parts of C[] overlap A[] respective B[] 874#define C0 16 875#define C1 C0+1 876#define C2 20 877#define C3 C2+1 878#define C4 28 879#define C5 C4+1 880#define C6 C4+2 881#define C7 C4+3 882 883#if defined (L_muldi3) 884 885;; A[] *= B[] 886;; R25:R18 *= R17:R10 887;; Ordinary ABI-Function 888 889DEFUN __muldi3 890 push r29 891 push r28 892 push r17 893 push r16 894 895 ;; Counting in Words, we have to perform a 4 * 4 Multiplication 896 897 ;; 3 * 0 + 0 * 3 898 mul A7,B0 $ $ mov C7,r0 899 mul A0,B7 $ $ add C7,r0 900 mul A6,B1 $ $ add C7,r0 901 mul A6,B0 $ mov C6,r0 $ add C7,r1 902 mul B6,A1 $ $ add C7,r0 903 mul B6,A0 $ add C6,r0 $ adc C7,r1 904 905 ;; 1 * 2 906 mul A2,B4 $ add C6,r0 $ adc C7,r1 907 mul A3,B4 $ $ add C7,r0 908 mul A2,B5 $ $ add C7,r0 909 910 push A5 911 push A4 912 push B1 913 push B0 914 push A3 915 push A2 916 917 ;; 0 * 0 918 wmov 26, B0 919 XCALL __umulhisi3 920 wmov C0, 22 921 wmov C2, 24 922 923 ;; 0 * 2 924 wmov 26, B4 925 XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25 926 927 wmov 26, B2 928 ;; 0 * 1 929 XCALL __muldi3_6 930 931 pop A0 932 pop A1 933 ;; 1 * 1 934 wmov 26, B2 935 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25 936 937 pop r26 938 pop r27 939 ;; 1 * 0 940 XCALL __muldi3_6 941 942 pop A0 943 pop A1 944 ;; 2 * 0 945 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25 946 947 ;; 2 * 1 948 wmov 26, B2 949 XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23 950 951 ;; A[] = C[] 952 wmov A0, C0 953 ;; A2 = C2 already 954 wmov A4, C4 955 wmov A6, C6 956 957 pop r16 958 pop r17 959 pop r28 960 pop r29 961 ret 962ENDF __muldi3 963#endif /* L_muldi3 */ 964 965#if defined (L_muldi3_6) 966;; A helper for some 64-bit multiplications with MUL available 967DEFUN __muldi3_6 968__muldi3_6: 969 XCALL __umulhisi3 970 add C2, 22 971 adc C3, 23 972 adc C4, 24 973 adc C5, 25 974 brcc 0f 975 adiw C6, 1 9760: ret 977ENDF __muldi3_6 978#endif /* L_muldi3_6 */ 979 980#undef C7 981#undef C6 982#undef C5 983#undef C4 984#undef C3 985#undef C2 986#undef C1 987#undef C0 988 989#else /* !HAVE_MUL */ 990 991#if defined (L_muldi3) 992 993#define C0 26 994#define C1 C0+1 995#define C2 C0+2 996#define C3 C0+3 997#define C4 C0+4 998#define C5 C0+5 999#define C6 0 1000#define C7 C6+1 1001 1002#define Loop 9 1003 1004;; A[] *= B[] 1005;; R25:R18 *= R17:R10 1006;; Ordinary ABI-Function 1007 1008DEFUN __muldi3 1009 push r29 1010 push r28 1011 push Loop 1012 1013 ldi C0, 64 1014 mov Loop, C0 1015 1016 ;; C[] = 0 1017 clr __tmp_reg__ 1018 wmov C0, 0 1019 wmov C2, 0 1020 wmov C4, 0 1021 10220: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[] 1023 ;; where N = 64 - Loop. 1024 ;; Notice that B[] = B[] >>> 64 so after this Routine has finished, 1025 ;; B[] will have its initial Value again. 1026 LSR B7 $ ror B6 $ ror B5 $ ror B4 1027 ror B3 $ ror B2 $ ror B1 $ ror B0 1028 1029 ;; If the N-th Bit of B[] was set then... 1030 brcc 1f 1031 ;; ...finish Rotation... 1032 ori B7, 1 << 7 1033 1034 ;; ...and add A[] * 2^N to the Result C[] 1035 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3 1036 adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7 1037 10381: ;; Multiply A[] by 2 1039 LSL A0 $ rol A1 $ rol A2 $ rol A3 1040 rol A4 $ rol A5 $ rol A6 $ rol A7 1041 1042 dec Loop 1043 brne 0b 1044 1045 ;; We expanded the Result in C[] 1046 ;; Copy Result to the Return Register A[] 1047 wmov A0, C0 1048 wmov A2, C2 1049 wmov A4, C4 1050 wmov A6, C6 1051 1052 clr __zero_reg__ 1053 pop Loop 1054 pop r28 1055 pop r29 1056 ret 1057ENDF __muldi3 1058 1059#undef Loop 1060 1061#undef C7 1062#undef C6 1063#undef C5 1064#undef C4 1065#undef C3 1066#undef C2 1067#undef C1 1068#undef C0 1069 1070#endif /* L_muldi3 */ 1071#endif /* HAVE_MUL */ 1072#endif /* if not __AVR_TINY__ */ 1073 1074#undef B7 1075#undef B6 1076#undef B5 1077#undef B4 1078#undef B3 1079#undef B2 1080#undef B1 1081#undef B0 1082 1083#undef A7 1084#undef A6 1085#undef A5 1086#undef A4 1087#undef A3 1088#undef A2 1089#undef A1 1090#undef A0 1091 1092/******************************************************* 1093 Widening Multiplication 64 = 32 x 32 with MUL 1094*******************************************************/ 1095 1096#if defined (__AVR_HAVE_MUL__) 1097#define A0 r22 1098#define A1 r23 1099#define A2 r24 1100#define A3 r25 1101 1102#define B0 r18 1103#define B1 r19 1104#define B2 r20 1105#define B3 r21 1106 1107#define C0 18 1108#define C1 C0+1 1109#define C2 20 1110#define C3 C2+1 1111#define C4 28 1112#define C5 C4+1 1113#define C6 C4+2 1114#define C7 C4+3 1115 1116#if defined (L_umulsidi3) 1117 1118;; Unsigned widening 64 = 32 * 32 Multiplication with MUL 1119 1120;; R18[8] = R22[4] * R18[4] 1121;; 1122;; Ordinary ABI Function, but additionally sets 1123;; X = R20[2] = B2[2] 1124;; Z = R22[2] = A0[2] 1125DEFUN __umulsidi3 1126 clt 1127 ;; FALLTHRU 1128ENDF __umulsidi3 1129 ;; T = sign (A) 1130DEFUN __umulsidi3_helper 1131 push 29 $ push 28 ; Y 1132 wmov 30, A2 1133 ;; Counting in Words, we have to perform 4 Multiplications 1134 ;; 0 * 0 1135 wmov 26, A0 1136 XCALL __umulhisi3 1137 push 23 $ push 22 ; C0 1138 wmov 28, B0 1139 wmov 18, B2 1140 wmov C2, 24 1141 push 27 $ push 26 ; A0 1142 push 19 $ push 18 ; B2 1143 ;; 1144 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y 1145 ;; B2 C2 -- -- -- B0 A2 1146 ;; 1 * 1 1147 wmov 26, 30 ; A2 1148 XCALL __umulhisi3 1149 ;; Sign-extend A. T holds the sign of A 1150 brtc 0f 1151 ;; Subtract B from the high part of the result 1152 sub 22, 28 1153 sbc 23, 29 1154 sbc 24, 18 1155 sbc 25, 19 11560: wmov 18, 28 ;; B0 1157 wmov C4, 22 1158 wmov C6, 24 1159 ;; 1160 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y 1161 ;; B0 C2 -- -- A2 C4 C6 1162 ;; 1163 ;; 1 * 0 1164 XCALL __muldi3_6 1165 ;; 0 * 1 1166 pop 26 $ pop 27 ;; B2 1167 pop 18 $ pop 19 ;; A0 1168 XCALL __muldi3_6 1169 1170 ;; Move result C into place and save A0 in Z 1171 wmov 22, C4 1172 wmov 24, C6 1173 wmov 30, 18 ; A0 1174 pop C0 $ pop C1 1175 1176 ;; Epilogue 1177 pop 28 $ pop 29 ;; Y 1178 ret 1179ENDF __umulsidi3_helper 1180#endif /* L_umulsidi3 */ 1181 1182 1183#if defined (L_mulsidi3) 1184 1185;; Signed widening 64 = 32 * 32 Multiplication 1186;; 1187;; R18[8] = R22[4] * R18[4] 1188;; Ordinary ABI Function 1189DEFUN __mulsidi3 1190 bst A3, 7 1191 sbrs B3, 7 ; Enhanced core has no skip bug 1192 XJMP __umulsidi3_helper 1193 1194 ;; B needs sign-extension 1195 push A3 1196 push A2 1197 XCALL __umulsidi3_helper 1198 ;; A0 survived in Z 1199 sub r22, r30 1200 sbc r23, r31 1201 pop r26 1202 pop r27 1203 sbc r24, r26 1204 sbc r25, r27 1205 ret 1206ENDF __mulsidi3 1207#endif /* L_mulsidi3 */ 1208 1209#undef A0 1210#undef A1 1211#undef A2 1212#undef A3 1213#undef B0 1214#undef B1 1215#undef B2 1216#undef B3 1217#undef C0 1218#undef C1 1219#undef C2 1220#undef C3 1221#undef C4 1222#undef C5 1223#undef C6 1224#undef C7 1225#endif /* HAVE_MUL */ 1226 1227/********************************************************** 1228 Widening Multiplication 64 = 32 x 32 without MUL 1229**********************************************************/ 1230#ifndef __AVR_TINY__ /* if not __AVR_TINY__ */ 1231#if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__) 1232#define A0 18 1233#define A1 A0+1 1234#define A2 A0+2 1235#define A3 A0+3 1236#define A4 A0+4 1237#define A5 A0+5 1238#define A6 A0+6 1239#define A7 A0+7 1240 1241#define B0 10 1242#define B1 B0+1 1243#define B2 B0+2 1244#define B3 B0+3 1245#define B4 B0+4 1246#define B5 B0+5 1247#define B6 B0+6 1248#define B7 B0+7 1249 1250#define AA0 22 1251#define AA1 AA0+1 1252#define AA2 AA0+2 1253#define AA3 AA0+3 1254 1255#define BB0 18 1256#define BB1 BB0+1 1257#define BB2 BB0+2 1258#define BB3 BB0+3 1259 1260#define Mask r30 1261 1262;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL 1263;; 1264;; R18[8] = R22[4] * R18[4] 1265;; Ordinary ABI Function 1266DEFUN __mulsidi3 1267 set 1268 skip 1269 ;; FALLTHRU 1270ENDF __mulsidi3 1271 1272DEFUN __umulsidi3 1273 clt ; skipped 1274 ;; Save 10 Registers: R10..R17, R28, R29 1275 do_prologue_saves 10 1276 ldi Mask, 0xff 1277 bld Mask, 7 1278 ;; Move B into place... 1279 wmov B0, BB0 1280 wmov B2, BB2 1281 ;; ...and extend it 1282 and BB3, Mask 1283 lsl BB3 1284 sbc B4, B4 1285 mov B5, B4 1286 wmov B6, B4 1287 ;; Move A into place... 1288 wmov A0, AA0 1289 wmov A2, AA2 1290 ;; ...and extend it 1291 and AA3, Mask 1292 lsl AA3 1293 sbc A4, A4 1294 mov A5, A4 1295 wmov A6, A4 1296 XCALL __muldi3 1297 do_epilogue_restores 10 1298ENDF __umulsidi3 1299 1300#undef A0 1301#undef A1 1302#undef A2 1303#undef A3 1304#undef A4 1305#undef A5 1306#undef A6 1307#undef A7 1308#undef B0 1309#undef B1 1310#undef B2 1311#undef B3 1312#undef B4 1313#undef B5 1314#undef B6 1315#undef B7 1316#undef AA0 1317#undef AA1 1318#undef AA2 1319#undef AA3 1320#undef BB0 1321#undef BB1 1322#undef BB2 1323#undef BB3 1324#undef Mask 1325#endif /* L_mulsidi3 && !HAVE_MUL */ 1326#endif /* if not __AVR_TINY__ */ 1327;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1328 1329 1330.section .text.libgcc.div, "ax", @progbits 1331 1332/******************************************************* 1333 Division 8 / 8 => (result + remainder) 1334*******************************************************/ 1335#define r_rem r25 /* remainder */ 1336#define r_arg1 r24 /* dividend, quotient */ 1337#define r_arg2 r22 /* divisor */ 1338#define r_cnt r23 /* loop count */ 1339 1340#if defined (L_udivmodqi4) 1341DEFUN __udivmodqi4 1342 sub r_rem,r_rem ; clear remainder and carry 1343 ldi r_cnt,9 ; init loop counter 1344 rjmp __udivmodqi4_ep ; jump to entry point 1345__udivmodqi4_loop: 1346 rol r_rem ; shift dividend into remainder 1347 cp r_rem,r_arg2 ; compare remainder & divisor 1348 brcs __udivmodqi4_ep ; remainder <= divisor 1349 sub r_rem,r_arg2 ; restore remainder 1350__udivmodqi4_ep: 1351 rol r_arg1 ; shift dividend (with CARRY) 1352 dec r_cnt ; decrement loop counter 1353 brne __udivmodqi4_loop 1354 com r_arg1 ; complement result 1355 ; because C flag was complemented in loop 1356 ret 1357ENDF __udivmodqi4 1358#endif /* defined (L_udivmodqi4) */ 1359 1360#if defined (L_divmodqi4) 1361DEFUN __divmodqi4 1362 bst r_arg1,7 ; store sign of dividend 1363 mov __tmp_reg__,r_arg1 1364 eor __tmp_reg__,r_arg2; r0.7 is sign of result 1365 sbrc r_arg1,7 1366 neg r_arg1 ; dividend negative : negate 1367 sbrc r_arg2,7 1368 neg r_arg2 ; divisor negative : negate 1369 XCALL __udivmodqi4 ; do the unsigned div/mod 1370 brtc __divmodqi4_1 1371 neg r_rem ; correct remainder sign 1372__divmodqi4_1: 1373 sbrc __tmp_reg__,7 1374 neg r_arg1 ; correct result sign 1375__divmodqi4_exit: 1376 ret 1377ENDF __divmodqi4 1378#endif /* defined (L_divmodqi4) */ 1379 1380#undef r_rem 1381#undef r_arg1 1382#undef r_arg2 1383#undef r_cnt 1384 1385 1386/******************************************************* 1387 Division 16 / 16 => (result + remainder) 1388*******************************************************/ 1389#define r_remL r26 /* remainder Low */ 1390#define r_remH r27 /* remainder High */ 1391 1392/* return: remainder */ 1393#define r_arg1L r24 /* dividend Low */ 1394#define r_arg1H r25 /* dividend High */ 1395 1396/* return: quotient */ 1397#define r_arg2L r22 /* divisor Low */ 1398#define r_arg2H r23 /* divisor High */ 1399 1400#define r_cnt r21 /* loop count */ 1401 1402#if defined (L_udivmodhi4) 1403DEFUN __udivmodhi4 1404 sub r_remL,r_remL 1405 sub r_remH,r_remH ; clear remainder and carry 1406 ldi r_cnt,17 ; init loop counter 1407 rjmp __udivmodhi4_ep ; jump to entry point 1408__udivmodhi4_loop: 1409 rol r_remL ; shift dividend into remainder 1410 rol r_remH 1411 cp r_remL,r_arg2L ; compare remainder & divisor 1412 cpc r_remH,r_arg2H 1413 brcs __udivmodhi4_ep ; remainder < divisor 1414 sub r_remL,r_arg2L ; restore remainder 1415 sbc r_remH,r_arg2H 1416__udivmodhi4_ep: 1417 rol r_arg1L ; shift dividend (with CARRY) 1418 rol r_arg1H 1419 dec r_cnt ; decrement loop counter 1420 brne __udivmodhi4_loop 1421 com r_arg1L 1422 com r_arg1H 1423; div/mod results to return registers, as for the div() function 1424 mov_l r_arg2L, r_arg1L ; quotient 1425 mov_h r_arg2H, r_arg1H 1426 mov_l r_arg1L, r_remL ; remainder 1427 mov_h r_arg1H, r_remH 1428 ret 1429ENDF __udivmodhi4 1430#endif /* defined (L_udivmodhi4) */ 1431 1432#if defined (L_divmodhi4) 1433DEFUN __divmodhi4 1434 .global _div 1435_div: 1436 bst r_arg1H,7 ; store sign of dividend 1437 mov __tmp_reg__,r_arg2H 1438 brtc 0f 1439 com __tmp_reg__ ; r0.7 is sign of result 1440 rcall __divmodhi4_neg1 ; dividend negative: negate 14410: 1442 sbrc r_arg2H,7 1443 rcall __divmodhi4_neg2 ; divisor negative: negate 1444 XCALL __udivmodhi4 ; do the unsigned div/mod 1445 sbrc __tmp_reg__,7 1446 rcall __divmodhi4_neg2 ; correct remainder sign 1447 brtc __divmodhi4_exit 1448__divmodhi4_neg1: 1449 ;; correct dividend/remainder sign 1450 com r_arg1H 1451 neg r_arg1L 1452 sbci r_arg1H,0xff 1453 ret 1454__divmodhi4_neg2: 1455 ;; correct divisor/result sign 1456 com r_arg2H 1457 neg r_arg2L 1458 sbci r_arg2H,0xff 1459__divmodhi4_exit: 1460 ret 1461ENDF __divmodhi4 1462#endif /* defined (L_divmodhi4) */ 1463 1464#undef r_remH 1465#undef r_remL 1466 1467#undef r_arg1H 1468#undef r_arg1L 1469 1470#undef r_arg2H 1471#undef r_arg2L 1472 1473#undef r_cnt 1474 1475/******************************************************* 1476 Division 24 / 24 => (result + remainder) 1477*******************************************************/ 1478 1479;; A[0..2]: In: Dividend; Out: Quotient 1480#define A0 22 1481#define A1 A0+1 1482#define A2 A0+2 1483 1484;; B[0..2]: In: Divisor; Out: Remainder 1485#define B0 18 1486#define B1 B0+1 1487#define B2 B0+2 1488 1489;; C[0..2]: Expand remainder 1490#define C0 __zero_reg__ 1491#define C1 26 1492#define C2 25 1493 1494;; Loop counter 1495#define r_cnt 21 1496 1497#if defined (L_udivmodpsi4) 1498;; R24:R22 = R24:R24 udiv R20:R18 1499;; R20:R18 = R24:R22 umod R20:R18 1500;; Clobbers: R21, R25, R26 1501 1502DEFUN __udivmodpsi4 1503 ; init loop counter 1504 ldi r_cnt, 24+1 1505 ; Clear remainder and carry. C0 is already 0 1506 clr C1 1507 sub C2, C2 1508 ; jump to entry point 1509 rjmp __udivmodpsi4_start 1510__udivmodpsi4_loop: 1511 ; shift dividend into remainder 1512 rol C0 1513 rol C1 1514 rol C2 1515 ; compare remainder & divisor 1516 cp C0, B0 1517 cpc C1, B1 1518 cpc C2, B2 1519 brcs __udivmodpsi4_start ; remainder <= divisor 1520 sub C0, B0 ; restore remainder 1521 sbc C1, B1 1522 sbc C2, B2 1523__udivmodpsi4_start: 1524 ; shift dividend (with CARRY) 1525 rol A0 1526 rol A1 1527 rol A2 1528 ; decrement loop counter 1529 dec r_cnt 1530 brne __udivmodpsi4_loop 1531 com A0 1532 com A1 1533 com A2 1534 ; div/mod results to return registers 1535 ; remainder 1536 mov B0, C0 1537 mov B1, C1 1538 mov B2, C2 1539 clr __zero_reg__ ; C0 1540 ret 1541ENDF __udivmodpsi4 1542#endif /* defined (L_udivmodpsi4) */ 1543 1544#if defined (L_divmodpsi4) 1545;; R24:R22 = R24:R22 div R20:R18 1546;; R20:R18 = R24:R22 mod R20:R18 1547;; Clobbers: T, __tmp_reg__, R21, R25, R26 1548 1549DEFUN __divmodpsi4 1550 ; R0.7 will contain the sign of the result: 1551 ; R0.7 = A.sign ^ B.sign 1552 mov __tmp_reg__, B2 1553 ; T-flag = sign of dividend 1554 bst A2, 7 1555 brtc 0f 1556 com __tmp_reg__ 1557 ; Adjust dividend's sign 1558 rcall __divmodpsi4_negA 15590: 1560 ; Adjust divisor's sign 1561 sbrc B2, 7 1562 rcall __divmodpsi4_negB 1563 1564 ; Do the unsigned div/mod 1565 XCALL __udivmodpsi4 1566 1567 ; Adjust quotient's sign 1568 sbrc __tmp_reg__, 7 1569 rcall __divmodpsi4_negA 1570 1571 ; Adjust remainder's sign 1572 brtc __divmodpsi4_end 1573 1574__divmodpsi4_negB: 1575 ; Correct divisor/remainder sign 1576 com B2 1577 com B1 1578 neg B0 1579 sbci B1, -1 1580 sbci B2, -1 1581 ret 1582 1583 ; Correct dividend/quotient sign 1584__divmodpsi4_negA: 1585 com A2 1586 com A1 1587 neg A0 1588 sbci A1, -1 1589 sbci A2, -1 1590__divmodpsi4_end: 1591 ret 1592 1593ENDF __divmodpsi4 1594#endif /* defined (L_divmodpsi4) */ 1595 1596#undef A0 1597#undef A1 1598#undef A2 1599 1600#undef B0 1601#undef B1 1602#undef B2 1603 1604#undef C0 1605#undef C1 1606#undef C2 1607 1608#undef r_cnt 1609 1610/******************************************************* 1611 Division 32 / 32 => (result + remainder) 1612*******************************************************/ 1613#define r_remHH r31 /* remainder High */ 1614#define r_remHL r30 1615#define r_remH r27 1616#define r_remL r26 /* remainder Low */ 1617 1618/* return: remainder */ 1619#define r_arg1HH r25 /* dividend High */ 1620#define r_arg1HL r24 1621#define r_arg1H r23 1622#define r_arg1L r22 /* dividend Low */ 1623 1624/* return: quotient */ 1625#define r_arg2HH r21 /* divisor High */ 1626#define r_arg2HL r20 1627#define r_arg2H r19 1628#define r_arg2L r18 /* divisor Low */ 1629 1630#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */ 1631 1632#if defined (L_udivmodsi4) 1633DEFUN __udivmodsi4 1634 ldi r_remL, 33 ; init loop counter 1635 mov r_cnt, r_remL 1636 sub r_remL,r_remL 1637 sub r_remH,r_remH ; clear remainder and carry 1638 mov_l r_remHL, r_remL 1639 mov_h r_remHH, r_remH 1640 rjmp __udivmodsi4_ep ; jump to entry point 1641__udivmodsi4_loop: 1642 rol r_remL ; shift dividend into remainder 1643 rol r_remH 1644 rol r_remHL 1645 rol r_remHH 1646 cp r_remL,r_arg2L ; compare remainder & divisor 1647 cpc r_remH,r_arg2H 1648 cpc r_remHL,r_arg2HL 1649 cpc r_remHH,r_arg2HH 1650 brcs __udivmodsi4_ep ; remainder <= divisor 1651 sub r_remL,r_arg2L ; restore remainder 1652 sbc r_remH,r_arg2H 1653 sbc r_remHL,r_arg2HL 1654 sbc r_remHH,r_arg2HH 1655__udivmodsi4_ep: 1656 rol r_arg1L ; shift dividend (with CARRY) 1657 rol r_arg1H 1658 rol r_arg1HL 1659 rol r_arg1HH 1660 dec r_cnt ; decrement loop counter 1661 brne __udivmodsi4_loop 1662 ; __zero_reg__ now restored (r_cnt == 0) 1663 com r_arg1L 1664 com r_arg1H 1665 com r_arg1HL 1666 com r_arg1HH 1667; div/mod results to return registers, as for the ldiv() function 1668 mov_l r_arg2L, r_arg1L ; quotient 1669 mov_h r_arg2H, r_arg1H 1670 mov_l r_arg2HL, r_arg1HL 1671 mov_h r_arg2HH, r_arg1HH 1672 mov_l r_arg1L, r_remL ; remainder 1673 mov_h r_arg1H, r_remH 1674 mov_l r_arg1HL, r_remHL 1675 mov_h r_arg1HH, r_remHH 1676 ret 1677ENDF __udivmodsi4 1678#endif /* defined (L_udivmodsi4) */ 1679 1680#if defined (L_divmodsi4) 1681DEFUN __divmodsi4 1682 mov __tmp_reg__,r_arg2HH 1683 bst r_arg1HH,7 ; store sign of dividend 1684 brtc 0f 1685 com __tmp_reg__ ; r0.7 is sign of result 1686 XCALL __negsi2 ; dividend negative: negate 16870: 1688 sbrc r_arg2HH,7 1689 rcall __divmodsi4_neg2 ; divisor negative: negate 1690 XCALL __udivmodsi4 ; do the unsigned div/mod 1691 sbrc __tmp_reg__, 7 ; correct quotient sign 1692 rcall __divmodsi4_neg2 1693 brtc __divmodsi4_exit ; correct remainder sign 1694 XJMP __negsi2 1695__divmodsi4_neg2: 1696 ;; correct divisor/quotient sign 1697 com r_arg2HH 1698 com r_arg2HL 1699 com r_arg2H 1700 neg r_arg2L 1701 sbci r_arg2H,0xff 1702 sbci r_arg2HL,0xff 1703 sbci r_arg2HH,0xff 1704__divmodsi4_exit: 1705 ret 1706ENDF __divmodsi4 1707#endif /* defined (L_divmodsi4) */ 1708 1709#if defined (L_negsi2) 1710;; (set (reg:SI 22) 1711;; (neg:SI (reg:SI 22))) 1712;; Sets the V flag for signed overflow tests 1713DEFUN __negsi2 1714 NEG4 22 1715 ret 1716ENDF __negsi2 1717#endif /* L_negsi2 */ 1718 1719#undef r_remHH 1720#undef r_remHL 1721#undef r_remH 1722#undef r_remL 1723#undef r_arg1HH 1724#undef r_arg1HL 1725#undef r_arg1H 1726#undef r_arg1L 1727#undef r_arg2HH 1728#undef r_arg2HL 1729#undef r_arg2H 1730#undef r_arg2L 1731#undef r_cnt 1732 1733/* *di routines use registers below R19 and won't work with tiny arch 1734 right now. */ 1735 1736#if !defined (__AVR_TINY__) 1737/******************************************************* 1738 Division 64 / 64 1739 Modulo 64 % 64 1740*******************************************************/ 1741 1742;; Use Speed-optimized Version on "big" Devices, i.e. Devices with 1743;; at least 16k of Program Memory. For smaller Devices, depend 1744;; on MOVW and SP Size. There is a Connexion between SP Size and 1745;; Flash Size so that SP Size can be used to test for Flash Size. 1746 1747#if defined (__AVR_HAVE_JMP_CALL__) 1748# define SPEED_DIV 8 1749#elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__) 1750# define SPEED_DIV 16 1751#else 1752# define SPEED_DIV 0 1753#endif 1754 1755;; A[0..7]: In: Dividend; 1756;; Out: Quotient (T = 0) 1757;; Out: Remainder (T = 1) 1758#define A0 18 1759#define A1 A0+1 1760#define A2 A0+2 1761#define A3 A0+3 1762#define A4 A0+4 1763#define A5 A0+5 1764#define A6 A0+6 1765#define A7 A0+7 1766 1767;; B[0..7]: In: Divisor; Out: Clobber 1768#define B0 10 1769#define B1 B0+1 1770#define B2 B0+2 1771#define B3 B0+3 1772#define B4 B0+4 1773#define B5 B0+5 1774#define B6 B0+6 1775#define B7 B0+7 1776 1777;; C[0..7]: Expand remainder; Out: Remainder (unused) 1778#define C0 8 1779#define C1 C0+1 1780#define C2 30 1781#define C3 C2+1 1782#define C4 28 1783#define C5 C4+1 1784#define C6 26 1785#define C7 C6+1 1786 1787;; Holds Signs during Division Routine 1788#define SS __tmp_reg__ 1789 1790;; Bit-Counter in Division Routine 1791#define R_cnt __zero_reg__ 1792 1793;; Scratch Register for Negation 1794#define NN r31 1795 1796#if defined (L_udivdi3) 1797 1798;; R25:R18 = R24:R18 umod R17:R10 1799;; Ordinary ABI-Function 1800 1801DEFUN __umoddi3 1802 set 1803 rjmp __udivdi3_umoddi3 1804ENDF __umoddi3 1805 1806;; R25:R18 = R24:R18 udiv R17:R10 1807;; Ordinary ABI-Function 1808 1809DEFUN __udivdi3 1810 clt 1811ENDF __udivdi3 1812 1813DEFUN __udivdi3_umoddi3 1814 push C0 1815 push C1 1816 push C4 1817 push C5 1818 XCALL __udivmod64 1819 pop C5 1820 pop C4 1821 pop C1 1822 pop C0 1823 ret 1824ENDF __udivdi3_umoddi3 1825#endif /* L_udivdi3 */ 1826 1827#if defined (L_udivmod64) 1828 1829;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation 1830;; No Registers saved/restored; the Callers will take Care. 1831;; Preserves B[] and T-flag 1832;; T = 0: Compute Quotient in A[] 1833;; T = 1: Compute Remainder in A[] and shift SS one Bit left 1834 1835DEFUN __udivmod64 1836 1837 ;; Clear Remainder (C6, C7 will follow) 1838 clr C0 1839 clr C1 1840 wmov C2, C0 1841 wmov C4, C0 1842 ldi C7, 64 1843 1844#if SPEED_DIV == 0 || SPEED_DIV == 16 1845 ;; Initialize Loop-Counter 1846 mov R_cnt, C7 1847 wmov C6, C0 1848#endif /* SPEED_DIV */ 1849 1850#if SPEED_DIV == 8 1851 1852 push A7 1853 clr C6 1854 18551: ;; Compare shifted Devidend against Divisor 1856 ;; If -- even after Shifting -- it is smaller... 1857 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3 1858 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7 1859 brcc 2f 1860 1861 ;; ...then we can subtract it. Thus, it is legal to shift left 1862 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3 1863 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7 1864 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3 1865 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0 1866 1867 ;; 8 Bits are done 1868 subi C7, 8 1869 brne 1b 1870 1871 ;; Shifted 64 Bits: A7 has traveled to C7 1872 pop C7 1873 ;; Divisor is greater than Dividend. We have: 1874 ;; A[] % B[] = A[] 1875 ;; A[] / B[] = 0 1876 ;; Thus, we can return immediately 1877 rjmp 5f 1878 18792: ;; Initialze Bit-Counter with Number of Bits still to be performed 1880 mov R_cnt, C7 1881 1882 ;; Push of A7 is not needed because C7 is still 0 1883 pop C7 1884 clr C7 1885 1886#elif SPEED_DIV == 16 1887 1888 ;; Compare shifted Dividend against Divisor 1889 cp A7, B3 1890 cpc C0, B4 1891 cpc C1, B5 1892 cpc C2, B6 1893 cpc C3, B7 1894 brcc 2f 1895 1896 ;; Divisor is greater than shifted Dividen: We can shift the Dividend 1897 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk 1898 wmov C2,A6 $ wmov C0,A4 1899 wmov A6,A2 $ wmov A4,A0 1900 wmov A2,C6 $ wmov A0,C4 1901 1902 ;; Set Bit Counter to 32 1903 lsr R_cnt 19042: 1905#elif SPEED_DIV 1906#error SPEED_DIV = ? 1907#endif /* SPEED_DIV */ 1908 1909;; The very Division + Remainder Routine 1910 19113: ;; Left-shift Dividend... 1912 lsl A0 $ rol A1 $ rol A2 $ rol A3 1913 rol A4 $ rol A5 $ rol A6 $ rol A7 1914 1915 ;; ...into Remainder 1916 rol C0 $ rol C1 $ rol C2 $ rol C3 1917 rol C4 $ rol C5 $ rol C6 $ rol C7 1918 1919 ;; Compare Remainder and Divisor 1920 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3 1921 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7 1922 1923 brcs 4f 1924 1925 ;; Divisor fits into Remainder: Subtract it from Remainder... 1926 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3 1927 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7 1928 1929 ;; ...and set according Bit in the upcoming Quotient 1930 ;; The Bit will travel to its final Position 1931 ori A0, 1 1932 19334: ;; This Bit is done 1934 dec R_cnt 1935 brne 3b 1936 ;; __zero_reg__ is 0 again 1937 1938 ;; T = 0: We are fine with the Quotient in A[] 1939 ;; T = 1: Copy Remainder to A[] 19405: brtc 6f 1941 wmov A0, C0 1942 wmov A2, C2 1943 wmov A4, C4 1944 wmov A6, C6 1945 ;; Move the Sign of the Result to SS.7 1946 lsl SS 1947 19486: ret 1949 1950ENDF __udivmod64 1951#endif /* L_udivmod64 */ 1952 1953 1954#if defined (L_divdi3) 1955 1956;; R25:R18 = R24:R18 mod R17:R10 1957;; Ordinary ABI-Function 1958 1959DEFUN __moddi3 1960 set 1961 rjmp __divdi3_moddi3 1962ENDF __moddi3 1963 1964;; R25:R18 = R24:R18 div R17:R10 1965;; Ordinary ABI-Function 1966 1967DEFUN __divdi3 1968 clt 1969ENDF __divdi3 1970 1971DEFUN __divdi3_moddi3 1972#if SPEED_DIV 1973 mov r31, A7 1974 or r31, B7 1975 brmi 0f 1976 ;; Both Signs are 0: the following Complexitiy is not needed 1977 XJMP __udivdi3_umoddi3 1978#endif /* SPEED_DIV */ 1979 19800: ;; The Prologue 1981 ;; Save 12 Registers: Y, 17...8 1982 ;; No Frame needed 1983 do_prologue_saves 12 1984 1985 ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign) 1986 ;; SS.6 will contain the Sign of the Remainder (A.sign) 1987 mov SS, A7 1988 asr SS 1989 ;; Adjust Dividend's Sign as needed 1990#if SPEED_DIV 1991 ;; Compiling for Speed we know that at least one Sign must be < 0 1992 ;; Thus, if A[] >= 0 then we know B[] < 0 1993 brpl 22f 1994#else 1995 brpl 21f 1996#endif /* SPEED_DIV */ 1997 1998 XCALL __negdi2 1999 2000 ;; Adjust Divisor's Sign and SS.7 as needed 200121: tst B7 2002 brpl 3f 200322: ldi NN, 1 << 7 2004 eor SS, NN 2005 2006 ldi NN, -1 2007 com B4 $ com B5 $ com B6 $ com B7 2008 $ com B1 $ com B2 $ com B3 2009 NEG B0 2010 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN 2011 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN 2012 20133: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag) 2014 XCALL __udivmod64 2015 2016 ;; Adjust Result's Sign 2017#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ 2018 tst SS 2019 brpl 4f 2020#else 2021 sbrc SS, 7 2022#endif /* __AVR_HAVE_JMP_CALL__ */ 2023 XCALL __negdi2 2024 20254: ;; Epilogue: Restore 12 Registers and return 2026 do_epilogue_restores 12 2027 2028ENDF __divdi3_moddi3 2029 2030#endif /* L_divdi3 */ 2031 2032#undef R_cnt 2033#undef SS 2034#undef NN 2035 2036.section .text.libgcc, "ax", @progbits 2037 2038#define TT __tmp_reg__ 2039 2040#if defined (L_adddi3) 2041;; (set (reg:DI 18) 2042;; (plus:DI (reg:DI 18) 2043;; (reg:DI 10))) 2044;; Sets the V flag for signed overflow tests 2045;; Sets the C flag for unsigned overflow tests 2046DEFUN __adddi3 2047 ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3 2048 adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7 2049 ret 2050ENDF __adddi3 2051#endif /* L_adddi3 */ 2052 2053#if defined (L_adddi3_s8) 2054;; (set (reg:DI 18) 2055;; (plus:DI (reg:DI 18) 2056;; (sign_extend:SI (reg:QI 26)))) 2057;; Sets the V flag for signed overflow tests 2058;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128 2059DEFUN __adddi3_s8 2060 clr TT 2061 sbrc r26, 7 2062 com TT 2063 ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT 2064 adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT 2065 ret 2066ENDF __adddi3_s8 2067#endif /* L_adddi3_s8 */ 2068 2069#if defined (L_subdi3) 2070;; (set (reg:DI 18) 2071;; (minus:DI (reg:DI 18) 2072;; (reg:DI 10))) 2073;; Sets the V flag for signed overflow tests 2074;; Sets the C flag for unsigned overflow tests 2075DEFUN __subdi3 2076 SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3 2077 sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7 2078 ret 2079ENDF __subdi3 2080#endif /* L_subdi3 */ 2081 2082#if defined (L_cmpdi2) 2083;; (set (cc0) 2084;; (compare (reg:DI 18) 2085;; (reg:DI 10))) 2086DEFUN __cmpdi2 2087 CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3 2088 cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7 2089 ret 2090ENDF __cmpdi2 2091#endif /* L_cmpdi2 */ 2092 2093#if defined (L_cmpdi2_s8) 2094;; (set (cc0) 2095;; (compare (reg:DI 18) 2096;; (sign_extend:SI (reg:QI 26)))) 2097DEFUN __cmpdi2_s8 2098 clr TT 2099 sbrc r26, 7 2100 com TT 2101 CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT 2102 cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT 2103 ret 2104ENDF __cmpdi2_s8 2105#endif /* L_cmpdi2_s8 */ 2106 2107#if defined (L_negdi2) 2108;; (set (reg:DI 18) 2109;; (neg:DI (reg:DI 18))) 2110;; Sets the V flag for signed overflow tests 2111DEFUN __negdi2 2112 2113 com A4 $ com A5 $ com A6 $ com A7 2114 $ com A1 $ com A2 $ com A3 2115 NEG A0 2116 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1 2117 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1 2118 ret 2119 2120ENDF __negdi2 2121#endif /* L_negdi2 */ 2122 2123#undef TT 2124 2125#undef C7 2126#undef C6 2127#undef C5 2128#undef C4 2129#undef C3 2130#undef C2 2131#undef C1 2132#undef C0 2133 2134#undef B7 2135#undef B6 2136#undef B5 2137#undef B4 2138#undef B3 2139#undef B2 2140#undef B1 2141#undef B0 2142 2143#undef A7 2144#undef A6 2145#undef A5 2146#undef A4 2147#undef A3 2148#undef A2 2149#undef A1 2150#undef A0 2151 2152#endif /* !defined (__AVR_TINY__) */ 2153 2154 2155.section .text.libgcc.prologue, "ax", @progbits 2156 2157/********************************** 2158 * This is a prologue subroutine 2159 **********************************/ 2160#if !defined (__AVR_TINY__) 2161#if defined (L_prologue) 2162 2163;; This function does not clobber T-flag; 64-bit division relies on it 2164DEFUN __prologue_saves__ 2165 push r2 2166 push r3 2167 push r4 2168 push r5 2169 push r6 2170 push r7 2171 push r8 2172 push r9 2173 push r10 2174 push r11 2175 push r12 2176 push r13 2177 push r14 2178 push r15 2179 push r16 2180 push r17 2181 push r28 2182 push r29 2183#if !defined (__AVR_HAVE_SPH__) 2184 in r28,__SP_L__ 2185 sub r28,r26 2186 out __SP_L__,r28 2187 clr r29 2188#elif defined (__AVR_XMEGA__) 2189 in r28,__SP_L__ 2190 in r29,__SP_H__ 2191 sub r28,r26 2192 sbc r29,r27 2193 out __SP_L__,r28 2194 out __SP_H__,r29 2195#else 2196 in r28,__SP_L__ 2197 in r29,__SP_H__ 2198 sub r28,r26 2199 sbc r29,r27 2200 in __tmp_reg__,__SREG__ 2201 cli 2202 out __SP_H__,r29 2203 out __SREG__,__tmp_reg__ 2204 out __SP_L__,r28 2205#endif /* #SP = 8/16 */ 2206 2207 XIJMP 2208 2209ENDF __prologue_saves__ 2210#endif /* defined (L_prologue) */ 2211 2212/* 2213 * This is an epilogue subroutine 2214 */ 2215#if defined (L_epilogue) 2216 2217DEFUN __epilogue_restores__ 2218 ldd r2,Y+18 2219 ldd r3,Y+17 2220 ldd r4,Y+16 2221 ldd r5,Y+15 2222 ldd r6,Y+14 2223 ldd r7,Y+13 2224 ldd r8,Y+12 2225 ldd r9,Y+11 2226 ldd r10,Y+10 2227 ldd r11,Y+9 2228 ldd r12,Y+8 2229 ldd r13,Y+7 2230 ldd r14,Y+6 2231 ldd r15,Y+5 2232 ldd r16,Y+4 2233 ldd r17,Y+3 2234 ldd r26,Y+2 2235#if !defined (__AVR_HAVE_SPH__) 2236 ldd r29,Y+1 2237 add r28,r30 2238 out __SP_L__,r28 2239 mov r28, r26 2240#elif defined (__AVR_XMEGA__) 2241 ldd r27,Y+1 2242 add r28,r30 2243 adc r29,__zero_reg__ 2244 out __SP_L__,r28 2245 out __SP_H__,r29 2246 wmov 28, 26 2247#else 2248 ldd r27,Y+1 2249 add r28,r30 2250 adc r29,__zero_reg__ 2251 in __tmp_reg__,__SREG__ 2252 cli 2253 out __SP_H__,r29 2254 out __SREG__,__tmp_reg__ 2255 out __SP_L__,r28 2256 mov_l r28, r26 2257 mov_h r29, r27 2258#endif /* #SP = 8/16 */ 2259 ret 2260ENDF __epilogue_restores__ 2261#endif /* defined (L_epilogue) */ 2262#endif /* !defined (__AVR_TINY__) */ 2263 2264#ifdef L_exit 2265 .section .fini9,"ax",@progbits 2266DEFUN _exit 2267 .weak exit 2268exit: 2269ENDF _exit 2270 2271 /* Code from .fini8 ... .fini1 sections inserted by ld script. */ 2272 2273 .section .fini0,"ax",@progbits 2274 cli 2275__stop_program: 2276 rjmp __stop_program 2277#endif /* defined (L_exit) */ 2278 2279#ifdef L_cleanup 2280 .weak _cleanup 2281 .func _cleanup 2282_cleanup: 2283 ret 2284.endfunc 2285#endif /* defined (L_cleanup) */ 2286 2287 2288.section .text.libgcc, "ax", @progbits 2289 2290#ifdef L_tablejump2 2291DEFUN __tablejump2__ 2292 lsl r30 2293 rol r31 2294#if defined (__AVR_HAVE_EIJMP_EICALL__) 2295 ;; Word address of gs() jumptable entry in R24:Z 2296 rol r24 2297 out __RAMPZ__, r24 2298#elif defined (__AVR_HAVE_ELPM__) 2299 ;; Word address of jumptable entry in Z 2300 clr __tmp_reg__ 2301 rol __tmp_reg__ 2302 out __RAMPZ__, __tmp_reg__ 2303#endif 2304 2305 ;; Read word address from jumptable and jump 2306 2307#if defined (__AVR_HAVE_ELPMX__) 2308 elpm __tmp_reg__, Z+ 2309 elpm r31, Z 2310 mov r30, __tmp_reg__ 2311#ifdef __AVR_HAVE_RAMPD__ 2312 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM 2313 out __RAMPZ__, __zero_reg__ 2314#endif /* RAMPD */ 2315 XIJMP 2316#elif defined (__AVR_HAVE_ELPM__) 2317 elpm 2318 push r0 2319 adiw r30, 1 2320 elpm 2321 push r0 2322 ret 2323#elif defined (__AVR_HAVE_LPMX__) 2324 lpm __tmp_reg__, Z+ 2325 lpm r31, Z 2326 mov r30, __tmp_reg__ 2327 ijmp 2328#elif defined (__AVR_TINY__) 2329 wsubi 30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z 2330 ld __tmp_reg__, Z+ 2331 ld r31, Z ; Use ld instead of lpm to load Z 2332 mov r30, __tmp_reg__ 2333 ijmp 2334#else 2335 lpm 2336 push r0 2337 adiw r30, 1 2338 lpm 2339 push r0 2340 ret 2341#endif 2342ENDF __tablejump2__ 2343#endif /* L_tablejump2 */ 2344 2345#if defined(__AVR_TINY__) 2346#ifdef L_copy_data 2347 .section .init4,"ax",@progbits 2348 .global __do_copy_data 2349__do_copy_data: 2350 ldi r18, hi8(__data_end) 2351 ldi r26, lo8(__data_start) 2352 ldi r27, hi8(__data_start) 2353 ldi r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__) 2354 ldi r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__) 2355 rjmp .L__do_copy_data_start 2356.L__do_copy_data_loop: 2357 ld r19, z+ 2358 st X+, r19 2359.L__do_copy_data_start: 2360 cpi r26, lo8(__data_end) 2361 cpc r27, r18 2362 brne .L__do_copy_data_loop 2363#endif 2364#else 2365#ifdef L_copy_data 2366 .section .init4,"ax",@progbits 2367DEFUN __do_copy_data 2368#if defined(__AVR_HAVE_ELPMX__) 2369 ldi r17, hi8(__data_end) 2370 ldi r26, lo8(__data_start) 2371 ldi r27, hi8(__data_start) 2372 ldi r30, lo8(__data_load_start) 2373 ldi r31, hi8(__data_load_start) 2374 ldi r16, hh8(__data_load_start) 2375 out __RAMPZ__, r16 2376 rjmp .L__do_copy_data_start 2377.L__do_copy_data_loop: 2378 elpm r0, Z+ 2379 st X+, r0 2380.L__do_copy_data_start: 2381 cpi r26, lo8(__data_end) 2382 cpc r27, r17 2383 brne .L__do_copy_data_loop 2384#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__) 2385 ldi r17, hi8(__data_end) 2386 ldi r26, lo8(__data_start) 2387 ldi r27, hi8(__data_start) 2388 ldi r30, lo8(__data_load_start) 2389 ldi r31, hi8(__data_load_start) 2390 ldi r16, hh8(__data_load_start - 0x10000) 2391.L__do_copy_data_carry: 2392 inc r16 2393 out __RAMPZ__, r16 2394 rjmp .L__do_copy_data_start 2395.L__do_copy_data_loop: 2396 elpm 2397 st X+, r0 2398 adiw r30, 1 2399 brcs .L__do_copy_data_carry 2400.L__do_copy_data_start: 2401 cpi r26, lo8(__data_end) 2402 cpc r27, r17 2403 brne .L__do_copy_data_loop 2404#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) 2405 ldi r17, hi8(__data_end) 2406 ldi r26, lo8(__data_start) 2407 ldi r27, hi8(__data_start) 2408 ldi r30, lo8(__data_load_start) 2409 ldi r31, hi8(__data_load_start) 2410 rjmp .L__do_copy_data_start 2411.L__do_copy_data_loop: 2412#if defined (__AVR_HAVE_LPMX__) 2413 lpm r0, Z+ 2414#else 2415 lpm 2416 adiw r30, 1 2417#endif 2418 st X+, r0 2419.L__do_copy_data_start: 2420 cpi r26, lo8(__data_end) 2421 cpc r27, r17 2422 brne .L__do_copy_data_loop 2423#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */ 2424#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__) 2425 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM 2426 out __RAMPZ__, __zero_reg__ 2427#endif /* ELPM && RAMPD */ 2428ENDF __do_copy_data 2429#endif /* L_copy_data */ 2430#endif /* !defined (__AVR_TINY__) */ 2431 2432/* __do_clear_bss is only necessary if there is anything in .bss section. */ 2433 2434#ifdef L_clear_bss 2435 .section .init4,"ax",@progbits 2436DEFUN __do_clear_bss 2437 ldi r18, hi8(__bss_end) 2438 ldi r26, lo8(__bss_start) 2439 ldi r27, hi8(__bss_start) 2440 rjmp .do_clear_bss_start 2441.do_clear_bss_loop: 2442 st X+, __zero_reg__ 2443.do_clear_bss_start: 2444 cpi r26, lo8(__bss_end) 2445 cpc r27, r18 2446 brne .do_clear_bss_loop 2447ENDF __do_clear_bss 2448#endif /* L_clear_bss */ 2449 2450/* __do_global_ctors and __do_global_dtors are only necessary 2451 if there are any constructors/destructors. */ 2452 2453#if defined(__AVR_TINY__) 2454#define cdtors_tst_reg r18 2455#else 2456#define cdtors_tst_reg r17 2457#endif 2458 2459#ifdef L_ctors 2460 .section .init6,"ax",@progbits 2461DEFUN __do_global_ctors 2462 ldi cdtors_tst_reg, pm_hi8(__ctors_start) 2463 ldi r28, pm_lo8(__ctors_end) 2464 ldi r29, pm_hi8(__ctors_end) 2465#ifdef __AVR_HAVE_EIJMP_EICALL__ 2466 ldi r16, pm_hh8(__ctors_end) 2467#endif /* HAVE_EIJMP */ 2468 rjmp .L__do_global_ctors_start 2469.L__do_global_ctors_loop: 2470 wsubi 28, 1 2471#ifdef __AVR_HAVE_EIJMP_EICALL__ 2472 sbc r16, __zero_reg__ 2473 mov r24, r16 2474#endif /* HAVE_EIJMP */ 2475 mov_h r31, r29 2476 mov_l r30, r28 2477 XCALL __tablejump2__ 2478.L__do_global_ctors_start: 2479 cpi r28, pm_lo8(__ctors_start) 2480 cpc r29, cdtors_tst_reg 2481#ifdef __AVR_HAVE_EIJMP_EICALL__ 2482 ldi r24, pm_hh8(__ctors_start) 2483 cpc r16, r24 2484#endif /* HAVE_EIJMP */ 2485 brne .L__do_global_ctors_loop 2486ENDF __do_global_ctors 2487#endif /* L_ctors */ 2488 2489#ifdef L_dtors 2490 .section .fini6,"ax",@progbits 2491DEFUN __do_global_dtors 2492 ldi cdtors_tst_reg, pm_hi8(__dtors_end) 2493 ldi r28, pm_lo8(__dtors_start) 2494 ldi r29, pm_hi8(__dtors_start) 2495#ifdef __AVR_HAVE_EIJMP_EICALL__ 2496 ldi r16, pm_hh8(__dtors_start) 2497#endif /* HAVE_EIJMP */ 2498 rjmp .L__do_global_dtors_start 2499.L__do_global_dtors_loop: 2500#ifdef __AVR_HAVE_EIJMP_EICALL__ 2501 mov r24, r16 2502#endif /* HAVE_EIJMP */ 2503 mov_h r31, r29 2504 mov_l r30, r28 2505 XCALL __tablejump2__ 2506 waddi 28, 1 2507#ifdef __AVR_HAVE_EIJMP_EICALL__ 2508 adc r16, __zero_reg__ 2509#endif /* HAVE_EIJMP */ 2510.L__do_global_dtors_start: 2511 cpi r28, pm_lo8(__dtors_end) 2512 cpc r29, cdtors_tst_reg 2513#ifdef __AVR_HAVE_EIJMP_EICALL__ 2514 ldi r24, pm_hh8(__dtors_end) 2515 cpc r16, r24 2516#endif /* HAVE_EIJMP */ 2517 brne .L__do_global_dtors_loop 2518ENDF __do_global_dtors 2519#endif /* L_dtors */ 2520 2521#undef cdtors_tst_reg 2522 2523.section .text.libgcc, "ax", @progbits 2524 2525#if !defined (__AVR_TINY__) 2526;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2527;; Loading n bytes from Flash; n = 3,4 2528;; R22... = Flash[Z] 2529;; Clobbers: __tmp_reg__ 2530 2531#if (defined (L_load_3) \ 2532 || defined (L_load_4)) \ 2533 && !defined (__AVR_HAVE_LPMX__) 2534 2535;; Destination 2536#define D0 22 2537#define D1 D0+1 2538#define D2 D0+2 2539#define D3 D0+3 2540 2541.macro .load dest, n 2542 lpm 2543 mov \dest, r0 2544.if \dest != D0+\n-1 2545 adiw r30, 1 2546.else 2547 sbiw r30, \n-1 2548.endif 2549.endm 2550 2551#if defined (L_load_3) 2552DEFUN __load_3 2553 push D3 2554 XCALL __load_4 2555 pop D3 2556 ret 2557ENDF __load_3 2558#endif /* L_load_3 */ 2559 2560#if defined (L_load_4) 2561DEFUN __load_4 2562 .load D0, 4 2563 .load D1, 4 2564 .load D2, 4 2565 .load D3, 4 2566 ret 2567ENDF __load_4 2568#endif /* L_load_4 */ 2569 2570#endif /* L_load_3 || L_load_3 */ 2571#endif /* !defined (__AVR_TINY__) */ 2572 2573#if !defined (__AVR_TINY__) 2574;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2575;; Loading n bytes from Flash or RAM; n = 1,2,3,4 2576;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7 2577;; Clobbers: __tmp_reg__, R21, R30, R31 2578 2579#if (defined (L_xload_1) \ 2580 || defined (L_xload_2) \ 2581 || defined (L_xload_3) \ 2582 || defined (L_xload_4)) 2583 2584;; Destination 2585#define D0 22 2586#define D1 D0+1 2587#define D2 D0+2 2588#define D3 D0+3 2589 2590;; Register containing bits 16+ of the address 2591 2592#define HHI8 21 2593 2594.macro .xload dest, n 2595#if defined (__AVR_HAVE_ELPMX__) 2596 elpm \dest, Z+ 2597#elif defined (__AVR_HAVE_ELPM__) 2598 elpm 2599 mov \dest, r0 2600.if \dest != D0+\n-1 2601 adiw r30, 1 2602 adc HHI8, __zero_reg__ 2603 out __RAMPZ__, HHI8 2604.endif 2605#elif defined (__AVR_HAVE_LPMX__) 2606 lpm \dest, Z+ 2607#else 2608 lpm 2609 mov \dest, r0 2610.if \dest != D0+\n-1 2611 adiw r30, 1 2612.endif 2613#endif 2614#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__) 2615.if \dest == D0+\n-1 2616 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM 2617 out __RAMPZ__, __zero_reg__ 2618.endif 2619#endif 2620.endm ; .xload 2621 2622#if defined (L_xload_1) 2623DEFUN __xload_1 2624#if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__) 2625 sbrc HHI8, 7 2626 ld D0, Z 2627 sbrs HHI8, 7 2628 lpm D0, Z 2629 ret 2630#else 2631 sbrc HHI8, 7 2632 rjmp 1f 2633#if defined (__AVR_HAVE_ELPM__) 2634 out __RAMPZ__, HHI8 2635#endif /* __AVR_HAVE_ELPM__ */ 2636 .xload D0, 1 2637 ret 26381: ld D0, Z 2639 ret 2640#endif /* LPMx && ! ELPM */ 2641ENDF __xload_1 2642#endif /* L_xload_1 */ 2643 2644#if defined (L_xload_2) 2645DEFUN __xload_2 2646 sbrc HHI8, 7 2647 rjmp 1f 2648#if defined (__AVR_HAVE_ELPM__) 2649 out __RAMPZ__, HHI8 2650#endif /* __AVR_HAVE_ELPM__ */ 2651 .xload D0, 2 2652 .xload D1, 2 2653 ret 26541: ld D0, Z+ 2655 ld D1, Z+ 2656 ret 2657ENDF __xload_2 2658#endif /* L_xload_2 */ 2659 2660#if defined (L_xload_3) 2661DEFUN __xload_3 2662 sbrc HHI8, 7 2663 rjmp 1f 2664#if defined (__AVR_HAVE_ELPM__) 2665 out __RAMPZ__, HHI8 2666#endif /* __AVR_HAVE_ELPM__ */ 2667 .xload D0, 3 2668 .xload D1, 3 2669 .xload D2, 3 2670 ret 26711: ld D0, Z+ 2672 ld D1, Z+ 2673 ld D2, Z+ 2674 ret 2675ENDF __xload_3 2676#endif /* L_xload_3 */ 2677 2678#if defined (L_xload_4) 2679DEFUN __xload_4 2680 sbrc HHI8, 7 2681 rjmp 1f 2682#if defined (__AVR_HAVE_ELPM__) 2683 out __RAMPZ__, HHI8 2684#endif /* __AVR_HAVE_ELPM__ */ 2685 .xload D0, 4 2686 .xload D1, 4 2687 .xload D2, 4 2688 .xload D3, 4 2689 ret 26901: ld D0, Z+ 2691 ld D1, Z+ 2692 ld D2, Z+ 2693 ld D3, Z+ 2694 ret 2695ENDF __xload_4 2696#endif /* L_xload_4 */ 2697 2698#endif /* L_xload_{1|2|3|4} */ 2699#endif /* if !defined (__AVR_TINY__) */ 2700 2701#if !defined (__AVR_TINY__) 2702;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2703;; memcopy from Address Space __pgmx to RAM 2704;; R23:Z = Source Address 2705;; X = Destination Address 2706;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z 2707 2708#if defined (L_movmemx) 2709 2710#define HHI8 23 2711#define LOOP 24 2712 2713DEFUN __movmemx_qi 2714 ;; #Bytes to copy fity in 8 Bits (1..255) 2715 ;; Zero-extend Loop Counter 2716 clr LOOP+1 2717 ;; FALLTHRU 2718ENDF __movmemx_qi 2719 2720DEFUN __movmemx_hi 2721 2722;; Read from where? 2723 sbrc HHI8, 7 2724 rjmp 1f 2725 2726;; Read from Flash 2727 2728#if defined (__AVR_HAVE_ELPM__) 2729 out __RAMPZ__, HHI8 2730#endif 2731 27320: ;; Load 1 Byte from Flash... 2733 2734#if defined (__AVR_HAVE_ELPMX__) 2735 elpm r0, Z+ 2736#elif defined (__AVR_HAVE_ELPM__) 2737 elpm 2738 adiw r30, 1 2739 adc HHI8, __zero_reg__ 2740 out __RAMPZ__, HHI8 2741#elif defined (__AVR_HAVE_LPMX__) 2742 lpm r0, Z+ 2743#else 2744 lpm 2745 adiw r30, 1 2746#endif 2747 2748 ;; ...and store that Byte to RAM Destination 2749 st X+, r0 2750 sbiw LOOP, 1 2751 brne 0b 2752#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__) 2753 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM 2754 out __RAMPZ__, __zero_reg__ 2755#endif /* ELPM && RAMPD */ 2756 ret 2757 2758;; Read from RAM 2759 27601: ;; Read 1 Byte from RAM... 2761 ld r0, Z+ 2762 ;; and store that Byte to RAM Destination 2763 st X+, r0 2764 sbiw LOOP, 1 2765 brne 1b 2766 ret 2767ENDF __movmemx_hi 2768 2769#undef HHI8 2770#undef LOOP 2771 2772#endif /* L_movmemx */ 2773#endif /* !defined (__AVR_TINY__) */ 2774 2775 2776.section .text.libgcc.builtins, "ax", @progbits 2777 2778/********************************** 2779 * Find first set Bit (ffs) 2780 **********************************/ 2781 2782#if defined (L_ffssi2) 2783;; find first set bit 2784;; r25:r24 = ffs32 (r25:r22) 2785;; clobbers: r22, r26 2786DEFUN __ffssi2 2787 clr r26 2788 tst r22 2789 brne 1f 2790 subi r26, -8 2791 or r22, r23 2792 brne 1f 2793 subi r26, -8 2794 or r22, r24 2795 brne 1f 2796 subi r26, -8 2797 or r22, r25 2798 brne 1f 2799 ret 28001: mov r24, r22 2801 XJMP __loop_ffsqi2 2802ENDF __ffssi2 2803#endif /* defined (L_ffssi2) */ 2804 2805#if defined (L_ffshi2) 2806;; find first set bit 2807;; r25:r24 = ffs16 (r25:r24) 2808;; clobbers: r26 2809DEFUN __ffshi2 2810 clr r26 2811#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ 2812 ;; Some cores have problem skipping 2-word instruction 2813 tst r24 2814 breq 2f 2815#else 2816 cpse r24, __zero_reg__ 2817#endif /* __AVR_HAVE_JMP_CALL__ */ 28181: XJMP __loop_ffsqi2 28192: ldi r26, 8 2820 or r24, r25 2821 brne 1b 2822 ret 2823ENDF __ffshi2 2824#endif /* defined (L_ffshi2) */ 2825 2826#if defined (L_loop_ffsqi2) 2827;; Helper for ffshi2, ffssi2 2828;; r25:r24 = r26 + zero_extend16 (ffs8(r24)) 2829;; r24 must be != 0 2830;; clobbers: r26 2831DEFUN __loop_ffsqi2 2832 inc r26 2833 lsr r24 2834 brcc __loop_ffsqi2 2835 mov r24, r26 2836 clr r25 2837 ret 2838ENDF __loop_ffsqi2 2839#endif /* defined (L_loop_ffsqi2) */ 2840 2841 2842/********************************** 2843 * Count trailing Zeros (ctz) 2844 **********************************/ 2845 2846#if defined (L_ctzsi2) 2847;; count trailing zeros 2848;; r25:r24 = ctz32 (r25:r22) 2849;; clobbers: r26, r22 2850;; ctz(0) = 255 2851;; Note that ctz(0) in undefined for GCC 2852DEFUN __ctzsi2 2853 XCALL __ffssi2 2854 dec r24 2855 ret 2856ENDF __ctzsi2 2857#endif /* defined (L_ctzsi2) */ 2858 2859#if defined (L_ctzhi2) 2860;; count trailing zeros 2861;; r25:r24 = ctz16 (r25:r24) 2862;; clobbers: r26 2863;; ctz(0) = 255 2864;; Note that ctz(0) in undefined for GCC 2865DEFUN __ctzhi2 2866 XCALL __ffshi2 2867 dec r24 2868 ret 2869ENDF __ctzhi2 2870#endif /* defined (L_ctzhi2) */ 2871 2872 2873/********************************** 2874 * Count leading Zeros (clz) 2875 **********************************/ 2876 2877#if defined (L_clzdi2) 2878;; count leading zeros 2879;; r25:r24 = clz64 (r25:r18) 2880;; clobbers: r22, r23, r26 2881DEFUN __clzdi2 2882 XCALL __clzsi2 2883 sbrs r24, 5 2884 ret 2885 mov_l r22, r18 2886 mov_h r23, r19 2887 mov_l r24, r20 2888 mov_h r25, r21 2889 XCALL __clzsi2 2890 subi r24, -32 2891 ret 2892ENDF __clzdi2 2893#endif /* defined (L_clzdi2) */ 2894 2895#if defined (L_clzsi2) 2896;; count leading zeros 2897;; r25:r24 = clz32 (r25:r22) 2898;; clobbers: r26 2899DEFUN __clzsi2 2900 XCALL __clzhi2 2901 sbrs r24, 4 2902 ret 2903 mov_l r24, r22 2904 mov_h r25, r23 2905 XCALL __clzhi2 2906 subi r24, -16 2907 ret 2908ENDF __clzsi2 2909#endif /* defined (L_clzsi2) */ 2910 2911#if defined (L_clzhi2) 2912;; count leading zeros 2913;; r25:r24 = clz16 (r25:r24) 2914;; clobbers: r26 2915DEFUN __clzhi2 2916 clr r26 2917 tst r25 2918 brne 1f 2919 subi r26, -8 2920 or r25, r24 2921 brne 1f 2922 ldi r24, 16 2923 ret 29241: cpi r25, 16 2925 brsh 3f 2926 subi r26, -3 2927 swap r25 29282: inc r26 29293: lsl r25 2930 brcc 2b 2931 mov r24, r26 2932 clr r25 2933 ret 2934ENDF __clzhi2 2935#endif /* defined (L_clzhi2) */ 2936 2937 2938/********************************** 2939 * Parity 2940 **********************************/ 2941 2942#if defined (L_paritydi2) 2943;; r25:r24 = parity64 (r25:r18) 2944;; clobbers: __tmp_reg__ 2945DEFUN __paritydi2 2946 eor r24, r18 2947 eor r24, r19 2948 eor r24, r20 2949 eor r24, r21 2950 XJMP __paritysi2 2951ENDF __paritydi2 2952#endif /* defined (L_paritydi2) */ 2953 2954#if defined (L_paritysi2) 2955;; r25:r24 = parity32 (r25:r22) 2956;; clobbers: __tmp_reg__ 2957DEFUN __paritysi2 2958 eor r24, r22 2959 eor r24, r23 2960 XJMP __parityhi2 2961ENDF __paritysi2 2962#endif /* defined (L_paritysi2) */ 2963 2964#if defined (L_parityhi2) 2965;; r25:r24 = parity16 (r25:r24) 2966;; clobbers: __tmp_reg__ 2967DEFUN __parityhi2 2968 eor r24, r25 2969;; FALLTHRU 2970ENDF __parityhi2 2971 2972;; r25:r24 = parity8 (r24) 2973;; clobbers: __tmp_reg__ 2974DEFUN __parityqi2 2975 ;; parity is in r24[0..7] 2976 mov __tmp_reg__, r24 2977 swap __tmp_reg__ 2978 eor r24, __tmp_reg__ 2979 ;; parity is in r24[0..3] 2980 subi r24, -4 2981 andi r24, -5 2982 subi r24, -6 2983 ;; parity is in r24[0,3] 2984 sbrc r24, 3 2985 inc r24 2986 ;; parity is in r24[0] 2987 andi r24, 1 2988 clr r25 2989 ret 2990ENDF __parityqi2 2991#endif /* defined (L_parityhi2) */ 2992 2993 2994/********************************** 2995 * Population Count 2996 **********************************/ 2997 2998#if defined (L_popcounthi2) 2999;; population count 3000;; r25:r24 = popcount16 (r25:r24) 3001;; clobbers: __tmp_reg__ 3002DEFUN __popcounthi2 3003 XCALL __popcountqi2 3004 push r24 3005 mov r24, r25 3006 XCALL __popcountqi2 3007 clr r25 3008 ;; FALLTHRU 3009ENDF __popcounthi2 3010 3011DEFUN __popcounthi2_tail 3012 pop __tmp_reg__ 3013 add r24, __tmp_reg__ 3014 ret 3015ENDF __popcounthi2_tail 3016#endif /* defined (L_popcounthi2) */ 3017 3018#if defined (L_popcountsi2) 3019;; population count 3020;; r25:r24 = popcount32 (r25:r22) 3021;; clobbers: __tmp_reg__ 3022DEFUN __popcountsi2 3023 XCALL __popcounthi2 3024 push r24 3025 mov_l r24, r22 3026 mov_h r25, r23 3027 XCALL __popcounthi2 3028 XJMP __popcounthi2_tail 3029ENDF __popcountsi2 3030#endif /* defined (L_popcountsi2) */ 3031 3032#if defined (L_popcountdi2) 3033;; population count 3034;; r25:r24 = popcount64 (r25:r18) 3035;; clobbers: r22, r23, __tmp_reg__ 3036DEFUN __popcountdi2 3037 XCALL __popcountsi2 3038 push r24 3039 mov_l r22, r18 3040 mov_h r23, r19 3041 mov_l r24, r20 3042 mov_h r25, r21 3043 XCALL __popcountsi2 3044 XJMP __popcounthi2_tail 3045ENDF __popcountdi2 3046#endif /* defined (L_popcountdi2) */ 3047 3048#if defined (L_popcountqi2) 3049;; population count 3050;; r24 = popcount8 (r24) 3051;; clobbers: __tmp_reg__ 3052DEFUN __popcountqi2 3053 mov __tmp_reg__, r24 3054 andi r24, 1 3055 lsr __tmp_reg__ 3056 lsr __tmp_reg__ 3057 adc r24, __zero_reg__ 3058 lsr __tmp_reg__ 3059 adc r24, __zero_reg__ 3060 lsr __tmp_reg__ 3061 adc r24, __zero_reg__ 3062 lsr __tmp_reg__ 3063 adc r24, __zero_reg__ 3064 lsr __tmp_reg__ 3065 adc r24, __zero_reg__ 3066 lsr __tmp_reg__ 3067 adc r24, __tmp_reg__ 3068 ret 3069ENDF __popcountqi2 3070#endif /* defined (L_popcountqi2) */ 3071 3072 3073/********************************** 3074 * Swap bytes 3075 **********************************/ 3076 3077;; swap two registers with different register number 3078.macro bswap a, b 3079 eor \a, \b 3080 eor \b, \a 3081 eor \a, \b 3082.endm 3083 3084#if defined (L_bswapsi2) 3085;; swap bytes 3086;; r25:r22 = bswap32 (r25:r22) 3087DEFUN __bswapsi2 3088 bswap r22, r25 3089 bswap r23, r24 3090 ret 3091ENDF __bswapsi2 3092#endif /* defined (L_bswapsi2) */ 3093 3094#if defined (L_bswapdi2) 3095;; swap bytes 3096;; r25:r18 = bswap64 (r25:r18) 3097DEFUN __bswapdi2 3098 bswap r18, r25 3099 bswap r19, r24 3100 bswap r20, r23 3101 bswap r21, r22 3102 ret 3103ENDF __bswapdi2 3104#endif /* defined (L_bswapdi2) */ 3105 3106 3107/********************************** 3108 * 64-bit shifts 3109 **********************************/ 3110 3111#if defined (L_ashrdi3) 3112 3113#define SS __zero_reg__ 3114 3115;; Arithmetic shift right 3116;; r25:r18 = ashr64 (r25:r18, r17:r16) 3117DEFUN __ashrdi3 3118 sbrc r25, 7 3119 com SS 3120 ;; FALLTHRU 3121ENDF __ashrdi3 3122 3123;; Logic shift right 3124;; r25:r18 = lshr64 (r25:r18, r17:r16) 3125DEFUN __lshrdi3 3126 ;; Signs are in SS (zero_reg) 3127 mov __tmp_reg__, r16 31280: cpi r16, 8 3129 brlo 2f 3130 subi r16, 8 3131 mov r18, r19 3132 mov r19, r20 3133 mov r20, r21 3134 mov r21, r22 3135 mov r22, r23 3136 mov r23, r24 3137 mov r24, r25 3138 mov r25, SS 3139 rjmp 0b 31401: asr SS 3141 ror r25 3142 ror r24 3143 ror r23 3144 ror r22 3145 ror r21 3146 ror r20 3147 ror r19 3148 ror r18 31492: dec r16 3150 brpl 1b 3151 clr __zero_reg__ 3152 mov r16, __tmp_reg__ 3153 ret 3154ENDF __lshrdi3 3155 3156#undef SS 3157 3158#endif /* defined (L_ashrdi3) */ 3159 3160#if defined (L_ashldi3) 3161;; Shift left 3162;; r25:r18 = ashl64 (r25:r18, r17:r16) 3163;; This function does not clobber T. 3164DEFUN __ashldi3 3165 mov __tmp_reg__, r16 31660: cpi r16, 8 3167 brlo 2f 3168 mov r25, r24 3169 mov r24, r23 3170 mov r23, r22 3171 mov r22, r21 3172 mov r21, r20 3173 mov r20, r19 3174 mov r19, r18 3175 clr r18 3176 subi r16, 8 3177 rjmp 0b 31781: lsl r18 3179 rol r19 3180 rol r20 3181 rol r21 3182 rol r22 3183 rol r23 3184 rol r24 3185 rol r25 31862: dec r16 3187 brpl 1b 3188 mov r16, __tmp_reg__ 3189 ret 3190ENDF __ashldi3 3191#endif /* defined (L_ashldi3) */ 3192 3193#if defined (L_rotldi3) 3194;; Rotate left 3195;; r25:r18 = rotl64 (r25:r18, r17:r16) 3196DEFUN __rotldi3 3197 push r16 31980: cpi r16, 8 3199 brlo 2f 3200 subi r16, 8 3201 mov __tmp_reg__, r25 3202 mov r25, r24 3203 mov r24, r23 3204 mov r23, r22 3205 mov r22, r21 3206 mov r21, r20 3207 mov r20, r19 3208 mov r19, r18 3209 mov r18, __tmp_reg__ 3210 rjmp 0b 32111: lsl r18 3212 rol r19 3213 rol r20 3214 rol r21 3215 rol r22 3216 rol r23 3217 rol r24 3218 rol r25 3219 adc r18, __zero_reg__ 32202: dec r16 3221 brpl 1b 3222 pop r16 3223 ret 3224ENDF __rotldi3 3225#endif /* defined (L_rotldi3) */ 3226 3227 3228.section .text.libgcc.fmul, "ax", @progbits 3229 3230/***********************************************************/ 3231;;; Softmul versions of FMUL, FMULS and FMULSU to implement 3232;;; __builtin_avr_fmul* if !AVR_HAVE_MUL 3233/***********************************************************/ 3234 3235#define A1 24 3236#define B1 25 3237#define C0 22 3238#define C1 23 3239#define A0 __tmp_reg__ 3240 3241#ifdef L_fmuls 3242;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction 3243;;; Clobbers: r24, r25, __tmp_reg__ 3244DEFUN __fmuls 3245 ;; A0.7 = negate result? 3246 mov A0, A1 3247 eor A0, B1 3248 ;; B1 = |B1| 3249 sbrc B1, 7 3250 neg B1 3251 XJMP __fmulsu_exit 3252ENDF __fmuls 3253#endif /* L_fmuls */ 3254 3255#ifdef L_fmulsu 3256;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction 3257;;; Clobbers: r24, r25, __tmp_reg__ 3258DEFUN __fmulsu 3259 ;; A0.7 = negate result? 3260 mov A0, A1 3261;; FALLTHRU 3262ENDF __fmulsu 3263 3264;; Helper for __fmuls and __fmulsu 3265DEFUN __fmulsu_exit 3266 ;; A1 = |A1| 3267 sbrc A1, 7 3268 neg A1 3269#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ 3270 ;; Some cores have problem skipping 2-word instruction 3271 tst A0 3272 brmi 1f 3273#else 3274 sbrs A0, 7 3275#endif /* __AVR_HAVE_JMP_CALL__ */ 3276 XJMP __fmul 32771: XCALL __fmul 3278 ;; C = -C iff A0.7 = 1 3279 NEG2 C0 3280 ret 3281ENDF __fmulsu_exit 3282#endif /* L_fmulsu */ 3283 3284 3285#ifdef L_fmul 3286;;; r22:r23 = fmul (r24, r25) like in FMUL instruction 3287;;; Clobbers: r24, r25, __tmp_reg__ 3288DEFUN __fmul 3289 ; clear result 3290 clr C0 3291 clr C1 3292 clr A0 32931: tst B1 3294 ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C. 32952: brpl 3f 3296 ;; C += A 3297 add C0, A0 3298 adc C1, A1 32993: ;; A >>= 1 3300 lsr A1 3301 ror A0 3302 ;; B <<= 1 3303 lsl B1 3304 brne 2b 3305 ret 3306ENDF __fmul 3307#endif /* L_fmul */ 3308 3309#undef A0 3310#undef A1 3311#undef B1 3312#undef C0 3313#undef C1 3314 3315#include "lib1funcs-fixed.S" 3316