;   Copyright (C) 2014-2022 Free Software Foundation, Inc.
;   Contributed by Red Hat.
;
; This file is free software; you can redistribute it and/or modify it
; under the terms of the GNU General Public License as published by the
; Free Software Foundation; either version 3, or (at your option) any
; later version.
;
; This file is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
; General Public License for more details.
;
; Under Section 7 of GPL version 3, you are granted additional
; permissions described in the GCC Runtime Library Exception, version
; 3.1, as published by the Free Software Foundation.
;
; You should have received a copy of the GNU General Public License and
; a copy of the GCC Runtime Library Exception along with this program;
; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
; <http://www.gnu.org/licenses/>.

	;; Macro to start a multiply function.  Each function has three
	;; names, and hence three entry points - although they all go
	;; through the same code.  The first name is the version generated
	;; by GCC.  The second is the MSP430 EABI mandated name for the
	;; *software* version of the function.  The third is the EABI
	;; mandated name for the *hardware* version of the function.
	;;
	;; Since we are using the hardware and software names to point
	;; to the same code this effectively means that we are mapping
	;; the software function onto the hardware function.  Thus if
	;; the library containing this code is linked into an application
	;; (before the libgcc.a library) *all* multiply functions will
	;; be mapped onto the hardware versions.
	;;
	;; We construct each function in its own section so that linker
	;; garbage collection can be used to delete any unused functions
	;; from this file.
;; start_func: open a multiply routine.
;;
;; Places the routine in its own section named after gcc_name and
;; defines three global function symbols that all label the same
;; address, so one body serves the GCC name, the EABI software name
;; and the EABI hardware name.  The status register is then saved on
;; the stack and interrupts are disabled; end_func undoes this.
.macro start_func gcc_name, eabi_soft_name, eabi_hard_name
	.pushsection .text.\gcc_name,"ax",@progbits
	.balign	2

	.globl	\eabi_hard_name
	.globl	\eabi_soft_name
	.globl	\gcc_name
	.type	\eabi_hard_name, @function
	.type	\eabi_soft_name, @function
	.type	\gcc_name, @function
\eabi_hard_name:
\eabi_soft_name:
\gcc_name:
	push.w	sr			; keep the caller interrupt state
	dint				; no interrupts while the multiplier is busy
	nop				; let the DINT take effect
.endm


;; end_func: close a routine opened with start_func.
;;
;; Restores the status register pushed by start_func and returns.
;; Large model builds pop it explicitly and return with RETA; all
;; other builds use RETI, which pops the status register and then
;; the return address.  Also records the size of NAME and leaves the
;; section entered by start_func.
.macro end_func name
#ifndef __MSP430X_LARGE__
	reti
#else
	pop.w	sr
	reta
#endif
	.size	\name, . - \name
	.popsection
.endm


;; fake_func: create entry points with no hardware implementation.
;;
;; Defines gcc_name and eabi_hard_name as global function symbols in
;; a section named after gcc_name; both simply branch to
;; eabi_soft_name, which is implemented elsewhere.
.macro fake_func gcc_name, eabi_soft_name, eabi_hard_name
	.pushsection .text.\gcc_name,"ax",@progbits
	.balign	2

	.globl	\eabi_hard_name
	.globl	\gcc_name
	.type	\eabi_hard_name, @function
	.type	\gcc_name, @function
\eabi_hard_name:
\gcc_name:
#if defined(__MSP430X_LARGE__)
	bra	#\eabi_soft_name
#else
	br	#\eabi_soft_name
#endif
	.size	\gcc_name, . - \gcc_name
	.popsection
.endm


;; mult16: 16-bit hardware multiply, int16 = int16 * int16.
;;
;;   In:              r12 = operand 1, r13 = operand 2
;;   Out:             r12 = result
;;   Registers used:  r12, r13
;;
;; OP1, OP2 and RESULT are the memory locations of the hardware
;; registers.  The multiply is atomic because the enclosing routine
;; disables interrupts on entry and restores them on exit.
.macro mult16 OP1, OP2, RESULT
	mov.w	r12, &\OP1		; operand 1 into the multiplier
	mov.w	r13, &\OP2		; operand 2, which triggers MPY
	mov.w	&\RESULT, r12		; product into the return register
.endm


;; mult1632: 16-bit hardware multiply with a 32-bit result.
;;   int32  = int16  * int16
;;   uint32 = uint16 * uint16
;;
;;   In:              r12 = operand 1, r13 = operand 2
;;   Out:             r12 = low word, r13 = high word
;;   Registers used:  r12, r13
;;
;; OP1, OP2, RESLO and RESHI are the memory locations of the hardware
;; registers.  The multiply is atomic because the enclosing routine
;; disables interrupts on entry and restores them on exit.
.macro mult1632 OP1, OP2, RESLO, RESHI
	mov.w	r12, &\OP1		; operand 1 into the multiplier
	mov.w	r13, &\OP2		; operand 2, which triggers MPY
	mov.w	&\RESLO, r12		; low word of the product
	mov.w	&\RESHI, r13		; high word of the product
.endm


;; mult32: 32-bit multiply with a 32-bit result, built from 16-bit
;; multiply and multiply-accumulate operations.
;;   int32 = int32 * int32
;;
;;   In:              r12, r13 = operand 1 (low, high)
;;                    r14, r15 = operand 2 (low, high)
;;   Out:             r12, r13 = result (low, high)
;;   Registers used:  r12, r13, r14, r15
;;
;; The macro arguments are the memory locations of the hardware
;; registers.  The multiply is atomic because the enclosing routine
;; disables interrupts on entry and restores them on exit.
;; The order of the register accesses below must not be changed.
.macro mult32 OP1, OP2, MAC_OP1, MAC_OP2, RESLO, RESHI
	mov.w	r12, &\OP1		; op1 low into the multiplier
	mov.w	r14, &\OP2		; op2 low, which triggers MPY
	mov.w	r12, &\MAC_OP1		; op1 low into the accumulate operand
	mov.w	&\RESLO, r12		; low 16 bits of the result are final
	mov.w	&\RESHI, &\RESLO	; intermediate high word becomes the low word
	mov.w	r15, &\MAC_OP2		; op2 high, which triggers MAC
	mov.w	r13, &\MAC_OP1		; op1 high into the accumulate operand
	mov.w	r14, &\MAC_OP2		; op2 low, which triggers MAC
	mov.w	&\RESLO, r13		; upper 16 bits of the result
.endm


;; mult32_hw: 32-bit hardware multiply with a 32-bit result.
;;   int32 = int32 * int32
;;
;;   In:              r12, r13 = operand 1 (low, high)
;;                    r14, r15 = operand 2 (low, high)
;;   Out:             r12, r13 = result (low, high)
;;   Registers used:  r12, r13, r14, r15
;;
;; The macro arguments are the memory locations of the hardware
;; registers.  The multiply is atomic because the enclosing routine
;; disables interrupts on entry and restores them on exit.
.macro mult32_hw OP1_LO, OP1_HI, OP2_LO, OP2_HI, RESLO, RESHI
	mov.w	r12, &\OP1_LO		; op1 low into the multiplier
	mov.w	r13, &\OP1_HI		; op1 high into the multiplier
	mov.w	r14, &\OP2_LO		; op2 low into the multiplier
	mov.w	r15, &\OP2_HI		; op2 high, which triggers MPY
	mov.w	&\RESLO, r12		; low 16 bits for return
	mov.w	&\RESHI, r13		; high 16 bits for return
.endm


;; mult3264_hw: 32-bit hardware multiply with a 64-bit result.
;;   int64  = int32  * int32
;;   uint64 = uint32 * uint32
;;
;;   In:              r12, r13 = operand 1 (low, high)
;;                    r14, r15 = operand 2 (low, high)
;;   Out:             r12, r13, r14, r15 = result, lowest word in r12
;;   Registers used:  r12, r13, r14, r15
;;
;; The macro arguments are the memory locations of the hardware
;; registers.  The multiply is atomic because the enclosing routine
;; disables interrupts on entry and restores them on exit.
.macro mult3264_hw OP1_LO, OP1_HI, OP2_LO, OP2_HI, RES0, RES1, RES2, RES3
	mov.w	r12, &\OP1_LO		; op1 low into the multiplier
	mov.w	r13, &\OP1_HI		; op1 high into the multiplier
	mov.w	r14, &\OP2_LO		; op2 low into the multiplier
	mov.w	r15, &\OP2_HI		; op2 high, which triggers MPY
	mov.w	&\RES0, r12		; lowest 16 bits for return
	mov.w	&\RES1, r13
	mov.w	&\RES2, r14
	mov.w	&\RES3, r15		; highest 16 bits for return
.endm


;; mult64_hw: 64-bit multiply with a 64-bit result.
;;   int64 = int64 * int64
;;
;;   In:              r8, r9, r10, r11   = operand 1, lowest word in r8
;;                    r12, r13, r14, r15 = operand 2, lowest word in r12
;;   Out:             r12, r13, r14, r15 = result, lowest word in r12
;;   Registers used:  r6, r7, r8, r9, r10, r11, r12, r13, r14, r15
;;
;; The 32-bit hardware multiplier is used three times:
;;   r12:r15 = (r8:r9 * r12:r13)
;;             + ((r8:r9 * r14:r15) << 32)
;;             + ((r10:r11 * r12:r13) << 32)
;; The left shifts by 32 cost nothing extra: only the two low words
;; of each cross product are kept and they are added into the two
;; high words of the final result.
;;
;; r6 to r10 are pushed on entry and popped on exit.  The macro
;; arguments are the memory locations of the hardware registers.
;; The multiply is atomic because the enclosing routine disables
;; interrupts on entry and restores them on exit.
.macro mult64_hw MPY32_LO, MPY32_HI, OP2_LO, OP2_HI, RES0, RES1, RES2, RES3
#if defined(__MSP430X_LARGE__)
	pushm.a	#5, r10
#elif defined(__MSP430X__)
	pushm.w	#5, r10
#else
	push	r10
	push	r9
	push	r8
	push	r7
	push	r6
#endif
	; Cross product 1: low half of operand 1 times high half of operand 2.
	mov.w	r8, &\MPY32_LO
	mov.w	r9, &\MPY32_HI
	mov.w	r14, &\OP2_LO
	mov.w	r15, &\OP2_HI
	; Keep only its low 32 bits, in r6:r7.
	mov.w	&\RES0, r6
	mov.w	&\RES1, r7
	; Cross product 2: high half of operand 1 times low half of operand 2.
	mov.w	r10, &\MPY32_LO
	mov.w	r11, &\MPY32_HI
	mov.w	r12, &\OP2_LO
	mov.w	r13, &\OP2_HI
	; Accumulate its low 32 bits onto the saved value.
	add.w	&\RES0, r6
	addc.w	&\RES1, r7
	; Main product: low half of operand 1 times low half of operand 2.
	mov.w	r8, &\MPY32_LO
	mov.w	r9, &\MPY32_HI
	mov.w	r12, &\OP2_LO
	mov.w	r13, &\OP2_HI
	; All 64 bits of this product go to the return registers.
	mov.w	&\RES0, r12
	mov.w	&\RES1, r13
	mov.w	&\RES2, r14
	mov.w	&\RES3, r15
	; Fold the accumulated cross products into the upper 32 bits, which
	; is the same as adding them after a left shift by 32.
	add.w	r6, r14
	addc.w	r7, r15
#if defined(__MSP430X_LARGE__)
	popm.a	#5, r10
#elif defined(__MSP430X__)
	popm.w	#5, r10
#else
	pop	r6
	pop	r7
	pop	r8
	pop	r9
	pop	r10
#endif
.endm

;; EABI mandated names:
;;
;; int16 __mspabi_mpyi (int16 x, int16 y)
;;            Multiply int by int.
;; int16 __mspabi_mpyi_hw (int16 x, int16 y)
;;            Multiply int by int. Uses hardware MPY16 or MPY32.
;; int16 __mspabi_mpyi_f5hw (int16 x, int16 y)
;;            Multiply int by int. Uses hardware MPY32 (F5xx devices and up).
;;
;; int32 __mspabi_mpyl (int32 x, int32 y);
;;            Multiply long by long.
;; int32 __mspabi_mpyl_hw (int32 x, int32 y)
;;            Multiply long by long. Uses hardware MPY16.
;; int32 __mspabi_mpyl_hw32 (int32 x, int32 y)
;;            Multiply long by long. Uses hardware MPY32 (F4xx devices).
;; int32 __mspabi_mpyl_f5hw (int32 x, int32 y)
;;            Multiply long by long. Uses hardware MPY32 (F5xx devices and up).
;;
;; int64 __mspabi_mpyll (int64 x, int64 y)
;;            Multiply long long by long long.
;; int64 __mspabi_mpyll_hw (int64 x, int64 y)
;;            Multiply long long by long long. Uses hardware MPY16.
;; int64 __mspabi_mpyll_hw32 (int64 x, int64 y)
;;            Multiply long long by long long. Uses hardware MPY32 (F4xx devices).
;; int64 __mspabi_mpyll_f5hw (int64 x, int64 y)
;;            Multiply long long by long long. Uses hardware MPY32 (F5xx devices and up).
;;
;; int32 __mspabi_mpysl (int16 x, int16 y)
;;            Multiply int by int; result is long.
;; int32 __mspabi_mpysl_hw(int16 x, int16 y)
;;            Multiply int by int; result is long. Uses hardware MPY16 or MPY32
;; int32 __mspabi_mpysl_f5hw(int16 x, int16 y)
;;            Multiply int by int; result is long. Uses hardware MPY32 (F5xx devices and up).
;;
;; int64 __mspabi_mpysll(int32 x, int32 y)
;;            Multiply long by long; result is long long.
;; int64 __mspabi_mpysll_hw(int32 x, int32 y)
;;            Multiply long by long; result is long long. Uses hardware MPY16.
;; int64 __mspabi_mpysll_hw32(int32 x, int32 y)
;;            Multiply long by long; result is long long. Uses hardware MPY32 (F4xx devices).
;; int64 __mspabi_mpysll_f5hw(int32 x, int32 y)
;;            Multiply long by long; result is long long. Uses hardware MPY32 (F5xx devices and up).
;;
;; uint32 __mspabi_mpyul(uint16 x, uint16 y)
;;            Multiply unsigned int by unsigned int; result is unsigned long.
;; uint32 __mspabi_mpyul_hw(uint16 x, uint16 y)
;;            Multiply unsigned int by unsigned int; result is unsigned long. Uses hardware MPY16 or MPY32
;; uint32 __mspabi_mpyul_f5hw(uint16 x, uint16 y)
;;            Multiply unsigned int by unsigned int; result is unsigned long. Uses hardware MPY32 (F5xx devices and up).
;;
;; uint64 __mspabi_mpyull(uint32 x, uint32 y)
;;            Multiply unsigned long by unsigned long; result is unsigned long long.
;; uint64 __mspabi_mpyull_hw(uint32 x, uint32 y)
;;            Multiply unsigned long by unsigned long; result is unsigned long long. Uses hardware MPY16
;; uint64 __mspabi_mpyull_hw32(uint32 x, uint32 y)
;;            Multiply unsigned long by unsigned long; result is unsigned long long. Uses hardware MPY32 (F4xx devices).
;; uint64 __mspabi_mpyull_f5hw(uint32 x, uint32 y)
;;            Multiply unsigned long by unsigned long; result is unsigned long long. Uses hardware MPY32 (F5xx devices and up)

;;;; The register names below are the standardised versions used across TI
;;;; literature.

;; Addresses of the multiplier registers on devices with the 16-bit
;; hardware multiply.
	.equ	MPY,		0x0130
	.equ	MPYS,		0x0132
	.equ	MAC,		0x0134
	.equ	OP2,		0x0138
	.equ	RESLO,		0x013A
	.equ	RESHI,		0x013C

;; Addresses of the multiplier registers on devices with the 32-bit
;; (non-F5) hardware multiply.
	.equ	MPY32L,		0x0140
	.equ	MPY32H,		0x0142
	.equ	MPYS32L,	0x0144
	.equ	MPYS32H,	0x0146
	.equ	OP2L,		0x0150
	.equ	OP2H,		0x0152
	.equ	RES0,		0x0154
	.equ	RES1,		0x0156
	.equ	RES2,		0x0158
	.equ	RES3,		0x015A

;; Addresses of the multiplier registers on devices with the F5 series
;; hardware multiply.  The F5xxx series of MCUs support the same 16-bit
;; and 32-bit multiply as the second generation hardware, but the
;; registers live at different addresses.
;; These names are NOT standard: _F5 has been appended to the standard
;; names.
	.equ	MPY_F5,		0x04C0
	.equ	MPYS_F5,	0x04C2
	.equ	MAC_F5,		0x04C4
	.equ	OP2_F5,		0x04C8
	.equ	RESLO_F5,	0x04CA
	.equ	RESHI_F5,	0x04CC
	.equ	MPY32L_F5,	0x04D0
	.equ	MPY32H_F5,	0x04D2
	.equ	MPYS32L_F5,	0x04D4
	.equ	MPYS32H_F5,	0x04D6
	.equ	OP2L_F5,	0x04E0
	.equ	OP2H_F5,	0x04E2
	.equ	RES0_F5,	0x04E4
	.equ	RES1_F5,	0x04E6
	.equ	RES2_F5,	0x04E8
	.equ	RES3_F5,	0x04EA

#if defined(MUL_16)
;; First generation MSP430 hardware multiplies ...

;; 16-bit and 16x16->32 routines use the MPY, MPYS, OP2, RESLO and RESHI
;; registers; the 32-bit multiply is composed with the MAC register.
	start_func	__mulhi2, __mspabi_mpyi, __mspabi_mpyi_hw
	mult16		MPY, OP2, RESLO
	end_func	__mulhi2

	start_func	__mulhisi2, __mspabi_mpysl, __mspabi_mpysl_hw
	mult1632	MPYS, OP2, RESLO, RESHI
	end_func	__mulhisi2

	start_func	__umulhisi2, __mspabi_mpyul, __mspabi_mpyul_hw
	mult1632	MPY, OP2, RESLO, RESHI
	end_func	__umulhisi2

	start_func	__mulsi2, __mspabi_mpyl, __mspabi_mpyl_hw
	mult32		MPY, OP2, MAC, OP2, RESLO, RESHI
	end_func	__mulsi2

	;; FIXME: We do not have hardware implementations of these
	;; routines, so just jump to the software versions instead.
	fake_func	__mulsidi2, __mspabi_mpysll, __mspabi_mpysll_hw
	fake_func	__umulsidi2, __mspabi_mpyull, __mspabi_mpyull_hw
	fake_func	__muldi3, __mspabi_mpyll, __mspabi_mpyll_hw

#elif defined(MUL_32)
;; Second generation MSP430 hardware multiplies ...

	start_func	__mulhi2, __mspabi_mpyi, __mspabi_mpyi_hw
	mult16		MPY, OP2, RESLO
	end_func	__mulhi2

	start_func	__mulhisi2, __mspabi_mpysl, __mspabi_mpysl_hw
	mult1632	MPYS, OP2, RESLO, RESHI
	end_func	__mulhisi2

	start_func	__umulhisi2, __mspabi_mpyul, __mspabi_mpyul_hw
	mult1632	MPY, OP2, RESLO, RESHI
	end_func	__umulhisi2

	start_func	__mulsi2, __mspabi_mpyl, __mspabi_mpyl_hw32
	mult32_hw	MPY32L, MPY32H, OP2L, OP2H, RES0, RES1
	end_func	__mulsi2

	start_func	__mulsidi2, __mspabi_mpysll, __mspabi_mpysll_hw32
	mult3264_hw	MPYS32L, MPYS32H, OP2L, OP2H, RES0, RES1, RES2, RES3
	end_func	__mulsidi2

	start_func	__umulsidi2, __mspabi_mpyull, __mspabi_mpyull_hw32
	mult3264_hw	MPY32L, MPY32H, OP2L, OP2H, RES0, RES1, RES2, RES3
	end_func	__umulsidi2

	start_func	__muldi3, __mspabi_mpyll, __mspabi_mpyll_hw32
	mult64_hw	MPY32L, MPY32H, OP2L, OP2H, RES0, RES1, RES2, RES3
	end_func	__muldi3

#elif defined(MUL_F5)
;; The F5xxx series of MCUs support the same 16-bit and 32-bit multiply
;; as the second generation hardware, but they are accessed from
;; different memory registers.

	start_func	__mulhi2, __mspabi_mpyi, __mspabi_mpyi_f5hw
	mult16		MPY_F5, OP2_F5, RESLO_F5
	end_func	__mulhi2

	start_func	__mulhisi2, __mspabi_mpysl, __mspabi_mpysl_f5hw
	mult1632	MPYS_F5, OP2_F5, RESLO_F5, RESHI_F5
	end_func	__mulhisi2

	start_func	__umulhisi2, __mspabi_mpyul, __mspabi_mpyul_f5hw
	mult1632	MPY_F5, OP2_F5, RESLO_F5, RESHI_F5
	end_func	__umulhisi2

	start_func	__mulsi2, __mspabi_mpyl, __mspabi_mpyl_f5hw
	mult32_hw	MPY32L_F5, MPY32H_F5, OP2L_F5, OP2H_F5, RES0_F5, RES1_F5
	end_func	__mulsi2

	start_func	__mulsidi2, __mspabi_mpysll, __mspabi_mpysll_f5hw
	mult3264_hw	MPYS32L_F5, MPYS32H_F5, OP2L_F5, OP2H_F5, RES0_F5, RES1_F5, RES2_F5, RES3_F5
	end_func	__mulsidi2

	start_func	__umulsidi2, __mspabi_mpyull, __mspabi_mpyull_f5hw
	mult3264_hw	MPY32L_F5, MPY32H_F5, OP2L_F5, OP2H_F5, RES0_F5, RES1_F5, RES2_F5, RES3_F5
	end_func	__umulsidi2

	start_func	__muldi3, __mspabi_mpyll, __mspabi_mpyll_f5hw
	mult64_hw	MPY32L_F5, MPY32H_F5, OP2L_F5, OP2H_F5, RES0_F5, RES1_F5, RES2_F5, RES3_F5
	end_func	__muldi3

#else
#error MUL type not defined
#endif