xref: /netbsd-src/external/gpl3/gcc/dist/libgcc/config/msp430/lib2hw_mul.S (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1;   Copyright (C) 2014-2022 Free Software Foundation, Inc.
2;   Contributed by Red Hat.
3;
4; This file is free software; you can redistribute it and/or modify it
5; under the terms of the GNU General Public License as published by the
6; Free Software Foundation; either version 3, or (at your option) any
7; later version.
8;
9; This file is distributed in the hope that it will be useful, but
10; WITHOUT ANY WARRANTY; without even the implied warranty of
11; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12; General Public License for more details.
13;
14; Under Section 7 of GPL version 3, you are granted additional
15; permissions described in the GCC Runtime Library Exception, version
16; 3.1, as published by the Free Software Foundation.
17;
18; You should have received a copy of the GNU General Public License and
19; a copy of the GCC Runtime Library Exception along with this program;
20; see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
21; <http://www.gnu.org/licenses/>.
22
23	;;  start_func: open a hardware multiply function.  The macro takes
24	;; three names (gcc_name, eabi_soft_name, eabi_hard_name) and defines
25	;; all of them as global function labels at one address, so every
26	;; name enters the same code.  gcc_name is the name generated by
27	;; GCC, eabi_soft_name is the MSP430 EABI mandated name for the
28	;; *software* version of the function and eabi_hard_name is the EABI
29	;; mandated name for the *hardware* version of the function.
30	;;  Since the hardware and software names point to the same code
31	;; this effectively means that we are mapping the software function
32	;; onto the hardware function.  Thus if the library containing this
33	;; code is linked into an application (before the libgcc.a library)
34	;; *all* multiply functions will be mapped onto the hardware versions.
35	;;  Each function is built in its own .text.<gcc_name> section so
36	;; that linker garbage collection can delete any unused functions
37	;; from this file.
38	;;  The prologue pushes SR and disables interrupts, so the body must
39	;; be closed with end_func, which restores SR, returns and pops the section.
40.macro start_func gcc_name eabi_soft_name eabi_hard_name
41	.pushsection .text.\gcc_name,"ax",@progbits
42	.p2align 1
43	.global \eabi_hard_name
44	.type \eabi_hard_name , @function
45\eabi_hard_name:
46	.global \eabi_soft_name
47	.type \eabi_soft_name , @function
48\eabi_soft_name:
49	.global \gcc_name
50	.type \gcc_name , @function
51\gcc_name:
52	PUSH.W	sr			; Save current interrupt state (restored by end_func)
53	DINT				; Disable interrupts so the multiplier is used atomically
54	NOP				; Account for latency before the multiplier is touched
55.endm
56
57
58	;; End a function started with the start_func macro: restore the saved SR, return, set the size of NAME and pop the section.
59.macro end_func name
60#ifdef __MSP430X_LARGE__
61	POP.W  sr			; Restore the interrupt state pushed by start_func
62        RETA				; Return to the caller (large model return)
63#else
64	RETI				; Pops the saved SR and then the return address
65#endif
66	.size \name , . - \name
67	.popsection
68.endm
69
70
71	;; Like the start_func macro except that it is used to create a
72	;; false entry point: gcc_name and eabi_hard_name are defined here and
73	;; just jump to eabi_soft_name, the software function (implemented elsewhere).  No end_func is needed.
74.macro fake_func gcc_name  eabi_soft_name  eabi_hard_name
75 	.pushsection .text.\gcc_name,"ax",@progbits
76	.p2align 1
77	.global \eabi_hard_name
78	.type \eabi_hard_name , @function
79\eabi_hard_name:
80	.global \gcc_name
81	.type \gcc_name , @function
82\gcc_name:
83#ifdef __MSP430X_LARGE__
84	BRA	#\eabi_soft_name	; Large model branch to the software routine
85#else
86	BR	#\eabi_soft_name	; Branch to the software routine
87#endif
88	.size \gcc_name , . - \gcc_name
89	.popsection
90.endm
91
92
93.macro mult16 OP1, OP2, RESULT
94;* * 16-bit hardware multiply:  int16 = int16 * int16
95;*
96;*   - Operand 1 is in R12
97;*   - Operand 2 is in R13
98;*   - Result is in R12
99;*
100;* To ensure that the multiply is performed atomically, interrupts are
101;* disabled upon routine entry (start_func) and restored upon exit (end_func).
102;*
103;*   Registers used:  R12, R13
104;*
105;* Macro arguments are the memory locations of the hardware registers: OP1 and OP2 receive the operands, RESULT is read back into R12.
106
107	MOV.W	r12, &\OP1		; Load operand 1 into multiplier
108	MOV.W	r13, &\OP2		; Load operand 2 which triggers MPY
109	MOV.W	&\RESULT, r12		; Move result into return register
110.endm
111
112.macro mult1632 OP1, OP2, RESLO, RESHI
113;* * 16-bit hardware multiply with a 32-bit result:
114;*	int32 = int16 * int16
115;* 	uint32 = uint16 * uint16
116;*
117;*   - Operand 1 is in R12
118;*   - Operand 2 is in R13
119;*   - Result is in R12 (low word), R13 (high word)
120;*
121;* To ensure that the multiply is performed atomically, interrupts are
122;* disabled upon routine entry (start_func) and restored upon exit (end_func).
123;*
124;*   Registers used:  R12, R13
125;*
126;* Macro arguments are the memory locations of the hardware registers.  Callers in this file pass MPYS as OP1 for the signed form and MPY for the unsigned form.
127
128	MOV.W	r12, &\OP1		; Load operand 1 into multiplier
129	MOV.W	r13, &\OP2		; Load operand 2 which triggers MPY
130	MOV.W	&\RESLO, r12		; Move low result into return register
131	MOV.W	&\RESHI, r13		; Move high result into return register
132.endm
133
134.macro mult32 OP1, OP2, MAC_OP1, MAC_OP2, RESLO, RESHI
135;* * 32-bit hardware multiply with a 32-bit result using 16-bit multiply and accumulate:
136;*	int32 = int32 * int32
137;*
138;*   - Operand 1 is in R12, R13
139;*   - Operand 2 is in R14, R15
140;*   - Result    is in R12, R13
141;*
142;* To ensure that the multiply is performed atomically, interrupts are
143;* disabled upon routine entry (start_func) and restored upon exit (end_func).
144;*
145;*   Registers used:  R12, R13, R14, R15
146;*
147;* Macro arguments are the memory locations of the hardware registers: OP1/OP2 for the plain multiply, MAC_OP1/MAC_OP2 for the multiply-accumulate, RESLO/RESHI for the result words.
148
149	MOV.W	r12, &\OP1		; Load operand 1 Low into multiplier
150	MOV.W	r14, &\OP2		; Load operand 2 Low which triggers MPY
151	MOV.W	r12, &\MAC_OP1		; Load operand 1 Low into mac
152	MOV.W   &\RESLO, r12		; Low 16-bits of result ready for return
153	MOV.W   &\RESHI, &\RESLO	; MOV intermediate mpy high into low, where the MACs below accumulate
154	MOV.W	r15, &\MAC_OP2		; Load operand 2 High, trigger MAC (operand 1 Low * operand 2 High)
155	MOV.W	r13, &\MAC_OP1		; Load operand 1 High
156	MOV.W	r14, &\MAC_OP2		; Load operand 2 Lo, trigger MAC (operand 1 High * operand 2 Low)
157	MOV.W	&\RESLO, r13		; Upper 16-bits result ready for return
158.endm
159
160
161.macro mult32_hw  OP1_LO  OP1_HI  OP2_LO  OP2_HI  RESLO  RESHI
162;* * 32-bit hardware multiply with a 32-bit result
163;*	int32 = int32 * int32
164;*
165;*   - Operand 1 is in R12, R13
166;*   - Operand 2 is in R14, R15
167;*   - Result    is in R12, R13
168;*
169;* To ensure that the multiply is performed atomically, interrupts are
170;* disabled upon routine entry (start_func) and restored upon exit (end_func).
171;*
172;*   Registers used:  R12, R13, R14, R15
173;*
174;* Macro arguments are the memory locations of the hardware registers: low/high halves of each operand, then the two result words that are returned.
175
176	MOV.W	r12, &\OP1_LO		; Load operand 1 Low into multiplier
177	MOV.W	r13, &\OP1_HI		; Load operand 1 High into multiplier
178	MOV.W	r14, &\OP2_LO		; Load operand 2 Low into multiplier
179	MOV.W	r15, &\OP2_HI		; Load operand 2 High, trigger MPY
180	MOV.W	&\RESLO, r12		; Ready low 16-bits for return
181	MOV.W   &\RESHI, r13		; Ready high 16-bits for return
182.endm
183
184.macro mult3264_hw  OP1_LO  OP1_HI  OP2_LO  OP2_HI  RES0 RES1 RES2 RES3
185;* * 32-bit hardware multiply with a 64-bit result
186;*	int64 = int32 * int32
187;*	uint64 = uint32 * uint32
188;*
189;*   - Operand 1 is in R12, R13
190;*   - Operand 2 is in R14, R15
191;*   - Result    is in R12, R13, R14, R15 (R12 lowest word)
192;*
193;* To ensure that the multiply is performed atomically, interrupts are
194;* disabled upon routine entry (start_func) and restored upon exit (end_func).
195;*
196;*   Registers used:  R12, R13, R14, R15
197;*
198;* Macro arguments are the memory locations of the hardware registers.  Callers in this file pass the MPYS32 pair as OP1_LO/OP1_HI for the signed form and the MPY32 pair for the unsigned form.
199
200	MOV.W	r12, &\OP1_LO		; Load operand 1 Low into multiplier
201	MOV.W	r13, &\OP1_HI		; Load operand 1 High into multiplier
202	MOV.W	r14, &\OP2_LO		; Load operand 2 Low into multiplier
203	MOV.W	r15, &\OP2_HI		; Load operand 2 High, trigger MPY
204	MOV.W	&\RES0, R12		; Ready low 16-bits for return
205	MOV.W   &\RES1, R13		; Second result word
206	MOV.W	&\RES2, R14		; Third result word
207	MOV.W   &\RES3, R15		; Ready high 16-bits for return
208.endm
209
210.macro mult64_hw  MPY32_LO MPY32_HI OP2_LO OP2_HI RES0 RES1 RES2 RES3
211;* * 64-bit hardware multiply with a 64-bit result
212;*	int64 = int64 * int64
213;*
214;*   - Operand 1 is in R8, R9, R10, R11
215;*   - Operand 2 is in R12, R13, R14, R15
216;*   - Result    is in R12, R13, R14, R15
217;*
218;* 64-bit multiplication is achieved using the 32-bit hardware multiplier with
219;* the following equation:
220;*    R12:R15 = (R8:R9 * R12:R13) + ((R8:R9 * R14:R15) << 32) + ((R10:R11 * R12:R13) << 32)
221;*
222;* The left shift by 32 is handled with minimal cost by saving the two low
223;* words and discarding the two high words.
224;*
225;* To ensure that the multiply is performed atomically, interrupts are
226;* disabled upon routine entry (start_func) and restored upon exit (end_func).
227;*
228;*   Registers used:  R6, R7, R8, R9, R10, R11, R12, R13, R14, R15
229;*
230;* Macro arguments are the memory locations of the hardware registers.
231;* R6 to R10 are pushed on entry to this macro and popped again at its end; R6:R7 hold the running sum of the two cross products.
232#if defined(__MSP430X_LARGE__)
233	PUSHM.A	#5, R10
234#elif defined(__MSP430X__)
235	PUSHM.W	#5, R10
236#else
237	PUSH R10 { PUSH R9 { PUSH R8 { PUSH R7 { PUSH R6
238#endif
239	; Multiply the low 32-bits of operand 1 (R8:R9) and the high 32-bits of operand 2 (R14:R15).
240	MOV.W	R8, &\MPY32_LO
241	MOV.W	R9, &\MPY32_HI
242	MOV.W	R14, &\OP2_LO
243	MOV.W	R15, &\OP2_HI
244	; Save the low 32-bits of the result.
245	MOV.W	&\RES0, R6
246	MOV.W	&\RES1, R7
247	; Multiply the high 32-bits of operand 1 (R10:R11) and the low 32-bits of operand 2 (R12:R13).
248	MOV.W	R10, &\MPY32_LO
249	MOV.W	R11, &\MPY32_HI
250	MOV.W	R12, &\OP2_LO
251	MOV.W	R13, &\OP2_HI
252	; Add the low 32-bits of the result to the previously saved result.
253	ADD.W	&\RES0, R6
254	ADDC.W	&\RES1, R7
255	; Multiply the low 32-bits of operand 1 (R8:R9) and of operand 2 (R12:R13).
256	MOV.W	R8, &\MPY32_LO
257	MOV.W	R9, &\MPY32_HI
258	MOV.W	R12, &\OP2_LO
259	MOV.W	R13, &\OP2_HI
260	; Write the return values (full 64-bit product of the two low halves)
261	MOV.W	&\RES0, R12
262	MOV.W   &\RES1, R13
263	MOV.W	&\RES2, R14
264	MOV.W   &\RES3, R15
265	; Add the saved low 32-bit results from earlier to the high 32-bits of
266	; this result, effectively shifting those two results left by 32 bits.
267	ADD.W	R6, R14
268	ADDC.W  R7, R15
269#if defined(__MSP430X_LARGE__)
270	POPM.A	#5, R10
271#elif defined(__MSP430X__)
272	POPM.W	#5, R10
273#else
274	POP R6 { POP R7 { POP R8 { POP R9 { POP R10
275#endif
276.endm
277
278;; EABI mandated names:
279;;
280;; int16 __mspabi_mpyi (int16 x, int16 y)
281;;            Multiply int by int.
282;; int16 __mspabi_mpyi_hw (int16 x, int16 y)
283;;            Multiply int by int. Uses hardware MPY16 or MPY32.
284;; int16 __mspabi_mpyi_f5hw (int16 x, int16 y)
285;;            Multiply int by int. Uses hardware MPY32 (F5xx devices and up).
286;;
287;; int32 __mspabi_mpyl (int32 x, int32 y);
288;;  	      Multiply long by long.
289;; int32 __mspabi_mpyl_hw (int32 x, int32 y)
290;; 	      Multiply long by long. Uses hardware MPY16.
291;; int32 __mspabi_mpyl_hw32 (int32 x, int32 y)
292;; 	      Multiply long by long. Uses hardware MPY32 (F4xx devices).
293;; int32 __mspabi_mpyl_f5hw (int32 x, int32 y)
294;; 	      Multiply long by long. Uses hardware MPY32 (F5xx devices and up).
295;;
296;; int64 __mspabi_mpyll (int64 x, int64 y)
297;; 	      Multiply long long by long long.
298;; int64 __mspabi_mpyll_hw (int64 x, int64 y)
299;; 	      Multiply long long by long long. Uses hardware MPY16.
300;; int64 __mspabi_mpyll_hw32 (int64 x, int64 y)
301;; 	      Multiply long long by long long. Uses hardware MPY32 (F4xx devices).
302;; int64 __mspabi_mpyll_f5hw (int64 x, int64 y)
303;; 	      Multiply long long by long long. Uses hardware MPY32 (F5xx devices and up).
304;;
305;; int32 __mspabi_mpysl (int16 x, int16 y)
306;;            Multiply int by int; result is long.
307;; int32 __mspabi_mpysl_hw (int16 x, int16 y)
308;; 	      Multiply int by int; result is long. Uses hardware MPY16 or MPY32.
309;; int32 __mspabi_mpysl_f5hw (int16 x, int16 y)
310;; 	      Multiply int by int; result is long. Uses hardware MPY32 (F5xx devices and up).
311;;
312;; int64 __mspabi_mpysll (int32 x, int32 y)
313;;            Multiply long by long; result is long long.
314;; int64 __mspabi_mpysll_hw (int32 x, int32 y)
315;; 	      Multiply long by long; result is long long. Uses hardware MPY16.
316;; int64 __mspabi_mpysll_hw32 (int32 x, int32 y)
317;; 	      Multiply long by long; result is long long. Uses hardware MPY32 (F4xx devices).
318;; int64 __mspabi_mpysll_f5hw (int32 x, int32 y)
319;; 	      Multiply long by long; result is long long. Uses hardware MPY32 (F5xx devices and up).
320;;
321;; uint32 __mspabi_mpyul (uint16 x, uint16 y)
322;; 	      Multiply unsigned int by unsigned int; result is unsigned long.
323;; uint32 __mspabi_mpyul_hw (uint16 x, uint16 y)
324;; 	      Multiply unsigned int by unsigned int; result is unsigned long. Uses hardware MPY16 or MPY32.
325;; uint32 __mspabi_mpyul_f5hw (uint16 x, uint16 y)
326;; 	      Multiply unsigned int by unsigned int; result is unsigned long. Uses hardware MPY32 (F5xx devices and up).
327;;
328;; uint64 __mspabi_mpyull (uint32 x, uint32 y)
329;; 	      Multiply unsigned long by unsigned long; result is unsigned long long.
330;; uint64 __mspabi_mpyull_hw (uint32 x, uint32 y)
331;; 	      Multiply unsigned long by unsigned long; result is unsigned long long. Uses hardware MPY16.
332;; uint64 __mspabi_mpyull_hw32 (uint32 x, uint32 y)
333;; 	      Multiply unsigned long by unsigned long; result is unsigned long long. Uses hardware MPY32 (F4xx devices).
334;; uint64 __mspabi_mpyull_f5hw (uint32 x, uint32 y)
335;;            Multiply unsigned long by unsigned long; result is unsigned long long. Uses hardware MPY32 (F5xx devices and up).
336
337;;;; The register names below are the standardised versions used across TI
338;;;; literature.  Each symbol is the absolute address of a memory mapped multiplier register.
339
340;; Hardware multiply register addresses for devices with 16-bit hardware
341;; multiply.
342.set MPY,	0x0130	; Operand 1, unsigned multiply
343.set MPYS,	0x0132	; Operand 1, signed multiply
344.set MAC, 	0x0134	; Operand 1, multiply and accumulate
345.set OP2, 	0x0138	; Operand 2, writing it triggers the operation
346.set RESLO,	0x013A	; Result, low word
347.set RESHI,	0x013C	; Result, high word
348;; Hardware multiply register addresses for devices with 32-bit (non-f5)
349;; hardware multiply.
350.set MPY32L,	0x0140	; 32-bit operand 1 low word, unsigned multiply
351.set MPY32H,	0x0142	; 32-bit operand 1 high word, unsigned multiply
352.set MPYS32L,	0x0144	; 32-bit operand 1 low word, signed multiply
353.set MPYS32H,	0x0146	; 32-bit operand 1 high word, signed multiply
354.set OP2L,	0x0150	; 32-bit operand 2 low word
355.set OP2H,	0x0152	; 32-bit operand 2 high word, writing it triggers the multiply
356.set RES0,	0x0154	; 64-bit result, lowest word
357.set RES1,	0x0156
358.set RES2,	0x0158
359.set RES3,	0x015A	; 64-bit result, highest word
360;; Hardware multiply register addresses for devices with f5series hardware
361;; multiply.
362;; The F5xxx series of MCUs support the same 16-bit and 32-bit multiply
363;; as the second generation hardware, but they are accessed from different
364;; memory registers.
365;; These names AREN'T standard.  We've appended _F5 to the standard names.
366.set MPY_F5,		0x04C0
367.set MPYS_F5,		0x04C2
368.set MAC_F5,		0x04C4
369.set OP2_F5,		0x04C8
370.set RESLO_F5,		0x04CA
371.set RESHI_F5,		0x04CC
372.set MPY32L_F5,		0x04D0
373.set MPY32H_F5,		0x04D2
374.set MPYS32L_F5,	0x04D4
375.set MPYS32H_F5,	0x04D6
376.set OP2L_F5,		0x04E0
377.set OP2H_F5,		0x04E2
378.set RES0_F5,		0x04E4
379.set RES1_F5,		0x04E6
380.set RES2_F5,		0x04E8
381.set RES3_F5,		0x04EA
382
383#if defined MUL_16
384;;  First generation MSP430 hardware multiplies (16-bit multiplier registers only) ...
385
386	start_func __mulhi2 __mspabi_mpyi  __mspabi_mpyi_hw
387	mult16 MPY, OP2, RESLO
388	end_func   __mulhi2
389
390	start_func __mulhisi2  __mspabi_mpysl  __mspabi_mpysl_hw
391	mult1632 MPYS, OP2, RESLO, RESHI
392	end_func   __mulhisi2
393
394	start_func __umulhisi2  __mspabi_mpyul  __mspabi_mpyul_hw
395	mult1632 MPY, OP2, RESLO, RESHI
396	end_func   __umulhisi2
397
398	start_func __mulsi2  __mspabi_mpyl  __mspabi_mpyl_hw
399	mult32 MPY, OP2, MAC, OP2, RESLO, RESHI
400	end_func   __mulsi2
401
402	;; FIXME: We do not have hardware implementations of these 64-bit result
403	;; routines, so just jump to the software versions instead.
404	fake_func __mulsidi2   __mspabi_mpysll  __mspabi_mpysll_hw
405	fake_func __umulsidi2  __mspabi_mpyull  __mspabi_mpyull_hw
406	fake_func __muldi3     __mspabi_mpyll   __mspabi_mpyll_hw
407
408#elif defined MUL_32
409;;  Second generation MSP430 hardware multiplies (16-bit routines use the same registers as above, wider ones use the 32-bit registers) ...
410
411	start_func __mulhi2  __mspabi_mpyi  __mspabi_mpyi_hw
412	mult16 MPY, OP2, RESLO
413	end_func   __mulhi2
414
415	start_func __mulhisi2  __mspabi_mpysl  __mspabi_mpysl_hw
416	mult1632 MPYS, OP2, RESLO, RESHI
417	end_func   __mulhisi2
418
419	start_func __umulhisi2  __mspabi_mpyul  __mspabi_mpyul_hw
420	mult1632 MPY, OP2, RESLO, RESHI
421	end_func   __umulhisi2
422
423	start_func __mulsi2  __mspabi_mpyl  __mspabi_mpyl_hw32
424	mult32_hw MPY32L, MPY32H, OP2L, OP2H, RES0, RES1
425	end_func   __mulsi2
426
427	start_func __mulsidi2  __mspabi_mpysll  __mspabi_mpysll_hw32
428	mult3264_hw MPYS32L, MPYS32H, OP2L, OP2H, RES0, RES1, RES2, RES3
429	end_func   __mulsidi2
430
431	start_func __umulsidi2 __mspabi_mpyull  __mspabi_mpyull_hw32
432	mult3264_hw MPY32L, MPY32H, OP2L, OP2H, RES0, RES1, RES2, RES3
433	end_func   __umulsidi2
434
435	start_func __muldi3   __mspabi_mpyll __mspabi_mpyll_hw32
436	mult64_hw MPY32L, MPY32H, OP2L, OP2H, RES0, RES1, RES2, RES3
437	end_func __muldi3
438
439#elif defined MUL_F5
440/* The F5xxx series of MCUs support the same 16-bit and 32-bit multiply
441   as the second generation hardware, but they are accessed from different
442   memory registers.  */
443
444	start_func __mulhi2 __mspabi_mpyi  __mspabi_mpyi_f5hw
445	mult16 MPY_F5, OP2_F5, RESLO_F5
446	end_func   __mulhi2
447
448	start_func __mulhisi2  __mspabi_mpysl  __mspabi_mpysl_f5hw
449	mult1632 MPYS_F5, OP2_F5, RESLO_F5, RESHI_F5
450	end_func   __mulhisi2
451
452	start_func __umulhisi2  __mspabi_mpyul  __mspabi_mpyul_f5hw
453	mult1632 MPY_F5, OP2_F5, RESLO_F5, RESHI_F5
454	end_func   __umulhisi2
455
456	start_func __mulsi2  __mspabi_mpyl  __mspabi_mpyl_f5hw
457	mult32_hw MPY32L_F5, MPY32H_F5, OP2L_F5, OP2H_F5, RES0_F5, RES1_F5
458	end_func   __mulsi2
459
460	start_func __mulsidi2  __mspabi_mpysll  __mspabi_mpysll_f5hw
461	mult3264_hw MPYS32L_F5, MPYS32H_F5, OP2L_F5, OP2H_F5, RES0_F5, RES1_F5, RES2_F5, RES3_F5
462	end_func   __mulsidi2
463
464	start_func __umulsidi2  __mspabi_mpyull  __mspabi_mpyull_f5hw
465	mult3264_hw MPY32L_F5, MPY32H_F5, OP2L_F5, OP2H_F5, RES0_F5, RES1_F5, RES2_F5, RES3_F5
466	end_func   __umulsidi2
467
468	start_func __muldi3   __mspabi_mpyll __mspabi_mpyll_f5hw
469	mult64_hw MPY32L_F5, MPY32H_F5, OP2L_F5, OP2H_F5, RES0_F5, RES1_F5, RES2_F5, RES3_F5
470	end_func __muldi3
471
472#else
473#error MUL type not defined
474#endif
475