xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc64/mode32/sqr_diagonal.asm (revision dd3ee07da436799d8de85f3055253118b76bf345)
1dnl  PowerPC-64 mpn_sqr_diagonal.
2
3dnl  Copyright 2001-2003, 2005, 2006, 2010 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C		    cycles/limb
34C POWER3/PPC630		18
35C POWER4/PPC970		 ?
36C POWER5		 7.25
37C POWER6		 9.5
38
39C INPUT PARAMETERS
40define(`rp',  r3)	C destination; two doublewords are stored per source limb
41define(`up',  r4)	C source; one doubleword is loaded per limb
42define(`n',   r5)	C limb count; r5 is reused as scratch once the count is in ctr
43
44ASM_START()
C mpn_sqr_diagonal: square each limb of a vector.
C
C For every 64-bit limb u read from up, the low 64 bits of u*u (mulld) and then
C the high 64 bits (mulhdu) are stored to consecutive doublewords at rp, so rp
C receives two limbs per input limb.
C
C The main loop handles four limbs per pass.  ctr is loaded with (n + 3) >> 2,
C and n & 3 selects where the first pass joins the store sequence: each of the
C b01/b10/b11 preambles squares 1, 2 or 3 leading limbs, biases rp downwards so
C that the shared stores land at the original rp, and jumps to the matching
C label inside the loop.
C
C Registers written: r0, r3-r12, ctr, cr0, cr6.  r5 is reused as a product
C register once the count is in ctr.  No stack is used.
C
C NOTE(review): n = 0 is not handled here; ctr would be loaded with 0 and the
C loop entered anyway.  Presumably callers guarantee n >= 1 - confirm.
45PROLOGUE(mpn_sqr_diagonal)
46ifdef(`HAVE_ABI_mode32',
47`	rldicl	n, n, 0, 32')		C zero extend n
48
49	rldicl.	r0, n, 0,62		C r0 = n & 3, set cr0
50	addi	n, n, 3			C compute count...
51	cmpdi	cr6, r0, 2		C cr6 = (n & 3) compared with 2
52	srdi	n, n, 2			C ...for ctr
53	mtctr	n			C copy count into ctr
54	beq	cr0, L(b00)		C n & 3 == 0: straight into the loop
55	blt	cr6, L(b01)		C n & 3 == 1
56	beq	cr6, L(b10)		C n & 3 == 2
57
C n & 3 == 3: square three limbs, then join the loop at L(11).
58L(b11):	ld	r0, 0(up)
59	ld	r10, 8(up)
60	ld	r12, 16(up)
61	addi	rp, rp, -16		C stores at L(11) start at offset 16
62	mulld	r7, r0, r0
63	mulhdu	r8, r0, r0
64	mulld	r9, r10, r10
65	mulhdu	r10, r10, r10
66	mulld	r11, r12, r12
67	mulhdu	r12, r12, r12
68	addi	up, up, 24		C three limbs consumed
69	b	L(11)
70
71	ALIGN(16)
C n & 3 == 1: square one limb, then join the loop at L(01).
72L(b01):	ld	r0, 0(up)
73	addi	rp, rp, -48		C stores at L(01) start at offset 48
74	addi	up, up, 8		C one limb consumed
75	mulld	r11, r0, r0
76	mulhdu	r12, r0, r0
77	b	L(01)
78
79	ALIGN(16)
C n & 3 == 2: square two limbs, then join the loop at L(10).
80L(b10):	ld	r0, 0(up)
81	ld	r12, 8(up)
82	addi	rp, rp, -32		C stores at L(10) start at offset 32
83	addi	up, up, 16		C two limbs consumed
84	mulld	r9, r0, r0
85	mulhdu	r10, r0, r0
86	mulld	r11, r12, r12
87	mulhdu	r12, r12, r12
88	b	L(10)
89
90	ALIGN(32)
C Main loop: four limbs loaded and eight doublewords stored per full pass.
91L(b00):
92L(top):	ld	r0, 0(up)
93	ld	r8, 8(up)
94	ld	r10, 16(up)
95	ld	r12, 24(up)
96	mulld	r5, r0, r0		C r5 is free here, the count lives in ctr
97	mulhdu	r6, r0, r0
98	mulld	r7, r8, r8
99	mulhdu	r8, r8, r8
100	mulld	r9, r10, r10
101	mulhdu	r10, r10, r10
102	mulld	r11, r12, r12
103	mulhdu	r12, r12, r12
104	addi	up, up, 32		C four limbs consumed
105	std	r5, 0(rp)
106	std	r6, 8(rp)
107L(11):	std	r7, 16(rp)
108	std	r8, 24(rp)
109L(10):	std	r9, 32(rp)
110	std	r10, 40(rp)
111L(01):	std	r11, 48(rp)
112	std	r12, 56(rp)
113	addi	rp, rp, 64		C advance past the eight-doubleword window
114	bdnz	L(top)			C decrement ctr, loop while it is nonzero
115
116	blr
117EPILOGUE()
118