dnl  xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc64/mode64/rsh1aors_n.asm (revision 70f7362772ba52b749c976fb5e86e39a8b2c9afc)
dnl  PowerPC-64 mpn_rsh1add_n, mpn_rsh1sub_n

dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		   cycles/limb
C POWER3/PPC630		 ?
C POWER4/PPC970		 2.9
C POWER5		 ?
C POWER6		 3.5
C POWER7		 2.25

C Registers holding the incoming arguments, as the function below uses them:
C   rp  pointer the result limbs are stored through
C   up  pointer the first operand limbs are loaded from
C   vp  pointer the second operand limbs are loaded from
C   n   limb count
define(`rp', `r3')
define(`up', `r4')
define(`vp', `r5')
define(`n',  `r6')

C Instruction selection for the variant being built; which OPERATION_ symbol
C is defined is decided outside this file.
C   ADDSUBC  starts a carry chain (addc or subfc)
C   ADDSUBE  continues a carry chain (adde or subfe)
C   func     name of the entry point being generated
C With the operand order used by the code (dst, v0, u0) the subtract forms
C compute u0 - v0.
C INITCY is not referenced anywhere in this file.  It is retained unchanged
C except that its name is now quoted like every other define here, so an
C earlier definition of the same name cannot be expanded by accident.
ifdef(`OPERATION_rsh1add_n', `
  define(`ADDSUBC',	`addc')
  define(`ADDSUBE',	`adde')
  define(`INITCY',	`addic	$1, r1, 0')
  define(`func',	mpn_rsh1add_n)')
ifdef(`OPERATION_rsh1sub_n', `
  define(`ADDSUBC',	`subfc')
  define(`ADDSUBE',	`subfe')
  define(`INITCY',	`addic	$1, r1, -1')
  define(`func',	mpn_rsh1sub_n)')

C Working registers.
C   x0, x1  sum (or difference) limbs, used alternately
C   s0, s1  output limbs under construction, used alternately
C   u0, v0  limbs most recently loaded through up and vp
C Note that x0 is r0, which the function entry code also uses directly as
C scratch while computing the loop count, before the first write to x0.
define(`s0', `r9')
define(`s1', `r7')
define(`x0', `r0')
define(`x1', `r12')
define(`u0', `r8')
define(`v0', `r10')

MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)

ASM_START()

C func (rp, up, vp, n)
C
C Combine the n-limb operands at up and vp with ADDSUBC/ADDSUBE (addition for
C mpn_rsh1add_n, up minus vp for mpn_rsh1sub_n), shift the result right by one
C bit and store n limbs at rp.  The carry (or borrow) out of the top limb is
C inserted as the most significant bit of the top stored limb.  The bit
C shifted out at the bottom, i.e. bit 0 of the lowest sum limb, is kept in
C r11 and returned in r3.
C
C Every output limb is (sum[i] >> 1) with bit 0 of sum[i+1] inserted at the
C top.  srdi produces the shifted part and rldimi ..., 63, 0 inserts the bit.
C
C The main loop is unrolled four ways and software pipelined: each quarter
C loads the next operand limbs, completes and stores one output limb, and
C forms one more sum limb.  ctr is loaded with (n+1) >> 2, and the entry
C point into the loop is picked from n mod 4.  Before entry, rp (and up/vp)
C are biased so that the first store executed lands on limb 0 of rp.
C
C The carry chain lives in CA from the initial ADDSUBC to the final ADDSUBE;
C the loads, stores, rotates, addi, andi. and branches in between do not
C modify CA.
C
C Limb 0 is loaded unconditionally and no path special-cases n = 0, so
C callers are presumably required to pass n >= 1 (TODO confirm against the
C caller contract).
PROLOGUE(func)
	ld	u0, 0(up)		C limb 0 of each operand
	ld	v0, 0(vp)

	cmpdi	cr6, n, 2		C cr6 is tested later to catch n <= 2

	addi	r0, n, 1
	srdi	r0, r0, 2
	mtctr	r0			C ctr = (n+1) >> 2

	andi.	r0, n, 1
	bne	cr0, L(bx1)		C taken when n is odd

C n even: sum limb 0 goes to x1, sum limb 1 to x0.
L(bx0):	ADDSUBC	x1, v0, u0
	ld	u0, 8(up)
	ld	v0, 8(vp)
	ADDSUBE	x0, v0, u0
	ble	cr6, L(n2)		C even n <= 2: finish in the short tail
	ld	u0, 16(up)
	ld	v0, 16(vp)
	srdi	s0, x1, 1		C low 63 bits of output limb 0
	rldicl	r11, x1, 0, 63		C return value
	ADDSUBE	x1, v0, u0
	andi.	n, n, 2
	bne	cr0, L(b10)
L(b00):	addi	rp, rp, -24		C n = 0 mod 4: first store is 24(rp)
	b	L(lo0)
L(b10):	addi	up, up, 16		C n = 2 mod 4: first store is 8(rp)
	addi	vp, vp, 16
	addi	rp, rp, -8
	b	L(lo2)

C n odd: sum limb 0 goes to x0, sum limb 1 to x1.
	ALIGN(16)
L(bx1):	ADDSUBC	x0, v0, u0
	ble	cr6, L(n1)		C odd n <= 2, i.e. n = 1
	ld	u0, 8(up)
	ld	v0, 8(vp)
	ADDSUBE	x1, v0, u0
	ld	u0, 16(up)
	ld	v0, 16(vp)
	srdi	s1, x0, 1		C low 63 bits of output limb 0
	rldicl	r11, x0, 0, 63		C return value
	ADDSUBE	x0, v0, u0
	andi.	n, n, 2
	bne	cr0, L(b11)
L(b01):	addi	up, up, 8		C n = 1 mod 4: first store is 16(rp)
	addi	vp, vp, 8
	addi	rp, rp, -16
	b	L(lo1)
L(b11):	addi	up, up, 24		C n = 3 mod 4: first store is 0(rp)
	addi	vp, vp, 24
	bdz	L(end)			C skip the loop when ctr reaches zero

C Main loop, four limbs per pass.
	ALIGN(32)
L(top):	ld	u0, 0(up)
	ld	v0, 0(vp)
	srdi	s0, x1, 1
	rldimi	s1, x1, 63, 0
	std	s1, 0(rp)
	ADDSUBE	x1, v0, u0
L(lo2):	ld	u0, 8(up)
	ld	v0, 8(vp)
	srdi	s1, x0, 1
	rldimi	s0, x0, 63, 0
	std	s0, 8(rp)
	ADDSUBE	x0, v0, u0
L(lo1):	ld	u0, 16(up)
	ld	v0, 16(vp)
	srdi	s0, x1, 1
	rldimi	s1, x1, 63, 0
	std	s1, 16(rp)
	ADDSUBE	x1, v0, u0
L(lo0):	ld	u0, 24(up)
	ld	v0, 24(vp)
	srdi	s1, x0, 1
	rldimi	s0, x0, 63, 0
	std	s0, 24(rp)
	ADDSUBE	x0, v0, u0
	addi	up, up, 32
	addi	vp, vp, 32
	addi	rp, rp, 32
	bdnz	L(top)

C Wind-down: complete and store the three output limbs still in registers.
L(end):	srdi	s0, x1, 1
	rldimi	s1, x1, 63, 0
	std	s1, 0(rp)
L(cj2):	srdi	s1, x0, 1
	rldimi	s0, x0, 63, 0
	std	s0, 8(rp)
C Materialise the final CA: afterwards bit 0 of x1 is the carry out for the
C add variant and the borrow out for the subtract variant.  It becomes the
C most significant bit of the top output limb.
L(cj1):	ADDSUBE	x1, x1, x1		C pseudo-depends on x1
	rldimi	s1, x1, 63, 0
	std	s1, 16(rp)
	mr	r3, r11
	blr

C n = 1: the single sum limb is in x0.
L(n1):	srdi	s1, x0, 1
	rldicl	r11, x0, 0, 63		C return value
	ADDSUBE	x1, x1, x1		C pseudo-depends on x1
	rldimi	s1, x1, 63, 0
	std	s1, 0(rp)
	mr	r3, r11
	blr

C Even n <= 2: bias rp by one limb so the 8(rp) and 16(rp) stores of the
C shared cj2/cj1 tail address limbs 0 and 1.
L(n2):	addi	rp, rp, -8
	srdi	s0, x1, 1
	rldicl	r11, x1, 0, 63		C return value
	b	L(cj2)
EPILOGUE()
