xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/s390_32/copyd.asm (revision 37afb7eb6895c833050f8bfb1d1bb2f99f332539)
1dnl  S/390-32 mpn_copyd
2
3dnl  Copyright 2011 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20
21include(`../config.m4')
22
C            cycles/limb
25C z900		 1.65
26C z990           1.125
27C z9		 ?
28C z10		 ?
29C z196		 ?
30
31C FIXME:
32C  * Avoid saving/restoring callee-saves registers for n < 3.  This could be
33C    done by setting rp=r1, up=r2, i=r0 and r3,r4,r5 for clock regs.
34C    We could then use r3...r10 in main loop.
35
C INPUT PARAMETERS
define(`rp_param',	`%r2')
define(`up_param',	`%r3')
define(`n',		`%r4')

C WORKING REGISTERS
C  rp, up  biased destination and source cursors
C  off     byte offset 4*n - 32, live only until the cursors are set up
C  rem     n mod 8, selects the tail block; reused as data in the main loop
C  cnt     (n + 8) / 8, counted down by brct
C  step    constant -32, one 8-limb block of 4-byte limbs
define(`rp',	`%r8')
define(`up',	`%r9')
define(`off',	`%r1')
define(`rem',	`%r7')
define(`cnt',	`%r10')
define(`step',	`%r11')

C mpn_copyd -- copy n limbs from up_param to rp_param, highest addresses first.
C
C Both cursors are biased to 32 bytes below the end of their operand, so
C that 0(up) and 0(rp) address the topmost group of eight limbs.  The n mod 8
C limbs at the very top are moved by one of the tail blocks, using offset
C 32 - 4*k for k limbs, after which the cursors are lowered by 4*k bytes.
C The rest is moved eight limbs per iteration by L(loop).
C
C cnt starts at floor(n/8) + 1.  Every tail block ends in a brct, which
C decrements cnt and branches only if the result is nonzero, so L(loop)
C runs floor(n/8) times and is skipped entirely when n < 8.
C
C The lm in a tail block overwrites r0 up to r6 depending on k.  That
C includes off, the incoming parameter registers and n, all of which are
C dead by the time a tail block is reached.

ASM_START()
PROLOGUE(mpn_copyd)
	stm	%r6, %r11, 24(%r15)	C save r6..r11, reloaded at L(done)

	lr	off, n
	sll	off, 2			C off = 4*n
	la	cnt, 8(n)
	ahi	off, -32		C off = 4*n - 32
	srl	cnt, 3			C cnt = (n + 8) / 8
	lhi	step, -32

	la	rp, 0(off,rp_param)	C FIXME use lay on z990 and later
	la	up, 0(off,up_param)	C FIXME use lay on z990 and later

	lhi	rem, 7
	nr	rem, n			C n mod 8
	chi	rem, 2
	jh	L(t3to7)		C rem is 3..7
	chi	rem, 1
	je	L(t1)
	jh	L(t2)

C rem = 0: nothing on top, go straight to the 8-limb blocks
L(t0):	brct	cnt, L(loop)
	j	L(done)

C rem = 1: move the single top limb
L(t1):	l	%r0, 28(up)
	ahi	up, -4
	st	%r0, 28(rp)
	ahi	rp, -4
	brct	cnt, L(loop)
	j	L(done)

C rem = 2: move the top two limbs
L(t2):	lm	%r0, %r1, 24(up)
	ahi	up, -8
	stm	%r0, %r1, 24(rp)
	ahi	rp, -8
	brct	cnt, L(loop)
	j	L(done)

C Second level of the dispatch, rem is 3..7 here
L(t3to7):
	chi	rem, 4
	jl	L(t3)
	je	L(t4)
	chi	rem, 6
	je	L(t6)
	jh	L(t7)

C rem = 5: reached by falling through the dispatch above
L(t5):	lm	%r0, %r4, 12(up)
	ahi	up, -20
	stm	%r0, %r4, 12(rp)
	ahi	rp, -20
	brct	cnt, L(loop)
	j	L(done)

C rem = 3
L(t3):	lm	%r0, %r2, 20(up)
	ahi	up, -12
	stm	%r0, %r2, 20(rp)
	ahi	rp, -12
	brct	cnt, L(loop)
	j	L(done)

C rem = 4
L(t4):	lm	%r0, %r3, 16(up)
	ahi	up, -16
	stm	%r0, %r3, 16(rp)
	ahi	rp, -16
	brct	cnt, L(loop)
	j	L(done)

C rem = 6
L(t6):	lm	%r0, %r5, 8(up)
	ahi	up, -24
	stm	%r0, %r5, 8(rp)
	ahi	rp, -24
	brct	cnt, L(loop)
	j	L(done)

C rem = 7: r6 is used as data here, it was saved on entry
L(t7):	lm	%r0, %r6, 4(up)
	ahi	up, -28
	stm	%r0, %r6, 4(rp)
	ahi	rp, -28
	brct	cnt, L(loop)
	j	L(done)

C Main loop: eight limbs per pass through r0..r7, cursors move down by 32
L(loop):	lm	%r0, %r7, 0(up)
	la	up, 0(step,up)		C up -= 32
	stm	%r0, %r7, 0(rp)
	la	rp, 0(step,rp)		C rp -= 32
	brct	cnt, L(loop)

L(done):	lm	%r6, %r11, 24(%r15)	C restore r6..r11
	br	%r14
EPILOGUE()
135