xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/ia64/copyd.asm (revision 6cd39ddb8550f6fa1bff3fed32053d7f19fd0453)
1dnl  IA-64 mpn_copyd -- copy limb vector, decrementing.
2
3dnl  Contributed to the GNU project by Torbjorn Granlund.
4
5dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
6
7dnl  This file is part of the GNU MP Library.
8
9dnl  The GNU MP Library is free software; you can redistribute it and/or modify
10dnl  it under the terms of the GNU Lesser General Public License as published
11dnl  by the Free Software Foundation; either version 3 of the License, or (at
12dnl  your option) any later version.
13
14dnl  The GNU MP Library is distributed in the hope that it will be useful, but
15dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17dnl  License for more details.
18
19dnl  You should have received a copy of the GNU Lesser General Public License
20dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
21
22include(`../config.m4')
23
24C         cycles/limb
25C Itanium:    1
26C Itanium 2:  0.5
27
28C INPUT PARAMETERS
29C rp = r32
30C sp = r33
31C n = r34
32
33ASM_START()
C mpn_copyd -- copy the n limbs (8 bytes each) at sp to rp, starting at the
C highest address and working down to the lowest.
C
C Register use in the code below:
C   r32, r20   store pointers, each stepping by -16 bytes
C   r33, r21   load pointers, each stepping by -16 bytes
C   r16-r19    limbs in flight between a load and the matching store
C   r14        n & 3, selects one of the four entry paths
C   r15        (n - 4) >> 2, the value put in ar.lc for the 4-limb loop
C   r2         value of ar.lc on entry, put back before returning
C   p14        set when n <= 3, meaning the 4-limb loop is skipped
34PROLOGUE(mpn_copyd)
C Unwind annotations: the entry value of ar.lc is kept in r2.
35	.prologue
36	.save ar.lc, r2
37	.body
C With the 32-bit ABI the two pointer arguments are widened with addp4 and
C n is sign-extended from 32 bits before use.
38ifdef(`HAVE_ABI_32',
39`	addp4		r32 = 0, r32
40	addp4		r33 = 0, r33
41	sxt4		r34 = r34
42	;;
43')
C Everything up to the stop below is one instruction group, so the shladd,
C and, and cmp all read the original n even though r34 is rewritten here.
44{.mmi
45	shladd		r32 = r34, 3, r32	C r32 = rp + 8*n, one past the top limb
46	shladd		r33 = r34, 3, r33	C r33 = sp + 8*n, one past the top limb
47	mov.i		r2 = ar.lc		C save ar.lc, restored at .Ls00
48}
49{.mmi
50	and		r14 = 3, r34		C r14 = n & 3
51	cmp.ge		p14, p15 = 3, r34	C p14 = (n <= 3)
52	add		r34 = -4, r34		C r34 = n - 4, shifted into r15 later
53	;;
54}
C Dispatch on n & 3.  No predicate is set for n & 3 = 0, which falls
C through to .Lb00.
55{.mmi
56	cmp.eq		p8, p0 = 1, r14
57	cmp.eq		p10, p0 = 2, r14
58	cmp.eq		p12, p0 = 3, r14
59}
60{.bbb
61  (p8)	br.dptk		.Lb01
62  (p10)	br.dptk		.Lb10
63  (p12)	br.dptk		.Lb11
64}
65
66.Lb00:	C  n = 0, 4, 8, 12, ...
67	add		r32 = -8, r32		C point at the top limb of rp
68	add		r33 = -8, r33		C point at the top limb of sp
69  (p14)	br.dptk		.Ls00			C n <= 3 with n & 3 = 0: no stores, just return
70	;;
C Preload the top four limbs into r16-r19.  r21 is formed from r33 in the
C same group as the post-decrementing load, so it sees the old r33.
71	add		r21 = -8, r33
72	ld8		r16 = [r33], -16
73	shr		r15 = r34, 2
74	;;
75	ld8		r17 = [r21], -16
76	mov.i		ar.lc = r15
77	ld8		r18 = [r33], -16
78	add		r20 = -8, r32
79	;;
80	ld8		r19 = [r21], -16
81	br.cloop.dptk	.Loop			C taken only while ar.lc is nonzero
82	;;
83	br.sptk		.Lend			C n = 4: only the four preloaded limbs remain
84	;;
85
86.Lb01:	C  n = 1, 5, 9, 13, ...
C r21 and r20 address the top limb, r33 and r32 the one below it.
87	add		r21 = -8, r33
88	add		r20 = -8, r32
89	add		r33 = -16, r33
90	add		r32 = -16, r32
91	;;
92	ld8		r19 = [r21], -16	C top limb, stored at .Li01 or .Ls01
93	shr		r15 = r34, 2
94  (p14)	br.dptk		.Ls01			C n = 1: store r19 and return
95	;;
96	ld8		r16 = [r33], -16
97	mov.i		ar.lc = r15
98	;;
99	ld8		r17 = [r21], -16
100	ld8		r18 = [r33], -16
101	br.sptk		.Li01			C enter the loop at its last bundle
102	;;
103
104.Lb10:	C  n = 2, 6, 10, 14, ...
C r33 and r32 address the top limb, r21 and r20 the one below it.
105	add		r21 = -16, r33
106	shr		r15 = r34, 2
107	add		r20 = -16, r32
108	add		r32 = -8, r32
109	add		r33 = -8, r33
110	;;
111	ld8		r18 = [r33], -16	C top limb
112	ld8		r19 = [r21], -16	C second limb from the top
113	mov.i		ar.lc = r15		C written even when p14 exits; .Ls00 restores ar.lc
114  (p14)	br.dptk		.Ls10			C n = 2: store r18, r19 and return
115	;;
116	ld8		r16 = [r33], -16
117	ld8		r17 = [r21], -16
118	br.sptk		.Li10			C enter the loop at its third bundle
119	;;
120
121.Lb11:	C  n = 3, 7, 11, 15, ...
C r21 and r20 address the top limb, r33 and r32 the one below it.
122	add		r21 = -8, r33
123	add		r20 = -8, r32
124	add		r33 = -16, r33
125	add		r32 = -16, r32
126	;;
127	ld8		r17 = [r21], -16	C top limb
128	shr		r15 = r34, 2
129	;;
130	ld8		r18 = [r33], -16	C second limb from the top
131	mov.i		ar.lc = r15		C written even when p14 exits; .Ls00 restores ar.lc
132	ld8		r19 = [r21], -16	C third limb from the top
133  (p14)	br.dptk		.Ls11			C n = 3: store r17, r18, r19 and return
134	;;
135	ld8		r16 = [r33], -16
136	br.sptk		.Li11			C enter the loop at its second bundle
137	;;
138
139	ALIGN(32)
C Main loop, four limbs per iteration.  Each bundle stores a limb that was
C loaded earlier and reloads the same register from 16 bytes further down.
C Stores alternate between r32 and r20, loads between r33 and r21.
C Entering at .Li11, .Li10 or .Li01 makes the first pass store only 3, 2 or
C 1 limbs, which takes care of the n & 3 remainder.
140.Loop:
141.Li00:
142{.mmb
143	st8		[r32] = r16, -16
144	ld8		r16 = [r33], -16
145	nop.b		0
146}
147.Li11:
148{.mmb
149	st8		[r20] = r17, -16
150	ld8		r17 = [r21], -16
151	nop.b		0
152	;;
153}
154.Li10:
155{.mmb
156	st8		[r32] = r18, -16
157	ld8		r18 = [r33], -16
158	nop.b		0
159}
160.Li01:
161{.mmb
162	st8		[r20] = r19, -16
163	ld8		r19 = [r21], -16
164	br.cloop.dptk	.Loop			C loop while ar.lc is nonzero
165	;;
166}
C Drain: store the limbs still held in r16-r19.  The .Ls labels are the
C exits for n <= 3, where only the last 3, 2, 1 or 0 stores are needed.
167.Lend:	st8		[r32] = r16, -16
168.Ls11:	st8		[r20] = r17, -16
169	;;
170.Ls10:	st8		[r32] = r18, -16
171.Ls01:	st8		[r20] = r19, -16
172.Ls00:	mov.i		ar.lc = r2		C put back the entry value of ar.lc
173	br.ret.sptk.many b0
174EPILOGUE()
175ASM_END()
176