xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/ia64/copyi.asm (revision af56d1fe9956bd7c616e18c1b7f025f464618471)
1dnl  IA-64 mpn_copyi -- copy limb vector, incrementing.
2
3dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22C         cycles/limb
23C Itanium:    1
24C Itanium 2:  0.5
25
26C INPUT PARAMETERS
27C rp = r32
28C sp = r33
29C n = r34
30
31ASM_START()
C mpn_copyi: copy n limbs from sp to rp, walking both vectors upward.
C
C Register use in the code below:
C   r32, r20   store pointers, one limb apart, each post-incremented by 16
C   r33, r21   load pointers, one limb apart, each post-incremented by 16
C   r34        n on entry, then n - 4
C   r14        n mod 4, selects one of the entry sequences .Lb00 .. .Lb11
C   r15        (n - 4) >> 2, moved to ar.lc as the br.cloop trip count
C   r16-r19    limbs that have been loaded but not yet stored
C   r2         copy of the incoming ar.lc, put back at .Ls00
C   p14        set when n <= 3; the loop is then skipped and only the
C              tail stores run
32PROLOGUE(mpn_copyi)
33	.prologue
34	.save ar.lc, r2
35	.body
C For the 32-bit ABI, widen the two pointer arguments with addp4 and
C sign-extend the count with sxt4 before they are used.
36ifdef(`HAVE_ABI_32',
37`	addp4		r32 = 0, r32
38	addp4		r33 = 0, r33
39	sxt4		r34 = r34
40	;;
41')
C Keep the incoming ar.lc in r2; every exit path restores it at .Ls00.
42{.mmi
43	nop		0
44	nop		0
45	mov.i		r2 = ar.lc
46}
C r14 = n mod 4, p14 = (n <= 3), r34 = n - 4.  The compare sits in the
C same instruction group as the add that rewrites r34, so it tests the
C original n.
47{.mmi
48	and		r14 = 3, r34
49	cmp.ge		p14, p15 = 3, r34
50	add		r34 = -4, r34
51	;;
52}
C Dispatch on n mod 4.  Residue 0 sets none of p8, p10, p12 and falls
C through to .Lb00.
53{.mmi
54	cmp.eq		p8, p0 = 1, r14
55	cmp.eq		p10, p0 = 2, r14
56	cmp.eq		p12, p0 = 3, r14
57}
58{.bbb
59  (p8)	br.dptk		.Lb01
60  (p10)	br.dptk		.Lb10
61  (p12)	br.dptk		.Lb11
62}
63
64.Lb00:	C  n = 0, 4, 8, 12, ...
C n = 0 takes the p14 branch straight to the exit, before any load.
65  (p14)	br.dptk		.Ls00
66	;;
C r33/r32 handle limbs 0, 2, ... and r21/r20 handle limbs 1, 3, ...
67	add		r21 = 8, r33
68	ld8		r16 = [r33], 16
69	shr		r15 = r34, 2
70	;;
71	ld8		r17 = [r21], 16
72	mov.i		ar.lc = r15
73	ld8		r18 = [r33], 16
74	add		r20 = 8, r32
75	;;
C Load the fourth limb.  br.cloop enters the loop when ar.lc is nonzero;
C for n = 4 it falls through and .Lend stores the four loaded limbs.
76	ld8		r19 = [r21], 16
77	br.cloop.dptk	.Loop
78	;;
79	br.sptk		.Lend
80	;;
81
82.Lb01:	C  n = 1, 5, 9, 13, ...
C Here r21/r20 start at limb 0 and r33/r32 start at limb 1.
83	add		r21 = 0, r33
84	add		r20 = 0, r32
85	add		r33 = 8, r33
86	add		r32 = 8, r32
87	;;
C n = 1 leaves through .Ls01 with only r19 loaded.
88	ld8		r19 = [r21], 16
89	shr		r15 = r34, 2
90  (p14)	br.dptk		.Ls01
91	;;
C Otherwise load three more limbs and join the loop at .Li01, whose
C store of r19 is the first one pending.
92	ld8		r16 = [r33], 16
93	mov.i		ar.lc = r15
94	;;
95	ld8		r17 = [r21], 16
96	ld8		r18 = [r33], 16
97	br.sptk		.Li01
98	;;
99
100.Lb10:	C  n = 2, 6, 10, 14, ...
C r33/r32 start at limb 0 and r21/r20 start at limb 1.
101	add		r21 = 8, r33
102	add		r20 = 8, r32
103	ld8		r18 = [r33], 16
104	shr		r15 = r34, 2
105	;;
C ar.lc is written before the p14 branch is taken.  For n = 2 the loop
C is skipped and .Ls00 restores ar.lc, so the value written is unused.
106	ld8		r19 = [r21], 16
107	mov.i		ar.lc = r15
108  (p14)	br.dptk		.Ls10
109	;;
C Two more limbs, then join the loop at .Li10 (store of r18 first).
110	ld8		r16 = [r33], 16
111	ld8		r17 = [r21], 16
112	br.sptk		.Li10
113	;;
114
115.Lb11:	C  n = 3, 7, 11, 15, ...
C As for .Lb01: r21/r20 start at limb 0, r33/r32 start at limb 1.
116	add		r21 = 0, r33
117	add		r20 = 0, r32
118	add		r33 = 8, r33
119	add		r32 = 8, r32
120	;;
121	ld8		r17 = [r21], 16
122	shr		r15 = r34, 2
123	;;
C n = 3 leaves through .Ls11 with r17, r18, r19 loaded.  As in .Lb10
C the ar.lc value written on that path is never used by br.cloop.
124	ld8		r18 = [r33], 16
125	mov.i		ar.lc = r15
126	ld8		r19 = [r21], 16
127  (p14)	br.dptk		.Ls11
128	;;
C One more limb, then join the loop at .Li11 (store of r17 first).
129	ld8		r16 = [r33], 16
130	br.sptk		.Li11
131	;;
132
C Main loop: four limbs per pass.  Each bundle stores a limb loaded
C earlier and reloads the same register from four limbs further on.
C .Li11, .Li10 and .Li01 are mid-loop entry points used by the setup
C sequences above; .Li00 is the top of the loop.
133	ALIGN(32)
134.Loop:
135.Li00:
136{.mmb
137	st8		[r32] = r16, 16
138	ld8		r16 = [r33], 16
139	nop.b		0
140}
141.Li11:
142{.mmb
143	st8		[r20] = r17, 16
144	ld8		r17 = [r21], 16
145	nop.b		0
146	;;
147}
148.Li10:
149{.mmb
150	st8		[r32] = r18, 16
151	ld8		r18 = [r33], 16
152	nop.b		0
153}
154.Li01:
155{.mmb
156	st8		[r20] = r19, 16
157	ld8		r19 = [r21], 16
158	br.cloop.dptk	.Loop
159	;;
160}
C Drain the limbs still held in registers.  Entering at .Lend stores
C four limbs, at .Ls11 three, at .Ls10 two, at .Ls01 one and at .Ls00
C none.  All paths then restore ar.lc from r2 and return.
161.Lend:	st8		[r32] = r16, 16
162.Ls11:	st8		[r20] = r17, 16
163	;;
164.Ls10:	st8		[r32] = r18, 16
165.Ls01:	st8		[r20] = r19, 16
166.Ls00:	mov.i		ar.lc = r2
167	br.ret.sptk.many b0
168EPILOGUE()
169ASM_END()
170