xref: /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/mips/mips-mont.S (revision e0ea3921ea68e51b93ffc215f08ae1647c8e1796)
1#include "mips_arch.h"
2
3.text
4
/*
 * The code below names $1 ($at) directly as a scratch register for
 * compare results and masks; ".set noat" lets it do so without the
 * assembler objecting.
 */
5.set	noat
/*
 * From here on branch delay slots are filled by hand: the instruction
 * written after a branch or jump is its delay slot.  Stretches that want
 * the assembler to schedule for them switch to ".set reorder" locally.
 */
6.set	noreorder
7
/*
 * bn_mul_mont -- exported entry point; a size gate in front of
 * bn_mul_mont_internal.
 *
 * In:   $4..$7    first four arguments, passed through untouched
 *       16($29)   fifth argument, loaded into $8
 *       20($29)   sixth argument, the word count ("num"), loaded into $9
 *       $31       return address
 * Out:  when num < 4 or num >= 17 (signed compare, so negative counts
 *       too) nothing is computed and the routine returns with
 *       $2 = $4 = 0.  Otherwise control is transferred by a plain branch
 *       to bn_mul_mont_internal, which inherits $4..$9 and $31 and
 *       returns straight to the original caller.
 * Uses: $1, $2, $4 (only on the early-return path), $8, $9.
 *
 * NOTE(review): a zero return presumably tells the caller to use some
 * other implementation -- confirm against the C caller.
 *
 * Assembled under ".set noreorder": the instruction written after each
 * branch/jump is its delay slot and executes whether or not the branch
 * is taken.
 */
8.align	5
9.globl	bn_mul_mont
10.ent	bn_mul_mont
11bn_mul_mont:
12	lw	$8,16($29)	# $8 = fifth argument (stack slot)
13	lw	$9,20($29)	# $9 = num (stack slot)
14	slt	$1,$9,4	# $1 = (num < 4), signed
15	bnez	$1,1f	# too short: return 0
16	li	$2,0	# delay slot, always runs: preset return value 0
17	slt	$1,$9,17	# on in-order CPU
18	bnez	$1,bn_mul_mont_internal	# 4 <= num < 17: do the work
19	nop	# delay slot
201:	jr	$31	# early return (num < 4 or num >= 17)
21	li	$4,0	# delay slot: $4 cleared as well
22.end	bn_mul_mont
23
/*
 * bn_mul_mont_internal -- body of the Montgomery multiplication, working
 * one 32-bit word at a time.
 *
 * Not exported; it is reached by the branch at the end of bn_mul_mont and
 * therefore inherits that routine's register state:
 *   $4   rp   result vector (written by .Lsub and .Lcopy)
 *   $5   ap   first operand vector
 *   $6   bp   second operand vector
 *   $7   np   vector multiplied by m1 in every pass and subtracted from
 *             tp at the end (the modulus, going by the tp[i]-np[i] step)
 *   $8        pointer to one word, dereferenced once below and used as
 *             the multiplier that produces m1 (presumably the Montgomery
 *             n0 constant -- confirm with the caller)
 *   $9   num  word count, 4..16 as enforced by bn_mul_mont; turned into a
 *             byte count right after the prologue
 *   $31       return address of the original caller
 *
 * Working registers:
 *   $13        bp[i] for the current outer pass
 *   $12, $14   ap[j], np[j]
 *   $16/$17    low/high word of the pending ap[j]*bp[i] product
 *   $18/$19    low/high word of the pending np[j]*m1 product
 *   $10/$11    lo0/hi0: running sum and carry of the ap*bp chain
 *   $24/$25    lo1/hi1: running sum and carry of the np*m1 chain
 *   $23        m1: low word of lo0 times the word loaded into $8
 *   $20        tp word being folded in; later reused as &tp[num]
 *   $15        cursor into tp
 *   $21, $22   i and j, kept as byte offsets
 *   $1, $2     carry/borrow flags and loop conditions
 *
 * Stack: a 14-word frame holding $16..$23 and $30, and below it a scratch
 * vector tp of at least num+2 words whose base is rounded down to a
 * 4096-byte boundary.  $30 records the frame base so the epilogue can
 * drop the variable-sized part in one move.
 *
 * The parenthesised multu/mflo/mfhi forms are presumably macros supplied
 * by mips_arch.h -- confirm there.
 *
 * Returns 1 in $2 (and in $4).
 */
24.align	5
25.ent	bn_mul_mont_internal
26bn_mul_mont_internal:
27	.frame	$30,14*4,$31	# frame register $30, 14-word frame, return address in $31
28	.mask	0x40000000|16711680,-4	# saved set: bit 30 plus bits 16-23 (16711680 = 0x00ff0000)
29	subu $29,14*4	# allocate the fixed 14-word frame
30	sw	$30,(14-1)*4($29)
31	sw	$23,(14-2)*4($29)
32	sw	$22,(14-3)*4($29)
33	sw	$21,(14-4)*4($29)
34	sw	$20,(14-5)*4($29)
35	sw	$19,(14-6)*4($29)
36	sw	$18,(14-7)*4($29)
37	sw	$17,(14-8)*4($29)
38	sw	$16,(14-9)*4($29)
39	move	$30,$29	# $30 = frame base, used to unwind in the epilogue
40
/*
 * Set up the tp scratch vector and compute the first two columns of the
 * i = 0 pass.  The assembler handles scheduling here (.set reorder).
 */
41	.set	reorder
42	lw	$8,0($8)	# $8 = word that the fifth argument points to
43	lw	$13,0($6)	# bp[0]
44	lw	$12,0($5)	# ap[0]
45	lw	$14,0($7)	# np[0]
46
47	subu $29,2*4	# place for two extra words
48	sll	$9,2	# num: words -> bytes
49	li	$1,-4096	# mask that clears the low 12 bits
50	subu $29,$9	# reserve num more words for tp
51	and	$29,$1	# round tp base down to a 4096-byte boundary
52
53	multu	($12,$13)	# ap[0]*bp[0]
54	lw	$17,4($5)	# ap[1]
55	lw	$19,4($7)	# np[1]
56	mflo	($10,$12,$13)	# lo0
57	mfhi	($11,$12,$13)	# hi0
58	multu	($10,$8)	# lo0 times the word loaded into $8
59	mflo	($23,$10,$8)	# m1 = low word of that product
60
61	multu	($17,$13)	# ap[1]*bp[0]
62	mflo	($16,$17,$13)	# alo
63	mfhi	($17,$17,$13)	# ahi
64
65	multu	($14,$23)	# np[0]*m1
66	mflo	($24,$14,$23)	# lo1
67	mfhi	($25,$14,$23)	# hi1
68	multu	($19,$23)	# np[1]*m1
69	addu	$24,$10	# lo1 += lo0; the sum is never stored, only its carry is used
70	sltu	$1,$24,$10	# carry out of that add
71	addu	$25,$1	# hi1 += carry
72	mflo	($18,$19,$23)	# nlo
73	mfhi	($19,$19,$23)	# nhi
74
75	move	$15,$29	# tp cursor = &tp[0]
76	li	$22,2*4	# j = 2, as a byte offset
/*
 * .L1st: first pass (bp[0]).  Each trip folds the pending ap and np
 * products into their carry chains, stores one word of tp, and starts the
 * multiplications for the next j.  Delay slot filled by hand.
 */
77.align	4
78.L1st:
79	.set	noreorder
80	addu $12,$5,$22	# &ap[j]
81	addu $14,$7,$22	# &np[j]
82	lw	$12,($12)	# ap[j]
83	lw	$14,($14)	# np[j]
84
85	multu	($12,$13)	# ap[j]*bp[0]
86	addu	$10,$16,$11	# lo0 = alo + hi0
87	addu	$24,$18,$25	# lo1 = nlo + hi1
88	sltu	$1,$10,$11	# carry of lo0
89	sltu	$2,$24,$25	# carry of lo1
90	addu	$11,$17,$1	# hi0 = ahi + carry
91	addu	$25,$19,$2	# hi1 = nhi + carry
92	mflo	($16,$12,$13)	# next alo
93	mfhi	($17,$12,$13)	# next ahi
94
95	addu	$24,$10	# lo1 += lo0
96	sltu	$1,$24,$10	# carry out of that add
97	multu	($14,$23)	# np[j]*m1
98	addu	$25,$1	# hi1 += carry
99	addu	$22,4	# j++
100	sw	$24,($15)	# store the finished word at the tp cursor
101	sltu	$2,$22,$9	# j < num ?
102	mflo	($18,$14,$23)	# next nlo
103	mfhi	($19,$14,$23)	# next nhi
104
105	bnez	$2,.L1st
106	addu $15,4	# delay slot: advance tp cursor
107	.set	reorder
108
/* Drain the products still pending after the last .L1st trip. */
109	addu	$10,$16,$11	# lo0 = alo + hi0
110	sltu	$1,$10,$11
111	addu	$11,$17,$1	# hi0 = ahi + carry
112
113	addu	$24,$18,$25	# lo1 = nlo + hi1
114	sltu	$2,$24,$25
115	addu	$25,$19,$2	# hi1 = nhi + carry
116	addu	$24,$10	# lo1 += lo0
117	sltu	$1,$24,$10
118	addu	$25,$1	# hi1 += carry
119
120	sw	$24,($15)	# last regular word of this pass
121
122	addu	$25,$11	# hi1 += hi0
123	sltu	$1,$25,$11	# carry out of the top word
124	sw	$25,4($15)	# top word
125	sw	$1,2*4($15)	# overflow word above it
126
127	li	$21,4	# i = 1, as a byte offset
/*
 * .Louter: one pass per remaining bp[i].  Same structure as the first
 * pass, except that the previous contents of tp are added in and every
 * result is stored one word below the tp word it consumed.
 */
128.align	4
129.Louter:
130	addu $13,$6,$21	# &bp[i]
131	lw	$13,($13)	# bp[i]
132	lw	$12,($5)	# ap[0]
133	lw	$17,4($5)	# ap[1]
134	lw	$20,($29)	# tp[0]
135
136	multu	($12,$13)	# ap[0]*bp[i]
137	lw	$14,($7)	# np[0]
138	lw	$19,4($7)	# np[1]
139	mflo	($10,$12,$13)	# lo0
140	mfhi	($11,$12,$13)	# hi0
141	addu	$10,$20	# lo0 += tp[0]
142	multu	($10,$8)	# lo0 times the word loaded into $8
143	sltu	$1,$10,$20	# carry of lo0 += tp[0]
144	addu	$11,$1	# hi0 += carry
145	mflo	($23,$10,$8)	# m1 = low word of that product
146
147	multu	($17,$13)	# ap[1]*bp[i]
148	mflo	($16,$17,$13)	# alo
149	mfhi	($17,$17,$13)	# ahi
150
151	multu	($14,$23)	# np[0]*m1
152	mflo	($24,$14,$23)	# lo1
153	mfhi	($25,$14,$23)	# hi1
154
155	multu	($19,$23)	# np[1]*m1
156	addu	$24,$10	# lo1 += lo0; the sum is never stored, only its carry is used
157	sltu	$1,$24,$10
158	addu	$25,$1	# hi1 += carry
159	mflo	($18,$19,$23)	# nlo
160	mfhi	($19,$19,$23)	# nhi
161
162	move	$15,$29	# tp cursor = &tp[0]
163	li	$22,2*4	# j = 2, as a byte offset
164	lw	$20,4($15)	# tp[1], folded in on the first inner trip
/* .Linner: inner loop of an outer pass.  Delay slot filled by hand. */
165.align	4
166.Linner:
167	.set	noreorder
168	addu $12,$5,$22	# &ap[j]
169	addu $14,$7,$22	# &np[j]
170	lw	$12,($12)	# ap[j]
171	lw	$14,($14)	# np[j]
172
173	multu	($12,$13)	# ap[j]*bp[i]
174	addu	$10,$16,$11	# lo0 = alo + hi0
175	addu	$24,$18,$25	# lo1 = nlo + hi1
176	sltu	$1,$10,$11	# carry of lo0
177	sltu	$2,$24,$25	# carry of lo1
178	addu	$11,$17,$1	# hi0 = ahi + carry
179	addu	$25,$19,$2	# hi1 = nhi + carry
180	mflo	($16,$12,$13)	# next alo
181	mfhi	($17,$12,$13)	# next ahi
182
183	addu	$10,$20	# lo0 += previously loaded tp word
184	addu	$22,4	# j++
185	multu	($14,$23)	# np[j]*m1
186	sltu	$1,$10,$20	# carry of that add
187	addu	$24,$10	# lo1 += lo0
188	addu	$11,$1	# hi0 += carry
189	sltu	$2,$24,$10	# carry of lo1 += lo0
190	lw	$20,2*4($15)	# fetch the tp word for the next trip
191	addu	$25,$2	# hi1 += carry
192	sltu	$1,$22,$9	# j < num ?
193	mflo	($18,$14,$23)	# next nlo
194	mfhi	($19,$14,$23)	# next nhi
195	sw	$24,($15)	# store result one word below the tp word consumed
196	bnez	$1,.Linner
197	addu $15,4	# delay slot: advance tp cursor
198	.set	reorder
199
/* Drain the products still pending after the last .Linner trip. */
200	addu	$10,$16,$11	# lo0 = alo + hi0
201	sltu	$1,$10,$11
202	addu	$11,$17,$1	# hi0 = ahi + carry
203	addu	$10,$20	# lo0 += previously loaded tp word
204	sltu	$2,$10,$20
205	addu	$11,$2	# hi0 += carry
206
207	lw	$20,2*4($15)	# overflow word left by the previous pass
208	addu	$24,$18,$25	# lo1 = nlo + hi1
209	sltu	$1,$24,$25
210	addu	$25,$19,$1	# hi1 = nhi + carry
211	addu	$24,$10	# lo1 += lo0
212	sltu	$2,$24,$10
213	addu	$25,$2	# hi1 += carry
214	sw	$24,($15)	# last regular word of this pass
215
216	addu	$24,$25,$11	# hi1 + hi0
217	sltu	$25,$24,$11	# $25 restarts as the carry of that add
218	addu	$24,$20	# + previous overflow word
219	sltu	$1,$24,$20
220	addu	$25,$1	# $25 = new overflow word
221	sw	$24,4($15)	# top word
222	sw	$25,2*4($15)	# overflow word above it
223
224	addu	$21,4	# i++
225	sltu	$2,$21,$9	# i < num ?
226	bnez	$2,.Louter	# still under .set reorder: assembler supplies the delay slot
227
/*
 * Final reduction.  .Lsub writes tp - np into rp while tracking the
 * borrow; .Lcopy then keeps that difference or overwrites it with tp,
 * chosen by mask rather than by branch.  Delay slots are hand-filled from
 * here to the end of the routine.
 */
228	.set	noreorder
229	addu $20,$29,$9	# &tp[num]
230	move	$15,$29	# tp cursor = &tp[0]
231	move	$5,$29	# $5 = tp base; $5 is not read again in this routine
232	li	$11,0		# clear borrow bit
233
234.align	4
235.Lsub:	lw	$10,($15)	# tp[i]
236	lw	$24,($7)	# np[i]
237	addu $15,4	# advance tp cursor
238	addu $7,4	# advance np
239	subu	$24,$10,$24	# tp[i]-np[i]
240	sgtu	$1,$24,$10	# borrow out of that subtraction
241	subu	$10,$24,$11	# subtract the incoming borrow
242	sgtu	$11,$10,$24	# borrow out of that step
243	sw	$10,($4)	# rp[i] = difference
244	or	$11,$1	# combined borrow for the next word
245	sltu	$1,$15,$20	# cursor < &tp[num] ?
246	bnez	$1,.Lsub
247	addu $4,4	# delay slot: advance rp
248
/*
 * $25 still holds the overflow word produced by the last .Louter pass.
 * $11 = overflow - borrow is used as an AND mask selecting tp[i], and its
 * complement selects rp[i]: zero keeps the difference already in rp,
 * all-ones replaces it with tp.
 * NOTE(review): this relies on the value being 0 or all-ones, i.e. the
 * overflow word never exceeding the borrow; nothing here checks that.
 */
249	subu	$11,$25,$11	# handle upmost overflow bit
250	move	$15,$29	# tp cursor = &tp[0]
251	subu $4,$9	# restore rp
252	not	$25,$11	# complementary mask
253
254.Lcopy:	lw	$14,($15)	# conditional move
255	lw	$12,($4)	# rp[i], currently tp[i]-np[i]
256	sw	$0,($15)	# wipe tp[i] once it has been read
257	addu $15,4	# advance tp cursor
258	and	$14,$11	# tp[i] if the mask is all-ones, else 0
259	and	$12,$25	# rp[i] if the mask is zero, else 0
260	or	$12,$14	# merge the two candidates
261	sltu	$1,$15,$20	# cursor < &tp[num] ?
262	sw	$12,($4)	# rp[i] = selected word
263	bnez	$1,.Lcopy
264	addu $4,4	# delay slot: advance rp
265
266	li	$4,1
267	li	$2,1	# return value 1
268
/*
 * Epilogue: drop the tp area by reloading $29 from $30, restore the saved
 * registers, and release the fixed frame in the delay slot of the return.
 * The ".set noreorder" below repeats the mode already in effect since
 * before .Lsub.
 */
269	.set	noreorder
270	move	$29,$30	# back to the frame base, discarding tp
271	lw	$30,(14-1)*4($29)
272	lw	$23,(14-2)*4($29)
273	lw	$22,(14-3)*4($29)
274	lw	$21,(14-4)*4($29)
275	lw	$20,(14-5)*4($29)
276	lw	$19,(14-6)*4($29)
277	lw	$18,(14-7)*4($29)
278	lw	$17,(14-8)*4($29)
279	lw	$16,(14-9)*4($29)
280	jr	$31	# return to the caller of bn_mul_mont
281	addu $29,14*4	# delay slot: release the 14-word frame
282.end	bn_mul_mont_internal
/*
 * Identification string emitted into the read-only data section.  It
 * carries no label, so nothing in this file refers to it by name.
 */
283.rdata
284.asciiz	"Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>"
285