# Origin: NetBSD source xref, /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/mips/mips-mont64.S (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
.text

.set	noat			# $1 is written explicitly below
.set	noreorder		# delay slots are filled by hand

# bn_mul_mont -- entry stub that screens the count passed in $9.
#   when $9 < 4		return with $2 = 0
#   when 4 <= $9 < 17	branch to bn_mul_mont_internal, argument
#			registers $4..$9 left as the caller set them
#   when $9 >= 17	return with $2 = 0
# The zero result is produced in the delay slot of the first branch, so it
# is already in $2 on every path that reaches the final jr.
.align	5
.globl	bn_mul_mont
.ent	bn_mul_mont
bn_mul_mont:
	slti	$1,$9,4			# $1 = ($9 < 4), signed compare
	bnez	$1,.Lmont_decline
	li	$2,0			# delay slot: executed whether or not taken
	slti	$1,$9,17	# on in-order CPU
	bnez	$1,bn_mul_mont_internal
	nop				# delay slot
.Lmont_decline:
	jr	$31
	li	$4,0			# delay slot: $4 is cleared too
.end	bn_mul_mont
19
# bn_mul_mont_internal -- word-serial Montgomery multiplication, 64-bit words.
#
# Entered by branch from bn_mul_mont with the argument registers untouched:
#   $4 = rp	destination vector
#   $5 = ap	multiplicand vector
#   $6 = bp	multiplier vector
#   $7 = np	vector whose multiples are added to cancel the low word
#		(the modulus, in Montgomery terms)
#   $8		address of one word; that word is loaded into $8 and used
#		to derive the per-pass multiplier $23
#   $9 = num	vector length in 8-byte words, turned into a byte count below
# Returns 1 in $2 (and also writes 1 to $4).
#
# Register roles inside the function:
#   $10/$11	lo0/hi0, running ap[j]*bi sum and its carry word
#   $24/$25	lo1/hi1, running np[j]*m1 sum and its carry word
#   $12 aj, $14 nj, $13 bi, $23 m1, $20 tj
#   $16/$17	alo/ahi and $18/$19 nlo/nhi, products fetched from LO/HI
#   $15		tp cursor; $21 = i and $22 = j are byte offsets
#   $1, $2	carry scratch
#
# Stack: a 14*8 byte frame holds $30 and $16..$23. Below it a scratch
# vector tp of num+2 words is carved out, with $29 rounded down to a
# 4096-byte boundary; $30 remembers the frame address for the epilogue.
.align	5
.ent	bn_mul_mont_internal
bn_mul_mont_internal:
	.frame	$30,14*8,$31
	.mask	0x40000000|16711680,-8
	dsub $29,14*8
	sd	$30,(14-1)*8($29)
	sd	$23,(14-2)*8($29)
	sd	$22,(14-3)*8($29)
	sd	$21,(14-4)*8($29)
	sd	$20,(14-5)*8($29)
	sd	$19,(14-6)*8($29)
	sd	$18,(14-7)*8($29)
	sd	$17,(14-8)*8($29)
	sd	$16,(14-9)*8($29)
	move	$30,$29			# frame pointer, used to unwind tp later

	.set	reorder
	ld	$8,0($8)		# replace the pointer by the word it addresses
	ld	$13,0($6)	# bp[0]
	ld	$12,0($5)	# ap[0]
	ld	$14,0($7)	# np[0]

	dsub $29,2*8	# place for two extra words
	sll	$9,3			# num is a byte count from here on
	li	$1,-4096
	dsub $29,$9
	and	$29,$1			# round tp down to a 4096-byte boundary

	# Pass for bp[0]: no previous tp contents to add in.
	dmultu	$12,$13			# ap[0]*bp[0]
	ld	$16,8($5)
	ld	$18,8($7)
	mflo	$10
	mfhi	$11
	dmultu	$10,$8
	mflo	$23			# m1 = low word of lo0 * ($8 word)

	dmultu	$16,$13			# ap[1]*bp[0]
	mflo	$16
	mfhi	$17

	dmultu	$14,$23			# np[0]*m1
	mflo	$24
	mfhi	$25
	dmultu	$18,$23			# np[1]*m1
	daddu	$24,$10
	sltu	$1,$24,$10
	daddu	$25,$1
	mflo	$18
	mfhi	$19

	move	$15,$29
	li	$22,2*8			# j starts at word 2
.align	4
.L1st:
	.set	noreorder
	dadd $12,$5,$22
	dadd $14,$7,$22
	ld	$12,($12)		# ap[j]
	ld	$14,($14)		# np[j]

	dmultu	$12,$13
	daddu	$10,$16,$11
	daddu	$24,$18,$25
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	daddu	$11,$17,$1
	daddu	$25,$19,$2
	mflo	$16
	mfhi	$17

	daddu	$24,$10
	sltu	$1,$24,$10
	dmultu	$14,$23
	daddu	$25,$1
	addu	$22,8
	sd	$24,($15)
	sltu	$2,$22,$9
	mflo	$18
	mfhi	$19

	bnez	$2,.L1st
	dadd $15,8			# delay slot: advance tp cursor
	.set	reorder

	# Drain the two products still in flight and store the top words.
	daddu	$10,$16,$11
	sltu	$1,$10,$11
	daddu	$11,$17,$1

	daddu	$24,$18,$25
	sltu	$2,$24,$25
	daddu	$25,$19,$2
	daddu	$24,$10
	sltu	$1,$24,$10
	daddu	$25,$1

	sd	$24,($15)

	daddu	$25,$11
	sltu	$1,$25,$11
	sd	$25,8($15)
	sd	$1,2*8($15)		# carry out of the top word

	li	$21,8			# i starts at word 1
.align	4
.Louter:
	dadd $13,$6,$21
	ld	$13,($13)		# bp[i]
	ld	$12,($5)
	ld	$16,8($5)
	ld	$20,($29)		# tp[0]

	dmultu	$12,$13
	ld	$14,($7)
	ld	$18,8($7)
	mflo	$10
	mfhi	$11
	daddu	$10,$20
	dmultu	$10,$8
	sltu	$1,$10,$20
	daddu	$11,$1
	mflo	$23			# m1 for this pass

	dmultu	$16,$13
	mflo	$16
	mfhi	$17

	dmultu	$14,$23
	mflo	$24
	mfhi	$25

	dmultu	$18,$23
	daddu	$24,$10
	sltu	$1,$24,$10
	daddu	$25,$1
	mflo	$18
	mfhi	$19

	move	$15,$29
	li	$22,2*8
	ld	$20,8($15)		# tp[1]
.align	4
.Linner:
	.set	noreorder
	dadd $12,$5,$22
	dadd $14,$7,$22
	ld	$12,($12)		# ap[j]
	ld	$14,($14)		# np[j]

	dmultu	$12,$13
	daddu	$10,$16,$11
	daddu	$24,$18,$25
	sltu	$1,$10,$11
	sltu	$2,$24,$25
	daddu	$11,$17,$1
	daddu	$25,$19,$2
	mflo	$16
	mfhi	$17

	daddu	$10,$20			# fold in the previous tp word
	addu	$22,8
	dmultu	$14,$23
	sltu	$1,$10,$20
	daddu	$24,$10
	daddu	$11,$1
	sltu	$2,$24,$10
	ld	$20,2*8($15)		# next tp word
	daddu	$25,$2
	sltu	$1,$22,$9
	mflo	$18
	mfhi	$19
	sd	$24,($15)
	bnez	$1,.Linner
	dadd $15,8			# delay slot: advance tp cursor
	.set	reorder

	daddu	$10,$16,$11
	sltu	$1,$10,$11
	daddu	$11,$17,$1
	daddu	$10,$20
	sltu	$2,$10,$20
	daddu	$11,$2

	ld	$20,2*8($15)		# carry word left by the previous pass
	daddu	$24,$18,$25
	sltu	$1,$24,$25
	daddu	$25,$19,$1
	daddu	$24,$10
	sltu	$2,$24,$10
	daddu	$25,$2
	sd	$24,($15)

	daddu	$24,$25,$11
	sltu	$25,$24,$11
	daddu	$24,$20
	sltu	$1,$24,$20
	daddu	$25,$1
	sd	$24,8($15)
	sd	$25,2*8($15)		# $25 keeps the top carry for the final step

	addu	$21,8
	sltu	$2,$21,$9
	bnez	$2,.Louter

	# Final reduction: rp = tp - np, then keep tp instead if that borrowed.
	.set	noreorder
	dadd $20,$29,$9	# &tp[num]
	move	$15,$29
	li	$11,0		# clear borrow bit

.align	4
.Lsub:	ld	$10,($15)
	ld	$24,($7)
	dadd $15,8
	dadd $7,8
	dsubu	$24,$10,$24	# tp[i]-np[i]
	sgtu	$1,$24,$10		# borrow out of tp[i]-np[i]
	dsubu	$10,$24,$11
	sgtu	$11,$10,$24		# borrow out of subtracting the old borrow
	sd	$10,($4)
	or	$11,$1
	sltu	$1,$15,$20
	bnez	$1,.Lsub
	dadd $4,8			# delay slot: advance rp

	dsubu	$11,$25,$11	# handle upmost overflow bit
	move	$15,$29
	dsub $4,$9	# restore rp
	not	$25,$11

	# $11 is all ones when tp must be kept and zero when rp (tp - np) must
	# be kept; $25 is its complement. Each word is selected with the masks
	# so that tp[i] and rp[i] are both read on every iteration. The earlier
	# form chose the source pointer from the borrow, which made the set of
	# addresses read depend on the data. tp is wiped as it is consumed.
.align	4
.Lcopy:	ld	$14,($15)		# tp[i]
	ld	$12,($4)		# rp[i]
	sd	$0,($15)		# zero tp[i]
	dadd $15,8
	and	$14,$11
	and	$12,$25
	or	$12,$14			# borrow ? tp[i] : rp[i]
	sltu	$1,$15,$20
	sd	$12,($4)
	bnez	$1,.Lcopy
	dadd $4,8			# delay slot: advance rp

	li	$4,1
	li	$2,1			# result value 1

	.set	noreorder
	move	$29,$30			# drop tp, back to the register frame
	ld	$30,(14-1)*8($29)
	ld	$23,(14-2)*8($29)
	ld	$22,(14-3)*8($29)
	ld	$21,(14-4)*8($29)
	ld	$20,(14-5)*8($29)
	ld	$19,(14-6)*8($29)
	ld	$18,(14-7)*8($29)
	ld	$17,(14-8)*8($29)
	ld	$16,(14-9)*8($29)
	jr	$31
	dadd $29,14*8			# delay slot: release the frame
.end	bn_mul_mont_internal
.rdata
.asciiz	"Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>"
283