# Origin: netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/powerpc/ppc-mont.S
#         (revision e0ea3921ea68e51b93ffc215f08ae1647c8e1796)
# Syntax: GNU as, PowerPC, bare register numbers (no r prefix).
# All loads and stores in this file are 32-bit (lwz/stw), so one limb is 4 bytes.
.machine	"any"
.text
3
# ----------------------------------------------------------------------
# bn_mul_mont_int -- word-serial Montgomery multiplication, 32-bit limbs.
#
# Inputs, as consumed by the code below:
#   r3  result vector (copied to r9; written in .Lsub and .Lcopy)
#   r4  a[]   first operand, read with byte index r21
#   r5  b[]   second operand, read with byte index r20
#   r6  n[]   modulus words
#   r7  pointer to one word; that word multiplies the low limb of every
#       row to form the per-row reduction multiplier (r24)
#   r8  num   limb count
# Output:
#   r3 = 0  returned at once when num >= 32, nothing is written
#   r3 = 1  result stored through r9
#
# NOTE(review): num - 2 is loaded into CTR without a lower-bound check, so
# the caller has to guarantee num - 2 >= 1.
#
# Stack: a frame of at least num*4 + 256 bytes is carved below r1 and the
# new r1 is rounded down to a 4096-byte boundary.  The temporary vector tp[]
# starts at 32(r1).  r20..r31 are saved just below the caller r1.
#
# Working registers:
#   r20 outer byte index i     r21 inner byte index j    r22 tp cursor
#   r23 b[i]                   r24 reduction multiplier
#   r10 a[j]   r11 n[j]        r12 scratch / tp[j]
#   r25:r26 running lo:hi of the a[j]*b[i] column
#   r27:r28 running lo:hi of the n[j]*r24 column
#   r29:r30 lo:hi of a[j]*b[i] just computed
#   r31:r0  lo:hi of n[j]*r24 just computed
#   r3      top-most carry word between rows
# ----------------------------------------------------------------------
.globl	bn_mul_mont_int
.type	bn_mul_mont_int,@function
.align	5
bn_mul_mont_int:
	mr	9,3			# keep result pointer, r3 is reused
	li	3,0			# return value for the early exit
	cmpwi	8,32
	bgelr				# num >= 32: return 0, caller must use another path
	slwi	8,8,2			# num in bytes
	li	12,-4096
	addi	3,8,256			# bytes wanted = num*4 + 256
	subf	3,3,1			# r1 - wanted
	and	3,3,12			# round down to a 4096-byte boundary
	subf	3,1,3			# displacement relative to current r1
	mr	12,1			# r12 = caller stack pointer
	srwi	8,8,2			# back to a limb count
	stwux	1,1,3			# allocate; caller r1 is stored at new 0(r1)

	# Save r20..r31 below the caller stack pointer.
	stw	20,-48(12)
	stw	21,-44(12)
	stw	22,-40(12)
	stw	23,-36(12)
	stw	24,-32(12)
	stw	25,-28(12)
	stw	26,-24(12)
	stw	27,-20(12)
	stw	28,-16(12)
	stw	29,-12(12)
	stw	30,-8(12)
	stw	31,-4(12)

	lwz	7,0(7)			# r7 = the word r7 pointed at
	addi	8,8,-2			# loop count for .L1st/.Linner (two limbs are peeled)

	# ---- first row: tp = a * b[0], plus r24 * n, shifted down one limb ----
	lwz	23,0(5)			# b[0]
	lwz	10,0(4)			# a[0]
	addi	22,1,32			# tp cursor = start of tp[]
	mullw	25,10,23		# lo(a[0]*b[0])
	mulhwu	26,10,23		# hi(a[0]*b[0])

	lwz	10,4(4)			# a[1]
	lwz	11,0(6)			# n[0]

	mullw	24,25,7			# r24 = lo(a[0]*b[0]) * r7

	mullw	29,10,23		# a[1]*b[0]
	mulhwu	30,10,23

	mullw	27,11,24		# n[0]*r24
	mulhwu	28,11,24
	lwz	11,4(6)			# n[1]
	addc	27,27,25		# low limb sum is dropped, only its carry is kept
	addze	28,28

	mullw	31,11,24		# n[1]*r24
	mulhwu	0,11,24

	mtctr	8
	li	21,8			# j = byte offset of limb 2
.align	4
.L1st:
	lwzx	10,4,21			# a[j]
	addc	25,29,26
	lwzx	11,6,21			# n[j]
	addze	26,30
	mullw	29,10,23
	addc	27,31,28
	mulhwu	30,10,23
	addze	28,0
	mullw	31,11,24
	addc	27,27,25
	mulhwu	0,11,24
	addze	28,28
	stw	27,0(22)		# store one limb of tp

	addi	21,21,4
	addi	22,22,4
	bc	16,0,.L1st		# decrement CTR, loop while non-zero

	# Drain the products still held in r29:r30 and r31:r0.
	addc	25,29,26
	addze	26,30

	addc	27,31,28
	addze	28,0
	addc	27,27,25
	addze	28,28
	stw	27,0(22)

	li	3,0
	addc	28,28,26
	addze	3,3			# r3 = top-most carry of this row
	stw	28,4(22)

	li	20,4			# i = byte offset of b[1]
.align	4
.Louter:
	# ---- row i: tp = (tp + a * b[i] + r24 * n), shifted down one limb ----
	lwzx	23,5,20			# b[i]
	lwz	10,0(4)			# a[0]
	addi	22,1,32			# rewind tp cursor
	lwz	12,32(1)		# tp[0]
	mullw	25,10,23
	mulhwu	26,10,23
	lwz	10,4(4)			# a[1]
	lwz	11,0(6)			# n[0]
	addc	25,25,12		# add tp[0]
	mullw	29,10,23
	addze	26,26
	mullw	24,25,7			# r24 = low limb * r7
	mulhwu	30,10,23
	mullw	27,11,24
	mulhwu	28,11,24
	lwz	11,4(6)			# n[1]
	addc	27,27,25		# low limb sum is dropped, only its carry is kept
	mullw	31,11,24
	addze	28,28
	mulhwu	0,11,24

	mtctr	8
	li	21,8
.align	4
.Linner:
	lwzx	10,4,21			# a[j]
	addc	25,29,26
	lwz	12,4(22)		# tp[j]
	addze	26,30
	lwzx	11,6,21			# n[j]
	addc	27,31,28
	mullw	29,10,23
	addze	28,0
	mulhwu	30,10,23
	addc	25,25,12		# fold in tp[j]
	mullw	31,11,24
	addze	26,26
	mulhwu	0,11,24
	addc	27,27,25
	addi	21,21,4
	addze	28,28
	stw	27,0(22)		# tp[j-1]
	addi	22,22,4
	bc	16,0,.Linner		# decrement CTR, loop while non-zero

	lwz	12,4(22)
	addc	25,29,26
	addze	26,30
	addc	25,25,12
	addze	26,26

	addc	27,31,28
	addze	28,0
	addc	27,27,25
	addze	28,28
	stw	27,0(22)

	addic	3,3,-1			# CA = 1 exactly when the previous row carry was non-zero
	li	3,0
	adde	28,28,26		# add both high words plus that carry
	addze	3,3			# new top-most carry
	stw	28,4(22)

	slwi	12,8,2			# (num-2)*4
	.long	0x7c146040		# raw encoding of: cmplw 20,12
	addi	20,20,4
	ble	.Louter			# continue while the pre-increment i <= (num-2)*4

	# ---- rp = tp - n, borrow chained through CA ----
	addi	8,8,2			# restore num
	subfc	21,21,21		# j = 0 and CA = 1 (no borrow yet)
	addi	22,1,32
	mtctr	8

.align	4
.Lsub:	lwzx	12,22,21		# tp[j]
	lwzx	11,6,21			# n[j]
	subfe	10,11,12		# tp[j] - n[j] - borrow
	stwx	10,9,21			# rp[j]
	addi	21,21,4
	bc	16,0,.Lsub

	li	21,0
	mtctr	8
	subfe	3,21,3			# r3 = carry word - borrow; all ones means tp < n

	# ---- branch-free select: rp = mask ? tp : rp ; tp is overwritten ----
.align	4
.Lcopy:
	lwzx	12,22,21
	lwzx	10,9,21
	and	12,12,3			# tp[j] if mask is all ones
	andc	10,10,3			# rp[j] if mask is zero
	stwx	21,22,21		# overwrite tp[j] with the offset value j
	or	10,10,12
	stwx	10,9,21
	addi	21,21,4
	bc	16,0,.Lcopy

	lwz	12,0(1)			# caller stack pointer from the back chain
	li	3,1			# success
	lwz	20,-48(12)
	lwz	21,-44(12)
	lwz	22,-40(12)
	lwz	23,-36(12)
	lwz	24,-32(12)
	lwz	25,-28(12)
	lwz	26,-24(12)
	lwz	27,-20(12)
	lwz	28,-16(12)
	lwz	29,-12(12)
	lwz	30,-8(12)
	lwz	31,-4(12)
	mr	1,12			# release the frame
	blr
# Data words after the return, never executed.
# NOTE(review): presumably an AIX-style traceback table - confirm.
.long	0
.byte	0,12,4,0,0x80,12,6,0
.long	0
216
# ----------------------------------------------------------------------
# bn_mul4x_mont_int -- Montgomery multiplication, four 32-bit limbs per step.
#
# Inputs, as consumed by the code below:
#   r3  result vector      r4  a[]      r5  b[]      r6  n[]
#   r7  pointer to one word (loaded once, multiplies the low accumulator
#       limb to form the reduction multiplier r28)
#   r8  num, limb count
# Output: r3 = 1.
#
# Dispatch: when (num & 7) == 0 and r4 == r5 (same operand twice) control
# is transferred to .Lsqr8x_do, the dedicated squaring routine.
#
# NOTE(review): the loops advance four limbs at a time and the CTR counts are
# derived from num/4 - 1; this looks like it requires num to be a non-zero
# multiple of 4 - confirm against the caller.
#
# Frame (new r1, size 32*4 + num*4 bytes):
#   4*6(r1)           result pointer biased by -4
#   4*7(r1)           address of b[num-4]
#   4*8..4*11(r1)     the four reduction multipliers of the current group
#   4*12(r1) onward   temporary vector t[]
# r14..r31 are saved below the caller r1.
#
# Working registers:
#   r9..r12  four limbs of a        r18..r21 four limbs of n
#   r14..r17 product scratch        r22..r26 accumulators (r26 = fifth limb)
#   r27 current b word              r28 current reduction multiplier
#   r29 t cursor                    r30 end of a (biased)
#   r31 byte counter modulo 16      r0  constant zero
#   r3  carry word between steps (after the result pointer has been spilled)
# ----------------------------------------------------------------------
.globl	bn_mul4x_mont_int
.type	bn_mul4x_mont_int,@function
.align	5
bn_mul4x_mont_int:
	andi.	0,8,7
	bne	.Lmul4x_do		# num not a multiple of 8
	.long	0x7c042840		# raw encoding of: cmplw 4,5
	bne	.Lmul4x_do		# different operands
	b	.Lsqr8x_do		# same operand twice: squaring path
.Lmul4x_do:
	slwi	8,8,2			# num in bytes
	mr	9,1			# r9 = caller stack pointer
	li	10,-32*4
	sub	10,10,8			# -(128 + num*4)
	stwux	1,1,10			# allocate; caller r1 is stored at new 0(r1)

	stw	14,-4*18(9)
	stw	15,-4*17(9)
	stw	16,-4*16(9)
	stw	17,-4*15(9)
	stw	18,-4*14(9)
	stw	19,-4*13(9)
	stw	20,-4*12(9)
	stw	21,-4*11(9)
	stw	22,-4*10(9)
	stw	23,-4*9(9)
	stw	24,-4*8(9)
	stw	25,-4*7(9)
	stw	26,-4*6(9)
	stw	27,-4*5(9)
	stw	28,-4*4(9)
	stw	29,-4*3(9)
	stw	30,-4*2(9)
	stw	31,-4*1(9)

	# Bias the pointers by one limb so that update-form loads can be used.
	subi	4,4,4
	subi	6,6,4
	subi	3,3,4
	lwz	7,0(7)			# r7 = the word r7 pointed at

	add	14,5,8
	add	30,4,8			# end of a (biased)
	subi	14,14,4*4		# address of b[num-4]

	lwz	27,4*0(5)		# b[0]
	li	22,0
	lwz	9,4*1(4)		# a[0..3]
	li	23,0
	lwz	10,4*2(4)
	li	24,0
	lwz	11,4*3(4)
	li	25,0
	lwzu	12,4*4(4)
	lwz	18,4*1(6)		# n[0..3]
	lwz	19,4*2(6)
	lwz	20,4*3(6)
	lwzu	21,4*4(6)

	stw	3,4*6(1)		# spill biased result pointer
	stw	14,4*7(1)		# spill address of b[num-4]
	li	3,0			# r3 becomes the carry word
	addic	29,1,4*7		# t cursor; also clears CA
	li	31,0
	li	0,0
	b	.Loop_mul4x_1st_reduction

	# ---- first group of four b words against a[0..3] / n[0..3] ----
.align	5
.Loop_mul4x_1st_reduction:
	mullw	14,9,27			# lo(a[0..3]*b)
	addze	3,3			# carry left over from the previous pass
	mullw	15,10,27
	addi	31,31,4
	mullw	16,11,27
	andi.	31,31,4*4-1		# counter wraps every four passes, sets CR0
	mullw	17,12,27
	addc	22,22,14
	mulhwu	14,9,27			# hi(a[0..3]*b)
	adde	23,23,15
	mulhwu	15,10,27
	adde	24,24,16
	mullw	28,22,7			# reduction multiplier = low limb * r7
	adde	25,25,17
	mulhwu	16,11,27
	addze	26,0
	mulhwu	17,12,27
	lwzx	27,5,31			# next b word
	addc	23,23,14

	stwu	28,4(29)		# keep the multiplier for the tail loop
	adde	24,24,15
	mullw	15,19,28		# lo(n[1..3]*multiplier)
	adde	25,25,16
	mullw	16,20,28
	adde	26,26,17
	mullw	17,21,28

	# lo(n[0]*multiplier) is never computed; only the carry that adding
	# it to r22 would produce is needed, and addic recreates that carry
	# (CA = 1 exactly when r22 is non-zero).
	addic	22,22,-1		# result discarded, CA kept
	mulhwu	14,18,28		# hi(n[0..3]*multiplier)
	adde	22,23,15
	mulhwu	15,19,28
	adde	23,24,16
	mulhwu	16,20,28
	adde	24,25,17
	mulhwu	17,21,28
	adde	25,26,3
	addze	3,0
	addc	22,22,14
	adde	23,23,15
	adde	24,24,16
	adde	25,25,17

	bne	.Loop_mul4x_1st_reduction	# CR0 still from the andi. above

	.long	0x7c1e2040		# raw encoding of: cmplw 30,4
	beq	.Lmul4x4_post_condition	# whole operand was four limbs

	lwz	9,4*1(4)		# next four limbs of a
	lwz	10,4*2(4)
	lwz	11,4*3(4)
	lwzu	12,4*4(4)
	lwz	28,4*8(1)		# first saved multiplier
	lwz	18,4*1(6)		# next four limbs of n
	lwz	19,4*2(6)
	lwz	20,4*3(6)
	lwzu	21,4*4(6)
	b	.Loop_mul4x_1st_tail

	# ---- same four b words against the remaining limbs of a and n ----
.align	5
.Loop_mul4x_1st_tail:
	mullw	14,9,27
	addze	3,3
	mullw	15,10,27
	addi	31,31,4
	mullw	16,11,27
	andi.	31,31,4*4-1
	mullw	17,12,27
	addc	22,22,14
	mulhwu	14,9,27
	adde	23,23,15
	mulhwu	15,10,27
	adde	24,24,16
	mulhwu	16,11,27
	adde	25,25,17
	mulhwu	17,12,27
	addze	26,0
	lwzx	27,5,31			# next b word
	addc	23,23,14
	mullw	14,18,28
	adde	24,24,15
	mullw	15,19,28
	adde	25,25,16
	mullw	16,20,28
	adde	26,26,17
	mullw	17,21,28
	addc	22,22,14
	mulhwu	14,18,28
	adde	23,23,15
	mulhwu	15,19,28
	adde	24,24,16
	mulhwu	16,20,28
	adde	25,25,17
	adde	26,26,3
	mulhwu	17,21,28
	addze	3,0
	addi	28,1,4*8
	lwzx	28,28,31		# next saved multiplier
	stwu	22,4(29)		# one finished limb of t
	addc	22,23,14
	adde	23,24,15
	adde	24,25,16
	adde	25,26,17

	bne	.Loop_mul4x_1st_tail

	sub	15,30,8			# start of a (biased)
	.long	0x7c1e2040		# raw encoding of: cmplw 30,4
	beq	.Lmul4x_proceed		# reached the end of a

	lwz	9,4*1(4)
	lwz	10,4*2(4)
	lwz	11,4*3(4)
	lwzu	12,4*4(4)
	lwz	18,4*1(6)
	lwz	19,4*2(6)
	lwz	20,4*3(6)
	lwzu	21,4*4(6)
	b	.Loop_mul4x_1st_tail

	# ---- move on to the second group of four b words ----
.align	5
.Lmul4x_proceed:
	lwzu	27,4*4(5)		# advance b by four limbs
	addze	3,3			# top-most carry
	lwz	9,4*1(15)		# a[0..3] again
	lwz	10,4*2(15)
	lwz	11,4*3(15)
	lwz	12,4*4(15)
	addi	4,15,4*4
	sub	6,6,8			# rewind n

	stw	22,4*1(29)		# flush accumulators and carry to t
	stw	23,4*2(29)
	stw	24,4*3(29)
	stw	25,4*4(29)
	stw	3,4*5(29)
	lwz	22,4*12(1)		# reload t[0..3]
	lwz	23,4*13(1)
	lwz	24,4*14(1)
	lwz	25,4*15(1)

	lwz	18,4*1(6)
	lwz	19,4*2(6)
	lwz	20,4*3(6)
	lwzu	21,4*4(6)
	addic	29,1,4*7		# t cursor; also clears CA
	li	3,0
	b	.Loop_mul4x_reduction

	# ---- general group: same shape as the first reduction loop ----
.align	5
.Loop_mul4x_reduction:
	mullw	14,9,27
	addze	3,3
	mullw	15,10,27
	addi	31,31,4
	mullw	16,11,27
	andi.	31,31,4*4-1
	mullw	17,12,27
	addc	22,22,14
	mulhwu	14,9,27
	adde	23,23,15
	mulhwu	15,10,27
	adde	24,24,16
	mullw	28,22,7			# reduction multiplier = low limb * r7
	adde	25,25,17
	mulhwu	16,11,27
	addze	26,0
	mulhwu	17,12,27
	lwzx	27,5,31
	addc	23,23,14

	stwu	28,4(29)		# keep the multiplier for the tail loop
	adde	24,24,15
	mullw	15,19,28
	adde	25,25,16
	mullw	16,20,28
	adde	26,26,17
	mullw	17,21,28

	addic	22,22,-1		# result discarded, CA = (r22 != 0)
	mulhwu	14,18,28
	adde	22,23,15
	mulhwu	15,19,28
	adde	23,24,16
	mulhwu	16,20,28
	adde	24,25,17
	mulhwu	17,21,28
	adde	25,26,3
	addze	3,0
	addc	22,22,14
	adde	23,23,15
	adde	24,24,16
	adde	25,25,17

	bne	.Loop_mul4x_reduction

	lwz	14,4*5(29)		# next four limbs of t
	addze	3,3
	lwz	15,4*6(29)
	lwz	16,4*7(29)
	lwz	17,4*8(29)
	lwz	9,4*1(4)		# next four limbs of a
	lwz	10,4*2(4)
	lwz	11,4*3(4)
	lwzu	12,4*4(4)
	addc	22,22,14
	adde	23,23,15
	adde	24,24,16
	adde	25,25,17

	lwz	28,4*8(1)		# first saved multiplier
	lwz	18,4*1(6)
	lwz	19,4*2(6)
	lwz	20,4*3(6)
	lwzu	21,4*4(6)
	b	.Loop_mul4x_tail

.align	5
.Loop_mul4x_tail:
	mullw	14,9,27
	addze	3,3
	mullw	15,10,27
	addi	31,31,4
	mullw	16,11,27
	andi.	31,31,4*4-1
	mullw	17,12,27
	addc	22,22,14
	mulhwu	14,9,27
	adde	23,23,15
	mulhwu	15,10,27
	adde	24,24,16
	mulhwu	16,11,27
	adde	25,25,17
	mulhwu	17,12,27
	addze	26,0
	lwzx	27,5,31
	addc	23,23,14
	mullw	14,18,28
	adde	24,24,15
	mullw	15,19,28
	adde	25,25,16
	mullw	16,20,28
	adde	26,26,17
	mullw	17,21,28
	addc	22,22,14
	mulhwu	14,18,28
	adde	23,23,15
	mulhwu	15,19,28
	adde	24,24,16
	mulhwu	16,20,28
	adde	25,25,17
	mulhwu	17,21,28
	adde	26,26,3
	addi	28,1,4*8
	lwzx	28,28,31		# next saved multiplier
	addze	3,0
	stwu	22,4(29)		# one finished limb of t
	addc	22,23,14
	adde	23,24,15
	adde	24,25,16
	adde	25,26,17

	bne	.Loop_mul4x_tail

	lwz	14,4*5(29)		# next limb of t, or the saved top-most carry
	sub	15,6,8			# start of n (biased)
	addze	3,3
	.long	0x7c1e2040		# raw encoding of: cmplw 30,4
	beq	.Loop_mul4x_break	# reached the end of a

	lwz	15,4*6(29)
	lwz	16,4*7(29)
	lwz	17,4*8(29)
	lwz	9,4*1(4)
	lwz	10,4*2(4)
	lwz	11,4*3(4)
	lwzu	12,4*4(4)
	addc	22,22,14
	adde	23,23,15
	adde	24,24,16
	adde	25,25,17

	lwz	18,4*1(6)
	lwz	19,4*2(6)
	lwz	20,4*3(6)
	lwzu	21,4*4(6)
	b	.Loop_mul4x_tail

.align	5
.Loop_mul4x_break:
	lwz	16,4*6(1)		# biased result pointer
	lwz	17,4*7(1)		# address of b[num-4]
	addc	9,22,14			# fold in the previous top-most carry
	lwz	22,4*12(1)		# reload t[0..3]
	addze	10,23
	lwz	23,4*13(1)
	addze	11,24
	lwz	24,4*14(1)
	addze	12,25
	lwz	25,4*15(1)
	addze	3,3
	stw	9,4*1(29)
	sub	4,30,8			# rewind a
	stw	10,4*2(29)
	stw	11,4*3(29)
	stw	12,4*4(29)
	stw	3,4*5(29)		# save top-most carry behind the result

	lwz	18,4*1(15)		# n[0..3]
	lwz	19,4*2(15)
	lwz	20,4*3(15)
	lwz	21,4*4(15)
	addi	6,15,4*4
	.long	0x7c058840		# raw encoding of: cmplw 5,17
	beq	.Lmul4x_post		# all of b consumed

	lwzu	27,4*4(5)		# advance b by four limbs
	lwz	9,4*1(4)
	lwz	10,4*2(4)
	lwz	11,4*3(4)
	lwzu	12,4*4(4)
	li	3,0
	addic	29,1,4*7		# t cursor; also clears CA
	b	.Loop_mul4x_reduction

	# ---- final step: result = t - n, then branch-free select ----
	# Here r22..r25 hold t[0..3], r18..r21 hold n[0..3], r3 the top carry
	# and r16 the biased result pointer.
.align	5
.Lmul4x_post:
	srwi	31,8,4			# num/4
	mr	5,16			# store cursor
	subi	31,31,1			# loop count; the last group is peeled
	mr	30,16			# second copy for the select pass
	subfc	14,18,22		# t - n, starts the borrow chain
	addi	29,1,4*15
	subfe	15,19,23

	mtctr	31
.Lmul4x_sub:
	lwz	18,4*1(6)
	lwz	22,4*1(29)
	subfe	16,20,24
	lwz	19,4*2(6)
	lwz	23,4*2(29)
	subfe	17,21,25
	lwz	20,4*3(6)
	lwz	24,4*3(29)
	lwzu	21,4*4(6)
	lwzu	25,4*4(29)
	stw	14,4*1(5)
	stw	15,4*2(5)
	subfe	14,18,22
	stw	16,4*3(5)
	stwu	17,4*4(5)
	subfe	15,19,23
	bc	16,0,.Lmul4x_sub	# decrement CTR, loop while non-zero

	lwz	9,4*1(30)		# start reading back the difference
	stw	14,4*1(5)
	lwz	14,4*12(1)		# and t[] itself
	subfe	16,20,24
	lwz	10,4*2(30)
	stw	15,4*2(5)
	lwz	15,4*13(1)
	subfe	17,21,25
	subfe	3,0,3			# mask: all ones when the subtraction borrowed
	addi	29,1,4*12
	lwz	11,4*3(30)
	stw	16,4*3(5)
	lwz	16,4*14(1)
	lwz	12,4*4(30)
	stw	17,4*4(5)
	lwz	17,4*15(1)

	mtctr	31
.Lmul4x_cond_copy:
	and	14,14,3			# t limb when mask is set
	andc	9,9,3			# difference limb when mask is clear
	stw	0,4*0(29)		# zero the temporary as we go
	and	15,15,3
	andc	10,10,3
	stw	0,4*1(29)
	and	16,16,3
	andc	11,11,3
	stw	0,4*2(29)
	and	17,17,3
	andc	12,12,3
	stw	0,4*3(29)
	or	22,14,9
	lwz	9,4*5(30)
	lwz	14,4*4(29)
	or	23,15,10
	lwz	10,4*6(30)
	lwz	15,4*5(29)
	or	24,16,11
	lwz	11,4*7(30)
	lwz	16,4*6(29)
	or	25,17,12
	lwz	12,4*8(30)
	lwz	17,4*7(29)
	addi	29,29,4*4
	stw	22,4*1(30)
	stw	23,4*2(30)
	stw	24,4*3(30)
	stwu	25,4*4(30)
	bc	16,0,.Lmul4x_cond_copy

	lwz	5,0(1)			# caller stack pointer from the back chain
	and	14,14,3
	andc	9,9,3
	stw	0,4*0(29)
	and	15,15,3
	andc	10,10,3
	stw	0,4*1(29)
	and	16,16,3
	andc	11,11,3
	stw	0,4*2(29)
	and	17,17,3
	andc	12,12,3
	stw	0,4*3(29)
	or	22,14,9
	or	23,15,10
	stw	0,4*4(29)
	or	24,16,11
	or	25,17,12
	stw	22,4*1(30)
	stw	23,4*2(30)
	stw	24,4*3(30)
	stw	25,4*4(30)

	b	.Lmul4x_done

	# ---- four-limb operands: the result never left the registers ----
.align	4
.Lmul4x4_post_condition:
	lwz	4,4*6(1)		# biased result pointer
	lwz	5,0(1)			# caller stack pointer from the back chain
	addze	3,3			# pending carry

	subfc	9,18,22			# r22..r25 minus n[0..3]
	subfe	10,19,23
	subfe	11,20,24
	subfe	12,21,25
	subfe	3,0,3			# mask: all ones when that borrowed

	and	18,18,3			# add n back only when it borrowed
	and	19,19,3
	addc	9,9,18
	and	20,20,3
	adde	10,10,19
	and	21,21,3
	adde	11,11,20
	adde	12,12,21

	stw	9,4*1(4)
	stw	10,4*2(4)
	stw	11,4*3(4)
	stw	12,4*4(4)

.Lmul4x_done:
	stw	0,4*8(1)		# zero the saved multipliers
	stw	0,4*9(1)
	stw	0,4*10(1)
	stw	0,4*11(1)
	li	3,1			# success
	lwz	14,-4*18(5)
	lwz	15,-4*17(5)
	lwz	16,-4*16(5)
	lwz	17,-4*15(5)
	lwz	18,-4*14(5)
	lwz	19,-4*13(5)
	lwz	20,-4*12(5)
	lwz	21,-4*11(5)
	lwz	22,-4*10(5)
	lwz	23,-4*9(5)
	lwz	24,-4*8(5)
	lwz	25,-4*7(5)
	lwz	26,-4*6(5)
	lwz	27,-4*5(5)
	lwz	28,-4*4(5)
	lwz	29,-4*3(5)
	lwz	30,-4*2(5)
	lwz	31,-4*1(5)
	mr	1,5			# release the frame
	blr
# Data words after the return, never executed.
# NOTE(review): presumably an AIX-style traceback table - confirm.
.long	0
.byte	0,12,4,0x20,0x80,18,6,0
.long	0
785
# ----------------------------------------------------------------------
# __bn_sqr8x_mont / .Lsqr8x_do -- Montgomery squaring, eight 32-bit limbs
# per step.  Reached by a branch from bn_mul4x_mont_int when (num & 7) == 0
# and both operand pointers are equal; it is not exported.
#
# Registers on entry are the ones bn_mul4x_mont_int received:
#   r3 result, r4 a[], r6 n[], r7 pointer to one word, r8 num (limbs).
# Output: r3 = 1.
#
# Phases:
#   1. zero 2*num limbs of t[]
#   2. .Lsqr8x_outer_loop / .Lsqr8x_mul: sum of cross products a[i]*a[j], i<j
#   3. .Lsqr4x_shift_n_add: double that sum and add the squares a[i]*a[i]
#   4. .Lsqr8x_reduction / .Lsqr8x_tail: reduce eight limbs per round
#   5. .Lsqr8x_sub / .Lsqr4x_cond_copy: subtract n, branch-free select
#
# Frame (new r1, size 32*4 + num*8 bytes):
#   4*6(r1)   result pointer biased by -4
#   4*7(r1)   n pointer biased by -4
#   4*8(r1)   the word r7 pointed at
#   4*9(r1)   address of the last limb of t[]
#   4*10(r1)  top-most carry between reduction rounds
#   4*12(r1)  onward: t[], 2*num limbs
# r14..r31 are saved below the caller r1.
#
# Working registers:
#   r9..r12,r14..r17  eight limbs of a (later of n)
#   r18..r21          product scratch
#   r22..r29          eight accumulators
#   r5  t cursor      r6  end of a, later end of n (biased)
#   r7  multiplier word in .Lsqr8x_mul/.Lsqr8x_tail, otherwise the r7 word
#   r30 counter       r31 carry word / reduction multiplier
#   r0  constant zero
# ----------------------------------------------------------------------
.align	5
__bn_sqr8x_mont:
.Lsqr8x_do:
	mr	9,1			# r9 = caller stack pointer
	slwi	10,8,3			# num*8
	li	11,-32*4
	sub	10,11,10		# -(128 + num*8)
	slwi	8,8,2			# num in bytes
	stwux	1,1,10			# allocate; caller r1 is stored at new 0(r1)

	stw	14,-4*18(9)
	stw	15,-4*17(9)
	stw	16,-4*16(9)
	stw	17,-4*15(9)
	stw	18,-4*14(9)
	stw	19,-4*13(9)
	stw	20,-4*12(9)
	stw	21,-4*11(9)
	stw	22,-4*10(9)
	stw	23,-4*9(9)
	stw	24,-4*8(9)
	stw	25,-4*7(9)
	stw	26,-4*6(9)
	stw	27,-4*5(9)
	stw	28,-4*4(9)
	stw	29,-4*3(9)
	stw	30,-4*2(9)
	stw	31,-4*1(9)

	# Bias the pointers by one limb so that update-form loads can be used.
	subi	4,4,4
	subi	18,6,4			# biased n pointer, parked in r18 for now
	subi	3,3,4
	lwz	7,0(7)			# r7 = the word r7 pointed at
	li	0,0

	add	6,4,8			# end of a (biased)
	lwz	9,4*1(4)		# a[0..7]
	# r22 is deliberately left unset here.
	# NOTE(review): the word stored from r22 on the first pass appears
	# to be rewritten in .Lsqr4x_shift_n_add before it is read - confirm.
	lwz	10,4*2(4)
	li	23,0
	lwz	11,4*3(4)
	li	24,0
	lwz	12,4*4(4)
	li	25,0
	lwz	14,4*5(4)
	li	26,0
	lwz	15,4*6(4)
	li	27,0
	lwz	16,4*7(4)
	li	28,0
	lwzu	17,4*8(4)
	li	29,0

	addi	5,1,4*11		# t cursor, biased by one limb
	subic.	30,8,4*8		# bytes left after the first 16-limb chunk
	b	.Lsqr8x_zero_start

	# ---- zero t[], sixteen limbs per pass (eight on the first pass) ----
.align	5
.Lsqr8x_zero:
	subic.	30,30,4*8
	stw	0,4*1(5)
	stw	0,4*2(5)
	stw	0,4*3(5)
	stw	0,4*4(5)
	stw	0,4*5(5)
	stw	0,4*6(5)
	stw	0,4*7(5)
	stw	0,4*8(5)
.Lsqr8x_zero_start:
	stw	0,4*9(5)
	stw	0,4*10(5)
	stw	0,4*11(5)
	stw	0,4*12(5)
	stw	0,4*13(5)
	stw	0,4*14(5)
	stw	0,4*15(5)
	stwu	0,4*16(5)
	bne	.Lsqr8x_zero

	stw	3,4*6(1)		# spill biased result pointer
	stw	18,4*7(1)		# spill biased n pointer
	stw	7,4*8(1)		# spill the r7 word
	stw	5,4*9(1)		# spill address of the last limb of t[]
	stw	0,4*10(1)		# initial top-most carry = 0
	addi	5,1,4*11		# rewind t cursor

	# ---- cross products a[i]*a[j], i<j, of the eight limbs in r9..r17 ----
	# Only products of two different limbs appear below; the squares
	# a[i]*a[i] are added in .Lsqr4x_shift_n_add after the doubling.
	# The seven groups multiply by r9, r10, r11, r12, r14, r15, r16 in turn.
.align	5
.Lsqr8x_outer_loop:
	mullw	18,10,9			# group 1: lo(a[1..7]*a[0])
	mullw	19,11,9
	mullw	20,12,9
	mullw	21,14,9
	addc	23,23,18
	mullw	18,15,9
	adde	24,24,19
	mullw	19,16,9
	adde	25,25,20
	mullw	20,17,9
	adde	26,26,21
	mulhwu	21,10,9			# hi(a[1..7]*a[0])
	adde	27,27,18
	mulhwu	18,11,9
	adde	28,28,19
	mulhwu	19,12,9
	adde	29,29,20
	mulhwu	20,14,9
	stw	22,4*1(5)
	addze	22,0
	stw	23,4*2(5)
	addc	24,24,21
	mulhwu	21,15,9
	adde	25,25,18
	mulhwu	18,16,9
	adde	26,26,19
	mulhwu	19,17,9
	adde	27,27,20
	mullw	20,11,10		# group 2: lo(a[2..7]*a[1])
	adde	28,28,21
	mullw	21,12,10
	adde	29,29,18
	mullw	18,14,10
	adde	22,22,19

	mullw	19,15,10
	addc	25,25,20
	mullw	20,16,10
	adde	26,26,21
	mullw	21,17,10
	adde	27,27,18
	mulhwu	18,11,10		# hi(a[2..7]*a[1])
	adde	28,28,19
	mulhwu	19,12,10
	adde	29,29,20
	mulhwu	20,14,10
	adde	22,22,21
	mulhwu	21,15,10
	stw	24,4*3(5)
	addze	23,0
	stw	25,4*4(5)
	addc	26,26,18
	mulhwu	18,16,10
	adde	27,27,19
	mulhwu	19,17,10
	adde	28,28,20
	mullw	20,12,11		# group 3: lo(a[3..7]*a[2])
	adde	29,29,21
	mullw	21,14,11
	adde	22,22,18
	mullw	18,15,11
	adde	23,23,19

	mullw	19,16,11
	addc	27,27,20
	mullw	20,17,11
	adde	28,28,21
	mulhwu	21,12,11		# hi(a[3..7]*a[2])
	adde	29,29,18
	mulhwu	18,14,11
	adde	22,22,19
	mulhwu	19,15,11
	adde	23,23,20
	mulhwu	20,16,11
	stw	26,4*5(5)
	addze	24,0
	stw	27,4*6(5)
	addc	28,28,21
	mulhwu	21,17,11
	adde	29,29,18
	mullw	18,14,12		# group 4: lo(a[4..7]*a[3])
	adde	22,22,19
	mullw	19,15,12
	adde	23,23,20
	mullw	20,16,12
	adde	24,24,21

	mullw	21,17,12
	addc	29,29,18
	mulhwu	18,14,12		# hi(a[4..7]*a[3])
	adde	22,22,19
	mulhwu	19,15,12
	adde	23,23,20
	mulhwu	20,16,12
	adde	24,24,21
	mulhwu	21,17,12
	stw	28,4*7(5)
	addze	25,0
	stwu	29,4*8(5)		# eight limbs flushed, cursor advances
	addc	22,22,18
	mullw	18,15,14		# group 5: lo(a[5..7]*a[4])
	adde	23,23,19
	mullw	19,16,14
	adde	24,24,20
	mullw	20,17,14
	adde	25,25,21

	mulhwu	21,15,14		# hi(a[5..7]*a[4])
	addc	23,23,18
	mulhwu	18,16,14
	adde	24,24,19
	mulhwu	19,17,14
	adde	25,25,20
	mullw	20,16,15		# group 6: lo(a[6..7]*a[5])
	addze	26,0
	addc	24,24,21
	mullw	21,17,15
	adde	25,25,18
	mulhwu	18,16,15		# hi(a[6..7]*a[5])
	adde	26,26,19

	mulhwu	19,17,15
	addc	25,25,20
	mullw	20,17,16		# group 7: lo(a[7]*a[6])
	adde	26,26,21
	mulhwu	21,17,16		# hi(a[7]*a[6])
	addze	27,0
	addc	26,26,18
	.long	0x7c062040		# raw encoding of: cmplw 6,4
	adde	27,27,19

	addc	27,27,20
	sub	18,6,8			# start of a (biased)
	addze	28,0
	add	28,28,21

	beq	.Lsqr8x_outer_break	# the a cursor reached the end of a

	mr	7,9			# r7 = first multiplier word for .Lsqr8x_mul
	lwz	9,4*1(5)		# next eight limbs of t
	lwz	10,4*2(5)
	lwz	11,4*3(5)
	lwz	12,4*4(5)
	lwz	14,4*5(5)
	lwz	15,4*6(5)
	lwz	16,4*7(5)
	lwz	17,4*8(5)
	addc	22,22,9
	lwz	9,4*1(4)		# and next eight limbs of a
	adde	23,23,10
	lwz	10,4*2(4)
	adde	24,24,11
	lwz	11,4*3(4)
	adde	25,25,12
	lwz	12,4*4(4)
	adde	26,26,14
	lwz	14,4*5(4)
	adde	27,27,15
	lwz	15,4*6(4)
	adde	28,28,16
	lwz	16,4*7(4)
	subi	3,4,4*7			# r3 = address of the current multiplier word
	addze	29,17
	lwzu	17,4*8(4)

	li	30,0
	b	.Lsqr8x_mul

	# ---- multiply eight limbs (r9..r17) by the word in r7 ----
	# Eight passes per window: r30 steps by 4 and wraps at 32; the next
	# multiplier word is fetched from r3 + r30.
.align	5
.Lsqr8x_mul:
	mullw	18,9,7
	addze	31,0			# carry left over from the previous pass
	mullw	19,10,7
	addi	30,30,4
	mullw	20,11,7
	andi.	30,30,4*8-1		# sets CR0 for the bne below
	mullw	21,12,7
	addc	22,22,18
	mullw	18,14,7
	adde	23,23,19
	mullw	19,15,7
	adde	24,24,20
	mullw	20,16,7
	adde	25,25,21
	mullw	21,17,7
	adde	26,26,18
	mulhwu	18,9,7
	adde	27,27,19
	mulhwu	19,10,7
	adde	28,28,20
	mulhwu	20,11,7
	adde	29,29,21
	mulhwu	21,12,7
	addze	31,31
	stwu	22,4(5)			# one finished limb of t
	addc	22,23,18
	mulhwu	18,14,7
	adde	23,24,19
	mulhwu	19,15,7
	adde	24,25,20
	mulhwu	20,16,7
	adde	25,26,21
	mulhwu	21,17,7
	lwzx	7,3,30			# next multiplier word
	adde	26,27,18
	adde	27,28,19
	adde	28,29,20
	adde	29,31,21

	bne	.Lsqr8x_mul

	.long	0x7c043040		# raw encoding of: cmplw 4,6
	beq	.Lsqr8x_break		# reached the end of a

	lwz	9,4*1(5)
	lwz	10,4*2(5)
	lwz	11,4*3(5)
	lwz	12,4*4(5)
	lwz	14,4*5(5)
	lwz	15,4*6(5)
	lwz	16,4*7(5)
	lwz	17,4*8(5)
	addc	22,22,9
	lwz	9,4*1(4)
	adde	23,23,10
	lwz	10,4*2(4)
	adde	24,24,11
	lwz	11,4*3(4)
	adde	25,25,12
	lwz	12,4*4(4)
	adde	26,26,14
	lwz	14,4*5(4)
	adde	27,27,15
	lwz	15,4*6(4)
	adde	28,28,16
	lwz	16,4*7(4)
	adde	29,29,17
	lwzu	17,4*8(4)

	b	.Lsqr8x_mul

.align	5
.Lsqr8x_break:
	lwz	9,4*8(3)		# next window of eight limbs of a
	addi	4,3,4*15
	lwz	10,4*9(3)
	sub.	18,6,4			# bytes of a left; zero on the last window
	lwz	11,4*10(3)
	sub	19,5,18			# t cursor for the next window
	lwz	12,4*11(3)
	lwz	14,4*12(3)
	lwz	15,4*13(3)
	lwz	16,4*14(3)
	lwz	17,4*15(3)
	beq	.Lsqr8x_outer_loop

	stw	22,4*1(5)		# flush accumulators, reload from the new window
	lwz	22,4*1(19)
	stw	23,4*2(5)
	lwz	23,4*2(19)
	stw	24,4*3(5)
	lwz	24,4*3(19)
	stw	25,4*4(5)
	lwz	25,4*4(19)
	stw	26,4*5(5)
	lwz	26,4*5(19)
	stw	27,4*6(5)
	lwz	27,4*6(19)
	stw	28,4*7(5)
	lwz	28,4*7(19)
	stw	29,4*8(5)
	lwz	29,4*8(19)
	mr	5,19
	b	.Lsqr8x_outer_loop

	# ---- t = 2*t + (a[i]*a[i] for every i), four limbs of a per pass ----
	# Each limb of t is doubled with add x,x; the bit shifted out is
	# recovered with srwi by 31 and ORed into the next limb.
.align	5
.Lsqr8x_outer_break:
	lwz	10,4*1(18)		# a[0..3]; r18 is the biased start of a
	lwz	12,4*2(18)
	lwz	15,4*3(18)
	lwz	17,4*4(18)
	addi	4,18,4*4

	lwz	19,4*13(1)		# t[1..4]
	lwz	20,4*14(1)
	lwz	21,4*15(1)
	lwz	18,4*16(1)

	stw	22,4*1(5)		# flush the last accumulators
	srwi	30,8,4			# num/4
	stw	23,4*2(5)
	subi	30,30,1			# loop count; the last pass is peeled
	stw	24,4*3(5)
	stw	25,4*4(5)
	stw	26,4*5(5)
	stw	27,4*6(5)
	stw	28,4*7(5)

	addi	5,1,4*11		# rewind t cursor
	mullw	22,10,10		# a[0]*a[0]
	mulhwu	10,10,10
	add	23,19,19
	srwi	19,19,32-1
	mullw	11,12,12		# a[1]*a[1]
	mulhwu	12,12,12
	addc	23,23,10
	add	24,20,20
	srwi	20,20,32-1
	add	25,21,21
	srwi	21,21,32-1
	or	24,24,19

	mtctr	30
.Lsqr4x_shift_n_add:
	mullw	14,15,15
	mulhwu	15,15,15
	lwz	19,4*6(5)
	lwz	10,4*1(4)
	adde	24,24,11
	add	26,18,18
	srwi	18,18,32-1
	or	25,25,20
	lwz	20,4*7(5)
	adde	25,25,12
	lwz	12,4*2(4)
	add	27,19,19
	srwi	19,19,32-1
	or	26,26,21
	lwz	21,4*8(5)
	mullw	16,17,17
	mulhwu	17,17,17
	adde	26,26,14
	add	28,20,20
	srwi	20,20,32-1
	or	27,27,18
	lwz	18,4*9(5)
	adde	27,27,15
	lwz	15,4*3(4)
	add	29,21,21
	srwi	21,21,32-1
	or	28,28,19
	lwz	19,4*10(5)
	mullw	9,10,10
	mulhwu	10,10,10
	adde	28,28,16
	stw	22,4*1(5)
	add	22,18,18
	srwi	18,18,32-1
	or	29,29,20
	lwz	20,4*11(5)
	adde	29,29,17
	lwzu	17,4*4(4)
	stw	23,4*2(5)
	add	23,19,19
	srwi	19,19,32-1
	or	22,22,21
	lwz	21,4*12(5)
	mullw	11,12,12
	mulhwu	12,12,12
	adde	22,22,9
	stw	24,4*3(5)
	add	24,20,20
	srwi	20,20,32-1
	or	23,23,18
	lwz	18,4*13(5)
	adde	23,23,10
	stw	25,4*4(5)
	stw	26,4*5(5)
	stw	27,4*6(5)
	stw	28,4*7(5)
	stwu	29,4*8(5)
	add	25,21,21
	srwi	21,21,32-1
	or	24,24,19
	bc	16,0,.Lsqr4x_shift_n_add	# decrement CTR, loop while non-zero
	lwz	4,4*7(1)		# r4 = biased n pointer
	lwz	7,4*8(1)		# r7 = the saved r7 word

	mullw	14,15,15
	mulhwu	15,15,15
	stw	22,4*1(5)
	lwz	22,4*12(1)		# t[0]
	lwz	19,4*6(5)
	adde	24,24,11
	add	26,18,18
	srwi	18,18,32-1
	or	25,25,20
	lwz	20,4*7(5)
	adde	25,25,12
	add	27,19,19
	srwi	19,19,32-1
	or	26,26,21
	mullw	16,17,17
	mulhwu	17,17,17
	adde	26,26,14
	add	28,20,20
	srwi	20,20,32-1
	or	27,27,18
	stw	23,4*2(5)
	lwz	23,4*13(1)		# t[1]
	adde	27,27,15
	or	28,28,19
	lwz	9,4*1(4)		# n[0..5]
	lwz	10,4*2(4)
	adde	28,28,16
	lwz	11,4*3(4)
	lwz	12,4*4(4)
	adde	29,17,20
	lwz	14,4*5(4)
	lwz	15,4*6(4)

	# ---- Montgomery reduction, eight limbs per round ----
	mullw	31,7,22			# r31 = t[0] * r7, the reduction multiplier
	li	30,8
	lwz	16,4*7(4)		# n[6..7]
	add	6,4,8			# end of n (biased)
	lwzu	17,4*8(4)
	stw	24,4*3(5)		# flush, then load t[2..7]
	lwz	24,4*14(1)
	stw	25,4*4(5)
	lwz	25,4*15(1)
	stw	26,4*5(5)
	lwz	26,4*16(1)
	stw	27,4*6(5)
	lwz	27,4*17(1)
	stw	28,4*7(5)
	lwz	28,4*18(1)
	stw	29,4*8(5)
	lwz	29,4*19(1)
	addi	5,1,4*11		# rewind t cursor
	mtctr	30			# eight passes
	b	.Lsqr8x_reduction

.align	5
.Lsqr8x_reduction:
	# lo(n[0]*r31) is never computed; only the carry that adding it to
	# r22 would produce is needed, and addic recreates that carry.
	mullw	19,10,31		# lo(n[1..7]*r31)
	mullw	20,11,31
	stwu	31,4(5)			# keep the multiplier for .Lsqr8x_tail
	mullw	21,12,31

	addic	22,22,-1		# result discarded, CA = (r22 != 0)
	mullw	18,14,31
	adde	22,23,19
	mullw	19,15,31
	adde	23,24,20
	mullw	20,16,31
	adde	24,25,21
	mullw	21,17,31
	adde	25,26,18
	mulhwu	18,9,31			# hi(n[0..7]*r31)
	adde	26,27,19
	mulhwu	19,10,31
	adde	27,28,20
	mulhwu	20,11,31
	adde	28,29,21
	mulhwu	21,12,31
	addze	29,0
	addc	22,22,18
	mulhwu	18,14,31
	adde	23,23,19
	mulhwu	19,15,31
	adde	24,24,20
	mulhwu	20,16,31
	adde	25,25,21
	mulhwu	21,17,31
	mullw	31,7,22			# next reduction multiplier
	adde	26,26,18
	adde	27,27,19
	adde	28,28,20
	adde	29,29,21
	bc	16,0,.Lsqr8x_reduction	# decrement CTR, loop while non-zero

	lwz	18,4*1(5)		# add the next eight limbs of t
	lwz	19,4*2(5)
	lwz	20,4*3(5)
	lwz	21,4*4(5)
	subi	3,5,4*7			# r3 = address of the first saved multiplier
	.long	0x7c062040		# raw encoding of: cmplw 6,4
	addc	22,22,18
	lwz	18,4*5(5)
	adde	23,23,19
	lwz	19,4*6(5)
	adde	24,24,20
	lwz	20,4*7(5)
	adde	25,25,21
	lwz	21,4*8(5)
	adde	26,26,18
	adde	27,27,19
	adde	28,28,20
	adde	29,29,21

	beq	.Lsqr8x8_post_condition	# n is exactly eight limbs

	lwz	7,4*0(3)		# first saved multiplier
	lwz	9,4*1(4)		# next eight limbs of n
	lwz	10,4*2(4)
	lwz	11,4*3(4)
	lwz	12,4*4(4)
	lwz	14,4*5(4)
	lwz	15,4*6(4)
	lwz	16,4*7(4)
	lwzu	17,4*8(4)
	li	30,0

	# Same body as .Lsqr8x_mul; here r9..r17 hold limbs of n and r7 a
	# saved reduction multiplier.
.align	5
.Lsqr8x_tail:
	mullw	18,9,7
	addze	31,0
	mullw	19,10,7
	addi	30,30,4
	mullw	20,11,7
	andi.	30,30,4*8-1
	mullw	21,12,7
	addc	22,22,18
	mullw	18,14,7
	adde	23,23,19
	mullw	19,15,7
	adde	24,24,20
	mullw	20,16,7
	adde	25,25,21
	mullw	21,17,7
	adde	26,26,18
	mulhwu	18,9,7
	adde	27,27,19
	mulhwu	19,10,7
	adde	28,28,20
	mulhwu	20,11,7
	adde	29,29,21
	mulhwu	21,12,7
	addze	31,31
	stwu	22,4(5)
	addc	22,23,18
	mulhwu	18,14,7
	adde	23,24,19
	mulhwu	19,15,7
	adde	24,25,20
	mulhwu	20,16,7
	adde	25,26,21
	mulhwu	21,17,7
	lwzx	7,3,30			# next saved multiplier
	adde	26,27,18
	adde	27,28,19
	adde	28,29,20
	adde	29,31,21

	bne	.Lsqr8x_tail

	lwz	9,4*1(5)
	lwz	31,4*10(1)		# saved top-most carry, used only on break
	.long	0x7c062040		# raw encoding of: cmplw 6,4
	lwz	10,4*2(5)
	sub	20,6,8			# start of n (biased)
	lwz	11,4*3(5)
	lwz	12,4*4(5)
	lwz	14,4*5(5)
	lwz	15,4*6(5)
	lwz	16,4*7(5)
	lwz	17,4*8(5)
	beq	.Lsqr8x_tail_break	# reached the end of n

	addc	22,22,9
	lwz	9,4*1(4)
	adde	23,23,10
	lwz	10,4*2(4)
	adde	24,24,11
	lwz	11,4*3(4)
	adde	25,25,12
	lwz	12,4*4(4)
	adde	26,26,14
	lwz	14,4*5(4)
	adde	27,27,15
	lwz	15,4*6(4)
	adde	28,28,16
	lwz	16,4*7(4)
	adde	29,29,17
	lwzu	17,4*8(4)

	b	.Lsqr8x_tail

.align	5
.Lsqr8x_tail_break:
	lwz	7,4*8(1)		# the saved r7 word
	lwz	21,4*9(1)		# address of the last limb of t[]
	addi	30,5,4*8		# end of the current window

	addic	31,31,-1		# move the saved carry word into CA
	adde	18,22,9
	lwz	22,4*8(3)
	lwz	9,4*1(20)		# n[0..7] again
	adde	19,23,10
	lwz	23,4*9(3)
	lwz	10,4*2(20)
	adde	24,24,11
	lwz	11,4*3(20)
	adde	25,25,12
	lwz	12,4*4(20)
	adde	26,26,14
	lwz	14,4*5(20)
	adde	27,27,15
	lwz	15,4*6(20)
	adde	28,28,16
	lwz	16,4*7(20)
	adde	29,29,17
	lwz	17,4*8(20)
	addi	4,20,4*8
	addze	20,0			# new top-most carry
	mullw	31,7,22			# reduction multiplier for the next round
	stw	18,4*1(5)
	.long	0x7c1ea840		# raw encoding of: cmplw 30,21
	stw	19,4*2(5)
	li	30,8
	stw	24,4*3(5)
	lwz	24,4*10(3)
	stw	25,4*4(5)
	lwz	25,4*11(3)
	stw	26,4*5(5)
	lwz	26,4*12(3)
	stw	27,4*6(5)
	lwz	27,4*13(3)
	stw	28,4*7(5)
	lwz	28,4*14(3)
	stw	29,4*8(5)
	lwz	29,4*15(3)
	stw	20,4*10(1)		# save top-most carry
	addi	5,3,4*7			# slide the window by eight limbs
	mtctr	30
	bne	.Lsqr8x_reduction	# window end has not reached the end of t[]

	# ---- result = t_high - n, then branch-free select ----
	# Here r22..r29 hold the first eight result limbs, r9..r17 hold
	# n[0..7] and r20 the top-most carry.
	lwz	3,4*6(1)		# biased result pointer
	srwi	30,8,5			# num/8
	mr	7,5			# remember the t cursor
	addi	5,5,4*8
	subi	30,30,1			# loop count; the last group is peeled
	subfc	18,9,22			# starts the borrow chain
	subfe	19,10,23
	mr	31,20			# r31 = top-most carry
	mr	6,3			# second copy of the result pointer

	mtctr	30
	b	.Lsqr8x_sub

.align	5
.Lsqr8x_sub:
	lwz	9,4*1(4)
	lwz	22,4*1(5)
	lwz	10,4*2(4)
	lwz	23,4*2(5)
	subfe	20,11,24
	lwz	11,4*3(4)
	lwz	24,4*3(5)
	subfe	21,12,25
	lwz	12,4*4(4)
	lwz	25,4*4(5)
	stw	18,4*1(3)
	subfe	18,14,26
	lwz	14,4*5(4)
	lwz	26,4*5(5)
	stw	19,4*2(3)
	subfe	19,15,27
	lwz	15,4*6(4)
	lwz	27,4*6(5)
	stw	20,4*3(3)
	subfe	20,16,28
	lwz	16,4*7(4)
	lwz	28,4*7(5)
	stw	21,4*4(3)
	subfe	21,17,29
	lwzu	17,4*8(4)
	lwzu	29,4*8(5)
	stw	18,4*5(3)
	subfe	18,9,22
	stw	19,4*6(3)
	subfe	19,10,23
	stw	20,4*7(3)
	stwu	21,4*8(3)
	bc	16,0,.Lsqr8x_sub	# decrement CTR, loop while non-zero

	srwi	30,8,4			# num/4
	lwz	9,4*1(6)		# start reading back the difference
	lwz	22,4*1(7)		# and the unsubtracted t limbs
	subi	30,30,1
	lwz	10,4*2(6)
	lwz	23,4*2(7)
	subfe	20,11,24
	lwz	11,4*3(6)
	lwz	24,4*3(7)
	subfe	21,12,25
	lwz	12,4*4(6)
	lwzu	25,4*4(7)
	stw	18,4*1(3)
	subfe	18,14,26
	stw	19,4*2(3)
	subfe	19,15,27
	stw	20,4*3(3)
	subfe	20,16,28
	stw	21,4*4(3)
	subfe	21,17,29
	stw	18,4*5(3)
	subfe	31,0,31			# mask: all ones when the subtraction borrowed
	stw	19,4*6(3)
	stw	20,4*7(3)
	stw	21,4*8(3)

	addi	5,1,4*11		# rewind t cursor for wiping
	mtctr	30

.Lsqr4x_cond_copy:
	andc	9,9,31			# difference limb when mask is clear
	stw	0,-4*3(7)		# zero the high half of t as we go
	and	22,22,31		# t limb when mask is set
	stw	0,-4*2(7)
	andc	10,10,31
	stw	0,-4*1(7)
	and	23,23,31
	stw	0,-4*0(7)
	andc	11,11,31
	stw	0,4*1(5)		# and the low half
	and	24,24,31
	stw	0,4*2(5)
	andc	12,12,31
	stw	0,4*3(5)
	and	25,25,31
	stwu	0,4*4(5)
	or	18,9,22
	lwz	9,4*5(6)
	lwz	22,4*1(7)
	or	19,10,23
	lwz	10,4*6(6)
	lwz	23,4*2(7)
	or	20,11,24
	lwz	11,4*7(6)
	lwz	24,4*3(7)
	or	21,12,25
	lwz	12,4*8(6)
	lwzu	25,4*4(7)
	stw	18,4*1(6)
	stw	19,4*2(6)
	stw	20,4*3(6)
	stwu	21,4*4(6)
	bc	16,0,.Lsqr4x_cond_copy

	lwz	4,0(1)			# caller stack pointer from the back chain
	andc	9,9,31
	and	22,22,31
	andc	10,10,31
	and	23,23,31
	andc	11,11,31
	and	24,24,31
	andc	12,12,31
	and	25,25,31
	or	18,9,22
	or	19,10,23
	or	20,11,24
	or	21,12,25
	stw	18,4*1(6)
	stw	19,4*2(6)
	stw	20,4*3(6)
	stw	21,4*4(6)

	b	.Lsqr8x_done

	# ---- eight-limb operands: the result never left the registers ----
.align	5
.Lsqr8x8_post_condition:
	lwz	3,4*6(1)		# biased result pointer
	lwz	4,0(1)			# caller stack pointer from the back chain
	addze	31,0			# pending carry

	subfc	22,9,22			# r22..r29 minus n[0..7]
	subfe	23,10,23
	stw	0,4*12(1)		# zero t[] in between
	stw	0,4*13(1)
	subfe	24,11,24
	stw	0,4*14(1)
	stw	0,4*15(1)
	subfe	25,12,25
	stw	0,4*16(1)
	stw	0,4*17(1)
	subfe	26,14,26
	stw	0,4*18(1)
	stw	0,4*19(1)
	subfe	27,15,27
	stw	0,4*20(1)
	stw	0,4*21(1)
	subfe	28,16,28
	stw	0,4*22(1)
	stw	0,4*23(1)
	subfe	29,17,29
	stw	0,4*24(1)
	stw	0,4*25(1)
	subfe	31,0,31			# mask: all ones when that borrowed
	stw	0,4*26(1)
	stw	0,4*27(1)

	and	9,9,31			# add n back only when it borrowed
	and	10,10,31
	addc	22,22,9
	and	11,11,31
	adde	23,23,10
	and	12,12,31
	adde	24,24,11
	and	14,14,31
	adde	25,25,12
	and	15,15,31
	adde	26,26,14
	and	16,16,31
	adde	27,27,15
	and	17,17,31
	adde	28,28,16
	adde	29,29,17
	stw	22,4*1(3)
	stw	23,4*2(3)
	stw	24,4*3(3)
	stw	25,4*4(3)
	stw	26,4*5(3)
	stw	27,4*6(3)
	stw	28,4*7(3)
	stw	29,4*8(3)

.Lsqr8x_done:
	stw	0,4*8(1)		# zero the saved r7 word
	stw	0,4*10(1)		# and the saved carry

	lwz	14,-4*18(4)
	li	3,1			# success
	lwz	15,-4*17(4)
	lwz	16,-4*16(4)
	lwz	17,-4*15(4)
	lwz	18,-4*14(4)
	lwz	19,-4*13(4)
	lwz	20,-4*12(4)
	lwz	21,-4*11(4)
	lwz	22,-4*10(4)
	lwz	23,-4*9(4)
	lwz	24,-4*8(4)
	lwz	25,-4*7(4)
	lwz	26,-4*6(4)
	lwz	27,-4*5(4)
	lwz	28,-4*4(4)
	lwz	29,-4*3(4)
	lwz	30,-4*2(4)
	lwz	31,-4*1(4)
	mr	1,4			# release the frame
	blr
# Data words after the return, never executed.
# NOTE(review): presumably an AIX-style traceback table - confirm.
.long	0
.byte	0,12,4,0x20,0x80,18,6,0
.long	0
1784
# NUL-terminated ASCII identification string embedded in .text; it decodes to
#   Montgomery Multiplication for PPC,CRYPTOGAMS by <appro@openssl.org>
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,80,80,67,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
1787