/* xref: /netbsd-src/crypto/external/bsd/openssl/lib/libcrypto/arch/mips/poly1305-mips64.S (revision e0ea3921ea68e51b93ffc215f08ae1647c8e1796) */
#include "mips_arch.h"

/*
 * MSB / LSB: byte offset, inside an 8-byte field, of the most / least
 * significant byte for the endianness being built.  They are added to the
 * displacements of the ldl/ldr pairs used for the 64-bit loads below.
 */
#ifdef MIPSEB
# define MSB 0
# define LSB 7
#else
# define MSB 7
# define LSB 0
#endif

.text
.set	noat			# $1 is used explicitly as a scratch register
.set	noreorder

/*
 * poly1305_init
 *
 *   $4  in   state block; offsets 0..47 are written
 *   $5  in   16-byte key, or zero for "no key"
 *   $2  out  always 0
 *
 * Zeroes the three hash words at 0/8/16($4).  When $5 is non-zero the key
 * is read as two 64-bit words (byte-swapped on MIPSEB so that they hold the
 * little-endian value), clamped, and stored as
 *     24($4) = r0
 *     32($4) = r1
 *     40($4) = s1 = r1 + (r1 >> 2)
 * Pre-R6 builds read the key with ldl/ldr pairs; R6 builds use plain ld.
 *
 * Scratch: $1, $2, $8-$11, $24 ($1/$11/$24 only on MIPSEB pre-R2 builds).
 * No stack frame is used.
 */
.align	5
.globl	poly1305_init
.ent	poly1305_init
poly1305_init:
	.frame	$29,0,$31
	.set	reorder

	sd	$0,0($4)		# h0 = 0
	sd	$0,8($4)		# h1 = 0
	sd	$0,16($4)		# h2 = 0

	beqz	$5,.Lno_key		# no key: leave r0/r1/s1 untouched

#if defined(_MIPS_ARCH_MIPS64R6)
	ld	$8,0($5)
	ld	$9,8($5)
#else
	ldl	$8,0+MSB($5)
	ldl	$9,8+MSB($5)
	ldr	$8,0+LSB($5)
	ldr	$9,8+LSB($5)
#endif
#ifdef	MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
	dsbh	$8,$8		# byte swap
	 dsbh	$9,$9
	dshd	$8,$8
	 dshd	$9,$9
# else
	ori	$10,$0,0xFF
	dsll	$1,$10,32
	or	$10,$1		# 0x000000FF000000FF

	and	$11,$8,$10	# byte swap
	 and	$2,$9,$10
	dsrl	$1,$8,24
	 dsrl	$24,$9,24
	dsll	$11,24
	 dsll	$2,24
	and	$1,$10
	 and	$24,$10
	dsll	$10,8			# 0x0000FF000000FF00
	or	$11,$1
	 or	$2,$24
	and	$1,$8,$10
	 and	$24,$9,$10
	dsrl	$8,8
	 dsrl	$9,8
	dsll	$1,8
	 dsll	$24,8
	and	$8,$10
	 and	$9,$10
	or	$11,$1
	 or	$2,$24
	or	$8,$11
	 or	$9,$2
	dsrl	$11,$8,32
	 dsrl	$2,$9,32
	dsll	$8,32
	 dsll	$9,32
	or	$8,$11
	 or	$9,$2
# endif
#endif
	li	$10,1			# build the clamp mask without a literal pool
	dsll	$10,32			# 0000000100000000
	daddiu	$10,-63			# 00000000ffffffc1
	dsll	$10,28			# 0ffffffc10000000
	daddiu	$10,-1		# 0ffffffc0fffffff

	and	$8,$10			# r0 = key word 0, clamped
	daddiu	$10,-3		# 0ffffffc0ffffffc
	and	$9,$10			# r1 = key word 1, clamped

	sd	$8,24($4)
	dsrl	$10,$9,2
	sd	$9,32($4)
	daddu	$10,$9		# s1 = r1 + (r1 >> 2)
	sd	$10,40($4)

.Lno_key:
	li	$2,0			# return 0
	jr	$31
.end	poly1305_init
/*
 * poly1305_blocks
 *
 *   $4  in  state block written by poly1305_init
 *   $5  in  input bytes
 *   $6  in  input length in bytes; only complete 16-byte blocks are used
 *   $7  in  value added to the top hash word for every block
 *
 * Public entry point.  Converts the byte count in $6 to a block count and
 * returns at once when it is zero; otherwise it branches (no call, $31 is
 * untouched) to poly1305_blocks_internal, which returns to the caller.
 * $4, $5 and $7 are passed through unchanged.
 */
.align	5
.globl	poly1305_blocks
.ent	poly1305_blocks
poly1305_blocks:
	.set	noreorder
	dsrl	$6,4			# number of complete blocks
	bnez	$6,poly1305_blocks_internal
	nop				# branch delay slot
	jr	$31
	nop				# jump delay slot
.end	poly1305_blocks

/*
 * poly1305_blocks_internal
 *
 * Block loop, entered from poly1305_blocks with a non-zero block count.
 *
 *   $4  in  state block          $6  in  number of 16-byte blocks (> 0)
 *   $5  in  input bytes          $7  in  value added to h2 for every block
 *
 * Register use inside the loop:
 *   $12,$13,$14  h0,h1,h2   hash accumulator (0/8/16($4))
 *   $15,$16,$17  r0,r1,s1   key material     (24/32/40($4))
 *   $8,$9        input words, then d0,d1 (low words of the product)
 *   $25          d2 (top word of the product)
 *   $1,$2,$10,$11,$24       temporaries and carries
 *
 * Each iteration computes h = (h + block + ($7 << 128)) * r, partially
 * reduced so that h2 stays small.  dmultu(...), mflo(...) and mfhi(...) are
 * written with parenthesised operand lists, i.e. they are macros rather
 * than raw mnemonics (presumably supplied by mips_arch.h).
 *
 * Frame: 48 bytes, of which only 32($29) and 40($29) are used to preserve
 * $16 and $17.  $5 and $6 are consumed.
 */
.align	5
.ent	poly1305_blocks_internal
poly1305_blocks_internal:
	.frame	$29,6*8,$31
	.mask	0x00030000,-8
	.set	noreorder
	dsubu	$29,6*8
	sd	$17,40($29)
	sd	$16,32($29)
	.set	reorder

	ld	$12,0($4)		# load hash value
	ld	$13,8($4)
	ld	$14,16($4)

	ld	$15,24($4)		# load key
	ld	$16,32($4)
	ld	$17,40($4)

.Loop:
#if defined(_MIPS_ARCH_MIPS64R6)
	ld	$8,0($5)		# load input
	ld	$9,8($5)
#else
	ldl	$8,0+MSB($5)	# load input
	ldl	$9,8+MSB($5)
	ldr	$8,0+LSB($5)
	ldr	$9,8+LSB($5)
#endif
	daddiu	$6,-1			# one block fewer to go
	daddiu	$5,16			# advance input pointer
#ifdef	MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
	dsbh	$8,$8		# byte swap
	 dsbh	$9,$9
	dshd	$8,$8
	 dshd	$9,$9
# else
	ori	$10,$0,0xFF
	dsll	$1,$10,32
	or	$10,$1		# 0x000000FF000000FF

	and	$11,$8,$10	# byte swap
	 and	$2,$9,$10
	dsrl	$1,$8,24
	 dsrl	$24,$9,24
	dsll	$11,24
	 dsll	$2,24
	and	$1,$10
	 and	$24,$10
	dsll	$10,8			# 0x0000FF000000FF00
	or	$11,$1
	 or	$2,$24
	and	$1,$8,$10
	 and	$24,$9,$10
	dsrl	$8,8
	 dsrl	$9,8
	dsll	$1,8
	 dsll	$24,8
	and	$8,$10
	 and	$9,$10
	or	$11,$1
	 or	$2,$24
	or	$8,$11
	 or	$9,$2
	dsrl	$11,$8,32
	 dsrl	$2,$9,32
	dsll	$8,32
	 dsll	$9,32
	or	$8,$11
	 or	$9,$2
# endif
#endif
	daddu	$12,$8		# accumulate input
	daddu	$13,$9
	sltu	$10,$12,$8		# carry out of h0
	sltu	$11,$13,$9		# carry out of h1 (input add)
	daddu	$13,$10

	dmultu	($15,$12)		# h0*r0
	 daddu	$14,$7			# h2 += per-block value from the caller
	 sltu	$10,$13,$10		# carry out of h1 (carry add)
	mflo	($8,$15,$12)		# d0
	mfhi	($9,$15,$12)		# d1

	dmultu	($17,$13)		# h1*5*r1
	 daddu	$10,$11			# merge both h1 carries
	 daddu	$14,$10			# ... into h2
	mflo	($10,$17,$13)
	mfhi	($11,$17,$13)

	dmultu	($16,$12)		# h0*r1
	 daddu	$8,$10
	 daddu	$9,$11
	mflo	($1,$16,$12)
	mfhi	($25,$16,$12)		# d2
	 sltu	$10,$8,$10		# carry d0 -> d1
	 daddu	$9,$10

	dmultu	($15,$13)		# h1*r0
	 daddu	$9,$1
	 sltu	$1,$9,$1		# carry d1 -> d2
	mflo	($10,$15,$13)
	mfhi	($11,$15,$13)
	 daddu	$25,$1

	dmultu	($17,$14)		# h2*5*r1
	 daddu	$9,$10
	 daddu	$25,$11
	mflo	($1,$17,$14)

	dmultu	($15,$14)		# h2*r0
	 sltu	$10,$9,$10		# carry d1 -> d2
	 daddu	$25,$10
	mflo	($2,$15,$14)

	daddu	$9,$1
	daddu	$25,$2
	sltu	$1,$9,$1		# carry d1 -> d2
	daddu	$25,$1

	li	$10,-4		# final reduction
	and	$10,$25			# d2 with its low two bits cleared
	dsrl	$11,$25,2
	andi	$14,$25,3		# h2 keeps only the low two bits of d2
	daddu	$10,$11			# (d2 & -4) + (d2 >> 2) = 5 * (d2 >> 2)
	daddu	$12,$8,$10
	sltu	$10,$12,$10
	daddu	$13,$9,$10
	sltu	$10,$13,$10
	daddu	$14,$14,$10

	bnez	$6,.Loop

	sd	$12,0($4)		# store hash value
	sd	$13,8($4)
	sd	$14,16($4)

	.set	noreorder
	ld	$17,40($29)		# epilogue
	ld	$16,32($29)
	jr	$31
	daddu	$29,6*8			# delay slot: release the frame
.end	poly1305_blocks_internal
/*
 * poly1305_emit
 *
 *   $4  in   state block; the hash words at 0/8/16($4) are read
 *   $5  out  16-byte tag, written one byte at a time
 *   $6  in   nonce, read as four 32-bit words
 *
 * Adds 5 to the hash and looks at bit 2 of the resulting top word.  If it
 * is set, the low 128 bits of hash+5 are used, otherwise the low 128 bits
 * of the hash itself.  The choice is made with and/or masks, not a branch.
 * The 128-bit nonce is then added (carry out of bit 127 is dropped) and the
 * result is stored least significant byte first.
 *
 * Scratch: $1, $2, $8-$11.  No stack frame is used; the state block is not
 * modified.
 */
.align	5
.globl	poly1305_emit
.ent	poly1305_emit
poly1305_emit:
	.frame	$29,0,$31
	.set	reorder

	ld	$10,0($4)		# h0
	ld	$11,8($4)		# h1
	ld	$1,16($4)		# h2

	daddiu	$8,$10,5		# compare to modulus
	sltiu	$2,$8,5			# carry out of h0 + 5
	daddu	$9,$11,$2
	sltu	$2,$9,$2		# carry out of h1
	daddu	$1,$1,$2

	dsrl	$1,2			# see if it carried/borrowed
	dsubu	$1,$0,$1		# $1 = mask selecting hash + 5
	nor	$2,$0,$1		# $2 = complement, selecting hash

	and	$8,$1
	and	$10,$2
	and	$9,$1
	and	$11,$2
	or	$8,$10			# low word of the selected value
	or	$9,$11			# high word of the selected value

	lwu	$10,0($6)		# load nonce
	lwu	$11,4($6)
	lwu	$1,8($6)
	lwu	$2,12($6)
	dsll	$11,32
	dsll	$2,32
	or	$10,$11			# nonce bits 0..63
	or	$1,$2			# nonce bits 64..127

	daddu	$8,$10		# accumulate nonce
	daddu	$9,$1
	sltu	$10,$8,$10		# carry from the low word
	daddu	$9,$10

	dsrl	$10,$8,8		# write mac value
	dsrl	$11,$8,16
	dsrl	$1,$8,24
	sb	$8,0($5)
	dsrl	$2,$8,32
	sb	$10,1($5)
	dsrl	$10,$8,40
	sb	$11,2($5)
	dsrl	$11,$8,48
	sb	$1,3($5)
	dsrl	$1,$8,56
	sb	$2,4($5)
	dsrl	$2,$9,8
	sb	$10,5($5)
	dsrl	$10,$9,16
	sb	$11,6($5)
	dsrl	$11,$9,24
	sb	$1,7($5)

	sb	$9,8($5)
	dsrl	$1,$9,32
	sb	$2,9($5)
	dsrl	$2,$9,40
	sb	$10,10($5)
	dsrl	$10,$9,48
	sb	$11,11($5)
	dsrl	$11,$9,56
	sb	$1,12($5)
	sb	$2,13($5)
	sb	$10,14($5)
	sb	$11,15($5)

	jr	$31
.end	poly1305_emit
/* Identification string placed in read-only data; no code references it. */
.rdata
.asciiz	"Poly1305 for MIPS64, CRYPTOGAMS by <appro@openssl.org>"
.align	2
