#include "arm_arch.h"
#include "arm_asm.h"

.text
.fpu	neon
.code	32
.global	gcm_init_v8
.type	gcm_init_v8,%function
.align	4
gcm_init_v8:
	vld1.64	{q9},[r1]		@ load H
	vmov.i8	q8,#0xe1
	vext.8	q3,q9,q9,#8
	vshl.i64	q8,q8,#57
	vshr.u64	q10,q8,#63
	vext.8	q8,q10,q8,#8		@ t0=0xc2....01
	vdup.32	q9,d18[1]
	vshr.u64	q11,q3,#63
	vshr.s32	q9,q9,#31		@ broadcast carry bit
	vand	q11,q11,q8
	vshl.i64	q3,q3,#1
	vext.8	q11,q11,q11,#8
	vand	q8,q8,q9
	vorr	q3,q3,q11		@ H<<<=1
	veor	q3,q3,q8		@ twisted H
	vst1.64	{q3},[r0]

	RET
.size	gcm_init_v8,.-gcm_init_v8

.global	gcm_gmult_v8
.type	gcm_gmult_v8,%function
.align	4
gcm_gmult_v8:
	vld1.64	{q9},[r0]		@ load Xi
	vmov.i8	q11,#0xe1
	vld1.64	{q12},[r1]		@ load twisted H
	vshl.u64	q11,q11,#57
#ifndef __ARMEB__
	vrev64.8	q9,q9
#endif
	vext.8	q13,q12,q12,#8
	mov	r3,#0
	vext.8	q3,q9,q9,#8
	mov	r12,#0
	veor	q13,q13,q12		@ Karatsuba pre-processing
	mov	r2,r0
	b	.Lgmult_v8
.size	gcm_gmult_v8,.-gcm_gmult_v8

.global	gcm_ghash_v8
.type	gcm_ghash_v8,%function
.align	4
gcm_ghash_v8:
	vld1.64	{q0},[r0]		@ load [rotated] Xi
	subs	r3,r3,#16
	vmov.i8	q11,#0xe1
	mov	r12,#16
	vld1.64	{q12},[r1]		@ load twisted H
	moveq	r12,#0
	vext.8	q0,q0,q0,#8
	vshl.u64	q11,q11,#57
	vld1.64	{q9},[r2],r12	@ load [rotated] inp
	vext.8	q13,q12,q12,#8
#ifndef __ARMEB__
	vrev64.8	q0,q0
	vrev64.8	q9,q9
#endif
	veor	q13,q13,q12		@ Karatsuba pre-processing
	vext.8	q3,q9,q9,#8
	b	.Loop_v8

.align	4
.Loop_v8:
	vext.8	q10,q0,q0,#8
	veor	q3,q3,q0		@ inp^=Xi
	veor	q9,q9,q10		@ q9 is rotated inp^Xi

.Lgmult_v8:
	.inst	0xf2a80e86	@ pmull q0,q12,q3		@ H.lo·Xi.lo
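	@ Xi·H in GF(2^128) is assembled from three 64x64->128-bit
	@ carry-less multiplies (Karatsuba, see pre/post-processing):
	@   Xl = H.lo·Xi.lo,  Xh = H.hi·Xi.hi,
	@   Xm = (H.lo^H.hi)·(Xi.lo^Xi.hi) ^ Xl ^ Xh
	@ and then reduced modulo x^128+x^7+x^2+x+1 in two phases
	@ using the folded constant in q11 (0xc2 in the top byte).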
	veor	q9,q9,q3		@ Karatsuba pre-processing
	.inst	0xf2a94e87	@ pmull2 q2,q12,q3		@ H.hi·Xi.hi
	subs	r3,r3,#16
	.inst	0xf2aa2ea2	@ pmull q1,q13,q9		@ (H.lo+H.hi)·(Xi.lo+Xi.hi)
	moveq	r12,#0

	vext.8	q9,q0,q2,#8		@ Karatsuba post-processing
	veor	q10,q0,q2
	veor	q1,q1,q9
	vld1.64	{q9},[r2],r12	@ load [rotated] inp
	veor	q1,q1,q10
	.inst	0xf2e04e26	@ pmull q10,q0,q11		@ 1st phase

	vmov	d4,d3		@ Xh|Xm - 256-bit result
	vmov	d3,d0		@ Xm is rotated Xl
#ifndef __ARMEB__
	vrev64.8	q9,q9
#endif
	veor	q0,q1,q10
	vext.8	q3,q9,q9,#8

	vext.8	q10,q0,q0,#8		@ 2nd phase
	.inst	0xf2a00e26	@ pmull q0,q0,q11
	veor	q10,q10,q2
	veor	q0,q0,q10
	bhs	.Loop_v8

#ifndef __ARMEB__
	vrev64.8	q0,q0
#endif
	vext.8	q0,q0,q0,#8
	vst1.64	{q0},[r0]		@ write out Xi

	RET
.size	gcm_ghash_v8,.-gcm_ghash_v8
.asciz  "GHASH for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.align  2
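
@ These entry points are assumed to be called from the OpenSSL GCM glue
@ (crypto/modes/gcm128.c) with AAPCS arguments in r0..r3:
@   void gcm_init_v8(u128 Htable[16], const u64 H[2]);    @ r0=Htable, r1=H
@   void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);  @ r0=Xi, r1=Htable
@   void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16],
@                     const u8 *inp, size_t len);         @ r2=inp, r3=len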