#include "arm_arch.h"
#include "arm_asm.h"
#ifdef __ARMEL__
# define LO 0
# define HI 4
# define WORD64(hi0,lo0,hi1,lo1)	.word	lo0,hi0, lo1,hi1
#else
# define HI 0
# define LO 4
# define WORD64(hi0,lo0,hi1,lo1)	.word	hi0,lo0, hi1,lo1
#endif

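@ K512 below holds the 80 SHA-512 round constants.  Each 64-bit constant
@ is emitted as two 32-bit words; on little-endian targets WORD64 swaps
@ the halves, so the LO/HI offsets defined above address the low and high
@ word of each 64-bit value regardless of byte order.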
.text
.code	32
.type	K512,%object
.align	5
K512:
WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.size	K512,.-K512
#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-sha512_block_data_order
.skip	32-4
#else
.skip	32
#endif

.global	sha512_block_data_order
.type	sha512_block_data_order,%function
sha512_block_data_order:
	sub	r3,pc,#8		@ sha512_block_data_order
	add	r2,r1,r2,lsl#7	@ len to point at the end of inp
#if __ARM_MAX_ARCH__>=7
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
	tst	r12,#1
	bne	.LNEON
#endif
	stmdb	sp!,{r4-r12,lr}
	sub	r14,r3,#672		@ K512
	sub	sp,sp,#9*8

	ldr	r7,[r0,#32+LO]
	ldr	r8,[r0,#32+HI]
	ldr	r9, [r0,#48+LO]
	ldr	r10, [r0,#48+HI]
	ldr	r11, [r0,#56+LO]
	ldr	r12, [r0,#56+HI]
.Loop:
	str	r9, [sp,#48+0]
	str	r10, [sp,#48+4]
	str	r11, [sp,#56+0]
	str	r12, [sp,#56+4]
	ldr	r5,[r0,#0+LO]
	ldr	r6,[r0,#0+HI]
	ldr	r3,[r0,#8+LO]
	ldr	r4,[r0,#8+HI]
	ldr	r9, [r0,#16+LO]
	ldr	r10, [r0,#16+HI]
	ldr	r11, [r0,#24+LO]
	ldr	r12, [r0,#24+HI]
	str	r3,[sp,#8+0]
	str	r4,[sp,#8+4]
	str	r9, [sp,#16+0]
	str	r10, [sp,#16+4]
	str	r11, [sp,#24+0]
	str	r12, [sp,#24+4]
	ldr	r3,[r0,#40+LO]
	ldr	r4,[r0,#40+HI]
	str	r3,[sp,#40+0]
	str	r4,[sp,#40+4]

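@ Rounds 0..15: one 64-bit input word is fetched per round.  Pre-ARMv7
@ cores are not assumed to support unaligned word loads, so the
@ big-endian word is assembled byte by byte; ARMv7+ uses two word loads
@ plus rev to byte-swap on little-endian.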
.L00_15:
#if __ARM_ARCH__<7
	ldrb	r3,[r1,#7]
	ldrb	r9, [r1,#6]
	ldrb	r10, [r1,#5]
	ldrb	r11, [r1,#4]
	ldrb	r4,[r1,#3]
	ldrb	r12, [r1,#2]
	orr	r3,r3,r9,lsl#8
	ldrb	r9, [r1,#1]
	orr	r3,r3,r10,lsl#16
	ldrb	r10, [r1],#8
	orr	r3,r3,r11,lsl#24
	orr	r4,r4,r12,lsl#8
	orr	r4,r4,r9,lsl#16
	orr	r4,r4,r10,lsl#24
#else
	ldr	r3,[r1,#4]
	ldr	r4,[r1],#8
#ifdef __ARMEL__
	rev	r3,r3
	rev	r4,r4
#endif
#endif
	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
	mov	r9,r7,lsr#14
	str	r3,[sp,#64+0]
	mov	r10,r8,lsr#14
	str	r4,[sp,#64+4]
	eor	r9,r9,r8,lsl#18
	ldr	r11,[sp,#56+0]	@ h.lo
	eor	r10,r10,r7,lsl#18
	ldr	r12,[sp,#56+4]	@ h.hi
	eor	r9,r9,r7,lsr#18
	eor	r10,r10,r8,lsr#18
	eor	r9,r9,r8,lsl#14
	eor	r10,r10,r7,lsl#14
	eor	r9,r9,r8,lsr#9
	eor	r10,r10,r7,lsr#9
	eor	r9,r9,r7,lsl#23
	eor	r10,r10,r8,lsl#23	@ Sigma1(e)
	adds	r3,r3,r9
	ldr	r9,[sp,#40+0]	@ f.lo
	adc	r4,r4,r10		@ T += Sigma1(e)
	ldr	r10,[sp,#40+4]	@ f.hi
	adds	r3,r3,r11
	ldr	r11,[sp,#48+0]	@ g.lo
	adc	r4,r4,r12		@ T += h
	ldr	r12,[sp,#48+4]	@ g.hi

	eor	r9,r9,r11
	str	r7,[sp,#32+0]
	eor	r10,r10,r12
	str	r8,[sp,#32+4]
	and	r9,r9,r7
	str	r5,[sp,#0+0]
	and	r10,r10,r8
	str	r6,[sp,#0+4]
	eor	r9,r9,r11
	ldr	r11,[r14,#LO]	@ K[i].lo
	eor	r10,r10,r12		@ Ch(e,f,g)
	ldr	r12,[r14,#HI]	@ K[i].hi

	adds	r3,r3,r9
	ldr	r7,[sp,#24+0]	@ d.lo
	adc	r4,r4,r10		@ T += Ch(e,f,g)
	ldr	r8,[sp,#24+4]	@ d.hi
	adds	r3,r3,r11
	and	r9,r11,#0xff
	adc	r4,r4,r12		@ T += K[i]
	adds	r7,r7,r3
	ldr	r11,[sp,#8+0]	@ b.lo
	adc	r8,r8,r4		@ d += T
	teq	r9,#148		@ low byte of K[15].lo (0xcf692694): last of rounds 0..15

	ldr	r12,[sp,#16+0]	@ c.lo
	orreq	r14,r14,#1
	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
	mov	r9,r5,lsr#28
	mov	r10,r6,lsr#28
	eor	r9,r9,r6,lsl#4
	eor	r10,r10,r5,lsl#4
	eor	r9,r9,r6,lsr#2
	eor	r10,r10,r5,lsr#2
	eor	r9,r9,r5,lsl#30
	eor	r10,r10,r6,lsl#30
	eor	r9,r9,r6,lsr#7
	eor	r10,r10,r5,lsr#7
	eor	r9,r9,r5,lsl#25
	eor	r10,r10,r6,lsl#25	@ Sigma0(a)
	adds	r3,r3,r9
	and	r9,r5,r11
	adc	r4,r4,r10		@ T += Sigma0(a)

	ldr	r10,[sp,#8+4]	@ b.hi
	orr	r5,r5,r11
	ldr	r11,[sp,#16+4]	@ c.hi
	and	r5,r5,r12
	and	r12,r6,r10
	orr	r6,r6,r10
	orr	r5,r5,r9		@ Maj(a,b,c).lo
	and	r6,r6,r11
	adds	r5,r5,r3
	orr	r6,r6,r12		@ Maj(a,b,c).hi
	sub	sp,sp,#8
	adc	r6,r6,r4		@ h += T
	tst	r14,#1
	add	r14,r14,#8
	tst	r14,#1
	beq	.L00_15
	ldr	r9,[sp,#184+0]
	ldr	r10,[sp,#184+4]
	bic	r14,r14,#1
.L16_79:
	@ sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
	@ LO		lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
	@ HI		hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
	mov	r3,r9,lsr#1
	ldr	r11,[sp,#80+0]
	mov	r4,r10,lsr#1
	ldr	r12,[sp,#80+4]
	eor	r3,r3,r10,lsl#31
	eor	r4,r4,r9,lsl#31
	eor	r3,r3,r9,lsr#8
	eor	r4,r4,r10,lsr#8
	eor	r3,r3,r10,lsl#24
	eor	r4,r4,r9,lsl#24
	eor	r3,r3,r9,lsr#7
	eor	r4,r4,r10,lsr#7
	eor	r3,r3,r10,lsl#25

	@ sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
	@ LO		lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
	@ HI		hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
	mov	r9,r11,lsr#19
	mov	r10,r12,lsr#19
	eor	r9,r9,r12,lsl#13
	eor	r10,r10,r11,lsl#13
	eor	r9,r9,r12,lsr#29
	eor	r10,r10,r11,lsr#29
	eor	r9,r9,r11,lsl#3
	eor	r10,r10,r12,lsl#3
	eor	r9,r9,r11,lsr#6
	eor	r10,r10,r12,lsr#6
	ldr	r11,[sp,#120+0]
	eor	r9,r9,r12,lsl#26

	ldr	r12,[sp,#120+4]
	adds	r3,r3,r9
	ldr	r9,[sp,#192+0]
	adc	r4,r4,r10

	ldr	r10,[sp,#192+4]
	adds	r3,r3,r11
	adc	r4,r4,r12
	adds	r3,r3,r9
	adc	r4,r4,r10
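	@ The adds/adc pairs above assembled the new schedule word
	@ X[i] = sigma1(X[i-2]) + X[i-7] + sigma0(X[i-15]) + X[i-16]:
	@ sp drops 8 bytes per round and each new X goes to [sp,#64], so
	@ [sp,#80], [sp,#120], [sp,#184] and [sp,#192] address X[i-2],
	@ X[i-7], X[i-15] and X[i-16] respectively.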
	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
	mov	r9,r7,lsr#14
	str	r3,[sp,#64+0]
	mov	r10,r8,lsr#14
	str	r4,[sp,#64+4]
	eor	r9,r9,r8,lsl#18
	ldr	r11,[sp,#56+0]	@ h.lo
	eor	r10,r10,r7,lsl#18
	ldr	r12,[sp,#56+4]	@ h.hi
	eor	r9,r9,r7,lsr#18
	eor	r10,r10,r8,lsr#18
	eor	r9,r9,r8,lsl#14
	eor	r10,r10,r7,lsl#14
	eor	r9,r9,r8,lsr#9
	eor	r10,r10,r7,lsr#9
	eor	r9,r9,r7,lsl#23
	eor	r10,r10,r8,lsl#23	@ Sigma1(e)
	adds	r3,r3,r9
	ldr	r9,[sp,#40+0]	@ f.lo
	adc	r4,r4,r10		@ T += Sigma1(e)
	ldr	r10,[sp,#40+4]	@ f.hi
	adds	r3,r3,r11
	ldr	r11,[sp,#48+0]	@ g.lo
	adc	r4,r4,r12		@ T += h
	ldr	r12,[sp,#48+4]	@ g.hi

	eor	r9,r9,r11
	str	r7,[sp,#32+0]
	eor	r10,r10,r12
	str	r8,[sp,#32+4]
	and	r9,r9,r7
	str	r5,[sp,#0+0]
	and	r10,r10,r8
	str	r6,[sp,#0+4]
	eor	r9,r9,r11
	ldr	r11,[r14,#LO]	@ K[i].lo
	eor	r10,r10,r12		@ Ch(e,f,g)
	ldr	r12,[r14,#HI]	@ K[i].hi

	adds	r3,r3,r9
	ldr	r7,[sp,#24+0]	@ d.lo
	adc	r4,r4,r10		@ T += Ch(e,f,g)
	ldr	r8,[sp,#24+4]	@ d.hi
	adds	r3,r3,r11
	and	r9,r11,#0xff
	adc	r4,r4,r12		@ T += K[i]
	adds	r7,r7,r3
	ldr	r11,[sp,#8+0]	@ b.lo
	adc	r8,r8,r4		@ d += T
	teq	r9,#23		@ low byte of K[79].lo (0x4a475817): final round

	ldr	r12,[sp,#16+0]	@ c.lo
	orreq	r14,r14,#1
	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
	mov	r9,r5,lsr#28
	mov	r10,r6,lsr#28
	eor	r9,r9,r6,lsl#4
	eor	r10,r10,r5,lsl#4
	eor	r9,r9,r6,lsr#2
	eor	r10,r10,r5,lsr#2
	eor	r9,r9,r5,lsl#30
	eor	r10,r10,r6,lsl#30
	eor	r9,r9,r6,lsr#7
	eor	r10,r10,r5,lsr#7
	eor	r9,r9,r5,lsl#25
	eor	r10,r10,r6,lsl#25	@ Sigma0(a)
	adds	r3,r3,r9
	and	r9,r5,r11
	adc	r4,r4,r10		@ T += Sigma0(a)

	ldr	r10,[sp,#8+4]	@ b.hi
	orr	r5,r5,r11
	ldr	r11,[sp,#16+4]	@ c.hi
	and	r5,r5,r12
	and	r12,r6,r10
	orr	r6,r6,r10
	orr	r5,r5,r9		@ Maj(a,b,c).lo
	and	r6,r6,r11
	adds	r5,r5,r3
	orr	r6,r6,r12		@ Maj(a,b,c).hi
	sub	sp,sp,#8
	adc	r6,r6,r4		@ h += T
	tst	r14,#1
	add	r14,r14,#8
	ldreq	r9,[sp,#184+0]
	ldreq	r10,[sp,#184+4]
	beq	.L16_79
	bic	r14,r14,#1

	ldr	r3,[sp,#8+0]
	ldr	r4,[sp,#8+4]
	ldr	r9, [r0,#0+LO]
	ldr	r10, [r0,#0+HI]
	ldr	r11, [r0,#8+LO]
	ldr	r12, [r0,#8+HI]
	adds	r9,r5,r9
	str	r9, [r0,#0+LO]
	adc	r10,r6,r10
	str	r10, [r0,#0+HI]
	adds	r11,r3,r11
	str	r11, [r0,#8+LO]
	adc	r12,r4,r12
	str	r12, [r0,#8+HI]

	ldr	r5,[sp,#16+0]
	ldr	r6,[sp,#16+4]
	ldr	r3,[sp,#24+0]
	ldr	r4,[sp,#24+4]
	ldr	r9, [r0,#16+LO]
	ldr	r10, [r0,#16+HI]
	ldr	r11, [r0,#24+LO]
	ldr	r12, [r0,#24+HI]
	adds	r9,r5,r9
	str	r9, [r0,#16+LO]
	adc	r10,r6,r10
	str	r10, [r0,#16+HI]
	adds	r11,r3,r11
	str	r11, [r0,#24+LO]
	adc	r12,r4,r12
	str	r12, [r0,#24+HI]

	ldr	r3,[sp,#40+0]
	ldr	r4,[sp,#40+4]
	ldr	r9, [r0,#32+LO]
	ldr	r10, [r0,#32+HI]
	ldr	r11, [r0,#40+LO]
	ldr	r12, [r0,#40+HI]
	adds	r7,r7,r9
	str	r7,[r0,#32+LO]
	adc	r8,r8,r10
	str	r8,[r0,#32+HI]
	adds	r11,r3,r11
	str	r11, [r0,#40+LO]
	adc	r12,r4,r12
	str	r12, [r0,#40+HI]

	ldr	r5,[sp,#48+0]
	ldr	r6,[sp,#48+4]
	ldr	r3,[sp,#56+0]
	ldr	r4,[sp,#56+4]
	ldr	r9, [r0,#48+LO]
	ldr	r10, [r0,#48+HI]
	ldr	r11, [r0,#56+LO]
	ldr	r12, [r0,#56+HI]
	adds	r9,r5,r9
	str	r9, [r0,#48+LO]
	adc	r10,r6,r10
	str	r10, [r0,#48+HI]
	adds	r11,r3,r11
	str	r11, [r0,#56+LO]
	adc	r12,r4,r12
	str	r12, [r0,#56+HI]

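	@ Each of the 80 rounds moved sp down and r14 (the K512 pointer)
	@ up by 8 bytes, so both are rewound by 640 before the next block.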
	add	sp,sp,#640
	sub	r14,r14,#640

	teq	r1,r2
	bne	.Loop

	add	sp,sp,#8*9		@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r12,pc}
#else
	ldmia	sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.align	4
.LNEON:
	dmb				@ errata #451034 on early Cortex A8
	vstmdb	sp!,{d8-d15}		@ ABI specification says so
	sub	r3,r3,#672		@ K512
	vldmia	r0,{d16-d23}		@ load context
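@ NEON path: the eight 64-bit state words a..h live in d16-d23 and the
@ 16-word message schedule in d0-d15 (q0-q7).  Each 64-bit rotate is
@ synthesized from vshr.u64 plus vsli.64 (shift amounts summing to 64),
@ and Ch/Maj each collapse to a single vbsl bit-select.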
.Loop_neon:
	vshr.u64	d24,d20,#14	@ 0
#if 0<16
	vld1.64	{d0},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d20,#18
#if 0>0
	vadd.i64	d16,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d20,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d20,#50
	vsli.64	d25,d20,#46
	vmov	d29,d20
	vsli.64	d26,d20,#23
#if 0<16 && defined(__ARMEL__)
	vrev64.8	d0,d0
#endif
	veor	d25,d24
	vbsl	d29,d21,d22		@ Ch(e,f,g)
	vshr.u64	d24,d16,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d23
	vshr.u64	d25,d16,#34
	vsli.64	d24,d16,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d16,#39
	vadd.i64	d28,d0
	vsli.64	d25,d16,#30
	veor	d30,d16,d17
	vsli.64	d26,d16,#25
	veor	d23,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d18,d17		@ Maj(a,b,c)
	veor	d23,d26			@ Sigma0(a)
	vadd.i64	d19,d27
	vadd.i64	d30,d27
	@ vadd.i64	d23,d30
	vshr.u64	d24,d19,#14	@ 1
#if 1<16
	vld1.64	{d1},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d19,#18
#if 1>0
	vadd.i64	d23,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d19,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d19,#50
	vsli.64	d25,d19,#46
	vmov	d29,d19
	vsli.64	d26,d19,#23
#if 1<16 && defined(__ARMEL__)
	vrev64.8	d1,d1
#endif
	veor	d25,d24
	vbsl	d29,d20,d21		@ Ch(e,f,g)
	vshr.u64	d24,d23,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d22
	vshr.u64	d25,d23,#34
	vsli.64	d24,d23,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d23,#39
	vadd.i64	d28,d1
	vsli.64	d25,d23,#30
	veor	d30,d23,d16
	vsli.64	d26,d23,#25
	veor	d22,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d17,d16		@ Maj(a,b,c)
	veor	d22,d26			@ Sigma0(a)
	vadd.i64	d18,d27
	vadd.i64	d30,d27
	@ vadd.i64	d22,d30
	vshr.u64	d24,d18,#14	@ 2
#if 2<16
	vld1.64	{d2},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d18,#18
#if 2>0
	vadd.i64	d22,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d18,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d18,#50
	vsli.64	d25,d18,#46
	vmov	d29,d18
	vsli.64	d26,d18,#23
#if 2<16 && defined(__ARMEL__)
	vrev64.8	d2,d2
#endif
	veor	d25,d24
	vbsl	d29,d19,d20		@ Ch(e,f,g)
	vshr.u64	d24,d22,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d21
	vshr.u64	d25,d22,#34
	vsli.64	d24,d22,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d22,#39
	vadd.i64	d28,d2
	vsli.64	d25,d22,#30
	veor	d30,d22,d23
	vsli.64	d26,d22,#25
	veor	d21,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d16,d23		@ Maj(a,b,c)
	veor	d21,d26			@ Sigma0(a)
	vadd.i64	d17,d27
	vadd.i64	d30,d27
	@ vadd.i64	d21,d30
	vshr.u64	d24,d17,#14	@ 3
#if 3<16
	vld1.64	{d3},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d17,#18
#if 3>0
	vadd.i64	d21,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d17,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d17,#50
	vsli.64	d25,d17,#46
	vmov	d29,d17
	vsli.64	d26,d17,#23
#if 3<16 && defined(__ARMEL__)
	vrev64.8	d3,d3
#endif
	veor	d25,d24
	vbsl	d29,d18,d19		@ Ch(e,f,g)
	vshr.u64	d24,d21,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d20
	vshr.u64	d25,d21,#34
	vsli.64	d24,d21,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d21,#39
	vadd.i64	d28,d3
	vsli.64	d25,d21,#30
	veor	d30,d21,d22
	vsli.64	d26,d21,#25
	veor	d20,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d23,d22		@ Maj(a,b,c)
	veor	d20,d26			@ Sigma0(a)
	vadd.i64	d16,d27
	vadd.i64	d30,d27
	@ vadd.i64	d20,d30
	vshr.u64	d24,d16,#14	@ 4
#if 4<16
	vld1.64	{d4},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d16,#18
#if 4>0
	vadd.i64	d20,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d16,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d16,#50
	vsli.64	d25,d16,#46
	vmov	d29,d16
	vsli.64	d26,d16,#23
#if 4<16 && defined(__ARMEL__)
	vrev64.8	d4,d4
#endif
	veor	d25,d24
	vbsl	d29,d17,d18		@ Ch(e,f,g)
	vshr.u64	d24,d20,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d19
	vshr.u64	d25,d20,#34
	vsli.64	d24,d20,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d20,#39
	vadd.i64	d28,d4
	vsli.64	d25,d20,#30
	veor	d30,d20,d21
	vsli.64	d26,d20,#25
	veor	d19,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d22,d21		@ Maj(a,b,c)
	veor	d19,d26			@ Sigma0(a)
	vadd.i64	d23,d27
	vadd.i64	d30,d27
	@ vadd.i64	d19,d30
	vshr.u64	d24,d23,#14	@ 5
#if 5<16
	vld1.64	{d5},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d23,#18
#if 5>0
	vadd.i64	d19,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d23,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d23,#50
	vsli.64	d25,d23,#46
	vmov	d29,d23
	vsli.64	d26,d23,#23
#if 5<16 && defined(__ARMEL__)
	vrev64.8	d5,d5
#endif
	veor	d25,d24
	vbsl	d29,d16,d17		@ Ch(e,f,g)
	vshr.u64	d24,d19,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d18
	vshr.u64	d25,d19,#34
	vsli.64	d24,d19,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d19,#39
	vadd.i64	d28,d5
	vsli.64	d25,d19,#30
	veor	d30,d19,d20
	vsli.64	d26,d19,#25
	veor	d18,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d21,d20		@ Maj(a,b,c)
	veor	d18,d26			@ Sigma0(a)
	vadd.i64	d22,d27
	vadd.i64	d30,d27
	@ vadd.i64	d18,d30
	vshr.u64	d24,d22,#14	@ 6
#if 6<16
	vld1.64	{d6},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d22,#18
#if 6>0
	vadd.i64	d18,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d22,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d22,#50
	vsli.64	d25,d22,#46
	vmov	d29,d22
	vsli.64	d26,d22,#23
#if 6<16 && defined(__ARMEL__)
	vrev64.8	d6,d6
#endif
	veor	d25,d24
	vbsl	d29,d23,d16		@ Ch(e,f,g)
	vshr.u64	d24,d18,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d17
	vshr.u64	d25,d18,#34
	vsli.64	d24,d18,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d18,#39
	vadd.i64	d28,d6
	vsli.64	d25,d18,#30
	veor	d30,d18,d19
	vsli.64	d26,d18,#25
	veor	d17,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d20,d19		@ Maj(a,b,c)
	veor	d17,d26			@ Sigma0(a)
	vadd.i64	d21,d27
	vadd.i64	d30,d27
	@ vadd.i64	d17,d30
	vshr.u64	d24,d21,#14	@ 7
#if 7<16
	vld1.64	{d7},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d21,#18
#if 7>0
	vadd.i64	d17,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d21,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d21,#50
	vsli.64	d25,d21,#46
	vmov	d29,d21
	vsli.64	d26,d21,#23
#if 7<16 && defined(__ARMEL__)
	vrev64.8	d7,d7
#endif
	veor	d25,d24
	vbsl	d29,d22,d23		@ Ch(e,f,g)
	vshr.u64	d24,d17,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d16
	vshr.u64	d25,d17,#34
	vsli.64	d24,d17,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d17,#39
	vadd.i64	d28,d7
	vsli.64	d25,d17,#30
	veor	d30,d17,d18
	vsli.64	d26,d17,#25
	veor	d16,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d19,d18		@ Maj(a,b,c)
	veor	d16,d26			@ Sigma0(a)
	vadd.i64	d20,d27
	vadd.i64	d30,d27
	@ vadd.i64	d16,d30
	vshr.u64	d24,d20,#14	@ 8
#if 8<16
	vld1.64	{d8},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d20,#18
#if 8>0
	vadd.i64	d16,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d20,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d20,#50
	vsli.64	d25,d20,#46
	vmov	d29,d20
	vsli.64	d26,d20,#23
#if 8<16 && defined(__ARMEL__)
	vrev64.8	d8,d8
#endif
	veor	d25,d24
	vbsl	d29,d21,d22		@ Ch(e,f,g)
	vshr.u64	d24,d16,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d23
	vshr.u64	d25,d16,#34
	vsli.64	d24,d16,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d16,#39
	vadd.i64	d28,d8
	vsli.64	d25,d16,#30
	veor	d30,d16,d17
	vsli.64	d26,d16,#25
	veor	d23,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d18,d17		@ Maj(a,b,c)
	veor	d23,d26			@ Sigma0(a)
	vadd.i64	d19,d27
	vadd.i64	d30,d27
	@ vadd.i64	d23,d30
	vshr.u64	d24,d19,#14	@ 9
#if 9<16
	vld1.64	{d9},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d19,#18
#if 9>0
	vadd.i64	d23,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d19,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d19,#50
	vsli.64	d25,d19,#46
	vmov	d29,d19
	vsli.64	d26,d19,#23
#if 9<16 && defined(__ARMEL__)
	vrev64.8	d9,d9
#endif
	veor	d25,d24
	vbsl	d29,d20,d21		@ Ch(e,f,g)
	vshr.u64	d24,d23,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d22
	vshr.u64	d25,d23,#34
	vsli.64	d24,d23,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d23,#39
	vadd.i64	d28,d9
	vsli.64	d25,d23,#30
	veor	d30,d23,d16
	vsli.64	d26,d23,#25
	veor	d22,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d17,d16		@ Maj(a,b,c)
	veor	d22,d26			@ Sigma0(a)
	vadd.i64	d18,d27
	vadd.i64	d30,d27
	@ vadd.i64	d22,d30
	vshr.u64	d24,d18,#14	@ 10
#if 10<16
	vld1.64	{d10},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d18,#18
#if 10>0
	vadd.i64	d22,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d18,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d18,#50
	vsli.64	d25,d18,#46
	vmov	d29,d18
	vsli.64	d26,d18,#23
#if 10<16 && defined(__ARMEL__)
	vrev64.8	d10,d10
#endif
	veor	d25,d24
	vbsl	d29,d19,d20		@ Ch(e,f,g)
	vshr.u64	d24,d22,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d21
	vshr.u64	d25,d22,#34
	vsli.64	d24,d22,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d22,#39
	vadd.i64	d28,d10
	vsli.64	d25,d22,#30
	veor	d30,d22,d23
	vsli.64	d26,d22,#25
	veor	d21,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d16,d23		@ Maj(a,b,c)
	veor	d21,d26			@ Sigma0(a)
	vadd.i64	d17,d27
	vadd.i64	d30,d27
	@ vadd.i64	d21,d30
	vshr.u64	d24,d17,#14	@ 11
#if 11<16
	vld1.64	{d11},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d17,#18
#if 11>0
	vadd.i64	d21,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d17,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d17,#50
	vsli.64	d25,d17,#46
	vmov	d29,d17
	vsli.64	d26,d17,#23
#if 11<16 && defined(__ARMEL__)
	vrev64.8	d11,d11
#endif
	veor	d25,d24
	vbsl	d29,d18,d19		@ Ch(e,f,g)
	vshr.u64	d24,d21,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d20
	vshr.u64	d25,d21,#34
	vsli.64	d24,d21,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d21,#39
	vadd.i64	d28,d11
	vsli.64	d25,d21,#30
	veor	d30,d21,d22
	vsli.64	d26,d21,#25
	veor	d20,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d23,d22		@ Maj(a,b,c)
	veor	d20,d26			@ Sigma0(a)
	vadd.i64	d16,d27
	vadd.i64	d30,d27
	@ vadd.i64	d20,d30
	vshr.u64	d24,d16,#14	@ 12
#if 12<16
	vld1.64	{d12},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d16,#18
#if 12>0
	vadd.i64	d20,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d16,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d16,#50
	vsli.64	d25,d16,#46
	vmov	d29,d16
	vsli.64	d26,d16,#23
#if 12<16 && defined(__ARMEL__)
	vrev64.8	d12,d12
#endif
	veor	d25,d24
	vbsl	d29,d17,d18		@ Ch(e,f,g)
	vshr.u64	d24,d20,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d19
	vshr.u64	d25,d20,#34
	vsli.64	d24,d20,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d20,#39
	vadd.i64	d28,d12
	vsli.64	d25,d20,#30
	veor	d30,d20,d21
	vsli.64	d26,d20,#25
	veor	d19,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d22,d21		@ Maj(a,b,c)
	veor	d19,d26			@ Sigma0(a)
	vadd.i64	d23,d27
	vadd.i64	d30,d27
	@ vadd.i64	d19,d30
	vshr.u64	d24,d23,#14	@ 13
#if 13<16
	vld1.64	{d13},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d23,#18
#if 13>0
	vadd.i64	d19,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d23,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d23,#50
	vsli.64	d25,d23,#46
	vmov	d29,d23
	vsli.64	d26,d23,#23
#if 13<16 && defined(__ARMEL__)
	vrev64.8	d13,d13
#endif
	veor	d25,d24
	vbsl	d29,d16,d17		@ Ch(e,f,g)
	vshr.u64	d24,d19,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d18
	vshr.u64	d25,d19,#34
	vsli.64	d24,d19,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d19,#39
	vadd.i64	d28,d13
	vsli.64	d25,d19,#30
	veor	d30,d19,d20
	vsli.64	d26,d19,#25
	veor	d18,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d21,d20		@ Maj(a,b,c)
	veor	d18,d26			@ Sigma0(a)
	vadd.i64	d22,d27
	vadd.i64	d30,d27
	@ vadd.i64	d18,d30
	vshr.u64	d24,d22,#14	@ 14
#if 14<16
	vld1.64	{d14},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d22,#18
#if 14>0
	vadd.i64	d18,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d22,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d22,#50
	vsli.64	d25,d22,#46
	vmov	d29,d22
	vsli.64	d26,d22,#23
#if 14<16 && defined(__ARMEL__)
	vrev64.8	d14,d14
#endif
	veor	d25,d24
	vbsl	d29,d23,d16		@ Ch(e,f,g)
	vshr.u64	d24,d18,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d17
	vshr.u64	d25,d18,#34
	vsli.64	d24,d18,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d18,#39
	vadd.i64	d28,d14
	vsli.64	d25,d18,#30
	veor	d30,d18,d19
	vsli.64	d26,d18,#25
	veor	d17,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d20,d19		@ Maj(a,b,c)
	veor	d17,d26			@ Sigma0(a)
	vadd.i64	d21,d27
	vadd.i64	d30,d27
	@ vadd.i64	d17,d30
	vshr.u64	d24,d21,#14	@ 15
#if 15<16
	vld1.64	{d15},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d21,#18
#if 15>0
	vadd.i64	d17,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d21,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d21,#50
	vsli.64	d25,d21,#46
	vmov	d29,d21
	vsli.64	d26,d21,#23
#if 15<16 && defined(__ARMEL__)
	vrev64.8	d15,d15
#endif
	veor	d25,d24
	vbsl	d29,d22,d23		@ Ch(e,f,g)
	vshr.u64	d24,d17,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d16
	vshr.u64	d25,d17,#34
	vsli.64	d24,d17,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d17,#39
	vadd.i64	d28,d15
	vsli.64	d25,d17,#30
	veor	d30,d17,d18
	vsli.64	d26,d17,#25
	veor	d16,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d19,d18		@ Maj(a,b,c)
	veor	d16,d26			@ Sigma0(a)
	vadd.i64	d20,d27
	vadd.i64	d30,d27
	@ vadd.i64	d16,d30
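@ Rounds 16..79: four iterations of a 16-round block (r12 counts them).
@ The schedule is extended two words at a time: q7 holds X[i+14..i+15],
@ the vext.8 instructions form the straddling pairs X[i+1..i+2] and
@ X[i+9..i+10], and sigma0/sigma1 operate on full q registers.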
	mov	r12,#4
.L16_79_neon:
	subs	r12,#1
	vshr.u64	q12,q7,#19
	vshr.u64	q13,q7,#61
	vadd.i64	d16,d30			@ h+=Maj from the past
	vshr.u64	q15,q7,#6
	vsli.64	q12,q7,#45
	vext.8	q14,q0,q1,#8	@ X[i+1]
	vsli.64	q13,q7,#3
	veor	q15,q12
	vshr.u64	q12,q14,#1
	veor	q15,q13				@ sigma1(X[i+14])
	vshr.u64	q13,q14,#8
	vadd.i64	q0,q15
	vshr.u64	q15,q14,#7
	vsli.64	q12,q14,#63
	vsli.64	q13,q14,#56
	vext.8	q14,q4,q5,#8	@ X[i+9]
	veor	q15,q12
	vshr.u64	d24,d20,#14	@ from NEON_00_15
	vadd.i64	q0,q14
	vshr.u64	d25,d20,#18	@ from NEON_00_15
	veor	q15,q13				@ sigma0(X[i+1])
	vshr.u64	d26,d20,#41	@ from NEON_00_15
	vadd.i64	q0,q15
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d20,#50
	vsli.64	d25,d20,#46
	vmov	d29,d20
	vsli.64	d26,d20,#23
#if 16<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d21,d22		@ Ch(e,f,g)
	vshr.u64	d24,d16,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d23
	vshr.u64	d25,d16,#34
	vsli.64	d24,d16,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d16,#39
	vadd.i64	d28,d0
	vsli.64	d25,d16,#30
	veor	d30,d16,d17
	vsli.64	d26,d16,#25
	veor	d23,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d18,d17		@ Maj(a,b,c)
	veor	d23,d26			@ Sigma0(a)
	vadd.i64	d19,d27
	vadd.i64	d30,d27
	@ vadd.i64	d23,d30
	vshr.u64	d24,d19,#14	@ 17
#if 17<16
	vld1.64	{d1},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d19,#18
#if 17>0
	vadd.i64	d23,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d19,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d19,#50
	vsli.64	d25,d19,#46
	vmov	d29,d19
	vsli.64	d26,d19,#23
#if 17<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d20,d21		@ Ch(e,f,g)
	vshr.u64	d24,d23,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d22
	vshr.u64	d25,d23,#34
	vsli.64	d24,d23,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d23,#39
	vadd.i64	d28,d1
	vsli.64	d25,d23,#30
	veor	d30,d23,d16
	vsli.64	d26,d23,#25
	veor	d22,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d17,d16		@ Maj(a,b,c)
	veor	d22,d26			@ Sigma0(a)
	vadd.i64	d18,d27
	vadd.i64	d30,d27
	@ vadd.i64	d22,d30
	vshr.u64	q12,q0,#19
	vshr.u64	q13,q0,#61
	vadd.i64	d22,d30			@ h+=Maj from the past
	vshr.u64	q15,q0,#6
	vsli.64	q12,q0,#45
	vext.8	q14,q1,q2,#8	@ X[i+1]
	vsli.64	q13,q0,#3
	veor	q15,q12
	vshr.u64	q12,q14,#1
	veor	q15,q13				@ sigma1(X[i+14])
	vshr.u64	q13,q14,#8
	vadd.i64	q1,q15
	vshr.u64	q15,q14,#7
	vsli.64	q12,q14,#63
	vsli.64	q13,q14,#56
	vext.8	q14,q5,q6,#8	@ X[i+9]
	veor	q15,q12
	vshr.u64	d24,d18,#14	@ from NEON_00_15
	vadd.i64	q1,q14
	vshr.u64	d25,d18,#18	@ from NEON_00_15
	veor	q15,q13				@ sigma0(X[i+1])
	vshr.u64	d26,d18,#41	@ from NEON_00_15
	vadd.i64	q1,q15
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d18,#50
	vsli.64	d25,d18,#46
	vmov	d29,d18
	vsli.64	d26,d18,#23
#if 18<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d19,d20		@ Ch(e,f,g)
	vshr.u64	d24,d22,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d21
	vshr.u64	d25,d22,#34
	vsli.64	d24,d22,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d22,#39
	vadd.i64	d28,d2
	vsli.64	d25,d22,#30
	veor	d30,d22,d23
	vsli.64	d26,d22,#25
	veor	d21,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d16,d23		@ Maj(a,b,c)
	veor	d21,d26			@ Sigma0(a)
	vadd.i64	d17,d27
	vadd.i64	d30,d27
	@ vadd.i64	d21,d30
	vshr.u64	d24,d17,#14	@ 19
#if 19<16
	vld1.64	{d3},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d17,#18
#if 19>0
	vadd.i64	d21,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d17,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
        vsli.64 d24,d17,#50
        vsli.64 d25,d17,#46
        vmov d29,d17
        vsli.64 d26,d17,#23
#if 19<16 && defined(__ARMEL__)
        vrev64.8 ,
#endif
        veor d25,d24
        vbsl d29,d18,d19 @ Ch(e,f,g)
        vshr.u64 d24,d21,#28
        veor d26,d25 @ Sigma1(e)
        vadd.i64 d27,d29,d20
        vshr.u64 d25,d21,#34
        vsli.64 d24,d21,#36
        vadd.i64 d27,d26
        vshr.u64 d26,d21,#39
        vadd.i64 d28,d3
        vsli.64 d25,d21,#30
        veor d30,d21,d22
        vsli.64 d26,d21,#25
        veor d20,d24,d25
        vadd.i64 d27,d28
        vbsl d30,d23,d22 @ Maj(a,b,c)
        veor d20,d26 @ Sigma0(a)
        vadd.i64 d16,d27
        vadd.i64 d30,d27
        @ vadd.i64 d20,d30
        vshr.u64 q12,q1,#19
        vshr.u64 q13,q1,#61
        vadd.i64 d20,d30 @ h+=Maj from the past
        vshr.u64 q15,q1,#6
        vsli.64 q12,q1,#45
        vext.8 q14,q2,q3,#8 @ X[i+1]
        vsli.64 q13,q1,#3
        veor q15,q12
        vshr.u64 q12,q14,#1
        veor q15,q13 @ sigma1(X[i+14])
        vshr.u64 q13,q14,#8
        vadd.i64 q2,q15
        vshr.u64 q15,q14,#7
        vsli.64 q12,q14,#63
        vsli.64 q13,q14,#56
        vext.8 q14,q6,q7,#8 @ X[i+9]
        veor q15,q12
        vshr.u64 d24,d16,#14 @ from NEON_00_15
        vadd.i64 q2,q14
        vshr.u64 d25,d16,#18 @ from NEON_00_15
        veor q15,q13 @ sigma0(X[i+1])
        vshr.u64 d26,d16,#41 @ from NEON_00_15
        vadd.i64 q2,q15
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d16,#50
        vsli.64 d25,d16,#46
        vmov d29,d16
        vsli.64 d26,d16,#23
#if 20<16 && defined(__ARMEL__)
        vrev64.8 ,
#endif
        veor d25,d24
        vbsl d29,d17,d18 @ Ch(e,f,g)
        vshr.u64 d24,d20,#28
        veor d26,d25 @ Sigma1(e)
        vadd.i64 d27,d29,d19
        vshr.u64 d25,d20,#34
        vsli.64 d24,d20,#36
        vadd.i64 d27,d26
        vshr.u64 d26,d20,#39
        vadd.i64 d28,d4
        vsli.64 d25,d20,#30
        veor d30,d20,d21
        vsli.64 d26,d20,#25
        veor d19,d24,d25
        vadd.i64 d27,d28
        vbsl d30,d22,d21 @ Maj(a,b,c)
        veor d19,d26 @ Sigma0(a)
        vadd.i64 d23,d27
        vadd.i64 d30,d27
        @ vadd.i64 d19,d30
        vshr.u64 d24,d23,#14 @ 21
#if 21<16
        vld1.64 {d5},[r1]! @ handles unaligned
#endif
        vshr.u64 d25,d23,#18
#if 21>0
        vadd.i64 d19,d30 @ h+=Maj from the past
#endif
        vshr.u64 d26,d23,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d23,#50
        vsli.64 d25,d23,#46
        vmov d29,d23
        vsli.64 d26,d23,#23
#if 21<16 && defined(__ARMEL__)
        vrev64.8 ,
#endif
        veor d25,d24
        vbsl d29,d16,d17 @ Ch(e,f,g)
        vshr.u64 d24,d19,#28
        veor d26,d25 @ Sigma1(e)
        vadd.i64 d27,d29,d18
        vshr.u64 d25,d19,#34
        vsli.64 d24,d19,#36
        vadd.i64 d27,d26
        vshr.u64 d26,d19,#39
        vadd.i64 d28,d5
        vsli.64 d25,d19,#30
        veor d30,d19,d20
        vsli.64 d26,d19,#25
        veor d18,d24,d25
        vadd.i64 d27,d28
        vbsl d30,d21,d20 @ Maj(a,b,c)
        veor d18,d26 @ Sigma0(a)
        vadd.i64 d22,d27
        vadd.i64 d30,d27
        @ vadd.i64 d18,d30
        vshr.u64 q12,q2,#19
        vshr.u64 q13,q2,#61
        vadd.i64 d18,d30 @ h+=Maj from the past
        vshr.u64 q15,q2,#6
        vsli.64 q12,q2,#45
        vext.8 q14,q3,q4,#8 @ X[i+1]
        vsli.64 q13,q2,#3
        veor q15,q12
        vshr.u64 q12,q14,#1
        veor q15,q13 @ sigma1(X[i+14])
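        @ The vext.8 ...,#8 instructions splice adjacent q registers to form
        @ the unaligned 128-bit windows X[i+1..i+2] and X[i+9..i+10], so both
        @ new schedule words are produced with full-width vector adds.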
        vshr.u64 q13,q14,#8
        vadd.i64 q3,q15
        vshr.u64 q15,q14,#7
        vsli.64 q12,q14,#63
        vsli.64 q13,q14,#56
        vext.8 q14,q7,q0,#8 @ X[i+9]
        veor q15,q12
        vshr.u64 d24,d22,#14 @ from NEON_00_15
        vadd.i64 q3,q14
        vshr.u64 d25,d22,#18 @ from NEON_00_15
        veor q15,q13 @ sigma0(X[i+1])
        vshr.u64 d26,d22,#41 @ from NEON_00_15
        vadd.i64 q3,q15
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d22,#50
        vsli.64 d25,d22,#46
        vmov d29,d22
        vsli.64 d26,d22,#23
#if 22<16 && defined(__ARMEL__)
        vrev64.8 ,
#endif
        veor d25,d24
        vbsl d29,d23,d16 @ Ch(e,f,g)
        vshr.u64 d24,d18,#28
        veor d26,d25 @ Sigma1(e)
        vadd.i64 d27,d29,d17
        vshr.u64 d25,d18,#34
        vsli.64 d24,d18,#36
        vadd.i64 d27,d26
        vshr.u64 d26,d18,#39
        vadd.i64 d28,d6
        vsli.64 d25,d18,#30
        veor d30,d18,d19
        vsli.64 d26,d18,#25
        veor d17,d24,d25
        vadd.i64 d27,d28
        vbsl d30,d20,d19 @ Maj(a,b,c)
        veor d17,d26 @ Sigma0(a)
        vadd.i64 d21,d27
        vadd.i64 d30,d27
        @ vadd.i64 d17,d30
        vshr.u64 d24,d21,#14 @ 23
#if 23<16
        vld1.64 {d7},[r1]! @ handles unaligned
#endif
        vshr.u64 d25,d21,#18
#if 23>0
        vadd.i64 d17,d30 @ h+=Maj from the past
#endif
        vshr.u64 d26,d21,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
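        @ Maj is software-pipelined: each round parks Maj(a,b,c)+T1 in d30 and
        @ only folds it into the working variable at the top of the next round
        @ (the "h+=Maj from the past" adds), apparently to shorten the
        @ per-round dependency chain on a.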
        vsli.64 d24,d21,#50
        vsli.64 d25,d21,#46
        vmov d29,d21
        vsli.64 d26,d21,#23
#if 23<16 && defined(__ARMEL__)
        vrev64.8 ,
#endif
        veor d25,d24
        vbsl d29,d22,d23 @ Ch(e,f,g)
        vshr.u64 d24,d17,#28
        veor d26,d25 @ Sigma1(e)
        vadd.i64 d27,d29,d16
        vshr.u64 d25,d17,#34
        vsli.64 d24,d17,#36
        vadd.i64 d27,d26
        vshr.u64 d26,d17,#39
        vadd.i64 d28,d7
        vsli.64 d25,d17,#30
        veor d30,d17,d18
        vsli.64 d26,d17,#25
        veor d16,d24,d25
        vadd.i64 d27,d28
        vbsl d30,d19,d18 @ Maj(a,b,c)
        veor d16,d26 @ Sigma0(a)
        vadd.i64 d20,d27
        vadd.i64 d30,d27
        @ vadd.i64 d16,d30
        vshr.u64 q12,q3,#19
        vshr.u64 q13,q3,#61
        vadd.i64 d16,d30 @ h+=Maj from the past
        vshr.u64 q15,q3,#6
        vsli.64 q12,q3,#45
        vext.8 q14,q4,q5,#8 @ X[i+1]
        vsli.64 q13,q3,#3
        veor q15,q12
        vshr.u64 q12,q14,#1
        veor q15,q13 @ sigma1(X[i+14])
        vshr.u64 q13,q14,#8
        vadd.i64 q4,q15
        vshr.u64 q15,q14,#7
        vsli.64 q12,q14,#63
        vsli.64 q13,q14,#56
        vext.8 q14,q0,q1,#8 @ X[i+9]
        veor q15,q12
        vshr.u64 d24,d20,#14 @ from NEON_00_15
        vadd.i64 q4,q14
        vshr.u64 d25,d20,#18 @ from NEON_00_15
        veor q15,q13 @ sigma0(X[i+1])
        vshr.u64 d26,d20,#41 @ from NEON_00_15
        vadd.i64 q4,q15
        vld1.64 {d28},[r3,:64]! @ K[i++]
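        @ The other big sigma, Sigma0(a) = ROTR64(a,28) ^ ROTR64(a,34) ^
        @ ROTR64(a,39), uses the same vshr/vsli pairing (#28/#36, #34/#30,
        @ #39/#25); the veor result lands in the register that serves as a
        @ next round, with the deferred Maj+T1 folded in afterwards.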
        vsli.64 d24,d20,#50
        vsli.64 d25,d20,#46
        vmov d29,d20
        vsli.64 d26,d20,#23
#if 24<16 && defined(__ARMEL__)
        vrev64.8 ,
#endif
        veor d25,d24
        vbsl d29,d21,d22 @ Ch(e,f,g)
        vshr.u64 d24,d16,#28
        veor d26,d25 @ Sigma1(e)
        vadd.i64 d27,d29,d23
        vshr.u64 d25,d16,#34
        vsli.64 d24,d16,#36
        vadd.i64 d27,d26
        vshr.u64 d26,d16,#39
        vadd.i64 d28,d8
        vsli.64 d25,d16,#30
        veor d30,d16,d17
        vsli.64 d26,d16,#25
        veor d23,d24,d25
        vadd.i64 d27,d28
        vbsl d30,d18,d17 @ Maj(a,b,c)
        veor d23,d26 @ Sigma0(a)
        vadd.i64 d19,d27
        vadd.i64 d30,d27
        @ vadd.i64 d23,d30
        vshr.u64 d24,d19,#14 @ 25
#if 25<16
        vld1.64 {d9},[r1]! @ handles unaligned
#endif
        vshr.u64 d25,d19,#18
#if 25>0
        vadd.i64 d23,d30 @ h+=Maj from the past
#endif
        vshr.u64 d26,d19,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d19,#50
        vsli.64 d25,d19,#46
        vmov d29,d19
        vsli.64 d26,d19,#23
#if 25<16 && defined(__ARMEL__)
        vrev64.8 ,
#endif
        veor d25,d24
        vbsl d29,d20,d21 @ Ch(e,f,g)
        vshr.u64 d24,d23,#28
        veor d26,d25 @ Sigma1(e)
        vadd.i64 d27,d29,d22
        vshr.u64 d25,d23,#34
        vsli.64 d24,d23,#36
        vadd.i64 d27,d26
        vshr.u64 d26,d23,#39
        vadd.i64 d28,d9
        vsli.64 d25,d23,#30
        veor d30,d23,d16
        vsli.64 d26,d23,#25
        veor d22,d24,d25
        vadd.i64 d27,d28
        vbsl d30,d17,d16 @ Maj(a,b,c)
        veor d22,d26 @ Sigma0(a)
        vadd.i64 d18,d27
        vadd.i64 d30,d27
        @ vadd.i64 d22,d30
        vshr.u64 q12,q4,#19
        vshr.u64 q13,q4,#61
        vadd.i64 d22,d30 @ h+=Maj from the past
        vshr.u64 q15,q4,#6
        vsli.64 q12,q4,#45
        vext.8 q14,q5,q6,#8 @ X[i+1]
        vsli.64 q13,q4,#3
        veor q15,q12
        vshr.u64 q12,q14,#1
        veor q15,q13 @ sigma1(X[i+14])
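        @ Round constants stream through d28: r3 walks the K512 table with one
        @ post-incremented 64-bit load per round (vld1.64 {d28},[r3,:64]!),
        @ and the current schedule word is added to d28 before it joins the
        @ T1 sum in d27.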
        vshr.u64 q13,q14,#8
        vadd.i64 q5,q15
        vshr.u64 q15,q14,#7
        vsli.64 q12,q14,#63
        vsli.64 q13,q14,#56
        vext.8 q14,q1,q2,#8 @ X[i+9]
        veor q15,q12
        vshr.u64 d24,d18,#14 @ from NEON_00_15
        vadd.i64 q5,q14
        vshr.u64 d25,d18,#18 @ from NEON_00_15
        veor q15,q13 @ sigma0(X[i+1])
        vshr.u64 d26,d18,#41 @ from NEON_00_15
        vadd.i64 q5,q15
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d18,#50
        vsli.64 d25,d18,#46
        vmov d29,d18
        vsli.64 d26,d18,#23
#if 26<16 && defined(__ARMEL__)
        vrev64.8 ,
#endif
        veor d25,d24
        vbsl d29,d19,d20 @ Ch(e,f,g)
        vshr.u64 d24,d22,#28
        veor d26,d25 @ Sigma1(e)
        vadd.i64 d27,d29,d21
        vshr.u64 d25,d22,#34
        vsli.64 d24,d22,#36
        vadd.i64 d27,d26
        vshr.u64 d26,d22,#39
        vadd.i64 d28,d10
        vsli.64 d25,d22,#30
        veor d30,d22,d23
        vsli.64 d26,d22,#25
        veor d21,d24,d25
        vadd.i64 d27,d28
        vbsl d30,d16,d23 @ Maj(a,b,c)
        veor d21,d26 @ Sigma0(a)
        vadd.i64 d17,d27
        vadd.i64 d30,d27
        @ vadd.i64 d21,d30
        vshr.u64 d24,d17,#14 @ 27
#if 27<16
        vld1.64 {d11},[r1]! @ handles unaligned
#endif
        vshr.u64 d25,d17,#18
#if 27>0
        vadd.i64 d21,d30 @ h+=Maj from the past
#endif
        vshr.u64 d26,d17,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
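        @ Register roles: the eight working variables live in d16-d23,
        @ q12-q15 (d24-d31) are rotate/select scratch, and d27 accumulates
        @ T1 = h + Sigma1(e) + Ch(e,f,g) + K[i] + W[i].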
        vsli.64 d24,d17,#50
        vsli.64 d25,d17,#46
        vmov d29,d17
        vsli.64 d26,d17,#23
#if 27<16 && defined(__ARMEL__)
        vrev64.8 ,
#endif
        veor d25,d24
        vbsl d29,d18,d19 @ Ch(e,f,g)
        vshr.u64 d24,d21,#28
        veor d26,d25 @ Sigma1(e)
        vadd.i64 d27,d29,d20
        vshr.u64 d25,d21,#34
        vsli.64 d24,d21,#36
        vadd.i64 d27,d26
        vshr.u64 d26,d21,#39
        vadd.i64 d28,d11
        vsli.64 d25,d21,#30
        veor d30,d21,d22
        vsli.64 d26,d21,#25
        veor d20,d24,d25
        vadd.i64 d27,d28
        vbsl d30,d23,d22 @ Maj(a,b,c)
        veor d20,d26 @ Sigma0(a)
        vadd.i64 d16,d27
        vadd.i64 d30,d27
        @ vadd.i64 d20,d30
        vshr.u64 q12,q5,#19
        vshr.u64 q13,q5,#61
        vadd.i64 d20,d30 @ h+=Maj from the past
        vshr.u64 q15,q5,#6
        vsli.64 q12,q5,#45
        vext.8 q14,q6,q7,#8 @ X[i+1]
        vsli.64 q13,q5,#3
        veor q15,q12
        vshr.u64 q12,q14,#1
        veor q15,q13 @ sigma1(X[i+14])
        vshr.u64 q13,q14,#8
        vadd.i64 q6,q15
        vshr.u64 q15,q14,#7
        vsli.64 q12,q14,#63
        vsli.64 q13,q14,#56
        vext.8 q14,q2,q3,#8 @ X[i+9]
        veor q15,q12
        vshr.u64 d24,d16,#14 @ from NEON_00_15
        vadd.i64 q6,q14
        vshr.u64 d25,d16,#18 @ from NEON_00_15
        veor q15,q13 @ sigma0(X[i+1])
        vshr.u64 d26,d16,#41 @ from NEON_00_15
        vadd.i64 q6,q15
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d16,#50
        vsli.64 d25,d16,#46
        vmov d29,d16
        vsli.64 d26,d16,#23
#if 28<16 && defined(__ARMEL__)
        vrev64.8 ,
#endif
        veor d25,d24
        vbsl d29,d17,d18 @ Ch(e,f,g)
        vshr.u64 d24,d20,#28
        veor d26,d25 @ Sigma1(e)
        vadd.i64 d27,d29,d19
        vshr.u64 d25,d20,#34
        vsli.64 d24,d20,#36
        vadd.i64 d27,d26
        vshr.u64 d26,d20,#39
        vadd.i64 d28,d12
        vsli.64 d25,d20,#30
        veor d30,d20,d21
        vsli.64 d26,d20,#25
        veor d19,d24,d25
        vadd.i64 d27,d28
        vbsl d30,d22,d21 @ Maj(a,b,c)
        veor d19,d26 @ Sigma0(a)
        vadd.i64 d23,d27
        vadd.i64 d30,d27
        @ vadd.i64 d19,d30
        vshr.u64 d24,d23,#14 @ 29
#if 29<16
        vld1.64 {d13},[r1]! @ handles unaligned
#endif
        vshr.u64 d25,d23,#18
#if 29>0
        vadd.i64 d19,d30 @ h+=Maj from the past
#endif
        vshr.u64 d26,d23,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d23,#50
        vsli.64 d25,d23,#46
        vmov d29,d23
        vsli.64 d26,d23,#23
#if 29<16 && defined(__ARMEL__)
        vrev64.8 ,
#endif
        veor d25,d24
        vbsl d29,d16,d17 @ Ch(e,f,g)
        vshr.u64 d24,d19,#28
        veor d26,d25 @ Sigma1(e)
        vadd.i64 d27,d29,d18
        vshr.u64 d25,d19,#34
        vsli.64 d24,d19,#36
        vadd.i64 d27,d26
        vshr.u64 d26,d19,#39
        vadd.i64 d28,d13
        vsli.64 d25,d19,#30
        veor d30,d19,d20
        vsli.64 d26,d19,#25
        veor d18,d24,d25
        vadd.i64 d27,d28
        vbsl d30,d21,d20 @ Maj(a,b,c)
        veor d18,d26 @ Sigma0(a)
        vadd.i64 d22,d27
        vadd.i64 d30,d27
        @ vadd.i64 d18,d30
        vshr.u64 q12,q6,#19
        vshr.u64 q13,q6,#61
        vadd.i64 d18,d30 @ h+=Maj from the past
        vshr.u64 q15,q6,#6
        vsli.64 q12,q6,#45
        vext.8 q14,q7,q0,#8 @ X[i+1]
        vsli.64 q13,q6,#3
        veor q15,q12
        vshr.u64 q12,q14,#1
        veor q15,q13 @ sigma1(X[i+14])
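        @ The sixteen message words X[0..15] stay resident in q0-q7 (two
        @ 64-bit words per q register) and are rewritten in place, so the
        @ schedule never spills to memory.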
        vshr.u64 q13,q14,#8
        vadd.i64 q7,q15
        vshr.u64 q15,q14,#7
        vsli.64 q12,q14,#63
        vsli.64 q13,q14,#56
        vext.8 q14,q3,q4,#8 @ X[i+9]
        veor q15,q12
        vshr.u64 d24,d22,#14 @ from NEON_00_15
        vadd.i64 q7,q14
        vshr.u64 d25,d22,#18 @ from NEON_00_15
        veor q15,q13 @ sigma0(X[i+1])
        vshr.u64 d26,d22,#41 @ from NEON_00_15
        vadd.i64 q7,q15
        vld1.64 {d28},[r3,:64]! @ K[i++]
        vsli.64 d24,d22,#50
        vsli.64 d25,d22,#46
        vmov d29,d22
        vsli.64 d26,d22,#23
#if 30<16 && defined(__ARMEL__)
        vrev64.8 ,
#endif
        veor d25,d24
        vbsl d29,d23,d16 @ Ch(e,f,g)
        vshr.u64 d24,d18,#28
        veor d26,d25 @ Sigma1(e)
        vadd.i64 d27,d29,d17
        vshr.u64 d25,d18,#34
        vsli.64 d24,d18,#36
        vadd.i64 d27,d26
        vshr.u64 d26,d18,#39
        vadd.i64 d28,d14
        vsli.64 d25,d18,#30
        veor d30,d18,d19
        vsli.64 d26,d18,#25
        veor d17,d24,d25
        vadd.i64 d27,d28
        vbsl d30,d20,d19 @ Maj(a,b,c)
        veor d17,d26 @ Sigma0(a)
        vadd.i64 d21,d27
        vadd.i64 d30,d27
        @ vadd.i64 d17,d30
        vshr.u64 d24,d21,#14 @ 31
#if 31<16
        vld1.64 {d15},[r1]! @ handles unaligned
#endif
        vshr.u64 d25,d21,#18
#if 31>0
        vadd.i64 d17,d30 @ h+=Maj from the past
#endif
        vshr.u64 d26,d21,#41
        vld1.64 {d28},[r3,:64]! @ K[i++]
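        @ Last round of this unrolled 16-round pass: the bne .L16_79_neon
        @ below branches back while schedule passes remain (its condition is
        @ established earlier in the loop body, outside this stretch), after
        @ which the vldmia/vadd/vstmia sequence folds the eight new working
        @ variables into the context at r0.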
        vsli.64 d24,d21,#50
        vsli.64 d25,d21,#46
        vmov d29,d21
        vsli.64 d26,d21,#23
#if 31<16 && defined(__ARMEL__)
        vrev64.8 ,
#endif
        veor d25,d24
        vbsl d29,d22,d23 @ Ch(e,f,g)
        vshr.u64 d24,d17,#28
        veor d26,d25 @ Sigma1(e)
        vadd.i64 d27,d29,d16
        vshr.u64 d25,d17,#34
        vsli.64 d24,d17,#36
        vadd.i64 d27,d26
        vshr.u64 d26,d17,#39
        vadd.i64 d28,d15
        vsli.64 d25,d17,#30
        veor d30,d17,d18
        vsli.64 d26,d17,#25
        veor d16,d24,d25
        vadd.i64 d27,d28
        vbsl d30,d19,d18 @ Maj(a,b,c)
        veor d16,d26 @ Sigma0(a)
        vadd.i64 d20,d27
        vadd.i64 d30,d27
        @ vadd.i64 d16,d30
        bne .L16_79_neon

        vadd.i64 d16,d30 @ h+=Maj from the past
        vldmia r0,{d24-d31} @ load context to temp
        vadd.i64 q8,q12 @ vectorized accumulate
        vadd.i64 q9,q13
        vadd.i64 q10,q14
        vadd.i64 q11,q15
        vstmia r0,{d16-d23} @ save context
        teq r1,r2
        sub r3,#640 @ rewind K512
        bne .Loop_neon

        vldmia sp!,{d8-d15} @ epilogue
        RET @ .word 0xe12fff1e
#endif
.size sha512_block_data_order,.-sha512_block_data_order
.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
.align 2
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
#endif