/* $OpenBSD: md5_amd64_generic.S,v 1.1 2025/01/24 13:35:04 jsing Exp $ */
/*
 * Copyright (c) 2025 Joel Sing <jsing@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * When the compiler does not define __CET__, _CET_ENDBR expands to nothing
 * so the entry point below assembles unchanged.
 */
#ifdef __CET__
#include <cet.h>
#else
#define _CET_ENDBR
#endif

/* Function arguments, in the registers they arrive in. */
#define	ctx	%rdi
#define	in	%rsi
#define	num	%rdx

/* Address at which block processing stops (in + num * 64). */
#define	end	%rbp

/*
 * Working variables for the block being processed. D lives in the low half
 * of the register that carries num; this is safe because num is consumed
 * (to compute end) before D is first written.
 */
#define	A	%eax
#define	B	%ebx
#define	C	%ecx
#define	D	%edx

/* Hash state, held in registers for the duration of the call. */
#define	AA	%r8d
#define	BB	%r9d
#define	CC	%r10d
#define	DD	%r11d

/* Scratch registers used by the round macros. */
#define	tmp0	%r12d
#define	tmp1	%r13d

/*
 * Arguments common to all four round macros:
 *
 *   a, b, c, d  working variable registers; only a is written
 *   x           index of the 32-bit message word read from in
 *   t           additive constant for the step
 *   s           left rotate count for the step
 */

/*
 * Compute MD5 round 1 as:
 *
 *  a = b + rol(a + F(b, c, d) + x + t, s)
 *  F(x, y, z) = (x & y) | (~x & z)
 *             = ((y ^ z) & x) ^ z
 *
 * Clobbers tmp0.
 */
#define md5_round1(a, b, c, d, x, t, s) \
	addl	(x*4)(in), a; \
	movl	c, tmp0; \
	xorl	d, tmp0; \
	andl	b, tmp0; \
	xorl	d, tmp0; \
	leal	t(tmp0, a), a; \
	roll	$s, a; \
	addl	b, a;

/*
 * Compute MD5 round 2 as:
 *
 *  a = b + rol(a + G(b, c, d) + x + t, s)
 *  G(x, y, z) = (x & z) | (y & ~z)
 *
 * The two terms of G can never have the same bit set, so they are added
 * into a separately rather than being combined with an or first.
 *
 * Clobbers tmp0 and tmp1.
 */
#define md5_round2(a, b, c, d, x, t, s) \
	addl	(x*4)(in), a; \
	movl	d, tmp0; \
	xorl	$-1, tmp0; \
	andl	c, tmp0; \
	addl	tmp0, a; \
	movl	d, tmp1; \
	andl	b, tmp1; \
	leal	t(tmp1, a), a; \
	roll	$s, a; \
	addl	b, a;

/*
 * Compute MD5 round 3 as:
 *
 *  a = b + rol(a + H(b, c, d) + x + t, s)
 *  H(x, y, z) = x ^ y ^ z;
 *
 * Clobbers tmp0.
 */
#define md5_round3(a, b, c, d, x, t, s) \
	addl	(x*4)(in), a; \
	movl	d, tmp0; \
	xorl	c, tmp0; \
	xorl	b, tmp0; \
	leal	t(tmp0, a), a; \
	roll	$s, a; \
	addl	b, a;

/*
 * Compute MD5 round 4 as:
 *
 *  a = b + rol(a + I(b, c, d) + x + t, s)
 *  I(x, y, z) = y ^ (x | ~z)
 *
 * Clobbers tmp0.
 */
#define md5_round4(a, b, c, d, x, t, s) \
	addl	(x*4)(in), a; \
	movl	d, tmp0; \
	xorl	$-1, tmp0; \
	orl	b, tmp0; \
	xorl	c, tmp0; \
	leal	t(tmp0, a), a; \
	roll	$s, a; \
	addl	b, a;

.text

/*
 * void md5_block_data_order(MD5_CTX *ctx, const void *in, size_t num);
 *
 * Standard x86-64 ABI: rdi = ctx, rsi = in, rdx = num
 *
 * Runs the MD5 block function over num consecutive 64 byte blocks starting
 * at in. The four 32-bit state words are read from the first 16 bytes of
 * ctx on entry and written back there once all blocks have been processed.
 * A call with num == 0 returns without reading in or touching ctx.
 */
.align 16
.globl	md5_block_data_order
.type	md5_block_data_order,@function
md5_block_data_order:
	_CET_ENDBR

	/*
	 * Nothing to do for zero blocks. The loop below tests its exit
	 * condition only after processing a block, so without this check it
	 * would read 64 bytes from in and update the state even when num is 0.
	 * Nothing has been pushed yet, so it is safe to go straight to ret.
	 */
	testq	num, num
	jz	.Ldone

	/* Save callee save registers. */
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13

	/* Compute end of message (num * 64 bytes past in). */
	shlq	$6, num
	leaq	(in, num, 1), end

	/* Load current hash state from context. */
	movl	(0*4)(ctx), AA
	movl	(1*4)(ctx), BB
	movl	(2*4)(ctx), CC
	movl	(3*4)(ctx), DD

	jmp	.Lblock_loop

.align 16
.Lblock_loop:
	/* Start each block from the current hash state. */
	movl	AA, A
	movl	BB, B
	movl	CC, C
	movl	DD, D

	md5_round1(A, B, C, D, 0, 0xd76aa478L, 7);
	md5_round1(D, A, B, C, 1, 0xe8c7b756L, 12);
	md5_round1(C, D, A, B, 2, 0x242070dbL, 17);
	md5_round1(B, C, D, A, 3, 0xc1bdceeeL, 22);
	md5_round1(A, B, C, D, 4, 0xf57c0fafL, 7);
	md5_round1(D, A, B, C, 5, 0x4787c62aL, 12);
	md5_round1(C, D, A, B, 6, 0xa8304613L, 17);
	md5_round1(B, C, D, A, 7, 0xfd469501L, 22);
	md5_round1(A, B, C, D, 8, 0x698098d8L, 7);
	md5_round1(D, A, B, C, 9, 0x8b44f7afL, 12);
	md5_round1(C, D, A, B, 10, 0xffff5bb1L, 17);
	md5_round1(B, C, D, A, 11, 0x895cd7beL, 22);
	md5_round1(A, B, C, D, 12, 0x6b901122L, 7);
	md5_round1(D, A, B, C, 13, 0xfd987193L, 12);
	md5_round1(C, D, A, B, 14, 0xa679438eL, 17);
	md5_round1(B, C, D, A, 15, 0x49b40821L, 22);

	md5_round2(A, B, C, D, 1, 0xf61e2562L, 5);
	md5_round2(D, A, B, C, 6, 0xc040b340L, 9);
	md5_round2(C, D, A, B, 11, 0x265e5a51L, 14);
	md5_round2(B, C, D, A, 0, 0xe9b6c7aaL, 20);
	md5_round2(A, B, C, D, 5, 0xd62f105dL, 5);
	md5_round2(D, A, B, C, 10, 0x02441453L, 9);
	md5_round2(C, D, A, B, 15, 0xd8a1e681L, 14);
	md5_round2(B, C, D, A, 4, 0xe7d3fbc8L, 20);
	md5_round2(A, B, C, D, 9, 0x21e1cde6L, 5);
	md5_round2(D, A, B, C, 14, 0xc33707d6L, 9);
	md5_round2(C, D, A, B, 3, 0xf4d50d87L, 14);
	md5_round2(B, C, D, A, 8, 0x455a14edL, 20);
	md5_round2(A, B, C, D, 13, 0xa9e3e905L, 5);
	md5_round2(D, A, B, C, 2, 0xfcefa3f8L, 9);
	md5_round2(C, D, A, B, 7, 0x676f02d9L, 14);
	md5_round2(B, C, D, A, 12, 0x8d2a4c8aL, 20);

	md5_round3(A, B, C, D, 5, 0xfffa3942L, 4);
	md5_round3(D, A, B, C, 8, 0x8771f681L, 11);
	md5_round3(C, D, A, B, 11, 0x6d9d6122L, 16);
	md5_round3(B, C, D, A, 14, 0xfde5380cL, 23);
	md5_round3(A, B, C, D, 1, 0xa4beea44L, 4);
	md5_round3(D, A, B, C, 4, 0x4bdecfa9L, 11);
	md5_round3(C, D, A, B, 7, 0xf6bb4b60L, 16);
	md5_round3(B, C, D, A, 10, 0xbebfbc70L, 23);
	md5_round3(A, B, C, D, 13, 0x289b7ec6L, 4);
	md5_round3(D, A, B, C, 0, 0xeaa127faL, 11);
	md5_round3(C, D, A, B, 3, 0xd4ef3085L, 16);
	md5_round3(B, C, D, A, 6, 0x04881d05L, 23);
	md5_round3(A, B, C, D, 9, 0xd9d4d039L, 4);
	md5_round3(D, A, B, C, 12, 0xe6db99e5L, 11);
	md5_round3(C, D, A, B, 15, 0x1fa27cf8L, 16);
	md5_round3(B, C, D, A, 2, 0xc4ac5665L, 23);

	md5_round4(A, B, C, D, 0, 0xf4292244L, 6);
	md5_round4(D, A, B, C, 7, 0x432aff97L, 10);
	md5_round4(C, D, A, B, 14, 0xab9423a7L, 15);
	md5_round4(B, C, D, A, 5, 0xfc93a039L, 21);
	md5_round4(A, B, C, D, 12, 0x655b59c3L, 6);
	md5_round4(D, A, B, C, 3, 0x8f0ccc92L, 10);
	md5_round4(C, D, A, B, 10, 0xffeff47dL, 15);
	md5_round4(B, C, D, A, 1, 0x85845dd1L, 21);
	md5_round4(A, B, C, D, 8, 0x6fa87e4fL, 6);
	md5_round4(D, A, B, C, 15, 0xfe2ce6e0L, 10);
	md5_round4(C, D, A, B, 6, 0xa3014314L, 15);
	md5_round4(B, C, D, A, 13, 0x4e0811a1L, 21);
	md5_round4(A, B, C, D, 4, 0xf7537e82L, 6);
	md5_round4(D, A, B, C, 11, 0xbd3af235L, 10);
	md5_round4(C, D, A, B, 2, 0x2ad7d2bbL, 15);
	md5_round4(B, C, D, A, 9, 0xeb86d391L, 21);

	/* Add intermediate state to hash state. */
	addl	A, AA
	addl	B, BB
	addl	C, CC
	addl	D, DD

	/* Advance to the next block and continue while in is below end. */
	addq	$64, in
	cmpq	end, in
	jb	.Lblock_loop

	/* Store new hash state to context. */
	movl	AA, (0*4)(ctx)
	movl	BB, (1*4)(ctx)
	movl	CC, (2*4)(ctx)
	movl	DD, (3*4)(ctx)

	/* Restore callee save registers. */
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx

.Ldone:
	ret