xref: /openbsd-src/lib/libcrypto/md5/md5_amd64_generic.S (revision 82ab76e10ac26541f6a2ebe5efefdab07fb8df8c)
/*	$OpenBSD: md5_amd64_generic.S,v 1.1 2025/01/24 13:35:04 jsing Exp $ */
/*
 * Copyright (c) 2025 Joel Sing <jsing@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Control-flow Enforcement Technology (CET) support.
 *
 * When the compiler defines __CET__, <cet.h> is expected to provide
 * _CET_ENDBR.  Otherwise define it to nothing, so that the function entry
 * point can use _CET_ENDBR unconditionally.
 */
#ifdef __CET__
#include <cet.h>
#else
#define _CET_ENDBR
#endif

/*
 * Register allocation.
 *
 * Function arguments, in System V AMD64 argument register order.  Note
 * that num (%rdx) and D (%edx) name the same register, so num is only
 * meaningful until the working state is first loaded.
 */
#define	ctx		%rdi
#define	in		%rsi
#define	num		%rdx

/* Address at which block processing stops (in + num * 64). */
#define	end		%rbp

/* Working state, updated by the round macros within a block. */
#define	A		%eax
#define	B		%ebx
#define	C		%ecx
#define	D		%edx

/* Hash state carried from one block to the next. */
#define	AA		%r8d
#define	BB		%r9d
#define	CC		%r10d
#define	DD		%r11d

/* Scratch registers used by the round macros. */
#define	tmp0		%r12d
#define	tmp1		%r13d

/*
 * Compute MD5 round 1 as:
 *
 *   a = b + rol(a + F(b, c, d) + x + t, s)
 *   F(x, y, z) = (x & y) | (~x & z)
 *              = ((y ^ z) & x) ^ z
 *
 * Macro arguments:
 *   a, b, c, d - 32-bit state registers; only a is written
 *   x          - index of the 32-bit message word, loaded from (x*4)(in)
 *   t          - additive constant for this step
 *   s          - left rotation count
 *
 * The leal adds both t and the F() result to a in a single instruction.
 * Clobbers tmp0 and the flags.
 */
#define md5_round1(a, b, c, d, x, t, s) \
	addl	(x*4)(in), a;					\
	movl	c, tmp0;					\
	xorl	d, tmp0;					\
	andl	b, tmp0;					\
	xorl	d, tmp0;					\
	leal	t(tmp0, a), a;					\
	roll	$s, a;						\
	addl	b, a;

/*
 * Compute MD5 round 2 as:
 *
 *   a = b + rol(a + G(b, c, d) + x + t, s)
 *   G(x, y, z) = (x & z) | (y & ~z)
 *
 * The two terms of G() can never have the same bit set (one is masked
 * with z, the other with ~z), so OR-ing them is the same as adding them.
 * Each term is therefore computed in its own scratch register and added
 * to a separately: (y & ~z) via addl, (x & z) together with t via leal.
 *
 * Macro arguments:
 *   a, b, c, d - 32-bit state registers; only a is written
 *   x          - index of the 32-bit message word, loaded from (x*4)(in)
 *   t          - additive constant for this step
 *   s          - left rotation count
 *
 * Clobbers tmp0, tmp1 and the flags.
 */
#define md5_round2(a, b, c, d, x, t, s) \
	addl	(x*4)(in), a;					\
	movl	d, tmp0;					\
	notl	tmp0;						\
	andl	c, tmp0;					\
	addl	tmp0, a;					\
	movl	d, tmp1;					\
	andl	b, tmp1;					\
	leal	t(tmp1, a), a;					\
	roll	$s, a;						\
	addl	b, a;

/*
 * Compute MD5 round 3 as:
 *
 *   a = b + rol(a + H(b, c, d) + x + t, s)
 *   H(x, y, z) = x ^ y ^ z;
 *
 * Macro arguments:
 *   a, b, c, d - 32-bit state registers; only a is written
 *   x          - index of the 32-bit message word, loaded from (x*4)(in)
 *   t          - additive constant for this step
 *   s          - left rotation count
 *
 * The leal adds both t and the H() result to a in a single instruction.
 * Clobbers tmp0 and the flags.
 */
#define md5_round3(a, b, c, d, x, t, s) \
	addl	(x*4)(in), a;					\
	movl	d, tmp0;					\
	xorl	c, tmp0;					\
	xorl	b, tmp0;					\
	leal	t(tmp0, a), a;					\
	roll	$s, a;						\
	addl	b, a;

/*
 * Compute MD5 round 4 as:
 *
 *   a = b + rol(a + I(b, c, d) + x + t, s)
 *   I(x, y, z) = y ^ (x | ~z)
 *
 * Macro arguments:
 *   a, b, c, d - 32-bit state registers; only a is written
 *   x          - index of the 32-bit message word, loaded from (x*4)(in)
 *   t          - additive constant for this step
 *   s          - left rotation count
 *
 * The leal adds both t and the I() result to a in a single instruction.
 * Clobbers tmp0 and the flags.
 */
#define md5_round4(a, b, c, d, x, t, s) \
	addl	(x*4)(in), a;					\
	movl	d, tmp0;					\
	notl	tmp0;						\
	orl	b, tmp0;					\
	xorl	c, tmp0;					\
	leal	t(tmp0, a), a;					\
	roll	$s, a;						\
	addl	b, a;

.text

/*
 * void md5_block_data_order(MD5_CTX *ctx, const void *in, size_t num);
 *
 * Standard x86-64 ABI: rdi = ctx, rsi = in, rdx = num
 *
 * Runs the MD5 block transform over num consecutive 64-byte blocks
 * starting at in.  The four 32-bit hash state words are read from, and
 * written back to, offsets 0, 4, 8 and 12 of ctx.  If num is zero the
 * function returns immediately without reading in or modifying ctx.
 *
 * Clobbers rax, rcx, rdx, rsi, r8-r11 and the flags.  rbx, rbp, r12 and
 * r13 are used as well, but are saved on entry and restored on exit.
 */
.align 16
.globl	md5_block_data_order
.type	md5_block_data_order,@function
md5_block_data_order:
	_CET_ENDBR

	/*
	 * The block loop tests its condition at the bottom and so always
	 * runs at least once.  Return early if there are no blocks, rather
	 * than reading 64 bytes past in and corrupting the hash state.
	 * Nothing has been pushed yet, so we can go straight to the ret.
	 */
	testq	num, num
	jz	.Ldone

	/* Save callee save registers. */
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13

	/* Compute end of message: end = in + num * 64. */
	shlq	$6, num
	leaq	(in, num, 1), end

	/* Load current hash state from context. */
	movl	(0*4)(ctx), AA
	movl	(1*4)(ctx), BB
	movl	(2*4)(ctx), CC
	movl	(3*4)(ctx), DD

	/* Jump over the alignment padding in front of the loop. */
	jmp	.Lblock_loop

.align 16
.Lblock_loop:
	/*
	 * Copy the hash state into the working registers.  Writing D
	 * overwrites num (both are rdx), which is no longer needed.
	 */
	movl	AA, A
	movl	BB, B
	movl	CC, C
	movl	DD, D

	md5_round1(A, B, C, D, 0, 0xd76aa478L, 7);
	md5_round1(D, A, B, C, 1, 0xe8c7b756L, 12);
	md5_round1(C, D, A, B, 2, 0x242070dbL, 17);
	md5_round1(B, C, D, A, 3, 0xc1bdceeeL, 22);
	md5_round1(A, B, C, D, 4, 0xf57c0fafL, 7);
	md5_round1(D, A, B, C, 5, 0x4787c62aL, 12);
	md5_round1(C, D, A, B, 6, 0xa8304613L, 17);
	md5_round1(B, C, D, A, 7, 0xfd469501L, 22);
	md5_round1(A, B, C, D, 8, 0x698098d8L, 7);
	md5_round1(D, A, B, C, 9, 0x8b44f7afL, 12);
	md5_round1(C, D, A, B, 10, 0xffff5bb1L, 17);
	md5_round1(B, C, D, A, 11, 0x895cd7beL, 22);
	md5_round1(A, B, C, D, 12, 0x6b901122L, 7);
	md5_round1(D, A, B, C, 13, 0xfd987193L, 12);
	md5_round1(C, D, A, B, 14, 0xa679438eL, 17);
	md5_round1(B, C, D, A, 15, 0x49b40821L, 22);

	md5_round2(A, B, C, D, 1, 0xf61e2562L, 5);
	md5_round2(D, A, B, C, 6, 0xc040b340L, 9);
	md5_round2(C, D, A, B, 11, 0x265e5a51L, 14);
	md5_round2(B, C, D, A, 0, 0xe9b6c7aaL, 20);
	md5_round2(A, B, C, D, 5, 0xd62f105dL, 5);
	md5_round2(D, A, B, C, 10, 0x02441453L, 9);
	md5_round2(C, D, A, B, 15, 0xd8a1e681L, 14);
	md5_round2(B, C, D, A, 4, 0xe7d3fbc8L, 20);
	md5_round2(A, B, C, D, 9, 0x21e1cde6L, 5);
	md5_round2(D, A, B, C, 14, 0xc33707d6L, 9);
	md5_round2(C, D, A, B, 3, 0xf4d50d87L, 14);
	md5_round2(B, C, D, A, 8, 0x455a14edL, 20);
	md5_round2(A, B, C, D, 13, 0xa9e3e905L, 5);
	md5_round2(D, A, B, C, 2, 0xfcefa3f8L, 9);
	md5_round2(C, D, A, B, 7, 0x676f02d9L, 14);
	md5_round2(B, C, D, A, 12, 0x8d2a4c8aL, 20);

	md5_round3(A, B, C, D, 5, 0xfffa3942L, 4);
	md5_round3(D, A, B, C, 8, 0x8771f681L, 11);
	md5_round3(C, D, A, B, 11, 0x6d9d6122L, 16);
	md5_round3(B, C, D, A, 14, 0xfde5380cL, 23);
	md5_round3(A, B, C, D, 1, 0xa4beea44L, 4);
	md5_round3(D, A, B, C, 4, 0x4bdecfa9L, 11);
	md5_round3(C, D, A, B, 7, 0xf6bb4b60L, 16);
	md5_round3(B, C, D, A, 10, 0xbebfbc70L, 23);
	md5_round3(A, B, C, D, 13, 0x289b7ec6L, 4);
	md5_round3(D, A, B, C, 0, 0xeaa127faL, 11);
	md5_round3(C, D, A, B, 3, 0xd4ef3085L, 16);
	md5_round3(B, C, D, A, 6, 0x04881d05L, 23);
	md5_round3(A, B, C, D, 9, 0xd9d4d039L, 4);
	md5_round3(D, A, B, C, 12, 0xe6db99e5L, 11);
	md5_round3(C, D, A, B, 15, 0x1fa27cf8L, 16);
	md5_round3(B, C, D, A, 2, 0xc4ac5665L, 23);

	md5_round4(A, B, C, D, 0, 0xf4292244L, 6);
	md5_round4(D, A, B, C, 7, 0x432aff97L, 10);
	md5_round4(C, D, A, B, 14, 0xab9423a7L, 15);
	md5_round4(B, C, D, A, 5, 0xfc93a039L, 21);
	md5_round4(A, B, C, D, 12, 0x655b59c3L, 6);
	md5_round4(D, A, B, C, 3, 0x8f0ccc92L, 10);
	md5_round4(C, D, A, B, 10, 0xffeff47dL, 15);
	md5_round4(B, C, D, A, 1, 0x85845dd1L, 21);
	md5_round4(A, B, C, D, 8, 0x6fa87e4fL, 6);
	md5_round4(D, A, B, C, 15, 0xfe2ce6e0L, 10);
	md5_round4(C, D, A, B, 6, 0xa3014314L, 15);
	md5_round4(B, C, D, A, 13, 0x4e0811a1L, 21);
	md5_round4(A, B, C, D, 4, 0xf7537e82L, 6);
	md5_round4(D, A, B, C, 11, 0xbd3af235L, 10);
	md5_round4(C, D, A, B, 2, 0x2ad7d2bbL, 15);
	md5_round4(B, C, D, A, 9, 0xeb86d391L, 21);

	/* Add intermediate state to hash state. */
	addl	A, AA
	addl	B, BB
	addl	C, CC
	addl	D, DD

	/* Advance to the next block; loop while in is below end. */
	addq	$64, in
	cmpq	end, in
	jb	.Lblock_loop

	/* Store new hash state to context. */
	movl	AA, (0*4)(ctx)
	movl	BB, (1*4)(ctx)
	movl	CC, (2*4)(ctx)
	movl	DD, (3*4)(ctx)

	/* Restore callee save registers. */
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx

.Ldone:
	ret
.size	md5_block_data_order, .-md5_block_data_order