xref: /onnv-gate/usr/src/common/crypto/modes/amd64/gcm_intel.s (revision 11141:64e602617ba4)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009 Intel Corporation
 * All Rights Reserved.
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
 * instructions.  This file contains an accelerated
 * Galois Field Multiplication implementation.
 *
 * PCLMULQDQ is used to accelerate the most time-consuming part of GHASH,
 * carry-less multiplication. More information about PCLMULQDQ can be
 * found at:
 * http://software.intel.com/en-us/articles/
 * carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
 *
 */

/*
 * ====================================================================
 * OpenSolaris OS modifications
 *
 * This source originates as file galois_hash_asm.c from
 * Intel Corporation dated September 21, 2009.
 *
 * This OpenSolaris version has these major changes from the original source:
 *
 * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
 * /usr/include/sys/asm_linkage.h, lint(1B) guards, and a dummy C function
 * definition for lint.
 *
 * 2. Formatted code, added comments, and added #includes and #defines.
 *
 * 3. If bit CR0.TS is set, clear the TS bit after kpreempt_disable() has
 * been called and set it again before kpreempt_enable() is called.
 * If the TS bit is not set, save and restore the %xmm registers at the
 * beginning and end of the function (%xmm* registers are not saved and
 * restored during kernel thread preemption).
 *
 * 4. Removed code to perform hashing.  This is already done with C macro
 * GHASH in gcm.c.  For better performance, this removed code should be
 * reintegrated in the future to replace the C GHASH macro.
 *
 * 5. Added code to byte swap 16-byte input and output.
 *
 * 6. Folded in comments from the original C source with embedded assembly
 * (SB_w_shift_xor.c)
 *
 * 7. Renamed function and reordered parameters to match OpenSolaris:
 * Intel interface:
 *	void galois_hash_asm(unsigned char *hk, unsigned char *s,
 *		unsigned char *d, int length)
 * OpenSolaris OS interface:
 *	void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res);
 * ====================================================================
 */


8410627Sopensolaris@drydog.com#if defined(lint) || defined(__lint)
8510627Sopensolaris@drydog.com
8610627Sopensolaris@drydog.com#include <sys/types.h>
8710627Sopensolaris@drydog.com
8810627Sopensolaris@drydog.com/* ARGSUSED */
8910627Sopensolaris@drydog.comvoid
9010627Sopensolaris@drydog.comgcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res) {
9110627Sopensolaris@drydog.com}
9210627Sopensolaris@drydog.com
9310627Sopensolaris@drydog.com#else	/* lint */
9410627Sopensolaris@drydog.com
#include <sys/asm_linkage.h>
#include <sys/controlregs.h>
#ifdef _KERNEL
#include <sys/machprivregs.h>
#endif

#ifdef _KERNEL
/*
 * PROTECTED_CLTS: clear CR0.TS with the CLTS macro while keeping
 * P2 (%rsi) intact, because %rsi is still needed after the macro runs.
 *
 * Note: the CLTS macro clobbers P2 (%rsi) under i86xpv.  That is,
 * it calls HYPERVISOR_fpu_taskswitch() which modifies %rsi when it
 * uses it to pass P2 to syscall.
 * This also occurs with the STTS macro, but we don't care if
 * P2 (%rsi) is modified just before function exit.
 * The CLTS and STTS macros push and pop P1 (%rdi) already.
 */
#ifdef __xpv
#define	PROTECTED_CLTS \
	push	%rsi; \
	CLTS; \
	pop	%rsi
#else
#define	PROTECTED_CLTS \
	CLTS
#endif	/* __xpv */

/*
 * CLEAR_TS_OR_PUSH_XMM_REGISTERS(tmpreg)
 *
 * Save %rbp and copy %rsp into it so that the original stack pointer
 * can be restored later, then read %cr0 into tmpreg.
 *
 * If CR0_TS is not set, align %rsp down to XMM_ALIGN, reserve room for
 * 11 XMM registers and store %xmm0 - %xmm10 there (%xmm10 at the lowest
 * address).  Otherwise clear CR0_TS and leave the %xmm registers alone.
 *
 * tmpreg keeps the %cr0 value read here.  It must not be modified until
 * the matching SET_TS_OR_POP_XMM_REGISTERS(tmpreg), which tests the same
 * bit again to decide which path to undo.
 */
#define	CLEAR_TS_OR_PUSH_XMM_REGISTERS(tmpreg) \
	push	%rbp; \
	mov	%rsp, %rbp; \
	movq	%cr0, tmpreg; \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	and	$-XMM_ALIGN, %rsp; \
	sub	$[XMM_SIZE * 11], %rsp; \
	movaps	%xmm0, 160(%rsp); \
	movaps	%xmm1, 144(%rsp); \
	movaps	%xmm2, 128(%rsp); \
	movaps	%xmm3, 112(%rsp); \
	movaps	%xmm4, 96(%rsp); \
	movaps	%xmm5, 80(%rsp); \
	movaps	%xmm6, 64(%rsp); \
	movaps	%xmm7, 48(%rsp); \
	movaps	%xmm8, 32(%rsp); \
	movaps	%xmm9, 16(%rsp); \
	movaps	%xmm10, (%rsp); \
	jmp	2f; \
1: \
	PROTECTED_CLTS; \
2:


/*
 * SET_TS_OR_POP_XMM_REGISTERS(tmpreg)
 *
 * Undo CLEAR_TS_OR_PUSH_XMM_REGISTERS(tmpreg).  tmpreg must still hold
 * the %cr0 value read by that macro, and %rsp and %rbp must be unchanged
 * since then.
 *
 * If CR0_TS was not set above, reload %xmm0 - %xmm10 from the stack,
 * otherwise set CR0_TS again.  In both cases restore %rsp from %rbp
 * and pop the saved %rbp.
 */
#define	SET_TS_OR_POP_XMM_REGISTERS(tmpreg) \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	movaps	(%rsp), %xmm10; \
	movaps	16(%rsp), %xmm9; \
	movaps	32(%rsp), %xmm8; \
	movaps	48(%rsp), %xmm7; \
	movaps	64(%rsp), %xmm6; \
	movaps	80(%rsp), %xmm5; \
	movaps	96(%rsp), %xmm4; \
	movaps	112(%rsp), %xmm3; \
	movaps	128(%rsp), %xmm2; \
	movaps	144(%rsp), %xmm1; \
	movaps	160(%rsp), %xmm0; \
	jmp	2f; \
1: \
	STTS(tmpreg); \
2: \
	mov	%rbp, %rsp; \
	pop	%rbp


#else	/* !_KERNEL */
/* Without _KERNEL the three macros expand to nothing. */
#define	PROTECTED_CLTS
#define	CLEAR_TS_OR_PUSH_XMM_REGISTERS(tmpreg)
#define	SET_TS_OR_POP_XMM_REGISTERS(tmpreg)
#endif	/* _KERNEL */

/*
 * Use this mask to byte-swap a 16-byte integer with the pshufb instruction.
 * The table is aligned to XMM_ALIGN because it is loaded with movaps.
 */

// static uint8_t byte_swap16_mask[] = {
//	 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
.text
.align XMM_ALIGN
.Lbyte_swap16_mask:
	.byte	15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0



/*
 * void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res);
 *
 * Perform a carry-less multiplication (that is, use XOR instead of the
 * multiply operator) on P1 and P2, reduce the double-width product back
 * to 16 bytes, and place the result in P3.
 *
 * Byte swap the input and the output.
 *
 * Note: x_in, y, and res all point to a block of 16-byte numbers
 * (an array of two 64-bit integers).
 *
 * Note2: For kernel code, caller is responsible for ensuring
 * kpreempt_disable() has been called.  This is because %xmm registers are
 * not saved/restored.  Clear and set the CR0.TS bit on entry and exit,
 * respectively, if TS is set on entry.  Otherwise, if TS is not set,
 * save and restore %xmm registers on the stack.
 *
 * Note3: Original Intel definition:
 * void galois_hash_asm(unsigned char *hk, unsigned char *s,
 *	unsigned char *d, int length)
 *
 * Note4: Register/parameter mapping:
 * Intel:
 *	Parameter 1: %rcx (copied to %xmm0)	hk or x_in
 *	Parameter 2: %rdx (copied to %xmm1)	s or y
 *	Parameter 3: %rdi (result)		d or res
 * OpenSolaris:
 *	Parameter 1: %rdi (copied to %xmm0)	x_in
 *	Parameter 2: %rsi (copied to %xmm1)	y
 *	Parameter 3: %rdx (result)		res
 *
 * Note5: Other registers used:
 *	%rax		address of the byte-swap mask
 *	%r10		tmpreg for the CLEAR_TS/SET_TS macros; it must not
 *			be modified between the two macro invocations
 *	%xmm2 - %xmm10	working registers (%xmm10 holds the swap mask)
 */

ENTRY_NP(gcm_mul_pclmulqdq)
	CLEAR_TS_OR_PUSH_XMM_REGISTERS(%r10)

	//
	// Copy Parameters
	//
	movdqu	(%rdi), %xmm0	// P1
	movdqu	(%rsi), %xmm1	// P2

	//
	// Byte swap 16-byte input
	//
	lea	.Lbyte_swap16_mask(%rip), %rax
	movaps	(%rax), %xmm10
	pshufb	%xmm10, %xmm0
	pshufb	%xmm10, %xmm1


	//
	// Multiply with the hash key
	//
	movdqu	%xmm0, %xmm3
	pclmulqdq $0, %xmm1, %xmm3	// xmm3 holds a0*b0

	movdqu	%xmm0, %xmm4
	pclmulqdq $16, %xmm1, %xmm4	// xmm4 holds a0*b1

	movdqu	%xmm0, %xmm5
	pclmulqdq $1, %xmm1, %xmm5	// xmm5 holds a1*b0
	movdqu	%xmm0, %xmm6
	pclmulqdq $17, %xmm1, %xmm6	// xmm6 holds a1*b1

	pxor	%xmm5, %xmm4	// xmm4 holds a0*b1 + a1*b0

	movdqu	%xmm4, %xmm5	// move the contents of xmm4 to xmm5
	psrldq	$8, %xmm4	// shift xmm4 right by 64 bits
	pslldq	$8, %xmm5	// shift xmm5 left by 64 bits
	pxor	%xmm5, %xmm3
	pxor	%xmm4, %xmm6	// Register pair <xmm6:xmm3> holds the result
				// of the carry-less multiplication of
				// xmm0 by xmm1.

	// We shift the result of the multiplication by one bit position
	// to the left to compensate for the fact that the bits are reversed.
	// pslld shifts each 32-bit lane on its own, so the bit shifted out
	// of every lane is recovered with psrld $31 and moved into the next
	// higher lane (from the top lane of xmm3 into xmm6 via xmm9).
	movdqu	%xmm3, %xmm7
	movdqu	%xmm6, %xmm8
	pslld	$1, %xmm3
	pslld	$1, %xmm6
	psrld	$31, %xmm7	// per-lane carry bits of xmm3
	psrld	$31, %xmm8	// per-lane carry bits of xmm6
	movdqu	%xmm7, %xmm9
	pslldq	$4, %xmm8	// move carries up one 32-bit lane
	pslldq	$4, %xmm7
	psrldq	$12, %xmm9	// top-lane carry of xmm3 -> lowest lane
	por	%xmm7, %xmm3
	por	%xmm8, %xmm6
	por	%xmm9, %xmm6	// carry from xmm3 into xmm6

	//
	// First phase of the reduction
	//
	// Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts
	// independently.
	movdqu	%xmm3, %xmm7
	movdqu	%xmm3, %xmm8
	movdqu	%xmm3, %xmm9
	pslld	$31, %xmm7	// packed left shift << 31
	pslld	$30, %xmm8	// packed left shift << 30
	pslld	$25, %xmm9	// packed left shift << 25
	pxor	%xmm8, %xmm7	// xor the shifted versions
	pxor	%xmm9, %xmm7
	movdqu	%xmm7, %xmm8
	pslldq	$12, %xmm7
	psrldq	$4, %xmm8	// xmm8 is folded in during the second phase
	pxor	%xmm7, %xmm3	// first phase of the reduction complete

	//
	// Second phase of the reduction
	//
	// Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these
	// shift operations.
	movdqu	%xmm3, %xmm2
	movdqu	%xmm3, %xmm4
	movdqu	%xmm3, %xmm5
	psrld	$1, %xmm2	// packed right shift >> 1
	psrld	$2, %xmm4	// packed right shift >> 2
	psrld	$7, %xmm5	// packed right shift >> 7
	pxor	%xmm4, %xmm2	// xor the shifted versions
	pxor	%xmm5, %xmm2
	pxor	%xmm8, %xmm2	// add the part saved in the first phase
	pxor	%xmm2, %xmm3
	pxor	%xmm3, %xmm6	// the result is in xmm6

	//
	// Byte swap 16-byte result
	//
	pshufb	%xmm10, %xmm6	// %xmm10 has the swap mask

	//
	// Store the result
	//
	movdqu	%xmm6, (%rdx)	// P3


	//
	// Cleanup and Return
	//
	SET_TS_OR_POP_XMM_REGISTERS(%r10)
	ret
	SET_SIZE(gcm_mul_pclmulqdq)

#endif	/* lint || __lint */
338