xref: /onnv-gate/usr/src/lib/libc/amd64/gen/memset.s (revision 10024:2213a466547f)
10Sstevel@tonic-gate/*
26320Sbholler * CDDL HEADER START
36320Sbholler *
46320Sbholler * The contents of this file are subject to the terms of the
56320Sbholler * Common Development and Distribution License (the "License").
66320Sbholler * You may not use this file except in compliance with the License.
76320Sbholler *
86320Sbholler * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
96320Sbholler * or http://www.opensolaris.org/os/licensing.
106320Sbholler * See the License for the specific language governing permissions
116320Sbholler * and limitations under the License.
126320Sbholler *
136320Sbholler * When distributing Covered Code, include this CDDL HEADER in each
146320Sbholler * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
156320Sbholler * If applicable, add the following below this CDDL HEADER, with the
166320Sbholler * fields enclosed by brackets "[]" replaced with your own identifying
176320Sbholler * information: Portions Copyright [yyyy] [name of copyright owner]
186320Sbholler *
196320Sbholler * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate
220Sstevel@tonic-gate/*
23*10024Sbostrovs * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
246812Sraf * Use is subject to license terms.
256812Sraf */
266812Sraf
276812Sraf/*
286320Sbholler * Copyright (c) 2008, Intel Corporation
290Sstevel@tonic-gate * All rights reserved.
300Sstevel@tonic-gate */
310Sstevel@tonic-gate
32*10024Sbostrovs/*
33*10024Sbostrovs * Portions Copyright 2009 Advanced Micro Devices, Inc.
34*10024Sbostrovs */
35*10024Sbostrovs
367298SMark.J.Nelson@Sun.COM	.file	"memset.s"
370Sstevel@tonic-gate
380Sstevel@tonic-gate#include <sys/asm_linkage.h>
390Sstevel@tonic-gate
406812Sraf	ANSI_PRAGMA_WEAK(memset,function)
410Sstevel@tonic-gate
420Sstevel@tonic-gate#include "cache.h"
436320Sbholler#include "proc64_id.h"
440Sstevel@tonic-gate
/*
 * L(s) forms the name of a label private to this file by gluing the
 * argument onto the ".memset" prefix; every jump-table entry and branch
 * target below is written through it (e.g. L(ck2), L(P0Q0)).
 *
 * NOTE(review): the empty comment between the prefix and `s' is the
 * traditional (pre-ANSI) cpp token-pasting idiom.  This presumably relies
 * on the file being preprocessed in traditional mode, where the comment is
 * removed without leaving a space -- confirm against the build flags.
 */
456320Sbholler#define L(s) .memset/**/s
460Sstevel@tonic-gate
476320Sbholler/*
486320Sbholler * memset algorithm overview:
496320Sbholler *
506320Sbholler * Thresholds used below were determined experimentally.
516320Sbholler *
526320Sbholler * Pseudo code:
536320Sbholler *
54*10024Sbostrovs * NOTE: On AMD NO_SSE is always set.  Performance on Opteron did not improve
55*10024Sbostrovs * using 16-byte stores.  Setting NO_SSE on AMD should be re-evaluated on
56*10024Sbostrovs * future AMD processors.
57*10024Sbostrovs *
58*10024Sbostrovs *
596320Sbholler * If (size < 144 bytes) {
606320Sbholler *	do unrolled code (primarily 8-byte stores) regardless of alignment.
616320Sbholler * } else {
626320Sbholler *	Align destination to 16-byte boundary
636320Sbholler *
646320Sbholler *      if (NO_SSE) {
656320Sbholler *		If (size > largest level cache) {
666320Sbholler *			Use 8-byte non-temporal stores (64-bytes/loop)
676320Sbholler *		} else {
686320Sbholler *			if (size >= 2K) {
696320Sbholler *				Use rep sstoq
706320Sbholler *			} else {
716320Sbholler *				Use 8-byte stores (128 bytes per loop)
726320Sbholler *			}
736320Sbholler *		}
746320Sbholler *
756320Sbholler *	} else { **USE SSE**
766320Sbholler *		If (size < 192 bytes) {
776320Sbholler *			do unrolled code using primarily 16-byte stores (SSE2)
786320Sbholler *		} else {
796320Sbholler *			If (size > largest level cache) {
806320Sbholler *				Use 16-byte non-temporal stores (128-bytes/loop)
816320Sbholler *			} else {
826320Sbholler *				Use 16-byte stores (128 bytes per loop)
836320Sbholler *			}
846320Sbholler *		}
856320Sbholler *	}
866320Sbholler *
876320Sbholler *	Finish any remaining bytes via unrolled code above.
886320Sbholler * }
896320Sbholler */
900Sstevel@tonic-gate
926320Sbholler		ENTRY(memset)		# (void *, int, size_t)
926320Sbholler		cmp    $0x1,%rdx
936320Sbholler		mov    %rdi,%rax	# memset returns the dest address
946320Sbholler		jne    L(ck2)
956320Sbholler		mov    %sil,(%rdi)
966320Sbholler		ret
976320SbhollerL(ck2):
986320Sbholler		mov    $0x0101010101010101,%r9
996320Sbholler		mov    %rdx,%r8
1006320Sbholler		movzbq %sil,%rdx
1016320Sbholler		imul   %r9,%rdx		# clone value 8 times
1026320Sbholler
1036320Sbholler		cmp    $0x90,%r8	# 144
1046320Sbholler		jge    L(ck_align)
1056320Sbholler
1066320Sbholler		lea    L(setPxQx)(%rip),%r11
1076320Sbholler		add    %r8,%rdi
1080Sstevel@tonic-gate
1096320Sbholler		movslq (%r11,%r8,4),%rcx
1106320Sbholler		lea    (%rcx,%r11,1),%r11
1116320Sbholler		jmpq   *%r11
1126320Sbholler
1136320Sbholler		.balign 16
1146320SbhollerL(setPxQx):	.int       L(P0Q0)-L(setPxQx)
1156320Sbholler		.int       L(P1Q0)-L(setPxQx)
1166320Sbholler		.int       L(P2Q0)-L(setPxQx)
1176320Sbholler		.int       L(P3Q0)-L(setPxQx)
1186320Sbholler		.int       L(P4Q0)-L(setPxQx)
1196320Sbholler		.int       L(P5Q0)-L(setPxQx)
1206320Sbholler		.int       L(P6Q0)-L(setPxQx)
1216320Sbholler		.int       L(P7Q0)-L(setPxQx)
1220Sstevel@tonic-gate
1236320Sbholler		.int       L(P0Q1)-L(setPxQx)
1246320Sbholler		.int       L(P1Q1)-L(setPxQx)
1256320Sbholler		.int       L(P2Q1)-L(setPxQx)
1266320Sbholler		.int       L(P3Q1)-L(setPxQx)
1276320Sbholler		.int       L(P4Q1)-L(setPxQx)
1286320Sbholler		.int       L(P5Q1)-L(setPxQx)
1296320Sbholler		.int       L(P6Q1)-L(setPxQx)
1306320Sbholler		.int       L(P7Q1)-L(setPxQx)
1316320Sbholler
1326320Sbholler		.int       L(P0Q2)-L(setPxQx)
1336320Sbholler		.int       L(P1Q2)-L(setPxQx)
1346320Sbholler		.int       L(P2Q2)-L(setPxQx)
1356320Sbholler		.int       L(P3Q2)-L(setPxQx)
1366320Sbholler		.int       L(P4Q2)-L(setPxQx)
1376320Sbholler		.int       L(P5Q2)-L(setPxQx)
1386320Sbholler		.int       L(P6Q2)-L(setPxQx)
1396320Sbholler		.int       L(P7Q2)-L(setPxQx)
1400Sstevel@tonic-gate
1416320Sbholler		.int       L(P0Q3)-L(setPxQx)
1426320Sbholler		.int       L(P1Q3)-L(setPxQx)
1436320Sbholler		.int       L(P2Q3)-L(setPxQx)
1446320Sbholler		.int       L(P3Q3)-L(setPxQx)
1456320Sbholler		.int       L(P4Q3)-L(setPxQx)
1466320Sbholler		.int       L(P5Q3)-L(setPxQx)
1476320Sbholler		.int       L(P6Q3)-L(setPxQx)
1486320Sbholler		.int       L(P7Q3)-L(setPxQx)
1496320Sbholler
1506320Sbholler		.int       L(P0Q4)-L(setPxQx)
1516320Sbholler		.int       L(P1Q4)-L(setPxQx)
1526320Sbholler		.int       L(P2Q4)-L(setPxQx)
1536320Sbholler		.int       L(P3Q4)-L(setPxQx)
1546320Sbholler		.int       L(P4Q4)-L(setPxQx)
1556320Sbholler		.int       L(P5Q4)-L(setPxQx)
1566320Sbholler		.int       L(P6Q4)-L(setPxQx)
1576320Sbholler		.int       L(P7Q4)-L(setPxQx)
1580Sstevel@tonic-gate
1596320Sbholler		.int       L(P0Q5)-L(setPxQx)
1606320Sbholler		.int       L(P1Q5)-L(setPxQx)
1616320Sbholler		.int       L(P2Q5)-L(setPxQx)
1626320Sbholler		.int       L(P3Q5)-L(setPxQx)
1636320Sbholler		.int       L(P4Q5)-L(setPxQx)
1646320Sbholler		.int       L(P5Q5)-L(setPxQx)
1656320Sbholler		.int       L(P6Q5)-L(setPxQx)
1666320Sbholler		.int       L(P7Q5)-L(setPxQx)
1670Sstevel@tonic-gate
1686320Sbholler		.int       L(P0Q6)-L(setPxQx)
1696320Sbholler		.int       L(P1Q6)-L(setPxQx)
1706320Sbholler		.int       L(P2Q6)-L(setPxQx)
1716320Sbholler		.int       L(P3Q6)-L(setPxQx)
1726320Sbholler		.int       L(P4Q6)-L(setPxQx)
1736320Sbholler		.int       L(P5Q6)-L(setPxQx)
1746320Sbholler		.int       L(P6Q6)-L(setPxQx)
1756320Sbholler		.int       L(P7Q6)-L(setPxQx)
1760Sstevel@tonic-gate
1776320Sbholler		.int       L(P0Q7)-L(setPxQx)
1786320Sbholler		.int       L(P1Q7)-L(setPxQx)
1796320Sbholler		.int       L(P2Q7)-L(setPxQx)
1806320Sbholler		.int       L(P3Q7)-L(setPxQx)
1816320Sbholler		.int       L(P4Q7)-L(setPxQx)
1826320Sbholler		.int       L(P5Q7)-L(setPxQx)
1836320Sbholler		.int       L(P6Q7)-L(setPxQx)
1846320Sbholler		.int       L(P7Q7)-L(setPxQx)
1850Sstevel@tonic-gate
1866320Sbholler		.int       L(P0Q8)-L(setPxQx)
1876320Sbholler		.int       L(P1Q8)-L(setPxQx)
1886320Sbholler		.int       L(P2Q8)-L(setPxQx)
1896320Sbholler		.int       L(P3Q8)-L(setPxQx)
1906320Sbholler		.int       L(P4Q8)-L(setPxQx)
1916320Sbholler		.int       L(P5Q8)-L(setPxQx)
1926320Sbholler		.int       L(P6Q8)-L(setPxQx)
1936320Sbholler		.int       L(P7Q8)-L(setPxQx)
1946320Sbholler
1956320Sbholler		.int       L(P0Q9)-L(setPxQx)
1966320Sbholler		.int       L(P1Q9)-L(setPxQx)
1976320Sbholler		.int       L(P2Q9)-L(setPxQx)
1986320Sbholler		.int       L(P3Q9)-L(setPxQx)
1996320Sbholler		.int       L(P4Q9)-L(setPxQx)
2006320Sbholler		.int       L(P5Q9)-L(setPxQx)
2016320Sbholler		.int       L(P6Q9)-L(setPxQx)
2026320Sbholler		.int       L(P7Q9)-L(setPxQx)
2030Sstevel@tonic-gate
2046320Sbholler		.int       L(P0QA)-L(setPxQx)
2056320Sbholler		.int       L(P1QA)-L(setPxQx)
2066320Sbholler		.int       L(P2QA)-L(setPxQx)
2076320Sbholler		.int       L(P3QA)-L(setPxQx)
2086320Sbholler		.int       L(P4QA)-L(setPxQx)
2096320Sbholler		.int       L(P5QA)-L(setPxQx)
2106320Sbholler		.int       L(P6QA)-L(setPxQx)
2116320Sbholler		.int       L(P7QA)-L(setPxQx)
2126320Sbholler
2136320Sbholler		.int       L(P0QB)-L(setPxQx)
2146320Sbholler		.int       L(P1QB)-L(setPxQx)
2156320Sbholler		.int       L(P2QB)-L(setPxQx)
2166320Sbholler		.int       L(P3QB)-L(setPxQx)
2176320Sbholler		.int       L(P4QB)-L(setPxQx)
2186320Sbholler		.int       L(P5QB)-L(setPxQx)
2196320Sbholler		.int       L(P6QB)-L(setPxQx)
2206320Sbholler		.int       L(P7QB)-L(setPxQx)
2210Sstevel@tonic-gate
2226320Sbholler		.int       L(P0QC)-L(setPxQx)
2236320Sbholler		.int       L(P1QC)-L(setPxQx)
2246320Sbholler		.int       L(P2QC)-L(setPxQx)
2256320Sbholler		.int       L(P3QC)-L(setPxQx)
2266320Sbholler		.int       L(P4QC)-L(setPxQx)
2276320Sbholler		.int       L(P5QC)-L(setPxQx)
2286320Sbholler		.int       L(P6QC)-L(setPxQx)
2296320Sbholler		.int       L(P7QC)-L(setPxQx)
2306320Sbholler
2316320Sbholler		.int       L(P0QD)-L(setPxQx)
2326320Sbholler		.int       L(P1QD)-L(setPxQx)
2336320Sbholler		.int       L(P2QD)-L(setPxQx)
2346320Sbholler		.int       L(P3QD)-L(setPxQx)
2356320Sbholler		.int       L(P4QD)-L(setPxQx)
2366320Sbholler		.int       L(P5QD)-L(setPxQx)
2376320Sbholler		.int       L(P6QD)-L(setPxQx)
2386320Sbholler		.int       L(P7QD)-L(setPxQx)
2390Sstevel@tonic-gate
2406320Sbholler		.int       L(P0QE)-L(setPxQx)	# 112
2416320Sbholler		.int       L(P1QE)-L(setPxQx)
2426320Sbholler		.int       L(P2QE)-L(setPxQx)
2436320Sbholler		.int       L(P3QE)-L(setPxQx)
2446320Sbholler		.int       L(P4QE)-L(setPxQx)
2456320Sbholler		.int       L(P5QE)-L(setPxQx)
2466320Sbholler		.int       L(P6QE)-L(setPxQx)
2476320Sbholler		.int       L(P7QE)-L(setPxQx)
2486320Sbholler
2496320Sbholler		.int       L(P0QF)-L(setPxQx)	#120
2506320Sbholler		.int       L(P1QF)-L(setPxQx)
2516320Sbholler		.int       L(P2QF)-L(setPxQx)
2526320Sbholler		.int       L(P3QF)-L(setPxQx)
2536320Sbholler		.int       L(P4QF)-L(setPxQx)
2546320Sbholler		.int       L(P5QF)-L(setPxQx)
2556320Sbholler		.int       L(P6QF)-L(setPxQx)
2566320Sbholler		.int       L(P7QF)-L(setPxQx)
2570Sstevel@tonic-gate
2586320Sbholler		.int       L(P0QG)-L(setPxQx)	#128
2596320Sbholler		.int       L(P1QG)-L(setPxQx)
2606320Sbholler		.int       L(P2QG)-L(setPxQx)
2616320Sbholler		.int       L(P3QG)-L(setPxQx)
2626320Sbholler		.int       L(P4QG)-L(setPxQx)
2636320Sbholler		.int       L(P5QG)-L(setPxQx)
2646320Sbholler		.int       L(P6QG)-L(setPxQx)
2656320Sbholler		.int       L(P7QG)-L(setPxQx)
2660Sstevel@tonic-gate
2676320Sbholler		.int       L(P0QH)-L(setPxQx)	#136
2686320Sbholler		.int       L(P1QH)-L(setPxQx)
2696320Sbholler		.int       L(P2QH)-L(setPxQx)
2706320Sbholler		.int       L(P3QH)-L(setPxQx)
2716320Sbholler		.int       L(P4QH)-L(setPxQx)
2726320Sbholler		.int       L(P5QH)-L(setPxQx)
2736320Sbholler		.int       L(P6QH)-L(setPxQx)
2746320Sbholler		.int       L(P7QH)-L(setPxQx)	#143
2750Sstevel@tonic-gate
2766320Sbholler		.balign 16
2776320SbhollerL(P1QH):	mov    %rdx,-0x89(%rdi)
2786320SbhollerL(P1QG):	mov    %rdx,-0x81(%rdi)
2796320Sbholler		.balign 16
2806320SbhollerL(P1QF):	mov    %rdx,-0x79(%rdi)
2816320SbhollerL(P1QE):	mov    %rdx,-0x71(%rdi)
2826320SbhollerL(P1QD):	mov    %rdx,-0x69(%rdi)
2836320SbhollerL(P1QC):	mov    %rdx,-0x61(%rdi)
2846320SbhollerL(P1QB):	mov    %rdx,-0x59(%rdi)
2856320SbhollerL(P1QA):	mov    %rdx,-0x51(%rdi)
2866320SbhollerL(P1Q9):	mov    %rdx,-0x49(%rdi)
2876320SbhollerL(P1Q8):	mov    %rdx,-0x41(%rdi)
2886320SbhollerL(P1Q7):	mov    %rdx,-0x39(%rdi)
2896320SbhollerL(P1Q6):	mov    %rdx,-0x31(%rdi)
2906320SbhollerL(P1Q5):	mov    %rdx,-0x29(%rdi)
2916320SbhollerL(P1Q4):	mov    %rdx,-0x21(%rdi)
2926320SbhollerL(P1Q3):	mov    %rdx,-0x19(%rdi)
2936320SbhollerL(P1Q2):	mov    %rdx,-0x11(%rdi)
2946320SbhollerL(P1Q1):	mov    %rdx,-0x9(%rdi)
2956320SbhollerL(P1Q0):	mov    %dl,-0x1(%rdi)
2966320Sbholler		ret
2970Sstevel@tonic-gate
2986320Sbholler		.balign 16
2996320SbhollerL(P0QH):	mov    %rdx,-0x88(%rdi)
3006320Sbholler		.balign 16
3016320SbhollerL(P0QG):	mov    %rdx,-0x80(%rdi)
3026320SbhollerL(P0QF):	mov    %rdx,-0x78(%rdi)
3036320SbhollerL(P0QE):	mov    %rdx,-0x70(%rdi)
3046320SbhollerL(P0QD):	mov    %rdx,-0x68(%rdi)
3056320SbhollerL(P0QC):	mov    %rdx,-0x60(%rdi)
3066320SbhollerL(P0QB):	mov    %rdx,-0x58(%rdi)
3076320SbhollerL(P0QA):	mov    %rdx,-0x50(%rdi)
3086320SbhollerL(P0Q9):	mov    %rdx,-0x48(%rdi)
3096320SbhollerL(P0Q8):	mov    %rdx,-0x40(%rdi)
3106320SbhollerL(P0Q7):	mov    %rdx,-0x38(%rdi)
3116320SbhollerL(P0Q6):	mov    %rdx,-0x30(%rdi)
3126320SbhollerL(P0Q5):	mov    %rdx,-0x28(%rdi)
3136320SbhollerL(P0Q4):	mov    %rdx,-0x20(%rdi)
3146320SbhollerL(P0Q3):	mov    %rdx,-0x18(%rdi)
3156320SbhollerL(P0Q2):	mov    %rdx,-0x10(%rdi)
3166320SbhollerL(P0Q1):	mov    %rdx,-0x8(%rdi)
3176320SbhollerL(P0Q0):	ret
3180Sstevel@tonic-gate
3196320Sbholler		.balign 16
3206320SbhollerL(P2QH):	mov    %rdx,-0x8a(%rdi)
3216320SbhollerL(P2QG):	mov    %rdx,-0x82(%rdi)
3226320Sbholler		.balign 16
3236320SbhollerL(P2QF):	mov    %rdx,-0x7a(%rdi)
3246320SbhollerL(P2QE):	mov    %rdx,-0x72(%rdi)
3256320SbhollerL(P2QD):	mov    %rdx,-0x6a(%rdi)
3266320SbhollerL(P2QC):	mov    %rdx,-0x62(%rdi)
3276320SbhollerL(P2QB):	mov    %rdx,-0x5a(%rdi)
3286320SbhollerL(P2QA):	mov    %rdx,-0x52(%rdi)
3296320SbhollerL(P2Q9):	mov    %rdx,-0x4a(%rdi)
3306320SbhollerL(P2Q8):	mov    %rdx,-0x42(%rdi)
3316320SbhollerL(P2Q7):	mov    %rdx,-0x3a(%rdi)
3326320SbhollerL(P2Q6):	mov    %rdx,-0x32(%rdi)
3336320SbhollerL(P2Q5):	mov    %rdx,-0x2a(%rdi)
3346320SbhollerL(P2Q4):	mov    %rdx,-0x22(%rdi)
3356320SbhollerL(P2Q3):	mov    %rdx,-0x1a(%rdi)
3366320SbhollerL(P2Q2):	mov    %rdx,-0x12(%rdi)
3376320SbhollerL(P2Q1):	mov    %rdx,-0xa(%rdi)
3386320SbhollerL(P2Q0):	mov    %dx,-0x2(%rdi)
3396320Sbholler		ret
3400Sstevel@tonic-gate
3416320Sbholler		.balign 16
3426320SbhollerL(P3QH):	mov    %rdx,-0x8b(%rdi)
3436320SbhollerL(P3QG):	mov    %rdx,-0x83(%rdi)
3446320Sbholler		.balign 16
3456320SbhollerL(P3QF):	mov    %rdx,-0x7b(%rdi)
3466320SbhollerL(P3QE):	mov    %rdx,-0x73(%rdi)
3476320SbhollerL(P3QD):	mov    %rdx,-0x6b(%rdi)
3486320SbhollerL(P3QC):	mov    %rdx,-0x63(%rdi)
3496320SbhollerL(P3QB):	mov    %rdx,-0x5b(%rdi)
3506320SbhollerL(P3QA):	mov    %rdx,-0x53(%rdi)
3516320SbhollerL(P3Q9):	mov    %rdx,-0x4b(%rdi)
3526320SbhollerL(P3Q8):	mov    %rdx,-0x43(%rdi)
3536320SbhollerL(P3Q7):	mov    %rdx,-0x3b(%rdi)
3546320SbhollerL(P3Q6):	mov    %rdx,-0x33(%rdi)
3556320SbhollerL(P3Q5):	mov    %rdx,-0x2b(%rdi)
3566320SbhollerL(P3Q4):	mov    %rdx,-0x23(%rdi)
3576320SbhollerL(P3Q3):	mov    %rdx,-0x1b(%rdi)
3586320SbhollerL(P3Q2):	mov    %rdx,-0x13(%rdi)
3596320SbhollerL(P3Q1):	mov    %rdx,-0xb(%rdi)
3606320SbhollerL(P3Q0):	mov    %dx,-0x3(%rdi)
3616320Sbholler		mov    %dl,-0x1(%rdi)
3626320Sbholler		ret
3630Sstevel@tonic-gate
3646320Sbholler		.balign 16
3656320SbhollerL(P4QH):	mov    %rdx,-0x8c(%rdi)
3666320SbhollerL(P4QG):	mov    %rdx,-0x84(%rdi)
3676320Sbholler		.balign 16
3686320SbhollerL(P4QF):	mov    %rdx,-0x7c(%rdi)
3696320SbhollerL(P4QE):	mov    %rdx,-0x74(%rdi)
3706320SbhollerL(P4QD):	mov    %rdx,-0x6c(%rdi)
3716320SbhollerL(P4QC):	mov    %rdx,-0x64(%rdi)
3726320SbhollerL(P4QB):	mov    %rdx,-0x5c(%rdi)
3736320SbhollerL(P4QA):	mov    %rdx,-0x54(%rdi)
3746320SbhollerL(P4Q9):	mov    %rdx,-0x4c(%rdi)
3756320SbhollerL(P4Q8):	mov    %rdx,-0x44(%rdi)
3766320SbhollerL(P4Q7):	mov    %rdx,-0x3c(%rdi)
3776320SbhollerL(P4Q6):	mov    %rdx,-0x34(%rdi)
3786320SbhollerL(P4Q5):	mov    %rdx,-0x2c(%rdi)
3796320SbhollerL(P4Q4):	mov    %rdx,-0x24(%rdi)
3806320SbhollerL(P4Q3):	mov    %rdx,-0x1c(%rdi)
3816320SbhollerL(P4Q2):	mov    %rdx,-0x14(%rdi)
3826320SbhollerL(P4Q1):	mov    %rdx,-0xc(%rdi)
3836320SbhollerL(P4Q0):	mov    %edx,-0x4(%rdi)
3846320Sbholler		ret
3850Sstevel@tonic-gate
3866320Sbholler		.balign 16
3876320SbhollerL(P5QH):	mov    %rdx,-0x8d(%rdi)
3886320SbhollerL(P5QG):	mov    %rdx,-0x85(%rdi)
3896320Sbholler		.balign 16
3906320SbhollerL(P5QF):	mov    %rdx,-0x7d(%rdi)
3916320SbhollerL(P5QE):	mov    %rdx,-0x75(%rdi)
3926320SbhollerL(P5QD):	mov    %rdx,-0x6d(%rdi)
3936320SbhollerL(P5QC):	mov    %rdx,-0x65(%rdi)
3946320SbhollerL(P5QB):	mov    %rdx,-0x5d(%rdi)
3956320SbhollerL(P5QA):	mov    %rdx,-0x55(%rdi)
3966320SbhollerL(P5Q9):	mov    %rdx,-0x4d(%rdi)
3976320SbhollerL(P5Q8):	mov    %rdx,-0x45(%rdi)
3986320SbhollerL(P5Q7):	mov    %rdx,-0x3d(%rdi)
3996320SbhollerL(P5Q6):	mov    %rdx,-0x35(%rdi)
4006320SbhollerL(P5Q5):	mov    %rdx,-0x2d(%rdi)
4016320SbhollerL(P5Q4):	mov    %rdx,-0x25(%rdi)
4026320SbhollerL(P5Q3):	mov    %rdx,-0x1d(%rdi)
4036320SbhollerL(P5Q2):	mov    %rdx,-0x15(%rdi)
4046320SbhollerL(P5Q1):	mov    %rdx,-0xd(%rdi)
4056320SbhollerL(P5Q0):	mov    %edx,-0x5(%rdi)
4066320Sbholler		mov    %dl,-0x1(%rdi)
4076320Sbholler		ret
4080Sstevel@tonic-gate
4096320Sbholler		.balign 16
4106320SbhollerL(P6QH):	mov    %rdx,-0x8e(%rdi)
4116320SbhollerL(P6QG):	mov    %rdx,-0x86(%rdi)
4126320Sbholler		.balign 16
4136320SbhollerL(P6QF):	mov    %rdx,-0x7e(%rdi)
4146320SbhollerL(P6QE):	mov    %rdx,-0x76(%rdi)
4156320SbhollerL(P6QD):	mov    %rdx,-0x6e(%rdi)
4166320SbhollerL(P6QC):	mov    %rdx,-0x66(%rdi)
4176320SbhollerL(P6QB):	mov    %rdx,-0x5e(%rdi)
4186320SbhollerL(P6QA):	mov    %rdx,-0x56(%rdi)
4196320SbhollerL(P6Q9):	mov    %rdx,-0x4e(%rdi)
4206320SbhollerL(P6Q8):	mov    %rdx,-0x46(%rdi)
4216320SbhollerL(P6Q7):	mov    %rdx,-0x3e(%rdi)
4226320SbhollerL(P6Q6):	mov    %rdx,-0x36(%rdi)
4236320SbhollerL(P6Q5):	mov    %rdx,-0x2e(%rdi)
4246320SbhollerL(P6Q4):	mov    %rdx,-0x26(%rdi)
4256320SbhollerL(P6Q3):	mov    %rdx,-0x1e(%rdi)
4266320SbhollerL(P6Q2):	mov    %rdx,-0x16(%rdi)
4276320SbhollerL(P6Q1):	mov    %rdx,-0xe(%rdi)
4286320SbhollerL(P6Q0):	mov    %edx,-0x6(%rdi)
4296320Sbholler		mov    %dx,-0x2(%rdi)
4306320Sbholler		ret
4310Sstevel@tonic-gate
4326320Sbholler		.balign 16
4336320SbhollerL(P7QH):	mov    %rdx,-0x8f(%rdi)
4346320SbhollerL(P7QG):	mov    %rdx,-0x87(%rdi)
4356320Sbholler		.balign 16
4366320SbhollerL(P7QF):	mov    %rdx,-0x7f(%rdi)
4376320SbhollerL(P7QE):	mov    %rdx,-0x77(%rdi)
4386320SbhollerL(P7QD):	mov    %rdx,-0x6f(%rdi)
4396320SbhollerL(P7QC):	mov    %rdx,-0x67(%rdi)
4406320SbhollerL(P7QB):	mov    %rdx,-0x5f(%rdi)
4416320SbhollerL(P7QA):	mov    %rdx,-0x57(%rdi)
4426320SbhollerL(P7Q9):	mov    %rdx,-0x4f(%rdi)
4436320SbhollerL(P7Q8):	mov    %rdx,-0x47(%rdi)
4446320SbhollerL(P7Q7):	mov    %rdx,-0x3f(%rdi)
4456320SbhollerL(P7Q6):	mov    %rdx,-0x37(%rdi)
4466320SbhollerL(P7Q5):	mov    %rdx,-0x2f(%rdi)
4476320SbhollerL(P7Q4):	mov    %rdx,-0x27(%rdi)
4486320SbhollerL(P7Q3):	mov    %rdx,-0x1f(%rdi)
4496320SbhollerL(P7Q2):	mov    %rdx,-0x17(%rdi)
4506320SbhollerL(P7Q1):	mov    %rdx,-0xf(%rdi)
4516320SbhollerL(P7Q0):	mov    %edx,-0x7(%rdi)
4526320Sbholler		mov    %dx,-0x3(%rdi)
4536320Sbholler		mov    %dl,-0x1(%rdi)
4546320Sbholler		ret
4550Sstevel@tonic-gate
4566320Sbholler		.balign 16
4576320SbhollerL(ck_align):
4586320Sbholler		/*
4596320Sbholler		 * Align to 16 byte boundary first
4606320Sbholler		 */
4616320Sbholler	 	lea    L(AliPxQx)(%rip),%r11
4626320Sbholler	 	mov    $0x10,%r10
4636320Sbholler	 	mov    %rdi,%r9
4646320Sbholler	 	and    $0xf,%r9
4656320Sbholler	 	sub    %r9,%r10
4666320Sbholler	 	and    $0xf,%r10
4676320Sbholler	 	add    %r10,%rdi
4686320Sbholler	 	sub    %r10,%r8
4690Sstevel@tonic-gate
4706320Sbholler		movslq (%r11,%r10,4),%rcx
4716320Sbholler		lea    (%rcx,%r11,1),%r11
4726320Sbholler		jmpq   *%r11			# align dest to 16-byte boundary
4730Sstevel@tonic-gate
4746320Sbholler		.balign 16
4756320SbhollerL(AliPxQx):	.int	L(aligned_now)-L(AliPxQx)
4766320Sbholler		.int	L(A1Q0)-L(AliPxQx)
4776320Sbholler		.int	L(A2Q0)-L(AliPxQx)
4786320Sbholler		.int	L(A3Q0)-L(AliPxQx)
4796320Sbholler		.int	L(A4Q0)-L(AliPxQx)
4806320Sbholler		.int	L(A5Q0)-L(AliPxQx)
4816320Sbholler		.int	L(A6Q0)-L(AliPxQx)
4826320Sbholler		.int	L(A7Q0)-L(AliPxQx)
4830Sstevel@tonic-gate
4846320Sbholler		.int	L(A0Q1)-L(AliPxQx)
4856320Sbholler		.int	L(A1Q1)-L(AliPxQx)
4866320Sbholler		.int	L(A2Q1)-L(AliPxQx)
4876320Sbholler		.int	L(A3Q1)-L(AliPxQx)
4886320Sbholler		.int	L(A4Q1)-L(AliPxQx)
4896320Sbholler		.int	L(A5Q1)-L(AliPxQx)
4906320Sbholler		.int	L(A6Q1)-L(AliPxQx)
4916320Sbholler		.int	L(A7Q1)-L(AliPxQx)
4926320Sbholler
4936320Sbholler		.balign 16
4946320SbhollerL(A5Q1):	mov    %dl,-0xd(%rdi)
4956320SbhollerL(A4Q1):	mov    %edx,-0xc(%rdi)
4966320SbhollerL(A0Q1):	mov    %rdx,-0x8(%rdi)
4976320Sbholler		jmp     L(aligned_now)
4986320Sbholler
4996320Sbholler		.balign 16
5006320SbhollerL(A1Q1):	mov    %dl,-0x9(%rdi)
5016320Sbholler		mov    %rdx,-0x8(%rdi)
5026320Sbholler		jmp    L(aligned_now)
5030Sstevel@tonic-gate
5046320Sbholler		.balign 16
5056320SbhollerL(A1Q0):	mov    %dl,-0x1(%rdi)
5066320Sbholler		jmp    L(aligned_now)
5076320Sbholler
5086320Sbholler		.balign 16
5096320SbhollerL(A3Q1):	mov    %dl,-0xb(%rdi)
5106320SbhollerL(A2Q1):	mov    %dx,-0xa(%rdi)
5116320Sbholler		mov    %rdx,-0x8(%rdi)
5126320Sbholler		jmp    L(aligned_now)
5130Sstevel@tonic-gate
5146320Sbholler		.balign 16
5156320SbhollerL(A3Q0):	mov    %dl,-0x3(%rdi)
5166320SbhollerL(A2Q0):	mov    %dx,-0x2(%rdi)
5176320Sbholler		jmp    L(aligned_now)
5186320Sbholler
5196320Sbholler		.balign 16
5206320SbhollerL(A5Q0):	mov    %dl,-0x5(%rdi)
5216320SbhollerL(A4Q0):	mov    %edx,-0x4(%rdi)
5226320Sbholler		jmp    L(aligned_now)
5230Sstevel@tonic-gate
5246320Sbholler		.balign 16
5256320SbhollerL(A7Q1):	mov    %dl,-0xf(%rdi)
5266320SbhollerL(A6Q1):	mov    %dx,-0xe(%rdi)
5276320Sbholler		mov    %edx,-0xc(%rdi)
5286320Sbholler		mov    %rdx,-0x8(%rdi)
5296320Sbholler		jmp    L(aligned_now)
5306320Sbholler
5316320Sbholler		.balign 16
5326320SbhollerL(A7Q0):	mov    %dl,-0x7(%rdi)
5336320SbhollerL(A6Q0):	mov    %dx,-0x6(%rdi)
5346320Sbholler		mov    %edx,-0x4(%rdi)
5356320Sbholler		#jmp    L(aligned_now)		# Fall thru...
5360Sstevel@tonic-gate
5376320Sbholler		.balign 16
5386320SbhollerL(aligned_now):
5396320Sbholler		/*
5406320Sbholler		 * Check memops method
5416320Sbholler		 */
5426320Sbholler		cmpl   $NO_SSE,.memops_method(%rip)
5436320Sbholler		je     L(Loop8byte_pre)
5440Sstevel@tonic-gate
5456320Sbholler		/*
5466320Sbholler		 * Use SSE2 instructions
5476320Sbholler		 */
5486320Sbholler	 	movd   %rdx,%xmm0
5496320Sbholler		lea    L(SSExDx)(%rip),%r9	# after dest alignment
5506320Sbholler	 	punpcklqdq %xmm0,%xmm0		# fill RegXMM0 with the pattern
5516320Sbholler		cmp    $0xc0,%r8		# 192
5526320Sbholler		jge    L(byte32sse2_pre)
5536320Sbholler
5546320Sbholler		add    %r8,%rdi
5556320Sbholler
5566320Sbholler		movslq (%r9,%r8,4),%rcx
5576320Sbholler		lea    (%rcx,%r9,1),%r9
5586320Sbholler		jmpq   *%r9
5590Sstevel@tonic-gate
5606320Sbholler		.balign 16
5616320SbhollerL(SSE0QB):	movdqa %xmm0,-0xb0(%rdi)
5626320SbhollerL(SSE0QA):	movdqa %xmm0,-0xa0(%rdi)
5636320SbhollerL(SSE0Q9):	movdqa %xmm0,-0x90(%rdi)
5646320SbhollerL(SSE0Q8):	movdqa %xmm0,-0x80(%rdi)
5656320SbhollerL(SSE0Q7):	movdqa %xmm0,-0x70(%rdi)
5666320SbhollerL(SSE0Q6):	movdqa %xmm0,-0x60(%rdi)
5676320SbhollerL(SSE0Q5):	movdqa %xmm0,-0x50(%rdi)
5686320SbhollerL(SSE0Q4):	movdqa %xmm0,-0x40(%rdi)
5696320SbhollerL(SSE0Q3):	movdqa %xmm0,-0x30(%rdi)
5706320SbhollerL(SSE0Q2):	movdqa %xmm0,-0x20(%rdi)
5716320SbhollerL(SSE0Q1):	movdqa %xmm0,-0x10(%rdi)
5726320SbhollerL(SSE0Q0):	ret
5730Sstevel@tonic-gate
5746320Sbholler		.balign 16
5756320SbhollerL(SSE1QB):	movdqa %xmm0,-0xb1(%rdi)
5766320SbhollerL(SSE1QA):	movdqa %xmm0,-0xa1(%rdi)
5776320SbhollerL(SSE1Q9):	movdqa %xmm0,-0x91(%rdi)
5786320SbhollerL(SSE1Q8):	movdqa %xmm0,-0x81(%rdi)
5796320SbhollerL(SSE1Q7):	movdqa %xmm0,-0x71(%rdi)
5806320SbhollerL(SSE1Q6):	movdqa %xmm0,-0x61(%rdi)
5816320SbhollerL(SSE1Q5):	movdqa %xmm0,-0x51(%rdi)
5826320SbhollerL(SSE1Q4):	movdqa %xmm0,-0x41(%rdi)
5836320SbhollerL(SSE1Q3):	movdqa %xmm0,-0x31(%rdi)
5846320SbhollerL(SSE1Q2):	movdqa %xmm0,-0x21(%rdi)
5856320SbhollerL(SSE1Q1):	movdqa %xmm0,-0x11(%rdi)
5866320SbhollerL(SSE1Q0):	mov    %dl,-0x1(%rdi)
5876320Sbholler		ret
5880Sstevel@tonic-gate
5896320Sbholler		.balign 16
5906320SbhollerL(SSE2QB):	movdqa %xmm0,-0xb2(%rdi)
5916320SbhollerL(SSE2QA):	movdqa %xmm0,-0xa2(%rdi)
5926320SbhollerL(SSE2Q9):	movdqa %xmm0,-0x92(%rdi)
5936320SbhollerL(SSE2Q8):	movdqa %xmm0,-0x82(%rdi)
5946320SbhollerL(SSE2Q7):	movdqa %xmm0,-0x72(%rdi)
5956320SbhollerL(SSE2Q6):	movdqa %xmm0,-0x62(%rdi)
5966320SbhollerL(SSE2Q5):	movdqa %xmm0,-0x52(%rdi)
5976320SbhollerL(SSE2Q4):	movdqa %xmm0,-0x42(%rdi)
5986320SbhollerL(SSE2Q3):	movdqa %xmm0,-0x32(%rdi)
5996320SbhollerL(SSE2Q2):	movdqa %xmm0,-0x22(%rdi)
6006320SbhollerL(SSE2Q1):	movdqa %xmm0,-0x12(%rdi)
6016320SbhollerL(SSE2Q0):	mov    %dx,-0x2(%rdi)
6026320Sbholler		ret
6030Sstevel@tonic-gate
6046320Sbholler		.balign 16
6056320SbhollerL(SSE3QB):	movdqa %xmm0,-0xb3(%rdi)
6066320SbhollerL(SSE3QA):	movdqa %xmm0,-0xa3(%rdi)
6076320SbhollerL(SSE3Q9):	movdqa %xmm0,-0x93(%rdi)
6086320SbhollerL(SSE3Q8):	movdqa %xmm0,-0x83(%rdi)
6096320SbhollerL(SSE3Q7):	movdqa %xmm0,-0x73(%rdi)
6106320SbhollerL(SSE3Q6):	movdqa %xmm0,-0x63(%rdi)
6116320SbhollerL(SSE3Q5):	movdqa %xmm0,-0x53(%rdi)
6126320SbhollerL(SSE3Q4):	movdqa %xmm0,-0x43(%rdi)
6136320SbhollerL(SSE3Q3):	movdqa %xmm0,-0x33(%rdi)
6146320SbhollerL(SSE3Q2):	movdqa %xmm0,-0x23(%rdi)
6156320SbhollerL(SSE3Q1):	movdqa %xmm0,-0x13(%rdi)
6166320SbhollerL(SSE3Q0):	mov    %dx,-0x3(%rdi)
6176320Sbholler		mov    %dl,-0x1(%rdi)
6186320Sbholler		ret
6190Sstevel@tonic-gate
6206320Sbholler		.balign 16
6216320SbhollerL(SSE4QB):	movdqa %xmm0,-0xb4(%rdi)
6226320SbhollerL(SSE4QA):	movdqa %xmm0,-0xa4(%rdi)
6236320SbhollerL(SSE4Q9):	movdqa %xmm0,-0x94(%rdi)
6246320SbhollerL(SSE4Q8):	movdqa %xmm0,-0x84(%rdi)
6256320SbhollerL(SSE4Q7):	movdqa %xmm0,-0x74(%rdi)
6266320SbhollerL(SSE4Q6):	movdqa %xmm0,-0x64(%rdi)
6276320SbhollerL(SSE4Q5):	movdqa %xmm0,-0x54(%rdi)
6286320SbhollerL(SSE4Q4):	movdqa %xmm0,-0x44(%rdi)
6296320SbhollerL(SSE4Q3):	movdqa %xmm0,-0x34(%rdi)
6306320SbhollerL(SSE4Q2):	movdqa %xmm0,-0x24(%rdi)
6316320SbhollerL(SSE4Q1):	movdqa %xmm0,-0x14(%rdi)
6326320SbhollerL(SSE4Q0):	mov    %edx,-0x4(%rdi)
6336320Sbholler		ret
6340Sstevel@tonic-gate
6356320Sbholler		.balign 16
6366320SbhollerL(SSE5QB):	movdqa %xmm0,-0xb5(%rdi)
6376320SbhollerL(SSE5QA):	movdqa %xmm0,-0xa5(%rdi)
6386320SbhollerL(SSE5Q9):	movdqa %xmm0,-0x95(%rdi)
6396320SbhollerL(SSE5Q8):	movdqa %xmm0,-0x85(%rdi)
6406320SbhollerL(SSE5Q7):	movdqa %xmm0,-0x75(%rdi)
6416320SbhollerL(SSE5Q6):	movdqa %xmm0,-0x65(%rdi)
6426320SbhollerL(SSE5Q5):	movdqa %xmm0,-0x55(%rdi)
6436320SbhollerL(SSE5Q4):	movdqa %xmm0,-0x45(%rdi)
6446320SbhollerL(SSE5Q3):	movdqa %xmm0,-0x35(%rdi)
6456320SbhollerL(SSE5Q2):	movdqa %xmm0,-0x25(%rdi)
6466320SbhollerL(SSE5Q1):	movdqa %xmm0,-0x15(%rdi)
6476320SbhollerL(SSE5Q0):	mov    %edx,-0x5(%rdi)
6486320Sbholler		mov    %dl,-0x1(%rdi)
6496320Sbholler		ret
6500Sstevel@tonic-gate
6516320Sbholler		.balign 16
6526320SbhollerL(SSE6QB):	movdqa %xmm0,-0xb6(%rdi)
6536320SbhollerL(SSE6QA):	movdqa %xmm0,-0xa6(%rdi)
6546320SbhollerL(SSE6Q9):	movdqa %xmm0,-0x96(%rdi)
6556320SbhollerL(SSE6Q8):	movdqa %xmm0,-0x86(%rdi)
6566320SbhollerL(SSE6Q7):	movdqa %xmm0,-0x76(%rdi)
6576320SbhollerL(SSE6Q6):	movdqa %xmm0,-0x66(%rdi)
6586320SbhollerL(SSE6Q5):	movdqa %xmm0,-0x56(%rdi)
6596320SbhollerL(SSE6Q4):	movdqa %xmm0,-0x46(%rdi)
6606320SbhollerL(SSE6Q3):	movdqa %xmm0,-0x36(%rdi)
6616320SbhollerL(SSE6Q2):	movdqa %xmm0,-0x26(%rdi)
6626320SbhollerL(SSE6Q1):	movdqa %xmm0,-0x16(%rdi)
6636320SbhollerL(SSE6Q0):	mov    %edx,-0x6(%rdi)
6646320Sbholler		mov    %dx,-0x2(%rdi)
6656320Sbholler		ret
6660Sstevel@tonic-gate
6676320Sbholler		.balign 16
6686320SbhollerL(SSE7QB):	movdqa %xmm0,-0xb7(%rdi)
6696320SbhollerL(SSE7QA):	movdqa %xmm0,-0xa7(%rdi)
6706320SbhollerL(SSE7Q9):	movdqa %xmm0,-0x97(%rdi)
6716320SbhollerL(SSE7Q8):	movdqa %xmm0,-0x87(%rdi)
6726320SbhollerL(SSE7Q7):	movdqa %xmm0,-0x77(%rdi)
6736320SbhollerL(SSE7Q6):	movdqa %xmm0,-0x67(%rdi)
6746320SbhollerL(SSE7Q5):	movdqa %xmm0,-0x57(%rdi)
6756320SbhollerL(SSE7Q4):	movdqa %xmm0,-0x47(%rdi)
6766320SbhollerL(SSE7Q3):	movdqa %xmm0,-0x37(%rdi)
6776320SbhollerL(SSE7Q2):	movdqa %xmm0,-0x27(%rdi)
6786320SbhollerL(SSE7Q1):	movdqa %xmm0,-0x17(%rdi)
6796320SbhollerL(SSE7Q0):	mov    %edx,-0x7(%rdi)
6806320Sbholler		mov    %dx,-0x3(%rdi)
6816320Sbholler		mov    %dl,-0x1(%rdi)
6826320Sbholler		ret
6836320Sbholler
6846320Sbholler		.balign 16
6856320SbhollerL(SSE8QB):	movdqa %xmm0,-0xb8(%rdi)
6866320SbhollerL(SSE8QA):	movdqa %xmm0,-0xa8(%rdi)
6876320SbhollerL(SSE8Q9):	movdqa %xmm0,-0x98(%rdi)
6886320SbhollerL(SSE8Q8):	movdqa %xmm0,-0x88(%rdi)
6896320SbhollerL(SSE8Q7):	movdqa %xmm0,-0x78(%rdi)
6906320SbhollerL(SSE8Q6):	movdqa %xmm0,-0x68(%rdi)
6916320SbhollerL(SSE8Q5):	movdqa %xmm0,-0x58(%rdi)
6926320SbhollerL(SSE8Q4):	movdqa %xmm0,-0x48(%rdi)
6936320SbhollerL(SSE8Q3):	movdqa %xmm0,-0x38(%rdi)
6946320SbhollerL(SSE8Q2):	movdqa %xmm0,-0x28(%rdi)
6956320SbhollerL(SSE8Q1):	movdqa %xmm0,-0x18(%rdi)
6966320SbhollerL(SSE8Q0):	mov    %rdx,-0x8(%rdi)
6976320Sbholler		ret
6980Sstevel@tonic-gate
6996320Sbholler		.balign 16
7006320SbhollerL(SSE9QB):	movdqa %xmm0,-0xb9(%rdi)
7016320SbhollerL(SSE9QA):	movdqa %xmm0,-0xa9(%rdi)
7026320SbhollerL(SSE9Q9):	movdqa %xmm0,-0x99(%rdi)
7036320SbhollerL(SSE9Q8):	movdqa %xmm0,-0x89(%rdi)
7046320SbhollerL(SSE9Q7):	movdqa %xmm0,-0x79(%rdi)
7056320SbhollerL(SSE9Q6):	movdqa %xmm0,-0x69(%rdi)
7066320SbhollerL(SSE9Q5):	movdqa %xmm0,-0x59(%rdi)
7076320SbhollerL(SSE9Q4):	movdqa %xmm0,-0x49(%rdi)
7086320SbhollerL(SSE9Q3):	movdqa %xmm0,-0x39(%rdi)
7096320SbhollerL(SSE9Q2):	movdqa %xmm0,-0x29(%rdi)
7106320SbhollerL(SSE9Q1):	movdqa %xmm0,-0x19(%rdi)
7116320SbhollerL(SSE9Q0):	mov    %rdx,-0x9(%rdi)
7126320Sbholler		mov    %dl,-0x1(%rdi)
7136320Sbholler		ret
7140Sstevel@tonic-gate
7156320Sbholler		.balign 16
7166320SbhollerL(SSE10QB):	movdqa %xmm0,-0xba(%rdi)
7176320SbhollerL(SSE10QA):	movdqa %xmm0,-0xaa(%rdi)
7186320SbhollerL(SSE10Q9):	movdqa %xmm0,-0x9a(%rdi)
7196320SbhollerL(SSE10Q8):	movdqa %xmm0,-0x8a(%rdi)
7206320SbhollerL(SSE10Q7):	movdqa %xmm0,-0x7a(%rdi)
7216320SbhollerL(SSE10Q6):	movdqa %xmm0,-0x6a(%rdi)
7226320SbhollerL(SSE10Q5):	movdqa %xmm0,-0x5a(%rdi)
7236320SbhollerL(SSE10Q4):	movdqa %xmm0,-0x4a(%rdi)
7246320SbhollerL(SSE10Q3):	movdqa %xmm0,-0x3a(%rdi)
7256320SbhollerL(SSE10Q2):	movdqa %xmm0,-0x2a(%rdi)
7266320SbhollerL(SSE10Q1):	movdqa %xmm0,-0x1a(%rdi)
7276320SbhollerL(SSE10Q0):	mov    %rdx,-0xa(%rdi)
7286320Sbholler		mov    %dx,-0x2(%rdi)
7296320Sbholler		ret
7300Sstevel@tonic-gate
7316320Sbholler		.balign 16
7326320SbhollerL(SSE11QB):	movdqa %xmm0,-0xbb(%rdi)
7336320SbhollerL(SSE11QA):	movdqa %xmm0,-0xab(%rdi)
7346320SbhollerL(SSE11Q9):	movdqa %xmm0,-0x9b(%rdi)
7356320SbhollerL(SSE11Q8):	movdqa %xmm0,-0x8b(%rdi)
7366320SbhollerL(SSE11Q7):	movdqa %xmm0,-0x7b(%rdi)
7376320SbhollerL(SSE11Q6):	movdqa %xmm0,-0x6b(%rdi)
7386320SbhollerL(SSE11Q5):	movdqa %xmm0,-0x5b(%rdi)
7396320SbhollerL(SSE11Q4):	movdqa %xmm0,-0x4b(%rdi)
7406320SbhollerL(SSE11Q3):	movdqa %xmm0,-0x3b(%rdi)
7416320SbhollerL(SSE11Q2):	movdqa %xmm0,-0x2b(%rdi)
7426320SbhollerL(SSE11Q1):	movdqa %xmm0,-0x1b(%rdi)
7436320SbhollerL(SSE11Q0):	mov    %rdx,-0xb(%rdi)
7446320Sbholler		mov    %dx,-0x3(%rdi)
7456320Sbholler		mov    %dl,-0x1(%rdi)
7466320Sbholler		ret
7470Sstevel@tonic-gate
7486320Sbholler		.balign 16
		/*
		 * Tail stores for a residue of 16*n + 12 bytes (n = 0 .. 0xb).
		 * Every L(SSE12Qn) label is its own entry point (each one is listed
		 * in the L(SSExDx) offset table) and execution falls through all of
		 * the stores below it: n 16-byte movdqa stores from %xmm0, then the
		 * last 12 bytes as 8 (%rdx) + 4 (%edx).  All offsets are negative,
		 * so on entry %rdi points one byte past the end of the region.
		 * movdqa faults on a misaligned address, so for n >= 1 the address
		 * %rdi - (16*n + 12) has to be 16-byte aligned.
		 * NOTE(review): that alignment, and %xmm0/%rdx holding the
		 * replicated fill byte, are presumably established by code outside
		 * this chunk -- confirm.
		 */
7496320SbhollerL(SSE12QB):	movdqa %xmm0,-0xbc(%rdi)
7506320SbhollerL(SSE12QA):	movdqa %xmm0,-0xac(%rdi)
7516320SbhollerL(SSE12Q9):	movdqa %xmm0,-0x9c(%rdi)
7526320SbhollerL(SSE12Q8):	movdqa %xmm0,-0x8c(%rdi)
7536320SbhollerL(SSE12Q7):	movdqa %xmm0,-0x7c(%rdi)
7546320SbhollerL(SSE12Q6):	movdqa %xmm0,-0x6c(%rdi)
7556320SbhollerL(SSE12Q5):	movdqa %xmm0,-0x5c(%rdi)
7566320SbhollerL(SSE12Q4):	movdqa %xmm0,-0x4c(%rdi)
7576320SbhollerL(SSE12Q3):	movdqa %xmm0,-0x3c(%rdi)
7586320SbhollerL(SSE12Q2):	movdqa %xmm0,-0x2c(%rdi)
7596320SbhollerL(SSE12Q1):	movdqa %xmm0,-0x1c(%rdi)
		/* Final 12 bytes: 8 at -0xc, then 4 at -0x4. */
7606320SbhollerL(SSE12Q0):	mov    %rdx,-0xc(%rdi)
7616320Sbholler		mov    %edx,-0x4(%rdi)
7626320Sbholler		ret
7630Sstevel@tonic-gate
7646320Sbholler		.balign 16
		/*
		 * Tail stores for a residue of 16*n + 13 bytes (n = 0 .. 0xb).
		 * Every L(SSE13Qn) label is its own entry point (each one is listed
		 * in the L(SSExDx) offset table) and execution falls through all of
		 * the stores below it: n 16-byte movdqa stores from %xmm0, then the
		 * last 13 bytes as 8 (%rdx) + 4 (%edx) + 1 (%dl).  All offsets are
		 * negative, so on entry %rdi points one byte past the end of the
		 * region.  movdqa faults on a misaligned address, so for n >= 1 the
		 * address %rdi - (16*n + 13) has to be 16-byte aligned.
		 * NOTE(review): that alignment, and %xmm0/%rdx holding the
		 * replicated fill byte, are presumably established by code outside
		 * this chunk -- confirm.
		 */
7656320SbhollerL(SSE13QB):	movdqa %xmm0,-0xbd(%rdi)
7666320SbhollerL(SSE13QA):	movdqa %xmm0,-0xad(%rdi)
7676320SbhollerL(SSE13Q9):	movdqa %xmm0,-0x9d(%rdi)
7686320SbhollerL(SSE13Q8):	movdqa %xmm0,-0x8d(%rdi)
7696320SbhollerL(SSE13Q7):	movdqa %xmm0,-0x7d(%rdi)
7706320SbhollerL(SSE13Q6):	movdqa %xmm0,-0x6d(%rdi)
7716320SbhollerL(SSE13Q5):	movdqa %xmm0,-0x5d(%rdi)
7726320SbhollerL(SSE13Q4):	movdqa %xmm0,-0x4d(%rdi)
7736320SbhollerL(SSE13Q3):	movdqa %xmm0,-0x3d(%rdi)
7746320SbhollerL(SSE13Q2):	movdqa %xmm0,-0x2d(%rdi)
7756320SbhollerL(SSE13Q1):	movdqa %xmm0,-0x1d(%rdi)
		/* Final 13 bytes: 8 at -0xd, 4 at -0x5, 1 at -0x1. */
7766320SbhollerL(SSE13Q0):	mov    %rdx,-0xd(%rdi)
7776320Sbholler		mov    %edx,-0x5(%rdi)
7786320Sbholler		mov    %dl,-0x1(%rdi)
7796320Sbholler		ret
7800Sstevel@tonic-gate
7816320Sbholler		.balign 16
		/*
		 * Tail stores for a residue of 16*n + 14 bytes (n = 0 .. 0xb).
		 * Every L(SSE14Qn) label is its own entry point (each one is listed
		 * in the L(SSExDx) offset table) and execution falls through all of
		 * the stores below it: n 16-byte movdqa stores from %xmm0, then the
		 * last 14 bytes as 8 (%rdx) + 4 (%edx) + 2 (%dx).  All offsets are
		 * negative, so on entry %rdi points one byte past the end of the
		 * region.  movdqa faults on a misaligned address, so for n >= 1 the
		 * address %rdi - (16*n + 14) has to be 16-byte aligned.
		 * NOTE(review): that alignment, and %xmm0/%rdx holding the
		 * replicated fill byte, are presumably established by code outside
		 * this chunk -- confirm.
		 */
7826320SbhollerL(SSE14QB):	movdqa %xmm0,-0xbe(%rdi)
7836320SbhollerL(SSE14QA):	movdqa %xmm0,-0xae(%rdi)
7846320SbhollerL(SSE14Q9):	movdqa %xmm0,-0x9e(%rdi)
7856320SbhollerL(SSE14Q8):	movdqa %xmm0,-0x8e(%rdi)
7866320SbhollerL(SSE14Q7):	movdqa %xmm0,-0x7e(%rdi)
7876320SbhollerL(SSE14Q6):	movdqa %xmm0,-0x6e(%rdi)
7886320SbhollerL(SSE14Q5):	movdqa %xmm0,-0x5e(%rdi)
7896320SbhollerL(SSE14Q4):	movdqa %xmm0,-0x4e(%rdi)
7906320SbhollerL(SSE14Q3):	movdqa %xmm0,-0x3e(%rdi)
7916320SbhollerL(SSE14Q2):	movdqa %xmm0,-0x2e(%rdi)
7926320SbhollerL(SSE14Q1):	movdqa %xmm0,-0x1e(%rdi)
		/* Final 14 bytes: 8 at -0xe, 4 at -0x6, 2 at -0x2. */
7936320SbhollerL(SSE14Q0):	mov    %rdx,-0xe(%rdi)
7946320Sbholler		mov    %edx,-0x6(%rdi)
7956320Sbholler		mov    %dx,-0x2(%rdi)
7966320Sbholler		ret
7970Sstevel@tonic-gate
7986320Sbholler		.balign 16
		/*
		 * Tail stores for a residue of 16*n + 15 bytes (n = 0 .. 0xb).
		 * Every L(SSE15Qn) label is its own entry point (each one is listed
		 * in the L(SSExDx) offset table) and execution falls through all of
		 * the stores below it: n 16-byte movdqa stores from %xmm0, then the
		 * last 15 bytes as 8 (%rdx) + 4 (%edx) + 2 (%dx) + 1 (%dl).  All
		 * offsets are negative, so on entry %rdi points one byte past the
		 * end of the region.  movdqa faults on a misaligned address, so for
		 * n >= 1 the address %rdi - (16*n + 15) has to be 16-byte aligned.
		 * NOTE(review): that alignment, and %xmm0/%rdx holding the
		 * replicated fill byte, are presumably established by code outside
		 * this chunk -- confirm.
		 */
7996320SbhollerL(SSE15QB):	movdqa %xmm0,-0xbf(%rdi)
8006320SbhollerL(SSE15QA):	movdqa %xmm0,-0xaf(%rdi)
8016320SbhollerL(SSE15Q9):	movdqa %xmm0,-0x9f(%rdi)
8026320SbhollerL(SSE15Q8):	movdqa %xmm0,-0x8f(%rdi)
8036320SbhollerL(SSE15Q7):	movdqa %xmm0,-0x7f(%rdi)
8046320SbhollerL(SSE15Q6):	movdqa %xmm0,-0x6f(%rdi)
8056320SbhollerL(SSE15Q5):	movdqa %xmm0,-0x5f(%rdi)
8066320SbhollerL(SSE15Q4):	movdqa %xmm0,-0x4f(%rdi)
8076320SbhollerL(SSE15Q3):	movdqa %xmm0,-0x3f(%rdi)
8086320SbhollerL(SSE15Q2):	movdqa %xmm0,-0x2f(%rdi)
8096320SbhollerL(SSE15Q1):	movdqa %xmm0,-0x1f(%rdi)
		/* Final 15 bytes: 8 at -0xf, 4 at -0x7, 2 at -0x3, 1 at -0x1. */
8106320SbhollerL(SSE15Q0):	mov    %rdx,-0xf(%rdi)
8116320Sbholler		mov    %edx,-0x7(%rdi)
8126320Sbholler		mov    %dx,-0x3(%rdi)
8136320Sbholler		mov    %dl,-0x1(%rdi)
8146320Sbholler		ret
8150Sstevel@tonic-gate
8166320Sbholler		.balign 16
		/*
		 * Pick the SSE2 store loop by size.  The 32-bit value at
		 * .largest_level_cache_size is loaded into %r9d (which zero-extends
		 * into %r9) and compared with the byte count in %r8: a larger count
		 * goes to the non-temporal loop L(sse2_nt_move), anything else falls
		 * through into L(byte32sse2) below.
		 * NOTE(review): jg is a signed comparison, so a count with bit 63
		 * set would take the cached path; presumably such counts cannot
		 * occur -- confirm.
		 */
8176320SbhollerL(byte32sse2_pre):
8186320Sbholler		mov    .largest_level_cache_size(%rip),%r9d
8196320Sbholler		cmp    %r9,%r8
8206320Sbholler		jg     L(sse2_nt_move)
8216320Sbholler		#jmp    L(byte32sse2)		# Fall thru...
8226320Sbholler
8236320Sbholler		.balign 16
		/*
		 * Main SSE2 loop.  Each pass stores 128 bytes at (%rdi) with eight
		 * 16-byte movdqa stores from %xmm0, takes 128 off the remaining
		 * count in %r8 and advances %rdi by 128.
		 * NOTE(review): the code that reaches this loop is outside this
		 * chunk; it presumably guarantees %r8 >= 0x80 and a 16-byte aligned
		 * %rdi on entry (movdqa faults otherwise) -- confirm.
		 */
8246320SbhollerL(byte32sse2):
8256320Sbholler		lea    -0x80(%r8),%r8		# 128
		/*
		 * Compare early: neither movdqa nor lea modifies the flags, so the
		 * jge at the bottom of the loop still tests the result of this cmp
		 * (continue while the updated %r8 >= 0x80, signed).
		 */
8266320Sbholler		cmp    $0x80,%r8
8276320Sbholler		movdqa %xmm0,(%rdi)
8286320Sbholler		movdqa %xmm0,0x10(%rdi)
8296320Sbholler		movdqa %xmm0,0x20(%rdi)
8306320Sbholler		movdqa %xmm0,0x30(%rdi)
8316320Sbholler		movdqa %xmm0,0x40(%rdi)
8326320Sbholler		movdqa %xmm0,0x50(%rdi)
8336320Sbholler		movdqa %xmm0,0x60(%rdi)
8346320Sbholler		movdqa %xmm0,0x70(%rdi)
8350Sstevel@tonic-gate
8366320Sbholler		lea    0x80(%rdi),%rdi
8376320Sbholler		jge    L(byte32sse2)
8386320Sbholler
		/*
		 * %r8 is now below 0x80.  Advance %rdi by %r8 so that it points at
		 * the end of the region, load the sign-extended 32-bit entry number
		 * %r8 of the L(SSExDx) table (an offset relative to the table),
		 * add the table address and jump to that tail-store entry point.
		 */
8396320Sbholler		lea    L(SSExDx)(%rip),%r11
8406320Sbholler		add    %r8,%rdi
8416320Sbholler		movslq (%r11,%r8,4),%rcx
8426320Sbholler		lea    (%rcx,%r11,1),%r11
8436320Sbholler		jmpq   *%r11
8440Sstevel@tonic-gate
8456320Sbholler		.balign	16
		/*
		 * Non-temporal SSE2 loop, taken from L(byte32sse2_pre) when the
		 * count in %r8 exceeds the value read from
		 * .largest_level_cache_size.  Each pass stores 128 bytes at (%rdi)
		 * with eight 16-byte movntdq stores from %xmm0, then advances %rdi
		 * by 128; %r8 is reduced by 128 at the top of the pass and the loop
		 * repeats while at least 0x80 bytes remain (signed compare).
		 * NOTE(review): movntdq needs 16-byte aligned addresses; the
		 * alignment of %rdi is presumably set up before this chunk --
		 * confirm.
		 */
8466320SbhollerL(sse2_nt_move):
8476320Sbholler		sub    $0x80,%r8		# 128
8486320Sbholler		movntdq %xmm0,(%rdi)
8496320Sbholler		movntdq %xmm0,0x10(%rdi)
8506320Sbholler		movntdq %xmm0,0x20(%rdi)
8516320Sbholler		movntdq %xmm0,0x30(%rdi)
8526320Sbholler		movntdq %xmm0,0x40(%rdi)
8536320Sbholler		movntdq %xmm0,0x50(%rdi)
8546320Sbholler		movntdq %xmm0,0x60(%rdi)
8556320Sbholler		movntdq %xmm0,0x70(%rdi)
8566320Sbholler		add    $0x80,%rdi
8576320Sbholler		cmp    $0x80,%r8
8586320Sbholler		jge    L(sse2_nt_move)
8590Sstevel@tonic-gate
		/*
		 * Non-temporal stores are weakly ordered; the sfence orders them
		 * ahead of the stores that follow.  Then dispatch the remaining
		 * %r8 (< 0x80) bytes: move %rdi to the end of the region, fetch the
		 * sign-extended 32-bit offset at index %r8 of L(SSExDx), add the
		 * table address and jump to that tail-store entry point.
		 */
8606320Sbholler		sfence
8616320Sbholler		lea    L(SSExDx)(%rip),%r11
8626320Sbholler		add    %r8,%rdi
8636320Sbholler		movslq (%r11,%r8,4),%rcx
8646320Sbholler		lea    (%rcx,%r11,1),%r11
8656320Sbholler		jmpq   *%r11
8660Sstevel@tonic-gate
8676320Sbholler		/*
8686320Sbholler		 * Don't use SSE
8696320Sbholler		 */
8706320Sbholler		.balign 16
		/*
		 * Strategy selection for the non-SSE path, by byte count in %r8:
		 *   %r8 >  value at .largest_level_cache_size -> L(Loop8byte_nt_move)
		 *   %r8 >= 0x800 (2K)                         -> L(use_rep)
		 *   otherwise                                 -> fall thru to
		 *                                                L(Loop8byte)
		 * The cache size is a 32-bit load into %r9d, which zero-extends into
		 * %r9.  Both comparisons are signed (jg / jge).
		 */
8716320SbhollerL(Loop8byte_pre):
8726320Sbholler		mov    .largest_level_cache_size(%rip),%r9d
8736320Sbholler		cmp    %r9,%r8
8746320Sbholler		jg     L(Loop8byte_nt_move)
8756320Sbholler		cmp    $0x800,%r8		# Use rep sstoq
8766320Sbholler		jge    L(use_rep)
8770Sstevel@tonic-gate
8786320Sbholler		.balign 16
		/*
		 * Non-SSE main loop, entered by falling through from
		 * L(Loop8byte_pre).  Each pass writes 128 bytes at (%rdi) as sixteen
		 * 8-byte stores of %rdx, takes 128 off the remaining count in %r8
		 * and advances %rdi by 128.
		 * NOTE(review): the first pass runs unconditionally, so %r8 is
		 * presumably at least 0x80 on entry -- confirm against the code
		 * that branches to L(Loop8byte_pre), which is outside this chunk.
		 */
8796320SbhollerL(Loop8byte):
8806320Sbholler		lea    -0x80(%r8),%r8		# 128
8816320Sbholler		mov    %rdx,(%rdi)
8826320Sbholler		mov    %rdx,0x8(%rdi)
8836320Sbholler		mov    %rdx,0x10(%rdi)
8846320Sbholler		mov    %rdx,0x18(%rdi)
8856320Sbholler		mov    %rdx,0x20(%rdi)
8866320Sbholler		mov    %rdx,0x28(%rdi)
8876320Sbholler		mov    %rdx,0x30(%rdi)
8886320Sbholler		mov    %rdx,0x38(%rdi)
		/*
		 * Compare mid-loop: the mov and lea instructions that follow leave
		 * the flags alone, so the jge below still tests this cmp
		 * (continue while the updated %r8 >= 0x80, signed).
		 */
8896320Sbholler		cmp    $0x80,%r8
8906320Sbholler		mov    %rdx,0x40(%rdi)
8916320Sbholler		mov    %rdx,0x48(%rdi)
8926320Sbholler		mov    %rdx,0x50(%rdi)
8936320Sbholler		mov    %rdx,0x58(%rdi)
8946320Sbholler		mov    %rdx,0x60(%rdi)
8956320Sbholler		mov    %rdx,0x68(%rdi)
8966320Sbholler		mov    %rdx,0x70(%rdi)
8976320Sbholler		mov    %rdx,0x78(%rdi)
8986320Sbholler		lea    0x80(%rdi),%rdi
8996320Sbholler		jge    L(Loop8byte)
9006320Sbholler
		/*
		 * Tail dispatch for the remaining %r8 bytes.  Local label 1 is also
		 * the target of the "jnz 1b" in L(use_rep).  %rdi is moved to the
		 * end of the region, then the sign-extended 32-bit entry at index
		 * %r8 of L(setPxQx) is added to the table address and jumped to.
		 * L(setPxQx) itself is defined outside this chunk.
		 */
9016320Sbholler1:
9026320Sbholler		lea    L(setPxQx)(%rip),%r11
9036320Sbholler		lea    (%rdi,%r8,1),%rdi
9046320Sbholler
9056320Sbholler		movslq (%r11,%r8,4),%rcx
9066320Sbholler		lea    (%rcx,%r11,1),%r11
9076320Sbholler		jmpq   *%r11
9080Sstevel@tonic-gate
9096320Sbholler		/*
9106320Sbholler		 * Use rep sstoq for sizes >= 2K (0x800) that do not exceed the cache-size limit
9116320Sbholler		 */
9126320Sbholler		.balign 16
		/*
		 * Fill with a repeated 8-byte string store.  Reached from
		 * L(Loop8byte_pre) when %r8 >= 0x800.  sstoq is this assembler's
		 * spelling of the quadword store-string instruction (STOSQ in the
		 * Intel manuals): it stores %rax at (%rdi), advances %rdi by 8 and,
		 * under rep, repeats %rcx times.  The 8-byte pattern lives in %rdx,
		 * so it is swapped into %rax for the duration and swapped back
		 * afterwards, which also restores the previous %rax.
		 * NOTE(review): the previous %rax is presumably memset's return
		 * value, and the forward direction relies on the direction flag
		 * being clear -- both come from outside this chunk, confirm.
		 */
9136320SbhollerL(use_rep):
9146320Sbholler		movq   %r8,%rcx			# get size in bytes
9156320Sbholler		xchg   %rax,%rdx
		/* %rcx = number of whole 8-byte words to store */
9166320Sbholler		shrq   $3,%rcx
9176320Sbholler		rep
9186320Sbholler		  sstoq
9196320Sbholler		xchg   %rax,%rdx
		/*
		 * Keep only the 0..7 leftover bytes in %r8.  If any remain, finish
		 * them through the tail dispatch at local label 1 (the nearest
		 * preceding "1:", at the end of L(Loop8byte)); otherwise return.
		 */
9206320Sbholler		andq   $7,%r8			# remaining bytes
9216320Sbholler		jnz    1b
9226320Sbholler		ret
9230Sstevel@tonic-gate
9246320Sbholler		.balign 16
		/*
		 * Non-temporal variant of the 8-byte store loop, taken from
		 * L(Loop8byte_pre) when the count in %r8 exceeds the value read from
		 * .largest_level_cache_size.  Each pass writes 128 bytes at (%rdi)
		 * as sixteen 8-byte movnti stores of %rdx, takes 128 off %r8 and
		 * advances %rdi by 128.
		 */
9256320SbhollerL(Loop8byte_nt_move):
926*10024Sbostrovs		lea    -0x80(%r8),%r8		# 128
9276320Sbholler		movnti %rdx,(%rdi)
9286320Sbholler		movnti %rdx,0x8(%rdi)
9296320Sbholler		movnti %rdx,0x10(%rdi)
9306320Sbholler		movnti %rdx,0x18(%rdi)
9316320Sbholler		movnti %rdx,0x20(%rdi)
9326320Sbholler		movnti %rdx,0x28(%rdi)
9336320Sbholler		movnti %rdx,0x30(%rdi)
9346320Sbholler		movnti %rdx,0x38(%rdi)
		/*
		 * Compare mid-loop: the movnti and lea instructions that follow
		 * leave the flags alone, so the jge below still tests this cmp
		 * (continue while the updated %r8 >= 0x80, signed).
		 */
935*10024Sbostrovs		cmp    $0x80,%r8
936*10024Sbostrovs		movnti %rdx,0x40(%rdi)
937*10024Sbostrovs		movnti %rdx,0x48(%rdi)
938*10024Sbostrovs		movnti %rdx,0x50(%rdi)
939*10024Sbostrovs		movnti %rdx,0x58(%rdi)
940*10024Sbostrovs		movnti %rdx,0x60(%rdi)
941*10024Sbostrovs		movnti %rdx,0x68(%rdi)
942*10024Sbostrovs		movnti %rdx,0x70(%rdi)
943*10024Sbostrovs		movnti %rdx,0x78(%rdi)
944*10024Sbostrovs		lea    0x80(%rdi),%rdi
9456320Sbholler		jge    L(Loop8byte_nt_move)
9466320Sbholler
		/*
		 * Non-temporal stores are weakly ordered; the sfence orders them
		 * ahead of the stores that follow.  Then dispatch the remaining
		 * %r8 (< 0x80) bytes: move %rdi to the end of the region, fetch the
		 * sign-extended 32-bit entry at index %r8 of L(setPxQx) (a table
		 * defined outside this chunk), add the table address and jump.
		 */
9476320Sbholler		sfence
9486320Sbholler		lea    L(setPxQx)(%rip),%r11
9496320Sbholler		lea    (%rdi,%r8,1),%rdi
9506320Sbholler
9516320Sbholler		movslq    (%r11,%r8,4),%rcx
9526320Sbholler		lea    (%rcx,%r11,1),%r11
9536320Sbholler		jmpq   *%r11
9540Sstevel@tonic-gate
9556320Sbholler		.balign 16
		/*
		 * Dispatch table for the SSE tail-store ladders.  Entry i is the
		 * 32-bit offset, relative to L(SSExDx) itself, of the entry point
		 * L(SSE<i mod 16>Q<i div 16>), i.e. the code that stores the last
		 * i bytes of the region.  There are 12 groups (Q0 .. QB) of 16
		 * entries each, covering residues 0 .. 0xbf.  The users in this
		 * file index it with the residual byte count scaled by 4
		 * (movslq (%r11,%r8,4)) and add the table address back before
		 * jumping.  Because each entry is a difference of two labels, the
		 * table holds no absolute addresses.
		 */
9566320SbhollerL(SSExDx):	.int       L(SSE0Q0) -L(SSExDx)
9576320Sbholler		.int       L(SSE1Q0) -L(SSExDx)
9586320Sbholler		.int       L(SSE2Q0) -L(SSExDx)
9596320Sbholler		.int       L(SSE3Q0) -L(SSExDx)
9606320Sbholler		.int       L(SSE4Q0) -L(SSExDx)
9616320Sbholler		.int       L(SSE5Q0) -L(SSExDx)
9626320Sbholler		.int       L(SSE6Q0) -L(SSExDx)
9636320Sbholler		.int       L(SSE7Q0) -L(SSExDx)
9640Sstevel@tonic-gate
9656320Sbholler		.int       L(SSE8Q0) -L(SSExDx)
9666320Sbholler		.int       L(SSE9Q0) -L(SSExDx)
9676320Sbholler		.int       L(SSE10Q0)-L(SSExDx)
9686320Sbholler		.int       L(SSE11Q0)-L(SSExDx)
9696320Sbholler		.int       L(SSE12Q0)-L(SSExDx)
9706320Sbholler		.int       L(SSE13Q0)-L(SSExDx)
9716320Sbholler		.int       L(SSE14Q0)-L(SSExDx)
9726320Sbholler		.int       L(SSE15Q0)-L(SSExDx)
9730Sstevel@tonic-gate
9746320Sbholler		.int       L(SSE0Q1) -L(SSExDx)
9756320Sbholler		.int       L(SSE1Q1) -L(SSExDx)
9766320Sbholler		.int       L(SSE2Q1) -L(SSExDx)
9776320Sbholler		.int       L(SSE3Q1) -L(SSExDx)
9786320Sbholler		.int       L(SSE4Q1) -L(SSExDx)
9796320Sbholler		.int       L(SSE5Q1) -L(SSExDx)
9806320Sbholler		.int       L(SSE6Q1) -L(SSExDx)
9816320Sbholler		.int       L(SSE7Q1) -L(SSExDx)
9820Sstevel@tonic-gate
9836320Sbholler		.int       L(SSE8Q1) -L(SSExDx)
9846320Sbholler		.int       L(SSE9Q1) -L(SSExDx)
9856320Sbholler		.int       L(SSE10Q1)-L(SSExDx)
9866320Sbholler		.int       L(SSE11Q1)-L(SSExDx)
9876320Sbholler		.int       L(SSE12Q1)-L(SSExDx)
9886320Sbholler		.int       L(SSE13Q1)-L(SSExDx)
9896320Sbholler		.int       L(SSE14Q1)-L(SSExDx)
9906320Sbholler		.int       L(SSE15Q1)-L(SSExDx)
9910Sstevel@tonic-gate
9926320Sbholler		.int       L(SSE0Q2) -L(SSExDx)
9936320Sbholler		.int       L(SSE1Q2) -L(SSExDx)
9946320Sbholler		.int       L(SSE2Q2) -L(SSExDx)
9956320Sbholler		.int       L(SSE3Q2) -L(SSExDx)
9966320Sbholler		.int       L(SSE4Q2) -L(SSExDx)
9976320Sbholler		.int       L(SSE5Q2) -L(SSExDx)
9986320Sbholler		.int       L(SSE6Q2) -L(SSExDx)
9996320Sbholler		.int       L(SSE7Q2) -L(SSExDx)
10000Sstevel@tonic-gate
10016320Sbholler		.int       L(SSE8Q2) -L(SSExDx)
10026320Sbholler		.int       L(SSE9Q2) -L(SSExDx)
10036320Sbholler		.int       L(SSE10Q2)-L(SSExDx)
10046320Sbholler		.int       L(SSE11Q2)-L(SSExDx)
10056320Sbholler		.int       L(SSE12Q2)-L(SSExDx)
10066320Sbholler		.int       L(SSE13Q2)-L(SSExDx)
10076320Sbholler		.int       L(SSE14Q2)-L(SSExDx)
10086320Sbholler		.int       L(SSE15Q2)-L(SSExDx)
10096320Sbholler
10106320Sbholler		.int       L(SSE0Q3) -L(SSExDx)
10116320Sbholler		.int       L(SSE1Q3) -L(SSExDx)
10126320Sbholler		.int       L(SSE2Q3) -L(SSExDx)
10136320Sbholler		.int       L(SSE3Q3) -L(SSExDx)
10146320Sbholler		.int       L(SSE4Q3) -L(SSExDx)
10156320Sbholler		.int       L(SSE5Q3) -L(SSExDx)
10166320Sbholler		.int       L(SSE6Q3) -L(SSExDx)
10176320Sbholler		.int       L(SSE7Q3) -L(SSExDx)
10186320Sbholler
10196320Sbholler		.int       L(SSE8Q3) -L(SSExDx)
10206320Sbholler		.int       L(SSE9Q3) -L(SSExDx)
10216320Sbholler		.int       L(SSE10Q3)-L(SSExDx)
10226320Sbholler		.int       L(SSE11Q3)-L(SSExDx)
10236320Sbholler		.int       L(SSE12Q3)-L(SSExDx)
10246320Sbholler		.int       L(SSE13Q3)-L(SSExDx)
10256320Sbholler		.int       L(SSE14Q3)-L(SSExDx)
10266320Sbholler		.int       L(SSE15Q3)-L(SSExDx)
10270Sstevel@tonic-gate
10286320Sbholler		.int       L(SSE0Q4) -L(SSExDx)
10296320Sbholler		.int       L(SSE1Q4) -L(SSExDx)
10306320Sbholler		.int       L(SSE2Q4) -L(SSExDx)
10316320Sbholler		.int       L(SSE3Q4) -L(SSExDx)
10326320Sbholler		.int       L(SSE4Q4) -L(SSExDx)
10336320Sbholler		.int       L(SSE5Q4) -L(SSExDx)
10346320Sbholler		.int       L(SSE6Q4) -L(SSExDx)
10356320Sbholler		.int       L(SSE7Q4) -L(SSExDx)
10366320Sbholler
10376320Sbholler		.int       L(SSE8Q4) -L(SSExDx)
10386320Sbholler		.int       L(SSE9Q4) -L(SSExDx)
10396320Sbholler		.int       L(SSE10Q4)-L(SSExDx)
10406320Sbholler		.int       L(SSE11Q4)-L(SSExDx)
10416320Sbholler		.int       L(SSE12Q4)-L(SSExDx)
10426320Sbholler		.int       L(SSE13Q4)-L(SSExDx)
10436320Sbholler		.int       L(SSE14Q4)-L(SSExDx)
10446320Sbholler		.int       L(SSE15Q4)-L(SSExDx)
10456320Sbholler
10466320Sbholler		.int       L(SSE0Q5) -L(SSExDx)
10476320Sbholler		.int       L(SSE1Q5) -L(SSExDx)
10486320Sbholler		.int       L(SSE2Q5) -L(SSExDx)
10496320Sbholler		.int       L(SSE3Q5) -L(SSExDx)
10506320Sbholler		.int       L(SSE4Q5) -L(SSExDx)
10516320Sbholler		.int       L(SSE5Q5) -L(SSExDx)
10526320Sbholler		.int       L(SSE6Q5) -L(SSExDx)
10536320Sbholler		.int       L(SSE7Q5) -L(SSExDx)
10546320Sbholler
10556320Sbholler		.int       L(SSE8Q5) -L(SSExDx)
10566320Sbholler		.int       L(SSE9Q5) -L(SSExDx)
10576320Sbholler		.int       L(SSE10Q5)-L(SSExDx)
10586320Sbholler		.int       L(SSE11Q5)-L(SSExDx)
10596320Sbholler		.int       L(SSE12Q5)-L(SSExDx)
10606320Sbholler		.int       L(SSE13Q5)-L(SSExDx)
10616320Sbholler		.int       L(SSE14Q5)-L(SSExDx)
10626320Sbholler		.int       L(SSE15Q5)-L(SSExDx)
10630Sstevel@tonic-gate
10646320Sbholler		.int       L(SSE0Q6) -L(SSExDx)
10656320Sbholler		.int       L(SSE1Q6) -L(SSExDx)
10666320Sbholler		.int       L(SSE2Q6) -L(SSExDx)
10676320Sbholler		.int       L(SSE3Q6) -L(SSExDx)
10686320Sbholler		.int       L(SSE4Q6) -L(SSExDx)
10696320Sbholler		.int       L(SSE5Q6) -L(SSExDx)
10706320Sbholler		.int       L(SSE6Q6) -L(SSExDx)
10716320Sbholler		.int       L(SSE7Q6) -L(SSExDx)
10726320Sbholler
10736320Sbholler		.int       L(SSE8Q6) -L(SSExDx)
10746320Sbholler		.int       L(SSE9Q6) -L(SSExDx)
10756320Sbholler		.int       L(SSE10Q6)-L(SSExDx)
10766320Sbholler		.int       L(SSE11Q6)-L(SSExDx)
10776320Sbholler		.int       L(SSE12Q6)-L(SSExDx)
10786320Sbholler		.int       L(SSE13Q6)-L(SSExDx)
10796320Sbholler		.int       L(SSE14Q6)-L(SSExDx)
10806320Sbholler		.int       L(SSE15Q6)-L(SSExDx)
10816320Sbholler
10826320Sbholler		.int       L(SSE0Q7) -L(SSExDx)
10836320Sbholler		.int       L(SSE1Q7) -L(SSExDx)
10846320Sbholler		.int       L(SSE2Q7) -L(SSExDx)
10856320Sbholler		.int       L(SSE3Q7) -L(SSExDx)
10866320Sbholler		.int       L(SSE4Q7) -L(SSExDx)
10876320Sbholler		.int       L(SSE5Q7) -L(SSExDx)
10886320Sbholler		.int       L(SSE6Q7) -L(SSExDx)
10896320Sbholler		.int       L(SSE7Q7) -L(SSExDx)
10906320Sbholler
10916320Sbholler		.int       L(SSE8Q7) -L(SSExDx)
10926320Sbholler		.int       L(SSE9Q7) -L(SSExDx)
10936320Sbholler		.int       L(SSE10Q7)-L(SSExDx)
10946320Sbholler		.int       L(SSE11Q7)-L(SSExDx)
10956320Sbholler		.int       L(SSE12Q7)-L(SSExDx)
10966320Sbholler		.int       L(SSE13Q7)-L(SSExDx)
10976320Sbholler		.int       L(SSE14Q7)-L(SSExDx)
10986320Sbholler		.int       L(SSE15Q7)-L(SSExDx)
10990Sstevel@tonic-gate
11006320Sbholler		.int       L(SSE0Q8) -L(SSExDx)
11016320Sbholler		.int       L(SSE1Q8) -L(SSExDx)
11026320Sbholler		.int       L(SSE2Q8) -L(SSExDx)
11036320Sbholler		.int       L(SSE3Q8) -L(SSExDx)
11046320Sbholler		.int       L(SSE4Q8) -L(SSExDx)
11056320Sbholler		.int       L(SSE5Q8) -L(SSExDx)
11066320Sbholler		.int       L(SSE6Q8) -L(SSExDx)
11076320Sbholler		.int       L(SSE7Q8) -L(SSExDx)
11086320Sbholler
11096320Sbholler		.int       L(SSE8Q8) -L(SSExDx)
11106320Sbholler		.int       L(SSE9Q8) -L(SSExDx)
11116320Sbholler		.int       L(SSE10Q8)-L(SSExDx)
11126320Sbholler		.int       L(SSE11Q8)-L(SSExDx)
11136320Sbholler		.int       L(SSE12Q8)-L(SSExDx)
11146320Sbholler		.int       L(SSE13Q8)-L(SSExDx)
11156320Sbholler		.int       L(SSE14Q8)-L(SSExDx)
11166320Sbholler		.int       L(SSE15Q8)-L(SSExDx)
11176320Sbholler
11186320Sbholler		.int       L(SSE0Q9) -L(SSExDx)
11196320Sbholler		.int       L(SSE1Q9) -L(SSExDx)
11206320Sbholler		.int       L(SSE2Q9) -L(SSExDx)
11216320Sbholler		.int       L(SSE3Q9) -L(SSExDx)
11226320Sbholler		.int       L(SSE4Q9) -L(SSExDx)
11236320Sbholler		.int       L(SSE5Q9) -L(SSExDx)
11246320Sbholler		.int       L(SSE6Q9) -L(SSExDx)
11256320Sbholler		.int       L(SSE7Q9) -L(SSExDx)
11266320Sbholler
11276320Sbholler		.int       L(SSE8Q9) -L(SSExDx)
11286320Sbholler		.int       L(SSE9Q9) -L(SSExDx)
11296320Sbholler		.int       L(SSE10Q9)-L(SSExDx)
11306320Sbholler		.int       L(SSE11Q9)-L(SSExDx)
11316320Sbholler		.int       L(SSE12Q9)-L(SSExDx)
11326320Sbholler		.int       L(SSE13Q9)-L(SSExDx)
11336320Sbholler		.int       L(SSE14Q9)-L(SSExDx)
11346320Sbholler		.int       L(SSE15Q9)-L(SSExDx)
11350Sstevel@tonic-gate
11366320Sbholler		.int       L(SSE0QA) -L(SSExDx)
11376320Sbholler		.int       L(SSE1QA) -L(SSExDx)
11386320Sbholler		.int       L(SSE2QA) -L(SSExDx)
11396320Sbholler		.int       L(SSE3QA) -L(SSExDx)
11406320Sbholler		.int       L(SSE4QA) -L(SSExDx)
11416320Sbholler		.int       L(SSE5QA) -L(SSExDx)
11426320Sbholler		.int       L(SSE6QA) -L(SSExDx)
11436320Sbholler		.int       L(SSE7QA) -L(SSExDx)
11440Sstevel@tonic-gate
11456320Sbholler		.int       L(SSE8QA) -L(SSExDx)
11466320Sbholler		.int       L(SSE9QA) -L(SSExDx)
11476320Sbholler		.int       L(SSE10QA)-L(SSExDx)
11486320Sbholler		.int       L(SSE11QA)-L(SSExDx)
11496320Sbholler		.int       L(SSE12QA)-L(SSExDx)
11506320Sbholler		.int       L(SSE13QA)-L(SSExDx)
11516320Sbholler		.int       L(SSE14QA)-L(SSExDx)
11526320Sbholler		.int       L(SSE15QA)-L(SSExDx)
11530Sstevel@tonic-gate
11546320Sbholler		.int       L(SSE0QB) -L(SSExDx)
11556320Sbholler		.int       L(SSE1QB) -L(SSExDx)
11566320Sbholler		.int       L(SSE2QB) -L(SSExDx)
11576320Sbholler		.int       L(SSE3QB) -L(SSExDx)
11586320Sbholler		.int       L(SSE4QB) -L(SSExDx)
11596320Sbholler		.int       L(SSE5QB) -L(SSExDx)
11606320Sbholler		.int       L(SSE6QB) -L(SSExDx)
11616320Sbholler		.int       L(SSE7QB) -L(SSExDx)
11620Sstevel@tonic-gate
11636320Sbholler		.int       L(SSE8QB) -L(SSExDx)
11646320Sbholler		.int       L(SSE9QB) -L(SSExDx)
11656320Sbholler		.int       L(SSE10QB)-L(SSExDx)
11666320Sbholler		.int       L(SSE11QB)-L(SSExDx)
11676320Sbholler		.int       L(SSE12QB)-L(SSExDx)
11686320Sbholler		.int       L(SSE13QB)-L(SSExDx)
11696320Sbholler		.int       L(SSE14QB)-L(SSExDx)
11706320Sbholler		.int       L(SSE15QB)-L(SSExDx)
11710Sstevel@tonic-gate
11726320Sbholler		SET_SIZE(memset)
1173