10Sstevel@tonic-gate/* 26320Sbholler * CDDL HEADER START 36320Sbholler * 46320Sbholler * The contents of this file are subject to the terms of the 56320Sbholler * Common Development and Distribution License (the "License"). 66320Sbholler * You may not use this file except in compliance with the License. 76320Sbholler * 86320Sbholler * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 96320Sbholler * or http://www.opensolaris.org/os/licensing. 106320Sbholler * See the License for the specific language governing permissions 116320Sbholler * and limitations under the License. 126320Sbholler * 136320Sbholler * When distributing Covered Code, include this CDDL HEADER in each 146320Sbholler * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 156320Sbholler * If applicable, add the following below this CDDL HEADER, with the 166320Sbholler * fields enclosed by brackets "[]" replaced with your own identifying 176320Sbholler * information: Portions Copyright [yyyy] [name of copyright owner] 186320Sbholler * 196320Sbholler * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate 220Sstevel@tonic-gate/* 23*10024Sbostrovs * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 246812Sraf * Use is subject to license terms. 256812Sraf */ 266812Sraf 276812Sraf/* 286320Sbholler * Copyright (c) 2008, Intel Corporation 290Sstevel@tonic-gate * All rights reserved. 300Sstevel@tonic-gate */ 310Sstevel@tonic-gate 32*10024Sbostrovs/* 33*10024Sbostrovs * Portions Copyright 2009 Advanced Micro Devices, Inc. 
34*10024Sbostrovs */ 35*10024Sbostrovs 367298SMark.J.Nelson@Sun.COM .file "memset.s" 370Sstevel@tonic-gate 380Sstevel@tonic-gate#include <sys/asm_linkage.h> 390Sstevel@tonic-gate 406812Sraf ANSI_PRAGMA_WEAK(memset,function) 410Sstevel@tonic-gate 420Sstevel@tonic-gate#include "cache.h" 436320Sbholler#include "proc64_id.h" 440Sstevel@tonic-gate 456320Sbholler#define L(s) .memset/**/s 460Sstevel@tonic-gate 476320Sbholler/* 486320Sbholler * memset algorithm overview: 496320Sbholler * 506320Sbholler * Thresholds used below were determined experimentally. 516320Sbholler * 526320Sbholler * Pseudo code: 536320Sbholler * 54*10024Sbostrovs * NOTE: On AMD NO_SSE is always set. Performance on Opteron did not improve 55*10024Sbostrovs * using 16-byte stores. Setting NO_SSE on AMD should be re-evaluated on 56*10024Sbostrovs * future AMD processors. 57*10024Sbostrovs * 58*10024Sbostrovs * 596320Sbholler * If (size < 144 bytes) { 606320Sbholler * do unrolled code (primarily 8-byte stores) regardless of alignment. 
616320Sbholler * } else { 626320Sbholler * Align destination to 16-byte boundary 636320Sbholler * 646320Sbholler * if (NO_SSE) { 656320Sbholler * If (size > largest level cache) { 666320Sbholler * Use 8-byte non-temporal stores (64-bytes/loop) 676320Sbholler * } else { 686320Sbholler * if (size >= 2K) { 696320Sbholler * Use rep sstoq 706320Sbholler * } else { 716320Sbholler * Use 8-byte stores (128 bytes per loop) 726320Sbholler * } 736320Sbholler * } 746320Sbholler * 756320Sbholler * } else { **USE SSE** 766320Sbholler * If (size < 192 bytes) { 776320Sbholler * do unrolled code using primarily 16-byte stores (SSE2) 786320Sbholler * } else { 796320Sbholler * If (size > largest level cache) { 806320Sbholler * Use 16-byte non-temporal stores (128-bytes/loop) 816320Sbholler * } else { 826320Sbholler * Use 16-byte stores (128 bytes per loop) 836320Sbholler * } 846320Sbholler * } 856320Sbholler * } 866320Sbholler * 876320Sbholler * Finish any remaining bytes via unrolled code above. 886320Sbholler * } 896320Sbholler */ 900Sstevel@tonic-gate 916320Sbholler ENTRY(memset) # (void *, int, size_t) 926320Sbholler cmp $0x1,%rdx 936320Sbholler mov %rdi,%rax # memset returns the dest address 946320Sbholler jne L(ck2) 956320Sbholler mov %sil,(%rdi) 966320Sbholler ret 976320SbhollerL(ck2): 986320Sbholler mov $0x0101010101010101,%r9 996320Sbholler mov %rdx,%r8 1006320Sbholler movzbq %sil,%rdx 1016320Sbholler imul %r9,%rdx # clone value 8 times 1026320Sbholler 1036320Sbholler cmp $0x90,%r8 # 144 1046320Sbholler jge L(ck_align) 1056320Sbholler 1066320Sbholler lea L(setPxQx)(%rip),%r11 1076320Sbholler add %r8,%rdi 1080Sstevel@tonic-gate 1096320Sbholler movslq (%r11,%r8,4),%rcx 1106320Sbholler lea (%rcx,%r11,1),%r11 1116320Sbholler jmpq *%r11 1126320Sbholler 1136320Sbholler .balign 16 1146320SbhollerL(setPxQx): .int L(P0Q0)-L(setPxQx) 1156320Sbholler .int L(P1Q0)-L(setPxQx) 1166320Sbholler .int L(P2Q0)-L(setPxQx) 1176320Sbholler .int L(P3Q0)-L(setPxQx) 1186320Sbholler .int 
L(P4Q0)-L(setPxQx) 1196320Sbholler .int L(P5Q0)-L(setPxQx) 1206320Sbholler .int L(P6Q0)-L(setPxQx) 1216320Sbholler .int L(P7Q0)-L(setPxQx) 1220Sstevel@tonic-gate 1236320Sbholler .int L(P0Q1)-L(setPxQx) 1246320Sbholler .int L(P1Q1)-L(setPxQx) 1256320Sbholler .int L(P2Q1)-L(setPxQx) 1266320Sbholler .int L(P3Q1)-L(setPxQx) 1276320Sbholler .int L(P4Q1)-L(setPxQx) 1286320Sbholler .int L(P5Q1)-L(setPxQx) 1296320Sbholler .int L(P6Q1)-L(setPxQx) 1306320Sbholler .int L(P7Q1)-L(setPxQx) 1316320Sbholler 1326320Sbholler .int L(P0Q2)-L(setPxQx) 1336320Sbholler .int L(P1Q2)-L(setPxQx) 1346320Sbholler .int L(P2Q2)-L(setPxQx) 1356320Sbholler .int L(P3Q2)-L(setPxQx) 1366320Sbholler .int L(P4Q2)-L(setPxQx) 1376320Sbholler .int L(P5Q2)-L(setPxQx) 1386320Sbholler .int L(P6Q2)-L(setPxQx) 1396320Sbholler .int L(P7Q2)-L(setPxQx) 1400Sstevel@tonic-gate 1416320Sbholler .int L(P0Q3)-L(setPxQx) 1426320Sbholler .int L(P1Q3)-L(setPxQx) 1436320Sbholler .int L(P2Q3)-L(setPxQx) 1446320Sbholler .int L(P3Q3)-L(setPxQx) 1456320Sbholler .int L(P4Q3)-L(setPxQx) 1466320Sbholler .int L(P5Q3)-L(setPxQx) 1476320Sbholler .int L(P6Q3)-L(setPxQx) 1486320Sbholler .int L(P7Q3)-L(setPxQx) 1496320Sbholler 1506320Sbholler .int L(P0Q4)-L(setPxQx) 1516320Sbholler .int L(P1Q4)-L(setPxQx) 1526320Sbholler .int L(P2Q4)-L(setPxQx) 1536320Sbholler .int L(P3Q4)-L(setPxQx) 1546320Sbholler .int L(P4Q4)-L(setPxQx) 1556320Sbholler .int L(P5Q4)-L(setPxQx) 1566320Sbholler .int L(P6Q4)-L(setPxQx) 1576320Sbholler .int L(P7Q4)-L(setPxQx) 1580Sstevel@tonic-gate 1596320Sbholler .int L(P0Q5)-L(setPxQx) 1606320Sbholler .int L(P1Q5)-L(setPxQx) 1616320Sbholler .int L(P2Q5)-L(setPxQx) 1626320Sbholler .int L(P3Q5)-L(setPxQx) 1636320Sbholler .int L(P4Q5)-L(setPxQx) 1646320Sbholler .int L(P5Q5)-L(setPxQx) 1656320Sbholler .int L(P6Q5)-L(setPxQx) 1666320Sbholler .int L(P7Q5)-L(setPxQx) 1670Sstevel@tonic-gate 1686320Sbholler .int L(P0Q6)-L(setPxQx) 1696320Sbholler .int L(P1Q6)-L(setPxQx) 1706320Sbholler .int L(P2Q6)-L(setPxQx) 1716320Sbholler 
.int L(P3Q6)-L(setPxQx) 1726320Sbholler .int L(P4Q6)-L(setPxQx) 1736320Sbholler .int L(P5Q6)-L(setPxQx) 1746320Sbholler .int L(P6Q6)-L(setPxQx) 1756320Sbholler .int L(P7Q6)-L(setPxQx) 1760Sstevel@tonic-gate 1776320Sbholler .int L(P0Q7)-L(setPxQx) 1786320Sbholler .int L(P1Q7)-L(setPxQx) 1796320Sbholler .int L(P2Q7)-L(setPxQx) 1806320Sbholler .int L(P3Q7)-L(setPxQx) 1816320Sbholler .int L(P4Q7)-L(setPxQx) 1826320Sbholler .int L(P5Q7)-L(setPxQx) 1836320Sbholler .int L(P6Q7)-L(setPxQx) 1846320Sbholler .int L(P7Q7)-L(setPxQx) 1850Sstevel@tonic-gate 1866320Sbholler .int L(P0Q8)-L(setPxQx) 1876320Sbholler .int L(P1Q8)-L(setPxQx) 1886320Sbholler .int L(P2Q8)-L(setPxQx) 1896320Sbholler .int L(P3Q8)-L(setPxQx) 1906320Sbholler .int L(P4Q8)-L(setPxQx) 1916320Sbholler .int L(P5Q8)-L(setPxQx) 1926320Sbholler .int L(P6Q8)-L(setPxQx) 1936320Sbholler .int L(P7Q8)-L(setPxQx) 1946320Sbholler 1956320Sbholler .int L(P0Q9)-L(setPxQx) 1966320Sbholler .int L(P1Q9)-L(setPxQx) 1976320Sbholler .int L(P2Q9)-L(setPxQx) 1986320Sbholler .int L(P3Q9)-L(setPxQx) 1996320Sbholler .int L(P4Q9)-L(setPxQx) 2006320Sbholler .int L(P5Q9)-L(setPxQx) 2016320Sbholler .int L(P6Q9)-L(setPxQx) 2026320Sbholler .int L(P7Q9)-L(setPxQx) 2030Sstevel@tonic-gate 2046320Sbholler .int L(P0QA)-L(setPxQx) 2056320Sbholler .int L(P1QA)-L(setPxQx) 2066320Sbholler .int L(P2QA)-L(setPxQx) 2076320Sbholler .int L(P3QA)-L(setPxQx) 2086320Sbholler .int L(P4QA)-L(setPxQx) 2096320Sbholler .int L(P5QA)-L(setPxQx) 2106320Sbholler .int L(P6QA)-L(setPxQx) 2116320Sbholler .int L(P7QA)-L(setPxQx) 2126320Sbholler 2136320Sbholler .int L(P0QB)-L(setPxQx) 2146320Sbholler .int L(P1QB)-L(setPxQx) 2156320Sbholler .int L(P2QB)-L(setPxQx) 2166320Sbholler .int L(P3QB)-L(setPxQx) 2176320Sbholler .int L(P4QB)-L(setPxQx) 2186320Sbholler .int L(P5QB)-L(setPxQx) 2196320Sbholler .int L(P6QB)-L(setPxQx) 2206320Sbholler .int L(P7QB)-L(setPxQx) 2210Sstevel@tonic-gate 2226320Sbholler .int L(P0QC)-L(setPxQx) 2236320Sbholler .int L(P1QC)-L(setPxQx) 
2246320Sbholler .int L(P2QC)-L(setPxQx) 2256320Sbholler .int L(P3QC)-L(setPxQx) 2266320Sbholler .int L(P4QC)-L(setPxQx) 2276320Sbholler .int L(P5QC)-L(setPxQx) 2286320Sbholler .int L(P6QC)-L(setPxQx) 2296320Sbholler .int L(P7QC)-L(setPxQx) 2306320Sbholler 2316320Sbholler .int L(P0QD)-L(setPxQx) 2326320Sbholler .int L(P1QD)-L(setPxQx) 2336320Sbholler .int L(P2QD)-L(setPxQx) 2346320Sbholler .int L(P3QD)-L(setPxQx) 2356320Sbholler .int L(P4QD)-L(setPxQx) 2366320Sbholler .int L(P5QD)-L(setPxQx) 2376320Sbholler .int L(P6QD)-L(setPxQx) 2386320Sbholler .int L(P7QD)-L(setPxQx) 2390Sstevel@tonic-gate 2406320Sbholler .int L(P0QE)-L(setPxQx) # 112 2416320Sbholler .int L(P1QE)-L(setPxQx) 2426320Sbholler .int L(P2QE)-L(setPxQx) 2436320Sbholler .int L(P3QE)-L(setPxQx) 2446320Sbholler .int L(P4QE)-L(setPxQx) 2456320Sbholler .int L(P5QE)-L(setPxQx) 2466320Sbholler .int L(P6QE)-L(setPxQx) 2476320Sbholler .int L(P7QE)-L(setPxQx) 2486320Sbholler 2496320Sbholler .int L(P0QF)-L(setPxQx) #120 2506320Sbholler .int L(P1QF)-L(setPxQx) 2516320Sbholler .int L(P2QF)-L(setPxQx) 2526320Sbholler .int L(P3QF)-L(setPxQx) 2536320Sbholler .int L(P4QF)-L(setPxQx) 2546320Sbholler .int L(P5QF)-L(setPxQx) 2556320Sbholler .int L(P6QF)-L(setPxQx) 2566320Sbholler .int L(P7QF)-L(setPxQx) 2570Sstevel@tonic-gate 2586320Sbholler .int L(P0QG)-L(setPxQx) #128 2596320Sbholler .int L(P1QG)-L(setPxQx) 2606320Sbholler .int L(P2QG)-L(setPxQx) 2616320Sbholler .int L(P3QG)-L(setPxQx) 2626320Sbholler .int L(P4QG)-L(setPxQx) 2636320Sbholler .int L(P5QG)-L(setPxQx) 2646320Sbholler .int L(P6QG)-L(setPxQx) 2656320Sbholler .int L(P7QG)-L(setPxQx) 2660Sstevel@tonic-gate 2676320Sbholler .int L(P0QH)-L(setPxQx) #136 2686320Sbholler .int L(P1QH)-L(setPxQx) 2696320Sbholler .int L(P2QH)-L(setPxQx) 2706320Sbholler .int L(P3QH)-L(setPxQx) 2716320Sbholler .int L(P4QH)-L(setPxQx) 2726320Sbholler .int L(P5QH)-L(setPxQx) 2736320Sbholler .int L(P6QH)-L(setPxQx) 2746320Sbholler .int L(P7QH)-L(setPxQx) #143 2750Sstevel@tonic-gate 
2766320Sbholler .balign 16 2776320SbhollerL(P1QH): mov %rdx,-0x89(%rdi) 2786320SbhollerL(P1QG): mov %rdx,-0x81(%rdi) 2796320Sbholler .balign 16 2806320SbhollerL(P1QF): mov %rdx,-0x79(%rdi) 2816320SbhollerL(P1QE): mov %rdx,-0x71(%rdi) 2826320SbhollerL(P1QD): mov %rdx,-0x69(%rdi) 2836320SbhollerL(P1QC): mov %rdx,-0x61(%rdi) 2846320SbhollerL(P1QB): mov %rdx,-0x59(%rdi) 2856320SbhollerL(P1QA): mov %rdx,-0x51(%rdi) 2866320SbhollerL(P1Q9): mov %rdx,-0x49(%rdi) 2876320SbhollerL(P1Q8): mov %rdx,-0x41(%rdi) 2886320SbhollerL(P1Q7): mov %rdx,-0x39(%rdi) 2896320SbhollerL(P1Q6): mov %rdx,-0x31(%rdi) 2906320SbhollerL(P1Q5): mov %rdx,-0x29(%rdi) 2916320SbhollerL(P1Q4): mov %rdx,-0x21(%rdi) 2926320SbhollerL(P1Q3): mov %rdx,-0x19(%rdi) 2936320SbhollerL(P1Q2): mov %rdx,-0x11(%rdi) 2946320SbhollerL(P1Q1): mov %rdx,-0x9(%rdi) 2956320SbhollerL(P1Q0): mov %dl,-0x1(%rdi) 2966320Sbholler ret 2970Sstevel@tonic-gate 2986320Sbholler .balign 16 2996320SbhollerL(P0QH): mov %rdx,-0x88(%rdi) 3006320Sbholler .balign 16 3016320SbhollerL(P0QG): mov %rdx,-0x80(%rdi) 3026320SbhollerL(P0QF): mov %rdx,-0x78(%rdi) 3036320SbhollerL(P0QE): mov %rdx,-0x70(%rdi) 3046320SbhollerL(P0QD): mov %rdx,-0x68(%rdi) 3056320SbhollerL(P0QC): mov %rdx,-0x60(%rdi) 3066320SbhollerL(P0QB): mov %rdx,-0x58(%rdi) 3076320SbhollerL(P0QA): mov %rdx,-0x50(%rdi) 3086320SbhollerL(P0Q9): mov %rdx,-0x48(%rdi) 3096320SbhollerL(P0Q8): mov %rdx,-0x40(%rdi) 3106320SbhollerL(P0Q7): mov %rdx,-0x38(%rdi) 3116320SbhollerL(P0Q6): mov %rdx,-0x30(%rdi) 3126320SbhollerL(P0Q5): mov %rdx,-0x28(%rdi) 3136320SbhollerL(P0Q4): mov %rdx,-0x20(%rdi) 3146320SbhollerL(P0Q3): mov %rdx,-0x18(%rdi) 3156320SbhollerL(P0Q2): mov %rdx,-0x10(%rdi) 3166320SbhollerL(P0Q1): mov %rdx,-0x8(%rdi) 3176320SbhollerL(P0Q0): ret 3180Sstevel@tonic-gate 3196320Sbholler .balign 16 3206320SbhollerL(P2QH): mov %rdx,-0x8a(%rdi) 3216320SbhollerL(P2QG): mov %rdx,-0x82(%rdi) 3226320Sbholler .balign 16 3236320SbhollerL(P2QF): mov %rdx,-0x7a(%rdi) 3246320SbhollerL(P2QE): mov 
%rdx,-0x72(%rdi) 3256320SbhollerL(P2QD): mov %rdx,-0x6a(%rdi) 3266320SbhollerL(P2QC): mov %rdx,-0x62(%rdi) 3276320SbhollerL(P2QB): mov %rdx,-0x5a(%rdi) 3286320SbhollerL(P2QA): mov %rdx,-0x52(%rdi) 3296320SbhollerL(P2Q9): mov %rdx,-0x4a(%rdi) 3306320SbhollerL(P2Q8): mov %rdx,-0x42(%rdi) 3316320SbhollerL(P2Q7): mov %rdx,-0x3a(%rdi) 3326320SbhollerL(P2Q6): mov %rdx,-0x32(%rdi) 3336320SbhollerL(P2Q5): mov %rdx,-0x2a(%rdi) 3346320SbhollerL(P2Q4): mov %rdx,-0x22(%rdi) 3356320SbhollerL(P2Q3): mov %rdx,-0x1a(%rdi) 3366320SbhollerL(P2Q2): mov %rdx,-0x12(%rdi) 3376320SbhollerL(P2Q1): mov %rdx,-0xa(%rdi) 3386320SbhollerL(P2Q0): mov %dx,-0x2(%rdi) 3396320Sbholler ret 3400Sstevel@tonic-gate 3416320Sbholler .balign 16 3426320SbhollerL(P3QH): mov %rdx,-0x8b(%rdi) 3436320SbhollerL(P3QG): mov %rdx,-0x83(%rdi) 3446320Sbholler .balign 16 3456320SbhollerL(P3QF): mov %rdx,-0x7b(%rdi) 3466320SbhollerL(P3QE): mov %rdx,-0x73(%rdi) 3476320SbhollerL(P3QD): mov %rdx,-0x6b(%rdi) 3486320SbhollerL(P3QC): mov %rdx,-0x63(%rdi) 3496320SbhollerL(P3QB): mov %rdx,-0x5b(%rdi) 3506320SbhollerL(P3QA): mov %rdx,-0x53(%rdi) 3516320SbhollerL(P3Q9): mov %rdx,-0x4b(%rdi) 3526320SbhollerL(P3Q8): mov %rdx,-0x43(%rdi) 3536320SbhollerL(P3Q7): mov %rdx,-0x3b(%rdi) 3546320SbhollerL(P3Q6): mov %rdx,-0x33(%rdi) 3556320SbhollerL(P3Q5): mov %rdx,-0x2b(%rdi) 3566320SbhollerL(P3Q4): mov %rdx,-0x23(%rdi) 3576320SbhollerL(P3Q3): mov %rdx,-0x1b(%rdi) 3586320SbhollerL(P3Q2): mov %rdx,-0x13(%rdi) 3596320SbhollerL(P3Q1): mov %rdx,-0xb(%rdi) 3606320SbhollerL(P3Q0): mov %dx,-0x3(%rdi) 3616320Sbholler mov %dl,-0x1(%rdi) 3626320Sbholler ret 3630Sstevel@tonic-gate 3646320Sbholler .balign 16 3656320SbhollerL(P4QH): mov %rdx,-0x8c(%rdi) 3666320SbhollerL(P4QG): mov %rdx,-0x84(%rdi) 3676320Sbholler .balign 16 3686320SbhollerL(P4QF): mov %rdx,-0x7c(%rdi) 3696320SbhollerL(P4QE): mov %rdx,-0x74(%rdi) 3706320SbhollerL(P4QD): mov %rdx,-0x6c(%rdi) 3716320SbhollerL(P4QC): mov %rdx,-0x64(%rdi) 3726320SbhollerL(P4QB): mov %rdx,-0x5c(%rdi) 
3736320SbhollerL(P4QA): mov %rdx,-0x54(%rdi) 3746320SbhollerL(P4Q9): mov %rdx,-0x4c(%rdi) 3756320SbhollerL(P4Q8): mov %rdx,-0x44(%rdi) 3766320SbhollerL(P4Q7): mov %rdx,-0x3c(%rdi) 3776320SbhollerL(P4Q6): mov %rdx,-0x34(%rdi) 3786320SbhollerL(P4Q5): mov %rdx,-0x2c(%rdi) 3796320SbhollerL(P4Q4): mov %rdx,-0x24(%rdi) 3806320SbhollerL(P4Q3): mov %rdx,-0x1c(%rdi) 3816320SbhollerL(P4Q2): mov %rdx,-0x14(%rdi) 3826320SbhollerL(P4Q1): mov %rdx,-0xc(%rdi) 3836320SbhollerL(P4Q0): mov %edx,-0x4(%rdi) 3846320Sbholler ret 3850Sstevel@tonic-gate 3866320Sbholler .balign 16 3876320SbhollerL(P5QH): mov %rdx,-0x8d(%rdi) 3886320SbhollerL(P5QG): mov %rdx,-0x85(%rdi) 3896320Sbholler .balign 16 3906320SbhollerL(P5QF): mov %rdx,-0x7d(%rdi) 3916320SbhollerL(P5QE): mov %rdx,-0x75(%rdi) 3926320SbhollerL(P5QD): mov %rdx,-0x6d(%rdi) 3936320SbhollerL(P5QC): mov %rdx,-0x65(%rdi) 3946320SbhollerL(P5QB): mov %rdx,-0x5d(%rdi) 3956320SbhollerL(P5QA): mov %rdx,-0x55(%rdi) 3966320SbhollerL(P5Q9): mov %rdx,-0x4d(%rdi) 3976320SbhollerL(P5Q8): mov %rdx,-0x45(%rdi) 3986320SbhollerL(P5Q7): mov %rdx,-0x3d(%rdi) 3996320SbhollerL(P5Q6): mov %rdx,-0x35(%rdi) 4006320SbhollerL(P5Q5): mov %rdx,-0x2d(%rdi) 4016320SbhollerL(P5Q4): mov %rdx,-0x25(%rdi) 4026320SbhollerL(P5Q3): mov %rdx,-0x1d(%rdi) 4036320SbhollerL(P5Q2): mov %rdx,-0x15(%rdi) 4046320SbhollerL(P5Q1): mov %rdx,-0xd(%rdi) 4056320SbhollerL(P5Q0): mov %edx,-0x5(%rdi) 4066320Sbholler mov %dl,-0x1(%rdi) 4076320Sbholler ret 4080Sstevel@tonic-gate 4096320Sbholler .balign 16 4106320SbhollerL(P6QH): mov %rdx,-0x8e(%rdi) 4116320SbhollerL(P6QG): mov %rdx,-0x86(%rdi) 4126320Sbholler .balign 16 4136320SbhollerL(P6QF): mov %rdx,-0x7e(%rdi) 4146320SbhollerL(P6QE): mov %rdx,-0x76(%rdi) 4156320SbhollerL(P6QD): mov %rdx,-0x6e(%rdi) 4166320SbhollerL(P6QC): mov %rdx,-0x66(%rdi) 4176320SbhollerL(P6QB): mov %rdx,-0x5e(%rdi) 4186320SbhollerL(P6QA): mov %rdx,-0x56(%rdi) 4196320SbhollerL(P6Q9): mov %rdx,-0x4e(%rdi) 4206320SbhollerL(P6Q8): mov %rdx,-0x46(%rdi) 
4216320SbhollerL(P6Q7): mov %rdx,-0x3e(%rdi) 4226320SbhollerL(P6Q6): mov %rdx,-0x36(%rdi) 4236320SbhollerL(P6Q5): mov %rdx,-0x2e(%rdi) 4246320SbhollerL(P6Q4): mov %rdx,-0x26(%rdi) 4256320SbhollerL(P6Q3): mov %rdx,-0x1e(%rdi) 4266320SbhollerL(P6Q2): mov %rdx,-0x16(%rdi) 4276320SbhollerL(P6Q1): mov %rdx,-0xe(%rdi) 4286320SbhollerL(P6Q0): mov %edx,-0x6(%rdi) 4296320Sbholler mov %dx,-0x2(%rdi) 4306320Sbholler ret 4310Sstevel@tonic-gate 4326320Sbholler .balign 16 4336320SbhollerL(P7QH): mov %rdx,-0x8f(%rdi) 4346320SbhollerL(P7QG): mov %rdx,-0x87(%rdi) 4356320Sbholler .balign 16 4366320SbhollerL(P7QF): mov %rdx,-0x7f(%rdi) 4376320SbhollerL(P7QE): mov %rdx,-0x77(%rdi) 4386320SbhollerL(P7QD): mov %rdx,-0x6f(%rdi) 4396320SbhollerL(P7QC): mov %rdx,-0x67(%rdi) 4406320SbhollerL(P7QB): mov %rdx,-0x5f(%rdi) 4416320SbhollerL(P7QA): mov %rdx,-0x57(%rdi) 4426320SbhollerL(P7Q9): mov %rdx,-0x4f(%rdi) 4436320SbhollerL(P7Q8): mov %rdx,-0x47(%rdi) 4446320SbhollerL(P7Q7): mov %rdx,-0x3f(%rdi) 4456320SbhollerL(P7Q6): mov %rdx,-0x37(%rdi) 4466320SbhollerL(P7Q5): mov %rdx,-0x2f(%rdi) 4476320SbhollerL(P7Q4): mov %rdx,-0x27(%rdi) 4486320SbhollerL(P7Q3): mov %rdx,-0x1f(%rdi) 4496320SbhollerL(P7Q2): mov %rdx,-0x17(%rdi) 4506320SbhollerL(P7Q1): mov %rdx,-0xf(%rdi) 4516320SbhollerL(P7Q0): mov %edx,-0x7(%rdi) 4526320Sbholler mov %dx,-0x3(%rdi) 4536320Sbholler mov %dl,-0x1(%rdi) 4546320Sbholler ret 4550Sstevel@tonic-gate 4566320Sbholler .balign 16 4576320SbhollerL(ck_align): 4586320Sbholler /* 4596320Sbholler * Align to 16 byte boundary first 4606320Sbholler */ 4616320Sbholler lea L(AliPxQx)(%rip),%r11 4626320Sbholler mov $0x10,%r10 4636320Sbholler mov %rdi,%r9 4646320Sbholler and $0xf,%r9 4656320Sbholler sub %r9,%r10 4666320Sbholler and $0xf,%r10 4676320Sbholler add %r10,%rdi 4686320Sbholler sub %r10,%r8 4690Sstevel@tonic-gate 4706320Sbholler movslq (%r11,%r10,4),%rcx 4716320Sbholler lea (%rcx,%r11,1),%r11 4726320Sbholler jmpq *%r11 # align dest to 16-byte boundary 4730Sstevel@tonic-gate 
4746320Sbholler .balign 16 4756320SbhollerL(AliPxQx): .int L(aligned_now)-L(AliPxQx) 4766320Sbholler .int L(A1Q0)-L(AliPxQx) 4776320Sbholler .int L(A2Q0)-L(AliPxQx) 4786320Sbholler .int L(A3Q0)-L(AliPxQx) 4796320Sbholler .int L(A4Q0)-L(AliPxQx) 4806320Sbholler .int L(A5Q0)-L(AliPxQx) 4816320Sbholler .int L(A6Q0)-L(AliPxQx) 4826320Sbholler .int L(A7Q0)-L(AliPxQx) 4830Sstevel@tonic-gate 4846320Sbholler .int L(A0Q1)-L(AliPxQx) 4856320Sbholler .int L(A1Q1)-L(AliPxQx) 4866320Sbholler .int L(A2Q1)-L(AliPxQx) 4876320Sbholler .int L(A3Q1)-L(AliPxQx) 4886320Sbholler .int L(A4Q1)-L(AliPxQx) 4896320Sbholler .int L(A5Q1)-L(AliPxQx) 4906320Sbholler .int L(A6Q1)-L(AliPxQx) 4916320Sbholler .int L(A7Q1)-L(AliPxQx) 4926320Sbholler 4936320Sbholler .balign 16 4946320SbhollerL(A5Q1): mov %dl,-0xd(%rdi) 4956320SbhollerL(A4Q1): mov %edx,-0xc(%rdi) 4966320SbhollerL(A0Q1): mov %rdx,-0x8(%rdi) 4976320Sbholler jmp L(aligned_now) 4986320Sbholler 4996320Sbholler .balign 16 5006320SbhollerL(A1Q1): mov %dl,-0x9(%rdi) 5016320Sbholler mov %rdx,-0x8(%rdi) 5026320Sbholler jmp L(aligned_now) 5030Sstevel@tonic-gate 5046320Sbholler .balign 16 5056320SbhollerL(A1Q0): mov %dl,-0x1(%rdi) 5066320Sbholler jmp L(aligned_now) 5076320Sbholler 5086320Sbholler .balign 16 5096320SbhollerL(A3Q1): mov %dl,-0xb(%rdi) 5106320SbhollerL(A2Q1): mov %dx,-0xa(%rdi) 5116320Sbholler mov %rdx,-0x8(%rdi) 5126320Sbholler jmp L(aligned_now) 5130Sstevel@tonic-gate 5146320Sbholler .balign 16 5156320SbhollerL(A3Q0): mov %dl,-0x3(%rdi) 5166320SbhollerL(A2Q0): mov %dx,-0x2(%rdi) 5176320Sbholler jmp L(aligned_now) 5186320Sbholler 5196320Sbholler .balign 16 5206320SbhollerL(A5Q0): mov %dl,-0x5(%rdi) 5216320SbhollerL(A4Q0): mov %edx,-0x4(%rdi) 5226320Sbholler jmp L(aligned_now) 5230Sstevel@tonic-gate 5246320Sbholler .balign 16 5256320SbhollerL(A7Q1): mov %dl,-0xf(%rdi) 5266320SbhollerL(A6Q1): mov %dx,-0xe(%rdi) 5276320Sbholler mov %edx,-0xc(%rdi) 5286320Sbholler mov %rdx,-0x8(%rdi) 5296320Sbholler jmp L(aligned_now) 5306320Sbholler 
5316320Sbholler .balign 16 5326320SbhollerL(A7Q0): mov %dl,-0x7(%rdi) 5336320SbhollerL(A6Q0): mov %dx,-0x6(%rdi) 5346320Sbholler mov %edx,-0x4(%rdi) 5356320Sbholler #jmp L(aligned_now) # Fall thru... 5360Sstevel@tonic-gate 5376320Sbholler .balign 16 5386320SbhollerL(aligned_now): 5396320Sbholler /* 5406320Sbholler * Check memops method 5416320Sbholler */ 5426320Sbholler cmpl $NO_SSE,.memops_method(%rip) 5436320Sbholler je L(Loop8byte_pre) 5440Sstevel@tonic-gate 5456320Sbholler /* 5466320Sbholler * Use SSE2 instructions 5476320Sbholler */ 5486320Sbholler movd %rdx,%xmm0 5496320Sbholler lea L(SSExDx)(%rip),%r9 # after dest alignment 5506320Sbholler punpcklqdq %xmm0,%xmm0 # fill RegXMM0 with the pattern 5516320Sbholler cmp $0xc0,%r8 # 192 5526320Sbholler jge L(byte32sse2_pre) 5536320Sbholler 5546320Sbholler add %r8,%rdi 5556320Sbholler 5566320Sbholler movslq (%r9,%r8,4),%rcx 5576320Sbholler lea (%rcx,%r9,1),%r9 5586320Sbholler jmpq *%r9 5590Sstevel@tonic-gate 5606320Sbholler .balign 16 5616320SbhollerL(SSE0QB): movdqa %xmm0,-0xb0(%rdi) 5626320SbhollerL(SSE0QA): movdqa %xmm0,-0xa0(%rdi) 5636320SbhollerL(SSE0Q9): movdqa %xmm0,-0x90(%rdi) 5646320SbhollerL(SSE0Q8): movdqa %xmm0,-0x80(%rdi) 5656320SbhollerL(SSE0Q7): movdqa %xmm0,-0x70(%rdi) 5666320SbhollerL(SSE0Q6): movdqa %xmm0,-0x60(%rdi) 5676320SbhollerL(SSE0Q5): movdqa %xmm0,-0x50(%rdi) 5686320SbhollerL(SSE0Q4): movdqa %xmm0,-0x40(%rdi) 5696320SbhollerL(SSE0Q3): movdqa %xmm0,-0x30(%rdi) 5706320SbhollerL(SSE0Q2): movdqa %xmm0,-0x20(%rdi) 5716320SbhollerL(SSE0Q1): movdqa %xmm0,-0x10(%rdi) 5726320SbhollerL(SSE0Q0): ret 5730Sstevel@tonic-gate 5746320Sbholler .balign 16 5756320SbhollerL(SSE1QB): movdqa %xmm0,-0xb1(%rdi) 5766320SbhollerL(SSE1QA): movdqa %xmm0,-0xa1(%rdi) 5776320SbhollerL(SSE1Q9): movdqa %xmm0,-0x91(%rdi) 5786320SbhollerL(SSE1Q8): movdqa %xmm0,-0x81(%rdi) 5796320SbhollerL(SSE1Q7): movdqa %xmm0,-0x71(%rdi) 5806320SbhollerL(SSE1Q6): movdqa %xmm0,-0x61(%rdi) 5816320SbhollerL(SSE1Q5): movdqa %xmm0,-0x51(%rdi) 
5826320SbhollerL(SSE1Q4): movdqa %xmm0,-0x41(%rdi) 5836320SbhollerL(SSE1Q3): movdqa %xmm0,-0x31(%rdi) 5846320SbhollerL(SSE1Q2): movdqa %xmm0,-0x21(%rdi) 5856320SbhollerL(SSE1Q1): movdqa %xmm0,-0x11(%rdi) 5866320SbhollerL(SSE1Q0): mov %dl,-0x1(%rdi) 5876320Sbholler ret 5880Sstevel@tonic-gate 5896320Sbholler .balign 16 5906320SbhollerL(SSE2QB): movdqa %xmm0,-0xb2(%rdi) 5916320SbhollerL(SSE2QA): movdqa %xmm0,-0xa2(%rdi) 5926320SbhollerL(SSE2Q9): movdqa %xmm0,-0x92(%rdi) 5936320SbhollerL(SSE2Q8): movdqa %xmm0,-0x82(%rdi) 5946320SbhollerL(SSE2Q7): movdqa %xmm0,-0x72(%rdi) 5956320SbhollerL(SSE2Q6): movdqa %xmm0,-0x62(%rdi) 5966320SbhollerL(SSE2Q5): movdqa %xmm0,-0x52(%rdi) 5976320SbhollerL(SSE2Q4): movdqa %xmm0,-0x42(%rdi) 5986320SbhollerL(SSE2Q3): movdqa %xmm0,-0x32(%rdi) 5996320SbhollerL(SSE2Q2): movdqa %xmm0,-0x22(%rdi) 6006320SbhollerL(SSE2Q1): movdqa %xmm0,-0x12(%rdi) 6016320SbhollerL(SSE2Q0): mov %dx,-0x2(%rdi) 6026320Sbholler ret 6030Sstevel@tonic-gate 6046320Sbholler .balign 16 6056320SbhollerL(SSE3QB): movdqa %xmm0,-0xb3(%rdi) 6066320SbhollerL(SSE3QA): movdqa %xmm0,-0xa3(%rdi) 6076320SbhollerL(SSE3Q9): movdqa %xmm0,-0x93(%rdi) 6086320SbhollerL(SSE3Q8): movdqa %xmm0,-0x83(%rdi) 6096320SbhollerL(SSE3Q7): movdqa %xmm0,-0x73(%rdi) 6106320SbhollerL(SSE3Q6): movdqa %xmm0,-0x63(%rdi) 6116320SbhollerL(SSE3Q5): movdqa %xmm0,-0x53(%rdi) 6126320SbhollerL(SSE3Q4): movdqa %xmm0,-0x43(%rdi) 6136320SbhollerL(SSE3Q3): movdqa %xmm0,-0x33(%rdi) 6146320SbhollerL(SSE3Q2): movdqa %xmm0,-0x23(%rdi) 6156320SbhollerL(SSE3Q1): movdqa %xmm0,-0x13(%rdi) 6166320SbhollerL(SSE3Q0): mov %dx,-0x3(%rdi) 6176320Sbholler mov %dl,-0x1(%rdi) 6186320Sbholler ret 6190Sstevel@tonic-gate 6206320Sbholler .balign 16 6216320SbhollerL(SSE4QB): movdqa %xmm0,-0xb4(%rdi) 6226320SbhollerL(SSE4QA): movdqa %xmm0,-0xa4(%rdi) 6236320SbhollerL(SSE4Q9): movdqa %xmm0,-0x94(%rdi) 6246320SbhollerL(SSE4Q8): movdqa %xmm0,-0x84(%rdi) 6256320SbhollerL(SSE4Q7): movdqa %xmm0,-0x74(%rdi) 6266320SbhollerL(SSE4Q6): movdqa 
%xmm0,-0x64(%rdi) 6276320SbhollerL(SSE4Q5): movdqa %xmm0,-0x54(%rdi) 6286320SbhollerL(SSE4Q4): movdqa %xmm0,-0x44(%rdi) 6296320SbhollerL(SSE4Q3): movdqa %xmm0,-0x34(%rdi) 6306320SbhollerL(SSE4Q2): movdqa %xmm0,-0x24(%rdi) 6316320SbhollerL(SSE4Q1): movdqa %xmm0,-0x14(%rdi) 6326320SbhollerL(SSE4Q0): mov %edx,-0x4(%rdi) 6336320Sbholler ret 6340Sstevel@tonic-gate 6356320Sbholler .balign 16 6366320SbhollerL(SSE5QB): movdqa %xmm0,-0xb5(%rdi) 6376320SbhollerL(SSE5QA): movdqa %xmm0,-0xa5(%rdi) 6386320SbhollerL(SSE5Q9): movdqa %xmm0,-0x95(%rdi) 6396320SbhollerL(SSE5Q8): movdqa %xmm0,-0x85(%rdi) 6406320SbhollerL(SSE5Q7): movdqa %xmm0,-0x75(%rdi) 6416320SbhollerL(SSE5Q6): movdqa %xmm0,-0x65(%rdi) 6426320SbhollerL(SSE5Q5): movdqa %xmm0,-0x55(%rdi) 6436320SbhollerL(SSE5Q4): movdqa %xmm0,-0x45(%rdi) 6446320SbhollerL(SSE5Q3): movdqa %xmm0,-0x35(%rdi) 6456320SbhollerL(SSE5Q2): movdqa %xmm0,-0x25(%rdi) 6466320SbhollerL(SSE5Q1): movdqa %xmm0,-0x15(%rdi) 6476320SbhollerL(SSE5Q0): mov %edx,-0x5(%rdi) 6486320Sbholler mov %dl,-0x1(%rdi) 6496320Sbholler ret 6500Sstevel@tonic-gate 6516320Sbholler .balign 16 6526320SbhollerL(SSE6QB): movdqa %xmm0,-0xb6(%rdi) 6536320SbhollerL(SSE6QA): movdqa %xmm0,-0xa6(%rdi) 6546320SbhollerL(SSE6Q9): movdqa %xmm0,-0x96(%rdi) 6556320SbhollerL(SSE6Q8): movdqa %xmm0,-0x86(%rdi) 6566320SbhollerL(SSE6Q7): movdqa %xmm0,-0x76(%rdi) 6576320SbhollerL(SSE6Q6): movdqa %xmm0,-0x66(%rdi) 6586320SbhollerL(SSE6Q5): movdqa %xmm0,-0x56(%rdi) 6596320SbhollerL(SSE6Q4): movdqa %xmm0,-0x46(%rdi) 6606320SbhollerL(SSE6Q3): movdqa %xmm0,-0x36(%rdi) 6616320SbhollerL(SSE6Q2): movdqa %xmm0,-0x26(%rdi) 6626320SbhollerL(SSE6Q1): movdqa %xmm0,-0x16(%rdi) 6636320SbhollerL(SSE6Q0): mov %edx,-0x6(%rdi) 6646320Sbholler mov %dx,-0x2(%rdi) 6656320Sbholler ret 6660Sstevel@tonic-gate 6676320Sbholler .balign 16 6686320SbhollerL(SSE7QB): movdqa %xmm0,-0xb7(%rdi) 6696320SbhollerL(SSE7QA): movdqa %xmm0,-0xa7(%rdi) 6706320SbhollerL(SSE7Q9): movdqa %xmm0,-0x97(%rdi) 6716320SbhollerL(SSE7Q8): movdqa 
%xmm0,-0x87(%rdi) 6726320SbhollerL(SSE7Q7): movdqa %xmm0,-0x77(%rdi) 6736320SbhollerL(SSE7Q6): movdqa %xmm0,-0x67(%rdi) 6746320SbhollerL(SSE7Q5): movdqa %xmm0,-0x57(%rdi) 6756320SbhollerL(SSE7Q4): movdqa %xmm0,-0x47(%rdi) 6766320SbhollerL(SSE7Q3): movdqa %xmm0,-0x37(%rdi) 6776320SbhollerL(SSE7Q2): movdqa %xmm0,-0x27(%rdi) 6786320SbhollerL(SSE7Q1): movdqa %xmm0,-0x17(%rdi) 6796320SbhollerL(SSE7Q0): mov %edx,-0x7(%rdi) 6806320Sbholler mov %dx,-0x3(%rdi) 6816320Sbholler mov %dl,-0x1(%rdi) 6826320Sbholler ret 6836320Sbholler 6846320Sbholler .balign 16 6856320SbhollerL(SSE8QB): movdqa %xmm0,-0xb8(%rdi) 6866320SbhollerL(SSE8QA): movdqa %xmm0,-0xa8(%rdi) 6876320SbhollerL(SSE8Q9): movdqa %xmm0,-0x98(%rdi) 6886320SbhollerL(SSE8Q8): movdqa %xmm0,-0x88(%rdi) 6896320SbhollerL(SSE8Q7): movdqa %xmm0,-0x78(%rdi) 6906320SbhollerL(SSE8Q6): movdqa %xmm0,-0x68(%rdi) 6916320SbhollerL(SSE8Q5): movdqa %xmm0,-0x58(%rdi) 6926320SbhollerL(SSE8Q4): movdqa %xmm0,-0x48(%rdi) 6936320SbhollerL(SSE8Q3): movdqa %xmm0,-0x38(%rdi) 6946320SbhollerL(SSE8Q2): movdqa %xmm0,-0x28(%rdi) 6956320SbhollerL(SSE8Q1): movdqa %xmm0,-0x18(%rdi) 6966320SbhollerL(SSE8Q0): mov %rdx,-0x8(%rdi) 6976320Sbholler ret 6980Sstevel@tonic-gate 6996320Sbholler .balign 16 7006320SbhollerL(SSE9QB): movdqa %xmm0,-0xb9(%rdi) 7016320SbhollerL(SSE9QA): movdqa %xmm0,-0xa9(%rdi) 7026320SbhollerL(SSE9Q9): movdqa %xmm0,-0x99(%rdi) 7036320SbhollerL(SSE9Q8): movdqa %xmm0,-0x89(%rdi) 7046320SbhollerL(SSE9Q7): movdqa %xmm0,-0x79(%rdi) 7056320SbhollerL(SSE9Q6): movdqa %xmm0,-0x69(%rdi) 7066320SbhollerL(SSE9Q5): movdqa %xmm0,-0x59(%rdi) 7076320SbhollerL(SSE9Q4): movdqa %xmm0,-0x49(%rdi) 7086320SbhollerL(SSE9Q3): movdqa %xmm0,-0x39(%rdi) 7096320SbhollerL(SSE9Q2): movdqa %xmm0,-0x29(%rdi) 7106320SbhollerL(SSE9Q1): movdqa %xmm0,-0x19(%rdi) 7116320SbhollerL(SSE9Q0): mov %rdx,-0x9(%rdi) 7126320Sbholler mov %dl,-0x1(%rdi) 7136320Sbholler ret 7140Sstevel@tonic-gate 7156320Sbholler .balign 16 7166320SbhollerL(SSE10QB): movdqa %xmm0,-0xba(%rdi) 
L(SSE10QA): movdqa	%xmm0,-0xaa(%rdi)
L(SSE10Q9): movdqa	%xmm0,-0x9a(%rdi)
L(SSE10Q8): movdqa	%xmm0,-0x8a(%rdi)
L(SSE10Q7): movdqa	%xmm0,-0x7a(%rdi)
L(SSE10Q6): movdqa	%xmm0,-0x6a(%rdi)
L(SSE10Q5): movdqa	%xmm0,-0x5a(%rdi)
L(SSE10Q4): movdqa	%xmm0,-0x4a(%rdi)
L(SSE10Q3): movdqa	%xmm0,-0x3a(%rdi)
L(SSE10Q2): movdqa	%xmm0,-0x2a(%rdi)
L(SSE10Q1): movdqa	%xmm0,-0x1a(%rdi)
L(SSE10Q0): mov	%rdx,-0xa(%rdi)			# last 10 bytes: 8 ...
	mov	%dx,-0x2(%rdi)			# ... + 2
	ret

	/*
	 * L(SSE<b>Q<n>) store chains.
	 *
	 * %rdi points one byte past the end of the region still to be
	 * filled.  Entering at Q<n> performs <n> 16-byte movdqa stores at
	 * descending negative offsets and then falls through to Q0, which
	 * writes the final <b> bytes from %rdx/%edx/%dx/%dl and returns.
	 * movdqa requires a 16-byte aligned address, so on entry
	 * %rdi - (16 * n + b) must be 16-byte aligned.
	 *
	 * b = 11: 8 + 2 + 1 byte tail.
	 */
	.balign 16
L(SSE11QB): movdqa	%xmm0,-0xbb(%rdi)
L(SSE11QA): movdqa	%xmm0,-0xab(%rdi)
L(SSE11Q9): movdqa	%xmm0,-0x9b(%rdi)
L(SSE11Q8): movdqa	%xmm0,-0x8b(%rdi)
L(SSE11Q7): movdqa	%xmm0,-0x7b(%rdi)
L(SSE11Q6): movdqa	%xmm0,-0x6b(%rdi)
L(SSE11Q5): movdqa	%xmm0,-0x5b(%rdi)
L(SSE11Q4): movdqa	%xmm0,-0x4b(%rdi)
L(SSE11Q3): movdqa	%xmm0,-0x3b(%rdi)
L(SSE11Q2): movdqa	%xmm0,-0x2b(%rdi)
L(SSE11Q1): movdqa	%xmm0,-0x1b(%rdi)
L(SSE11Q0): mov	%rdx,-0xb(%rdi)
	mov	%dx,-0x3(%rdi)
	mov	%dl,-0x1(%rdi)
	ret

	/* b = 12: 8 + 4 byte tail. */
	.balign 16
L(SSE12QB): movdqa	%xmm0,-0xbc(%rdi)
L(SSE12QA): movdqa	%xmm0,-0xac(%rdi)
L(SSE12Q9): movdqa	%xmm0,-0x9c(%rdi)
L(SSE12Q8): movdqa	%xmm0,-0x8c(%rdi)
L(SSE12Q7): movdqa	%xmm0,-0x7c(%rdi)
L(SSE12Q6): movdqa	%xmm0,-0x6c(%rdi)
L(SSE12Q5): movdqa	%xmm0,-0x5c(%rdi)
L(SSE12Q4): movdqa	%xmm0,-0x4c(%rdi)
L(SSE12Q3): movdqa	%xmm0,-0x3c(%rdi)
L(SSE12Q2): movdqa	%xmm0,-0x2c(%rdi)
L(SSE12Q1): movdqa	%xmm0,-0x1c(%rdi)
L(SSE12Q0): mov	%rdx,-0xc(%rdi)
	mov	%edx,-0x4(%rdi)
	ret

	/* b = 13: 8 + 4 + 1 byte tail. */
	.balign 16
L(SSE13QB): movdqa	%xmm0,-0xbd(%rdi)
L(SSE13QA): movdqa	%xmm0,-0xad(%rdi)
L(SSE13Q9): movdqa	%xmm0,-0x9d(%rdi)
L(SSE13Q8): movdqa	%xmm0,-0x8d(%rdi)
L(SSE13Q7): movdqa	%xmm0,-0x7d(%rdi)
L(SSE13Q6): movdqa	%xmm0,-0x6d(%rdi)
L(SSE13Q5): movdqa	%xmm0,-0x5d(%rdi)
L(SSE13Q4): movdqa	%xmm0,-0x4d(%rdi)
L(SSE13Q3): movdqa	%xmm0,-0x3d(%rdi)
L(SSE13Q2): movdqa	%xmm0,-0x2d(%rdi)
L(SSE13Q1): movdqa	%xmm0,-0x1d(%rdi)
L(SSE13Q0): mov	%rdx,-0xd(%rdi)
	mov	%edx,-0x5(%rdi)
	mov	%dl,-0x1(%rdi)
	ret

	/* b = 14: 8 + 4 + 2 byte tail. */
	.balign 16
L(SSE14QB): movdqa	%xmm0,-0xbe(%rdi)
L(SSE14QA): movdqa	%xmm0,-0xae(%rdi)
L(SSE14Q9): movdqa	%xmm0,-0x9e(%rdi)
L(SSE14Q8): movdqa	%xmm0,-0x8e(%rdi)
L(SSE14Q7): movdqa	%xmm0,-0x7e(%rdi)
L(SSE14Q6): movdqa	%xmm0,-0x6e(%rdi)
L(SSE14Q5): movdqa	%xmm0,-0x5e(%rdi)
L(SSE14Q4): movdqa	%xmm0,-0x4e(%rdi)
L(SSE14Q3): movdqa	%xmm0,-0x3e(%rdi)
L(SSE14Q2): movdqa	%xmm0,-0x2e(%rdi)
L(SSE14Q1): movdqa	%xmm0,-0x1e(%rdi)
L(SSE14Q0): mov	%rdx,-0xe(%rdi)
	mov	%edx,-0x6(%rdi)
	mov	%dx,-0x2(%rdi)
	ret

	/* b = 15: 8 + 4 + 2 + 1 byte tail. */
	.balign 16
L(SSE15QB): movdqa	%xmm0,-0xbf(%rdi)
L(SSE15QA): movdqa	%xmm0,-0xaf(%rdi)
L(SSE15Q9): movdqa	%xmm0,-0x9f(%rdi)
L(SSE15Q8): movdqa	%xmm0,-0x8f(%rdi)
L(SSE15Q7): movdqa	%xmm0,-0x7f(%rdi)
L(SSE15Q6): movdqa	%xmm0,-0x6f(%rdi)
L(SSE15Q5): movdqa	%xmm0,-0x5f(%rdi)
L(SSE15Q4): movdqa	%xmm0,-0x4f(%rdi)
L(SSE15Q3): movdqa	%xmm0,-0x3f(%rdi)
L(SSE15Q2): movdqa	%xmm0,-0x2f(%rdi)
L(SSE15Q1): movdqa	%xmm0,-0x1f(%rdi)
L(SSE15Q0): mov	%rdx,-0xf(%rdi)
	mov	%edx,-0x7(%rdi)
	mov	%dx,-0x3(%rdi)
	mov	%dl,-0x1(%rdi)
	ret

	/*
	 * SSE2 bulk path.
	 *
	 * Registers as used in this section:
	 *	%rdi	current store address
	 *	%r8	bytes still to be stored
	 *	%xmm0	16 bytes of fill data, %rdx 8 bytes of fill data
	 *		(both are prepared before this section; presumably
	 *		the fill byte replicated - confirm in the prologue)
	 *
	 * When the remaining size exceeds the value loaded from
	 * .largest_level_cache_size, use non-temporal stores; otherwise
	 * fall through to the regular movdqa loop.
	 *
	 * NOTE(review): jg/jge here and below are signed comparisons on a
	 * byte count.  A count with bit 63 set would be treated as
	 * negative; confirm that such sizes cannot reach this code.
	 */
	.balign 16
L(byte32sse2_pre):
	mov	.largest_level_cache_size(%rip),%r9d	# 32-bit load, zero-extended
	cmp	%r9,%r8
	jg	L(sse2_nt_move)
	/* Fall through to L(byte32sse2). */

	/*
	 * Store 128 bytes per iteration with eight aligned 16-byte stores.
	 * The lea instructions do not modify the flags, so the jge at the
	 * bottom still tests the cmp issued at the top of the loop.
	 */
	.balign 16
L(byte32sse2):
	lea	-0x80(%r8),%r8			# 128
	cmp	$0x80,%r8
	movdqa	%xmm0,(%rdi)
	movdqa	%xmm0,0x10(%rdi)
	movdqa	%xmm0,0x20(%rdi)
	movdqa	%xmm0,0x30(%rdi)
	movdqa	%xmm0,0x40(%rdi)
	movdqa	%xmm0,0x50(%rdi)
	movdqa	%xmm0,0x60(%rdi)
	movdqa	%xmm0,0x70(%rdi)

	lea	0x80(%rdi),%rdi
	jge	L(byte32sse2)

	/*
	 * Fewer than 128 bytes remain.  Point %rdi past the end of the
	 * region and jump to the L(SSE<b>Q<n>) chain selected by the
	 * 32-bit table-relative offset at L(SSExDx)[%r8].
	 */
	lea	L(SSExDx)(%rip),%r11
	add	%r8,%rdi
	movslq	(%r11,%r8,4),%rcx
	lea	(%rcx,%r11,1),%r11
	jmpq	*%r11

	/*
	 * Same 128-byte loop using non-temporal movntdq stores.  The
	 * sfence after the loop orders those stores before the ordinary
	 * stores done by the tail chain.
	 */
	.balign 16
L(sse2_nt_move):
	sub	$0x80,%r8			# 128
	movntdq	%xmm0,(%rdi)
	movntdq	%xmm0,0x10(%rdi)
	movntdq	%xmm0,0x20(%rdi)
	movntdq	%xmm0,0x30(%rdi)
	movntdq	%xmm0,0x40(%rdi)
	movntdq	%xmm0,0x50(%rdi)
	movntdq	%xmm0,0x60(%rdi)
	movntdq	%xmm0,0x70(%rdi)
	add	$0x80,%rdi
	cmp	$0x80,%r8
	jge	L(sse2_nt_move)

	sfence
	lea	L(SSExDx)(%rip),%r11
	add	%r8,%rdi
	movslq	(%r11,%r8,4),%rcx
	lea	(%rcx,%r11,1),%r11
	jmpq	*%r11

	/*
	 * Don't use SSE
	 *
	 * Choose among three 8-byte store strategies by remaining size:
	 *	> .largest_level_cache_size	non-temporal movnti loop
	 *	>= 0x800 (2K)			rep sstoq
	 *	otherwise			unrolled 8-byte store loop
	 */
	.balign 16
L(Loop8byte_pre):
	mov	.largest_level_cache_size(%rip),%r9d
	cmp	%r9,%r8
	jg	L(Loop8byte_nt_move)
	cmp	$0x800,%r8			# Use rep sstoq
	jge	L(use_rep)

	/*
	 * Store 128 bytes per iteration with sixteen 8-byte stores.  The
	 * cmp sits in the middle of the stores; mov and lea leave the
	 * flags untouched, so the closing jge tests that cmp.
	 */
	.balign 16
L(Loop8byte):
	lea	-0x80(%r8),%r8			# 128
	mov	%rdx,(%rdi)
	mov	%rdx,0x8(%rdi)
	mov	%rdx,0x10(%rdi)
	mov	%rdx,0x18(%rdi)
	mov	%rdx,0x20(%rdi)
	mov	%rdx,0x28(%rdi)
	mov	%rdx,0x30(%rdi)
	mov	%rdx,0x38(%rdi)
	cmp	$0x80,%r8
	mov	%rdx,0x40(%rdi)
	mov	%rdx,0x48(%rdi)
	mov	%rdx,0x50(%rdi)
	mov	%rdx,0x58(%rdi)
	mov	%rdx,0x60(%rdi)
	mov	%rdx,0x68(%rdi)
	mov	%rdx,0x70(%rdi)
	mov	%rdx,0x78(%rdi)
	lea	0x80(%rdi),%rdi
	jge	L(Loop8byte)

	/*
	 * Tail dispatch through L(setPxQx) (defined outside this section):
	 * point %rdi past the end of the region and jump to the handler
	 * selected by the remaining byte count in %r8.  Also entered from
	 * L(use_rep) via "jnz 1b".
	 */
1:
	lea	L(setPxQx)(%rip),%r11
	lea	(%rdi,%r8,1),%rdi

	movslq	(%r11,%r8,4),%rcx
	lea	(%rcx,%r11,1),%r11
	jmpq	*%r11

	/*
	 * Use rep sstoq for sizes >= 2K (0x800), see L(Loop8byte_pre).
	 *
	 * sstoq stores %rax, so the fill data in %rdx is swapped into %rax
	 * for the duration of the string operation and swapped back
	 * afterwards.  %rax presumably carries the return value of memset
	 * at this point - confirm in the prologue.  Any 1-7 leftover bytes
	 * are finished by the tail dispatch at label 1 above.
	 */
	.balign 16
L(use_rep):
	movq	%r8,%rcx			# get size in bytes
	xchg	%rax,%rdx
	shrq	$3,%rcx				# number of 8-byte stores
	rep
	  sstoq
	xchg	%rax,%rdx
	andq	$7,%r8				# remaining bytes
	jnz	1b
	ret

	/*
	 * Same 128-byte loop as L(Loop8byte) using non-temporal movnti
	 * stores, followed by sfence and the L(setPxQx) tail dispatch.
	 */
	.balign 16
L(Loop8byte_nt_move):
	lea	-0x80(%r8),%r8			# 128
	movnti	%rdx,(%rdi)
	movnti	%rdx,0x8(%rdi)
	movnti	%rdx,0x10(%rdi)
	movnti	%rdx,0x18(%rdi)
	movnti	%rdx,0x20(%rdi)
	movnti	%rdx,0x28(%rdi)
	movnti	%rdx,0x30(%rdi)
	movnti	%rdx,0x38(%rdi)
	cmp	$0x80,%r8
	movnti	%rdx,0x40(%rdi)
	movnti	%rdx,0x48(%rdi)
	movnti	%rdx,0x50(%rdi)
	movnti	%rdx,0x58(%rdi)
	movnti	%rdx,0x60(%rdi)
	movnti	%rdx,0x68(%rdi)
	movnti	%rdx,0x70(%rdi)
	movnti	%rdx,0x78(%rdi)
	lea	0x80(%rdi),%rdi
	jge	L(Loop8byte_nt_move)

	sfence
	lea	L(setPxQx)(%rip),%r11
	lea	(%rdi,%r8,1),%rdi

	movslq	(%r11,%r8,4),%rcx
	lea	(%rcx,%r11,1),%r11
	jmpq	*%r11

	/*
	 * L(SSExDx): jump table for the SSE tail chains.
	 *
	 * Entry i (4 bytes, signed, relative to L(SSExDx)) is the offset of
	 * L(SSE<b>Q<n>) with b = i % 16 and n = i / 16, i.e. the handler
	 * for i remaining bytes.  The entry order is the index; do not
	 * reorder.
	 */
	.balign 16
	/* n = 0 */
L(SSExDx): .int	L(SSE0Q0) -L(SSExDx)
	.int	L(SSE1Q0) -L(SSExDx)
	.int	L(SSE2Q0) -L(SSExDx)
	.int	L(SSE3Q0) -L(SSExDx)
	.int	L(SSE4Q0) -L(SSExDx)
	.int	L(SSE5Q0) -L(SSExDx)
	.int	L(SSE6Q0) -L(SSExDx)
	.int	L(SSE7Q0) -L(SSExDx)

	.int	L(SSE8Q0) -L(SSExDx)
	.int	L(SSE9Q0) -L(SSExDx)
	.int	L(SSE10Q0)-L(SSExDx)
	.int	L(SSE11Q0)-L(SSExDx)
	.int	L(SSE12Q0)-L(SSExDx)
	.int	L(SSE13Q0)-L(SSExDx)
	.int	L(SSE14Q0)-L(SSExDx)
	.int	L(SSE15Q0)-L(SSExDx)

	/* n = 1 */
	.int	L(SSE0Q1) -L(SSExDx)
	.int	L(SSE1Q1) -L(SSExDx)
	.int	L(SSE2Q1) -L(SSExDx)
	.int	L(SSE3Q1) -L(SSExDx)
	.int	L(SSE4Q1) -L(SSExDx)
	.int	L(SSE5Q1) -L(SSExDx)
	.int	L(SSE6Q1) -L(SSExDx)
	.int	L(SSE7Q1) -L(SSExDx)

	.int	L(SSE8Q1) -L(SSExDx)
	.int	L(SSE9Q1) -L(SSExDx)
	.int	L(SSE10Q1)-L(SSExDx)
	.int	L(SSE11Q1)-L(SSExDx)
	.int	L(SSE12Q1)-L(SSExDx)
	.int	L(SSE13Q1)-L(SSExDx)
	.int	L(SSE14Q1)-L(SSExDx)
	.int	L(SSE15Q1)-L(SSExDx)

	/* n = 2 */
	.int	L(SSE0Q2) -L(SSExDx)
	.int	L(SSE1Q2) -L(SSExDx)
	.int	L(SSE2Q2) -L(SSExDx)
	.int	L(SSE3Q2) -L(SSExDx)
	.int	L(SSE4Q2) -L(SSExDx)
	.int	L(SSE5Q2) -L(SSExDx)
	.int	L(SSE6Q2) -L(SSExDx)
	.int	L(SSE7Q2) -L(SSExDx)

	.int	L(SSE8Q2) -L(SSExDx)
	.int	L(SSE9Q2) -L(SSExDx)
	.int	L(SSE10Q2)-L(SSExDx)
	.int	L(SSE11Q2)-L(SSExDx)
	.int	L(SSE12Q2)-L(SSExDx)
	.int	L(SSE13Q2)-L(SSExDx)
	.int	L(SSE14Q2)-L(SSExDx)
	.int	L(SSE15Q2)-L(SSExDx)

	/* n = 3 */
	.int	L(SSE0Q3) -L(SSExDx)
	.int	L(SSE1Q3) -L(SSExDx)
	.int	L(SSE2Q3) -L(SSExDx)
	.int	L(SSE3Q3) -L(SSExDx)
	.int	L(SSE4Q3) -L(SSExDx)
	.int	L(SSE5Q3) -L(SSExDx)
	.int	L(SSE6Q3) -L(SSExDx)
	.int	L(SSE7Q3) -L(SSExDx)

	.int	L(SSE8Q3) -L(SSExDx)
	.int	L(SSE9Q3) -L(SSExDx)
	.int	L(SSE10Q3)-L(SSExDx)
	.int	L(SSE11Q3)-L(SSExDx)
	.int	L(SSE12Q3)-L(SSExDx)
	.int	L(SSE13Q3)-L(SSExDx)
	.int	L(SSE14Q3)-L(SSExDx)
	.int	L(SSE15Q3)-L(SSExDx)

	/* n = 4 */
	.int	L(SSE0Q4) -L(SSExDx)
	.int	L(SSE1Q4) -L(SSExDx)
	.int	L(SSE2Q4) -L(SSExDx)
	.int	L(SSE3Q4) -L(SSExDx)
	.int	L(SSE4Q4) -L(SSExDx)
	.int	L(SSE5Q4) -L(SSExDx)
	.int	L(SSE6Q4) -L(SSExDx)
	.int	L(SSE7Q4) -L(SSExDx)

	.int	L(SSE8Q4) -L(SSExDx)
	.int	L(SSE9Q4) -L(SSExDx)
	.int	L(SSE10Q4)-L(SSExDx)
	.int	L(SSE11Q4)-L(SSExDx)
	.int	L(SSE12Q4)-L(SSExDx)
	.int	L(SSE13Q4)-L(SSExDx)
	.int	L(SSE14Q4)-L(SSExDx)
	.int	L(SSE15Q4)-L(SSExDx)

	/* n = 5 */
	.int	L(SSE0Q5) -L(SSExDx)
	.int	L(SSE1Q5) -L(SSExDx)
	.int	L(SSE2Q5) -L(SSExDx)
	.int	L(SSE3Q5) -L(SSExDx)
	.int	L(SSE4Q5) -L(SSExDx)
	.int	L(SSE5Q5) -L(SSExDx)
	.int	L(SSE6Q5) -L(SSExDx)
	.int	L(SSE7Q5) -L(SSExDx)

	.int	L(SSE8Q5) -L(SSExDx)
	.int	L(SSE9Q5) -L(SSExDx)
	.int	L(SSE10Q5)-L(SSExDx)
	.int	L(SSE11Q5)-L(SSExDx)
	.int	L(SSE12Q5)-L(SSExDx)
	.int	L(SSE13Q5)-L(SSExDx)
	.int	L(SSE14Q5)-L(SSExDx)
	.int	L(SSE15Q5)-L(SSExDx)

	/* n = 6 */
	.int	L(SSE0Q6) -L(SSExDx)
	.int	L(SSE1Q6) -L(SSExDx)
	.int	L(SSE2Q6) -L(SSExDx)
	.int	L(SSE3Q6) -L(SSExDx)
	.int	L(SSE4Q6) -L(SSExDx)
	.int	L(SSE5Q6) -L(SSExDx)
	.int	L(SSE6Q6) -L(SSExDx)
	.int	L(SSE7Q6) -L(SSExDx)

	.int	L(SSE8Q6) -L(SSExDx)
	.int	L(SSE9Q6) -L(SSExDx)
	.int	L(SSE10Q6)-L(SSExDx)
	.int	L(SSE11Q6)-L(SSExDx)
	.int	L(SSE12Q6)-L(SSExDx)
	.int	L(SSE13Q6)-L(SSExDx)
	.int	L(SSE14Q6)-L(SSExDx)
	.int	L(SSE15Q6)-L(SSExDx)

	/* n = 7 */
	.int	L(SSE0Q7) -L(SSExDx)
	.int	L(SSE1Q7) -L(SSExDx)
	.int	L(SSE2Q7) -L(SSExDx)
	.int	L(SSE3Q7) -L(SSExDx)
	.int	L(SSE4Q7) -L(SSExDx)
	.int	L(SSE5Q7) -L(SSExDx)
	.int	L(SSE6Q7) -L(SSExDx)
	.int	L(SSE7Q7) -L(SSExDx)

	.int	L(SSE8Q7) -L(SSExDx)
	.int	L(SSE9Q7) -L(SSExDx)
	.int	L(SSE10Q7)-L(SSExDx)
	.int	L(SSE11Q7)-L(SSExDx)
	.int	L(SSE12Q7)-L(SSExDx)
	.int	L(SSE13Q7)-L(SSExDx)
	.int	L(SSE14Q7)-L(SSExDx)
	.int	L(SSE15Q7)-L(SSExDx)

	/* n = 8 */
	.int	L(SSE0Q8) -L(SSExDx)
	.int	L(SSE1Q8) -L(SSExDx)
	.int	L(SSE2Q8) -L(SSExDx)
	.int	L(SSE3Q8) -L(SSExDx)
	.int	L(SSE4Q8) -L(SSExDx)
	.int	L(SSE5Q8) -L(SSExDx)
	.int	L(SSE6Q8) -L(SSExDx)
	.int	L(SSE7Q8) -L(SSExDx)

	.int	L(SSE8Q8) -L(SSExDx)
	.int	L(SSE9Q8) -L(SSExDx)
	.int	L(SSE10Q8)-L(SSExDx)
	.int	L(SSE11Q8)-L(SSExDx)
	.int	L(SSE12Q8)-L(SSExDx)
	.int	L(SSE13Q8)-L(SSExDx)
	.int	L(SSE14Q8)-L(SSExDx)
	.int	L(SSE15Q8)-L(SSExDx)

	/* n = 9 */
	.int	L(SSE0Q9) -L(SSExDx)
	.int	L(SSE1Q9) -L(SSExDx)
	.int	L(SSE2Q9) -L(SSExDx)
	.int	L(SSE3Q9) -L(SSExDx)
	.int	L(SSE4Q9) -L(SSExDx)
	.int	L(SSE5Q9) -L(SSExDx)
	.int	L(SSE6Q9) -L(SSExDx)
	.int	L(SSE7Q9) -L(SSExDx)

	.int	L(SSE8Q9) -L(SSExDx)
	.int	L(SSE9Q9) -L(SSExDx)
	.int	L(SSE10Q9)-L(SSExDx)
	.int	L(SSE11Q9)-L(SSExDx)
	.int	L(SSE12Q9)-L(SSExDx)
	.int	L(SSE13Q9)-L(SSExDx)
	.int	L(SSE14Q9)-L(SSExDx)
	.int	L(SSE15Q9)-L(SSExDx)

	/* n = 10 */
	.int	L(SSE0QA) -L(SSExDx)
	.int	L(SSE1QA) -L(SSExDx)
	.int	L(SSE2QA) -L(SSExDx)
	.int	L(SSE3QA) -L(SSExDx)
	.int	L(SSE4QA) -L(SSExDx)
	.int	L(SSE5QA) -L(SSExDx)
	.int	L(SSE6QA) -L(SSExDx)
	.int	L(SSE7QA) -L(SSExDx)

	.int	L(SSE8QA) -L(SSExDx)
	.int	L(SSE9QA) -L(SSExDx)
	.int	L(SSE10QA)-L(SSExDx)
	.int	L(SSE11QA)-L(SSExDx)
	.int	L(SSE12QA)-L(SSExDx)
	.int	L(SSE13QA)-L(SSExDx)
	.int	L(SSE14QA)-L(SSExDx)
	.int	L(SSE15QA)-L(SSExDx)

	/* n = 11 */
	.int	L(SSE0QB) -L(SSExDx)
	.int	L(SSE1QB) -L(SSExDx)
	.int	L(SSE2QB) -L(SSExDx)
	.int	L(SSE3QB) -L(SSExDx)
	.int	L(SSE4QB) -L(SSExDx)
	.int	L(SSE5QB) -L(SSExDx)
	.int	L(SSE6QB) -L(SSExDx)
	.int	L(SSE7QB) -L(SSExDx)

	.int	L(SSE8QB) -L(SSExDx)
	.int	L(SSE9QB) -L(SSExDx)
	.int	L(SSE10QB)-L(SSExDx)
	.int	L(SSE11QB)-L(SSExDx)
	.int	L(SSE12QB)-L(SSExDx)
	.int	L(SSE13QB)-L(SSExDx)
	.int	L(SSE14QB)-L(SSExDx)
	.int	L(SSE15QB)-L(SSExDx)

	SET_SIZE(memset)