/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/asm_linkage.h>
#include <sys/regset.h>
#include <sys/privregs.h>

#if defined(__lint)
#include <sys/types.h>
#include <sys/archsystm.h>
#else
#include "assym.h"
#endif

/*
 * Do block operations using Streaming SIMD extensions
 */

#if defined(DEBUG)
#if defined(__amd64)
#define	ASSERT_KPREEMPT_DISABLED(t, r32, msg)	\
	movq	%gs:CPU_THREAD, t;		\
	movsbl	T_PREEMPT(t), r32;		\
	testl	r32, r32;			\
	jne	5f;				\
	pushq	%rbp;				\
	movq	%rsp, %rbp;			\
	leaq	msg(%rip), %rdi;		\
	xorl	%eax, %eax;			\
	call	panic;				\
5:
#elif defined(__i386)
#define	ASSERT_KPREEMPT_DISABLED(t, r32, msg)	\
	movl	%gs:CPU_THREAD, t;		\
	movsbl	T_PREEMPT(t), r32;		\
	testl	r32, r32;			\
	jne	5f;				\
	pushl	%ebp;				\
	movl	%esp, %ebp;			\
	pushl	$msg;				\
	call	panic;				\
5:
#endif	/* __i386 */
#else	/* DEBUG */
#define	ASSERT_KPREEMPT_DISABLED(t, r32, msg)
#endif	/* DEBUG */

#define	BLOCKSHIFT	6
#define	BLOCKSIZE	64	/* (1 << BLOCKSHIFT) */
#define	BLOCKMASK	63	/* (BLOCKSIZE - 1) */

#if (1 << BLOCKSHIFT) != BLOCKSIZE || BLOCKMASK != (BLOCKSIZE - 1)
#error	"mucked up constants"
#endif
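
/*
 * Caller-contract sketch (illustrative, not part of the original
 * source): hwblkclr() falls back to bzero() unless the destination is
 * BLOCKSIZE aligned and the size is a nonzero multiple of BLOCKSIZE,
 * and DEBUG builds assert that kernel preemption is disabled, since
 * the routine manipulates %cr0 and the %xmm state directly.  Assuming
 * the usual kpreempt_disable()/kpreempt_enable() kernel interfaces, a
 * minimal caller looks like:
 *
 *	kpreempt_disable();
 *	hwblkclr(buf, len);
 *	kpreempt_enable();
 *
 * where buf is 64-byte aligned and len is a multiple of 64.
 */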

#if defined(__lint)

/*ARGSUSED*/
void
hwblkclr(void *addr, size_t size)
{}

#else	/* __lint */

#if defined(__amd64)
#define	ADD	addq
#define	SUB	subq
#else
#define	ADD	addl
#define	SUB	subl
#endif

#define	SAVE_XMM0(r)				\
	SAVE_XMM_PROLOG(r, 1);			\
	movdqa	%xmm0, (r)

#define	ZERO_LOOP_INIT_XMM(dst)			\
	pxor	%xmm0, %xmm0

#define	ZERO_LOOP_BODY_XMM(dst, cnt)		\
	movntdq	%xmm0, (dst);			\
	movntdq	%xmm0, 0x10(dst);		\
	movntdq	%xmm0, 0x20(dst);		\
	movntdq	%xmm0, 0x30(dst);		\
	ADD	$BLOCKSIZE, dst;		\
	SUB	$1, cnt

#define	ZERO_LOOP_FINI_XMM(dst)			\
	mfence

#define	RSTOR_XMM0(r)				\
	movdqa	0x0(r), %xmm0;			\
	RSTOR_XMM_EPILOG(r, 1)
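
/*
 * Each ZERO_LOOP_BODY_XMM pass issues four 16-byte movntdq stores,
 * clearing one 64-byte BLOCKSIZE chunk per iteration.  movntdq is a
 * non-temporal store: it bypasses the cache hierarchy and is weakly
 * ordered, which is why ZERO_LOOP_FINI_XMM ends the loop with an
 * mfence before the cleared block can be observed by the caller.
 */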

#if defined(__amd64)

	/*
	 * %rdi		dst
	 * %rsi		size
	 * %rax		saved %cr0 (#if DEBUG then %eax is t->t_preempt)
	 * %r8		pointer to %xmm register save area
	 */
	ENTRY(hwblkclr)
	pushq	%rbp
	movq	%rsp, %rbp
	testl	$BLOCKMASK, %edi	/* address must be BLOCKSIZE aligned */
	jne	.dobzero
	cmpq	$BLOCKSIZE, %rsi	/* size must be at least BLOCKSIZE */
	jl	.dobzero
	testq	$BLOCKMASK, %rsi	/* .. and be a multiple of BLOCKSIZE */
	jne	.dobzero
	shrq	$BLOCKSHIFT, %rsi

	ASSERT_KPREEMPT_DISABLED(%r11, %eax, .not_disabled)
	movq	%cr0, %rax
	clts
	testl	$CR0_TS, %eax
	jnz	1f

	SAVE_XMM0(%r8)
1:	ZERO_LOOP_INIT_XMM(%rdi)
9:	ZERO_LOOP_BODY_XMM(%rdi, %rsi)
	jnz	9b
	ZERO_LOOP_FINI_XMM(%rdi)

	testl	$CR0_TS, %eax
	jnz	2f
	RSTOR_XMM0(%r8)
2:	movq	%rax, %cr0
	leave
	ret
.dobzero:
	leave
	jmp	bzero
	SET_SIZE(hwblkclr)

#elif defined(__i386)

	/*
	 * %eax		dst
	 * %ecx		size in bytes, loop count
	 * %ebx		saved %cr0 (#if DEBUG then t->t_preempt)
	 * %edi		pointer to %xmm register save area
	 */
	ENTRY(hwblkclr)
	movl	4(%esp), %eax
	movl	8(%esp), %ecx
	testl	$BLOCKMASK, %eax	/* address must be BLOCKSIZE aligned */
	jne	.dobzero
	cmpl	$BLOCKSIZE, %ecx	/* size must be at least BLOCKSIZE */
	jl	.dobzero
	testl	$BLOCKMASK, %ecx	/* .. and be a multiple of BLOCKSIZE */
	jne	.dobzero
	shrl	$BLOCKSHIFT, %ecx
	movl	0xc(%esp), %edx
	pushl	%ebx

	pushl	%esi
	ASSERT_KPREEMPT_DISABLED(%esi, %ebx, .not_disabled)
	popl	%esi
	movl	%cr0, %ebx
	clts
	testl	$CR0_TS, %ebx
	jnz	1f

	pushl	%edi
	SAVE_XMM0(%edi)
1:	ZERO_LOOP_INIT_XMM(%eax)
9:	ZERO_LOOP_BODY_XMM(%eax, %ecx)
	jnz	9b
	ZERO_LOOP_FINI_XMM(%eax)

	testl	$CR0_TS, %ebx
	jnz	2f
	RSTOR_XMM0(%edi)
	popl	%edi
2:	movl	%ebx, %cr0
	popl	%ebx
	ret
.dobzero:
	jmp	bzero
	SET_SIZE(hwblkclr)

#endif	/* __i386 */
#endif	/* __lint */


#if defined(__lint)

/*ARGSUSED*/
void
hwblkpagecopy(const void *src, void *dst)
{}

#else	/* __lint */

#define	PREFETCH_START(src)			\
	prefetchnta	0x0(src);		\
	prefetchnta	0x40(src)
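
/*
 * PREFETCH_START primes the first two 64-byte cache lines of the
 * source page, i.e. the first 128-byte chunk that COPY_LOOP_INIT_XMM
 * will load.  prefetchnta hints that the data is non-temporal, so it
 * is brought in with minimal cache pollution.
 */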

#define	SAVE_XMMS(r)				\
	SAVE_XMM_PROLOG(r, 8);			\
	movdqa	%xmm0, (r);			\
	movdqa	%xmm1, 0x10(r);			\
	movdqa	%xmm2, 0x20(r);			\
	movdqa	%xmm3, 0x30(r);			\
	movdqa	%xmm4, 0x40(r);			\
	movdqa	%xmm5, 0x50(r);			\
	movdqa	%xmm6, 0x60(r);			\
	movdqa	%xmm7, 0x70(r)

#define	COPY_LOOP_INIT_XMM(src)			\
	prefetchnta	0x80(src);		\
	prefetchnta	0xc0(src);		\
	movdqa	0x0(src), %xmm0;		\
	movdqa	0x10(src), %xmm1;		\
	movdqa	0x20(src), %xmm2;		\
	movdqa	0x30(src), %xmm3;		\
	movdqa	0x40(src), %xmm4;		\
	movdqa	0x50(src), %xmm5;		\
	movdqa	0x60(src), %xmm6;		\
	movdqa	0x70(src), %xmm7;		\
	ADD	$0x80, src

#define	COPY_LOOP_BODY_XMM(src, dst, cnt)	\
	prefetchnta	0x80(src);		\
	prefetchnta	0xc0(src);		\
	prefetchnta	0x100(src);		\
	prefetchnta	0x140(src);		\
	movntdq	%xmm0, (dst);			\
	movntdq	%xmm1, 0x10(dst);		\
	movntdq	%xmm2, 0x20(dst);		\
	movntdq	%xmm3, 0x30(dst);		\
	movdqa	0x0(src), %xmm0;		\
	movdqa	0x10(src), %xmm1;		\
	movntdq	%xmm4, 0x40(dst);		\
	movntdq	%xmm5, 0x50(dst);		\
	movdqa	0x20(src), %xmm2;		\
	movdqa	0x30(src), %xmm3;		\
	movntdq	%xmm6, 0x60(dst);		\
	movntdq	%xmm7, 0x70(dst);		\
	movdqa	0x40(src), %xmm4;		\
	movdqa	0x50(src), %xmm5;		\
	ADD	$0x80, dst;			\
	movdqa	0x60(src), %xmm6;		\
	movdqa	0x70(src), %xmm7;		\
	ADD	$0x80, src;			\
	subl	$1, cnt

#define	COPY_LOOP_FINI_XMM(dst)			\
	movntdq	%xmm0, 0x0(dst);		\
	movntdq	%xmm1, 0x10(dst);		\
	movntdq	%xmm2, 0x20(dst);		\
	movntdq	%xmm3, 0x30(dst);		\
	movntdq	%xmm4, 0x40(dst);		\
	movntdq	%xmm5, 0x50(dst);		\
	movntdq	%xmm6, 0x60(dst);		\
	movntdq	%xmm7, 0x70(dst)

#define	RSTOR_XMMS(r)				\
	movdqa	0x0(r), %xmm0;			\
	movdqa	0x10(r), %xmm1;			\
	movdqa	0x20(r), %xmm2;			\
	movdqa	0x30(r), %xmm3;			\
	movdqa	0x40(r), %xmm4;			\
	movdqa	0x50(r), %xmm5;			\
	movdqa	0x60(r), %xmm6;			\
	movdqa	0x70(r), %xmm7;			\
	RSTOR_XMM_EPILOG(r, 8)
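
/*
 * The copy loop is software pipelined: COPY_LOOP_INIT_XMM loads the
 * first 128-byte chunk into %xmm0-%xmm7, each COPY_LOOP_BODY_XMM pass
 * streams the previously loaded chunk out with movntdq while loading
 * the next one (and prefetching further ahead), and
 * COPY_LOOP_FINI_XMM drains the final chunk.  Interleaving the loads
 * and non-temporal stores keeps both the read and write streams busy.
 */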

#if defined(__amd64)

	/*
	 * %rdi		src
	 * %rsi		dst
	 * %rdx		#if DEBUG then curthread
	 * %ecx		loop count
	 * %rax		saved %cr0 (#if DEBUG then %eax is t->t_preempt)
	 * %r8		pointer to %xmm register save area
	 */
	ENTRY(hwblkpagecopy)
	pushq	%rbp
	movq	%rsp, %rbp
	PREFETCH_START(%rdi)
	/*
	 * PAGESIZE is 4096, each loop moves 128 bytes, but the initial
	 * load and final store save us one loop count
	 */
	movl	$_CONST(32 - 1), %ecx
	ASSERT_KPREEMPT_DISABLED(%rdx, %eax, .not_disabled)
	movq	%cr0, %rax
	clts
	testl	$CR0_TS, %eax
	jnz	3f
	SAVE_XMMS(%r8)
3:	COPY_LOOP_INIT_XMM(%rdi)
4:	COPY_LOOP_BODY_XMM(%rdi, %rsi, %ecx)
	jnz	4b
	COPY_LOOP_FINI_XMM(%rsi)
	testl	$CR0_TS, %eax
	jnz	5f
	RSTOR_XMMS(%r8)
5:	movq	%rax, %cr0
	mfence
	leave
	ret
	SET_SIZE(hwblkpagecopy)

#elif defined(__i386)

	/*
	 * %eax		src
	 * %edx		dst
	 * %ecx		loop count
	 * %ebx		saved %cr0 (#if DEBUG then t->t_preempt)
	 * %edi		pointer to %xmm register save area
	 * %esi		#if DEBUG temporary thread pointer
	 */
	ENTRY(hwblkpagecopy)
	movl	4(%esp), %eax
	movl	8(%esp), %edx
	PREFETCH_START(%eax)
	pushl	%ebx
	/*
	 * PAGESIZE is 4096, each loop moves 128 bytes, but the initial
	 * load and final store save us one loop count
	 */
	movl	$_CONST(32 - 1), %ecx
	pushl	%esi
	ASSERT_KPREEMPT_DISABLED(%esi, %ebx, .not_disabled)
	popl	%esi
	movl	%cr0, %ebx
	clts
	testl	$CR0_TS, %ebx
	jnz	3f
	pushl	%edi
	SAVE_XMMS(%edi)
3:	COPY_LOOP_INIT_XMM(%eax)
4:	COPY_LOOP_BODY_XMM(%eax, %edx, %ecx)
	jnz	4b
	COPY_LOOP_FINI_XMM(%edx)
	testl	$CR0_TS, %ebx
	jnz	5f
	RSTOR_XMMS(%edi)
	popl	%edi
5:	movl	%ebx, %cr0
	popl	%ebx
	mfence
	ret
	SET_SIZE(hwblkpagecopy)

#endif	/* __i386 */
#endif	/* __lint */

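/*
 * Both no-xmm routines below share a loop idiom: advance the
 * pointer(s) one full buffer past the start, negate the byte count,
 * address memory as (base, count), and count the negative index up
 * toward zero.  One register thus serves as both index and loop
 * counter, and the jnz reuses the flags set by the add.  A rough C
 * equivalent of the zeroing loop, for illustration only (the assembly
 * unrolls it four ways and uses movnti non-temporal stores):
 *
 *	char *p = (char *)dst + len;
 *	ssize_t i = -len;
 *	do {
 *		*(uint64_t *)(p + i) = 0;
 *		i += 8;
 *	} while (i != 0);
 */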

#if defined(__lint)

/*
 * Version of hwblkclr which doesn't use XMM registers.
 * Note that it requires aligned dst and len.
 *
 * XXPV	This needs to be performance tuned at some point.
 *	Is 4 the best number of iterations to unroll?
 */
/*ARGSUSED*/
void
block_zero_no_xmm(void *dst, int len)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(block_zero_no_xmm)
	pushq	%rbp
	movq	%rsp, %rbp
	xorl	%eax, %eax
	addq	%rsi, %rdi
	negq	%rsi
1:
	movnti	%rax, (%rdi, %rsi)
	movnti	%rax, 8(%rdi, %rsi)
	movnti	%rax, 16(%rdi, %rsi)
	movnti	%rax, 24(%rdi, %rsi)
	addq	$32, %rsi
	jnz	1b
	mfence
	leave
	ret
	SET_SIZE(block_zero_no_xmm)

#elif defined(__i386)

	ENTRY(block_zero_no_xmm)
	pushl	%ebp
	movl	%esp, %ebp
	xorl	%eax, %eax
	movl	8(%ebp), %edx
	movl	12(%ebp), %ecx
	addl	%ecx, %edx
	negl	%ecx
1:
	movnti	%eax, (%edx, %ecx)
	movnti	%eax, 4(%edx, %ecx)
	movnti	%eax, 8(%edx, %ecx)
	movnti	%eax, 12(%edx, %ecx)
	addl	$16, %ecx
	jnz	1b
	mfence
	leave
	ret
	SET_SIZE(block_zero_no_xmm)

#endif	/* __i386 */
#endif	/* __lint */

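/*
 * Note on block_zero_no_xmm's alignment contract, inferred from the
 * loop structure above and stated here for illustration: the amd64
 * loop clears 32 bytes per iteration and the i386 loop 16, so len
 * must be a nonzero multiple of that granularity or the counter never
 * reaches zero.  A hedged example call, assuming a suitably aligned
 * page-sized buffer named pagebuf:
 *
 *	block_zero_no_xmm(pagebuf, PAGESIZE);
 */
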

#if defined(__lint)

/*
 * Version of page copy which doesn't use XMM registers.
 *
 * XXPV	This needs to be performance tuned at some point.
 *	Is 4 the right number of iterations to unroll?
 *	Is the load/store order optimal? Should it use prefetch?
 */
/*ARGSUSED*/
void
page_copy_no_xmm(void *dst, void *src)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(page_copy_no_xmm)
	movq	$MMU_STD_PAGESIZE, %rcx
	addq	%rcx, %rdi
	addq	%rcx, %rsi
	negq	%rcx
1:
	movq	(%rsi, %rcx), %rax
	movnti	%rax, (%rdi, %rcx)
	movq	8(%rsi, %rcx), %rax
	movnti	%rax, 8(%rdi, %rcx)
	movq	16(%rsi, %rcx), %rax
	movnti	%rax, 16(%rdi, %rcx)
	movq	24(%rsi, %rcx), %rax
	movnti	%rax, 24(%rdi, %rcx)
	addq	$32, %rcx
	jnz	1b
	mfence
	ret
	SET_SIZE(page_copy_no_xmm)

#elif defined(__i386)

	ENTRY(page_copy_no_xmm)
	pushl	%esi
	movl	$MMU_STD_PAGESIZE, %ecx
	movl	8(%esp), %edx
	movl	12(%esp), %esi
	addl	%ecx, %edx
	addl	%ecx, %esi
	negl	%ecx
1:
	movl	(%esi, %ecx), %eax
	movnti	%eax, (%edx, %ecx)
	movl	4(%esi, %ecx), %eax
	movnti	%eax, 4(%edx, %ecx)
	movl	8(%esi, %ecx), %eax
	movnti	%eax, 8(%edx, %ecx)
	movl	12(%esi, %ecx), %eax
	movnti	%eax, 12(%edx, %ecx)
	addl	$16, %ecx
	jnz	1b
	mfence
	popl	%esi
	ret
	SET_SIZE(page_copy_no_xmm)

#endif	/* __i386 */
#endif	/* __lint */

#if defined(DEBUG) && !defined(__lint)
	.text
.not_disabled:
	.string	"sseblk: preemption not disabled!"
#endif