1*0Sstevel@tonic-gate/* 2*0Sstevel@tonic-gate * CDDL HEADER START 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*0Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*0Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*0Sstevel@tonic-gate * with the License. 8*0Sstevel@tonic-gate * 9*0Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*0Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*0Sstevel@tonic-gate * See the License for the specific language governing permissions 12*0Sstevel@tonic-gate * and limitations under the License. 13*0Sstevel@tonic-gate * 14*0Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*0Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*0Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*0Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*0Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*0Sstevel@tonic-gate * 20*0Sstevel@tonic-gate * CDDL HEADER END 21*0Sstevel@tonic-gate */ 22*0Sstevel@tonic-gate/* 23*0Sstevel@tonic-gate * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24*0Sstevel@tonic-gate * Use is subject to license terms. 
25*0Sstevel@tonic-gate */ 26*0Sstevel@tonic-gate 27*0Sstevel@tonic-gate#pragma ident "%Z%%M% %I% %E% SMI" 28*0Sstevel@tonic-gate 29*0Sstevel@tonic-gate#include <sys/param.h> 30*0Sstevel@tonic-gate#include <sys/errno.h> 31*0Sstevel@tonic-gate#include <sys/asm_linkage.h> 32*0Sstevel@tonic-gate#include <sys/vtrace.h> 33*0Sstevel@tonic-gate#include <sys/machthread.h> 34*0Sstevel@tonic-gate#include <sys/clock.h> 35*0Sstevel@tonic-gate#include <sys/asi.h> 36*0Sstevel@tonic-gate#include <sys/fsr.h> 37*0Sstevel@tonic-gate#include <sys/privregs.h> 38*0Sstevel@tonic-gate#include <sys/fpras_impl.h> 39*0Sstevel@tonic-gate 40*0Sstevel@tonic-gate#if !defined(lint) 41*0Sstevel@tonic-gate#include "assym.h" 42*0Sstevel@tonic-gate#endif /* lint */ 43*0Sstevel@tonic-gate 44*0Sstevel@tonic-gate/* 45*0Sstevel@tonic-gate * Pseudo-code to aid in understanding the control flow of the 46*0Sstevel@tonic-gate * bcopy/copyin/copyout routines. 47*0Sstevel@tonic-gate * 48*0Sstevel@tonic-gate * On entry: 49*0Sstevel@tonic-gate * 50*0Sstevel@tonic-gate * ! Determine whether to use the FP register version 51*0Sstevel@tonic-gate * ! or the leaf routine version depending on size 52*0Sstevel@tonic-gate * ! of copy and flags. Set up error handling accordingly. 53*0Sstevel@tonic-gate * ! The transition point depends on whether the src and 54*0Sstevel@tonic-gate * ! dst addresses can be aligned to long word, word, 55*0Sstevel@tonic-gate * ! half word, or byte boundaries. 56*0Sstevel@tonic-gate * ! 57*0Sstevel@tonic-gate * ! WARNING: <Register usage convention> 58*0Sstevel@tonic-gate * ! For FP version, %l6 holds previous error handling and 59*0Sstevel@tonic-gate * ! a flag: TRAMP_FLAG (low bits) 60*0Sstevel@tonic-gate * ! for leaf routine version, %o4 holds those values. 61*0Sstevel@tonic-gate * ! So either %l6 or %o4 is reserved and not available for 62*0Sstevel@tonic-gate * ! any other use. 63*0Sstevel@tonic-gate * 64*0Sstevel@tonic-gate * if (length <= VIS_COPY_THRESHOLD) ! 
start with a quick test 65*0Sstevel@tonic-gate * go to small_copy; ! to speed short copies 66*0Sstevel@tonic-gate * 67*0Sstevel@tonic-gate * ! src, dst long word alignable 68*0Sstevel@tonic-gate * if (hw_copy_limit_8 == 0) ! hw_copy disabled 69*0Sstevel@tonic-gate * go to small_copy; 70*0Sstevel@tonic-gate * if (length <= hw_copy_limit_8) 71*0Sstevel@tonic-gate * go to small_copy; 72*0Sstevel@tonic-gate * go to FPBLK_copy; 73*0Sstevel@tonic-gate * } 74*0Sstevel@tonic-gate * if (src,dst not alignable) { 75*0Sstevel@tonic-gate * if (hw_copy_limit_1 == 0) ! hw_copy disabled 76*0Sstevel@tonic-gate * go to small_copy; 77*0Sstevel@tonic-gate * if (length <= hw_copy_limit_1) 78*0Sstevel@tonic-gate * go to small_copy; 79*0Sstevel@tonic-gate * go to FPBLK_copy; 80*0Sstevel@tonic-gate * } 81*0Sstevel@tonic-gate * if (src,dst halfword alignable) { 82*0Sstevel@tonic-gate * if (hw_copy_limit_2 == 0) ! hw_copy disabled 83*0Sstevel@tonic-gate * go to small_copy; 84*0Sstevel@tonic-gate * if (length <= hw_copy_limit_2) 85*0Sstevel@tonic-gate * go to small_copy; 86*0Sstevel@tonic-gate * go to FPBLK_copy; 87*0Sstevel@tonic-gate * } 88*0Sstevel@tonic-gate * if (src,dst word alignable) { 89*0Sstevel@tonic-gate * if (hw_copy_limit_4 == 0) ! hw_copy disabled 90*0Sstevel@tonic-gate * go to small_copy; 91*0Sstevel@tonic-gate * if (length <= hw_copy_limit_4) 92*0Sstevel@tonic-gate * go to small_copy; 93*0Sstevel@tonic-gate * go to FPBLK_copy; 94*0Sstevel@tonic-gate * } 95*0Sstevel@tonic-gate * 96*0Sstevel@tonic-gate * small_copy: 97*0Sstevel@tonic-gate * Setup_leaf_rtn_error_handler; ! diffs for each entry point 98*0Sstevel@tonic-gate * 99*0Sstevel@tonic-gate * if (count <= 3) ! fast path for tiny copies 100*0Sstevel@tonic-gate * go to sm_left; ! special finish up code 101*0Sstevel@tonic-gate * else 102*0Sstevel@tonic-gate * if (count > CHKSIZE) ! medium sized copies 103*0Sstevel@tonic-gate * go to sm_med ! 
tuned by alignment 104*0Sstevel@tonic-gate * if(src&dst not both word aligned) { 105*0Sstevel@tonic-gate * sm_movebytes: 106*0Sstevel@tonic-gate * move byte by byte in 4-way unrolled loop 107*0Sstevel@tonic-gate * fall into sm_left; 108*0Sstevel@tonic-gate * sm_left: 109*0Sstevel@tonic-gate * move 0-3 bytes byte at a time as needed. 110*0Sstevel@tonic-gate * restore error handler and exit. 111*0Sstevel@tonic-gate * 112*0Sstevel@tonic-gate * } else { ! src&dst are word aligned 113*0Sstevel@tonic-gate * check for at least 8 bytes left, 114*0Sstevel@tonic-gate * move word at a time, unrolled by 2 115*0Sstevel@tonic-gate * when fewer than 8 bytes left, 116*0Sstevel@tonic-gate * sm_half: move half word at a time while 2 or more bytes left 117*0Sstevel@tonic-gate * sm_byte: move final byte if necessary 118*0Sstevel@tonic-gate * sm_exit: 119*0Sstevel@tonic-gate * restore error handler and exit. 120*0Sstevel@tonic-gate * } 121*0Sstevel@tonic-gate * 122*0Sstevel@tonic-gate * ! Medium length cases with at least CHKSIZE bytes available 123*0Sstevel@tonic-gate * ! method: line up src and dst as best possible, then 124*0Sstevel@tonic-gate * ! move data in 4-way unrolled loops. 125*0Sstevel@tonic-gate * 126*0Sstevel@tonic-gate * sm_med: 127*0Sstevel@tonic-gate * if(src&dst unalignable) 128*0Sstevel@tonic-gate * go to sm_movebytes 129*0Sstevel@tonic-gate * if(src&dst halfword alignable) 130*0Sstevel@tonic-gate * go to sm_movehalf 131*0Sstevel@tonic-gate * if(src&dst word alignable) 132*0Sstevel@tonic-gate * go to sm_moveword 133*0Sstevel@tonic-gate * ! 
fall into long word movement 134*0Sstevel@tonic-gate * move bytes until src is word aligned 135*0Sstevel@tonic-gate * if not long word aligned, move a word 136*0Sstevel@tonic-gate * move long words in 4-way unrolled loop until < 32 bytes left 137*0Sstevel@tonic-gate * move long words in 1-way unrolled loop until < 8 bytes left 138*0Sstevel@tonic-gate * if zero bytes left, goto sm_exit 139*0Sstevel@tonic-gate * if one byte left, go to sm_byte 140*0Sstevel@tonic-gate * else go to sm_half 141*0Sstevel@tonic-gate * 142*0Sstevel@tonic-gate * sm_moveword: 143*0Sstevel@tonic-gate * move bytes until src is word aligned 144*0Sstevel@tonic-gate * move words in 4-way unrolled loop until < 16 bytes left 145*0Sstevel@tonic-gate * move words in 1-way unrolled loop until < 4 bytes left 146*0Sstevel@tonic-gate * if zero bytes left, goto sm_exit 147*0Sstevel@tonic-gate * if one byte left, go to sm_byte 148*0Sstevel@tonic-gate * else go to sm_half 149*0Sstevel@tonic-gate * 150*0Sstevel@tonic-gate * sm_movehalf: 151*0Sstevel@tonic-gate * move a byte if needed to align src on halfword 152*0Sstevel@tonic-gate * move halfwords in 4-way unrolled loop until < 8 bytes left 153*0Sstevel@tonic-gate * if zero bytes left, goto sm_exit 154*0Sstevel@tonic-gate * if one byte left, go to sm_byte 155*0Sstevel@tonic-gate * else go to sm_half 156*0Sstevel@tonic-gate * 157*0Sstevel@tonic-gate * 158*0Sstevel@tonic-gate * FPBLK_copy: 159*0Sstevel@tonic-gate * %l6 = curthread->t_lofault; 160*0Sstevel@tonic-gate * if (%l6 != NULL) { 161*0Sstevel@tonic-gate * membar #Sync 162*0Sstevel@tonic-gate * curthread->t_lofault = .copyerr; 163*0Sstevel@tonic-gate * caller_error_handler = TRUE ! %l6 |= 2 164*0Sstevel@tonic-gate * } 165*0Sstevel@tonic-gate * 166*0Sstevel@tonic-gate * ! for FPU testing we must not migrate cpus 167*0Sstevel@tonic-gate * if (curthread->t_lwp == NULL) { 168*0Sstevel@tonic-gate * ! Kernel threads do not have pcb's in which to store 169*0Sstevel@tonic-gate * ! 
the floating point state, so disallow preemption during 170*0Sstevel@tonic-gate * ! the copy. This also prevents cpu migration. 171*0Sstevel@tonic-gate * kpreempt_disable(curthread); 172*0Sstevel@tonic-gate * } else { 173*0Sstevel@tonic-gate * thread_nomigrate(); 174*0Sstevel@tonic-gate * } 175*0Sstevel@tonic-gate * 176*0Sstevel@tonic-gate * old_fprs = %fprs; 177*0Sstevel@tonic-gate * old_gsr = %gsr; 178*0Sstevel@tonic-gate * if (%fprs.fef) { 179*0Sstevel@tonic-gate * %fprs.fef = 1; 180*0Sstevel@tonic-gate * save current fpregs on stack using blockstore 181*0Sstevel@tonic-gate * } else { 182*0Sstevel@tonic-gate * %fprs.fef = 1; 183*0Sstevel@tonic-gate * } 184*0Sstevel@tonic-gate * 185*0Sstevel@tonic-gate * 186*0Sstevel@tonic-gate * do_blockcopy_here; 187*0Sstevel@tonic-gate * 188*0Sstevel@tonic-gate * In lofault handler: 189*0Sstevel@tonic-gate * curthread->t_lofault = .copyerr2; 190*0Sstevel@tonic-gate * Continue on with the normal exit handler 191*0Sstevel@tonic-gate * 192*0Sstevel@tonic-gate * On normal exit: 193*0Sstevel@tonic-gate * %gsr = old_gsr; 194*0Sstevel@tonic-gate * if (old_fprs & FPRS_FEF) 195*0Sstevel@tonic-gate * restore fpregs from stack using blockload 196*0Sstevel@tonic-gate * else 197*0Sstevel@tonic-gate * zero fpregs 198*0Sstevel@tonic-gate * %fprs = old_fprs; 199*0Sstevel@tonic-gate * membar #Sync 200*0Sstevel@tonic-gate * curthread->t_lofault = (%l6 & ~3); 201*0Sstevel@tonic-gate * ! following test omitted from copyin/copyout as they 202*0Sstevel@tonic-gate * ! will always have a current thread 203*0Sstevel@tonic-gate * if (curthread->t_lwp == NULL) 204*0Sstevel@tonic-gate * kpreempt_enable(curthread); 205*0Sstevel@tonic-gate * else 206*0Sstevel@tonic-gate * thread_allowmigrate(); 207*0Sstevel@tonic-gate * return (0) 208*0Sstevel@tonic-gate * 209*0Sstevel@tonic-gate * In second lofault handler (.copyerr2): 210*0Sstevel@tonic-gate * We've tried to restore fp state from the stack and failed. 
To 211*0Sstevel@tonic-gate * prevent from returning with a corrupted fp state, we will panic. 212*0Sstevel@tonic-gate */ 213*0Sstevel@tonic-gate 214*0Sstevel@tonic-gate/* 215*0Sstevel@tonic-gate * Comments about optimization choices 216*0Sstevel@tonic-gate * 217*0Sstevel@tonic-gate * The initial optimization decision in this code is to determine 218*0Sstevel@tonic-gate * whether to use the FP registers for a copy or not. If we don't 219*0Sstevel@tonic-gate * use the FP registers, we can execute the copy as a leaf routine, 220*0Sstevel@tonic-gate * saving a register save and restore. Also, less elaborate setup 221*0Sstevel@tonic-gate * is required, allowing short copies to be completed more quickly. 222*0Sstevel@tonic-gate * For longer copies, especially unaligned ones (where the src and 223*0Sstevel@tonic-gate * dst do not align to allow simple ldx,stx operation), the FP 224*0Sstevel@tonic-gate * registers allow much faster copy operations. 225*0Sstevel@tonic-gate * 226*0Sstevel@tonic-gate * The estimated extra cost of the FP path will vary depending on 227*0Sstevel@tonic-gate * src/dst alignment, dst offset from the next 64 byte FPblock store 228*0Sstevel@tonic-gate * boundary, remaining src data after the last full dst cache line is 229*0Sstevel@tonic-gate * moved whether the FP registers need to be saved, and some other 230*0Sstevel@tonic-gate * minor issues. The average additional overhead is estimated to be 231*0Sstevel@tonic-gate * 400 clocks. Since each non-repeated/predicted tst and branch costs 232*0Sstevel@tonic-gate * around 10 clocks, elaborate calculation would slow down to all 233*0Sstevel@tonic-gate * longer copies and only benefit a small portion of medium sized 234*0Sstevel@tonic-gate * copies. Rather than incur such cost, we chose fixed transition 235*0Sstevel@tonic-gate * points for each of the alignment choices. 
236*0Sstevel@tonic-gate * 237*0Sstevel@tonic-gate * For the inner loop, here is a comparison of the per cache line 238*0Sstevel@tonic-gate * costs for each alignment when src&dst are in cache: 239*0Sstevel@tonic-gate * 240*0Sstevel@tonic-gate * byte aligned: 108 clocks slower for non-FPBLK 241*0Sstevel@tonic-gate * half aligned: 44 clocks slower for non-FPBLK 242*0Sstevel@tonic-gate * word aligned: 12 clocks slower for non-FPBLK 243*0Sstevel@tonic-gate * long aligned: 4 clocks >>faster<< for non-FPBLK 244*0Sstevel@tonic-gate * 245*0Sstevel@tonic-gate * The long aligned loop runs faster because it does no prefetching. 246*0Sstevel@tonic-gate * That wins if the data is not in cache or there is too little 247*0Sstevel@tonic-gate * data to gain much benefit from prefetching. But when there 248*0Sstevel@tonic-gate * is more data and that data is not in cache, failing to prefetch 249*0Sstevel@tonic-gate * can run much slower. In addition, there is a 2 Kbyte store queue 250*0Sstevel@tonic-gate * which will cause the non-FPBLK inner loop to slow for larger copies. 251*0Sstevel@tonic-gate * The exact tradeoff is strongly load and application dependent, with 252*0Sstevel@tonic-gate * increasing risk of a customer visible performance regression if the 253*0Sstevel@tonic-gate * non-FPBLK code is used for larger copies. Studies of synthetic in-cache 254*0Sstevel@tonic-gate * vs out-of-cache copy tests in user space suggest 1024 bytes as a safe 255*0Sstevel@tonic-gate * upper limit for the non-FPBLK code. To minimize performance regression 256*0Sstevel@tonic-gate * risk while still gaining the primary benefits of the improvements to 257*0Sstevel@tonic-gate * the non-FPBLK code, we set an upper bound of 1024 bytes for the various 258*0Sstevel@tonic-gate * hw_copy_limit_*. Later experimental studies using different values 259*0Sstevel@tonic-gate * of hw_copy_limit_* can be used to make further adjustments if 260*0Sstevel@tonic-gate * appropriate. 
261*0Sstevel@tonic-gate * 262*0Sstevel@tonic-gate * hw_copy_limit_1 = src and dst are byte aligned but not halfword aligned 263*0Sstevel@tonic-gate * hw_copy_limit_2 = src and dst are halfword aligned but not word aligned 264*0Sstevel@tonic-gate * hw_copy_limit_4 = src and dst are word aligned but not longword aligned 265*0Sstevel@tonic-gate * hw_copy_limit_8 = src and dst are longword aligned 266*0Sstevel@tonic-gate * 267*0Sstevel@tonic-gate * To say that src and dst are word aligned means that after 268*0Sstevel@tonic-gate * some initial alignment activity of moving 0 to 3 bytes, 269*0Sstevel@tonic-gate * both the src and dst will be on word boundaries so that 270*0Sstevel@tonic-gate * word loads and stores may be used. 271*0Sstevel@tonic-gate * 272*0Sstevel@tonic-gate * Recommended initial values as of Mar 2004, includes testing 273*0Sstevel@tonic-gate * on Cheetah+ (900MHz), Cheetah++ (1200MHz), and Jaguar(1050MHz): 274*0Sstevel@tonic-gate * hw_copy_limit_1 = 256 275*0Sstevel@tonic-gate * hw_copy_limit_2 = 512 276*0Sstevel@tonic-gate * hw_copy_limit_4 = 1024 277*0Sstevel@tonic-gate * hw_copy_limit_8 = 1024 (or 1536 on some systems) 278*0Sstevel@tonic-gate * 279*0Sstevel@tonic-gate * 280*0Sstevel@tonic-gate * If hw_copy_limit_? is set to zero, then use of FPBLK copy is 281*0Sstevel@tonic-gate * disabled for that alignment choice. 282*0Sstevel@tonic-gate * If hw_copy_limit_? is set to a value between 1 and VIS_COPY_THRESHOLD (256) 283*0Sstevel@tonic-gate * the value of VIS_COPY_THRESHOLD is used. 284*0Sstevel@tonic-gate * It is not envisioned that hw_copy_limit_? will be changed in the field 285*0Sstevel@tonic-gate * It is provided to allow for disabling FPBLK copies and to allow 286*0Sstevel@tonic-gate * easy testing of alternate values on future HW implementations 287*0Sstevel@tonic-gate * that might have different cache sizes, clock rates or instruction 288*0Sstevel@tonic-gate * timing rules. 
289*0Sstevel@tonic-gate * 290*0Sstevel@tonic-gate * Our first test for FPBLK copies vs non-FPBLK copies checks a minimum 291*0Sstevel@tonic-gate * threshold to speedup all shorter copies (less than 256). That 292*0Sstevel@tonic-gate * saves an alignment test, memory reference, and enabling test 293*0Sstevel@tonic-gate * for all short copies, or an estimated 24 clocks. 294*0Sstevel@tonic-gate * 295*0Sstevel@tonic-gate * The order in which these limits are checked does matter since each 296*0Sstevel@tonic-gate * non-predicted tst and branch costs around 10 clocks. 297*0Sstevel@tonic-gate * If src and dst are randomly selected addresses, 298*0Sstevel@tonic-gate * 4 of 8 will not be alignable. 299*0Sstevel@tonic-gate * 2 of 8 will be half word alignable. 300*0Sstevel@tonic-gate * 1 of 8 will be word alignable. 301*0Sstevel@tonic-gate * 1 of 8 will be long word alignable. 302*0Sstevel@tonic-gate * But, tests on running kernels show that src and dst to copy code 303*0Sstevel@tonic-gate * are typically not on random alignments. Structure copies and 304*0Sstevel@tonic-gate * copies of larger data sizes are often on long word boundaries. 305*0Sstevel@tonic-gate * So we test the long word alignment case first, then 306*0Sstevel@tonic-gate * the byte alignment, then halfword, then word alignment. 307*0Sstevel@tonic-gate * 308*0Sstevel@tonic-gate * Several times, tests for length are made to split the code 309*0Sstevel@tonic-gate * into subcases. These tests often allow later tests to be 310*0Sstevel@tonic-gate * avoided. For example, within the non-FPBLK copy, we first 311*0Sstevel@tonic-gate * check for tiny copies of 3 bytes or less. That allows us 312*0Sstevel@tonic-gate * to use a 4-way unrolled loop for the general byte copy case 313*0Sstevel@tonic-gate * without a test on loop entry. 314*0Sstevel@tonic-gate * We subdivide the non-FPBLK case further into CHKSIZE bytes and less 315*0Sstevel@tonic-gate * vs longer cases. 
For the really short case, we don't attempt to 316*0Sstevel@tonic-gate * align src and dst. We try to minimize special case tests in 317*0Sstevel@tonic-gate * the shortest loops as each test adds a significant percentage 318*0Sstevel@tonic-gate * to the total time. 319*0Sstevel@tonic-gate * 320*0Sstevel@tonic-gate * For the medium sized cases, we allow ourselves to adjust the 321*0Sstevel@tonic-gate * src and dst alignment and provide special cases for each of 322*0Sstevel@tonic-gate * the four adjusted alignment cases. The CHKSIZE that was used 323*0Sstevel@tonic-gate * to decide between short and medium size was chosen to be 39 324*0Sstevel@tonic-gate * as that allows for the worst case of 7 bytes of alignment 325*0Sstevel@tonic-gate * shift and 4 times 8 bytes for the first long word unrolling. 326*0Sstevel@tonic-gate * That knowledge saves an initial test for length on entry into 327*0Sstevel@tonic-gate * the medium cases. If the general loop unrolling factor were 328*0Sstevel@tonic-gate * to be increased, this number would also need to be adjusted. 329*0Sstevel@tonic-gate * 330*0Sstevel@tonic-gate * For all cases in the non-FPBLK code where it is known that at 331*0Sstevel@tonic-gate * least 4 chunks of data are available for movement, the 332*0Sstevel@tonic-gate * loop is unrolled by four. This 4-way loop runs in 8 clocks 333*0Sstevel@tonic-gate * or 2 clocks per data element. Due to limitations of the 334*0Sstevel@tonic-gate * branch instruction on Cheetah, Jaguar, and Panther, the 335*0Sstevel@tonic-gate * minimum time for a small, tight loop is 3 clocks. So 336*0Sstevel@tonic-gate * the 4-way loop runs 50% faster than the fastest non-unrolled 337*0Sstevel@tonic-gate * loop. 338*0Sstevel@tonic-gate * 339*0Sstevel@tonic-gate * Instruction alignment is forced by use of .align 16 directives 340*0Sstevel@tonic-gate * and nops which are not executed in the code.
This 341*0Sstevel@tonic-gate * combination of operations shifts the alignment of following 342*0Sstevel@tonic-gate * loops to insure that loops are aligned so that their instructions 343*0Sstevel@tonic-gate * fall within the minimum number of 4 instruction fetch groups. 344*0Sstevel@tonic-gate * If instructions are inserted or removed between the .align 345*0Sstevel@tonic-gate * instruction and the unrolled loops, then the alignment needs 346*0Sstevel@tonic-gate * to be readjusted. Misaligned loops can add a clock per loop 347*0Sstevel@tonic-gate * iteration to the loop timing. 348*0Sstevel@tonic-gate * 349*0Sstevel@tonic-gate * In a few cases, code is duplicated to avoid a branch. Since 350*0Sstevel@tonic-gate * a non-predicted tst and branch takes 10 clocks, this savings 351*0Sstevel@tonic-gate * is judged an appropriate time-space tradeoff. 352*0Sstevel@tonic-gate * 353*0Sstevel@tonic-gate * Within the FPBLK-code, the prefetch method in the inner 354*0Sstevel@tonic-gate * loop needs to be explained as it is not standard. Two 355*0Sstevel@tonic-gate * prefetches are issued for each cache line instead of one. 356*0Sstevel@tonic-gate * The primary one is at the maximum reach of 8 cache lines. 357*0Sstevel@tonic-gate * Most of the time, that maximum prefetch reach gives the 358*0Sstevel@tonic-gate * cache line more time to reach the processor for systems with 359*0Sstevel@tonic-gate * higher processor clocks. But, sometimes memory interference 360*0Sstevel@tonic-gate * can cause that prefetch to be dropped. Putting a second 361*0Sstevel@tonic-gate * prefetch at a reach of 5 cache lines catches the drops 362*0Sstevel@tonic-gate * three iterations later and shows a measured improvement 363*0Sstevel@tonic-gate * in performance over any similar loop with a single prefetch. 
364*0Sstevel@tonic-gate * The prefetches are placed in the loop so they overlap with 365*0Sstevel@tonic-gate * non-memory instructions, so that there is no extra cost 366*0Sstevel@tonic-gate * when the data is already in-cache. 367*0Sstevel@tonic-gate * 368*0Sstevel@tonic-gate */ 369*0Sstevel@tonic-gate 370*0Sstevel@tonic-gate/* 371*0Sstevel@tonic-gate * Notes on preserving existing fp state and on membars. 372*0Sstevel@tonic-gate * 373*0Sstevel@tonic-gate * When a copyOP decides to use fp we may have to preserve existing 374*0Sstevel@tonic-gate * floating point state. It is not the caller's state that we need to 375*0Sstevel@tonic-gate * preserve - the rest of the kernel does not use fp and, anyway, fp 376*0Sstevel@tonic-gate * registers are volatile across a call. Some examples: 377*0Sstevel@tonic-gate * 378*0Sstevel@tonic-gate * - userland has fp state and is interrupted (device interrupt 379*0Sstevel@tonic-gate * or trap) and within the interrupt/trap handling we use 380*0Sstevel@tonic-gate * bcopy() 381*0Sstevel@tonic-gate * - another (higher level) interrupt or trap handler uses bcopy 382*0Sstevel@tonic-gate * while a bcopy from an earlier interrupt is still active 383*0Sstevel@tonic-gate * - an asynchronous error trap occurs while fp state exists (in 384*0Sstevel@tonic-gate * userland or in kernel copy) and the tl0 component of the handling 385*0Sstevel@tonic-gate * uses bcopy 386*0Sstevel@tonic-gate * - a user process with fp state incurs a copy-on-write fault and 387*0Sstevel@tonic-gate * hwblkpagecopy always uses fp 388*0Sstevel@tonic-gate * 389*0Sstevel@tonic-gate * We therefore need a per-call place in which to preserve fp state - 390*0Sstevel@tonic-gate * using our stack is ideal (and since fp copy cannot be leaf optimized 391*0Sstevel@tonic-gate * because of calls it makes, this is no hardship). 392*0Sstevel@tonic-gate * 393*0Sstevel@tonic-gate * The following membar BLD/BST discussion is Cheetah pipeline specific. 
394*0Sstevel@tonic-gate * In Cheetah BLD is blocking, #LoadLoad/#LoadStore/#StoreStore are 395*0Sstevel@tonic-gate * nops (those semantics always apply) and #StoreLoad is implemented 396*0Sstevel@tonic-gate * as a membar #Sync. 397*0Sstevel@tonic-gate * 398*0Sstevel@tonic-gate * It is possible that the owner of the fp state has a block load or 399*0Sstevel@tonic-gate * block store still "in flight" at the time we come to preserve that 400*0Sstevel@tonic-gate * state. Block loads are blocking in Cheetah pipelines so we do not 401*0Sstevel@tonic-gate * need to sync with them. In preserving fp regs we will use block stores 402*0Sstevel@tonic-gate * (which are not blocking in Cheetah pipelines) so we require a membar #Sync 403*0Sstevel@tonic-gate * after storing state (so that our subsequent use of those registers 404*0Sstevel@tonic-gate * does not modify them before the block stores complete); this membar 405*0Sstevel@tonic-gate * also serves to sync with block stores the owner of the fp state has 406*0Sstevel@tonic-gate * initiated. 407*0Sstevel@tonic-gate * 408*0Sstevel@tonic-gate * When we have finished fp copy (with its repeated block stores) 409*0Sstevel@tonic-gate * we must membar #Sync so that our block stores may complete before 410*0Sstevel@tonic-gate * we either restore the original fp state into the fp registers or 411*0Sstevel@tonic-gate * return to a caller which may initiate other fp operations that could 412*0Sstevel@tonic-gate * modify the fp regs we used before the block stores complete. 413*0Sstevel@tonic-gate * 414*0Sstevel@tonic-gate * Synchronous faults (eg, unresolvable DMMU miss) that occur while 415*0Sstevel@tonic-gate * t_lofault is not NULL will not panic but will instead trampoline 416*0Sstevel@tonic-gate * to the registered lofault handler.
There is no need for any 417*0Sstevel@tonic-gate * membars for these - eg, our store to t_lofault will always be visible to 418*0Sstevel@tonic-gate * ourselves and it is our cpu which will take any trap. 419*0Sstevel@tonic-gate * 420*0Sstevel@tonic-gate * Asynchronous faults (eg, uncorrectable ECC error from memory) that occur 421*0Sstevel@tonic-gate * while t_lofault is not NULL will also not panic. Since we're copying 422*0Sstevel@tonic-gate * to or from userland the extent of the damage is known - the destination 423*0Sstevel@tonic-gate * buffer is incomplete. So trap handlers will trampoline to the lofault 424*0Sstevel@tonic-gate * handler in this case which should take some form of error action to 425*0Sstevel@tonic-gate * avoid using the incomplete buffer. The trap handler also flags the 426*0Sstevel@tonic-gate * fault so that later return-from-trap handling (for the trap that brought 427*0Sstevel@tonic-gate * this thread into the kernel in the first place) can notify the process 428*0Sstevel@tonic-gate * and reboot the system (or restart the service with Greenline/Contracts). 429*0Sstevel@tonic-gate * 430*0Sstevel@tonic-gate * Asynchronous faults (eg, uncorrectable ECC error from memory) can 431*0Sstevel@tonic-gate * result in deferred error traps - the trap is taken sometime after 432*0Sstevel@tonic-gate * the event and the trap PC may not be the PC of the faulting access. 433*0Sstevel@tonic-gate * Delivery of such pending traps can be forced by a membar #Sync, acting 434*0Sstevel@tonic-gate * as an "error barrier" in this role. 
To accurately apply the user/kernel 435*0Sstevel@tonic-gate * separation described in the preceding paragraph we must force delivery 436*0Sstevel@tonic-gate * of deferred traps affecting kernel state before we install a lofault 437*0Sstevel@tonic-gate * handler (if we interpose a new lofault handler on an existing one there 438*0Sstevel@tonic-gate * is no need to repeat this), and we must force delivery of deferred 439*0Sstevel@tonic-gate * errors affecting the lofault-protected region before we clear t_lofault. 440*0Sstevel@tonic-gate * Failure to do so results in lost kernel state being interpreted as 441*0Sstevel@tonic-gate * affecting a copyin/copyout only, or of an error that really only 442*0Sstevel@tonic-gate * affects copy data being interpreted as losing kernel state. 443*0Sstevel@tonic-gate * 444*0Sstevel@tonic-gate * Since the copy operations may preserve and later restore floating 445*0Sstevel@tonic-gate * point state that does not belong to the caller (see examples above), 446*0Sstevel@tonic-gate * we must be careful in how we do this in order to prevent corruption 447*0Sstevel@tonic-gate * of another program. 448*0Sstevel@tonic-gate * 449*0Sstevel@tonic-gate * To make sure that floating point state is always saved and restored 450*0Sstevel@tonic-gate * correctly, the following "big rules" must be followed when the floating 451*0Sstevel@tonic-gate * point registers will be used: 452*0Sstevel@tonic-gate * 453*0Sstevel@tonic-gate * 1. %l6 always holds the caller's lofault handler. Also in this register, 454*0Sstevel@tonic-gate * Bit 1 (FPUSED_FLAG) indicates that the floating point registers are in 455*0Sstevel@tonic-gate * use. Bit 2 (TRAMP_FLAG) indicates that the call was to bcopy, and a 456*0Sstevel@tonic-gate * lofault handler was set coming in. 457*0Sstevel@tonic-gate * 458*0Sstevel@tonic-gate * 2. The FPUSED flag indicates that all FP state has been successfully stored 459*0Sstevel@tonic-gate * on the stack. 
It should not be set until this save has been completed. 460*0Sstevel@tonic-gate * 461*0Sstevel@tonic-gate * 3. The FPUSED flag should not be cleared on exit until all FP state has 462*0Sstevel@tonic-gate * been restored from the stack. If an error occurs while restoring 463*0Sstevel@tonic-gate * data from the stack, the error handler can check this flag to see if 464*0Sstevel@tonic-gate * a restore is necessary. 465*0Sstevel@tonic-gate * 466*0Sstevel@tonic-gate * 4. Code run under the new lofault handler must be kept to a minimum. In 467*0Sstevel@tonic-gate * particular, any calls to FP_ALLOWMIGRATE, which could result in a call 468*0Sstevel@tonic-gate * to kpreempt(), should not be made until after the lofault handler has 469*0Sstevel@tonic-gate * been restored. 470*0Sstevel@tonic-gate */ 471*0Sstevel@tonic-gate 472*0Sstevel@tonic-gate/* 473*0Sstevel@tonic-gate * VIS_COPY_THRESHOLD indicates the minimum number of bytes needed 474*0Sstevel@tonic-gate * to "break even" using FP/VIS-accelerated memory operations. 475*0Sstevel@tonic-gate * The FPBLK code assumes a minimum number of bytes are available 476*0Sstevel@tonic-gate * to be moved on entry. Check that code carefully before 477*0Sstevel@tonic-gate * reducing VIS_COPY_THRESHOLD below 256. 478*0Sstevel@tonic-gate */ 479*0Sstevel@tonic-gate/* 480*0Sstevel@tonic-gate * This shadows sys/machsystm.h which can't be included due to the lack of 481*0Sstevel@tonic-gate * _ASM guards in include files it references. Change it here, change it there. 
482*0Sstevel@tonic-gate */ 483*0Sstevel@tonic-gate#define VIS_COPY_THRESHOLD 256 484*0Sstevel@tonic-gate 485*0Sstevel@tonic-gate/* 486*0Sstevel@tonic-gate * TEST for very short copies 487*0Sstevel@tonic-gate * Be aware that the maximum unroll for the short unaligned case 488*0Sstevel@tonic-gate * is SHORTCOPY+1 489*0Sstevel@tonic-gate */ 490*0Sstevel@tonic-gate#define SHORTCOPY 3 491*0Sstevel@tonic-gate#define CHKSIZE 39 492*0Sstevel@tonic-gate 493*0Sstevel@tonic-gate/* 494*0Sstevel@tonic-gate * Indicates that we're to trampoline to the error handler. 495*0Sstevel@tonic-gate * Entry points bcopy, copyin_noerr, and copyout_noerr use this flag. 496*0Sstevel@tonic-gate * kcopy, copyout, xcopyout, copyin, and xcopyin do not set this flag. 497*0Sstevel@tonic-gate */ 498*0Sstevel@tonic-gate#define FPUSED_FLAG 1 499*0Sstevel@tonic-gate#define TRAMP_FLAG 2 500*0Sstevel@tonic-gate#define MASK_FLAGS 3 501*0Sstevel@tonic-gate 502*0Sstevel@tonic-gate/* 503*0Sstevel@tonic-gate * Number of outstanding prefetches. 504*0Sstevel@tonic-gate * Testing with 1200 MHz Cheetah+ and Jaguar gives best results with 505*0Sstevel@tonic-gate * two prefetches, one with a reach of 8*BLOCK_SIZE+8 and one with a 506*0Sstevel@tonic-gate * reach of 5*BLOCK_SIZE. The double prefetch gives a typical improvement 507*0Sstevel@tonic-gate * of 5% for large copies as compared to a single prefetch. The reason 508*0Sstevel@tonic-gate * for the improvement is that with Cheetah and Jaguar, some prefetches 509*0Sstevel@tonic-gate * are dropped due to the prefetch queue being full. The second prefetch 510*0Sstevel@tonic-gate * reduces the number of cache lines that are dropped. 511*0Sstevel@tonic-gate * Do not remove the double prefetch or change either CHEETAH_PREFETCH 512*0Sstevel@tonic-gate * or CHEETAH_2ND_PREFETCH without extensive performance tests to prove 513*0Sstevel@tonic-gate * there is no loss of performance.
514*0Sstevel@tonic-gate */ 515*0Sstevel@tonic-gate#define CHEETAH_PREFETCH 8 516*0Sstevel@tonic-gate#define CHEETAH_2ND_PREFETCH 5 517*0Sstevel@tonic-gate 518*0Sstevel@tonic-gate#define VIS_BLOCKSIZE 64 519*0Sstevel@tonic-gate 520*0Sstevel@tonic-gate/* 521*0Sstevel@tonic-gate * Size of stack frame in order to accomodate a 64-byte aligned 522*0Sstevel@tonic-gate * floating-point register save area and 2 64-bit temp locations. 523*0Sstevel@tonic-gate * All copy functions use two quadrants of fp registers; to assure a 524*0Sstevel@tonic-gate * block-aligned two block buffer in which to save we must reserve 525*0Sstevel@tonic-gate * three blocks on stack. Not all functions preserve %pfrs on stack 526*0Sstevel@tonic-gate * or need to preserve %gsr but we use HWCOPYFRAMESIZE for all. 527*0Sstevel@tonic-gate * 528*0Sstevel@tonic-gate * _______________________________________ <-- %fp + STACK_BIAS 529*0Sstevel@tonic-gate * | We may need to preserve 2 quadrants | 530*0Sstevel@tonic-gate * | of fp regs, but since we do so with | 531*0Sstevel@tonic-gate * | BST/BLD we need room in which to | 532*0Sstevel@tonic-gate * | align to VIS_BLOCKSIZE bytes. So | 533*0Sstevel@tonic-gate * | this area is 3 * VIS_BLOCKSIZE. 
| <-- - SAVED_FPREGS_OFFSET 534*0Sstevel@tonic-gate * |-------------------------------------| 535*0Sstevel@tonic-gate * | 8 bytes to save %fprs | <-- - SAVED_FPRS_OFFSET 536*0Sstevel@tonic-gate * |-------------------------------------| 537*0Sstevel@tonic-gate * | 8 bytes to save %gsr | <-- - SAVED_GSR_OFFSET 538*0Sstevel@tonic-gate * --------------------------------------- 539*0Sstevel@tonic-gate */ 540*0Sstevel@tonic-gate#define HWCOPYFRAMESIZE ((VIS_BLOCKSIZE * (2 + 1)) + (2 * 8)) 541*0Sstevel@tonic-gate#define SAVED_FPREGS_OFFSET (VIS_BLOCKSIZE * 3) 542*0Sstevel@tonic-gate#define SAVED_FPREGS_ADJUST ((VIS_BLOCKSIZE * 2) - 1) 543*0Sstevel@tonic-gate#define SAVED_FPRS_OFFSET (SAVED_FPREGS_OFFSET + 8) 544*0Sstevel@tonic-gate#define SAVED_GSR_OFFSET (SAVED_FPRS_OFFSET + 8) 545*0Sstevel@tonic-gate 546*0Sstevel@tonic-gate/* 547*0Sstevel@tonic-gate * Common macros used by the various versions of the block copy 548*0Sstevel@tonic-gate * routines in this file. 549*0Sstevel@tonic-gate */ 550*0Sstevel@tonic-gate 551*0Sstevel@tonic-gate/* 552*0Sstevel@tonic-gate * In FP copies if we do not have preserved data to restore over 553*0Sstevel@tonic-gate * the fp regs we used then we must zero those regs to avoid 554*0Sstevel@tonic-gate * exposing portions of the data to later threads (data security). 555*0Sstevel@tonic-gate * 556*0Sstevel@tonic-gate * Copy functions use either quadrants 1 and 3 or 2 and 4. 557*0Sstevel@tonic-gate * 558*0Sstevel@tonic-gate * FZEROQ1Q3: Zero quadrants 1 and 3, ie %f0 - %f15 and %f32 - %f47 559*0Sstevel@tonic-gate * FZEROQ2Q4: Zero quadrants 2 and 4, ie %f16 - %f31 and %f48 - %f63 560*0Sstevel@tonic-gate * 561*0Sstevel@tonic-gate * The instructions below are quicker than repeated fzero instructions 562*0Sstevel@tonic-gate * since they can dispatch down two fp pipelines. 
 */
/* Zero fp quadrants 1 and 3 (%f0-%f15, %f32-%f47) via two zeroed sources. */
#define	FZEROQ1Q3			\
	fzero	%f0			;\
	fzero	%f2			;\
	faddd	%f0, %f2, %f4		;\
	fmuld	%f0, %f2, %f6		;\
	faddd	%f0, %f2, %f8		;\
	fmuld	%f0, %f2, %f10		;\
	faddd	%f0, %f2, %f12		;\
	fmuld	%f0, %f2, %f14		;\
	faddd	%f0, %f2, %f32		;\
	fmuld	%f0, %f2, %f34		;\
	faddd	%f0, %f2, %f36		;\
	fmuld	%f0, %f2, %f38		;\
	faddd	%f0, %f2, %f40		;\
	fmuld	%f0, %f2, %f42		;\
	faddd	%f0, %f2, %f44		;\
	fmuld	%f0, %f2, %f46

/* Zero fp quadrants 2 and 4 (%f16-%f31, %f48-%f63) via two zeroed sources. */
#define	FZEROQ2Q4			\
	fzero	%f16			;\
	fzero	%f18			;\
	faddd	%f16, %f18, %f20	;\
	fmuld	%f16, %f18, %f22	;\
	faddd	%f16, %f18, %f24	;\
	fmuld	%f16, %f18, %f26	;\
	faddd	%f16, %f18, %f28	;\
	fmuld	%f16, %f18, %f30	;\
	faddd	%f16, %f18, %f48	;\
	fmuld	%f16, %f18, %f50	;\
	faddd	%f16, %f18, %f52	;\
	fmuld	%f16, %f18, %f54	;\
	faddd	%f16, %f18, %f56	;\
	fmuld	%f16, %f18, %f58	;\
	faddd	%f16, %f18, %f60	;\
	fmuld	%f16, %f18, %f62

/*
 * Macros to save and restore quadrants 1 and 3 or 2 and 4 to/from the stack.
 * Used to save and restore in-use fp registers when we want to use FP
 * and find fp already in use and copy size still large enough to justify
 * the additional overhead of this save and restore.
 *
 * A membar #Sync is needed before save to sync fp ops initiated before
 * the call to the copy function (by whoever has fp in use); for example
 * an earlier block load to the quadrant we are about to save may still be
 * "in flight".  A membar #Sync is required at the end of the save to
 * sync our block store (the copy code is about to begin ldd's to the
 * first quadrant).  Note, however, that since Cheetah pipeline block load
 * is blocking we can omit the initial membar before saving fp state (they're
 * commented below in case of future porting to a chip that does not block
 * on block load).
 *
 * Similarly: a membar #Sync before restore allows the block stores of
 * the copy operation to complete before we fill the quadrants with their
 * original data, and a membar #Sync after restore lets the block loads
 * of the restore complete before we return to whoever has the fp regs
 * in use.  To avoid repeated membar #Sync we make it the responsibility
 * of the copy code to membar #Sync immediately after copy is complete
 * and before using the BLD_*_FROMSTACK macro.
 */
#if !defined(lint)
/* Block-store %f0-%f15 and %f32-%f47 to the block-aligned stack save area. */
#define BST_FPQ1Q3_TOSTACK(tmp1)				\
	/* membar #Sync	*/					;\
	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
	stda	%f0, [tmp1]ASI_BLK_P				;\
	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
	stda	%f32, [tmp1]ASI_BLK_P				;\
	membar	#Sync

/* Block-load %f0-%f15 and %f32-%f47 back from the stack save area. */
#define	BLD_FPQ1Q3_FROMSTACK(tmp1)				\
	/* membar #Sync - provided at copy completion */	;\
	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
	ldda	[tmp1]ASI_BLK_P, %f0				;\
	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
	ldda	[tmp1]ASI_BLK_P, %f32				;\
	membar	#Sync

/* Block-store %f16-%f31 and %f48-%f63 to the block-aligned stack save area. */
#define BST_FPQ2Q4_TOSTACK(tmp1)				\
	/* membar #Sync */					;\
	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
	stda	%f16, [tmp1]ASI_BLK_P				;\
	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
	stda	%f48, [tmp1]ASI_BLK_P				;\
	membar	#Sync

/* Block-load %f16-%f31 and %f48-%f63 back from the stack save area. */
#define	BLD_FPQ2Q4_FROMSTACK(tmp1)				\
	/* membar #Sync - provided at copy completion */	;\
	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
	ldda	[tmp1]ASI_BLK_P, %f16				;\
	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
	ldda	[tmp1]ASI_BLK_P, %f48				;\
	membar	#Sync
#endif

/*
 * FP_NOMIGRATE and FP_ALLOWMIGRATE.  Prevent migration (or, stronger,
 * prevent preemption if there is no t_lwp to save FP state to on context
 * switch) before commencing a FP copy, and reallow it on completion or
 * in error trampoline paths when we were using FP copy.
 *
 * Both macros may call other functions, so be aware that all outputs are
 * forfeit after using these macros.  For this reason we do not pass registers
 * to use - we just use any outputs we want.
 *
 * For fpRAS we need to perform the fpRAS mechanism test on the same
 * CPU as we use for the copy operation, both so that we validate the
 * CPU we perform the copy on and so that we know which CPU failed
 * if a failure is detected.  Hence we need to be bound to "our" CPU.
 * This could be achieved through disabling preemption (and we do it that
 * way for threads with no t_lwp) but for larger copies this may hold
 * higher priority threads off of cpu for too long (eg, realtime).  So we
 * make use of the lightweight t_nomigrate mechanism where we can (ie, when
 * we have a t_lwp).
 *
 * Pseudo code:
 *
 * FP_NOMIGRATE:
 *
 *	if (curthread->t_lwp) {
 *		thread_nomigrate();
 *	} else {
 *		kpreempt_disable();
 *	}
 *
 * FP_ALLOWMIGRATE:
 *
 *	if (curthread->t_lwp) {
 *		thread_allowmigrate();
 *	} else {
 *		kpreempt_enable();
 *	}
 */

/*
 * If there is no lwp, bump T_PREEMPT directly (kpreempt_disable inline);
 * otherwise call thread_nomigrate().  Clobbers %o0/%o1.
 */
#define	FP_NOMIGRATE(label1, label2)				\
	ldn	[THREAD_REG + T_LWP], %o0			;\
	brz,a,pn %o0, label1/**/f				;\
	ldsb	[THREAD_REG + T_PREEMPT], %o1			;\
	call	thread_nomigrate				;\
	nop							;\
	ba	label2/**/f					;\
	nop							;\
label1:								;\
	inc	%o1						;\
	stb	%o1, [THREAD_REG + T_PREEMPT]			;\
label2:

/*
 * Inverse of FP_NOMIGRATE: either thread_allowmigrate(), or decrement
 * T_PREEMPT and, if it reaches zero with CPU_KPRUNRUN set, call
 * kpreempt() (kpreempt_enable inline).  Clobbers %o0/%o1.
 */
#define	FP_ALLOWMIGRATE(label1, label2)				\
	ldn	[THREAD_REG + T_LWP], %o0			;\
	brz,a,pn %o0, label1/**/f				;\
	ldsb	[THREAD_REG + T_PREEMPT], %o1			;\
	call	thread_allowmigrate				;\
	nop							;\
	ba	label2/**/f					;\
	nop							;\
label1:								;\
	dec	%o1						;\
	brnz,pn	%o1, label2/**/f				;\
	stb	%o1, [THREAD_REG + T_PREEMPT]			;\
	ldn	[THREAD_REG + T_CPU], %o0			;\
	ldub	[%o0 + CPU_KPRUNRUN], %o0			;\
	brz,pt	%o0, label2/**/f				;\
	nop							;\
	call	kpreempt					;\
	rdpr	%pil, %o0					;\
label2:

/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns errno value on pagefault error, 0 if all ok
 */

#if defined(lint)

/* ARGSUSED */
int
kcopy(const void *from, void *to, size_t count)
{ return(0); }

#else	/* lint */

	.seg	".text"
	.align	4

	! kcopy(%o0 = from, %o1 = to, %o2 = count) per the lint prototype.
	! Dispatch on size and src/dst relative alignment: small or
	! HW-copy-disabled cases go to .kcopy_small (leaf routine, %o4
	! holds saved lofault), large cases to .kcopy_more (FP/VIS copy,
	! %l6 holds saved lofault).
	ENTRY(kcopy)

	cmp	%o2, VIS_COPY_THRESHOLD		! check for leaf rtn case
	bleu,pt	%ncc, .kcopy_small		! go to larger cases
	xor	%o0, %o1, %o3			! are src, dst alignable?
	btst	7, %o3				!
	bz,pt	%ncc, .kcopy_8			! check for longword alignment
	nop
	btst	1, %o3				!
	bz,pt	%ncc, .kcopy_2			! check for half-word
	nop
	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	tst	%o3
	bz,pn	%icc, .kcopy_small		! if zero, disable HW copy
	cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .kcopy_small		! go to small copy
	nop
	ba,pt	%ncc, .kcopy_more		! otherwise go to large copy
	nop
.kcopy_2:
	btst	3, %o3				!
	bz,pt	%ncc, .kcopy_4			! check for word alignment
	nop
	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
	tst	%o3
	bz,pn	%icc, .kcopy_small		! if zero, disable HW copy
	cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .kcopy_small		! go to small copy
	nop
	ba,pt	%ncc, .kcopy_more		! otherwise go to large copy
	nop
.kcopy_4:
	! already checked longword, must be word aligned
	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
	tst	%o3
	bz,pn	%icc, .kcopy_small		! if zero, disable HW copy
	cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .kcopy_small		! go to small copy
	nop
	ba,pt	%ncc, .kcopy_more		! otherwise go to large copy
	nop
.kcopy_8:
	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
	tst	%o3
	bz,pn	%icc, .kcopy_small		! if zero, disable HW copy
	cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .kcopy_small		! go to small copy
	nop
	ba,pt	%ncc, .kcopy_more		! otherwise go to large copy
	nop

.kcopy_small:
	sethi	%hi(.sm_copyerr), %o5		! sm_copyerr is lofault value
	or	%o5, %lo(.sm_copyerr), %o5
	ldn	[THREAD_REG + T_LOFAULT], %o4	! save existing handler
	membar	#Sync				! sync error barrier
	ba,pt	%ncc, .sm_do_copy		! common code
	stn	%o5, [THREAD_REG + T_LOFAULT]	! set t_lofault

.kcopy_more:
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	sethi	%hi(.copyerr), %l7		! copyerr is lofault value
	or	%l7, %lo(.copyerr), %l7
	ldn	[THREAD_REG + T_LOFAULT], %l6	! save existing handler
	membar	#Sync				! sync error barrier
	ba,pt	%ncc, .do_copy			! common code
	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault


/*
 * We got here because of a fault during bcopy_more, called from kcopy or bcopy.
 * Errno value is in %g1.  bcopy_more uses fp quadrants 1 and 3.
 */
.copyerr:
	! Faults while restoring fp state below would be fatal; route any
	! further lofault through .copyerr2 (which panics) while we unwind.
	set	.copyerr2, %l0
	membar	#Sync				! sync error barrier
	stn	%l0, [THREAD_REG + T_LOFAULT]	! set t_lofault
	btst	FPUSED_FLAG, %l6
	bz	%ncc, 1f
	and	%l6, TRAMP_FLAG, %l0		! copy trampoline flag to %l0

	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
	wr	%o2, 0, %gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz,pt	%icc, 4f
	nop

	! FP was live on entry: restore the caller's quadrants 1 and 3.
	BLD_FPQ1Q3_FROMSTACK(%o2)

	ba,pt	%ncc, 1f
	wr	%o3, 0, %fprs		! restore fprs

4:
	! FP was not live on entry: zero the quadrants we dirtied
	! (data security) rather than restoring.
	FZEROQ1Q3
	wr	%o3, 0, %fprs		! restore fprs

	!
	! Need to cater for the different expectations of kcopy
	! and bcopy.  kcopy will *always* set a t_lofault handler
	! If it fires, we're expected to just return the error code
	! and *not* to invoke any existing error handler.  As far as
	! bcopy is concerned, we only set t_lofault if there was an
	! existing lofault handler.  In that case we're expected to
	! invoke the previously existing handler after resetting the
	! t_lofault value.
	!
1:
	andn	%l6, MASK_FLAGS, %l6		! turn trampoline flag off
	membar	#Sync				! sync error barrier
	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	FP_ALLOWMIGRATE(5, 6)

	btst	TRAMP_FLAG, %l0
	bnz,pn	%ncc, 3f
	nop
	ret
	restore	%g1, 0, %o0

3:
	!
	! We're here via bcopy.  There *must* have been an error handler
	! in place otherwise we would have died a nasty death already.
	!
	jmp	%l6				! goto real handler
	restore	%g0, 0, %o0		! dispose of copy window

/*
 * We got here because of a fault in .copyerr.  We can't safely restore fp
 * state, so we panic.
 */
fp_panic_msg:
	.asciz	"Unable to restore fp state after copy operation"

	.align	4
.copyerr2:
	set	fp_panic_msg, %o0
	call	panic
	nop

/*
 * We got here because of a fault during a small kcopy or bcopy.
 * No floating point registers are used by the small copies.
 * Errno value is in %g1.
 */
.sm_copyerr:
1:
	btst	TRAMP_FLAG, %o4
	membar	#Sync
	andn	%o4, TRAMP_FLAG, %o4
	bnz,pn	%ncc, 3f
	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g1, %o0
3:
	jmp	%o4				! goto real handler
	mov	%g0, %o0			!

	SET_SIZE(kcopy)
#endif	/* lint */


/*
 * Copy a block of storage - must not overlap (from + len <= to).
921*0Sstevel@tonic-gate * Registers: l6 - saved t_lofault 922*0Sstevel@tonic-gate * (for short copies, o4 - saved t_lofault) 923*0Sstevel@tonic-gate * 924*0Sstevel@tonic-gate * Copy a page of memory. 925*0Sstevel@tonic-gate * Assumes double word alignment and a count >= 256. 926*0Sstevel@tonic-gate */ 927*0Sstevel@tonic-gate#if defined(lint) 928*0Sstevel@tonic-gate 929*0Sstevel@tonic-gate/* ARGSUSED */ 930*0Sstevel@tonic-gatevoid 931*0Sstevel@tonic-gatebcopy(const void *from, void *to, size_t count) 932*0Sstevel@tonic-gate{} 933*0Sstevel@tonic-gate 934*0Sstevel@tonic-gate#else /* lint */ 935*0Sstevel@tonic-gate 936*0Sstevel@tonic-gate ENTRY(bcopy) 937*0Sstevel@tonic-gate 938*0Sstevel@tonic-gate cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case 939*0Sstevel@tonic-gate bleu,pt %ncc, .bcopy_small ! go to larger cases 940*0Sstevel@tonic-gate xor %o0, %o1, %o3 ! are src, dst alignable? 941*0Sstevel@tonic-gate btst 7, %o3 ! 942*0Sstevel@tonic-gate bz,pt %ncc, .bcopy_8 ! check for longword alignment 943*0Sstevel@tonic-gate nop 944*0Sstevel@tonic-gate btst 1, %o3 ! 945*0Sstevel@tonic-gate bz,pt %ncc, .bcopy_2 ! check for half-word 946*0Sstevel@tonic-gate nop 947*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit 948*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_1)], %o3 949*0Sstevel@tonic-gate tst %o3 950*0Sstevel@tonic-gate bz,pn %icc, .bcopy_small ! if zero, disable HW copy 951*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 952*0Sstevel@tonic-gate bleu,pt %ncc, .bcopy_small ! go to small copy 953*0Sstevel@tonic-gate nop 954*0Sstevel@tonic-gate ba,pt %ncc, .bcopy_more ! otherwise go to large copy 955*0Sstevel@tonic-gate nop 956*0Sstevel@tonic-gate.bcopy_2: 957*0Sstevel@tonic-gate btst 3, %o3 ! 958*0Sstevel@tonic-gate bz,pt %ncc, .bcopy_4 ! check for word alignment 959*0Sstevel@tonic-gate nop 960*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_2), %o3 ! 
Check copy limit 961*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_2)], %o3 962*0Sstevel@tonic-gate tst %o3 963*0Sstevel@tonic-gate bz,pn %icc, .bcopy_small ! if zero, disable HW copy 964*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 965*0Sstevel@tonic-gate bleu,pt %ncc, .bcopy_small ! go to small copy 966*0Sstevel@tonic-gate nop 967*0Sstevel@tonic-gate ba,pt %ncc, .bcopy_more ! otherwise go to large copy 968*0Sstevel@tonic-gate nop 969*0Sstevel@tonic-gate.bcopy_4: 970*0Sstevel@tonic-gate ! already checked longword, must be word aligned 971*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit 972*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_4)], %o3 973*0Sstevel@tonic-gate tst %o3 974*0Sstevel@tonic-gate bz,pn %icc, .bcopy_small ! if zero, disable HW copy 975*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 976*0Sstevel@tonic-gate bleu,pt %ncc, .bcopy_small ! go to small copy 977*0Sstevel@tonic-gate nop 978*0Sstevel@tonic-gate ba,pt %ncc, .bcopy_more ! otherwise go to large copy 979*0Sstevel@tonic-gate nop 980*0Sstevel@tonic-gate.bcopy_8: 981*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit 982*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_8)], %o3 983*0Sstevel@tonic-gate tst %o3 984*0Sstevel@tonic-gate bz,pn %icc, .bcopy_small ! if zero, disable HW copy 985*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 986*0Sstevel@tonic-gate bleu,pt %ncc, .bcopy_small ! go to small copy 987*0Sstevel@tonic-gate nop 988*0Sstevel@tonic-gate ba,pt %ncc, .bcopy_more ! otherwise go to large copy 989*0Sstevel@tonic-gate nop 990*0Sstevel@tonic-gate 991*0Sstevel@tonic-gate .align 16 992*0Sstevel@tonic-gate.bcopy_small: 993*0Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o4 ! 
save t_lofault 994*0Sstevel@tonic-gate tst %o4 995*0Sstevel@tonic-gate bz,pt %icc, .sm_do_copy 996*0Sstevel@tonic-gate nop 997*0Sstevel@tonic-gate sethi %hi(.sm_copyerr), %o5 998*0Sstevel@tonic-gate or %o5, %lo(.sm_copyerr), %o5 999*0Sstevel@tonic-gate membar #Sync ! sync error barrier 1000*0Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] ! install new vector 1001*0Sstevel@tonic-gate or %o4, TRAMP_FLAG, %o4 ! error should trampoline 1002*0Sstevel@tonic-gate.sm_do_copy: 1003*0Sstevel@tonic-gate cmp %o2, SHORTCOPY ! check for really short case 1004*0Sstevel@tonic-gate bleu,pt %ncc, .bc_sm_left ! 1005*0Sstevel@tonic-gate cmp %o2, CHKSIZE ! check for medium length cases 1006*0Sstevel@tonic-gate bgu,pn %ncc, .bc_med ! 1007*0Sstevel@tonic-gate or %o0, %o1, %o3 ! prepare alignment check 1008*0Sstevel@tonic-gate andcc %o3, 0x3, %g0 ! test for alignment 1009*0Sstevel@tonic-gate bz,pt %ncc, .bc_sm_word ! branch to word aligned case 1010*0Sstevel@tonic-gate.bc_sm_movebytes: 1011*0Sstevel@tonic-gate sub %o2, 3, %o2 ! adjust count to allow cc zero test 1012*0Sstevel@tonic-gate.bc_sm_notalign4: 1013*0Sstevel@tonic-gate ldub [%o0], %o3 ! read byte 1014*0Sstevel@tonic-gate stb %o3, [%o1] ! write byte 1015*0Sstevel@tonic-gate subcc %o2, 4, %o2 ! reduce count by 4 1016*0Sstevel@tonic-gate ldub [%o0 + 1], %o3 ! repeat for a total of 4 bytes 1017*0Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 1018*0Sstevel@tonic-gate stb %o3, [%o1 + 1] 1019*0Sstevel@tonic-gate ldub [%o0 - 2], %o3 1020*0Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 1021*0Sstevel@tonic-gate stb %o3, [%o1 - 2] 1022*0Sstevel@tonic-gate ldub [%o0 - 1], %o3 1023*0Sstevel@tonic-gate bgt,pt %ncc, .bc_sm_notalign4 ! loop til 3 or fewer bytes remain 1024*0Sstevel@tonic-gate stb %o3, [%o1 - 1] 1025*0Sstevel@tonic-gate add %o2, 3, %o2 ! restore count 1026*0Sstevel@tonic-gate.bc_sm_left: 1027*0Sstevel@tonic-gate tst %o2 1028*0Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit ! 
check for zero length 1029*0Sstevel@tonic-gate deccc %o2 ! reduce count for cc test 1030*0Sstevel@tonic-gate ldub [%o0], %o3 ! move one byte 1031*0Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 1032*0Sstevel@tonic-gate stb %o3, [%o1] 1033*0Sstevel@tonic-gate ldub [%o0 + 1], %o3 ! move another byte 1034*0Sstevel@tonic-gate deccc %o2 ! check for more 1035*0Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 1036*0Sstevel@tonic-gate stb %o3, [%o1 + 1] 1037*0Sstevel@tonic-gate ldub [%o0 + 2], %o3 ! move final byte 1038*0Sstevel@tonic-gate stb %o3, [%o1 + 2] 1039*0Sstevel@tonic-gate membar #Sync ! sync error barrier 1040*0Sstevel@tonic-gate andn %o4, TRAMP_FLAG, %o4 1041*0Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 1042*0Sstevel@tonic-gate retl 1043*0Sstevel@tonic-gate mov %g0, %o0 ! return 0 1044*0Sstevel@tonic-gate .align 16 1045*0Sstevel@tonic-gate nop ! instruction alignment 1046*0Sstevel@tonic-gate ! see discussion at start of file 1047*0Sstevel@tonic-gate.bc_sm_words: 1048*0Sstevel@tonic-gate lduw [%o0], %o3 ! read word 1049*0Sstevel@tonic-gate.bc_sm_wordx: 1050*0Sstevel@tonic-gate subcc %o2, 8, %o2 ! update count 1051*0Sstevel@tonic-gate stw %o3, [%o1] ! write word 1052*0Sstevel@tonic-gate add %o0, 8, %o0 ! update SRC 1053*0Sstevel@tonic-gate lduw [%o0 - 4], %o3 ! read word 1054*0Sstevel@tonic-gate add %o1, 8, %o1 ! update DST 1055*0Sstevel@tonic-gate bgt,pt %ncc, .bc_sm_words ! loop til done 1056*0Sstevel@tonic-gate stw %o3, [%o1 - 4] ! write word 1057*0Sstevel@tonic-gate addcc %o2, 7, %o2 ! restore count 1058*0Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 1059*0Sstevel@tonic-gate deccc %o2 1060*0Sstevel@tonic-gate bz,pt %ncc, .bc_sm_byte 1061*0Sstevel@tonic-gate.bc_sm_half: 1062*0Sstevel@tonic-gate subcc %o2, 2, %o2 ! reduce count by 2 1063*0Sstevel@tonic-gate add %o0, 2, %o0 ! advance SRC by 2 1064*0Sstevel@tonic-gate lduh [%o0 - 2], %o3 ! read half word 1065*0Sstevel@tonic-gate add %o1, 2, %o1 ! 
! advance DST by 2

/*
 * Tail of the bcopy() leaf-routine (non-FP) copy paths for small and
 * medium sized copies.  Register usage in this region (see the
 * register-usage warning near the top of the file):
 *	%o0 = src, %o1 = dst, %o2 = remaining byte count,
 *	%o3 = data scratch, %o4 = saved t_lofault | TRAMP_FLAG.
 * Many loads/stores below sit in branch delay slots and execute
 * regardless of the branch outcome (standard SPARC delay-slot rules).
 */
	bgt,pt	%ncc, .bc_sm_half	! loop til done
	sth	%o3, [%o1 - 2]		! write half word (delay slot)
	addcc	%o2, 1, %o2		! restore count
	bz,pt	%ncc, .bc_sm_exit
	nop
.bc_sm_byte:
	! exactly one byte remains: copy it, then restore t_lofault and
	! return 0 (same exit sequence as .bc_sm_exit below)
	ldub	[%o0], %o3
	stb	%o3, [%o1]
	membar	#Sync			! sync error barrier
	andn	%o4, TRAMP_FLAG, %o4	! strip flag before restoring handler
	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return 0

.bc_sm_word:
	! word-aligned small copy: move 4 bytes at a time, then mop up
	! the final 0-3 bytes one at a time
	subcc	%o2, 4, %o2		! update count
	bgt,pt	%ncc, .bc_sm_wordx
	lduw	[%o0], %o3		! read word (delay slot)
	addcc	%o2, 3, %o2		! restore count
	bz,pt	%ncc, .bc_sm_exit
	stw	%o3, [%o1]		! write word (delay slot)
	deccc	%o2			! reduce count for cc test
	ldub	[%o0 + 4], %o3		! load one byte
	bz,pt	%ncc, .bc_sm_exit
	stb	%o3, [%o1 + 4]		! store one byte (delay slot)
	ldub	[%o0 + 5], %o3		! load second byte
	deccc	%o2
	bz,pt	%ncc, .bc_sm_exit
	stb	%o3, [%o1 + 5]		! store second byte (delay slot)
	ldub	[%o0 + 6], %o3		! load third byte
	stb	%o3, [%o1 + 6]		! store third byte
.bc_sm_exit:
	! common success exit: restore the caller's fault handler and
	! return 0
	membar	#Sync			! sync error barrier
	andn	%o4, TRAMP_FLAG, %o4
	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return 0

	.align	16
.bc_med:
	! medium-size copy: pick the widest access unit both addresses
	! can be brought to (xor exposes the bits on which src and dst
	! alignments differ and so can never agree)
	xor	%o0, %o1, %o3		! setup alignment check
	btst	1, %o3
	bnz,pt	%ncc, .bc_sm_movebytes	! unaligned
	nop
	btst	3, %o3
	bnz,pt	%ncc, .bc_med_half	! halfword aligned
	nop
	btst	7, %o3
	bnz,pt	%ncc, .bc_med_word	! word aligned
	nop
.bc_med_long:
	btst	3, %o0			! check for
	bz,pt	%ncc, .bc_med_long1	! word alignment
	nop
.bc_med_long0:
	! copy bytes until SRC is word aligned
	ldub	[%o0], %o3		! load one byte
	inc	%o0
	stb	%o3,[%o1]		! store byte
	inc	%o1
	btst	3, %o0
	bnz,pt	%ncc, .bc_med_long0
	dec	%o2			! (delay slot)
.bc_med_long1:				! word aligned
	btst	7, %o0			! check for long word
	bz,pt	%ncc, .bc_med_long2
	nop
	lduw	[%o0], %o3		! load word
	add	%o0, 4, %o0		! advance SRC by 4
	stw	%o3, [%o1]		! store word
	add	%o1, 4, %o1		! advance DST by 4
	sub	%o2, 4, %o2		! reduce count by 4
!
!	Now long word aligned and have at least 32 bytes to move
!
.bc_med_long2:
	sub	%o2, 31, %o2		! adjust count to allow cc zero test
.bc_med_lmove:
	! unrolled 4x: 32 bytes (4 long words) per iteration
	ldx	[%o0], %o3		! read long word
	stx	%o3, [%o1]		! write long word
	subcc	%o2, 32, %o2		! reduce count by 32
	ldx	[%o0 + 8], %o3		! repeat for a total for 4 long words
	add	%o0, 32, %o0		! advance SRC by 32
	stx	%o3, [%o1 + 8]
	ldx	[%o0 - 16], %o3
	add	%o1, 32, %o1		! advance DST by 32
	stx	%o3, [%o1 - 16]
	ldx	[%o0 - 8], %o3
	bgt,pt	%ncc, .bc_med_lmove	! loop til 31 or fewer bytes left
	stx	%o3, [%o1 - 8]		! (delay slot)
	addcc	%o2, 24, %o2		! restore count to long word offset
	ble,pt	%ncc, .bc_med_lextra	! check for more long words to move
	nop
.bc_med_lword:
	! move remaining whole long words one at a time
	ldx	[%o0], %o3		! read long word
	subcc	%o2, 8, %o2		! reduce count by 8
	stx	%o3, [%o1]		! write long word
	add	%o0, 8, %o0		! advance SRC by 8
	bgt,pt	%ncc, .bc_med_lword	! loop til 7 or fewer bytes left
	add	%o1, 8, %o1		! advance DST by 8 (delay slot)
.bc_med_lextra:
	! 0-7 bytes left: dispatch to the small-copy byte/half tails
	addcc	%o2, 7, %o2		! restore rest of count
	bz,pt	%ncc, .bc_sm_exit	! if zero, then done
	deccc	%o2
	bz,pt	%ncc, .bc_sm_byte
	nop
	ba,pt	%ncc, .bc_sm_half
	nop

	.align	16
.bc_med_word:
	! src/dst only word-alignable: byte-align SRC to a word first
	btst	3, %o0			! check for
	bz,pt	%ncc, .bc_med_word1	! word alignment
	nop
.bc_med_word0:
	ldub	[%o0], %o3		! load one byte
	inc	%o0
	stb	%o3,[%o1]		! store byte
	inc	%o1
	btst	3, %o0
	bnz,pt	%ncc, .bc_med_word0
	dec	%o2			! (delay slot)
!
!	Now word aligned and have at least 36 bytes to move
!
.bc_med_word1:
	sub	%o2, 15, %o2		! adjust count to allow cc zero test
.bc_med_wmove:
	! unrolled 4x: 16 bytes (4 words) per iteration
	lduw	[%o0], %o3		! read word
	stw	%o3, [%o1]		! write word
	subcc	%o2, 16, %o2		! reduce count by 16
	lduw	[%o0 + 4], %o3		! repeat for a total for 4 words
	add	%o0, 16, %o0		! advance SRC by 16
	stw	%o3, [%o1 + 4]
	lduw	[%o0 - 8], %o3
	add	%o1, 16, %o1		! advance DST by 16
	stw	%o3, [%o1 - 8]
	lduw	[%o0 - 4], %o3
	bgt,pt	%ncc, .bc_med_wmove	! loop til 15 or fewer bytes left
	stw	%o3, [%o1 - 4]		! (delay slot)
	addcc	%o2, 12, %o2		! restore count to word offset
	ble,pt	%ncc, .bc_med_wextra	! check for more words to move
	nop
.bc_med_word2:
	lduw	[%o0], %o3		! read word
	subcc	%o2, 4, %o2		! reduce count by 4
	stw	%o3, [%o1]		! write word
	add	%o0, 4, %o0		! advance SRC by 4
	bgt,pt	%ncc, .bc_med_word2	! loop til 3 or fewer bytes left
	add	%o1, 4, %o1		! advance DST by 4 (delay slot)
.bc_med_wextra:
	! 0-3 bytes left: dispatch to the small-copy byte/half tails
	addcc	%o2, 3, %o2		! restore rest of count
	bz,pt	%ncc, .bc_sm_exit	! if zero, then done
	deccc	%o2
	bz,pt	%ncc, .bc_sm_byte
	nop
	ba,pt	%ncc, .bc_sm_half
	nop

	.align	16
.bc_med_half:
	! src/dst only halfword-alignable: byte-align SRC to a halfword
	btst	1, %o0			! check for
	bz,pt	%ncc, .bc_med_half1	! half word alignment
	nop
	ldub	[%o0], %o3		! load one byte
	inc	%o0
	stb	%o3,[%o1]		! store byte
	inc	%o1
	dec	%o2
!
!	Now half word aligned and have at least 38 bytes to move
!
.bc_med_half1:
	sub	%o2, 7, %o2		! adjust count to allow cc zero test
.bc_med_hmove:
	! unrolled 4x: 8 bytes (4 halfwords) per iteration
	lduh	[%o0], %o3		! read half word
	sth	%o3, [%o1]		! write half word
	subcc	%o2, 8, %o2		! reduce count by 8
	lduh	[%o0 + 2], %o3		! repeat for a total for 4 halfwords
	add	%o0, 8, %o0		! advance SRC by 8
	sth	%o3, [%o1 + 2]
	lduh	[%o0 - 4], %o3
	add	%o1, 8, %o1		! advance DST by 8
	sth	%o3, [%o1 - 4]
	lduh	[%o0 - 2], %o3
	bgt,pt	%ncc, .bc_med_hmove	! loop til 7 or fewer bytes left
	sth	%o3, [%o1 - 2]		! (delay slot)
	addcc	%o2, 7, %o2		!
! restore count
	bz,pt	%ncc, .bc_sm_exit
	deccc	%o2
	bz,pt	%ncc, .bc_sm_byte
	nop
	ba,pt	%ncc, .bc_sm_half
	nop

	SET_SIZE(bcopy)

/*
 * The _more entry points are not intended to be used directly by
 * any caller from outside this file.  They are provided to allow
 * profiling and dtrace of the portions of the copy code that uses
 * the floating point registers.
 * This entry is particularly important as DTRACE (at least as of
 * 4/2004) does not support leaf functions.
 */

	ENTRY(bcopy_more)
.bcopy_more:
	! FP/VIS block-copy path.  Gets a register window and stack space
	! for saving the caller's FP state; %l6 carries the saved
	! t_lofault plus flag bits (TRAMP_FLAG, later FPUSED_FLAG).
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	ldn	[THREAD_REG + T_LOFAULT], %l6	! save t_lofault
	tst	%l6
	bz,pt	%ncc, .do_copy			! no handler installed: skip
	nop
	sethi	%hi(.copyerr), %o2
	or	%o2, %lo(.copyerr), %o2
	membar	#Sync			! sync error barrier
	stn	%o2, [THREAD_REG + T_LOFAULT]	! install new vector
	!
	! We've already captured whether t_lofault was zero on entry.
	! We need to mark ourselves as being from bcopy since both
	! kcopy and bcopy use the same code path. If TRAMP_FLAG is set
	! and the saved lofault was zero, we won't reset lofault on
	! returning.
	!
	or	%l6, TRAMP_FLAG, %l6

/*
 * Copies that reach here are larger than VIS_COPY_THRESHOLD bytes
 * Also, use of FP registers has been tested to be enabled
 */
.do_copy:
	FP_NOMIGRATE(6, 7)

	rd	%fprs, %o2		! check for unused fp
	st	%o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET]	! save orig %fprs
	btst	FPRS_FEF, %o2
	bz,a,pt	%icc, .do_blockcopy	! fp unused: just enable it
	wr	%g0, FPRS_FEF, %fprs	! (annulled delay slot: only if taken)

	! fp in use by the caller: spill the FP quads we will clobber
	BST_FPQ1Q3_TOSTACK(%o2)

.do_blockcopy:
	rd	%gsr, %o2
	stx	%o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET]	! save gsr
	or	%l6, FPUSED_FLAG, %l6	! error path must restore FP state

#define	REALSRC	%i0
#define	DST	%i1
#define	CNT	%i2
#define	SRC	%i3
#define	TMP	%i5

	! byte-copy until DST is VIS_BLOCKSIZE aligned
	andcc	DST, VIS_BLOCKSIZE - 1, TMP
	bz,pt	%ncc, 2f
	neg	TMP
	add	TMP, VIS_BLOCKSIZE, TMP

	! TMP = bytes required to align DST on FP_BLOCK boundary
	! Using SRC as a tmp here
	cmp	TMP, 3
	bleu,pt	%ncc, 1f
	sub	CNT,TMP,CNT		! adjust main count (delay slot)
	sub	TMP, 3, TMP		! adjust for end of loop test
.bc_blkalign:
	ldub	[REALSRC], SRC		! move 4 bytes per loop iteration
	stb	SRC, [DST]
	subcc	TMP, 4, TMP
	ldub	[REALSRC + 1], SRC
	add	REALSRC, 4, REALSRC
	stb	SRC, [DST + 1]
	ldub	[REALSRC - 2], SRC
	add	DST, 4, DST
	stb	SRC, [DST - 2]
	ldub	[REALSRC - 1], SRC
	bgu,pt	%ncc, .bc_blkalign
	stb	SRC, [DST - 1]		! (delay slot)

	addcc	TMP, 3, TMP		! restore count adjustment
	bz,pt	%ncc, 2f		! no bytes left?
	nop
1:	ldub	[REALSRC], SRC		! final 1-3 alignment bytes
	inc	REALSRC
	inc	DST
	deccc	TMP
	bgu	%ncc, 1b
	stb	SRC, [DST - 1]		! (delay slot)

2:
	! SRC tracks the 8-byte-aligned load address; alignaddr latches
	! the misalignment of REALSRC into %gsr for faligndata below
	andn	REALSRC, 0x7, SRC
	alignaddr REALSRC, %g0, %g0

	! SRC - 8-byte aligned
	! DST - 64-byte aligned
	! Prime the software pipeline: load/align the first block while
	! prefetching ahead (depth controlled by CHEETAH_PREFETCH).
	prefetch [SRC], #one_read
	prefetch [SRC + (1 * VIS_BLOCKSIZE)], #one_read
	prefetch [SRC + (2 * VIS_BLOCKSIZE)], #one_read
	prefetch [SRC + (3 * VIS_BLOCKSIZE)], #one_read
	ldd	[SRC], %f0
#if CHEETAH_PREFETCH > 4
	prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read
#endif
	ldd	[SRC + 0x08], %f2
#if CHEETAH_PREFETCH > 5
	prefetch [SRC + (5 * VIS_BLOCKSIZE)], #one_read
#endif
	ldd	[SRC + 0x10], %f4
#if CHEETAH_PREFETCH > 6
	prefetch [SRC + (6 * VIS_BLOCKSIZE)], #one_read
#endif
	faligndata %f0, %f2, %f32
	ldd	[SRC + 0x18], %f6
#if CHEETAH_PREFETCH > 7
	prefetch [SRC + (7 * VIS_BLOCKSIZE)], #one_read
#endif
	faligndata %f2, %f4, %f34
	ldd	[SRC + 0x20], %f8
	faligndata %f4, %f6, %f36
	ldd	[SRC + 0x28], %f10
	faligndata %f6, %f8, %f38
	ldd	[SRC + 0x30], %f12
	faligndata %f8, %f10, %f40
	ldd	[SRC + 0x38], %f14
	faligndata %f10, %f12, %f42
	ldd	[SRC + VIS_BLOCKSIZE], %f0
	sub	CNT, VIS_BLOCKSIZE, CNT
	add	SRC, VIS_BLOCKSIZE, SRC
	add	REALSRC, VIS_BLOCKSIZE, REALSRC
	ba,a,pt	%ncc, 1f
	nop
	.align	16
1:
	! steady-state loop: align+store one 64-byte block per iteration
	! (%f32-%f46 staged output, %f0-%f14 incoming data)
	ldd	[SRC + 0x08], %f2
	faligndata %f12, %f14, %f44
	ldd	[SRC + 0x10], %f4
	faligndata %f14, %f0, %f46
	stda	%f32, [DST]ASI_BLK_P
	ldd	[SRC + 0x18], %f6
	faligndata %f0, %f2, %f32
	ldd	[SRC + 0x20], %f8
	faligndata %f2, %f4, %f34
	ldd	[SRC + 0x28], %f10
	faligndata %f4, %f6, %f36
	ldd	[SRC + 0x30], %f12
	faligndata %f6, %f8, %f38
	ldd	[SRC + 0x38], %f14
	faligndata %f8, %f10, %f40
	sub	CNT, VIS_BLOCKSIZE, CNT
	ldd	[SRC + VIS_BLOCKSIZE], %f0
	faligndata %f10, %f12, %f42
	prefetch [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8], #one_read
	add	DST, VIS_BLOCKSIZE, DST
	prefetch [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read
	add	REALSRC, VIS_BLOCKSIZE, REALSRC
	cmp	CNT, VIS_BLOCKSIZE + 8
	bgu,pt	%ncc, 1b
	add	SRC, VIS_BLOCKSIZE, SRC	! (delay slot)

	! only if REALSRC & 0x7 is 0
	cmp	CNT, VIS_BLOCKSIZE
	bne	%ncc, 3f
	andcc	REALSRC, 0x7, %g0	! (delay slot)
	bz,pt	%ncc, 2f
	nop
3:
	! drain the pipeline: store the final staged block, remaining
	! bytes (if any) are finished by the byte loop at 5: below
	faligndata %f12, %f14, %f44
	faligndata %f14, %f0, %f46
	stda	%f32, [DST]ASI_BLK_P
	add	DST, VIS_BLOCKSIZE, DST
	ba,pt	%ncc, 3f
	nop
2:
	! exactly one aligned block left (REALSRC & 7 == 0): copy it with
	! fsrc1 moves, no faligndata needed
	ldd	[SRC + 0x08], %f2
	fsrc1	%f12, %f44
	ldd	[SRC + 0x10], %f4
	fsrc1	%f14, %f46
	stda	%f32, [DST]ASI_BLK_P
	ldd	[SRC + 0x18], %f6
	fsrc1	%f0, %f32
	ldd	[SRC + 0x20], %f8
	fsrc1	%f2, %f34
	ldd	[SRC + 0x28], %f10
	fsrc1	%f4, %f36
	ldd	[SRC + 0x30], %f12
	fsrc1	%f6, %f38
	ldd	[SRC + 0x38], %f14
	fsrc1	%f8, %f40
	sub	CNT, VIS_BLOCKSIZE, CNT
	add	DST, VIS_BLOCKSIZE, DST
	add	SRC, VIS_BLOCKSIZE, SRC
	add	REALSRC, VIS_BLOCKSIZE, REALSRC
	fsrc1	%f10, %f42
	fsrc1	%f12, %f44
	fsrc1	%f14, %f46
	stda	%f32, [DST]ASI_BLK_P
	add	DST, VIS_BLOCKSIZE, DST
	ba,a,pt	%ncc, .bcb_exit
	nop

3:	tst	CNT
	bz,a,pt	%ncc, .bcb_exit
	nop

5:	ldub	[REALSRC], TMP		! trailing byte loop
	inc	REALSRC
	inc	DST
	deccc	CNT
	bgu	%ncc, 5b
	stb	TMP, [DST - 1]		! (delay slot)
.bcb_exit:
	membar	#Sync

	FPRAS_INTERVAL(FPRAS_BCOPY, 0, %l5, %o2, %o3, %o4, %o5, 8)
	FPRAS_REWRITE_TYPE2Q1(0, %l5, %o2, %o3, 8, 9)
	FPRAS_CHECK(FPRAS_BCOPY, %l5, 9)	! outputs lost

	! restore the caller's FP state saved in .do_copy
	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
	wr	%o2, 0, %gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz,pt	%icc, 4f
	nop

	BLD_FPQ1Q3_FROMSTACK(%o2)

	ba,pt	%ncc, 2f
	wr	%o3, 0, %fprs		! restore fprs (delay slot)
4:
	! fp was unused on entry: clear the quads we dirtied
	FZEROQ1Q3
	wr	%o3, 0, %fprs		! restore fprs
2:
	membar	#Sync			! sync error barrier
	andn	%l6, MASK_FLAGS, %l6
	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	FP_ALLOWMIGRATE(5, 6)
	ret
	restore	%g0, 0, %o0		! return 0

	SET_SIZE(bcopy_more)

#endif	/* lint */

/*
 * Block copy with possibly overlapped operands.
 */

#if defined(lint)

/*ARGSUSED*/
void
ovbcopy(const void *from, void *to, size_t count)
{}

#else	/* lint */

	ENTRY(ovbcopy)
	! %o0 = from, %o1 = to, %o2 = count.  If the regions cannot
	! overlap, hand off to the fast bcopy; otherwise byte-copy in a
	! direction that never clobbers unread source bytes.
	tst	%o2			! check count
	bgu,a	%ncc, 1f		! nothing to do or bad arguments
	subcc	%o0, %o1, %o3		! difference of from and to address
					!   (annulled: executes only if taken)

	retl				! return
	nop
1:
	bneg,a	%ncc, 2f
	neg	%o3			! if < 0, make it positive
2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
	.empty				!   no overlap
	cmp	%o0, %o1		! compare from and to addresses
	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
	nop
	!
	! Copy forwards.
	!
.ov_fwd:
	ldub	[%o0], %o3		! read from address
	inc	%o0			! inc from address
	stb	%o3, [%o1]		! write to address
	deccc	%o2			! dec count
	bgu	%ncc, .ov_fwd		! loop till done
	inc	%o1			! inc to address (delay slot)

	retl				! return
	nop
	!
	! Copy backwards.
	!
.ov_bkwd:
	deccc	%o2			! dec count
	ldub	[%o0 + %o2], %o3	! get byte at end of src
	bgu	%ncc, .ov_bkwd		! loop till done
	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst

	retl				! return
	nop

	SET_SIZE(ovbcopy)

#endif	/* lint */


/*
 * hwblkpagecopy()
 *
 * Copies exactly one page.  This routine assumes the caller (ppcopy)
 * has already disabled kernel preemption and has checked
 * use_hw_bcopy.  Preventing preemption also prevents cpu migration.
 */
#ifdef lint
/*ARGSUSED*/
void
hwblkpagecopy(const void *src, void *dst)
{ }
#else	/* lint */
	ENTRY(hwblkpagecopy)
	! get another window w/space for three aligned blocks of saved fpregs
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp

	! %i0 - source address (arg)
	! %i1 - destination address (arg)
	! %i2 - length of region (not arg)
	! %l0 - saved fprs
	! %l1 - pointer to saved fpregs

	rd	%fprs, %l0		! check for unused fp
	btst	FPRS_FEF, %l0
	bz,a,pt	%icc, 1f		! fp unused: just enable it
	wr	%g0, FPRS_FEF, %fprs	! (annulled delay slot: only if taken)

	! fp in use by the caller: spill the FP quads we will clobber
	BST_FPQ1Q3_TOSTACK(%l1)

1:	set	PAGESIZE, CNT		! always copies exactly one page
	mov	REALSRC, SRC		! page copy is always 8-byte aligned,
					! so SRC is used directly (no
					! faligndata needed, fsrc1 moves)

	! prime the software pipeline, prefetching ahead (depth
	! controlled by CHEETAH_PREFETCH)
	prefetch [SRC], #one_read
	prefetch [SRC + (1 * VIS_BLOCKSIZE)], #one_read
	prefetch [SRC + (2 * VIS_BLOCKSIZE)], #one_read
	prefetch [SRC + (3 * VIS_BLOCKSIZE)], #one_read
	ldd	[SRC], %f0
#if CHEETAH_PREFETCH > 4
	prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read
#endif
	ldd	[SRC + 0x08], %f2
#if CHEETAH_PREFETCH > 5
	prefetch [SRC + (5 * VIS_BLOCKSIZE)], #one_read
#endif
	ldd	[SRC + 0x10], %f4
#if CHEETAH_PREFETCH > 6
	prefetch [SRC + (6 * VIS_BLOCKSIZE)], #one_read
#endif
	fsrc1	%f0, %f32
	ldd	[SRC + 0x18], %f6
#if CHEETAH_PREFETCH > 7
	prefetch [SRC + (7 * VIS_BLOCKSIZE)], #one_read
#endif
	fsrc1	%f2, %f34
	ldd	[SRC + 0x20], %f8
	fsrc1	%f4, %f36
	ldd	[SRC + 0x28], %f10
	fsrc1	%f6, %f38
	ldd	[SRC + 0x30], %f12
	fsrc1	%f8, %f40
	ldd	[SRC + 0x38], %f14
	fsrc1	%f10, %f42
	ldd	[SRC + VIS_BLOCKSIZE], %f0
	sub	CNT, VIS_BLOCKSIZE, CNT
	add	SRC, VIS_BLOCKSIZE, SRC
	ba,a,pt	%ncc, 2f
	nop
	.align	16
2:
	! steady-state loop: one 64-byte block store per iteration
	ldd	[SRC + 0x08], %f2
	fsrc1	%f12, %f44
	ldd	[SRC + 0x10], %f4
	fsrc1	%f14, %f46
	stda	%f32, [DST]ASI_BLK_P
	ldd	[SRC + 0x18], %f6
	fsrc1	%f0, %f32
	ldd	[SRC + 0x20], %f8
	fsrc1	%f2, %f34
	ldd	[SRC + 0x28], %f10
	fsrc1	%f4, %f36
	ldd	[SRC + 0x30], %f12
	fsrc1	%f6, %f38
	ldd	[SRC + 0x38], %f14
	fsrc1	%f8, %f40
	ldd	[SRC + VIS_BLOCKSIZE], %f0
	fsrc1	%f10, %f42
	prefetch [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8], #one_read
	sub	CNT, VIS_BLOCKSIZE, CNT
	add	DST, VIS_BLOCKSIZE, DST
	cmp	CNT, VIS_BLOCKSIZE + 8
	prefetch [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read
	bgu,pt	%ncc, 2b
	add	SRC, VIS_BLOCKSIZE, SRC	! (delay slot)

	! trailing block
	ldd	[SRC + 0x08], %f2
	fsrc1	%f12, %f44
	ldd	[SRC + 0x10], %f4
	fsrc1	%f14, %f46
	stda	%f32, [DST]ASI_BLK_P
	ldd	[SRC + 0x18], %f6
	fsrc1	%f0, %f32
	ldd	[SRC + 0x20], %f8
	fsrc1	%f2, %f34
	ldd	[SRC + 0x28], %f10
	fsrc1	%f4, %f36
	ldd	[SRC + 0x30], %f12
	fsrc1	%f6, %f38
	ldd	[SRC + 0x38], %f14
	fsrc1	%f8, %f40
	sub	CNT, VIS_BLOCKSIZE, CNT
	add	DST, VIS_BLOCKSIZE, DST
	add	SRC, VIS_BLOCKSIZE, SRC
	fsrc1	%f10, %f42
	fsrc1	%f12, %f44
	fsrc1	%f14, %f46
	stda	%f32, [DST]ASI_BLK_P

	membar	#Sync

	FPRAS_INTERVAL(FPRAS_PGCOPY, 1, %l5, %o2, %o3, %o4, %o5, 8)
	FPRAS_REWRITE_TYPE1(1, %l5, %f32, %o2, 9)
	FPRAS_CHECK(FPRAS_PGCOPY, %l5, 9)	! lose outputs

	! restore (or zero) the FP quads we used, then the saved %fprs
	btst	FPRS_FEF, %l0
	bz,pt	%icc, 2f
	nop

	BLD_FPQ1Q3_FROMSTACK(%l3)
	ba	3f
	nop

2:	FZEROQ1Q3

3:	wr	%l0, 0, %fprs		! restore fprs
	ret
	restore	%g0, 0, %o0		! return 0

	SET_SIZE(hwblkpagecopy)
#endif	/* lint */


/*
 * Transfer data to and from user space -
 * Note that these routines can cause faults
 * It is assumed that the kernel has nothing at
 * less than KERNELBASE in the virtual address space.
 *
 * Note that copyin(9F) and copyout(9F) are part of the
 * DDI/DKI which specifies that they return '-1' on "errors."
 *
 * Sigh.
 *
 * So there's two extremely similar routines - xcopyin() and xcopyout()
 * which return the errno that we've faithfully computed.  This
 * allows other callers (e.g. uiomove(9F)) to work correctly.
 * Given that these are used pretty heavily, we expand the calling
 * sequences inline for all flavours (rather than making wrappers).
 *
 * There are also stub routines for xcopyout_little and xcopyin_little,
 * which currently are intended to handle requests of <= 16 bytes from
 * do_unaligned. Future enhancement to make them handle 8k pages efficiently
 * is left as an exercise...
1730*0Sstevel@tonic-gate */ 1731*0Sstevel@tonic-gate 1732*0Sstevel@tonic-gate/* 1733*0Sstevel@tonic-gate * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr) 1734*0Sstevel@tonic-gate * 1735*0Sstevel@tonic-gate * General theory of operation: 1736*0Sstevel@tonic-gate * 1737*0Sstevel@tonic-gate * The only difference between copy{in,out} and 1738*0Sstevel@tonic-gate * xcopy{in,out} is in the error handling routine they invoke 1739*0Sstevel@tonic-gate * when a memory access error occurs. xcopyOP returns the errno 1740*0Sstevel@tonic-gate * while copyOP returns -1 (see above). copy{in,out}_noerr set 1741*0Sstevel@tonic-gate * a special flag (by oring the TRAMP_FLAG into the fault handler address) 1742*0Sstevel@tonic-gate * if they are called with a fault handler already in place. That flag 1743*0Sstevel@tonic-gate * causes the default handlers to trampoline to the previous handler 1744*0Sstevel@tonic-gate * upon an error. 1745*0Sstevel@tonic-gate * 1746*0Sstevel@tonic-gate * None of the copyops routines grab a window until it's decided that 1747*0Sstevel@tonic-gate * we need to do a HW block copy operation. This saves a window 1748*0Sstevel@tonic-gate * spill/fill when we're called during socket ops. The typical IO 1749*0Sstevel@tonic-gate * path won't cause spill/fill traps. 1750*0Sstevel@tonic-gate * 1751*0Sstevel@tonic-gate * This code uses a set of 4 limits for the maximum size that will 1752*0Sstevel@tonic-gate * be copied given a particular input/output address alignment. 1753*0Sstevel@tonic-gate * If the value for a particular limit is zero, the copy will be performed 1754*0Sstevel@tonic-gate * by the plain copy loops rather than FPBLK. 1755*0Sstevel@tonic-gate * 1756*0Sstevel@tonic-gate * See the description of bcopy above for more details of the 1757*0Sstevel@tonic-gate * data copying algorithm and the default limits. 
1758*0Sstevel@tonic-gate * 1759*0Sstevel@tonic-gate */ 1760*0Sstevel@tonic-gate 1761*0Sstevel@tonic-gate/* 1762*0Sstevel@tonic-gate * Copy kernel data to user space (copyout/xcopyout/xcopyout_little). 1763*0Sstevel@tonic-gate */ 1764*0Sstevel@tonic-gate 1765*0Sstevel@tonic-gate#if defined(lint) 1766*0Sstevel@tonic-gate 1767*0Sstevel@tonic-gate 1768*0Sstevel@tonic-gate#else /* lint */ 1769*0Sstevel@tonic-gate/* 1770*0Sstevel@tonic-gate * We save the arguments in the following registers in case of a fault: 1771*0Sstevel@tonic-gate * kaddr - %l1 1772*0Sstevel@tonic-gate * uaddr - %l2 1773*0Sstevel@tonic-gate * count - %l3 1774*0Sstevel@tonic-gate */ 1775*0Sstevel@tonic-gate#define SAVE_SRC %l1 1776*0Sstevel@tonic-gate#define SAVE_DST %l2 1777*0Sstevel@tonic-gate#define SAVE_COUNT %l3 1778*0Sstevel@tonic-gate 1779*0Sstevel@tonic-gate#define SM_SAVE_SRC %g4 1780*0Sstevel@tonic-gate#define SM_SAVE_DST %g5 1781*0Sstevel@tonic-gate#define SM_SAVE_COUNT %o5 1782*0Sstevel@tonic-gate#define ERRNO %l5 1783*0Sstevel@tonic-gate 1784*0Sstevel@tonic-gate 1785*0Sstevel@tonic-gate#define REAL_LOFAULT %l4 1786*0Sstevel@tonic-gate/* 1787*0Sstevel@tonic-gate * Generic copyio fault handler. This is the first line of defense when a 1788*0Sstevel@tonic-gate * fault occurs in (x)copyin/(x)copyout. In order for this to function 1789*0Sstevel@tonic-gate * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT. 1790*0Sstevel@tonic-gate * This allows us to share common code for all the flavors of the copy 1791*0Sstevel@tonic-gate * operations, including the _noerr versions. 1792*0Sstevel@tonic-gate * 1793*0Sstevel@tonic-gate * Note that this function will restore the original input parameters before 1794*0Sstevel@tonic-gate * calling REAL_LOFAULT. So the real handler can vector to the appropriate 1795*0Sstevel@tonic-gate * member of the t_copyop structure, if needed. 
 */
	ENTRY(copyio_fault)
	! Entered via t_lofault on a fault in (x)copyin/(x)copyout.
	! %g1 holds the errno supplied by the trap code; %l6 holds the
	! saved t_lofault plus flag bits; REAL_LOFAULT (%l4) is the
	! routine-specific handler we tail-jump to at the end.
	membar	#Sync
	mov	%g1,ERRNO		! save errno in ERRNO
	btst	FPUSED_FLAG, %l6
	bz	%ncc, 1f		! FP regs untouched: skip FP restore
	nop

	! undo the FP state changes made by the block-copy path
	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
	wr	%o2, 0, %gsr		! restore gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz,pt	%icc, 4f
	nop

	BLD_FPQ2Q4_FROMSTACK(%o2)

	ba,pt	%ncc, 1f
	wr	%o3, 0, %fprs		! restore fprs (delay slot)

4:
	! fp was unused on entry: clear the quads we dirtied
	FZEROQ2Q4
	wr	%o3, 0, %fprs		! restore fprs

1:
	andn	%l6, FPUSED_FLAG, %l6
	membar	#Sync
	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	FP_ALLOWMIGRATE(5, 6)

	! restore the original arguments before vectoring to the real
	! handler, so it can retry/dispatch via t_copyop if needed
	mov	SAVE_SRC, %i0
	mov	SAVE_DST, %i1
	jmp	REAL_LOFAULT
	mov	SAVE_COUNT, %i2		! (delay slot)

	SET_SIZE(copyio_fault)


#endif

#if defined(lint)

/*ARGSUSED*/
int
copyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

	ENTRY(copyout)

	! %o0 = kaddr, %o1 = uaddr, %o2 = count.  Select leaf (small)
	! versus FP block (more) copy based on count, src/dst alignment,
	! and the per-alignment hw_copy_limit_* tunables (0 disables HW
	! copy for that alignment class).  Routine continues below.
	cmp	%o2, VIS_COPY_THRESHOLD	! check for leaf rtn case
	bleu,pt	%ncc, .copyout_small	! go to small copy if at/below threshold
	xor	%o0, %o1, %o3		! are src, dst alignable? (delay slot)
	btst	7, %o3			!
	bz,pt	%ncc, .copyout_8	! check for longword alignment
	nop
	btst	1, %o3			!
	bz,pt	%ncc, .copyout_2	! check for half-word
	nop
	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	tst	%o3
	bz,pn	%icc, .copyout_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .copyout_small	! go to small copy
	nop
	ba,pt	%ncc, .copyout_more	! otherwise go to large copy
	nop
.copyout_2:
	btst	3, %o3			!
1868*0Sstevel@tonic-gate bz,pt %ncc, .copyout_4 ! check for word alignment 1869*0Sstevel@tonic-gate nop 1870*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit 1871*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_2)], %o3 1872*0Sstevel@tonic-gate tst %o3 1873*0Sstevel@tonic-gate bz,pn %icc, .copyout_small ! if zero, disable HW copy 1874*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 1875*0Sstevel@tonic-gate bleu,pt %ncc, .copyout_small ! go to small copy 1876*0Sstevel@tonic-gate nop 1877*0Sstevel@tonic-gate ba,pt %ncc, .copyout_more ! otherwise go to large copy 1878*0Sstevel@tonic-gate nop 1879*0Sstevel@tonic-gate.copyout_4: 1880*0Sstevel@tonic-gate ! already checked longword, must be word aligned 1881*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit 1882*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_4)], %o3 1883*0Sstevel@tonic-gate tst %o3 1884*0Sstevel@tonic-gate bz,pn %icc, .copyout_small ! if zero, disable HW copy 1885*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 1886*0Sstevel@tonic-gate bleu,pt %ncc, .copyout_small ! go to small copy 1887*0Sstevel@tonic-gate nop 1888*0Sstevel@tonic-gate ba,pt %ncc, .copyout_more ! otherwise go to large copy 1889*0Sstevel@tonic-gate nop 1890*0Sstevel@tonic-gate.copyout_8: 1891*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit 1892*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_8)], %o3 1893*0Sstevel@tonic-gate tst %o3 1894*0Sstevel@tonic-gate bz,pn %icc, .copyout_small ! if zero, disable HW copy 1895*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 1896*0Sstevel@tonic-gate bleu,pt %ncc, .copyout_small ! go to small copy 1897*0Sstevel@tonic-gate nop 1898*0Sstevel@tonic-gate ba,pt %ncc, .copyout_more ! otherwise go to large copy 1899*0Sstevel@tonic-gate nop 1900*0Sstevel@tonic-gate 1901*0Sstevel@tonic-gate .align 16 1902*0Sstevel@tonic-gate nop ! instruction alignment 1903*0Sstevel@tonic-gate ! 
see discussion at start of file 1904*0Sstevel@tonic-gate.copyout_small: 1905*0Sstevel@tonic-gate sethi %hi(.sm_copyout_err), %o5 ! .sm_copyout_err is lofault 1906*0Sstevel@tonic-gate or %o5, %lo(.sm_copyout_err), %o5 1907*0Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o4 ! save existing handler 1908*0Sstevel@tonic-gate membar #Sync ! sync error barrier 1909*0Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] ! set t_lofault 1910*0Sstevel@tonic-gate.sm_do_copyout: 1911*0Sstevel@tonic-gate mov %o0, SM_SAVE_SRC 1912*0Sstevel@tonic-gate mov %o1, SM_SAVE_DST 1913*0Sstevel@tonic-gate cmp %o2, SHORTCOPY ! check for really short case 1914*0Sstevel@tonic-gate bleu,pt %ncc, .co_sm_left ! 1915*0Sstevel@tonic-gate mov %o2, SM_SAVE_COUNT 1916*0Sstevel@tonic-gate cmp %o2, CHKSIZE ! check for medium length cases 1917*0Sstevel@tonic-gate bgu,pn %ncc, .co_med ! 1918*0Sstevel@tonic-gate or %o0, %o1, %o3 ! prepare alignment check 1919*0Sstevel@tonic-gate andcc %o3, 0x3, %g0 ! test for alignment 1920*0Sstevel@tonic-gate bz,pt %ncc, .co_sm_word ! branch to word aligned case 1921*0Sstevel@tonic-gate.co_sm_movebytes: 1922*0Sstevel@tonic-gate sub %o2, 3, %o2 ! adjust count to allow cc zero test 1923*0Sstevel@tonic-gate.co_sm_notalign4: 1924*0Sstevel@tonic-gate ldub [%o0], %o3 ! read byte 1925*0Sstevel@tonic-gate subcc %o2, 4, %o2 ! reduce count by 4 1926*0Sstevel@tonic-gate stba %o3, [%o1]ASI_USER ! write byte 1927*0Sstevel@tonic-gate inc %o1 ! advance DST by 1 1928*0Sstevel@tonic-gate ldub [%o0 + 1], %o3 ! repeat for a total of 4 bytes 1929*0Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 1930*0Sstevel@tonic-gate stba %o3, [%o1]ASI_USER 1931*0Sstevel@tonic-gate inc %o1 ! advance DST by 1 1932*0Sstevel@tonic-gate ldub [%o0 - 2], %o3 1933*0Sstevel@tonic-gate stba %o3, [%o1]ASI_USER 1934*0Sstevel@tonic-gate inc %o1 ! 
advance DST by 1 1935*0Sstevel@tonic-gate ldub [%o0 - 1], %o3 1936*0Sstevel@tonic-gate stba %o3, [%o1]ASI_USER 1937*0Sstevel@tonic-gate bgt,pt %ncc, .co_sm_notalign4 ! loop til 3 or fewer bytes remain 1938*0Sstevel@tonic-gate inc %o1 ! advance DST by 1 1939*0Sstevel@tonic-gate add %o2, 3, %o2 ! restore count 1940*0Sstevel@tonic-gate.co_sm_left: 1941*0Sstevel@tonic-gate tst %o2 1942*0Sstevel@tonic-gate bz,pt %ncc, .co_sm_exit ! check for zero length 1943*0Sstevel@tonic-gate nop 1944*0Sstevel@tonic-gate ldub [%o0], %o3 ! load one byte 1945*0Sstevel@tonic-gate deccc %o2 ! reduce count for cc test 1946*0Sstevel@tonic-gate bz,pt %ncc, .co_sm_exit 1947*0Sstevel@tonic-gate stba %o3,[%o1]ASI_USER ! store one byte 1948*0Sstevel@tonic-gate ldub [%o0 + 1], %o3 ! load second byte 1949*0Sstevel@tonic-gate deccc %o2 1950*0Sstevel@tonic-gate inc %o1 1951*0Sstevel@tonic-gate bz,pt %ncc, .co_sm_exit 1952*0Sstevel@tonic-gate stba %o3,[%o1]ASI_USER ! store second byte 1953*0Sstevel@tonic-gate ldub [%o0 + 2], %o3 ! load third byte 1954*0Sstevel@tonic-gate inc %o1 1955*0Sstevel@tonic-gate stba %o3,[%o1]ASI_USER ! store third byte 1956*0Sstevel@tonic-gate membar #Sync ! sync error barrier 1957*0Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 1958*0Sstevel@tonic-gate retl 1959*0Sstevel@tonic-gate mov %g0, %o0 ! return 0 1960*0Sstevel@tonic-gate .align 16 1961*0Sstevel@tonic-gate.co_sm_words: 1962*0Sstevel@tonic-gate lduw [%o0], %o3 ! read word 1963*0Sstevel@tonic-gate.co_sm_wordx: 1964*0Sstevel@tonic-gate subcc %o2, 8, %o2 ! update count 1965*0Sstevel@tonic-gate stwa %o3, [%o1]ASI_USER ! write word 1966*0Sstevel@tonic-gate add %o0, 8, %o0 ! update SRC 1967*0Sstevel@tonic-gate lduw [%o0 - 4], %o3 ! read word 1968*0Sstevel@tonic-gate add %o1, 4, %o1 ! update DST 1969*0Sstevel@tonic-gate stwa %o3, [%o1]ASI_USER ! write word 1970*0Sstevel@tonic-gate bgt,pt %ncc, .co_sm_words ! loop til done 1971*0Sstevel@tonic-gate add %o1, 4, %o1 ! 
update DST 1972*0Sstevel@tonic-gate addcc %o2, 7, %o2 ! restore count 1973*0Sstevel@tonic-gate bz,pt %ncc, .co_sm_exit 1974*0Sstevel@tonic-gate nop 1975*0Sstevel@tonic-gate deccc %o2 1976*0Sstevel@tonic-gate bz,pt %ncc, .co_sm_byte 1977*0Sstevel@tonic-gate.co_sm_half: 1978*0Sstevel@tonic-gate subcc %o2, 2, %o2 ! reduce count by 2 1979*0Sstevel@tonic-gate lduh [%o0], %o3 ! read half word 1980*0Sstevel@tonic-gate add %o0, 2, %o0 ! advance SRC by 2 1981*0Sstevel@tonic-gate stha %o3, [%o1]ASI_USER ! write half word 1982*0Sstevel@tonic-gate bgt,pt %ncc, .co_sm_half ! loop til done 1983*0Sstevel@tonic-gate add %o1, 2, %o1 ! advance DST by 2 1984*0Sstevel@tonic-gate addcc %o2, 1, %o2 ! restore count 1985*0Sstevel@tonic-gate bz,pt %ncc, .co_sm_exit 1986*0Sstevel@tonic-gate nop 1987*0Sstevel@tonic-gate.co_sm_byte: 1988*0Sstevel@tonic-gate ldub [%o0], %o3 1989*0Sstevel@tonic-gate stba %o3, [%o1]ASI_USER 1990*0Sstevel@tonic-gate membar #Sync ! sync error barrier 1991*0Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 1992*0Sstevel@tonic-gate retl 1993*0Sstevel@tonic-gate mov %g0, %o0 ! return 0 1994*0Sstevel@tonic-gate .align 16 1995*0Sstevel@tonic-gate.co_sm_word: 1996*0Sstevel@tonic-gate subcc %o2, 4, %o2 ! update count 1997*0Sstevel@tonic-gate bgt,pt %ncc, .co_sm_wordx 1998*0Sstevel@tonic-gate lduw [%o0], %o3 ! read word 1999*0Sstevel@tonic-gate addcc %o2, 3, %o2 ! restore count 2000*0Sstevel@tonic-gate bz,pt %ncc, .co_sm_exit 2001*0Sstevel@tonic-gate stwa %o3, [%o1]ASI_USER ! write word 2002*0Sstevel@tonic-gate deccc %o2 ! reduce count for cc test 2003*0Sstevel@tonic-gate ldub [%o0 + 4], %o3 ! load one byte 2004*0Sstevel@tonic-gate add %o1, 4, %o1 2005*0Sstevel@tonic-gate bz,pt %ncc, .co_sm_exit 2006*0Sstevel@tonic-gate stba %o3, [%o1]ASI_USER ! store one byte 2007*0Sstevel@tonic-gate ldub [%o0 + 5], %o3 ! 
load second byte 2008*0Sstevel@tonic-gate deccc %o2 2009*0Sstevel@tonic-gate inc %o1 2010*0Sstevel@tonic-gate bz,pt %ncc, .co_sm_exit 2011*0Sstevel@tonic-gate stba %o3, [%o1]ASI_USER ! store second byte 2012*0Sstevel@tonic-gate ldub [%o0 + 6], %o3 ! load third byte 2013*0Sstevel@tonic-gate inc %o1 2014*0Sstevel@tonic-gate stba %o3, [%o1]ASI_USER ! store third byte 2015*0Sstevel@tonic-gate.co_sm_exit: 2016*0Sstevel@tonic-gate membar #Sync ! sync error barrier 2017*0Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 2018*0Sstevel@tonic-gate retl 2019*0Sstevel@tonic-gate mov %g0, %o0 ! return 0 2020*0Sstevel@tonic-gate 2021*0Sstevel@tonic-gate .align 16 2022*0Sstevel@tonic-gate.co_med: 2023*0Sstevel@tonic-gate xor %o0, %o1, %o3 ! setup alignment check 2024*0Sstevel@tonic-gate btst 1, %o3 2025*0Sstevel@tonic-gate bnz,pt %ncc, .co_sm_movebytes ! unaligned 2026*0Sstevel@tonic-gate nop 2027*0Sstevel@tonic-gate btst 3, %o3 2028*0Sstevel@tonic-gate bnz,pt %ncc, .co_med_half ! halfword aligned 2029*0Sstevel@tonic-gate nop 2030*0Sstevel@tonic-gate btst 7, %o3 2031*0Sstevel@tonic-gate bnz,pt %ncc, .co_med_word ! word aligned 2032*0Sstevel@tonic-gate nop 2033*0Sstevel@tonic-gate.co_med_long: 2034*0Sstevel@tonic-gate btst 3, %o0 ! check for 2035*0Sstevel@tonic-gate bz,pt %ncc, .co_med_long1 ! word alignment 2036*0Sstevel@tonic-gate nop 2037*0Sstevel@tonic-gate.co_med_long0: 2038*0Sstevel@tonic-gate ldub [%o0], %o3 ! load one byte 2039*0Sstevel@tonic-gate inc %o0 2040*0Sstevel@tonic-gate stba %o3,[%o1]ASI_USER ! store byte 2041*0Sstevel@tonic-gate inc %o1 2042*0Sstevel@tonic-gate btst 3, %o0 2043*0Sstevel@tonic-gate bnz,pt %ncc, .co_med_long0 2044*0Sstevel@tonic-gate dec %o2 2045*0Sstevel@tonic-gate.co_med_long1: ! word aligned 2046*0Sstevel@tonic-gate btst 7, %o0 ! check for long word 2047*0Sstevel@tonic-gate bz,pt %ncc, .co_med_long2 2048*0Sstevel@tonic-gate nop 2049*0Sstevel@tonic-gate lduw [%o0], %o3 ! 
load word 2050*0Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 2051*0Sstevel@tonic-gate stwa %o3, [%o1]ASI_USER ! store word 2052*0Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 2053*0Sstevel@tonic-gate sub %o2, 4, %o2 ! reduce count by 4 2054*0Sstevel@tonic-gate! 2055*0Sstevel@tonic-gate! Now long word aligned and have at least 32 bytes to move 2056*0Sstevel@tonic-gate! 2057*0Sstevel@tonic-gate.co_med_long2: 2058*0Sstevel@tonic-gate sub %o2, 31, %o2 ! adjust count to allow cc zero test 2059*0Sstevel@tonic-gate sub %o1, 8, %o1 ! adjust pointer to allow store in 2060*0Sstevel@tonic-gate ! branch delay slot instead of add 2061*0Sstevel@tonic-gate.co_med_lmove: 2062*0Sstevel@tonic-gate add %o1, 8, %o1 ! advance DST by 8 2063*0Sstevel@tonic-gate ldx [%o0], %o3 ! read long word 2064*0Sstevel@tonic-gate subcc %o2, 32, %o2 ! reduce count by 32 2065*0Sstevel@tonic-gate stxa %o3, [%o1]ASI_USER ! write long word 2066*0Sstevel@tonic-gate add %o1, 8, %o1 ! advance DST by 8 2067*0Sstevel@tonic-gate ldx [%o0 + 8], %o3 ! repeat for a total for 4 long words 2068*0Sstevel@tonic-gate add %o0, 32, %o0 ! advance SRC by 32 2069*0Sstevel@tonic-gate stxa %o3, [%o1]ASI_USER 2070*0Sstevel@tonic-gate ldx [%o0 - 16], %o3 2071*0Sstevel@tonic-gate add %o1, 8, %o1 ! advance DST by 8 2072*0Sstevel@tonic-gate stxa %o3, [%o1]ASI_USER 2073*0Sstevel@tonic-gate ldx [%o0 - 8], %o3 2074*0Sstevel@tonic-gate add %o1, 8, %o1 ! advance DST by 8 2075*0Sstevel@tonic-gate bgt,pt %ncc, .co_med_lmove ! loop til 31 or fewer bytes left 2076*0Sstevel@tonic-gate stxa %o3, [%o1]ASI_USER 2077*0Sstevel@tonic-gate add %o1, 8, %o1 ! advance DST by 8 2078*0Sstevel@tonic-gate addcc %o2, 24, %o2 ! restore count to long word offset 2079*0Sstevel@tonic-gate ble,pt %ncc, .co_med_lextra ! check for more long words to move 2080*0Sstevel@tonic-gate nop 2081*0Sstevel@tonic-gate.co_med_lword: 2082*0Sstevel@tonic-gate ldx [%o0], %o3 ! read long word 2083*0Sstevel@tonic-gate subcc %o2, 8, %o2 ! 
reduce count by 8 2084*0Sstevel@tonic-gate stxa %o3, [%o1]ASI_USER ! write long word 2085*0Sstevel@tonic-gate add %o0, 8, %o0 ! advance SRC by 8 2086*0Sstevel@tonic-gate bgt,pt %ncc, .co_med_lword ! loop til 7 or fewer bytes left 2087*0Sstevel@tonic-gate add %o1, 8, %o1 ! advance DST by 8 2088*0Sstevel@tonic-gate.co_med_lextra: 2089*0Sstevel@tonic-gate addcc %o2, 7, %o2 ! restore rest of count 2090*0Sstevel@tonic-gate bz,pt %ncc, .co_sm_exit ! if zero, then done 2091*0Sstevel@tonic-gate deccc %o2 2092*0Sstevel@tonic-gate bz,pt %ncc, .co_sm_byte 2093*0Sstevel@tonic-gate nop 2094*0Sstevel@tonic-gate ba,pt %ncc, .co_sm_half 2095*0Sstevel@tonic-gate nop 2096*0Sstevel@tonic-gate 2097*0Sstevel@tonic-gate .align 16 2098*0Sstevel@tonic-gate nop ! instruction alignment 2099*0Sstevel@tonic-gate ! see discussion at start of file 2100*0Sstevel@tonic-gate.co_med_word: 2101*0Sstevel@tonic-gate btst 3, %o0 ! check for 2102*0Sstevel@tonic-gate bz,pt %ncc, .co_med_word1 ! word alignment 2103*0Sstevel@tonic-gate nop 2104*0Sstevel@tonic-gate.co_med_word0: 2105*0Sstevel@tonic-gate ldub [%o0], %o3 ! load one byte 2106*0Sstevel@tonic-gate inc %o0 2107*0Sstevel@tonic-gate stba %o3,[%o1]ASI_USER ! store byte 2108*0Sstevel@tonic-gate inc %o1 2109*0Sstevel@tonic-gate btst 3, %o0 2110*0Sstevel@tonic-gate bnz,pt %ncc, .co_med_word0 2111*0Sstevel@tonic-gate dec %o2 2112*0Sstevel@tonic-gate! 2113*0Sstevel@tonic-gate! Now word aligned and have at least 36 bytes to move 2114*0Sstevel@tonic-gate! 2115*0Sstevel@tonic-gate.co_med_word1: 2116*0Sstevel@tonic-gate sub %o2, 15, %o2 ! adjust count to allow cc zero test 2117*0Sstevel@tonic-gate.co_med_wmove: 2118*0Sstevel@tonic-gate lduw [%o0], %o3 ! read word 2119*0Sstevel@tonic-gate subcc %o2, 16, %o2 ! reduce count by 16 2120*0Sstevel@tonic-gate stwa %o3, [%o1]ASI_USER ! write word 2121*0Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 2122*0Sstevel@tonic-gate lduw [%o0 + 4], %o3 ! 
repeat for a total for 4 words 2123*0Sstevel@tonic-gate add %o0, 16, %o0 ! advance SRC by 16 2124*0Sstevel@tonic-gate stwa %o3, [%o1]ASI_USER 2125*0Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 2126*0Sstevel@tonic-gate lduw [%o0 - 8], %o3 2127*0Sstevel@tonic-gate stwa %o3, [%o1]ASI_USER 2128*0Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 2129*0Sstevel@tonic-gate lduw [%o0 - 4], %o3 2130*0Sstevel@tonic-gate stwa %o3, [%o1]ASI_USER 2131*0Sstevel@tonic-gate bgt,pt %ncc, .co_med_wmove ! loop til 15 or fewer bytes left 2132*0Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 2133*0Sstevel@tonic-gate addcc %o2, 12, %o2 ! restore count to word offset 2134*0Sstevel@tonic-gate ble,pt %ncc, .co_med_wextra ! check for more words to move 2135*0Sstevel@tonic-gate nop 2136*0Sstevel@tonic-gate.co_med_word2: 2137*0Sstevel@tonic-gate lduw [%o0], %o3 ! read word 2138*0Sstevel@tonic-gate subcc %o2, 4, %o2 ! reduce count by 4 2139*0Sstevel@tonic-gate stwa %o3, [%o1]ASI_USER ! write word 2140*0Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 2141*0Sstevel@tonic-gate bgt,pt %ncc, .co_med_word2 ! loop til 3 or fewer bytes left 2142*0Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 2143*0Sstevel@tonic-gate.co_med_wextra: 2144*0Sstevel@tonic-gate addcc %o2, 3, %o2 ! restore rest of count 2145*0Sstevel@tonic-gate bz,pt %ncc, .co_sm_exit ! if zero, then done 2146*0Sstevel@tonic-gate deccc %o2 2147*0Sstevel@tonic-gate bz,pt %ncc, .co_sm_byte 2148*0Sstevel@tonic-gate nop 2149*0Sstevel@tonic-gate ba,pt %ncc, .co_sm_half 2150*0Sstevel@tonic-gate nop 2151*0Sstevel@tonic-gate 2152*0Sstevel@tonic-gate .align 16 2153*0Sstevel@tonic-gate nop ! instruction alignment 2154*0Sstevel@tonic-gate nop ! see discussion at start of file 2155*0Sstevel@tonic-gate nop 2156*0Sstevel@tonic-gate.co_med_half: 2157*0Sstevel@tonic-gate btst 1, %o0 ! check for 2158*0Sstevel@tonic-gate bz,pt %ncc, .co_med_half1 ! 
half word alignment 2159*0Sstevel@tonic-gate nop 2160*0Sstevel@tonic-gate ldub [%o0], %o3 ! load one byte 2161*0Sstevel@tonic-gate inc %o0 2162*0Sstevel@tonic-gate stba %o3,[%o1]ASI_USER ! store byte 2163*0Sstevel@tonic-gate inc %o1 2164*0Sstevel@tonic-gate dec %o2 2165*0Sstevel@tonic-gate! 2166*0Sstevel@tonic-gate! Now half word aligned and have at least 38 bytes to move 2167*0Sstevel@tonic-gate! 2168*0Sstevel@tonic-gate.co_med_half1: 2169*0Sstevel@tonic-gate sub %o2, 7, %o2 ! adjust count to allow cc zero test 2170*0Sstevel@tonic-gate.co_med_hmove: 2171*0Sstevel@tonic-gate lduh [%o0], %o3 ! read half word 2172*0Sstevel@tonic-gate subcc %o2, 8, %o2 ! reduce count by 8 2173*0Sstevel@tonic-gate stha %o3, [%o1]ASI_USER ! write half word 2174*0Sstevel@tonic-gate add %o1, 2, %o1 ! advance DST by 2 2175*0Sstevel@tonic-gate lduh [%o0 + 2], %o3 ! repeat for a total for 4 halfwords 2176*0Sstevel@tonic-gate add %o0, 8, %o0 ! advance SRC by 8 2177*0Sstevel@tonic-gate stha %o3, [%o1]ASI_USER 2178*0Sstevel@tonic-gate add %o1, 2, %o1 ! advance DST by 2 2179*0Sstevel@tonic-gate lduh [%o0 - 4], %o3 2180*0Sstevel@tonic-gate stha %o3, [%o1]ASI_USER 2181*0Sstevel@tonic-gate add %o1, 2, %o1 ! advance DST by 2 2182*0Sstevel@tonic-gate lduh [%o0 - 2], %o3 2183*0Sstevel@tonic-gate stha %o3, [%o1]ASI_USER 2184*0Sstevel@tonic-gate bgt,pt %ncc, .co_med_hmove ! loop til 7 or fewer bytes left 2185*0Sstevel@tonic-gate add %o1, 2, %o1 ! advance DST by 2 2186*0Sstevel@tonic-gate addcc %o2, 7, %o2 ! restore count 2187*0Sstevel@tonic-gate bz,pt %ncc, .co_sm_exit 2188*0Sstevel@tonic-gate deccc %o2 2189*0Sstevel@tonic-gate bz,pt %ncc, .co_sm_byte 2190*0Sstevel@tonic-gate nop 2191*0Sstevel@tonic-gate ba,pt %ncc, .co_sm_half 2192*0Sstevel@tonic-gate nop 2193*0Sstevel@tonic-gate 2194*0Sstevel@tonic-gate/* 2195*0Sstevel@tonic-gate * We got here because of a fault during short copyout. 2196*0Sstevel@tonic-gate * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh). 
2197*0Sstevel@tonic-gate */ 2198*0Sstevel@tonic-gate.sm_copyout_err: 2199*0Sstevel@tonic-gate membar #Sync 2200*0Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 2201*0Sstevel@tonic-gate mov SM_SAVE_SRC, %o0 2202*0Sstevel@tonic-gate mov SM_SAVE_DST, %o1 2203*0Sstevel@tonic-gate mov SM_SAVE_COUNT, %o2 2204*0Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler 2205*0Sstevel@tonic-gate tst %o3 2206*0Sstevel@tonic-gate bz,pt %ncc, 3f ! if not, return error 2207*0Sstevel@tonic-gate nop 2208*0Sstevel@tonic-gate ldn [%o3 + CP_COPYOUT], %o5 ! if handler, invoke it with 2209*0Sstevel@tonic-gate jmp %o5 ! original arguments 2210*0Sstevel@tonic-gate nop 2211*0Sstevel@tonic-gate3: 2212*0Sstevel@tonic-gate retl 2213*0Sstevel@tonic-gate or %g0, -1, %o0 ! return error value 2214*0Sstevel@tonic-gate 2215*0Sstevel@tonic-gate SET_SIZE(copyout) 2216*0Sstevel@tonic-gate 2217*0Sstevel@tonic-gate/* 2218*0Sstevel@tonic-gate * The _more entry points are not intended to be used directly by 2219*0Sstevel@tonic-gate * any caller from outside this file. They are provided to allow 2220*0Sstevel@tonic-gate * profiling and dtrace of the portions of the copy code that uses 2221*0Sstevel@tonic-gate * the floating point registers. 2222*0Sstevel@tonic-gate * This entry is particularly important as DTRACE (at least as of 2223*0Sstevel@tonic-gate * 4/2004) does not support leaf functions. 2224*0Sstevel@tonic-gate */ 2225*0Sstevel@tonic-gate 2226*0Sstevel@tonic-gate ENTRY(copyout_more) 2227*0Sstevel@tonic-gate.copyout_more: 2228*0Sstevel@tonic-gate save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 2229*0Sstevel@tonic-gate set .copyout_err, REAL_LOFAULT 2230*0Sstevel@tonic-gate 2231*0Sstevel@tonic-gate/* 2232*0Sstevel@tonic-gate * Copy outs that reach here are larger than VIS_COPY_THRESHOLD bytes 2233*0Sstevel@tonic-gate */ 2234*0Sstevel@tonic-gate.do_copyout: 2235*0Sstevel@tonic-gate set copyio_fault, %l7 ! 
.copyio_fault is lofault val 2236*0Sstevel@tonic-gate 2237*0Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %l6 ! save existing handler 2238*0Sstevel@tonic-gate membar #Sync ! sync error barrier 2239*0Sstevel@tonic-gate stn %l7, [THREAD_REG + T_LOFAULT] ! set t_lofault 2240*0Sstevel@tonic-gate 2241*0Sstevel@tonic-gate mov %i0, SAVE_SRC 2242*0Sstevel@tonic-gate mov %i1, SAVE_DST 2243*0Sstevel@tonic-gate mov %i2, SAVE_COUNT 2244*0Sstevel@tonic-gate 2245*0Sstevel@tonic-gate FP_NOMIGRATE(6, 7) 2246*0Sstevel@tonic-gate 2247*0Sstevel@tonic-gate rd %fprs, %o2 ! check for unused fp 2248*0Sstevel@tonic-gate st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs 2249*0Sstevel@tonic-gate btst FPRS_FEF, %o2 2250*0Sstevel@tonic-gate bz,a,pt %icc, .do_blockcopyout 2251*0Sstevel@tonic-gate wr %g0, FPRS_FEF, %fprs 2252*0Sstevel@tonic-gate 2253*0Sstevel@tonic-gate BST_FPQ2Q4_TOSTACK(%o2) 2254*0Sstevel@tonic-gate 2255*0Sstevel@tonic-gate.do_blockcopyout: 2256*0Sstevel@tonic-gate rd %gsr, %o2 2257*0Sstevel@tonic-gate stx %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr 2258*0Sstevel@tonic-gate or %l6, FPUSED_FLAG, %l6 2259*0Sstevel@tonic-gate 2260*0Sstevel@tonic-gate andcc DST, VIS_BLOCKSIZE - 1, TMP 2261*0Sstevel@tonic-gate mov ASI_USER, %asi 2262*0Sstevel@tonic-gate bz,pt %ncc, 2f 2263*0Sstevel@tonic-gate neg TMP 2264*0Sstevel@tonic-gate add TMP, VIS_BLOCKSIZE, TMP 2265*0Sstevel@tonic-gate 2266*0Sstevel@tonic-gate ! TMP = bytes required to align DST on FP_BLOCK boundary 2267*0Sstevel@tonic-gate ! Using SRC as a tmp here 2268*0Sstevel@tonic-gate cmp TMP, 3 2269*0Sstevel@tonic-gate bleu,pt %ncc, 1f 2270*0Sstevel@tonic-gate sub CNT,TMP,CNT ! adjust main count 2271*0Sstevel@tonic-gate sub TMP, 3, TMP ! adjust for end of loop test 2272*0Sstevel@tonic-gate.co_blkalign: 2273*0Sstevel@tonic-gate ldub [REALSRC], SRC ! 
move 4 bytes per loop iteration 2274*0Sstevel@tonic-gate stba SRC, [DST]%asi 2275*0Sstevel@tonic-gate subcc TMP, 4, TMP 2276*0Sstevel@tonic-gate ldub [REALSRC + 1], SRC 2277*0Sstevel@tonic-gate add REALSRC, 4, REALSRC 2278*0Sstevel@tonic-gate stba SRC, [DST + 1]%asi 2279*0Sstevel@tonic-gate ldub [REALSRC - 2], SRC 2280*0Sstevel@tonic-gate add DST, 4, DST 2281*0Sstevel@tonic-gate stba SRC, [DST - 2]%asi 2282*0Sstevel@tonic-gate ldub [REALSRC - 1], SRC 2283*0Sstevel@tonic-gate bgu,pt %ncc, .co_blkalign 2284*0Sstevel@tonic-gate stba SRC, [DST - 1]%asi 2285*0Sstevel@tonic-gate 2286*0Sstevel@tonic-gate addcc TMP, 3, TMP ! restore count adjustment 2287*0Sstevel@tonic-gate bz,pt %ncc, 2f ! no bytes left? 2288*0Sstevel@tonic-gate nop 2289*0Sstevel@tonic-gate1: ldub [REALSRC], SRC 2290*0Sstevel@tonic-gate inc REALSRC 2291*0Sstevel@tonic-gate inc DST 2292*0Sstevel@tonic-gate deccc TMP 2293*0Sstevel@tonic-gate bgu %ncc, 1b 2294*0Sstevel@tonic-gate stba SRC, [DST - 1]%asi 2295*0Sstevel@tonic-gate 2296*0Sstevel@tonic-gate2: 2297*0Sstevel@tonic-gate andn REALSRC, 0x7, SRC 2298*0Sstevel@tonic-gate alignaddr REALSRC, %g0, %g0 2299*0Sstevel@tonic-gate 2300*0Sstevel@tonic-gate ! SRC - 8-byte aligned 2301*0Sstevel@tonic-gate ! 
DST - 64-byte aligned 2302*0Sstevel@tonic-gate prefetch [SRC], #one_read 2303*0Sstevel@tonic-gate prefetch [SRC + (1 * VIS_BLOCKSIZE)], #one_read 2304*0Sstevel@tonic-gate prefetch [SRC + (2 * VIS_BLOCKSIZE)], #one_read 2305*0Sstevel@tonic-gate prefetch [SRC + (3 * VIS_BLOCKSIZE)], #one_read 2306*0Sstevel@tonic-gate ldd [SRC], %f16 2307*0Sstevel@tonic-gate#if CHEETAH_PREFETCH > 4 2308*0Sstevel@tonic-gate prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read 2309*0Sstevel@tonic-gate#endif 2310*0Sstevel@tonic-gate ldd [SRC + 0x08], %f18 2311*0Sstevel@tonic-gate#if CHEETAH_PREFETCH > 5 2312*0Sstevel@tonic-gate prefetch [SRC + (5 * VIS_BLOCKSIZE)], #one_read 2313*0Sstevel@tonic-gate#endif 2314*0Sstevel@tonic-gate ldd [SRC + 0x10], %f20 2315*0Sstevel@tonic-gate#if CHEETAH_PREFETCH > 6 2316*0Sstevel@tonic-gate prefetch [SRC + (6 * VIS_BLOCKSIZE)], #one_read 2317*0Sstevel@tonic-gate#endif 2318*0Sstevel@tonic-gate faligndata %f16, %f18, %f48 2319*0Sstevel@tonic-gate ldd [SRC + 0x18], %f22 2320*0Sstevel@tonic-gate#if CHEETAH_PREFETCH > 7 2321*0Sstevel@tonic-gate prefetch [SRC + (7 * VIS_BLOCKSIZE)], #one_read 2322*0Sstevel@tonic-gate#endif 2323*0Sstevel@tonic-gate faligndata %f18, %f20, %f50 2324*0Sstevel@tonic-gate ldd [SRC + 0x20], %f24 2325*0Sstevel@tonic-gate faligndata %f20, %f22, %f52 2326*0Sstevel@tonic-gate ldd [SRC + 0x28], %f26 2327*0Sstevel@tonic-gate faligndata %f22, %f24, %f54 2328*0Sstevel@tonic-gate ldd [SRC + 0x30], %f28 2329*0Sstevel@tonic-gate faligndata %f24, %f26, %f56 2330*0Sstevel@tonic-gate ldd [SRC + 0x38], %f30 2331*0Sstevel@tonic-gate faligndata %f26, %f28, %f58 2332*0Sstevel@tonic-gate ldd [SRC + VIS_BLOCKSIZE], %f16 2333*0Sstevel@tonic-gate sub CNT, VIS_BLOCKSIZE, CNT 2334*0Sstevel@tonic-gate add SRC, VIS_BLOCKSIZE, SRC 2335*0Sstevel@tonic-gate add REALSRC, VIS_BLOCKSIZE, REALSRC 2336*0Sstevel@tonic-gate ba,a,pt %ncc, 1f 2337*0Sstevel@tonic-gate nop 2338*0Sstevel@tonic-gate .align 16 2339*0Sstevel@tonic-gate1: 2340*0Sstevel@tonic-gate ldd [SRC + 0x08], 
%f18 2341*0Sstevel@tonic-gate faligndata %f28, %f30, %f60 2342*0Sstevel@tonic-gate ldd [SRC + 0x10], %f20 2343*0Sstevel@tonic-gate faligndata %f30, %f16, %f62 2344*0Sstevel@tonic-gate stda %f48, [DST]ASI_BLK_AIUS 2345*0Sstevel@tonic-gate ldd [SRC + 0x18], %f22 2346*0Sstevel@tonic-gate faligndata %f16, %f18, %f48 2347*0Sstevel@tonic-gate ldd [SRC + 0x20], %f24 2348*0Sstevel@tonic-gate faligndata %f18, %f20, %f50 2349*0Sstevel@tonic-gate ldd [SRC + 0x28], %f26 2350*0Sstevel@tonic-gate faligndata %f20, %f22, %f52 2351*0Sstevel@tonic-gate ldd [SRC + 0x30], %f28 2352*0Sstevel@tonic-gate faligndata %f22, %f24, %f54 2353*0Sstevel@tonic-gate ldd [SRC + 0x38], %f30 2354*0Sstevel@tonic-gate faligndata %f24, %f26, %f56 2355*0Sstevel@tonic-gate sub CNT, VIS_BLOCKSIZE, CNT 2356*0Sstevel@tonic-gate ldd [SRC + VIS_BLOCKSIZE], %f16 2357*0Sstevel@tonic-gate faligndata %f26, %f28, %f58 2358*0Sstevel@tonic-gate prefetch [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8], #one_read 2359*0Sstevel@tonic-gate add DST, VIS_BLOCKSIZE, DST 2360*0Sstevel@tonic-gate prefetch [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read 2361*0Sstevel@tonic-gate add REALSRC, VIS_BLOCKSIZE, REALSRC 2362*0Sstevel@tonic-gate cmp CNT, VIS_BLOCKSIZE + 8 2363*0Sstevel@tonic-gate bgu,pt %ncc, 1b 2364*0Sstevel@tonic-gate add SRC, VIS_BLOCKSIZE, SRC 2365*0Sstevel@tonic-gate 2366*0Sstevel@tonic-gate ! 
only if REALSRC & 0x7 is 0 2367*0Sstevel@tonic-gate cmp CNT, VIS_BLOCKSIZE 2368*0Sstevel@tonic-gate bne %ncc, 3f 2369*0Sstevel@tonic-gate andcc REALSRC, 0x7, %g0 2370*0Sstevel@tonic-gate bz,pt %ncc, 2f 2371*0Sstevel@tonic-gate nop 2372*0Sstevel@tonic-gate3: 2373*0Sstevel@tonic-gate faligndata %f28, %f30, %f60 2374*0Sstevel@tonic-gate faligndata %f30, %f16, %f62 2375*0Sstevel@tonic-gate stda %f48, [DST]ASI_BLK_AIUS 2376*0Sstevel@tonic-gate add DST, VIS_BLOCKSIZE, DST 2377*0Sstevel@tonic-gate ba,pt %ncc, 3f 2378*0Sstevel@tonic-gate nop 2379*0Sstevel@tonic-gate2: 2380*0Sstevel@tonic-gate ldd [SRC + 0x08], %f18 2381*0Sstevel@tonic-gate fsrc1 %f28, %f60 2382*0Sstevel@tonic-gate ldd [SRC + 0x10], %f20 2383*0Sstevel@tonic-gate fsrc1 %f30, %f62 2384*0Sstevel@tonic-gate stda %f48, [DST]ASI_BLK_AIUS 2385*0Sstevel@tonic-gate ldd [SRC + 0x18], %f22 2386*0Sstevel@tonic-gate fsrc1 %f16, %f48 2387*0Sstevel@tonic-gate ldd [SRC + 0x20], %f24 2388*0Sstevel@tonic-gate fsrc1 %f18, %f50 2389*0Sstevel@tonic-gate ldd [SRC + 0x28], %f26 2390*0Sstevel@tonic-gate fsrc1 %f20, %f52 2391*0Sstevel@tonic-gate ldd [SRC + 0x30], %f28 2392*0Sstevel@tonic-gate fsrc1 %f22, %f54 2393*0Sstevel@tonic-gate ldd [SRC + 0x38], %f30 2394*0Sstevel@tonic-gate fsrc1 %f24, %f56 2395*0Sstevel@tonic-gate sub CNT, VIS_BLOCKSIZE, CNT 2396*0Sstevel@tonic-gate add DST, VIS_BLOCKSIZE, DST 2397*0Sstevel@tonic-gate add SRC, VIS_BLOCKSIZE, SRC 2398*0Sstevel@tonic-gate add REALSRC, VIS_BLOCKSIZE, REALSRC 2399*0Sstevel@tonic-gate fsrc1 %f26, %f58 2400*0Sstevel@tonic-gate fsrc1 %f28, %f60 2401*0Sstevel@tonic-gate fsrc1 %f30, %f62 2402*0Sstevel@tonic-gate stda %f48, [DST]ASI_BLK_AIUS 2403*0Sstevel@tonic-gate add DST, VIS_BLOCKSIZE, DST 2404*0Sstevel@tonic-gate ba,a,pt %ncc, 4f 2405*0Sstevel@tonic-gate nop 2406*0Sstevel@tonic-gate 2407*0Sstevel@tonic-gate3: tst CNT 2408*0Sstevel@tonic-gate bz,a %ncc, 4f 2409*0Sstevel@tonic-gate nop 2410*0Sstevel@tonic-gate 2411*0Sstevel@tonic-gate5: ldub [REALSRC], TMP 2412*0Sstevel@tonic-gate 
	! (tail of the copyout trailing-byte loop; loop head "5:" is above this chunk)
	inc	REALSRC
	inc	DST
	deccc	CNT
	bgu	%ncc, 5b
	stba	TMP, [DST - 1]%asi
4:

	!
	! Common exit for the FP (VIS block-copy) copyout path: run the fpRAS
	! check of the FP registers, restore the caller's %gsr and (if it had
	! live FP state) the FP registers saved on our stack frame, clear
	! FPUSED_FLAG from the saved lofault value, restore t_lofault, and
	! return 0 (success).
	!
.copyout_exit:
	membar	#Sync

	FPRAS_INTERVAL(FPRAS_COPYOUT, 0, %l5, %o2, %o3, %o4, %o5, 8)
	FPRAS_REWRITE_TYPE2Q2(0, %l5, %o2, %o3, 8, 9)
	FPRAS_CHECK(FPRAS_COPYOUT, %l5, 9)	! lose outputs

	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
	wr	%o2, 0, %gsr		! restore gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz,pt	%icc, 4f
	nop

	! caller had live FP state: reload the quads we saved on entry
	BLD_FPQ2Q4_FROMSTACK(%o2)

	ba,pt	%ncc, 1f
	wr	%o3, 0, %fprs		! restore fprs

4:
	! caller's FP was unused: just scrub the registers we dirtied
	FZEROQ2Q4
	wr	%o3, 0, %fprs		! restore fprs

1:
	membar	#Sync
	andn	%l6, FPUSED_FLAG, %l6
	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	FP_ALLOWMIGRATE(5, 6)
	ret
	restore	%g0, 0, %o0

/*
 * We got here because of a fault during copyout.
 * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh).
 */
.copyout_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4	! check for copyop handler
	tst	%o4
	bz,pt	%ncc, 2f			! if not, return error
	nop
	ldn	[%o4 + CP_COPYOUT], %g2		! if handler, invoke it with
	jmp	%g2				! original arguments
	restore %g0, 0, %g0			! dispose of copy window
2:
	ret
	restore %g0, -1, %o0			! return error value


	SET_SIZE(copyout_more)

#endif	/* lint */


#ifdef	lint

/*ARGSUSED*/
int
xcopyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

/*
 * xcopyout - copy kernel data out to user space.
 * Unlike copyout (which returns -1 on fault), the large-copy fault path
 * here returns the errno left in ERRNO, and the leaf-path fault handler
 * returns the errno left in %g1.
 * Dispatch: copies <= VIS_COPY_THRESHOLD, or whose src/dst cannot be
 * mutually aligned, use the leaf routine (.xcopyout_small); larger
 * aligned copies use the FP register path (.xcopyout_more), gated by the
 * per-alignment tunables hw_copy_limit_{1,2,4,8} (0 disables the HW path).
 */
	ENTRY(xcopyout)
	cmp	%o2, VIS_COPY_THRESHOLD	! check for leaf rtn case
	bleu,pt	%ncc, .xcopyout_small	! go to larger cases
	xor	%o0, %o1, %o3		! are src, dst alignable?
	btst	7, %o3			!
	bz,pt	%ncc, .xcopyout_8	!
	nop
	btst	1, %o3			!
	bz,pt	%ncc, .xcopyout_2	! check for half-word
	nop
	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	tst	%o3
	bz,pn	%icc, .xcopyout_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .xcopyout_small	! go to small copy
	nop
	ba,pt	%ncc, .xcopyout_more	! otherwise go to large copy
	nop
.xcopyout_2:
	btst	3, %o3			!
	bz,pt	%ncc, .xcopyout_4	! check for word alignment
	nop
	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
	tst	%o3
	bz,pn	%icc, .xcopyout_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .xcopyout_small	! go to small copy
	nop
	ba,pt	%ncc, .xcopyout_more	! otherwise go to large copy
	nop
.xcopyout_4:
	! already checked longword, must be word aligned
	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
	tst	%o3
	bz,pn	%icc, .xcopyout_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .xcopyout_small	! go to small copy
	nop
	ba,pt	%ncc, .xcopyout_more	! otherwise go to large copy
	nop
.xcopyout_8:
	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
	tst	%o3
	bz,pn	%icc, .xcopyout_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .xcopyout_small	! go to small copy
	nop
	ba,pt	%ncc, .xcopyout_more	! otherwise go to large copy
	nop

.xcopyout_small:
	sethi	%hi(.sm_xcopyout_err), %o5	! .sm_xcopyout_err is lofault
	or	%o5, %lo(.sm_xcopyout_err), %o5
	ldn	[THREAD_REG + T_LOFAULT], %o4	! save existing handler
	membar	#Sync			! sync error barrier
	ba,pt	%ncc, .sm_do_copyout	! common code
	stn	%o5, [THREAD_REG + T_LOFAULT]	! set t_lofault

.xcopyout_more:
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	sethi	%hi(.xcopyout_err), REAL_LOFAULT
	ba,pt	%ncc, .do_copyout	! common code
	or	REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT

/*
 * We got here because of fault during xcopyout
 * Errno value is in ERRNO
 */
.xcopyout_err:
	ldn	[THREAD_REG + T_COPYOPS], %o4	! check for copyop handler
	tst	%o4
	bz,pt	%ncc, 2f			! if not, return error
	nop
	ldn	[%o4 + CP_XCOPYOUT], %g2	! if handler, invoke it with
	jmp	%g2				! original arguments
	restore %g0, 0, %g0			! dispose of copy window
2:
	ret
	restore ERRNO, 0, %o0			! return errno value

	! Fault in the leaf (small-copy) path: restore the saved t_lofault,
	! recover the original (src, dst, count) arguments from the save
	! registers, and retry through an installed copyops handler if any;
	! otherwise return the errno the trap handler left in %g1.
.sm_xcopyout_err:

	membar	#Sync
	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	mov	SM_SAVE_SRC, %o0
	mov	SM_SAVE_DST, %o1
	mov	SM_SAVE_COUNT, %o2
	ldn	[THREAD_REG + T_COPYOPS], %o3	! check for copyop handler
	tst	%o3
	bz,pt	%ncc, 3f			! if not, return error
	nop
	ldn	[%o3 + CP_XCOPYOUT], %o5	! if handler, invoke it with
	jmp	%o5				! original arguments
	nop
3:
	retl
	or	%g1, 0, %o0		! return errno value

	SET_SIZE(xcopyout)

#endif	/* lint */

#ifdef	lint

/*ARGSUSED*/
int
xcopyout_little(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* lint */

/*
 * xcopyout_little - copy kernel data to user space through the
 * little-endian user ASI (stba ...ASI_AIUSL below); the source is walked
 * starting from its last byte.  Leaf routine: %o5/%o4 juggle the lofault
 * handler, .xcopyio_err handles faults.
 */
	ENTRY(xcopyout_little)
	sethi	%hi(.xcopyio_err), %o5
	or	%o5, %lo(.xcopyio_err), %o5
	ldn	[THREAD_REG + T_LOFAULT], %o4
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]
	mov	%o4, %o5

	subcc	%g0, %o2, %o3
	add	%o0, %o2, %o0
	bz,pn	%ncc, 2f		! check for zero bytes
	sub	%o2, 1, %o4
	add	%o0, %o4, %o0		!
	! start w/last byte (comment continues the preceding add)
	add	%o1, %o2, %o1
	ldub	[%o0 + %o3], %o4

1:	stba	%o4, [%o1 + %o3]ASI_AIUSL
	inccc	%o3
	sub	%o0, 2, %o0		! get next byte
	bcc,a,pt %ncc, 1b
	ldub	[%o0 + %o3], %o4

2:
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return (0)

	SET_SIZE(xcopyout_little)

#endif	/* lint */

/*
 * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
 */

#if defined(lint)

/*ARGSUSED*/
int
copyin(const void *uaddr, void *kaddr, size_t count)
{ return (0); }

#else	/* lint */

/*
 * copyin - copy user data into kernel space; returns 0 on success, -1 on
 * fault (see .sm_copyin_err below).  Same dispatch scheme as copyout:
 * copies <= VIS_COPY_THRESHOLD or with non-alignable src/dst use the leaf
 * routine (.copyin_small); larger aligned copies use the FP path
 * (.copyin_more), gated by hw_copy_limit_{1,2,4,8} (0 disables it).
 * User-side loads use lduba/lduha/lduwa/ldxa with ASI_USER throughout.
 */
	ENTRY(copyin)
	cmp	%o2, VIS_COPY_THRESHOLD	! check for leaf rtn case
	bleu,pt	%ncc, .copyin_small	! go to larger cases
	xor	%o0, %o1, %o3		! are src, dst alignable?
	btst	7, %o3			!
	bz,pt	%ncc, .copyin_8		! check for longword alignment
	nop
	btst	1, %o3			!
	bz,pt	%ncc, .copyin_2		! check for half-word
	nop
	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	tst	%o3
	bz,pn	%icc, .copyin_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .copyin_small	! go to small copy
	nop
	ba,pt	%ncc, .copyin_more	! otherwise go to large copy
	nop
.copyin_2:
	btst	3, %o3			!
	bz,pt	%ncc, .copyin_4		! check for word alignment
	nop
	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
	tst	%o3
	bz,pn	%icc, .copyin_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .copyin_small	! go to small copy
	nop
	ba,pt	%ncc, .copyin_more	! otherwise go to large copy
	nop
.copyin_4:
	! already checked longword, must be word aligned
	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
	tst	%o3
	bz,pn	%icc, .copyin_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .copyin_small	! go to small copy
	nop
	ba,pt	%ncc, .copyin_more	! otherwise go to large copy
	nop
.copyin_8:
	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
	tst	%o3
	bz,pn	%icc, .copyin_small	! if zero, disable HW copy
	cmp	%o2, %o3		! if length <= limit
	bleu,pt	%ncc, .copyin_small	! go to small copy
	nop
	ba,pt	%ncc, .copyin_more	! otherwise go to large copy
	nop

	.align	16
	nop				! instruction alignment
					! see discussion at start of file
.copyin_small:
	sethi	%hi(.sm_copyin_err), %o5	! .sm_copyin_err is lofault
	or	%o5, %lo(.sm_copyin_err), %o5
	ldn	[THREAD_REG + T_LOFAULT], %o4	! set/save t_lofault, no tramp
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]
.sm_do_copyin:
	mov	%o0, SM_SAVE_SRC
	mov	%o1, SM_SAVE_DST
	cmp	%o2, SHORTCOPY		! check for really short case
	bleu,pt	%ncc, .ci_sm_left	!
	mov	%o2, SM_SAVE_COUNT
	cmp	%o2, CHKSIZE		! check for medium length cases
	bgu,pn	%ncc, .ci_med		!
	or	%o0, %o1, %o3		! prepare alignment check
	andcc	%o3, 0x3, %g0		! test for alignment
	bz,pt	%ncc, .ci_sm_word	! branch to word aligned case
.ci_sm_movebytes:
	sub	%o2, 3, %o2		! adjust count to allow cc zero test
.ci_sm_notalign4:
	lduba	[%o0]ASI_USER, %o3	! read byte
	subcc	%o2, 4, %o2		! reduce count by 4
	stb	%o3, [%o1]		! write byte
	add	%o0, 1, %o0		! advance SRC by 1
	lduba	[%o0]ASI_USER, %o3	! repeat for a total of 4 bytes
	add	%o0, 1, %o0		! advance SRC by 1
	stb	%o3, [%o1 + 1]
	add	%o1, 4, %o1		! advance DST by 4
	lduba	[%o0]ASI_USER, %o3
	add	%o0, 1, %o0		! advance SRC by 1
	stb	%o3, [%o1 - 2]
	lduba	[%o0]ASI_USER, %o3
	add	%o0, 1, %o0		! advance SRC by 1
	bgt,pt	%ncc, .ci_sm_notalign4	! loop til 3 or fewer bytes remain
	stb	%o3, [%o1 - 1]
	add	%o2, 3, %o2		! restore count
.ci_sm_left:
	tst	%o2
	bz,pt	%ncc, .ci_sm_exit
	nop
	lduba	[%o0]ASI_USER, %o3	! load one byte
	deccc	%o2			! reduce count for cc test
	bz,pt	%ncc, .ci_sm_exit
	stb	%o3,[%o1]		! store one byte
	inc	%o0
	lduba	[%o0]ASI_USER, %o3	! load second byte
	deccc	%o2
	bz,pt	%ncc, .ci_sm_exit
	stb	%o3,[%o1 + 1]		! store second byte
	inc	%o0
	lduba	[%o0]ASI_USER, %o3	! load third byte
	stb	%o3,[%o1 + 2]		! store third byte
	membar	#Sync				! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return 0
	.align	16
.ci_sm_words:
	lduwa	[%o0]ASI_USER, %o3	! read word
.ci_sm_wordx:
	subcc	%o2, 8, %o2		! update count
	stw	%o3, [%o1]		! write word
	add	%o0, 4, %o0		! update SRC
	add	%o1, 8, %o1		! update DST
	lduwa	[%o0]ASI_USER, %o3	! read word
	add	%o0, 4, %o0		! update SRC
	bgt,pt	%ncc, .ci_sm_words	! loop til done
	stw	%o3, [%o1 - 4]		! write word
	addcc	%o2, 7, %o2		! restore count
	bz,pt	%ncc, .ci_sm_exit
	nop
	deccc	%o2
	bz,pt	%ncc, .ci_sm_byte
.ci_sm_half:
	subcc	%o2, 2, %o2		! reduce count by 2
	lduha	[%o0]ASI_USER, %o3	! read half word
	add	%o0, 2, %o0		! advance SRC by 2
	add	%o1, 2, %o1		! advance DST by 2
	bgt,pt	%ncc, .ci_sm_half	! loop til done
	sth	%o3, [%o1 - 2]		! write half word
	addcc	%o2, 1, %o2		! restore count
	bz,pt	%ncc, .ci_sm_exit
	nop
.ci_sm_byte:
	lduba	[%o0]ASI_USER, %o3
	stb	%o3, [%o1]
	membar	#Sync				! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return 0
	.align	16
.ci_sm_word:
	subcc	%o2, 4, %o2		! update count
	bgt,pt	%ncc, .ci_sm_wordx
	lduwa	[%o0]ASI_USER, %o3	! read word
	addcc	%o2, 3, %o2		! restore count
	bz,pt	%ncc, .ci_sm_exit
	stw	%o3, [%o1]		! write word
	deccc	%o2			! reduce count for cc test
	add	%o0, 4, %o0
	lduba	[%o0]ASI_USER, %o3	! load one byte
	bz,pt	%ncc, .ci_sm_exit
	stb	%o3, [%o1 + 4]		! store one byte
	inc	%o0
	lduba	[%o0]ASI_USER, %o3	! load second byte
	deccc	%o2
	bz,pt	%ncc, .ci_sm_exit
	stb	%o3, [%o1 + 5]		! store second byte
	inc	%o0
	lduba	[%o0]ASI_USER, %o3	! load third byte
	stb	%o3, [%o1 + 6]		! store third byte
.ci_sm_exit:
	membar	#Sync				! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return 0

	! Medium-length copies: pick the widest move (byte/half/word/long)
	! that the src^dst alignment allows, align SRC up first, then move
	! 4 units per iteration; leftovers fall back to the small-copy tails.
	.align 16
.ci_med:
	xor	%o0, %o1, %o3		! setup alignment check
	btst	1, %o3
	bnz,pt	%ncc, .ci_sm_movebytes	! unaligned
	nop
	btst	3, %o3
	bnz,pt	%ncc, .ci_med_half	! halfword aligned
	nop
	btst	7, %o3
	bnz,pt	%ncc, .ci_med_word	! word aligned
	nop
.ci_med_long:
	btst	3, %o0			! check for
	bz,pt	%ncc, .ci_med_long1	! word alignment
	nop
.ci_med_long0:
	lduba	[%o0]ASI_USER, %o3	! load one byte
	inc	%o0
	stb	%o3,[%o1]		! store byte
	inc	%o1
	btst	3, %o0
	bnz,pt	%ncc, .ci_med_long0
	dec	%o2
.ci_med_long1:			! word aligned
	btst	7, %o0			! check for long word
	bz,pt	%ncc, .ci_med_long2
	nop
	lduwa	[%o0]ASI_USER, %o3	! load word
	add	%o0, 4, %o0		! advance SRC by 4
	stw	%o3, [%o1]		! store word
	add	%o1, 4, %o1		! advance DST by 4
	sub	%o2, 4, %o2		! reduce count by 4
!
!  Now long word aligned and have at least 32 bytes to move
!
.ci_med_long2:
	sub	%o2, 31, %o2		! adjust count to allow cc zero test
.ci_med_lmove:
	ldxa	[%o0]ASI_USER, %o3	! read long word
	subcc	%o2, 32, %o2		! reduce count by 32
	stx	%o3, [%o1]		! write long word
	add	%o0, 8, %o0		! advance SRC by 8
	ldxa	[%o0]ASI_USER, %o3	! repeat for a total for 4 long words
	add	%o0, 8, %o0		! advance SRC by 8
	stx	%o3, [%o1 + 8]
	add	%o1, 32, %o1		! advance DST by 32
	ldxa	[%o0]ASI_USER, %o3
	add	%o0, 8, %o0		! advance SRC by 8
	stx	%o3, [%o1 - 16]
	ldxa	[%o0]ASI_USER, %o3
	add	%o0, 8, %o0		! advance SRC by 8
	bgt,pt	%ncc, .ci_med_lmove	! loop til 31 or fewer bytes left
	stx	%o3, [%o1 - 8]
	addcc	%o2, 24, %o2		! restore count to long word offset
	ble,pt	%ncc, .ci_med_lextra	! check for more long words to move
	nop
.ci_med_lword:
	ldxa	[%o0]ASI_USER, %o3	! read long word
	subcc	%o2, 8, %o2		! reduce count by 8
	stx	%o3, [%o1]		! write long word
	add	%o0, 8, %o0		! advance SRC by 8
	bgt,pt	%ncc, .ci_med_lword	! loop til 7 or fewer bytes left
	add	%o1, 8, %o1		! advance DST by 8
.ci_med_lextra:
	addcc	%o2, 7, %o2		! restore rest of count
	bz,pt	%ncc, .ci_sm_exit	! if zero, then done
	deccc	%o2
	bz,pt	%ncc, .ci_sm_byte
	nop
	ba,pt	%ncc, .ci_sm_half
	nop

	.align 16
	nop				! instruction alignment
					! see discussion at start of file
.ci_med_word:
	btst	3, %o0			! check for
	bz,pt	%ncc, .ci_med_word1	! word alignment
	nop
.ci_med_word0:
	lduba	[%o0]ASI_USER, %o3	! load one byte
	inc	%o0
	stb	%o3,[%o1]		! store byte
	inc	%o1
	btst	3, %o0
	bnz,pt	%ncc, .ci_med_word0
	dec	%o2
!
!  Now word aligned and have at least 36 bytes to move
!
.ci_med_word1:
	sub	%o2, 15, %o2		! adjust count to allow cc zero test
.ci_med_wmove:
	lduwa	[%o0]ASI_USER, %o3	! read word
	subcc	%o2, 16, %o2		! reduce count by 16
	stw	%o3, [%o1]		! write word
	add	%o0, 4, %o0		! advance SRC by 4
	lduwa	[%o0]ASI_USER, %o3	! repeat for a total for 4 words
	add	%o0, 4, %o0		! advance SRC by 4
	stw	%o3, [%o1 + 4]
	add	%o1, 16, %o1		! advance DST by 16
	lduwa	[%o0]ASI_USER, %o3
	add	%o0, 4, %o0		! advance SRC by 4
	stw	%o3, [%o1 - 8]
	lduwa	[%o0]ASI_USER, %o3
	add	%o0, 4, %o0		! advance SRC by 4
	bgt,pt	%ncc, .ci_med_wmove	! loop til 15 or fewer bytes left
	stw	%o3, [%o1 - 4]
	addcc	%o2, 12, %o2		! restore count to word offset
	ble,pt	%ncc, .ci_med_wextra	! check for more words to move
	nop
.ci_med_word2:
	lduwa	[%o0]ASI_USER, %o3	! read word
	subcc	%o2, 4, %o2		! reduce count by 4
	stw	%o3, [%o1]		! write word
	add	%o0, 4, %o0		! advance SRC by 4
	bgt,pt	%ncc, .ci_med_word2	! loop til 3 or fewer bytes left
	add	%o1, 4, %o1		! advance DST by 4
.ci_med_wextra:
	addcc	%o2, 3, %o2		! restore rest of count
	bz,pt	%ncc, .ci_sm_exit	! if zero, then done
	deccc	%o2
	bz,pt	%ncc, .ci_sm_byte
	nop
	ba,pt	%ncc, .ci_sm_half
	nop

	.align 16
	nop				! instruction alignment
					! see discussion at start of file
.ci_med_half:
	btst	1, %o0			! check for
	bz,pt	%ncc, .ci_med_half1	! half word alignment
	nop
	lduba	[%o0]ASI_USER, %o3	! load one byte
	inc	%o0
	stb	%o3,[%o1]		! store byte
	inc	%o1
	dec	%o2
!
!  Now half word aligned and have at least 38 bytes to move
!
.ci_med_half1:
	sub	%o2, 7, %o2		! adjust count to allow cc zero test
.ci_med_hmove:
	lduha	[%o0]ASI_USER, %o3	! read half word
	subcc	%o2, 8, %o2		! reduce count by 8
	sth	%o3, [%o1]		! write half word
	add	%o0, 2, %o0		! advance SRC by 2
	lduha	[%o0]ASI_USER, %o3	! repeat for a total for 4 halfwords
	add	%o0, 2, %o0		! advance SRC by 2
	sth	%o3, [%o1 + 2]
	add	%o1, 8, %o1		! advance DST by 8
	lduha	[%o0]ASI_USER, %o3
	add	%o0, 2, %o0		! advance SRC by 2
	sth	%o3, [%o1 - 4]
	lduha	[%o0]ASI_USER, %o3
	add	%o0, 2, %o0		! advance SRC by 2
	bgt,pt	%ncc, .ci_med_hmove	! loop til 7 or fewer bytes left
	sth	%o3, [%o1 - 2]
	addcc	%o2, 7, %o2		! restore count
	bz,pt	%ncc, .ci_sm_exit
	deccc	%o2
	bz,pt	%ncc, .ci_sm_byte
	nop
	ba,pt	%ncc, .ci_sm_half
	nop

	! Fault in the leaf copyin path: restore t_lofault, recover original
	! args, retry via an installed copyops handler if any, else return -1
	! per the copyin contract.
.sm_copyin_err:
	membar	#Sync
	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	mov	SM_SAVE_SRC, %o0
	mov	SM_SAVE_DST, %o1
	mov	SM_SAVE_COUNT, %o2
	ldn	[THREAD_REG + T_COPYOPS], %o3	! check for copyop handler
	tst	%o3
	bz,pt	%ncc, 3f			! if not, return error
	nop
	ldn	[%o3 + CP_COPYIN], %o5		! if handler, invoke it with
	jmp	%o5				! original arguments
	nop
3:
	retl
	or	%g0, -1, %o0			! return errno value

	SET_SIZE(copyin)


/*
 * The _more entry points are not intended to be used directly by
 * any caller from outside this file.  They are provided to allow
 * profiling and dtrace of the portions of the copy code that uses
 * the floating point registers.
 * This entry is particularly important as DTRACE (at least as of
 * 4/2004) does not support leaf functions.
 */

	ENTRY(copyin_more)
.copyin_more:
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	set	.copyin_err, REAL_LOFAULT

/*
 * Copy ins that reach here are larger than VIS_COPY_THRESHOLD bytes
 */
.do_copyin:
	set	copyio_fault, %l7		! .copyio_fault is lofault val

	ldn	[THREAD_REG + T_LOFAULT], %l6	! save existing handler
	membar	#Sync				! sync error barrier
	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault

	mov	%i0, SAVE_SRC
	mov	%i1, SAVE_DST
	mov	%i2, SAVE_COUNT

	! pin the thread: FP state lives in per-CPU context below
	FP_NOMIGRATE(6, 7)

	rd	%fprs, %o2		! check for unused fp
	st	%o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
	btst	FPRS_FEF, %o2
	bz,a,pt	%icc, .do_blockcopyin
	wr	%g0, FPRS_FEF, %fprs

	! caller has live FP state: save the quads we are about to use
	BST_FPQ2Q4_TOSTACK(%o2)

.do_blockcopyin:
	rd	%gsr, %o2
	stx	%o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET]	! save gsr
	or	%l6, FPUSED_FLAG, %l6	! error handler must restore FP

	! byte-copy forward until DST sits on a VIS_BLOCKSIZE boundary
	andcc	DST, VIS_BLOCKSIZE - 1, TMP
	mov	ASI_USER, %asi
	bz,pt	%ncc, 2f
	neg	TMP
	add	TMP, VIS_BLOCKSIZE, TMP

	! TMP = bytes required to align DST on FP_BLOCK boundary
	! Using SRC as a tmp here
	cmp	TMP, 3
	bleu,pt	%ncc, 1f
	sub	CNT,TMP,CNT		! adjust main count
	sub	TMP, 3, TMP		! adjust for end of loop test
.ci_blkalign:
	lduba	[REALSRC]%asi, SRC	! move 4 bytes per loop iteration
	stb	SRC, [DST]
	subcc	TMP, 4, TMP
	lduba	[REALSRC + 1]%asi, SRC
	add	REALSRC, 4, REALSRC
	stb	SRC, [DST + 1]
	lduba	[REALSRC - 2]%asi, SRC
	add	DST, 4, DST
	stb	SRC, [DST - 2]
	lduba	[REALSRC - 1]%asi, SRC
	bgu,pt	%ncc, .ci_blkalign
	stb	SRC, [DST - 1]

	addcc	TMP, 3, TMP		! restore count adjustment
	bz,pt	%ncc, 2f		! no bytes left?
	nop
1:	lduba	[REALSRC]%asi, SRC
	inc	REALSRC
	inc	DST
	deccc	TMP
	bgu	%ncc, 1b
	stb	SRC, [DST - 1]

2:
	andn	REALSRC, 0x7, SRC
	alignaddr REALSRC, %g0, %g0

	! SRC - 8-byte aligned
	! DST - 64-byte aligned
	! Prime the software pipeline: prefetch ahead, load the first block
	! into %f16-%f30 and start realigning it into %f48-%f62 with
	! faligndata (using the offset set up by alignaddr above).
	prefetcha [SRC]%asi, #one_read
	prefetcha [SRC + (1 * VIS_BLOCKSIZE)]%asi, #one_read
	prefetcha [SRC + (2 * VIS_BLOCKSIZE)]%asi, #one_read
	prefetcha [SRC + (3 * VIS_BLOCKSIZE)]%asi, #one_read
	ldda	[SRC]%asi, %f16
#if CHEETAH_PREFETCH > 4
	prefetcha [SRC + (4 * VIS_BLOCKSIZE)]%asi, #one_read
#endif
	ldda	[SRC + 0x08]%asi, %f18
#if CHEETAH_PREFETCH > 5
	prefetcha [SRC + (5 * VIS_BLOCKSIZE)]%asi, #one_read
#endif
	ldda	[SRC + 0x10]%asi, %f20
#if CHEETAH_PREFETCH > 6
	prefetcha [SRC + (6 * VIS_BLOCKSIZE)]%asi, #one_read
#endif
	faligndata %f16, %f18, %f48
	ldda	[SRC + 0x18]%asi, %f22
#if CHEETAH_PREFETCH > 7
	prefetcha [SRC + (7 * VIS_BLOCKSIZE)]%asi, #one_read
#endif
	faligndata %f18, %f20, %f50
	ldda	[SRC + 0x20]%asi, %f24
	faligndata %f20, %f22, %f52
	ldda	[SRC + 0x28]%asi, %f26
	faligndata %f22, %f24, %f54
	ldda	[SRC + 0x30]%asi, %f28
	faligndata %f24, %f26, %f56
	ldda	[SRC + 0x38]%asi, %f30
	faligndata %f26, %f28, %f58
	ldda	[SRC + VIS_BLOCKSIZE]%asi, %f16
	sub	CNT, VIS_BLOCKSIZE, CNT
	add	SRC, VIS_BLOCKSIZE, SRC
	add	REALSRC, VIS_BLOCKSIZE, REALSRC
	ba,a,pt	%ncc, 1f
	nop
	.align	16
	! Steady-state loop: one 64-byte block per iteration — block-store
	! the realigned data while loading/realigning the next block and
	! prefetching further ahead.
1:
	ldda	[SRC + 0x08]%asi, %f18
	faligndata %f28, %f30, %f60
	ldda	[SRC + 0x10]%asi, %f20
	faligndata %f30, %f16, %f62
	stda	%f48, [DST]ASI_BLK_P
	ldda	[SRC + 0x18]%asi, %f22
	faligndata %f16, %f18, %f48
	ldda	[SRC + 0x20]%asi, %f24
	faligndata %f18, %f20, %f50
	ldda	[SRC + 0x28]%asi, %f26
	faligndata %f20, %f22, %f52
	ldda	[SRC + 0x30]%asi, %f28
	faligndata %f22, %f24, %f54
	ldda	[SRC + 0x38]%asi, %f30
	faligndata %f24, %f26, %f56
	sub	CNT, VIS_BLOCKSIZE, CNT
	ldda	[SRC + VIS_BLOCKSIZE]%asi, %f16
	faligndata %f26, %f28, %f58
	prefetcha [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8]%asi, #one_read
	add	DST, VIS_BLOCKSIZE, DST
	prefetcha [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)]%asi, #one_read
	add	REALSRC, VIS_BLOCKSIZE, REALSRC
	cmp	CNT, VIS_BLOCKSIZE + 8
	bgu,pt	%ncc, 1b
	add	SRC, VIS_BLOCKSIZE, SRC

	! Loop drained: the aligned-source shortcut at 2f may be used
	! only if REALSRC & 0x7 is 0
	cmp	CNT, VIS_BLOCKSIZE
	bne	%ncc, 3f
	andcc	REALSRC, 0x7, %g0
	bz,pt	%ncc, 2f
	nop
3:
	! misaligned source: finish realigning and store the final block
	faligndata %f28, %f30, %f60
	faligndata %f30, %f16, %f62
	stda	%f48, [DST]ASI_BLK_P
	add	DST, VIS_BLOCKSIZE, DST
	ba,pt	%ncc, 3f
	nop
2:
	! 8-byte-aligned source: copy the last full block with fsrc1
	! (no realignment needed)
	ldda	[SRC + 0x08]%asi, %f18
	fsrc1	%f28, %f60
	ldda	[SRC + 0x10]%asi, %f20
	fsrc1	%f30, %f62
	stda	%f48, [DST]ASI_BLK_P
	ldda	[SRC + 0x18]%asi, %f22
	fsrc1	%f16, %f48
	ldda	[SRC + 0x20]%asi, %f24
	fsrc1	%f18, %f50
	ldda	[SRC + 0x28]%asi, %f26
	fsrc1	%f20, %f52
	ldda	[SRC + 0x30]%asi, %f28
	fsrc1	%f22, %f54
	ldda	[SRC + 0x38]%asi, %f30
	fsrc1	%f24, %f56
	sub	CNT, VIS_BLOCKSIZE, CNT
	add	DST, VIS_BLOCKSIZE, DST
	add	SRC, VIS_BLOCKSIZE, SRC
	add	REALSRC, VIS_BLOCKSIZE, REALSRC
	fsrc1	%f26, %f58
	fsrc1	%f28, %f60
	fsrc1	%f30, %f62
	stda	%f48, [DST]ASI_BLK_P
	add	DST, VIS_BLOCKSIZE, DST
	ba,a,pt	%ncc, 4f
	nop

3:	tst	CNT
	bz,a	%ncc, 4f
	nop

5:	lduba
[REALSRC]ASI_USER, TMP 3200*0Sstevel@tonic-gate inc REALSRC 3201*0Sstevel@tonic-gate inc DST 3202*0Sstevel@tonic-gate deccc CNT 3203*0Sstevel@tonic-gate bgu %ncc, 5b 3204*0Sstevel@tonic-gate stb TMP, [DST - 1] 3205*0Sstevel@tonic-gate4: 3206*0Sstevel@tonic-gate 3207*0Sstevel@tonic-gate.copyin_exit: 3208*0Sstevel@tonic-gate membar #Sync 3209*0Sstevel@tonic-gate 3210*0Sstevel@tonic-gate FPRAS_INTERVAL(FPRAS_COPYIN, 1, %l5, %o2, %o3, %o4, %o5, 8) 3211*0Sstevel@tonic-gate FPRAS_REWRITE_TYPE1(1, %l5, %f48, %o2, 9) 3212*0Sstevel@tonic-gate FPRAS_CHECK(FPRAS_COPYIN, %l5, 9) ! lose outputs 3213*0Sstevel@tonic-gate 3214*0Sstevel@tonic-gate ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! restore gsr 3215*0Sstevel@tonic-gate wr %o2, 0, %gsr 3216*0Sstevel@tonic-gate 3217*0Sstevel@tonic-gate ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3 3218*0Sstevel@tonic-gate btst FPRS_FEF, %o3 3219*0Sstevel@tonic-gate bz,pt %icc, 4f 3220*0Sstevel@tonic-gate nop 3221*0Sstevel@tonic-gate 3222*0Sstevel@tonic-gate BLD_FPQ2Q4_FROMSTACK(%o2) 3223*0Sstevel@tonic-gate 3224*0Sstevel@tonic-gate ba,pt %ncc, 1f 3225*0Sstevel@tonic-gate wr %o3, 0, %fprs ! restore fprs 3226*0Sstevel@tonic-gate 3227*0Sstevel@tonic-gate4: 3228*0Sstevel@tonic-gate FZEROQ2Q4 3229*0Sstevel@tonic-gate wr %o3, 0, %fprs ! restore fprs 3230*0Sstevel@tonic-gate 3231*0Sstevel@tonic-gate1: 3232*0Sstevel@tonic-gate membar #Sync ! sync error barrier 3233*0Sstevel@tonic-gate andn %l6, FPUSED_FLAG, %l6 3234*0Sstevel@tonic-gate stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 3235*0Sstevel@tonic-gate FP_ALLOWMIGRATE(5, 6) 3236*0Sstevel@tonic-gate ret 3237*0Sstevel@tonic-gate restore %g0, 0, %o0 3238*0Sstevel@tonic-gate/* 3239*0Sstevel@tonic-gate * We got here because of a fault during copyin 3240*0Sstevel@tonic-gate * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh). 3241*0Sstevel@tonic-gate */ 3242*0Sstevel@tonic-gate.copyin_err: 3243*0Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o4 ! 
check for copyop handler 3244*0Sstevel@tonic-gate tst %o4 3245*0Sstevel@tonic-gate bz,pt %ncc, 2f ! if not, return error 3246*0Sstevel@tonic-gate nop 3247*0Sstevel@tonic-gate ldn [%o4 + CP_COPYIN], %g2 ! if handler, invoke it with 3248*0Sstevel@tonic-gate jmp %g2 ! original arguments 3249*0Sstevel@tonic-gate restore %g0, 0, %g0 ! dispose of copy window 3250*0Sstevel@tonic-gate2: 3251*0Sstevel@tonic-gate ret 3252*0Sstevel@tonic-gate restore %g0, -1, %o0 ! return error value 3253*0Sstevel@tonic-gate 3254*0Sstevel@tonic-gate 3255*0Sstevel@tonic-gate SET_SIZE(copyin_more) 3256*0Sstevel@tonic-gate 3257*0Sstevel@tonic-gate#endif /* lint */ 3258*0Sstevel@tonic-gate 3259*0Sstevel@tonic-gate#ifdef lint 3260*0Sstevel@tonic-gate 3261*0Sstevel@tonic-gate/*ARGSUSED*/ 3262*0Sstevel@tonic-gateint 3263*0Sstevel@tonic-gatexcopyin(const void *uaddr, void *kaddr, size_t count) 3264*0Sstevel@tonic-gate{ return (0); } 3265*0Sstevel@tonic-gate 3266*0Sstevel@tonic-gate#else /* lint */ 3267*0Sstevel@tonic-gate 3268*0Sstevel@tonic-gate ENTRY(xcopyin) 3269*0Sstevel@tonic-gate 3270*0Sstevel@tonic-gate cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case 3271*0Sstevel@tonic-gate bleu,pt %ncc, .xcopyin_small ! go to larger cases 3272*0Sstevel@tonic-gate xor %o0, %o1, %o3 ! are src, dst alignable? 3273*0Sstevel@tonic-gate btst 7, %o3 ! 3274*0Sstevel@tonic-gate bz,pt %ncc, .xcopyin_8 ! check for longword alignment 3275*0Sstevel@tonic-gate nop 3276*0Sstevel@tonic-gate btst 1, %o3 ! 3277*0Sstevel@tonic-gate bz,pt %ncc, .xcopyin_2 ! check for half-word 3278*0Sstevel@tonic-gate nop 3279*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit 3280*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_1)], %o3 3281*0Sstevel@tonic-gate tst %o3 3282*0Sstevel@tonic-gate bz,pn %icc, .xcopyin_small ! if zero, disable HW copy 3283*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 3284*0Sstevel@tonic-gate bleu,pt %ncc, .xcopyin_small ! 
go to small copy 3285*0Sstevel@tonic-gate nop 3286*0Sstevel@tonic-gate ba,pt %ncc, .xcopyin_more ! otherwise go to large copy 3287*0Sstevel@tonic-gate nop 3288*0Sstevel@tonic-gate.xcopyin_2: 3289*0Sstevel@tonic-gate btst 3, %o3 ! 3290*0Sstevel@tonic-gate bz,pt %ncc, .xcopyin_4 ! check for word alignment 3291*0Sstevel@tonic-gate nop 3292*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit 3293*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_2)], %o3 3294*0Sstevel@tonic-gate tst %o3 3295*0Sstevel@tonic-gate bz,pn %icc, .xcopyin_small ! if zero, disable HW copy 3296*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 3297*0Sstevel@tonic-gate bleu,pt %ncc, .xcopyin_small ! go to small copy 3298*0Sstevel@tonic-gate nop 3299*0Sstevel@tonic-gate ba,pt %ncc, .xcopyin_more ! otherwise go to large copy 3300*0Sstevel@tonic-gate nop 3301*0Sstevel@tonic-gate.xcopyin_4: 3302*0Sstevel@tonic-gate ! already checked longword, must be word aligned 3303*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit 3304*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_4)], %o3 3305*0Sstevel@tonic-gate tst %o3 3306*0Sstevel@tonic-gate bz,pn %icc, .xcopyin_small ! if zero, disable HW copy 3307*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 3308*0Sstevel@tonic-gate bleu,pt %ncc, .xcopyin_small ! go to small copy 3309*0Sstevel@tonic-gate nop 3310*0Sstevel@tonic-gate ba,pt %ncc, .xcopyin_more ! otherwise go to large copy 3311*0Sstevel@tonic-gate nop 3312*0Sstevel@tonic-gate.xcopyin_8: 3313*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit 3314*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_8)], %o3 3315*0Sstevel@tonic-gate tst %o3 3316*0Sstevel@tonic-gate bz,pn %icc, .xcopyin_small ! if zero, disable HW copy 3317*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 3318*0Sstevel@tonic-gate bleu,pt %ncc, .xcopyin_small ! go to small copy 3319*0Sstevel@tonic-gate nop 3320*0Sstevel@tonic-gate ba,pt %ncc, .xcopyin_more ! 
otherwise go to large copy 3321*0Sstevel@tonic-gate nop 3322*0Sstevel@tonic-gate 3323*0Sstevel@tonic-gate.xcopyin_small: 3324*0Sstevel@tonic-gate sethi %hi(.sm_xcopyin_err), %o5 ! .sm_xcopyin_err is lofault value 3325*0Sstevel@tonic-gate or %o5, %lo(.sm_xcopyin_err), %o5 3326*0Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o4 ! set/save t_lofaul 3327*0Sstevel@tonic-gate membar #Sync ! sync error barrier 3328*0Sstevel@tonic-gate ba,pt %ncc, .sm_do_copyin ! common code 3329*0Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] 3330*0Sstevel@tonic-gate 3331*0Sstevel@tonic-gate.xcopyin_more: 3332*0Sstevel@tonic-gate save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 3333*0Sstevel@tonic-gate sethi %hi(.xcopyin_err), REAL_LOFAULT ! .xcopyin_err is lofault value 3334*0Sstevel@tonic-gate ba,pt %ncc, .do_copyin 3335*0Sstevel@tonic-gate or REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT 3336*0Sstevel@tonic-gate 3337*0Sstevel@tonic-gate/* 3338*0Sstevel@tonic-gate * We got here because of fault during xcopyin 3339*0Sstevel@tonic-gate * Errno value is in ERRNO 3340*0Sstevel@tonic-gate */ 3341*0Sstevel@tonic-gate.xcopyin_err: 3342*0Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler 3343*0Sstevel@tonic-gate tst %o4 3344*0Sstevel@tonic-gate bz,pt %ncc, 2f ! if not, return error 3345*0Sstevel@tonic-gate nop 3346*0Sstevel@tonic-gate ldn [%o4 + CP_XCOPYIN], %g2 ! if handler, invoke it with 3347*0Sstevel@tonic-gate jmp %g2 ! original arguments 3348*0Sstevel@tonic-gate restore %g0, 0, %g0 ! dispose of copy window 3349*0Sstevel@tonic-gate2: 3350*0Sstevel@tonic-gate ret 3351*0Sstevel@tonic-gate restore ERRNO, 0, %o0 ! return errno value 3352*0Sstevel@tonic-gate 3353*0Sstevel@tonic-gate.sm_xcopyin_err: 3354*0Sstevel@tonic-gate 3355*0Sstevel@tonic-gate membar #Sync 3356*0Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! 
restore old t_lofault 3357*0Sstevel@tonic-gate mov SM_SAVE_SRC, %o0 3358*0Sstevel@tonic-gate mov SM_SAVE_DST, %o1 3359*0Sstevel@tonic-gate mov SM_SAVE_COUNT, %o2 3360*0Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler 3361*0Sstevel@tonic-gate tst %o3 3362*0Sstevel@tonic-gate bz,pt %ncc, 3f ! if not, return error 3363*0Sstevel@tonic-gate nop 3364*0Sstevel@tonic-gate ldn [%o3 + CP_XCOPYIN], %o5 ! if handler, invoke it with 3365*0Sstevel@tonic-gate jmp %o5 ! original arguments 3366*0Sstevel@tonic-gate nop 3367*0Sstevel@tonic-gate3: 3368*0Sstevel@tonic-gate retl 3369*0Sstevel@tonic-gate or %g1, 0, %o0 ! return errno value 3370*0Sstevel@tonic-gate 3371*0Sstevel@tonic-gate SET_SIZE(xcopyin) 3372*0Sstevel@tonic-gate 3373*0Sstevel@tonic-gate#endif /* lint */ 3374*0Sstevel@tonic-gate 3375*0Sstevel@tonic-gate#ifdef lint 3376*0Sstevel@tonic-gate 3377*0Sstevel@tonic-gate/*ARGSUSED*/ 3378*0Sstevel@tonic-gateint 3379*0Sstevel@tonic-gatexcopyin_little(const void *uaddr, void *kaddr, size_t count) 3380*0Sstevel@tonic-gate{ return (0); } 3381*0Sstevel@tonic-gate 3382*0Sstevel@tonic-gate#else /* lint */ 3383*0Sstevel@tonic-gate 3384*0Sstevel@tonic-gate ENTRY(xcopyin_little) 3385*0Sstevel@tonic-gate sethi %hi(.xcopyio_err), %o5 3386*0Sstevel@tonic-gate or %o5, %lo(.xcopyio_err), %o5 3387*0Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o4 3388*0Sstevel@tonic-gate membar #Sync ! sync error barrier 3389*0Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] 3390*0Sstevel@tonic-gate mov %o4, %o5 3391*0Sstevel@tonic-gate 3392*0Sstevel@tonic-gate subcc %g0, %o2, %o3 3393*0Sstevel@tonic-gate add %o0, %o2, %o0 3394*0Sstevel@tonic-gate bz,pn %ncc, 2f ! check for zero bytes 3395*0Sstevel@tonic-gate sub %o2, 1, %o4 3396*0Sstevel@tonic-gate add %o0, %o4, %o0 ! 
start w/last byte 3397*0Sstevel@tonic-gate add %o1, %o2, %o1 3398*0Sstevel@tonic-gate lduba [%o0 + %o3]ASI_AIUSL, %o4 3399*0Sstevel@tonic-gate 3400*0Sstevel@tonic-gate1: stb %o4, [%o1 + %o3] 3401*0Sstevel@tonic-gate inccc %o3 3402*0Sstevel@tonic-gate sub %o0, 2, %o0 ! get next byte 3403*0Sstevel@tonic-gate bcc,a,pt %ncc, 1b 3404*0Sstevel@tonic-gate lduba [%o0 + %o3]ASI_AIUSL, %o4 3405*0Sstevel@tonic-gate 3406*0Sstevel@tonic-gate2: 3407*0Sstevel@tonic-gate membar #Sync ! sync error barrier 3408*0Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 3409*0Sstevel@tonic-gate retl 3410*0Sstevel@tonic-gate mov %g0, %o0 ! return (0) 3411*0Sstevel@tonic-gate 3412*0Sstevel@tonic-gate.xcopyio_err: 3413*0Sstevel@tonic-gate membar #Sync ! sync error barrier 3414*0Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 3415*0Sstevel@tonic-gate retl 3416*0Sstevel@tonic-gate mov %g1, %o0 3417*0Sstevel@tonic-gate 3418*0Sstevel@tonic-gate SET_SIZE(xcopyin_little) 3419*0Sstevel@tonic-gate 3420*0Sstevel@tonic-gate#endif /* lint */ 3421*0Sstevel@tonic-gate 3422*0Sstevel@tonic-gate 3423*0Sstevel@tonic-gate/* 3424*0Sstevel@tonic-gate * Copy a block of storage - must not overlap (from + len <= to). 3425*0Sstevel@tonic-gate * No fault handler installed (to be called under on_fault()) 3426*0Sstevel@tonic-gate */ 3427*0Sstevel@tonic-gate#if defined(lint) 3428*0Sstevel@tonic-gate 3429*0Sstevel@tonic-gate/* ARGSUSED */ 3430*0Sstevel@tonic-gatevoid 3431*0Sstevel@tonic-gatecopyin_noerr(const void *ufrom, void *kto, size_t count) 3432*0Sstevel@tonic-gate{} 3433*0Sstevel@tonic-gate 3434*0Sstevel@tonic-gate#else /* lint */ 3435*0Sstevel@tonic-gate ENTRY(copyin_noerr) 3436*0Sstevel@tonic-gate 3437*0Sstevel@tonic-gate cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case 3438*0Sstevel@tonic-gate bleu,pt %ncc, .copyin_ne_small ! go to larger cases 3439*0Sstevel@tonic-gate xor %o0, %o1, %o3 ! are src, dst alignable? 3440*0Sstevel@tonic-gate btst 7, %o3 ! 
3441*0Sstevel@tonic-gate bz,pt %ncc, .copyin_ne_8 ! check for longword alignment 3442*0Sstevel@tonic-gate nop 3443*0Sstevel@tonic-gate btst 1, %o3 ! 3444*0Sstevel@tonic-gate bz,pt %ncc, .copyin_ne_2 ! check for half-word 3445*0Sstevel@tonic-gate nop 3446*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit 3447*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_1)], %o3 3448*0Sstevel@tonic-gate tst %o3 3449*0Sstevel@tonic-gate bz,pn %icc, .copyin_ne_small ! if zero, disable HW copy 3450*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 3451*0Sstevel@tonic-gate bleu,pt %ncc, .copyin_ne_small ! go to small copy 3452*0Sstevel@tonic-gate nop 3453*0Sstevel@tonic-gate ba,pt %ncc, .copyin_noerr_more ! otherwise go to large copy 3454*0Sstevel@tonic-gate nop 3455*0Sstevel@tonic-gate.copyin_ne_2: 3456*0Sstevel@tonic-gate btst 3, %o3 ! 3457*0Sstevel@tonic-gate bz,pt %ncc, .copyin_ne_4 ! check for word alignment 3458*0Sstevel@tonic-gate nop 3459*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit 3460*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_2)], %o3 3461*0Sstevel@tonic-gate tst %o3 3462*0Sstevel@tonic-gate bz,pn %icc, .copyin_ne_small ! if zero, disable HW copy 3463*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 3464*0Sstevel@tonic-gate bleu,pt %ncc, .copyin_ne_small ! go to small copy 3465*0Sstevel@tonic-gate nop 3466*0Sstevel@tonic-gate ba,pt %ncc, .copyin_noerr_more ! otherwise go to large copy 3467*0Sstevel@tonic-gate nop 3468*0Sstevel@tonic-gate.copyin_ne_4: 3469*0Sstevel@tonic-gate ! already checked longword, must be word aligned 3470*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit 3471*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_4)], %o3 3472*0Sstevel@tonic-gate tst %o3 3473*0Sstevel@tonic-gate bz,pn %icc, .copyin_ne_small ! if zero, disable HW copy 3474*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 3475*0Sstevel@tonic-gate bleu,pt %ncc, .copyin_ne_small ! 
go to small copy 3476*0Sstevel@tonic-gate nop 3477*0Sstevel@tonic-gate ba,pt %ncc, .copyin_noerr_more ! otherwise go to large copy 3478*0Sstevel@tonic-gate nop 3479*0Sstevel@tonic-gate.copyin_ne_8: 3480*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit 3481*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_8)], %o3 3482*0Sstevel@tonic-gate tst %o3 3483*0Sstevel@tonic-gate bz,pn %icc, .copyin_ne_small ! if zero, disable HW copy 3484*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 3485*0Sstevel@tonic-gate bleu,pt %ncc, .copyin_ne_small ! go to small copy 3486*0Sstevel@tonic-gate nop 3487*0Sstevel@tonic-gate ba,pt %ncc, .copyin_noerr_more ! otherwise go to large copy 3488*0Sstevel@tonic-gate nop 3489*0Sstevel@tonic-gate 3490*0Sstevel@tonic-gate.copyin_ne_small: 3491*0Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o4 3492*0Sstevel@tonic-gate tst %o4 3493*0Sstevel@tonic-gate bz,pn %ncc, .sm_do_copyin 3494*0Sstevel@tonic-gate nop 3495*0Sstevel@tonic-gate sethi %hi(.sm_copyio_noerr), %o5 3496*0Sstevel@tonic-gate or %o5, %lo(.sm_copyio_noerr), %o5 3497*0Sstevel@tonic-gate membar #Sync ! sync error barrier 3498*0Sstevel@tonic-gate ba,pt %ncc, .sm_do_copyin 3499*0Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] ! set/save t_lofault 3500*0Sstevel@tonic-gate 3501*0Sstevel@tonic-gate.copyin_noerr_more: 3502*0Sstevel@tonic-gate save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 3503*0Sstevel@tonic-gate sethi %hi(.copyio_noerr), REAL_LOFAULT 3504*0Sstevel@tonic-gate ba,pt %ncc, .do_copyin 3505*0Sstevel@tonic-gate or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT 3506*0Sstevel@tonic-gate 3507*0Sstevel@tonic-gate.copyio_noerr: 3508*0Sstevel@tonic-gate jmp %l6 3509*0Sstevel@tonic-gate restore %g0,0,%g0 3510*0Sstevel@tonic-gate 3511*0Sstevel@tonic-gate.sm_copyio_noerr: 3512*0Sstevel@tonic-gate membar #Sync 3513*0Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! 
restore t_lofault 3514*0Sstevel@tonic-gate jmp %o4 3515*0Sstevel@tonic-gate nop 3516*0Sstevel@tonic-gate 3517*0Sstevel@tonic-gate SET_SIZE(copyin_noerr) 3518*0Sstevel@tonic-gate#endif /* lint */ 3519*0Sstevel@tonic-gate 3520*0Sstevel@tonic-gate/* 3521*0Sstevel@tonic-gate * Copy a block of storage - must not overlap (from + len <= to). 3522*0Sstevel@tonic-gate * No fault handler installed (to be called under on_fault()) 3523*0Sstevel@tonic-gate */ 3524*0Sstevel@tonic-gate 3525*0Sstevel@tonic-gate#if defined(lint) 3526*0Sstevel@tonic-gate 3527*0Sstevel@tonic-gate/* ARGSUSED */ 3528*0Sstevel@tonic-gatevoid 3529*0Sstevel@tonic-gatecopyout_noerr(const void *kfrom, void *uto, size_t count) 3530*0Sstevel@tonic-gate{} 3531*0Sstevel@tonic-gate 3532*0Sstevel@tonic-gate#else /* lint */ 3533*0Sstevel@tonic-gate ENTRY(copyout_noerr) 3534*0Sstevel@tonic-gate 3535*0Sstevel@tonic-gate cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case 3536*0Sstevel@tonic-gate bleu,pt %ncc, .copyout_ne_small ! go to larger cases 3537*0Sstevel@tonic-gate xor %o0, %o1, %o3 ! are src, dst alignable? 3538*0Sstevel@tonic-gate btst 7, %o3 ! 3539*0Sstevel@tonic-gate bz,pt %ncc, .copyout_ne_8 ! check for longword alignment 3540*0Sstevel@tonic-gate nop 3541*0Sstevel@tonic-gate btst 1, %o3 ! 3542*0Sstevel@tonic-gate bz,pt %ncc, .copyout_ne_2 ! check for half-word 3543*0Sstevel@tonic-gate nop 3544*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit 3545*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_1)], %o3 3546*0Sstevel@tonic-gate tst %o3 3547*0Sstevel@tonic-gate bz,pn %icc, .copyout_ne_small ! if zero, disable HW copy 3548*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 3549*0Sstevel@tonic-gate bleu,pt %ncc, .copyout_ne_small ! go to small copy 3550*0Sstevel@tonic-gate nop 3551*0Sstevel@tonic-gate ba,pt %ncc, .copyout_noerr_more ! otherwise go to large copy 3552*0Sstevel@tonic-gate nop 3553*0Sstevel@tonic-gate.copyout_ne_2: 3554*0Sstevel@tonic-gate btst 3, %o3 ! 
3555*0Sstevel@tonic-gate bz,pt %ncc, .copyout_ne_4 ! check for word alignment 3556*0Sstevel@tonic-gate nop 3557*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit 3558*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_2)], %o3 3559*0Sstevel@tonic-gate tst %o3 3560*0Sstevel@tonic-gate bz,pn %icc, .copyout_ne_small ! if zero, disable HW copy 3561*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 3562*0Sstevel@tonic-gate bleu,pt %ncc, .copyout_ne_small ! go to small copy 3563*0Sstevel@tonic-gate nop 3564*0Sstevel@tonic-gate ba,pt %ncc, .copyout_noerr_more ! otherwise go to large copy 3565*0Sstevel@tonic-gate nop 3566*0Sstevel@tonic-gate.copyout_ne_4: 3567*0Sstevel@tonic-gate ! already checked longword, must be word aligned 3568*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit 3569*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_4)], %o3 3570*0Sstevel@tonic-gate tst %o3 3571*0Sstevel@tonic-gate bz,pn %icc, .copyout_ne_small ! if zero, disable HW copy 3572*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 3573*0Sstevel@tonic-gate bleu,pt %ncc, .copyout_ne_small ! go to small copy 3574*0Sstevel@tonic-gate nop 3575*0Sstevel@tonic-gate ba,pt %ncc, .copyout_noerr_more ! otherwise go to large copy 3576*0Sstevel@tonic-gate nop 3577*0Sstevel@tonic-gate.copyout_ne_8: 3578*0Sstevel@tonic-gate sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit 3579*0Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_8)], %o3 3580*0Sstevel@tonic-gate tst %o3 3581*0Sstevel@tonic-gate bz,pn %icc, .copyout_ne_small ! if zero, disable HW copy 3582*0Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 3583*0Sstevel@tonic-gate bleu,pt %ncc, .copyout_ne_small ! go to small copy 3584*0Sstevel@tonic-gate nop 3585*0Sstevel@tonic-gate ba,pt %ncc, .copyout_noerr_more ! 
otherwise go to large copy 3586*0Sstevel@tonic-gate nop 3587*0Sstevel@tonic-gate 3588*0Sstevel@tonic-gate.copyout_ne_small: 3589*0Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o4 3590*0Sstevel@tonic-gate tst %o4 3591*0Sstevel@tonic-gate bz,pn %ncc, .sm_do_copyout 3592*0Sstevel@tonic-gate nop 3593*0Sstevel@tonic-gate sethi %hi(.sm_copyio_noerr), %o5 3594*0Sstevel@tonic-gate or %o5, %lo(.sm_copyio_noerr), %o5 3595*0Sstevel@tonic-gate membar #Sync ! sync error barrier 3596*0Sstevel@tonic-gate ba,pt %ncc, .sm_do_copyout 3597*0Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] ! set/save t_lofault 3598*0Sstevel@tonic-gate 3599*0Sstevel@tonic-gate.copyout_noerr_more: 3600*0Sstevel@tonic-gate save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 3601*0Sstevel@tonic-gate sethi %hi(.copyio_noerr), REAL_LOFAULT 3602*0Sstevel@tonic-gate ba,pt %ncc, .do_copyout 3603*0Sstevel@tonic-gate or REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT 3604*0Sstevel@tonic-gate 3605*0Sstevel@tonic-gate SET_SIZE(copyout_noerr) 3606*0Sstevel@tonic-gate#endif /* lint */ 3607*0Sstevel@tonic-gate 3608*0Sstevel@tonic-gate 3609*0Sstevel@tonic-gate/* 3610*0Sstevel@tonic-gate * hwblkclr - clears block-aligned, block-multiple-sized regions that are 3611*0Sstevel@tonic-gate * longer than 256 bytes in length using spitfire's block stores. If 3612*0Sstevel@tonic-gate * the criteria for using this routine are not met then it calls bzero 3613*0Sstevel@tonic-gate * and returns 1. Otherwise 0 is returned indicating success. 3614*0Sstevel@tonic-gate * Caller is responsible for ensuring use_hw_bzero is true and that 3615*0Sstevel@tonic-gate * kpreempt_disable() has been called. 3616*0Sstevel@tonic-gate */ 3617*0Sstevel@tonic-gate#ifdef lint 3618*0Sstevel@tonic-gate/*ARGSUSED*/ 3619*0Sstevel@tonic-gateint 3620*0Sstevel@tonic-gatehwblkclr(void *addr, size_t len) 3621*0Sstevel@tonic-gate{ 3622*0Sstevel@tonic-gate return(0); 3623*0Sstevel@tonic-gate} 3624*0Sstevel@tonic-gate#else /* lint */ 3625*0Sstevel@tonic-gate ! 
%i0 - start address 3626*0Sstevel@tonic-gate ! %i1 - length of region (multiple of 64) 3627*0Sstevel@tonic-gate ! %l0 - saved fprs 3628*0Sstevel@tonic-gate ! %l1 - pointer to saved %d0 block 3629*0Sstevel@tonic-gate ! %l2 - saved curthread->t_lwp 3630*0Sstevel@tonic-gate 3631*0Sstevel@tonic-gate ENTRY(hwblkclr) 3632*0Sstevel@tonic-gate ! get another window w/space for one aligned block of saved fpregs 3633*0Sstevel@tonic-gate save %sp, -SA(MINFRAME + 2*VIS_BLOCKSIZE), %sp 3634*0Sstevel@tonic-gate 3635*0Sstevel@tonic-gate ! Must be block-aligned 3636*0Sstevel@tonic-gate andcc %i0, (VIS_BLOCKSIZE-1), %g0 3637*0Sstevel@tonic-gate bnz,pn %ncc, 1f 3638*0Sstevel@tonic-gate nop 3639*0Sstevel@tonic-gate 3640*0Sstevel@tonic-gate ! ... and must be 256 bytes or more 3641*0Sstevel@tonic-gate cmp %i1, 256 3642*0Sstevel@tonic-gate blu,pn %ncc, 1f 3643*0Sstevel@tonic-gate nop 3644*0Sstevel@tonic-gate 3645*0Sstevel@tonic-gate ! ... and length must be a multiple of VIS_BLOCKSIZE 3646*0Sstevel@tonic-gate andcc %i1, (VIS_BLOCKSIZE-1), %g0 3647*0Sstevel@tonic-gate bz,pn %ncc, 2f 3648*0Sstevel@tonic-gate nop 3649*0Sstevel@tonic-gate 3650*0Sstevel@tonic-gate1: ! punt, call bzero but notify the caller that bzero was used 3651*0Sstevel@tonic-gate mov %i0, %o0 3652*0Sstevel@tonic-gate call bzero 3653*0Sstevel@tonic-gate mov %i1, %o1 3654*0Sstevel@tonic-gate ret 3655*0Sstevel@tonic-gate restore %g0, 1, %o0 ! return (1) - did not use block operations 3656*0Sstevel@tonic-gate 3657*0Sstevel@tonic-gate2: rd %fprs, %l0 ! check for unused fp 3658*0Sstevel@tonic-gate btst FPRS_FEF, %l0 3659*0Sstevel@tonic-gate bz,pt %icc, 1f 3660*0Sstevel@tonic-gate nop 3661*0Sstevel@tonic-gate 3662*0Sstevel@tonic-gate ! 
save in-use fpregs on stack 3663*0Sstevel@tonic-gate membar #Sync 3664*0Sstevel@tonic-gate add %fp, STACK_BIAS - 65, %l1 3665*0Sstevel@tonic-gate and %l1, -VIS_BLOCKSIZE, %l1 3666*0Sstevel@tonic-gate stda %d0, [%l1]ASI_BLK_P 3667*0Sstevel@tonic-gate 3668*0Sstevel@tonic-gate1: membar #StoreStore|#StoreLoad|#LoadStore 3669*0Sstevel@tonic-gate wr %g0, FPRS_FEF, %fprs 3670*0Sstevel@tonic-gate wr %g0, ASI_BLK_P, %asi 3671*0Sstevel@tonic-gate 3672*0Sstevel@tonic-gate ! Clear block 3673*0Sstevel@tonic-gate fzero %d0 3674*0Sstevel@tonic-gate fzero %d2 3675*0Sstevel@tonic-gate fzero %d4 3676*0Sstevel@tonic-gate fzero %d6 3677*0Sstevel@tonic-gate fzero %d8 3678*0Sstevel@tonic-gate fzero %d10 3679*0Sstevel@tonic-gate fzero %d12 3680*0Sstevel@tonic-gate fzero %d14 3681*0Sstevel@tonic-gate 3682*0Sstevel@tonic-gate mov 256, %i3 3683*0Sstevel@tonic-gate ba,pt %ncc, .pz_doblock 3684*0Sstevel@tonic-gate nop 3685*0Sstevel@tonic-gate 3686*0Sstevel@tonic-gate.pz_blkstart: 3687*0Sstevel@tonic-gate ! stda %d0, [%i0 + 192]%asi ! in dly slot of branch that got us here 3688*0Sstevel@tonic-gate stda %d0, [%i0 + 128]%asi 3689*0Sstevel@tonic-gate stda %d0, [%i0 + 64]%asi 3690*0Sstevel@tonic-gate stda %d0, [%i0]%asi 3691*0Sstevel@tonic-gate.pz_zinst: 3692*0Sstevel@tonic-gate add %i0, %i3, %i0 3693*0Sstevel@tonic-gate sub %i1, %i3, %i1 3694*0Sstevel@tonic-gate.pz_doblock: 3695*0Sstevel@tonic-gate cmp %i1, 256 3696*0Sstevel@tonic-gate bgeu,a %ncc, .pz_blkstart 3697*0Sstevel@tonic-gate stda %d0, [%i0 + 192]%asi 3698*0Sstevel@tonic-gate 3699*0Sstevel@tonic-gate cmp %i1, 64 3700*0Sstevel@tonic-gate blu %ncc, .pz_finish 3701*0Sstevel@tonic-gate 3702*0Sstevel@tonic-gate andn %i1, (64-1), %i3 3703*0Sstevel@tonic-gate srl %i3, 4, %i2 ! 
using blocks, 1 instr / 16 words 3704*0Sstevel@tonic-gate set .pz_zinst, %i4 3705*0Sstevel@tonic-gate sub %i4, %i2, %i4 3706*0Sstevel@tonic-gate jmp %i4 3707*0Sstevel@tonic-gate nop 3708*0Sstevel@tonic-gate 3709*0Sstevel@tonic-gate.pz_finish: 3710*0Sstevel@tonic-gate membar #Sync 3711*0Sstevel@tonic-gate btst FPRS_FEF, %l0 3712*0Sstevel@tonic-gate bz,a .pz_finished 3713*0Sstevel@tonic-gate wr %l0, 0, %fprs ! restore fprs 3714*0Sstevel@tonic-gate 3715*0Sstevel@tonic-gate ! restore fpregs from stack 3716*0Sstevel@tonic-gate ldda [%l1]ASI_BLK_P, %d0 3717*0Sstevel@tonic-gate membar #Sync 3718*0Sstevel@tonic-gate wr %l0, 0, %fprs ! restore fprs 3719*0Sstevel@tonic-gate 3720*0Sstevel@tonic-gate.pz_finished: 3721*0Sstevel@tonic-gate ret 3722*0Sstevel@tonic-gate restore %g0, 0, %o0 ! return (bzero or not) 3723*0Sstevel@tonic-gate 3724*0Sstevel@tonic-gate SET_SIZE(hwblkclr) 3725*0Sstevel@tonic-gate#endif /* lint */ 3726*0Sstevel@tonic-gate 3727*0Sstevel@tonic-gate#ifdef lint 3728*0Sstevel@tonic-gate/*ARGSUSED*/ 3729*0Sstevel@tonic-gatevoid 3730*0Sstevel@tonic-gatehw_pa_bcopy32(uint64_t src, uint64_t dst) 3731*0Sstevel@tonic-gate{} 3732*0Sstevel@tonic-gate#else /*!lint */ 3733*0Sstevel@tonic-gate /* 3734*0Sstevel@tonic-gate * Copy 32 bytes of data from src (%o0) to dst (%o1) 3735*0Sstevel@tonic-gate * using physical addresses. 
3736*0Sstevel@tonic-gate */ 3737*0Sstevel@tonic-gate ENTRY_NP(hw_pa_bcopy32) 3738*0Sstevel@tonic-gate rdpr %pstate, %g1 3739*0Sstevel@tonic-gate andn %g1, PSTATE_IE, %g2 3740*0Sstevel@tonic-gate wrpr %g0, %g2, %pstate 3741*0Sstevel@tonic-gate 3742*0Sstevel@tonic-gate rdpr %pstate, %g0 3743*0Sstevel@tonic-gate ldxa [%o0]ASI_MEM, %o2 3744*0Sstevel@tonic-gate add %o0, 8, %o0 3745*0Sstevel@tonic-gate ldxa [%o0]ASI_MEM, %o3 3746*0Sstevel@tonic-gate add %o0, 8, %o0 3747*0Sstevel@tonic-gate ldxa [%o0]ASI_MEM, %o4 3748*0Sstevel@tonic-gate add %o0, 8, %o0 3749*0Sstevel@tonic-gate ldxa [%o0]ASI_MEM, %o5 3750*0Sstevel@tonic-gate 3751*0Sstevel@tonic-gate stxa %g0, [%o1]ASI_DC_INVAL 3752*0Sstevel@tonic-gate membar #Sync 3753*0Sstevel@tonic-gate 3754*0Sstevel@tonic-gate stxa %o2, [%o1]ASI_MEM 3755*0Sstevel@tonic-gate add %o1, 8, %o1 3756*0Sstevel@tonic-gate stxa %o3, [%o1]ASI_MEM 3757*0Sstevel@tonic-gate add %o1, 8, %o1 3758*0Sstevel@tonic-gate stxa %o4, [%o1]ASI_MEM 3759*0Sstevel@tonic-gate add %o1, 8, %o1 3760*0Sstevel@tonic-gate stxa %o5, [%o1]ASI_MEM 3761*0Sstevel@tonic-gate 3762*0Sstevel@tonic-gate retl 3763*0Sstevel@tonic-gate wrpr %g0, %g1, %pstate 3764*0Sstevel@tonic-gate 3765*0Sstevel@tonic-gate SET_SIZE(hw_pa_bcopy32) 3766*0Sstevel@tonic-gate 3767*0Sstevel@tonic-gate#endif /* lint */ 3768*0Sstevel@tonic-gate 3769*0Sstevel@tonic-gate#if defined(lint) 3770*0Sstevel@tonic-gate 3771*0Sstevel@tonic-gateint use_hw_bcopy = 1; 3772*0Sstevel@tonic-gateint use_hw_bzero = 1; 3773*0Sstevel@tonic-gateuint_t hw_copy_limit_1 = 0; 3774*0Sstevel@tonic-gateuint_t hw_copy_limit_2 = 0; 3775*0Sstevel@tonic-gateuint_t hw_copy_limit_4 = 0; 3776*0Sstevel@tonic-gateuint_t hw_copy_limit_8 = 0; 3777*0Sstevel@tonic-gate 3778*0Sstevel@tonic-gate#else /* !lint */ 3779*0Sstevel@tonic-gate 3780*0Sstevel@tonic-gate DGDEF(use_hw_bcopy) 3781*0Sstevel@tonic-gate .word 1 3782*0Sstevel@tonic-gate DGDEF(use_hw_bzero) 3783*0Sstevel@tonic-gate .word 1 3784*0Sstevel@tonic-gate DGDEF(hw_copy_limit_1) 
3785*0Sstevel@tonic-gate .word 0 3786*0Sstevel@tonic-gate DGDEF(hw_copy_limit_2) 3787*0Sstevel@tonic-gate .word 0 3788*0Sstevel@tonic-gate DGDEF(hw_copy_limit_4) 3789*0Sstevel@tonic-gate .word 0 3790*0Sstevel@tonic-gate DGDEF(hw_copy_limit_8) 3791*0Sstevel@tonic-gate .word 0 3792*0Sstevel@tonic-gate 3793*0Sstevel@tonic-gate .align 64 3794*0Sstevel@tonic-gate .section ".text" 3795*0Sstevel@tonic-gate#endif /* !lint */ 3796