1*84d9c625SLionel Sambuc/* $NetBSD: memcpy.S,v 1.2 2013/03/17 02:13:10 christos Exp $ */ 2*84d9c625SLionel Sambuc 3*84d9c625SLionel Sambuc/* 4*84d9c625SLionel Sambuc * Copyright (c) 1996-2002 Eduardo Horvath 5*84d9c625SLionel Sambuc * All rights reserved. 6*84d9c625SLionel Sambuc * 7*84d9c625SLionel Sambuc * Redistribution and use in source and binary forms, with or without 8*84d9c625SLionel Sambuc * modification, are permitted provided that the following conditions 9*84d9c625SLionel Sambuc * are met: 10*84d9c625SLionel Sambuc * 1. Redistributions of source code must retain the above copyright 11*84d9c625SLionel Sambuc * notice, this list of conditions and the following disclaimer. 12*84d9c625SLionel Sambuc * 13*84d9c625SLionel Sambuc * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 14*84d9c625SLionel Sambuc * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15*84d9c625SLionel Sambuc * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16*84d9c625SLionel Sambuc * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE 17*84d9c625SLionel Sambuc * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18*84d9c625SLionel Sambuc * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19*84d9c625SLionel Sambuc * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20*84d9c625SLionel Sambuc * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21*84d9c625SLionel Sambuc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22*84d9c625SLionel Sambuc * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23*84d9c625SLionel Sambuc * SUCH DAMAGE. 
*/
#include "strmacros.h"
#if defined(LIBC_SCCS) && !defined(lint)
RCSID("$NetBSD: memcpy.S,v 1.2 2013/03/17 02:13:10 christos Exp $")
#endif  /* LIBC_SCCS and not lint */

/*
 * memcpy(dest, src, size)
 * bcopy(src, dest, size)
 *
 * Copy `size' bytes from src to dest.
 * Assumes regions do not overlap;
 *
 * memcpy swaps its first two arguments and falls through into the
 * bcopy code, so from there on %o0 = src, %o1 = dest, %o2 = size.
 * Both exits below leave the destination pointer in %o0.
 *
 * Copies of fewer than BCOPY_SMALL bytes are done a byte at a time
 * without taking a register window.  Longer copies do a `save' and:
 *   1. copy 1, 2 and 4 bytes as needed until dest is 8-byte aligned;
 *   2. copy 8 bytes per store; when src is not 8-byte aligned as well,
 *      adjacent source doublewords are merged with a shift pair;
 *   3. store whatever is left (Lmemcpy_finish).
 *
 * Register usage in the long path (after save):
 *	%l0	src cursor
 *	%l1	dest cursor
 *	%l2	bytes left (biased while the 8-byte loops run)
 *	%l3	right-shift count in bits (64 - %l4)
 *	%l4	left-shift count in bits (8 * (src & 7))
 *	%o0-%o5	source doublewords in flight
 *	%g1,%g6	scratch
 *
 * Must not use %g7 (the original note says "see copyin/copyout above";
 * those routines do not appear above in this file).
 */
ENTRY(memcpy) /* dest, src, size */
	/*
	 * Swap args for bcopy.  Gcc generates calls to memcpy for
	 * structure assignments.
	 */
	mov	%o0, %o3
	mov	%o1, %o0
	mov	%o3, %o1
#if !defined(_KERNEL) || defined(_RUMPKERNEL)
ENTRY(bcopy) /* src, dest, size */
#endif
#ifdef DEBUG
#if defined(_KERNEL) && !defined(_RUMPKERNEL)
	set	pmapdebug, %o4
	ld	[%o4], %o4
	btst	0x80, %o4	! PDB_COPY
	bz,pt	%icc, 3f	! Trace only when that bit is set
	 nop
#endif
	save	%sp, -CC64FSZ, %sp
	/*
	 * Args are in bcopy order here (%i0 = src, %i1 = dest) while
	 * the format string reads "dest<-src", so pass dest first.
	 */
	mov	%i1, %o1	! 1st %p: dest
	set	2f, %o0
	mov	%i0, %o2	! 2nd %p: src
	call	printf
	 mov	%i2, %o3	! %x: size
!	ta	1; nop
	restore
	.data
2:	.asciz	"memcpy(%p<-%p,%x)\n"
	_ALIGN
	.text
3:
#endif

	cmp	%o2, BCOPY_SMALL

Lmemcpy_start:
	bge,pt	CCCR, 2f	! if >= this many, go be fancy.
	 cmp	%o2, 256	! (delay slot) flags for the block-copy test at 2:

	mov	%o1, %o5	! Save memcpy return value
	/*
	 * Not much to copy, just do it a byte at a time.
	 */
	deccc	%o2		! while (--len >= 0)
	bl	1f
	 .empty			! delay slot is the `inc %o0' at 0: below
0:
	inc	%o0
	ldsb	[%o0 - 1], %o4	!	(++dst)[-1] = *src++;
	stb	%o4, [%o1]
	deccc	%o2
	bge	0b
	 inc	%o1
1:
	retl
	 mov	%o5, %o0	! Return dest
	NOTREACHED

	/*
	 * Plenty of data to copy, so try to do it optimally.
	 */
2:
#ifdef USE_BLOCK_STORE_LOAD
	! If it is big enough, use VIS instructions
	! (flags come from the `cmp %o2, 256' in the delay slot above)
	bge	Lmemcpy_block
	 nop
#endif /* USE_BLOCK_STORE_LOAD */
Lmemcpy_fancy:

	!!
	!! First align the output to a 8-byte entity
	!!

	save	%sp, -CC64FSZ, %sp

	mov	%i0, %l0				! src cursor
	mov	%i1, %l1				! dest cursor

	mov	%i2, %l2				! bytes left
	btst	1, %l1

	bz,pt	%icc, 4f				! dest already even
	 btst	2, %l1
	ldub	[%l0], %l4				! Load 1st byte

	deccc	1, %l2
	!! NOTE(review): the XXXX exits below leave before storing what was
	!! just loaded; presumably never taken because size >= BCOPY_SMALL
	!! here (BCOPY_SMALL is defined outside this file -- confirm).
	ble,pn	CCCR, Lmemcpy_finish			! XXXX
	 inc	1, %l0

	stb	%l4, [%l1]				! Store 1st byte
	inc	1, %l1					! Update address
	btst	2, %l1
4:
	bz,pt	%icc, 4f				! dest already 4-byte aligned

	 btst	1, %l0
	bz,a	1f					! src even: one halfword load
	 lduh	[%l0], %l4				! Load short

	ldub	[%l0], %l4				! Load bytes

	ldub	[%l0+1], %l3
	sllx	%l4, 8, %l4
	or	%l3, %l4, %l4

1:
	deccc	2, %l2
	ble,pn	CCCR, Lmemcpy_finish			! XXXX
	 inc	2, %l0
	sth	%l4, [%l1]				! Store 1st short

	inc	2, %l1
4:
	btst	4, %l1
	bz,pt	CCCR, 4f				! dest already 8-byte aligned

	 btst	3, %l0
	bz,a,pt	CCCR, 1f				! src 4-byte aligned: one word load
	 lduw	[%l0], %l4				! Load word -1

	btst	1, %l0
	bz,a,pt	%icc, 2f				! src even: two halfword loads
	 lduh	[%l0], %l4

	ldub	[%l0], %l4				! src odd: byte, halfword, byte

	lduh	[%l0+1], %l3
	sllx	%l4, 16, %l4
	or	%l4, %l3, %l4

	ldub	[%l0+3], %l3
	sllx	%l4, 8, %l4
	ba,pt	%icc, 1f
	 or	%l4, %l3, %l4

2:
	lduh	[%l0+2], %l3
	sllx	%l4, 16, %l4
	or	%l4, %l3, %l4

1:
	deccc	4, %l2
	ble,pn	CCCR, Lmemcpy_finish		! XXXX
	 inc	4, %l0

	st	%l4, [%l1]				! Store word
	inc	4, %l1
4:
	!!
	!! We are now 64-bit (8-byte) aligned in the dest.
	!!
Lmemcpy_common:

	and	%l0, 7, %l4				! Shift amount
	andn	%l0, 7, %l0				! Source addr

	brz,pt	%l4, Lmemcpy_noshift8			! No shift version...

	 sllx	%l4, 3, %l4				! In bits
	mov	8<<3, %l3

	ldx	[%l0], %o0				! Load word -1
	sub	%l3, %l4, %l3				! Reverse shift
	deccc	12*8, %l2				! Have enough room?

	sllx	%o0, %l4, %o0
	bl,pn	CCCR, 2f				! No: skip the unrolled loop
	 and	%l3, 0x38, %l3
Lmemcpy_unrolled8:

	/*
	 * This is about as close to optimal as you can get, since
	 * the shifts require EU0 and cannot be paired, and you have
	 * 3 dependent operations on the data.
	 *
	 * Each pass stores six doublewords; each one is the previous
	 * source word shifted left by %l4 or'ed with the next source
	 * word shifted right by %l3.
	 */

!	ldx	[%l0+0*8], %o0				! Already done
!	sllx	%o0, %l4, %o0				! Already done
	ldx	[%l0+1*8], %o1
	ldx	[%l0+2*8], %o2
	ldx	[%l0+3*8], %o3
	ldx	[%l0+4*8], %o4
	ba,pt	%icc, 1f
	 ldx	[%l0+5*8], %o5
	.align	8
1:
	srlx	%o1, %l3, %g1
	inc	6*8, %l0

	sllx	%o1, %l4, %o1
	or	%g1, %o0, %g6
	ldx	[%l0+0*8], %o0

	stx	%g6, [%l1+0*8]
	srlx	%o2, %l3, %g1

	sllx	%o2, %l4, %o2
	or	%g1, %o1, %g6
	ldx	[%l0+1*8], %o1

	stx	%g6, [%l1+1*8]
	srlx	%o3, %l3, %g1

	sllx	%o3, %l4, %o3
	or	%g1, %o2, %g6
	ldx	[%l0+2*8], %o2

	stx	%g6, [%l1+2*8]
	srlx	%o4, %l3, %g1

	sllx	%o4, %l4, %o4
	or	%g1, %o3, %g6
	ldx	[%l0+3*8], %o3

	stx	%g6, [%l1+3*8]
	srlx	%o5, %l3, %g1

	sllx	%o5, %l4, %o5
	or	%g1, %o4, %g6
	ldx	[%l0+4*8], %o4

	stx	%g6, [%l1+4*8]
	srlx	%o0, %l3, %g1
	deccc	6*8, %l2				! Have enough room?

	sllx	%o0, %l4, %o0				! Next loop
	or	%g1, %o5, %g6
	ldx	[%l0+5*8], %o5

	stx	%g6, [%l1+5*8]
	bge,pt	CCCR, 1b
	 inc	6*8, %l1

Lmemcpy_unrolled8_cleanup:
	!!
	!! Finished 8 byte block, unload the regs.
	!! (five more stores from the six words already loaded)
	!!
	srlx	%o1, %l3, %g1
	inc	5*8, %l0

	sllx	%o1, %l4, %o1
	or	%g1, %o0, %g6

	stx	%g6, [%l1+0*8]
	srlx	%o2, %l3, %g1

	sllx	%o2, %l4, %o2
	or	%g1, %o1, %g6

	stx	%g6, [%l1+1*8]
	srlx	%o3, %l3, %g1

	sllx	%o3, %l4, %o3
	or	%g1, %o2, %g6

	stx	%g6, [%l1+2*8]
	srlx	%o4, %l3, %g1

	sllx	%o4, %l4, %o4
	or	%g1, %o3, %g6

	stx	%g6, [%l1+3*8]
	srlx	%o5, %l3, %g1

	sllx	%o5, %l4, %o5
	or	%g1, %o4, %g6

	stx	%g6, [%l1+4*8]
	inc	5*8, %l1

	mov	%o5, %o0				! Save our unused data
	dec	5*8, %l2
2:
	inccc	12*8, %l2				! Undo the 12*8 bias
	bz,pn	%icc, Lmemcpy_complete

	!! Copy what is left one 8-byte word per pass (not unrolled)
Lmemcpy_aligned8:
!	ldx	[%l0], %o0				! Already done
!	sllx	%o0, %l4, %o0				! Shift high word

	 deccc	8, %l2					! Pre-decrement
	bl,pn	CCCR, Lmemcpy_finish
1:
	ldx	[%l0+8], %o1				! Load word 0
	inc	8, %l0

	srlx	%o1, %l3, %g6
	or	%g6, %o0, %g6				! Combine

	stx	%g6, [%l1]				! Store result
	inc	8, %l1

	deccc	8, %l2
	bge,pn	CCCR, 1b
	 sllx	%o1, %l4, %o0

	btst	7, %l2					! Done?
	bz,pt	CCCR, Lmemcpy_complete

	!!
	!! Loadup the last dregs into %o0 and shift it into place
	!!
	 srlx	%l3, 3, %g6				! # bytes in %o0
	dec	8, %g6					!  - 8
	!! n-8 - (by - 8) -> n - by
	subcc	%l2, %g6, %g0				! # bytes we need
	ble,pt	%icc, Lmemcpy_finish
	 nop
	ldx	[%l0+8], %o1				! Need another word
	srlx	%o1, %l3, %o1
	ba,pt	%icc, Lmemcpy_finish
	 or	%o0, %o1, %o0				! All loaded up.

	!!
	!! src and dest are both 8-byte aligned: plain ldx/stx copy,
	!! six doublewords per pass, then one at a time.
	!!
Lmemcpy_noshift8:
	deccc	6*8, %l2				! Have enough room?
	bl,pn	CCCR, 2f
	 nop
	ba,pt	%icc, 1f
	 nop
	.align	32
1:
	ldx	[%l0+0*8], %o0
	ldx	[%l0+1*8], %o1
	ldx	[%l0+2*8], %o2
	stx	%o0, [%l1+0*8]
	stx	%o1, [%l1+1*8]
	stx	%o2, [%l1+2*8]


	ldx	[%l0+3*8], %o3
	ldx	[%l0+4*8], %o4
	ldx	[%l0+5*8], %o5
	inc	6*8, %l0
	stx	%o3, [%l1+3*8]
	deccc	6*8, %l2
	stx	%o4, [%l1+4*8]
	stx	%o5, [%l1+5*8]
	bge,pt	CCCR, 1b
	 inc	6*8, %l1
2:
	inc	6*8, %l2				! Undo the 6*8 bias
1:
	deccc	8, %l2
	bl,pn	%icc, 1f				! < 0 --> sub word
	 nop
	ldx	[%l0], %g6
	inc	8, %l0
	stx	%g6, [%l1]
	bg,pt	%icc, 1b				! Exactly 0 --> done
	 inc	8, %l1
1:
	btst	7, %l2					! Done?
	bz,pt	CCCR, Lmemcpy_complete
	 clr	%l4
	ldx	[%l0], %o0				! Last partial word
	!!
	!! Store the tail.  %o0 holds the remaining bytes starting at its
	!! most significant end; bits 4/2/1 of %l2 select a word, short
	!! and byte store.  Each delay-slot shift leaves in %g6 the value
	!! for the next narrower store whether or not the wider one ran.
	!!
Lmemcpy_finish:

	brz,pn	%l2, 2f					! 100% complete?
	 cmp	%l2, 8					! Exactly 8 bytes?
	bz,a,pn	CCCR, 2f
	 stx	%o0, [%l1]

	btst	4, %l2					! Word store?
	bz	CCCR, 1f
	 srlx	%o0, 32, %g6				! Shift high word down
	stw	%g6, [%l1]
	inc	4, %l1
	mov	%o0, %g6				! Operate on the low bits
1:
	btst	2, %l2
	mov	%g6, %o0
	bz	1f
	 srlx	%o0, 16, %g6

	sth	%g6, [%l1]				! Store short
	inc	2, %l1
	mov	%o0, %g6				! Operate on low bytes
1:
	mov	%g6, %o0
	btst	1, %l2					! Byte aligned?
	bz	2f
	 srlx	%o0, 8, %g6

	stb	%g6, [%l1]				! Store last byte
	inc	1, %l1					! Update address
2:
Lmemcpy_complete:
#if 0
	!!
	!! verify copy success.
	!!

	mov	%i0, %o2
	mov	%i1, %o4
	mov	%i2, %l4
0:
	ldub	[%o2], %o1
	inc	%o2
	ldub	[%o4], %o3
	inc	%o4
	cmp	%o3, %o1
	bnz	1f
	 dec	%l4
	brnz	%l4, 0b
	 nop
	ba	2f
	 nop

1:
	set	0f, %o0
	call	printf
	 sub	%i2, %l4, %o5
	set	1f, %o0
	mov	%i0, %o2
	mov	%i1, %o1
	call	printf
	 mov	%i2, %o3
	ta	1
	.data
0:	.asciz	"memcpy failed: %x@%p != %x@%p byte %d\n"
1:	.asciz	"memcpy(%p, %p, %lx)\n"
	.align 8
	.text
2:
#endif
	ret
	 restore %i1, %g0, %o0				! Return dest

#ifdef USE_BLOCK_STORE_LOAD

/*
 * Block copy.  Useful for >256 byte copies.
 *
 * Benchmarking has shown this always seems to be slower than
 * the integer version, so this is disabled.  Maybe someone will
 * figure out why sometime.
475*84d9c625SLionel Sambuc */ 476*84d9c625SLionel Sambuc 477*84d9c625SLionel SambucLmemcpy_block: 478*84d9c625SLionel Sambuc sethi %hi(block_disable), %o3 479*84d9c625SLionel Sambuc ldx [ %o3 + %lo(block_disable) ], %o3 480*84d9c625SLionel Sambuc brnz,pn %o3, Lmemcpy_fancy 481*84d9c625SLionel Sambuc !! Make sure our trap table is installed 482*84d9c625SLionel Sambuc set _C_LABEL(trapbase), %o5 483*84d9c625SLionel Sambuc rdpr %tba, %o3 484*84d9c625SLionel Sambuc sub %o3, %o5, %o3 485*84d9c625SLionel Sambuc brnz,pn %o3, Lmemcpy_fancy ! No, then don't use block load/store 486*84d9c625SLionel Sambuc nop 487*84d9c625SLionel Sambuc#if defined(_KERNEL) && !defined(_RUMPKERNEL) 488*84d9c625SLionel Sambuc/* 489*84d9c625SLionel Sambuc * Kernel: 490*84d9c625SLionel Sambuc * 491*84d9c625SLionel Sambuc * Here we use VIS instructions to do a block clear of a page. 492*84d9c625SLionel Sambuc * But before we can do that we need to save and enable the FPU. 493*84d9c625SLionel Sambuc * The last owner of the FPU registers is fplwp, and 494*84d9c625SLionel Sambuc * fplwp->l_md.md_fpstate is the current fpstate. If that's not 495*84d9c625SLionel Sambuc * null, call savefpstate() with it to store our current fp state. 496*84d9c625SLionel Sambuc * 497*84d9c625SLionel Sambuc * Next, allocate an aligned fpstate on the stack. We will properly 498*84d9c625SLionel Sambuc * nest calls on a particular stack so this should not be a problem. 499*84d9c625SLionel Sambuc * 500*84d9c625SLionel Sambuc * Now we grab either curlwp (or if we're on the interrupt stack 501*84d9c625SLionel Sambuc * lwp0). We stash its existing fpstate in a local register and 502*84d9c625SLionel Sambuc * put our new fpstate in curlwp->p_md.md_fpstate. We point 503*84d9c625SLionel Sambuc * fplwp at curlwp (or lwp0) and enable the FPU. 504*84d9c625SLionel Sambuc * 505*84d9c625SLionel Sambuc * If we are ever preempted, our FPU state will be saved in our 506*84d9c625SLionel Sambuc * fpstate. 
Then, when we're resumed and we take an FPDISABLED 507*84d9c625SLionel Sambuc * trap, the trap handler will be able to fish our FPU state out 508*84d9c625SLionel Sambuc * of curlwp (or lwp0). 509*84d9c625SLionel Sambuc * 510*84d9c625SLionel Sambuc * On exiting this routine we undo the damage: restore the original 511*84d9c625SLionel Sambuc * pointer to curlwp->p_md.md_fpstate, clear our fplwp, and disable 512*84d9c625SLionel Sambuc * the FPU. 513*84d9c625SLionel Sambuc * 514*84d9c625SLionel Sambuc * 515*84d9c625SLionel Sambuc * Register usage, Kernel only (after save): 516*84d9c625SLionel Sambuc * 517*84d9c625SLionel Sambuc * %i0 src 518*84d9c625SLionel Sambuc * %i1 dest 519*84d9c625SLionel Sambuc * %i2 size 520*84d9c625SLionel Sambuc * 521*84d9c625SLionel Sambuc * %l0 XXXX DEBUG old fpstate 522*84d9c625SLionel Sambuc * %l1 fplwp (hi bits only) 523*84d9c625SLionel Sambuc * %l2 orig fplwp 524*84d9c625SLionel Sambuc * %l3 orig fpstate 525*84d9c625SLionel Sambuc * %l5 curlwp 526*84d9c625SLionel Sambuc * %l6 old fpstate 527*84d9c625SLionel Sambuc * 528*84d9c625SLionel Sambuc * Register usage, Kernel and user: 529*84d9c625SLionel Sambuc * 530*84d9c625SLionel Sambuc * %g1 dest (retval for memcpy) 531*84d9c625SLionel Sambuc * 532*84d9c625SLionel Sambuc * %o0 src 533*84d9c625SLionel Sambuc * %o1 dest 534*84d9c625SLionel Sambuc * %o2 end dest 535*84d9c625SLionel Sambuc * %o5 last safe fetchable address 536*84d9c625SLionel Sambuc */ 537*84d9c625SLionel Sambuc 538*84d9c625SLionel Sambuc ENABLE_FPU(0) 539*84d9c625SLionel Sambuc 540*84d9c625SLionel Sambuc mov %i0, %o0 ! Src addr. 541*84d9c625SLionel Sambuc mov %i1, %o1 ! Store our dest ptr here. 542*84d9c625SLionel Sambuc mov %i2, %o2 ! Len counter 543*84d9c625SLionel Sambuc#endif /* _KERNEL */ 544*84d9c625SLionel Sambuc 545*84d9c625SLionel Sambuc !! 546*84d9c625SLionel Sambuc !! First align the output to a 64-bit entity 547*84d9c625SLionel Sambuc !! 548*84d9c625SLionel Sambuc 549*84d9c625SLionel Sambuc mov %o1, %g1 ! 
memcpy retval 550*84d9c625SLionel Sambuc add %o0, %o2, %o5 ! End of source block 551*84d9c625SLionel Sambuc 552*84d9c625SLionel Sambuc andn %o0, 7, %o3 ! Start of block 553*84d9c625SLionel Sambuc dec %o5 554*84d9c625SLionel Sambuc fzero %f0 555*84d9c625SLionel Sambuc 556*84d9c625SLionel Sambuc andn %o5, BLOCK_ALIGN, %o5 ! Last safe addr. 557*84d9c625SLionel Sambuc ldd [%o3], %f2 ! Load 1st word 558*84d9c625SLionel Sambuc 559*84d9c625SLionel Sambuc dec 8, %o3 ! Move %o3 1 word back 560*84d9c625SLionel Sambuc btst 1, %o1 561*84d9c625SLionel Sambuc bz 4f 562*84d9c625SLionel Sambuc 563*84d9c625SLionel Sambuc mov -7, %o4 ! Lowest src addr possible 564*84d9c625SLionel Sambuc alignaddr %o0, %o4, %o4 ! Base addr for load. 565*84d9c625SLionel Sambuc 566*84d9c625SLionel Sambuc cmp %o3, %o4 567*84d9c625SLionel Sambuc be,pt CCCR, 1f ! Already loaded? 568*84d9c625SLionel Sambuc mov %o4, %o3 569*84d9c625SLionel Sambuc fmovd %f2, %f0 ! No. Shift 570*84d9c625SLionel Sambuc ldd [%o3+8], %f2 ! And load 571*84d9c625SLionel Sambuc1: 572*84d9c625SLionel Sambuc 573*84d9c625SLionel Sambuc faligndata %f0, %f2, %f4 ! Isolate 1st byte 574*84d9c625SLionel Sambuc 575*84d9c625SLionel Sambuc stda %f4, [%o1] ASI_FL8_P ! Store 1st byte 576*84d9c625SLionel Sambuc inc 1, %o1 ! Update address 577*84d9c625SLionel Sambuc inc 1, %o0 578*84d9c625SLionel Sambuc dec 1, %o2 579*84d9c625SLionel Sambuc4: 580*84d9c625SLionel Sambuc btst 2, %o1 581*84d9c625SLionel Sambuc bz 4f 582*84d9c625SLionel Sambuc 583*84d9c625SLionel Sambuc mov -6, %o4 ! Calculate src - 6 584*84d9c625SLionel Sambuc alignaddr %o0, %o4, %o4 ! calculate shift mask and dest. 585*84d9c625SLionel Sambuc 586*84d9c625SLionel Sambuc cmp %o3, %o4 ! Addresses same? 587*84d9c625SLionel Sambuc be,pt CCCR, 1f 588*84d9c625SLionel Sambuc mov %o4, %o3 589*84d9c625SLionel Sambuc fmovd %f2, %f0 ! Shuffle data 590*84d9c625SLionel Sambuc ldd [%o3+8], %f2 ! Load word 0 591*84d9c625SLionel Sambuc1: 592*84d9c625SLionel Sambuc faligndata %f0, %f2, %f4 ! 
Move 1st short to low part of f4 593*84d9c625SLionel Sambuc 594*84d9c625SLionel Sambuc stda %f4, [%o1] ASI_FL16_P ! Store 1st short 595*84d9c625SLionel Sambuc dec 2, %o2 596*84d9c625SLionel Sambuc inc 2, %o1 597*84d9c625SLionel Sambuc inc 2, %o0 598*84d9c625SLionel Sambuc4: 599*84d9c625SLionel Sambuc brz,pn %o2, Lmemcpy_blockfinish ! XXXX 600*84d9c625SLionel Sambuc 601*84d9c625SLionel Sambuc btst 4, %o1 602*84d9c625SLionel Sambuc bz 4f 603*84d9c625SLionel Sambuc 604*84d9c625SLionel Sambuc mov -4, %o4 605*84d9c625SLionel Sambuc alignaddr %o0, %o4, %o4 ! calculate shift mask and dest. 606*84d9c625SLionel Sambuc 607*84d9c625SLionel Sambuc cmp %o3, %o4 ! Addresses same? 608*84d9c625SLionel Sambuc beq,pt CCCR, 1f 609*84d9c625SLionel Sambuc mov %o4, %o3 610*84d9c625SLionel Sambuc fmovd %f2, %f0 ! Shuffle data 611*84d9c625SLionel Sambuc ldd [%o3+8], %f2 ! Load word 0 612*84d9c625SLionel Sambuc1: 613*84d9c625SLionel Sambuc faligndata %f0, %f2, %f4 ! Move 1st word to low part of f4 614*84d9c625SLionel Sambuc 615*84d9c625SLionel Sambuc st %f5, [%o1] ! Store word 616*84d9c625SLionel Sambuc dec 4, %o2 617*84d9c625SLionel Sambuc inc 4, %o1 618*84d9c625SLionel Sambuc inc 4, %o0 619*84d9c625SLionel Sambuc4: 620*84d9c625SLionel Sambuc brz,pn %o2, Lmemcpy_blockfinish ! XXXX 621*84d9c625SLionel Sambuc !! 622*84d9c625SLionel Sambuc !! We are now 64-bit aligned in the dest. 623*84d9c625SLionel Sambuc !! 624*84d9c625SLionel SambucLmemcpy_block_common: 625*84d9c625SLionel Sambuc 626*84d9c625SLionel Sambuc mov -0, %o4 627*84d9c625SLionel Sambuc alignaddr %o0, %o4, %o4 ! base - shift 628*84d9c625SLionel Sambuc 629*84d9c625SLionel Sambuc cmp %o3, %o4 ! Addresses same? 630*84d9c625SLionel Sambuc beq,pt CCCR, 1f 631*84d9c625SLionel Sambuc mov %o4, %o3 632*84d9c625SLionel Sambuc fmovd %f2, %f0 ! Shuffle data 633*84d9c625SLionel Sambuc ldd [%o3+8], %f2 ! Load word 0 634*84d9c625SLionel Sambuc1: 635*84d9c625SLionel Sambuc add %o3, 8, %o0 ! 
now use %o0 for src 636*84d9c625SLionel Sambuc 637*84d9c625SLionel Sambuc !! 638*84d9c625SLionel Sambuc !! Continue until our dest is block aligned 639*84d9c625SLionel Sambuc !! 640*84d9c625SLionel SambucLmemcpy_block_aligned8: 641*84d9c625SLionel Sambuc1: 642*84d9c625SLionel Sambuc brz %o2, Lmemcpy_blockfinish 643*84d9c625SLionel Sambuc btst BLOCK_ALIGN, %o1 ! Block aligned? 644*84d9c625SLionel Sambuc bz 1f 645*84d9c625SLionel Sambuc 646*84d9c625SLionel Sambuc faligndata %f0, %f2, %f4 ! Generate result 647*84d9c625SLionel Sambuc deccc 8, %o2 648*84d9c625SLionel Sambuc ble,pn %icc, Lmemcpy_blockfinish ! Should never happen 649*84d9c625SLionel Sambuc fmovd %f4, %f48 650*84d9c625SLionel Sambuc 651*84d9c625SLionel Sambuc std %f4, [%o1] ! Store result 652*84d9c625SLionel Sambuc inc 8, %o1 653*84d9c625SLionel Sambuc 654*84d9c625SLionel Sambuc fmovd %f2, %f0 655*84d9c625SLionel Sambuc inc 8, %o0 656*84d9c625SLionel Sambuc ba,pt %xcc, 1b ! Not yet. 657*84d9c625SLionel Sambuc ldd [%o0], %f2 ! Load next part 658*84d9c625SLionel SambucLmemcpy_block_aligned64: 659*84d9c625SLionel Sambuc1: 660*84d9c625SLionel Sambuc 661*84d9c625SLionel Sambuc/* 662*84d9c625SLionel Sambuc * 64-byte aligned -- ready for block operations. 663*84d9c625SLionel Sambuc * 664*84d9c625SLionel Sambuc * Here we have the destination block aligned, but the 665*84d9c625SLionel Sambuc * source pointer may not be. Sub-word alignment will 666*84d9c625SLionel Sambuc * be handled by faligndata instructions. But the source 667*84d9c625SLionel Sambuc * can still be potentially aligned to 8 different words 668*84d9c625SLionel Sambuc * in our 64-byte block, so we have 8 different copy routines. 
669*84d9c625SLionel Sambuc * 670*84d9c625SLionel Sambuc * Once we figure out our source alignment, we branch 671*84d9c625SLionel Sambuc * to the appropriate copy routine, which sets up the 672*84d9c625SLionel Sambuc * alignment for faligndata and loads (sets) the values 673*84d9c625SLionel Sambuc * into the source registers and does the copy loop. 674*84d9c625SLionel Sambuc * 675*84d9c625SLionel Sambuc * When we're down to less than 1 block to store, we 676*84d9c625SLionel Sambuc * exit the copy loop and execute cleanup code. 677*84d9c625SLionel Sambuc * 678*84d9c625SLionel Sambuc * Block loads and stores are not properly interlocked. 679*84d9c625SLionel Sambuc * Stores save one reg/cycle, so you can start overwriting 680*84d9c625SLionel Sambuc * registers the cycle after the store is issued. 681*84d9c625SLionel Sambuc * 682*84d9c625SLionel Sambuc * Block loads require a block load to a different register 683*84d9c625SLionel Sambuc * block or a membar #Sync before accessing the loaded 684*84d9c625SLionel Sambuc * data. 685*84d9c625SLionel Sambuc * 686*84d9c625SLionel Sambuc * Since the faligndata instructions may be offset as far 687*84d9c625SLionel Sambuc * as 7 registers into a block (if you are shifting source 688*84d9c625SLionel Sambuc * 7 -> dest 0), you need 3 source register blocks for full 689*84d9c625SLionel Sambuc * performance: one you are copying, one you are loading, 690*84d9c625SLionel Sambuc * and one for interlocking. Otherwise, we would need to 691*84d9c625SLionel Sambuc * sprinkle the code with membar #Sync and lose the advantage 692*84d9c625SLionel Sambuc * of running faligndata in parallel with block stores. This 693*84d9c625SLionel Sambuc * means we are fetching a full 128 bytes ahead of the stores. 694*84d9c625SLionel Sambuc * We need to make sure the prefetch does not inadvertently 695*84d9c625SLionel Sambuc * cross a page boundary and fault on data that we will never 696*84d9c625SLionel Sambuc * store. 
697*84d9c625SLionel Sambuc * 698*84d9c625SLionel Sambuc */ 699*84d9c625SLionel Sambuc#if 1 700*84d9c625SLionel Sambuc and %o0, BLOCK_ALIGN, %o3 701*84d9c625SLionel Sambuc srax %o3, 3, %o3 ! Isolate the offset 702*84d9c625SLionel Sambuc 703*84d9c625SLionel Sambuc brz %o3, L100 ! 0->0 704*84d9c625SLionel Sambuc btst 4, %o3 705*84d9c625SLionel Sambuc bnz %xcc, 4f 706*84d9c625SLionel Sambuc btst 2, %o3 707*84d9c625SLionel Sambuc bnz %xcc, 2f 708*84d9c625SLionel Sambuc btst 1, %o3 709*84d9c625SLionel Sambuc ba,pt %xcc, L101 ! 0->1 710*84d9c625SLionel Sambuc nop /* XXX spitfire bug */ 711*84d9c625SLionel Sambuc2: 712*84d9c625SLionel Sambuc bz %xcc, L102 ! 0->2 713*84d9c625SLionel Sambuc nop 714*84d9c625SLionel Sambuc ba,pt %xcc, L103 ! 0->3 715*84d9c625SLionel Sambuc nop /* XXX spitfire bug */ 716*84d9c625SLionel Sambuc4: 717*84d9c625SLionel Sambuc bnz %xcc, 2f 718*84d9c625SLionel Sambuc btst 1, %o3 719*84d9c625SLionel Sambuc bz %xcc, L104 ! 0->4 720*84d9c625SLionel Sambuc nop 721*84d9c625SLionel Sambuc ba,pt %xcc, L105 ! 0->5 722*84d9c625SLionel Sambuc nop /* XXX spitfire bug */ 723*84d9c625SLionel Sambuc2: 724*84d9c625SLionel Sambuc bz %xcc, L106 ! 0->6 725*84d9c625SLionel Sambuc nop 726*84d9c625SLionel Sambuc ba,pt %xcc, L107 ! 0->7 727*84d9c625SLionel Sambuc nop /* XXX spitfire bug */ 728*84d9c625SLionel Sambuc#else 729*84d9c625SLionel Sambuc 730*84d9c625SLionel Sambuc !! 731*84d9c625SLionel Sambuc !! Isolate the word offset, which just happens to be 732*84d9c625SLionel Sambuc !! the slot in our jump table. 733*84d9c625SLionel Sambuc !! 734*84d9c625SLionel Sambuc !! This is 6 insns, most of which cannot be paired, 735*84d9c625SLionel Sambuc !! which is about the same as the above version. 736*84d9c625SLionel Sambuc !! 
737*84d9c625SLionel Sambuc rd %pc, %o4 738*84d9c625SLionel Sambuc1: 739*84d9c625SLionel Sambuc and %o0, 0x31, %o3 740*84d9c625SLionel Sambuc add %o3, (Lmemcpy_block_jmp - 1b), %o3 741*84d9c625SLionel Sambuc jmpl %o4 + %o3, %g0 742*84d9c625SLionel Sambuc nop 743*84d9c625SLionel Sambuc 744*84d9c625SLionel Sambuc !! 745*84d9c625SLionel Sambuc !! Jump table 746*84d9c625SLionel Sambuc !! 747*84d9c625SLionel Sambuc 748*84d9c625SLionel SambucLmemcpy_block_jmp: 749*84d9c625SLionel Sambuc ba,a,pt %xcc, L100 750*84d9c625SLionel Sambuc nop 751*84d9c625SLionel Sambuc ba,a,pt %xcc, L101 752*84d9c625SLionel Sambuc nop 753*84d9c625SLionel Sambuc ba,a,pt %xcc, L102 754*84d9c625SLionel Sambuc nop 755*84d9c625SLionel Sambuc ba,a,pt %xcc, L103 756*84d9c625SLionel Sambuc nop 757*84d9c625SLionel Sambuc ba,a,pt %xcc, L104 758*84d9c625SLionel Sambuc nop 759*84d9c625SLionel Sambuc ba,a,pt %xcc, L105 760*84d9c625SLionel Sambuc nop 761*84d9c625SLionel Sambuc ba,a,pt %xcc, L106 762*84d9c625SLionel Sambuc nop 763*84d9c625SLionel Sambuc ba,a,pt %xcc, L107 764*84d9c625SLionel Sambuc nop 765*84d9c625SLionel Sambuc#endif 766*84d9c625SLionel Sambuc 767*84d9c625SLionel Sambuc !! 768*84d9c625SLionel Sambuc !! Source is block aligned. 769*84d9c625SLionel Sambuc !! 770*84d9c625SLionel Sambuc !! Just load a block and go. 771*84d9c625SLionel Sambuc !! 
L100:
	!!
	!! Copy loop for a source pointer that is itself block aligned.
	!!
	!! As used below: %o0 = source pointer, %o1 = destination
	!! pointer, %o2 = bytes left to copy.  %o5 is compared with %o0
	!! before each block load after the first; it is set outside this
	!! chunk -- presumably the highest address at which a block load
	!! may safely start (TODO confirm against the setup code).
	!!
	!! The loop at 3: is unrolled three times, rotating the source
	!! data through %f0-%f14, %f16-%f30 and %f48-%f62.  Each pass
	!! builds eight doublewords in %f32-%f46 with faligndata and,
	!! unless %o2 has dropped to zero or below, block-stores them
	!! to [%o1].
	!!
	!! "bleu,a" annuls its delay slot when not taken, so the ldda
	!! only runs when %o0 <= %o5; otherwise the membar #Sync that
	!! follows is executed instead.
	!! NOTE(review): these compares branch on %icc (32-bit condition
	!! codes) although %o0/%o5 look like full-width pointers --
	!! confirm this is intended.
	!!
#ifdef RETURN_NAME
	!! Leave the address of a string naming this routine in %g1.
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L100"
	.align	8
2:
#endif
	fmovd	%f0 , %f62			! Keep the pending doubleword; %f0 is about to be reloaded
	ldda	[%o0] ASI_BLK_P, %f0
	inc	BLOCK_SIZE, %o0
	cmp	%o0, %o5
	bleu,a,pn	%icc, 3f
	 ldda	[%o0] ASI_BLK_P, %f16
	ba,pt	%icc, 3f
	 membar #Sync

	.align 32			! ICache align.
3:
	!! Pass 1: consume %f62 and %f0-%f14, load into %f48-%f62.
	faligndata	%f62, %f0, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f0, %f2, %f34
	dec	BLOCK_SIZE, %o2
	faligndata	%f2, %f4, %f36
	cmp	%o0, %o5
	faligndata	%f4, %f6, %f38
	faligndata	%f6, %f8, %f40
	faligndata	%f8, %f10, %f42
	faligndata	%f10, %f12, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f12, %f14, %f46

	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	!! Pass 2: store pass 1, consume %f14-%f30, load into %f0-%f14.
	stda	%f32, [%o1] ASI_STORE
	faligndata	%f14, %f16, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f16, %f18, %f34
	inc	BLOCK_SIZE, %o1
	faligndata	%f18, %f20, %f36
	dec	BLOCK_SIZE, %o2
	faligndata	%f20, %f22, %f38
	cmp	%o0, %o5
	faligndata	%f22, %f24, %f40
	faligndata	%f24, %f26, %f42
	faligndata	%f26, %f28, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f28, %f30, %f46

	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	!! Pass 3: store pass 2, consume %f30 and %f48-%f62, load into %f16-%f30.
	stda	%f32, [%o1] ASI_STORE
	faligndata	%f30, %f48, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f48, %f50, %f34
	inc	BLOCK_SIZE, %o1
	faligndata	%f50, %f52, %f36
	dec	BLOCK_SIZE, %o2
	faligndata	%f52, %f54, %f38
	cmp	%o0, %o5
	faligndata	%f54, %f56, %f40
	faligndata	%f56, %f58, %f42
	faligndata	%f58, %f60, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f60, %f62, %f46
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16	! Increment is at top
	membar	#Sync
2:
	stda	%f32, [%o1] ASI_STORE
	ba	3b
	 inc	BLOCK_SIZE, %o1

	!!
	!! Source at BLOCK_ALIGN+8
	!!
	!! We need to load almost 1 complete block by hand.
	!!
L101:
	!!
	!! Copy loop for a source that sits one doubleword into its block.
	!!
	!! As used below (also in L102 and L103): %o0 = source pointer,
	!! %o1 = destination pointer, %o2 = bytes left to copy.  %o5 is
	!! compared with %o0 before each block load; it is set outside
	!! this chunk -- presumably the highest address at which a block
	!! load may safely start (TODO confirm against the setup code).
	!!
	!! The rest of the first source block (%f2-%f14) is fetched with
	!! seven ldd's; %f0 already holds the pending doubleword.  The
	!! loop at 3: is unrolled three times, rotating the source data
	!! through %f0-%f14, %f16-%f30 and %f48-%f62.  Each pass builds
	!! eight doublewords in %f32-%f46 and, unless %o2 has dropped to
	!! zero or below, block-stores them to [%o1].
	!!
	!! "bleu,a" annuls its delay slot when not taken, so the ldda
	!! only runs when %o0 <= %o5; otherwise the membar #Sync that
	!! follows is executed instead.
	!!
#ifdef RETURN_NAME
	!! Leave the address of a string naming this routine in %g1.
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L101"
	.align	8
2:
#endif
!	fmovd	%f0, %f0			! Hoist fmovd
	ldd	[%o0], %f2
	inc	8, %o0
	ldd	[%o0], %f4
	inc	8, %o0
	ldd	[%o0], %f6
	inc	8, %o0
	ldd	[%o0], %f8
	inc	8, %o0
	ldd	[%o0], %f10
	inc	8, %o0
	ldd	[%o0], %f12
	inc	8, %o0
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn	%icc, 3f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar #Sync
3:
	faligndata	%f0, %f2, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f2, %f4, %f34
	cmp	%o0, %o5
	faligndata	%f4, %f6, %f36
	dec	BLOCK_SIZE, %o2
	faligndata	%f6, %f8, %f38
	faligndata	%f8, %f10, %f40
	faligndata	%f10, %f12, %f42
	faligndata	%f12, %f14, %f44
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f14, %f16, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f16, %f18, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f18, %f20, %f34
	inc	BLOCK_SIZE, %o1
	faligndata	%f20, %f22, %f36
	cmp	%o0, %o5
	faligndata	%f22, %f24, %f38
	dec	BLOCK_SIZE, %o2
	faligndata	%f24, %f26, %f40
	faligndata	%f26, %f28, %f42
	faligndata	%f28, %f30, %f44
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f30, %f48, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f48, %f50, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f50, %f52, %f34
	inc	BLOCK_SIZE, %o1
	faligndata	%f52, %f54, %f36
	cmp	%o0, %o5
	faligndata	%f54, %f56, %f38
	dec	BLOCK_SIZE, %o2
	faligndata	%f56, %f58, %f40
	faligndata	%f58, %f60, %f42
	faligndata	%f60, %f62, %f44
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f62, %f0, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b
	 inc	BLOCK_SIZE, %o1

	!!
	!! Source at BLOCK_ALIGN+16
	!!
	!! We need to load 6 doubles by hand.
	!!
	!! Same scheme as L101, shifted by one register: the pending
	!! doubleword is moved from %f0 to %f2 and %f4-%f14 are loaded
	!! with ldd.
	!!
L102:
#ifdef RETURN_NAME
	!! Leave the address of a string naming this routine in %g1.
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L102"
	.align	8
2:
#endif
	ldd	[%o0], %f4
	inc	8, %o0
	fmovd	%f0, %f2			! Hoist fmovd
	ldd	[%o0], %f6
	inc	8, %o0

	ldd	[%o0], %f8
	inc	8, %o0
	ldd	[%o0], %f10
	inc	8, %o0
	ldd	[%o0], %f12
	inc	8, %o0
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn	%icc, 3f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar #Sync
3:
	faligndata	%f2, %f4, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f4, %f6, %f34
	cmp	%o0, %o5
	faligndata	%f6, %f8, %f36
	dec	BLOCK_SIZE, %o2
	faligndata	%f8, %f10, %f38
	faligndata	%f10, %f12, %f40
	faligndata	%f12, %f14, %f42
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	faligndata	%f14, %f16, %f44

	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f16, %f18, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f18, %f20, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f20, %f22, %f34
	inc	BLOCK_SIZE, %o1
	faligndata	%f22, %f24, %f36
	cmp	%o0, %o5
	faligndata	%f24, %f26, %f38
	dec	BLOCK_SIZE, %o2
	faligndata	%f26, %f28, %f40
	faligndata	%f28, %f30, %f42
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	faligndata	%f30, %f48, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f48, %f50, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f50, %f52, %f32
	inc	BLOCK_SIZE, %o0
	faligndata	%f52, %f54, %f34
	inc	BLOCK_SIZE, %o1
	faligndata	%f54, %f56, %f36
	cmp	%o0, %o5
	faligndata	%f56, %f58, %f38
	dec	BLOCK_SIZE, %o2
	faligndata	%f58, %f60, %f40
	faligndata	%f60, %f62, %f42
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	faligndata	%f62, %f0, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f0, %f2, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b
	 inc	BLOCK_SIZE, %o1

	!!
	!! Source at BLOCK_ALIGN+24
	!!
	!! We need to load 5 doubles by hand.
	!!
	!! Same scheme again, shifted by two registers: the pending
	!! doubleword goes to %f4 and %f6-%f14 are loaded with ldd.
	!! Here the first "inc BLOCK_SIZE, %o0" sits ahead of the loop
	!! label and the per-pass increments follow the block loads.
	!!
L103:
#ifdef RETURN_NAME
	!! Leave the address of a string naming this routine in %g1.
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L103"
	.align	8
2:
#endif
	fmovd	%f0, %f4
	ldd	[%o0], %f6
	inc	8, %o0
	ldd	[%o0], %f8
	inc	8, %o0
	ldd	[%o0], %f10
	inc	8, %o0
	ldd	[%o0], %f12
	inc	8, %o0
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar #Sync
2:
	inc	BLOCK_SIZE, %o0
3:
	faligndata	%f4, %f6, %f32
	cmp	%o0, %o5
	faligndata	%f6, %f8, %f34
	dec	BLOCK_SIZE, %o2
	faligndata	%f8, %f10, %f36
	faligndata	%f10, %f12, %f38
	faligndata	%f12, %f14, %f40
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	faligndata	%f14, %f16, %f42
	inc	BLOCK_SIZE, %o0
	faligndata	%f16, %f18, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f18, %f20, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f20, %f22, %f32
	cmp	%o0, %o5
	faligndata	%f22, %f24, %f34
	dec	BLOCK_SIZE, %o2
	faligndata	%f24, %f26, %f36
	inc	BLOCK_SIZE, %o1
	faligndata	%f26, %f28, %f38
	faligndata	%f28, %f30, %f40
	!! Addresses must be compared unsigned, as in every other limit
	!! check in these routines; this one used the signed "ble".
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	faligndata	%f30, %f48, %f42
	inc	BLOCK_SIZE, %o0
	faligndata	%f48, %f50, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f50, %f52, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f52, %f54, %f32
	cmp	%o0, %o5
	faligndata	%f54, %f56, %f34
	dec	BLOCK_SIZE, %o2
	faligndata	%f56, %f58, %f36
	faligndata	%f58, %f60, %f38
	inc	BLOCK_SIZE, %o1
	faligndata	%f60, %f62, %f40
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	faligndata	%f62, %f0, %f42
	inc	BLOCK_SIZE, %o0
	faligndata	%f0, %f2, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f2, %f4, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b
	 inc	BLOCK_SIZE, %o1

	!!
	!! Source at BLOCK_ALIGN+32
	!!
	!! We need to load 4 doubles by hand.
	!!
L104:
	!!
	!! Copy loop for a source that sits four doublewords into its
	!! block.
	!!
	!! As used below (also in L105, L106 and L107): %o0 = source
	!! pointer, %o1 = destination pointer, %o2 = bytes left to copy.
	!! %o5 is compared with %o0 before each block load; it is set
	!! outside this chunk -- presumably the highest address at which
	!! a block load may safely start (TODO confirm against the setup
	!! code).
	!!
	!! The pending doubleword is moved from %f0 to %f6 and the rest
	!! of the first source block (%f8-%f14) is fetched with ldd.
	!! The loop at 3: is unrolled three times, rotating the source
	!! data through %f0-%f14, %f16-%f30 and %f48-%f62.  Each pass
	!! builds eight doublewords in %f32-%f46 and, unless %o2 has
	!! dropped to zero or below, block-stores them to [%o1].
	!!
	!! "bleu,a" annuls its delay slot when not taken, so the ldda
	!! only runs when %o0 <= %o5; otherwise the membar #Sync that
	!! follows is executed instead.
	!! NOTE(review): these compares branch on %icc (32-bit condition
	!! codes) although %o0/%o5 look like full-width pointers --
	!! confirm this is intended.
	!!
#ifdef RETURN_NAME
	!! Leave the address of a string naming this routine in %g1.
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L104"
	.align	8
2:
#endif
	fmovd	%f0, %f6
	ldd	[%o0], %f8
	inc	8, %o0
	ldd	[%o0], %f10
	inc	8, %o0
	ldd	[%o0], %f12
	inc	8, %o0
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar #Sync
2:
	inc	BLOCK_SIZE, %o0
3:
	faligndata	%f6, %f8, %f32
	cmp	%o0, %o5
	faligndata	%f8, %f10, %f34
	dec	BLOCK_SIZE, %o2
	faligndata	%f10, %f12, %f36
	faligndata	%f12, %f14, %f38
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	faligndata	%f14, %f16, %f40
	faligndata	%f16, %f18, %f42
	inc	BLOCK_SIZE, %o0
	faligndata	%f18, %f20, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f20, %f22, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f22, %f24, %f32
	cmp	%o0, %o5
	faligndata	%f24, %f26, %f34
	faligndata	%f26, %f28, %f36
	inc	BLOCK_SIZE, %o1
	faligndata	%f28, %f30, %f38
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	faligndata	%f30, %f48, %f40
	dec	BLOCK_SIZE, %o2
	faligndata	%f48, %f50, %f42
	inc	BLOCK_SIZE, %o0
	faligndata	%f50, %f52, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f52, %f54, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f54, %f56, %f32
	cmp	%o0, %o5
	faligndata	%f56, %f58, %f34
	faligndata	%f58, %f60, %f36
	inc	BLOCK_SIZE, %o1
	faligndata	%f60, %f62, %f38
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	faligndata	%f62, %f0, %f40
	dec	BLOCK_SIZE, %o2
	faligndata	%f0, %f2, %f42
	inc	BLOCK_SIZE, %o0
	faligndata	%f2, %f4, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f4, %f6, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b
	 inc	BLOCK_SIZE, %o1

	!!
	!! Source at BLOCK_ALIGN+40
	!!
	!! We need to load 3 doubles by hand.
	!!
	!! Same scheme as L104, shifted by one register: the pending
	!! doubleword goes to %f8 and %f10-%f14 are loaded with ldd.
	!!
L105:
#ifdef RETURN_NAME
	!! Leave the address of a string naming this routine in %g1.
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L105"
	.align	8
2:
#endif
	fmovd	%f0, %f8
	ldd	[%o0], %f10
	inc	8, %o0
	ldd	[%o0], %f12
	inc	8, %o0
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar #Sync
2:
	inc	BLOCK_SIZE, %o0
3:
	faligndata	%f8, %f10, %f32
	cmp	%o0, %o5
	faligndata	%f10, %f12, %f34
	faligndata	%f12, %f14, %f36
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	faligndata	%f14, %f16, %f38
	dec	BLOCK_SIZE, %o2
	faligndata	%f16, %f18, %f40
	inc	BLOCK_SIZE, %o0
	faligndata	%f18, %f20, %f42
	faligndata	%f20, %f22, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f22, %f24, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f24, %f26, %f32
	cmp	%o0, %o5
	faligndata	%f26, %f28, %f34
	dec	BLOCK_SIZE, %o2
	faligndata	%f28, %f30, %f36
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	faligndata	%f30, %f48, %f38
	inc	BLOCK_SIZE, %o1
	faligndata	%f48, %f50, %f40
	inc	BLOCK_SIZE, %o0
	faligndata	%f50, %f52, %f42
	faligndata	%f52, %f54, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f54, %f56, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f56, %f58, %f32
	cmp	%o0, %o5
	faligndata	%f58, %f60, %f34
	dec	BLOCK_SIZE, %o2
	faligndata	%f60, %f62, %f36
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	faligndata	%f62, %f0, %f38
	inc	BLOCK_SIZE, %o1
	faligndata	%f0, %f2, %f40
	inc	BLOCK_SIZE, %o0
	faligndata	%f2, %f4, %f42
	faligndata	%f4, %f6, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f6, %f8, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b
	 inc	BLOCK_SIZE, %o1


	!!
	!! Source at BLOCK_ALIGN+48
	!!
	!! We need to load 2 doubles by hand.
	!!
	!! Same scheme, shifted again: the pending doubleword goes to
	!! %f10 and %f12-%f14 are loaded with ldd.
	!!
L106:
#ifdef RETURN_NAME
	!! Leave the address of a string naming this routine in %g1.
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L106"
	.align	8
2:
#endif
	fmovd	%f0, %f10
	ldd	[%o0], %f12
	inc	8, %o0
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar #Sync
2:
	inc	BLOCK_SIZE, %o0
3:
	faligndata	%f10, %f12, %f32
	cmp	%o0, %o5
	faligndata	%f12, %f14, %f34
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	faligndata	%f14, %f16, %f36
	dec	BLOCK_SIZE, %o2
	faligndata	%f16, %f18, %f38
	inc	BLOCK_SIZE, %o0
	faligndata	%f18, %f20, %f40
	faligndata	%f20, %f22, %f42
	faligndata	%f22, %f24, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f24, %f26, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f26, %f28, %f32
	cmp	%o0, %o5
	faligndata	%f28, %f30, %f34
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	faligndata	%f30, %f48, %f36
	dec	BLOCK_SIZE, %o2
	faligndata	%f48, %f50, %f38
	inc	BLOCK_SIZE, %o1
	faligndata	%f50, %f52, %f40
	faligndata	%f52, %f54, %f42
	inc	BLOCK_SIZE, %o0
	faligndata	%f54, %f56, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f56, %f58, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f58, %f60, %f32
	cmp	%o0, %o5
	faligndata	%f60, %f62, %f34
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	faligndata	%f62, %f0, %f36
	dec	BLOCK_SIZE, %o2
	faligndata	%f0, %f2, %f38
	inc	BLOCK_SIZE, %o1
	faligndata	%f2, %f4, %f40
	faligndata	%f4, %f6, %f42
	inc	BLOCK_SIZE, %o0
	faligndata	%f6, %f8, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f8, %f10, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b
	 inc	BLOCK_SIZE, %o1


	!!
	!! Source at BLOCK_ALIGN+56
	!!
	!! We need to load 1 double by hand.
	!!
	!! Same scheme, shifted again: the pending doubleword goes to
	!! %f12 and only %f14 is loaded with ldd.
	!!
L107:
#ifdef RETURN_NAME
	!! Leave the address of a string naming this routine in %g1.
	sethi	%hi(1f), %g1
	ba,pt	%icc, 2f
	 or	%g1, %lo(1f), %g1
1:
	.asciz	"L107"
	.align	8
2:
#endif
	fmovd	%f0, %f12
	ldd	[%o0], %f14
	inc	8, %o0

	cmp	%o0, %o5
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar #Sync
2:
	inc	BLOCK_SIZE, %o0
3:
	faligndata	%f12, %f14, %f32
	cmp	%o0, %o5
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f48
	membar	#Sync
2:
	faligndata	%f14, %f16, %f34
	dec	BLOCK_SIZE, %o2
	faligndata	%f16, %f18, %f36
	inc	BLOCK_SIZE, %o0
	faligndata	%f18, %f20, %f38
	faligndata	%f20, %f22, %f40
	faligndata	%f22, %f24, %f42
	faligndata	%f24, %f26, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f26, %f28, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f28, %f30, %f32
	cmp	%o0, %o5
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f0
	membar	#Sync
2:
	faligndata	%f30, %f48, %f34
	dec	BLOCK_SIZE, %o2
	faligndata	%f48, %f50, %f36
	inc	BLOCK_SIZE, %o1
	faligndata	%f50, %f52, %f38
	faligndata	%f52, %f54, %f40
	inc	BLOCK_SIZE, %o0
	faligndata	%f54, %f56, %f42
	faligndata	%f56, %f58, %f44
	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f58, %f60, %f46

	stda	%f32, [%o1] ASI_STORE

	faligndata	%f60, %f62, %f32
	cmp	%o0, %o5
	bleu,a,pn	%icc, 2f
	 ldda	[%o0] ASI_BLK_P, %f16
	membar	#Sync
2:
	faligndata	%f62, %f0, %f34
	dec	BLOCK_SIZE, %o2
	faligndata	%f0, %f2, %f36
	inc	BLOCK_SIZE, %o1
	faligndata	%f2, %f4, %f38
	faligndata	%f4, %f6, %f40
	inc	BLOCK_SIZE, %o0
	faligndata	%f6, %f8, %f42
	faligndata	%f8, %f10, %f44

	brlez,pn	%o2, Lmemcpy_blockdone
	 faligndata	%f10, %f12, %f46

	stda	%f32, [%o1] ASI_STORE
	ba	3b
	 inc	BLOCK_SIZE, %o1

	!!
	!! Common exit from the eight copy loops, reached when the
	!! "dec BLOCK_SIZE, %o2" of a pass left %o2 at zero or below.
	!! %f32-%f46 hold that pass's aligned doublewords, not yet stored.
	!!
	!! FINISH_REG(f): take 8 off the remaining count.  If that goes
	!! negative, branch to Lmemcpy_blockfinish with f copied to %f48
	!! (the fmovd is in an annulled delay slot, so it runs only when
	!! the branch is taken).  Otherwise store f to [%o1] and advance
	!! %o1 by 8.
	!!
Lmemcpy_blockdone:
	inc	BLOCK_SIZE, %o2				! Fixup our overcommit
	membar	#Sync					! Finish any pending loads
#define	FINISH_REG(f)				\
	deccc	8, %o2;				\
	bl,a	Lmemcpy_blockfinish;		\
	 fmovd	f, %f48;			\
	std	f, [%o1];			\
	inc	8, %o1

	FINISH_REG(%f32)
	FINISH_REG(%f34)
	FINISH_REG(%f36)
	FINISH_REG(%f38)
	FINISH_REG(%f40)
	FINISH_REG(%f42)
	FINISH_REG(%f44)
	FINISH_REG(%f46)
	FINISH_REG(%f48)
#undef FINISH_REG
	!!
	!! The low 3 bits have the sub-word bits needed to be
	!! stored [because (x-8)&0x7 == x].
	!!
Lmemcpy_blockfinish:
	brz,pn	%o2, 2f					! 100% complete?
	 fmovd	%f48, %f4
	cmp	%o2, 8					! Exactly 8 bytes?
1531*84d9c625SLionel Sambuc bz,a,pn CCCR, 2f 1532*84d9c625SLionel Sambuc std %f4, [%o1] 1533*84d9c625SLionel Sambuc 1534*84d9c625SLionel Sambuc btst 4, %o2 ! Word store? 1535*84d9c625SLionel Sambuc bz CCCR, 1f 1536*84d9c625SLionel Sambuc nop 1537*84d9c625SLionel Sambuc st %f4, [%o1] 1538*84d9c625SLionel Sambuc inc 4, %o1 1539*84d9c625SLionel Sambuc1: 1540*84d9c625SLionel Sambuc btst 2, %o2 1541*84d9c625SLionel Sambuc fzero %f0 1542*84d9c625SLionel Sambuc bz 1f 1543*84d9c625SLionel Sambuc 1544*84d9c625SLionel Sambuc mov -6, %o4 1545*84d9c625SLionel Sambuc alignaddr %o1, %o4, %g0 1546*84d9c625SLionel Sambuc 1547*84d9c625SLionel Sambuc faligndata %f0, %f4, %f8 1548*84d9c625SLionel Sambuc 1549*84d9c625SLionel Sambuc stda %f8, [%o1] ASI_FL16_P ! Store short 1550*84d9c625SLionel Sambuc inc 2, %o1 1551*84d9c625SLionel Sambuc1: 1552*84d9c625SLionel Sambuc btst 1, %o2 ! Byte aligned? 1553*84d9c625SLionel Sambuc bz 2f 1554*84d9c625SLionel Sambuc 1555*84d9c625SLionel Sambuc mov -7, %o0 ! Calculate dest - 7 1556*84d9c625SLionel Sambuc alignaddr %o1, %o0, %g0 ! Calculate shift mask and dest. 1557*84d9c625SLionel Sambuc 1558*84d9c625SLionel Sambuc faligndata %f0, %f4, %f8 ! Move 1st byte to low part of f8 1559*84d9c625SLionel Sambuc 1560*84d9c625SLionel Sambuc stda %f8, [%o1] ASI_FL8_P ! Store 1st byte 1561*84d9c625SLionel Sambuc inc 1, %o1 ! Update address 1562*84d9c625SLionel Sambuc2: 1563*84d9c625SLionel Sambuc membar #Sync 1564*84d9c625SLionel Sambuc#if 0 1565*84d9c625SLionel Sambuc !! 1566*84d9c625SLionel Sambuc !! verify copy success. 1567*84d9c625SLionel Sambuc !! 
1568*84d9c625SLionel Sambuc 1569*84d9c625SLionel Sambuc mov %i0, %o2 1570*84d9c625SLionel Sambuc mov %i1, %o4 1571*84d9c625SLionel Sambuc mov %i2, %l4 1572*84d9c625SLionel Sambuc0: 1573*84d9c625SLionel Sambuc ldub [%o2], %o1 1574*84d9c625SLionel Sambuc inc %o2 1575*84d9c625SLionel Sambuc ldub [%o4], %o3 1576*84d9c625SLionel Sambuc inc %o4 1577*84d9c625SLionel Sambuc cmp %o3, %o1 1578*84d9c625SLionel Sambuc bnz 1f 1579*84d9c625SLionel Sambuc dec %l4 1580*84d9c625SLionel Sambuc brnz %l4, 0b 1581*84d9c625SLionel Sambuc nop 1582*84d9c625SLionel Sambuc ba 2f 1583*84d9c625SLionel Sambuc nop 1584*84d9c625SLionel Sambuc 1585*84d9c625SLionel Sambuc1: 1586*84d9c625SLionel Sambuc set block_disable, %o0 1587*84d9c625SLionel Sambuc stx %o0, [%o0] 1588*84d9c625SLionel Sambuc 1589*84d9c625SLionel Sambuc set 0f, %o0 1590*84d9c625SLionel Sambuc call prom_printf 1591*84d9c625SLionel Sambuc sub %i2, %l4, %o5 1592*84d9c625SLionel Sambuc set 1f, %o0 1593*84d9c625SLionel Sambuc mov %i0, %o2 1594*84d9c625SLionel Sambuc mov %i1, %o1 1595*84d9c625SLionel Sambuc call prom_printf 1596*84d9c625SLionel Sambuc mov %i2, %o3 1597*84d9c625SLionel Sambuc ta 1 1598*84d9c625SLionel Sambuc .data 1599*84d9c625SLionel Sambuc _ALIGN 1600*84d9c625SLionel Sambuc0: .asciz "block memcpy failed: %x@%p != %x@%p byte %d\r\n" 1601*84d9c625SLionel Sambuc1: .asciz "memcpy(%p, %p, %lx)\r\n" 1602*84d9c625SLionel Sambuc _ALIGN 1603*84d9c625SLionel Sambuc .text 1604*84d9c625SLionel Sambuc2: 1605*84d9c625SLionel Sambuc#endif 1606*84d9c625SLionel Sambuc#if defined(_KERNEL) && !defined(_RUMPKERNEL) 1607*84d9c625SLionel Sambuc 1608*84d9c625SLionel Sambuc/* 1609*84d9c625SLionel Sambuc * Weve saved our possible fpstate, now disable the fpu 1610*84d9c625SLionel Sambuc * and continue with life. 1611*84d9c625SLionel Sambuc */ 1612*84d9c625SLionel Sambuc RESTORE_FPU 1613*84d9c625SLionel Sambuc ret 1614*84d9c625SLionel Sambuc restore %g1, 0, %o0 ! 
Return DEST for memcpy 1615*84d9c625SLionel Sambuc#endif 1616*84d9c625SLionel Sambuc retl 1617*84d9c625SLionel Sambuc mov %g1, %o0 1618*84d9c625SLionel Sambuc/* 1619*84d9c625SLionel Sambuc * Use block_disable to turn off block insns for 1620*84d9c625SLionel Sambuc * memcpy/memset 1621*84d9c625SLionel Sambuc */ 1622*84d9c625SLionel Sambuc .data 1623*84d9c625SLionel Sambuc .align 8 1624*84d9c625SLionel Sambuc .globl block_disable 1625*84d9c625SLionel Sambucblock_disable: .xword 1 1626*84d9c625SLionel Sambuc .text 1627*84d9c625SLionel Sambuc#endif /* USE_BLOCK_STORE_LOAD */ 1628