/*	$NetBSD: bcopy.S,v 1.13 2011/01/24 15:01:30 skrll Exp $	*/

/*
 * Copyright (c) 2002 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matthew Fredette.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copy routines for NetBSD/hppa.
 */

#undef _LOCORE
#define _LOCORE	/* XXX fredette - unfortunate */

#include <machine/cpu.h>
#include <machine/asm.h>
#include <machine/frame.h>
#include <machine/reg.h>

#if defined(LIBC_SCCS) && !defined(lint)
RCSID("$NetBSD: bcopy.S,v 1.13 2011/01/24 15:01:30 skrll Exp $")
#endif /* LIBC_SCCS and not lint */

/*
 * The stbys instruction is a little asymmetric.  When (%r2 & 3)
 * is zero, stbys,b,m %r1, 4(%r2) works like stws,ma.  You
 * might then wish that when (%r2 & 3) == 0, stbys,e,m %r1, -4(%r2)
 * worked like stws,mb.  But it doesn't.
 *
 * This macro works around this problem.  It requires that %t2
 * hold the number of bytes that will be written by this store
 * (meaning that it ranges from one to four).
 *
 * Watch the delay-slot trickery here.  The comib is used to set
 * up which instruction, either the stws or the stbys, is run
 * in the delay slot of the b instruction.
 */
#define	_STBYS_E_M(r, dst_spc, dst_off)			\
	comib,<>	4, %t2, 4			! \
	b		4				! \
	stws,mb		r, -4(dst_spc, dst_off)		! \
	stbys,e,m	r, 0(dst_spc, dst_off)
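
/*
 * A worked example of the macro above (illustrative only, not part of
 * the generated code): in the reverse copy below, %t2 is the number of
 * bytes of the register that belong just below dst_off.  If dst_off is
 * 0x2003, %t2 is 3, so the stbys,e,m path is taken; it stores those
 * three bytes at 0x2000..0x2002 and leaves dst_off word-aligned at
 * 0x2000.  If dst_off is 0x2004, %t2 is 4, and the stws,mb in the b's
 * delay slot stores the whole word at 0x2000 and backs dst_off up to
 * 0x2000, which is exactly the stws,mb behavior the plain stbys,e,m
 * would not provide for an aligned address.
 */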

/*
 * This macro does a bulk copy with no shifting.  cmplt and m are
 * the completer and displacement multiplier, respectively, for
 * the load and store instructions.
 */
#define	_COPY(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \
							! \
	/*						! \
	 * Loop storing 16 bytes at a time.  Since count ! \
	 * may be > INT_MAX, we have to be careful and	! \
	 * avoid comparisons that treat it as a signed	! \
	 * quantity, until after this loop, when count	! \
	 * is guaranteed to be less than 16.		! \
	 */						! \
	comib,>>=,n	15, count, _LABEL(_skip16)	! \
.label _LABEL(_loop16)					! \
	addi		-16, count, count		! \
	ldws,cmplt	m*4(src_spc, src_off), %t1	! \
	ldws,cmplt	m*4(src_spc, src_off), %t2	! \
	ldws,cmplt	m*4(src_spc, src_off), %t3	! \
	ldws,cmplt	m*4(src_spc, src_off), %t4	! \
	stws,cmplt	%t1, m*4(dst_spc, dst_off)	! \
	stws,cmplt	%t2, m*4(dst_spc, dst_off)	! \
	stws,cmplt	%t3, m*4(dst_spc, dst_off)	! \
	comib,<<	15, count, _LABEL(_loop16)	! \
	stws,cmplt	%t4, m*4(dst_spc, dst_off)	! \
.label _LABEL(_skip16)					! \
							! \
	/* Loop storing 4 bytes at a time. */		! \
	addib,<,n	-4, count, _LABEL(_skip4)	! \
.label _LABEL(_loop4)					! \
	ldws,cmplt	m*4(src_spc, src_off), %t1	! \
	addib,>=	-4, count, _LABEL(_loop4)	! \
	stws,cmplt	%t1, m*4(dst_spc, dst_off)	! \
.label _LABEL(_skip4)					! \
	/* Restore the correct count. */		! \
	addi		4, count, count			! \
							! \
.label _LABEL(_do1)					! \
							! \
	/* Loop storing 1 byte at a time. */		! \
	addib,<,n	-1, count, _LABEL(_skip1)	! \
.label _LABEL(_loop1)					! \
	ldbs,cmplt	m*1(src_spc, src_off), %t1	! \
	addib,>=	-1, count, _LABEL(_loop1)	! \
	stbs,cmplt	%t1, m*1(dst_spc, dst_off)	! \
.label _LABEL(_skip1)					! \
	/* Restore the correct count. */		! \
	b		_LABEL(_done)			! \
	addi		1, count, count
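
/*
 * A rough C sketch of the structure _COPY expands to in the forward
 * (m == 1) case (illustrative only: the names are invented, the sketch
 * assumes both pointers are already word-aligned here, and the first,
 * possibly partial, word has already been stored by the callers; the
 * real code works on space-register/offset pairs, not C pointers):
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	static void
 *	copy_words_sketch(uint8_t *dst, const uint8_t *src, size_t count)
 *	{
 *		while (count >= 16) {			// _loop16
 *			uint32_t t1 = ((const uint32_t *)src)[0];
 *			uint32_t t2 = ((const uint32_t *)src)[1];
 *			uint32_t t3 = ((const uint32_t *)src)[2];
 *			uint32_t t4 = ((const uint32_t *)src)[3];
 *			((uint32_t *)dst)[0] = t1;
 *			((uint32_t *)dst)[1] = t2;
 *			((uint32_t *)dst)[2] = t3;
 *			((uint32_t *)dst)[3] = t4;
 *			src += 16; dst += 16; count -= 16;
 *		}
 *		while (count >= 4) {			// _loop4
 *			*(uint32_t *)dst = *(const uint32_t *)src;
 *			src += 4; dst += 4; count -= 4;
 *		}
 *		while (count > 0) {			// _do1 / _loop1
 *			*dst++ = *src++;
 *			count--;
 *		}
 *	}
 *
 * All count comparisons are unsigned, matching the INT_MAX caveat in
 * the comment above; with m == -1 the same structure walks downward.
 */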

/*
 * This macro is definitely strange.  It exists purely to
 * allow the _COPYS macro to be reused, but because it
 * requires this long attempt to explain it, I'm starting
 * to doubt the value of that.
 *
 * Part of the expansion of the _COPYS macro below are loops
 * that copy four words or one word at a time, performing shifts
 * to get data to line up correctly in the destination buffer.
 *
 * The _COPYS macro is used when copying backwards, as well
 * as forwards.  The 4-word loop always loads into %t1, %t2, %t3,
 * and %t4 in that order.  This means that when copying forward,
 * %t1 will have the word from the lowest address, and %t4 will
 * have the word from the highest address.  When copying
 * backwards, the opposite is true.
 *
 * The shift instructions need pairs of registers with adjacent
 * words, with the register containing the word from the lowest
 * address *always* coming first.  It is this asymmetry that
 * gives rise to this macro - depending on which direction
 * we're copying in, these ordered pairs are different.
 *
 * Fortunately, we can compute those register numbers at compile
 * time, and assemble them manually into a shift instruction.
 * That's what this macro does.
 *
 * This macro takes three arguments.  The third, t, is simply
 * the number of the target register.  n ranges from 0 to 3 and
 * is the "shift number", i.e., n = 0 means we're doing the
 * shift for what will be the first store.
 *
 * m is the displacement multiplier from the _COPYS macro call.
 * This is 1 for a forward copy and -1 for a backwards copy.
 * So, the ((m + 1) / 2) term yields 0 for a backwards copy and
 * 1 for a forward copy, and the ((m - 1) / 2) term yields
 * 0 for a forward copy, and -1 for a backwards copy.
 * These terms are used to discriminate the register computations
 * below.
 *
 * When copying forward, then, the first register used with
 * the first vshd will be 19 + (3 - ((0 - 1) & 3)), or %t4,
 * which matches _COPYS' requirement that the word last loaded
 * be in %t4.  The first register used for the second vshd
 * will then "wrap" around to 19 + (3 - ((1 - 1) & 3)), or %t1.
 * And so on to %t2 and %t3.
 *
 * When copying forward, the second register used with the first
 * vshd will be 19 + (3 - ((0 + 0) & 3)), or %t1.  It will
 * continue to be %t2, then %t3, and finally %t4.
 *
 * When copying backwards, the values for the first and second
 * register for each vshd are reversed from the forwards case.
 * (Symmetry reclaimed!)  Proving this is "left as an exercise
 * for the reader" (remember the different discriminating values!)
 */
#define	_VSHD(n, m, t) \
	.word (0xd0000000 | \
	    ((19 + (3 - ((n - 1 * ((m + 1) / 2)) & 3))) << 16) | \
	    ((19 + (3 - ((n + 1 * ((m - 1) / 2)) & 3))) << 21) | \
	    (t))
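
/*
 * Worked example of the register arithmetic above (illustrative only):
 * for a forward copy (m == 1) and the first shift (n == 0),
 *
 *	first  field: 19 + (3 - ((0 - 1) & 3)) = 19 + (3 - 3) = 19 = %t4
 *	second field: 19 + (3 - ((0 + 0) & 3)) = 19 + (3 - 0) = 22 = %t1
 *
 * so _VSHD(0, 1, 1) assembles the word for "vshd %t4, %t1, %r1", which
 * matches the annotation at its first use in _COPYS below.  For a
 * backwards copy (m == -1) the same n == 0 case computes %t1 and %t4
 * in the opposite order, giving "vshd %t1, %t4, %r1" instead.
 */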

/*
 * This macro does a bulk copy with shifting.  cmplt and m are
 * the completer and displacement multiplier, respectively, for
 * the load and store instructions.  It is assumed that the
 * word last loaded is already in %t4.
 */
#define	_COPYS(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \
							! \
	/*						! \
	 * Loop storing 16 bytes at a time.  Since count ! \
	 * may be > INT_MAX, we have to be careful and	! \
	 * avoid comparisons that treat it as a signed	! \
	 * quantity, until after this loop, when count	! \
	 * is guaranteed to be less than 16.		! \
	 */						! \
	comib,>>=,n	15, count, _LABEL(S_skip16)	! \
.label _LABEL(S_loop16)					! \
	addi		-16, count, count		! \
	ldws,cmplt	m*4(src_spc, src_off), %t1	! \
	ldws,cmplt	m*4(src_spc, src_off), %t2	! \
	ldws,cmplt	m*4(src_spc, src_off), %t3	! \
	_VSHD(0, m, 1)	/* vshd %t4, %t1, %r1 */	! \
	ldws,cmplt	m*4(src_spc, src_off), %t4	! \
	_VSHD(1, m, 22)	/* vshd %t1, %t2, %t1 */	! \
	_VSHD(2, m, 21)	/* vshd %t2, %t3, %t2 */	! \
	_VSHD(3, m, 20)	/* vshd %t3, %t4, %t3 */	! \
	stws,cmplt	%r1, m*4(dst_spc, dst_off)	! \
	stws,cmplt	%t1, m*4(dst_spc, dst_off)	! \
	stws,cmplt	%t2, m*4(dst_spc, dst_off)	! \
	comib,<<	15, count, _LABEL(S_loop16)	! \
	stws,cmplt	%t3, m*4(dst_spc, dst_off)	! \
.label _LABEL(S_skip16)					! \
							! \
	/* Loop storing 4 bytes at a time. */		! \
	addib,<,n	-4, count, _LABEL(S_skip4)	! \
.label _LABEL(S_loop4)					! \
	ldws,cmplt	m*4(src_spc, src_off), %t1	! \
	_VSHD(0, m, 1)	/* into %r1 (1) */		! \
	copy		%t1, %t4			! \
	addib,>=	-4, count, _LABEL(S_loop4)	! \
	stws,cmplt	%r1, m*4(dst_spc, dst_off)	! \
.label _LABEL(S_skip4)					! \
							! \
	/*						! \
	 * We now need to "back up" src_off by the	! \
	 * number of bytes remaining in the FIFO	! \
	 * (i.e., the number of bytes remaining in %t4), ! \
	 * because (the correct) count still includes	! \
	 * these bytes, and we intend to keep it that	! \
	 * way, and finish with the single-byte copier.	! \
	 *						! \
	 * The number of bytes remaining in the FIFO is	! \
	 * related to the shift count, so recover it,	! \
	 * restoring the correct count at the same time. ! \
	 */						! \
	mfctl		%cr11, %t1			! \
	addi		4, count, count			! \
	shd		%r0, %t1, 3, %t1		! \
							! \
	/*						! \
	 * If we're copying forward, the shift count	! \
	 * is the number of bytes remaining in the	! \
	 * FIFO, and we want to subtract it from src_off. ! \
	 * If we're copying backwards, (4 - shift count) ! \
	 * is the number of bytes remaining in the FIFO, ! \
	 * and we want to add it to src_off.		! \
	 *						! \
	 * We observe that x + (4 - y) = x - (y - 4),	! \
	 * and introduce this instruction to add -4 when ! \
	 * m is -1, although this does mean one extra	! \
	 * instruction in the forward case.		! \
	 */						! \
	addi		4*((m - 1) / 2), %t1, %t1	! \
							! \
	/* Now branch to the byte-at-a-time loop. */	! \
	b		_LABEL(_do1)			! \
	sub		src_off, %t1, src_off
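
/*
 * A worked example of the FIFO bookkeeping at the end of _COPYS
 * (illustrative only): suppose the byte shift computed by the caller
 * was 3.  The caller did "sh3add %t1, %r0, %t1; mtctl %t1, %cr11",
 * so the Shift Amount Register holds 24; the "shd %r0, %t1, 3, %t1"
 * above divides that by 8 and recovers the byte count of 3.  In the
 * forward case (m == 1) the addi adds 0 and src_off is backed up by 3.
 * In the backwards case (m == -1) the addi adds -4 first, so src_off
 * moves by -(3 - 4) = +1, i.e. forward over the 4 - 3 = 1 byte still
 * sitting in the FIFO.
 */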

/*
 * This macro copies a region in the forward direction.
 */
#define	_COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count) \
							! \
	/*						! \
	 * Since in the shifting-left case we will	! \
	 * load 8 bytes before checking count, to	! \
	 * keep things simple, branch to the byte	! \
	 * copier unless we're copying at least 8.	! \
	 */						! \
	comib,>>,n	8, count, _LABEL(_do1)		! \
							! \
	/*						! \
	 * Once we 4-byte align the source offset,	! \
	 * figure out how many bytes from the region	! \
	 * will be in the first 4-byte word we read.	! \
	 * Ditto for writing the destination offset.	! \
	 */						! \
	extru		src_off, 31, 2, %t1		! \
	extru		dst_off, 31, 2, %t2		! \
	subi		4, %t1, %t1			! \
	subi		4, %t2, %t2			! \
							! \
	/*						! \
	 * Calculate the byte shift required.  A	! \
	 * positive value means a source 4-byte word	! \
	 * has to be shifted to the right to line up	! \
	 * as a destination 4-byte word.		! \
	 */						! \
	sub		%t1, %t2, %t1			! \
							! \
	/* 4-byte align src_off. */			! \
	depi		0, 31, 2, src_off		! \
							! \
	/*						! \
	 * It's somewhat important to note that this	! \
	 * code thinks of count as "the number of bytes	! \
	 * that haven't been stored yet", as opposed to	! \
	 * "the number of bytes that haven't been copied ! \
	 * yet".  The distinction is subtle, but becomes ! \
	 * apparent at the end of the shifting code, where ! \
	 * we "back up" src_off to correspond to count,	! \
	 * as opposed to flushing the FIFO.		! \
	 *						! \
	 * We calculated above how many bytes our first	! \
	 * store will store, so update count now.	! \
	 *						! \
	 * If the shift is zero, strictly as an optimization ! \
	 * we use a copy loop that does no shifting.	! \
	 */						! \
	comb,<>		%r0, %t1, _LABEL(_shifting)	! \
	sub		count, %t2, count		! \
							! \
	/* Load and store the first word. */		! \
	ldws,ma		4(src_spc, src_off), %t4	! \
	stbys,b,m	%t4, 4(dst_spc, dst_off)	! \
							! \
	/* Do the rest of the copy. */			! \
	_COPY(src_spc,src_off,dst_spc,dst_off,count,ma,1) ! \
							! \
.label _LABEL(_shifting)				! \
							! \
	/*						! \
	 * If shift < 0, we need to shift words to the	! \
	 * left.  Since we can't do this directly, we	! \
	 * adjust the shift so it's a shift to the right ! \
	 * and load the first word into the high word of ! \
	 * the FIFO.  Otherwise, we load a zero into the ! \
	 * high word of the FIFO.			! \
	 */						! \
	comb,<=		%r0, %t1, _LABEL(_shiftingrt)	! \
	copy		%r0, %t3			! \
	addi		4, %t1, %t1			! \
	ldws,ma		4(src_spc, src_off), %t3	! \
.label _LABEL(_shiftingrt)				! \
							! \
	/*						! \
	 * Turn the shift byte count into a bit count,	! \
	 * load the next word, set the Shift Amount	! \
	 * Register, and form and store the first word.	! \
	 */						! \
	sh3add		%t1, %r0, %t1			! \
	ldws,ma		4(src_spc, src_off), %t4	! \
	mtctl		%t1, %cr11			! \
	vshd		%t3, %t4, %r1			! \
	stbys,b,m	%r1, 4(dst_spc, dst_off)	! \
							! \
	/* Do the rest of the copy. */			! \
	_COPYS(src_spc,src_off,dst_spc,dst_off,count,ma,1)
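
/*
 * A rough C model of the shifting forward copy above, for a big-endian
 * machine such as hppa (illustrative only: all names are invented, the
 * model aligns the destination and re-reads the source in whole words,
 * whereas the real code aligns the source and lets stbys handle the
 * destination edges, and like the real code it may read a few bytes
 * beyond the ends of the source region):
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	static void
 *	copy_forward_sketch(uint8_t *dst, const uint8_t *src, size_t len)
 *	{
 *		const uint32_t *ws;
 *		uint32_t prev, next;
 *		size_t sh;
 *
 *		// Head: byte copies until dst is word-aligned.
 *		while (len > 0 && ((uintptr_t)dst & 3) != 0) {
 *			*dst++ = *src++;
 *			len--;
 *		}
 *		sh = (uintptr_t)src & 3;
 *		if (sh == 0) {
 *			// Same phase: the no-shift loop (_COPY).
 *			for (; len >= 4; len -= 4, src += 4, dst += 4)
 *				*(uint32_t *)dst = *(const uint32_t *)src;
 *		} else {
 *			// Different phase: keep the previous source word
 *			// in a one-word "FIFO" and merge it with the next
 *			// one, which is what the vshd instructions do
 *			// using the Shift Amount Register.
 *			ws = (const uint32_t *)(src - sh);
 *			prev = *ws++;
 *			for (; len >= 4; len -= 4, src += 4, dst += 4) {
 *				next = *ws++;
 *				*(uint32_t *)dst = (prev << (8 * sh)) |
 *				    (next >> (8 * (4 - sh)));
 *				prev = next;
 *			}
 *		}
 *		// Tail: whatever is left, one byte at a time (_do1).
 *		while (len > 0) {
 *			*dst++ = *src++;
 *			len--;
 *		}
 *	}
 */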

/* This macro copies a region in the reverse direction. */
#define	_COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count) \
							! \
	/* Immediately add count to both offsets. */	! \
	add		src_off, count, src_off		! \
	add		dst_off, count, dst_off		! \
							! \
	/*						! \
	 * Since in the shifting-right case we		! \
	 * will load 8 bytes before checking		! \
	 * count, to keep things simple, branch		! \
	 * to the byte copier unless we're		! \
	 * copying at least 8 bytes.			! \
	 */						! \
	comib,>>,n	8, count, _LABEL(_do1)		! \
							! \
	/*						! \
	 * Once we 4-byte align the source offset,	! \
	 * figure out how many bytes from the region	! \
	 * will be in the first 4-byte word we read.	! \
	 * Ditto for writing the destination offset.	! \
	 */						! \
	extru,<>	src_off, 31, 2, %t1		! \
	ldi		4, %t1				! \
	extru,<>	dst_off, 31, 2, %t2		! \
	ldi		4, %t2				! \
							! \
	/*						! \
	 * Calculate the byte shift required.  A	! \
	 * positive value means a source 4-byte		! \
	 * word has to be shifted to the right to	! \
	 * line up as a destination 4-byte word.	! \
	 */						! \
	sub		%t2, %t1, %t1			! \
							! \
	/*						! \
	 * 4-byte align src_off, leaving it pointing	! \
	 * to the 4-byte word *after* the next word	! \
	 * we intend to load.				! \
	 *						! \
	 * It's somewhat important to note that this	! \
	 * code thinks of count as "the number of bytes	! \
	 * that haven't been stored yet", as opposed to	! \
	 * "the number of bytes that haven't been copied ! \
	 * yet".  The distinction is subtle, but becomes ! \
	 * apparent at the end of the shifting code, where ! \
	 * we "back up" src_off to correspond to count,	! \
	 * as opposed to flushing the FIFO.		! \
	 *						! \
	 * We calculated above how many bytes our first	! \
	 * store will store, so update count now.	! \
	 *						! \
	 * If the shift is zero, we use a copy loop that ! \
	 * does no shifting.  NB: unlike the forward case, ! \
	 * this is NOT strictly an optimization.  If the ! \
	 * SAR is zero the vshds do NOT do the right thing. ! \
	 * This is another asymmetry more or less the "fault" ! \
	 * of vshd.					! \
	 */						! \
	addi		3, src_off, src_off		! \
	sub		count, %t2, count		! \
	comb,<>		%r0, %t1, _LABEL(_shifting)	! \
	depi		0, 31, 2, src_off		! \
							! \
	/* Load and store the first word. */		! \
	ldws,mb		-4(src_spc, src_off), %t4	! \
	_STBYS_E_M(%t4, dst_spc, dst_off)		! \
							! \
	/* Do the rest of the copy. */			! \
	_COPY(src_spc,src_off,dst_spc,dst_off,count,mb,-1) ! \
							! \
.label _LABEL(_shifting)				! \
							! \
	/*						! \
	 * If shift < 0, we need to shift words to the	! \
	 * left.  Since we can't do this directly, we	! \
	 * adjust the shift so it's a shift to the right ! \
	 * and load a zero into the low word of the FIFO. ! \
	 * Otherwise, we load the first word into the	! \
	 * low word of the FIFO.			! \
	 *						! \
	 * Note the nullification trickery here.  We	! \
	 * assume that we're shifting to the left, and	! \
	 * load zero into the low word of the FIFO.  Then ! \
	 * we nullify the addi if we're shifting to the	! \
	 * right.  If the addi is not nullified, we are	! \
	 * shifting to the left, so we nullify the load. ! \
	 * We branch if we're shifting to the right.	! \
	 */						! \
	copy		%r0, %t3			! \
	comb,<=,n	%r0, %t1, 0			! \
	addi,tr		4, %t1, %t1			! \
	ldws,mb		-4(src_spc, src_off), %t3	! \
							! \
	/*						! \
	 * Turn the shift byte count into a bit count,	! \
	 * load the next word, set the Shift Amount	! \
	 * Register, and form and store the first word.	! \
	 */						! \
	sh3add		%t1, %r0, %t1			! \
	ldws,mb		-4(src_spc, src_off), %t4	! \
	mtctl		%t1, %cr11			! \
	vshd		%t4, %t3, %r1			! \
	_STBYS_E_M(%r1, dst_spc, dst_off)		! \
							! \
	/* Do the rest of the copy. */			! \
	_COPYS(src_spc,src_off,dst_spc,dst_off,count,mb,-1)
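
/*
 * Worked example of the reverse-copy setup (illustrative only):
 * copying 32 bytes from src = 0xfe1 to dst = 0x1fe3.  After the two
 * adds, src_off = 0x1001 and dst_off = 0x2003; the first-word byte
 * counts are %t1 = 1 and %t2 = 3, so the shift is 3 - 1 = 2 (a shift
 * to the right) and count becomes 29.  The addi/depi pair then leaves
 * src_off at 0x1004, so that the first ldws,mb that executes loads the
 * word at 0x1000, and _STBYS_E_M stores the region's last three bytes
 * at 0x2000..0x2002.
 */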

/*
 * For paranoia, when things aren't going well, enable this
 * code to assemble byte-at-a-time-only copying.
 */
#if 1
#undef _COPY_FORWARD
#define	_COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count) \
	comb,=,n	%r0, count, _LABEL(_done)	! \
	ldbs,ma		1(src_spc, src_off), %r1	! \
	addib,<>	-1, count, -12			! \
	stbs,ma		%r1, 1(dst_spc, dst_off)	! \
	b,n		_LABEL(_done)
#undef _COPY_REVERSE
#define	_COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count) \
	comb,=		%r0, count, _LABEL(_done)	! \
	add		src_off, count, src_off		! \
	add		dst_off, count, dst_off		! \
	ldbs,mb		-1(src_spc, src_off), %r1	! \
	addib,<>	-1, count, -12			! \
	stbs,mb		%r1, -1(dst_spc, dst_off)	! \
	b,n		_LABEL(_done)
#endif

/*
 * If none of the following are defined, define BCOPY.
 */
#if !(defined(SPCOPY) || defined(MEMCPY) || defined(MEMMOVE))
#define	BCOPY
#endif

#if defined(SPCOPY) && !defined(_STANDALONE)

#include "opt_multiprocessor.h"

#include <sys/errno.h>
#include "assym.h"

/*
 * int spcopy(pa_space_t ssp, const void *src, pa_space_t dsp, void *dst,
 *	size_t len)
 *
 * We assume that the regions do not overlap.
 */
LEAF_ENTRY(spcopy)

	/*
	 * Setup the fault handler, which will fill in %ret0 if triggered.
	 */
	GET_CURLWP(%r31)
#ifdef DIAGNOSTIC
	comb,<>,n %r0, %r31, Lspcopy_curlwp_ok
	ldil	L%panic, %r1
	ldil	L%Lspcopy_curlwp_bad, %arg0
	ldo	R%panic(%r1), %r1
	ldo	R%Lspcopy_curlwp_bad(%arg0), %arg0
	.call
	bv,n	%r0(%r1)
	nop
Lspcopy_curlwp_bad:
	.asciz	"spcopy: curlwp == NULL\n"
	.align	8
Lspcopy_curlwp_ok:
#endif /* DIAGNOSTIC */
	ldil	L%spcopy_fault, %r1
	ldw	L_PCB(%r31), %r31
	ldo	R%spcopy_fault(%r1), %r1
	stw	%r1, PCB_ONFAULT(%r31)

	/* Setup the space registers. */
	mfsp	%sr2, %ret1
	mtsp	%arg0, %sr1
	mtsp	%arg2, %sr2

	/* Get the len argument and do the copy. */
	ldw	HPPA_FRAME_ARG(4)(%sp), %arg0
#define	_LABEL(l) __CONCAT(spcopy,l)
	_COPY_FORWARD(%sr1,%arg1,%sr2,%arg3,%arg0)
_LABEL(_done):

	/* Return. */
	copy	%r0, %ret0
ALTENTRY(spcopy_fault)
	stw	%r0, PCB_ONFAULT(%r31)
	bv	%r0(%rp)
	mtsp	%ret1, %sr2
EXIT(spcopy)
#endif /* SPCOPY && !_STANDALONE */

#ifdef MEMCPY
/*
 * void *memcpy(void *restrict dst, const void *restrict src, size_t len);
 *
 * memcpy is specifically restricted to working on
 * non-overlapping regions, so we can just copy forward.
 */
LEAF_ENTRY(memcpy)
	copy	%arg0, %ret0
#define	_LABEL(l) __CONCAT(memcpy,l)
	_COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2)
_LABEL(_done):
	bv,n	%r0(%rp)
	nop
EXIT(memcpy)
#endif /* MEMCPY */

#ifdef BCOPY
/*
 * void bcopy(const void *src, void *dst, size_t len);
 */
LEAF_ENTRY(bcopy)
	copy	%arg0, %r1
	copy	%arg1, %arg0
	copy	%r1, %arg1
	/* FALLTHROUGH */
#define	_LABEL_F(l) __CONCAT(bcopy_F,l)
#define	_LABEL_R(l) __CONCAT(bcopy_R,l)
#endif

#ifdef MEMMOVE
/*
 * void *memmove(void *dst, const void *src, size_t len);
 */
LEAF_ENTRY(memmove)
#define	_LABEL_F(l) __CONCAT(memmove_F,l)
#define	_LABEL_R(l) __CONCAT(memmove_R,l)
	copy	%arg0, %ret0
#endif /* MEMMOVE */

#if defined(BCOPY) || defined(MEMMOVE)

	/*
	 * If src >= dst or src + len <= dst, we copy
	 * forward, else we copy in reverse.
	 */
	add	%arg1, %arg2, %r1
	comb,>>=,n	%arg1, %arg0, 0
	comb,>>,n	%r1, %arg0, _LABEL_R(_go)

#define	_LABEL	_LABEL_F
	_COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2)
#undef	_LABEL

_LABEL_R(_go):
#define	_LABEL	_LABEL_R
	_COPY_REVERSE(%sr0,%arg1,%sr0,%arg0,%arg2)
#undef	_LABEL

_LABEL_F(_done):
_LABEL_R(_done):
	bv,n	%r0(%rp)
	nop
#ifdef BCOPY
EXIT(bcopy)
#else
EXIT(memmove)
#endif
#endif /* BCOPY || MEMMOVE */