/*	$NetBSD: bcopy.S,v 1.15 2015/08/30 07:55:45 uebayasi Exp $	*/

/*
 * Copyright (c) 2002 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matthew Fredette.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copy routines for NetBSD/hppa.
 */

#undef _LOCORE
#define _LOCORE	/* XXX fredette - unfortunate */

#if defined(SPCOPY) && !defined(_STANDALONE)

#include "opt_diagnostic.h"
#include "opt_multiprocessor.h"

#include <machine/cpu.h>

#endif

#include <machine/asm.h>
#include <machine/frame.h>
#include <machine/reg.h>

#if defined(LIBC_SCCS) && !defined(lint)
RCSID("$NetBSD: bcopy.S,v 1.15 2015/08/30 07:55:45 uebayasi Exp $")
#endif /* LIBC_SCCS and not lint */

/*
 * The stbys instruction is a little asymmetric.  When (%r2 & 3)
 * is zero, stbys,b,m %r1, 4(%r2) works like stws,ma.  You
 * might then wish that when (%r2 & 3) == 0, stbys,e,m %r1, -4(%r2)
 * worked like stws,mb.  But it doesn't.
 *
 * This macro works around this problem.  It requires that %t2
 * hold the number of bytes that will be written by this store
 * (meaning that it ranges from one to four).
 *
 * Watch the delay-slot trickery here.  The comib is used to set
 * up which instruction, either the stws or the stbys, is run
 * in the delay slot of the b instruction.
 */
#define _STBYS_E_M(r, dst_spc, dst_off) \
	comib,<>	4, %t2, 4			! \
	b		4				! \
	stws,mb		r, -4(dst_spc, dst_off)		! \
	stbys,e,m	r, 0(dst_spc, dst_off)

/*
 * This macro does a bulk copy with no shifting.  cmplt and m are
 * the completer and displacement multiplier, respectively, for
 * the load and store instructions.
 */
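/*
 * For reference, the loop structure below is roughly the following
 * C-like sketch.  The cmplt/m parameters fold the pre- or
 * post-modification of the offsets into the loads and stores, and
 * the first comparison is unsigned because count may exceed INT_MAX:
 *
 *	while (count > 15) {
 *		copy 16 bytes, four words at a time;
 *		count -= 16;
 *	}
 *	while (count >= 4) {
 *		copy a 4-byte word;
 *		count -= 4;
 *	}
 *	while (count >= 1) {
 *		copy a byte;
 *		count -= 1;
 *	}
 */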
#define _COPY(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \
							! \
	/*						! \
	 * Loop storing 16 bytes at a time.  Since count ! \
	 * may be > INT_MAX, we have to be careful and	! \
	 * avoid comparisons that treat it as a signed	! \
	 * quantity, until after this loop, when count	! \
	 * is guaranteed to be less than 16.		! \
	 */						! \
	comib,>>=,n	15, count, _LABEL(_skip16)	! \
.label _LABEL(_loop16)					! \
	addi		-16, count, count		! \
	ldws,cmplt	m*4(src_spc, src_off), %t1	! \
	ldws,cmplt	m*4(src_spc, src_off), %t2	! \
	ldws,cmplt	m*4(src_spc, src_off), %t3	! \
	ldws,cmplt	m*4(src_spc, src_off), %t4	! \
	stws,cmplt	%t1, m*4(dst_spc, dst_off)	! \
	stws,cmplt	%t2, m*4(dst_spc, dst_off)	! \
	stws,cmplt	%t3, m*4(dst_spc, dst_off)	! \
	comib,<<	15, count, _LABEL(_loop16)	! \
	stws,cmplt	%t4, m*4(dst_spc, dst_off)	! \
.label _LABEL(_skip16)					! \
							! \
	/* Loop storing 4 bytes at a time. */		! \
	addib,<,n	-4, count, _LABEL(_skip4)	! \
.label _LABEL(_loop4)					! \
	ldws,cmplt	m*4(src_spc, src_off), %t1	! \
	addib,>=	-4, count, _LABEL(_loop4)	! \
	stws,cmplt	%t1, m*4(dst_spc, dst_off)	! \
.label _LABEL(_skip4)					! \
	/* Restore the correct count. */		! \
	addi		4, count, count			! \
							! \
.label _LABEL(_do1)					! \
							! \
	/* Loop storing 1 byte at a time. */		! \
	addib,<,n	-1, count, _LABEL(_skip1)	! \
.label _LABEL(_loop1)					! \
	ldbs,cmplt	m*1(src_spc, src_off), %t1	! \
	addib,>=	-1, count, _LABEL(_loop1)	! \
	stbs,cmplt	%t1, m*1(dst_spc, dst_off)	! \
.label _LABEL(_skip1)					! \
	/* Restore the correct count. */		! \
	b		_LABEL(_done)			! \
	addi		1, count, count

/*
 * This macro is definitely strange.  It exists purely to
 * allow the _COPYS macro to be reused, but because it
 * requires this long attempt to explain it, I'm starting
 * to doubt the value of that.
 *
 * Part of the expansion of the _COPYS macro below is a pair of loops
 * that copy four words or one word at a time, performing shifts
 * to get data to line up correctly in the destination buffer.
 *
 * The _COPYS macro is used when copying backwards, as well
 * as forwards.  The 4-word loop always loads into %t1, %t2, %t3,
 * and %t4 in that order.  This means that when copying forward,
 * %t1 will have the word from the lowest address, and %t4 will
 * have the word from the highest address.  When copying
 * backwards, the opposite is true.
 *
 * The shift instructions need pairs of registers with adjacent
 * words, with the register containing the word from the lowest
 * address *always* coming first.  It is this asymmetry that
 * gives rise to this macro - depending on which direction
 * we're copying in, these ordered pairs are different.
 *
 * Fortunately, we can compute those register numbers at compile
 * time, and assemble them manually into a shift instruction.
 * That's what this macro does.
 *
 * This macro takes three arguments.  n ranges from 0 to 3 and
 * is the "shift number", i.e., n = 0 means we're doing the
 * shift for what will be the first store.
 *
 * m is the displacement multiplier from the _COPYS macro call.
 * This is 1 for a forward copy and -1 for a backwards copy.
 * So, the ((m + 1) / 2) term yields 0 for a backwards copy and
 * 1 for a forward copy, and the ((m - 1) / 2) term yields
 * 0 for a forward copy, and -1 for a backwards copy.
 * These terms are used to discriminate the register computations
 * below.
 *
 * t is the number of the register that the assembled vshd targets.
 *
 * When copying forward, then, the first register used with
 * the first vshd will be 19 + (3 - ((0 - 1) & 3)), or %t4,
 * which matches _COPYS' requirement that the word last loaded
 * be in %t4.  The first register used for the second vshd
 * will then "wrap" around to 19 + (3 - ((1 - 1) & 3)), or %t1.
 * And so on to %t2 and %t3.
 *
 * When copying forward, the second register used with the first
 * vshd will be 19 + (3 - ((n + 0) & 3)), or %t1.  It will
 * continue to be %t2, then %t3, and finally %t4.
 *
 * When copying backwards, the values for the first and second
 * register for each vshd are reversed from the forwards case.
 * (Symmetry reclaimed!)  Proving this is "left as an exercise
 * for the reader" (remember the different discriminating values!)
 */
#define _VSHD(n, m, t) \
	.word	(0xd0000000 | \
		 ((19 + (3 - ((n - 1 * ((m + 1) / 2)) & 3))) << 16) | \
		 ((19 + (3 - ((n + 1 * ((m - 1) / 2)) & 3))) << 21) | \
		 (t))
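
/*
 * Worked out from the formula above, the operand pairs that _VSHD
 * assembles (using %t1 = %r22, %t2 = %r21, %t3 = %r20, %t4 = %r19)
 * are:
 *
 *	n	forward (m = 1)		backwards (m = -1)
 *	0	vshd %t4, %t1		vshd %t1, %t4
 *	1	vshd %t1, %t2		vshd %t2, %t1
 *	2	vshd %t2, %t3		vshd %t3, %t2
 *	3	vshd %t3, %t4		vshd %t4, %t3
 *
 * which matches the vshd comments on the _VSHD uses in _COPYS below.
 */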

/*
 * This macro does a bulk copy with shifting.  cmplt and m are
 * the completer and displacement multiplier, respectively, for
 * the load and store instructions.  It is assumed that the
 * word last loaded is already in %t4.
 */
#define _COPYS(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \
							! \
	/*						! \
	 * Loop storing 16 bytes at a time.  Since count ! \
	 * may be > INT_MAX, we have to be careful and	! \
	 * avoid comparisons that treat it as a signed	! \
	 * quantity, until after this loop, when count	! \
	 * is guaranteed to be less than 16.		! \
	 */						! \
	comib,>>=,n	15, count, _LABEL(S_skip16)	! \
.label _LABEL(S_loop16)					! \
	addi		-16, count, count		! \
	ldws,cmplt	m*4(src_spc, src_off), %t1	! \
	ldws,cmplt	m*4(src_spc, src_off), %t2	! \
	ldws,cmplt	m*4(src_spc, src_off), %t3	! \
	_VSHD(0, m, 1)	/* vshd %t4, %t1, %r1 */	! \
	ldws,cmplt	m*4(src_spc, src_off), %t4	! \
	_VSHD(1, m, 22)	/* vshd %t1, %t2, %t1 */	! \
	_VSHD(2, m, 21)	/* vshd %t2, %t3, %t2 */	! \
	_VSHD(3, m, 20)	/* vshd %t3, %t4, %t3 */	! \
	stws,cmplt	%r1, m*4(dst_spc, dst_off)	! \
	stws,cmplt	%t1, m*4(dst_spc, dst_off)	! \
	stws,cmplt	%t2, m*4(dst_spc, dst_off)	! \
	comib,<<	15, count, _LABEL(S_loop16)	! \
	stws,cmplt	%t3, m*4(dst_spc, dst_off)	! \
.label _LABEL(S_skip16)					! \
							! \
	/* Loop storing 4 bytes at a time. */		! \
	addib,<,n	-4, count, _LABEL(S_skip4)	! \
.label _LABEL(S_loop4)					! \
	ldws,cmplt	m*4(src_spc, src_off), %t1	! \
	_VSHD(0, m, 1)	/* into %r1 (1) */		! \
	copy		%t1, %t4			! \
	addib,>=	-4, count, _LABEL(S_loop4)	! \
	stws,cmplt	%r1, m*4(dst_spc, dst_off)	! \
.label _LABEL(S_skip4)					! \
							! \
	/*						! \
	 * We now need to "back up" src_off by the	! \
	 * number of bytes remaining in the FIFO	! \
	 * (i.e., the number of bytes remaining in %t4), ! \
	 * because (the correct) count still includes	! \
	 * these bytes, and we intend to keep it that	! \
	 * way, and finish with the single-byte copier.	! \
	 *						! \
	 * The number of bytes remaining in the FIFO is	! \
	 * related to the shift count, so recover it,	! \
	 * restoring the correct count at the same time. ! \
	 */						! \
	mfctl		%cr11, %t1			! \
	addi		4, count, count			! \
	shd		%r0, %t1, 3, %t1		! \
							! \
	/*						! \
	 * If we're copying forward, the shift count	! \
	 * is the number of bytes remaining in the	! \
	 * FIFO, and we want to subtract it from src_off. ! \
	 * If we're copying backwards, (4 - shift count) ! \
	 * is the number of bytes remaining in the FIFO, ! \
	 * and we want to add it to src_off.		! \
	 *						! \
	 * We observe that x + (4 - y) = x - (y - 4),	! \
	 * and introduce this instruction to add -4 when ! \
	 * m is -1, although this does mean one extra	! \
	 * instruction in the forward case.		! \
	 */						! \
	addi		4*((m - 1) / 2), %t1, %t1	! \
							! \
	/* Now branch to the byte-at-a-time loop. */	! \
	b		_LABEL(_do1)			! \
	sub		src_off, %t1, src_off
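
/*
 * To make the back-up arithmetic at the end of _COPYS concrete:
 * suppose the SAR holds 8, i.e. a byte shift of 1, so the shd
 * recovers %t1 = 1.  Copying forward (m = 1), one byte remains
 * in the FIFO; the addi adds 0 and the sub moves src_off back
 * by 1.  Copying backwards (m = -1), 4 - 1 = 3 bytes remain in
 * the FIFO; the addi turns %t1 into -3 and the sub moves src_off
 * up by 3.
 */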

/*
 * This macro copies a region in the forward direction.
 */
#define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count) \
							! \
	/*						! \
	 * Since in the shifting-left case we will	! \
	 * load 8 bytes before checking count, to	! \
	 * keep things simple, branch to the byte	! \
	 * copier unless we're copying at least 8.	! \
	 */						! \
	comib,>>,n	8, count, _LABEL(_do1)		! \
							! \
	/*						! \
	 * Once we 4-byte align the source offset,	! \
	 * figure out how many bytes from the region	! \
	 * will be in the first 4-byte word we read.	! \
	 * Ditto for writing the destination offset.	! \
	 */						! \
	extru		src_off, 31, 2, %t1		! \
	extru		dst_off, 31, 2, %t2		! \
	subi		4, %t1, %t1			! \
	subi		4, %t2, %t2			! \
							! \
	/*						! \
	 * Calculate the byte shift required.  A	! \
	 * positive value means a source 4-byte word	! \
	 * has to be shifted to the right to line up	! \
	 * as a destination 4-byte word.		! \
	 */						! \
	sub		%t1, %t2, %t1			! \
							! \
	/* 4-byte align src_off. */			! \
	depi		0, 31, 2, src_off		! \
							! \
	/*						! \
	 * It's somewhat important to note that this	! \
	 * code thinks of count as "the number of bytes	! \
	 * that haven't been stored yet", as opposed to	! \
	 * "the number of bytes that haven't been copied ! \
	 * yet".  The distinction is subtle, but becomes ! \
	 * apparent at the end of the shifting code, where ! \
	 * we "back up" src_off to correspond to count,	! \
	 * as opposed to flushing the FIFO.		! \
	 *						! \
	 * We calculated above how many bytes our first	! \
	 * store will store, so update count now.	! \
	 *						! \
	 * If the shift is zero, strictly as an optimization ! \
	 * we use a copy loop that does no shifting.	! \
	 */						! \
	comb,<>		%r0, %t1, _LABEL(_shifting)	! \
	sub		count, %t2, count		! \
							! \
	/* Load and store the first word. */		! \
	ldws,ma		4(src_spc, src_off), %t4	! \
	stbys,b,m	%t4, 4(dst_spc, dst_off)	! \
							! \
	/* Do the rest of the copy. */			! \
	_COPY(src_spc,src_off,dst_spc,dst_off,count,ma,1) ! \
							! \
.label _LABEL(_shifting)				! \
							! \
	/*						! \
	 * If shift < 0, we need to shift words to the	! \
	 * left.  Since we can't do this directly, we	! \
	 * adjust the shift so it's a shift to the right ! \
	 * and load the first word into the high word of ! \
	 * the FIFO.  Otherwise, we load a zero into the ! \
	 * high word of the FIFO.			! \
	 */						! \
	comb,<=		%r0, %t1, _LABEL(_shiftingrt)	! \
	copy		%r0, %t3			! \
	addi		4, %t1, %t1			! \
	ldws,ma		4(src_spc, src_off), %t3	! \
.label _LABEL(_shiftingrt)				! \
							! \
	/*						! \
	 * Turn the shift byte count into a bit count,	! \
	 * load the next word, set the Shift Amount	! \
	 * Register, and form and store the first word.	! \
	 */						! \
	sh3add		%t1, %r0, %t1			! \
	ldws,ma		4(src_spc, src_off), %t4	! \
	mtctl		%t1, %cr11			! \
	vshd		%t3, %t4, %r1			! \
	stbys,b,m	%r1, 4(dst_spc, dst_off)	! \
							! \
	/* Do the rest of the copy. */			! \
	_COPYS(src_spc,src_off,dst_spc,dst_off,count,ma,1)
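
/*
 * As an example of the shift computation in _COPY_FORWARD: if
 * src_off & 3 == 1 and dst_off & 3 == 3, then %t1 = 4 - 1 = 3
 * bytes of the region sit in the first source word read, the
 * first store writes %t2 = 4 - 3 = 1 byte, and the byte shift
 * is %t1 - %t2 = 2, i.e. each source word must be shifted right
 * by two bytes to line up with the destination.
 */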

/* This macro copies a region in the reverse direction. */
#define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count) \
							! \
	/* Immediately add count to both offsets. */	! \
	add		src_off, count, src_off		! \
	add		dst_off, count, dst_off		! \
							! \
	/*						! \
	 * Since in the shifting-right case we		! \
	 * will load 8 bytes before checking		! \
	 * count, to keep things simple, branch		! \
	 * to the byte copier unless we're		! \
	 * copying at least 8 bytes.			! \
	 */						! \
	comib,>>,n	8, count, _LABEL(_do1)		! \
							! \
	/*						! \
	 * Once we 4-byte align the source offset,	! \
	 * figure out how many bytes from the region	! \
	 * will be in the first 4-byte word we read.	! \
	 * Ditto for writing the destination offset.	! \
	 */						! \
	extru,<>	src_off, 31, 2, %t1		! \
	ldi		4, %t1				! \
	extru,<>	dst_off, 31, 2, %t2		! \
	ldi		4, %t2				! \
							! \
	/*						! \
	 * Calculate the byte shift required.  A	! \
	 * positive value means a source 4-byte		! \
	 * word has to be shifted to the right to	! \
	 * line up as a destination 4-byte word.	! \
	 */						! \
	sub		%t2, %t1, %t1			! \
							! \
	/*						! \
	 * 4-byte align src_off, leaving it pointing	! \
	 * to the 4-byte word *after* the next word	! \
	 * we intend to load.				! \
	 *						! \
	 * It's somewhat important to note that this	! \
	 * code thinks of count as "the number of bytes	! \
	 * that haven't been stored yet", as opposed to	! \
	 * "the number of bytes that haven't been copied ! \
	 * yet".  The distinction is subtle, but becomes ! \
	 * apparent at the end of the shifting code, where ! \
	 * we "back up" src_off to correspond to count,	! \
	 * as opposed to flushing the FIFO.		! \
	 *						! \
	 * We calculated above how many bytes our first	! \
	 * store will store, so update count now.	! \
	 *						! \
	 * If the shift is zero, we use a copy loop that ! \
	 * does no shifting.  NB: unlike the forward case, ! \
	 * this is NOT strictly an optimization.  If the ! \
	 * SAR is zero the vshds do NOT do the right thing. ! \
	 * This is another asymmetry, more or less the	! \
	 * "fault" of vshd.				! \
	 */						! \
	addi		3, src_off, src_off		! \
	sub		count, %t2, count		! \
	comb,<>		%r0, %t1, _LABEL(_shifting)	! \
	depi		0, 31, 2, src_off		! \
							! \
	/* Load and store the first word. */		! \
	ldws,mb		-4(src_spc, src_off), %t4	! \
	_STBYS_E_M(%t4, dst_spc, dst_off)		! \
							! \
	/* Do the rest of the copy. */			! \
	_COPY(src_spc,src_off,dst_spc,dst_off,count,mb,-1) ! \
							! \
.label _LABEL(_shifting)				! \
							! \
	/*						! \
	 * If shift < 0, we need to shift words to the	! \
	 * left.  Since we can't do this directly, we	! \
	 * adjust the shift so it's a shift to the right ! \
	 * and load a zero into the low word of the FIFO. ! \
	 * Otherwise, we load the first word into the	! \
	 * low word of the FIFO.			! \
	 *						! \
	 * Note the nullification trickery here.  We	! \
	 * assume that we're shifting to the left, and	! \
	 * load zero into the low word of the FIFO.  Then ! \
	 * we nullify the addi if we're shifting to the	! \
	 * right.  If the addi is not nullified, we are	! \
	 * shifting to the left, so we nullify the load. ! \
	 */						! \
	copy		%r0, %t3			! \
	comb,<=,n	%r0, %t1, 0			! \
	addi,tr		4, %t1, %t1			! \
	ldws,mb		-4(src_spc, src_off), %t3	! \
							! \
	/*						! \
	 * Turn the shift byte count into a bit count,	! \
	 * load the next word, set the Shift Amount	! \
	 * Register, and form and store the first word.	! \
	 */						! \
	sh3add		%t1, %r0, %t1			! \
	ldws,mb		-4(src_spc, src_off), %t4	! \
	mtctl		%t1, %cr11			! \
	vshd		%t4, %t3, %r1			! \
	_STBYS_E_M(%r1, dst_spc, dst_off)		! \
							! \
	/* Do the rest of the copy. */			! \
	_COPYS(src_spc,src_off,dst_spc,dst_off,count,mb,-1)
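
/*
 * As an example of the reverse-direction shift computation: with
 * count already added to the offsets, suppose src_off & 3 == 1 and
 * dst_off & 3 == 3.  Then %t1 = 1 byte of the region sits in the
 * first source word read, the first store writes %t2 = 3 bytes,
 * and the byte shift is %t2 - %t1 = 2, again a shift to the right.
 * When an offset is already 4-byte aligned, the extru,<>/ldi pair
 * yields 4 rather than 0.
 */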

/*
 * For paranoia, when things aren't going well, enable this
 * code to assemble byte-at-a-time-only copying.
 */
#if 1
#undef _COPY_FORWARD
#define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count) \
	comb,=,n	%r0, count, _LABEL(_done)	! \
	ldbs,ma		1(src_spc, src_off), %r1	! \
	addib,<>	-1, count, -12			! \
	stbs,ma		%r1, 1(dst_spc, dst_off)	! \
	b,n		_LABEL(_done)
#undef _COPY_REVERSE
#define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count) \
	comb,=		%r0, count, _LABEL(_done)	! \
	add		src_off, count, src_off		! \
	add		dst_off, count, dst_off		! \
	ldbs,mb		-1(src_spc, src_off), %r1	! \
	addib,<>	-1, count, -12			! \
	stbs,mb		%r1, -1(dst_spc, dst_off)	! \
	b,n		_LABEL(_done)
#endif

/*
 * If none of the following are defined, define BCOPY.
 */
#if !(defined(SPCOPY) || defined(MEMCPY) || defined(MEMMOVE))
#define BCOPY
#endif

#if defined(SPCOPY) && !defined(_STANDALONE)

#include <sys/errno.h>
#include "assym.h"

/*
 * int spcopy(pa_space_t ssp, const void *src, pa_space_t dsp, void *dst,
 *	      size_t len)
 *
 * We assume that the regions do not overlap.
 */
LEAF_ENTRY(spcopy)

	/*
	 * Set up the fault handler, which will fill in %ret0 if triggered.
	 */
	GET_CURLWP(%r31)
#ifdef DIAGNOSTIC
	comb,<>,n	%r0, %r31, Lspcopy_curlwp_ok
	ldil		L%panic, %r1
	ldil		L%Lspcopy_curlwp_bad, %arg0
	ldo		R%panic(%r1), %r1
	ldo		R%Lspcopy_curlwp_bad(%arg0), %arg0
	.call
	bv,n		%r0(%r1)
	nop
Lspcopy_curlwp_bad:
	.asciz	"spcopy: curlwp == NULL\n"
	.align	8
Lspcopy_curlwp_ok:
#endif /* DIAGNOSTIC */
	ldil		L%spcopy_fault, %r1
	ldw		L_PCB(%r31), %r31
	ldo		R%spcopy_fault(%r1), %r1
	stw		%r1, PCB_ONFAULT(%r31)

	/* Set up the space registers. */
	mfsp		%sr2, %ret1
	mtsp		%arg0, %sr1
	mtsp		%arg2, %sr2

	/* Get the len argument and do the copy. */
	ldw		HPPA_FRAME_ARG(4)(%sp), %arg0
#define _LABEL(l) __CONCAT(spcopy,l)
	_COPY_FORWARD(%sr1,%arg1,%sr2,%arg3,%arg0)
_LABEL(_done):

	/* Return. */
	copy		%r0, %ret0
ALTENTRY(spcopy_fault)
	stw		%r0, PCB_ONFAULT(%r31)
	bv		%r0(%rp)
	mtsp		%ret1, %sr2
EXIT(spcopy)
#endif /* SPCOPY && !_STANDALONE */

#ifdef MEMCPY
/*
 * void *memcpy(void *restrict dst, const void *restrict src, size_t len);
 *
 * memcpy is specifically restricted to working on
 * non-overlapping regions, so we can just copy forward.
 */
LEAF_ENTRY(memcpy)
	copy		%arg0, %ret0
#define _LABEL(l) __CONCAT(memcpy,l)
	_COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2)
_LABEL(_done):
	bv,n		%r0(%rp)
	nop
EXIT(memcpy)
#endif /* MEMCPY */

#ifdef BCOPY
/*
 * void bcopy(const void *src, void *dst, size_t len);
 */
LEAF_ENTRY(bcopy)
	copy		%arg0, %r1
	copy		%arg1, %arg0
	copy		%r1, %arg1
	/* FALLTHROUGH */
#define _LABEL_F(l) __CONCAT(bcopy_F,l)
#define _LABEL_R(l) __CONCAT(bcopy_R,l)
#endif
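
/*
 * The register shuffle in bcopy above swaps its (src, dst) arguments
 * so that, like memmove below, the shared code at the end sees the
 * destination in %arg0, the source in %arg1, and the length in %arg2
 * before it picks a copy direction.
 */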

#ifdef MEMMOVE
/*
 * void *memmove(void *dst, const void *src, size_t len);
 */
LEAF_ENTRY(memmove)
#define _LABEL_F(l) __CONCAT(memmove_F,l)
#define _LABEL_R(l) __CONCAT(memmove_R,l)
	copy		%arg0, %ret0
#endif /* MEMMOVE */

#if defined(BCOPY) || defined(MEMMOVE)

	/*
	 * If src >= dst or src + len <= dst, we copy
	 * forward, else we copy in reverse.
	 */
	add		%arg1, %arg2, %r1
	comb,>>=,n	%arg1, %arg0, 0
	comb,>>,n	%r1, %arg0, _LABEL_R(_go)

#define _LABEL _LABEL_F
	_COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2)
#undef _LABEL

_LABEL_R(_go):
#define _LABEL _LABEL_R
	_COPY_REVERSE(%sr0,%arg1,%sr0,%arg0,%arg2)
#undef _LABEL

_LABEL_F(_done):
_LABEL_R(_done):
	bv,n		%r0(%rp)
	nop
#ifdef BCOPY
EXIT(bcopy)
#else
EXIT(memmove)
#endif
#endif /* BCOPY || MEMMOVE */