/*
 * memcpy and memmove for PowerPC, in Plan 9 assembler syntax.
 *
 * Arguments:
 *	s1	destination pointer; held in R3 on entry and saved at s1+0(FP)
 *	s2	source pointer, read from s2+4(FP)
 *	n	byte count, read from n+8(FP)
 * Returns s1 in R3.
 *
 * memcpy branches straight into memmove, so both handle overlapping
 * regions the same way: the copy runs backwards when the destination
 * address is above the source address, forwards otherwise.
 * A zero count returns immediately; a negative count goes to trap.
 *
 * Register use:
 *	R7	scratch for alignment tests
 *	R9	remaining byte count
 *	R10	to-pointer		R12	end to-pointer
 *	R11	from-pointer		R13	end from-pointer
 *	R14	loop iteration count, moved to CTR
 *	R16 .. R19	data in flight
 */

/* decrement CTR and branch to the following label while CTR is non-zero */
#define	BDNZ	BC	16,0,

	TEXT	memcpy(SB), $0
	BR	move

	TEXT	memmove(SB), $0
move:

/*
 * performance:
 *	(tba)
 */

	MOVW	R3, s1+0(FP)		/* save s1 so it can be returned at ret */
	MOVW	n+8(FP), R9		/* R9 is count */
	MOVW	R3, R10			/* R10 is to-pointer */
	CMP	R9, $0
	BEQ	ret			/* nothing to copy */
	BLT	trap			/* negative count */
	MOVW	s2+4(FP), R11		/* R11 is from-pointer */

/*
 * if no more than 16 bytes, just use one lsw/stsw.
 * all bytes are loaded into registers before any is stored,
 * so the copy direction does not matter here.
 */
	CMP	R9, $16
	BLE	fout

	ADD	R9,R11, R13		/* R13 is end from-pointer */
	ADD	R9,R10, R12		/* R12 is end to-pointer */

/*
 * easiest test is copy backwards if
 * destination string has higher mem address
 */
	CMPU	R10, R11
	BGT	back

/*
 * test if both pointers
 * are similarly word aligned
 * (low two bits of to and from are equal)
 */
	XOR	R10,R11, R7
	ANDCC	$3,R7
	BNE	fbad

/*
 * move a few bytes to align pointers:
 * R7 = 4 - (to & 3) bytes go through one lsw/stsw pair,
 * with the byte count passed in XER
 */
	ANDCC	$3,R10,R7
	BEQ	f2
	SUBC	R7, $4, R7
	SUB	R7, R9
	MOVW	R7, XER
	LSW	(R11), R16
	ADD	R7, R11
	STSW	R16, (R10)
	ADD	R7, R10

/*
 * turn R14 into a count of 16-byte blocks (R9 >> 4);
 * copy 16 bytes at a time while there's room.
 * the pointers are biased by -4 because MOVWU with a 4 offset
 * advances the base register before each access.
 */
f2:
	SRAWCC	$4, R9, R14
	BLE	fout
	MOVW	R14, CTR
	SUB	$4, R11
	SUB	$4, R10
f3:
	MOVWU	4(R11), R16
	MOVWU	R16, 4(R10)
	MOVWU	4(R11), R17
	MOVWU	R17, 4(R10)
	MOVWU	4(R11), R16
	MOVWU	R16, 4(R10)
	MOVWU	4(R11), R17
	MOVWU	R17, 4(R10)
	BDNZ	f3
	RLWNMCC	$0, R9, $15, R9	/* residue */
	BEQ	ret
	ADD	$4, R11			/* undo the -4 bias before the tail copy */
	ADD	$4, R10

/*
 * move up to 16 bytes through R16 .. R19; aligned and unaligned
 */
fout:
	MOVW	R9, XER
	LSW	(R11), R16
	STSW	R16, (R10)
	BR	ret

/*
 * loop for unaligned copy, then copy up to 15 remaining bytes.
 * each iteration moves 16 bytes with one lsw/stsw pair.
 */
fbad:
	SRAWCC	$4, R9, R14
	BLE	f6
	MOVW	R14, CTR
f5:
	LSW	(R11), $16, R16
	ADD	$16, R11
	STSW	R16, $16, (R10)
	ADD	$16, R10
	BDNZ	f5
	RLWNMCC	$0, R9, $15, R9	/* residue */
	BEQ	ret
f6:
	MOVW	R9, XER
	LSW	(R11), R16
	STSW	R16, (R10)
	BR	ret

/*
 * whole thing repeated for backwards,
 * working down from the end pointers R13 (from) and R12 (to).
 * if the end pointers are not similarly word aligned,
 * everything is copied a byte at a time at bout.
 */
back:
	CMP	R9, $4
	BLT	bout

	XOR	R12,R13, R7
	ANDCC	$3,R7
	BNE	bout
/*
 * copy single bytes until the end from-pointer is word aligned
 */
b1:
	ANDCC	$3,R13, R7
	BEQ	b2
	MOVBZU	-1(R13), R16
	MOVBZU	R16, -1(R12)
	SUB	$1, R9
	BR	b1
/*
 * copy 16 bytes at a time, R14 = R9 >> 4 iterations
 */
b2:
	SRAWCC	$4, R9, R14
	BLE	b4
	MOVW	R14, CTR
b3:
	MOVWU	-4(R13), R16
	MOVWU	R16, -4(R12)
	MOVWU	-4(R13), R17
	MOVWU	R17, -4(R12)
	MOVWU	-4(R13), R16
	MOVWU	R16, -4(R12)
	MOVWU	-4(R13), R17
	MOVWU	R17, -4(R12)
	BDNZ	b3
	RLWNMCC	$0, R9, $15, R9	/* residue */
	BEQ	ret
/*
 * copy the remaining whole words, R14 = R9 >> 2 iterations
 */
b4:
	SRAWCC	$2, R9, R14
	BLE	bout
	MOVW	R14, CTR
b5:
	MOVWU	-4(R13), R16
	MOVWU	R16, -4(R12)
	BDNZ	b5
	RLWNMCC	$0, R9, $3, R9	/* residue */
	BEQ	ret

/*
 * copy single bytes until the end from-pointer
 * has come back down to the start from-pointer
 */
bout:
	CMPU	R13, R11
	BLE	ret
	MOVBZU	-1(R13), R16
	MOVBZU	R16, -1(R12)
	BR	bout

/*
 * negative count: store through R0 to force a fault.
 * NOTE(review): presumably relies on R0 holding zero, so the store
 * hits address 0 -- the explicit load below is commented out; confirm.
 * if the store does not fault, control falls through to ret.
 */
trap:
/*	MOVW	$0, R0	*/
	MOVW	R0, 0(R0)

ret:
	MOVW	s1+0(FP), R3		/* return the original destination pointer */
	RETURN