/*
 * memcmp(s1, s2, n)
 *
 * Compares n bytes at s1 and s2.  R3 is left holding 0 when every byte
 * matches (or when the count is <= 0), 1 when the first differing
 * word/fragment of s1 compares higher under CMPU, and -1 otherwise.
 *
 * Register use:
 *	R3	pointer1 (s1); also carries the result on return
 *	R5	pointer2 (s2)
 *	R4	remaining count
 *	R9	scratch: alignment bytes / loop trip counts
 *	R10-R14	data being compared
 *
 * Original performance note: 67mb/sec aligned; 16mb/sec unaligned.
 */

/* decrement CTR and branch while it is non-zero */
#define	BDNZ	BC	16,0,

	TEXT	memcmp(SB), $0
	MOVW	R3, s1+0(FP)		/* R3 is pointer1; store it in its argument slot */
	MOVW	n+8(FP), R4		/* R4 is count */
	MOVW	s2+4(FP), R5		/* R5 is pointer2 */

/*
 * Nothing to compare when count <= 0.  Counts of 4 or less go straight
 * to the final LSW, which handles aligned and unaligned data alike.
 */
	CMP	R4, $0
	BLE	equal
	CMP	R4, $4
	BLE	tail

/*
 * If the two pointers differ in their low two bits they can never be
 * word-aligned together; use the LSW word loop for everything.
 */
	XOR	R3, R5, R9
	ANDCC	$3, R9
	BNE	words4

/*
 * Same alignment: if s1 is not on a word boundary, compare just enough
 * leading bytes to reach one.  The byte count for LSW is passed in XER.
 */
	ANDCC	$3,R3, R9
	BEQ	words16
	SUBC	R9, $4, R9		/* bytes up to the next word boundary */
	MOVW	R9, XER
	LSW	(R3), R10
	ADD	R9, R3
	LSW	(R5), R14
	ADD	R9, R5
	SUB	R9, R4
	CMPU	R10, R14
	BNE	differ

/*
 * Main loop: 16 bytes per trip, CTR = count/16.  Both pointers are
 * backed up by 4 so the update-form loads (MOVWU 4(Rx)) can advance them.
 */
words16:
	SRAWCC	$4, R4, R9
	BLE	words4			/* fewer than 16 bytes left */
	MOVW	R9, CTR
	SUB	$4, R3
	SUB	$4, R5
loop16:
	MOVWU	4(R3), R10
	MOVWU	4(R5), R12
	MOVWU	4(R3), R11
	MOVWU	4(R5), R13
	CMPU	R10, R12
	BNE	differ
	MOVWU	4(R3), R10
	MOVWU	4(R5), R12
	CMPU	R11, R13
	BNE	differ
	MOVWU	4(R3), R11
	MOVWU	4(R5), R13
	CMPU	R10, R12
	BNE	differ
	CMPU	R11, R13
	BNE	differ
	BDNZ	loop16
	ADD	$4, R3			/* undo the earlier pointer bias */
	ADD	$4, R5
	RLWNMCC	$0, R4, $15, R4		/* residue: count mod 16 */
	BEQ	equal

/*
 * Word loop using 4-byte LSW, CTR = count/4.  Finishes what the
 * 16-byte loop left over and carries the whole unaligned case.
 */
words4:
	SRAWCC	$2, R4, R9
	BLE	tail			/* fewer than 4 bytes left */
	MOVW	R9, CTR
loop4:
	LSW	(R3), $4, R10
	ADD	$4, R3
	LSW	(R5), $4, R11
	ADD	$4, R5
	CMPU	R10, R11
	BNE	differ
	BDNZ	loop4
	RLWNMCC	$0, R4, $3, R4		/* residue: count mod 4 */
	BEQ	equal

/*
 * Last fragment: one LSW per pointer with the remaining count in XER.
 */
tail:
	MOVW	R4, XER
	LSW	(R3), R10
	LSW	(R5), R11
	CMPU	R10, R11
	BNE	differ

equal:
	MOVW	$0, R3
	RETURN

/*
 * Mismatch: the branch below tests the condition left by the CMPU
 * that brought us here, so the result is 1 for "higher", -1 for "lower".
 */
differ:
	MOVW	$1, R3
	BGE	done
	MOVW	$-1,R3
done:
	RETURN
	END