/*
 * memset for 32-bit PowerPC, Plan 9 assembler syntax.
 *
 * C-level view (names taken from the frame references below; the C
 * types are presumably void *memset(void *p, int c, ulong n) --
 * confirm against the libc prototype):
 *
 *	p	first argument; arrives in R3 and is saved at p+0(FP)
 *	c	fill value at c+4(FP); only the low 8 bits are used
 *	n	byte count at n+8(FP)
 *
 * Returns the original p in R3 (reloaded from 0(FP)).
 *
 * NOTE: the count is tested with a signed compare, so a count that is
 * zero or has its top bit set stores nothing and just returns p.
 *
 * Register use:
 *	R3	store cursor
 *	R4	bytes still to store
 *	R5-R8	16 copies of the fill byte
 *	R9	scratch: alignment byte count, then 16-byte block count
 *	R10	scratch: address of the second half of a 16-byte block
 *	XER	byte count consumed by STSW
 *	CTR	block-loop counter
 *	CR0	set by CMP and the ...CC instructions
 */

/* BO=16: decrement CTR and branch while CTR is non-zero */
#define	BDNZ	BC	16,0,

	TEXT	memset(SB),$0
	MOVW	R3, p+0(FP)		/* R3 is pointer; keep a copy for the return value */

/*
 * performance:
 *	about 100mbytes/sec (8k blocks) on a 603/105 without L2 cache
 *	drops to 40mbytes/sec (10k blocks) and 28mbytes/sec with 32k blocks
 */

	MOVW	n+8(FP), R4		/* R4 is count */
	CMP	R4, $0
	BLE	ret			/* signed: nothing to do for n <= 0 */
	MOVW	c+4(FP), R5		/* R5 is char */

/*
 * create 16 copies of c in R5 .. R8
 */
	RLWNM	$0, R5, $0xff, R5		/* R5 = c & 0xff */
	RLWMI	$8, R5, $0xff00, R5		/* replicate into byte 1 */
	RLWMI	$16, R5, $0xffff0000, R5	/* replicate into the upper half word */
	MOVW	R5, R6
	MOVW	R5, R7
	MOVW	R5, R8

/*
 * let STSW do the work for 16 characters or less; aligned and unaligned
 */
	CMP	R4, $16
	BLE	out

/*
 * store enough bytes to bring the pointer to an 8-byte boundary
 * (here R4 > 16, so the 1..7 bytes stored cannot exceed the count)
 */
	ANDCC	$7,R3, R9		/* R9 = p & 7 */
	BEQ	aligned
	SUBC	R9, $8, R9		/* R9 = 8 - (p & 7) */
	MOVW	R9, XER			/* STSW takes its byte count from XER */
	STSW	R5, (R3)
	ADD	R9, R3
	SUB	R9, R4

/*
 * store 16 at a time while there's room
 * STSW was used here originally, but it's `completion serialised'
 */
aligned:
	SRAWCC	$4, R4, R9		/* R9 = number of whole 16-byte blocks */
	BLE	out			/* fewer than 16 bytes left */
	MOVW	R9, CTR
loop16:
	MOVW	R5, 0(R3)
	ADD	$8, R3, R10		/* R10 = second half of this block */
	MOVW	R6, 4(R3)
	MOVW	R7, 0(R10)
	ADD	$8, R10, R3		/* R3 = start of the next block */
	MOVW	R8, 4(R10)
	BDNZ	loop16
	RLWNMCC	$0, R4, $15, R4		/* residue */
	BEQ	ret

/*
 * store up to 16 bytes from R5 .. R8; aligned and unaligned
 */

out:
	MOVW	R4, XER			/* R4 <= 16 on every path that reaches here */
	STSW	R5, (R3)

ret:
	MOVW	0(FP), R3		/* return the original pointer */
	RETURN
	END