/*
 * memset(p, c, n): store the low byte of c into the n bytes starting at p.
 *
 * In:    R0         = p (first argument arrives in a register)
 *        data+4(FP) = c (only the low 8 bits are stored)
 *        n+8(FP)    = n, treated as an unsigned byte count
 * Out:   R0 is never written, so it still holds p at RET.
 * Uses:  R1-R4 always; R5-R11 too when at least 32 bytes remain after
 *        alignment; condition flags.
 *        NOTE(review): R9-R11 are overwritten on the 32-byte path; confirm
 *        they are free to clobber under this toolchain's register rules.
 *
 * Strategy: byte stores until TO is 4-aligned, then 32-byte multi-register
 * stores, then single words, then trailing bytes.
 *
 * Loop bounds are taken from the byte count still to go (TOE - TO, computed
 * modulo 2^32) rather than from "TOE - 31" / "TOE - 3".  Those pointer
 * bounds wrap around when the buffer ends below address 31 or exactly at
 * the top of the address space, which let the 32-byte loop run far past
 * the end of the buffer.
 */
TO = 1					/* running destination pointer */
TOE = 2					/* p + n: one past the last byte to store */
N = 3					/* byte count, dead once the size check is done */
TMP = 3					/* N and TMP don't overlap */

TEXT memset(SB), $0
	MOVW	R0, R(TO)
	MOVW	data+4(FP), R(4)
	MOVW	n+8(FP), R(N)

	ADD	R(N), R(TO), R(TOE)	/* to end pointer (may wrap; only TOE-TO is used) */

	CMP	$4, R(N)		/* need at least 4 bytes for the word path */
	BLO	_1tail			/* unsigned, like every other compare here */

	AND	$0xFF, R(4)		/* it's a byte */
	SLL	$8, R(4), R(TMP)	/* replicate to a word */
	ORR	R(TMP), R(4)
	SLL	$16, R(4), R(TMP)
	ORR	R(TMP), R(4)

_4align:				/* align on 4; at most 3 bytes, and n >= 4 here */
	AND.S	$3, R(TO), R(TMP)
	BEQ	_4aligned

	MOVBU.P	R(4), 1(R(TO))		/* implicit write back */
	B	_4align

_4aligned:
	SUB	R(TO), R(TOE), R(TMP)	/* TMP = bytes still to store */
	CMP	$32, R(TMP)		/* do 32-byte chunks if possible */
	BLO	_4tail			/* skip: leaves R5-R11 untouched */

	MOVW	R4, R5			/* replicate */
	MOVW	R4, R6
	MOVW	R4, R7
	MOVW	R4, R8
	MOVW	R4, R9
	MOVW	R4, R10
	MOVW	R4, R11

_f32loop:				/* invariant: at least 32 bytes remain */
	MOVM.IA.W [R4-R11], (R(TO))	/* 8 words, write back advances TO */
	SUB	R(TO), R(TOE), R(TMP)
	CMP	$32, R(TMP)
	BHS	_f32loop

_4tail:					/* do remaining words if possible */
	SUB	R(TO), R(TOE), R(TMP)
	CMP	$4, R(TMP)
	BLO	_1tail

	MOVW.P	R(4), 4(R(TO))		/* implicit write back */
	B	_4tail

_1tail:					/* trailing bytes, also the whole job when n < 4 */
	CMP	R(TO), R(TOE)
	BEQ	_return

	MOVBU.P	R(4), 1(R(TO))		/* stores the low byte; R4 need not be masked */
	B	_1tail

_return:
	RET