/*	$NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $	*/

/*-
 * Copyright (c) 2002 SHIMIZU Ryo.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

/*
 * void *memset(void *dst, int c, size_t len);
 * void  bzero(void *dst, size_t len);		(when built with -DBZERO)
 *
 * Target: SuperH, GNU as syntax, run through the C preprocessor.
 *
 * Register roles (see the defines below):
 *	memset:	r4 = dst, r5 = c, r6 = len; r3 keeps the original dst so
 *		that it can be handed back in r0.
 *	bzero:	r4 = dst, r5 = len; r2 holds the fill value (always 0).
 *	both:	r0 = end/work pointer and jump-table address, r1 = scratch.
 * The argument registers, r0, r1, r2/r3 and the T bit are overwritten.
 *
 * Strategy, selected by len:
 *	 0..11	byte stores walking backwards from dst+len, loop unrolled
 *		four times.
 *	12..27	computed jump into a straight-line table of stores: byte
 *		stores when dst is odd, 16-bit stores when dst is even.
 *	28..	the fill byte is replicated into a 32-bit pattern, both ends
 *		of the region are brought to 4-byte alignment, then the
 *		region is filled backwards in 32-byte chunks followed by
 *		single longwords.
 *
 * The code depends on delayed branches: the instruction following
 * bt/s, bf/s, bra, jmp and rts is executed before the branch takes
 * effect.  Several delay slots below are shared with the fall-through
 * path on purpose, and the two jump tables depend on the exact number
 * of instructions they contain.  Do not reorder, insert or remove
 * instructions without re-checking every branch and both tables.
 */

#define	REG_PTR				r0
#define	REG_TMP1			r1

#ifdef BZERO
# define	REG_C			r2
# define	REG_DST			r4
# define	REG_LEN			r5
#else
# define	REG_DST0		r3
# define	REG_DST			r4
# define	REG_C			r5
# define	REG_LEN			r6
#endif

#ifdef BZERO
ENTRY(bzero)
#else
ENTRY(memset)
	mov	REG_DST,REG_DST0	/* keep dst for the return value */
#endif
	/* small amount to fill ? */
	mov	#28,REG_TMP1
	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 28) goto large; */
	bt/s	large
	mov	#12,REG_TMP1		/* (delay slot) */
	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 12) goto small; */
	bt/s	small
	/*
	 * The next instruction is the delay slot of the branch above:
	 * "mov #0,REG_C" for bzero, "tst REG_LEN,REG_LEN" for memset.
	 * Both are harmless when the branch to small is taken.
	 */
#ifdef BZERO
	mov	#0,REG_C
#endif
	/* very little fill (0 ~ 11 bytes) */
	tst	REG_LEN,REG_LEN		/* T = (len == 0) */
	add	REG_DST,REG_LEN		/* REG_LEN = dst + len (end pointer) */
	bt/s	done			/* nothing to do for len == 0 */
	add	#1,REG_DST		/* (delay slot) REG_DST = dst + 1 */

	/*
	 * Loop unrolled 4 times.  Each compare runs before the store it
	 * guards: T = (end == dst + 1) means the following store writes
	 * the first byte of the region and is therefore the last one.
	 */
	cmp/eq	REG_DST,REG_LEN
1:	mov.b	REG_C,@-REG_LEN		/* *--end = c; */
	bt/s	done
	cmp/eq	REG_DST,REG_LEN		/* (delay slot) */
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN		/* (delay slot) */
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	cmp/eq	REG_DST,REG_LEN		/* (delay slot) */
	mov.b	REG_C,@-REG_LEN
	bf/s	1b
	cmp/eq	REG_DST,REG_LEN		/* (delay slot) */
done:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* (delay slot) return original dst */
#endif


	/*
	 * 12 <= len < 28: if dst is odd, fill with byte stores through a
	 * computed jump; otherwise go to small_aligned.
	 */
small:
	mov	REG_DST,r0
	tst	#1,r0			/* T = (dst is even) */
	bt/s	small_aligned
	mov	REG_DST,REG_TMP1	/* (delay slot) */
	shll	REG_LEN			/* len * 2: each table entry is 2 bytes */
	mova	1f,r0			/* 1f must be 4-byte aligned! */
	add	#16,REG_TMP1		/* REG_TMP1 = dst + 16; */
	sub	REG_LEN,r0		/* entry point = 1f - 2 * len */
	jmp	@r0
	mov	REG_C,r0		/* (delay slot) stores below use r0 */

	/*
	 * Byte-store table.  Entering len instructions before 1: runs
	 * exactly the stores for dst[len-1] .. dst[0].  The displacement
	 * of this store form only reaches 15, so REG_TMP1 = dst + 16 is
	 * used for the upper half.  After the .align there are 32
	 * two-byte instructions before 1: in both variants, which keeps
	 * 1: on a 4-byte boundary.
	 */
	.align	2
	mov.b	r0,@(15,REG_TMP1)
	mov.b	r0,@(14,REG_TMP1)
	mov.b	r0,@(13,REG_TMP1)
	mov.b	r0,@(12,REG_TMP1)
	mov.b	r0,@(11,REG_TMP1)
	mov.b	r0,@(10,REG_TMP1)
	mov.b	r0,@(9,REG_TMP1)
	mov.b	r0,@(8,REG_TMP1)
	mov.b	r0,@(7,REG_TMP1)
	mov.b	r0,@(6,REG_TMP1)
	mov.b	r0,@(5,REG_TMP1)
	mov.b	r0,@(4,REG_TMP1)
	mov.b	r0,@(3,REG_TMP1)
	mov.b	r0,@(2,REG_TMP1)
	mov.b	r0,@(1,REG_TMP1)
	mov.b	r0,@REG_TMP1
	mov.b	r0,@(15,REG_DST)
	mov.b	r0,@(14,REG_DST)
	mov.b	r0,@(13,REG_DST)
	mov.b	r0,@(12,REG_DST)
	mov.b	r0,@(11,REG_DST)
	mov.b	r0,@(10,REG_DST)
	mov.b	r0,@(9,REG_DST)
	mov.b	r0,@(8,REG_DST)
	mov.b	r0,@(7,REG_DST)
	mov.b	r0,@(6,REG_DST)
	mov.b	r0,@(5,REG_DST)
	mov.b	r0,@(4,REG_DST)
	mov.b	r0,@(3,REG_DST)
	mov.b	r0,@(2,REG_DST)
	mov.b	r0,@(1,REG_DST)
#ifdef BZERO
	/* bzero: the final store sits in the delay slot of rts */
	rts
1:	mov.b	r0,@REG_DST
#else
	/* memset: the delay slot of rts is needed for the return value */
	mov.b	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0
#endif


/* 2 bytes aligned small fill */
small_aligned:
#ifndef BZERO
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = 000000xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = 000000xx */
	or	REG_TMP1,REG_C		/* REG_C = ????xxxx */
#endif

	mov	REG_LEN,r0
	tst	#1,r0			/* len is even? */
	bt/s	1f
	add	#-1,r0			/* (delay slot) r0 = len - 1 */
	mov.b	REG_C,@(r0,REG_DST)	/* store the odd trailing byte */
	mov	r0,REG_LEN		/* len is now even */
1:

	mova	1f,r0			/* 1f must be 4-byte aligned! */
	sub	REG_LEN,r0		/* entry point = 1f - len */
	jmp	@r0
	mov	REG_C,r0		/* (delay slot) stores below use r0 */

	/*
	 * Word-store table.  Every 2-byte instruction stores 2 bytes, so
	 * entering len bytes before 1: runs len / 2 stores.  After the
	 * .align there are 16 instructions before 1: in both variants,
	 * which keeps 1: on a 4-byte boundary.
	 */
	.align	2
	mov.w	r0,@(30,REG_DST)
	mov.w	r0,@(28,REG_DST)
	mov.w	r0,@(26,REG_DST)
	mov.w	r0,@(24,REG_DST)
	mov.w	r0,@(22,REG_DST)
	mov.w	r0,@(20,REG_DST)
	mov.w	r0,@(18,REG_DST)
	mov.w	r0,@(16,REG_DST)
	mov.w	r0,@(14,REG_DST)
	mov.w	r0,@(12,REG_DST)
	mov.w	r0,@(10,REG_DST)
	mov.w	r0,@(8,REG_DST)
	mov.w	r0,@(6,REG_DST)
	mov.w	r0,@(4,REG_DST)
	mov.w	r0,@(2,REG_DST)
#ifdef BZERO
	/* bzero: the final store sits in the delay slot of rts */
	rts
1:	mov.w	r0,@REG_DST
#else
	/* memset: the delay slot of rts is needed for the return value */
	mov.w	r0,@REG_DST
1:	rts
	mov	REG_DST0,r0
#endif



	/* len >= 28: fill with 32-bit stores once both ends are aligned */
	.align	2
large:
#ifdef BZERO
	mov	#0,REG_C
#else
	/* replicate the fill byte into all four bytes of REG_C */
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = 000000xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = 000000xx */
	or	REG_C,REG_TMP1		/* REG_C = ????xx00, REG_TMP1 = ????xxxx */
	swap.w	REG_TMP1,REG_C		/* REG_C = xxxx????, REG_TMP1 = ????xxxx */
	xtrct	REG_TMP1,REG_C		/* REG_C = xxxxxxxx */
#endif

	mov	#3,REG_TMP1
	tst	REG_TMP1,REG_DST	/* T = ((dst & 3) == 0) */
	mov	REG_DST,REG_PTR
	bf/s	unaligned_dst
	add	REG_LEN,REG_PTR		/* (delay slot) REG_PTR = dst + len; */
	tst	REG_TMP1,REG_LEN	/* T = ((len & 3) == 0) */
	bf/s	unaligned_len
	/*
	 * No explicit delay slot: the first instruction of aligned
	 * (mov #32,REG_TMP1) is executed as the delay slot of the branch
	 * above, which is harmless when the branch is taken.
	 */

aligned:
	/* fill 32*n bytes, walking backwards from the end */
	mov	#32,REG_TMP1
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len) */
	bt	9f
	.align	2
1:	sub	REG_TMP1,REG_PTR	/* ptr -= 32 */
	mov.l	REG_C,@REG_PTR
	sub	REG_TMP1,REG_LEN	/* len -= 32 */
	mov.l	REG_C,@(4,REG_PTR)
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len): no full chunk left */
	mov.l	REG_C,@(8,REG_PTR)
	mov.l	REG_C,@(12,REG_PTR)
	mov.l	REG_C,@(16,REG_PTR)
	mov.l	REG_C,@(20,REG_PTR)
	mov.l	REG_C,@(24,REG_PTR)
	bf/s	1b
	mov.l	REG_C,@(28,REG_PTR)	/* (delay slot) */
9:

	/*
	 * Fill the remaining 4*n bytes until ptr reaches dst.  Same shape
	 * as the byte loop at the top: each compare runs before the store
	 * it guards, and T = (ptr == dst + 4) marks the last store.
	 */
	cmp/eq	REG_DST,REG_PTR
	bt	9f			/* nothing left */
	add	#4,REG_DST
	cmp/eq	REG_DST,REG_PTR
1:	mov.l	REG_C,@-REG_PTR		/* *--(u_int32_t*)ptr = c; */
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR		/* (delay slot) */
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR		/* (delay slot) */
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	cmp/eq	REG_DST,REG_PTR		/* (delay slot) */
	mov.l	REG_C,@-REG_PTR
	bf/s	1b
	cmp/eq	REG_DST,REG_PTR		/* (delay slot) */
9:
#ifdef BZERO
	rts
	nop
#else
	rts
	mov	REG_DST0,r0		/* (delay slot) return original dst */
#endif


	/* advance dst to a 4-byte boundary, filling as we go */
unaligned_dst:
	mov	#1,REG_TMP1
	tst	REG_TMP1,REG_DST	/* if (dst & 1) {               */
	add	#1,REG_TMP1		/* REG_TMP1 = 2 (T is unchanged) */
	bt/s	2f
	tst	REG_TMP1,REG_DST	/* (delay slot) T = !(dst & 2)  */
	mov.b	REG_C,@REG_DST		/*   *dst++ = c;                */
	add	#1,REG_DST
	tst	REG_TMP1,REG_DST	/*   re-test bit 1 of the new dst */
2:					/* }                            */
					/* if (dst & 2) {               */
	bt	4f
	mov.w	REG_C,@REG_DST		/*   *(u_int16_t*)dst++ = c;    */
	add	#2,REG_DST
4:					/* }                            */


	tst	#3,REG_PTR		/* if (ptr & 3) {               */
	bt/s	4f			/*                              */
	/*
	 * The first instruction of unaligned_len also serves as the delay
	 * slot of the branch above.
	 */
unaligned_len:
	/* pull the end pointer back to a 4-byte boundary */
	tst	#1,REG_PTR		/*   if (ptr & 1) {             */
	bt/s	2f
	tst	#2,REG_PTR		/* (delay slot) T = !(ptr & 2); */
					/* bit 1 survives the --ptr below */
	mov.b	REG_C,@-REG_PTR		/*     *--ptr = c;              */
2:					/*   }                          */
					/*   if (ptr & 2) {             */
	bt	4f
	mov.w	REG_C,@-REG_PTR		/*     *--(u_int16_t*)ptr = c;  */
4:					/*   }                          */
					/* }                            */

	mov	REG_PTR,REG_LEN
	bra	aligned
	sub	REG_DST,REG_LEN		/* (delay slot) len = ptr - dst */