/*	$OpenBSD: memset.S,v 1.2 2015/08/31 02:53:57 guenther Exp $	*/
/*	$NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $	*/

/*-
 * Copyright (c) 2002 SHIMIZU Ryo.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "SYS.h"

/*
 * memset(dst, c, len), or bzero(dst, len) when built with BZERO defined.
 *
 * Register usage (see the #defines below):
 *
 *			memset		bzero
 *	dst   (in)	r4		r4
 *	c     (in)	r5		-	(r2 is loaded with 0)
 *	len   (in)	r6		r5
 *	result		r0 = dst	none
 *
 * Registers written: r0, r1, REG_C, REG_DST, REG_LEN, the T bit and,
 * for memset only, r3 (which keeps the original dst for the return value).
 *
 * The fill is dispatched on the length:
 *
 *	 0..11 bytes	byte loop, filling downwards from dst + len
 *	12..27 bytes	computed jump into a table of byte stores (odd dst)
 *			or 16-bit stores (even dst)
 *	28..   bytes	align both ends to 4 bytes, then 32-bit stores
 *
 * Two things make this code sensitive to instruction order and size:
 *
 *  - The instruction following bt/s, bf/s, bra, jmp and rts is a delay
 *    slot and is executed before control leaves.  Delay slots are marked
 *    below with an extra leading space.
 *
 *  - The 12..27 byte paths compute "end of table - N" and jump there, so
 *    every table entry must remain exactly one 2-byte instruction and the
 *    table end label must stay 4-byte aligned (a requirement of mova).
 *
 * REG_PTR has to be r0: "tst #imm", "mova" and the displacement and
 * indexed store forms used below only operate on r0.
 */

#define	REG_PTR		r0
#define	REG_TMP1	r1

#ifdef BZERO
# define	REG_C		r2
# define	REG_DST		r4
# define	REG_LEN		r5
#else
# define	REG_DST0	r3
# define	REG_DST		r4
# define	REG_C		r5
# define	REG_LEN		r6
#endif

#ifdef BZERO
ENTRY(bzero)
#else
ENTRY(memset)
	mov	REG_DST,REG_DST0	/* for return value */
#endif
	/* small amount to fill ? */
	mov	#28,REG_TMP1
	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 28) goto large; */
	bt/s	large
	 mov	#12,REG_TMP1		/* (delay slot) */
	cmp/hs	REG_TMP1,REG_LEN	/* if (len >= 12) goto small; */
	bt/s	small
	/*
	 * The delay slot of the branch above is "mov #0,REG_C" for bzero
	 * and the following "tst" for memset; both are harmless on the
	 * branch-taken path.
	 */
#ifdef BZERO
	 mov	#0,REG_C
#endif
	/* very little fill (0 ~ 11 bytes) */
	tst	REG_LEN,REG_LEN		/* T = (len == 0) */
	add	REG_DST,REG_LEN		/* REG_LEN = dst + len (end pointer) */
	bt/s	done			/* nothing to do for len == 0 */
	 add	#1,REG_DST		/* REG_DST = dst + 1 (delay slot) */

	/*
	 * Byte loop, unrolled 4 times.  T is set when the end pointer is
	 * dst + 1, i.e. when the store that follows writes the last
	 * (lowest) byte.  Each delay-slot compare prepares T for the next
	 * store.
	 */
	cmp/eq	REG_DST,REG_LEN
1:	mov.b	REG_C,@-REG_LEN
	bt/s	done
	 cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	 cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bt/s	done
	 cmp/eq	REG_DST,REG_LEN
	mov.b	REG_C,@-REG_LEN
	bf/s	1b
	 cmp/eq	REG_DST,REG_LEN
done:
#ifdef BZERO
	rts
	 nop
#else
	rts
	 mov	REG_DST0,r0		/* return the original dst */
#endif


/*
 * 12 <= len <= 27.
 * Odd dst: jump into a table of 32 byte stores so that exactly len of
 * them are executed (offsets len-1 down to 0).  Even dst: use the 16-bit
 * variant at small_aligned.
 */
small:
	mov	REG_DST,r0
	tst	#1,r0			/* dst even? */
	bt/s	small_aligned
	 mov	REG_DST,REG_TMP1	/* (delay slot) */
	shll	REG_LEN			/* len * 2: one 2-byte insn per byte */
	mova	1f,r0			/* 1f must be 4-byte aligned! */
	add	#16,REG_TMP1		/* REG_TMP1 = dst+16; */
	sub	REG_LEN,r0		/* r0 = table end - 2 * len */
	jmp	@r0
	 mov	REG_C,r0		/* stores below need r0 (delay slot) */

	.align	2
	mov.b	r0,@(15,REG_TMP1)
	mov.b	r0,@(14,REG_TMP1)
	mov.b	r0,@(13,REG_TMP1)
	mov.b	r0,@(12,REG_TMP1)
	mov.b	r0,@(11,REG_TMP1)
	mov.b	r0,@(10,REG_TMP1)
	mov.b	r0,@(9,REG_TMP1)
	mov.b	r0,@(8,REG_TMP1)
	mov.b	r0,@(7,REG_TMP1)
	mov.b	r0,@(6,REG_TMP1)
	mov.b	r0,@(5,REG_TMP1)
	mov.b	r0,@(4,REG_TMP1)
	mov.b	r0,@(3,REG_TMP1)
	mov.b	r0,@(2,REG_TMP1)
	mov.b	r0,@(1,REG_TMP1)
	mov.b	r0,@REG_TMP1
	mov.b	r0,@(15,REG_DST)
	mov.b	r0,@(14,REG_DST)
	mov.b	r0,@(13,REG_DST)
	mov.b	r0,@(12,REG_DST)
	mov.b	r0,@(11,REG_DST)
	mov.b	r0,@(10,REG_DST)
	mov.b	r0,@(9,REG_DST)
	mov.b	r0,@(8,REG_DST)
	mov.b	r0,@(7,REG_DST)
	mov.b	r0,@(6,REG_DST)
	mov.b	r0,@(5,REG_DST)
	mov.b	r0,@(4,REG_DST)
	mov.b	r0,@(3,REG_DST)
	mov.b	r0,@(2,REG_DST)
	mov.b	r0,@(1,REG_DST)
	/*
	 * bzero puts the final store in the rts delay slot, so its table
	 * end label sits on that store; memset needs the delay slot for
	 * the return value.  Either way the table is 32 instructions long.
	 */
#ifdef BZERO
	rts
1:	 mov.b	r0,@REG_DST
#else
	mov.b	r0,@REG_DST
1:	rts
	 mov	REG_DST0,r0		/* return the original dst */
#endif


/* 2 bytes aligned small fill */
small_aligned:
#ifndef BZERO
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_TMP1,REG_C		/* REG_C = ????xxxx */
#endif

	mov	REG_LEN,r0
	tst	#1,r0			/* is len even? */
	bt/s	1f
	 add	#-1,r0			/* r0 = len - 1 (delay slot) */
	mov.b	REG_C,@(r0,REG_DST)	/* fill the last byte: dst[len-1] */
	mov	r0,REG_LEN		/* len is now even */
1:

	/* len bytes == len / 2 word stores == len bytes of instructions */
	mova	1f,r0			/* 1f must be 4-byte aligned! */
	sub	REG_LEN,r0		/* r0 = table end - len */
	jmp	@r0
	 mov	REG_C,r0		/* stores below need r0 (delay slot) */

	.align	2
	mov.w	r0,@(30,REG_DST)
	mov.w	r0,@(28,REG_DST)
	mov.w	r0,@(26,REG_DST)
	mov.w	r0,@(24,REG_DST)
	mov.w	r0,@(22,REG_DST)
	mov.w	r0,@(20,REG_DST)
	mov.w	r0,@(18,REG_DST)
	mov.w	r0,@(16,REG_DST)
	mov.w	r0,@(14,REG_DST)
	mov.w	r0,@(12,REG_DST)
	mov.w	r0,@(10,REG_DST)
	mov.w	r0,@(8,REG_DST)
	mov.w	r0,@(6,REG_DST)
	mov.w	r0,@(4,REG_DST)
	mov.w	r0,@(2,REG_DST)
	/* Same layout trick as the byte table: 16 instructions in total. */
#ifdef BZERO
	rts
1:	 mov.w	r0,@REG_DST
#else
	mov.w	r0,@REG_DST
1:	rts
	 mov	REG_DST0,r0		/* return the original dst */
#endif



/*
 * len >= 28.
 * Replicate the fill byte into all four bytes of REG_C, align both ends
 * of the region to 4 bytes, then fill with 32-bit stores.
 */
	.align	2
large:
#ifdef BZERO
	mov	#0,REG_C
#else
	extu.b	REG_C,REG_TMP1		/* REG_C = ??????xx, REG_TMP1 = ????00xx */
	shll8	REG_C			/* REG_C = ????xx00, REG_TMP1 = ????00xx */
	or	REG_C,REG_TMP1		/* REG_C = ????xx00, REG_TMP1 = ????xxxx */
	swap.w	REG_TMP1,REG_C		/* REG_C = xxxx????, REG_TMP1 = ????xxxx */
	xtrct	REG_TMP1,REG_C		/* REG_C = xxxxxxxx */
#endif

	mov	#3,REG_TMP1
	tst	REG_TMP1,REG_DST	/* dst 4-byte aligned? */
	mov	REG_DST,REG_PTR
	bf/s	unaligned_dst
	 add	REG_LEN,REG_PTR		/* REG_PTR = dst + len; (delay slot) */
	tst	REG_TMP1,REG_LEN	/* len a multiple of 4? */
	bf/s	unaligned_len
	/*
	 * The delay slot of the branch above is the first instruction of
	 * "aligned" (mov #32,REG_TMP1); unaligned_len does not use REG_TMP1.
	 */

/*
 * Here REG_DST and REG_PTR (end pointer) are both 4-byte aligned and
 * REG_LEN = REG_PTR - REG_DST.  The region is filled downwards from
 * REG_PTR.
 */
aligned:
	/* fill 32*n bytes */
	mov	#32,REG_TMP1
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len) */
	bt	9f
	.align	2
1:	sub	REG_TMP1,REG_PTR	/* ptr -= 32 */
	mov.l	REG_C,@REG_PTR
	sub	REG_TMP1,REG_LEN	/* len -= 32 */
	mov.l	REG_C,@(4,REG_PTR)
	cmp/hi	REG_LEN,REG_TMP1	/* T = (32 > len): last full chunk? */
	mov.l	REG_C,@(8,REG_PTR)
	mov.l	REG_C,@(12,REG_PTR)
	mov.l	REG_C,@(16,REG_PTR)
	mov.l	REG_C,@(20,REG_PTR)
	mov.l	REG_C,@(24,REG_PTR)
	bf/s	1b
	 mov.l	REG_C,@(28,REG_PTR)	/* (delay slot) */
9:

	/*
	 * Fill the remaining 4*n bytes (n < 8), unrolled 4 times.  T is set
	 * when ptr == dst + 4, i.e. when the store that follows writes the
	 * last (lowest) long.
	 */
	cmp/eq	REG_DST,REG_PTR
	bt	9f			/* nothing left */
	add	#4,REG_DST
	cmp/eq	REG_DST,REG_PTR
1:	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	 cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	 cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bt/s	9f
	 cmp/eq	REG_DST,REG_PTR
	mov.l	REG_C,@-REG_PTR
	bf/s	1b
	 cmp/eq	REG_DST,REG_PTR
9:
#ifdef BZERO
	rts
	 nop
#else
	rts
	 mov	REG_DST0,r0		/* return the original dst */
#endif


/*
 * dst is not 4-byte aligned: store a leading byte and/or halfword so
 * that REG_DST becomes aligned, then fix up the end pointer as well.
 */
unaligned_dst:
	mov	#1,REG_TMP1
	tst	REG_TMP1,REG_DST	/* if (dst & 1) {               */
	add	#1,REG_TMP1		/* REG_TMP1 = 2; T is unchanged */
	bt/s	2f
	 tst	REG_TMP1,REG_DST	/* T = !(dst & 2) (delay slot)  */
	mov.b	REG_C,@REG_DST		/*   *dst++ = c;                */
	add	#1,REG_DST
	tst	REG_TMP1,REG_DST	/* T = !(dst & 2), after dst++  */
2:					/* }                            */
					/* if (dst & 2) {               */
	bt	4f
	mov.w	REG_C,@REG_DST		/*   *(u_int16_t*)dst++ = c;    */
	add	#2,REG_DST
4:					/* }                            */


	tst	#3,REG_PTR		/* if (ptr & 3) {               */
	bt/s	4f			/*                              */
	/*
	 * The "tst #1" below is both the delay slot of the branch above
	 * and the entry point used when only the end pointer is unaligned.
	 */
unaligned_len:
	 tst	#1,REG_PTR		/*     if (ptr & 1) {           */
	bt/s	2f
	 tst	#2,REG_PTR		/* T = !(ptr & 2) (delay slot); */
					/* bit 1 survives the --ptr     */
	mov.b	REG_C,@-REG_PTR		/*         *--ptr = c;          */
2:					/*     }                        */
					/*     if (ptr & 2) {           */
	bt	4f
	mov.w	REG_C,@-REG_PTR		/*        *--(u_int16_t*)ptr = c; */
4:					/*     }                        */
					/* }                            */

	mov	REG_PTR,REG_LEN
	bra	aligned
	 sub	REG_DST,REG_LEN		/* len = ptr - dst (delay slot) */

#ifdef BZERO
END_WEAK(bzero)
#else
END_STRONG(memset)
#endif