/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from locore.s.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: bcopy.S,v 1.5 2014/03/22 19:16:34 jakllsch Exp $")
#endif

	/*
	 * (ov)bcopy (src,dst,cnt)
	 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
	 *
	 * Hacked about by dsl@netbsd.org
	 */

/*
 * One body, three entry points, selected by the including file:
 *
 *	MEMCOPY defined		memcpy(dst, src, len)	- regions assumed disjoint
 *	MEMMOVE defined		memmove(dst, src, len)	- regions may overlap
 *	neither			bcopy(src, dst, len)	- regions may overlap
 *
 * Syntax: AT&T (GNU as), run through the C preprocessor.
 *
 * In:	%rdi, %rsi	the two pointers, in the argument order shown above
 *	%rdx		byte count
 * Out:	%rax		destination pointer (memcpy/memmove builds only)
 *
 * Register roles once the prologue has run:
 *	%rdi	destination cursor
 *	%rsi	source cursor
 *	%rcx	repeat count for the string move
 *	%r8	dst - src for the overlap test (overlap-aware builds only);
 *		reused as the address of the first qword on the misaligned path
 *	%r9	address of the final 8 destination bytes
 *	%r10	final 8 source bytes, loaded before anything is stored
 *	%r11	destination pointer, then its low three bits (misalignment)
 *
 * No stack is used and only the registers listed above are written.
 */

#if defined(MEMCOPY)
#define NO_OVERLAP
ENTRY(memcpy)
#elif defined(MEMMOVE)
ENTRY(memmove)
#else
ENTRY(bcopy)
#endif
	movq	%rdx,%rcx
#if defined(MEMCOPY) || defined(MEMMOVE)
	movq	%rdi,%rax		/* the destination is the return value */
	movq	%rdi,%r11		/* destination, for the alignment test */
#else
	movq	%rsi,%r11		/* destination, for the alignment test */
	xchgq	%rdi,%rsi		/* bcopy() takes (src, dst): swap them */
#endif

#ifndef NO_OVERLAP
	movq	%rdi,%r8
	subq	%rsi,%r8		/* %r8 = dst - src (unsigned) */
#endif

	shrq	$3,%rcx			/* number of whole 8-byte words */
	jz	3f			/* under 8 bytes: go copy by bytes */

	/*
	 * At least 8 bytes.  Pick up the final qword now, before any store
	 * can disturb it, and remember where it belongs.  Storing it last
	 * takes care of the 0..7 bytes the qword loop leaves behind.
	 */
	leaq	-8(%rdi,%rdx),%r9	/* where the last 8 bytes go */
	movq	-8(%rsi,%rdx),%r10	/* the last 8 source bytes */
#ifndef NO_OVERLAP
	cmpq	%rdx,%r8		/* dst inside [src, src + len)? */
	jb	2f			/* yes: must run backwards */
#endif

/*
 * Non-overlapping, copy forwards.
 * Newer Intel cpus (Nehalem) will do 16byte read/write transfers
 * if %ecx is more than 76.
 * AMD might do something similar some day.
 */
	andq	$7,%r11			/* is the destination 8-byte aligned? */
	jnz	1f
	rep movsq
	movq	%r10,(%r9)		/* finish with the saved last word */
	ret

/*
 * Destination misaligned.
 * AMD say it is better to align the destination (not the source).
 * This will also re-align copies if the source and dest are both
 * misaligned by the same amount.
 * (I think Nehalem will use its accelerated copy if the source
 * and destination have the same alignment.)
 *
 * Here %r11 is 1..7.  The first qword is loaded up front and stored after
 * the loop; both cursors skip ahead 8 - %r11 bytes so that the qword loop
 * writes to an aligned destination.
 */
1:
	leaq	-9(%r11,%rdx),%rcx	/* len - (8 - misalign) - 1 */
	negq	%r11			/* now -1 .. -7 */
	movq	(%rsi),%rdx		/* the first 8 source bytes */
	movq	%rdi,%r8		/* where the first 8 bytes go */
	leaq	8(%rsi,%r11),%rsi
	leaq	8(%rdi,%r11),%rdi
	shrq	$3,%rcx			/* qwords; always ends short of the end */
	rep movsq
	movq	%rdx,(%r8)		/* store the saved first word */
	movq	%r10,(%r9)		/* store the saved last word */
	ret

#ifndef NO_OVERLAP
/*
 * Must copy backwards.
 * Reverse copy is probably easy to code faster than 'rep movs'
 * since that requires (IIRC) an extra clock every 3 iterations (AMD).
 * However I don't suppose anything cares that much!
 * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
 * The copy is aligned with the buffer start (more likely to
 * be a multiple of 8 than the end).
 */
2:
	leaq	-8(%rsi,%rcx,8),%rsi	/* highest whole qword of the source */
	leaq	-8(%rdi,%rcx,8),%rdi	/* and its place in the destination */
	std				/* string moves now step downwards */
	rep movsq
	cld				/* back to the forward direction */
	movq	%r10,(%r9)		/* store the saved last bytes */
	ret
#endif

/*
 * Less than 8 bytes to copy, copy by bytes.
 * Intel Nehalem optimise 'rep movsb' for <= 7 bytes (9-15 clocks).
 * For longer transfers it is 50+ !
 */
3:	movq	%rdx,%rcx		/* byte count (0..7) */

#ifndef NO_OVERLAP
	cmpq	%rdx,%r8		/* dst inside [src, src + len)? */
	jb	4f
#endif

	/* No overlap that matters: copy forwards. */
	rep movsb
	ret

#ifndef NO_OVERLAP
/* Must copy backwards, starting from the last byte of each buffer. */
4:
	leaq	-1(%rsi,%rcx),%rsi
	leaq	-1(%rdi,%rcx),%rdi
	std				/* string moves now step downwards */
	rep movsb
	cld				/* back to the forward direction */
	ret
#endif

#if defined(MEMCOPY)
END(memcpy)
#elif defined(MEMMOVE)
END(memmove)
#else
END(bcopy)
#endif