/*	$NetBSD: memcmp.S,v 1.4 2013/07/04 20:57:59 matt Exp $ */

/* stropt/memcmp.S, pl_string_common, pl_linux 10/11/04 11:45:35
 * ==========================================================================
 * Optimized memcmp implementation for IBM PowerPC 405/440.
 *
 * Copyright (c) 2003, IBM Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *   * Redistributions of source code must retain the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer.
 *   * Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer in the documentation and/or other materials
 *     provided with the distribution.
 *   * Neither the name of IBM nor the names of its contributors
 *     may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ==========================================================================
 *
 * Function: Compare two memory buffers (up to n bytes)
 *
 *           int memcmp(const void *s1, const void *s2, size_t n)
 *
 * Input:    r3 - buffer 1 address
 *           r4 - buffer 2 address
 *           r5 - number of bytes to compare (unsigned)
 * Output:   r3 <0 (less), 0 (equal), >0 (greater)
 *
 * Register use inside the routine:
 *           r11     - running buffer 1 pointer (r3 is reused for results)
 *           r4      - running buffer 2 pointer
 *           r5      - bytes still to compare
 *           r6, r7  - words loaded from buffer 1
 *           r8, r9  - words loaded from buffer 2
 *           r10,r12 - scratch (distance to boundary, loop counts, shifts)
 *           ctr     - loop counter for the dword loop and the byte loop
 *
 * Strategy: words are compared four bytes at a time as unsigned values.
 * Because a word load may touch bytes past the end of the requested
 * range, whole-word loads are only issued while each buffer pointer is
 * at least 8 bytes short of a 4096-byte boundary; closer than that, up
 * to 8 bytes are compared one byte at a time and the word path is then
 * retried.
 *
 * NOTE(review): ordering by whole-word unsigned compare matches byte-wise
 * memcmp ordering only with big-endian word loads -- confirm this file is
 * never built for a little-endian configuration.
 *
 * ==========================================================================
 */

#include <machine/asm.h>

	.text
	.align 4
/* LINTSTUB: Func: int memcmp(const void *, const void *, size_t) */
ENTRY(memcmp)

	/*
	 * Check count passed in R5. If zero, return 0; otherwise continue.
	 */
	cmpwi	%r5,0
	beq-	ret_0

	/*
	 * Most of the time the difference is found in the first
	 * several bytes.  The following code minimizes the number
	 * of load operations for short compares.
	 */

	mr	%r11, %r3		/* Save buffer 1 */

again:

	not	%r10, %r4		/* buffer 2: bytes to page bdy */
	rlwinm.	%r10, %r10,29,23,31	/* buffer 2: dwords to page bdy */
	beq-	bytebybyte		/* If < 8 bytes to the page bdy */
					/* do byte by byte */
	lwz	%r8, 0(%r4)		/* load 1st buffer 2 word */

	not	%r12, %r11		/* buffer 1: bytes to page bdy */
	rlwinm.	%r12, %r12,29,23,31	/* buffer 1: dwords to page bdy */
	beq-	bytebybyte		/* If < 8 bytes to the page bdy */
					/* do byte by byte */
	lwz	%r6, 0(%r11)		/* load 1st buffer 1 word */

	/*
	 * The count is a size_t, so it is compared as an unsigned value:
	 * a signed compare would send counts >= 0x80000000 down the
	 * short-compare path.
	 */
	cmplwi	%r5, 4			/* If remaining count <= 4 */
	ble+	first4			/* handle specially. DWG */

	cmplw	%r8, %r6		/* compare buffer 2 and buffer 1*/
	bne+	all_done		/* different => we're done */

	lwzu	%r9, 4(%r4)		/* load 2nd buffer 2 word */
	lwzu	%r7, 4(%r11)		/* load 2nd buffer 1 word */

	cmplwi	%r5, 8			/* If remaining count <= 8 */
	ble+	last4			/* handle specially. DWG */

	cmplw	%r9, %r7		/* compare buffer 2 and buffer 1*/
	bne+	all_done		/* different => we're done */

	addi	%r5, %r5, -8		/* Update character counter DWG */
	addi	%r10, %r4, 0x0004	/* DWG*/
	not	%r10, %r10		/* buffer 2: bytes to page bdy DWG */
	rlwinm.	%r10, %r10,29,23,31	/* buffer 2: dwords to page bdy DWG */
	addi	%r12, %r11, 0x0004	/* DWG */
	not	%r12, %r12		/* buffer 1: bytes to page bdy DWG */
	rlwinm.	%r12, %r12,29,23,31	/* buffer 1: dwords to page bdy DWG */

	/* The following section prior to loop: figures out whether	*/
	/* the buffer 1 or buffer 2 is closer to the page boundary.	*/
	/* The main loop count is then set up to reflect the number of	*/
	/* double words of the buffer that is closest			*/

	cmpw	%r10, %r12		/* Find closest */
	blt	lt

	mr	%r10, %r12

lt:

	srwi	%r12, %r5, 3		/* Double check the total count */
	cmpw	%r10, %r12		/* limitation */
	blt	lt2

	mr	%r10, %r12		/* DWG */
lt2:					/* DWG */
	cmpwi	%r10, 0			/* DWG */
	bne	lt3			/* DWG */
	addi	%r4, %r4, 0x0004	/* DWG */
	addi	%r11,%r11,0x0004	/* DWG */
	b	again			/* DWG */
lt3:					/* DWG */
	mtctr	%r10			/* dword count for loop */
	lwzu	%r6, 4(%r11)		/* pre-load buffer 1 word */

	b	in			/* To the loop */

loop:					/* main loop */

	cmplw	%r8, %r6		/* Compare first buffer 2 word */
	bne-	all_done		/* with first buffer 1 word */
					/* If different, we're done */
	cmplw	%r9, %r7		/* Compare second buffer 2 word */
					/* with second buffer 1 word */
	lwzu	%r6, 4(%r11)		/* pre-load buffer 1 word */

	bne-	all_done		/* If different, we're done */

in:

	lwzu	%r7, 4(%r11)		/* pre-load buffer 1 word */
	lwzu	%r8, 4(%r4)		/* pre-load buffer 2 word */
	lwzu	%r9, 4(%r4)		/* pre-load buffer 2 word */

	bdnz+	loop			/* Do more DW's if cnt > 0 */

	/*mfctr	%r12*/ /*DWG*/		/* number of dwords left */
	/*subf	%r10, %r12, %r10*/ /*DWG*//* number of dwords compared */
	slwi	%r10, %r10, 3
	subf	%r5, %r10, %r5		/* adjust byte counter */
	/*bne+	partial*/ /*DWG*/	/* If less than 8 bytes, handle */
					/* specially */
	/*cmpwi	%r5, 8*/		/* Removed. DWG */
	/*blt	partial*/		/* Removed. DWG */

	/*addic	%r5, %r5, -8*/ /*DWG*/	/* Subtract two words from count*/

	cmplw	%r8, %r6		/* compare last dword */
	addi	%r4, %r4, 4
	bne-	all_done

	cmplw	%r9, %r7
	addi	%r11, %r11, 4
	bne-	all_done

bytebybyte:

	/* We've gotten close to a page boundary: do a byte-byte-byte
	 * compare for the following 8 bytes, and then go back to
	 * the full-word compare loop.
	 */

	li	%r3, 8			/* loop count */
	cmplw	%r3, %r5		/* take min(8, counter), unsigned */
	ble	f2

	mr.	%r3, %r5

	beqlr				/* nothing left: r3 is already 0 */

f2:

	mtctr	%r3
	subf	%r5, %r3, %r5		/* adjust counter */

bbb:

	lbz	%r6, 0(%r11)		/* byte compare loop */

	addi	%r11, %r11, 1

	lbz	%r8, 0(%r4)

	addi	%r4, %r4, 1

	cmplw	%r8, %r6

	bdnzt	eq, bbb			/* continue while equal and ctr != 0 */

	bne	all_done

	cmplwi	%r5, 0			/* unsigned: any bytes left? */
	bgt	again			/* handle the rest */

	xor	%r3,%r3,%r3

	blr

#if 0 /* Removed code section. DWG */
partial:

	mr.	%r3, %r5

	beqlr				/* If count -> 0, we're done */

f1:

	subfic	%r3, %r3, 4		/* zero/end in first word? */
	cmpwi	%r3, 0
	blt	last4
#endif /* DWG */

first4:
	/*
	 * 1..4 bytes remain and both first words are loaded: shift out
	 * the (4 - count) trailing bytes that are not part of the compare.
	 */
	subfic	%r3, %r5, 4		/* If count <= 4, handle */
	rlwinm	%r3, %r3, 3, 0, 31	/* count *= 8 */
	srw	%r6, %r6, %r3		/* align 1st buffer 1 word */
	srw	%r8, %r8, %r3		/* align 1st buffer 2 word */

	cmplw	%r8, %r6		/* get result */
	bne	all_done
	xor	%r3,%r3,%r3
	blr

last4:
	/*
	 * 5..8 bytes remain and the first words matched: shift out the
	 * (8 - count) trailing bytes of the second words.
	 */
	subfic	%r10, %r5, 8		/*DWG*/
	rlwinm	%r10, %r10, 3, 0, 31	/* count *= 8 */
	srw	%r7, %r7, %r10		/* align 2nd buffer 1 word */
	srw	%r9, %r9, %r10		/* align 2nd buffer 2 word */

	cmplw	%r9, %r7		/* get result */
	bne	all_done
ret_0:
	xor	%r3,%r3,%r3		/* Equal result */
	blr

all_done:

	/*
	 * Every compare that reaches here was "cmplw buffer2, buffer1",
	 * so LT in cr0 means buffer 2 < buffer 1, i.e. buffer 1 is the
	 * greater one and the result is positive.
	 */
	blt	finish_lt

	addi	%r3,0,-1		/* Less than result */

	blr

finish_lt:

	addi	%r3,0,1			/* Greater than result */

	blr
END(memcmp)