/* $NetBSD: memcmp.S,v 1.2 2008/03/06 21:17:17 phx Exp $ */

/* stropt/memcmp.S, pl_string_common, pl_linux 10/11/04 11:45:35
 * ==========================================================================
 * Optimized memcmp implementation for IBM PowerPC 405/440.
 *
 * Copyright (c) 2003, IBM Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *   * Redistributions of source code must retain the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer.
 *   * Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer in the documentation and/or other materials
 *     provided with the distribution.
 *   * Neither the name of IBM nor the names of its contributors
 *     may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ==========================================================================
 *
 * Function: Compare the first n bytes of two memory buffers
 *
 *           int memcmp(const void *s1, const void *s2, size_t n)
 *
 * Input:    r3 - buffer 1 address
 *           r4 - buffer 2 address
 *           r5 - number of bytes to compare (treated as unsigned)
 * Output:   r3 - -1 (buffer 1 less), 0 (equal), 1 (buffer 1 greater)
 *
 * Registers written: r3-r12, ctr, cr0.  No stack is used.
 *
 * Strategy:
 *   - Data is compared a word (4 bytes) at a time with unsigned word
 *     compares.  This gives memcmp ordering only when the first byte
 *     of a loaded word is its most significant byte (big-endian).
 *   - A word load may fetch bytes beyond the count; the surplus bytes
 *     are shifted out before comparing (first4/last4).  To keep such
 *     loads from touching the next 4 KB block, word loads are only
 *     issued while at least 8 bytes remain before the 4 KB boundary
 *     of BOTH buffers; otherwise up to 8 bytes are compared one byte
 *     at a time (bytebybyte) and the word path is retried.
 *   - Every test of the byte count uses an unsigned compare, so
 *     counts of 0x80000000 and above are handled like any other.
 *
 * ==========================================================================
 */

#define _NOREGNAMES
#include <machine/asm.h>

        .text
        .align 4
/* LINTSTUB: Func: int memcmp(const void *, const void *, size_t) */
ENTRY(memcmp)

        /*
         * Check count passed in R5. If zero, return 0; otherwise continue.
         */
        cmplwi  %r5, 0
        beq-    ret_0

        /*
         * Most of the time the difference is found in the first
         * several bytes.  The following code minimizes the number
         * of load operations for short compares.
         */

        mr      %r11, %r3               /* r11 = buffer 1 cursor (r3 is */
                                        /* reused as scratch/result)    */

again:
        /*
         * Invariant here: r4/r11 point at the next uncompared byte of
         * buffer 2/buffer 1 and r5 (> 0) is the number of bytes left.
         */

        not     %r10, %r4               /* buffer 2: bytes to page bdy  */
        rlwinm. %r10, %r10, 29, 23, 31  /* buffer 2: dwords to page bdy */
        beq-    bytebybyte              /* If < 8 bytes to the page bdy */
                                        /* do byte by byte              */
        lwz     %r8, 0(%r4)             /* load 1st buffer 2 word       */

        not     %r12, %r11              /* buffer 1: bytes to page bdy  */
        rlwinm. %r12, %r12, 29, 23, 31  /* buffer 1: dwords to page bdy */
        beq-    bytebybyte              /* If < 8 bytes to the page bdy */
                                        /* do byte by byte              */
        lwz     %r6, 0(%r11)            /* load 1st buffer 1 word       */

        cmplwi  %r5, 4                  /* If remaining count <= 4      */
        ble+    first4                  /* handle specially. DWG        */
                                        /* (unsigned: count is size_t)  */

        cmplw   %r8, %r6                /* compare buffer 2 and buffer 1*/
        bne+    all_done                /* different => we're done      */

        lwzu    %r9, 4(%r4)             /* load 2nd buffer 2 word       */
        lwzu    %r7, 4(%r11)            /* load 2nd buffer 1 word       */

        cmplwi  %r5, 8                  /* If remaining count <= 8      */
        ble+    last4                   /* handle specially. DWG        */
                                        /* (unsigned: count is size_t)  */

        cmplw   %r9, %r7                /* compare buffer 2 and buffer 1*/
        bne+    all_done                /* different => we're done      */

        /*
         * The first 8 bytes matched.  r4/r11 now point at the 2nd word
         * just compared, so "cursor + 4" is the next uncompared byte.
         * CR0 is not needed from the two rotates below (the following
         * cmpw overwrites it), hence the non-record form.
         */
        addi    %r5, %r5, -8            /* Update character counter DWG */
        addi    %r10, %r4, 0x0004       /* DWG */
        not     %r10, %r10              /* buffer 2: bytes to page bdy DWG */
        rlwinm  %r10, %r10, 29, 23, 31  /* buffer 2: dwords to page bdy DWG */
        addi    %r12, %r11, 0x0004      /* DWG */
        not     %r12, %r12              /* buffer 1: bytes to page bdy DWG */
        rlwinm  %r12, %r12, 29, 23, 31  /* buffer 1: dwords to page bdy DWG */

        /* The following section prior to loop: figures out whether    */
        /* the buffer 1 or buffer 2 is closer to the page boundary.    */
        /* The main loop count is then set up to reflect the number of */
        /* double words of the buffer that is closest                  */

        cmpw    %r10, %r12              /* Find closest (both 0..511)   */
        blt     lt

        mr      %r10, %r12

lt:

        srwi    %r12, %r5, 3            /* Double check the total count */
        cmpw    %r10, %r12              /* limitation (r12 is at most   */
        blt     lt2                     /* 0x1fffffff, so signed is ok) */

        mr      %r10, %r12              /* DWG */
lt2:                                    /* DWG */
        cmpwi   %r10, 0                 /* DWG: no whole dword to do?   */
        bne     lt3                     /* DWG */
        addi    %r4, %r4, 0x0004        /* DWG: step past compared word */
        addi    %r11, %r11, 0x0004      /* DWG */
        b       again                   /* DWG */
lt3:                                    /* DWG */
        mtctr   %r10                    /* dword count for loop         */
        lwzu    %r6, 4(%r11)            /* pre-load buffer 1 word       */

        b       in                      /* To the loop                  */

loop:                                   /* main loop                    */

        /*
         * Software-pipelined: the dword loaded at "in" during the
         * previous iteration is compared here while the next buffer 1
         * word is already being fetched.
         */
        cmplw   %r8, %r6                /* Compare first buffer 2 word  */
        bne-    all_done                /* with first buffer 1 word     */
                                        /* If different, we're done     */
        cmplw   %r9, %r7                /* Compare second buffer 2 word */
                                        /* with second buffer 1 word    */
        lwzu    %r6, 4(%r11)            /* pre-load buffer 1 word       */

        bne-    all_done                /* If different, we're done     */

in:

        lwzu    %r7, 4(%r11)            /* pre-load buffer 1 word       */
        lwzu    %r8, 4(%r4)             /* pre-load buffer 2 word       */
        lwzu    %r9, 4(%r4)             /* pre-load buffer 2 word       */

        bdnz+   loop                    /* Do more DW's if cnt > 0      */

        /*mfctr %r12*/ /*DWG*/          /* number of dwords left        */
        /*subf  %r10, %r12, %r10*/ /*DWG*//* number of dwords compared  */
        slwi    %r10, %r10, 3           /* r10 = bytes loaded by loop   */
        subf    %r5, %r10, %r5          /* adjust byte counter          */
        /*bne+  partial*/ /*DWG*/       /* If less than 8 bytes, handle */
                                        /* specially                    */
        /*cmpwi %r5, 8*/                /* Removed. DWG */
        /*blt   partial*/               /* Removed. DWG */

        /*addic %r5, %r5, -8*/ /*DWG*/  /* Subtract two words from count*/

        cmplw   %r8, %r6                /* compare last dword           */
        addi    %r4, %r4, 4             /* r4 -> next uncompared byte   */
        bne-    all_done

        cmplw   %r9, %r7
        addi    %r11, %r11, 4           /* r11 -> next uncompared byte  */
        bne-    all_done

bytebybyte:

        /* We've gotten close to a page boundary: do a byte-byte-byte
         * compare for the following 8 bytes, and then go back to
         * the full-word compare loop.
         */

        li      %r3, 8                  /* loop count                   */
        cmplw   %r3, %r5                /* take min(8, counter),        */
        ble     f2                      /* unsigned: count is size_t    */

        mr.     %r3, %r5                /* fewer than 8 bytes are left  */

        beqlr                           /* none left: return r3 = 0     */

f2:

        mtctr   %r3
        subf    %r5, %r3, %r5           /* adjust counter               */

bbb:

        lbz     %r6, 0(%r11)            /* byte compare loop            */

        addi    %r11, %r11, 1

        lbz     %r8, 0(%r4)

        addi    %r4, %r4, 1

        cmplw   %r8, %r6

        bdnzt+  eq, bbb                 /* loop while equal and ctr != 0*/

        bne     all_done                /* last byte pair differed      */

        cmplwi  %r5, 0                  /* unsigned: any bytes left?    */
        bgt     again                   /* handle the rest              */

        xor     %r3, %r3, %r3

        blr

#if 0   /* Removed code section. DWG */
partial:

        mr.     %r3, %r5

        beqlr                           /* If count -> 0, we're done    */

f1:

        subfic  %r3, %r3, 4             /* zero/end in first word?      */
        cmpwi   %r3, 0
        blt     last4
#endif  /* DWG */

first4:
        /* 1..4 bytes left: drop the trailing bytes that are not part  */
        /* of the compare by shifting both words right.                */
        subfic  %r3, %r5, 4             /* If count <= 4, handle        */
        rlwinm  %r3, %r3, 3, 0, 31      /* count *= 8                   */
        srw     %r6, %r6, %r3           /* align 1st buffer 1 word      */
        srw     %r8, %r8, %r3           /* align 1st buffer 2 word      */

        cmplw   %r8, %r6                /* get result                   */
        bne     all_done
        xor     %r3, %r3, %r3
        blr

last4:
        /* 5..8 bytes left and the first word matched: same trick on   */
        /* the second word.                                            */
        subfic  %r10, %r5, 8            /*DWG*/
        rlwinm  %r10, %r10, 3, 0, 31    /* count *= 8                   */
        srw     %r7, %r7, %r10          /* align 2nd buffer 1 word      */
        srw     %r9, %r9, %r10          /* align 2nd buffer 2 word      */

        cmplw   %r9, %r7                /* get result                   */
        bne     all_done
ret_0:
        xor     %r3, %r3, %r3           /* Equal result                 */
        blr

all_done:
        /*
         * Reached with CR0 set by "cmplw <buffer 2 data>, <buffer 1 data>"
         * on unequal operands: lt means buffer 2 < buffer 1.
         */

        blt     finish_lt

        addi    %r3, 0, -1              /* buffer 1 < buffer 2: -1      */

        blr

finish_lt:

        addi    %r3, 0, 1               /* buffer 1 > buffer 2: +1      */

        blr