/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"memcmp.s"

/*
 * memcmp(s1, s2, len)
 *
 * Compare n bytes:  s1>s2: >0  s1==s2: 0  s1<s2: <0
 *
 * Fast assembler language version of the following C-program for memcmp
 * which represents the `standard' for the C-library.
 *
 *	int
 *	memcmp(const void *s1, const void *s2, size_t n)
 *	{
 *		if (s1 != s2 && n != 0) {
 *			const char *ps1 = s1;
 *			const char *ps2 = s2;
 *			do {
 *				if (*ps1++ != *ps2++)
 *					return (ps1[-1] - ps2[-1]);
 *			} while (--n != 0);
 *		}
 *		return (0);
 *	}
 *
 * Register usage:
 *	%o0	s1 on entry, advanced as bytes are consumed; result on return
 *	%o1	s2 on entry; turned into the offset (s2 - s1) before the
 *		byte loop and the word loops so that only %o0 is incremented
 *	%o2	len on entry; count left for the byte loop later on
 *	%o3	scratch during alignment, then the byte count handled by
 *		the word loops
 *	%o4/%o5	current s1 / s2 data being compared
 *	%g1/%g5	scratch for merging misaligned s2 words
 *
 * Outline:
 *	- s1 == s2 returns 0 immediately.
 *	- len <= 17 is handled entirely by the byte loop (.cmpbyt/.bytcmp).
 *	- Otherwise one to three leading bytes are compared until s1 is word
 *	  aligned (.algn1/.algn2/.algn3), then .iss2 selects a word loop
 *	  from the low two bits of s2: .w4cmp (0), .w2cmp (2), .w3cmp (3) or
 *	  .w3cmp followed by .w1cmp (1).
 *	- A word mismatch goes to .noteq_word, which locates the first
 *	  unequal byte; the bytes left after a word loop go to .bytcmp.
 *
 * NOTE(review): data is loaded with ldub/lduh/lduw, so the returned
 * difference is computed from unsigned byte values, whereas the reference
 * program above is written with plain `char'.
 */

#include <sys/asm_linkage.h>

	ANSI_PRAGMA_WEAK(memcmp,function)

	ENTRY(memcmp)
	cmp	%o0, %o1		! s1 == s2?
	be,pn	%xcc, .cmpeq
	cmp	%o2, 17			! delay slot, small count?
	bleu,a,pn %xcc, .cmpbyt		! for small counts go do bytes
	sub	%o1, %o0, %o1		! delay slot (only if taken), s2 - s1

	andcc	%o0, 3, %o3		! is s1 aligned?
	bz,a,pn	%icc, .iss2		! if so go check s2
	andcc	%o1, 3, %o4		! is s2 aligned?
	cmp	%o3, 2
	be,pn	%icc, .algn2		! s1 needs two bytes to become aligned
	cmp	%o3, 3			! delay slot, tested by the be in .algn1

	/*
	 * s1 & 3 is 1 or 3: compare a single byte.  None of the
	 * instructions before the first branch below set the condition
	 * codes, so that branch still tests "cmp %o3, 3" from above.
	 */
.algn1:	ldub	[%o0], %o4		! cmp one byte
	inc	%o0
	ldub	[%o1], %o5
	inc	%o1
	dec	%o2
	be,pn	%icc, .algn3		! s1 & 3 was 3, s1 is aligned now
	cmp	%o4, %o5		! delay slot, compare the bytes read
	be,pt	%icc, .algn2		! equal, two more bytes to align s1
	nop
	b,a	.noteq

	/*
	 * s1 is halfword aligned here: fetch two bytes of s1 at once and
	 * compare them with two single bytes of s2.
	 */
.algn2:	lduh	[%o0], %o4
	inc	2, %o0
	ldub	[%o1], %o5
	inc	1, %o1
	srl	%o4, 8, %o3		! first of the two s1 bytes
	cmp	%o3, %o5
	be,a,pt	%icc, 1f
	ldub	[%o1], %o5		! delay slot, get next byte from s2
	b	.noteq
	mov	%o3, %o4		! delay slot, move *s1 to %o4
1:	inc	%o1
	dec	2, %o2			! two bytes consumed
	and	%o4, 0xff, %o4		! second of the two s1 bytes
	cmp	%o4, %o5
.algn3:	be,a,pt	%icc, .iss2
	andcc	%o1, 3, %o4		! delay slot, is s2 aligned?
	b,a	.noteq

	/*
	 * Byte loop.  %o1 holds s2 - s1, so [%o0 + %o1] addresses s2.
	 * Every entry into .bytcmp is preceded by "deccc %o2"; the loop
	 * continues while that decrement did not borrow, i.e. while the
	 * count was non-zero before it.
	 */
.cmpbyt:b	.bytcmp
	deccc	%o2
1:	ldub	[%o0 + %o1], %o5	! byte compare loop
	inc	%o0
	cmp	%o4, %o5
	be,a,pt	%icc, .bytcmp
	deccc	%o2			! delay slot, compare count (len)
	b,a	.noteq
.bytcmp:bgeu,a,pt %xcc, 1b
	ldub	[%o0], %o4		! delay slot (only if taken), s1 byte
.cmpeq:
	retl				! strings compare equal
	clr	%o0

	/*
	 * %o4 and %o5 hold the unequal s1 and s2 words.  Compare them a
	 * byte at a time starting with the most significant byte; after
	 * three equal bytes the fourth pair is used without a further
	 * test.  The shifts in the branch delay slots always execute.
	 */
.noteq_word:				! words aren't equal. find unequal byte
	srl	%o4, 24, %o1		! first byte
	srl	%o5, 24, %o2
	cmp	%o1, %o2
	bne,pn	%icc, 1f
	sll	%o4, 8, %o4
	sll	%o5, 8, %o5
	srl	%o4, 24, %o1		! second byte
	srl	%o5, 24, %o2
	cmp	%o1, %o2
	bne,pn	%icc, 1f
	sll	%o4, 8, %o4
	sll	%o5, 8, %o5
	srl	%o4, 24, %o1		! third byte
	srl	%o5, 24, %o2
	cmp	%o1, %o2
	bne,pn	%icc, 1f
	sll	%o4, 8, %o4
	sll	%o5, 8, %o5
	srl	%o4, 24, %o1		! fourth byte
	srl	%o5, 24, %o2
1:
	retl
	sub	%o1, %o2, %o0		! delay slot

.noteq:
	retl				! strings aren't equal
	sub	%o4, %o5, %o0		! delay slot, return(*s1 - *s2)

	/*
	 * s1 is word aligned.  %o4 = s2 & 3, and the condition codes are
	 * still those of the andcc that produced it (andn/and below do not
	 * change them).
	 */
.iss2:	andn	%o2, 3, %o3		! count of aligned bytes
	and	%o2, 3, %o2		! remaining bytes
	bz,pn	%icc, .w4cmp		! if s2 word aligned, compare words
	cmp	%o4, 2
	be,pn	%icc, .w2cmp		! s2 half aligned
	cmp	%o4, 1

	/*
	 * s2 & 3 is 1 or 3.  One word is moved from the word count to the
	 * byte count because the word reads from s2 run ahead of the
	 * bytes actually compared.  %o5 carries the s2 bytes not yet
	 * compared in its most significant bits.
	 */
.w3cmp:
	dec	4, %o3			! avoid reading beyond the last byte
	inc	4, %o2
	ldub	[%o1], %g1		! read a byte to align for word reads
	inc	1, %o1
	be,pt	%icc, .w1cmp		! aligned to 1 or 3 bytes
	sll	%g1, 24, %o5

	sub	%o1, %o0, %o1		! s2 - s1
2:	lduw	[%o0 + %o1], %g1
	lduw	[%o0], %o4
	inc	4, %o0
	srl	%g1, 8, %g5		! merge with the other half
	or	%g5, %o5, %o5
	cmp	%o4, %o5
	bne,pt	%icc, .noteq_word
	deccc	4, %o3
	bnz,pt	%xcc, 2b
	sll	%g1, 24, %o5		! delay slot, keep the unused byte
	sub	%o1, 1, %o1		! used 3 bytes of the last word read
	b	.bytcmp
	deccc	%o2

	/*
	 * s2 & 3 was 1: one byte is already in %o5, a halfword completes
	 * the three bytes needed to reach a word boundary in s2.
	 *
	 * NOTE(review): in this file .w1cmp is reached only from the branch
	 * in .w3cmp, which has already moved one word from %o3 to %o2; the
	 * adjustment below moves a second one.  The two counts still add up
	 * to the bytes left, so the result is not affected, but confirm
	 * that the extra word left to the byte loop is intentional.
	 */
.w1cmp:
	dec	4, %o3			! avoid reading beyond the last byte
	inc	4, %o2
	lduh	[%o1], %g1		! read 3 bytes to word align
	inc	2, %o1
	sll	%g1, 8, %g5
	or	%o5, %g5, %o5

	sub	%o1, %o0, %o1		! s2 - s1
3:	lduw	[%o0 + %o1], %g1
	lduw	[%o0], %o4
	inc	4, %o0
	srl	%g1, 24, %g5		! merge with the other half
	or	%g5, %o5, %o5
	cmp	%o4, %o5
	bne,pt	%icc, .noteq_word
	deccc	4, %o3
	bnz,pt	%xcc, 3b
	sll	%g1, 8, %o5		! delay slot, keep the unused bytes
	sub	%o1, 3, %o1		! used 1 byte of the last word read
	b	.bytcmp
	deccc	%o2

	/*
	 * s2 & 3 is 2: each compare word is built from the halfword left
	 * over from the previous s2 read and the upper half of the next.
	 */
.w2cmp:
	dec	4, %o3			! avoid reading beyond the last byte
	inc	4, %o2
	lduh	[%o1], %g1		! read a halfword to align s2
	inc	2, %o1
	sll	%g1, 16, %o5
	sub	%o1, %o0, %o1		! s2 - s1
4:	lduw	[%o0 + %o1], %g1	! read a word from s2
	lduw	[%o0], %o4		! read a word from s1
	inc	4, %o0
	srl	%g1, 16, %g5		! merge with the other half
	or	%g5, %o5, %o5
	cmp	%o4, %o5
	bne,pn	%icc, .noteq_word
	deccc	4, %o3
	bnz,pt	%xcc, 4b
	sll	%g1, 16, %o5		! delay slot, keep the unused half
	sub	%o1, 2, %o1		! only used half of the last read word
	b	.bytcmp
	deccc	%o2

	/*
	 * Both s1 and s2 are word aligned: compare whole words directly.
	 */
.w4cmp:
	sub	%o1, %o0, %o1		! s2 - s1
	lduw	[%o0 + %o1], %o5
5:	lduw	[%o0], %o4
	inc	4, %o0
	cmp	%o4, %o5
	bne,pt	%icc, .noteq_word
	deccc	4, %o3
	bnz,a,pt %xcc, 5b
	lduw	[%o0 + %o1], %o5	! delay slot (only if taken), next s2
	b	.bytcmp			! compare remaining bytes, if any
	deccc	%o2

	SET_SIZE(memcmp)