/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * The ascii_strcasecmp() function is a case-insensitive version of strcmp().
 * It assumes the ASCII character set and ignores differences in case
 * when comparing lower and upper case characters. In other words, it
 * behaves as if both strings had been converted to lower case using
 * tolower() in the "C" locale on each byte, and the results had then
 * been compared using strcmp().
 *
 * The assembly code below is an optimized version of the following C
 * reference:
 *
 * static const char charmap[] = {
 *	'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
 *	'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
 *	'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
 *	'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
 *	'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
 *	'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
 *	'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
 *	'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
 *	'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
 *	'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
 *	'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
 *	'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
 *	'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
 *	'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
 *	'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
 *	'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
 *	'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
 *	'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
 *	'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
 *	'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
 *	'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
 *	'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
 *	'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
 *	'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
 *	'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
 *	'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
 *	'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
 *	'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
 *	'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
 *	'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
 *	'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
 *	'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
 * };
 *
 * int
 * ascii_strcasecmp(const char *s1, const char *s2)
 * {
 *	const unsigned char	*cm = (const unsigned char *)charmap;
 *	const unsigned char	*us1 = (const unsigned char *)s1;
 *	const unsigned char	*us2 = (const unsigned char *)s2;
 *
 *	while (cm[*us1] == cm[*us2++])
 *		if (*us1++ == '\0')
 *			return (0);
 *	return (cm[*us1] - cm[*(us2 - 1)]);
 * }
 *
 * The following algorithm, from a 1987 news posting by Alan Mycroft, is
 * used for finding null bytes in a word:
 *
 * #define has_null(word) ((word - 0x01010101) & (~word & 0x80808080))
 *
 * The following algorithm is used for a wordwise tolower() operation:
 *
 * unsigned int
 * parallel_tolower (unsigned int x)
 * {
 *	unsigned int p;
 *	unsigned int q;
 *
 *	unsigned int m1 = 0x80808080;
 *	unsigned int m2 = 0x3f3f3f3f;
 *	unsigned int m3 = 0x25252525;
 *
 *	q = x & ~m1; // newb = byte & 0x7F
 *	p = q + m2;  // newb > 0x40 --> MSB set
 *	q = q + m3;  // newb < 0x5B --> MSB clear
 *	p = p & ~q;  // newb > 0x40 && newb < 0x5B --> MSB set
 *	q = m1 & ~x; // byte < 0x80 --> 0x80
 *	q = p & q;   // newb > 0x40 && newb < 0x5B && byte < 0x80 -> 0x80,else 0
 *	q = q >> 2;  // newb > 0x40 && newb < 0x5B && byte < 0x80 -> 0x20,else 0
 *	return (x + q); // translate uppercase characters to lowercase
 * }
 *
 * Both algorithms have been tested exhaustively for all possible 2^32 inputs.
 */

#include <sys/asm_linkage.h>

	! int ascii_strcasecmp(const char *s1, const char *s2)
	!
	! Register usage after the save:
	!	%i0	s1 on entry; scratch and return value afterwards
	!	%i1	s2 pointer (the running pointer for both strings)
	!	%i2	offset s1 - s2, so s1 is always addressed as [%i1 + %i2]
	!	%i3	alignment bits, then the current word from s1
	!	%i4	0x80808080 (Mycroft magic1, also m1 for parallel tolower)
	!	%i5	0x01010101 (Mycroft magic2)
	!	%l2	0x3f3f3f3f (m2 for parallel tolower)
	!	%l3	0x25252525 (m3 for parallel tolower)
	!	%l6/%l7	left/right shift counts used when s2 is not word aligned
	!	%g1	current byte or word from s2
	!
	! Returns 0 when the strings compare equal ignoring ASCII case,
	! otherwise the difference of the first mismatching lowercased bytes.
	!
	! NOTE: many instructions below sit in branch delay slots, and the
	! constants are loaded piecemeal in those slots; do not reorder.

	! The first part of this algorithm walks through the beginning of
	! both strings a byte at a time until the source ptr is aligned to
	! a word boundary. During these steps, the bytes are translated to
	! lower-case if they are upper-case, and are checked against
	! the source string.

	ENTRY(ascii_strcasecmp)

	.align 32

	save	%sp, -SA(WINDOWSIZE), %sp
	subcc	%i0, %i1, %i2		! s1 == s2 ?
	bz	.stringsequal		! yup, done, strings equal
	andcc	%i0, 3, %i3		! s1 word-aligned ?
	bz	.s1aligned1		! yup
	sethi	%hi(0x80808080), %i4	! start loading Mycroft's magic1

	ldub	[%i1 + %i2], %i0	! s1[0]
	ldub	[%i1], %g1		! s2[0]
	sub	%i0, 'A', %l0		! transform for faster uppercase check
	sub	%g1, 'A', %l1		! transform for faster uppercase check
	cmp	%l0, ('Z' - 'A')	! s1[0] uppercase?
	bleu,a	.noxlate11		! yes
	add	%i0, ('a' - 'A'), %i0	! s1[0] = tolower(s1[0])
.noxlate11:
	cmp	%l1, ('Z' - 'A')	! s2[0] uppercase?
	bleu,a	.noxlate12		! yes
	add	%g1, ('a' - 'A'), %g1	! s2[0] = tolower(s2[0])
.noxlate12:
	subcc	%i0, %g1, %i0		! tolower(s1[0]) != tolower(s2[0]) ?
	bne	.done			! yup, done
	inc	%i1			! s1++, s2++
	addcc	%i0, %g1, %i0		! s1[0] == 0 ?
	bz	.done			! yup, done, strings equal
	cmp	%i3, 3			! s1 aligned now?
	bz	.s1aligned2		! yup
	sethi	%hi(0x01010101), %i5	! start loading Mycroft's magic2

	ldub	[%i1 + %i2], %i0	! s1[1]
	ldub	[%i1], %g1		! s2[1]
	sub	%i0, 'A', %l0		! transform for faster uppercase check
	sub	%g1, 'A', %l1		! transform for faster uppercase check
	cmp	%l0, ('Z' - 'A')	! s1[1] uppercase?
	bleu,a	.noxlate21		! yes
	add	%i0, ('a' - 'A'), %i0	! s1[1] = tolower(s1[1])
.noxlate21:
	cmp	%l1, ('Z' - 'A')	! s2[1] uppercase?
	bleu,a	.noxlate22		! yes
	add	%g1, ('a' - 'A'), %g1	! s2[1] = tolower(s2[1])
.noxlate22:
	subcc	%i0, %g1, %i0		! tolower(s1[1]) != tolower(s2[1]) ?
	bne	.done			! yup, done
	inc	%i1			! s1++, s2++
	addcc	%i0, %g1, %i0		! s1[1] == 0 ?
	bz	.done			! yup, done, strings equal
	cmp	%i3, 2			! s1 aligned now?
	bz	.s1aligned3		! yup
	or	%i4, %lo(0x80808080), %i4 ! finish loading Mycroft's magic1

	ldub	[%i1 + %i2], %i0	! s1[2]
	ldub	[%i1], %g1		! s2[2]
	sub	%i0, 'A', %l0		! transform for faster uppercase check
	sub	%g1, 'A', %l1		! transform for faster uppercase check
	cmp	%l0, ('Z' - 'A')	! s1[2] uppercase?
	bleu,a	.noxlate31		! yes
	add	%i0, ('a' - 'A'), %i0	! s1[2] = tolower(s1[2])
.noxlate31:
	cmp	%l1, ('Z' - 'A')	! s2[2] uppercase?
	bleu,a	.noxlate32		! yes
	add	%g1, ('a' - 'A'), %g1	! s2[2] = tolower(s2[2])
.noxlate32:
	subcc	%i0, %g1, %i0		! tolower(s1[2]) != tolower(s2[2]) ?
	bne	.done			! yup, done
	inc	%i1			! s1++, s2++
	addcc	%i0, %g1, %i0		! s1[2] == 0 ?
	bz	.done			! yup, done, strings equal
	or	%i5, %lo(0x01010101), %i5 ! finish loading Mycroft's magic2
	ba	.s1aligned4		! s1 aligned now
	andcc	%i1, 3, %i3		! s2 word-aligned ?

	! Here, we initialize our checks for a zero byte and decide
	! whether or not we can optimize further if we're fortunate
	! enough to have a word aligned destination

.s1aligned1:
	sethi	%hi(0x01010101), %i5	! start loading Mycroft's magic2
.s1aligned2:
	or	%i4, %lo(0x80808080), %i4 ! finish loading Mycroft's magic1
.s1aligned3:
	or	%i5, %lo(0x01010101), %i5 ! finish loading Mycroft's magic2
	andcc	%i1, 3, %i3		! s2 word aligned ?
.s1aligned4:
	sethi	%hi(0x3f3f3f3f), %l2	! load m2 for parallel tolower()
	sethi	%hi(0x25252525), %l3	! load m3 for parallel tolower()
	or	%l2, %lo(0x3f3f3f3f), %l2 ! finish loading m2
	bz	.word4			! yup, s2 word-aligned
	or	%l3, %lo(0x25252525), %l3 ! finish loading m3

	add	%i2, %i3, %i2		! start adjusting offset s1-s2
	sll	%i3, 3, %l6		! shift factor for left shifts
	andn	%i1, 3, %i1		! round s2 pointer down to word boundary
	sub	%g0, %l6, %l7		! shift factor for right shifts
	orn	%i3, %g0, %i3		! generate all ones
	lduw	[%i1], %i0		! new lower word from s2
	srl	%i3, %l6, %i3		! mask for fixing up bytes
	sll	%i0, %l6, %g1		! partial unaligned word from s2
	orn	%i0, %i3, %i0		! force start bytes to non-zero
	nop				! pad to align loop to 16-byte boundary
	nop				! pad to align loop to 16-byte boundary

	! This is the comparison procedure used if the destination is not
	! word aligned, if it is, we use word4 & cmp4

.cmp:
	andn	%i4, %i0, %l4		! ~word & 0x80808080
	sub	%i0, %i5, %l5		! word - 0x01010101
	andcc	%l5, %l4, %g0		! (word - 0x01010101) & ~word & 0x80808080
	bz,a	.doload			! no null byte in previous aligned s2 word
	lduw	[%i1 + 4], %i0		! load next aligned word from s2
					! (annulled if a null byte was seen)
.doload:
	srl	%i0, %l7, %i3		! byte(s) from new aligned word from s2
	or	%g1, %i3, %g1		! merge to get unaligned word from s2
	lduw	[%i1 + %i2], %i3	! x1 = word from s1
	andn	%i3, %i4, %l0		! q1 = x1 & ~m1
	andn	%g1, %i4, %l4		! q2 = x2 & ~m1
	add	%l0, %l2, %l1		! p1 = q1 + m2
	add	%l4, %l2, %l5		! p2 = q2 + m2
	add	%l0, %l3, %l0		! q1 = q1 + m3
	add	%l4, %l3, %l4		! q2 = q2 + m3
	andn	%l1, %l0, %l1		! p1 = p1 & ~q1
	andn	%l5, %l4, %l5		! p2 = p2 & ~q2
	andn	%i4, %i3, %l0		! q1 = m1 & ~x1
	andn	%i4, %g1, %l4		! q2 = m1 & ~x2
	and	%l0, %l1, %l0		! q1 = p1 & q1
	and	%l4, %l5, %l4		! q2 = p2 & q2
	srl	%l0, 2, %l0		! q1 = q1 >> 2
	srl	%l4, 2, %l4		! q2 = q2 >> 2
	add	%l0, %i3, %i3		! lowercase word from s1
	add	%l4, %g1, %g1		! lowercase word from s2
	cmp	%i3, %g1		! tolower(*s1) != tolower(*s2) ?
	bne	.wordsdiffer		! yup, now find byte that is different
	add	%i1, 4, %i1		! s1+=4, s2+=4
	andn	%i4, %i3, %l4		! ~word & 0x80808080
	sub	%i3, %i5, %l5		! word - 0x01010101
	andcc	%l5, %l4, %g0		! (word - 0x01010101) & ~word & 0x80808080
	bz	.cmp			! no null-byte in s1 yet
	sll	%i0, %l6, %g1		! bytes from old aligned word from s2

	! words are equal but the end of s1 has been reached
	! this means the strings must be equal
.stringsequal:
	ret				! return
	restore	%g0, %g0, %o0		! return 0, i.e. strings are equal
	nop				! pad

	! we have a word aligned source and destination!  This means
	! things get to go fast!

.word4:
	lduw	[%i1 + %i2], %i3	! x1 = word from s1

.cmp4:
	andn	%i3, %i4, %l0		! q1 = x1 & ~m1
	lduw	[%i1], %g1		! x2 = word from s2
	andn	%g1, %i4, %l4		! q2 = x2 & ~m1
	add	%l0, %l2, %l1		! p1 = q1 + m2
	add	%l4, %l2, %l5		! p2 = q2 + m2
	add	%l0, %l3, %l0		! q1 = q1 + m3
	add	%l4, %l3, %l4		! q2 = q2 + m3
	andn	%l1, %l0, %l1		! p1 = p1 & ~q1
	andn	%l5, %l4, %l5		! p2 = p2 & ~q2
	andn	%i4, %i3, %l0		! q1 = m1 & ~x1
	andn	%i4, %g1, %l4		! q2 = m1 & ~x2
	and	%l0, %l1, %l0		! q1 = p1 & q1
	and	%l4, %l5, %l4		! q2 = p2 & q2
	srl	%l0, 2, %l0		! q1 = q1 >> 2
	srl	%l4, 2, %l4		! q2 = q2 >> 2
	add	%l0, %i3, %i3		! lowercase word from s1
	add	%l4, %g1, %g1		! lowercase word from s2
	cmp	%i3, %g1		! tolower(*s1) != tolower(*s2) ?
	bne	.wordsdiffer		! yup, now find mismatching character
	add	%i1, 4, %i1		! s1+=4, s2+=4
	andn	%i4, %i3, %l4		! ~word & 0x80808080
	sub	%i3, %i5, %l5		! word - 0x01010101
	andcc	%l5, %l4, %g0		! (word - 0x01010101) & ~word & 0x80808080
	bz,a	.cmp4			! no null-byte in s1 yet
	lduw	[%i1 + %i2], %i3	! load word from s1

	! words are equal but the end of s1 has been reached
	! this means the strings must be equal
.stringsequal4:
	ret				! return
	restore	%g0, %g0, %o0		! return 0, i.e. strings are equal

	! Reached only via "bne .wordsdiffer", so the lowercased words in
	! %i3 (s1) and %g1 (s2) are known to differ. Walk them from the
	! most significant byte down, stopping at the first mismatch or at
	! a null byte in s1.
.wordsdiffer:
	srl	%g1, 24, %i2		! first byte of mismatching word in s2
	srl	%i3, 24, %i1		! first byte of mismatching word in s1
	subcc	%i1, %i2, %i0		! *s1-*s2
	bnz	.done			! bytes differ, return difference
	srl	%g1, 16, %i2		! second byte of mismatching word in s2
	andcc	%i1, 0xff, %i0		! *s1 == 0 ?
	bz	.done			! yup, done, strings equal

	! we know byte 1 is equal, so can compare bytes 1,2 as a group

	srl	%i3, 16, %i1		! second byte of mismatching word in s1
	subcc	%i1, %i2, %i0		! *s1-*s2
	bnz	.done			! bytes differ, return difference
	srl	%g1, 8, %i2		! third byte of mismatching word in s2
	andcc	%i1, 0xff, %i0		! *s1 == 0 ?
	bz	.done			! yup, done, strings equal

	! we know bytes 1, 2 are equal, so can compare bytes 1,2,3 as a group

	srl	%i3, 8, %i1		! third byte of mismatching word in s1
	subcc	%i1, %i2, %i0		! *s1-*s2
	bnz	.done			! bytes differ, return difference
	andcc	%i1, 0xff, %g0		! *s1 == 0 ?
	bz	.stringsequal		! yup, done, strings equal

	! we know bytes 1,2,3 are equal, so can compare bytes 1,2,3,4 as group

	subcc	%i3, %g1, %i0		! *s1-*s2
	bz,a	.done			! not taken: the words are known to differ,
					! so fall through with the difference
	andcc	%i3, 0xff, %i0		! annulled when the branch is not taken

.done:
	ret				! return
	restore	%i0, %g0, %o0		! return 0 or byte difference

	SET_SIZE(ascii_strcasecmp)