198b9484cSchristos /* Compare strings while treating digits characters numerically. 2*7e120ff0Schristos Copyright (C) 1997-2024 Free Software Foundation, Inc. 398b9484cSchristos This file is part of the libiberty library. 44b169a6bSchristos Contributed by Jean-François Bignolles <bignolle@ecoledoc.ibp.fr>, 1997. 598b9484cSchristos 698b9484cSchristos Libiberty is free software; you can redistribute it and/or 798b9484cSchristos modify it under the terms of the GNU Lesser General Public 898b9484cSchristos License as published by the Free Software Foundation; either 998b9484cSchristos version 2.1 of the License, or (at your option) any later version. 1098b9484cSchristos 1198b9484cSchristos Libiberty is distributed in the hope that it will be useful, 1298b9484cSchristos but WITHOUT ANY WARRANTY; without even the implied warranty of 1398b9484cSchristos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1498b9484cSchristos Lesser General Public License for more details. 1598b9484cSchristos 1698b9484cSchristos You should have received a copy of the GNU Lesser General Public 1798b9484cSchristos License along with the GNU C Library; if not, write to the Free 1898b9484cSchristos Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 1998b9484cSchristos 02110-1301 USA. */ 2098b9484cSchristos 2198b9484cSchristos #include "libiberty.h" 2298b9484cSchristos #include "safe-ctype.h" 2398b9484cSchristos 2498b9484cSchristos /* 2598b9484cSchristos @deftypefun int strverscmp (const char *@var{s1}, const char *@var{s2}) 2698b9484cSchristos The @code{strverscmp} function compares the string @var{s1} against 2798b9484cSchristos @var{s2}, considering them as holding indices/version numbers. Return 2898b9484cSchristos value follows the same conventions as found in the @code{strverscmp} 2998b9484cSchristos function. In fact, if @var{s1} and @var{s2} contain no digits, 3098b9484cSchristos @code{strverscmp} behaves like @code{strcmp}. 3198b9484cSchristos 3298b9484cSchristos Basically, we compare strings normally (character by character), until 3398b9484cSchristos we find a digit in each string - then we enter a special comparison 3498b9484cSchristos mode, where each sequence of digits is taken as a whole. If we reach the 3598b9484cSchristos end of these two parts without noticing a difference, we return to the 3698b9484cSchristos standard comparison mode. There are two types of numeric parts: 3798b9484cSchristos "integral" and "fractional" (those begin with a '0'). The types 3898b9484cSchristos of the numeric parts affect the way we sort them: 3998b9484cSchristos 4098b9484cSchristos @itemize @bullet 4198b9484cSchristos @item 4298b9484cSchristos integral/integral: we compare values as you would expect. 4398b9484cSchristos 4498b9484cSchristos @item 4598b9484cSchristos fractional/integral: the fractional part is less than the integral one. 4698b9484cSchristos Again, no surprise. 4798b9484cSchristos 4898b9484cSchristos @item 4998b9484cSchristos fractional/fractional: the things become a bit more complex. 5098b9484cSchristos If the common prefix contains only leading zeroes, the longest part is less 5198b9484cSchristos than the other one; else the comparison behaves normally. 5298b9484cSchristos @end itemize 5398b9484cSchristos 5498b9484cSchristos @smallexample 5598b9484cSchristos strverscmp ("no digit", "no digit") 5698b9484cSchristos @result{} 0 // @r{same behavior as strcmp.} 5798b9484cSchristos strverscmp ("item#99", "item#100") 5898b9484cSchristos @result{} <0 // @r{same prefix, but 99 < 100.} 5998b9484cSchristos strverscmp ("alpha1", "alpha001") 6098b9484cSchristos @result{} >0 // @r{fractional part inferior to integral one.} 6198b9484cSchristos strverscmp ("part1_f012", "part1_f01") 6298b9484cSchristos @result{} >0 // @r{two fractional parts.} 6398b9484cSchristos strverscmp ("foo.009", "foo.0") 6498b9484cSchristos @result{} <0 // @r{idem, but with leading zeroes only.} 6598b9484cSchristos @end smallexample 6698b9484cSchristos 6798b9484cSchristos This function is especially useful when dealing with filename sorting, 6898b9484cSchristos because filenames frequently hold indices/version numbers. 6998b9484cSchristos @end deftypefun 7098b9484cSchristos 7198b9484cSchristos */ 7298b9484cSchristos 7398b9484cSchristos /* states: S_N: normal, S_I: comparing integral part, S_F: comparing 7498b9484cSchristos fractional parts, S_Z: idem but with leading Zeroes only */ 7598b9484cSchristos #define S_N 0x0 7698b9484cSchristos #define S_I 0x4 7798b9484cSchristos #define S_F 0x8 7898b9484cSchristos #define S_Z 0xC 7998b9484cSchristos 8098b9484cSchristos /* result_type: CMP: return diff; LEN: compare using len_diff/diff */ 8198b9484cSchristos #define CMP 2 8298b9484cSchristos #define LEN 3 8398b9484cSchristos 8498b9484cSchristos 8598b9484cSchristos /* Compare S1 and S2 as strings holding indices/version numbers, 8698b9484cSchristos returning less than, equal to or greater than zero if S1 is less than, 8798b9484cSchristos equal to or greater than S2 (for more info, see the Glibc texinfo doc). */ 8898b9484cSchristos 8998b9484cSchristos int 9098b9484cSchristos strverscmp (const char *s1, const char *s2) 9198b9484cSchristos { 9298b9484cSchristos const unsigned char *p1 = (const unsigned char *) s1; 9398b9484cSchristos const unsigned char *p2 = (const unsigned char *) s2; 9498b9484cSchristos unsigned char c1, c2; 9598b9484cSchristos int state; 9698b9484cSchristos int diff; 9798b9484cSchristos 9898b9484cSchristos /* Symbol(s) 0 [1-9] others (padding) 9998b9484cSchristos Transition (10) 0 (01) d (00) x (11) - */ 10098b9484cSchristos static const unsigned int next_state[] = 10198b9484cSchristos { 10298b9484cSchristos /* state x d 0 - */ 10398b9484cSchristos /* S_N */ S_N, S_I, S_Z, S_N, 10498b9484cSchristos /* S_I */ S_N, S_I, S_I, S_I, 10598b9484cSchristos /* S_F */ S_N, S_F, S_F, S_F, 10698b9484cSchristos /* S_Z */ S_N, S_F, S_Z, S_Z 10798b9484cSchristos }; 10898b9484cSchristos 10998b9484cSchristos static const int result_type[] = 11098b9484cSchristos { 11198b9484cSchristos /* state x/x x/d x/0 x/- d/x d/d d/0 d/- 11298b9484cSchristos 0/x 0/d 0/0 0/- -/x -/d -/0 -/- */ 11398b9484cSchristos 11498b9484cSchristos /* S_N */ CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP, 11598b9484cSchristos CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, 11698b9484cSchristos /* S_I */ CMP, -1, -1, CMP, +1, LEN, LEN, CMP, 11798b9484cSchristos +1, LEN, LEN, CMP, CMP, CMP, CMP, CMP, 11898b9484cSchristos /* S_F */ CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP, 11998b9484cSchristos CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, 12098b9484cSchristos /* S_Z */ CMP, +1, +1, CMP, -1, CMP, CMP, CMP, 12198b9484cSchristos -1, CMP, CMP, CMP 12298b9484cSchristos }; 12398b9484cSchristos 12498b9484cSchristos if (p1 == p2) 12598b9484cSchristos return 0; 12698b9484cSchristos 12798b9484cSchristos c1 = *p1++; 12898b9484cSchristos c2 = *p2++; 12998b9484cSchristos /* Hint: '0' is a digit too. */ 13098b9484cSchristos state = S_N | ((c1 == '0') + (ISDIGIT (c1) != 0)); 13198b9484cSchristos 13298b9484cSchristos while ((diff = c1 - c2) == 0 && c1 != '\0') 13398b9484cSchristos { 13498b9484cSchristos state = next_state[state]; 13598b9484cSchristos c1 = *p1++; 13698b9484cSchristos c2 = *p2++; 13798b9484cSchristos state |= (c1 == '0') + (ISDIGIT (c1) != 0); 13898b9484cSchristos } 13998b9484cSchristos 14098b9484cSchristos state = result_type[state << 2 | (((c2 == '0') + (ISDIGIT (c2) != 0)))]; 14198b9484cSchristos 14298b9484cSchristos switch (state) 14398b9484cSchristos { 14498b9484cSchristos case CMP: 14598b9484cSchristos return diff; 14698b9484cSchristos 14798b9484cSchristos case LEN: 14898b9484cSchristos while (ISDIGIT (*p1++)) 14998b9484cSchristos if (!ISDIGIT (*p2++)) 15098b9484cSchristos return 1; 15198b9484cSchristos 15298b9484cSchristos return ISDIGIT (*p2) ? -1 : diff; 15398b9484cSchristos 15498b9484cSchristos default: 15598b9484cSchristos return state; 15698b9484cSchristos } 15798b9484cSchristos } 158