xref: /netbsd-src/external/gpl3/gdb/dist/libiberty/strverscmp.c (revision 7e120ff03ede3fe64e2c8620c01465d528502ddb)
198b9484cSchristos /* Compare strings while treating digits characters numerically.
2*7e120ff0Schristos    Copyright (C) 1997-2024 Free Software Foundation, Inc.
398b9484cSchristos    This file is part of the libiberty library.
44b169a6bSchristos    Contributed by Jean-François Bignolles <bignolle@ecoledoc.ibp.fr>, 1997.
598b9484cSchristos 
698b9484cSchristos    Libiberty is free software; you can redistribute it and/or
798b9484cSchristos    modify it under the terms of the GNU Lesser General Public
898b9484cSchristos    License as published by the Free Software Foundation; either
998b9484cSchristos    version 2.1 of the License, or (at your option) any later version.
1098b9484cSchristos 
1198b9484cSchristos    Libiberty is distributed in the hope that it will be useful,
1298b9484cSchristos    but WITHOUT ANY WARRANTY; without even the implied warranty of
1398b9484cSchristos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
1498b9484cSchristos    Lesser General Public License for more details.
1598b9484cSchristos 
1698b9484cSchristos    You should have received a copy of the GNU Lesser General Public
1798b9484cSchristos    License along with the GNU C Library; if not, write to the Free
1898b9484cSchristos    Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
1998b9484cSchristos    02110-1301 USA.  */
2098b9484cSchristos 
2198b9484cSchristos #include "libiberty.h"
2298b9484cSchristos #include "safe-ctype.h"
2398b9484cSchristos 
2498b9484cSchristos /*
2598b9484cSchristos @deftypefun int strverscmp (const char *@var{s1}, const char *@var{s2})
2698b9484cSchristos The @code{strverscmp} function compares the string @var{s1} against
2798b9484cSchristos @var{s2}, considering them as holding indices/version numbers.  Return
2898b9484cSchristos value follows the same conventions as found in the @code{strverscmp}
2998b9484cSchristos function.  In fact, if @var{s1} and @var{s2} contain no digits,
3098b9484cSchristos @code{strverscmp} behaves like @code{strcmp}.
3198b9484cSchristos 
3298b9484cSchristos Basically, we compare strings normally (character by character), until
3398b9484cSchristos we find a digit in each string - then we enter a special comparison
3498b9484cSchristos mode, where each sequence of digits is taken as a whole.  If we reach the
3598b9484cSchristos end of these two parts without noticing a difference, we return to the
3698b9484cSchristos standard comparison mode.  There are two types of numeric parts:
3798b9484cSchristos "integral" and "fractional" (those  begin with a '0'). The types
3898b9484cSchristos of the numeric parts affect the way we sort them:
3998b9484cSchristos 
4098b9484cSchristos @itemize @bullet
4198b9484cSchristos @item
4298b9484cSchristos integral/integral: we compare values as you would expect.
4398b9484cSchristos 
4498b9484cSchristos @item
4598b9484cSchristos fractional/integral: the fractional part is less than the integral one.
4698b9484cSchristos Again, no surprise.
4798b9484cSchristos 
4898b9484cSchristos @item
4998b9484cSchristos fractional/fractional: the things become a bit more complex.
5098b9484cSchristos If the common prefix contains only leading zeroes, the longest part is less
5198b9484cSchristos than the other one; else the comparison behaves normally.
5298b9484cSchristos @end itemize
5398b9484cSchristos 
5498b9484cSchristos @smallexample
5598b9484cSchristos strverscmp ("no digit", "no digit")
5698b9484cSchristos     @result{} 0    // @r{same behavior as strcmp.}
5798b9484cSchristos strverscmp ("item#99", "item#100")
5898b9484cSchristos     @result{} <0   // @r{same prefix, but 99 < 100.}
5998b9484cSchristos strverscmp ("alpha1", "alpha001")
6098b9484cSchristos     @result{} >0   // @r{fractional part inferior to integral one.}
6198b9484cSchristos strverscmp ("part1_f012", "part1_f01")
6298b9484cSchristos     @result{} >0   // @r{two fractional parts.}
6398b9484cSchristos strverscmp ("foo.009", "foo.0")
6498b9484cSchristos     @result{} <0   // @r{idem, but with leading zeroes only.}
6598b9484cSchristos @end smallexample
6698b9484cSchristos 
6798b9484cSchristos This function is especially useful when dealing with filename sorting,
6898b9484cSchristos because filenames frequently hold indices/version numbers.
6998b9484cSchristos @end deftypefun
7098b9484cSchristos 
7198b9484cSchristos */
7298b9484cSchristos 
7398b9484cSchristos /* states: S_N: normal, S_I: comparing integral part, S_F: comparing
7498b9484cSchristos            fractional parts, S_Z: idem but with leading Zeroes only */
7598b9484cSchristos #define  S_N    0x0
7698b9484cSchristos #define  S_I    0x4
7798b9484cSchristos #define  S_F    0x8
7898b9484cSchristos #define  S_Z    0xC
7998b9484cSchristos 
8098b9484cSchristos /* result_type: CMP: return diff; LEN: compare using len_diff/diff */
8198b9484cSchristos #define  CMP    2
8298b9484cSchristos #define  LEN    3
8398b9484cSchristos 
8498b9484cSchristos 
8598b9484cSchristos /* Compare S1 and S2 as strings holding indices/version numbers,
8698b9484cSchristos    returning less than, equal to or greater than zero if S1 is less than,
8798b9484cSchristos    equal to or greater than S2 (for more info, see the Glibc texinfo doc).  */
8898b9484cSchristos 
8998b9484cSchristos int
9098b9484cSchristos strverscmp (const char *s1, const char *s2)
9198b9484cSchristos {
9298b9484cSchristos   const unsigned char *p1 = (const unsigned char *) s1;
9398b9484cSchristos   const unsigned char *p2 = (const unsigned char *) s2;
9498b9484cSchristos   unsigned char c1, c2;
9598b9484cSchristos   int state;
9698b9484cSchristos   int diff;
9798b9484cSchristos 
9898b9484cSchristos   /* Symbol(s)    0       [1-9]   others  (padding)
9998b9484cSchristos      Transition   (10) 0  (01) d  (00) x  (11) -   */
10098b9484cSchristos   static const unsigned int next_state[] =
10198b9484cSchristos     {
10298b9484cSchristos       /* state    x    d    0    - */
10398b9484cSchristos       /* S_N */  S_N, S_I, S_Z, S_N,
10498b9484cSchristos       /* S_I */  S_N, S_I, S_I, S_I,
10598b9484cSchristos       /* S_F */  S_N, S_F, S_F, S_F,
10698b9484cSchristos       /* S_Z */  S_N, S_F, S_Z, S_Z
10798b9484cSchristos     };
10898b9484cSchristos 
10998b9484cSchristos   static const int result_type[] =
11098b9484cSchristos     {
11198b9484cSchristos       /* state   x/x  x/d  x/0  x/-  d/x  d/d  d/0  d/-
11298b9484cSchristos                  0/x  0/d  0/0  0/-  -/x  -/d  -/0  -/- */
11398b9484cSchristos 
11498b9484cSchristos       /* S_N */  CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP,
11598b9484cSchristos                  CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP,
11698b9484cSchristos       /* S_I */  CMP, -1,  -1,  CMP, +1,  LEN, LEN, CMP,
11798b9484cSchristos                  +1,  LEN, LEN, CMP, CMP, CMP, CMP, CMP,
11898b9484cSchristos       /* S_F */  CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP,
11998b9484cSchristos                  CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP,
12098b9484cSchristos       /* S_Z */  CMP, +1,  +1,  CMP, -1,  CMP, CMP, CMP,
12198b9484cSchristos                  -1,  CMP, CMP, CMP
12298b9484cSchristos     };
12398b9484cSchristos 
12498b9484cSchristos   if (p1 == p2)
12598b9484cSchristos     return 0;
12698b9484cSchristos 
12798b9484cSchristos   c1 = *p1++;
12898b9484cSchristos   c2 = *p2++;
12998b9484cSchristos   /* Hint: '0' is a digit too.  */
13098b9484cSchristos   state = S_N | ((c1 == '0') + (ISDIGIT (c1) != 0));
13198b9484cSchristos 
13298b9484cSchristos   while ((diff = c1 - c2) == 0 && c1 != '\0')
13398b9484cSchristos     {
13498b9484cSchristos       state = next_state[state];
13598b9484cSchristos       c1 = *p1++;
13698b9484cSchristos       c2 = *p2++;
13798b9484cSchristos       state |= (c1 == '0') + (ISDIGIT (c1) != 0);
13898b9484cSchristos     }
13998b9484cSchristos 
14098b9484cSchristos   state = result_type[state << 2 | (((c2 == '0') + (ISDIGIT (c2) != 0)))];
14198b9484cSchristos 
14298b9484cSchristos   switch (state)
14398b9484cSchristos     {
14498b9484cSchristos     case CMP:
14598b9484cSchristos       return diff;
14698b9484cSchristos 
14798b9484cSchristos     case LEN:
14898b9484cSchristos       while (ISDIGIT (*p1++))
14998b9484cSchristos 	if (!ISDIGIT (*p2++))
15098b9484cSchristos 	  return 1;
15198b9484cSchristos 
15298b9484cSchristos       return ISDIGIT (*p2) ? -1 : diff;
15398b9484cSchristos 
15498b9484cSchristos     default:
15598b9484cSchristos       return state;
15698b9484cSchristos     }
15798b9484cSchristos }
158