116dce513Schristos /* Compare strings while treating digits characters numerically.
2*e992f068Schristos Copyright (C) 1997-2022 Free Software Foundation, Inc.
316dce513Schristos This file is part of the libiberty library.
4*e992f068Schristos Contributed by Jean-François Bignolles <bignolle@ecoledoc.ibp.fr>, 1997.
516dce513Schristos
616dce513Schristos Libiberty is free software; you can redistribute it and/or
716dce513Schristos modify it under the terms of the GNU Lesser General Public
816dce513Schristos License as published by the Free Software Foundation; either
916dce513Schristos version 2.1 of the License, or (at your option) any later version.
1016dce513Schristos
1116dce513Schristos Libiberty is distributed in the hope that it will be useful,
1216dce513Schristos but WITHOUT ANY WARRANTY; without even the implied warranty of
1316dce513Schristos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1416dce513Schristos Lesser General Public License for more details.
1516dce513Schristos
1616dce513Schristos You should have received a copy of the GNU Lesser General Public
1716dce513Schristos License along with the GNU C Library; if not, write to the Free
1816dce513Schristos Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
1916dce513Schristos 02110-1301 USA. */
2016dce513Schristos
2116dce513Schristos #include "libiberty.h"
2216dce513Schristos #include "safe-ctype.h"
2316dce513Schristos
2416dce513Schristos /*
2516dce513Schristos @deftypefun int strverscmp (const char *@var{s1}, const char *@var{s2})
2616dce513Schristos The @code{strverscmp} function compares the string @var{s1} against
2716dce513Schristos @var{s2}, considering them as holding indices/version numbers. Return
2816dce513Schristos value follows the same conventions as found in the @code{strverscmp}
2916dce513Schristos function. In fact, if @var{s1} and @var{s2} contain no digits,
3016dce513Schristos @code{strverscmp} behaves like @code{strcmp}.
3116dce513Schristos
3216dce513Schristos Basically, we compare strings normally (character by character), until
3316dce513Schristos we find a digit in each string - then we enter a special comparison
3416dce513Schristos mode, where each sequence of digits is taken as a whole. If we reach the
3516dce513Schristos end of these two parts without noticing a difference, we return to the
3616dce513Schristos standard comparison mode. There are two types of numeric parts:
3716dce513Schristos "integral" and "fractional" (those begin with a '0'). The types
3816dce513Schristos of the numeric parts affect the way we sort them:
3916dce513Schristos
4016dce513Schristos @itemize @bullet
4116dce513Schristos @item
4216dce513Schristos integral/integral: we compare values as you would expect.
4316dce513Schristos
4416dce513Schristos @item
4516dce513Schristos fractional/integral: the fractional part is less than the integral one.
4616dce513Schristos Again, no surprise.
4716dce513Schristos
4816dce513Schristos @item
4916dce513Schristos fractional/fractional: the things become a bit more complex.
5016dce513Schristos If the common prefix contains only leading zeroes, the longest part is less
5116dce513Schristos than the other one; else the comparison behaves normally.
5216dce513Schristos @end itemize
5316dce513Schristos
5416dce513Schristos @smallexample
5516dce513Schristos strverscmp ("no digit", "no digit")
5616dce513Schristos @result{} 0 // @r{same behavior as strcmp.}
5716dce513Schristos strverscmp ("item#99", "item#100")
5816dce513Schristos @result{} <0 // @r{same prefix, but 99 < 100.}
5916dce513Schristos strverscmp ("alpha1", "alpha001")
6016dce513Schristos @result{} >0 // @r{fractional part inferior to integral one.}
6116dce513Schristos strverscmp ("part1_f012", "part1_f01")
6216dce513Schristos @result{} >0 // @r{two fractional parts.}
6316dce513Schristos strverscmp ("foo.009", "foo.0")
6416dce513Schristos @result{} <0 // @r{idem, but with leading zeroes only.}
6516dce513Schristos @end smallexample
6616dce513Schristos
6716dce513Schristos This function is especially useful when dealing with filename sorting,
6816dce513Schristos because filenames frequently hold indices/version numbers.
6916dce513Schristos @end deftypefun
7016dce513Schristos
7116dce513Schristos */
7216dce513Schristos
7316dce513Schristos /* states: S_N: normal, S_I: comparing integral part, S_F: comparing
7416dce513Schristos fractional parts, S_Z: idem but with leading Zeroes only */
7516dce513Schristos #define S_N 0x0
7616dce513Schristos #define S_I 0x4
7716dce513Schristos #define S_F 0x8
7816dce513Schristos #define S_Z 0xC
7916dce513Schristos
8016dce513Schristos /* result_type: CMP: return diff; LEN: compare using len_diff/diff */
8116dce513Schristos #define CMP 2
8216dce513Schristos #define LEN 3
8316dce513Schristos
8416dce513Schristos
8516dce513Schristos /* Compare S1 and S2 as strings holding indices/version numbers,
8616dce513Schristos returning less than, equal to or greater than zero if S1 is less than,
8716dce513Schristos equal to or greater than S2 (for more info, see the Glibc texinfo doc). */
8816dce513Schristos
8916dce513Schristos int
strverscmp(const char * s1,const char * s2)9016dce513Schristos strverscmp (const char *s1, const char *s2)
9116dce513Schristos {
9216dce513Schristos const unsigned char *p1 = (const unsigned char *) s1;
9316dce513Schristos const unsigned char *p2 = (const unsigned char *) s2;
9416dce513Schristos unsigned char c1, c2;
9516dce513Schristos int state;
9616dce513Schristos int diff;
9716dce513Schristos
9816dce513Schristos /* Symbol(s) 0 [1-9] others (padding)
9916dce513Schristos Transition (10) 0 (01) d (00) x (11) - */
10016dce513Schristos static const unsigned int next_state[] =
10116dce513Schristos {
10216dce513Schristos /* state x d 0 - */
10316dce513Schristos /* S_N */ S_N, S_I, S_Z, S_N,
10416dce513Schristos /* S_I */ S_N, S_I, S_I, S_I,
10516dce513Schristos /* S_F */ S_N, S_F, S_F, S_F,
10616dce513Schristos /* S_Z */ S_N, S_F, S_Z, S_Z
10716dce513Schristos };
10816dce513Schristos
10916dce513Schristos static const int result_type[] =
11016dce513Schristos {
11116dce513Schristos /* state x/x x/d x/0 x/- d/x d/d d/0 d/-
11216dce513Schristos 0/x 0/d 0/0 0/- -/x -/d -/0 -/- */
11316dce513Schristos
11416dce513Schristos /* S_N */ CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP,
11516dce513Schristos CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP,
11616dce513Schristos /* S_I */ CMP, -1, -1, CMP, +1, LEN, LEN, CMP,
11716dce513Schristos +1, LEN, LEN, CMP, CMP, CMP, CMP, CMP,
11816dce513Schristos /* S_F */ CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP,
11916dce513Schristos CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP,
12016dce513Schristos /* S_Z */ CMP, +1, +1, CMP, -1, CMP, CMP, CMP,
12116dce513Schristos -1, CMP, CMP, CMP
12216dce513Schristos };
12316dce513Schristos
12416dce513Schristos if (p1 == p2)
12516dce513Schristos return 0;
12616dce513Schristos
12716dce513Schristos c1 = *p1++;
12816dce513Schristos c2 = *p2++;
12916dce513Schristos /* Hint: '0' is a digit too. */
13016dce513Schristos state = S_N | ((c1 == '0') + (ISDIGIT (c1) != 0));
13116dce513Schristos
13216dce513Schristos while ((diff = c1 - c2) == 0 && c1 != '\0')
13316dce513Schristos {
13416dce513Schristos state = next_state[state];
13516dce513Schristos c1 = *p1++;
13616dce513Schristos c2 = *p2++;
13716dce513Schristos state |= (c1 == '0') + (ISDIGIT (c1) != 0);
13816dce513Schristos }
13916dce513Schristos
14016dce513Schristos state = result_type[state << 2 | (((c2 == '0') + (ISDIGIT (c2) != 0)))];
14116dce513Schristos
14216dce513Schristos switch (state)
14316dce513Schristos {
14416dce513Schristos case CMP:
14516dce513Schristos return diff;
14616dce513Schristos
14716dce513Schristos case LEN:
14816dce513Schristos while (ISDIGIT (*p1++))
14916dce513Schristos if (!ISDIGIT (*p2++))
15016dce513Schristos return 1;
15116dce513Schristos
15216dce513Schristos return ISDIGIT (*p2) ? -1 : diff;
15316dce513Schristos
15416dce513Schristos default:
15516dce513Schristos return state;
15616dce513Schristos }
15716dce513Schristos }
158