1*5bf6c2a6Smillert /* $OpenBSD: vsort.c,v 1.2 2015/04/01 21:46:38 millert Exp $ */
279428148Smillert
379428148Smillert /*-
479428148Smillert * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
579428148Smillert * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
679428148Smillert * All rights reserved.
779428148Smillert *
879428148Smillert * Redistribution and use in source and binary forms, with or without
979428148Smillert * modification, are permitted provided that the following conditions
1079428148Smillert * are met:
1179428148Smillert * 1. Redistributions of source code must retain the above copyright
1279428148Smillert * notice, this list of conditions and the following disclaimer.
1379428148Smillert * 2. Redistributions in binary form must reproduce the above copyright
1479428148Smillert * notice, this list of conditions and the following disclaimer in the
1579428148Smillert * documentation and/or other materials provided with the distribution.
1679428148Smillert *
1779428148Smillert * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1879428148Smillert * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1979428148Smillert * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2079428148Smillert * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2179428148Smillert * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2279428148Smillert * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2379428148Smillert * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2479428148Smillert * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2579428148Smillert * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2679428148Smillert * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2779428148Smillert * SUCH DAMAGE.
2879428148Smillert */
2979428148Smillert
3079428148Smillert #include <sys/types.h>
3179428148Smillert
3279428148Smillert #include <ctype.h>
3379428148Smillert #include <stdlib.h>
3479428148Smillert #include <string.h>
3579428148Smillert
3679428148Smillert #include "sort.h"
3779428148Smillert #include "vsort.h"
3879428148Smillert
3979428148Smillert static inline bool
isdigit_clocale(wchar_t c)4079428148Smillert isdigit_clocale(wchar_t c)
4179428148Smillert {
4279428148Smillert return (c >= L'0' && c <= L'9');
4379428148Smillert }
4479428148Smillert
4579428148Smillert static inline bool
isalpha_clocale(wchar_t c)4679428148Smillert isalpha_clocale(wchar_t c)
4779428148Smillert {
4879428148Smillert return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'));
4979428148Smillert }
5079428148Smillert
5179428148Smillert static inline bool
isalnum_clocale(wchar_t c)5279428148Smillert isalnum_clocale(wchar_t c)
5379428148Smillert {
5479428148Smillert return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') ||
5579428148Smillert (c >= L'0' && c <= L'9'));
5679428148Smillert }
5779428148Smillert
5879428148Smillert /*
5979428148Smillert * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$
6079428148Smillert * Set length of string before suffix.
6179428148Smillert */
6279428148Smillert static void
find_suffix(bwstring_iterator si,bwstring_iterator se,size_t * len)6379428148Smillert find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len)
6479428148Smillert {
6579428148Smillert wchar_t c;
6679428148Smillert size_t clen;
6779428148Smillert bool expect_alpha, sfx;
6879428148Smillert
6979428148Smillert sfx = false;
7079428148Smillert expect_alpha = false;
7179428148Smillert *len = 0;
7279428148Smillert clen = 0;
7379428148Smillert
7479428148Smillert while ((si < se) && (c = bws_get_iter_value(si))) {
7579428148Smillert if (expect_alpha) {
7679428148Smillert expect_alpha = false;
7779428148Smillert if (!isalpha_clocale(c) && (c != L'~'))
7879428148Smillert sfx = false;
7979428148Smillert } else if (c == L'.') {
8079428148Smillert expect_alpha = true;
8179428148Smillert if (!sfx) {
8279428148Smillert sfx = true;
8379428148Smillert *len = clen;
8479428148Smillert }
8579428148Smillert } else if (!isalnum_clocale(c) && (c != L'~'))
8679428148Smillert sfx = false;
8779428148Smillert
8879428148Smillert si = bws_iterator_inc(si, 1);
8979428148Smillert ++clen;
9079428148Smillert }
9179428148Smillert
9279428148Smillert /* This code must be here to make the implementation compatible
9379428148Smillert * with WORDING of GNU sort documentation.
9479428148Smillert * But the GNU sort implementation is not following its own
9579428148Smillert * documentation. GNU sort allows empty file extensions
9679428148Smillert * (just dot with nothing after); but the regular expression in
9779428148Smillert * their documentation does not allow empty file extensions.
9879428148Smillert * We chose to make our implementation compatible with GNU sort
9979428148Smillert * implementation. If they will ever fix their bug, this code
10079428148Smillert * must be uncommented. Or they may choose to fix the info page,
10179428148Smillert * then the code stays commented.
10279428148Smillert *
10379428148Smillert if (expect_alpha)
10479428148Smillert sfx = false;
10579428148Smillert */
10679428148Smillert
10779428148Smillert if (!sfx)
10879428148Smillert *len = clen;
10979428148Smillert }
11079428148Smillert
11179428148Smillert static inline int
cmp_chars(wchar_t c1,wchar_t c2)11279428148Smillert cmp_chars(wchar_t c1, wchar_t c2)
11379428148Smillert {
11479428148Smillert if (c1 == c2)
11579428148Smillert return 0;
11679428148Smillert
11779428148Smillert if (c1 == L'~')
11879428148Smillert return -1;
11979428148Smillert if (c2 == L'~')
12079428148Smillert return 1;
12179428148Smillert
12279428148Smillert if (isdigit_clocale(c1) || !c1)
12379428148Smillert return (isdigit_clocale(c2) || !c2) ? 0 : -1;
12479428148Smillert
12579428148Smillert if (isdigit_clocale(c2) || !c2)
12679428148Smillert return 1;
12779428148Smillert
12879428148Smillert if (isalpha_clocale(c1))
12979428148Smillert return isalpha_clocale(c2) ? (int)c1 - (int)c2 : -1;
13079428148Smillert
13179428148Smillert if (isalpha_clocale(c2))
13279428148Smillert return 1;
13379428148Smillert
13479428148Smillert return (int)c1 - (int)c2;
13579428148Smillert }
13679428148Smillert
13779428148Smillert static int
cmpversions(bwstring_iterator si1,bwstring_iterator se1,bwstring_iterator si2,bwstring_iterator se2)13879428148Smillert cmpversions(bwstring_iterator si1, bwstring_iterator se1,
13979428148Smillert bwstring_iterator si2, bwstring_iterator se2)
14079428148Smillert {
14179428148Smillert int cmp, diff;
14279428148Smillert
14379428148Smillert while ((si1 < se1) || (si2 < se2)) {
14479428148Smillert diff = 0;
14579428148Smillert
14679428148Smillert while (((si1 < se1) &&
14779428148Smillert !isdigit_clocale(bws_get_iter_value(si1))) ||
14879428148Smillert ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) {
14979428148Smillert wchar_t c1, c2;
15079428148Smillert
15179428148Smillert c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0;
15279428148Smillert c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0;
15379428148Smillert
15479428148Smillert cmp = cmp_chars(c1, c2);
15579428148Smillert if (cmp)
15679428148Smillert return cmp;
15779428148Smillert
15879428148Smillert if (si1 < se1)
15979428148Smillert si1 = bws_iterator_inc(si1, 1);
16079428148Smillert if (si2 < se2)
16179428148Smillert si2 = bws_iterator_inc(si2, 1);
16279428148Smillert }
16379428148Smillert
16479428148Smillert while (bws_get_iter_value(si1) == L'0')
16579428148Smillert si1 = bws_iterator_inc(si1, 1);
16679428148Smillert
16779428148Smillert while (bws_get_iter_value(si2) == L'0')
16879428148Smillert si2 = bws_iterator_inc(si2, 1);
16979428148Smillert
17079428148Smillert while (isdigit_clocale(bws_get_iter_value(si1)) &&
17179428148Smillert isdigit_clocale(bws_get_iter_value(si2))) {
17279428148Smillert if (!diff)
17379428148Smillert diff = ((int)bws_get_iter_value(si1) -
17479428148Smillert (int)bws_get_iter_value(si2));
17579428148Smillert si1 = bws_iterator_inc(si1, 1);
17679428148Smillert si2 = bws_iterator_inc(si2, 1);
17779428148Smillert }
17879428148Smillert
17979428148Smillert if (isdigit_clocale(bws_get_iter_value(si1)))
18079428148Smillert return 1;
18179428148Smillert
18279428148Smillert if (isdigit_clocale(bws_get_iter_value(si2)))
18379428148Smillert return -1;
18479428148Smillert
18579428148Smillert if (diff)
18679428148Smillert return diff;
18779428148Smillert }
18879428148Smillert
18979428148Smillert return 0;
19079428148Smillert }
19179428148Smillert
19279428148Smillert /*
19379428148Smillert * Compare two version strings
19479428148Smillert */
19579428148Smillert int
vcmp(struct bwstring * s1,struct bwstring * s2)19679428148Smillert vcmp(struct bwstring *s1, struct bwstring *s2)
19779428148Smillert {
19879428148Smillert bwstring_iterator si1, si2;
19979428148Smillert wchar_t c1, c2;
20079428148Smillert size_t len1, len2, slen1, slen2;
20179428148Smillert int cmp_bytes, cmp_res;
20279428148Smillert
20379428148Smillert if (s1 == s2)
20479428148Smillert return 0;
20579428148Smillert
20679428148Smillert cmp_bytes = bwscmp(s1, s2, 0);
20779428148Smillert if (cmp_bytes == 0)
20879428148Smillert return 0;
20979428148Smillert
21079428148Smillert len1 = slen1 = BWSLEN(s1);
21179428148Smillert len2 = slen2 = BWSLEN(s2);
21279428148Smillert
21379428148Smillert if (slen1 < 1)
21479428148Smillert return -1;
21579428148Smillert if (slen2 < 1)
21679428148Smillert return 1;
21779428148Smillert
21879428148Smillert si1 = bws_begin(s1);
21979428148Smillert si2 = bws_begin(s2);
22079428148Smillert
22179428148Smillert c1 = bws_get_iter_value(si1);
22279428148Smillert c2 = bws_get_iter_value(si2);
22379428148Smillert
22479428148Smillert if (c1 == L'.' && (slen1 == 1))
22579428148Smillert return -1;
22679428148Smillert
22779428148Smillert if (c2 == L'.' && (slen2 == 1))
22879428148Smillert return 1;
22979428148Smillert
23079428148Smillert if (slen1 == 2 && c1 == L'.' &&
23179428148Smillert bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.')
23279428148Smillert return -1;
23379428148Smillert if (slen2 == 2 && c2 == L'.' &&
23479428148Smillert bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.')
23579428148Smillert return 1;
23679428148Smillert
23779428148Smillert if (c1 == L'.' && c2 != L'.')
23879428148Smillert return -1;
23979428148Smillert if (c1 != L'.' && c2 == L'.')
24079428148Smillert return 1;
24179428148Smillert
24279428148Smillert if (c1 == L'.' && c2 == L'.') {
24379428148Smillert si1 = bws_iterator_inc(si1, 1);
24479428148Smillert si2 = bws_iterator_inc(si2, 1);
24579428148Smillert }
24679428148Smillert
24779428148Smillert find_suffix(si1, bws_end(s1), &len1);
24879428148Smillert find_suffix(si2, bws_end(s2), &len2);
24979428148Smillert
25079428148Smillert if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0))
25179428148Smillert return cmp_bytes;
25279428148Smillert
25379428148Smillert cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2,
25479428148Smillert bws_iterator_inc(si2, len2));
25579428148Smillert
25679428148Smillert if (cmp_res == 0)
25779428148Smillert cmp_res = cmp_bytes;
25879428148Smillert
25979428148Smillert return cmp_res;
26079428148Smillert }
261