1*50fc853eSJohn Marino /*-
2*50fc853eSJohn Marino * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
3*50fc853eSJohn Marino * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
4*50fc853eSJohn Marino * All rights reserved.
5*50fc853eSJohn Marino *
6*50fc853eSJohn Marino * Redistribution and use in source and binary forms, with or without
7*50fc853eSJohn Marino * modification, are permitted provided that the following conditions
8*50fc853eSJohn Marino * are met:
9*50fc853eSJohn Marino * 1. Redistributions of source code must retain the above copyright
10*50fc853eSJohn Marino * notice, this list of conditions and the following disclaimer.
11*50fc853eSJohn Marino * 2. Redistributions in binary form must reproduce the above copyright
12*50fc853eSJohn Marino * notice, this list of conditions and the following disclaimer in the
13*50fc853eSJohn Marino * documentation and/or other materials provided with the distribution.
14*50fc853eSJohn Marino *
15*50fc853eSJohn Marino * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16*50fc853eSJohn Marino * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17*50fc853eSJohn Marino * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18*50fc853eSJohn Marino * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19*50fc853eSJohn Marino * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20*50fc853eSJohn Marino * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21*50fc853eSJohn Marino * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22*50fc853eSJohn Marino * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23*50fc853eSJohn Marino * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24*50fc853eSJohn Marino * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25*50fc853eSJohn Marino * SUCH DAMAGE.
26*50fc853eSJohn Marino *
27*50fc853eSJohn Marino * $FreeBSD: head/usr.bin/sort/vsort.c 281132 2015-04-06 02:35:55Z pfg $
28*50fc853eSJohn Marino */
29*50fc853eSJohn Marino
30*50fc853eSJohn Marino
31*50fc853eSJohn Marino #include <sys/types.h>
32*50fc853eSJohn Marino
33*50fc853eSJohn Marino #include <ctype.h>
34*50fc853eSJohn Marino #include <stdlib.h>
35*50fc853eSJohn Marino #include <string.h>
36*50fc853eSJohn Marino
37*50fc853eSJohn Marino #include "sort.h"
38*50fc853eSJohn Marino #include "vsort.h"
39*50fc853eSJohn Marino
40*50fc853eSJohn Marino static inline bool
isdigit_clocale(wchar_t c)41*50fc853eSJohn Marino isdigit_clocale(wchar_t c)
42*50fc853eSJohn Marino {
43*50fc853eSJohn Marino
44*50fc853eSJohn Marino return (c >= L'0' && c <= L'9');
45*50fc853eSJohn Marino }
46*50fc853eSJohn Marino
47*50fc853eSJohn Marino static inline bool
isalpha_clocale(wchar_t c)48*50fc853eSJohn Marino isalpha_clocale(wchar_t c)
49*50fc853eSJohn Marino {
50*50fc853eSJohn Marino
51*50fc853eSJohn Marino return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'));
52*50fc853eSJohn Marino }
53*50fc853eSJohn Marino
54*50fc853eSJohn Marino static inline bool
isalnum_clocale(wchar_t c)55*50fc853eSJohn Marino isalnum_clocale(wchar_t c)
56*50fc853eSJohn Marino {
57*50fc853eSJohn Marino
58*50fc853eSJohn Marino return ((c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z') ||
59*50fc853eSJohn Marino (c >= L'0' && c <= L'9'));
60*50fc853eSJohn Marino }
61*50fc853eSJohn Marino
62*50fc853eSJohn Marino /*
63*50fc853eSJohn Marino * Find string suffix of format: (\.[A-Za-z~][A-Za-z0-9~]*)*$
64*50fc853eSJohn Marino * Set length of string before suffix.
65*50fc853eSJohn Marino */
66*50fc853eSJohn Marino static void
find_suffix(bwstring_iterator si,bwstring_iterator se,size_t * len)67*50fc853eSJohn Marino find_suffix(bwstring_iterator si, bwstring_iterator se, size_t *len)
68*50fc853eSJohn Marino {
69*50fc853eSJohn Marino wchar_t c;
70*50fc853eSJohn Marino size_t clen;
71*50fc853eSJohn Marino bool expect_alpha, sfx;
72*50fc853eSJohn Marino
73*50fc853eSJohn Marino sfx = false;
74*50fc853eSJohn Marino expect_alpha = false;
75*50fc853eSJohn Marino *len = 0;
76*50fc853eSJohn Marino clen = 0;
77*50fc853eSJohn Marino
78*50fc853eSJohn Marino while ((si < se) && (c = bws_get_iter_value(si))) {
79*50fc853eSJohn Marino if (expect_alpha) {
80*50fc853eSJohn Marino expect_alpha = false;
81*50fc853eSJohn Marino if (!isalpha_clocale(c) && (c != L'~'))
82*50fc853eSJohn Marino sfx = false;
83*50fc853eSJohn Marino } else if (c == L'.') {
84*50fc853eSJohn Marino expect_alpha = true;
85*50fc853eSJohn Marino if (!sfx) {
86*50fc853eSJohn Marino sfx = true;
87*50fc853eSJohn Marino *len = clen;
88*50fc853eSJohn Marino }
89*50fc853eSJohn Marino } else if (!isalnum_clocale(c) && (c != L'~'))
90*50fc853eSJohn Marino sfx = false;
91*50fc853eSJohn Marino
92*50fc853eSJohn Marino si = bws_iterator_inc(si, 1);
93*50fc853eSJohn Marino ++clen;
94*50fc853eSJohn Marino }
95*50fc853eSJohn Marino
96*50fc853eSJohn Marino /* This code must be here to make the implementation compatible
97*50fc853eSJohn Marino * with WORDING of GNU sort documentation.
98*50fc853eSJohn Marino * But the GNU sort implementation is not following its own
99*50fc853eSJohn Marino * documentation. GNU sort allows empty file extensions
100*50fc853eSJohn Marino * (just dot with nothing after); but the regular expression in
101*50fc853eSJohn Marino * their documentation does not allow empty file extensions.
102*50fc853eSJohn Marino * We chose to make our implementation compatible with GNU sort
103*50fc853eSJohn Marino * implementation. If they will ever fix their bug, this code
104*50fc853eSJohn Marino * must be uncommented. Or they may choose to fix the info page,
105*50fc853eSJohn Marino * then the code stays commented.
106*50fc853eSJohn Marino *
107*50fc853eSJohn Marino if (expect_alpha)
108*50fc853eSJohn Marino sfx = false;
109*50fc853eSJohn Marino */
110*50fc853eSJohn Marino
111*50fc853eSJohn Marino if (!sfx)
112*50fc853eSJohn Marino *len = clen;
113*50fc853eSJohn Marino }
114*50fc853eSJohn Marino
115*50fc853eSJohn Marino static inline int
cmp_chars(wchar_t c1,wchar_t c2)116*50fc853eSJohn Marino cmp_chars(wchar_t c1, wchar_t c2)
117*50fc853eSJohn Marino {
118*50fc853eSJohn Marino
119*50fc853eSJohn Marino if (c1 == c2)
120*50fc853eSJohn Marino return (0);
121*50fc853eSJohn Marino
122*50fc853eSJohn Marino if (c1 == L'~')
123*50fc853eSJohn Marino return (-1);
124*50fc853eSJohn Marino if (c2 == L'~')
125*50fc853eSJohn Marino return (+1);
126*50fc853eSJohn Marino
127*50fc853eSJohn Marino if (isdigit_clocale(c1) || !c1)
128*50fc853eSJohn Marino return ((isdigit_clocale(c2) || !c2) ? 0 : -1);
129*50fc853eSJohn Marino
130*50fc853eSJohn Marino if (isdigit_clocale(c2) || !c2)
131*50fc853eSJohn Marino return (+1);
132*50fc853eSJohn Marino
133*50fc853eSJohn Marino if (isalpha_clocale(c1))
134*50fc853eSJohn Marino return ((isalpha_clocale(c2)) ? ((int) c1 - (int) c2) : -1);
135*50fc853eSJohn Marino
136*50fc853eSJohn Marino if (isalpha_clocale(c2))
137*50fc853eSJohn Marino return (+1);
138*50fc853eSJohn Marino
139*50fc853eSJohn Marino return ((int) c1 - (int) c2);
140*50fc853eSJohn Marino }
141*50fc853eSJohn Marino
142*50fc853eSJohn Marino static int
cmpversions(bwstring_iterator si1,bwstring_iterator se1,bwstring_iterator si2,bwstring_iterator se2)143*50fc853eSJohn Marino cmpversions(bwstring_iterator si1, bwstring_iterator se1,
144*50fc853eSJohn Marino bwstring_iterator si2, bwstring_iterator se2)
145*50fc853eSJohn Marino {
146*50fc853eSJohn Marino int cmp, diff;
147*50fc853eSJohn Marino
148*50fc853eSJohn Marino while ((si1 < se1) || (si2 < se2)) {
149*50fc853eSJohn Marino diff = 0;
150*50fc853eSJohn Marino
151*50fc853eSJohn Marino while (((si1 < se1) &&
152*50fc853eSJohn Marino !isdigit_clocale(bws_get_iter_value(si1))) ||
153*50fc853eSJohn Marino ((si2 < se2) && !isdigit_clocale(bws_get_iter_value(si2)))) {
154*50fc853eSJohn Marino wchar_t c1, c2;
155*50fc853eSJohn Marino
156*50fc853eSJohn Marino c1 = (si1 < se1) ? bws_get_iter_value(si1) : 0;
157*50fc853eSJohn Marino c2 = (si2 < se2) ? bws_get_iter_value(si2) : 0;
158*50fc853eSJohn Marino
159*50fc853eSJohn Marino cmp = cmp_chars(c1, c2);
160*50fc853eSJohn Marino if (cmp)
161*50fc853eSJohn Marino return (cmp);
162*50fc853eSJohn Marino
163*50fc853eSJohn Marino if (si1 < se1)
164*50fc853eSJohn Marino si1 = bws_iterator_inc(si1, 1);
165*50fc853eSJohn Marino if (si2 < se2)
166*50fc853eSJohn Marino si2 = bws_iterator_inc(si2, 1);
167*50fc853eSJohn Marino }
168*50fc853eSJohn Marino
169*50fc853eSJohn Marino while (bws_get_iter_value(si1) == L'0')
170*50fc853eSJohn Marino si1 = bws_iterator_inc(si1, 1);
171*50fc853eSJohn Marino
172*50fc853eSJohn Marino while (bws_get_iter_value(si2) == L'0')
173*50fc853eSJohn Marino si2 = bws_iterator_inc(si2, 1);
174*50fc853eSJohn Marino
175*50fc853eSJohn Marino while (isdigit_clocale(bws_get_iter_value(si1)) &&
176*50fc853eSJohn Marino isdigit_clocale(bws_get_iter_value(si2))) {
177*50fc853eSJohn Marino if (!diff)
178*50fc853eSJohn Marino diff = ((int)bws_get_iter_value(si1) -
179*50fc853eSJohn Marino (int)bws_get_iter_value(si2));
180*50fc853eSJohn Marino si1 = bws_iterator_inc(si1, 1);
181*50fc853eSJohn Marino si2 = bws_iterator_inc(si2, 1);
182*50fc853eSJohn Marino }
183*50fc853eSJohn Marino
184*50fc853eSJohn Marino if (isdigit_clocale(bws_get_iter_value(si1)))
185*50fc853eSJohn Marino return (1);
186*50fc853eSJohn Marino
187*50fc853eSJohn Marino if (isdigit_clocale(bws_get_iter_value(si2)))
188*50fc853eSJohn Marino return (-1);
189*50fc853eSJohn Marino
190*50fc853eSJohn Marino if (diff)
191*50fc853eSJohn Marino return (diff);
192*50fc853eSJohn Marino }
193*50fc853eSJohn Marino
194*50fc853eSJohn Marino return (0);
195*50fc853eSJohn Marino }
196*50fc853eSJohn Marino
197*50fc853eSJohn Marino /*
198*50fc853eSJohn Marino * Compare two version strings
199*50fc853eSJohn Marino */
200*50fc853eSJohn Marino int
vcmp(struct bwstring * s1,struct bwstring * s2)201*50fc853eSJohn Marino vcmp(struct bwstring *s1, struct bwstring *s2)
202*50fc853eSJohn Marino {
203*50fc853eSJohn Marino bwstring_iterator si1, si2;
204*50fc853eSJohn Marino wchar_t c1, c2;
205*50fc853eSJohn Marino size_t len1, len2, slen1, slen2;
206*50fc853eSJohn Marino int cmp_bytes, cmp_res;
207*50fc853eSJohn Marino
208*50fc853eSJohn Marino if (s1 == s2)
209*50fc853eSJohn Marino return (0);
210*50fc853eSJohn Marino
211*50fc853eSJohn Marino cmp_bytes = bwscmp(s1, s2, 0);
212*50fc853eSJohn Marino if (cmp_bytes == 0)
213*50fc853eSJohn Marino return (0);
214*50fc853eSJohn Marino
215*50fc853eSJohn Marino len1 = slen1 = BWSLEN(s1);
216*50fc853eSJohn Marino len2 = slen2 = BWSLEN(s2);
217*50fc853eSJohn Marino
218*50fc853eSJohn Marino if (slen1 < 1)
219*50fc853eSJohn Marino return (-1);
220*50fc853eSJohn Marino if (slen2 < 1)
221*50fc853eSJohn Marino return (+1);
222*50fc853eSJohn Marino
223*50fc853eSJohn Marino si1 = bws_begin(s1);
224*50fc853eSJohn Marino si2 = bws_begin(s2);
225*50fc853eSJohn Marino
226*50fc853eSJohn Marino c1 = bws_get_iter_value(si1);
227*50fc853eSJohn Marino c2 = bws_get_iter_value(si2);
228*50fc853eSJohn Marino
229*50fc853eSJohn Marino if (c1 == L'.' && (slen1 == 1))
230*50fc853eSJohn Marino return (-1);
231*50fc853eSJohn Marino
232*50fc853eSJohn Marino if (c2 == L'.' && (slen2 == 1))
233*50fc853eSJohn Marino return (+1);
234*50fc853eSJohn Marino
235*50fc853eSJohn Marino if (slen1 == 2 && c1 == L'.' &&
236*50fc853eSJohn Marino bws_get_iter_value(bws_iterator_inc(si1, 1)) == L'.')
237*50fc853eSJohn Marino return (-1);
238*50fc853eSJohn Marino if (slen2 == 2 && c2 == L'.' &&
239*50fc853eSJohn Marino bws_get_iter_value(bws_iterator_inc(si2, 1)) == L'.')
240*50fc853eSJohn Marino return (+1);
241*50fc853eSJohn Marino
242*50fc853eSJohn Marino if (c1 == L'.' && c2 != L'.')
243*50fc853eSJohn Marino return (-1);
244*50fc853eSJohn Marino if (c1 != L'.' && c2 == L'.')
245*50fc853eSJohn Marino return (+1);
246*50fc853eSJohn Marino
247*50fc853eSJohn Marino if (c1 == L'.' && c2 == L'.') {
248*50fc853eSJohn Marino si1 = bws_iterator_inc(si1, 1);
249*50fc853eSJohn Marino si2 = bws_iterator_inc(si2, 1);
250*50fc853eSJohn Marino }
251*50fc853eSJohn Marino
252*50fc853eSJohn Marino find_suffix(si1, bws_end(s1), &len1);
253*50fc853eSJohn Marino find_suffix(si2, bws_end(s2), &len2);
254*50fc853eSJohn Marino
255*50fc853eSJohn Marino if ((len1 == len2) && (bws_iterator_cmp(si1, si2, len1) == 0))
256*50fc853eSJohn Marino return (cmp_bytes);
257*50fc853eSJohn Marino
258*50fc853eSJohn Marino cmp_res = cmpversions(si1, bws_iterator_inc(si1, len1), si2,
259*50fc853eSJohn Marino bws_iterator_inc(si2, len2));
260*50fc853eSJohn Marino
261*50fc853eSJohn Marino if (cmp_res == 0)
262*50fc853eSJohn Marino cmp_res = cmp_bytes;
263*50fc853eSJohn Marino
264*50fc853eSJohn Marino return (cmp_res);
265*50fc853eSJohn Marino }
266