History log of /dflybsd-src/lib/libc/string/wcscoll.c (Results 1 – 4 of 4)
Revision Date Author Comments
# d705af0f 23-Oct-2015 John Marino <draco@marino.st>

collate: Fix expansion substitutions (broken upstream too)

Through testing, the user noted that some Cyrillic characters were not
sorting correctly, and this was confirmed.

After extensive testing an

collate: Fix expansion substitutions (broken upstream too)

Through testing, the user noted that some Cyrillic characters were not
sorting correctly, and this was confirmed.

After extensive testing and review, the localedef tool was eliminated
as the culprit. The substitutions were encoded correctly in LC_COLLATE.

The error was mainly in wcscoll where character expansions were
mishandled. The main directive pass routines had to be written to
go back for a new collation value when the "state" variable was set.
Before, pointers were being advanced, the second lookup was getting
applied to the wrong character, etc.

The "eat expansion codes" section in collate.c also had a bug. Later
on, the "state" variable logic was changed to only set if next
code was greater than zero (rather than >= 0).

Some additional cleanups got captured from previous work:
1) The previous commit moved the binary search comment from the
correct location to a wrong location because it's wrong upstream
in Illumos. The comment has little value so I just removed it.
2) Don't check if pointers are null before freeing, this is
redundant as free() handles null pointers.
3) The two binary search trees were standardized wrt initialization
4) On the binary search trees, a negative "high" exits rather than
checking the table count again.

Reported-by: bapt@FreeBSD.org

diff --git a/lib/libc/locale/collate.c b/lib/libc/locale/collate.c
index 0bd2e1c..6912732 100644
--- a/lib/libc/locale/collate.c
+++ b/lib/libc/locale/collate.c
@@ -216,27 +216,18 @@ substsearch(struct xlocale_collate *table, const wchar_t key, int pass)
return (p->pri);
}

-/*
- * Note: for performance reasons, we have expanded bsearch here. This avoids
- * function call overhead with each comparison.
- */
-
static collate_chain_t *
chainsearch(struct xlocale_collate *table, const wchar_t *key, int *len)
{
- int low;
- int high;
+ int low = 0;
+ int high = table->info->chain_count - 1;;
int next, compar, l;
collate_chain_t *p;
- collate_chain_t *tab;
+ collate_chain_t *tab = table->chain_pri_table;

- if (table->info->chain_count == 0)
+ if (high < 0)
return (NULL);

- low = 0;
- high = table->info->chain_count - 1;
- tab = table->chain_pri_table;
-
while (low <= high) {
next = (low + high) / 2;
p = tab + next;
@@ -266,7 +257,7 @@ largesearch(struct xlocale_collate *table, const wchar_t key)
collate_large_t *p;
collate_large_t *tab = table->large_pri_table;

- if (table->info->large_count == 0)
+ if (high < 0)
return (NULL);

while (low <= high) {
@@ -310,7 +301,10 @@ _collate_lookup(struct xlocale_collate *table, const wchar_t *t, int *len,
if ((sptr = *state) != NULL) {
*pri = *sptr;
sptr++;
- *state = *sptr ? sptr : NULL;
+ if ((sptr == *state) || (sptr == NULL))
+ *state = NULL;
+ else
+ *state = sptr;
*len = 0;
return;
}
@@ -371,7 +365,7 @@ _collate_lookup(struct xlocale_collate *table, const wchar_t *t, int *len,
* code ensures this for us.
*/
if ((sptr = substsearch(table, *pri, which)) != NULL) {
- if ((*pri = *sptr) != 0) {
+ if ((*pri = *sptr) > 0) {
sptr++;
*state = *sptr ? sptr : NULL;
}
diff --git a/lib/libc/string/wcscoll.c b/lib/libc/string/wcscoll.c
index 87a91c2..c6cb890 100644
--- a/lib/libc/string/wcscoll.c
+++ b/lib/libc/string/wcscoll.c
@@ -74,6 +74,7 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
const int32_t *st2 = NULL;
const wchar_t *w1 = ws1;
const wchar_t *w2 = ws2;
+ int check1, check2;

/* special pass for UNDEFINED */
if (pass == table->info->directive_count) {
@@ -107,25 +108,36 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
}

if (direc & DIRECTIVE_POSITION) {
- while ((*w1 || st1) && (*w2 || st2)) {
+ while (*w1 && *w2) {
pri1 = pri2 = 0;
- _collate_lookup(table, w1, &len1, &pri1, pass,
- &st1);
- if (pri1 <= 0) {
- if (pri1 < 0) {
- errno = EINVAL;
- goto fail;
+ check1 = check2 = 1;
+ while ((pri1 == pri2) && (check1 || check2)) {
+ if (check1) {
+ _collate_lookup(table, w1, &len1,
+ &pri1, pass, &st1);
+ if (pri1 < 0) {
+ errno = EINVAL;
+ goto fail;
+ }
+ if (!pri1) {
+ pri1 = COLLATE_MAX_PRIORITY;
+ st1 = NULL;
+ }
+ check1 = (st1 != NULL);
}
- pri1 = COLLATE_MAX_PRIORITY;
- }
- _collate_lookup(table, w2, &len2, &pri2, pass,
- &st2);
- if (pri2 <= 0) {
- if (pri2 < 0) {
- errno = EINVAL;
- goto fail;
+ if (check2) {
+ _collate_lookup(table, w2, &len2,
+ &pri2, pass, &st2);
+ if (pri2 < 0) {
+ errno = EINVAL;
+ goto fail;
+ }
+ if (!pri2) {
+ pri2 = COLLATE_MAX_PRIORITY;
+ st2 = NULL;
+ }
+ check2 = (st2 != NULL);
}
- pri2 = COLLATE_MAX_PRIORITY;
}
if (pri1 != pri2) {
ret = pri1 - pri2;
@@ -135,29 +147,38 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
w2 += len2;
}
} else {
- while ((*w1 || st1) && (*w2 || st2)) {
+ while (*w1 && *w2) {
pri1 = pri2 = 0;
- while (*w1) {
- _collate_lookup(table, w1, &len1,
- &pri1, pass, &st1);
- if (pri1 > 0)
- break;
- if (pri1 < 0) {
- errno = EINVAL;
- goto fail;
+ check1 = check2 = 1;
+ while ((pri1 == pri2) && (check1 || check2)) {
+ while (check1 && *w1) {
+ _collate_lookup(table, w1,
+ &len1, &pri1, pass, &st1);
+ if (pri1 > 0)
+ break;
+ if (pri1 < 0) {
+ errno = EINVAL;
+ goto fail;
+ }
+ st1 = NULL;
+ w1 += 1;
}
- w1 += len1;
- }
- while (*w2) {
- _collate_lookup(table, w2, &len2,
- &pri2, pass, &st2);
- if (pri2 > 0)
- break;
- if (pri2 < 0) {
- errno = EINVAL;
- goto fail;
+ check1 = (st1 != NULL);
+ while (check2 && *w2) {
+ _collate_lookup(table, w2,
+ &len2, &pri2, pass, &st2);
+ if (pri2 > 0)
+ break;
+ if (pri2 < 0) {
+ errno = EINVAL;
+ goto fail;
+ }
+ st2 = NULL;
+ w2 += 1;
}
- w2 += len2;
+ check2 = (st2 != NULL);
+ if (!pri1 || !pri2)
+ break;
}
if (!pri1 || !pri2)
break;
@@ -182,10 +203,8 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
ret = 0;

end:
- if (tr1)
- free(tr1);
- if (tr2)
- free(tr2);
+ free(tr1);
+ free(tr2);

return (ret);

show more ...


# 16db8bac 28-Jul-2015 John Marino <draco@marino.st>

collate 1/4: Add support for LC_COLLATE format "DragonFly 4.4" in libc

The collate functions within libc have been using version 1 and 1.2 of the
packed LC_COLLATE binary formats. These were genera

collate 1/4: Add support for LC_COLLATE format "DragonFly 4.4" in libc

The collate functions within libc have been using version 1 and 1.2 of the
packed LC_COLLATE binary formats. These were generated with the colldef
tool, but the new LC_COLLATE files are going to be generated by the new
localedef tool using CLDR POSIX files as input. The DragonFly-flavored
version of localedef identifies the format as "DragonFly 4.4". Any
LC_COLLATE file with a different version will simply not be loaded, and
all LC* categories will get set to "C" (aka "POSIX") locale.

This work is based off of Nexenta's contribution to Illumos (successor
to OpenSolaris). The integration with xlocale is my work though.

The following commits will enable localedef tool, disable the colldef
tool, add generated colldef directory, and finally remove colldef from
base.

show more ...


# 0d5acd74 21-Sep-2013 John Marino <draco@marino.st>

locales, libconv: Sync with FreeBSD (extensive reach)

What started out as a relatively simple upgrade to libiconv finally
resulted in a simultaneous overhaul to locales, strings, and stdio.
All of t

locales, libconv: Sync with FreeBSD (extensive reach)

What started out as a relatively simple upgrade to libiconv finally
resulted in a simultaneous overhaul to locales, strings, and stdio.
All of these are interdependent and there is no way to upgrade them
individually or in steps.

These cases are similar to what happened with libm where significant
syncing came from NetBSD previously, rendering contributions from
FreeBSD difficult. Libiconv and locales (both ancient) are now in
sync with FreeBSD HEAD.

As several headers were significantly updated and the mtree was updated
to accommodate the new include/xlocale directory, this commit will
require a full world build. It also may cause some dports to no longer
build due to prototype differences, but the dports will be adjusted.

The regexp library was not being used. It was removed from FreeBSD four
years ago. Since it required collate updates, I took the opportunity to
remove it completely by adding re_comp functionality to 4.3 compat
library like FreeBSD did.

__DragonFly_version has been bumped to 500300.

show more ...


# 716024cd 14-Jan-2009 Peter Avalos <pavalos@theshell.com>

Sync lib/libc/string with FreeBSD:

-Move legacy functions to strings.h.

-A few WARNS and style cleanup.

-Add ffsl(), fls(), flsl(), ffsll(), flsll().

-Merge index.3 and rindex.3 since they are so

Sync lib/libc/string with FreeBSD:

-Move legacy functions to strings.h.

-A few WARNS and style cleanup.

-Add ffsl(), fls(), flsl(), ffsll(), flsll().

-Merge index.3 and rindex.3 since they are so similar. Do the same for
strrchr().

-Add memrchr().

-Remove advertising clause from copyrights.

-Add memmem() which is basically the same as strstr().

-Add stpcpy().

-Add a SECURITY CONSIDERATIONS section to remind people about buffer
overflows, etc.

-Add restrict keyword where required.

-Add strndup().

-Correctly document the return value of strerror() and strerror_r() and
the contents of the returned buffer for unknown error codes.

-Add NLS catalogs support to strerror(), strerror_r() and strsignal().
Controlled by NLS define.

-For strl* files, use a less restrictive copyright (original author
changed it).

-strmode.3: strmode does not return 0.

-Move swab()'s prototype to unistd.h IAW SUS.

-Simplify by removing unneeded local variables and explicit
null termination in wcs* functions.

-Move wcscoll() and wcsxfrm() to string/ and do locale-sensitive
collation for single-byte locales.

-Add wcsdup().

-Remove unneeded includes.

-Reimplement wcsrchr(3) more efficiently, using a single forward scan
(like strrchr(3)) instead of scanning forwards to find the end of the
string then scanning backwards to find the character.

-Slightly optimize wcswidth().

-Tell contrib srcs that we have strndup now since some of them provide
their own static definition which will blow up a buildworld.

show more ...