xref: /freebsd-src/cddl/contrib/opensolaris/lib/libdtrace/common/dt_string.c (revision 1670a1c2a47d10ecccd001970b859caf93cd3b6e)
16ff6d951SJohn Birrell /*
26ff6d951SJohn Birrell  * CDDL HEADER START
36ff6d951SJohn Birrell  *
46ff6d951SJohn Birrell  * The contents of this file are subject to the terms of the
5*1670a1c2SRui Paulo  * Common Development and Distribution License (the "License").
6*1670a1c2SRui Paulo  * You may not use this file except in compliance with the License.
76ff6d951SJohn Birrell  *
86ff6d951SJohn Birrell  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
96ff6d951SJohn Birrell  * or http://www.opensolaris.org/os/licensing.
106ff6d951SJohn Birrell  * See the License for the specific language governing permissions
116ff6d951SJohn Birrell  * and limitations under the License.
126ff6d951SJohn Birrell  *
136ff6d951SJohn Birrell  * When distributing Covered Code, include this CDDL HEADER in each
146ff6d951SJohn Birrell  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
156ff6d951SJohn Birrell  * If applicable, add the following below this CDDL HEADER, with the
166ff6d951SJohn Birrell  * fields enclosed by brackets "[]" replaced with your own identifying
176ff6d951SJohn Birrell  * information: Portions Copyright [yyyy] [name of copyright owner]
186ff6d951SJohn Birrell  *
196ff6d951SJohn Birrell  * CDDL HEADER END
206ff6d951SJohn Birrell  */
216ff6d951SJohn Birrell 
22*1670a1c2SRui Paulo /*
23*1670a1c2SRui Paulo  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24*1670a1c2SRui Paulo  */
256ff6d951SJohn Birrell 
266ff6d951SJohn Birrell #include <strings.h>
276ff6d951SJohn Birrell #include <stdlib.h>
286ff6d951SJohn Birrell #include <errno.h>
296ff6d951SJohn Birrell #include <ctype.h>
306ff6d951SJohn Birrell 
316ff6d951SJohn Birrell #include <dt_string.h>
32*1670a1c2SRui Paulo #include <dt_impl.h>
336ff6d951SJohn Birrell 
346ff6d951SJohn Birrell /*
356ff6d951SJohn Birrell  * Create a copy of string s, but only duplicate the first n bytes.
366ff6d951SJohn Birrell  */
376ff6d951SJohn Birrell char *
386ff6d951SJohn Birrell strndup(const char *s, size_t n)
396ff6d951SJohn Birrell {
406ff6d951SJohn Birrell 	char *s2 = malloc(n + 1);
416ff6d951SJohn Birrell 
42*1670a1c2SRui Paulo 	if (s2 == NULL)
43*1670a1c2SRui Paulo 		longjmp(yypcb->pcb_jmpbuf, EDT_NOMEM);
44*1670a1c2SRui Paulo 
456ff6d951SJohn Birrell 	(void) strncpy(s2, s, n);
466ff6d951SJohn Birrell 	s2[n] = '\0';
476ff6d951SJohn Birrell 	return (s2);
486ff6d951SJohn Birrell }
496ff6d951SJohn Birrell 
/*
 * Transform string s in place, replacing each embedded C escape sequence with
 * the character it denotes.  For example, the two-character substring "\n" is
 * replaced by a single newline character.  The recognized escapes are:
 *
 *	\o, \oo, \ooo	one to three octal digits
 *	\xh...		'x' followed by any number of hexadecimal digits; the
 *			accumulated value is converted to a char
 *	\a \b \f \n \r \t \v
 *	\" and \\
 *
 * Any other character that follows a backslash is kept, together with the
 * backslash itself.  A lone backslash at the very end of the string is
 * dropped.  The result is never longer than the input, which is what makes
 * the in-place rewrite safe.
 *
 * Returns the length of the resulting string.  The result may contain NUL
 * bytes produced by escapes such as "\0"; these are counted in the length.
 */
size_t
stresc2chr(char *s)
{
	char *p, *q, c;
	int esc = 0;
	unsigned int x;	/* unsigned so that long escapes wrap, not overflow */

	for (p = q = s; (c = *p) != '\0'; p++) {
		if (!esc) {
			/* A backslash only arms the escape; it is not copied. */
			if ((esc = (c == '\\')) == 0)
				*q++ = c;
			continue;
		}

		esc = 0;

		switch (c) {
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			x = (unsigned int)(c - '0');

			/*
			 * Consume up to two more octal digits.  p is advanced
			 * only over digits that are actually used, so it is
			 * left on the last character of the escape.
			 */
			if (p[1] >= '0' && p[1] <= '7') {
				x = x * 8 + (unsigned int)(*++p - '0');

				if (p[1] >= '0' && p[1] <= '7')
					x = x * 8 + (unsigned int)(*++p - '0');
			}

			*q++ = (char)x;
			break;

		case 'a':
			*q++ = '\a';
			break;
		case 'b':
			*q++ = '\b';
			break;
		case 'f':
			*q++ = '\f';
			break;
		case 'n':
			*q++ = '\n';
			break;
		case 'r':
			*q++ = '\r';
			break;
		case 't':
			*q++ = '\t';
			break;
		case 'v':
			*q++ = '\v';
			break;

		case 'x':
			/*
			 * Peek at the next character and advance only while it
			 * is a hex digit.  "\x" with no digits yields a NUL.
			 */
			for (x = 0; ; p++) {
				c = p[1];

				if (c >= '0' && c <= '9')
					x = x * 16 + (unsigned int)(c - '0');
				else if (c >= 'a' && c <= 'f')
					x = x * 16 + (unsigned int)(c - 'a') + 10;
				else if (c >= 'A' && c <= 'F')
					x = x * 16 + (unsigned int)(c - 'A') + 10;
				else
					break;
			}
			*q++ = (char)x;
			break;

		case '"':
		case '\\':
			*q++ = c;
			break;

		default:
			/* Unknown escape: keep it verbatim. */
			*q++ = '\\';
			*q++ = c;
			break;
		}
	}

	*q = '\0';
	return ((size_t)(q - s));
}
1466ff6d951SJohn Birrell 
/*
 * Return the character that follows the backslash in the two-character C
 * escape sequence for c, or '\0' if c has no two-character escape.
 */
static char
strchr2esc_letter(char c)
{
	switch (c) {
	case '\0':
		return ('0');
	case '\a':
		return ('a');
	case '\b':
		return ('b');
	case '\f':
		return ('f');
	case '\n':
		return ('n');
	case '\r':
		return ('r');
	case '\t':
		return ('t');
	case '\v':
		return ('v');
	case '"':
		return ('"');
	case '\\':
		return ('\\');
	default:
		return ('\0');
	}
}

/*
 * Create a copy of string s in which certain unprintable or special characters
 * have been converted to the string representation of their C escape sequence.
 * For example, the newline character is expanded to the string "\n".
 *
 * At most n bytes of s are examined.  Characters with a two-character escape
 * (see strchr2esc_letter()) are written as such; a space and the characters
 * '!' through '~' are copied unchanged; every other byte is written as a
 * backslash followed by three octal digits.  If a NUL byte is found within
 * the first n bytes it is emitted as "\0" and conversion stops there.
 *
 * Returns a malloc()ed, NUL-terminated string, or NULL if the allocation
 * fails.
 */
char *
strchr2esc(const char *s, size_t n)
{
	const char *p, *end = s + n;
	char *q, *s2, c, e;
	unsigned char uc;
	size_t len = 0;

	/*
	 * Pass 1: compute the exact output length.  This walks the input the
	 * same way as pass 2, including stopping after the first NUL.
	 */
	for (p = s; p < end; p++) {
		c = *p;

		if (strchr2esc_letter(c) != '\0')
			len += 2;	/* backslash + letter */
		else if (c == ' ' || (c >= '!' && c <= '~'))
			len += 1;	/* copied unchanged */
		else
			len += 4;	/* backslash + 3 octal digits */

		if (c == '\0')
			break;
	}

	if ((s2 = malloc(len + 1)) == NULL)
		return (NULL);

	/* Pass 2: emit the escaped text. */
	for (p = s, q = s2; p < end; p++) {
		c = *p;

		if ((e = strchr2esc_letter(c)) != '\0') {
			*q++ = '\\';
			*q++ = e;
		} else if (c == ' ' || (c >= '!' && c <= '~')) {
			*q++ = c;
		} else {
			/* Go through unsigned char so the shifts are defined. */
			uc = (unsigned char)c;
			*q++ = '\\';
			*q++ = (char)('0' + ((uc >> 6) & 3));
			*q++ = (char)('0' + ((uc >> 3) & 7));
			*q++ = (char)('0' + (uc & 7));
		}

		if (c == '\0')
			break; /* don't continue past \0 even if p < s + n */
	}

	*q = '\0';
	return (s2);
}
2466ff6d951SJohn Birrell 
/*
 * Return a pointer to the basename of s, that is, the part of s that follows
 * its final '/'.  If s contains no '/', s itself is returned.  The result
 * points into s; nothing is allocated.  This is named strbasename rather than
 * basename to avoid conflicting with libgen.h's non-const function prototype.
 */
const char *
strbasename(const char *s)
{
	const char *slash = strrchr(s, '/');

	return (slash != NULL ? slash + 1 : s);
}
2626ff6d951SJohn Birrell 
/*
 * This function tests a string against the regular expression used for idents
 * and integers in the D lexer, and should match the superset of RGX_IDENT and
 * RGX_INT in dt_lex.l.  If an invalid character is found, the function returns
 * a pointer to it.  Otherwise NULL is returned for a valid string.
 *
 * The empty string is invalid, and s itself is returned for it.  A string is
 * accepted as an integer when strtoull() with base 0 consumes all of it
 * without setting errno; note that strtoull() itself also accepts leading
 * white space and a sign.  Otherwise every character must be alphanumeric,
 * '_' or '`'.
 */
const char *
strbadidnum(const char *s)
{
	const char *p;
	char *end;

	if (*s == '\0')
		return (s);

	errno = 0;
	(void) strtoull(s, &end, 0);

	if (errno == 0 && *end == '\0')
		return (NULL); /* matches RGX_INT */

	for (p = s; *p != '\0'; p++) {
		/*
		 * The <ctype.h> functions are only defined for EOF and values
		 * representable as unsigned char, so a plain (possibly
		 * negative) char must be converted before the call.
		 */
		if (isalnum((unsigned char)*p) == 0 && *p != '_' && *p != '`')
			return (p);
	}

	return (NULL); /* matches RGX_IDENT */
}
2916ff6d951SJohn Birrell 
/*
 * Report whether s contains any glob metacharacter, as opposed to being a
 * simple string.  Returns 1 if s contains '[', '?', '*' or a backslash, and
 * 0 otherwise.  See gmatch(3GEN) and sh(1) for the glob syntax definition.
 */
int
strisglob(const char *s)
{
	return (strpbrk(s, "[?*\\") != NULL);
}
3086ff6d951SJohn Birrell 
/*
 * Hyphenate a string in-place by converting any instances of "__" to "-",
 * which we use for probe names to improve readability, and return the string.
 *
 * Matching is done left to right on non-overlapping pairs, so "___" becomes
 * "-_" and "____" becomes "--".  The string is compacted in a single pass.
 * If s contains no "__" at all, nothing is written to it.
 */
char *
strhyphenate(char *s)
{
	const char *r;	/* next byte to read */
	char *w;	/* next byte to write; never ahead of r */

	/* Everything before the first "__" is already in its final place. */
	if ((w = strstr(s, "__")) == NULL)
		return (s);

	for (r = w; *r != '\0'; ) {
		if (r[0] == '_' && r[1] == '_') {
			*w++ = '-';
			r += 2;
		} else {
			*w++ = *r++;
		}
	}

	*w = '\0';
	return (s);
}
327