xref: /freebsd-src/cddl/contrib/opensolaris/lib/libdtrace/common/dt_string.c (revision 6ff6d951ade3f3379932df7f878ef3ea272cfc59)
1*6ff6d951SJohn Birrell /*
2*6ff6d951SJohn Birrell  * CDDL HEADER START
3*6ff6d951SJohn Birrell  *
4*6ff6d951SJohn Birrell  * The contents of this file are subject to the terms of the
5*6ff6d951SJohn Birrell  * Common Development and Distribution License, Version 1.0 only
6*6ff6d951SJohn Birrell  * (the "License").  You may not use this file except in compliance
7*6ff6d951SJohn Birrell  * with the License.
8*6ff6d951SJohn Birrell  *
9*6ff6d951SJohn Birrell  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*6ff6d951SJohn Birrell  * or http://www.opensolaris.org/os/licensing.
11*6ff6d951SJohn Birrell  * See the License for the specific language governing permissions
12*6ff6d951SJohn Birrell  * and limitations under the License.
13*6ff6d951SJohn Birrell  *
14*6ff6d951SJohn Birrell  * When distributing Covered Code, include this CDDL HEADER in each
15*6ff6d951SJohn Birrell  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*6ff6d951SJohn Birrell  * If applicable, add the following below this CDDL HEADER, with the
17*6ff6d951SJohn Birrell  * fields enclosed by brackets "[]" replaced with your own identifying
18*6ff6d951SJohn Birrell  * information: Portions Copyright [yyyy] [name of copyright owner]
19*6ff6d951SJohn Birrell  *
20*6ff6d951SJohn Birrell  * CDDL HEADER END
21*6ff6d951SJohn Birrell  */
22*6ff6d951SJohn Birrell /*
23*6ff6d951SJohn Birrell  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*6ff6d951SJohn Birrell  * Use is subject to license terms.
25*6ff6d951SJohn Birrell  */
26*6ff6d951SJohn Birrell 
27*6ff6d951SJohn Birrell #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*6ff6d951SJohn Birrell 
29*6ff6d951SJohn Birrell #include <strings.h>
30*6ff6d951SJohn Birrell #include <stdlib.h>
31*6ff6d951SJohn Birrell #include <errno.h>
32*6ff6d951SJohn Birrell #include <ctype.h>
33*6ff6d951SJohn Birrell 
34*6ff6d951SJohn Birrell #include <dt_string.h>
35*6ff6d951SJohn Birrell 
/*
 * Create a copy of string s, but only duplicate the first n bytes.
 *
 * The result occupies n + 1 bytes and is always NUL-terminated; if s is
 * shorter than n bytes, strncpy() zero-fills the remainder of the buffer.
 * Returns NULL if the buffer cannot be allocated.  The caller owns the
 * returned string and must release it with free().
 */
char *
strndup(const char *s, size_t n)
{
	char *s2;

	/* Reject n == SIZE_MAX, where n + 1 would wrap around to zero. */
	if (n + 1 == 0)
		return (NULL);

	if ((s2 = malloc(n + 1)) == NULL)
		return (NULL);

	(void) strncpy(s2, s, n);
	s2[n] = '\0';
	return (s2);
}
48*6ff6d951SJohn Birrell 
/*
 * Transform string s inline, converting each embedded C escape sequence string
 * to the corresponding character.  For example, the substring "\n" is replaced
 * by an inline '\n' character.
 *
 * The following sequences are recognized after a backslash:
 *
 *	one to three octal digits	the character with that octal value
 *	a b f n r t v			the usual C control characters
 *	x followed by hex digits	the low-order byte of the hex value
 *					(no digits at all yields a NUL byte)
 *	" and \				the character itself
 *
 * Any other escaped character is left untouched together with its backslash.
 * A lone backslash at the very end of the string is dropped.
 *
 * The output is never longer than the input, so the rewrite is done in place.
 * The number of bytes written before the terminating NUL is returned; this may
 * exceed strlen(s) if an escape sequence produced an embedded NUL byte.
 */
size_t
stresc2chr(char *s)
{
	char *p, *q, c;
	int esc = 0;
	unsigned int x;	/* unsigned so that long hex runs wrap, not overflow */

	for (p = q = s; (c = *p) != '\0'; p++) {
		if (!esc) {
			/* A backslash is consumed and arms escape handling. */
			if (c == '\\')
				esc = 1;
			else
				*q++ = c;
			continue;
		}

		switch (c) {
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			/*
			 * Consume up to two further octal digits.  p is left
			 * on the last digit used; the loop steps past it.
			 */
			c -= '0';

			if (p[1] >= '0' && p[1] <= '7') {
				c = c * 8 + *++p - '0';

				if (p[1] >= '0' && p[1] <= '7')
					c = c * 8 + *++p - '0';
			}

			*q++ = c;
			break;

		case 'a':
			*q++ = '\a';
			break;
		case 'b':
			*q++ = '\b';
			break;
		case 'f':
			*q++ = '\f';
			break;
		case 'n':
			*q++ = '\n';
			break;
		case 'r':
			*q++ = '\r';
			break;
		case 't':
			*q++ = '\t';
			break;
		case 'v':
			*q++ = '\v';
			break;

		case 'x':
			for (x = 0; (c = *++p) != '\0'; ) {
				if (c >= '0' && c <= '9')
					x = x * 16 + (unsigned int)(c - '0');
				else if (c >= 'a' && c <= 'f')
					x = x * 16 +
					    (unsigned int)(c - 'a' + 10);
				else if (c >= 'A' && c <= 'F')
					x = x * 16 +
					    (unsigned int)(c - 'A' + 10);
				else
					break;
			}
			*q++ = (char)x;
			/*
			 * p is on the first non-hex character (or the NUL);
			 * back up so the loop increment lands on it again.
			 */
			p--;
			break;

		case '"':
		case '\\':
			*q++ = c;
			break;
		default:
			/* Unknown escape: keep the backslash and character. */
			*q++ = '\\';
			*q++ = c;
		}

		esc = 0;
	}

	*q = '\0';
	return ((size_t)(q - s));
}
145*6ff6d951SJohn Birrell 
/*
 * Return the character that follows the backslash in the two-character C
 * escape sequence for c, or 0 if c has no two-character escape sequence.
 */
static char
strchr2esc_letter(unsigned char c)
{
	switch (c) {
	case '\0':
		return ('0');
	case '\a':
		return ('a');
	case '\b':
		return ('b');
	case '\f':
		return ('f');
	case '\n':
		return ('n');
	case '\r':
		return ('r');
	case '\t':
		return ('t');
	case '\v':
		return ('v');
	case '"':
		return ('"');
	case '\\':
		return ('\\');
	default:
		return (0);
	}
}

/*
 * Create a copy of string s in which certain unprintable or special characters
 * have been converted to the string representation of their C escape sequence.
 * For example, the newline character is expanded to the string "\n".
 *
 * At most n bytes of s are examined.  Processing stops after the first NUL
 * byte, which is itself emitted as the two characters "\0".  Control
 * characters, double quote and backslash that have a short escape use it;
 * space and the printable characters '!' through '~' are copied unchanged;
 * every other byte is written as a backslash followed by three octal digits.
 *
 * Returns a newly allocated NUL-terminated string that the caller must
 * free(), or NULL if the allocation fails.
 */
char *
strchr2esc(const char *s, size_t n)
{
	char *q, *s2, e;
	unsigned char c;
	size_t i, len, addl = 0;

	/*
	 * Pass 1: determine how many input bytes will be consumed (len) and
	 * how many additional output bytes their escapes need (addl).  Stop
	 * at the first NUL so we never read beyond the end of the string
	 * when n is larger than the string itself.
	 */
	for (len = 0; len < n; ) {
		c = (unsigned char)s[len++];

		if (strchr2esc_letter(c) != 0)
			addl++;		/* 1 add'l char needed to follow \ */
		else if (c != ' ' && (c < '!' || c > '~'))
			addl += 3;	/* 3 add'l chars following \ */

		if (c == '\0')
			break;
	}

	if ((s2 = malloc(len + addl + 1)) == NULL)
		return (NULL);

	/* Pass 2: emit the escaped form of the same len bytes. */
	for (q = s2, i = 0; i < len; i++) {
		c = (unsigned char)s[i];

		if ((e = strchr2esc_letter(c)) != 0) {
			*q++ = '\\';
			*q++ = e;
		} else if (c != ' ' && (c < '!' || c > '~')) {
			*q++ = '\\';
			*q++ = (char)(((c >> 6) & 3) + '0');
			*q++ = (char)(((c >> 3) & 7) + '0');
			*q++ = (char)((c & 7) + '0');
		} else {
			*q++ = (char)c;
		}
	}

	*q = '\0';
	return (s2);
}
245*6ff6d951SJohn Birrell 
/*
 * Return a pointer to the final path component of s, i.e. the text that
 * follows the last '/' character.  If s contains no '/', s itself is
 * returned; if s ends in '/', the result is the empty string at its end.
 * The name strbasename is used instead of basename so as not to clash with
 * the non-const prototype in libgen.h.
 */
const char *
strbasename(const char *s)
{
	const char *slash = strrchr(s, '/');

	return (slash != NULL ? slash + 1 : s);
}
261*6ff6d951SJohn Birrell 
/*
 * This function tests a string against the regular expression used for idents
 * and integers in the D lexer, and should match the superset of RGX_IDENT and
 * RGX_INT in dt_lex.l.  If an invalid character is found, the function returns
 * a pointer to it.  Otherwise NULL is returned for a valid string.
 *
 * An empty string is invalid and s itself is returned for it.  The integer
 * test is delegated to strtoull() with base 0, so anything that function
 * converts in full without a range error is accepted.  As a side effect errno
 * is cleared before the conversion and may be set by it.
 */
const char *
strbadidnum(const char *s)
{
	const char *p;
	char *end;

	if (*s == '\0')
		return (s);

	errno = 0;
	(void) strtoull(s, &end, 0);

	if (errno == 0 && *end == '\0')
		return (NULL); /* matches RGX_INT */

	for (p = s; *p != '\0'; p++) {
		/*
		 * The <ctype.h> functions require a value representable as
		 * unsigned char; passing a negative plain char is undefined.
		 */
		if (isalnum((unsigned char)*p) == 0 && *p != '_' && *p != '`')
			return (p);
	}

	return (NULL); /* matches RGX_IDENT */
}
290*6ff6d951SJohn Birrell 
/*
 * Report whether s is a glob matching pattern rather than a simple string.
 * Returns 1 if s contains any of the characters '[', '?', '*' or '\', and 0
 * otherwise.  See gmatch(3GEN) and sh(1) for the glob syntax definition.
 */
int
strisglob(const char *s)
{
	const char *cp;

	for (cp = s; *cp != '\0'; cp++) {
		switch (*cp) {
		case '[':
		case '?':
		case '*':
		case '\\':
			return (1);
		default:
			break;
		}
	}

	return (0);
}
307*6ff6d951SJohn Birrell 
/*
 * Hyphenate a string in-place by converting any instances of "__" to "-",
 * which we use for probe names to improve readability, and return the string.
 *
 * Pairs are matched left to right without overlap, so "___" becomes "-_" and
 * "____" becomes "--".  The string can only shrink, so it is compacted in a
 * single pass with a read pointer that runs ahead of the write pointer.
 */
char *
strhyphenate(char *s)
{
	char *src, *dst;

	for (src = dst = s; *src != '\0'; src++) {
		if (src[0] == '_' && src[1] == '_') {
			*dst++ = '-';
			src++;		/* skip the second underscore */
		} else {
			*dst++ = *src;
		}
	}

	*dst = '\0';
	return (s);
}
326