xref: /netbsd-src/external/cddl/osnet/dist/lib/libdtrace/common/dt_string.c (revision c0855460da148f19acba9384f3b92685f0354376)
1a864dc36Sdarran /*
2a864dc36Sdarran  * CDDL HEADER START
3a864dc36Sdarran  *
4a864dc36Sdarran  * The contents of this file are subject to the terms of the
5*c0855460Schristos  * Common Development and Distribution License (the "License").
6*c0855460Schristos  * You may not use this file except in compliance with the License.
7a864dc36Sdarran  *
8a864dc36Sdarran  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9a864dc36Sdarran  * or http://www.opensolaris.org/os/licensing.
10a864dc36Sdarran  * See the License for the specific language governing permissions
11a864dc36Sdarran  * and limitations under the License.
12a864dc36Sdarran  *
13a864dc36Sdarran  * When distributing Covered Code, include this CDDL HEADER in each
14a864dc36Sdarran  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15a864dc36Sdarran  * If applicable, add the following below this CDDL HEADER, with the
16a864dc36Sdarran  * fields enclosed by brackets "[]" replaced with your own identifying
17a864dc36Sdarran  * information: Portions Copyright [yyyy] [name of copyright owner]
18a864dc36Sdarran  *
19a864dc36Sdarran  * CDDL HEADER END
20a864dc36Sdarran  */
21a864dc36Sdarran 
22*c0855460Schristos /*
23*c0855460Schristos  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24*c0855460Schristos  */
25a864dc36Sdarran 
26a864dc36Sdarran #include <strings.h>
27a864dc36Sdarran #include <stdlib.h>
28a864dc36Sdarran #include <errno.h>
29a864dc36Sdarran #include <ctype.h>
30a864dc36Sdarran 
31a864dc36Sdarran #include <dt_string.h>
32a864dc36Sdarran 
/*
 * Transform string s in place, converting each embedded C escape sequence to
 * the single character it denotes, and return the length of the result (not
 * counting the terminating NUL that is always written).
 *
 * Recognized sequences:
 *   \a \b \f \n \r \t \v   the usual control characters
 *   \" \\                  the literal quote or backslash
 *   \o \oo \ooo            one to three octal digits
 *   \xh...                 any number of hex digits; only the low-order bits
 *                          that fit in a char are kept
 *
 * Any other character following a backslash is left as-is together with its
 * backslash.  A lone backslash at the very end of the string is dropped.
 * "\x" followed by no hex digit yields a NUL byte, which is counted in the
 * returned length.
 *
 * The result is never longer than the input, so no extra space is needed.
 */
size_t
stresc2chr(char *s)
{
	char *p, *q, c;
	unsigned int x;	/* unsigned: long hex runs wrap instead of overflowing */
	int esc = 0;

	for (p = q = s; (c = *p) != '\0'; p++) {
		if (!esc) {
			/* A backslash arms escape mode; anything else is copied. */
			if (c == '\\')
				esc = 1;
			else
				*q++ = c;
			continue;
		}

		esc = 0;

		switch (c) {
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			c -= '0';
			p++;

			/*
			 * Consume up to two more octal digits.  Whenever the
			 * look-ahead is not a digit, step p back so the loop
			 * increment lands on that character.
			 */
			if (*p >= '0' && *p <= '7') {
				c = c * 8 + *p++ - '0';

				if (*p >= '0' && *p <= '7')
					c = c * 8 + *p - '0';
				else
					p--;
			} else
				p--;

			*q++ = c;
			break;

		case 'a':
			*q++ = '\a';
			break;
		case 'b':
			*q++ = '\b';
			break;
		case 'f':
			*q++ = '\f';
			break;
		case 'n':
			*q++ = '\n';
			break;
		case 'r':
			*q++ = '\r';
			break;
		case 't':
			*q++ = '\t';
			break;
		case 'v':
			*q++ = '\v';
			break;

		case 'x':
			for (x = 0; (c = *++p) != '\0'; ) {
				if (c >= '0' && c <= '9')
					x = x * 16 + (unsigned int)(c - '0');
				else if (c >= 'a' && c <= 'f')
					x = x * 16 + (unsigned int)(c - 'a' + 10);
				else if (c >= 'A' && c <= 'F')
					x = x * 16 + (unsigned int)(c - 'A' + 10);
				else
					break;
			}
			*q++ = (char)x;
			p--;	/* re-scan the character that ended the run */
			break;

		case '"':
		case '\\':
			*q++ = c;
			break;

		default:
			/* Unknown escape: keep it verbatim. */
			*q++ = '\\';
			*q++ = c;
			break;
		}
	}

	*q = '\0';
	return ((size_t)(q - s));
}
129a864dc36Sdarran 
/*
 * Map a character to the letter of its two-character C escape sequence
 * (e.g. '\n' -> 'n', '\0' -> '0'), or return '\0' if it has none.
 */
static char
strchr2esc_letter(unsigned char c)
{
	switch (c) {
	case '\0':
		return ('0');
	case '\a':
		return ('a');
	case '\b':
		return ('b');
	case '\f':
		return ('f');
	case '\n':
		return ('n');
	case '\r':
		return ('r');
	case '\t':
		return ('t');
	case '\v':
		return ('v');
	case '"':
		return ('"');
	case '\\':
		return ('\\');
	default:
		return ('\0');
	}
}

/*
 * Create a copy of string s in which certain unprintable or special characters
 * have been converted to the string representation of their C escape sequence.
 * For example, the newline character is expanded to the string "\n".
 *
 * At most n bytes of s are examined.  Scanning stops after the first NUL byte
 * (which is itself emitted as "\0"), even if fewer than n bytes were consumed.
 * Characters with a dedicated escape are written as backslash + letter; space
 * and the printable range '!'..'~' are copied unchanged; every other byte is
 * written as a backslash followed by exactly three octal digits.
 *
 * Returns a NUL-terminated buffer obtained from malloc() that the caller must
 * free(), or NULL if the allocation fails.
 */
char *
strchr2esc(const char *s, size_t n)
{
	const char *p;
	const char *end = s + n;
	char *q, *s2;
	char letter;
	unsigned char c;
	size_t len = 0;

	/*
	 * Pass 1: compute the exact output length.  This stops at the first
	 * NUL exactly as the emit pass does, so bytes past the terminator are
	 * never read or counted.
	 */
	for (p = s; p < end; p++) {
		c = (unsigned char)*p;

		if (strchr2esc_letter(c) != '\0')
			len += 2;	/* backslash + letter */
		else if (c >= ' ' && c <= '~')
			len += 1;	/* copied as-is */
		else
			len += 4;	/* backslash + 3 octal digits */

		if (c == '\0')
			break;
	}

	if ((s2 = malloc(len + 1)) == NULL)
		return (NULL);

	/* Pass 2: emit the escaped text. */
	for (p = s, q = s2; p < end; p++) {
		c = (unsigned char)*p;
		letter = strchr2esc_letter(c);

		if (letter != '\0') {
			*q++ = '\\';
			*q++ = letter;
		} else if (c >= ' ' && c <= '~') {
			*q++ = (char)c;
		} else {
			/* c is unsigned, so the shifts are well defined. */
			*q++ = '\\';
			*q++ = (char)('0' + ((c >> 6) & 3));
			*q++ = (char)('0' + ((c >> 3) & 7));
			*q++ = (char)('0' + (c & 7));
		}

		if (c == '\0')
			break; /* don't continue past \0 even if p < s + n */
	}

	*q = '\0';
	return (s2);
}
229a864dc36Sdarran 
/*
 * Return a pointer to the final path component of s: the text following the
 * last '/' character, or s itself when it contains no '/'.  The result points
 * into s; nothing is copied.  A string ending in '/' yields an empty string.
 * The name strbasename is used rather than basename to avoid conflicting with
 * libgen.h's non-const function prototype.
 */
const char *
strbasename(const char *s)
{
	const char *slash = strrchr(s, '/');

	return (slash != NULL ? slash + 1 : s);
}
245a864dc36Sdarran 
/*
 * This function tests a string against the regular expression used for idents
 * and integers in the D lexer, and should match the superset of RGX_IDENT and
 * RGX_INT in dt_lex.l.  If an invalid character is found, the function returns
 * a pointer to it.  Otherwise NULL is returned for a valid string.
 *
 * An empty string is invalid; s itself is returned in that case.
 *
 * The integer test is delegated to strtoull() with base 0, so decimal, octal
 * and hex forms are accepted.  Note that strtoull() also skips leading white
 * space and accepts an optional sign, so such strings pass the integer test
 * as well.  errno is reset to 0 as a side effect of making that check.
 */
const char *
strbadidnum(const char *s)
{
	char *end;
	unsigned char c;

	if (*s == '\0')
		return (s);

	errno = 0;
	(void) strtoull(s, &end, 0);

	if (errno == 0 && *end == '\0')
		return (NULL); /* matches RGX_INT */

	/*
	 * The <ctype.h> functions require a value representable as unsigned
	 * char (or EOF); passing a negative plain char is undefined, so each
	 * byte is read through unsigned char.
	 */
	for (; (c = (unsigned char)*s) != '\0'; s++) {
		if (isalnum(c) == 0 && c != '_' && c != '`')
			return (s);
	}

	return (NULL); /* matches RGX_IDENT */
}
274a864dc36Sdarran 
/*
 * Report whether s contains any glob metacharacter, i.e. whether it must be
 * treated as a pattern rather than a simple string.  Returns 1 if any of
 * '[', '?', '*' or '\' appears anywhere in s, and 0 otherwise (including for
 * the empty string).  See gmatch(3GEN) and sh(1) for the glob syntax.
 */
int
strisglob(const char *s)
{
	for (; *s != '\0'; s++) {
		switch (*s) {
		case '[':
		case '?':
		case '*':
		case '\\':
			return (1);
		default:
			break;
		}
	}

	return (0);
}
291a864dc36Sdarran 
/*
 * Hyphenate a string in-place by converting any instances of "__" to "-",
 * which we use for probe names to improve readability, and return the string.
 *
 * Pairs are matched left to right without overlap, so "___" becomes "-_" and
 * "____" becomes "--".  The result is never longer than the input.  A string
 * containing no "__" is returned without being written to.
 */
char *
strhyphenate(char *s)
{
	char *src = s;
	char *dst;

	/* Skip the leading part that needs no change. */
	while (*src != '\0' && !(src[0] == '_' && src[1] == '_'))
		src++;

	if (*src == '\0')
		return (s);

	/*
	 * Compact the remainder in a single pass: dst trails src, so each
	 * byte is moved at most once instead of shifting the whole tail for
	 * every replacement.
	 */
	for (dst = src; *src != '\0'; ) {
		if (src[0] == '_' && src[1] == '_') {
			*dst++ = '-';
			src += 2;
		} else {
			*dst++ = *src++;
		}
	}

	*dst = '\0';
	return (s);
}
310