xref: /onnv-gate/usr/src/cmd/sgs/libconv/common/c_literal.c (revision 11734:d29dc9c2b6c5)
16635Sab196087 /*
26635Sab196087  * CDDL HEADER START
36635Sab196087  *
46635Sab196087  * The contents of this file are subject to the terms of the
56635Sab196087  * Common Development and Distribution License (the "License").
66635Sab196087  * You may not use this file except in compliance with the License.
76635Sab196087  *
86635Sab196087  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
96635Sab196087  * or http://www.opensolaris.org/os/licensing.
106635Sab196087  * See the License for the specific language governing permissions
116635Sab196087  * and limitations under the License.
126635Sab196087  *
136635Sab196087  * When distributing Covered Code, include this CDDL HEADER in each
146635Sab196087  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
156635Sab196087  * If applicable, add the following below this CDDL HEADER, with the
166635Sab196087  * fields enclosed by brackets "[]" replaced with your own identifying
176635Sab196087  * information: Portions Copyright [yyyy] [name of copyright owner]
186635Sab196087  *
196635Sab196087  * CDDL HEADER END
206635Sab196087  */
216635Sab196087 
226635Sab196087 /*
23*11734SAli.Bahrami@Sun.COM  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
246635Sab196087  * Use is subject to license terms.
256635Sab196087  */
266635Sab196087 
276635Sab196087 
286635Sab196087 /*
296635Sab196087  * Translate a string into C literal string constant notation.
306635Sab196087  */
316635Sab196087 
326635Sab196087 #include	<stdio.h>
336635Sab196087 #include	<ctype.h>
346635Sab196087 #include	<_conv.h>
356635Sab196087 #include	<c_literal_msg.h>
366635Sab196087 
376635Sab196087 
386635Sab196087 /*
396635Sab196087  * Convert characters to the form used by the C language to represent
406635Sab196087  * literal strings:
416635Sab196087  *	- Printable characters are shown as themselves
426635Sab196087  *	- Convert special characters to their 2-character escaped forms:
436635Sab196087  *		alert (bell)	\a
446635Sab196087  *		backspace	\b
456635Sab196087  *		formfeed	\f
466635Sab196087  *		newline		\n
476635Sab196087  *		return		\r
486635Sab196087  *		horizontal tab	\t
496635Sab196087  *		vertical tab	\v
506635Sab196087  *		backspace	\\
516635Sab196087  *		single quote	\'
526635Sab196087  *		double quote	\"
536635Sab196087  *	- Display other non-printable characters as 4-character escaped
546635Sab196087  *		octal constants.
556635Sab196087  *
566635Sab196087  * entry:
576635Sab196087  *	buf - Buffer of characters to be processed
586635Sab196087  *	n # of characters in buf to be processed
596635Sab196087  *	outfunc - Function to be called to move output characters.
606635Sab196087  *	uvalue - User value. This argument is passed to outfunc without
616635Sab196087  *		examination. The caller can use it to pass additional
626635Sab196087  *		information required by the callback.
636635Sab196087  *
646635Sab196087  * exit:
656635Sab196087  *	The string has been processed, with the resulting data passed
666635Sab196087  *	to outfunc for processing.
676635Sab196087  */
686635Sab196087 void
conv_str_to_c_literal(const char * buf,size_t n,Conv_str_to_c_literal_func_t * outfunc,void * uvalue)696635Sab196087 conv_str_to_c_literal(const char *buf, size_t n,
706635Sab196087     Conv_str_to_c_literal_func_t *outfunc, void *uvalue)
716635Sab196087 {
726635Sab196087 	char	bs_buf[2];	/* For two-character backslash codes */
736635Sab196087 	char	octal_buf[10];	/* For \000 style octal constants */
746635Sab196087 
756635Sab196087 	bs_buf[0] = '\\';
766635Sab196087 	while (n > 0) {
776635Sab196087 		switch (*buf) {
786635Sab196087 		case '\0':
796635Sab196087 			bs_buf[1] = '0';
806635Sab196087 			break;
816635Sab196087 		case '\a':
826635Sab196087 			bs_buf[1] = 'a';
836635Sab196087 			break;
846635Sab196087 		case '\b':
856635Sab196087 			bs_buf[1] = 'b';
866635Sab196087 			break;
876635Sab196087 		case '\f':
886635Sab196087 			bs_buf[1] = 'f';
896635Sab196087 			break;
906635Sab196087 		case '\n':
916635Sab196087 			bs_buf[1] = 'n';
926635Sab196087 			break;
936635Sab196087 		case '\r':
946635Sab196087 			bs_buf[1] = 'r';
956635Sab196087 			break;
966635Sab196087 		case '\t':
976635Sab196087 			bs_buf[1] = 't';
986635Sab196087 			break;
996635Sab196087 		case '\v':
1006635Sab196087 			bs_buf[1] = 'v';
1016635Sab196087 			break;
1026635Sab196087 		case '\\':
1036635Sab196087 			bs_buf[1] = '\\';
1046635Sab196087 			break;
1056635Sab196087 		case '\'':
1066635Sab196087 			bs_buf[1] = '\'';
1076635Sab196087 			break;
1086635Sab196087 		case '"':
1096635Sab196087 			bs_buf[1] = '"';
1106635Sab196087 			break;
1116635Sab196087 		default:
1126635Sab196087 			bs_buf[1] = '\0';
1136635Sab196087 		}
1146635Sab196087 
1156635Sab196087 		if (bs_buf[1] != '\0') {
1166635Sab196087 			(*outfunc)(bs_buf, 2, uvalue);
1176635Sab196087 			buf++;
1186635Sab196087 			n--;
1196635Sab196087 		} else if (isprint(*buf)) {
1206635Sab196087 			/*
1216635Sab196087 			 * Output the entire sequence of printable
1226635Sab196087 			 * characters in a single shot.
1236635Sab196087 			 */
1246635Sab196087 			const char	*start = buf;
1256635Sab196087 			size_t		outlen = 0;
1266635Sab196087 
1276635Sab196087 			for (start = buf; (n > 0) && isprint(*buf); buf++, n--)
1286635Sab196087 				outlen++;
1296635Sab196087 			(*outfunc)(start, outlen, uvalue);
1306635Sab196087 		} else {
1316635Sab196087 			/* Generic unprintable character: Use octal notation */
1326635Sab196087 			(void) snprintf(octal_buf, sizeof (octal_buf),
1338747SAli.Bahrami@Sun.COM 			    MSG_ORIG(MSG_FMT_OCTCONST), (uchar_t)*buf);
1346635Sab196087 			(*outfunc)(octal_buf, strlen(octal_buf), uvalue);
1356635Sab196087 			buf++;
1366635Sab196087 			n--;
1376635Sab196087 		}
1386635Sab196087 	}
1396635Sab196087 }
140*11734SAli.Bahrami@Sun.COM 
/*
 * Given the pointer to the character following a '\' character in
 * a C style literal, return the ASCII character code it represents,
 * and advance the string pointer to the character following the last
 * character in the escape sequence.
 *
 * entry:
 *	str - Address of string pointer to first character following
 *		the backslash.
 *
 * exit:
 *	If the escape sequence is not valid, -1 is returned. This
 *	includes an unrecognized escape character, and an octal constant
 *	that is too large to be a character code (greater than \377).
 *	Otherwise the ASCII code for the translated character is returned.
 *
 *	In both cases, *str has been advanced past the characters that
 *	were examined as part of the escape sequence.
 */
int
conv_translate_c_esc(char **str)
{
	char	*s = *str;
	int	ch, i;

	ch = *s++;
	switch (ch) {
	case 'a':
		ch = '\a';
		break;
	case 'b':
		ch = '\b';
		break;
	case 'f':
		ch = '\f';
		break;
	case 'n':
		ch = '\n';
		break;
	case 'r':
		ch = '\r';
		break;
	case 't':
		ch = '\t';
		break;
	case 'v':
		ch = '\v';
		break;

	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
		/* Octal constant: There can be up to 3 digits */
		ch -= '0';
		for (i = 0; i < 2; i++) {
			if ((*s < '0') || (*s > '7'))
				break;
			ch = (ch << 3) + (*s++ - '0');
		}
		/*
		 * Three octal digits can encode values up to 0777, but
		 * a character code cannot exceed 0377. Report anything
		 * larger as an invalid escape rather than returning a
		 * value the caller would silently truncate.
		 */
		if (ch > 0377)
			ch = -1;
		break;

	/*
	 * There are some cases where ch already has the desired value.
	 * These cases exist simply to remove the special meaning that
	 * character would otherwise have. We need to match them to
	 * prevent them from falling into the default error case.
	 */
	case '\\':
	case '\'':
	case '"':
		break;

	default:
		ch = -1;
		break;
	}

	*str = s;
	return (ch);
}
222