xref: /netbsd-src/external/gpl3/gcc/dist/libcody/buffer.cc (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 // CODYlib		-*- mode:c++ -*-
2 // Copyright (C) 2020 Nathan Sidwell, nathan@acm.org
3 // License: Apache v2.0
4 
5 // Cody
6 #include "internal.hh"
7 // C++
8 #include <algorithm>
9 // C
10 #include <cstring>
11 // OS
12 #include <unistd.h>
13 #include <cerrno>
14 
15 // MessageBuffer code
16 
// Lines consist of words and end with a NEWLINE (0xa) char.
// Whitespace characters are TAB (0x9) and SPACE (0x20).
// Words consist of non-whitespace chars separated by whitespace.
// Multiple lines in one transaction are indicated by ending non-final
// lines with a SEMICOLON (0x3b) word, immediately before the NEWLINE.
// Such a line is continued by the line that follows it.
// Words matching regexp [-+_/%.a-zA-Z0-9]+ need no quoting.
// Quoting is done with '...'.
// Any word containing a char outside of [-+_/%.a-zA-Z0-9] needs quoting.
// Within quotes, any char < SPACE, DEL, \' or \\ needs escaping.
// Escapes are \\, \', \n, \t, \_ (SPACE, accepted on input only),
// everything else as \<hex><hex>?
// Spaces separate words, UTF8 encoding for non-ascii chars
29 
30 namespace Cody {
31 namespace Detail {
32 
33 static const char CONTINUE = S2C(u8";");
34 
BeginLine()35 void MessageBuffer::BeginLine ()
36 {
37   if (!buffer.empty ())
38     {
39       // Terminate the previous line with a continuation
40       buffer.reserve (buffer.size () + 3);
41       buffer.push_back (S2C(u8" "));
42       buffer.push_back (CONTINUE);
43       buffer.push_back (S2C(u8"\n"));
44     }
45   lastBol = buffer.size ();
46 }
47 
48 // QUOTE means 'maybe quote', we search it for quote-needing chars
49 
Append(char const * str,bool quote,size_t len)50 void MessageBuffer::Append (char const *str, bool quote, size_t len)
51 {
52   if (len == ~size_t (0))
53     len = strlen (str);
54 
55   if (!len && !quote)
56     return;
57 
58   // We want to quote characters outside of [-+_A-Za-z0-9/%.], anything
59   // that could remotely be shell-active.  UTF8 encoding for non-ascii.
60   if (quote && len)
61     {
62       quote = false;
63       // Scan looking for quote-needing characters.  We could just
64       // append until we find one, but that's probably confusing
65       for (size_t ix = len; ix--;)
66 	{
67 	  unsigned char c = (unsigned char)str[ix];
68 	  if (!((c >= S2C(u8"a") && c <= S2C(u8"z"))
69 		|| (c >= S2C(u8"A") && c <= S2C(u8"Z"))
70 		|| (c >= S2C(u8"0") && c <= S2C(u8"9"))
71 		|| c == S2C(u8"-") || c == S2C(u8"+") || c == S2C(u8"_")
72 		|| c == S2C(u8"/") || c == S2C(u8"%") || c == S2C(u8".")))
73 	    {
74 	      quote = true;
75 	      break;
76 	    }
77 	}
78     }
79 
80   // Maximal length of appended string
81   buffer.reserve (buffer.size () + len * (quote ? 3 : 1) + 2);
82 
83   if (quote)
84     buffer.push_back (S2C(u8"'"));
85 
86   for (auto *end = str + len; str != end;)
87     {
88       auto *e = end;
89 
90       if (quote)
91 	// Look for next escape-needing char.  More relaxed than
92 	// the earlier needs-quoting check.
93 	for (e = str; e != end; ++e)
94 	  {
95 	    unsigned char c = (unsigned char)*e;
96 	    if (c < S2C(u8" ") || c == 0x7f
97 		|| c == S2C(u8"\\") || c == S2C(u8"'"))
98 	      break;
99 	  }
100       buffer.insert (buffer.end (), str, e);
101       str = e;
102 
103       if (str == end)
104 	break;
105 
106       buffer.push_back (S2C(u8"\\"));
107       switch (unsigned char c = (unsigned char)*str++)
108 	{
109 	case S2C(u8"\t"):
110 	  c = S2C(u8"t");
111 	  goto append;
112 
113 	case S2C(u8"\n"):
114 	  c = S2C(u8"n");
115 	  goto append;
116 
117 	case S2C(u8"'"):
118 	case S2C(u8"\\"):
119 	append:
120 	  buffer.push_back (c);
121 	  break;
122 
123 	default:
124 	  // Full-on escape.  Use 2 lower-case hex chars
125 	  for (unsigned shift = 8; shift;)
126 	    {
127 	      shift -= 4;
128 
129 	      char nibble = (c >> shift) & 0xf;
130 	      nibble += S2C(u8"0");
131 	      if (nibble > S2C(u8"9"))
132 		nibble += S2C(u8"a") - (S2C(u8"9") + 1);
133 	      buffer.push_back (nibble);
134 	    }
135 	}
136     }
137 
138   if (quote)
139     buffer.push_back (S2C(u8"'"));
140 }
141 
Append(char c)142 void MessageBuffer::Append (char c)
143 {
144   buffer.push_back (c);
145 }
146 
AppendInteger(unsigned u)147 void MessageBuffer::AppendInteger (unsigned u)
148 {
149   // Sigh, even though std::to_string is C++11, we support building on
150   // gcc 4.8, which is a C++11 compiler lacking std::to_string.  so
151   // have something horrible.
152   std::string v (20, 0);
153   size_t len = snprintf (const_cast<char *> (v.data ()), v.size (), "%u", u);
154   v.erase (len);
155 
156   AppendWord (v);
157 }
158 
Write(int fd)159 int MessageBuffer::Write (int fd) noexcept
160 {
161   size_t limit = buffer.size () - lastBol;
162   ssize_t count = write (fd, &buffer.data ()[lastBol], limit);
163 
164   int err = 0;
165   if (count < 0)
166     err = errno;
167   else
168     {
169       lastBol += count;
170       if (size_t (count) != limit)
171 	err = EAGAIN;
172     }
173 
174   if (err != EAGAIN && err != EINTR)
175     {
176       // Reset for next message
177       buffer.clear ();
178       lastBol = 0;
179     }
180 
181   return err;
182 }
183 
Read(int fd)184 int MessageBuffer::Read (int fd) noexcept
185 {
186   constexpr size_t blockSize = 200;
187 
188   size_t lwm = buffer.size ();
189   size_t hwm = buffer.capacity ();
190   if (hwm - lwm < blockSize / 2)
191     hwm += blockSize;
192   buffer.resize (hwm);
193 
194   auto iter = buffer.begin () + lwm;
195   ssize_t count = read (fd, &*iter, hwm - lwm);
196   buffer.resize (lwm + (count >= 0 ? count : 0));
197 
198   if (count < 0)
199     return errno;
200 
201   if (!count)
202     // End of file
203     return -1;
204 
205   bool more = true;
206   for (;;)
207     {
208       auto newline = std::find (iter, buffer.end (), S2C(u8"\n"));
209       if (newline == buffer.end ())
210 	break;
211       more = newline != buffer.begin () && newline[-1] == CONTINUE;
212       iter = newline + 1;
213 
214       if (iter == buffer.end ())
215 	break;
216 
217       if (!more)
218 	{
219 	  // There is no continuation, but there are chars after the
220 	  // newline.  Truncate the buffer and return an error
221 	  buffer.resize (iter - buffer.begin ());
222 	  return EINVAL;
223 	}
224     }
225 
226   return more ? EAGAIN : 0;
227 }
228 
// Lex the line starting at lastBol into RESULT, one string per word,
// removing quotes and decoding escapes.  Returns:
//   ENOENT if IsAtEnd () (nothing is consumed), or if the line held
//     no words,
//   EINVAL if the line is malformed; RESULT then holds one string,
//     the raw text of the line less any trailing " ;",
//   0 otherwise.
// Except in the IsAtEnd case, lastBol is advanced past the line's
// newline.
// NOTE(review): bytes >= 0x7f are rejected below, while Append emits
// them unescaped inside quotes -- confirm the asymmetry is intended.

int MessageBuffer::Lex (std::vector<std::string> &result)
{
  result.clear ();

  if (IsAtEnd ())
    return ENOENT;

  // The scanning below relies on finding a newline before the end of
  // the buffer.
  Assert (buffer.back () == S2C(u8"\n"));

  auto iter = buffer.begin () + lastBol;

  // WORD is the word being accumulated, or null when between words.
  for (std::string *word = nullptr;;)
    {
      char c = *iter;

      ++iter;
      if (c == S2C(u8" ") || c == S2C(u8"\t"))
	{
	  // Whitespace ends the current word
	  word = nullptr;
	  continue;
	}

      if (c == S2C(u8"\n"))
	// End of a line that has no continuation
	break;

      if (c == CONTINUE)
	{
	  // Line continuation.  It must be a word on its own,
	  // immediately followed by the newline.
	  if (word || *iter != S2C(u8"\n"))
	    goto malformed;
	  ++iter;
	  break;
	}

      // Control chars, DEL and (whichever way plain char is signed)
      // bytes with the top bit set are not valid outside of quotes.
      if (c <= S2C(u8" ") || c >= 0x7f)
	goto malformed;

      if (!word)
	{
	  // First char of a new word.  The pointer remains valid as
	  // RESULT only grows when WORD is null.
	  result.emplace_back ();
	  word = &result.back ();
	}

      if (c == S2C(u8"'"))
	{
	  // Quoted word.  Accumulate up to the closing quote.
	  for (;;)
	    {
	      c = *iter;

	      if (c == S2C(u8"\n"))
		{
		  // Unterminated quote.  Every other error also jumps
		  // here, always with ITER beyond the line's first char.
		malformed:;
		  result.clear ();
		  iter = std::find (iter, buffer.end (), S2C(u8"\n"));
		  auto back = iter;
		  if (back[-1] == CONTINUE  && back[-2] == S2C(u8" "))
		    // Smells like a line continuation
		    back -= 2;
		  // Hand back the raw text of the bad line
		  result.emplace_back (&buffer[lastBol],
				       back - buffer.begin () - lastBol);
		  // Step over the newline, so the next call starts
		  // with the following line.
		  ++iter;
		  lastBol = iter - buffer.begin ();
		  return EINVAL;
		}

	      // Same restriction as outside quotes, except that SPACE
	      // is permitted.
	      if (c < S2C(u8" ") || c >= 0x7f)
		goto malformed;

	      ++iter;
	      if (c == S2C(u8"'"))
		// Closing quote
		break;

	      if (c == S2C(u8"\\"))
		// escape
		switch (c = *iter)
		  {
		    // \\ and \' stand for themselves
		    case S2C(u8"\\"):
		    case S2C(u8"'"):
		      ++iter;
		      break;

		    case S2C(u8"n"):
		      c = S2C(u8"\n");
		      ++iter;
		      break;

		    case S2C(u8"_"):
		      // We used to escape SPACE as \_, so accept that
		      c = S2C(u8" ");
		      ++iter;
		      break;

		    case S2C(u8"t"):
		      c = S2C(u8"\t");
		      ++iter;
		      break;

		    default:
		      {
			// One or two lower-case hex digits.  Having
			// no digit at all is malformed.
			unsigned v = 0;
			for (unsigned nibble = 0; nibble != 2; nibble++)
			  {
			    c = *iter;
			    if (c < S2C(u8"0"))
			      {
				if (!nibble)
				  goto malformed;
				break;
			      }
			    else if (c <= S2C(u8"9"))
			      c -= S2C(u8"0");
			    else if (c < S2C(u8"a"))
			      {
				if (!nibble)
				  goto malformed;
				break;
			      }
			    else if (c <= S2C(u8"f"))
			      c -= S2C(u8"a") - 10;
			    else
			      {
				if (!nibble)
				  goto malformed;
				break;
			      }
			    ++iter;
			    v = (v << 4) | c;
			  }
			c = v;
		      }
		  }
	      // C is now the literal or decoded character
	      word->push_back (c);
	    }
	}
      else
	// Unquoted character
	word->push_back (c);
    }
  // ITER is just past the line's newline
  lastBol = iter - buffer.begin ();
  if (result.empty ())
    // A line with no words on it
    return ENOENT;

  return 0;
}
374 
LexedLine(std::string & str)375 void MessageBuffer::LexedLine (std::string &str)
376 {
377   if (lastBol)
378     {
379       size_t pos = lastBol - 1;
380       for (; pos; pos--)
381 	if (buffer[pos-1] == S2C(u8"\n"))
382 	  break;
383 
384       size_t end = lastBol - 1;
385       if (buffer[end-1] == CONTINUE && buffer[end-2] == S2C(u8" "))
386 	// Strip line continuation
387 	end -= 2;
388       str.append (&buffer[pos], end - pos);
389     }
390 }
391 } // Detail
392 } // Cody
393