xref: /freebsd-src/contrib/llvm-project/compiler-rt/lib/scudo/standalone/string_utils.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
168d75effSDimitry Andric //===-- string_utils.cpp ----------------------------------------*- C++ -*-===//
268d75effSDimitry Andric //
368d75effSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
468d75effSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
568d75effSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
668d75effSDimitry Andric //
768d75effSDimitry Andric //===----------------------------------------------------------------------===//
868d75effSDimitry Andric 
968d75effSDimitry Andric #include "string_utils.h"
1068d75effSDimitry Andric #include "common.h"
1168d75effSDimitry Andric 
1268d75effSDimitry Andric #include <stdarg.h>
1368d75effSDimitry Andric #include <string.h>
1468d75effSDimitry Andric 
1568d75effSDimitry Andric namespace scudo {
1668d75effSDimitry Andric 
// Stores |C| at the write cursor |*Buffer| and advances the cursor, unless the
// cursor has already reached |BufferEnd|, in which case |C| is dropped.
// Returns 1 in both cases, so summing the results yields the length the output
// would have had in an unbounded buffer.
static int appendChar(char **Buffer, const char *BufferEnd, char C) {
  char *&Cursor = *Buffer;
  if (Cursor < BufferEnd)
    *Cursor++ = C;
  return 1;
}
2468d75effSDimitry Andric 
2568d75effSDimitry Andric // Appends number in a given Base to buffer. If its length is less than
2668d75effSDimitry Andric // |MinNumberLength|, it is padded with leading zeroes or spaces, depending
2768d75effSDimitry Andric // on the value of |PadWithZero|.
2868d75effSDimitry Andric static int appendNumber(char **Buffer, const char *BufferEnd, u64 AbsoluteValue,
2968d75effSDimitry Andric                         u8 Base, u8 MinNumberLength, bool PadWithZero,
3068d75effSDimitry Andric                         bool Negative, bool Upper) {
3168d75effSDimitry Andric   constexpr uptr MaxLen = 30;
3268d75effSDimitry Andric   RAW_CHECK(Base == 10 || Base == 16);
3368d75effSDimitry Andric   RAW_CHECK(Base == 10 || !Negative);
3468d75effSDimitry Andric   RAW_CHECK(AbsoluteValue || !Negative);
3568d75effSDimitry Andric   RAW_CHECK(MinNumberLength < MaxLen);
3668d75effSDimitry Andric   int Res = 0;
3768d75effSDimitry Andric   if (Negative && MinNumberLength)
3868d75effSDimitry Andric     --MinNumberLength;
3968d75effSDimitry Andric   if (Negative && PadWithZero)
4068d75effSDimitry Andric     Res += appendChar(Buffer, BufferEnd, '-');
4168d75effSDimitry Andric   uptr NumBuffer[MaxLen];
4268d75effSDimitry Andric   int Pos = 0;
4368d75effSDimitry Andric   do {
4468d75effSDimitry Andric     RAW_CHECK_MSG(static_cast<uptr>(Pos) < MaxLen,
4568d75effSDimitry Andric                   "appendNumber buffer overflow");
4668d75effSDimitry Andric     NumBuffer[Pos++] = static_cast<uptr>(AbsoluteValue % Base);
4768d75effSDimitry Andric     AbsoluteValue /= Base;
4868d75effSDimitry Andric   } while (AbsoluteValue > 0);
4968d75effSDimitry Andric   if (Pos < MinNumberLength) {
5068d75effSDimitry Andric     memset(&NumBuffer[Pos], 0,
5168d75effSDimitry Andric            sizeof(NumBuffer[0]) * static_cast<uptr>(MinNumberLength - Pos));
5268d75effSDimitry Andric     Pos = MinNumberLength;
5368d75effSDimitry Andric   }
5468d75effSDimitry Andric   RAW_CHECK(Pos > 0);
5568d75effSDimitry Andric   Pos--;
5668d75effSDimitry Andric   for (; Pos >= 0 && NumBuffer[Pos] == 0; Pos--) {
5768d75effSDimitry Andric     char c = (PadWithZero || Pos == 0) ? '0' : ' ';
5868d75effSDimitry Andric     Res += appendChar(Buffer, BufferEnd, c);
5968d75effSDimitry Andric   }
6068d75effSDimitry Andric   if (Negative && !PadWithZero)
6168d75effSDimitry Andric     Res += appendChar(Buffer, BufferEnd, '-');
6268d75effSDimitry Andric   for (; Pos >= 0; Pos--) {
6368d75effSDimitry Andric     char Digit = static_cast<char>(NumBuffer[Pos]);
6468d75effSDimitry Andric     Digit = static_cast<char>((Digit < 10) ? '0' + Digit
6568d75effSDimitry Andric                                            : (Upper ? 'A' : 'a') + Digit - 10);
6668d75effSDimitry Andric     Res += appendChar(Buffer, BufferEnd, Digit);
6768d75effSDimitry Andric   }
6868d75effSDimitry Andric   return Res;
6968d75effSDimitry Andric }
7068d75effSDimitry Andric 
7168d75effSDimitry Andric static int appendUnsigned(char **Buffer, const char *BufferEnd, u64 Num,
7268d75effSDimitry Andric                           u8 Base, u8 MinNumberLength, bool PadWithZero,
7368d75effSDimitry Andric                           bool Upper) {
7468d75effSDimitry Andric   return appendNumber(Buffer, BufferEnd, Num, Base, MinNumberLength,
7568d75effSDimitry Andric                       PadWithZero, /*Negative=*/false, Upper);
7668d75effSDimitry Andric }
7768d75effSDimitry Andric 
7868d75effSDimitry Andric static int appendSignedDecimal(char **Buffer, const char *BufferEnd, s64 Num,
7968d75effSDimitry Andric                                u8 MinNumberLength, bool PadWithZero) {
8068d75effSDimitry Andric   const bool Negative = (Num < 0);
81e8d8bef9SDimitry Andric   const u64 UnsignedNum = (Num == INT64_MIN)
82e8d8bef9SDimitry Andric                               ? static_cast<u64>(INT64_MAX) + 1
83e8d8bef9SDimitry Andric                               : static_cast<u64>(Negative ? -Num : Num);
84e8d8bef9SDimitry Andric   return appendNumber(Buffer, BufferEnd, UnsignedNum, 10, MinNumberLength,
85e8d8bef9SDimitry Andric                       PadWithZero, Negative, /*Upper=*/false);
8668d75effSDimitry Andric }
8768d75effSDimitry Andric 
8868d75effSDimitry Andric // Use the fact that explicitly requesting 0 Width (%0s) results in UB and
8968d75effSDimitry Andric // interpret Width == 0 as "no Width requested":
9068d75effSDimitry Andric // Width == 0 - no Width requested
9168d75effSDimitry Andric // Width  < 0 - left-justify S within and pad it to -Width chars, if necessary
9268d75effSDimitry Andric // Width  > 0 - right-justify S, not implemented yet
9368d75effSDimitry Andric static int appendString(char **Buffer, const char *BufferEnd, int Width,
9468d75effSDimitry Andric                         int MaxChars, const char *S) {
9568d75effSDimitry Andric   if (!S)
9668d75effSDimitry Andric     S = "<null>";
9768d75effSDimitry Andric   int Res = 0;
9868d75effSDimitry Andric   for (; *S; S++) {
9968d75effSDimitry Andric     if (MaxChars >= 0 && Res >= MaxChars)
10068d75effSDimitry Andric       break;
10168d75effSDimitry Andric     Res += appendChar(Buffer, BufferEnd, *S);
10268d75effSDimitry Andric   }
10368d75effSDimitry Andric   // Only the left justified strings are supported.
10468d75effSDimitry Andric   while (Width < -Res)
10568d75effSDimitry Andric     Res += appendChar(Buffer, BufferEnd, ' ');
10668d75effSDimitry Andric   return Res;
10768d75effSDimitry Andric }
10868d75effSDimitry Andric 
10968d75effSDimitry Andric static int appendPointer(char **Buffer, const char *BufferEnd, u64 ptr_value) {
11068d75effSDimitry Andric   int Res = 0;
11168d75effSDimitry Andric   Res += appendString(Buffer, BufferEnd, 0, -1, "0x");
11268d75effSDimitry Andric   Res += appendUnsigned(Buffer, BufferEnd, ptr_value, 16,
11368d75effSDimitry Andric                         SCUDO_POINTER_FORMAT_LENGTH, /*PadWithZero=*/true,
11468d75effSDimitry Andric                         /*Upper=*/false);
11568d75effSDimitry Andric   return Res;
11668d75effSDimitry Andric }
11768d75effSDimitry Andric 
118fe6060f1SDimitry Andric static int formatString(char *Buffer, uptr BufferLength, const char *Format,
11968d75effSDimitry Andric                         va_list Args) {
12068d75effSDimitry Andric   static const char *PrintfFormatsHelp =
12168d75effSDimitry Andric       "Supported formatString formats: %([0-9]*)?(z|ll)?{d,u,x,X}; %p; "
12268d75effSDimitry Andric       "%[-]([0-9]*)?(\\.\\*)?s; %c\n";
12368d75effSDimitry Andric   RAW_CHECK(Format);
12468d75effSDimitry Andric   RAW_CHECK(BufferLength > 0);
12568d75effSDimitry Andric   const char *BufferEnd = &Buffer[BufferLength - 1];
12668d75effSDimitry Andric   const char *Cur = Format;
12768d75effSDimitry Andric   int Res = 0;
12868d75effSDimitry Andric   for (; *Cur; Cur++) {
12968d75effSDimitry Andric     if (*Cur != '%') {
13068d75effSDimitry Andric       Res += appendChar(&Buffer, BufferEnd, *Cur);
13168d75effSDimitry Andric       continue;
13268d75effSDimitry Andric     }
13368d75effSDimitry Andric     Cur++;
13468d75effSDimitry Andric     const bool LeftJustified = *Cur == '-';
13568d75effSDimitry Andric     if (LeftJustified)
13668d75effSDimitry Andric       Cur++;
13768d75effSDimitry Andric     bool HaveWidth = (*Cur >= '0' && *Cur <= '9');
13868d75effSDimitry Andric     const bool PadWithZero = (*Cur == '0');
13968d75effSDimitry Andric     u8 Width = 0;
14068d75effSDimitry Andric     if (HaveWidth) {
14168d75effSDimitry Andric       while (*Cur >= '0' && *Cur <= '9')
14268d75effSDimitry Andric         Width = static_cast<u8>(Width * 10 + *Cur++ - '0');
14368d75effSDimitry Andric     }
14468d75effSDimitry Andric     const bool HavePrecision = (Cur[0] == '.' && Cur[1] == '*');
14568d75effSDimitry Andric     int Precision = -1;
14668d75effSDimitry Andric     if (HavePrecision) {
14768d75effSDimitry Andric       Cur += 2;
14868d75effSDimitry Andric       Precision = va_arg(Args, int);
14968d75effSDimitry Andric     }
15068d75effSDimitry Andric     const bool HaveZ = (*Cur == 'z');
15168d75effSDimitry Andric     Cur += HaveZ;
15268d75effSDimitry Andric     const bool HaveLL = !HaveZ && (Cur[0] == 'l' && Cur[1] == 'l');
15368d75effSDimitry Andric     Cur += HaveLL * 2;
15468d75effSDimitry Andric     s64 DVal;
15568d75effSDimitry Andric     u64 UVal;
15668d75effSDimitry Andric     const bool HaveLength = HaveZ || HaveLL;
15768d75effSDimitry Andric     const bool HaveFlags = HaveWidth || HaveLength;
15868d75effSDimitry Andric     // At the moment only %s supports precision and left-justification.
15968d75effSDimitry Andric     CHECK(!((Precision >= 0 || LeftJustified) && *Cur != 's'));
16068d75effSDimitry Andric     switch (*Cur) {
16168d75effSDimitry Andric     case 'd': {
16268d75effSDimitry Andric       DVal = HaveLL  ? va_arg(Args, s64)
163e8d8bef9SDimitry Andric              : HaveZ ? va_arg(Args, sptr)
164e8d8bef9SDimitry Andric                      : va_arg(Args, int);
16568d75effSDimitry Andric       Res += appendSignedDecimal(&Buffer, BufferEnd, DVal, Width, PadWithZero);
16668d75effSDimitry Andric       break;
16768d75effSDimitry Andric     }
16868d75effSDimitry Andric     case 'u':
16968d75effSDimitry Andric     case 'x':
17068d75effSDimitry Andric     case 'X': {
17168d75effSDimitry Andric       UVal = HaveLL  ? va_arg(Args, u64)
172e8d8bef9SDimitry Andric              : HaveZ ? va_arg(Args, uptr)
173e8d8bef9SDimitry Andric                      : va_arg(Args, unsigned);
17468d75effSDimitry Andric       const bool Upper = (*Cur == 'X');
17568d75effSDimitry Andric       Res += appendUnsigned(&Buffer, BufferEnd, UVal, (*Cur == 'u') ? 10 : 16,
17668d75effSDimitry Andric                             Width, PadWithZero, Upper);
17768d75effSDimitry Andric       break;
17868d75effSDimitry Andric     }
17968d75effSDimitry Andric     case 'p': {
18068d75effSDimitry Andric       RAW_CHECK_MSG(!HaveFlags, PrintfFormatsHelp);
18168d75effSDimitry Andric       Res += appendPointer(&Buffer, BufferEnd, va_arg(Args, uptr));
18268d75effSDimitry Andric       break;
18368d75effSDimitry Andric     }
18468d75effSDimitry Andric     case 's': {
18568d75effSDimitry Andric       RAW_CHECK_MSG(!HaveLength, PrintfFormatsHelp);
18668d75effSDimitry Andric       // Only left-justified Width is supported.
18768d75effSDimitry Andric       CHECK(!HaveWidth || LeftJustified);
18868d75effSDimitry Andric       Res += appendString(&Buffer, BufferEnd, LeftJustified ? -Width : Width,
18968d75effSDimitry Andric                           Precision, va_arg(Args, char *));
19068d75effSDimitry Andric       break;
19168d75effSDimitry Andric     }
19268d75effSDimitry Andric     case 'c': {
19368d75effSDimitry Andric       RAW_CHECK_MSG(!HaveFlags, PrintfFormatsHelp);
19468d75effSDimitry Andric       Res +=
19568d75effSDimitry Andric           appendChar(&Buffer, BufferEnd, static_cast<char>(va_arg(Args, int)));
19668d75effSDimitry Andric       break;
19768d75effSDimitry Andric     }
198*06c3fb27SDimitry Andric     // In Scudo, `s64`/`u64` are supposed to use `lld` and `llu` respectively.
199*06c3fb27SDimitry Andric     // However, `-Wformat` doesn't know we have a different parser for those
200*06c3fb27SDimitry Andric     // placeholders and it keeps complaining the type mismatch on 64-bit
201*06c3fb27SDimitry Andric     // platform which uses `ld`/`lu` for `s64`/`u64`. Therefore, in order to
202*06c3fb27SDimitry Andric     // silence the warning, we turn to use `PRId64`/`PRIu64` for printing
203*06c3fb27SDimitry Andric     // `s64`/`u64` and handle the `ld`/`lu` here.
204*06c3fb27SDimitry Andric     case 'l': {
205*06c3fb27SDimitry Andric       ++Cur;
206*06c3fb27SDimitry Andric       RAW_CHECK(*Cur == 'd' || *Cur == 'u');
207*06c3fb27SDimitry Andric 
208*06c3fb27SDimitry Andric       if (*Cur == 'd') {
209*06c3fb27SDimitry Andric         DVal = va_arg(Args, s64);
210*06c3fb27SDimitry Andric         Res +=
211*06c3fb27SDimitry Andric             appendSignedDecimal(&Buffer, BufferEnd, DVal, Width, PadWithZero);
212*06c3fb27SDimitry Andric       } else {
213*06c3fb27SDimitry Andric         UVal = va_arg(Args, u64);
214*06c3fb27SDimitry Andric         Res += appendUnsigned(&Buffer, BufferEnd, UVal, 10, Width, PadWithZero,
215*06c3fb27SDimitry Andric                               false);
216*06c3fb27SDimitry Andric       }
217*06c3fb27SDimitry Andric 
218*06c3fb27SDimitry Andric       break;
219*06c3fb27SDimitry Andric     }
22068d75effSDimitry Andric     case '%': {
22168d75effSDimitry Andric       RAW_CHECK_MSG(!HaveFlags, PrintfFormatsHelp);
22268d75effSDimitry Andric       Res += appendChar(&Buffer, BufferEnd, '%');
22368d75effSDimitry Andric       break;
22468d75effSDimitry Andric     }
22568d75effSDimitry Andric     default: {
22668d75effSDimitry Andric       RAW_CHECK_MSG(false, PrintfFormatsHelp);
22768d75effSDimitry Andric     }
22868d75effSDimitry Andric     }
22968d75effSDimitry Andric   }
23068d75effSDimitry Andric   RAW_CHECK(Buffer <= BufferEnd);
23168d75effSDimitry Andric   appendChar(&Buffer, BufferEnd + 1, '\0');
23268d75effSDimitry Andric   return Res;
23368d75effSDimitry Andric }
23468d75effSDimitry Andric 
235fe6060f1SDimitry Andric int formatString(char *Buffer, uptr BufferLength, const char *Format, ...) {
236fe6060f1SDimitry Andric   va_list Args;
237fe6060f1SDimitry Andric   va_start(Args, Format);
238fe6060f1SDimitry Andric   int Res = formatString(Buffer, BufferLength, Format, Args);
239fe6060f1SDimitry Andric   va_end(Args);
240fe6060f1SDimitry Andric   return Res;
241fe6060f1SDimitry Andric }
242fe6060f1SDimitry Andric 
243*06c3fb27SDimitry Andric void ScopedString::vappend(const char *Format, va_list Args) {
24468d75effSDimitry Andric   va_list ArgsCopy;
24568d75effSDimitry Andric   va_copy(ArgsCopy, Args);
24668d75effSDimitry Andric   // formatString doesn't currently support a null buffer or zero buffer length,
24768d75effSDimitry Andric   // so in order to get the resulting formatted string length, we use a one-char
24868d75effSDimitry Andric   // buffer.
24968d75effSDimitry Andric   char C[1];
25068d75effSDimitry Andric   const uptr AdditionalLength =
25168d75effSDimitry Andric       static_cast<uptr>(formatString(C, sizeof(C), Format, Args)) + 1;
252fe6060f1SDimitry Andric   const uptr Length = length();
25368d75effSDimitry Andric   String.resize(Length + AdditionalLength);
254fe6060f1SDimitry Andric   const uptr FormattedLength = static_cast<uptr>(formatString(
255fe6060f1SDimitry Andric       String.data() + Length, String.size() - Length, Format, ArgsCopy));
256fe6060f1SDimitry Andric   RAW_CHECK(data()[length()] == '\0');
257fe6060f1SDimitry Andric   RAW_CHECK(FormattedLength + 1 == AdditionalLength);
258e8d8bef9SDimitry Andric   va_end(ArgsCopy);
25968d75effSDimitry Andric }
26068d75effSDimitry Andric 
26168d75effSDimitry Andric void ScopedString::append(const char *Format, ...) {
26268d75effSDimitry Andric   va_list Args;
26368d75effSDimitry Andric   va_start(Args, Format);
264*06c3fb27SDimitry Andric   vappend(Format, Args);
26568d75effSDimitry Andric   va_end(Args);
26668d75effSDimitry Andric }
26768d75effSDimitry Andric 
26868d75effSDimitry Andric void Printf(const char *Format, ...) {
26968d75effSDimitry Andric   va_list Args;
27068d75effSDimitry Andric   va_start(Args, Format);
271fe6060f1SDimitry Andric   ScopedString Msg;
272*06c3fb27SDimitry Andric   Msg.vappend(Format, Args);
27368d75effSDimitry Andric   outputRaw(Msg.data());
27468d75effSDimitry Andric   va_end(Args);
27568d75effSDimitry Andric }
27668d75effSDimitry Andric 
27768d75effSDimitry Andric } // namespace scudo
278