168d75effSDimitry Andric //===-- string_utils.cpp ----------------------------------------*- C++ -*-===// 268d75effSDimitry Andric // 368d75effSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 468d75effSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 568d75effSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 668d75effSDimitry Andric // 768d75effSDimitry Andric //===----------------------------------------------------------------------===// 868d75effSDimitry Andric 968d75effSDimitry Andric #include "string_utils.h" 1068d75effSDimitry Andric #include "common.h" 1168d75effSDimitry Andric 1268d75effSDimitry Andric #include <stdarg.h> 1368d75effSDimitry Andric #include <string.h> 1468d75effSDimitry Andric 1568d75effSDimitry Andric namespace scudo { 1668d75effSDimitry Andric 1768d75effSDimitry Andric static int appendChar(char **Buffer, const char *BufferEnd, char C) { 1868d75effSDimitry Andric if (*Buffer < BufferEnd) { 1968d75effSDimitry Andric **Buffer = C; 2068d75effSDimitry Andric (*Buffer)++; 2168d75effSDimitry Andric } 2268d75effSDimitry Andric return 1; 2368d75effSDimitry Andric } 2468d75effSDimitry Andric 2568d75effSDimitry Andric // Appends number in a given Base to buffer. If its length is less than 2668d75effSDimitry Andric // |MinNumberLength|, it is padded with leading zeroes or spaces, depending 2768d75effSDimitry Andric // on the value of |PadWithZero|. 2868d75effSDimitry Andric static int appendNumber(char **Buffer, const char *BufferEnd, u64 AbsoluteValue, 2968d75effSDimitry Andric u8 Base, u8 MinNumberLength, bool PadWithZero, 3068d75effSDimitry Andric bool Negative, bool Upper) { 3168d75effSDimitry Andric constexpr uptr MaxLen = 30; 3268d75effSDimitry Andric RAW_CHECK(Base == 10 || Base == 16); 3368d75effSDimitry Andric RAW_CHECK(Base == 10 || !Negative); 3468d75effSDimitry Andric RAW_CHECK(AbsoluteValue || !Negative); 3568d75effSDimitry Andric RAW_CHECK(MinNumberLength < MaxLen); 3668d75effSDimitry Andric int Res = 0; 3768d75effSDimitry Andric if (Negative && MinNumberLength) 3868d75effSDimitry Andric --MinNumberLength; 3968d75effSDimitry Andric if (Negative && PadWithZero) 4068d75effSDimitry Andric Res += appendChar(Buffer, BufferEnd, '-'); 4168d75effSDimitry Andric uptr NumBuffer[MaxLen]; 4268d75effSDimitry Andric int Pos = 0; 4368d75effSDimitry Andric do { 4468d75effSDimitry Andric RAW_CHECK_MSG(static_cast<uptr>(Pos) < MaxLen, 4568d75effSDimitry Andric "appendNumber buffer overflow"); 4668d75effSDimitry Andric NumBuffer[Pos++] = static_cast<uptr>(AbsoluteValue % Base); 4768d75effSDimitry Andric AbsoluteValue /= Base; 4868d75effSDimitry Andric } while (AbsoluteValue > 0); 4968d75effSDimitry Andric if (Pos < MinNumberLength) { 5068d75effSDimitry Andric memset(&NumBuffer[Pos], 0, 5168d75effSDimitry Andric sizeof(NumBuffer[0]) * static_cast<uptr>(MinNumberLength - Pos)); 5268d75effSDimitry Andric Pos = MinNumberLength; 5368d75effSDimitry Andric } 5468d75effSDimitry Andric RAW_CHECK(Pos > 0); 5568d75effSDimitry Andric Pos--; 5668d75effSDimitry Andric for (; Pos >= 0 && NumBuffer[Pos] == 0; Pos--) { 5768d75effSDimitry Andric char c = (PadWithZero || Pos == 0) ? '0' : ' '; 5868d75effSDimitry Andric Res += appendChar(Buffer, BufferEnd, c); 5968d75effSDimitry Andric } 6068d75effSDimitry Andric if (Negative && !PadWithZero) 6168d75effSDimitry Andric Res += appendChar(Buffer, BufferEnd, '-'); 6268d75effSDimitry Andric for (; Pos >= 0; Pos--) { 6368d75effSDimitry Andric char Digit = static_cast<char>(NumBuffer[Pos]); 6468d75effSDimitry Andric Digit = static_cast<char>((Digit < 10) ? '0' + Digit 6568d75effSDimitry Andric : (Upper ? 'A' : 'a') + Digit - 10); 6668d75effSDimitry Andric Res += appendChar(Buffer, BufferEnd, Digit); 6768d75effSDimitry Andric } 6868d75effSDimitry Andric return Res; 6968d75effSDimitry Andric } 7068d75effSDimitry Andric 7168d75effSDimitry Andric static int appendUnsigned(char **Buffer, const char *BufferEnd, u64 Num, 7268d75effSDimitry Andric u8 Base, u8 MinNumberLength, bool PadWithZero, 7368d75effSDimitry Andric bool Upper) { 7468d75effSDimitry Andric return appendNumber(Buffer, BufferEnd, Num, Base, MinNumberLength, 7568d75effSDimitry Andric PadWithZero, /*Negative=*/false, Upper); 7668d75effSDimitry Andric } 7768d75effSDimitry Andric 7868d75effSDimitry Andric static int appendSignedDecimal(char **Buffer, const char *BufferEnd, s64 Num, 7968d75effSDimitry Andric u8 MinNumberLength, bool PadWithZero) { 8068d75effSDimitry Andric const bool Negative = (Num < 0); 81e8d8bef9SDimitry Andric const u64 UnsignedNum = (Num == INT64_MIN) 82e8d8bef9SDimitry Andric ? static_cast<u64>(INT64_MAX) + 1 83e8d8bef9SDimitry Andric : static_cast<u64>(Negative ? -Num : Num); 84e8d8bef9SDimitry Andric return appendNumber(Buffer, BufferEnd, UnsignedNum, 10, MinNumberLength, 85e8d8bef9SDimitry Andric PadWithZero, Negative, /*Upper=*/false); 8668d75effSDimitry Andric } 8768d75effSDimitry Andric 8868d75effSDimitry Andric // Use the fact that explicitly requesting 0 Width (%0s) results in UB and 8968d75effSDimitry Andric // interpret Width == 0 as "no Width requested": 9068d75effSDimitry Andric // Width == 0 - no Width requested 9168d75effSDimitry Andric // Width < 0 - left-justify S within and pad it to -Width chars, if necessary 9268d75effSDimitry Andric // Width > 0 - right-justify S, not implemented yet 9368d75effSDimitry Andric static int appendString(char **Buffer, const char *BufferEnd, int Width, 9468d75effSDimitry Andric int MaxChars, const char *S) { 9568d75effSDimitry Andric if (!S) 9668d75effSDimitry Andric S = "<null>"; 9768d75effSDimitry Andric int Res = 0; 9868d75effSDimitry Andric for (; *S; S++) { 9968d75effSDimitry Andric if (MaxChars >= 0 && Res >= MaxChars) 10068d75effSDimitry Andric break; 10168d75effSDimitry Andric Res += appendChar(Buffer, BufferEnd, *S); 10268d75effSDimitry Andric } 10368d75effSDimitry Andric // Only the left justified strings are supported. 10468d75effSDimitry Andric while (Width < -Res) 10568d75effSDimitry Andric Res += appendChar(Buffer, BufferEnd, ' '); 10668d75effSDimitry Andric return Res; 10768d75effSDimitry Andric } 10868d75effSDimitry Andric 10968d75effSDimitry Andric static int appendPointer(char **Buffer, const char *BufferEnd, u64 ptr_value) { 11068d75effSDimitry Andric int Res = 0; 11168d75effSDimitry Andric Res += appendString(Buffer, BufferEnd, 0, -1, "0x"); 11268d75effSDimitry Andric Res += appendUnsigned(Buffer, BufferEnd, ptr_value, 16, 11368d75effSDimitry Andric SCUDO_POINTER_FORMAT_LENGTH, /*PadWithZero=*/true, 11468d75effSDimitry Andric /*Upper=*/false); 11568d75effSDimitry Andric return Res; 11668d75effSDimitry Andric } 11768d75effSDimitry Andric 118fe6060f1SDimitry Andric static int formatString(char *Buffer, uptr BufferLength, const char *Format, 11968d75effSDimitry Andric va_list Args) { 12068d75effSDimitry Andric static const char *PrintfFormatsHelp = 12168d75effSDimitry Andric "Supported formatString formats: %([0-9]*)?(z|ll)?{d,u,x,X}; %p; " 12268d75effSDimitry Andric "%[-]([0-9]*)?(\\.\\*)?s; %c\n"; 12368d75effSDimitry Andric RAW_CHECK(Format); 12468d75effSDimitry Andric RAW_CHECK(BufferLength > 0); 12568d75effSDimitry Andric const char *BufferEnd = &Buffer[BufferLength - 1]; 12668d75effSDimitry Andric const char *Cur = Format; 12768d75effSDimitry Andric int Res = 0; 12868d75effSDimitry Andric for (; *Cur; Cur++) { 12968d75effSDimitry Andric if (*Cur != '%') { 13068d75effSDimitry Andric Res += appendChar(&Buffer, BufferEnd, *Cur); 13168d75effSDimitry Andric continue; 13268d75effSDimitry Andric } 13368d75effSDimitry Andric Cur++; 13468d75effSDimitry Andric const bool LeftJustified = *Cur == '-'; 13568d75effSDimitry Andric if (LeftJustified) 13668d75effSDimitry Andric Cur++; 13768d75effSDimitry Andric bool HaveWidth = (*Cur >= '0' && *Cur <= '9'); 13868d75effSDimitry Andric const bool PadWithZero = (*Cur == '0'); 13968d75effSDimitry Andric u8 Width = 0; 14068d75effSDimitry Andric if (HaveWidth) { 14168d75effSDimitry Andric while (*Cur >= '0' && *Cur <= '9') 14268d75effSDimitry Andric Width = static_cast<u8>(Width * 10 + *Cur++ - '0'); 14368d75effSDimitry Andric } 14468d75effSDimitry Andric const bool HavePrecision = (Cur[0] == '.' && Cur[1] == '*'); 14568d75effSDimitry Andric int Precision = -1; 14668d75effSDimitry Andric if (HavePrecision) { 14768d75effSDimitry Andric Cur += 2; 14868d75effSDimitry Andric Precision = va_arg(Args, int); 14968d75effSDimitry Andric } 15068d75effSDimitry Andric const bool HaveZ = (*Cur == 'z'); 15168d75effSDimitry Andric Cur += HaveZ; 15268d75effSDimitry Andric const bool HaveLL = !HaveZ && (Cur[0] == 'l' && Cur[1] == 'l'); 15368d75effSDimitry Andric Cur += HaveLL * 2; 15468d75effSDimitry Andric s64 DVal; 15568d75effSDimitry Andric u64 UVal; 15668d75effSDimitry Andric const bool HaveLength = HaveZ || HaveLL; 15768d75effSDimitry Andric const bool HaveFlags = HaveWidth || HaveLength; 15868d75effSDimitry Andric // At the moment only %s supports precision and left-justification. 15968d75effSDimitry Andric CHECK(!((Precision >= 0 || LeftJustified) && *Cur != 's')); 16068d75effSDimitry Andric switch (*Cur) { 16168d75effSDimitry Andric case 'd': { 16268d75effSDimitry Andric DVal = HaveLL ? va_arg(Args, s64) 163e8d8bef9SDimitry Andric : HaveZ ? va_arg(Args, sptr) 164e8d8bef9SDimitry Andric : va_arg(Args, int); 16568d75effSDimitry Andric Res += appendSignedDecimal(&Buffer, BufferEnd, DVal, Width, PadWithZero); 16668d75effSDimitry Andric break; 16768d75effSDimitry Andric } 16868d75effSDimitry Andric case 'u': 16968d75effSDimitry Andric case 'x': 17068d75effSDimitry Andric case 'X': { 17168d75effSDimitry Andric UVal = HaveLL ? va_arg(Args, u64) 172e8d8bef9SDimitry Andric : HaveZ ? va_arg(Args, uptr) 173e8d8bef9SDimitry Andric : va_arg(Args, unsigned); 17468d75effSDimitry Andric const bool Upper = (*Cur == 'X'); 17568d75effSDimitry Andric Res += appendUnsigned(&Buffer, BufferEnd, UVal, (*Cur == 'u') ? 10 : 16, 17668d75effSDimitry Andric Width, PadWithZero, Upper); 17768d75effSDimitry Andric break; 17868d75effSDimitry Andric } 17968d75effSDimitry Andric case 'p': { 18068d75effSDimitry Andric RAW_CHECK_MSG(!HaveFlags, PrintfFormatsHelp); 18168d75effSDimitry Andric Res += appendPointer(&Buffer, BufferEnd, va_arg(Args, uptr)); 18268d75effSDimitry Andric break; 18368d75effSDimitry Andric } 18468d75effSDimitry Andric case 's': { 18568d75effSDimitry Andric RAW_CHECK_MSG(!HaveLength, PrintfFormatsHelp); 18668d75effSDimitry Andric // Only left-justified Width is supported. 18768d75effSDimitry Andric CHECK(!HaveWidth || LeftJustified); 18868d75effSDimitry Andric Res += appendString(&Buffer, BufferEnd, LeftJustified ? -Width : Width, 18968d75effSDimitry Andric Precision, va_arg(Args, char *)); 19068d75effSDimitry Andric break; 19168d75effSDimitry Andric } 19268d75effSDimitry Andric case 'c': { 19368d75effSDimitry Andric RAW_CHECK_MSG(!HaveFlags, PrintfFormatsHelp); 19468d75effSDimitry Andric Res += 19568d75effSDimitry Andric appendChar(&Buffer, BufferEnd, static_cast<char>(va_arg(Args, int))); 19668d75effSDimitry Andric break; 19768d75effSDimitry Andric } 198*06c3fb27SDimitry Andric // In Scudo, `s64`/`u64` are supposed to use `lld` and `llu` respectively. 199*06c3fb27SDimitry Andric // However, `-Wformat` doesn't know we have a different parser for those 200*06c3fb27SDimitry Andric // placeholders and it keeps complaining the type mismatch on 64-bit 201*06c3fb27SDimitry Andric // platform which uses `ld`/`lu` for `s64`/`u64`. Therefore, in order to 202*06c3fb27SDimitry Andric // silence the warning, we turn to use `PRId64`/`PRIu64` for printing 203*06c3fb27SDimitry Andric // `s64`/`u64` and handle the `ld`/`lu` here. 204*06c3fb27SDimitry Andric case 'l': { 205*06c3fb27SDimitry Andric ++Cur; 206*06c3fb27SDimitry Andric RAW_CHECK(*Cur == 'd' || *Cur == 'u'); 207*06c3fb27SDimitry Andric 208*06c3fb27SDimitry Andric if (*Cur == 'd') { 209*06c3fb27SDimitry Andric DVal = va_arg(Args, s64); 210*06c3fb27SDimitry Andric Res += 211*06c3fb27SDimitry Andric appendSignedDecimal(&Buffer, BufferEnd, DVal, Width, PadWithZero); 212*06c3fb27SDimitry Andric } else { 213*06c3fb27SDimitry Andric UVal = va_arg(Args, u64); 214*06c3fb27SDimitry Andric Res += appendUnsigned(&Buffer, BufferEnd, UVal, 10, Width, PadWithZero, 215*06c3fb27SDimitry Andric false); 216*06c3fb27SDimitry Andric } 217*06c3fb27SDimitry Andric 218*06c3fb27SDimitry Andric break; 219*06c3fb27SDimitry Andric } 22068d75effSDimitry Andric case '%': { 22168d75effSDimitry Andric RAW_CHECK_MSG(!HaveFlags, PrintfFormatsHelp); 22268d75effSDimitry Andric Res += appendChar(&Buffer, BufferEnd, '%'); 22368d75effSDimitry Andric break; 22468d75effSDimitry Andric } 22568d75effSDimitry Andric default: { 22668d75effSDimitry Andric RAW_CHECK_MSG(false, PrintfFormatsHelp); 22768d75effSDimitry Andric } 22868d75effSDimitry Andric } 22968d75effSDimitry Andric } 23068d75effSDimitry Andric RAW_CHECK(Buffer <= BufferEnd); 23168d75effSDimitry Andric appendChar(&Buffer, BufferEnd + 1, '\0'); 23268d75effSDimitry Andric return Res; 23368d75effSDimitry Andric } 23468d75effSDimitry Andric 235fe6060f1SDimitry Andric int formatString(char *Buffer, uptr BufferLength, const char *Format, ...) { 236fe6060f1SDimitry Andric va_list Args; 237fe6060f1SDimitry Andric va_start(Args, Format); 238fe6060f1SDimitry Andric int Res = formatString(Buffer, BufferLength, Format, Args); 239fe6060f1SDimitry Andric va_end(Args); 240fe6060f1SDimitry Andric return Res; 241fe6060f1SDimitry Andric } 242fe6060f1SDimitry Andric 243*06c3fb27SDimitry Andric void ScopedString::vappend(const char *Format, va_list Args) { 24468d75effSDimitry Andric va_list ArgsCopy; 24568d75effSDimitry Andric va_copy(ArgsCopy, Args); 24668d75effSDimitry Andric // formatString doesn't currently support a null buffer or zero buffer length, 24768d75effSDimitry Andric // so in order to get the resulting formatted string length, we use a one-char 24868d75effSDimitry Andric // buffer. 24968d75effSDimitry Andric char C[1]; 25068d75effSDimitry Andric const uptr AdditionalLength = 25168d75effSDimitry Andric static_cast<uptr>(formatString(C, sizeof(C), Format, Args)) + 1; 252fe6060f1SDimitry Andric const uptr Length = length(); 25368d75effSDimitry Andric String.resize(Length + AdditionalLength); 254fe6060f1SDimitry Andric const uptr FormattedLength = static_cast<uptr>(formatString( 255fe6060f1SDimitry Andric String.data() + Length, String.size() - Length, Format, ArgsCopy)); 256fe6060f1SDimitry Andric RAW_CHECK(data()[length()] == '\0'); 257fe6060f1SDimitry Andric RAW_CHECK(FormattedLength + 1 == AdditionalLength); 258e8d8bef9SDimitry Andric va_end(ArgsCopy); 25968d75effSDimitry Andric } 26068d75effSDimitry Andric 26168d75effSDimitry Andric void ScopedString::append(const char *Format, ...) { 26268d75effSDimitry Andric va_list Args; 26368d75effSDimitry Andric va_start(Args, Format); 264*06c3fb27SDimitry Andric vappend(Format, Args); 26568d75effSDimitry Andric va_end(Args); 26668d75effSDimitry Andric } 26768d75effSDimitry Andric 26868d75effSDimitry Andric void Printf(const char *Format, ...) { 26968d75effSDimitry Andric va_list Args; 27068d75effSDimitry Andric va_start(Args, Format); 271fe6060f1SDimitry Andric ScopedString Msg; 272*06c3fb27SDimitry Andric Msg.vappend(Format, Args); 27368d75effSDimitry Andric outputRaw(Msg.data()); 27468d75effSDimitry Andric va_end(Args); 27568d75effSDimitry Andric } 27668d75effSDimitry Andric 27768d75effSDimitry Andric } // namespace scudo 278