1 //===-- String to integer conversion utils ----------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // ----------------------------------------------------------------------------- 10 // **** WARNING **** 11 // This file is shared with libc++. You should also be careful when adding 12 // dependencies to this file, since it needs to build for all libc++ targets. 13 // ----------------------------------------------------------------------------- 14 15 #ifndef LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H 16 #define LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H 17 18 #include "src/__support/CPP/limits.h" 19 #include "src/__support/CPP/type_traits.h" 20 #include "src/__support/CPP/type_traits/make_unsigned.h" 21 #include "src/__support/big_int.h" 22 #include "src/__support/common.h" 23 #include "src/__support/ctype_utils.h" 24 #include "src/__support/macros/config.h" 25 #include "src/__support/str_to_num_result.h" 26 #include "src/__support/uint128.h" 27 #include "src/errno/libc_errno.h" // For ERANGE 28 29 namespace LIBC_NAMESPACE_DECL { 30 namespace internal { 31 32 // Returns a pointer to the first character in src that is not a whitespace 33 // character (as determined by isspace()) 34 // TODO: Change from returning a pointer to returning a length. 35 LIBC_INLINE const char * 36 first_non_whitespace(const char *__restrict src, 37 size_t src_len = cpp::numeric_limits<size_t>::max()) { 38 size_t src_cur = 0; 39 while (src_cur < src_len && internal::isspace(src[src_cur])) { 40 ++src_cur; 41 } 42 return src + src_cur; 43 } 44 45 // checks if the next 3 characters of the string pointer are the start of a 46 // hexadecimal number. Does not advance the string pointer. 47 LIBC_INLINE bool 48 is_hex_start(const char *__restrict src, 49 size_t src_len = cpp::numeric_limits<size_t>::max()) { 50 if (src_len < 3) 51 return false; 52 return *src == '0' && tolower(*(src + 1)) == 'x' && isalnum(*(src + 2)) && 53 b36_char_to_int(*(src + 2)) < 16; 54 } 55 56 // Takes the address of the string pointer and parses the base from the start of 57 // it. 58 LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) { 59 // A hexadecimal number is defined as "the prefix 0x or 0X followed by a 60 // sequence of the decimal digits and the letters a (or A) through f (or F) 61 // with values 10 through 15 respectively." (C standard 6.4.4.1) 62 if (is_hex_start(src, src_len)) 63 return 16; 64 // An octal number is defined as "the prefix 0 optionally followed by a 65 // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any 66 // number that starts with 0, including just 0, is an octal number. 67 if (src_len > 0 && src[0] == '0') 68 return 8; 69 // A decimal number is defined as beginning "with a nonzero digit and 70 // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1) 71 return 10; 72 } 73 74 // ----------------------------------------------------------------------------- 75 // **** WARNING **** 76 // This interface is shared with libc++, if you change this interface you need 77 // to update it in both libc and libc++. 78 // ----------------------------------------------------------------------------- 79 // Takes a pointer to a string and the base to convert to. This function is used 80 // as the backend for all of the string to int functions. 81 template <class T> 82 LIBC_INLINE StrToNumResult<T> 83 strtointeger(const char *__restrict src, int base, 84 const size_t src_len = cpp::numeric_limits<size_t>::max()) { 85 using ResultType = make_integral_or_big_int_unsigned_t<T>; 86 87 ResultType result = 0; 88 89 bool is_number = false; 90 size_t src_cur = 0; 91 int error_val = 0; 92 93 if (src_len == 0) 94 return {0, 0, 0}; 95 96 if (base < 0 || base == 1 || base > 36) 97 return {0, 0, EINVAL}; 98 99 src_cur = first_non_whitespace(src, src_len) - src; 100 101 char result_sign = '+'; 102 if (src[src_cur] == '+' || src[src_cur] == '-') { 103 result_sign = src[src_cur]; 104 ++src_cur; 105 } 106 107 if (base == 0) 108 base = infer_base(src + src_cur, src_len - src_cur); 109 110 if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur)) 111 src_cur = src_cur + 2; 112 113 constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>; 114 const bool is_positive = (result_sign == '+'); 115 116 ResultType constexpr NEGATIVE_MAX = 117 !IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1 118 : cpp::numeric_limits<T>::max(); 119 ResultType const abs_max = 120 (is_positive ? cpp::numeric_limits<T>::max() : NEGATIVE_MAX); 121 ResultType const abs_max_div_by_base = 122 static_cast<ResultType>(abs_max / base); 123 124 while (src_cur < src_len && isalnum(src[src_cur])) { 125 int cur_digit = b36_char_to_int(src[src_cur]); 126 if (cur_digit >= base) 127 break; 128 129 is_number = true; 130 ++src_cur; 131 132 // If the number has already hit the maximum value for the current type then 133 // the result cannot change, but we still need to advance src to the end of 134 // the number. 135 if (result == abs_max) { 136 error_val = ERANGE; 137 continue; 138 } 139 140 if (result > abs_max_div_by_base) { 141 result = abs_max; 142 error_val = ERANGE; 143 } else { 144 result = static_cast<ResultType>(result * base); 145 } 146 if (result > abs_max - cur_digit) { 147 result = abs_max; 148 error_val = ERANGE; 149 } else { 150 result = static_cast<ResultType>(result + cur_digit); 151 } 152 } 153 154 ptrdiff_t str_len = is_number ? (src_cur) : 0; 155 156 if (error_val == ERANGE) { 157 if (is_positive || IS_UNSIGNED) 158 return {cpp::numeric_limits<T>::max(), str_len, error_val}; 159 else // T is signed and there is a negative overflow 160 return {cpp::numeric_limits<T>::min(), str_len, error_val}; 161 } 162 163 return {static_cast<T>(is_positive ? result : -result), str_len, error_val}; 164 } 165 166 } // namespace internal 167 } // namespace LIBC_NAMESPACE_DECL 168 169 #endif // LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H 170