1 //===---------- emutls.c - Implements __emutls_get_address ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include <stdint.h> 10 #include <stdlib.h> 11 #include <string.h> 12 13 #include "int_lib.h" 14 15 #ifdef __BIONIC__ 16 // There are 4 pthread key cleanup rounds on Bionic. Delay emutls deallocation 17 // to round 2. We need to delay deallocation because: 18 // - Android versions older than M lack __cxa_thread_atexit_impl, so apps 19 // use a pthread key destructor to call C++ destructors. 20 // - Apps might use __thread/thread_local variables in pthread destructors. 21 // We can't wait until the final two rounds, because jemalloc needs two rounds 22 // after the final malloc/free call to free its thread-specific data (see 23 // https://reviews.llvm.org/D46978#1107507). 24 #define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 1 25 #else 26 #define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0 27 #endif 28 29 typedef struct emutls_address_array { 30 uintptr_t skip_destructor_rounds; 31 uintptr_t size; // number of elements in the 'data' array 32 void *data[]; 33 } emutls_address_array; 34 35 static void emutls_shutdown(emutls_address_array *array); 36 37 #ifndef _WIN32 38 39 #include <pthread.h> 40 41 static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER; 42 static pthread_key_t emutls_pthread_key; 43 static bool emutls_key_created = false; 44 45 typedef unsigned int gcc_word __attribute__((mode(word))); 46 typedef unsigned int gcc_pointer __attribute__((mode(pointer))); 47 48 // Default is not to use posix_memalign, so systems like Android 49 // can use thread local data without heavier POSIX memory allocators. 50 #ifndef EMUTLS_USE_POSIX_MEMALIGN 51 #define EMUTLS_USE_POSIX_MEMALIGN 0 52 #endif 53 54 static __inline void *emutls_memalign_alloc(size_t align, size_t size) { 55 void *base; 56 #if EMUTLS_USE_POSIX_MEMALIGN 57 if (posix_memalign(&base, align, size) != 0) 58 abort(); 59 #else 60 #define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void *)) 61 char *object; 62 if ((object = (char *)malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL) 63 abort(); 64 base = (void *)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES)) & 65 ~(uintptr_t)(align - 1)); 66 67 ((void **)base)[-1] = object; 68 #endif 69 return base; 70 } 71 72 static __inline void emutls_memalign_free(void *base) { 73 #if EMUTLS_USE_POSIX_MEMALIGN 74 free(base); 75 #else 76 // The mallocated address is in ((void**)base)[-1] 77 free(((void **)base)[-1]); 78 #endif 79 } 80 81 static __inline void emutls_setspecific(emutls_address_array *value) { 82 pthread_setspecific(emutls_pthread_key, (void *)value); 83 } 84 85 static __inline emutls_address_array *emutls_getspecific() { 86 return (emutls_address_array *)pthread_getspecific(emutls_pthread_key); 87 } 88 89 static void emutls_key_destructor(void *ptr) { 90 emutls_address_array *array = (emutls_address_array *)ptr; 91 if (array->skip_destructor_rounds > 0) { 92 // emutls is deallocated using a pthread key destructor. These 93 // destructors are called in several rounds to accommodate destructor 94 // functions that (re)initialize key values with pthread_setspecific. 95 // Delay the emutls deallocation to accommodate other end-of-thread 96 // cleanup tasks like calling thread_local destructors (e.g. the 97 // __cxa_thread_atexit fallback in libc++abi). 98 array->skip_destructor_rounds--; 99 emutls_setspecific(array); 100 } else { 101 emutls_shutdown(array); 102 free(ptr); 103 } 104 } 105 106 static __inline void emutls_init(void) { 107 if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0) 108 abort(); 109 emutls_key_created = true; 110 } 111 112 static __inline void emutls_init_once(void) { 113 static pthread_once_t once = PTHREAD_ONCE_INIT; 114 pthread_once(&once, emutls_init); 115 } 116 117 static __inline void emutls_lock() { pthread_mutex_lock(&emutls_mutex); } 118 119 static __inline void emutls_unlock() { pthread_mutex_unlock(&emutls_mutex); } 120 121 #else // _WIN32 122 123 #include <assert.h> 124 #include <malloc.h> 125 #include <stdio.h> 126 #include <windows.h> 127 128 static LPCRITICAL_SECTION emutls_mutex; 129 static DWORD emutls_tls_index = TLS_OUT_OF_INDEXES; 130 131 typedef uintptr_t gcc_word; 132 typedef void *gcc_pointer; 133 134 static void win_error(DWORD last_err, const char *hint) { 135 char *buffer = NULL; 136 if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | 137 FORMAT_MESSAGE_FROM_SYSTEM | 138 FORMAT_MESSAGE_MAX_WIDTH_MASK, 139 NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) { 140 fprintf(stderr, "Windows error: %s\n", buffer); 141 } else { 142 fprintf(stderr, "Unkown Windows error: %s\n", hint); 143 } 144 LocalFree(buffer); 145 } 146 147 static __inline void win_abort(DWORD last_err, const char *hint) { 148 win_error(last_err, hint); 149 abort(); 150 } 151 152 static __inline void *emutls_memalign_alloc(size_t align, size_t size) { 153 void *base = _aligned_malloc(size, align); 154 if (!base) 155 win_abort(GetLastError(), "_aligned_malloc"); 156 return base; 157 } 158 159 static __inline void emutls_memalign_free(void *base) { _aligned_free(base); } 160 161 static void emutls_exit(void) { 162 if (emutls_mutex) { 163 DeleteCriticalSection(emutls_mutex); 164 _aligned_free(emutls_mutex); 165 emutls_mutex = NULL; 166 } 167 if (emutls_tls_index != TLS_OUT_OF_INDEXES) { 168 emutls_shutdown((emutls_address_array *)TlsGetValue(emutls_tls_index)); 169 TlsFree(emutls_tls_index); 170 emutls_tls_index = TLS_OUT_OF_INDEXES; 171 } 172 } 173 174 #pragma warning(push) 175 #pragma warning(disable : 4100) 176 static BOOL CALLBACK emutls_init(PINIT_ONCE p0, PVOID p1, PVOID *p2) { 177 emutls_mutex = 178 (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16); 179 if (!emutls_mutex) { 180 win_error(GetLastError(), "_aligned_malloc"); 181 return FALSE; 182 } 183 InitializeCriticalSection(emutls_mutex); 184 185 emutls_tls_index = TlsAlloc(); 186 if (emutls_tls_index == TLS_OUT_OF_INDEXES) { 187 emutls_exit(); 188 win_error(GetLastError(), "TlsAlloc"); 189 return FALSE; 190 } 191 atexit(&emutls_exit); 192 return TRUE; 193 } 194 195 static __inline void emutls_init_once(void) { 196 static INIT_ONCE once; 197 InitOnceExecuteOnce(&once, emutls_init, NULL, NULL); 198 } 199 200 static __inline void emutls_lock() { EnterCriticalSection(emutls_mutex); } 201 202 static __inline void emutls_unlock() { LeaveCriticalSection(emutls_mutex); } 203 204 static __inline void emutls_setspecific(emutls_address_array *value) { 205 if (TlsSetValue(emutls_tls_index, (LPVOID)value) == 0) 206 win_abort(GetLastError(), "TlsSetValue"); 207 } 208 209 static __inline emutls_address_array *emutls_getspecific() { 210 LPVOID value = TlsGetValue(emutls_tls_index); 211 if (value == NULL) { 212 const DWORD err = GetLastError(); 213 if (err != ERROR_SUCCESS) 214 win_abort(err, "TlsGetValue"); 215 } 216 return (emutls_address_array *)value; 217 } 218 219 // Provide atomic load/store functions for emutls_get_index if built with MSVC. 220 #if !defined(__ATOMIC_RELEASE) 221 #include <intrin.h> 222 223 enum { __ATOMIC_ACQUIRE = 2, __ATOMIC_RELEASE = 3 }; 224 225 static __inline uintptr_t __atomic_load_n(void *ptr, unsigned type) { 226 assert(type == __ATOMIC_ACQUIRE); 227 // These return the previous value - but since we do an OR with 0, 228 // it's equivalent to a plain load. 229 #ifdef _WIN64 230 return InterlockedOr64(ptr, 0); 231 #else 232 return InterlockedOr(ptr, 0); 233 #endif 234 } 235 236 static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) { 237 assert(type == __ATOMIC_RELEASE); 238 InterlockedExchangePointer((void *volatile *)ptr, (void *)val); 239 } 240 241 #endif // __ATOMIC_RELEASE 242 243 #pragma warning(pop) 244 245 #endif // _WIN32 246 247 static size_t emutls_num_object = 0; // number of allocated TLS objects 248 249 // Free the allocated TLS data 250 static void emutls_shutdown(emutls_address_array *array) { 251 if (array) { 252 uintptr_t i; 253 for (i = 0; i < array->size; ++i) { 254 if (array->data[i]) 255 emutls_memalign_free(array->data[i]); 256 } 257 } 258 } 259 260 // For every TLS variable xyz, 261 // there is one __emutls_control variable named __emutls_v.xyz. 262 // If xyz has non-zero initial value, __emutls_v.xyz's "value" 263 // will point to __emutls_t.xyz, which has the initial value. 264 typedef struct __emutls_control { 265 // Must use gcc_word here, instead of size_t, to match GCC. When 266 // gcc_word is larger than size_t, the upper extra bits are all 267 // zeros. We can use variables of size_t to operate on size and 268 // align. 269 gcc_word size; // size of the object in bytes 270 gcc_word align; // alignment of the object in bytes 271 union { 272 uintptr_t index; // data[index-1] is the object address 273 void *address; // object address, when in single thread env 274 } object; 275 void *value; // null or non-zero initial value for the object 276 } __emutls_control; 277 278 // Emulated TLS objects are always allocated at run-time. 279 static __inline void *emutls_allocate_object(__emutls_control *control) { 280 // Use standard C types, check with gcc's emutls.o. 281 COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer)); 282 COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void *)); 283 284 size_t size = control->size; 285 size_t align = control->align; 286 void *base; 287 if (align < sizeof(void *)) 288 align = sizeof(void *); 289 // Make sure that align is power of 2. 290 if ((align & (align - 1)) != 0) 291 abort(); 292 293 base = emutls_memalign_alloc(align, size); 294 if (control->value) 295 memcpy(base, control->value, size); 296 else 297 memset(base, 0, size); 298 return base; 299 } 300 301 // Returns control->object.index; set index if not allocated yet. 302 static __inline uintptr_t emutls_get_index(__emutls_control *control) { 303 uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE); 304 if (!index) { 305 emutls_init_once(); 306 emutls_lock(); 307 index = control->object.index; 308 if (!index) { 309 index = ++emutls_num_object; 310 __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE); 311 } 312 emutls_unlock(); 313 } 314 return index; 315 } 316 317 // Updates newly allocated thread local emutls_address_array. 318 static __inline void emutls_check_array_set_size(emutls_address_array *array, 319 uintptr_t size) { 320 if (array == NULL) 321 abort(); 322 array->size = size; 323 emutls_setspecific(array); 324 } 325 326 // Returns the new 'data' array size, number of elements, 327 // which must be no smaller than the given index. 328 static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) { 329 // Need to allocate emutls_address_array with extra slots 330 // to store the header. 331 // Round up the emutls_address_array size to multiple of 16. 332 uintptr_t header_words = sizeof(emutls_address_array) / sizeof(void *); 333 return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words; 334 } 335 336 // Returns the size in bytes required for an emutls_address_array with 337 // N number of elements for data field. 338 static __inline uintptr_t emutls_asize(uintptr_t N) { 339 return N * sizeof(void *) + sizeof(emutls_address_array); 340 } 341 342 // Returns the thread local emutls_address_array. 343 // Extends its size if necessary to hold address at index. 344 static __inline emutls_address_array * 345 emutls_get_address_array(uintptr_t index) { 346 emutls_address_array *array = emutls_getspecific(); 347 if (array == NULL) { 348 uintptr_t new_size = emutls_new_data_array_size(index); 349 array = (emutls_address_array *)malloc(emutls_asize(new_size)); 350 if (array) { 351 memset(array->data, 0, new_size * sizeof(void *)); 352 array->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS; 353 } 354 emutls_check_array_set_size(array, new_size); 355 } else if (index > array->size) { 356 uintptr_t orig_size = array->size; 357 uintptr_t new_size = emutls_new_data_array_size(index); 358 array = (emutls_address_array *)realloc(array, emutls_asize(new_size)); 359 if (array) 360 memset(array->data + orig_size, 0, 361 (new_size - orig_size) * sizeof(void *)); 362 emutls_check_array_set_size(array, new_size); 363 } 364 return array; 365 } 366 367 void *__emutls_get_address(__emutls_control *control) { 368 uintptr_t index = emutls_get_index(control); 369 emutls_address_array *array = emutls_get_address_array(index--); 370 if (array->data[index] == NULL) 371 array->data[index] = emutls_allocate_object(control); 372 return array->data[index]; 373 } 374 375 #ifdef __BIONIC__ 376 // Called by Bionic on dlclose to delete the emutls pthread key. 377 __attribute__((visibility("hidden"))) void __emutls_unregister_key(void) { 378 if (emutls_key_created) { 379 pthread_key_delete(emutls_pthread_key); 380 emutls_key_created = false; 381 } 382 } 383 #endif 384