xref: /netbsd-src/external/bsd/unbound/dist/testcode/checklocks.c (revision d909946ca08dceb44d7d0f22ec9488679695d976)
1 /**
2  * testcode/checklocks.c - wrapper on locks that checks access.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 #include "config.h"
37 #include <signal.h>
38 #include "util/locks.h"   /* include before checklocks.h */
39 #include "testcode/checklocks.h"
40 
41 /**
42  * \file
43  * Locks that are checked.
44  *
45  * Ugly hack: uses the fact that workers start with an int thread_num, and
46  * are passed to thread_create to make the thread numbers here the same as
47  * those used for logging which is nice.
48  *
49  * Todo:
50  *	 - debug status print, of thread lock stacks, and current waiting.
51  */
52 #ifdef USE_THREAD_DEBUG
53 
54 /** How long to wait before lock attempt is a failure. */
55 #define CHECK_LOCK_TIMEOUT 120 /* seconds */
56 /** How long to wait before join attempt is a failure. */
57 #define CHECK_JOIN_TIMEOUT 120 /* seconds */
58 
59 /** if key has been created */
60 static int key_created = 0;
61 /** if the key was deleted, i.e. we have quit */
62 static int key_deleted = 0;
63 /** we hide the thread debug info with this key. */
64 static ub_thread_key_t thr_debug_key;
65 /** the list of threads, so all threads can be examined. NULL if unused. */
66 static struct thr_check* thread_infos[THRDEBUG_MAX_THREADS];
67 /** do we check locking order */
68 int check_locking_order = 1;
69 /** the pid of this runset, reasonably unique. */
70 static pid_t check_lock_pid;
71 
72 /** print all possible debug info on the state of the system */
73 static void total_debug_info(void);
74 
75 /** print pretty lock error and exit */
76 static void lock_error(struct checked_lock* lock,
77 	const char* func, const char* file, int line, const char* err)
78 {
79 	log_err("lock error (description follows)");
80 	log_err("Created at %s %s:%d", lock->create_func,
81 		lock->create_file, lock->create_line);
82 	if(lock->holder_func && lock->holder_file)
83 		log_err("Previously %s %s:%d", lock->holder_func,
84 			lock->holder_file, lock->holder_line);
85 	log_err("At %s %s:%d", func, file, line);
86 	log_err("Error for %s lock: %s",
87 		(lock->type==check_lock_mutex)?"mutex": (
88 		(lock->type==check_lock_spinlock)?"spinlock": (
89 		(lock->type==check_lock_rwlock)?"rwlock": "badtype")), err);
90 	log_err("complete status display:");
91 	total_debug_info();
92 	fatal_exit("bailing out");
93 }
94 
95 /**
96  * Obtain lock on debug lock structure. This could be a deadlock by the caller.
97  * The debug code itself does not deadlock. Anyway, check with timeouts.
98  * @param lock: on what to acquire lock.
99  * @param func: user level caller identification.
100  * @param file: user level caller identification.
101  * @param line: user level caller identification.
102  */
103 static void
104 acquire_locklock(struct checked_lock* lock,
105 	const char* func, const char* file, int line)
106 {
107 	struct timespec to;
108 	int err;
109 	int contend = 0;
110 	/* first try; inc contention counter if not immediately */
111 	if((err = pthread_mutex_trylock(&lock->lock))) {
112 		if(err==EBUSY)
113 			contend++;
114 		else fatal_exit("error in mutex_trylock: %s", strerror(err));
115 	}
116 	if(!err)
117 		return; /* immediate success */
118 	to.tv_sec = time(NULL) + CHECK_LOCK_TIMEOUT;
119 	to.tv_nsec = 0;
120 	err = pthread_mutex_timedlock(&lock->lock, &to);
121 	if(err) {
122 		log_err("in acquiring locklock: %s", strerror(err));
123 		lock_error(lock, func, file, line, "acquire locklock");
124 	}
125 	/* since we hold the lock, we can edit the contention_count */
126 	lock->contention_count += contend;
127 }
128 
129 /** add protected region */
130 void
131 lock_protect(void *p, void* area, size_t size)
132 {
133 	struct checked_lock* lock = *(struct checked_lock**)p;
134 	struct protected_area* e = (struct protected_area*)malloc(
135 		sizeof(struct protected_area));
136 	if(!e)
137 		fatal_exit("lock_protect: out of memory");
138 	e->region = area;
139 	e->size = size;
140 	e->hold = malloc(size);
141 	if(!e->hold)
142 		fatal_exit("lock_protect: out of memory");
143 	memcpy(e->hold, e->region, e->size);
144 
145 	acquire_locklock(lock, __func__, __FILE__, __LINE__);
146 	e->next = lock->prot;
147 	lock->prot = e;
148 	LOCKRET(pthread_mutex_unlock(&lock->lock));
149 }
150 
151 /** remove protected region */
152 void
153 lock_unprotect(void* mangled, void* area)
154 {
155 	struct checked_lock* lock = *(struct checked_lock**)mangled;
156 	struct protected_area* p, **prevp;
157 	if(!lock)
158 		return;
159 	acquire_locklock(lock, __func__, __FILE__, __LINE__);
160 	p = lock->prot;
161 	prevp = &lock->prot;
162 	while(p) {
163 		if(p->region == area) {
164 			*prevp = p->next;
165 			free(p->hold);
166 			free(p);
167 			LOCKRET(pthread_mutex_unlock(&lock->lock));
168 			return;
169 		}
170 		prevp = &p->next;
171 		p = p->next;
172 	}
173 	LOCKRET(pthread_mutex_unlock(&lock->lock));
174 }
175 
176 /**
177  * Check protected memory region. Memory compare. Exit on error.
178  * @param lock: which lock to check.
179  * @param func: location we are now (when failure is detected).
180  * @param file: location we are now (when failure is detected).
181  * @param line: location we are now (when failure is detected).
182  */
183 static void
184 prot_check(struct checked_lock* lock,
185 	const char* func, const char* file, int line)
186 {
187 	struct protected_area* p = lock->prot;
188 	while(p) {
189 		if(memcmp(p->hold, p->region, p->size) != 0) {
190 			log_hex("memory prev", p->hold, p->size);
191 			log_hex("memory here", p->region, p->size);
192 			lock_error(lock, func, file, line,
193 				"protected area modified");
194 		}
195 		p = p->next;
196 	}
197 }
198 
199 /** Copy protected memory region */
200 static void
201 prot_store(struct checked_lock* lock)
202 {
203 	struct protected_area* p = lock->prot;
204 	while(p) {
205 		memcpy(p->hold, p->region, p->size);
206 		p = p->next;
207 	}
208 }
209 
210 /** get memory held by lock */
211 size_t
212 lock_get_mem(void* pp)
213 {
214 	size_t s;
215 	struct checked_lock* lock = *(struct checked_lock**)pp;
216 	struct protected_area* p;
217 	s = sizeof(struct checked_lock);
218 	acquire_locklock(lock, __func__, __FILE__, __LINE__);
219 	for(p = lock->prot; p; p = p->next) {
220 		s += sizeof(struct protected_area);
221 		s += p->size;
222 	}
223 	LOCKRET(pthread_mutex_unlock(&lock->lock));
224 	return s;
225 }
226 
227 /** write lock trace info to file, while you hold those locks */
228 static void
229 ordercheck_locklock(struct thr_check* thr, struct checked_lock* lock)
230 {
231 	int info[4];
232 	if(!check_locking_order) return;
233 	if(!thr->holding_first) return; /* no older lock, no info */
234 	/* write: <lock id held> <lock id new> <file> <line> */
235 	info[0] = thr->holding_first->create_thread;
236 	info[1] = thr->holding_first->create_instance;
237 	info[2] = lock->create_thread;
238 	info[3] = lock->create_instance;
239 	if(fwrite(info, 4*sizeof(int), 1, thr->order_info) != 1 ||
240 		fwrite(lock->holder_file, strlen(lock->holder_file)+1, 1,
241 		thr->order_info) != 1 ||
242 		fwrite(&lock->holder_line, sizeof(int), 1,
243 		thr->order_info) != 1)
244 		log_err("fwrite: %s", strerror(errno));
245 }
246 
247 /** write ordercheck lock creation details to file */
248 static void
249 ordercheck_lockcreate(struct thr_check* thr, struct checked_lock* lock)
250 {
251 	/* write: <ffff = create> <lock id> <file> <line> */
252 	int cmd = -1;
253 	if(!check_locking_order) return;
254 
255 	if( fwrite(&cmd, sizeof(int), 1, thr->order_info) != 1 ||
256 		fwrite(&lock->create_thread, sizeof(int), 1,
257 			thr->order_info) != 1 ||
258 		fwrite(&lock->create_instance, sizeof(int), 1,
259 			thr->order_info) != 1 ||
260 		fwrite(lock->create_file, strlen(lock->create_file)+1, 1,
261 			thr->order_info) != 1 ||
262 		fwrite(&lock->create_line, sizeof(int), 1,
263 		thr->order_info) != 1)
264 		log_err("fwrite: %s", strerror(errno));
265 }
266 
267 /** alloc struct, init lock empty */
268 void
269 checklock_init(enum check_lock_type type, struct checked_lock** lock,
270         const char* func, const char* file, int line)
271 {
272 	struct checked_lock* e = (struct checked_lock*)calloc(1,
273 		sizeof(struct checked_lock));
274 	struct thr_check *thr = (struct thr_check*)pthread_getspecific(
275 		thr_debug_key);
276 	if(!e)
277 		fatal_exit("%s %s %d: out of memory", func, file, line);
278 	if(!thr) {
279 		/* this is called when log_init() calls lock_init()
280 		 * functions, and the test check code has not yet
281 		 * been initialised.  But luckily, the checklock_start()
282 		 * routine can be called multiple times without ill effect.
283 		 */
284 		checklock_start();
285 		thr = (struct thr_check*)pthread_getspecific(thr_debug_key);
286 	}
287 	if(!thr)
288 		fatal_exit("%s %s %d: lock_init no thread info", func, file,
289 			line);
290 	*lock = e;
291 	e->type = type;
292 	e->create_func = func;
293 	e->create_file = file;
294 	e->create_line = line;
295 	e->create_thread = thr->num;
296 	e->create_instance = thr->locks_created++;
297 	ordercheck_lockcreate(thr, e);
298 	LOCKRET(pthread_mutex_init(&e->lock, NULL));
299 	switch(e->type) {
300 		case check_lock_mutex:
301 			LOCKRET(pthread_mutex_init(&e->u.mutex, NULL));
302 			break;
303 		case check_lock_spinlock:
304 			LOCKRET(pthread_spin_init(&e->u.spinlock, PTHREAD_PROCESS_PRIVATE));
305 			break;
306 		case check_lock_rwlock:
307 			LOCKRET(pthread_rwlock_init(&e->u.rwlock, NULL));
308 			break;
309 		default:
310 			log_assert(0);
311 	}
312 }
313 
314 /** delete prot items */
315 static void
316 prot_clear(struct checked_lock* lock)
317 {
318 	struct protected_area* p=lock->prot, *np;
319 	while(p) {
320 		np = p->next;
321 		free(p->hold);
322 		free(p);
323 		p = np;
324 	}
325 }
326 
327 /** check if type is OK for the lock given */
328 static void
329 checktype(enum check_lock_type type, struct checked_lock* lock,
330         const char* func, const char* file, int line)
331 {
332 	if(!lock)
333 		fatal_exit("use of null/deleted lock at %s %s:%d",
334 			func, file, line);
335 	if(type != lock->type) {
336 		lock_error(lock, func, file, line, "wrong lock type");
337 	}
338 }
339 
340 /** check if OK, free struct */
341 void
342 checklock_destroy(enum check_lock_type type, struct checked_lock** lock,
343         const char* func, const char* file, int line)
344 {
345 	const size_t contention_interest = 1; /* promille contented locks */
346 	struct checked_lock* e;
347 	if(!lock)
348 		return;
349 	e = *lock;
350 	if(!e)
351 		return;
352 	checktype(type, e, func, file, line);
353 
354 	/* check if delete is OK */
355 	acquire_locklock(e, func, file, line);
356 	if(e->hold_count != 0)
357 		lock_error(e, func, file, line, "delete while locked.");
358 	if(e->wait_count != 0)
359 		lock_error(e, func, file, line, "delete while waited on.");
360 	prot_check(e, func, file, line);
361 	*lock = NULL; /* use after free will fail */
362 	LOCKRET(pthread_mutex_unlock(&e->lock));
363 
364 	/* contention, look at fraction in trouble. */
365 	if(e->history_count > 1 &&
366 	   1000*e->contention_count/e->history_count > contention_interest) {
367 		log_info("lock created %s %s %d has contention %u of %u (%d%%)",
368 			e->create_func, e->create_file, e->create_line,
369 			(unsigned int)e->contention_count,
370 			(unsigned int)e->history_count,
371 			(int)(100*e->contention_count/e->history_count));
372 	}
373 
374 	/* delete it */
375 	LOCKRET(pthread_mutex_destroy(&e->lock));
376 	prot_clear(e);
377 	/* since nobody holds the lock - see check above, no need to unlink
378 	 * from the thread-held locks list. */
379 	switch(e->type) {
380 		case check_lock_mutex:
381 			LOCKRET(pthread_mutex_destroy(&e->u.mutex));
382 			break;
383 		case check_lock_spinlock:
384 			LOCKRET(pthread_spin_destroy(&e->u.spinlock));
385 			break;
386 		case check_lock_rwlock:
387 			LOCKRET(pthread_rwlock_destroy(&e->u.rwlock));
388 			break;
389 		default:
390 			log_assert(0);
391 	}
392 	memset(e, 0, sizeof(struct checked_lock));
393 	free(e);
394 }
395 
396 /** finish acquiring lock, shared between _(rd|wr||)lock() routines */
397 static void
398 finish_acquire_lock(struct thr_check* thr, struct checked_lock* lock,
399         const char* func, const char* file, int line)
400 {
401 	thr->waiting = NULL;
402 	lock->wait_count --;
403 	lock->holder = thr;
404 	lock->hold_count ++;
405 	lock->holder_func = func;
406 	lock->holder_file = file;
407 	lock->holder_line = line;
408 	ordercheck_locklock(thr, lock);
409 
410 	/* insert in thread lock list, as first */
411 	lock->prev_held_lock[thr->num] = NULL;
412 	lock->next_held_lock[thr->num] = thr->holding_first;
413 	if(thr->holding_first)
414 		/* no need to lock it, since this thread already holds the
415 		 * lock (since it is on this list) and we only edit thr->num
416 		 * member in array. So it is safe.  */
417 		thr->holding_first->prev_held_lock[thr->num] = lock;
418 	else	thr->holding_last = lock;
419 	thr->holding_first = lock;
420 }
421 
422 /**
423  * Locking routine.
424  * @param type: as passed by user.
425  * @param lock: as passed by user.
426  * @param func: caller location.
427  * @param file: caller location.
428  * @param line: caller location.
429  * @param tryfunc: the pthread_mutex_trylock or similar function.
430  * @param timedfunc: the pthread_mutex_timedlock or similar function.
431  *	Uses absolute timeout value.
432  * @param arg: what to pass to tryfunc and timedlock.
433  * @param exclusive: if lock must be exclusive (only one allowed).
434  * @param getwr: if attempts to get writelock (or readlock) for rwlocks.
435  */
436 static void
437 checklock_lockit(enum check_lock_type type, struct checked_lock* lock,
438         const char* func, const char* file, int line,
439 	int (*tryfunc)(void*), int (*timedfunc)(void*, struct timespec*),
440 	void* arg, int exclusive, int getwr)
441 {
442 	int err;
443 	int contend = 0;
444 	struct thr_check *thr = (struct thr_check*)pthread_getspecific(
445 		thr_debug_key);
446 	checktype(type, lock, func, file, line);
447 	if(!thr) lock_error(lock, func, file, line, "no thread info");
448 
449 	acquire_locklock(lock, func, file, line);
450 	lock->wait_count ++;
451 	thr->waiting = lock;
452 	if(exclusive && lock->hold_count > 0 && lock->holder == thr)
453 		lock_error(lock, func, file, line, "thread already owns lock");
454 	if(type==check_lock_rwlock && getwr && lock->writeholder == thr)
455 		lock_error(lock, func, file, line, "thread already has wrlock");
456 	LOCKRET(pthread_mutex_unlock(&lock->lock));
457 
458 	/* first try; if busy increase contention counter */
459 	if((err=tryfunc(arg))) {
460 		struct timespec to;
461 		if(err != EBUSY) log_err("trylock: %s", strerror(err));
462 		to.tv_sec = time(NULL) + CHECK_LOCK_TIMEOUT;
463 		to.tv_nsec = 0;
464 		if((err=timedfunc(arg, &to))) {
465 			if(err == ETIMEDOUT)
466 				lock_error(lock, func, file, line,
467 					"timeout possible deadlock");
468 			log_err("timedlock: %s", strerror(err));
469 		}
470 		contend ++;
471 	}
472 	/* got the lock */
473 
474 	acquire_locklock(lock, func, file, line);
475 	lock->contention_count += contend;
476 	lock->history_count++;
477 	if(exclusive && lock->hold_count > 0)
478 		lock_error(lock, func, file, line, "got nonexclusive lock");
479 	if(type==check_lock_rwlock && getwr && lock->writeholder)
480 		lock_error(lock, func, file, line, "got nonexclusive wrlock");
481 	if(type==check_lock_rwlock && getwr)
482 		lock->writeholder = thr;
483 	/* check the memory areas for unauthorized changes,
484 	 * between last unlock time and current lock time.
485 	 * we check while holding the lock (threadsafe).
486 	 */
487 	if(getwr || exclusive)
488 		prot_check(lock, func, file, line);
489 	finish_acquire_lock(thr, lock, func, file, line);
490 	LOCKRET(pthread_mutex_unlock(&lock->lock));
491 }
492 
493 /** helper for rdlock: try */
494 static int try_rd(void* arg)
495 { return pthread_rwlock_tryrdlock((pthread_rwlock_t*)arg); }
496 /** helper for rdlock: timed */
497 static int timed_rd(void* arg, struct timespec* to)
498 { return pthread_rwlock_timedrdlock((pthread_rwlock_t*)arg, to); }
499 
500 /** check if OK, lock */
501 void
502 checklock_rdlock(enum check_lock_type type, struct checked_lock* lock,
503         const char* func, const char* file, int line)
504 {
505 	if(key_deleted)
506 		return;
507 
508 	log_assert(type == check_lock_rwlock);
509 	checklock_lockit(type, lock, func, file, line,
510 		try_rd, timed_rd, &lock->u.rwlock, 0, 0);
511 }
512 
513 /** helper for wrlock: try */
514 static int try_wr(void* arg)
515 { return pthread_rwlock_trywrlock((pthread_rwlock_t*)arg); }
516 /** helper for wrlock: timed */
517 static int timed_wr(void* arg, struct timespec* to)
518 { return pthread_rwlock_timedwrlock((pthread_rwlock_t*)arg, to); }
519 
520 /** check if OK, lock */
521 void
522 checklock_wrlock(enum check_lock_type type, struct checked_lock* lock,
523         const char* func, const char* file, int line)
524 {
525 	if(key_deleted)
526 		return;
527 	log_assert(type == check_lock_rwlock);
528 	checklock_lockit(type, lock, func, file, line,
529 		try_wr, timed_wr, &lock->u.rwlock, 0, 1);
530 }
531 
532 /** helper for lock mutex: try */
533 static int try_mutex(void* arg)
534 { return pthread_mutex_trylock((pthread_mutex_t*)arg); }
535 /** helper for lock mutex: timed */
536 static int timed_mutex(void* arg, struct timespec* to)
537 { return pthread_mutex_timedlock((pthread_mutex_t*)arg, to); }
538 
539 /** helper for lock spinlock: try */
540 static int try_spinlock(void* arg)
541 { return pthread_spin_trylock((pthread_spinlock_t*)arg); }
542 /** helper for lock spinlock: timed */
543 static int timed_spinlock(void* arg, struct timespec* to)
544 {
545 	int err;
546 	/* spin for 5 seconds. (ouch for the CPU, but it beats forever) */
547 	while( (err=try_spinlock(arg)) == EBUSY) {
548 #ifndef S_SPLINT_S
549 		if(time(NULL) >= to->tv_sec)
550 			return ETIMEDOUT;
551 		usleep(1000); /* in 1/1000000s of a second */
552 #endif
553 	}
554 	return err;
555 }
556 
557 /** check if OK, lock */
558 void
559 checklock_lock(enum check_lock_type type, struct checked_lock* lock,
560         const char* func, const char* file, int line)
561 {
562 	if(key_deleted)
563 		return;
564 	log_assert(type != check_lock_rwlock);
565 	switch(type) {
566 		case check_lock_mutex:
567 			checklock_lockit(type, lock, func, file, line,
568 				try_mutex, timed_mutex, &lock->u.mutex, 1, 0);
569 			break;
570 		case check_lock_spinlock:
571 			/* void* cast needed because 'volatile' on some OS */
572 			checklock_lockit(type, lock, func, file, line,
573 				try_spinlock, timed_spinlock,
574 				(void*)&lock->u.spinlock, 1, 0);
575 			break;
576 		default:
577 			log_assert(0);
578 	}
579 }
580 
581 /** check if OK, unlock */
582 void
583 checklock_unlock(enum check_lock_type type, struct checked_lock* lock,
584         const char* func, const char* file, int line)
585 {
586 	struct thr_check *thr;
587 	if(key_deleted)
588 		return;
589 	thr = (struct thr_check*)pthread_getspecific(thr_debug_key);
590 	checktype(type, lock, func, file, line);
591 	if(!thr) lock_error(lock, func, file, line, "no thread info");
592 
593 	acquire_locklock(lock, func, file, line);
594 	/* was this thread even holding this lock? */
595 	if(thr->holding_first != lock &&
596 		lock->prev_held_lock[thr->num] == NULL) {
597 		lock_error(lock, func, file, line, "unlock nonlocked lock");
598 	}
599 	if(lock->hold_count <= 0)
600 		lock_error(lock, func, file, line, "too many unlocks");
601 
602 	/* store this point as last touched by */
603 	lock->holder = thr;
604 	lock->hold_count --;
605 	lock->holder_func = func;
606 	lock->holder_file = file;
607 	lock->holder_line = line;
608 
609 	/* delete from thread holder list */
610 	/* no need to lock other lockstructs, because they are all on the
611 	 * held-locks list, and this thread holds their locks.
612 	 * we only touch the thr->num members, so it is safe.  */
613 	if(thr->holding_first == lock)
614 		thr->holding_first = lock->next_held_lock[thr->num];
615 	if(thr->holding_last == lock)
616 		thr->holding_last = lock->prev_held_lock[thr->num];
617 	if(lock->next_held_lock[thr->num])
618 		lock->next_held_lock[thr->num]->prev_held_lock[thr->num] =
619 			lock->prev_held_lock[thr->num];
620 	if(lock->prev_held_lock[thr->num])
621 		lock->prev_held_lock[thr->num]->next_held_lock[thr->num] =
622 			lock->next_held_lock[thr->num];
623 	lock->next_held_lock[thr->num] = NULL;
624 	lock->prev_held_lock[thr->num] = NULL;
625 
626 	if(type==check_lock_rwlock && lock->writeholder == thr) {
627 		lock->writeholder = NULL;
628 		prot_store(lock);
629 	} else if(type != check_lock_rwlock) {
630 		/* store memory areas that are protected, for later checks */
631 		prot_store(lock);
632 	}
633 	LOCKRET(pthread_mutex_unlock(&lock->lock));
634 
635 	/* unlock it */
636 	switch(type) {
637 		case check_lock_mutex:
638 			LOCKRET(pthread_mutex_unlock(&lock->u.mutex));
639 			break;
640 		case check_lock_spinlock:
641 			LOCKRET(pthread_spin_unlock(&lock->u.spinlock));
642 			break;
643 		case check_lock_rwlock:
644 			LOCKRET(pthread_rwlock_unlock(&lock->u.rwlock));
645 			break;
646 		default:
647 			log_assert(0);
648 	}
649 }
650 
651 /** open order info debug file, thr->num must be valid */
652 static void
653 open_lockorder(struct thr_check* thr)
654 {
655 	char buf[24];
656 	time_t t;
657 	snprintf(buf, sizeof(buf), "ublocktrace.%d", thr->num);
658 	thr->order_info = fopen(buf, "w");
659 	if(!thr->order_info)
660 		fatal_exit("could not open %s: %s", buf, strerror(errno));
661 	thr->locks_created = 0;
662 	t = time(NULL);
663 	/* write: <time_stamp> <runpid> <thread_num> */
664 	if(fwrite(&t, sizeof(t), 1, thr->order_info) != 1 ||
665 		fwrite(&thr->num, sizeof(thr->num), 1, thr->order_info) != 1 ||
666 		fwrite(&check_lock_pid, sizeof(check_lock_pid), 1,
667 		thr->order_info) != 1)
668 		log_err("fwrite: %s", strerror(errno));
669 }
670 
671 /** checklock thread main, Inits thread structure */
672 static void* checklock_main(void* arg)
673 {
674 	struct thr_check* thr = (struct thr_check*)arg;
675 	void* ret;
676 	thr->id = pthread_self();
677 	/* Hack to get same numbers as in log file */
678 	thr->num = *(int*)(thr->arg);
679 	log_assert(thr->num < THRDEBUG_MAX_THREADS);
680 	/* as an aside, due to this, won't work for libunbound bg thread */
681 	if(thread_infos[thr->num] != NULL)
682 		log_warn("thread warning, thr->num %d not NULL", thr->num);
683 	thread_infos[thr->num] = thr;
684 	LOCKRET(pthread_setspecific(thr_debug_key, thr));
685 	if(check_locking_order)
686 		open_lockorder(thr);
687 	ret = thr->func(thr->arg);
688 	thread_infos[thr->num] = NULL;
689 	if(check_locking_order)
690 		fclose(thr->order_info);
691 	free(thr);
692 	return ret;
693 }
694 
695 /** init the main thread */
696 void checklock_start(void)
697 {
698 	if(key_deleted)
699 		return;
700 	if(!key_created) {
701 		struct thr_check* thisthr = (struct thr_check*)calloc(1,
702 			sizeof(struct thr_check));
703 		if(!thisthr)
704 			fatal_exit("thrcreate: out of memory");
705 		key_created = 1;
706 		check_lock_pid = getpid();
707 		LOCKRET(pthread_key_create(&thr_debug_key, NULL));
708 		LOCKRET(pthread_setspecific(thr_debug_key, thisthr));
709 		thread_infos[0] = thisthr;
710 		if(check_locking_order)
711 			open_lockorder(thisthr);
712 	}
713 }
714 
715 /** stop checklocks */
716 void checklock_stop(void)
717 {
718 	if(key_created) {
719 		int i;
720 		key_deleted = 1;
721 		if(check_locking_order)
722 			fclose(thread_infos[0]->order_info);
723 		free(thread_infos[0]);
724 		thread_infos[0] = NULL;
725 		for(i = 0; i < THRDEBUG_MAX_THREADS; i++)
726 			log_assert(thread_infos[i] == NULL);
727 			/* should have been cleaned up. */
728 		LOCKRET(pthread_key_delete(thr_debug_key));
729 		key_created = 0;
730 	}
731 }
732 
733 /** allocate debug info and create thread */
734 void
735 checklock_thrcreate(pthread_t* id, void* (*func)(void*), void* arg)
736 {
737 	struct thr_check* thr = (struct thr_check*)calloc(1,
738 		sizeof(struct thr_check));
739 	if(!thr)
740 		fatal_exit("thrcreate: out of memory");
741 	if(!key_created) {
742 		checklock_start();
743 	}
744 	thr->func = func;
745 	thr->arg = arg;
746 	LOCKRET(pthread_create(id, NULL, checklock_main, thr));
747 }
748 
749 /** count number of thread infos */
750 static int
751 count_thread_infos(void)
752 {
753 	int cnt = 0;
754 	int i;
755 	for(i=0; i<THRDEBUG_MAX_THREADS; i++)
756 		if(thread_infos[i])
757 			cnt++;
758 	return cnt;
759 }
760 
761 /** print lots of info on a lock */
762 static void
763 lock_debug_info(struct checked_lock* lock)
764 {
765 	if(!lock) return;
766 	log_info("+++ Lock %llx, %d %d create %s %s %d",
767 		(unsigned long long)(size_t)lock,
768 		lock->create_thread, lock->create_instance,
769 		lock->create_func, lock->create_file, lock->create_line);
770 	log_info("lock type: %s",
771 		(lock->type==check_lock_mutex)?"mutex": (
772 		(lock->type==check_lock_spinlock)?"spinlock": (
773 		(lock->type==check_lock_rwlock)?"rwlock": "badtype")));
774 	log_info("lock contention %u, history:%u, hold:%d, wait:%d",
775 		(unsigned)lock->contention_count, (unsigned)lock->history_count,
776 		lock->hold_count, lock->wait_count);
777 	log_info("last touch %s %s %d", lock->holder_func, lock->holder_file,
778 		lock->holder_line);
779 	log_info("holder thread %d, writeholder thread %d",
780 		lock->holder?lock->holder->num:-1,
781 		lock->writeholder?lock->writeholder->num:-1);
782 }
783 
784 /** print debug locks held by a thread */
785 static void
786 held_debug_info(struct thr_check* thr, struct checked_lock* lock)
787 {
788 	if(!lock) return;
789 	lock_debug_info(lock);
790 	held_debug_info(thr, lock->next_held_lock[thr->num]);
791 }
792 
793 /** print debug info for a thread */
794 static void
795 thread_debug_info(struct thr_check* thr)
796 {
797 	struct checked_lock* w = NULL;
798 	struct checked_lock* f = NULL;
799 	struct checked_lock* l = NULL;
800 	if(!thr) return;
801 	log_info("pthread id is %x", (int)thr->id);
802 	log_info("thread func is %llx", (unsigned long long)(size_t)thr->func);
803 	log_info("thread arg is %llx (%d)",
804 		(unsigned long long)(size_t)thr->arg,
805 		(thr->arg?*(int*)thr->arg:0));
806 	log_info("thread num is %d", thr->num);
807 	log_info("locks created %d", thr->locks_created);
808 	log_info("open file for lockinfo: %s",
809 		thr->order_info?"yes, flushing":"no");
810 	fflush(thr->order_info);
811 	w = thr->waiting;
812 	f = thr->holding_first;
813 	l = thr->holding_last;
814 	log_info("thread waiting for a lock: %s %llx", w?"yes":"no",
815 		(unsigned long long)(size_t)w);
816 	lock_debug_info(w);
817 	log_info("thread holding first: %s, last: %s", f?"yes":"no",
818 		l?"yes":"no");
819 	held_debug_info(thr, f);
820 }
821 
822 static void
823 total_debug_info(void)
824 {
825 	int i;
826 	log_info("checklocks: supervising %d threads.",
827 		count_thread_infos());
828 	if(!key_created) {
829 		log_info("No thread debug key created yet");
830 	}
831 	for(i=0; i<THRDEBUG_MAX_THREADS; i++) {
832 		if(thread_infos[i]) {
833 			log_info("*** Thread %d information: ***", i);
834 			thread_debug_info(thread_infos[i]);
835 		}
836 	}
837 }
838 
839 /** signal handler for join timeout, Exits */
840 static RETSIGTYPE joinalarm(int ATTR_UNUSED(sig))
841 {
842 	log_err("join thread timeout. hangup or deadlock. Info follows.");
843 	total_debug_info();
844 	fatal_exit("join thread timeout. hangup or deadlock.");
845 }
846 
847 /** wait for thread with a timeout */
848 void
849 checklock_thrjoin(pthread_t thread)
850 {
851 	/* wait with a timeout */
852 	if(signal(SIGALRM, joinalarm) == SIG_ERR)
853 		fatal_exit("signal(): %s", strerror(errno));
854 	(void)alarm(CHECK_JOIN_TIMEOUT);
855 	LOCKRET(pthread_join(thread, NULL));
856 	(void)alarm(0);
857 }
858 
859 #endif /* USE_THREAD_DEBUG */
860