1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #pragma ident "%Z%%M% %I% %E% SMI"
27
28 /*
29 * Given several files containing CTF data, merge and uniquify that data into
30 * a single CTF section in an output file.
31 *
32 * Merges can proceed independently. As such, we perform the merges in parallel
33 * using a worker thread model. A given glob of CTF data (either all of the CTF
34 * data from a single input file, or the result of one or more merges) can only
35 * be involved in a single merge at any given time, so the process decreases in
36 * parallelism, especially towards the end, as more and more files are
37 * consolidated, finally resulting in a single merge of two large CTF graphs.
38 * Unfortunately, the last merge is also the slowest, as the two graphs being
39 * merged are each the product of merges of half of the input files.
40 *
41 * The algorithm consists of two phases, described in detail below. The first
42 * phase entails the merging of CTF data in groups of eight. The second phase
43 * takes the results of Phase I, and merges them two at a time. This disparity
44 * is due to an observation that the merge time increases at least quadratically
45 * with the size of the CTF data being merged. As such, merges of CTF graphs
46 * newly read from input files are much faster than merges of CTF graphs that
47 * are themselves the results of prior merges.
48 *
49 * A further complication is the need to ensure the repeatability of CTF merges.
50 * That is, a merge should produce the same output every time, given the same
51 * input. In both phases, this consistency requirement is met by imposing an
52 * ordering on the merge process, thus ensuring that a given set of input files
53 * are merged in the same order every time.
54 *
55 * Phase I
56 *
57 * The main thread reads the input files one by one, transforming the CTF
58 * data they contain into tdata structures. When a given file has been read
59 * and parsed, it is placed on the work queue for retrieval by worker threads.
60 *
61 * Central to Phase I is the Work In Progress (wip) array, which is used to
62 * merge batches of files in a predictable order. Files are read by the main
63 * thread, and are merged into wip array elements in round-robin order. When
64 * the number of files merged into a given array slot equals the batch size,
65 * the merged CTF graph in that array is added to the done slot in order by
66 * array slot.
67 *
68 * For example, consider a case where we have five input files, a batch size
69 * of two, a wip array size of two, and two worker threads (T1 and T2).
70 *
71 * 1. The wip array elements are assigned initial batch numbers 0 and 1.
72 * 2. T1 reads an input file from the input queue (wq_queue). This is the
73 * first input file, so it is placed into wip[0]. The second file is
74 * similarly read and placed into wip[1]. The wip array slots now contain
75 * one file each (wip_nmerged == 1).
76 * 3. T1 reads the third input file, which it merges into wip[0]. The
77 * number of files in wip[0] is equal to the batch size.
78 * 4. T2 reads the fourth input file, which it merges into wip[1]. wip[1]
79 * is now full too.
80 * 5. T2 attempts to place the contents of wip[1] on the done queue
81 * (wq_done_queue), but it can't, since the batch ID for wip[1] is 1.
82 * Batch 0 needs to be on the done queue before batch 1 can be added, so
83 * T2 blocks on wip[1]'s cv.
84 * 6. T1 attempts to place the contents of wip[0] on the done queue, and
85 * succeeds, updating wq_lastdonebatch to 0. It clears wip[0], and sets
86 * its batch ID to 2. T1 then signals wip[1]'s cv to awaken T2.
87 * 7. T2 wakes up, notices that wq_lastdonebatch is 0, which means that
88 * batch 1 can now be added. It adds wip[1] to the done queue, clears
89 * wip[1], and sets its batch ID to 3. It signals wip[0]'s cv, and
90 * restarts.
91 *
92 * The above process continues until all input files have been consumed. At
93 * this point, a pair of barriers are used to allow a single thread to move
94 * any partial batches from the wip array to the done array in batch ID order.
95 * When this is complete, wq_done_queue is moved to wq_queue, and Phase II
96 * begins.
97 *
98 * Locking Semantics (Phase I)
99 *
100 * The input queue (wq_queue) and the done queue (wq_done_queue) are
101 * protected by separate mutexes - wq_queue_lock and wq_done_queue. wip
102 * array slots are protected by their own mutexes, which must be grabbed
103 * before releasing the input queue lock. The wip array lock is dropped
104 * when the thread restarts the loop. If the array slot was full, the
105 * array lock will be held while the slot contents are added to the done
106 * queue. The done queue lock is used to protect the wip slot cv's.
107 *
108 * The pow number is protected by the queue lock. The master batch ID
109 * and last completed batch (wq_lastdonebatch) counters are protected *in
110 * Phase I* by the done queue lock.
111 *
112 * Phase II
113 *
114 * When Phase II begins, the queue consists of the merged batches from the
115 * first phase. Assume we have five batches:
116 *
117 * Q: a b c d e
118 *
119 * Using the same batch ID mechanism we used in Phase I, but without the wip
120 * array, worker threads remove two entries at a time from the beginning of
121 * the queue. These two entries are merged, and are added back to the tail
122 * of the queue, as follows:
123 *
124 * Q: a b c d e # start
125 * Q: c d e ab # a, b removed, merged, added to end
126 * Q: e ab cd # c, d removed, merged, added to end
127 * Q: cd eab # e, ab removed, merged, added to end
128 * Q: cdeab # cd, eab removed, merged, added to end
129 *
130 * When one entry remains on the queue, with no merges outstanding, Phase II
131 * finishes. We pre-determine the stopping point by pre-calculating the
132 * number of nodes that will appear on the list. In the example above, the
133 * number (wq_ninqueue) is 9. When ninqueue is 1, we conclude Phase II by
134 * signaling the main thread via wq_done_cv.
135 *
136 * Locking Semantics (Phase II)
137 *
138 * The queue (wq_queue), ninqueue, and the master batch ID and last
139 * completed batch counters are protected by wq_queue_lock. The done
140 * queue and corresponding lock are unused in Phase II as is the wip array.
141 *
142 * Uniquification
143 *
144 * We want the CTF data that goes into a given module to be as small as
145 * possible. For example, we don't want it to contain any type data that may
146 * be present in another common module. As such, after creating the master
147 * tdata_t for a given module, we can, if requested by the user, uniquify it
148 * against the tdata_t from another module (genunix in the case of the SunOS
149 * kernel). We perform a merge between the tdata_t for this module and the
150 * tdata_t from genunix. Nodes found in this module that are not present in
151 * genunix are added to a third tdata_t - the uniquified tdata_t.
152 *
153 * Additive Merges
154 *
155 * In some cases, for example if we are issuing a new version of a common
156 * module in a patch, we need to make sure that the CTF data already present
157 * in that module does not change. Changes to this data would void the CTF
158 * data in any module that uniquified against the common module. To preserve
159 * the existing data, we can perform what is known as an additive merge. In
160 * this case, a final uniquification is performed against the CTF data in the
161 * previous version of the module. The result will be the placement of new
162 * and changed data after the existing data, thus preserving the existing type
163 * ID space.
164 *
165 * Saving the result
166 *
167 * When the merges are complete, the resulting tdata_t is placed into the
168 * output file, replacing the .SUNW_ctf section (if any) already in that file.
169 *
170 * The person who changes the merging thread code in this file without updating
171 * this comment will not live to see the stock hit five.
172 */
173
174 #if HAVE_NBTOOL_CONFIG_H
175 # include "nbtool_config.h"
176 #endif
177
178 #include <stdio.h>
179 #include <stdlib.h>
180 #ifndef _NETBSD_SOURCE
181 #define _NETBSD_SOURCE /* XXX TBD fix this */
182 #include <unistd.h>
183 #undef _NETBSD_SOURCE
184 #else
185 #include <unistd.h>
186 #endif
187 #include <pthread.h>
188 #include <assert.h>
189 #ifdef illumos
190 #include <synch.h>
191 #endif
192 #include <signal.h>
193 #include <libgen.h>
194 #include <string.h>
195 #include <errno.h>
196 #ifdef illumos
197 #include <alloca.h>
198 #endif
199 #include <sys/param.h>
200 #include <sys/types.h>
201 #include <sys/mman.h>
202 #ifdef illumos
203 #include <sys/sysconf.h>
204 #endif
205
206 #include "ctf_headers.h"
207 #include "ctftools.h"
208 #include "ctfmerge.h"
209 #include "traverse.h"
210 #include "memory.h"
211 #include "fifo.h"
212 #include "barrier.h"
213
214 #pragma init(bigheap)
215
216 #define MERGE_PHASE1_BATCH_SIZE 8
217 #define MERGE_PHASE1_MAX_SLOTS 5
218 #define MERGE_INPUT_THROTTLE_LEN 10
219
220 const char *progname;
221 static char *outfile = NULL;
222 static char *tmpname = NULL;
223 static int dynsym;
224 int debug_level = DEBUG_LEVEL;
225 #ifdef illumos
226 static size_t maxpgsize = 0x400000;
227 #endif
228
229
/*
 * Print a synopsis of every supported invocation mode to stderr.
 * Does not exit; callers invoke exit() themselves after usage().
 * The %*s trick indents the continuation line of the -d form by
 * the width of the program name.
 */
static void
usage(void)
{
	(void) fprintf(stderr,
	    "Usage: %s [-fgstv] -l label | -L labelenv -o outfile file ...\n"
	    " %s [-fgstv] -l label | -L labelenv -o outfile -d uniqfile\n"
	    " %*s [-g] [-D uniqlabel] file ...\n"
	    " %s [-fgstv] -l label | -L labelenv -o outfile -w withfile "
	    "file ...\n"
	    " %s [-g] -c srcfile destfile\n"
	    "\n"
	    " Note: if -L labelenv is specified and labelenv is not set in\n"
	    " the environment, a default value is used.\n",
	    progname, progname, (int)strlen(progname), " ",
	    progname, progname);
}
246
#ifdef illumos
/*
 * Ask the VM system to back the heap with the largest available page size
 * (capped at maxpgsize) to cut TLB pressure during large merges.  Runs at
 * startup via "#pragma init(bigheap)".  Every failure path simply returns:
 * large pages are an optimization, never a requirement.
 */
static void
bigheap(void)
{
	size_t big, *size;
	int sizes;
	struct memcntl_mha mha;

	/*
	 * First, get the available pagesizes.
	 */
	if ((sizes = getpagesizes(NULL, 0)) == -1)
		return;

	/* only the base page size exists, or alloca() failed */
	if (sizes == 1 || (size = alloca(sizeof (size_t) * sizes)) == NULL)
		return;

	if (getpagesizes(size, sizes) == -1)
		return;

	/* drop page sizes above the cap; list is presumably ascending */
	while (size[sizes - 1] > maxpgsize)
		sizes--;

	/* set big to the largest allowed page size */
	big = size[sizes - 1];
	if (big & (big - 1)) {
		/*
		 * The largest page size is not a power of two for some
		 * inexplicable reason; return.
		 */
		return;
	}

	/*
	 * Now, align our break to the largest page size.
	 */
	if (brk((void *)((((uintptr_t)sbrk(0) - 1) & ~(big - 1)) + big)) != 0)
		return;

	/*
	 * set the preferred page size for the heap
	 */
	mha.mha_cmd = MHA_MAPSIZE_BSSBRK;
	mha.mha_flags = 0;
	mha.mha_pagesize = big;

	(void) memcntl(NULL, 0, MC_HAT_ADVISE, (caddr_t)&mha, 0, 0);
}
#endif
296
297 static void
finalize_phase_one(workqueue_t * wq)298 finalize_phase_one(workqueue_t *wq)
299 {
300 int startslot, i;
301
302 /*
303 * wip slots are cleared out only when maxbatchsz td's have been merged
304 * into them. We're not guaranteed that the number of files we're
305 * merging is a multiple of maxbatchsz, so there will be some partial
306 * groups in the wip array. Move them to the done queue in batch ID
307 * order, starting with the slot containing the next batch that would
308 * have been placed on the done queue, followed by the others.
309 * One thread will be doing this while the others wait at the barrier
310 * back in worker_thread(), so we don't need to worry about pesky things
311 * like locks.
312 */
313
314 for (startslot = -1, i = 0; i < wq->wq_nwipslots; i++) {
315 if (wq->wq_wip[i].wip_batchid == wq->wq_lastdonebatch + 1) {
316 startslot = i;
317 break;
318 }
319 }
320
321 assert(startslot != -1);
322
323 for (i = startslot; i < startslot + wq->wq_nwipslots; i++) {
324 int slotnum = i % wq->wq_nwipslots;
325 wip_t *wipslot = &wq->wq_wip[slotnum];
326
327 if (wipslot->wip_td != NULL) {
328 debug(2, "clearing slot %d (%d) (saving %d)\n",
329 slotnum, i, wipslot->wip_nmerged);
330 } else
331 debug(2, "clearing slot %d (%d)\n", slotnum, i);
332
333 if (wipslot->wip_td != NULL) {
334 fifo_add(wq->wq_donequeue, wipslot->wip_td);
335 wq->wq_wip[slotnum].wip_td = NULL;
336 }
337 }
338
339 wq->wq_lastdonebatch = wq->wq_next_batchid++;
340
341 debug(2, "phase one done: donequeue has %d items\n",
342 fifo_len(wq->wq_donequeue));
343 }
344
345 static void
init_phase_two(workqueue_t * wq)346 init_phase_two(workqueue_t *wq)
347 {
348 int num;
349
350 /*
351 * We're going to continually merge the first two entries on the queue,
352 * placing the result on the end, until there's nothing left to merge.
353 * At that point, everything will have been merged into one. The
354 * initial value of ninqueue needs to be equal to the total number of
355 * entries that will show up on the queue, both at the start of the
356 * phase and as generated by merges during the phase.
357 */
358 wq->wq_ninqueue = num = fifo_len(wq->wq_donequeue);
359 while (num != 1) {
360 wq->wq_ninqueue += num / 2;
361 num = num / 2 + num % 2;
362 }
363
364 /*
365 * Move the done queue to the work queue. We won't be using the done
366 * queue in phase 2.
367 */
368 assert(fifo_len(wq->wq_queue) == 0);
369 fifo_free(wq->wq_queue, NULL);
370 wq->wq_queue = wq->wq_donequeue;
371 }
372
/*
 * Move a full wip slot's merged tdata onto the done queue, in batch ID
 * order.  Batches must land on the done queue in increasing batch ID order
 * so that merge results are repeatable; if earlier batches are still
 * outstanding we wait on this slot's cv, which is signalled by whichever
 * thread saves the preceding batch.  Called with slot->wip_lock held.
 */
static void
wip_save_work(workqueue_t *wq, wip_t *slot, int slotnum)
{
	if ((errno = pthread_mutex_lock(&wq->wq_donequeue_lock)) != 0)
		terminate("%s: pthread_mutex_lock(wq_donequeue_lock)",
		    __func__);

	/* wait until the batch immediately before ours has been saved */
	while (wq->wq_lastdonebatch + 1 < slot->wip_batchid) {
		if ((errno = pthread_cond_wait(&slot->wip_cv, &wq->wq_donequeue_lock)) != 0)
			terminate("%s: pthread_cond_wait(wip_cv,wq_donequeue_lock)",
			    __func__);
	}
	assert(wq->wq_lastdonebatch + 1 == slot->wip_batchid);

	fifo_add(wq->wq_donequeue, slot->wip_td);
	wq->wq_lastdonebatch++;
	/* wake the thread (if any) blocked saving the next batch */
	const int nextslot = (slotnum + 1) % wq->wq_nwipslots;
	if ((errno = pthread_cond_signal(&wq->wq_wip[nextslot].wip_cv)) != 0)
		terminate("%s: pthread_cond_signal(wq_wip[%d].wip_cv)",
		    __func__, nextslot);

	/* reset the slot for next use */
	slot->wip_td = NULL;
	slot->wip_batchid = wq->wq_next_batchid++;

	if ((errno = pthread_mutex_unlock(&wq->wq_donequeue_lock)) != 0)
		terminate("%s: pthread_mutex_unlock(wq_donequeue_lock)",
		    __func__);
}
402
/*
 * Merge one input tdata into a wip slot.  The first tdata placed in an
 * empty slot simply becomes the slot's merge target; later ones are merged
 * into it via merge_into_master() and then freed.  Called with the slot's
 * wip_lock held.
 */
static void
wip_add_work(wip_t *slot, tdata_t *pow)
{
	if (slot->wip_td == NULL) {
		/* empty slot: this tree becomes the merge target */
		slot->wip_td = pow;
		slot->wip_nmerged = 1;
	} else {
		debug(2, "0x%jx: merging %p into %p\n",
		    (uintmax_t)(uintptr_t)pthread_self(),
		    (void *)pow, (void *)slot->wip_td);

		merge_into_master(pow, slot->wip_td, NULL, 0);
		tdata_free(pow);

		slot->wip_nmerged++;
	}
}
420
/*
 * Phase I worker loop: pull input tdata trees off wq_queue and merge them
 * into wip array slots in round-robin order (by pownum), saving a slot to
 * the done queue once it has accumulated wq_maxbatchsz merges.  Returns
 * when the queue is empty and the main thread has set wq_nomorefiles.
 */
static void
worker_runphase1(workqueue_t *wq)
{
	wip_t *wipslot;
	tdata_t *pow;
	int wipslotnum, pownum;

	for (;;) {
		if ((errno = pthread_mutex_lock(&wq->wq_queue_lock)) != 0)
			terminate("%s: pthread_mutex_lock(wq_queue_lock)",
			    __func__);

		while (fifo_empty(wq->wq_queue)) {
			if (wq->wq_nomorefiles == 1) {
				/* wake siblings still blocked in the wait below */
				if ((errno = pthread_cond_broadcast(&wq->wq_work_avail)) != 0)
					terminate("%s: pthread_cond_broadcast(wq_work_avail)",
					    __func__);
				if ((errno = pthread_mutex_unlock(&wq->wq_queue_lock)) != 0)
					terminate("%s: pthread_mutex_unlock(wq_queue_lock)",
					    __func__);

				/* on to phase 2 ... */
				return;
			}

			if ((errno = pthread_cond_wait(&wq->wq_work_avail,
			    &wq->wq_queue_lock)) != 0)
				terminate("%s: pthread_cond_wait(wq_work_avail,wq_queue_lock)",
				    __func__);
		}

		/* there's work to be done! */
		pow = fifo_remove(wq->wq_queue);
		pownum = wq->wq_nextpownum++;
		/* let a throttled producer in merge_ctf_cb() resume reading */
		if ((errno = pthread_cond_broadcast(&wq->wq_work_removed)) != 0)
			terminate("%s: pthread_cond_broadcast(wq_work_removed)",
			    __func__);

		assert(pow != NULL);

		/* merge it into the right slot */
		wipslotnum = pownum % wq->wq_nwipslots;
		wipslot = &wq->wq_wip[wipslotnum];

		/*
		 * Grab the slot lock before releasing the queue lock, per
		 * the Phase I locking rules in the big comment up top.
		 */
		if ((errno = pthread_mutex_lock(&wipslot->wip_lock)) != 0)
			terminate("%s: pthread_mutex_lock(wip_lock)", __func__);

		if ((errno = pthread_mutex_unlock(&wq->wq_queue_lock)) != 0)
			terminate("%s: pthread_mutex_unlock(wq_queue_lock)",
			    __func__);

		wip_add_work(wipslot, pow);

		/* batch complete: move the slot contents to the done queue */
		if (wipslot->wip_nmerged == wq->wq_maxbatchsz)
			wip_save_work(wq, wipslot, wipslotnum);

		if ((errno = pthread_mutex_unlock(&wipslot->wip_lock)) != 0)
			terminate("%s: pthread_mutex_unlock(wip_lock)",
			    __func__);
	}
}
482
/*
 * Phase II worker loop: repeatedly remove the first two trees from the
 * queue, merge them outside the lock, and append the result at the tail
 * in batch ID order, until only the final merge result remains
 * (wq_ninqueue == 1).  One thread then sets wq_alldone and signals the
 * main thread via wq_alldone_cv.
 */
static void
worker_runphase2(workqueue_t *wq)
{
	tdata_t *pow1, *pow2;
	int batchid;

	for (;;) {
		if ((errno = pthread_mutex_lock(&wq->wq_queue_lock)) != 0)
			terminate("%s: pthread_mutex_lock(wq_queue_lock)",
			    __func__);

		if (wq->wq_ninqueue == 1) {
			/* everything merged - wake siblings so they exit too */
			if ((errno = pthread_cond_broadcast(&wq->wq_work_avail)) != 0)
				terminate("%s: pthread_cond_broadcast(wq_work_avail)",
				    __func__);
			if ((errno = pthread_mutex_unlock(&wq->wq_queue_lock)) != 0)
				terminate("%s: pthread_mutex_unlock(wq_queue_lock)",
				    __func__);

			debug(2, "0x%jx: entering p2 completion barrier\n",
			    (uintmax_t)(uintptr_t)pthread_self());
			/* one thread wins the barrier and notifies main */
			if (barrier_wait(&wq->wq_bar1)) {
				if ((errno = pthread_mutex_lock(&wq->wq_queue_lock)) != 0)
					terminate("%s: pthread_mutex_lock(wq_queue_lock)",
					    __func__);
				wq->wq_alldone = 1;
				if ((errno = pthread_cond_signal(&wq->wq_alldone_cv)) != 0)
					terminate("%s: pthread_cond_signal(wq_alldone_cv)",
					    __func__);
				if ((errno = pthread_mutex_unlock(&wq->wq_queue_lock)) != 0)
					terminate("%s: pthread_mutex_unlock(wq_queue_lock)",
					    __func__);
			}

			return;
		}

		/* a merge needs two entries; wait for more to arrive */
		if (fifo_len(wq->wq_queue) < 2) {
			if ((errno = pthread_cond_wait(&wq->wq_work_avail,
			    &wq->wq_queue_lock)) != 0)
				terminate("%s: pthread_cond_wait(wq_work_avail,wq_queue_lock)",
				    __func__);
			if ((errno = pthread_mutex_unlock(&wq->wq_queue_lock)) != 0)
				terminate("%s: pthread_mutex_unlock(wq_queue_lock)",
				    __func__);
			continue;
		}

		/* there's work to be done! */
		pow1 = fifo_remove(wq->wq_queue);
		pow2 = fifo_remove(wq->wq_queue);
		wq->wq_ninqueue -= 2;

		/* claim a batch ID so results are re-queued in fixed order */
		batchid = wq->wq_next_batchid++;

		if ((errno = pthread_mutex_unlock(&wq->wq_queue_lock)) != 0)
			terminate("%s: pthread_mutex_unlock(wq_queue_lock)",
			    __func__);

		debug(2, "0x%jx: merging %p into %p\n",
		    (uintmax_t)(uintptr_t)pthread_self(),
		    (void *)pow1, (void *)pow2);
		/* the expensive merge runs without holding any lock */
		merge_into_master(pow1, pow2, NULL, 0);
		tdata_free(pow1);

		/*
		 * merging is complete. place at the tail of the queue in
		 * proper order.
		 */
		if ((errno = pthread_mutex_lock(&wq->wq_queue_lock)) != 0)
			terminate("%s: pthread_mutex_lock(wq_queue_lock)",
			    __func__);
		/* wait for all earlier batches to be re-queued first */
		while (wq->wq_lastdonebatch + 1 != batchid) {
			if ((errno = pthread_cond_wait(&wq->wq_done_cv,
			    &wq->wq_queue_lock)) != 0)
				terminate("%s: pthread_cond_wait(wq_done_cv,wq_queue_lock)",
				    __func__);
		}

		wq->wq_lastdonebatch = batchid;

		fifo_add(wq->wq_queue, pow2);
		debug(2, "0x%jx: added %p to queue, len now %d, ninqueue %d\n",
		    (uintmax_t)(uintptr_t)pthread_self(), (void *)pow2,
		    fifo_len(wq->wq_queue), wq->wq_ninqueue);
		/* unblock threads waiting on batch order and on work */
		if ((errno = pthread_cond_broadcast(&wq->wq_done_cv)) != 0)
			terminate("%s: pthread_cond_broadcast(wq_done_cv)",
			    __func__);
		if ((errno = pthread_cond_signal(&wq->wq_work_avail)) != 0)
			terminate("%s: pthread_cond_signal(wq_work_avail)",
			    __func__);
		if ((errno = pthread_mutex_unlock(&wq->wq_queue_lock)) != 0)
			terminate("%s: pthread_mutex_unlock(wq_queue_lock)",
			    __func__);
	}
}
579
/*
 * Main loop for worker threads: run Phase I, cross the two barriers
 * (during which exactly one thread - the one for which barrier_wait()
 * returns nonzero - flushes partial batches and sets up Phase II), then
 * run Phase II until all merges are complete.
 */
static void *
worker_thread(void *v)
{
	workqueue_t *wq = v;
	worker_runphase1(wq);

	debug(2, "0x%jx: entering first barrier\n",
	    (uintmax_t)(uintptr_t)pthread_self());

	if (barrier_wait(&wq->wq_bar1)) {

		debug(2, "0x%jx: doing work in first barrier\n",
		    (uintmax_t)(uintptr_t)pthread_self());

		/* single-threaded here; siblings wait at wq_bar2 below */
		finalize_phase_one(wq);

		init_phase_two(wq);

		debug(2, "0x%jx: ninqueue is %d, %d on queue\n",
		    (uintmax_t)(uintptr_t)pthread_self(),
		    wq->wq_ninqueue, fifo_len(wq->wq_queue));
	}

	debug(2, "0x%jx: entering second barrier\n",
	    (uintmax_t)(uintptr_t)pthread_self());

	(void) barrier_wait(&wq->wq_bar2);

	debug(2, "0x%jx: phase 1 complete\n",
	    (uintmax_t)(uintptr_t)pthread_self());

	worker_runphase2(wq);
	return NULL;
}
617
/*
 * Pass a tdata_t tree, built from an input file, off to the work queue for
 * consumption by worker threads.  Used as the per-file callback for the
 * CTF reader; returning 1 tells the reader to continue with the next file.
 */
static int
merge_ctf_cb(tdata_t *td, char *name, void *arg)
{
	workqueue_t *wq = arg;

	debug(3, "Adding tdata %p for processing\n", (void *)td);

	if ((errno = pthread_mutex_lock(&wq->wq_queue_lock)) != 0)
		terminate("%s: pthread_mutex_lock(wq_queue_lock)", __func__);
	/*
	 * Don't let the input queue run arbitrarily far ahead of the
	 * workers; block until they drain it below wq_ithrottle.
	 */
	while (fifo_len(wq->wq_queue) > wq->wq_ithrottle) {
		debug(2, "Throttling input (len = %d, throttle = %d)\n",
		    fifo_len(wq->wq_queue), wq->wq_ithrottle);
		if ((errno = pthread_cond_wait(&wq->wq_work_removed, &wq->wq_queue_lock)) != 0)
			terminate("%s: pthread_cond_wait(wq_work_removed,wq_queue_lock)",
			    __func__);
	}

	fifo_add(wq->wq_queue, td);
	debug(1, "Thread 0x%jx announcing %s\n",
	    (uintmax_t)(uintptr_t)pthread_self(), name);
	if ((errno = pthread_cond_broadcast(&wq->wq_work_avail)) != 0)
		terminate("%s: pthread_cond_broadcast(wq_work_avail)", __func__);
	if ((errno = pthread_mutex_unlock(&wq->wq_queue_lock)) != 0)
		terminate("%s: pthread_mutex_unlock(wq_queue_lock)", __func__);

	return (1);
}
649
650 /*
651 * This program is intended to be invoked from a Makefile, as part of the build.
652 * As such, in the event of a failure or user-initiated interrupt (^C), we need
653 * to ensure that a subsequent re-make will cause ctfmerge to be executed again.
654 * Unfortunately, ctfmerge will usually be invoked directly after (and as part
655 * of the same Makefile rule as) a link, and will operate on the linked file
656 * in place. If we merely exit upon receipt of a SIGINT, a subsequent make
657 * will notice that the *linked* file is newer than the object files, and thus
658 * will not reinvoke ctfmerge. The only way to ensure that a subsequent make
659 * reinvokes ctfmerge, is to remove the file to which we are adding CTF
660 * data (confusingly named the output file). This means that the link will need
661 * to happen again, but links are generally fast, and we can't allow the merge
662 * to be skipped.
663 *
664 * Another possibility would be to block SIGINT entirely - to always run to
665 * completion. The run time of ctfmerge can, however, be measured in minutes
666 * in some cases, so this is not a valid option.
667 */
/*
 * Handler for SIGINT/SIGQUIT/SIGTERM.  terminate() reports the signal and
 * runs the registered cleanup (terminate_cleanup), which removes partial
 * output so a subsequent make re-runs ctfmerge; see the comment above.
 */
static void __dead
handle_sig(int sig)
{
	terminate("Caught signal %d - exiting\n", sig);
}
673
674 static void
terminate_cleanup(void)675 terminate_cleanup(void)
676 {
677 int dounlink = getenv("CTFMERGE_TERMINATE_NO_UNLINK") ? 0 : 1;
678
679 if (tmpname != NULL && dounlink)
680 unlink(tmpname);
681
682 if (outfile == NULL)
683 return;
684
685 #if !defined (__FreeBSD__) && !(defined(__NetBSD__) || HAVE_NBTOOL_CONFIG_H)
686 if (dounlink) {
687 fprintf(stderr, "Removing %s\n", outfile);
688 unlink(outfile);
689 }
690 #endif
691 }
692
/*
 * Implements the -c copy mode: read the CTF data from srcfile and write it
 * into destfile's CTF section, going through a temp file that is renamed
 * over destfile on success.  Terminates the program on any failure.
 */
static void
copy_ctf_data(char *srcfile, char *destfile, int keep_stabs)
{
	tdata_t *srctd;

	if (read_ctf(&srcfile, 1, NULL, read_ctf_save_cb, &srctd, 1) == 0)
		terminate("No CTF data found in source file %s\n", srcfile);

	/* tmpname is file-scope so terminate_cleanup() can unlink it */
	tmpname = mktmpname(destfile, ".ctf");
	write_ctf(srctd, destfile, tmpname, CTF_COMPRESS | CTF_SWAP_BYTES | keep_stabs);
	if (rename(tmpname, destfile) != 0) {
		terminate("Couldn't rename temp file %s to %s", tmpname,
		    destfile);
	}
	free(tmpname);
	tdata_free(srctd);
}
710
/*
 * Initialize the work queue for a run over nfiles input files: size the
 * wip array, batch size, thread count, and input throttle (each
 * overridable via environment variables), then set up all queues, locks,
 * condition variables, and barriers used by the two merge phases.
 */
static void
wq_init(workqueue_t *wq, int nfiles)
{
	int throttle, nslots, i;
	const char *e;

	/* number of Phase I wip slots */
	if (getenv("CTFMERGE_MAX_SLOTS"))
		nslots = atoi(getenv("CTFMERGE_MAX_SLOTS"));
	else
		nslots = MERGE_PHASE1_MAX_SLOTS;

	/* files merged into one slot before it is batched out */
	if (getenv("CTFMERGE_PHASE1_BATCH_SIZE"))
		wq->wq_maxbatchsz = atoi(getenv("CTFMERGE_PHASE1_BATCH_SIZE"));
	else
		wq->wq_maxbatchsz = MERGE_PHASE1_BATCH_SIZE;

	/* no more slots than the number of batches we can actually fill */
	nslots = MIN(nslots, (nfiles + wq->wq_maxbatchsz - 1) /
	    wq->wq_maxbatchsz);

	wq->wq_wip = xcalloc(sizeof (wip_t) * nslots);
	wq->wq_nwipslots = nslots;
	e = getenv("CTFMERGE_NUM_THREADS");
	if (e) {
		wq->wq_nthreads = atoi(e);
	} else {
#ifdef _SC_NPROCESSORS_ONLN
		/* 1.5x the online CPUs, but never more threads than slots */
		wq->wq_nthreads = MIN(sysconf(_SC_NPROCESSORS_ONLN) * 3 / 2,
		    nslots);
#else
		wq->wq_nthreads = 2;
#endif
	}
	wq->wq_thread = xmalloc(sizeof (pthread_t) * wq->wq_nthreads);

	/* queue depth at which merge_ctf_cb() stops reading ahead */
	e = getenv("CTFMERGE_INPUT_THROTTLE");
	throttle = e ? atoi(e) : MERGE_INPUT_THROTTLE_LEN;
	wq->wq_ithrottle = throttle * wq->wq_nthreads;

	debug(1, "Using %d slots, %d threads\n", wq->wq_nwipslots,
	    wq->wq_nthreads);

	wq->wq_next_batchid = 0;

	for (i = 0; i < nslots; i++) {
		if ((errno = pthread_mutex_init(&wq->wq_wip[i].wip_lock, NULL)) != 0)
			terminate("%s: pthread_mutex_init(wip[%d].wip_lock",
			    __func__, i);
		if ((errno = pthread_cond_init(&wq->wq_wip[i].wip_cv, NULL)) != 0)
			terminate("%s: pthread_cond_init(wip[%d].wip_cv",
			    __func__, i);
		/* slots receive the initial batch IDs in slot order */
		wq->wq_wip[i].wip_batchid = wq->wq_next_batchid++;
	}

	if ((errno = pthread_mutex_init(&wq->wq_queue_lock, NULL)) != 0)
		terminate("%s: pthread_mutex_init(wq_queue_lock)", __func__);
	wq->wq_queue = fifo_new();
	if ((errno = pthread_cond_init(&wq->wq_work_avail, NULL)) != 0)
		terminate("%s: pthread_cond_init(wq_work_avail)", __func__);
	if ((errno = pthread_cond_init(&wq->wq_work_removed, NULL)) != 0)
		terminate("%s: pthread_cond_init(wq_work_removed", __func__);
	wq->wq_ninqueue = nfiles;
	wq->wq_nextpownum = 0;

	if ((errno = pthread_mutex_init(&wq->wq_donequeue_lock, NULL)) != 0)
		terminate("%s: pthread_mutex_init(wq_donequeue_lock)", __func__);
	wq->wq_donequeue = fifo_new();
	wq->wq_lastdonebatch = -1;

	if ((errno = pthread_cond_init(&wq->wq_done_cv, NULL)) != 0)
		terminate("%s: pthread_cond_init(wq_done_cv)", __func__);

	if ((errno = pthread_cond_init(&wq->wq_alldone_cv, NULL)) != 0)
		terminate("%s: pthread_cond_init(wq_alldone_cv)", __func__);
	wq->wq_alldone = 0;

	/* every worker crosses both barriers between the two phases */
	barrier_init(&wq->wq_bar1, wq->wq_nthreads);
	barrier_init(&wq->wq_bar2, wq->wq_nthreads);

	wq->wq_nomorefiles = 0;
}
791
/*
 * Create the worker threads.  SIGINT/SIGQUIT/SIGTERM are blocked around
 * pthread_create() so the workers inherit a mask with them blocked; the
 * handler is then installed and the mask restored in this thread only,
 * ensuring those signals are delivered to (and handled by) the main
 * thread alone.
 */
static void
start_threads(workqueue_t *wq)
{
	sigset_t sets;
	int i;

	sigemptyset(&sets);
	sigaddset(&sets, SIGINT);
	sigaddset(&sets, SIGQUIT);
	sigaddset(&sets, SIGTERM);
	if ((errno = pthread_sigmask(SIG_BLOCK, &sets, NULL)) != 0)
		terminate("%s: pthread_sigmask(SIG_BLOCK)", __func__);

	for (i = 0; i < wq->wq_nthreads; i++) {
		if ((errno = pthread_create(&wq->wq_thread[i], NULL, worker_thread, wq)) != 0)
			terminate("%s: pthread_create(wq_thread[%d]",
			    __func__, i);
	}

#ifdef illumos
	sigset(SIGINT, handle_sig);
	sigset(SIGQUIT, handle_sig);
	sigset(SIGTERM, handle_sig);
#else
	signal(SIGINT, handle_sig);
	signal(SIGQUIT, handle_sig);
	signal(SIGTERM, handle_sig);
#endif
	if ((errno = pthread_sigmask(SIG_UNBLOCK, &sets, NULL)) != 0)
		terminate("%s: pthread_sigmask(SIG_UNBLOCK)", __func__);
}
823
824 static void
join_threads(workqueue_t * wq)825 join_threads(workqueue_t *wq)
826 {
827 int i;
828
829 for (i = 0; i < wq->wq_nthreads; i++) {
830 if ((errno = pthread_join(wq->wq_thread[i], NULL)) != 0)
831 terminate("%s: pthread_join(wq_thread[%d]",
832 __func__, i);
833 }
834 }
835
/*
 * qsort(3) comparator for an array of "char *" elements: orders the
 * pointed-to strings with plain strcmp().
 */
static int
strcompare(const void *p1, const void *p2)
{
	const char * const *lhs = p1;
	const char * const *rhs = p2;

	return (strcmp(*lhs, *rhs));
}
844
845 /*
846 * Core work queue structure; passed to worker threads on thread creation
847 * as the main point of coordination. Allocate as a static structure; we
848 * could have put this into a local variable in main, but passing a pointer
849 * into your stack to another thread is fragile at best and leads to some
850 * hard-to-debug failure modes.
851 */
852 static workqueue_t wq;
853
854 int
main(int argc,char ** argv)855 main(int argc, char **argv)
856 {
857 tdata_t *mstrtd, *savetd;
858 char *uniqfile = NULL, *uniqlabel = NULL;
859 char *withfile = NULL;
860 char *label = NULL;
861 char **ifiles, **tifiles;
862 int verbose = 0, docopy = 0;
863 int write_fuzzy_match = 0;
864 int keep_stabs = 0;
865 int require_ctf = 0;
866 int nifiles, nielems;
867 int c, i, idx, tidx, err;
868
869 progname = basename(argv[0]);
870
871 if (getenv("CTFMERGE_DEBUG_LEVEL"))
872 debug_level = atoi(getenv("CTFMERGE_DEBUG_LEVEL"));
873
874 err = 0;
875 while ((c = getopt(argc, argv, ":cd:D:fgl:L:o:tvw:s")) != EOF) {
876 switch (c) {
877 case 'c':
878 docopy = 1;
879 break;
880 case 'd':
881 /* Uniquify against `uniqfile' */
882 uniqfile = optarg;
883 break;
884 case 'D':
885 /* Uniquify against label `uniqlabel' in `uniqfile' */
886 uniqlabel = optarg;
887 break;
888 case 'f':
889 write_fuzzy_match = CTF_FUZZY_MATCH;
890 break;
891 case 'g':
892 keep_stabs = CTF_KEEP_STABS;
893 break;
894 case 'l':
895 /* Label merged types with `label' */
896 label = optarg;
897 break;
898 case 'L':
899 /* Label merged types with getenv(`label`) */
900 if ((label = getenv(optarg)) == NULL)
901 label = __UNCONST(CTF_DEFAULT_LABEL);
902 break;
903 case 'o':
904 /* Place merged types in CTF section in `outfile' */
905 outfile = optarg;
906 break;
907 case 't':
908 /* Insist *all* object files built from C have CTF */
909 require_ctf = 1;
910 break;
911 case 'v':
912 /* More debugging information */
913 verbose = 1;
914 break;
915 case 'w':
916 /* Additive merge with data from `withfile' */
917 withfile = optarg;
918 break;
919 case 's':
920 /* use the dynsym rather than the symtab */
921 dynsym = CTF_USE_DYNSYM;
922 break;
923 default:
924 usage();
925 exit(2);
926 }
927 }
928
929 /* Validate arguments */
930 if (docopy) {
931 if (uniqfile != NULL || uniqlabel != NULL || label != NULL ||
932 outfile != NULL || withfile != NULL || dynsym != 0)
933 err++;
934
935 if (argc - optind != 2)
936 err++;
937 } else {
938 if (uniqfile != NULL && withfile != NULL)
939 err++;
940
941 if (uniqlabel != NULL && uniqfile == NULL)
942 err++;
943
944 if (outfile == NULL || label == NULL)
945 err++;
946
947 if (argc - optind == 0)
948 err++;
949 }
950
951 if (err) {
952 usage();
953 exit(2);
954 }
955
956 if (getenv("STRIPSTABS_KEEP_STABS") != NULL)
957 keep_stabs = CTF_KEEP_STABS;
958
959 if (uniqfile && access(uniqfile, R_OK) != 0) {
960 warning("Uniquification file %s couldn't be opened and "
961 "will be ignored.\n", uniqfile);
962 uniqfile = NULL;
963 }
964 if (withfile && access(withfile, R_OK) != 0) {
965 warning("With file %s couldn't be opened and will be "
966 "ignored.\n", withfile);
967 withfile = NULL;
968 }
969 if (outfile && access(outfile, R_OK|W_OK) != 0)
970 terminate("Cannot open output file %s for r/w", outfile);
971
972 /*
973 * This is ugly, but we don't want to have to have a separate tool
974 * (yet) just for copying an ELF section with our specific requirements,
975 * so we shoe-horn a copier into ctfmerge.
976 */
977 if (docopy) {
978 copy_ctf_data(argv[optind], argv[optind + 1], keep_stabs);
979
980 exit(0);
981 }
982
983 set_terminate_cleanup(terminate_cleanup);
984
985 /* Sort the input files and strip out duplicates */
986 nifiles = argc - optind;
987 ifiles = xmalloc(sizeof (char *) * nifiles);
988 tifiles = xmalloc(sizeof (char *) * nifiles);
989
990 for (i = 0; i < nifiles; i++)
991 tifiles[i] = argv[optind + i];
992 qsort(tifiles, nifiles, sizeof (char *), strcompare);
993
994 ifiles[0] = tifiles[0];
995 for (idx = 0, tidx = 1; tidx < nifiles; tidx++) {
996 if (strcmp(ifiles[idx], tifiles[tidx]) != 0)
997 ifiles[++idx] = tifiles[tidx];
998 }
999 nifiles = idx + 1;
1000
1001 /* Make sure they all exist */
1002 if ((nielems = count_files(ifiles, nifiles)) < 0)
1003 terminate("Some input files were inaccessible\n");
1004
1005 /* Prepare for the merge */
1006 wq_init(&wq, nielems);
1007
1008 start_threads(&wq);
1009
1010 /*
1011 * Start the merge
1012 *
1013 * We're reading everything from each of the object files, so we
1014 * don't need to specify labels.
1015 */
1016 if (read_ctf(ifiles, nifiles, NULL, merge_ctf_cb,
1017 &wq, require_ctf) == 0) {
1018 /*
1019 * If we're verifying that C files have CTF, it's safe to
1020 * assume that in this case, we're building only from assembly
1021 * inputs.
1022 */
1023 if (require_ctf)
1024 exit(0);
1025 terminate("No ctf sections found to merge\n");
1026 }
1027
1028 if ((errno = pthread_mutex_lock(&wq.wq_queue_lock)) != 0)
1029 terminate("%s: pthread_mutex_lock(wq_queue_lock)", __func__);
1030 wq.wq_nomorefiles = 1;
1031 if ((errno = pthread_cond_broadcast(&wq.wq_work_avail)) != 0)
1032 terminate("%s: pthread_cond_broadcast(wq_work_avail)", __func__);
1033 if ((errno = pthread_mutex_unlock(&wq.wq_queue_lock)) != 0)
1034 terminate("%s: pthread_mutex_unlock(wq_queue_lock)", __func__);
1035
1036 if ((errno = pthread_mutex_lock(&wq.wq_queue_lock)) != 0)
1037 terminate("%s: pthread_mutex_lock(wq_queue_lock)", __func__);
1038 while (wq.wq_alldone == 0) {
1039 if ((errno = pthread_cond_wait(&wq.wq_alldone_cv, &wq.wq_queue_lock)) != 0)
1040 terminate("%s: pthread_cond_wait(wq_alldone_cv,wq_queue_lock)",
1041 __func__);
1042 }
1043 if ((errno = pthread_mutex_unlock(&wq.wq_queue_lock)) != 0)
1044 terminate("%s: pthread_mutex_unlock(wq_queue_lock)", __func__);
1045
1046 join_threads(&wq);
1047
1048 /*
1049 * All requested files have been merged, with the resulting tree in
1050 * mstrtd. savetd is the tree that will be placed into the output file.
1051 *
1052 * Regardless of whether we're doing a normal uniquification or an
1053 * additive merge, we need a type tree that has been uniquified
1054 * against uniqfile or withfile, as appropriate.
1055 *
1056 * If we're doing a uniquification, we stuff the resulting tree into
1057 * outfile. Otherwise, we add the tree to the tree already in withfile.
1058 */
1059 assert(fifo_len(wq.wq_queue) == 1);
1060 mstrtd = fifo_remove(wq.wq_queue);
1061
1062 if (verbose || debug_level) {
1063 debug(2, "Statistics for td %p\n", (void *)mstrtd);
1064
1065 iidesc_stats(mstrtd->td_iihash);
1066 }
1067
1068 if (uniqfile != NULL || withfile != NULL) {
1069 char *reffile, *reflabel = NULL;
1070 tdata_t *reftd;
1071
1072 if (uniqfile != NULL) {
1073 reffile = uniqfile;
1074 reflabel = uniqlabel;
1075 } else
1076 reffile = withfile;
1077
1078 if (read_ctf(&reffile, 1, reflabel, read_ctf_save_cb,
1079 &reftd, require_ctf) == 0) {
1080 terminate("No CTF data found in reference file %s\n",
1081 reffile);
1082 }
1083
1084 savetd = tdata_new();
1085
1086 if (CTF_TYPE_ISCHILD(reftd->td_nextid))
1087 terminate("No room for additional types in master\n");
1088
1089 savetd->td_nextid = withfile ? reftd->td_nextid :
1090 CTF_INDEX_TO_TYPE(1, TRUE);
1091 merge_into_master(mstrtd, reftd, savetd, 0);
1092
1093 tdata_label_add(savetd, label, CTF_LABEL_LASTIDX);
1094
1095 if (withfile) {
1096 /*
1097 * savetd holds the new data to be added to the withfile
1098 */
1099 tdata_t *withtd = reftd;
1100
1101 tdata_merge(withtd, savetd);
1102
1103 savetd = withtd;
1104 } else {
1105 char uniqname[MAXPATHLEN];
1106 labelent_t *parle;
1107
1108 parle = tdata_label_top(reftd);
1109
1110 savetd->td_parlabel = xstrdup(parle->le_name);
1111
1112 strncpy(uniqname, reffile, sizeof (uniqname));
1113 uniqname[MAXPATHLEN - 1] = '\0';
1114 savetd->td_parname = xstrdup(basename(uniqname));
1115 }
1116
1117 } else {
1118 /*
1119 * No post processing. Write the merged tree as-is into the
1120 * output file.
1121 */
1122 tdata_label_free(mstrtd);
1123 tdata_label_add(mstrtd, label, CTF_LABEL_LASTIDX);
1124
1125 savetd = mstrtd;
1126 }
1127
1128 tmpname = mktmpname(outfile, ".ctf");
1129 write_ctf(savetd, outfile, tmpname,
1130 CTF_COMPRESS | CTF_SWAP_BYTES | write_fuzzy_match | dynsym | keep_stabs);
1131 if (rename(tmpname, outfile) != 0)
1132 terminate("Couldn't rename output temp file %s", tmpname);
1133 free(tmpname);
1134
1135 return (0);
1136 }
1137