xref: /openbsd-src/usr.bin/tsort/tsort.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /* $OpenBSD: tsort.c,v 1.10 2001/07/14 14:18:50 espie Exp $ */
2 /* ex:ts=8 sw=4:
3  */
4 
5 /*
6  * Copyright (c) 1999-2001 Marc Espie.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  * This product includes software developed by Marc Espie for the OpenBSD
19  * Project.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OPENBSD
25  * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <sys/types.h>
35 #include <assert.h>
36 #include <ctype.h>
37 #include <err.h>
38 #include <limits.h>
39 #include <stddef.h>
40 #include <ohash.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <sysexits.h>
45 #include <unistd.h>
46 
47 /* The complexity of topological sorting is O(e), where e is the
48  * size of input.  While reading input, vertices have to be identified,
49  * thus add the complexity of e keys retrieval among v keys using
50  * an appropriate data structure.  This program uses open double hashing
51  * for that purpose.  See Knuth for the expected complexity of double
52  * hashing (Brent variation should probably be used if v << e, as a user
53  * option).
54  *
55  * The algorithm used for longest cycle reporting is accurate, but somewhat
56  * expensive.  It may need to build all free paths of the graph (a free
57  * path is a path that never goes twice through the same node), whose
58  * number can be as high as O(2^e).  Usually, the number of free paths is
59  * much smaller though.  This program's author does not believe that a
60  * significantly better worst-case complexity algorithm exists.
61  *
62  * In case of a hints file, the set of minimal nodes is maintained as a
63  * heap.  The resulting complexity is O(e+v log v) for the worst case.
64  * The average should actually be near O(e).
65  *
66  * If the hints file is incomplete, there is some extra complexity incurred
67  * by make_transparent, which does propagate order values to unmarked
68  * nodes. In the worst case, make_transparent is  O(e u),
69  * where u is the number of originally unmarked nodes.
70  * In practice, it is much faster.
71  *
72  * The simple topological sort algorithm detects cycles.  This program
73  * goes further, breaking cycles through the use of simple heuristics.
74  * Each cycle break checks the whole set of nodes, hence if c cycles break
75  * are needed, this is an extra cost of O(c v).
76  *
77  * Possible heuristics are as follows:
78  * - break cycle at node with lowest number of predecessors (default case),
79  * - break longest cycle at node with lowest number of predecessors,
80  * - break cycle at next node from the hints file.
81  *
82  * Except for the hints file case, which sets an explicit constraint on
83  * which cycle to break, those heuristics locally result in the smallest
84  * number of broken edges.
85  *
86  * Those are admittedly greedy strategies, as is the selection of the next
87  * node from the hints file amongst equivalent candidates that is used for
88  * `stable' topological sorting.
89  */
90 
91 #ifdef __GNUC__
92 #define UNUSED	__attribute__((unused))
93 #else
94 #define UNUSED
95 #endif
96 
97 struct node;
98 
99 /* The set of arcs from a given node is stored as a linked list.  */
100 struct link {
101 	struct link *next;
102 	struct node *node;
103 };
104 
105 #define NO_ORDER	UINT_MAX
106 
107 struct node {
108 	unsigned int refs;	/* Number of arcs left, coming into this node .
109 				 * Note that nodes with a null count can't
110 				 * be part of cycles.  */
111 	struct link  *arcs;	/* List of forward arcs.  */
112 
113 	unsigned int order; 	/* Order of nodes according to a hint file.  */
114 
115 	/* Cycle detection algorithms build a free path of nodes.  */
116 	struct node  *from; 	/* Previous node in the current path.  */
117 
118 	unsigned int mark;	/* Mark processed nodes in cycle discovery.  */
119 	struct link  *traverse;	/* Next link to traverse when backtracking.  */
120 	char         k[1];	/* Name of this node.  */
121 };
122 
123 #define HASH_START 9
124 
125 struct array {
126 	unsigned int entries;
127 	struct node  **t;
128 };
129 
130 static void nodes_init __P((struct ohash *));
131 static struct node *node_lookup __P((struct ohash *, const char *, const char *));
132 static void usage __P((void));
133 static struct node *new_node __P((const char *, const char *));
134 
135 static unsigned int read_pairs __P((FILE *, struct ohash *, int,
136     const char *, unsigned int, int));
137 static void split_nodes __P((struct ohash *, struct array *, struct array *));
138 static void make_transparent __P((struct ohash *));
139 static void insert_arc __P((struct node *, struct node *));
140 
141 #ifdef DEBUG
142 static void dump_node __P((struct node *));
143 static void dump_array __P((struct array *));
144 static void dump_hash __P((struct ohash *));
145 #endif
146 static unsigned int read_hints __P((FILE *, struct ohash *, int,
147     const char *, unsigned int));
148 static struct node *find_smallest_node __P((struct array *));
149 static struct node *find_good_cycle_break __P((struct array *));
150 static void print_cycle __P((struct array *));
151 static struct node *find_cycle_from __P((struct node *, struct array *));
152 static struct node *find_predecessor __P((struct array *, struct node *));
153 static unsigned int traverse_node __P((struct node *, unsigned int, struct array *));
154 static struct node *find_longest_cycle __P((struct array *, struct array *));
155 
156 static void heap_down __P((struct array *, unsigned int));
157 static void heapify __P((struct array *, int));
158 static struct node *dequeue __P((struct array *));
159 static void enqueue __P((struct array *, struct node *));
160 
161 
162 
163 #define erealloc(n, s)	emem(realloc(n, s))
164 static void *hash_alloc __P((size_t, void *));
165 static void hash_free __P((void *, size_t, void *));
166 static void* entry_alloc __P((size_t, void *));
167 static void *emalloc __P((size_t));
168 static void *emem __P((void *));
169 #define DEBUG_TRAVERSE 0
170 static struct ohash_info node_info = {
171 	offsetof(struct node, k), NULL, hash_alloc, hash_free, entry_alloc };
172 
173 
174 int main __P((int, char *[]));
175 
176 
177 /***
178  *** Memory handling.
179  ***/
180 
181 static void *
182 emem(p)
183 	void 		*p;
184 {
185 	if (p)
186 		return p;
187 	else
188 		errx(EX_SOFTWARE, "Memory exhausted");
189 }
190 
191 static void *
192 hash_alloc(s, u)
193 	size_t s;
194 	void *u		UNUSED;
195 {
196 	return emem(calloc(s, 1));
197 }
198 
199 static void
200 hash_free(p, s, u)
201 	void *p;
202 	size_t s	UNUSED;
203 	void *u		UNUSED;
204 {
205 	free(p);
206 }
207 
208 static void *
209 entry_alloc(s, u)
210 	size_t s;
211 	void *u		UNUSED;
212 {
213 	return emalloc(s);
214 }
215 
216 static void *
217 emalloc(s)
218 	size_t s;
219 {
220 	return emem(malloc(s));
221 }
222 
223 
224 /***
225  *** Hash table.
226  ***/
227 
228 /* Inserting and finding nodes in the hash structure.
229  * We handle interval strings for efficiency wrt fgetln.  */
230 static struct node *
231 new_node(start, end)
232 	const char 	*start;
233 	const char 	*end;
234 {
235 	struct node 	*n;
236 
237 	n = ohash_create_entry(&node_info, start, &end);
238 	n->from = NULL;
239 	n->arcs = NULL;
240 	n->refs = 0;
241 	n->mark = 0;
242 	n->order = NO_ORDER;
243 	n->traverse = NULL;
244 	return n;
245 }
246 
247 
248 static void
249 nodes_init(h)
250 	struct ohash 	*h;
251 {
252 	ohash_init(h, HASH_START, &node_info);
253 }
254 
255 static struct node *
256 node_lookup(h, start, end)
257 	struct ohash 	*h;
258 	const char 	*start;
259 	const char 	*end;
260 {
261 	unsigned int	i;
262 	struct node *	n;
263 
264 	i = ohash_qlookupi(h, start, &end);
265 
266 	n = ohash_find(h, i);
267 	if (n == NULL)
268 		n = ohash_insert(h, i, new_node(start, end));
269 	return n;
270 }
271 
272 #ifdef DEBUG
273 static void
274 dump_node(n)
275     struct node 	*n;
276 {
277 	struct link 	*l;
278 
279 	if (n->refs == 0)
280 		return;
281 	printf("%s (%u/%u): ", n->k, n->order, n->refs);
282 	for (l = n->arcs; l != NULL; l = l->next)
283 		if (n->refs != 0)
284 		printf("%s(%u/%u) ", l->node->k, l->node->order, l->node->refs);
285     	putchar('\n');
286 }
287 
288 static void
289 dump_array(a)
290 	struct array	*a;
291 {
292 	unsigned int 	i;
293 
294 	for (i = 0; i < a->entries; i++)
295 		dump_node(a->t[i]);
296 }
297 
298 static void
299 dump_hash(h)
300 	struct ohash 	*h;
301 {
302 	unsigned int 	i;
303 	struct node 	*n;
304 
305 	for (n = ohash_first(h, &i); n != NULL; n = ohash_next(h, &i))
306 		dump_node(n);
307 }
308 #endif
309 
310 
311 /***
312  *** Reading data.
313  ***/
314 
315 static void
316 insert_arc(a, b)
317 	struct node 	*a, *b;
318 {
319 	struct link 	*l;
320 
321 	/* Check that this arc is not already present.  */
322 	for (l = a->arcs; l != NULL; l = l->next) {
323 		if (l->node == b)
324 			return;
325 	}
326 	b->refs++;
327 	l = emalloc(sizeof(struct link));
328 	l->node = b;
329 	l->next = a->arcs;
330 	a->arcs = l;
331 }
332 
333 static unsigned int
334 read_pairs(f, h, reverse, name, order, hint)
335 	FILE 		*f;
336 	struct ohash 	*h;
337 	int 		reverse;
338 	const char 	*name;
339 	unsigned int	order;
340 	int		hint;
341 {
342 	int 		toggle;
343 	struct node 	*a;
344 	size_t 		size;
345 	char 		*str;
346 
347 	toggle = 1;
348 	a = NULL;
349 
350 	while ((str = fgetln(f, &size)) != NULL) {
351 		char *sentinel;
352 
353 		sentinel = str + size;
354 		for (;;) {
355 			char *e;
356 
357 			while (isspace(*str) && str < sentinel)
358 				str++;
359 			if (str == sentinel)
360 				break;
361 			for (e = str; !isspace(*e) && e < sentinel; e++)
362 				continue;
363 			if (toggle) {
364 				a = node_lookup(h, str, e);
365 				if (a->order == NO_ORDER && hint)
366 					a->order = order++;
367 			} else {
368 				struct node *b;
369 
370 				b = node_lookup(h, str, e);
371 				assert(a != NULL);
372 				if (b != a) {
373 					if (reverse)
374 						insert_arc(b, a);
375 					else
376 						insert_arc(a, b);
377 				}
378 			}
379 			toggle = !toggle;
380 			str = e;
381 		}
382 	}
383 	if (toggle == 0)
384 		errx(EX_DATAERR, "odd number of pairs in %s", name);
385     	if (!feof(f))
386 		err(EX_IOERR, "error reading %s", name);
387 	return order;
388 }
389 
390 static unsigned int
391 read_hints(f, h, quiet, name, order)
392 	FILE 		*f;
393 	struct ohash 	*h;
394 	int 		quiet;
395 	const char 	*name;
396 	unsigned int	order;
397 {
398 	char 		*str;
399 	size_t 		size;
400 
401 	while ((str = fgetln(f, &size)) != NULL) {
402 		char *sentinel;
403 
404 		sentinel = str + size;
405 		for (;;) {
406 			char *e;
407 			struct node *a;
408 
409 			while (isspace(*str) && str < sentinel)
410 				str++;
411 			if (str == sentinel)
412 				break;
413 			for (e = str; !isspace(*e) && e < sentinel; e++)
414 				continue;
415 			a = node_lookup(h, str, e);
416 			if (a->order != NO_ORDER) {
417 				if (!quiet)
418 				    warnx(
419 					"duplicate node %s in hints file %s",
420 					a->k, name);
421 			} else
422 				a->order = order++;
423 			str = e;
424 		}
425 	}
426 	return order;
427 }
428 
429 
430 /***
431  *** Standard heap handling routines.
432  ***/
433 
434 static void
435 heap_down(h, i)
436 	struct array 	*h;
437 	unsigned int 	i;
438 {
439 	unsigned int 	j;
440 	struct node 	*swap;
441 
442 	for (; (j=2*i+1) < h->entries; i = j) {
443 		if (j+1 < h->entries && h->t[j+1]->order < h->t[j]->order)
444 		    	j++;
445 		if (h->t[i]->order <= h->t[j]->order)
446 			break;
447 		swap = h->t[i];
448 		h->t[i] = h->t[j];
449 		h->t[j] = swap;
450 	}
451 }
452 
453 static void
454 heapify(h, verbose)
455 	struct array 	*h;
456 	int		verbose;
457 {
458 	unsigned int 	i;
459 
460 	for (i = h->entries; i != 0;) {
461 		if (h->t[--i]->order == NO_ORDER && verbose)
462 			warnx("node %s absent from hints file", h->t[i]->k);
463 		heap_down(h, i);
464 	}
465 }
466 
467 #define DEQUEUE(h) ( hints_flag ? dequeue(h) : (h)->t[--(h)->entries] )
468 
469 static struct node *
470 dequeue(h)
471 	struct array 	*h;
472 {
473 	struct node 	*n;
474 
475 	if (h->entries == 0)
476 		n = NULL;
477 	else {
478 		n = h->t[0];
479 		if (--h->entries != 0) {
480 		    h->t[0] = h->t[h->entries];
481 		    heap_down(h, 0);
482 		}
483 	}
484 	return n;
485 }
486 
487 #define ENQUEUE(h, n) do {			\
488 	if (hints_flag)				\
489 		enqueue((h), (n));		\
490 	else					\
491 		(h)->t[(h)->entries++] = (n);	\
492 	} while(0);
493 
494 static void
495 enqueue(h, n)
496 	struct array 	*h;
497 	struct node 	*n;
498 {
499 	unsigned int 	i, j;
500 	struct node 	*swap;
501 
502 	h->t[h->entries++] = n;
503 	for (i = h->entries-1; i > 0; i = j) {
504 		j = (i-1)/2;
505 		if (h->t[j]->order < h->t[i]->order)
506 			break;
507 		swap = h->t[j];
508 		h->t[j] = h->t[i];
509 		h->t[i] = swap;
510 	}
511 }
512 
513 /* Nodes without order should not hinder direct dependencies.
514  * Iterate until no nodes are left.
515  */
516 static void
517 make_transparent(hash)
518 	struct ohash	*hash;
519 {
520 	struct node 	*n;
521 	unsigned int 	i;
522 	struct link 	*l;
523 	int		adjusted;
524 	int		bad;
525 	unsigned int	min;
526 
527 	/* first try to solve complete nodes */
528 	do {
529 		adjusted = 0;
530 		bad = 0;
531 		for (n = ohash_first(hash, &i); n != NULL;
532 		    n = ohash_next(hash, &i)) {
533 			if (n->order == NO_ORDER) {
534 				min = NO_ORDER;
535 
536 				for (l = n->arcs; l != NULL; l = l->next) {
537 					/* unsolved node -> delay resolution */
538 					if (l->node->order == NO_ORDER) {
539 						bad = 1;
540 						break;
541 					} else if (l->node->order < min)
542 						min = l->node->order;
543 				}
544 				if (min < NO_ORDER && l == NULL) {
545 					n->order = min;
546 					adjusted = 1;
547 				}
548 			}
549 		}
550 
551 	} while (adjusted);
552 
553 	/* then, if incomplete nodes are left, do them */
554 	if (bad) do {
555 		adjusted = 0;
556 		for (n = ohash_first(hash, &i); n != NULL;
557 		    n = ohash_next(hash, &i))
558 			if (n->order == NO_ORDER)
559 				for (l = n->arcs; l != NULL; l = l->next)
560 					if (l->node->order < n->order) {
561 						n->order = l->node->order;
562 						adjusted = 1;
563 					}
564 	} while (adjusted);
565 }
566 
567 
568 /***
569  *** Search through hash array for nodes.
570  ***/
571 
572 /* Split nodes into unrefed nodes/live nodes.  */
573 static void
574 split_nodes(hash, heap, remaining)
575 	struct ohash 	*hash;
576 	struct array 	*heap;
577 	struct array	*remaining;
578 {
579 
580 	struct node *n;
581 	unsigned int i;
582 
583 	heap->t = emalloc(sizeof(struct node *) * ohash_entries(hash));
584 	remaining->t = emalloc(sizeof(struct node *) * ohash_entries(hash));
585 	heap->entries = 0;
586 	remaining->entries = 0;
587 
588 	for (n = ohash_first(hash, &i); n != NULL; n = ohash_next(hash, &i)) {
589 		if (n->refs == 0)
590 			heap->t[heap->entries++] = n;
591 		else
592 			remaining->t[remaining->entries++] = n;
593 	}
594 }
595 
596 /* Good point to break a cycle: live node with as few refs as possible. */
597 static struct node *
598 find_good_cycle_break(h)
599 	struct array 	*h;
600 {
601 	unsigned int 	i;
602 	unsigned int 	best;
603 	struct node 	*u;
604 
605 	best = UINT_MAX;
606 	u = NULL;
607 
608 	assert(h->entries != 0);
609 	for (i = 0; i < h->entries; i++) {
610 		struct node *n = h->t[i];
611 		/* No need to look further. */
612 		if (n->refs == 1)
613 			return n;
614 		if (n->refs != 0 && n->refs < best) {
615 			best = n->refs;
616 			u = n;
617 		}
618 	}
619 	assert(u != NULL);
620 	return u;
621 }
622 
623 /*  Retrieve the node with the smallest order.  */
624 static struct node *
625 find_smallest_node(h)
626 	struct array 	*h;
627 {
628 	unsigned int 	i;
629 	unsigned int 	best;
630 	struct node 	*u;
631 
632 	best = UINT_MAX;
633 	u = NULL;
634 
635 	assert(h->entries != 0);
636 	for (i = 0; i < h->entries; i++) {
637 		struct node *n = h->t[i];
638 		if (n->refs != 0 && n->order < best) {
639 			best = n->order;
640 			u = n;
641 		}
642 	}
643 	assert(u != NULL);
644 	return u;
645 }
646 
647 
648 /***
649  *** Graph algorithms.
650  ***/
651 
652 /* Explore the nodes reachable from i to find a cycle, store it in c.
653  * This may fail.  */
654 static struct node *
655 find_cycle_from(i, c)
656 	struct node 	*i;
657 	struct array 	*c;
658 {
659 	struct node 	*n;
660 
661 	n = i;
662 	/* XXX Previous cycle findings may have left this pointer non-null.  */
663 	i->from = NULL;
664 
665 	for (;;) {
666 		/* Note that all marks are reversed before this code exits.  */
667 		n->mark = 1;
668 		if (n->traverse)
669 			n->traverse = n->traverse->next;
670 		else
671 			n->traverse = n->arcs;
672 		/* Skip over dead nodes.  */
673 		while (n->traverse && n->traverse->node->refs == 0)
674 			n->traverse = n->traverse->next;
675 		if (n->traverse) {
676 			struct node *go = n->traverse->node;
677 
678 			if (go->mark) {
679 				c->entries = 0;
680 				for (; n != NULL && n != go; n = n->from) {
681 					c->t[c->entries++] = n;
682 					n->mark = 0;
683 				}
684 				for (; n != NULL; n = n->from)
685 					n->mark = 0;
686 				c->t[c->entries++] = go;
687 				return go;
688 			} else {
689 			    go->from = n;
690 			    n = go;
691 			}
692 		} else {
693 			n->mark = 0;
694 			n = n->from;
695 			if (n == NULL)
696 				return NULL;
697 		}
698 	}
699 }
700 
701 /* Find a live predecessor of node n.  This is a slow routine, as it needs
702  * to go through the whole array, but it is not needed often.
703  */
704 static struct node *
705 find_predecessor(a, n)
706 	struct array *a;
707 	struct node *n;
708 {
709 	unsigned int i;
710 
711 	for (i = 0; i < a->entries; i++) {
712 		struct node *m;
713 
714 		m = a->t[i];
715 		if (m->refs != 0) {
716 			struct link *l;
717 
718 			for (l = m->arcs; l != NULL; l = l->next)
719 				if (l->node == n)
720 					return m;
721 		}
722 	}
723 	assert(1 == 0);
724 	return NULL;
725 }
726 
727 /* Traverse all strongly connected components reachable from node n.
728    Start numbering them at o. Return the maximum order reached.
729    Update the largest cycle found so far.
730  */
731 static unsigned int
732 traverse_node(n, o, c)
733 	struct node 	*n;
734 	unsigned int 	o;
735 	struct array 	*c;
736 {
737 	unsigned int 	min, max;
738 
739 	n->from = NULL;
740 	min = o;
741 	max = ++o;
742 
743 	for (;;) {
744 		n->mark = o;
745 		if (DEBUG_TRAVERSE)
746 			printf("%s(%d) ", n->k, n->mark);
747 		/* Find next arc to explore.  */
748 		if (n->traverse)
749 			n->traverse = n->traverse->next;
750 		else
751 			n->traverse = n->arcs;
752 		/* Skip over dead nodes.  */
753 		while (n->traverse && n->traverse->node->refs == 0)
754 			n->traverse = n->traverse->next;
755 		/* If arc left.  */
756 		if (n->traverse) {
757 			struct node 	*go;
758 
759 			go = n->traverse->node;
760 			/* Optimisation: if go->mark < min, we already
761 			 * visited this strongly-connected component in
762 			 * a previous pass.  Hence, this can yield no new
763 			 * cycle.  */
764 
765 			/* Not part of the current path: go for it.  */
766 			if (go->mark == 0 || go->mark == min) {
767 				go->from = n;
768 				n = go;
769 				o++;
770 				if (o > max)
771 					max = o;
772 			/* Part of the current path: check cycle length.  */
773 			} else if (go->mark > min) {
774 				if (DEBUG_TRAVERSE)
775 					printf("%d\n", o - go->mark + 1);
776 				if (o - go->mark + 1 > c->entries) {
777 					struct node *t;
778 					unsigned int i;
779 
780 					c->entries = o - go->mark + 1;
781 					i = 0;
782 					c->t[i++] = go;
783 					for (t = n; t != go; t = t->from)
784 						c->t[i++] = t;
785 				}
786 			}
787 
788 		/* No arc left: backtrack.  */
789 		} else {
790 			n->mark = min;
791 			n = n->from;
792 			if (!n)
793 				return max;
794 			o--;
795 		}
796 	}
797 }
798 
799 static void
800 print_cycle(c)
801 	struct array 	*c;
802 {
803 	unsigned int 	i;
804 
805 	/* Printing in reverse order, since cycle discoveries finds reverse
806 	 * edges.  */
807 	for (i = c->entries; i != 0;) {
808 		i--;
809 		warnx("%s", c->t[i]->k);
810 	}
811 }
812 
813 static struct node *
814 find_longest_cycle(h, c)
815 	struct array 	*h;
816 	struct array 	*c;
817 {
818 	unsigned int 	i;
819 	unsigned int 	o;
820 	unsigned int 	best;
821 	struct node 	*n;
822 	static int 	notfirst = 0;
823 
824 	assert(h->entries != 0);
825 
826 	/* No cycle found yet.  */
827 	c->entries = 0;
828 
829 	/* Reset the set of marks, except the first time around.  */
830 	if (notfirst) {
831 		for (i = 0; i < h->entries; i++)
832 			h->t[i]->mark = 0;
833 	} else
834 		notfirst = 1;
835 
836 	o = 0;
837 
838 	/* Traverse the array.  Each unmarked, live node heralds a
839 	 * new set of strongly connected components.  */
840 	for (i = 0; i < h->entries; i++) {
841 		n = h->t[i];
842 		if (n->refs != 0 && n->mark == 0) {
843 			/* Each call to traverse_node uses a separate
844 			 * interval of numbers to mark nodes.  */
845 			o++;
846 			o = traverse_node(n, o, c);
847 		}
848 	}
849 
850 	assert(c->entries != 0);
851 	n = c->t[0];
852 	best = n->refs;
853 	for (i = 0; i < c->entries; i++) {
854 		if (c->t[i]->refs < best) {
855 			n = c->t[i];
856 			best = n->refs;
857 		}
858 	}
859 	return n;
860 }
861 
862 
863 #define plural(n) ((n) > 1 ? "s" : "")
864 
865 int
866 main(argc, argv)
867     int 		argc;
868     char 		*argv[];
869 {
870 	struct ohash 	pairs;
871 	int 		reverse_flag, quiet_flag, long_flag,
872 			    warn_flag, hints_flag, verbose_flag;
873 	unsigned int	order;
874 
875 	order = 0;
876 
877 	reverse_flag = quiet_flag = long_flag =
878 		warn_flag = hints_flag = verbose_flag = 0;
879 	nodes_init(&pairs);
880 
881 	{
882 	    int c;
883 
884 	    while ((c = getopt(argc, argv, "h:flqrvw")) != -1) {
885 		    switch(c) {
886 		    case 'h': {
887 			    FILE *f;
888 
889 			    f = fopen(optarg, "r");
890 			    if (f == NULL)
891 				    err(EX_NOINPUT, "Can't open hint file %s",
892 					optarg);
893 			    order = read_hints(f, &pairs, quiet_flag,
894 				optarg, order);
895 			    fclose(f);
896 		    }
897 			    hints_flag = 1;
898 			    break;
899 			    /*FALLTHRU*/
900 		    case 'f':
901 			    hints_flag = 2;
902 			    break;
903 		    case 'l':
904 			    long_flag = 1;
905 			    break;
906 		    case 'q':
907 			    quiet_flag = 1;
908 			    break;
909 		    case 'r':
910 			    reverse_flag = 1;
911 			    break;
912 		    case 'v':
913 			    verbose_flag = 1;
914 			    break;
915 		    case 'w':
916 			    warn_flag = 1;
917 			    break;
918 		    default:
919 			    usage();
920 		    }
921 	    }
922 
923 	    argc -= optind;
924 	    argv += optind;
925 	}
926 
927 	switch(argc) {
928 	case 1: {
929 		FILE *f;
930 
931 		f = fopen(argv[0], "r");
932 		if (f == NULL)
933 			err(EX_NOINPUT, "Can't open file %s", argv[1]);
934 		order = read_pairs(f, &pairs, reverse_flag, argv[1], order,
935 		    hints_flag == 2);
936 		fclose(f);
937 		break;
938 	}
939 	case 0:
940 		order = read_pairs(stdin, &pairs, reverse_flag, "stdin",
941 		    order, hints_flag == 2);
942 		break;
943 	default:
944 		usage();
945 	}
946 
947 	{
948 	    struct array 	aux;	/* Unrefed nodes/cycle reporting.  */
949 	    struct array	remaining;
950 	    unsigned int	broken_arcs, broken_cycles;
951 	    unsigned int	left;
952 
953 	    broken_arcs = 0;
954 	    broken_cycles = 0;
955 
956 	    if (hints_flag)
957 	    	make_transparent(&pairs);
958 	    split_nodes(&pairs, &aux, &remaining);
959 	    ohash_delete(&pairs);
960 
961 	    if (hints_flag)
962 		    heapify(&aux, verbose_flag);
963 
964 	    left = remaining.entries + aux.entries;
965 	    while (left != 0) {
966 
967 		    /* Standard topological sort.  */
968 		    while (aux.entries) {
969 			    struct link *l;
970 			    struct node *n;
971 
972 			    n = DEQUEUE(&aux);
973 			    printf("%s\n", n->k);
974 			    left--;
975 			    /* We can't free nodes, as we don't know which
976 			     * entry we can remove in the hash table.  We
977 			     * rely on refs == 0 to recognize live nodes.
978 			     * Decrease ref count of live nodes, enter new
979 			     * candidates into the unrefed list.  */
980 			    for (l = n->arcs; l != NULL; l = l->next)
981 				    if (l->node->refs != 0 &&
982 					--l->node->refs == 0) {
983 					    ENQUEUE(&aux, l->node);
984 				    }
985 		    }
986 		    /* There are still cycles to break.  */
987 		    if (left != 0) {
988 			    struct node *n;
989 
990 			    broken_cycles++;
991 			    /* XXX Simple cycle detection and long cycle
992 			     * detection are mutually exclusive.  */
993 
994 			    if (long_flag) {
995 				    n = find_longest_cycle(&remaining, &aux);
996 			    } else {
997 				    struct node *b;
998 
999 				    if (hints_flag)
1000 					    n = find_smallest_node(&remaining);
1001 				    else
1002 					    n = find_good_cycle_break(&remaining);
1003 				    while ((b = find_cycle_from(n, &aux)) == NULL)
1004 					    n = find_predecessor(&remaining, n);
1005 				    n = b;
1006 			    }
1007 
1008 			    if (!quiet_flag) {
1009 				    warnx("cycle in data");
1010 				    print_cycle(&aux);
1011 			    }
1012 
1013 			    if (verbose_flag)
1014 				    warnx("%u edge%s broken", n->refs,
1015 					plural(n->refs));
1016 			    broken_arcs += n->refs;
1017 			    n->refs = 0;
1018 			    /* Reinitialization, cycle reporting uses aux.  */
1019 			    aux.t[0] = n;
1020 			    aux.entries = 1;
1021 		    }
1022 	    }
1023 	    if (verbose_flag && broken_cycles != 0)
1024 		    warnx("%u cycle%s broken, for a total of %u edge%s",
1025 			broken_cycles, plural(broken_cycles),
1026 			broken_arcs, plural(broken_arcs));
1027 	    if (warn_flag)
1028 		    exit(broken_cycles < 256 ? broken_cycles : 255);
1029 	    else
1030 		    exit(EX_OK);
1031 	}
1032 }
1033 
1034 
1035 extern char *__progname;
1036 
1037 static void
1038 usage()
1039 {
1040 	fprintf(stderr, "Usage: %s [-h file] [-flqrvw] [file]\n", __progname);
1041 	exit(EX_USAGE);
1042 }
1043