xref: /onnv-gate/usr/src/lib/libzfs/common/libzfs_graph.c (revision 2474:c001ad7e0c25)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Iterate over all children of the current object.  This includes the normal
30  * dataset hierarchy, but also arbitrary hierarchies due to clones.  We want to
31  * walk all datasets in the pool, and construct a directed graph of the form:
32  *
33  * 			home
34  *                        |
35  *                   +----+----+
36  *                   |         |
37  *                   v         v             ws
38  *                  bar       baz             |
39  *                             |              |
40  *                             v              v
41  *                          @yesterday ----> foo
42  *
43  * In order to construct this graph, we have to walk every dataset in the pool,
44  * because the clone parent is stored as a property of the child, not the
45  * parent.  The parent only keeps track of the number of clones.
46  *
47  * In the normal case (without clones) this would be rather expensive.  To avoid
48  * unnecessary computation, we first try a walk of the subtree hierarchy
49  * starting from the initial node.  At each dataset, we construct a node in the
50  * graph and an edge leading from its parent.  If we don't see any snapshots
51  * with a non-zero clone count, then we are finished.
52  *
53  * If we do find a cloned snapshot, then we finish the walk of the current
54  * subtree, but indicate that we need to do a complete walk.  We then perform a
55  * global walk of all datasets, avoiding the subtree we already processed.
56  *
57  * At the end of this, we'll end up with a directed graph of all relevant (and
58  * possible some irrelevant) datasets in the system.  We need to both find our
59  * limiting subgraph and determine a safe ordering in which to destroy the
60  * datasets.  We do a topological ordering of our graph starting at our target
61  * dataset, and then walk the results in reverse.
62  *
63  * It's possible for the graph to have cycles if, for example, the user renames
64  * a clone to be the parent of its origin snapshot.  The user can request to
65  * generate an error in this case, or ignore the cycle and continue.
66  *
67  * When removing datasets, we want to destroy the snapshots in chronological
68  * order (because this is the most efficient method).  In order to accomplish
69  * this, we store the creation transaction group with each vertex and keep each
70  * vertex's edges sorted according to this value.  The topological sort will
71  * automatically walk the snapshots in the correct order.
72  */
73 
74 #include <assert.h>
75 #include <libintl.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <strings.h>
80 #include <unistd.h>
81 
82 #include <libzfs.h>
83 
84 #include "libzfs_impl.h"
85 #include "zfs_namecheck.h"
86 
87 #define	MIN_EDGECOUNT	4
88 
89 /*
90  * Vertex structure.  Indexed by dataset name, this structure maintains a list
91  * of edges to other vertices.
92  */
93 struct zfs_edge;
94 typedef struct zfs_vertex {
95 	char			zv_dataset[ZFS_MAXNAMELEN];
96 	struct zfs_vertex	*zv_next;
97 	int			zv_visited;
98 	uint64_t		zv_txg;
99 	struct zfs_edge		**zv_edges;
100 	int			zv_edgecount;
101 	int			zv_edgealloc;
102 } zfs_vertex_t;
103 
104 enum {
105 	VISIT_SEEN = 1,
106 	VISIT_SORT_PRE,
107 	VISIT_SORT_POST
108 };
109 
110 /*
111  * Edge structure.  Simply maintains a pointer to the destination vertex.  There
112  * is no need to store the source vertex, since we only use edges in the context
113  * of the source vertex.
114  */
115 typedef struct zfs_edge {
116 	zfs_vertex_t		*ze_dest;
117 	struct zfs_edge		*ze_next;
118 } zfs_edge_t;
119 
120 #define	ZFS_GRAPH_SIZE		1027	/* this could be dynamic some day */
121 
122 /*
123  * Graph structure.  Vertices are maintained in a hash indexed by dataset name.
124  */
125 typedef struct zfs_graph {
126 	zfs_vertex_t		**zg_hash;
127 	size_t			zg_size;
128 	size_t			zg_nvertex;
129 } zfs_graph_t;
130 
131 /*
132  * Allocate a new edge pointing to the target vertex.
133  */
134 static zfs_edge_t *
135 zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest)
136 {
137 	zfs_edge_t *zep = zfs_alloc(hdl, sizeof (zfs_edge_t));
138 
139 	if (zep == NULL)
140 		return (NULL);
141 
142 	zep->ze_dest = dest;
143 
144 	return (zep);
145 }
146 
147 /*
148  * Destroy an edge.
149  */
150 static void
151 zfs_edge_destroy(zfs_edge_t *zep)
152 {
153 	free(zep);
154 }
155 
156 /*
157  * Allocate a new vertex with the given name.
158  */
159 static zfs_vertex_t *
160 zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset)
161 {
162 	zfs_vertex_t *zvp = zfs_alloc(hdl, sizeof (zfs_vertex_t));
163 
164 	if (zvp == NULL)
165 		return (NULL);
166 
167 	assert(strlen(dataset) < ZFS_MAXNAMELEN);
168 
169 	(void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset));
170 
171 	if ((zvp->zv_edges = zfs_alloc(hdl,
172 	    MIN_EDGECOUNT * sizeof (void *))) == NULL) {
173 		free(zvp);
174 		return (NULL);
175 	}
176 
177 	zvp->zv_edgealloc = MIN_EDGECOUNT;
178 
179 	return (zvp);
180 }
181 
182 /*
183  * Destroy a vertex.  Frees up any associated edges.
184  */
185 static void
186 zfs_vertex_destroy(zfs_vertex_t *zvp)
187 {
188 	int i;
189 
190 	for (i = 0; i < zvp->zv_edgecount; i++)
191 		zfs_edge_destroy(zvp->zv_edges[i]);
192 
193 	free(zvp->zv_edges);
194 	free(zvp);
195 }
196 
197 /*
198  * Given a vertex, add an edge to the destination vertex.
199  */
200 static int
201 zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp,
202     zfs_vertex_t *dest)
203 {
204 	zfs_edge_t *zep = zfs_edge_create(hdl, dest);
205 
206 	if (zep == NULL)
207 		return (-1);
208 
209 	if (zvp->zv_edgecount == zvp->zv_edgealloc) {
210 		zfs_edge_t **newedges = zfs_alloc(hdl, zvp->zv_edgealloc * 2 *
211 		    sizeof (void *));
212 
213 		if (newedges == NULL)
214 			return (-1);
215 
216 		bcopy(zvp->zv_edges, newedges,
217 		    zvp->zv_edgealloc * sizeof (void *));
218 
219 		zvp->zv_edgealloc *= 2;
220 		free(zvp->zv_edges);
221 		zvp->zv_edges = newedges;
222 	}
223 
224 	zvp->zv_edges[zvp->zv_edgecount++] = zep;
225 
226 	return (0);
227 }
228 
229 static int
230 zfs_edge_compare(const void *a, const void *b)
231 {
232 	const zfs_edge_t *ea = *((zfs_edge_t **)a);
233 	const zfs_edge_t *eb = *((zfs_edge_t **)b);
234 
235 	if (ea->ze_dest->zv_txg < eb->ze_dest->zv_txg)
236 		return (-1);
237 	if (ea->ze_dest->zv_txg > eb->ze_dest->zv_txg)
238 		return (1);
239 	return (0);
240 }
241 
242 /*
243  * Sort the given vertex edges according to the creation txg of each vertex.
244  */
245 static void
246 zfs_vertex_sort_edges(zfs_vertex_t *zvp)
247 {
248 	if (zvp->zv_edgecount == 0)
249 		return;
250 
251 	qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *),
252 	    zfs_edge_compare);
253 }
254 
255 /*
256  * Construct a new graph object.  We allow the size to be specified as a
257  * parameter so in the future we can size the hash according to the number of
258  * datasets in the pool.
259  */
260 static zfs_graph_t *
261 zfs_graph_create(libzfs_handle_t *hdl, size_t size)
262 {
263 	zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t));
264 
265 	if (zgp == NULL)
266 		return (NULL);
267 
268 	zgp->zg_size = size;
269 	if ((zgp->zg_hash = zfs_alloc(hdl,
270 	    size * sizeof (zfs_vertex_t *))) == NULL) {
271 		free(zgp);
272 		return (NULL);
273 	}
274 
275 	return (zgp);
276 }
277 
278 /*
279  * Destroy a graph object.  We have to iterate over all the hash chains,
280  * destroying each vertex in the process.
281  */
282 static void
283 zfs_graph_destroy(zfs_graph_t *zgp)
284 {
285 	int i;
286 	zfs_vertex_t *current, *next;
287 
288 	for (i = 0; i < zgp->zg_size; i++) {
289 		current = zgp->zg_hash[i];
290 		while (current != NULL) {
291 			next = current->zv_next;
292 			zfs_vertex_destroy(current);
293 			current = next;
294 		}
295 	}
296 
297 	free(zgp->zg_hash);
298 	free(zgp);
299 }
300 
301 /*
302  * Graph hash function.  Classic bernstein k=33 hash function, taken from
303  * usr/src/cmd/sgs/tools/common/strhash.c
304  */
305 static size_t
306 zfs_graph_hash(zfs_graph_t *zgp, const char *str)
307 {
308 	size_t hash = 5381;
309 	int c;
310 
311 	while ((c = *str++) != 0)
312 		hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
313 
314 	return (hash % zgp->zg_size);
315 }
316 
317 /*
318  * Given a dataset name, finds the associated vertex, creating it if necessary.
319  */
320 static zfs_vertex_t *
321 zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset,
322     uint64_t txg)
323 {
324 	size_t idx = zfs_graph_hash(zgp, dataset);
325 	zfs_vertex_t *zvp;
326 
327 	for (zvp = zgp->zg_hash[idx]; zvp != NULL; zvp = zvp->zv_next) {
328 		if (strcmp(zvp->zv_dataset, dataset) == 0) {
329 			if (zvp->zv_txg == 0)
330 				zvp->zv_txg = txg;
331 			return (zvp);
332 		}
333 	}
334 
335 	if ((zvp = zfs_vertex_create(hdl, dataset)) == NULL)
336 		return (NULL);
337 
338 	zvp->zv_next = zgp->zg_hash[idx];
339 	zvp->zv_txg = txg;
340 	zgp->zg_hash[idx] = zvp;
341 	zgp->zg_nvertex++;
342 
343 	return (zvp);
344 }
345 
346 /*
347  * Given two dataset names, create an edge between them.  For the source vertex,
348  * mark 'zv_visited' to indicate that we have seen this vertex, and not simply
349  * created it as a destination of another edge.  If 'dest' is NULL, then this
350  * is an individual vertex (i.e. the starting vertex), so don't add an edge.
351  */
352 static int
353 zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source,
354     const char *dest, uint64_t txg)
355 {
356 	zfs_vertex_t *svp, *dvp;
357 
358 	if ((svp = zfs_graph_lookup(hdl, zgp, source, 0)) == NULL)
359 		return (-1);
360 	svp->zv_visited = VISIT_SEEN;
361 	if (dest != NULL) {
362 		dvp = zfs_graph_lookup(hdl, zgp, dest, txg);
363 		if (dvp == NULL)
364 			return (-1);
365 		if (zfs_vertex_add_edge(hdl, svp, dvp) != 0)
366 			return (-1);
367 	}
368 
369 	return (0);
370 }
371 
372 /*
373  * Iterate over all children of the given dataset, adding any vertices as
374  * necessary.  Returns 0 if no cloned snapshots were seen, -1 if there was an
375  * error, or 1 otherwise.  This is a simple recursive algorithm - the ZFS
376  * namespace typically is very flat.  We manually invoke the necessary ioctl()
377  * calls to avoid the overhead and additional semantics of zfs_open().
378  */
379 static int
380 iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
381 {
382 	zfs_cmd_t zc = { 0 };
383 	int ret = 0, err;
384 	zfs_vertex_t *zvp;
385 
386 	/*
387 	 * Look up the source vertex, and avoid it if we've seen it before.
388 	 */
389 	zvp = zfs_graph_lookup(hdl, zgp, dataset, 0);
390 	if (zvp == NULL)
391 		return (-1);
392 	if (zvp->zv_visited == VISIT_SEEN)
393 		return (0);
394 
395 	/*
396 	 * We check the clone parent here instead of within the loop, so that if
397 	 * the root dataset has been promoted from a clone, we find its parent
398 	 * appropriately.
399 	 */
400 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
401 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0 &&
402 	    zc.zc_objset_stats.dds_clone_of[0] != '\0') {
403 		if (zfs_graph_add(hdl, zgp, zc.zc_objset_stats.dds_clone_of,
404 		    zc.zc_name, zc.zc_objset_stats.dds_creation_txg) != 0)
405 			return (-1);
406 	}
407 
408 	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
409 	    ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
410 	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
411 
412 		/*
413 		 * Ignore private dataset names.
414 		 */
415 		if (dataset_name_hidden(zc.zc_name))
416 			continue;
417 
418 		/*
419 		 * Get statistics for this dataset, to determine the type of the
420 		 * dataset and clone statistics.  If this fails, the dataset has
421 		 * since been removed, and we're pretty much screwed anyway.
422 		 */
423 		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
424 			continue;
425 
426 		/*
427 		 * Add an edge between the parent and the child.
428 		 */
429 		if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
430 		    zc.zc_objset_stats.dds_creation_txg) != 0)
431 			return (-1);
432 
433 		/*
434 		 * Iterate over all children
435 		 */
436 		err = iterate_children(hdl, zgp, zc.zc_name);
437 		if (err == -1)
438 			return (-1);
439 		else if (err == 1)
440 			ret = 1;
441 
442 		/*
443 		 * Indicate if we found a dataset with a non-zero clone count.
444 		 */
445 		if (zc.zc_objset_stats.dds_num_clones != 0)
446 			ret = 1;
447 	}
448 
449 	/*
450 	 * Now iterate over all snapshots.
451 	 */
452 	bzero(&zc, sizeof (zc));
453 
454 	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
455 	    ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
456 	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
457 
458 		/*
459 		 * Get statistics for this dataset, to determine the type of the
460 		 * dataset and clone statistics.  If this fails, the dataset has
461 		 * since been removed, and we're pretty much screwed anyway.
462 		 */
463 		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
464 			continue;
465 
466 		/*
467 		 * Add an edge between the parent and the child.
468 		 */
469 		if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
470 		    zc.zc_objset_stats.dds_creation_txg) != 0)
471 			return (-1);
472 
473 		/*
474 		 * Indicate if we found a dataset with a non-zero clone count.
475 		 */
476 		if (zc.zc_objset_stats.dds_num_clones != 0)
477 			ret = 1;
478 	}
479 
480 	zvp->zv_visited = VISIT_SEEN;
481 
482 	return (ret);
483 }
484 
485 /*
486  * Construct a complete graph of all necessary vertices.  First, we iterate over
487  * only our object's children.  If we don't find any cloned snapshots, then we
488  * simple return that.  Otherwise, we have to start at the pool root and iterate
489  * over all datasets.
490  */
491 static zfs_graph_t *
492 construct_graph(libzfs_handle_t *hdl, const char *dataset)
493 {
494 	zfs_graph_t *zgp = zfs_graph_create(hdl, ZFS_GRAPH_SIZE);
495 	zfs_cmd_t zc = { 0 };
496 	int ret = 0;
497 
498 	if (zgp == NULL)
499 		return (zgp);
500 
501 	/*
502 	 * We need to explicitly check whether this dataset has clones or not,
503 	 * since iterate_children() only checks the children.
504 	 */
505 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
506 	(void) ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc);
507 
508 	if (zc.zc_objset_stats.dds_num_clones != 0 ||
509 	    (ret = iterate_children(hdl, zgp, dataset)) != 0) {
510 		/*
511 		 * Determine pool name and try again.
512 		 */
513 		char *pool, *slash;
514 
515 		if ((slash = strchr(dataset, '/')) != NULL ||
516 		    (slash = strchr(dataset, '@')) != NULL) {
517 			pool = zfs_alloc(hdl, slash - dataset + 1);
518 			if (pool == NULL) {
519 				zfs_graph_destroy(zgp);
520 				return (NULL);
521 			}
522 			(void) strncpy(pool, dataset, slash - dataset);
523 			pool[slash - dataset] = '\0';
524 
525 			if (iterate_children(hdl, zgp, pool) == -1 ||
526 			    zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) {
527 				free(pool);
528 				zfs_graph_destroy(zgp);
529 				return (NULL);
530 			}
531 
532 			free(pool);
533 		}
534 	}
535 
536 	if (ret == -1 || zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) {
537 		zfs_graph_destroy(zgp);
538 		return (NULL);
539 	}
540 
541 	return (zgp);
542 }
543 
544 /*
545  * Given a graph, do a recursive topological sort into the given array.  This is
546  * really just a depth first search, so that the deepest nodes appear first.
547  * hijack the 'zv_visited' marker to avoid visiting the same vertex twice.
548  */
549 static int
550 topo_sort(libzfs_handle_t *hdl, boolean_t allowrecursion, char **result,
551     size_t *idx, zfs_vertex_t *zgv)
552 {
553 	int i;
554 
555 	if (zgv->zv_visited == VISIT_SORT_PRE && !allowrecursion) {
556 		/*
557 		 * If we've already seen this vertex as part of our depth-first
558 		 * search, then we have a cyclic dependency, and we must return
559 		 * an error.
560 		 */
561 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
562 		    "recursive dependency at '%s'"),
563 		    zgv->zv_dataset);
564 		return (zfs_error(hdl, EZFS_RECURSIVE,
565 		    dgettext(TEXT_DOMAIN,
566 		    "cannot determine dependent datasets")));
567 	} else if (zgv->zv_visited >= VISIT_SORT_PRE) {
568 		/*
569 		 * If we've already processed this as part of the topological
570 		 * sort, then don't bother doing so again.
571 		 */
572 		return (0);
573 	}
574 
575 	zgv->zv_visited = VISIT_SORT_PRE;
576 
577 	/* avoid doing a search if we don't have to */
578 	zfs_vertex_sort_edges(zgv);
579 	for (i = 0; i < zgv->zv_edgecount; i++) {
580 		if (topo_sort(hdl, allowrecursion, result, idx,
581 		    zgv->zv_edges[i]->ze_dest) != 0)
582 			return (-1);
583 	}
584 
585 	/* we may have visited this in the course of the above */
586 	if (zgv->zv_visited == VISIT_SORT_POST)
587 		return (0);
588 
589 	if ((result[*idx] = zfs_alloc(hdl,
590 	    strlen(zgv->zv_dataset) + 1)) == NULL)
591 		return (-1);
592 
593 	(void) strcpy(result[*idx], zgv->zv_dataset);
594 	*idx += 1;
595 	zgv->zv_visited = VISIT_SORT_POST;
596 	return (0);
597 }
598 
599 /*
600  * The only public interface for this file.  Do the dirty work of constructing a
601  * child list for the given object.  Construct the graph, do the toplogical
602  * sort, and then return the array of strings to the caller.
603  *
604  * The 'allowrecursion' parameter controls behavior when cycles are found.  If
605  * it is set, the the cycle is ignored and the results returned as if the cycle
606  * did not exist.  If it is not set, then the routine will generate an error if
607  * a cycle is found.
608  */
609 int
610 get_dependents(libzfs_handle_t *hdl, boolean_t allowrecursion,
611     const char *dataset, char ***result, size_t *count)
612 {
613 	zfs_graph_t *zgp;
614 	zfs_vertex_t *zvp;
615 
616 	if ((zgp = construct_graph(hdl, dataset)) == NULL)
617 		return (-1);
618 
619 	if ((*result = zfs_alloc(hdl,
620 	    zgp->zg_nvertex * sizeof (char *))) == NULL) {
621 		zfs_graph_destroy(zgp);
622 		return (-1);
623 	}
624 
625 	if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) {
626 		free(*result);
627 		zfs_graph_destroy(zgp);
628 		return (-1);
629 	}
630 
631 	*count = 0;
632 	if (topo_sort(hdl, allowrecursion, *result, count, zvp) != 0) {
633 		free(*result);
634 		zfs_graph_destroy(zgp);
635 		return (-1);
636 	}
637 
638 	/*
639 	 * Get rid of the last entry, which is our starting vertex and not
640 	 * strictly a dependent.
641 	 */
642 	assert(*count > 0);
643 	free((*result)[*count - 1]);
644 	(*count)--;
645 
646 	zfs_graph_destroy(zgp);
647 
648 	return (0);
649 }
650