xref: /onnv-gate/usr/src/lib/libzfs/common/libzfs_graph.c (revision 6148:1bbb9d6c89cb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Iterate over all children of the current object.  This includes the normal
30  * dataset hierarchy, but also arbitrary hierarchies due to clones.  We want to
31  * walk all datasets in the pool, and construct a directed graph of the form:
32  *
33  * 			home
34  *                        |
35  *                   +----+----+
36  *                   |         |
37  *                   v         v             ws
38  *                  bar       baz             |
39  *                             |              |
40  *                             v              v
41  *                          @yesterday ----> foo
42  *
43  * In order to construct this graph, we have to walk every dataset in the pool,
44  * because the clone parent is stored as a property of the child, not the
45  * parent.  The parent only keeps track of the number of clones.
46  *
47  * In the normal case (without clones) this would be rather expensive.  To avoid
48  * unnecessary computation, we first try a walk of the subtree hierarchy
49  * starting from the initial node.  At each dataset, we construct a node in the
50  * graph and an edge leading from its parent.  If we don't see any snapshots
51  * with a non-zero clone count, then we are finished.
52  *
53  * If we do find a cloned snapshot, then we finish the walk of the current
54  * subtree, but indicate that we need to do a complete walk.  We then perform a
55  * global walk of all datasets, avoiding the subtree we already processed.
56  *
57  * At the end of this, we'll end up with a directed graph of all relevant (and
58  * possible some irrelevant) datasets in the system.  We need to both find our
59  * limiting subgraph and determine a safe ordering in which to destroy the
60  * datasets.  We do a topological ordering of our graph starting at our target
61  * dataset, and then walk the results in reverse.
62  *
63  * It's possible for the graph to have cycles if, for example, the user renames
64  * a clone to be the parent of its origin snapshot.  The user can request to
65  * generate an error in this case, or ignore the cycle and continue.
66  *
67  * When removing datasets, we want to destroy the snapshots in chronological
68  * order (because this is the most efficient method).  In order to accomplish
69  * this, we store the creation transaction group with each vertex and keep each
70  * vertex's edges sorted according to this value.  The topological sort will
71  * automatically walk the snapshots in the correct order.
72  */
73 
74 #include <assert.h>
75 #include <libintl.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <strings.h>
80 #include <unistd.h>
81 
82 #include <libzfs.h>
83 
84 #include "libzfs_impl.h"
85 #include "zfs_namecheck.h"
86 
87 #define	MIN_EDGECOUNT	4
88 
89 /*
90  * Vertex structure.  Indexed by dataset name, this structure maintains a list
91  * of edges to other vertices.
92  */
93 struct zfs_edge;
94 typedef struct zfs_vertex {
95 	char			zv_dataset[ZFS_MAXNAMELEN];
96 	struct zfs_vertex	*zv_next;
97 	int			zv_visited;
98 	uint64_t		zv_txg;
99 	struct zfs_edge		**zv_edges;
100 	int			zv_edgecount;
101 	int			zv_edgealloc;
102 } zfs_vertex_t;
103 
104 enum {
105 	VISIT_SEEN = 1,
106 	VISIT_SORT_PRE,
107 	VISIT_SORT_POST
108 };
109 
110 /*
111  * Edge structure.  Simply maintains a pointer to the destination vertex.  There
112  * is no need to store the source vertex, since we only use edges in the context
113  * of the source vertex.
114  */
115 typedef struct zfs_edge {
116 	zfs_vertex_t		*ze_dest;
117 	struct zfs_edge		*ze_next;
118 } zfs_edge_t;
119 
120 #define	ZFS_GRAPH_SIZE		1027	/* this could be dynamic some day */
121 
122 /*
123  * Graph structure.  Vertices are maintained in a hash indexed by dataset name.
124  */
125 typedef struct zfs_graph {
126 	zfs_vertex_t		**zg_hash;
127 	size_t			zg_size;
128 	size_t			zg_nvertex;
129 	const char		*zg_root;
130 	int			zg_clone_count;
131 } zfs_graph_t;
132 
133 /*
134  * Allocate a new edge pointing to the target vertex.
135  */
136 static zfs_edge_t *
137 zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest)
138 {
139 	zfs_edge_t *zep = zfs_alloc(hdl, sizeof (zfs_edge_t));
140 
141 	if (zep == NULL)
142 		return (NULL);
143 
144 	zep->ze_dest = dest;
145 
146 	return (zep);
147 }
148 
149 /*
150  * Destroy an edge.
151  */
152 static void
153 zfs_edge_destroy(zfs_edge_t *zep)
154 {
155 	free(zep);
156 }
157 
158 /*
159  * Allocate a new vertex with the given name.
160  */
161 static zfs_vertex_t *
162 zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset)
163 {
164 	zfs_vertex_t *zvp = zfs_alloc(hdl, sizeof (zfs_vertex_t));
165 
166 	if (zvp == NULL)
167 		return (NULL);
168 
169 	assert(strlen(dataset) < ZFS_MAXNAMELEN);
170 
171 	(void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset));
172 
173 	if ((zvp->zv_edges = zfs_alloc(hdl,
174 	    MIN_EDGECOUNT * sizeof (void *))) == NULL) {
175 		free(zvp);
176 		return (NULL);
177 	}
178 
179 	zvp->zv_edgealloc = MIN_EDGECOUNT;
180 
181 	return (zvp);
182 }
183 
184 /*
185  * Destroy a vertex.  Frees up any associated edges.
186  */
187 static void
188 zfs_vertex_destroy(zfs_vertex_t *zvp)
189 {
190 	int i;
191 
192 	for (i = 0; i < zvp->zv_edgecount; i++)
193 		zfs_edge_destroy(zvp->zv_edges[i]);
194 
195 	free(zvp->zv_edges);
196 	free(zvp);
197 }
198 
199 /*
200  * Given a vertex, add an edge to the destination vertex.
201  */
202 static int
203 zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp,
204     zfs_vertex_t *dest)
205 {
206 	zfs_edge_t *zep = zfs_edge_create(hdl, dest);
207 
208 	if (zep == NULL)
209 		return (-1);
210 
211 	if (zvp->zv_edgecount == zvp->zv_edgealloc) {
212 		void *ptr;
213 
214 		if ((ptr = zfs_realloc(hdl, zvp->zv_edges,
215 		    zvp->zv_edgealloc * sizeof (void *),
216 		    zvp->zv_edgealloc * 2 * sizeof (void *))) == NULL)
217 			return (-1);
218 
219 		zvp->zv_edges = ptr;
220 		zvp->zv_edgealloc *= 2;
221 	}
222 
223 	zvp->zv_edges[zvp->zv_edgecount++] = zep;
224 
225 	return (0);
226 }
227 
228 static int
229 zfs_edge_compare(const void *a, const void *b)
230 {
231 	const zfs_edge_t *ea = *((zfs_edge_t **)a);
232 	const zfs_edge_t *eb = *((zfs_edge_t **)b);
233 
234 	if (ea->ze_dest->zv_txg < eb->ze_dest->zv_txg)
235 		return (-1);
236 	if (ea->ze_dest->zv_txg > eb->ze_dest->zv_txg)
237 		return (1);
238 	return (0);
239 }
240 
241 /*
242  * Sort the given vertex edges according to the creation txg of each vertex.
243  */
244 static void
245 zfs_vertex_sort_edges(zfs_vertex_t *zvp)
246 {
247 	if (zvp->zv_edgecount == 0)
248 		return;
249 
250 	qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *),
251 	    zfs_edge_compare);
252 }
253 
254 /*
255  * Construct a new graph object.  We allow the size to be specified as a
256  * parameter so in the future we can size the hash according to the number of
257  * datasets in the pool.
258  */
259 static zfs_graph_t *
260 zfs_graph_create(libzfs_handle_t *hdl, const char *dataset, size_t size)
261 {
262 	zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t));
263 
264 	if (zgp == NULL)
265 		return (NULL);
266 
267 	zgp->zg_size = size;
268 	if ((zgp->zg_hash = zfs_alloc(hdl,
269 	    size * sizeof (zfs_vertex_t *))) == NULL) {
270 		free(zgp);
271 		return (NULL);
272 	}
273 
274 	zgp->zg_root = dataset;
275 	zgp->zg_clone_count = 0;
276 
277 	return (zgp);
278 }
279 
280 /*
281  * Destroy a graph object.  We have to iterate over all the hash chains,
282  * destroying each vertex in the process.
283  */
284 static void
285 zfs_graph_destroy(zfs_graph_t *zgp)
286 {
287 	int i;
288 	zfs_vertex_t *current, *next;
289 
290 	for (i = 0; i < zgp->zg_size; i++) {
291 		current = zgp->zg_hash[i];
292 		while (current != NULL) {
293 			next = current->zv_next;
294 			zfs_vertex_destroy(current);
295 			current = next;
296 		}
297 	}
298 
299 	free(zgp->zg_hash);
300 	free(zgp);
301 }
302 
303 /*
304  * Graph hash function.  Classic bernstein k=33 hash function, taken from
305  * usr/src/cmd/sgs/tools/common/strhash.c
306  */
307 static size_t
308 zfs_graph_hash(zfs_graph_t *zgp, const char *str)
309 {
310 	size_t hash = 5381;
311 	int c;
312 
313 	while ((c = *str++) != 0)
314 		hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
315 
316 	return (hash % zgp->zg_size);
317 }
318 
319 /*
320  * Given a dataset name, finds the associated vertex, creating it if necessary.
321  */
322 static zfs_vertex_t *
323 zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset,
324     uint64_t txg)
325 {
326 	size_t idx = zfs_graph_hash(zgp, dataset);
327 	zfs_vertex_t *zvp;
328 
329 	for (zvp = zgp->zg_hash[idx]; zvp != NULL; zvp = zvp->zv_next) {
330 		if (strcmp(zvp->zv_dataset, dataset) == 0) {
331 			if (zvp->zv_txg == 0)
332 				zvp->zv_txg = txg;
333 			return (zvp);
334 		}
335 	}
336 
337 	if ((zvp = zfs_vertex_create(hdl, dataset)) == NULL)
338 		return (NULL);
339 
340 	zvp->zv_next = zgp->zg_hash[idx];
341 	zvp->zv_txg = txg;
342 	zgp->zg_hash[idx] = zvp;
343 	zgp->zg_nvertex++;
344 
345 	return (zvp);
346 }
347 
348 /*
349  * Given two dataset names, create an edge between them.  For the source vertex,
350  * mark 'zv_visited' to indicate that we have seen this vertex, and not simply
351  * created it as a destination of another edge.  If 'dest' is NULL, then this
352  * is an individual vertex (i.e. the starting vertex), so don't add an edge.
353  */
354 static int
355 zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source,
356     const char *dest, uint64_t txg)
357 {
358 	zfs_vertex_t *svp, *dvp;
359 
360 	if ((svp = zfs_graph_lookup(hdl, zgp, source, 0)) == NULL)
361 		return (-1);
362 	svp->zv_visited = VISIT_SEEN;
363 	if (dest != NULL) {
364 		dvp = zfs_graph_lookup(hdl, zgp, dest, txg);
365 		if (dvp == NULL)
366 			return (-1);
367 		if (zfs_vertex_add_edge(hdl, svp, dvp) != 0)
368 			return (-1);
369 	}
370 
371 	return (0);
372 }
373 
374 /*
375  * Iterate over all children of the given dataset, adding any vertices
376  * as necessary.  Returns -1 if there was an error, or 0 otherwise.
377  * This is a simple recursive algorithm - the ZFS namespace typically
378  * is very flat.  We manually invoke the necessary ioctl() calls to
379  * avoid the overhead and additional semantics of zfs_open().
380  */
381 static int
382 iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
383 {
384 	zfs_cmd_t zc = { 0 };
385 	zfs_vertex_t *zvp;
386 
387 	/*
388 	 * Look up the source vertex, and avoid it if we've seen it before.
389 	 */
390 	zvp = zfs_graph_lookup(hdl, zgp, dataset, 0);
391 	if (zvp == NULL)
392 		return (-1);
393 	if (zvp->zv_visited == VISIT_SEEN)
394 		return (0);
395 
396 	/*
397 	 * Iterate over all children
398 	 */
399 	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
400 	    ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
401 	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
402 
403 		/*
404 		 * Ignore private dataset names.
405 		 */
406 		if (dataset_name_hidden(zc.zc_name))
407 			continue;
408 
409 		/*
410 		 * Get statistics for this dataset, to determine the type of the
411 		 * dataset and clone statistics.  If this fails, the dataset has
412 		 * since been removed, and we're pretty much screwed anyway.
413 		 */
414 		zc.zc_objset_stats.dds_origin[0] = '\0';
415 		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
416 			continue;
417 
418 		if (zc.zc_objset_stats.dds_origin[0] != '\0') {
419 			if (zfs_graph_add(hdl, zgp,
420 			    zc.zc_objset_stats.dds_origin, zc.zc_name,
421 			    zc.zc_objset_stats.dds_creation_txg) != 0)
422 				return (-1);
423 			/*
424 			 * Count origins only if they are contained in the graph
425 			 */
426 			if (isa_child_of(zc.zc_objset_stats.dds_origin,
427 			    zgp->zg_root))
428 				zgp->zg_clone_count--;
429 		}
430 
431 		/*
432 		 * Add an edge between the parent and the child.
433 		 */
434 		if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
435 		    zc.zc_objset_stats.dds_creation_txg) != 0)
436 			return (-1);
437 
438 		/*
439 		 * Recursively visit child
440 		 */
441 		if (iterate_children(hdl, zgp, zc.zc_name))
442 			return (-1);
443 	}
444 
445 	/*
446 	 * Now iterate over all snapshots.
447 	 */
448 	bzero(&zc, sizeof (zc));
449 
450 	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
451 	    ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
452 	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
453 
454 		/*
455 		 * Get statistics for this dataset, to determine the type of the
456 		 * dataset and clone statistics.  If this fails, the dataset has
457 		 * since been removed, and we're pretty much screwed anyway.
458 		 */
459 		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
460 			continue;
461 
462 		/*
463 		 * Add an edge between the parent and the child.
464 		 */
465 		if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
466 		    zc.zc_objset_stats.dds_creation_txg) != 0)
467 			return (-1);
468 
469 		zgp->zg_clone_count += zc.zc_objset_stats.dds_num_clones;
470 	}
471 
472 	zvp->zv_visited = VISIT_SEEN;
473 
474 	return (0);
475 }
476 
477 /*
478  * Returns false if there are no snapshots with dependent clones in this
479  * subtree or if all of those clones are also in this subtree.  Returns
480  * true if there is an error or there are external dependents.
481  */
482 static boolean_t
483 external_dependents(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
484 {
485 	zfs_cmd_t zc = { 0 };
486 
487 	/*
488 	 * Check whether this dataset is a clone or has clones since
489 	 * iterate_children() only checks the children.
490 	 */
491 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
492 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
493 		return (B_TRUE);
494 
495 	if (zc.zc_objset_stats.dds_origin[0] != '\0') {
496 		if (zfs_graph_add(hdl, zgp,
497 		    zc.zc_objset_stats.dds_origin, zc.zc_name,
498 		    zc.zc_objset_stats.dds_creation_txg) != 0)
499 			return (B_TRUE);
500 		if (isa_child_of(zc.zc_objset_stats.dds_origin, dataset))
501 			zgp->zg_clone_count--;
502 	}
503 
504 	if ((zc.zc_objset_stats.dds_num_clones) ||
505 	    iterate_children(hdl, zgp, dataset))
506 		return (B_TRUE);
507 
508 	return (zgp->zg_clone_count != 0);
509 }
510 
511 /*
512  * Construct a complete graph of all necessary vertices.  First, iterate over
513  * only our object's children.  If no cloned snapshots are found, or all of
514  * the cloned snapshots are in this subtree then return a graph of the subtree.
515  * Otherwise, start at the root of the pool and iterate over all datasets.
516  */
517 static zfs_graph_t *
518 construct_graph(libzfs_handle_t *hdl, const char *dataset)
519 {
520 	zfs_graph_t *zgp = zfs_graph_create(hdl, dataset, ZFS_GRAPH_SIZE);
521 	int ret = 0;
522 
523 	if (zgp == NULL)
524 		return (zgp);
525 
526 	if ((strchr(dataset, '/') == NULL) ||
527 	    (external_dependents(hdl, zgp, dataset))) {
528 		/*
529 		 * Determine pool name and try again.
530 		 */
531 		int len = strcspn(dataset, "/@") + 1;
532 		char *pool = zfs_alloc(hdl, len);
533 
534 		if (pool == NULL) {
535 			zfs_graph_destroy(zgp);
536 			return (NULL);
537 		}
538 		(void) strlcpy(pool, dataset, len);
539 
540 		if (iterate_children(hdl, zgp, pool) == -1 ||
541 		    zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) {
542 			free(pool);
543 			zfs_graph_destroy(zgp);
544 			return (NULL);
545 		}
546 		free(pool);
547 	}
548 
549 	if (ret == -1 || zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) {
550 		zfs_graph_destroy(zgp);
551 		return (NULL);
552 	}
553 
554 	return (zgp);
555 }
556 
557 /*
558  * Given a graph, do a recursive topological sort into the given array.  This is
559  * really just a depth first search, so that the deepest nodes appear first.
560  * hijack the 'zv_visited' marker to avoid visiting the same vertex twice.
561  */
562 static int
563 topo_sort(libzfs_handle_t *hdl, boolean_t allowrecursion, char **result,
564     size_t *idx, zfs_vertex_t *zgv)
565 {
566 	int i;
567 
568 	if (zgv->zv_visited == VISIT_SORT_PRE && !allowrecursion) {
569 		/*
570 		 * If we've already seen this vertex as part of our depth-first
571 		 * search, then we have a cyclic dependency, and we must return
572 		 * an error.
573 		 */
574 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
575 		    "recursive dependency at '%s'"),
576 		    zgv->zv_dataset);
577 		return (zfs_error(hdl, EZFS_RECURSIVE,
578 		    dgettext(TEXT_DOMAIN,
579 		    "cannot determine dependent datasets")));
580 	} else if (zgv->zv_visited >= VISIT_SORT_PRE) {
581 		/*
582 		 * If we've already processed this as part of the topological
583 		 * sort, then don't bother doing so again.
584 		 */
585 		return (0);
586 	}
587 
588 	zgv->zv_visited = VISIT_SORT_PRE;
589 
590 	/* avoid doing a search if we don't have to */
591 	zfs_vertex_sort_edges(zgv);
592 	for (i = 0; i < zgv->zv_edgecount; i++) {
593 		if (topo_sort(hdl, allowrecursion, result, idx,
594 		    zgv->zv_edges[i]->ze_dest) != 0)
595 			return (-1);
596 	}
597 
598 	/* we may have visited this in the course of the above */
599 	if (zgv->zv_visited == VISIT_SORT_POST)
600 		return (0);
601 
602 	if ((result[*idx] = zfs_alloc(hdl,
603 	    strlen(zgv->zv_dataset) + 1)) == NULL)
604 		return (-1);
605 
606 	(void) strcpy(result[*idx], zgv->zv_dataset);
607 	*idx += 1;
608 	zgv->zv_visited = VISIT_SORT_POST;
609 	return (0);
610 }
611 
612 /*
613  * The only public interface for this file.  Do the dirty work of constructing a
614  * child list for the given object.  Construct the graph, do the toplogical
615  * sort, and then return the array of strings to the caller.
616  *
617  * The 'allowrecursion' parameter controls behavior when cycles are found.  If
618  * it is set, the the cycle is ignored and the results returned as if the cycle
619  * did not exist.  If it is not set, then the routine will generate an error if
620  * a cycle is found.
621  */
622 int
623 get_dependents(libzfs_handle_t *hdl, boolean_t allowrecursion,
624     const char *dataset, char ***result, size_t *count)
625 {
626 	zfs_graph_t *zgp;
627 	zfs_vertex_t *zvp;
628 
629 	if ((zgp = construct_graph(hdl, dataset)) == NULL)
630 		return (-1);
631 
632 	if ((*result = zfs_alloc(hdl,
633 	    zgp->zg_nvertex * sizeof (char *))) == NULL) {
634 		zfs_graph_destroy(zgp);
635 		return (-1);
636 	}
637 
638 	if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) {
639 		free(*result);
640 		zfs_graph_destroy(zgp);
641 		return (-1);
642 	}
643 
644 	*count = 0;
645 	if (topo_sort(hdl, allowrecursion, *result, count, zvp) != 0) {
646 		free(*result);
647 		zfs_graph_destroy(zgp);
648 		return (-1);
649 	}
650 
651 	/*
652 	 * Get rid of the last entry, which is our starting vertex and not
653 	 * strictly a dependent.
654 	 */
655 	assert(*count > 0);
656 	free((*result)[*count - 1]);
657 	(*count)--;
658 
659 	zfs_graph_destroy(zgp);
660 
661 	return (0);
662 }
663