xref: /onnv-gate/usr/src/lib/libzfs/common/libzfs_graph.c (revision 2082:76b439ec3ac1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Iterate over all children of the current object.  This includes the normal
30  * dataset hierarchy, but also arbitrary hierarchies due to clones.  We want to
31  * walk all datasets in the pool, and construct a directed graph of the form:
32  *
33  * 			home
34  *                        |
35  *                   +----+----+
36  *                   |         |
37  *                   v         v             ws
38  *                  bar       baz             |
39  *                             |              |
40  *                             v              v
41  *                          @yesterday ----> foo
42  *
43  * In order to construct this graph, we have to walk every dataset in the pool,
44  * because the clone parent is stored as a property of the child, not the
45  * parent.  The parent only keeps track of the number of clones.
46  *
47  * In the normal case (without clones) this would be rather expensive.  To avoid
48  * unnecessary computation, we first try a walk of the subtree hierarchy
49  * starting from the initial node.  At each dataset, we construct a node in the
50  * graph and an edge leading from its parent.  If we don't see any snapshots
51  * with a non-zero clone count, then we are finished.
52  *
53  * If we do find a cloned snapshot, then we finish the walk of the current
54  * subtree, but indicate that we need to do a complete walk.  We then perform a
55  * global walk of all datasets, avoiding the subtree we already processed.
56  *
57  * At the end of this, we'll end up with a directed graph of all relevant (and
58  * possible some irrelevant) datasets in the system.  We need to both find our
59  * limiting subgraph and determine a safe ordering in which to destroy the
60  * datasets.  We do a topological ordering of our graph starting at our target
61  * dataset, and then walk the results in reverse.
62  *
63  * When removing datasets, we want to destroy the snapshots in chronological
64  * order (because this is the most efficient method).  In order to accomplish
65  * this, we store the creation transaction group with each vertex and keep each
66  * vertex's edges sorted according to this value.  The topological sort will
67  * automatically walk the snapshots in the correct order.
68  */
69 
70 #include <assert.h>
71 #include <stdio.h>
72 #include <stdlib.h>
73 #include <string.h>
74 #include <strings.h>
75 #include <unistd.h>
76 
77 #include <libzfs.h>
78 
79 #include "libzfs_impl.h"
80 #include "zfs_namecheck.h"
81 
82 #define	MIN_EDGECOUNT	4
83 
84 /*
85  * Vertex structure.  Indexed by dataset name, this structure maintains a list
86  * of edges to other vertices.
87  */
88 struct zfs_edge;
89 typedef struct zfs_vertex {
90 	char			zv_dataset[ZFS_MAXNAMELEN];
91 	struct zfs_vertex	*zv_next;
92 	int			zv_visited;
93 	uint64_t		zv_txg;
94 	struct zfs_edge		**zv_edges;
95 	int			zv_edgecount;
96 	int			zv_edgealloc;
97 } zfs_vertex_t;
98 
99 /*
100  * Edge structure.  Simply maintains a pointer to the destination vertex.  There
101  * is no need to store the source vertex, since we only use edges in the context
102  * of the source vertex.
103  */
104 typedef struct zfs_edge {
105 	zfs_vertex_t		*ze_dest;
106 	struct zfs_edge		*ze_next;
107 } zfs_edge_t;
108 
109 #define	ZFS_GRAPH_SIZE		1027	/* this could be dynamic some day */
110 
111 /*
112  * Graph structure.  Vertices are maintained in a hash indexed by dataset name.
113  */
114 typedef struct zfs_graph {
115 	zfs_vertex_t		**zg_hash;
116 	size_t			zg_size;
117 	size_t			zg_nvertex;
118 } zfs_graph_t;
119 
120 /*
121  * Allocate a new edge pointing to the target vertex.
122  */
123 static zfs_edge_t *
124 zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest)
125 {
126 	zfs_edge_t *zep = zfs_alloc(hdl, sizeof (zfs_edge_t));
127 
128 	if (zep == NULL)
129 		return (NULL);
130 
131 	zep->ze_dest = dest;
132 
133 	return (zep);
134 }
135 
136 /*
137  * Destroy an edge.
138  */
139 static void
140 zfs_edge_destroy(zfs_edge_t *zep)
141 {
142 	free(zep);
143 }
144 
145 /*
146  * Allocate a new vertex with the given name.
147  */
148 static zfs_vertex_t *
149 zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset)
150 {
151 	zfs_vertex_t *zvp = zfs_alloc(hdl, sizeof (zfs_vertex_t));
152 
153 	if (zvp == NULL)
154 		return (NULL);
155 
156 	assert(strlen(dataset) < ZFS_MAXNAMELEN);
157 
158 	(void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset));
159 
160 	if ((zvp->zv_edges = zfs_alloc(hdl,
161 	    MIN_EDGECOUNT * sizeof (void *))) == NULL) {
162 		free(zvp);
163 		return (NULL);
164 	}
165 
166 	zvp->zv_edgealloc = MIN_EDGECOUNT;
167 
168 	return (zvp);
169 }
170 
171 /*
172  * Destroy a vertex.  Frees up any associated edges.
173  */
174 static void
175 zfs_vertex_destroy(zfs_vertex_t *zvp)
176 {
177 	int i;
178 
179 	for (i = 0; i < zvp->zv_edgecount; i++)
180 		zfs_edge_destroy(zvp->zv_edges[i]);
181 
182 	free(zvp->zv_edges);
183 	free(zvp);
184 }
185 
186 /*
187  * Given a vertex, add an edge to the destination vertex.
188  */
189 static int
190 zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp,
191     zfs_vertex_t *dest)
192 {
193 	zfs_edge_t *zep = zfs_edge_create(hdl, dest);
194 
195 	if (zep == NULL)
196 		return (-1);
197 
198 	if (zvp->zv_edgecount == zvp->zv_edgealloc) {
199 		zfs_edge_t **newedges = zfs_alloc(hdl, zvp->zv_edgealloc * 2 *
200 		    sizeof (void *));
201 
202 		if (newedges == NULL)
203 			return (-1);
204 
205 		bcopy(zvp->zv_edges, newedges,
206 		    zvp->zv_edgealloc * sizeof (void *));
207 
208 		zvp->zv_edgealloc *= 2;
209 		free(zvp->zv_edges);
210 		zvp->zv_edges = newedges;
211 	}
212 
213 	zvp->zv_edges[zvp->zv_edgecount++] = zep;
214 
215 	return (0);
216 }
217 
218 static int
219 zfs_edge_compare(const void *a, const void *b)
220 {
221 	const zfs_edge_t *ea = *((zfs_edge_t **)a);
222 	const zfs_edge_t *eb = *((zfs_edge_t **)b);
223 
224 	if (ea->ze_dest->zv_txg < eb->ze_dest->zv_txg)
225 		return (-1);
226 	if (ea->ze_dest->zv_txg > eb->ze_dest->zv_txg)
227 		return (1);
228 	return (0);
229 }
230 
231 /*
232  * Sort the given vertex edges according to the creation txg of each vertex.
233  */
234 static void
235 zfs_vertex_sort_edges(zfs_vertex_t *zvp)
236 {
237 	if (zvp->zv_edgecount == 0)
238 		return;
239 
240 	qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *),
241 	    zfs_edge_compare);
242 }
243 
244 /*
245  * Construct a new graph object.  We allow the size to be specified as a
246  * parameter so in the future we can size the hash according to the number of
247  * datasets in the pool.
248  */
249 static zfs_graph_t *
250 zfs_graph_create(libzfs_handle_t *hdl, size_t size)
251 {
252 	zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t));
253 
254 	if (zgp == NULL)
255 		return (NULL);
256 
257 	zgp->zg_size = size;
258 	if ((zgp->zg_hash = zfs_alloc(hdl,
259 	    size * sizeof (zfs_vertex_t *))) == NULL) {
260 		free(zgp);
261 		return (NULL);
262 	}
263 
264 	return (zgp);
265 }
266 
267 /*
268  * Destroy a graph object.  We have to iterate over all the hash chains,
269  * destroying each vertex in the process.
270  */
271 static void
272 zfs_graph_destroy(zfs_graph_t *zgp)
273 {
274 	int i;
275 	zfs_vertex_t *current, *next;
276 
277 	for (i = 0; i < zgp->zg_size; i++) {
278 		current = zgp->zg_hash[i];
279 		while (current != NULL) {
280 			next = current->zv_next;
281 			zfs_vertex_destroy(current);
282 			current = next;
283 		}
284 	}
285 
286 	free(zgp->zg_hash);
287 	free(zgp);
288 }
289 
290 /*
291  * Graph hash function.  Classic bernstein k=33 hash function, taken from
292  * usr/src/cmd/sgs/tools/common/strhash.c
293  */
294 static size_t
295 zfs_graph_hash(zfs_graph_t *zgp, const char *str)
296 {
297 	size_t hash = 5381;
298 	int c;
299 
300 	while ((c = *str++) != 0)
301 		hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
302 
303 	return (hash % zgp->zg_size);
304 }
305 
306 /*
307  * Given a dataset name, finds the associated vertex, creating it if necessary.
308  */
309 static zfs_vertex_t *
310 zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset,
311     uint64_t txg)
312 {
313 	size_t idx = zfs_graph_hash(zgp, dataset);
314 	zfs_vertex_t *zvp;
315 
316 	for (zvp = zgp->zg_hash[idx]; zvp != NULL; zvp = zvp->zv_next) {
317 		if (strcmp(zvp->zv_dataset, dataset) == 0) {
318 			if (zvp->zv_txg == 0)
319 				zvp->zv_txg = txg;
320 			return (zvp);
321 		}
322 	}
323 
324 	if ((zvp = zfs_vertex_create(hdl, dataset)) == NULL)
325 		return (NULL);
326 
327 	zvp->zv_next = zgp->zg_hash[idx];
328 	zvp->zv_txg = txg;
329 	zgp->zg_hash[idx] = zvp;
330 	zgp->zg_nvertex++;
331 
332 	return (zvp);
333 }
334 
335 /*
336  * Given two dataset names, create an edge between them.  For the source vertex,
337  * mark 'zv_visited' to indicate that we have seen this vertex, and not simply
338  * created it as a destination of another edge.  If 'dest' is NULL, then this
339  * is an individual vertex (i.e. the starting vertex), so don't add an edge.
340  */
341 static int
342 zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source,
343     const char *dest, uint64_t txg)
344 {
345 	zfs_vertex_t *svp, *dvp;
346 
347 	if ((svp = zfs_graph_lookup(hdl, zgp, source, 0)) == NULL)
348 		return (-1);
349 	svp->zv_visited = 1;
350 	if (dest != NULL) {
351 		dvp = zfs_graph_lookup(hdl, zgp, dest, txg);
352 		if (dvp == NULL)
353 			return (-1);
354 		if (zfs_vertex_add_edge(hdl, svp, dvp) != 0)
355 			return (-1);
356 	}
357 
358 	return (0);
359 }
360 
361 /*
362  * Iterate over all children of the given dataset, adding any vertices as
363  * necessary.  Returns 0 if no cloned snapshots were seen, -1 if there was an
364  * error, or 1 otherwise.  This is
365  * a simple recursive algorithm - the ZFS namespace typically is very flat.  We
366  * manually invoke the necessary ioctl() calls to avoid the overhead and
367  * additional semantics of zfs_open().
368  */
369 static int
370 iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
371 {
372 	zfs_cmd_t zc = { 0 };
373 	int ret = 0, err;
374 	zfs_vertex_t *zvp;
375 
376 	/*
377 	 * Look up the source vertex, and avoid it if we've seen it before.
378 	 */
379 	zvp = zfs_graph_lookup(hdl, zgp, dataset, 0);
380 	if (zvp == NULL)
381 		return (-1);
382 	if (zvp->zv_visited)
383 		return (0);
384 
385 	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
386 	    ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
387 	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
388 
389 		/*
390 		 * Ignore private dataset names.
391 		 */
392 		if (dataset_name_hidden(zc.zc_name))
393 			continue;
394 
395 		/*
396 		 * Get statistics for this dataset, to determine the type of the
397 		 * dataset and clone statistics.  If this fails, the dataset has
398 		 * since been removed, and we're pretty much screwed anyway.
399 		 */
400 		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
401 			continue;
402 
403 		/*
404 		 * Add an edge between the parent and the child.
405 		 */
406 		if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
407 		    zc.zc_objset_stats.dds_creation_txg) != 0)
408 			return (-1);
409 
410 		/*
411 		 * If this dataset has a clone parent, add an appropriate edge.
412 		 */
413 		if (zc.zc_objset_stats.dds_clone_of[0] != '\0' &&
414 		    zfs_graph_add(hdl, zgp, zc.zc_objset_stats.dds_clone_of,
415 		    zc.zc_name, zc.zc_objset_stats.dds_creation_txg) != 0)
416 			return (-1);
417 
418 		/*
419 		 * Iterate over all children
420 		 */
421 		err = iterate_children(hdl, zgp, zc.zc_name);
422 		if (err == -1)
423 			return (-1);
424 		else if (err == 1)
425 			ret = 1;
426 
427 		/*
428 		 * Indicate if we found a dataset with a non-zero clone count.
429 		 */
430 		if (zc.zc_objset_stats.dds_num_clones != 0)
431 			ret = 1;
432 	}
433 
434 	/*
435 	 * Now iterate over all snapshots.
436 	 */
437 	bzero(&zc, sizeof (zc));
438 
439 	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
440 	    ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
441 	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
442 
443 		/*
444 		 * Get statistics for this dataset, to determine the type of the
445 		 * dataset and clone statistics.  If this fails, the dataset has
446 		 * since been removed, and we're pretty much screwed anyway.
447 		 */
448 		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
449 			continue;
450 
451 		/*
452 		 * Add an edge between the parent and the child.
453 		 */
454 		if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
455 		    zc.zc_objset_stats.dds_creation_txg) != 0)
456 			return (-1);
457 
458 		/*
459 		 * Indicate if we found a dataset with a non-zero clone count.
460 		 */
461 		if (zc.zc_objset_stats.dds_num_clones != 0)
462 			ret = 1;
463 	}
464 
465 	zvp->zv_visited = 1;
466 
467 	return (ret);
468 }
469 
470 /*
471  * Construct a complete graph of all necessary vertices.  First, we iterate over
472  * only our object's children.  If we don't find any cloned snapshots, then we
473  * simple return that.  Otherwise, we have to start at the pool root and iterate
474  * over all datasets.
475  */
476 static zfs_graph_t *
477 construct_graph(libzfs_handle_t *hdl, const char *dataset)
478 {
479 	zfs_graph_t *zgp = zfs_graph_create(hdl, ZFS_GRAPH_SIZE);
480 	zfs_cmd_t zc = { 0 };
481 	int ret = 0;
482 
483 	if (zgp == NULL)
484 		return (zgp);
485 
486 	/*
487 	 * We need to explicitly check whether this dataset has clones or not,
488 	 * since iterate_children() only checks the children.
489 	 */
490 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
491 	(void) ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc);
492 
493 	if (zc.zc_objset_stats.dds_num_clones != 0 ||
494 	    (ret = iterate_children(hdl, zgp, dataset)) != 0) {
495 		/*
496 		 * Determine pool name and try again.
497 		 */
498 		char *pool, *slash;
499 
500 		if ((slash = strchr(dataset, '/')) != NULL ||
501 		    (slash = strchr(dataset, '@')) != NULL) {
502 			pool = zfs_alloc(hdl, slash - dataset + 1);
503 			if (pool == NULL) {
504 				zfs_graph_destroy(zgp);
505 				return (NULL);
506 			}
507 			(void) strncpy(pool, dataset, slash - dataset);
508 			pool[slash - dataset] = '\0';
509 
510 			if (iterate_children(hdl, zgp, pool) == -1 ||
511 			    zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) {
512 				free(pool);
513 				zfs_graph_destroy(zgp);
514 				return (NULL);
515 			}
516 
517 			free(pool);
518 		}
519 	}
520 
521 	if (ret == -1 || zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) {
522 		zfs_graph_destroy(zgp);
523 		return (NULL);
524 	}
525 
526 	return (zgp);
527 }
528 
529 /*
530  * Given a graph, do a recursive topological sort into the given array.  This is
531  * really just a depth first search, so that the deepest nodes appear first.
532  * hijack the 'zv_visited' marker to avoid visiting the same vertex twice.
533  */
534 static int
535 topo_sort(libzfs_handle_t *hdl, char **result, size_t *idx, zfs_vertex_t *zgv)
536 {
537 	int i;
538 
539 	/* avoid doing a search if we don't have to */
540 	if (zgv->zv_visited == 2)
541 		return (0);
542 
543 	zfs_vertex_sort_edges(zgv);
544 	for (i = 0; i < zgv->zv_edgecount; i++) {
545 		if (topo_sort(hdl, result, idx, zgv->zv_edges[i]->ze_dest) != 0)
546 			return (-1);
547 	}
548 
549 	/* we may have visited this in the course of the above */
550 	if (zgv->zv_visited == 2)
551 		return (0);
552 
553 	if ((result[*idx] = zfs_alloc(hdl,
554 	    strlen(zgv->zv_dataset) + 1)) == NULL)
555 		return (-1);
556 
557 	(void) strcpy(result[*idx], zgv->zv_dataset);
558 	*idx += 1;
559 	zgv->zv_visited = 2;
560 	return (0);
561 }
562 
563 /*
564  * The only public interface for this file.  Do the dirty work of constructing a
565  * child list for the given object.  Construct the graph, do the toplogical
566  * sort, and then return the array of strings to the caller.
567  */
568 char **
569 get_dependents(libzfs_handle_t *hdl, const char *dataset, size_t *count)
570 {
571 	char **result;
572 	zfs_graph_t *zgp;
573 	zfs_vertex_t *zvp;
574 
575 	if ((zgp = construct_graph(hdl, dataset)) == NULL)
576 		return (NULL);
577 
578 	if ((result = zfs_alloc(hdl,
579 	    zgp->zg_nvertex * sizeof (char *))) == NULL) {
580 		zfs_graph_destroy(zgp);
581 		return (NULL);
582 	}
583 
584 	if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) {
585 		free(result);
586 		zfs_graph_destroy(zgp);
587 		return (NULL);
588 	}
589 
590 	*count = 0;
591 	if (topo_sort(hdl, result, count, zvp) != 0) {
592 		free(result);
593 		zfs_graph_destroy(zgp);
594 		return (NULL);
595 	}
596 
597 	/*
598 	 * Get rid of the last entry, which is our starting vertex and not
599 	 * strictly a dependent.
600 	 */
601 	assert(*count > 0);
602 	free(result[*count - 1]);
603 	(*count)--;
604 
605 	zfs_graph_destroy(zgp);
606 
607 	return (result);
608 }
609