xref: /onnv-gate/usr/src/lib/libzfs/common/libzfs_graph.c (revision 789:b348f31ed315)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Iterate over all children of the current object.  This includes the normal
31  * dataset hierarchy, but also arbitrary hierarchies due to clones.  We want to
32  * walk all datasets in the pool, and construct a directed graph of the form:
33  *
34  * 			home
35  *                        |
36  *                   +----+----+
37  *                   |         |
38  *                   v         v             ws
39  *                  bar       baz             |
40  *                             |              |
41  *                             v              v
42  *                          @yesterday ----> foo
43  *
44  * In order to construct this graph, we have to walk every dataset in the pool,
45  * because the clone parent is stored as a property of the child, not the
46  * parent.  The parent only keeps track of the number of clones.
47  *
48  * In the normal case (without clones) this would be rather expensive.  To avoid
49  * unnecessary computation, we first try a walk of the subtree hierarchy
50  * starting from the initial node.  At each dataset, we construct a node in the
51  * graph and an edge leading from its parent.  If we don't see any snapshots
52  * with a non-zero clone count, then we are finished.
53  *
54  * If we do find a cloned snapshot, then we finish the walk of the current
55  * subtree, but indicate that we need to do a complete walk.  We then perform a
56  * global walk of all datasets, avoiding the subtree we already processed.
57  *
58  * At the end of this, we'll end up with a directed graph of all relevant (and
59  * possible some irrelevant) datasets in the system.  We need to both find our
60  * limiting subgraph and determine a safe ordering in which to destroy the
61  * datasets.  We do a topological ordering of our graph starting at our target
62  * dataset, and then walk the results in reverse.
63  *
64  * When removing datasets, we want to destroy the snapshots in chronological
65  * order (because this is the most efficient method).  In order to accomplish
66  * this, we store the creation transaction group with each vertex and keep each
67  * vertex's edges sorted according to this value.  The topological sort will
68  * automatically walk the snapshots in the correct order.
69  */
70 
71 #include <assert.h>
72 #include <stdio.h>
73 #include <stdlib.h>
74 #include <string.h>
75 #include <strings.h>
76 #include <unistd.h>
77 
78 #include <libzfs.h>
79 
80 #include "libzfs_impl.h"
81 #include "zfs_namecheck.h"
82 
83 #define	MIN_EDGECOUNT	4
84 
85 /*
86  * Vertex structure.  Indexed by dataset name, this structure maintains a list
87  * of edges to other vertices.
88  */
89 struct zfs_edge;
90 typedef struct zfs_vertex {
91 	char			zv_dataset[ZFS_MAXNAMELEN];
92 	struct zfs_vertex	*zv_next;
93 	int			zv_visited;
94 	uint64_t		zv_txg;
95 	struct zfs_edge		**zv_edges;
96 	int			zv_edgecount;
97 	int			zv_edgealloc;
98 } zfs_vertex_t;
99 
100 /*
101  * Edge structure.  Simply maintains a pointer to the destination vertex.  There
102  * is no need to store the source vertex, since we only use edges in the context
103  * of the source vertex.
104  */
105 typedef struct zfs_edge {
106 	zfs_vertex_t		*ze_dest;
107 	struct zfs_edge		*ze_next;
108 } zfs_edge_t;
109 
110 #define	ZFS_GRAPH_SIZE		1027	/* this could be dynamic some day */
111 
112 /*
113  * Graph structure.  Vertices are maintained in a hash indexed by dataset name.
114  */
115 typedef struct zfs_graph {
116 	zfs_vertex_t		**zg_hash;
117 	size_t			zg_size;
118 	size_t			zg_nvertex;
119 } zfs_graph_t;
120 
121 /*
122  * Allocate a new edge pointing to the target vertex.
123  */
124 static zfs_edge_t *
125 zfs_edge_create(zfs_vertex_t *dest)
126 {
127 	zfs_edge_t *zep = zfs_malloc(sizeof (zfs_edge_t));
128 
129 	zep->ze_dest = dest;
130 
131 	return (zep);
132 }
133 
134 /*
135  * Destroy an edge.
136  */
137 static void
138 zfs_edge_destroy(zfs_edge_t *zep)
139 {
140 	free(zep);
141 }
142 
143 /*
144  * Allocate a new vertex with the given name.
145  */
146 static zfs_vertex_t *
147 zfs_vertex_create(const char *dataset)
148 {
149 	zfs_vertex_t *zvp = zfs_malloc(sizeof (zfs_vertex_t));
150 
151 	assert(strlen(dataset) < ZFS_MAXNAMELEN);
152 
153 	(void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset));
154 
155 	zvp->zv_edges = zfs_malloc(MIN_EDGECOUNT * sizeof (void *));
156 	zvp->zv_edgealloc = MIN_EDGECOUNT;
157 
158 	return (zvp);
159 }
160 
161 /*
162  * Destroy a vertex.  Frees up any associated edges.
163  */
164 static void
165 zfs_vertex_destroy(zfs_vertex_t *zvp)
166 {
167 	int i;
168 
169 	for (i = 0; i < zvp->zv_edgecount; i++)
170 		zfs_edge_destroy(zvp->zv_edges[i]);
171 
172 	free(zvp->zv_edges);
173 	free(zvp);
174 }
175 
176 /*
177  * Given a vertex, add an edge to the destination vertex.
178  */
179 static void
180 zfs_vertex_add_edge(zfs_vertex_t *zvp, zfs_vertex_t *dest)
181 {
182 	zfs_edge_t *zep = zfs_edge_create(dest);
183 
184 	if (zvp->zv_edgecount == zvp->zv_edgealloc) {
185 		zfs_edge_t **newedges = zfs_malloc(zvp->zv_edgealloc * 2 *
186 		    sizeof (void *));
187 
188 		bcopy(zvp->zv_edges, newedges,
189 		    zvp->zv_edgealloc * sizeof (void *));
190 
191 		zvp->zv_edgealloc *= 2;
192 		free(zvp->zv_edges);
193 		zvp->zv_edges = newedges;
194 	}
195 
196 	zvp->zv_edges[zvp->zv_edgecount++] = zep;
197 }
198 
199 static int
200 zfs_edge_compare(const void *a, const void *b)
201 {
202 	const zfs_edge_t *ea = *((zfs_edge_t **)a);
203 	const zfs_edge_t *eb = *((zfs_edge_t **)b);
204 
205 	if (ea->ze_dest->zv_txg < eb->ze_dest->zv_txg)
206 		return (-1);
207 	if (ea->ze_dest->zv_txg > eb->ze_dest->zv_txg)
208 		return (1);
209 	return (0);
210 }
211 
212 /*
213  * Sort the given vertex edges according to the creation txg of each vertex.
214  */
215 static void
216 zfs_vertex_sort_edges(zfs_vertex_t *zvp)
217 {
218 	if (zvp->zv_edgecount == 0)
219 		return;
220 
221 	qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *),
222 	    zfs_edge_compare);
223 }
224 
225 /*
226  * Construct a new graph object.  We allow the size to be specified as a
227  * parameter so in the future we can size the hash according to the number of
228  * datasets in the pool.
229  */
230 static zfs_graph_t *
231 zfs_graph_create(size_t size)
232 {
233 	zfs_graph_t *zgp = zfs_malloc(sizeof (zfs_graph_t));
234 
235 	zgp->zg_size = size;
236 	zgp->zg_hash = zfs_malloc(size * sizeof (zfs_vertex_t *));
237 
238 	return (zgp);
239 }
240 
241 /*
242  * Destroy a graph object.  We have to iterate over all the hash chains,
243  * destroying each vertex in the process.
244  */
245 static void
246 zfs_graph_destroy(zfs_graph_t *zgp)
247 {
248 	int i;
249 	zfs_vertex_t *current, *next;
250 
251 	for (i = 0; i < zgp->zg_size; i++) {
252 		current = zgp->zg_hash[i];
253 		while (current != NULL) {
254 			next = current->zv_next;
255 			zfs_vertex_destroy(current);
256 			current = next;
257 		}
258 	}
259 
260 	free(zgp->zg_hash);
261 	free(zgp);
262 }
263 
264 /*
265  * Graph hash function.  Classic bernstein k=33 hash function, taken from
266  * usr/src/cmd/sgs/tools/common/strhash.c
267  */
268 static size_t
269 zfs_graph_hash(zfs_graph_t *zgp, const char *str)
270 {
271 	size_t hash = 5381;
272 	int c;
273 
274 	while ((c = *str++) != 0)
275 		hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
276 
277 	return (hash % zgp->zg_size);
278 }
279 
280 /*
281  * Given a dataset name, finds the associated vertex, creating it if necessary.
282  */
283 static zfs_vertex_t *
284 zfs_graph_lookup(zfs_graph_t *zgp, const char *dataset, uint64_t txg)
285 {
286 	size_t idx = zfs_graph_hash(zgp, dataset);
287 	zfs_vertex_t *zvp;
288 
289 	for (zvp = zgp->zg_hash[idx]; zvp != NULL; zvp = zvp->zv_next) {
290 		if (strcmp(zvp->zv_dataset, dataset) == 0) {
291 			if (zvp->zv_txg == 0)
292 				zvp->zv_txg = txg;
293 			return (zvp);
294 		}
295 	}
296 
297 	zvp = zfs_vertex_create(dataset);
298 	zvp->zv_next = zgp->zg_hash[idx];
299 	zvp->zv_txg = txg;
300 	zgp->zg_hash[idx] = zvp;
301 	zgp->zg_nvertex++;
302 
303 	return (zvp);
304 }
305 
306 /*
307  * Given two dataset names, create an edge between them.  For the source vertex,
308  * mark 'zv_visited' to indicate that we have seen this vertex, and not simply
309  * created it as a destination of another edge.  If 'dest' is NULL, then this
310  * is an individual vertex (i.e. the starting vertex), so don't add an edge.
311  */
312 static void
313 zfs_graph_add(zfs_graph_t *zgp, const char *source, const char *dest,
314     uint64_t txg)
315 {
316 	zfs_vertex_t *svp, *dvp;
317 
318 	svp = zfs_graph_lookup(zgp, source, 0);
319 	svp->zv_visited = 1;
320 	if (dest != NULL) {
321 		dvp = zfs_graph_lookup(zgp, dest, txg);
322 		zfs_vertex_add_edge(svp, dvp);
323 	}
324 }
325 
326 /*
327  * Iterate over all children of the given dataset, adding any vertices as
328  * necessary.  Returns 0 if no cloned snapshots were seen, 1 otherwise.  This is
329  * a simple recursive algorithm - the ZFS namespace typically is very flat.  We
330  * manually invoke the necessary ioctl() calls to avoid the overhead and
331  * additional semantics of zfs_open().
332  */
333 static int
334 iterate_children(zfs_graph_t *zgp, const char *dataset)
335 {
336 	zfs_cmd_t zc = { 0 };
337 	int ret = 0;
338 	zfs_vertex_t *zvp;
339 
340 	/*
341 	 * Look up the source vertex, and avoid it if we've seen it before.
342 	 */
343 	zvp = zfs_graph_lookup(zgp, dataset, 0);
344 	if (zvp->zv_visited)
345 		return (0);
346 
347 	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
348 	    ioctl(zfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
349 	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
350 
351 		/*
352 		 * Ignore private dataset names.
353 		 */
354 		if (dataset_name_hidden(zc.zc_name))
355 			continue;
356 
357 		/*
358 		 * Get statistics for this dataset, to determine the type of the
359 		 * dataset and clone statistics.  If this fails, the dataset has
360 		 * since been removed, and we're pretty much screwed anyway.
361 		 */
362 		if (ioctl(zfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
363 			continue;
364 
365 		/*
366 		 * Add an edge between the parent and the child.
367 		 */
368 		zfs_graph_add(zgp, dataset, zc.zc_name,
369 		    zc.zc_objset_stats.dds_creation_txg);
370 
371 		/*
372 		 * If this dataset has a clone parent, add an appropriate edge.
373 		 */
374 		if (zc.zc_objset_stats.dds_clone_of[0] != '\0')
375 			zfs_graph_add(zgp, zc.zc_objset_stats.dds_clone_of,
376 			    zc.zc_name, zc.zc_objset_stats.dds_creation_txg);
377 
378 		/*
379 		 * Iterate over all children
380 		 */
381 		ret |= iterate_children(zgp, zc.zc_name);
382 
383 		/*
384 		 * Indicate if we found a dataset with a non-zero clone count.
385 		 */
386 		if (zc.zc_objset_stats.dds_num_clones != 0)
387 			ret |= 1;
388 	}
389 
390 	/*
391 	 * Now iterate over all snapshots.
392 	 */
393 	bzero(&zc, sizeof (zc));
394 
395 	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
396 	    ioctl(zfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
397 	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
398 
399 		/*
400 		 * Get statistics for this dataset, to determine the type of the
401 		 * dataset and clone statistics.  If this fails, the dataset has
402 		 * since been removed, and we're pretty much screwed anyway.
403 		 */
404 		if (ioctl(zfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
405 			continue;
406 
407 		/*
408 		 * Add an edge between the parent and the child.
409 		 */
410 		zfs_graph_add(zgp, dataset, zc.zc_name,
411 		    zc.zc_objset_stats.dds_creation_txg);
412 
413 		/*
414 		 * Indicate if we found a dataset with a non-zero clone count.
415 		 */
416 		if (zc.zc_objset_stats.dds_num_clones != 0)
417 			ret |= 1;
418 	}
419 
420 	zvp->zv_visited = 1;
421 
422 	return (ret);
423 }
424 
425 /*
426  * Construct a complete graph of all necessary vertices.  First, we iterate over
427  * only our object's children.  If we don't find any cloned snapshots, then we
428  * simple return that.  Otherwise, we have to start at the pool root and iterate
429  * over all datasets.
430  */
431 static zfs_graph_t *
432 construct_graph(const char *dataset)
433 {
434 	zfs_graph_t *zgp = zfs_graph_create(ZFS_GRAPH_SIZE);
435 	zfs_cmd_t zc = { 0 };
436 
437 	/*
438 	 * We need to explicitly check whether this dataset has clones or not,
439 	 * since iterate_children() only checks the children.
440 	 */
441 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
442 	(void) ioctl(zfs_fd, ZFS_IOC_OBJSET_STATS, &zc);
443 
444 	if (zc.zc_objset_stats.dds_num_clones != 0 ||
445 	    iterate_children(zgp, dataset) != 0) {
446 		/*
447 		 * Determine pool name and try again.
448 		 */
449 		char *pool, *slash;
450 
451 		if ((slash = strchr(dataset, '/')) != NULL ||
452 		    (slash = strchr(dataset, '@')) != NULL) {
453 			pool = zfs_malloc(slash - dataset + 1);
454 			(void) strncpy(pool, dataset, slash - dataset);
455 			pool[slash - dataset] = '\0';
456 
457 			(void) iterate_children(zgp, pool);
458 			zfs_graph_add(zgp, pool, NULL, 0);
459 
460 			free(pool);
461 		}
462 	}
463 	zfs_graph_add(zgp, dataset, NULL, 0);
464 
465 	return (zgp);
466 }
467 
468 /*
469  * Given a graph, do a recursive topological sort into the given array.  This is
470  * really just a depth first search, so that the deepest nodes appear first.
471  * hijack the 'zv_visited' marker to avoid visiting the same vertex twice.
472  */
473 static void
474 topo_sort(char **result, size_t *idx, zfs_vertex_t *zgv)
475 {
476 	int i;
477 
478 	/* avoid doing a search if we don't have to */
479 	if (zgv->zv_visited == 2)
480 		return;
481 
482 	zfs_vertex_sort_edges(zgv);
483 	for (i = 0; i < zgv->zv_edgecount; i++)
484 		topo_sort(result, idx, zgv->zv_edges[i]->ze_dest);
485 
486 	/* we may have visited this in the course of the above */
487 	if (zgv->zv_visited == 2)
488 		return;
489 
490 	result[*idx] = zfs_malloc(strlen(zgv->zv_dataset) + 1);
491 	(void) strcpy(result[*idx], zgv->zv_dataset);
492 	*idx += 1;
493 	zgv->zv_visited = 2;
494 }
495 
496 /*
497  * The only public interface for this file.  Do the dirty work of constructing a
498  * child list for the given object.  Construct the graph, do the toplogical
499  * sort, and then return the array of strings to the caller.
500  */
501 char **
502 get_dependents(const char *dataset, size_t *count)
503 {
504 	char **result;
505 	zfs_graph_t *zgp;
506 	zfs_vertex_t *zvp;
507 
508 	zgp = construct_graph(dataset);
509 	result = zfs_malloc(zgp->zg_nvertex * sizeof (char *));
510 
511 	zvp = zfs_graph_lookup(zgp, dataset, 0);
512 
513 	*count = 0;
514 	topo_sort(result, count, zvp);
515 
516 	/*
517 	 * Get rid of the last entry, which is our starting vertex and not
518 	 * strictly a dependent.
519 	 */
520 	assert(*count > 0);
521 	free(result[*count - 1]);
522 	(*count)--;
523 
524 	zfs_graph_destroy(zgp);
525 
526 	return (result);
527 }
528