1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Iterate over all children of the current object. This includes the normal 31 * dataset hierarchy, but also arbitrary hierarchies due to clones. We want to 32 * walk all datasets in the pool, and construct a directed graph of the form: 33 * 34 * home 35 * | 36 * +----+----+ 37 * | | 38 * v v ws 39 * bar baz | 40 * | | 41 * v v 42 * @yesterday ----> foo 43 * 44 * In order to construct this graph, we have to walk every dataset in the pool, 45 * because the clone parent is stored as a property of the child, not the 46 * parent. The parent only keeps track of the number of clones. 47 * 48 * In the normal case (without clones) this would be rather expensive. To avoid 49 * unnecessary computation, we first try a walk of the subtree hierarchy 50 * starting from the initial node. At each dataset, we construct a node in the 51 * graph and an edge leading from its parent. If we don't see any snapshots 52 * with a non-zero clone count, then we are finished. 53 * 54 * If we do find a cloned snapshot, then we finish the walk of the current 55 * subtree, but indicate that we need to do a complete walk. We then perform a 56 * global walk of all datasets, avoiding the subtree we already processed. 57 * 58 * At the end of this, we'll end up with a directed graph of all relevant (and 59 * possible some irrelevant) datasets in the system. We need to both find our 60 * limiting subgraph and determine a safe ordering in which to destroy the 61 * datasets. We do a topological ordering of our graph starting at our target 62 * dataset, and then walk the results in reverse. 63 * 64 * When removing datasets, we want to destroy the snapshots in chronological 65 * order (because this is the most efficient method). In order to accomplish 66 * this, we store the creation transaction group with each vertex and keep each 67 * vertex's edges sorted according to this value. The topological sort will 68 * automatically walk the snapshots in the correct order. 69 */ 70 71 #include <assert.h> 72 #include <stdio.h> 73 #include <stdlib.h> 74 #include <string.h> 75 #include <strings.h> 76 #include <unistd.h> 77 78 #include <libzfs.h> 79 80 #include "libzfs_impl.h" 81 #include "zfs_namecheck.h" 82 83 #define MIN_EDGECOUNT 4 84 85 /* 86 * Vertex structure. Indexed by dataset name, this structure maintains a list 87 * of edges to other vertices. 88 */ 89 struct zfs_edge; 90 typedef struct zfs_vertex { 91 char zv_dataset[ZFS_MAXNAMELEN]; 92 struct zfs_vertex *zv_next; 93 int zv_visited; 94 uint64_t zv_txg; 95 struct zfs_edge **zv_edges; 96 int zv_edgecount; 97 int zv_edgealloc; 98 } zfs_vertex_t; 99 100 /* 101 * Edge structure. Simply maintains a pointer to the destination vertex. There 102 * is no need to store the source vertex, since we only use edges in the context 103 * of the source vertex. 104 */ 105 typedef struct zfs_edge { 106 zfs_vertex_t *ze_dest; 107 struct zfs_edge *ze_next; 108 } zfs_edge_t; 109 110 #define ZFS_GRAPH_SIZE 1027 /* this could be dynamic some day */ 111 112 /* 113 * Graph structure. Vertices are maintained in a hash indexed by dataset name. 114 */ 115 typedef struct zfs_graph { 116 zfs_vertex_t **zg_hash; 117 size_t zg_size; 118 size_t zg_nvertex; 119 } zfs_graph_t; 120 121 /* 122 * Allocate a new edge pointing to the target vertex. 123 */ 124 static zfs_edge_t * 125 zfs_edge_create(zfs_vertex_t *dest) 126 { 127 zfs_edge_t *zep = zfs_malloc(sizeof (zfs_edge_t)); 128 129 zep->ze_dest = dest; 130 131 return (zep); 132 } 133 134 /* 135 * Destroy an edge. 136 */ 137 static void 138 zfs_edge_destroy(zfs_edge_t *zep) 139 { 140 free(zep); 141 } 142 143 /* 144 * Allocate a new vertex with the given name. 145 */ 146 static zfs_vertex_t * 147 zfs_vertex_create(const char *dataset) 148 { 149 zfs_vertex_t *zvp = zfs_malloc(sizeof (zfs_vertex_t)); 150 151 assert(strlen(dataset) < ZFS_MAXNAMELEN); 152 153 (void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset)); 154 155 zvp->zv_edges = zfs_malloc(MIN_EDGECOUNT * sizeof (void *)); 156 zvp->zv_edgealloc = MIN_EDGECOUNT; 157 158 return (zvp); 159 } 160 161 /* 162 * Destroy a vertex. Frees up any associated edges. 163 */ 164 static void 165 zfs_vertex_destroy(zfs_vertex_t *zvp) 166 { 167 int i; 168 169 for (i = 0; i < zvp->zv_edgecount; i++) 170 zfs_edge_destroy(zvp->zv_edges[i]); 171 172 free(zvp->zv_edges); 173 free(zvp); 174 } 175 176 /* 177 * Given a vertex, add an edge to the destination vertex. 178 */ 179 static void 180 zfs_vertex_add_edge(zfs_vertex_t *zvp, zfs_vertex_t *dest) 181 { 182 zfs_edge_t *zep = zfs_edge_create(dest); 183 184 if (zvp->zv_edgecount == zvp->zv_edgealloc) { 185 zfs_edge_t **newedges = zfs_malloc(zvp->zv_edgealloc * 2 * 186 sizeof (void *)); 187 188 bcopy(zvp->zv_edges, newedges, 189 zvp->zv_edgealloc * sizeof (void *)); 190 191 zvp->zv_edgealloc *= 2; 192 free(zvp->zv_edges); 193 zvp->zv_edges = newedges; 194 } 195 196 zvp->zv_edges[zvp->zv_edgecount++] = zep; 197 } 198 199 static int 200 zfs_edge_compare(const void *a, const void *b) 201 { 202 const zfs_edge_t *ea = *((zfs_edge_t **)a); 203 const zfs_edge_t *eb = *((zfs_edge_t **)b); 204 205 if (ea->ze_dest->zv_txg < eb->ze_dest->zv_txg) 206 return (-1); 207 if (ea->ze_dest->zv_txg > eb->ze_dest->zv_txg) 208 return (1); 209 return (0); 210 } 211 212 /* 213 * Sort the given vertex edges according to the creation txg of each vertex. 214 */ 215 static void 216 zfs_vertex_sort_edges(zfs_vertex_t *zvp) 217 { 218 if (zvp->zv_edgecount == 0) 219 return; 220 221 qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *), 222 zfs_edge_compare); 223 } 224 225 /* 226 * Construct a new graph object. We allow the size to be specified as a 227 * parameter so in the future we can size the hash according to the number of 228 * datasets in the pool. 229 */ 230 static zfs_graph_t * 231 zfs_graph_create(size_t size) 232 { 233 zfs_graph_t *zgp = zfs_malloc(sizeof (zfs_graph_t)); 234 235 zgp->zg_size = size; 236 zgp->zg_hash = zfs_malloc(size * sizeof (zfs_vertex_t *)); 237 238 return (zgp); 239 } 240 241 /* 242 * Destroy a graph object. We have to iterate over all the hash chains, 243 * destroying each vertex in the process. 244 */ 245 static void 246 zfs_graph_destroy(zfs_graph_t *zgp) 247 { 248 int i; 249 zfs_vertex_t *current, *next; 250 251 for (i = 0; i < zgp->zg_size; i++) { 252 current = zgp->zg_hash[i]; 253 while (current != NULL) { 254 next = current->zv_next; 255 zfs_vertex_destroy(current); 256 current = next; 257 } 258 } 259 260 free(zgp->zg_hash); 261 free(zgp); 262 } 263 264 /* 265 * Graph hash function. Classic bernstein k=33 hash function, taken from 266 * usr/src/cmd/sgs/tools/common/strhash.c 267 */ 268 static size_t 269 zfs_graph_hash(zfs_graph_t *zgp, const char *str) 270 { 271 size_t hash = 5381; 272 int c; 273 274 while ((c = *str++) != 0) 275 hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ 276 277 return (hash % zgp->zg_size); 278 } 279 280 /* 281 * Given a dataset name, finds the associated vertex, creating it if necessary. 282 */ 283 static zfs_vertex_t * 284 zfs_graph_lookup(zfs_graph_t *zgp, const char *dataset, uint64_t txg) 285 { 286 size_t idx = zfs_graph_hash(zgp, dataset); 287 zfs_vertex_t *zvp; 288 289 for (zvp = zgp->zg_hash[idx]; zvp != NULL; zvp = zvp->zv_next) { 290 if (strcmp(zvp->zv_dataset, dataset) == 0) { 291 if (zvp->zv_txg == 0) 292 zvp->zv_txg = txg; 293 return (zvp); 294 } 295 } 296 297 zvp = zfs_vertex_create(dataset); 298 zvp->zv_next = zgp->zg_hash[idx]; 299 zvp->zv_txg = txg; 300 zgp->zg_hash[idx] = zvp; 301 zgp->zg_nvertex++; 302 303 return (zvp); 304 } 305 306 /* 307 * Given two dataset names, create an edge between them. For the source vertex, 308 * mark 'zv_visited' to indicate that we have seen this vertex, and not simply 309 * created it as a destination of another edge. If 'dest' is NULL, then this 310 * is an individual vertex (i.e. the starting vertex), so don't add an edge. 311 */ 312 static void 313 zfs_graph_add(zfs_graph_t *zgp, const char *source, const char *dest, 314 uint64_t txg) 315 { 316 zfs_vertex_t *svp, *dvp; 317 318 svp = zfs_graph_lookup(zgp, source, 0); 319 svp->zv_visited = 1; 320 if (dest != NULL) { 321 dvp = zfs_graph_lookup(zgp, dest, txg); 322 zfs_vertex_add_edge(svp, dvp); 323 } 324 } 325 326 /* 327 * Iterate over all children of the given dataset, adding any vertices as 328 * necessary. Returns 0 if no cloned snapshots were seen, 1 otherwise. This is 329 * a simple recursive algorithm - the ZFS namespace typically is very flat. We 330 * manually invoke the necessary ioctl() calls to avoid the overhead and 331 * additional semantics of zfs_open(). 332 */ 333 static int 334 iterate_children(zfs_graph_t *zgp, const char *dataset) 335 { 336 zfs_cmd_t zc = { 0 }; 337 int ret = 0; 338 zfs_vertex_t *zvp; 339 340 /* 341 * Look up the source vertex, and avoid it if we've seen it before. 342 */ 343 zvp = zfs_graph_lookup(zgp, dataset, 0); 344 if (zvp->zv_visited) 345 return (0); 346 347 for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); 348 ioctl(zfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0; 349 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) { 350 351 /* 352 * Ignore private dataset names. 353 */ 354 if (dataset_name_hidden(zc.zc_name)) 355 continue; 356 357 /* 358 * Get statistics for this dataset, to determine the type of the 359 * dataset and clone statistics. If this fails, the dataset has 360 * since been removed, and we're pretty much screwed anyway. 361 */ 362 if (ioctl(zfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) 363 continue; 364 365 /* 366 * Add an edge between the parent and the child. 367 */ 368 zfs_graph_add(zgp, dataset, zc.zc_name, 369 zc.zc_objset_stats.dds_creation_txg); 370 371 /* 372 * If this dataset has a clone parent, add an appropriate edge. 373 */ 374 if (zc.zc_objset_stats.dds_clone_of[0] != '\0') 375 zfs_graph_add(zgp, zc.zc_objset_stats.dds_clone_of, 376 zc.zc_name, zc.zc_objset_stats.dds_creation_txg); 377 378 /* 379 * Iterate over all children 380 */ 381 ret |= iterate_children(zgp, zc.zc_name); 382 383 /* 384 * Indicate if we found a dataset with a non-zero clone count. 385 */ 386 if (zc.zc_objset_stats.dds_num_clones != 0) 387 ret |= 1; 388 } 389 390 /* 391 * Now iterate over all snapshots. 392 */ 393 bzero(&zc, sizeof (zc)); 394 395 for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); 396 ioctl(zfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0; 397 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) { 398 399 /* 400 * Get statistics for this dataset, to determine the type of the 401 * dataset and clone statistics. If this fails, the dataset has 402 * since been removed, and we're pretty much screwed anyway. 403 */ 404 if (ioctl(zfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) 405 continue; 406 407 /* 408 * Add an edge between the parent and the child. 409 */ 410 zfs_graph_add(zgp, dataset, zc.zc_name, 411 zc.zc_objset_stats.dds_creation_txg); 412 413 /* 414 * Indicate if we found a dataset with a non-zero clone count. 415 */ 416 if (zc.zc_objset_stats.dds_num_clones != 0) 417 ret |= 1; 418 } 419 420 zvp->zv_visited = 1; 421 422 return (ret); 423 } 424 425 /* 426 * Construct a complete graph of all necessary vertices. First, we iterate over 427 * only our object's children. If we don't find any cloned snapshots, then we 428 * simple return that. Otherwise, we have to start at the pool root and iterate 429 * over all datasets. 430 */ 431 static zfs_graph_t * 432 construct_graph(const char *dataset) 433 { 434 zfs_graph_t *zgp = zfs_graph_create(ZFS_GRAPH_SIZE); 435 zfs_cmd_t zc = { 0 }; 436 437 /* 438 * We need to explicitly check whether this dataset has clones or not, 439 * since iterate_children() only checks the children. 440 */ 441 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); 442 (void) ioctl(zfs_fd, ZFS_IOC_OBJSET_STATS, &zc); 443 444 if (zc.zc_objset_stats.dds_num_clones != 0 || 445 iterate_children(zgp, dataset) != 0) { 446 /* 447 * Determine pool name and try again. 448 */ 449 char *pool, *slash; 450 451 if ((slash = strchr(dataset, '/')) != NULL || 452 (slash = strchr(dataset, '@')) != NULL) { 453 pool = zfs_malloc(slash - dataset + 1); 454 (void) strncpy(pool, dataset, slash - dataset); 455 pool[slash - dataset] = '\0'; 456 457 (void) iterate_children(zgp, pool); 458 zfs_graph_add(zgp, pool, NULL, 0); 459 460 free(pool); 461 } 462 } 463 zfs_graph_add(zgp, dataset, NULL, 0); 464 465 return (zgp); 466 } 467 468 /* 469 * Given a graph, do a recursive topological sort into the given array. This is 470 * really just a depth first search, so that the deepest nodes appear first. 471 * hijack the 'zv_visited' marker to avoid visiting the same vertex twice. 472 */ 473 static void 474 topo_sort(char **result, size_t *idx, zfs_vertex_t *zgv) 475 { 476 int i; 477 478 /* avoid doing a search if we don't have to */ 479 if (zgv->zv_visited == 2) 480 return; 481 482 zfs_vertex_sort_edges(zgv); 483 for (i = 0; i < zgv->zv_edgecount; i++) 484 topo_sort(result, idx, zgv->zv_edges[i]->ze_dest); 485 486 /* we may have visited this in the course of the above */ 487 if (zgv->zv_visited == 2) 488 return; 489 490 result[*idx] = zfs_malloc(strlen(zgv->zv_dataset) + 1); 491 (void) strcpy(result[*idx], zgv->zv_dataset); 492 *idx += 1; 493 zgv->zv_visited = 2; 494 } 495 496 /* 497 * The only public interface for this file. Do the dirty work of constructing a 498 * child list for the given object. Construct the graph, do the toplogical 499 * sort, and then return the array of strings to the caller. 500 */ 501 char ** 502 get_dependents(const char *dataset, size_t *count) 503 { 504 char **result; 505 zfs_graph_t *zgp; 506 zfs_vertex_t *zvp; 507 508 zgp = construct_graph(dataset); 509 result = zfs_malloc(zgp->zg_nvertex * sizeof (char *)); 510 511 zvp = zfs_graph_lookup(zgp, dataset, 0); 512 513 *count = 0; 514 topo_sort(result, count, zvp); 515 516 /* 517 * Get rid of the last entry, which is our starting vertex and not 518 * strictly a dependent. 519 */ 520 assert(*count > 0); 521 free(result[*count - 1]); 522 (*count)--; 523 524 zfs_graph_destroy(zgp); 525 526 return (result); 527 } 528