xref: /netbsd-src/external/gpl2/lvm2/dist/lib/metadata/metadata.c (revision 274254cdae52594c1aa480a736aef78313d15c9c)
1 /*	$NetBSD: metadata.c,v 1.1.1.2 2009/02/18 11:17:14 haad Exp $	*/
2 
3 /*
4  * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
5  * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6  *
7  * This file is part of LVM2.
8  *
9  * This copyrighted material is made available to anyone wishing to use,
10  * modify, copy, or redistribute it subject to the terms and conditions
11  * of the GNU Lesser General Public License v.2.1.
12  *
13  * You should have received a copy of the GNU Lesser General Public License
14  * along with this program; if not, write to the Free Software Foundation,
15  * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16  */
17 
18 #include "lib.h"
19 #include "device.h"
20 #include "metadata.h"
21 #include "toolcontext.h"
22 #include "lvm-string.h"
23 #include "lvm-file.h"
24 #include "lvmcache.h"
25 #include "memlock.h"
26 #include "str_list.h"
27 #include "pv_alloc.h"
28 #include "activate.h"
29 #include "display.h"
30 #include "locking.h"
31 #include "archiver.h"
32 #include "defaults.h"
33 
34 #include <sys/param.h>
35 
/*
 * Read member 'field' of a physical volume through a generic handle:
 * 'handle' is cast to 'const struct physical_volume *' and dereferenced.
 * No NULL or type check is performed on 'handle'.
 *
 * FIXME: Check for valid handle before dereferencing field or log error?
 */
#define pv_field(handle, field)				\
	(((const struct physical_volume *)(handle))->field)
41 
/*
 * Forward declarations of file-local (static) helpers, so that they can be
 * called before their definitions appear.
 */
static struct physical_volume *_pv_read(struct cmd_context *cmd,
					const char *pv_name,
					struct dm_list *mdas,
					uint64_t *label_sector,
					int warnings);

static struct physical_volume *_pv_create(const struct format_type *fmt,
				  struct device *dev,
				  struct id *id, uint64_t size,
				  uint64_t pe_start,
				  uint32_t existing_extent_count,
				  uint32_t existing_extent_size,
				  int pvmetadatacopies,
				  uint64_t pvmetadatasize, struct dm_list *mdas);

static int _pv_write(struct cmd_context *cmd __attribute((unused)),
		     struct physical_volume *pv,
	     	     struct dm_list *mdas, int64_t label_sector);

static struct physical_volume *_find_pv_by_name(struct cmd_context *cmd,
			 			const char *pv_name);

static struct pv_list *_find_pv_in_vg(const struct volume_group *vg,
				      const char *pv_name);

static struct physical_volume *_find_pv_in_vg_by_uuid(const struct volume_group *vg,
						      const struct id *id);
69 
70 unsigned long pe_align(struct physical_volume *pv)
71 {
72 	if (pv->pe_align)
73 		goto out;
74 
75 	pv->pe_align = MAX(65536UL, lvm_getpagesize()) >> SECTOR_SHIFT;
76 
77 	/*
78 	 * Align to chunk size of underlying md device if present
79 	 */
80 	if (!pv->dev)
81 		goto out;
82 
83 	if (find_config_tree_bool(pv->fmt->cmd, "devices/md_chunk_alignment",
84 				  DEFAULT_MD_CHUNK_ALIGNMENT))
85 		pv->pe_align = MAX(pv->pe_align,
86 				   dev_md_chunk_size(pv->fmt->cmd->sysfs_dir,
87 						     pv->dev));
88 
89 	log_very_verbose("%s: Setting PE alignment to %lu sectors.",
90 			 dev_name(pv->dev), pv->pe_align);
91 
92 out:
93 	return pv->pe_align;
94 }
95 
96 /**
97  * add_pv_to_vg - Add a physical volume to a volume group
98  * @vg - volume group to add to
99  * @pv_name - name of the pv (to be removed)
100  * @pv - physical volume to add to volume group
101  *
102  * Returns:
103  *  0 - failure
104  *  1 - success
105  * FIXME: remove pv_name - obtain safely from pv
106  */
107 int add_pv_to_vg(struct volume_group *vg, const char *pv_name,
108 		 struct physical_volume *pv)
109 {
110 	struct pv_list *pvl;
111 	struct format_instance *fid = vg->fid;
112 	struct dm_pool *mem = fid->fmt->cmd->mem;
113 
114 	log_verbose("Adding physical volume '%s' to volume group '%s'",
115 		    pv_name, vg->name);
116 
117 	if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl)))) {
118 		log_error("pv_list allocation for '%s' failed", pv_name);
119 		return 0;
120 	}
121 
122 	if (!is_orphan_vg(pv->vg_name)) {
123 		log_error("Physical volume '%s' is already in volume group "
124 			  "'%s'", pv_name, pv->vg_name);
125 		return 0;
126 	}
127 
128 	if (pv->fmt != fid->fmt) {
129 		log_error("Physical volume %s is of different format type (%s)",
130 			  pv_name, pv->fmt->name);
131 		return 0;
132 	}
133 
134 	/* Ensure PV doesn't depend on another PV already in the VG */
135 	if (pv_uses_vg(pv, vg)) {
136 		log_error("Physical volume %s might be constructed from same "
137 			  "volume group %s", pv_name, vg->name);
138 		return 0;
139 	}
140 
141 	if (!(pv->vg_name = dm_pool_strdup(mem, vg->name))) {
142 		log_error("vg->name allocation failed for '%s'", pv_name);
143 		return 0;
144 	}
145 
146 	memcpy(&pv->vgid, &vg->id, sizeof(vg->id));
147 
148 	/* Units of 512-byte sectors */
149 	pv->pe_size = vg->extent_size;
150 
151 	/* FIXME Do proper rounding-up alignment? */
152 	/* Reserved space for label; this holds 0 for PVs created by LVM1 */
153 	if (pv->pe_start < pe_align(pv))
154 		pv->pe_start = pe_align(pv);
155 
156 	/*
157 	 * pe_count must always be calculated by pv_setup
158 	 */
159 	pv->pe_alloc_count = 0;
160 
161 	if (!fid->fmt->ops->pv_setup(fid->fmt, UINT64_C(0), 0,
162 				     vg->extent_size, 0, UINT64_C(0),
163 				     &fid->metadata_areas, pv, vg)) {
164 		log_error("Format-specific setup of physical volume '%s' "
165 			  "failed.", pv_name);
166 		return 0;
167 	}
168 
169 	if (_find_pv_in_vg(vg, pv_name)) {
170 		log_error("Physical volume '%s' listed more than once.",
171 			  pv_name);
172 		return 0;
173 	}
174 
175 	if (vg->pv_count && (vg->pv_count == vg->max_pv)) {
176 		log_error("No space for '%s' - volume group '%s' "
177 			  "holds max %d physical volume(s).", pv_name,
178 			  vg->name, vg->max_pv);
179 		return 0;
180 	}
181 
182 	if (!alloc_pv_segment_whole_pv(mem, pv))
183 		return_0;
184 
185 	pvl->pv = pv;
186 	dm_list_add(&vg->pvs, &pvl->list);
187 
188 	if ((uint64_t) vg->extent_count + pv->pe_count > UINT32_MAX) {
189 		log_error("Unable to add %s to %s: new extent count (%"
190 			  PRIu64 ") exceeds limit (%" PRIu32 ").",
191 			  pv_name, vg->name,
192 			  (uint64_t) vg->extent_count + pv->pe_count,
193 			  UINT32_MAX);
194 		return 0;
195 	}
196 
197 	vg->pv_count++;
198 	vg->extent_count += pv->pe_count;
199 	vg->free_count += pv->pe_count;
200 
201 	return 1;
202 }
203 
204 static int _copy_pv(struct physical_volume *pv_to,
205 		    struct physical_volume *pv_from)
206 {
207 	memcpy(pv_to, pv_from, sizeof(*pv_to));
208 
209 	if (!str_list_dup(pv_to->fmt->cmd->mem, &pv_to->tags, &pv_from->tags)) {
210 		log_error("PV tags duplication failed");
211 		return 0;
212 	}
213 
214 	if (!peg_dup(pv_to->fmt->cmd->mem, &pv_to->segments,
215 		     &pv_from->segments))
216 		return_0;
217 
218 	return 1;
219 }
220 
221 int get_pv_from_vg_by_id(const struct format_type *fmt, const char *vg_name,
222 			 const char *vgid, const char *pvid,
223 			 struct physical_volume *pv)
224 {
225 	struct volume_group *vg;
226 	struct pv_list *pvl;
227 	int consistent = 0;
228 
229 	if (!(vg = vg_read(fmt->cmd, vg_name, vgid, &consistent))) {
230 		log_error("get_pv_from_vg_by_id: vg_read failed to read VG %s",
231 			  vg_name);
232 		return 0;
233 	}
234 
235 	if (!consistent)
236 		log_warn("WARNING: Volume group %s is not consistent",
237 			 vg_name);
238 
239 	dm_list_iterate_items(pvl, &vg->pvs) {
240 		if (id_equal(&pvl->pv->id, (const struct id *) pvid)) {
241 			if (!_copy_pv(pv, pvl->pv))
242 				return_0;
243 			return 1;
244 		}
245 	}
246 
247 	return 0;
248 }
249 
250 static int validate_new_vg_name(struct cmd_context *cmd, const char *vg_name)
251 {
252 	char vg_path[PATH_MAX];
253 
254 	if (!validate_name(vg_name))
255 		return_0;
256 
257 	snprintf(vg_path, PATH_MAX, "%s%s", cmd->dev_dir, vg_name);
258 	if (path_exists(vg_path)) {
259 		log_error("%s: already exists in filesystem", vg_path);
260 		return 0;
261 	}
262 
263 	return 1;
264 }
265 
266 int validate_vg_rename_params(struct cmd_context *cmd,
267 			      const char *vg_name_old,
268 			      const char *vg_name_new)
269 {
270 	unsigned length;
271 	char *dev_dir;
272 
273 	dev_dir = cmd->dev_dir;
274 	length = strlen(dev_dir);
275 
276 	/* Check sanity of new name */
277 	if (strlen(vg_name_new) > NAME_LEN - length - 2) {
278 		log_error("New volume group path exceeds maximum length "
279 			  "of %d!", NAME_LEN - length - 2);
280 		return 0;
281 	}
282 
283 	if (!validate_new_vg_name(cmd, vg_name_new)) {
284 		log_error("New volume group name \"%s\" is invalid",
285 			  vg_name_new);
286 		return 0;
287 	}
288 
289 	if (!strcmp(vg_name_old, vg_name_new)) {
290 		log_error("Old and new volume group names must differ");
291 		return 0;
292 	}
293 
294 	return 1;
295 }
296 
297 int vg_rename(struct cmd_context *cmd, struct volume_group *vg,
298 	      const char *new_name)
299 {
300 	struct dm_pool *mem = cmd->mem;
301 	struct pv_list *pvl;
302 
303 	if (!(vg->name = dm_pool_strdup(mem, new_name))) {
304 		log_error("vg->name allocation failed for '%s'", new_name);
305 		return 0;
306 	}
307 
308 	dm_list_iterate_items(pvl, &vg->pvs) {
309 		if (!(pvl->pv->vg_name = dm_pool_strdup(mem, new_name))) {
310 			log_error("pv->vg_name allocation failed for '%s'",
311 				  pv_dev_name(pvl->pv));
312 			return 0;
313 		}
314 	}
315 
316 	return 1;
317 }
318 
319 static int remove_lvs_in_vg(struct cmd_context *cmd,
320 			    struct volume_group *vg,
321 			    force_t force)
322 {
323 	struct dm_list *lst;
324 	struct lv_list *lvl;
325 
326 	while ((lst = dm_list_first(&vg->lvs))) {
327 		lvl = dm_list_item(lst, struct lv_list);
328 		if (!lv_remove_with_dependencies(cmd, lvl->lv, force))
329 		    return 0;
330 	}
331 
332 	return 1;
333 }
334 
/*
 * Remove volume group @vg (named @vg_name) and turn its PVs into orphans.
 *
 * Steps, in order:
 *  - refuse a missing or inconsistent VG, or one with missing PVs;
 *  - run vg_check_status(vg, EXPORTED_VG) and give up if it fails;
 *  - if displayable LVs exist, ask for confirmation when @force is PROMPT,
 *    then remove all LVs;
 *  - archive the metadata and remove it from every metadata area;
 *  - for each PV: point vg_name at the format's orphan VG name, reset the
 *    status to ALLOCATABLE_PV, re-read the device size and rewrite the PV;
 *  - drop the backup of the VG.
 *
 * A failure on an individual PV does not stop the loop; the remaining PVs
 * are still processed and the function reports failure at the end.
 *
 * Returns 1 on complete success, 0 otherwise.
 *
 * NOTE(review): @force carries an "unused" attribute although the body
 * does use it.
 */
/* FIXME: remove redundant vg_name */
int vg_remove_single(struct cmd_context *cmd, const char *vg_name,
		     struct volume_group *vg, int consistent,
		     force_t force __attribute((unused)))
{
	struct physical_volume *pv;
	struct pv_list *pvl;
	unsigned lv_count;
	int ret = 1;

	if (!vg || !consistent || vg_missing_pv_count(vg)) {
		log_error("Volume group \"%s\" not found, is inconsistent "
			  "or has PVs missing.", vg_name);
		log_error("Consider vgreduce --removemissing if metadata "
			  "is inconsistent.");
		return 0;
	}

	if (!vg_check_status(vg, EXPORTED_VG))
		return 0;

	lv_count = displayable_lvs_in_vg(vg);

	if (lv_count) {
		/* Only an explicit 'n' answer aborts the removal. */
		if ((force == PROMPT) &&
		    (yes_no_prompt("Do you really want to remove volume "
				   "group \"%s\" containing %u "
				   "logical volumes? [y/n]: ",
				   vg_name, lv_count) == 'n')) {
			log_print("Volume group \"%s\" not removed", vg_name);
			return 0;
		}
		if (!remove_lvs_in_vg(cmd, vg, force))
			return 0;
	}

	/* Re-count: no displayable LV may be left before the VG goes away. */
	lv_count = displayable_lvs_in_vg(vg);

	if (lv_count) {
		log_error("Volume group \"%s\" still contains %u "
			  "logical volume(s)", vg_name, lv_count);
		return 0;
	}

	if (!archive(vg))
		return 0;

	if (!vg_remove(vg)) {
		log_error("vg_remove %s failed", vg_name);
		return 0;
	}

	/* init physical volumes */
	dm_list_iterate_items(pvl, &vg->pvs) {
		pv = pvl->pv;
		log_verbose("Removing physical volume \"%s\" from "
			    "volume group \"%s\"", pv_dev_name(pv), vg_name);
		pv->vg_name = vg->fid->fmt->orphan_vg_name;
		pv->status = ALLOCATABLE_PV;

		if (!dev_get_size(pv_dev(pv), &pv->size)) {
			log_error("%s: Couldn't get size.", pv_dev_name(pv));
			/* Remember the failure but keep going with the other PVs. */
			ret = 0;
			continue;
		}

		/* FIXME Write to same sector label was read from */
		if (!pv_write(cmd, pv, NULL, INT64_C(-1))) {
			log_error("Failed to remove physical volume \"%s\""
				  " from volume group \"%s\"",
				  pv_dev_name(pv), vg_name);
			ret = 0;
		}
	}

	backup_remove(cmd, vg_name);

	if (ret)
		log_print("Volume group \"%s\" successfully removed", vg_name);
	else
		log_error("Volume group \"%s\" not properly removed", vg_name);

	return ret;
}
419 
420 int vg_extend(struct volume_group *vg, int pv_count, char **pv_names)
421 {
422 	int i;
423 	struct physical_volume *pv;
424 
425 	/* attach each pv */
426 	for (i = 0; i < pv_count; i++) {
427 		if (!(pv = pv_by_path(vg->fid->fmt->cmd, pv_names[i]))) {
428 			log_error("%s not identified as an existing "
429 				  "physical volume", pv_names[i]);
430 			goto bad;
431 		}
432 
433 		if (!add_pv_to_vg(vg, pv_names[i], pv))
434 			goto bad;
435 	}
436 
437 /* FIXME Decide whether to initialise and add new mdahs to format instance */
438 
439 	return 1;
440 
441       bad:
442 	log_error("Unable to add physical volume '%s' to "
443 		  "volume group '%s'.", pv_names[i], vg->name);
444 	return 0;
445 }
446 
/*
 * Return @vg_name with a leading @dev_dir removed.
 *
 * If @vg_name does not start with @dev_dir it is returned unchanged.  The
 * result always points into the caller's @vg_name string.
 */
const char *strip_dir(const char *vg_name, const char *dev_dir)
{
	const size_t prefix_len = strlen(dev_dir);

	if (strncmp(vg_name, dev_dir, prefix_len) != 0)
		return vg_name;

	return vg_name + prefix_len;
}
455 
456 /*
457  * Validate parameters to vg_create() before calling.
458  * FIXME: Move inside vg_create library function.
459  * FIXME: Change vgcreate_params struct to individual gets/sets
460  */
461 int validate_vg_create_params(struct cmd_context *cmd,
462 			      struct vgcreate_params *vp)
463 {
464 	if (!validate_new_vg_name(cmd, vp->vg_name)) {
465 		log_error("New volume group name \"%s\" is invalid",
466 			  vp->vg_name);
467 		return 1;
468 	}
469 
470 	if (vp->alloc == ALLOC_INHERIT) {
471 		log_error("Volume Group allocation policy cannot inherit "
472 			  "from anything");
473 		return 1;
474 	}
475 
476 	if (!vp->extent_size) {
477 		log_error("Physical extent size may not be zero");
478 		return 1;
479 	}
480 
481 	if (!(cmd->fmt->features & FMT_UNLIMITED_VOLS)) {
482 		if (!vp->max_lv)
483 			vp->max_lv = 255;
484 		if (!vp->max_pv)
485 			vp->max_pv = 255;
486 		if (vp->max_lv > 255 || vp->max_pv > 255) {
487 			log_error("Number of volumes may not exceed 255");
488 			return 1;
489 		}
490 	}
491 
492 	return 0;
493 }
494 
/*
 * Build a new in-memory volume group called @vg_name from the given PVs.
 *
 * The VG structure and everything hanging off it is allocated from
 * cmd->mem.  The function refuses to continue if vg_read() finds a VG with
 * the same name, creates a fresh UUID, strips cmd->dev_dir from the name,
 * initialises the counters and lists, creates a format instance through
 * cmd->fmt, runs the format's optional vg_setup hook and finally attaches
 * the PVs with vg_extend().
 *
 * Returns the new volume group, or NULL on failure.  On failure the pool
 * is released back to the point where the VG structure was allocated.
 *
 * NOTE(review): the "already exists" lookup uses @vg_name before
 * cmd->dev_dir is stripped from it - confirm callers pass a bare name.
 */
struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name,
			       uint32_t extent_size, uint32_t max_pv,
			       uint32_t max_lv, alloc_policy_t alloc,
			       int pv_count, char **pv_names)
{
	struct volume_group *vg;
	struct dm_pool *mem = cmd->mem;
	int consistent = 0;

	if (!(vg = dm_pool_zalloc(mem, sizeof(*vg))))
		return_NULL;

	/* is this vg name already in use ? */
	if (vg_read(cmd, vg_name, NULL, &consistent)) {
		log_err("A volume group called '%s' already exists.", vg_name);
		goto bad;
	}

	if (!id_create(&vg->id)) {
		log_err("Couldn't create uuid for volume group '%s'.", vg_name);
		goto bad;
	}

	/* Strip dev_dir if present */
	vg_name = strip_dir(vg_name, cmd->dev_dir);

	vg->cmd = cmd;

	if (!(vg->name = dm_pool_strdup(mem, vg_name)))
		goto_bad;

	vg->seqno = 0;

	vg->status = (RESIZEABLE_VG | LVM_READ | LVM_WRITE);
	if (!(vg->system_id = dm_pool_alloc(mem, NAME_LEN)))
		goto_bad;

	/* dm_pool_alloc() memory is not cleared here: start with an empty string. */
	*vg->system_id = '\0';

	vg->extent_size = extent_size;
	vg->extent_count = 0;
	vg->free_count = 0;

	vg->max_lv = max_lv;
	vg->max_pv = max_pv;

	vg->alloc = alloc;

	vg->pv_count = 0;
	dm_list_init(&vg->pvs);

	vg->lv_count = 0;
	dm_list_init(&vg->lvs);

	vg->snapshot_count = 0;

	dm_list_init(&vg->tags);

	if (!(vg->fid = cmd->fmt->ops->create_instance(cmd->fmt, vg_name,
						       NULL, NULL))) {
		log_error("Failed to create format instance");
		goto bad;
	}

	/* vg_setup is an optional format hook. */
	if (vg->fid->fmt->ops->vg_setup &&
	    !vg->fid->fmt->ops->vg_setup(vg->fid, vg)) {
		log_error("Format specific setup of volume group '%s' failed.",
			  vg_name);
		goto bad;
	}

	/* attach the pv's */
	if (!vg_extend(vg, pv_count, pv_names))
		goto_bad;

	return vg;

      bad:
	dm_pool_free(mem, vg);
	return NULL;
}
576 
/*
 * Convert *extents from units of @old_size to units of @new_size.
 *
 * The total size (old_size * *extents) must be an exact multiple of
 * @new_size and the resulting count must fit in 32 bits.  @desc1 and
 * @desc2 are concatenated to name the item in error messages.
 *
 * Returns 1 and updates *extents on success; returns 0 and leaves
 * *extents untouched on failure.
 */
static int _recalc_extents(uint32_t *extents, const char *desc1,
			   const char *desc2, uint32_t old_size,
			   uint32_t new_size)
{
	uint64_t size = (uint64_t) old_size * (*extents);

	/* A zero extent size would divide by zero below. */
	if (!new_size) {
		log_error("New extent size for %s%s may not be zero.",
			  desc1, desc2);
		return 0;
	}

	if (size % new_size) {
		log_error("New size %" PRIu64 " for %s%s not an exact number "
			  "of new extents.", size, desc1, desc2);
		return 0;
	}

	size /= new_size;

	if (size > UINT32_MAX) {
		log_error("New extent count %" PRIu64 " for %s%s exceeds "
			  "32 bits.", size, desc1, desc2);
		return 0;
	}

	*extents = (uint32_t) size;

	return 1;
}
601 
/*
 * Change the physical extent size of @vg to @new_size.
 *
 * Every extent-based quantity is re-expressed in the new unit with
 * _recalc_extents(): the VG totals, each PV's counts and its free PV
 * segments, and for each LV its extent count plus the start, length, area
 * length, copied-extent count and per-area values of every segment.
 *
 * vg->extent_size is changed first and nothing is rolled back, so after a
 * failure the metadata in memory is only partly converted and should not
 * be written out.
 *
 * Returns 1 on success, 0 if any value cannot be converted exactly, the
 * format's vg_setup hook rejects the new size, or an unassigned segment
 * area is found.
 */
int vg_change_pesize(struct cmd_context *cmd __attribute((unused)),
		     struct volume_group *vg, uint32_t new_size)
{
	uint32_t old_size = vg->extent_size;
	struct pv_list *pvl;
	struct lv_list *lvl;
	struct physical_volume *pv;
	struct logical_volume *lv;
	struct lv_segment *seg;
	struct pv_segment *pvseg;
	uint32_t s;

	vg->extent_size = new_size;

	/* Let the format validate/adjust the VG for the new extent size. */
	if (vg->fid->fmt->ops->vg_setup &&
	    !vg->fid->fmt->ops->vg_setup(vg->fid, vg))
		return_0;

	if (!_recalc_extents(&vg->extent_count, vg->name, "", old_size,
			     new_size))
		return_0;

	if (!_recalc_extents(&vg->free_count, vg->name, " free space",
			     old_size, new_size))
		return_0;

	/* foreach PV */
	dm_list_iterate_items(pvl, &vg->pvs) {
		pv = pvl->pv;

		pv->pe_size = new_size;
		if (!_recalc_extents(&pv->pe_count, pv_dev_name(pv), "",
				     old_size, new_size))
			return_0;

		if (!_recalc_extents(&pv->pe_alloc_count, pv_dev_name(pv),
				     " allocated space", old_size, new_size))
			return_0;

		/* foreach free PV Segment */
		dm_list_iterate_items(pvseg, &pv->segments) {
			/*
			 * Allocated PV segments are skipped here; the LV loop
			 * below converts values reached through the LV
			 * segments' areas.
			 */
			if (pvseg_is_allocated(pvseg))
				continue;

			if (!_recalc_extents(&pvseg->pe, pv_dev_name(pv),
					     " PV segment start", old_size,
					     new_size))
				return_0;
			if (!_recalc_extents(&pvseg->len, pv_dev_name(pv),
					     " PV segment length", old_size,
					     new_size))
				return_0;
		}
	}

	/* foreach LV */
	dm_list_iterate_items(lvl, &vg->lvs) {
		lv = lvl->lv;

		if (!_recalc_extents(&lv->le_count, lv->name, "", old_size,
				     new_size))
			return_0;

		dm_list_iterate_items(seg, &lv->segments) {
			if (!_recalc_extents(&seg->le, lv->name,
					     " segment start", old_size,
					     new_size))
				return_0;

			if (!_recalc_extents(&seg->len, lv->name,
					     " segment length", old_size,
					     new_size))
				return_0;

			if (!_recalc_extents(&seg->area_len, lv->name,
					     " area length", old_size,
					     new_size))
				return_0;

			if (!_recalc_extents(&seg->extents_copied, lv->name,
					     " extents moved", old_size,
					     new_size))
				return_0;

			/* foreach area */
			for (s = 0; s < seg->area_count; s++) {
				switch (seg_type(seg, s)) {
				case AREA_PV:
					/* Area on a PV: convert its start and the PV segment length. */
					if (!_recalc_extents
					    (&seg_pe(seg, s),
					     lv->name,
					     " pvseg start", old_size,
					     new_size))
						return_0;
					if (!_recalc_extents
					    (&seg_pvseg(seg, s)->len,
					     lv->name,
					     " pvseg length", old_size,
					     new_size))
						return_0;
					break;
				case AREA_LV:
					/* Area on another LV: only its start extent needs converting. */
					if (!_recalc_extents
					    (&seg_le(seg, s), lv->name,
					     " area start", old_size,
					     new_size))
						return_0;
					break;
				case AREA_UNASSIGNED:
					log_error("Unassigned area %u found in "
						  "segment", s);
					return 0;
				}
			}
		}

	}

	return 1;
}
722 
723 /*
724  * Separate metadata areas after splitting a VG.
725  * Also accepts orphan VG as destination (for vgreduce).
726  */
727 int vg_split_mdas(struct cmd_context *cmd __attribute((unused)),
728 		  struct volume_group *vg_from, struct volume_group *vg_to)
729 {
730 	struct metadata_area *mda, *mda2;
731 	struct dm_list *mdas_from, *mdas_to;
732 	int common_mda = 0;
733 
734 	mdas_from = &vg_from->fid->metadata_areas;
735 	mdas_to = &vg_to->fid->metadata_areas;
736 
737 	dm_list_iterate_items_safe(mda, mda2, mdas_from) {
738 		if (!mda->ops->mda_in_vg) {
739 			common_mda = 1;
740 			continue;
741 		}
742 
743 		if (!mda->ops->mda_in_vg(vg_from->fid, vg_from, mda)) {
744 			if (is_orphan_vg(vg_to->name))
745 				dm_list_del(&mda->list);
746 			else
747 				dm_list_move(mdas_to, &mda->list);
748 		}
749 	}
750 
751 	if (dm_list_empty(mdas_from) ||
752 	    (!is_orphan_vg(vg_to->name) && dm_list_empty(mdas_to)))
753 		return common_mda;
754 
755 	return 1;
756 }
757 
/**
 * pv_create - initialize a physical volume for use with a volume group
 * @cmd: command context; its format (cmd->fmt) is the one used
 * @dev: PV device to initialize
 * @id: PV UUID to use for initialization
 * @size: size of the PV in sectors
 * @pe_start: physical extent start
 * @existing_extent_count
 * @existing_extent_size
 * @pvmetadatacopies
 * @pvmetadatasize
 * @mdas
 *
 * Thin public wrapper: all arguments are handed unchanged to _pv_create().
 *
 * Returns:
 *   PV handle - physical volume initialized successfully
 *   NULL - invalid parameter or problem initializing the physical volume
 *
 * Note:
 *   FIXME - liblvm todo - tidy up arguments for external use (fmt, mdas, etc)
 */
pv_t *pv_create(const struct cmd_context *cmd,
		struct device *dev,
		struct id *id, uint64_t size,
		uint64_t pe_start,
		uint32_t existing_extent_count,
		uint32_t existing_extent_size,
		int pvmetadatacopies,
		uint64_t pvmetadatasize, struct dm_list *mdas)
{
	return _pv_create(cmd->fmt, dev, id, size, pe_start,
			  existing_extent_count,
			  existing_extent_size,
			  pvmetadatacopies,
			  pvmetadatasize, mdas);
}
793 
/*
 * Release @pv by handing it back to pool @mem with dm_pool_free().
 */
static void _free_pv(struct dm_pool *mem, struct physical_volume *pv)
{
	dm_pool_free(mem, pv);
}
798 
799 static struct physical_volume *_alloc_pv(struct dm_pool *mem, struct device *dev)
800 {
801 	struct physical_volume *pv = dm_pool_zalloc(mem, sizeof(*pv));
802 
803 	if (!pv)
804 		return_NULL;
805 
806 	if (!(pv->vg_name = dm_pool_zalloc(mem, NAME_LEN))) {
807 		dm_pool_free(mem, pv);
808 		return NULL;
809 	}
810 
811 	pv->pe_size = 0;
812 	pv->pe_start = 0;
813 	pv->pe_count = 0;
814 	pv->pe_alloc_count = 0;
815 	pv->pe_align = 0;
816 	pv->fmt = NULL;
817 	pv->dev = dev;
818 
819 	pv->status = ALLOCATABLE_PV;
820 
821 	dm_list_init(&pv->tags);
822 	dm_list_init(&pv->segments);
823 
824 	return pv;
825 }
826 
827 /* Sizes in sectors */
828 static struct physical_volume *_pv_create(const struct format_type *fmt,
829 				  struct device *dev,
830 				  struct id *id, uint64_t size,
831 				  uint64_t pe_start,
832 				  uint32_t existing_extent_count,
833 				  uint32_t existing_extent_size,
834 				  int pvmetadatacopies,
835 				  uint64_t pvmetadatasize, struct dm_list *mdas)
836 {
837 	struct dm_pool *mem = fmt->cmd->mem;
838 	struct physical_volume *pv = _alloc_pv(mem, dev);
839 
840 	if (!pv)
841 		return NULL;
842 
843 	if (id)
844 		memcpy(&pv->id, id, sizeof(*id));
845 	else if (!id_create(&pv->id)) {
846 		log_error("Failed to create random uuid for %s.",
847 			  dev_name(dev));
848 		goto bad;
849 	}
850 
851 	if (!dev_get_size(pv->dev, &pv->size)) {
852 		log_error("%s: Couldn't get size.", pv_dev_name(pv));
853 		goto bad;
854 	}
855 
856 	if (size) {
857 		if (size > pv->size)
858 			log_warn("WARNING: %s: Overriding real size. "
859 				  "You could lose data.", pv_dev_name(pv));
860 		log_verbose("%s: Pretending size is %" PRIu64 " sectors.",
861 			    pv_dev_name(pv), size);
862 		pv->size = size;
863 	}
864 
865 	if (pv->size < PV_MIN_SIZE) {
866 		log_error("%s: Size must exceed minimum of %ld sectors.",
867 			  pv_dev_name(pv), PV_MIN_SIZE);
868 		goto bad;
869 	}
870 
871 	pv->fmt = fmt;
872 	pv->vg_name = fmt->orphan_vg_name;
873 
874 	if (!fmt->ops->pv_setup(fmt, pe_start, existing_extent_count,
875 				existing_extent_size,
876 				pvmetadatacopies, pvmetadatasize, mdas,
877 				pv, NULL)) {
878 		log_error("%s: Format-specific setup of physical volume "
879 			  "failed.", pv_dev_name(pv));
880 		goto bad;
881 	}
882 	return pv;
883 
884       bad:
885 	_free_pv(mem, pv);
886 	return NULL;
887 }
888 
/*
 * Look up the PV called @pv_name in @vg.  Public wrapper that forwards to
 * _find_pv_in_vg() and returns its result.
 */
/* FIXME: liblvm todo - make into function that returns handle */
struct pv_list *find_pv_in_vg(const struct volume_group *vg,
			      const char *pv_name)
{
	return _find_pv_in_vg(vg, pv_name);
}
895 
896 static struct pv_list *_find_pv_in_vg(const struct volume_group *vg,
897 				      const char *pv_name)
898 {
899 	struct pv_list *pvl;
900 
901 	dm_list_iterate_items(pvl, &vg->pvs)
902 		if (pvl->pv->dev == dev_cache_get(pv_name, vg->cmd->filter))
903 			return pvl;
904 
905 	return NULL;
906 }
907 
908 struct pv_list *find_pv_in_pv_list(const struct dm_list *pl,
909 				   const struct physical_volume *pv)
910 {
911 	struct pv_list *pvl;
912 
913 	dm_list_iterate_items(pvl, pl)
914 		if (pvl->pv == pv)
915 			return pvl;
916 
917 	return NULL;
918 }
919 
920 int pv_is_in_vg(struct volume_group *vg, struct physical_volume *pv)
921 {
922 	struct pv_list *pvl;
923 
924 	dm_list_iterate_items(pvl, &vg->pvs)
925 		if (pv == pvl->pv)
926 			 return 1;
927 
928 	return 0;
929 }
930 
/**
 * find_pv_in_vg_by_uuid - Find PV in VG by PV UUID
 * @vg: volume group to search
 * @id: UUID of the PV to match
 *
 * Public wrapper that forwards to _find_pv_in_vg_by_uuid().
 *
 * Returns:
 *   PV handle - if UUID of PV found in VG
 *   NULL - invalid parameter or UUID of PV not found in VG
 *
 * Note
 *   FIXME - liblvm todo - make into function that takes VG handle
 */
pv_t *find_pv_in_vg_by_uuid(const struct volume_group *vg,
			    const struct id *id)
{
	return _find_pv_in_vg_by_uuid(vg, id);
}
948 
949 
950 static struct physical_volume *_find_pv_in_vg_by_uuid(const struct volume_group *vg,
951 						      const struct id *id)
952 {
953 	struct pv_list *pvl;
954 
955 	dm_list_iterate_items(pvl, &vg->pvs)
956 		if (id_equal(&pvl->pv->id, id))
957 			return pvl->pv;
958 
959 	return NULL;
960 }
961 
962 struct lv_list *find_lv_in_vg(const struct volume_group *vg,
963 			      const char *lv_name)
964 {
965 	struct lv_list *lvl;
966 	const char *ptr;
967 
968 	/* Use last component */
969 	if ((ptr = strrchr(lv_name, '/')))
970 		ptr++;
971 	else
972 		ptr = lv_name;
973 
974 	dm_list_iterate_items(lvl, &vg->lvs)
975 		if (!strcmp(lvl->lv->name, ptr))
976 			return lvl;
977 
978 	return NULL;
979 }
980 
981 struct lv_list *find_lv_in_lv_list(const struct dm_list *ll,
982 				   const struct logical_volume *lv)
983 {
984 	struct lv_list *lvl;
985 
986 	dm_list_iterate_items(lvl, ll)
987 		if (lvl->lv == lv)
988 			return lvl;
989 
990 	return NULL;
991 }
992 
993 struct lv_list *find_lv_in_vg_by_lvid(struct volume_group *vg,
994 				      const union lvid *lvid)
995 {
996 	struct lv_list *lvl;
997 
998 	dm_list_iterate_items(lvl, &vg->lvs)
999 		if (!strncmp(lvl->lv->lvid.s, lvid->s, sizeof(*lvid)))
1000 			return lvl;
1001 
1002 	return NULL;
1003 }
1004 
1005 struct logical_volume *find_lv(const struct volume_group *vg,
1006 			       const char *lv_name)
1007 {
1008 	struct lv_list *lvl = find_lv_in_vg(vg, lv_name);
1009 	return lvl ? lvl->lv : NULL;
1010 }
1011 
1012 struct physical_volume *find_pv(struct volume_group *vg, struct device *dev)
1013 {
1014 	struct pv_list *pvl;
1015 
1016 	dm_list_iterate_items(pvl, &vg->pvs)
1017 		if (dev == pvl->pv->dev)
1018 			return pvl->pv;
1019 
1020 	return NULL;
1021 }
1022 
/*
 * Look up the PV called @pv_name.  Public wrapper that forwards to
 * _find_pv_by_name() and returns its result (NULL on failure).
 */
/* FIXME: liblvm todo - make into function that returns handle */
struct physical_volume *find_pv_by_name(struct cmd_context *cmd,
					const char *pv_name)
{
	return _find_pv_by_name(cmd, pv_name);
}
1029 
1030 
1031 static struct physical_volume *_find_pv_by_name(struct cmd_context *cmd,
1032 			 			const char *pv_name)
1033 {
1034 	struct physical_volume *pv;
1035 
1036 	if (!(pv = _pv_read(cmd, pv_name, NULL, NULL, 1))) {
1037 		log_error("Physical volume %s not found", pv_name);
1038 		return NULL;
1039 	}
1040 
1041 	if (is_orphan_vg(pv->vg_name)) {
1042 		/* If a PV has no MDAs - need to search all VGs for it */
1043 		if (!scan_vgs_for_pvs(cmd))
1044 			return_NULL;
1045 		if (!(pv = _pv_read(cmd, pv_name, NULL, NULL, 1))) {
1046 			log_error("Physical volume %s not found", pv_name);
1047 			return NULL;
1048 		}
1049 	}
1050 
1051 	if (is_orphan_vg(pv->vg_name)) {
1052 		log_error("Physical volume %s not in a volume group", pv_name);
1053 		return NULL;
1054 	}
1055 
1056 	return pv;
1057 }
1058 
1059 /* Find segment at a given logical extent in an LV */
1060 struct lv_segment *find_seg_by_le(const struct logical_volume *lv, uint32_t le)
1061 {
1062 	struct lv_segment *seg;
1063 
1064 	dm_list_iterate_items(seg, &lv->segments)
1065 		if (le >= seg->le && le < seg->le + seg->len)
1066 			return seg;
1067 
1068 	return NULL;
1069 }
1070 
1071 struct lv_segment *first_seg(const struct logical_volume *lv)
1072 {
1073 	struct lv_segment *seg = NULL;
1074 
1075 	dm_list_iterate_items(seg, &lv->segments)
1076 		break;
1077 
1078 	return seg;
1079 }
1080 
1081 /* Find segment at a given physical extent in a PV */
1082 struct pv_segment *find_peg_by_pe(const struct physical_volume *pv, uint32_t pe)
1083 {
1084 	struct pv_segment *peg;
1085 
1086 	dm_list_iterate_items(peg, &pv->segments)
1087 		if (pe >= peg->pe && pe < peg->pe + peg->len)
1088 			return peg;
1089 
1090 	return NULL;
1091 }
1092 
1093 int vg_remove(struct volume_group *vg)
1094 {
1095 	struct metadata_area *mda;
1096 
1097 	/* FIXME Improve recovery situation? */
1098 	/* Remove each copy of the metadata */
1099 	dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
1100 		if (mda->ops->vg_remove &&
1101 		    !mda->ops->vg_remove(vg->fid, vg, mda))
1102 			return_0;
1103 	}
1104 
1105 	return 1;
1106 }
1107 
1108 unsigned displayable_lvs_in_vg(const struct volume_group *vg)
1109 {
1110 	struct lv_list *lvl;
1111 	unsigned lv_count = 0;
1112 
1113 	dm_list_iterate_items(lvl, &vg->lvs)
1114 		if (lv_is_displayable(lvl->lv))
1115 			lv_count++;
1116 
1117 	return lv_count;
1118 }
1119 
1120 /*
1121  * Determine whether two vgs are compatible for merging.
1122  */
1123 int vgs_are_compatible(struct cmd_context *cmd __attribute((unused)),
1124 		       struct volume_group *vg_from,
1125 		       struct volume_group *vg_to)
1126 {
1127 	struct lv_list *lvl1, *lvl2;
1128 	struct pv_list *pvl;
1129 	char *name1, *name2;
1130 
1131 	if (lvs_in_vg_activated(vg_from)) {
1132 		log_error("Logical volumes in \"%s\" must be inactive",
1133 			  vg_from->name);
1134 		return 0;
1135 	}
1136 
1137 	/* Check compatibility */
1138 	if (vg_to->extent_size != vg_from->extent_size) {
1139 		log_error("Extent sizes differ: %d (%s) and %d (%s)",
1140 			  vg_to->extent_size, vg_to->name,
1141 			  vg_from->extent_size, vg_from->name);
1142 		return 0;
1143 	}
1144 
1145 	if (vg_to->max_pv &&
1146 	    (vg_to->max_pv < vg_to->pv_count + vg_from->pv_count)) {
1147 		log_error("Maximum number of physical volumes (%d) exceeded "
1148 			  " for \"%s\" and \"%s\"", vg_to->max_pv, vg_to->name,
1149 			  vg_from->name);
1150 		return 0;
1151 	}
1152 
1153 	if (vg_to->max_lv &&
1154 	    (vg_to->max_lv < vg_to->lv_count + vg_from->lv_count)) {
1155 		log_error("Maximum number of logical volumes (%d) exceeded "
1156 			  " for \"%s\" and \"%s\"", vg_to->max_lv, vg_to->name,
1157 			  vg_from->name);
1158 		return 0;
1159 	}
1160 
1161 	/* Metadata types must be the same */
1162 	if (vg_to->fid->fmt != vg_from->fid->fmt) {
1163 		log_error("Metadata types differ for \"%s\" and \"%s\"",
1164 			  vg_to->name, vg_from->name);
1165 		return 0;
1166 	}
1167 
1168 	/* Clustering attribute must be the same */
1169 	if (vg_is_clustered(vg_to) != vg_is_clustered(vg_from)) {
1170 		log_error("Clustered attribute differs for \"%s\" and \"%s\"",
1171 			  vg_to->name, vg_from->name);
1172 		return 0;
1173 	}
1174 
1175 	/* Check no conflicts with LV names */
1176 	dm_list_iterate_items(lvl1, &vg_to->lvs) {
1177 		name1 = lvl1->lv->name;
1178 
1179 		dm_list_iterate_items(lvl2, &vg_from->lvs) {
1180 			name2 = lvl2->lv->name;
1181 
1182 			if (!strcmp(name1, name2)) {
1183 				log_error("Duplicate logical volume "
1184 					  "name \"%s\" "
1185 					  "in \"%s\" and \"%s\"",
1186 					  name1, vg_to->name, vg_from->name);
1187 				return 0;
1188 			}
1189 		}
1190 	}
1191 
1192 	/* Check no PVs are constructed from either VG */
1193 	dm_list_iterate_items(pvl, &vg_to->pvs) {
1194 		if (pv_uses_vg(pvl->pv, vg_from)) {
1195 			log_error("Physical volume %s might be constructed "
1196 				  "from same volume group %s.",
1197 				  pv_dev_name(pvl->pv), vg_from->name);
1198 			return 0;
1199 		}
1200 	}
1201 
1202 	dm_list_iterate_items(pvl, &vg_from->pvs) {
1203 		if (pv_uses_vg(pvl->pv, vg_to)) {
1204 			log_error("Physical volume %s might be constructed "
1205 				  "from same volume group %s.",
1206 				  pv_dev_name(pvl->pv), vg_to->name);
1207 			return 0;
1208 		}
1209 	}
1210 
1211 	return 1;
1212 }
1213 
/*
 * Context handed through the void *data argument of _lv_postorder_level():
 * the callback to invoke on each LV and the opaque pointer to pass to it.
 */
struct _lv_postorder_baton {
	int (*fn)(struct logical_volume *lv, void *data);	/* per-LV callback */
	void *data;						/* passed to fn unchanged */
};

/*
 * Forward declaration: _lv_postorder_level() and _lv_postorder_visit()
 * call each other.
 */
static int _lv_postorder_visit(struct logical_volume *,
			       int (*fn)(struct logical_volume *lv, void *data),
			       void *data);
1222 
1223 static int _lv_postorder_level(struct logical_volume *lv, void *data)
1224 {
1225 	struct _lv_postorder_baton *baton = data;
1226 	if (lv->status & POSTORDER_OPEN_FLAG)
1227 		return 1; // a data structure loop has closed...
1228 	lv->status |= POSTORDER_OPEN_FLAG;
1229 	int r =_lv_postorder_visit(lv, baton->fn, baton->data);
1230 	lv->status &= ~POSTORDER_OPEN_FLAG;
1231 	lv->status |= POSTORDER_FLAG;
1232 	return r;
1233 };
1234 
1235 static int _lv_each_dependency(struct logical_volume *lv,
1236 			       int (*fn)(struct logical_volume *lv, void *data),
1237 			       void *data)
1238 {
1239 	int i, s;
1240 	struct lv_segment *lvseg;
1241 
1242 	struct logical_volume *deps[] = {
1243 		lv->snapshot ? lv->snapshot->origin : 0,
1244 		lv->snapshot ? lv->snapshot->cow : 0 };
1245 	for (i = 0; i < sizeof(deps) / sizeof(*deps); ++i) {
1246 		if (deps[i] && !fn(deps[i], data))
1247 			return_0;
1248 	}
1249 
1250 	dm_list_iterate_items(lvseg, &lv->segments) {
1251 		if (lvseg->log_lv && !fn(lvseg->log_lv, data))
1252 			return_0;
1253 		for (s = 0; s < lvseg->area_count; ++s) {
1254 			if (seg_type(lvseg, s) == AREA_LV && !fn(seg_lv(lvseg,s), data))
1255 				return_0;
1256 		}
1257 	}
1258 	return 1;
1259 }
1260 
1261 static int _lv_postorder_cleanup(struct logical_volume *lv, void *data)
1262 {
1263 	if (!(lv->status & POSTORDER_FLAG))
1264 		return 1;
1265 	lv->status &= ~POSTORDER_FLAG;
1266 
1267 	if (!_lv_each_dependency(lv, _lv_postorder_cleanup, data))
1268 		return_0;
1269 	return 1;
1270 }
1271 
1272 static int _lv_postorder_visit(struct logical_volume *lv,
1273 			       int (*fn)(struct logical_volume *lv, void *data),
1274 			       void *data)
1275 {
1276 	struct _lv_postorder_baton baton;
1277 	int r;
1278 
1279 	if (lv->status & POSTORDER_FLAG)
1280 		return 1;
1281 
1282 	baton.fn = fn;
1283 	baton.data = data;
1284 	r = _lv_each_dependency(lv, _lv_postorder_level, &baton);
1285 	if (r) {
1286 		r = fn(lv, data);
1287 		log_verbose("visited %s", lv->name);
1288 	}
1289 	return r;
1290 }
1291 
/*
 * Depth-first walk of the LV dependency graph rooted at @lv, calling @fn on
 * each LV in postorder (dependencies before the LV that refers to them).
 * @data is handed to every call. A zero return from @fn signals an error,
 * ends the walk and becomes the return value of _lv_postorder.
 *
 * The marks left by the walk are cleared again before returning.
 */
static int _lv_postorder(struct logical_volume *lv,
			       int (*fn)(struct logical_volume *lv, void *data),
			       void *data)
{
	const int walked = _lv_postorder_visit(lv, fn, data);

	/* Runs after failed walks too; its result is not checked. */
	_lv_postorder_cleanup(lv, NULL);

	return walked;
}
1308 
/*
 * Result holder for _lv_mark_if_partial_collect(): partial is set to 1
 * once any inspected LV carries PARTIAL_LV.
 */
struct _lv_mark_if_partial_baton {
	int partial;
};
1312 
1313 static int _lv_mark_if_partial_collect(struct logical_volume *lv, void *data)
1314 {
1315 	struct _lv_mark_if_partial_baton *baton = data;
1316 	if (lv->status & PARTIAL_LV)
1317 		baton->partial = 1;
1318 
1319 	return 1;
1320 }
1321 
1322 static int _lv_mark_if_partial_single(struct logical_volume *lv, void *data)
1323 {
1324 	int s;
1325 	struct _lv_mark_if_partial_baton baton;
1326 	struct lv_segment *lvseg;
1327 
1328 	dm_list_iterate_items(lvseg, &lv->segments) {
1329 		for (s = 0; s < lvseg->area_count; ++s) {
1330 			if (seg_type(lvseg, s) == AREA_PV) {
1331 				if (seg_pv(lvseg, s)->status & MISSING_PV)
1332 					lv->status |= PARTIAL_LV;
1333 			}
1334 		}
1335 	}
1336 
1337 	baton.partial = 0;
1338 	_lv_each_dependency(lv, _lv_mark_if_partial_collect, &baton);
1339 
1340 	if (baton.partial)
1341 		lv->status |= PARTIAL_LV;
1342 
1343 	return 1;
1344 }
1345 
1346 static int _lv_mark_if_partial(struct logical_volume *lv)
1347 {
1348 	return _lv_postorder(lv, _lv_mark_if_partial_single, NULL);
1349 }
1350 
1351 /*
1352  * Mark LVs with missing PVs using PARTIAL_LV status flag. The flag is
1353  * propagated transitively, so LVs referencing other LVs are marked
1354  * partial as well, if any of their referenced LVs are marked partial.
1355  */
1356 static int _vg_mark_partial_lvs(struct volume_group *vg)
1357 {
1358 	struct logical_volume *lv;
1359 	struct lv_list *lvl;
1360 
1361 	dm_list_iterate_items(lvl, &vg->lvs) {
1362 		lv = lvl->lv;
1363 		if (!_lv_mark_if_partial(lv))
1364 			return_0;
1365 	}
1366 	return 1;
1367 }
1368 
1369 int vg_validate(struct volume_group *vg)
1370 {
1371 	struct pv_list *pvl, *pvl2;
1372 	struct lv_list *lvl, *lvl2;
1373 	char uuid[64] __attribute((aligned(8)));
1374 	int r = 1;
1375 	uint32_t lv_count;
1376 
1377 	/* FIXME Also check there's no data/metadata overlap */
1378 
1379 	dm_list_iterate_items(pvl, &vg->pvs) {
1380 		dm_list_iterate_items(pvl2, &vg->pvs) {
1381 			if (pvl == pvl2)
1382 				break;
1383 			if (id_equal(&pvl->pv->id,
1384 				     &pvl2->pv->id)) {
1385 				if (!id_write_format(&pvl->pv->id, uuid,
1386 						     sizeof(uuid)))
1387 					 stack;
1388 				log_error("Internal error: Duplicate PV id "
1389 					  "%s detected for %s in %s.",
1390 					  uuid, pv_dev_name(pvl->pv),
1391 					  vg->name);
1392 				r = 0;
1393 			}
1394 		}
1395 
1396 		if (strcmp(pvl->pv->vg_name, vg->name)) {
1397 			log_error("Internal error: VG name for PV %s is corrupted",
1398 				  pv_dev_name(pvl->pv));
1399 			r = 0;
1400 		}
1401 	}
1402 
1403 	if (!check_pv_segments(vg)) {
1404 		log_error("Internal error: PV segments corrupted in %s.",
1405 			  vg->name);
1406 		r = 0;
1407 	}
1408 
1409 	if ((lv_count = (uint32_t) dm_list_size(&vg->lvs)) !=
1410 	    vg->lv_count + 2 * vg->snapshot_count) {
1411 		log_error("Internal error: #internal LVs (%u) != #LVs (%"
1412 			  PRIu32 ") + 2 * #snapshots (%" PRIu32 ") in VG %s",
1413 			  dm_list_size(&vg->lvs), vg->lv_count,
1414 			  vg->snapshot_count, vg->name);
1415 		r = 0;
1416 	}
1417 
1418 	dm_list_iterate_items(lvl, &vg->lvs) {
1419 		dm_list_iterate_items(lvl2, &vg->lvs) {
1420 			if (lvl == lvl2)
1421 				break;
1422 			if (!strcmp(lvl->lv->name, lvl2->lv->name)) {
1423 				log_error("Internal error: Duplicate LV name "
1424 					  "%s detected in %s.", lvl->lv->name,
1425 					  vg->name);
1426 				r = 0;
1427 			}
1428 			if (id_equal(&lvl->lv->lvid.id[1],
1429 				     &lvl2->lv->lvid.id[1])) {
1430 				if (!id_write_format(&lvl->lv->lvid.id[1], uuid,
1431 						     sizeof(uuid)))
1432 					 stack;
1433 				log_error("Internal error: Duplicate LV id "
1434 					  "%s detected for %s and %s in %s.",
1435 					  uuid, lvl->lv->name, lvl2->lv->name,
1436 					  vg->name);
1437 				r = 0;
1438 			}
1439 		}
1440 	}
1441 
1442 	dm_list_iterate_items(lvl, &vg->lvs) {
1443 		if (!check_lv_segments(lvl->lv, 1)) {
1444 			log_error("Internal error: LV segments corrupted in %s.",
1445 				  lvl->lv->name);
1446 			r = 0;
1447 		}
1448 	}
1449 
1450 	if (!(vg->fid->fmt->features & FMT_UNLIMITED_VOLS) &&
1451 	    (!vg->max_lv || !vg->max_pv)) {
1452 		log_error("Internal error: Volume group %s has limited PV/LV count"
1453 			  " but limit is not set.", vg->name);
1454 		r = 0;
1455 	}
1456 
1457 	return r;
1458 }
1459 
1460 /*
1461  * After vg_write() returns success,
1462  * caller MUST call either vg_commit() or vg_revert()
1463  */
1464 int vg_write(struct volume_group *vg)
1465 {
1466 	struct dm_list *mdah;
1467 	struct metadata_area *mda;
1468 
1469 	if (!vg_validate(vg))
1470 		return_0;
1471 
1472 	if (vg->status & PARTIAL_VG) {
1473 		log_error("Cannot update partial volume group %s.", vg->name);
1474 		return 0;
1475 	}
1476 
1477 	if (vg_missing_pv_count(vg) && !vg->cmd->handles_missing_pvs) {
1478 		log_error("Cannot update volume group %s while physical "
1479 			  "volumes are missing.", vg->name);
1480 		return 0;
1481 	}
1482 
1483 	if (dm_list_empty(&vg->fid->metadata_areas)) {
1484 		log_error("Aborting vg_write: No metadata areas to write to!");
1485 		return 0;
1486 	}
1487 
1488 	if (!drop_cached_metadata(vg)) {
1489 		log_error("Unable to drop cached metadata for VG %s.", vg->name);
1490 		return 0;
1491 	}
1492 
1493 	vg->seqno++;
1494 
1495 	/* Write to each copy of the metadata area */
1496 	dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
1497 		if (!mda->ops->vg_write) {
1498 			log_error("Format does not support writing volume"
1499 				  "group metadata areas");
1500 			/* Revert */
1501 			dm_list_uniterate(mdah, &vg->fid->metadata_areas, &mda->list) {
1502 				mda = dm_list_item(mdah, struct metadata_area);
1503 
1504 				if (mda->ops->vg_revert &&
1505 				    !mda->ops->vg_revert(vg->fid, vg, mda)) {
1506 					stack;
1507 				}
1508 			}
1509 			return 0;
1510 		}
1511 		if (!mda->ops->vg_write(vg->fid, vg, mda)) {
1512 			stack;
1513 			/* Revert */
1514 			dm_list_uniterate(mdah, &vg->fid->metadata_areas, &mda->list) {
1515 				mda = dm_list_item(mdah, struct metadata_area);
1516 
1517 				if (mda->ops->vg_revert &&
1518 				    !mda->ops->vg_revert(vg->fid, vg, mda)) {
1519 					stack;
1520 				}
1521 			}
1522 			return 0;
1523 		}
1524 	}
1525 
1526 	/* Now pre-commit each copy of the new metadata */
1527 	dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
1528 		if (mda->ops->vg_precommit &&
1529 		    !mda->ops->vg_precommit(vg->fid, vg, mda)) {
1530 			stack;
1531 			/* Revert */
1532 			dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
1533 				if (mda->ops->vg_revert &&
1534 				    !mda->ops->vg_revert(vg->fid, vg, mda)) {
1535 					stack;
1536 				}
1537 			}
1538 			return 0;
1539 		}
1540 	}
1541 
1542 	return 1;
1543 }
1544 
1545 /* Commit pending changes */
1546 int vg_commit(struct volume_group *vg)
1547 {
1548 	struct metadata_area *mda;
1549 	int cache_updated = 0;
1550 	int failed = 0;
1551 
1552 	if (!vgname_is_locked(vg->name)) {
1553 		log_error("Internal error: Attempt to write new VG metadata "
1554 			  "without locking %s", vg->name);
1555 		return cache_updated;
1556 	}
1557 
1558 	/* Commit to each copy of the metadata area */
1559 	dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
1560 		failed = 0;
1561 		if (mda->ops->vg_commit &&
1562 		    !mda->ops->vg_commit(vg->fid, vg, mda)) {
1563 			stack;
1564 			failed = 1;
1565 		}
1566 		/* Update cache first time we succeed */
1567 		if (!failed && !cache_updated) {
1568 			lvmcache_update_vg(vg, 0);
1569 			cache_updated = 1;
1570 		}
1571 	}
1572 
1573 	/* If update failed, remove any cached precommitted metadata. */
1574 	if (!cache_updated && !drop_cached_metadata(vg))
1575 		log_error("Attempt to drop cached metadata failed "
1576 			  "after commit for VG %s.", vg->name);
1577 
1578 	/* If at least one mda commit succeeded, it was committed */
1579 	return cache_updated;
1580 }
1581 
1582 /* Don't commit any pending changes */
1583 int vg_revert(struct volume_group *vg)
1584 {
1585 	struct metadata_area *mda;
1586 
1587 	dm_list_iterate_items(mda, &vg->fid->metadata_areas) {
1588 		if (mda->ops->vg_revert &&
1589 		    !mda->ops->vg_revert(vg->fid, vg, mda)) {
1590 			stack;
1591 		}
1592 	}
1593 
1594 	if (!drop_cached_metadata(vg))
1595 		log_error("Attempt to drop cached metadata failed "
1596 			  "after reverted update for VG %s.", vg->name);
1597 
1598 	return 1;
1599 }
1600 
1601 /* Make orphan PVs look like a VG */
1602 static struct volume_group *_vg_read_orphans(struct cmd_context *cmd,
1603 					     const char *orphan_vgname)
1604 {
1605 	struct lvmcache_vginfo *vginfo;
1606 	struct lvmcache_info *info;
1607 	struct pv_list *pvl;
1608 	struct volume_group *vg;
1609 	struct physical_volume *pv;
1610 
1611 	lvmcache_label_scan(cmd, 0);
1612 
1613 	if (!(vginfo = vginfo_from_vgname(orphan_vgname, NULL)))
1614 		return_NULL;
1615 
1616 	if (!(vg = dm_pool_zalloc(cmd->mem, sizeof(*vg)))) {
1617 		log_error("vg allocation failed");
1618 		return NULL;
1619 	}
1620 	dm_list_init(&vg->pvs);
1621 	dm_list_init(&vg->lvs);
1622 	dm_list_init(&vg->tags);
1623 	vg->cmd = cmd;
1624 	if (!(vg->name = dm_pool_strdup(cmd->mem, orphan_vgname))) {
1625 		log_error("vg name allocation failed");
1626 		return NULL;
1627 	}
1628 
1629 	/* create format instance with appropriate metadata area */
1630 	if (!(vg->fid = vginfo->fmt->ops->create_instance(vginfo->fmt,
1631 							  orphan_vgname, NULL,
1632 							  NULL))) {
1633 		log_error("Failed to create format instance");
1634 		dm_pool_free(cmd->mem, vg);
1635 		return NULL;
1636 	}
1637 
1638 	dm_list_iterate_items(info, &vginfo->infos) {
1639 		if (!(pv = _pv_read(cmd, dev_name(info->dev), NULL, NULL, 1))) {
1640 			continue;
1641 		}
1642 		if (!(pvl = dm_pool_zalloc(cmd->mem, sizeof(*pvl)))) {
1643 			log_error("pv_list allocation failed");
1644 			return NULL;
1645 		}
1646 		pvl->pv = pv;
1647 		dm_list_add(&vg->pvs, &pvl->list);
1648 		vg->pv_count++;
1649 	}
1650 
1651 	return vg;
1652 }
1653 
1654 static int _update_pv_list(struct dm_list *all_pvs, struct volume_group *vg)
1655 {
1656 	struct pv_list *pvl, *pvl2;
1657 
1658 	dm_list_iterate_items(pvl, &vg->pvs) {
1659 		dm_list_iterate_items(pvl2, all_pvs) {
1660 			if (pvl->pv->dev == pvl2->pv->dev)
1661 				goto next_pv;
1662 		}
1663 		/* PV is not on list so add it.  Note that we don't copy it. */
1664        		if (!(pvl2 = dm_pool_zalloc(vg->cmd->mem, sizeof(*pvl2)))) {
1665 			log_error("pv_list allocation for '%s' failed",
1666 				  pv_dev_name(pvl->pv));
1667 			return 0;
1668 		}
1669 		pvl2->pv = pvl->pv;
1670 		dm_list_add(all_pvs, &pvl2->list);
1671   next_pv:
1672 		;
1673 	}
1674 
1675 	return 1;
1676 }
1677 
1678 int vg_missing_pv_count(const vg_t *vg)
1679 {
1680 	int ret = 0;
1681 	struct pv_list *pvl;
1682 	dm_list_iterate_items(pvl, &vg->pvs) {
1683 		if (pvl->pv->status & MISSING_PV)
1684 			++ ret;
1685 	}
1686 	return ret;
1687 }
1688 
/* Caller sets consistent to 1 if it's safe for vg_read to correct
 * inconsistent metadata on disk (i.e. the VG write lock is held).
 * This guarantees only consistent metadata is returned.
 * If consistent is 0, caller must check whether consistent == 1 on return
 * and take appropriate action if it isn't (e.g. abort; get write lock
 * and call vg_read again).
 *
 * If precommitted is set, use precommitted metadata if present.
 *
 * Either of vgname or vgid may be NULL.
 *
 * Outline:
 *   1. An orphan VG name is handed to _vg_read_orphans().
 *   2. A VG found by lvmcache_get_vg() is returned directly.
 *   3. Otherwise every metadata area of a fresh format instance is read and
 *      the copy with the highest seqno becomes correct_vg.
 *   4. The PV list of correct_vg is compared with the cached pvids; on a
 *      mismatch the labels are rescanned and step 3 is repeated.
 *   5. If the copies disagreed and *consistent is set, correct_vg is
 *      written and committed, and PVs gathered during a rescan that are no
 *      longer part of it are written out as orphans.
 *
 * Returns the VG, or NULL on failure.
 */
static struct volume_group *_vg_read(struct cmd_context *cmd,
				     const char *vgname,
				     const char *vgid,
				     int *consistent, unsigned precommitted)
{
	struct format_instance *fid;
	const struct format_type *fmt;
	struct volume_group *vg, *correct_vg = NULL;
	struct metadata_area *mda;
	struct lvmcache_info *info;
	int inconsistent = 0;
	int inconsistent_vgid = 0;
	int inconsistent_pvs = 0;
	unsigned use_precommitted = precommitted;
	struct dm_list *pvids;
	struct pv_list *pvl, *pvl2;
	struct dm_list all_pvs;
	char uuid[64] __attribute((aligned(8)));

	if (is_orphan_vg(vgname)) {
		if (use_precommitted) {
			log_error("Internal error: vg_read requires vgname "
				  "with pre-commit.");
			return NULL;
		}
		*consistent = 1;
		return _vg_read_orphans(cmd, vgname);
	}

	/* Fast path: a VG supplied by the cache is returned without re-reading. */
	if ((correct_vg = lvmcache_get_vg(vgid, precommitted))) {
		if (vg_missing_pv_count(correct_vg)) {
			log_verbose("There are %d physical volumes missing.",
				    vg_missing_pv_count(correct_vg));
			_vg_mark_partial_lvs(correct_vg);
		}
		*consistent = 1;
		return correct_vg;
	}

	/* Find the vgname in the cache */
	/* If it's not there we must do full scan to be completely sure */
	if (!(fmt = fmt_from_vgname(vgname, vgid))) {
		lvmcache_label_scan(cmd, 0);
		if (!(fmt = fmt_from_vgname(vgname, vgid))) {
			/* The wider scan below is not attempted while memlock() is set. */
			if (memlock())
				return_NULL;
			lvmcache_label_scan(cmd, 2);
			if (!(fmt = fmt_from_vgname(vgname, vgid)))
				return_NULL;
		}
	}

	/* Now determine the correct vgname if none was supplied */
	if (!vgname && !(vgname = vgname_from_vgid(cmd->mem, vgid)))
		return_NULL;

	/* Formats without FMT_PRECOMMIT are read through vg_read instead. */
	if (use_precommitted && !(fmt->features & FMT_PRECOMMIT))
		use_precommitted = 0;

	/* create format instance with appropriate metadata area */
	if (!(fid = fmt->ops->create_instance(fmt, vgname, vgid, NULL))) {
		log_error("Failed to create format instance");
		return NULL;
	}

	/* Store pvids for later so we can check if any are missing */
	if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid)))
		return_NULL;

	/* Ensure contents of all metadata areas match - else do recovery */
	dm_list_iterate_items(mda, &fid->metadata_areas) {
		/* An area that cannot be read marks the VG inconsistent. */
		if ((use_precommitted &&
		     !(vg = mda->ops->vg_read_precommit(fid, vgname, mda))) ||
		    (!use_precommitted &&
		     !(vg = mda->ops->vg_read(fid, vgname, mda)))) {
			inconsistent = 1;
			continue;
		}
		if (!correct_vg) {
			correct_vg = vg;
			continue;
		}
		/* FIXME Also ensure contents same - checksum compare? */
		/* On a seqno mismatch the copy with the higher seqno is kept. */
		if (correct_vg->seqno != vg->seqno) {
			inconsistent = 1;
			if (vg->seqno > correct_vg->seqno)
				correct_vg = vg;
		}
	}

	/* Ensure every PV in the VG was in the cache */
	if (correct_vg) {
		/*
		 * If the VG has PVs without mdas, they may still be
		 * orphans in the cache: update the cache state here.
		 */
		if (!inconsistent &&
		    dm_list_size(&correct_vg->pvs) > dm_list_size(pvids)) {
			dm_list_iterate_items(pvl, &correct_vg->pvs) {
				if (!pvl->pv->dev) {
					inconsistent_pvs = 1;
					break;
				}

				if (str_list_match_item(pvids, pvl->pv->dev->pvid))
					continue;

				/*
				 * PV not marked as belonging to this VG in cache.
				 * Check it's an orphan without metadata area.
				 */
				if (!(info = info_from_pvid(pvl->pv->dev->pvid, 1)) ||
				   !info->vginfo || !is_orphan_vg(info->vginfo->vgname) ||
				   dm_list_size(&info->mdas)) {
					inconsistent_pvs = 1;
					break;
				}
			}

			/* If the check passed, let's update VG and recalculate pvids */
			if (!inconsistent_pvs) {
				log_debug("Updating cache for PVs without mdas "
					  "in VG %s.", vgname);
				lvmcache_update_vg(correct_vg, use_precommitted);

				if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid)))
					return_NULL;
			}
		}

		/*
		 * The cached pvids plus the PVs flagged missing must add up to
		 * the VG's PV list. If not, either flag inconsistency (when
		 * memlock() is set) or discard correct_vg to force the rescan
		 * below.
		 */
		if (dm_list_size(&correct_vg->pvs) != dm_list_size(pvids)
		    + vg_missing_pv_count(correct_vg)) {
			log_debug("Cached VG %s had incorrect PV list",
				  vgname);

			if (memlock())
				inconsistent = 1;
			else
				correct_vg = NULL;
		} else dm_list_iterate_items(pvl, &correct_vg->pvs) {
			if (pvl->pv->status & MISSING_PV)
				continue;
			if (!str_list_match_item(pvids, pvl->pv->dev->pvid)) {
				log_debug("Cached VG %s had incorrect PV list",
					  vgname);
				correct_vg = NULL;
				break;
			}
		}
	}

	/* all_pvs is only filled in by the rescan branch below. */
	dm_list_init(&all_pvs);

	/* Failed to find VG where we expected it - full scan and retry */
	if (!correct_vg) {
		inconsistent = 0;

		if (memlock())
			return_NULL;
		lvmcache_label_scan(cmd, 2);
		if (!(fmt = fmt_from_vgname(vgname, vgid)))
			return_NULL;

		/*
		 * NOTE(review): use_precommitted is only ever cleared here; it
		 * is not reset from precommitted if the earlier format lacked
		 * FMT_PRECOMMIT and this one has it - confirm that is intended.
		 */
		if (precommitted && !(fmt->features & FMT_PRECOMMIT))
			use_precommitted = 0;

		/* create format instance with appropriate metadata area */
		if (!(fid = fmt->ops->create_instance(fmt, vgname, vgid, NULL))) {
			log_error("Failed to create format instance");
			return NULL;
		}

		/* Ensure contents of all metadata areas match - else recover */
		dm_list_iterate_items(mda, &fid->metadata_areas) {
			if ((use_precommitted &&
			     !(vg = mda->ops->vg_read_precommit(fid, vgname,
								mda))) ||
			    (!use_precommitted &&
			     !(vg = mda->ops->vg_read(fid, vgname, mda)))) {
				inconsistent = 1;
				continue;
			}
			if (!correct_vg) {
				correct_vg = vg;
				if (!_update_pv_list(&all_pvs, correct_vg))
					return_NULL;
				continue;
			}

			/* Copies carrying different VG UUIDs are never auto-repaired. */
			if (strncmp((char *)vg->id.uuid,
			    (char *)correct_vg->id.uuid, ID_LEN)) {
				inconsistent = 1;
				inconsistent_vgid = 1;
			}

			/* FIXME Also ensure contents same - checksums same? */
			if (correct_vg->seqno != vg->seqno) {
				inconsistent = 1;
				/* Remember this copy's PVs for the orphan pass further down. */
				if (!_update_pv_list(&all_pvs, vg))
					return_NULL;
				if (vg->seqno > correct_vg->seqno)
					correct_vg = vg;
			}
		}

		/* Give up looking */
		if (!correct_vg)
			return_NULL;
	}

	lvmcache_update_vg(correct_vg, use_precommitted);

	if (inconsistent) {
		/* FIXME Test should be if we're *using* precommitted metadata not if we were searching for it */
		if (use_precommitted) {
			log_error("Inconsistent pre-commit metadata copies "
				  "for volume group %s", vgname);
			return NULL;
		}

		/* Caller did not allow repair: return the VG with *consistent still 0. */
		if (!*consistent)
			return correct_vg;

		/* Don't touch if vgids didn't match */
		if (inconsistent_vgid) {
			log_error("Inconsistent metadata UUIDs found for "
				  "volume group %s", vgname);
			*consistent = 0;
			return correct_vg;
		}

		log_warn("WARNING: Inconsistent metadata found for VG %s - updating "
			 "to use version %u", vgname, correct_vg->seqno);

		if (!vg_write(correct_vg)) {
			log_error("Automatic metadata correction failed");
			return NULL;
		}

		if (!vg_commit(correct_vg)) {
			log_error("Automatic metadata correction commit "
				  "failed");
			return NULL;
		}

		/*
		 * Any PV collected on all_pvs whose device is not part of
		 * correct_vg is written back as an orphan.
		 */
		dm_list_iterate_items(pvl, &all_pvs) {
			dm_list_iterate_items(pvl2, &correct_vg->pvs) {
				if (pvl->pv->dev == pvl2->pv->dev)
					goto next_pv;
			}
			if (!id_write_format(&pvl->pv->id, uuid, sizeof(uuid)))
				return_NULL;
			log_error("Removing PV %s (%s) that no longer belongs to VG %s",
				  pv_dev_name(pvl->pv), uuid, correct_vg->name);
			if (!pv_write_orphan(cmd, pvl->pv))
				return_NULL;
      next_pv:
			;
		}
	}

	if (vg_missing_pv_count(correct_vg)) {
		log_verbose("There are %d physical volumes missing.",
			    vg_missing_pv_count(correct_vg));
		_vg_mark_partial_lvs(correct_vg);
	}

	/* A VG flagged PVMOVE is refused unless pvmove_mode() is set. */
	if ((correct_vg->status & PVMOVE) && !pvmove_mode()) {
		log_error("WARNING: Interrupted pvmove detected in "
			  "volume group %s", correct_vg->name);
		log_error("Please restore the metadata by running "
			  "vgcfgrestore.");
		return NULL;
	}

	*consistent = 1;
	return correct_vg;
}
1978 
1979 struct volume_group *vg_read(struct cmd_context *cmd, const char *vgname,
1980 			     const char *vgid, int *consistent)
1981 {
1982 	struct volume_group *vg;
1983 	struct lv_list *lvl;
1984 
1985 	if (!(vg = _vg_read(cmd, vgname, vgid, consistent, 0)))
1986 		return NULL;
1987 
1988 	if (!check_pv_segments(vg)) {
1989 		log_error("Internal error: PV segments corrupted in %s.",
1990 			  vg->name);
1991 		return NULL;
1992 	}
1993 
1994 	dm_list_iterate_items(lvl, &vg->lvs) {
1995 		if (!check_lv_segments(lvl->lv, 1)) {
1996 			log_error("Internal error: LV segments corrupted in %s.",
1997 				  lvl->lv->name);
1998 			return NULL;
1999 		}
2000 	}
2001 
2002 	return vg;
2003 }
2004 
2005 /* This is only called by lv_from_lvid, which is only called from
2006  * activate.c so we know the appropriate VG lock is already held and
2007  * the vg_read is therefore safe.
2008  */
2009 static struct volume_group *_vg_read_by_vgid(struct cmd_context *cmd,
2010 					    const char *vgid,
2011 					    unsigned precommitted)
2012 {
2013 	const char *vgname;
2014 	struct dm_list *vgnames;
2015 	struct volume_group *vg;
2016 	struct lvmcache_vginfo *vginfo;
2017 	struct str_list *strl;
2018 	int consistent = 0;
2019 
2020 	/* Is corresponding vgname already cached? */
2021 	if ((vginfo = vginfo_from_vgid(vgid)) &&
2022 	    vginfo->vgname && !is_orphan_vg(vginfo->vgname)) {
2023 		if ((vg = _vg_read(cmd, NULL, vgid,
2024 				   &consistent, precommitted)) &&
2025 		    !strncmp((char *)vg->id.uuid, vgid, ID_LEN)) {
2026 
2027 			if (!consistent) {
2028 				log_error("Volume group %s metadata is "
2029 					  "inconsistent", vg->name);
2030 			}
2031 			return vg;
2032 		}
2033 	}
2034 
2035 	/* Mustn't scan if memory locked: ensure cache gets pre-populated! */
2036 	if (memlock())
2037 		return NULL;
2038 
2039 	/* FIXME Need a genuine read by ID here - don't vg_read by name! */
2040 	/* FIXME Disabled vgrenames while active for now because we aren't
2041 	 *       allowed to do a full scan here any more. */
2042 
2043 	// The slow way - full scan required to cope with vgrename
2044 	if (!(vgnames = get_vgs(cmd, 2))) {
2045 		log_error("vg_read_by_vgid: get_vgs failed");
2046 		return NULL;
2047 	}
2048 
2049 	dm_list_iterate_items(strl, vgnames) {
2050 		vgname = strl->str;
2051 		if (!vgname || is_orphan_vg(vgname))
2052 			continue;	// FIXME Unnecessary?
2053 		consistent = 0;
2054 		if ((vg = _vg_read(cmd, vgname, vgid, &consistent,
2055 				   precommitted)) &&
2056 		    !strncmp((char *)vg->id.uuid, vgid, ID_LEN)) {
2057 
2058 			if (!consistent) {
2059 				log_error("Volume group %s metadata is "
2060 					  "inconsistent", vgname);
2061 				return NULL;
2062 			}
2063 			return vg;
2064 		}
2065 	}
2066 
2067 	return NULL;
2068 }
2069 
2070 /* Only called by activate.c */
2071 struct logical_volume *lv_from_lvid(struct cmd_context *cmd, const char *lvid_s,
2072 				    unsigned precommitted)
2073 {
2074 	struct lv_list *lvl;
2075 	struct volume_group *vg;
2076 	const union lvid *lvid;
2077 
2078 	lvid = (const union lvid *) lvid_s;
2079 
2080 	log_very_verbose("Finding volume group for uuid %s", lvid_s);
2081 	if (!(vg = _vg_read_by_vgid(cmd, (char *)lvid->id[0].uuid, precommitted))) {
2082 		log_error("Volume group for uuid not found: %s", lvid_s);
2083 		return NULL;
2084 	}
2085 
2086 	log_verbose("Found volume group \"%s\"", vg->name);
2087 	if (vg->status & EXPORTED_VG) {
2088 		log_error("Volume group \"%s\" is exported", vg->name);
2089 		return NULL;
2090 	}
2091 	if (!(lvl = find_lv_in_vg_by_lvid(vg, lvid))) {
2092 		log_very_verbose("Can't find logical volume id %s", lvid_s);
2093 		return NULL;
2094 	}
2095 
2096 	return lvl->lv;
2097 }
2098 
/**
 * pv_read - read and return a handle to a physical volume
 * @cmd: LVM command initiating the pv_read
 * @pv_name: full device name of the PV, including the path
 * @mdas: list of metadata areas of the PV
 * @label_sector: sector number where the PV label is stored on @pv_name
 * @warnings: when non-zero, an error is logged if no PV label can be read
 *
 * Public entry point; all work is done by _pv_read().
 *
 * Returns:
 *   PV handle - valid pv_name and successful read of the PV, or
 *   NULL - invalid parameter or error in reading the PV
 *
 * Note:
 *   FIXME - liblvm todo - make into function that returns handle
 */
struct physical_volume *pv_read(struct cmd_context *cmd, const char *pv_name,
				struct dm_list *mdas, uint64_t *label_sector,
				int warnings)
{
	struct physical_volume *handle;

	handle = _pv_read(cmd, pv_name, mdas, label_sector, warnings);

	return handle;
}
2120 
2121 /* FIXME Use label functions instead of PV functions */
2122 static struct physical_volume *_pv_read(struct cmd_context *cmd,
2123 					const char *pv_name,
2124 					struct dm_list *mdas,
2125 					uint64_t *label_sector,
2126 					int warnings)
2127 {
2128 	struct physical_volume *pv;
2129 	struct label *label;
2130 	struct lvmcache_info *info;
2131 	struct device *dev;
2132 
2133 	if (!(dev = dev_cache_get(pv_name, cmd->filter)))
2134 		return_NULL;
2135 
2136 	if (!(label_read(dev, &label, UINT64_C(0)))) {
2137 		if (warnings)
2138 			log_error("No physical volume label read from %s",
2139 				  pv_name);
2140 		return NULL;
2141 	}
2142 
2143 	info = (struct lvmcache_info *) label->info;
2144 	if (label_sector && *label_sector)
2145 		*label_sector = label->sector;
2146 
2147 	if (!(pv = dm_pool_zalloc(cmd->mem, sizeof(*pv)))) {
2148 		log_error("pv allocation for '%s' failed", pv_name);
2149 		return NULL;
2150 	}
2151 
2152 	dm_list_init(&pv->tags);
2153 	dm_list_init(&pv->segments);
2154 
2155 	/* FIXME Move more common code up here */
2156 	if (!(info->fmt->ops->pv_read(info->fmt, pv_name, pv, mdas))) {
2157 		log_error("Failed to read existing physical volume '%s'",
2158 			  pv_name);
2159 		return NULL;
2160 	}
2161 
2162 	if (!pv->size)
2163 		return NULL;
2164 
2165 	if (!alloc_pv_segment_whole_pv(cmd->mem, pv))
2166 		return_NULL;
2167 
2168 	return pv;
2169 }
2170 
/*
 * Return the list of VG names from lvmcache_get_vgnames().
 * May return empty list
 */
struct dm_list *get_vgs(struct cmd_context *cmd, int full_scan)
{
	struct dm_list *names = lvmcache_get_vgnames(cmd, full_scan);

	return names;
}
2176 
/* Return the list of VG ids from lvmcache_get_vgids(). */
struct dm_list *get_vgids(struct cmd_context *cmd, int full_scan)
{
	struct dm_list *ids = lvmcache_get_vgids(cmd, full_scan);

	return ids;
}
2181 
2182 static int _get_pvs(struct cmd_context *cmd, struct dm_list **pvslist)
2183 {
2184 	struct str_list *strl;
2185 	struct dm_list * uninitialized_var(results);
2186 	const char *vgname, *vgid;
2187 	struct dm_list *pvh, *tmp;
2188 	struct dm_list *vgids;
2189 	struct volume_group *vg;
2190 	int consistent = 0;
2191 	int old_pvmove;
2192 
2193 	lvmcache_label_scan(cmd, 0);
2194 
2195 	if (pvslist) {
2196 		if (!(results = dm_pool_alloc(cmd->mem, sizeof(*results)))) {
2197 			log_error("PV list allocation failed");
2198 			return 0;
2199 		}
2200 
2201 		dm_list_init(results);
2202 	}
2203 
2204 	/* Get list of VGs */
2205 	if (!(vgids = get_vgids(cmd, 0))) {
2206 		log_error("get_pvs: get_vgs failed");
2207 		return 0;
2208 	}
2209 
2210 	/* Read every VG to ensure cache consistency */
2211 	/* Orphan VG is last on list */
2212 	old_pvmove = pvmove_mode();
2213 	init_pvmove(1);
2214 	dm_list_iterate_items(strl, vgids) {
2215 		vgid = strl->str;
2216 		if (!vgid)
2217 			continue;	/* FIXME Unnecessary? */
2218 		consistent = 0;
2219 		if (!(vgname = vgname_from_vgid(NULL, vgid))) {
2220 			stack;
2221 			continue;
2222 		}
2223 		if (!(vg = vg_read(cmd, vgname, vgid, &consistent))) {
2224 			stack;
2225 			continue;
2226 		}
2227 		if (!consistent)
2228 			log_warn("WARNING: Volume Group %s is not consistent",
2229 				 vgname);
2230 
2231 		/* Move PVs onto results list */
2232 		if (pvslist)
2233 			dm_list_iterate_safe(pvh, tmp, &vg->pvs)
2234 				dm_list_add(results, pvh);
2235 	}
2236 	init_pvmove(old_pvmove);
2237 
2238 	if (pvslist)
2239 		*pvslist = results;
2240 	else
2241 		dm_pool_free(cmd->mem, vgids);
2242 
2243 	return 1;
2244 }
2245 
2246 struct dm_list *get_pvs(struct cmd_context *cmd)
2247 {
2248 	struct dm_list *results;
2249 
2250 	if (!_get_pvs(cmd, &results))
2251 		return NULL;
2252 
2253 	return results;
2254 }
2255 
2256 int scan_vgs_for_pvs(struct cmd_context *cmd)
2257 {
2258 	return _get_pvs(cmd, NULL);
2259 }
2260 
/*
 * Public entry point that forwards all arguments to _pv_write() and
 * returns its result.
 *
 * FIXME: liblvm todo - make into function that takes handle
 */
int pv_write(struct cmd_context *cmd __attribute((unused)),
	     struct physical_volume *pv,
	     struct dm_list *mdas, int64_t label_sector)
{
	int written = _pv_write(cmd, pv, mdas, label_sector);

	return written;
}
2268 
2269 static int _pv_write(struct cmd_context *cmd __attribute((unused)),
2270 		     struct physical_volume *pv,
2271 	     	     struct dm_list *mdas, int64_t label_sector)
2272 {
2273 	if (!pv->fmt->ops->pv_write) {
2274 		log_error("Format does not support writing physical volumes");
2275 		return 0;
2276 	}
2277 
2278 	if (!is_orphan_vg(pv->vg_name) || pv->pe_alloc_count) {
2279 		log_error("Assertion failed: can't _pv_write non-orphan PV "
2280 			  "(in VG %s)", pv->vg_name);
2281 		return 0;
2282 	}
2283 
2284 	if (!pv->fmt->ops->pv_write(pv->fmt, pv, mdas, label_sector))
2285 		return_0;
2286 
2287 	return 1;
2288 }
2289 
2290 int pv_write_orphan(struct cmd_context *cmd, struct physical_volume *pv)
2291 {
2292 	const char *old_vg_name = pv->vg_name;
2293 
2294 	pv->vg_name = cmd->fmt->orphan_vg_name;
2295 	pv->status = ALLOCATABLE_PV;
2296 	pv->pe_alloc_count = 0;
2297 
2298 	if (!dev_get_size(pv->dev, &pv->size)) {
2299 		log_error("%s: Couldn't get size.", pv_dev_name(pv));
2300 		return 0;
2301 	}
2302 
2303 	if (!_pv_write(cmd, pv, NULL, INT64_C(-1))) {
2304 		log_error("Failed to clear metadata from physical "
2305 			  "volume \"%s\" after removal from \"%s\"",
2306 			  pv_dev_name(pv), old_vg_name);
2307 		return 0;
2308 	}
2309 
2310 	return 1;
2311 }
2312 
2313 /**
2314  * is_orphan_vg - Determine whether a vg_name is an orphan
2315  * @vg_name: pointer to the vg_name
2316  */
2317 int is_orphan_vg(const char *vg_name)
2318 {
2319 	return (vg_name && vg_name[0] == ORPHAN_PREFIX[0]) ? 1 : 0;
2320 }
2321 
2322 /**
2323  * is_orphan - Determine whether a pv is an orphan based on its vg_name
2324  * @pv: handle to the physical volume
2325  */
2326 int is_orphan(const pv_t *pv)
2327 {
2328 	return is_orphan_vg(pv_field(pv, vg_name));
2329 }
2330 
2331 /**
2332  * is_pv - Determine whether a pv is a real pv or dummy one
2333  * @pv: handle to device
2334  */
2335 int is_pv(pv_t *pv)
2336 {
2337 	return (pv_field(pv, vg_name) ? 1 : 0);
2338 }
2339 
2340 /*
2341  * Returns:
2342  *  0 - fail
2343  *  1 - success
2344  */
2345 int pv_analyze(struct cmd_context *cmd, const char *pv_name,
2346 	       uint64_t label_sector)
2347 {
2348 	struct label *label;
2349 	struct device *dev;
2350 	struct metadata_area *mda;
2351 	struct lvmcache_info *info;
2352 
2353 	dev = dev_cache_get(pv_name, cmd->filter);
2354 	if (!dev) {
2355 		log_error("Device %s not found (or ignored by filtering).",
2356 			  pv_name);
2357 		return 0;
2358 	}
2359 
2360 	/*
2361 	 * First, scan for LVM labels.
2362 	 */
2363 	if (!label_read(dev, &label, label_sector)) {
2364 		log_error("Could not find LVM label on %s",
2365 			  pv_name);
2366 		return 0;
2367 	}
2368 
2369 	log_print("Found label on %s, sector %"PRIu64", type=%s",
2370 		  pv_name, label->sector, label->type);
2371 
2372 	/*
2373 	 * Next, loop through metadata areas
2374 	 */
2375 	info = label->info;
2376 	dm_list_iterate_items(mda, &info->mdas)
2377 		mda->ops->pv_analyze_mda(info->fmt, mda);
2378 
2379 	return 1;
2380 }
2381 
2382 
2383 
2384 /**
2385  * vg_check_status - check volume group status flags and log error
2386  * @vg - volume group to check status flags
2387  * @status - specific status flags to check (e.g. EXPORTED_VG)
2388  *
2389  * Returns:
2390  * 0 - fail
2391  * 1 - success
2392  */
2393 int vg_check_status(const struct volume_group *vg, uint32_t status)
2394 {
2395 	if ((status & CLUSTERED) &&
2396 	    (vg_is_clustered(vg)) && !locking_is_clustered() &&
2397 	    !lockingfailed()) {
2398 		log_error("Skipping clustered volume group %s", vg->name);
2399 		return 0;
2400 	}
2401 
2402 	if ((status & EXPORTED_VG) &&
2403 	    (vg->status & EXPORTED_VG)) {
2404 		log_error("Volume group %s is exported", vg->name);
2405 		return 0;
2406 	}
2407 
2408 	if ((status & LVM_WRITE) &&
2409 	    !(vg->status & LVM_WRITE)) {
2410 		log_error("Volume group %s is read-only", vg->name);
2411 		return 0;
2412 	}
2413 	if ((status & RESIZEABLE_VG) &&
2414 	    !(vg->status & RESIZEABLE_VG)) {
2415 		log_error("Volume group %s is not resizeable.", vg->name);
2416 		return 0;
2417 	}
2418 
2419 	return 1;
2420 }
2421 
2422 /*
2423  * vg_lock_and_read - consolidate vg locking, reading, and status flag checking
2424  *
2425  * Returns:
2426  * NULL - failure
2427  * non-NULL - success; volume group handle
2428  */
2429 vg_t *vg_lock_and_read(struct cmd_context *cmd, const char *vg_name,
2430 		       const char *vgid,
2431 		       uint32_t lock_flags, uint32_t status_flags,
2432 		       uint32_t misc_flags)
2433 {
2434 	struct volume_group *vg;
2435 	int consistent = 1;
2436 
2437 	if (!(misc_flags & CORRECT_INCONSISTENT))
2438 		consistent = 0;
2439 
2440 	if (!validate_name(vg_name)) {
2441 		log_error("Volume group name %s has invalid characters",
2442 			  vg_name);
2443 		return NULL;
2444 	}
2445 
2446 	if (!lock_vol(cmd, vg_name, lock_flags)) {
2447 		log_error("Can't get lock for %s", vg_name);
2448 		return NULL;
2449 	}
2450 
2451 	if (!(vg = vg_read(cmd, vg_name, vgid, &consistent)) ||
2452 	    ((misc_flags & FAIL_INCONSISTENT) && !consistent)) {
2453 		log_error("Volume group \"%s\" not found", vg_name);
2454 		unlock_vg(cmd, vg_name);
2455 		return NULL;
2456 	}
2457 
2458 	if (!vg_check_status(vg, status_flags)) {
2459 		unlock_vg(cmd, vg_name);
2460 		return NULL;
2461 	}
2462 
2463 	return vg;
2464 }
2465 
2466 /*
2467  * Gets/Sets for external LVM library
2468  */
2469 struct id pv_id(const pv_t *pv)
2470 {
2471 	return pv_field(pv, id);
2472 }
2473 
2474 const struct format_type *pv_format_type(const pv_t *pv)
2475 {
2476 	return pv_field(pv, fmt);
2477 }
2478 
2479 struct id pv_vgid(const pv_t *pv)
2480 {
2481 	return pv_field(pv, vgid);
2482 }
2483 
2484 struct device *pv_dev(const pv_t *pv)
2485 {
2486 	return pv_field(pv, dev);
2487 }
2488 
2489 const char *pv_vg_name(const pv_t *pv)
2490 {
2491 	return pv_field(pv, vg_name);
2492 }
2493 
2494 const char *pv_dev_name(const pv_t *pv)
2495 {
2496 	return dev_name(pv_dev(pv));
2497 }
2498 
2499 uint64_t pv_size(const pv_t *pv)
2500 {
2501 	return pv_field(pv, size);
2502 }
2503 
2504 uint32_t pv_status(const pv_t *pv)
2505 {
2506 	return pv_field(pv, status);
2507 }
2508 
2509 uint32_t pv_pe_size(const pv_t *pv)
2510 {
2511 	return pv_field(pv, pe_size);
2512 }
2513 
2514 uint64_t pv_pe_start(const pv_t *pv)
2515 {
2516 	return pv_field(pv, pe_start);
2517 }
2518 
2519 uint32_t pv_pe_count(const pv_t *pv)
2520 {
2521 	return pv_field(pv, pe_count);
2522 }
2523 
2524 uint32_t pv_pe_alloc_count(const pv_t *pv)
2525 {
2526 	return pv_field(pv, pe_alloc_count);
2527 }
2528 
2529 uint32_t vg_status(const vg_t *vg)
2530 {
2531 	return vg->status;
2532 }
2533 
2534 /**
2535  * pv_by_path - Given a device path return a PV handle if it is a PV
2536  * @cmd - handle to the LVM command instance
2537  * @pv_name - device path to read for the PV
2538  *
2539  * Returns:
2540  *  NULL - device path does not contain a valid PV
2541  *  non-NULL - PV handle corresponding to device path
2542  *
2543  * FIXME: merge with find_pv_by_name ?
2544  */
2545 pv_t *pv_by_path(struct cmd_context *cmd, const char *pv_name)
2546 {
2547 	struct dm_list mdas;
2548 
2549 	dm_list_init(&mdas);
2550 	return _pv_read(cmd, pv_name, &mdas, NULL, 1);
2551 }
2552