xref: /dflybsd-src/contrib/lvm2/dist/lib/device/dev-md.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
1*86d7f5d3SJohn Marino /*	$NetBSD: dev-md.c,v 1.1.1.2 2009/12/02 00:26:33 haad Exp $	*/
2*86d7f5d3SJohn Marino 
3*86d7f5d3SJohn Marino /*
4*86d7f5d3SJohn Marino  * Copyright (C) 2004 Luca Berra
5*86d7f5d3SJohn Marino  * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
6*86d7f5d3SJohn Marino  *
7*86d7f5d3SJohn Marino  * This file is part of LVM2.
8*86d7f5d3SJohn Marino  *
9*86d7f5d3SJohn Marino  * This copyrighted material is made available to anyone wishing to use,
10*86d7f5d3SJohn Marino  * modify, copy, or redistribute it subject to the terms and conditions
11*86d7f5d3SJohn Marino  * of the GNU Lesser General Public License v.2.1.
12*86d7f5d3SJohn Marino  *
13*86d7f5d3SJohn Marino  * You should have received a copy of the GNU Lesser General Public License
14*86d7f5d3SJohn Marino  * along with this program; if not, write to the Free Software Foundation,
15*86d7f5d3SJohn Marino  * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16*86d7f5d3SJohn Marino  */
17*86d7f5d3SJohn Marino 
18*86d7f5d3SJohn Marino #include "lib.h"
19*86d7f5d3SJohn Marino #include "metadata.h"
20*86d7f5d3SJohn Marino #include "xlate.h"
21*86d7f5d3SJohn Marino #include "filter.h"
22*86d7f5d3SJohn Marino 
23*86d7f5d3SJohn Marino #ifdef linux
24*86d7f5d3SJohn Marino 
25*86d7f5d3SJohn Marino /* Lifted from <linux/raid/md_p.h> because of difficulty including it */
26*86d7f5d3SJohn Marino 
/* Magic value compared against the 32 bits read at a candidate superblock offset. */
#define MD_SB_MAGIC 0xa92b4efc
/* Size of the reserved area, in bytes and in 512-byte units. */
#define MD_RESERVED_BYTES (64 * 1024ULL)
#define MD_RESERVED_SECTORS (MD_RESERVED_BYTES / 512)
/*
 * Round x down to a multiple of MD_RESERVED_SECTORS, then step back one
 * further reserved area.  dev_is_md() uses the result (in sectors) as the
 * position of a version 0.90.0 superblock.
 *
 * The argument is parenthesized so that an expression argument containing an
 * operator of lower precedence than '&' (e.g. "a | b") is masked as a whole.
 */
#define MD_NEW_SIZE_SECTORS(x) (((x) & ~(MD_RESERVED_SECTORS - 1)) \
				- MD_RESERVED_SECTORS)
32*86d7f5d3SJohn Marino 
_dev_has_md_magic(struct device * dev,uint64_t sb_offset)33*86d7f5d3SJohn Marino static int _dev_has_md_magic(struct device *dev, uint64_t sb_offset)
34*86d7f5d3SJohn Marino {
35*86d7f5d3SJohn Marino 	uint32_t md_magic;
36*86d7f5d3SJohn Marino 
37*86d7f5d3SJohn Marino 	/* Version 1 is little endian; version 0.90.0 is machine endian */
38*86d7f5d3SJohn Marino 	if (dev_read(dev, sb_offset, sizeof(uint32_t), &md_magic) &&
39*86d7f5d3SJohn Marino 	    ((md_magic == xlate32(MD_SB_MAGIC)) ||
40*86d7f5d3SJohn Marino 	     (md_magic == MD_SB_MAGIC)))
41*86d7f5d3SJohn Marino 		return 1;
42*86d7f5d3SJohn Marino 
43*86d7f5d3SJohn Marino 	return 0;
44*86d7f5d3SJohn Marino }
45*86d7f5d3SJohn Marino 
46*86d7f5d3SJohn Marino /*
47*86d7f5d3SJohn Marino  * Calculate the position of the superblock.
48*86d7f5d3SJohn Marino  * It is always aligned to a 4K boundary and
49*86d7f5d3SJohn Marino  * depending on minor_version, it can be:
50*86d7f5d3SJohn Marino  * 0: At least 8K, but less than 12K, from end of device
51*86d7f5d3SJohn Marino  * 1: At start of device
52*86d7f5d3SJohn Marino  * 2: 4K from start of device.
53*86d7f5d3SJohn Marino  */
/*
 * Version 1 superblock minor versions.  MD_MINOR_VERSION_MIN and
 * MD_MINOR_VERSION_MAX bound the range that dev_is_md() iterates over.
 */
typedef enum {
	MD_MINOR_V0 = 0,
	MD_MINOR_V1 = 1,
	MD_MINOR_V2 = 2,
	MD_MINOR_VERSION_MIN = MD_MINOR_V0,
	MD_MINOR_VERSION_MAX = MD_MINOR_V2
} md_minor_version_t;
61*86d7f5d3SJohn Marino 
/*
 * Compute the byte offset of a version 1 md superblock.
 *
 * size is the device size in sectors; the intermediate value is kept in
 * sectors and converted to bytes with SECTOR_SHIFT before returning.
 *
 *   MD_MINOR_V0: 16 sectors back from the end, rounded down to a multiple
 *                of 8 sectors.  The subtraction wraps if size < 16, so the
 *                caller must reject tiny devices first (dev_is_md() does).
 *   MD_MINOR_V1: start of the device.
 *   MD_MINOR_V2: 8 sectors from the start of the device.
 *
 * Any other minor_version yields offset 0 rather than an indeterminate
 * value.
 */
static uint64_t _v1_sb_offset(uint64_t size, md_minor_version_t minor_version)
{
	/* Defined starting value so no path can read it uninitialized. */
	uint64_t sb_offset = 0;

	switch (minor_version) {
	case MD_MINOR_V0:
		sb_offset = (size - 8 * 2) & ~(4 * 2 - 1ULL);
		break;
	case MD_MINOR_V1:
		sb_offset = 0;
		break;
	case MD_MINOR_V2:
		sb_offset = 4 * 2;
		break;
	default:
		/* Unknown minor version: keep the zero offset. */
		break;
	}
	sb_offset <<= SECTOR_SHIFT;

	return sb_offset;
}
81*86d7f5d3SJohn Marino 
82*86d7f5d3SJohn Marino /*
83*86d7f5d3SJohn Marino  * Returns -1 on error
84*86d7f5d3SJohn Marino  */
dev_is_md(struct device * dev,uint64_t * sb)85*86d7f5d3SJohn Marino int dev_is_md(struct device *dev, uint64_t *sb)
86*86d7f5d3SJohn Marino {
87*86d7f5d3SJohn Marino 	int ret = 1;
88*86d7f5d3SJohn Marino 	md_minor_version_t minor;
89*86d7f5d3SJohn Marino 	uint64_t size, sb_offset;
90*86d7f5d3SJohn Marino 
91*86d7f5d3SJohn Marino 	if (!dev_get_size(dev, &size)) {
92*86d7f5d3SJohn Marino 		stack;
93*86d7f5d3SJohn Marino 		return -1;
94*86d7f5d3SJohn Marino 	}
95*86d7f5d3SJohn Marino 
96*86d7f5d3SJohn Marino 	if (size < MD_RESERVED_SECTORS * 2)
97*86d7f5d3SJohn Marino 		return 0;
98*86d7f5d3SJohn Marino 
99*86d7f5d3SJohn Marino 	if (!dev_open(dev)) {
100*86d7f5d3SJohn Marino 		stack;
101*86d7f5d3SJohn Marino 		return -1;
102*86d7f5d3SJohn Marino 	}
103*86d7f5d3SJohn Marino 
104*86d7f5d3SJohn Marino 	/* Check if it is an md component device. */
105*86d7f5d3SJohn Marino 	/* Version 0.90.0 */
106*86d7f5d3SJohn Marino 	sb_offset = MD_NEW_SIZE_SECTORS(size) << SECTOR_SHIFT;
107*86d7f5d3SJohn Marino 	if (_dev_has_md_magic(dev, sb_offset))
108*86d7f5d3SJohn Marino 		goto out;
109*86d7f5d3SJohn Marino 
110*86d7f5d3SJohn Marino 	minor = MD_MINOR_VERSION_MIN;
111*86d7f5d3SJohn Marino 	/* Version 1, try v1.0 -> v1.2 */
112*86d7f5d3SJohn Marino 	do {
113*86d7f5d3SJohn Marino 		sb_offset = _v1_sb_offset(size, minor);
114*86d7f5d3SJohn Marino 		if (_dev_has_md_magic(dev, sb_offset))
115*86d7f5d3SJohn Marino 			goto out;
116*86d7f5d3SJohn Marino 	} while (++minor <= MD_MINOR_VERSION_MAX);
117*86d7f5d3SJohn Marino 
118*86d7f5d3SJohn Marino 	ret = 0;
119*86d7f5d3SJohn Marino 
120*86d7f5d3SJohn Marino out:
121*86d7f5d3SJohn Marino 	if (!dev_close(dev))
122*86d7f5d3SJohn Marino 		stack;
123*86d7f5d3SJohn Marino 
124*86d7f5d3SJohn Marino 	if (ret && sb)
125*86d7f5d3SJohn Marino 		*sb = sb_offset;
126*86d7f5d3SJohn Marino 
127*86d7f5d3SJohn Marino 	return ret;
128*86d7f5d3SJohn Marino }
129*86d7f5d3SJohn Marino 
_md_sysfs_attribute_snprintf(char * path,size_t size,const char * sysfs_dir,struct device * blkdev,const char * attribute)130*86d7f5d3SJohn Marino static int _md_sysfs_attribute_snprintf(char *path, size_t size,
131*86d7f5d3SJohn Marino 					const char *sysfs_dir,
132*86d7f5d3SJohn Marino 					struct device *blkdev,
133*86d7f5d3SJohn Marino 					const char *attribute)
134*86d7f5d3SJohn Marino {
135*86d7f5d3SJohn Marino 	struct stat info;
136*86d7f5d3SJohn Marino 	dev_t dev = blkdev->dev;
137*86d7f5d3SJohn Marino 	int ret = -1;
138*86d7f5d3SJohn Marino 
139*86d7f5d3SJohn Marino 	if (!sysfs_dir || !*sysfs_dir)
140*86d7f5d3SJohn Marino 		return ret;
141*86d7f5d3SJohn Marino 
142*86d7f5d3SJohn Marino 	if (MAJOR(dev) == blkext_major()) {
143*86d7f5d3SJohn Marino 		/* lookup parent MD device from blkext partition */
144*86d7f5d3SJohn Marino 		if (!get_primary_dev(sysfs_dir, blkdev, &dev))
145*86d7f5d3SJohn Marino 			return ret;
146*86d7f5d3SJohn Marino 	}
147*86d7f5d3SJohn Marino 
148*86d7f5d3SJohn Marino 	if (MAJOR(dev) != md_major())
149*86d7f5d3SJohn Marino 		return ret;
150*86d7f5d3SJohn Marino 
151*86d7f5d3SJohn Marino 	ret = dm_snprintf(path, size, "%s/dev/block/%d:%d/md/%s", sysfs_dir,
152*86d7f5d3SJohn Marino 			  (int)MAJOR(dev), (int)MINOR(dev), attribute);
153*86d7f5d3SJohn Marino 	if (ret < 0) {
154*86d7f5d3SJohn Marino 		log_error("dm_snprintf md %s failed", attribute);
155*86d7f5d3SJohn Marino 		return ret;
156*86d7f5d3SJohn Marino 	}
157*86d7f5d3SJohn Marino 
158*86d7f5d3SJohn Marino 	if (stat(path, &info) == -1) {
159*86d7f5d3SJohn Marino 		if (errno != ENOENT) {
160*86d7f5d3SJohn Marino 			log_sys_error("stat", path);
161*86d7f5d3SJohn Marino 			return ret;
162*86d7f5d3SJohn Marino 		}
163*86d7f5d3SJohn Marino 		/* old sysfs structure */
164*86d7f5d3SJohn Marino 		ret = dm_snprintf(path, size, "%s/block/md%d/md/%s",
165*86d7f5d3SJohn Marino 				  sysfs_dir, (int)MINOR(dev), attribute);
166*86d7f5d3SJohn Marino 		if (ret < 0) {
167*86d7f5d3SJohn Marino 			log_error("dm_snprintf old md %s failed", attribute);
168*86d7f5d3SJohn Marino 			return ret;
169*86d7f5d3SJohn Marino 		}
170*86d7f5d3SJohn Marino 	}
171*86d7f5d3SJohn Marino 
172*86d7f5d3SJohn Marino 	return ret;
173*86d7f5d3SJohn Marino }
174*86d7f5d3SJohn Marino 
_md_sysfs_attribute_scanf(const char * sysfs_dir,struct device * dev,const char * attribute_name,const char * attribute_fmt,void * attribute_value)175*86d7f5d3SJohn Marino static int _md_sysfs_attribute_scanf(const char *sysfs_dir,
176*86d7f5d3SJohn Marino 				     struct device *dev,
177*86d7f5d3SJohn Marino 				     const char *attribute_name,
178*86d7f5d3SJohn Marino 				     const char *attribute_fmt,
179*86d7f5d3SJohn Marino 				     void *attribute_value)
180*86d7f5d3SJohn Marino {
181*86d7f5d3SJohn Marino 	char path[PATH_MAX+1], buffer[64];
182*86d7f5d3SJohn Marino 	FILE *fp;
183*86d7f5d3SJohn Marino 	int ret = 0;
184*86d7f5d3SJohn Marino 
185*86d7f5d3SJohn Marino 	if (_md_sysfs_attribute_snprintf(path, PATH_MAX, sysfs_dir,
186*86d7f5d3SJohn Marino 					 dev, attribute_name) < 0)
187*86d7f5d3SJohn Marino 		return ret;
188*86d7f5d3SJohn Marino 
189*86d7f5d3SJohn Marino 	if (!(fp = fopen(path, "r"))) {
190*86d7f5d3SJohn Marino 		log_sys_error("fopen", path);
191*86d7f5d3SJohn Marino 		return ret;
192*86d7f5d3SJohn Marino 	}
193*86d7f5d3SJohn Marino 
194*86d7f5d3SJohn Marino 	if (!fgets(buffer, sizeof(buffer), fp)) {
195*86d7f5d3SJohn Marino 		log_sys_error("fgets", path);
196*86d7f5d3SJohn Marino 		goto out;
197*86d7f5d3SJohn Marino 	}
198*86d7f5d3SJohn Marino 
199*86d7f5d3SJohn Marino 	if ((ret = sscanf(buffer, attribute_fmt, attribute_value)) != 1) {
200*86d7f5d3SJohn Marino 		log_error("%s sysfs attr %s not in expected format: %s",
201*86d7f5d3SJohn Marino 			  dev_name(dev), attribute_name, buffer);
202*86d7f5d3SJohn Marino 		goto out;
203*86d7f5d3SJohn Marino 	}
204*86d7f5d3SJohn Marino 
205*86d7f5d3SJohn Marino out:
206*86d7f5d3SJohn Marino 	if (fclose(fp))
207*86d7f5d3SJohn Marino 		log_sys_error("fclose", path);
208*86d7f5d3SJohn Marino 
209*86d7f5d3SJohn Marino 	return ret;
210*86d7f5d3SJohn Marino }
211*86d7f5d3SJohn Marino 
212*86d7f5d3SJohn Marino /*
213*86d7f5d3SJohn Marino  * Retrieve chunk size from md device using sysfs.
214*86d7f5d3SJohn Marino  */
dev_md_chunk_size(const char * sysfs_dir,struct device * dev)215*86d7f5d3SJohn Marino static unsigned long dev_md_chunk_size(const char *sysfs_dir,
216*86d7f5d3SJohn Marino 				       struct device *dev)
217*86d7f5d3SJohn Marino {
218*86d7f5d3SJohn Marino 	const char *attribute = "chunk_size";
219*86d7f5d3SJohn Marino 	unsigned long chunk_size_bytes = 0UL;
220*86d7f5d3SJohn Marino 
221*86d7f5d3SJohn Marino 	if (_md_sysfs_attribute_scanf(sysfs_dir, dev, attribute,
222*86d7f5d3SJohn Marino 				      "%lu", &chunk_size_bytes) != 1)
223*86d7f5d3SJohn Marino 		return 0;
224*86d7f5d3SJohn Marino 
225*86d7f5d3SJohn Marino 	log_very_verbose("Device %s %s is %lu bytes.",
226*86d7f5d3SJohn Marino 			 dev_name(dev), attribute, chunk_size_bytes);
227*86d7f5d3SJohn Marino 
228*86d7f5d3SJohn Marino 	return chunk_size_bytes >> SECTOR_SHIFT;
229*86d7f5d3SJohn Marino }
230*86d7f5d3SJohn Marino 
/*
 * Read the "level" md sysfs attribute of dev and return the number that
 * follows the "raid" prefix.
 *
 * Returns -1 if the attribute could not be read or does not have the
 * "raid<N>" form.
 */
static int dev_md_level(const char *sysfs_dir, struct device *dev)
{
	const char *attr = "level";
	int raid_level = -1;
	int matched;

	matched = _md_sysfs_attribute_scanf(sysfs_dir, dev, attr,
					    "raid%d", &raid_level);
	if (matched != 1)
		return -1;

	log_very_verbose("Device %s %s is raid%d.",
			 dev_name(dev), attr, raid_level);

	return raid_level;
}
248*86d7f5d3SJohn Marino 
/*
 * Read the "raid_disks" md sysfs attribute of dev.
 *
 * Returns the parsed integer, or 0 if the attribute could not be read or
 * parsed.
 */
static int dev_md_raid_disks(const char *sysfs_dir, struct device *dev)
{
	const char *attr = "raid_disks";
	int disks = 0;
	int matched;

	matched = _md_sysfs_attribute_scanf(sysfs_dir, dev, attr,
					    "%d", &disks);
	if (matched != 1)
		return 0;

	log_very_verbose("Device %s %s is %d.",
			 dev_name(dev), attr, disks);

	return disks;
}
266*86d7f5d3SJohn Marino 
267*86d7f5d3SJohn Marino /*
268*86d7f5d3SJohn Marino  * Calculate stripe width of md device using its sysfs files.
269*86d7f5d3SJohn Marino  */
dev_md_stripe_width(const char * sysfs_dir,struct device * dev)270*86d7f5d3SJohn Marino unsigned long dev_md_stripe_width(const char *sysfs_dir, struct device *dev)
271*86d7f5d3SJohn Marino {
272*86d7f5d3SJohn Marino 	unsigned long chunk_size_sectors = 0UL;
273*86d7f5d3SJohn Marino 	unsigned long stripe_width_sectors = 0UL;
274*86d7f5d3SJohn Marino 	int level, raid_disks, data_disks;
275*86d7f5d3SJohn Marino 
276*86d7f5d3SJohn Marino 	chunk_size_sectors = dev_md_chunk_size(sysfs_dir, dev);
277*86d7f5d3SJohn Marino 	if (!chunk_size_sectors)
278*86d7f5d3SJohn Marino 		return 0;
279*86d7f5d3SJohn Marino 
280*86d7f5d3SJohn Marino 	level = dev_md_level(sysfs_dir, dev);
281*86d7f5d3SJohn Marino 	if (level < 0)
282*86d7f5d3SJohn Marino 		return 0;
283*86d7f5d3SJohn Marino 
284*86d7f5d3SJohn Marino 	raid_disks = dev_md_raid_disks(sysfs_dir, dev);
285*86d7f5d3SJohn Marino 	if (!raid_disks)
286*86d7f5d3SJohn Marino 		return 0;
287*86d7f5d3SJohn Marino 
288*86d7f5d3SJohn Marino 	/* The raid level governs the number of data disks. */
289*86d7f5d3SJohn Marino 	switch (level) {
290*86d7f5d3SJohn Marino 	case 0:
291*86d7f5d3SJohn Marino 		/* striped md does not have any parity disks */
292*86d7f5d3SJohn Marino 		data_disks = raid_disks;
293*86d7f5d3SJohn Marino 		break;
294*86d7f5d3SJohn Marino 	case 1:
295*86d7f5d3SJohn Marino 	case 10:
296*86d7f5d3SJohn Marino 		/* mirrored md effectively has 1 data disk */
297*86d7f5d3SJohn Marino 		data_disks = 1;
298*86d7f5d3SJohn Marino 		break;
299*86d7f5d3SJohn Marino 	case 4:
300*86d7f5d3SJohn Marino 	case 5:
301*86d7f5d3SJohn Marino 		/* both raid 4 and 5 have a single parity disk */
302*86d7f5d3SJohn Marino 		data_disks = raid_disks - 1;
303*86d7f5d3SJohn Marino 		break;
304*86d7f5d3SJohn Marino 	case 6:
305*86d7f5d3SJohn Marino 		/* raid 6 has 2 parity disks */
306*86d7f5d3SJohn Marino 		data_disks = raid_disks - 2;
307*86d7f5d3SJohn Marino 		break;
308*86d7f5d3SJohn Marino 	default:
309*86d7f5d3SJohn Marino 		log_error("Device %s has an unknown md raid level: %d",
310*86d7f5d3SJohn Marino 			  dev_name(dev), level);
311*86d7f5d3SJohn Marino 		return 0;
312*86d7f5d3SJohn Marino 	}
313*86d7f5d3SJohn Marino 
314*86d7f5d3SJohn Marino 	stripe_width_sectors = chunk_size_sectors * data_disks;
315*86d7f5d3SJohn Marino 
316*86d7f5d3SJohn Marino 	log_very_verbose("Device %s stripe-width is %lu bytes.",
317*86d7f5d3SJohn Marino 			 dev_name(dev),
318*86d7f5d3SJohn Marino 			 stripe_width_sectors << SECTOR_SHIFT);
319*86d7f5d3SJohn Marino 
320*86d7f5d3SJohn Marino 	return stripe_width_sectors;
321*86d7f5d3SJohn Marino }
322*86d7f5d3SJohn Marino 
323*86d7f5d3SJohn Marino #else
324*86d7f5d3SJohn Marino 
/*
 * Stub used when the "linux" macro is not defined: no md detection is
 * performed.  Neither argument is touched; always returns 0.
 */
int dev_is_md(struct device *dev __attribute__((unused)),
	      uint64_t *sb __attribute__((unused)))
{
	const int not_md = 0;

	return not_md;
}
330*86d7f5d3SJohn Marino 
/*
 * Stub used when the "linux" macro is not defined: no sysfs lookup is
 * performed.  Always returns a stripe width of 0.
 */
unsigned long dev_md_stripe_width(const char *sysfs_dir __attribute__((unused)),
				  struct device *dev __attribute__((unused)))
{
	const unsigned long no_width = 0UL;

	return no_width;
}
336*86d7f5d3SJohn Marino 
337*86d7f5d3SJohn Marino #endif
338