xref: /netbsd-src/sys/dev/raidframe/rf_paritymap.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /* $NetBSD: rf_paritymap.c,v 1.5 2010/03/14 21:11:41 jld Exp $ */
2 
3 /*-
4  * Copyright (c) 2009 Jed Davis.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: rf_paritymap.c,v 1.5 2010/03/14 21:11:41 jld Exp $");
31 
32 #include <sys/param.h>
33 #include <sys/callout.h>
34 #include <sys/kmem.h>
35 #include <sys/mutex.h>
36 #include <sys/rwlock.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
39 
40 #include <dev/raidframe/rf_paritymap.h>
41 #include <dev/raidframe/rf_stripelocks.h>
42 #include <dev/raidframe/rf_layout.h>
43 #include <dev/raidframe/rf_raid.h>
44 #include <dev/raidframe/rf_parityscan.h>
45 #include <dev/raidframe/rf_kintf.h>
46 
47 /* Important parameters: */
/* Lower bound on the size of one parity map region, in bytes (25 MiB). */
48 #define REGION_MINSIZE (25ULL << 20)
/* Fallback tick interval, in milliseconds, used when the given one is rejected. */
49 #define DFL_TICKMS      40000
/* Fallback cooldown, in ticks, used when the given one is rejected. */
50 #define DFL_COOLDOWN    8     /* 7-8 intervals of 40s = 5min +/- 20s */
51 
52 /* Internal-use flag bits (kept in pm->flags, guarded by pm->lk_flags). */
/* TICKING: set when the cooldown callout is armed, cleared when nothing is cooling. */
53 #define TICKING 1
/* TICKED: set by the callout, consumed by rf_paritymap_checkwork(). */
54 #define TICKED 2
55 
56 /* Prototypes for the file-local helpers defined further down. */
57 static void rf_paritymap_write_locked(struct rf_paritymap *);
58 static void rf_paritymap_tick(void *);
59 static u_int rf_paritymap_nreg(RF_Raid_t *);
60 
61 /* Extract the current status of the parity map. */
62 void
63 rf_paritymap_status(struct rf_paritymap *pm, struct rf_pmstat *ps)
64 {
65 	memset(ps, 0, sizeof(*ps));
66 	if (pm == NULL)
67 		ps->enabled = 0;
68 	else {
69 		ps->enabled = 1;
70 		ps->region_size = pm->region_size;
71 		mutex_enter(&pm->lock);
72 		memcpy(&ps->params, &pm->params, sizeof(ps->params));
73 		memcpy(ps->dirty, pm->disk_now, sizeof(ps->dirty));
74 		memcpy(&ps->ctrs, &pm->ctrs, sizeof(ps->ctrs));
75 		mutex_exit(&pm->lock);
76 	}
77 }
78 
79 /*
80  * Test whether parity in a given sector is suspected of being inconsistent
81  * on disk (assuming that any pending I/O to it is allowed to complete).
82  * This may be of interest to future work on parity scrubbing.
83  */
84 int
85 rf_paritymap_test(struct rf_paritymap *pm, daddr_t sector)
86 {
87 	unsigned region = sector / pm->region_size;
88 	int retval;
89 
90 	mutex_enter(&pm->lock);
91 	retval = isset(pm->disk_boot->bits, region) ? 1 : 0;
92 	mutex_exit(&pm->lock);
93 	return retval;
94 }
95 
96 /* To be called before a write to the RAID is submitted. */
97 void
98 rf_paritymap_begin(struct rf_paritymap *pm, daddr_t offset, daddr_t size)
99 {
100 	unsigned i, b, e;
101 
102 	b = offset / pm->region_size;
103 	e = (offset + size - 1) / pm->region_size;
104 
105 	for (i = b; i <= e; i++)
106 		rf_paritymap_begin_region(pm, i);
107 }
108 
109 /* To be called after a write to the RAID completes. */
110 void
111 rf_paritymap_end(struct rf_paritymap *pm, daddr_t offset, daddr_t size)
112 {
113 	unsigned i, b, e;
114 
115 	b = offset / pm->region_size;
116 	e = (offset + size - 1) / pm->region_size;
117 
118 	for (i = b; i <= e; i++)
119 		rf_paritymap_end_region(pm, i);
120 }
121 
122 void
123 rf_paritymap_begin_region(struct rf_paritymap *pm, unsigned region)
124 {
125 	int needs_write;
126 
127 	KASSERT(region < RF_PARITYMAP_NREG);
128 	pm->ctrs.nwrite++;
129 
130 	/* If it was being kept warm, deal with that. */
131 	mutex_enter(&pm->lock);
132 	if (pm->current->state[region] < 0)
133 		pm->current->state[region] = 0;
134 
135 	/* This shouldn't happen unless RAIDOUTSTANDING is set too high. */
136 	KASSERT(pm->current->state[region] < 127);
137 	pm->current->state[region]++;
138 
139 	needs_write = isclr(pm->disk_now->bits, region);
140 
141 	if (needs_write) {
142 		KASSERT(pm->current->state[region] == 1);
143 		rf_paritymap_write_locked(pm);
144 	}
145 
146 	mutex_exit(&pm->lock);
147 }
148 
149 void
150 rf_paritymap_end_region(struct rf_paritymap *pm, unsigned region)
151 {
152 	KASSERT(region < RF_PARITYMAP_NREG);
153 
154 	mutex_enter(&pm->lock);
155 	KASSERT(pm->current->state[region] > 0);
156 	--pm->current->state[region];
157 
158 	if (pm->current->state[region] <= 0) {
159 		pm->current->state[region] = -pm->params.cooldown;
160 		KASSERT(pm->current->state[region] <= 0);
161 		mutex_enter(&pm->lk_flags);
162 		if (!(pm->flags & TICKING)) {
163 			pm->flags |= TICKING;
164 			mutex_exit(&pm->lk_flags);
165 			callout_schedule(&pm->ticker,
166 			    mstohz(pm->params.tickms));
167 		} else
168 			mutex_exit(&pm->lk_flags);
169 	}
170 	mutex_exit(&pm->lock);
171 }
172 
173 /*
174  * Updates the parity map to account for any changes in current activity
175  * and/or an ongoing parity scan, then writes it to disk with appropriate
176  * synchronization.
177  */
178 void
179 rf_paritymap_write(struct rf_paritymap *pm)
180 {
181 	mutex_enter(&pm->lock);
182 	rf_paritymap_write_locked(pm);
183 	mutex_exit(&pm->lock);
184 }
185 
186 /* As above, but to be used when pm->lock is already held. */
187 static void
188 rf_paritymap_write_locked(struct rf_paritymap *pm)
189 {
190 	char w, w0;
191 	int i, j, setting, clearing;
192 
193 	setting = clearing = 0;
194 	for (i = 0; i < RF_PARITYMAP_NBYTE; i++) {
195 		w0 = pm->disk_now->bits[i];
196 		w = pm->disk_boot->bits[i];
197 
198 		for (j = 0; j < NBBY; j++)
199 			if (pm->current->state[i * NBBY + j] != 0)
200 				w |= 1 << j;
201 
202 		if (w & ~w0)
203 			setting = 1;
204 		if (w0 & ~w)
205 			clearing = 1;
206 
207 		pm->disk_now->bits[i] = w;
208 	}
209 	pm->ctrs.ncachesync += setting + clearing;
210 	pm->ctrs.nclearing += clearing;
211 
212 	/*
213 	 * If bits are being set in the parity map, then a sync is
214 	 * required afterwards, so that the regions are marked dirty
215 	 * on disk before any writes to them take place.  If bits are
216 	 * being cleared, then a sync is required before the write, so
217 	 * that any writes to those regions are processed before the
218 	 * region is marked clean.  (Synchronization is somewhat
219 	 * overkill; a write ordering barrier would suffice, but we
220 	 * currently have no way to express that directly.)
221 	 */
222 	if (clearing)
223 		rf_sync_component_caches(pm->raid);
224 	rf_paritymap_kern_write(pm->raid, pm->disk_now);
225 	if (setting)
226 		rf_sync_component_caches(pm->raid);
227 }
228 
229 /* Mark all parity as being in need of rewrite. */
230 void
231 rf_paritymap_invalidate(struct rf_paritymap *pm)
232 {
233 	mutex_enter(&pm->lock);
234 	memset(pm->disk_boot, ~(unsigned char)0,
235 	    sizeof(struct rf_paritymap_ondisk));
236 	mutex_exit(&pm->lock);
237 }
238 
239 /* Mark all parity as being correct. */
240 void
241 rf_paritymap_forceclean(struct rf_paritymap *pm)
242 {
243 	mutex_enter(&pm->lock);
244 	memset(pm->disk_boot, (unsigned char)0,
245 	    sizeof(struct rf_paritymap_ondisk));
246 	mutex_exit(&pm->lock);
247 }
248 
249 /*
250  * The cooldown callout routine just defers its work to a thread; it can't do
251  * the parity map write itself as it would block, and although mutex-induced
252  * blocking is permitted it seems wise to avoid tying up the softint.
253  */
254 static void
255 rf_paritymap_tick(void *arg)
256 {
257 	struct rf_paritymap *pm = arg;
258 
259 	mutex_enter(&pm->lk_flags);
260 	pm->flags |= TICKED;
261 	mutex_exit(&pm->lk_flags);
262 	wakeup(&(pm->raid->iodone)); /* XXX */
263 }
264 
265 /*
266  * This is where the parity cooling work (and rearming the callout if needed)
267  * is done; the raidio thread calls it when woken up, as by the above.
268  */
269 void
270 rf_paritymap_checkwork(struct rf_paritymap *pm)
271 {
272 	int i, zerop, progressp;
273 
274 	mutex_enter(&pm->lk_flags);
275 	if (pm->flags & TICKED) {
276 		zerop = progressp = 0;
277 
278 		pm->flags &= ~TICKED;
279 		mutex_exit(&pm->lk_flags);
280 
281 		mutex_enter(&pm->lock);
282 		for (i = 0; i < RF_PARITYMAP_NREG; i++) {
283 			if (pm->current->state[i] < 0) {
284 				progressp = 1;
285 				pm->current->state[i]++;
286 				if (pm->current->state[i] == 0)
287 					zerop = 1;
288 			}
289 		}
290 
291 		if (progressp)
292 			callout_schedule(&pm->ticker,
293 			    mstohz(pm->params.tickms));
294 		else {
295 			mutex_enter(&pm->lk_flags);
296 			pm->flags &= ~TICKING;
297 			mutex_exit(&pm->lk_flags);
298 		}
299 
300 		if (zerop)
301 			rf_paritymap_write_locked(pm);
302 		mutex_exit(&pm->lock);
303 	} else
304 		mutex_exit(&pm->lk_flags);
305 }
306 
307 /*
308  * Set parity map parameters; used both to alter parameters on the fly and to
309  * establish their initial values.  Note that setting a parameter to 0 means
310  * to leave the previous setting unchanged, and that if this is done for the
311  * initial setting of "regions", then a default value will be computed based
312  * on the RAID component size.
313  */
314 int
315 rf_paritymap_set_params(struct rf_paritymap *pm,
316     const struct rf_pmparams *params, int todisk)
317 {
318 	int cooldown, tickms;
319 	u_int regions;
320 	RF_RowCol_t col;
321 	RF_ComponentLabel_t *clabel;
322 	RF_Raid_t *raidPtr;
323 
324 	cooldown = params->cooldown != 0
325 	    ? params->cooldown : pm->params.cooldown;
326 	tickms = params->tickms != 0
327 	    ? params->tickms : pm->params.tickms;
328 	regions = params->regions != 0
329 	    ? params->regions : pm->params.regions;
330 
331 	if (cooldown < 1 || cooldown > 128) {
332 		printf("raid%d: cooldown %d out of range\n", pm->raid->raidid,
333 		    cooldown);
334 		return (-1);
335 	}
336 	if (tickms < 10) {
337 		printf("raid%d: tick time %dms out of range\n",
338 		    pm->raid->raidid, tickms);
339 		return (-1);
340 	}
341 	if (regions == 0) {
342 		regions = rf_paritymap_nreg(pm->raid);
343 	} else if (regions > RF_PARITYMAP_NREG) {
344 		printf("raid%d: region count %u too large (more than %u)\n",
345 		    pm->raid->raidid, regions, RF_PARITYMAP_NREG);
346 		return (-1);
347 	}
348 
349 	/* XXX any currently warm parity will be used with the new tickms! */
350 	pm->params.cooldown = cooldown;
351 	pm->params.tickms = tickms;
352 	/* Apply the initial region count, but do not change it after that. */
353 	if (pm->params.regions == 0)
354 		pm->params.regions = regions;
355 
356 	/* So that the newly set parameters can be tested: */
357 	pm->ctrs.nwrite = pm->ctrs.ncachesync = pm->ctrs.nclearing = 0;
358 
359 	if (todisk) {
360 		raidPtr = pm->raid;
361 		for (col = 0; col < raidPtr->numCol; col++) {
362 			if (RF_DEAD_DISK(raidPtr->Disks[col].status))
363 				continue;
364 
365 			clabel = raidget_component_label(raidPtr, col);
366 			clabel->parity_map_ntick = cooldown;
367 			clabel->parity_map_tickms = tickms;
368 			clabel->parity_map_regions = regions;
369 
370 			/* Don't touch the disk if it's been spared */
371 			if (clabel->status == rf_ds_spared)
372 				continue;
373 
374 			raidflush_component_label(raidPtr, col);
375 		}
376 
377 		/* handle the spares too... */
378 		for (col = 0; col < raidPtr->numSpare; col++) {
379 			if (raidPtr->Disks[raidPtr->numCol+col].status == rf_ds_used_spare) {
380 				clabel = raidget_component_label(raidPtr, raidPtr->numCol+col);
381 				clabel->parity_map_ntick = cooldown;
382 				clabel->parity_map_tickms = tickms;
383 				clabel->parity_map_regions = regions;
384 				raidflush_component_label(raidPtr, raidPtr->numCol+col);
385 			}
386 		}
387 	}
388 	return 0;
389 }
390 
391 /*
392  * The number of regions may not be as many as can fit into the map, because
393  * when regions are too small, the overhead of setting parity map bits
394  * becomes significant in comparison to the actual I/O, while the
395  * corresponding gains in parity verification time become negligible.  Thus,
396  * a minimum region size (defined above) is imposed.
397  *
398  * Note that, if the number of regions is less than the maximum, then some of
399  * the regions will be "fictional", corresponding to no actual disk; some
400  * parts of the code may process them as normal, but they can not ever be
401  * written to.
402  */
403 static u_int
404 rf_paritymap_nreg(RF_Raid_t *raid)
405 {
406 	daddr_t bytes_per_disk, nreg;
407 
408 	bytes_per_disk = raid->sectorsPerDisk << raid->logBytesPerSector;
409 	nreg = bytes_per_disk / REGION_MINSIZE;
410 	if (nreg > RF_PARITYMAP_NREG)
411 		nreg = RF_PARITYMAP_NREG;
412 
413 	return (u_int)nreg;
414 }
415 
416 /*
417  * Initialize a parity map given specific parameters.  This neither reads nor
418  * writes the parity map config in the component labels; for that, see below.
419  */
420 int
421 rf_paritymap_init(struct rf_paritymap *pm, RF_Raid_t *raid,
422     const struct rf_pmparams *params)
423 {
424 	daddr_t rstripes;
425 	struct rf_pmparams safe;
426 
427 	pm->raid = raid;
428 	pm->params.regions = 0;
429 	if (0 != rf_paritymap_set_params(pm, params, 0)) {
430 		/*
431 		 * If the parameters are out-of-range, then bring the
432 		 * parity map up with something reasonable, so that
433 		 * the admin can at least go and fix it (or ignore it
434 		 * entirely).
435 		 */
436 		safe.cooldown = DFL_COOLDOWN;
437 		safe.tickms = DFL_TICKMS;
438 		safe.regions = 0;
439 
440 		if (0 != rf_paritymap_set_params(pm, &safe, 0))
441 			return (-1);
442 	}
443 
444 	rstripes = howmany(raid->Layout.numStripe, pm->params.regions);
445 	pm->region_size = rstripes * raid->Layout.dataSectorsPerStripe;
446 
447 	callout_init(&pm->ticker, CALLOUT_MPSAFE);
448 	callout_setfunc(&pm->ticker, rf_paritymap_tick, pm);
449 	pm->flags = 0;
450 
451 	pm->disk_boot = kmem_alloc(sizeof(struct rf_paritymap_ondisk),
452 	    KM_SLEEP);
453 	pm->disk_now = kmem_alloc(sizeof(struct rf_paritymap_ondisk),
454 	    KM_SLEEP);
455 	pm->current = kmem_zalloc(sizeof(struct rf_paritymap_current),
456 	    KM_SLEEP);
457 
458 	rf_paritymap_kern_read(pm->raid, pm->disk_boot);
459 	memcpy(pm->disk_now, pm->disk_boot, sizeof(*pm->disk_now));
460 
461 	mutex_init(&pm->lock, MUTEX_DEFAULT, IPL_NONE);
462 	mutex_init(&pm->lk_flags, MUTEX_DEFAULT, IPL_SOFTCLOCK);
463 
464 	return 0;
465 }
466 
467 /*
468  * Destroys a parity map; unless "force" is set, also cleans parity for any
469  * regions which were still in cooldown (but are not dirty on disk).
470  */
471 void
472 rf_paritymap_destroy(struct rf_paritymap *pm, int force)
473 {
474 	int i;
475 
476 	callout_halt(&pm->ticker, NULL); /* XXX stop? halt? */
477 	callout_destroy(&pm->ticker);
478 
479 	if (!force) {
480 		for (i = 0; i < RF_PARITYMAP_NREG; i++) {
481 			/* XXX check for > 0 ? */
482 			if (pm->current->state[i] < 0)
483 				pm->current->state[i] = 0;
484 		}
485 
486 		rf_paritymap_write_locked(pm);
487 	}
488 
489 	mutex_destroy(&pm->lock);
490 	mutex_destroy(&pm->lk_flags);
491 
492 	kmem_free(pm->disk_boot, sizeof(struct rf_paritymap_ondisk));
493 	kmem_free(pm->disk_now, sizeof(struct rf_paritymap_ondisk));
494 	kmem_free(pm->current, sizeof(struct rf_paritymap_current));
495 }
496 
497 /*
498  * Rewrite parity, taking parity map into account; this is the equivalent of
499  * the old rf_RewriteParity, and is likewise to be called from a suitable
500  * thread and shouldn't have multiple copies running in parallel and so on.
501  *
502  * Note that the fictional regions are "cleaned" in one shot, so that very
503  * small RAIDs (useful for testing) will not experience potentially severe
504  * regressions in rewrite time.
505  */
506 int
507 rf_paritymap_rewrite(struct rf_paritymap *pm)
508 {
509 	int i, ret_val = 0;
510 	daddr_t reg_b, reg_e;
511 
512 	/* Process only the actual regions. */
513 	for (i = 0; i < pm->params.regions; i++) {
514 		mutex_enter(&pm->lock);
515 		if (isset(pm->disk_boot->bits, i)) {
516 			mutex_exit(&pm->lock);
517 
518 			reg_b = i * pm->region_size;
519 			reg_e = reg_b + pm->region_size;
520 			if (reg_e > pm->raid->totalSectors)
521 				reg_e = pm->raid->totalSectors;
522 
523 			if (rf_RewriteParityRange(pm->raid, reg_b,
524 			    reg_e - reg_b)) {
525 				ret_val = 1;
526 				if (pm->raid->waitShutdown)
527 					return ret_val;
528 			} else {
529 				mutex_enter(&pm->lock);
530 				clrbit(pm->disk_boot->bits, i);
531 				rf_paritymap_write_locked(pm);
532 				mutex_exit(&pm->lock);
533 			}
534 		} else {
535 			mutex_exit(&pm->lock);
536 		}
537 	}
538 
539 	/* Now, clear the fictional regions, if any. */
540 	rf_paritymap_forceclean(pm);
541 	rf_paritymap_write(pm);
542 
543 	return ret_val;
544 }
545 
546 /*
547  * How to merge the on-disk parity maps when reading them in from the
548  * various components; returns whether they differ.  In the case that
549  * they do differ, sets *dst to the union of *dst and *src.
550  *
551  * In theory, it should be safe to take the intersection (or just pick
552  * a single component arbitrarily), but the paranoid approach costs
553  * little.
554  *
555  * Appropriate locking, if any, is the responsibility of the caller.
556  */
557 int
558 rf_paritymap_merge(struct rf_paritymap_ondisk *dst,
559     struct rf_paritymap_ondisk *src)
560 {
561 	int i, discrep = 0;
562 
563 	for (i = 0; i < RF_PARITYMAP_NBYTE; i++) {
564 		if (dst->bits[i] != src->bits[i])
565 			discrep = 1;
566 		dst->bits[i] |= src->bits[i];
567 	}
568 
569 	return discrep;
570 }
571 
572 /*
573  * Detach a parity map from its RAID.  This is not meant to be applied except
574  * when unconfiguring the RAID after all I/O has been resolved, as otherwise
575  * an out-of-date parity map could be treated as current.
576  */
577 void
578 rf_paritymap_detach(RF_Raid_t *raidPtr)
579 {
580 	if (raidPtr->parity_map == NULL)
581 		return;
582 
583 	simple_lock(&(raidPtr->iodone_lock));
584 	struct rf_paritymap *pm = raidPtr->parity_map;
585 	raidPtr->parity_map = NULL;
586 	simple_unlock(&(raidPtr->iodone_lock));
587 	/* XXXjld is that enough locking?  Or too much? */
588 	rf_paritymap_destroy(pm, 0);
589 	kmem_free(pm, sizeof(*pm));
590 }
591 
592 /*
593  * Is this RAID set ineligible for parity-map use due to not actually
594  * having any parity?  (If so, rf_paritymap_attach is a no-op, but
595  * rf_paritymap_{get,set}_disable will still pointlessly act on the
596  * component labels.)
597  */
598 int
599 rf_paritymap_ineligible(RF_Raid_t *raidPtr)
600 {
601 	return raidPtr->Layout.map->faultsTolerated == 0;
602 }
603 
604 /*
605  * Attach a parity map to a RAID set if appropriate.  Includes
606  * configure-time processing of parity-map fields of component label.
607  */
608 void
609 rf_paritymap_attach(RF_Raid_t *raidPtr, int force)
610 {
611 	RF_RowCol_t col;
612 	int pm_use, pm_zap;
613 	int g_tickms, g_ntick, g_regions;
614 	int good;
615 	RF_ComponentLabel_t *clabel;
616 	u_int flags, regions;
617 	struct rf_pmparams params;
618 
619 	if (rf_paritymap_ineligible(raidPtr)) {
620 		/* There isn't any parity. */
621 		return;
622 	}
623 
624 	pm_use = 1;
625 	pm_zap = 0;
626 	g_tickms = DFL_TICKMS;
627 	g_ntick = DFL_COOLDOWN;
628 	g_regions = 0;
629 
630 	/*
631 	 * Collect opinions on the set config.  If this is the initial
632 	 * config (raidctl -C), treat all labels as invalid, since
633 	 * there may be random data present.
634 	 */
635 	if (!force) {
636 		for (col = 0; col < raidPtr->numCol; col++) {
637 			if (RF_DEAD_DISK(raidPtr->Disks[col].status))
638 				continue;
639 			clabel = raidget_component_label(raidPtr, col);
640 			flags = clabel->parity_map_flags;
641 			/* Check for use by non-parity-map kernel. */
642 			if (clabel->parity_map_modcount
643 			    != clabel->mod_counter) {
644 				flags &= ~RF_PMLABEL_WASUSED;
645 			}
646 
647 			if (flags & RF_PMLABEL_VALID) {
648 				g_tickms = clabel->parity_map_tickms;
649 				g_ntick = clabel->parity_map_ntick;
650 				regions = clabel->parity_map_regions;
651 				if (g_regions == 0)
652 					g_regions = regions;
653 				else if (g_regions != regions) {
654 					pm_zap = 1; /* important! */
655 				}
656 
657 				if (flags & RF_PMLABEL_DISABLE) {
658 					pm_use = 0;
659 				}
660 				if (!(flags & RF_PMLABEL_WASUSED)) {
661 					pm_zap = 1;
662 				}
663 			} else {
664 				pm_zap = 1;
665 			}
666 		}
667 	} else {
668 		pm_zap = 1;
669 	}
670 
671 	/* Finally, create and attach the parity map. */
672 	if (pm_use) {
673 		params.cooldown = g_ntick;
674 		params.tickms = g_tickms;
675 		params.regions = g_regions;
676 
677 		raidPtr->parity_map = kmem_alloc(sizeof(struct rf_paritymap),
678 		    KM_SLEEP);
679 		if (0 != rf_paritymap_init(raidPtr->parity_map, raidPtr,
680 			&params)) {
681 			/* It failed; do without. */
682 			kmem_free(raidPtr->parity_map,
683 			    sizeof(struct rf_paritymap));
684 			raidPtr->parity_map = NULL;
685 			return;
686 		}
687 
688 		if (g_regions == 0)
689 			/* Pick up the autoconfigured region count. */
690 			g_regions = raidPtr->parity_map->params.regions;
691 
692 		if (pm_zap) {
693 			good = raidPtr->parity_good && !force;
694 
695 			if (good)
696 				rf_paritymap_forceclean(raidPtr->parity_map);
697 			else
698 				rf_paritymap_invalidate(raidPtr->parity_map);
699 			/* This needs to be on disk before WASUSED is set. */
700 			rf_paritymap_write(raidPtr->parity_map);
701 		}
702 	}
703 
704 	/* Alter labels in-core to reflect the current view of things. */
705 	for (col = 0; col < raidPtr->numCol; col++) {
706 		if (RF_DEAD_DISK(raidPtr->Disks[col].status))
707 			continue;
708 		clabel = raidget_component_label(raidPtr, col);
709 
710 		if (pm_use)
711 			flags = RF_PMLABEL_VALID | RF_PMLABEL_WASUSED;
712 		else
713 			flags = RF_PMLABEL_VALID | RF_PMLABEL_DISABLE;
714 
715 		clabel->parity_map_flags = flags;
716 		clabel->parity_map_tickms = g_tickms;
717 		clabel->parity_map_ntick = g_ntick;
718 		clabel->parity_map_regions = g_regions;
719 		raidflush_component_label(raidPtr, col);
720 	}
721 	/* Note that we're just in 'attach' here, and there won't
722 	   be any spare disks at this point. */
723 }
724 
725 /*
726  * For initializing the parity-map fields of a component label, both on
727  * initial creation and on reconstruct/copyback/etc.  */
728 void
729 rf_paritymap_init_label(struct rf_paritymap *pm, RF_ComponentLabel_t *clabel)
730 {
731 	if (pm != NULL) {
732 		clabel->parity_map_flags =
733 		    RF_PMLABEL_VALID | RF_PMLABEL_WASUSED;
734 		clabel->parity_map_tickms = pm->params.tickms;
735 		clabel->parity_map_ntick = pm->params.cooldown;
736 		/*
737 		 * XXXjld: If the number of regions is changed on disk, and
738 		 * then a new component is labeled before the next configure,
739 		 * then it will get the old value and they will conflict on
740 		 * the next boot (and the default will be used instead).
741 		 */
742 		clabel->parity_map_regions = pm->params.regions;
743 	} else {
744 		/*
745 		 * XXXjld: if the map is disabled, and all the components are
746 		 * replaced without an intervening unconfigure/reconfigure,
747 		 * then it will become enabled on the next unconfig/reconfig.
748 		 */
749 	}
750 }
751 
752 
753 /* Will the parity map be disabled next time? */
754 int
755 rf_paritymap_get_disable(RF_Raid_t *raidPtr)
756 {
757 	RF_ComponentLabel_t *clabel;
758 	RF_RowCol_t col;
759 	int dis;
760 
761 	dis = 0;
762 	for (col = 0; col < raidPtr->numCol; col++) {
763 		if (RF_DEAD_DISK(raidPtr->Disks[col].status))
764 			continue;
765 		clabel = raidget_component_label(raidPtr, col);
766 		if (clabel->parity_map_flags & RF_PMLABEL_DISABLE)
767 			dis = 1;
768 	}
769         for (col = 0; col < raidPtr->numSpare; col++) {
770 		if (raidPtr->Disks[raidPtr->numCol+col].status != rf_ds_used_spare)
771                         continue;
772                 clabel = raidget_component_label(raidPtr, raidPtr->numCol+col);
773                 if (clabel->parity_map_flags & RF_PMLABEL_DISABLE)
774                         dis = 1;
775         }
776 
777 	return dis;
778 }
779 
780 /* Set whether the parity map will be disabled next time. */
781 void
782 rf_paritymap_set_disable(RF_Raid_t *raidPtr, int dis)
783 {
784 	RF_ComponentLabel_t *clabel;
785 	RF_RowCol_t col;
786 
787 	for (col = 0; col < raidPtr->numCol; col++) {
788 		if (RF_DEAD_DISK(raidPtr->Disks[col].status))
789 			continue;
790 		clabel = raidget_component_label(raidPtr, col);
791 		if (dis)
792 			clabel->parity_map_flags |= RF_PMLABEL_DISABLE;
793 		else
794 			clabel->parity_map_flags &= ~RF_PMLABEL_DISABLE;
795 		raidflush_component_label(raidPtr, col);
796 	}
797 
798 	/* update any used spares as well */
799 	for (col = 0; col < raidPtr->numSpare; col++) {
800 		if (raidPtr->Disks[raidPtr->numCol+col].status != rf_ds_used_spare)
801 			continue;
802 
803 		clabel = raidget_component_label(raidPtr, raidPtr->numCol+col);
804 		if (dis)
805 			clabel->parity_map_flags |= RF_PMLABEL_DISABLE;
806 		else
807 			clabel->parity_map_flags &= ~RF_PMLABEL_DISABLE;
808 		raidflush_component_label(raidPtr, raidPtr->numCol+col);
809 	}
810 }
811