1 /* $NetBSD: rf_netbsdkintf.c,v 1.187 2005/05/29 22:03:09 christos Exp $ */ 2 /*- 3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster; Jason R. Thorpe. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the NetBSD 20 * Foundation, Inc. and its contributors. 21 * 4. Neither the name of The NetBSD Foundation nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1990, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * This code is derived from software contributed to Berkeley by 43 * the Systems Programming Group of the University of Utah Computer 44 * Science Department. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 * 70 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 71 * 72 * @(#)cd.c 8.2 (Berkeley) 11/16/93 73 */ 74 75 /* 76 * Copyright (c) 1988 University of Utah. 77 * 78 * This code is derived from software contributed to Berkeley by 79 * the Systems Programming Group of the University of Utah Computer 80 * Science Department. 81 * 82 * Redistribution and use in source and binary forms, with or without 83 * modification, are permitted provided that the following conditions 84 * are met: 85 * 1. Redistributions of source code must retain the above copyright 86 * notice, this list of conditions and the following disclaimer. 87 * 2. Redistributions in binary form must reproduce the above copyright 88 * notice, this list of conditions and the following disclaimer in the 89 * documentation and/or other materials provided with the distribution. 90 * 3. All advertising materials mentioning features or use of this software 91 * must display the following acknowledgement: 92 * This product includes software developed by the University of 93 * California, Berkeley and its contributors. 94 * 4. Neither the name of the University nor the names of its contributors 95 * may be used to endorse or promote products derived from this software 96 * without specific prior written permission. 
97 * 98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 108 * SUCH DAMAGE. 109 * 110 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 111 * 112 * @(#)cd.c 8.2 (Berkeley) 11/16/93 113 */ 114 115 /* 116 * Copyright (c) 1995 Carnegie-Mellon University. 117 * All rights reserved. 118 * 119 * Authors: Mark Holland, Jim Zelenka 120 * 121 * Permission to use, copy, modify and distribute this software and 122 * its documentation is hereby granted, provided that both the copyright 123 * notice and this permission notice appear in all copies of the 124 * software, derivative works or modified versions, and any portions 125 * thereof, and that both notices appear in supporting documentation. 126 * 127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
130 * 131 * Carnegie Mellon requests users of this software to return to 132 * 133 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 134 * School of Computer Science 135 * Carnegie Mellon University 136 * Pittsburgh PA 15213-3890 137 * 138 * any improvements or extensions that they make and grant Carnegie the 139 * rights to redistribute these changes. 140 */ 141 142 /*********************************************************** 143 * 144 * rf_kintf.c -- the kernel interface routines for RAIDframe 145 * 146 ***********************************************************/ 147 148 #include <sys/cdefs.h> 149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.187 2005/05/29 22:03:09 christos Exp $"); 150 151 #include <sys/param.h> 152 #include <sys/errno.h> 153 #include <sys/pool.h> 154 #include <sys/proc.h> 155 #include <sys/queue.h> 156 #include <sys/disk.h> 157 #include <sys/device.h> 158 #include <sys/stat.h> 159 #include <sys/ioctl.h> 160 #include <sys/fcntl.h> 161 #include <sys/systm.h> 162 #include <sys/namei.h> 163 #include <sys/vnode.h> 164 #include <sys/disklabel.h> 165 #include <sys/conf.h> 166 #include <sys/lock.h> 167 #include <sys/buf.h> 168 #include <sys/bufq.h> 169 #include <sys/user.h> 170 #include <sys/reboot.h> 171 172 #include <dev/raidframe/raidframevar.h> 173 #include <dev/raidframe/raidframeio.h> 174 #include "raid.h" 175 #include "opt_raid_autoconfig.h" 176 #include "rf_raid.h" 177 #include "rf_copyback.h" 178 #include "rf_dag.h" 179 #include "rf_dagflags.h" 180 #include "rf_desc.h" 181 #include "rf_diskqueue.h" 182 #include "rf_etimer.h" 183 #include "rf_general.h" 184 #include "rf_kintf.h" 185 #include "rf_options.h" 186 #include "rf_driver.h" 187 #include "rf_parityscan.h" 188 #include "rf_threadstuff.h" 189 190 #ifdef DEBUG 191 int rf_kdebug_level = 0; 192 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 193 #else /* DEBUG */ 194 #define db1_printf(a) { } 195 #endif /* DEBUG */ 196 197 static RF_Raid_t **raidPtrs; /* 
global raid device descriptors */ 198 199 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex) 200 201 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a 202 * spare table */ 203 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from 204 * installation process */ 205 206 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures"); 207 208 /* prototypes */ 209 static void KernelWakeupFunc(struct buf *); 210 static void InitBP(struct buf *, struct vnode *, unsigned, 211 dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *), 212 void *, int, struct proc *); 213 static void raidinit(RF_Raid_t *); 214 215 void raidattach(int); 216 217 dev_type_open(raidopen); 218 dev_type_close(raidclose); 219 dev_type_read(raidread); 220 dev_type_write(raidwrite); 221 dev_type_ioctl(raidioctl); 222 dev_type_strategy(raidstrategy); 223 dev_type_dump(raiddump); 224 dev_type_size(raidsize); 225 226 const struct bdevsw raid_bdevsw = { 227 raidopen, raidclose, raidstrategy, raidioctl, 228 raiddump, raidsize, D_DISK 229 }; 230 231 const struct cdevsw raid_cdevsw = { 232 raidopen, raidclose, raidread, raidwrite, raidioctl, 233 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 234 }; 235 236 /* 237 * Pilfered from ccd.c 238 */ 239 240 struct raidbuf { 241 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */ 242 struct buf *rf_obp; /* ptr. to original I/O buf */ 243 RF_DiskQueueData_t *req;/* the request that this was part of.. */ 244 }; 245 246 /* XXX Not sure if the following should be replacing the raidPtrs above, 247 or if it should be used in conjunction with that... 
248 */ 249 250 struct raid_softc { 251 int sc_flags; /* flags */ 252 int sc_cflags; /* configuration flags */ 253 size_t sc_size; /* size of the raid device */ 254 char sc_xname[20]; /* XXX external name */ 255 struct disk sc_dkdev; /* generic disk device info */ 256 struct bufq_state buf_queue; /* used for the device queue */ 257 }; 258 /* sc_flags */ 259 #define RAIDF_INITED 0x01 /* unit has been initialized */ 260 #define RAIDF_WLABEL 0x02 /* label area is writable */ 261 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */ 262 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */ 263 #define RAIDF_LOCKED 0x80 /* unit is locked */ 264 265 #define raidunit(x) DISKUNIT(x) 266 int numraid = 0; 267 268 /* 269 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. 270 * Be aware that large numbers can allow the driver to consume a lot of 271 * kernel memory, especially on writes, and in degraded mode reads. 272 * 273 * For example: with a stripe width of 64 blocks (32k) and 5 disks, 274 * a single 64K write will typically require 64K for the old data, 275 * 64K for the old parity, and 64K for the new parity, for a total 276 * of 192K (if the parity buffer is not re-used immediately). 277 * Even it if is used immediately, that's still 128K, which when multiplied 278 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data. 279 * 280 * Now in degraded mode, for example, a 64K read on the above setup may 281 * require data reconstruction, which will require *all* of the 4 remaining 282 * disks to participate -- 4 * 32K/disk == 128K again. 283 */ 284 285 #ifndef RAIDOUTSTANDING 286 #define RAIDOUTSTANDING 6 287 #endif 288 289 #define RAIDLABELDEV(dev) \ 290 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART)) 291 292 /* declared here, and made public, for the benefit of KVM stuff.. 
*/ 293 struct raid_softc *raid_softc; 294 295 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, 296 struct disklabel *); 297 static void raidgetdisklabel(dev_t); 298 static void raidmakedisklabel(struct raid_softc *); 299 300 static int raidlock(struct raid_softc *); 301 static void raidunlock(struct raid_softc *); 302 303 static void rf_markalldirty(RF_Raid_t *); 304 305 struct device *raidrootdev; 306 307 void rf_ReconThread(struct rf_recon_req *); 308 void rf_RewriteParityThread(RF_Raid_t *raidPtr); 309 void rf_CopybackThread(RF_Raid_t *raidPtr); 310 void rf_ReconstructInPlaceThread(struct rf_recon_req *); 311 int rf_autoconfig(struct device *self); 312 void rf_buildroothack(RF_ConfigSet_t *); 313 314 RF_AutoConfig_t *rf_find_raid_components(void); 315 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *); 316 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); 317 static int rf_reasonable_label(RF_ComponentLabel_t *); 318 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *); 319 int rf_set_autoconfig(RF_Raid_t *, int); 320 int rf_set_rootpartition(RF_Raid_t *, int); 321 void rf_release_all_vps(RF_ConfigSet_t *); 322 void rf_cleanup_config_set(RF_ConfigSet_t *); 323 int rf_have_enough_components(RF_ConfigSet_t *); 324 int rf_auto_config_set(RF_ConfigSet_t *, int *); 325 326 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not 327 allow autoconfig to take place. 328 Note that this is overridden by having 329 RAID_AUTOCONFIG as an option in the 330 kernel config file. */ 331 332 struct RF_Pools_s rf_pools; 333 334 void 335 raidattach(int num) 336 { 337 int raidID; 338 int i, rc; 339 340 #ifdef DEBUG 341 printf("raidattach: Asked for %d units\n", num); 342 #endif 343 344 if (num <= 0) { 345 #ifdef DIAGNOSTIC 346 panic("raidattach: count <= 0"); 347 #endif 348 return; 349 } 350 /* This is where all the initialization stuff gets done. 
*/ 351 352 numraid = num; 353 354 /* Make some space for requested number of units... */ 355 356 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **)); 357 if (raidPtrs == NULL) { 358 panic("raidPtrs is NULL!!"); 359 } 360 361 /* Initialize the component buffer pool. */ 362 rf_pool_init(&rf_pools.cbuf, sizeof(struct raidbuf), 363 "raidpl", num * RAIDOUTSTANDING, 364 2 * num * RAIDOUTSTANDING); 365 366 rf_mutex_init(&rf_sparet_wait_mutex); 367 368 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; 369 370 for (i = 0; i < num; i++) 371 raidPtrs[i] = NULL; 372 rc = rf_BootRaidframe(); 373 if (rc == 0) 374 printf("Kernelized RAIDframe activated\n"); 375 else 376 panic("Serious error booting RAID!!"); 377 378 /* put together some datastructures like the CCD device does.. This 379 * lets us lock the device and what-not when it gets opened. */ 380 381 raid_softc = (struct raid_softc *) 382 malloc(num * sizeof(struct raid_softc), 383 M_RAIDFRAME, M_NOWAIT); 384 if (raid_softc == NULL) { 385 printf("WARNING: no memory for RAIDframe driver\n"); 386 return; 387 } 388 389 memset(raid_softc, 0, num * sizeof(struct raid_softc)); 390 391 raidrootdev = (struct device *)malloc(num * sizeof(struct device), 392 M_RAIDFRAME, M_NOWAIT); 393 if (raidrootdev == NULL) { 394 panic("No memory for RAIDframe driver!!?!?!"); 395 } 396 397 for (raidID = 0; raidID < num; raidID++) { 398 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS); 399 400 raidrootdev[raidID].dv_class = DV_DISK; 401 raidrootdev[raidID].dv_cfdata = NULL; 402 raidrootdev[raidID].dv_unit = raidID; 403 raidrootdev[raidID].dv_parent = NULL; 404 raidrootdev[raidID].dv_flags = 0; 405 snprintf(raidrootdev[raidID].dv_xname, 406 sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID); 407 408 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t), 409 (RF_Raid_t *)); 410 if (raidPtrs[raidID] == NULL) { 411 printf("WARNING: raidPtrs[%d] is NULL\n", raidID); 412 numraid = raidID; 413 return; 414 } 415 } 416 417 #ifdef 
RAID_AUTOCONFIG 418 raidautoconfig = 1; 419 #endif 420 421 /* 422 * Register a finalizer which will be used to auto-config RAID 423 * sets once all real hardware devices have been found. 424 */ 425 if (config_finalize_register(NULL, rf_autoconfig) != 0) 426 printf("WARNING: unable to register RAIDframe finalizer\n"); 427 } 428 429 int 430 rf_autoconfig(struct device *self) 431 { 432 RF_AutoConfig_t *ac_list; 433 RF_ConfigSet_t *config_sets; 434 435 if (raidautoconfig == 0) 436 return (0); 437 438 /* XXX This code can only be run once. */ 439 raidautoconfig = 0; 440 441 /* 1. locate all RAID components on the system */ 442 #ifdef DEBUG 443 printf("Searching for RAID components...\n"); 444 #endif 445 ac_list = rf_find_raid_components(); 446 447 /* 2. Sort them into their respective sets. */ 448 config_sets = rf_create_auto_sets(ac_list); 449 450 /* 451 * 3. Evaluate each set andconfigure the valid ones. 452 * This gets done in rf_buildroothack(). 453 */ 454 rf_buildroothack(config_sets); 455 456 return (1); 457 } 458 459 void 460 rf_buildroothack(RF_ConfigSet_t *config_sets) 461 { 462 RF_ConfigSet_t *cset; 463 RF_ConfigSet_t *next_cset; 464 int retcode; 465 int raidID; 466 int rootID; 467 int num_root; 468 469 rootID = 0; 470 num_root = 0; 471 cset = config_sets; 472 while(cset != NULL ) { 473 next_cset = cset->next; 474 if (rf_have_enough_components(cset) && 475 cset->ac->clabel->autoconfigure==1) { 476 retcode = rf_auto_config_set(cset,&raidID); 477 if (!retcode) { 478 if (cset->rootable) { 479 rootID = raidID; 480 num_root++; 481 } 482 } else { 483 /* The autoconfig didn't work :( */ 484 #if DEBUG 485 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID); 486 #endif 487 rf_release_all_vps(cset); 488 } 489 } else { 490 /* we're not autoconfiguring this set... 
491 release the associated resources */ 492 rf_release_all_vps(cset); 493 } 494 /* cleanup */ 495 rf_cleanup_config_set(cset); 496 cset = next_cset; 497 } 498 499 /* we found something bootable... */ 500 501 if (num_root == 1) { 502 booted_device = &raidrootdev[rootID]; 503 } else if (num_root > 1) { 504 /* we can't guess.. require the user to answer... */ 505 boothowto |= RB_ASKNAME; 506 } 507 } 508 509 510 int 511 raidsize(dev_t dev) 512 { 513 struct raid_softc *rs; 514 struct disklabel *lp; 515 int part, unit, omask, size; 516 517 unit = raidunit(dev); 518 if (unit >= numraid) 519 return (-1); 520 rs = &raid_softc[unit]; 521 522 if ((rs->sc_flags & RAIDF_INITED) == 0) 523 return (-1); 524 525 part = DISKPART(dev); 526 omask = rs->sc_dkdev.dk_openmask & (1 << part); 527 lp = rs->sc_dkdev.dk_label; 528 529 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc)) 530 return (-1); 531 532 if (lp->d_partitions[part].p_fstype != FS_SWAP) 533 size = -1; 534 else 535 size = lp->d_partitions[part].p_size * 536 (lp->d_secsize / DEV_BSIZE); 537 538 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc)) 539 return (-1); 540 541 return (size); 542 543 } 544 545 int 546 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size) 547 { 548 /* Not implemented. 
*/ 549 return ENXIO; 550 } 551 /* ARGSUSED */ 552 int 553 raidopen(dev_t dev, int flags, int fmt, struct proc *p) 554 { 555 int unit = raidunit(dev); 556 struct raid_softc *rs; 557 struct disklabel *lp; 558 int part, pmask; 559 int error = 0; 560 561 if (unit >= numraid) 562 return (ENXIO); 563 rs = &raid_softc[unit]; 564 565 if ((error = raidlock(rs)) != 0) 566 return (error); 567 lp = rs->sc_dkdev.dk_label; 568 569 part = DISKPART(dev); 570 pmask = (1 << part); 571 572 if ((rs->sc_flags & RAIDF_INITED) && 573 (rs->sc_dkdev.dk_openmask == 0)) 574 raidgetdisklabel(dev); 575 576 /* make sure that this partition exists */ 577 578 if (part != RAW_PART) { 579 if (((rs->sc_flags & RAIDF_INITED) == 0) || 580 ((part >= lp->d_npartitions) || 581 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 582 error = ENXIO; 583 raidunlock(rs); 584 return (error); 585 } 586 } 587 /* Prevent this unit from being unconfigured while open. */ 588 switch (fmt) { 589 case S_IFCHR: 590 rs->sc_dkdev.dk_copenmask |= pmask; 591 break; 592 593 case S_IFBLK: 594 rs->sc_dkdev.dk_bopenmask |= pmask; 595 break; 596 } 597 598 if ((rs->sc_dkdev.dk_openmask == 0) && 599 ((rs->sc_flags & RAIDF_INITED) != 0)) { 600 /* First one... mark things as dirty... Note that we *MUST* 601 have done a configure before this. I DO NOT WANT TO BE 602 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED 603 THAT THEY BELONG TOGETHER!!!!! */ 604 /* XXX should check to see if we're only open for reading 605 here... If so, we needn't do this, but then need some 606 other way of keeping track of what's happened.. 
*/ 607 608 rf_markalldirty( raidPtrs[unit] ); 609 } 610 611 612 rs->sc_dkdev.dk_openmask = 613 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 614 615 raidunlock(rs); 616 617 return (error); 618 619 620 } 621 /* ARGSUSED */ 622 int 623 raidclose(dev_t dev, int flags, int fmt, struct proc *p) 624 { 625 int unit = raidunit(dev); 626 struct raid_softc *rs; 627 int error = 0; 628 int part; 629 630 if (unit >= numraid) 631 return (ENXIO); 632 rs = &raid_softc[unit]; 633 634 if ((error = raidlock(rs)) != 0) 635 return (error); 636 637 part = DISKPART(dev); 638 639 /* ...that much closer to allowing unconfiguration... */ 640 switch (fmt) { 641 case S_IFCHR: 642 rs->sc_dkdev.dk_copenmask &= ~(1 << part); 643 break; 644 645 case S_IFBLK: 646 rs->sc_dkdev.dk_bopenmask &= ~(1 << part); 647 break; 648 } 649 rs->sc_dkdev.dk_openmask = 650 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 651 652 if ((rs->sc_dkdev.dk_openmask == 0) && 653 ((rs->sc_flags & RAIDF_INITED) != 0)) { 654 /* Last one... device is not unconfigured yet. 655 Device shutdown has taken care of setting the 656 clean bits if RAIDF_INITED is not set 657 mark things as clean... */ 658 659 rf_update_component_labels(raidPtrs[unit], 660 RF_FINAL_COMPONENT_UPDATE); 661 if (doing_shutdown) { 662 /* last one, and we're going down, so 663 lights out for this RAID set too. */ 664 error = rf_Shutdown(raidPtrs[unit]); 665 666 /* It's no longer initialized... */ 667 rs->sc_flags &= ~RAIDF_INITED; 668 669 /* Detach the disk. 
*/ 670 disk_detach(&rs->sc_dkdev); 671 } 672 } 673 674 raidunlock(rs); 675 return (0); 676 677 } 678 679 void 680 raidstrategy(struct buf *bp) 681 { 682 int s; 683 684 unsigned int raidID = raidunit(bp->b_dev); 685 RF_Raid_t *raidPtr; 686 struct raid_softc *rs = &raid_softc[raidID]; 687 int wlabel; 688 689 if ((rs->sc_flags & RAIDF_INITED) ==0) { 690 bp->b_error = ENXIO; 691 bp->b_flags |= B_ERROR; 692 bp->b_resid = bp->b_bcount; 693 biodone(bp); 694 return; 695 } 696 if (raidID >= numraid || !raidPtrs[raidID]) { 697 bp->b_error = ENODEV; 698 bp->b_flags |= B_ERROR; 699 bp->b_resid = bp->b_bcount; 700 biodone(bp); 701 return; 702 } 703 raidPtr = raidPtrs[raidID]; 704 if (!raidPtr->valid) { 705 bp->b_error = ENODEV; 706 bp->b_flags |= B_ERROR; 707 bp->b_resid = bp->b_bcount; 708 biodone(bp); 709 return; 710 } 711 if (bp->b_bcount == 0) { 712 db1_printf(("b_bcount is zero..\n")); 713 biodone(bp); 714 return; 715 } 716 717 /* 718 * Do bounds checking and adjust transfer. If there's an 719 * error, the bounds check will flag that for us. 
720 */ 721 722 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING); 723 if (DISKPART(bp->b_dev) != RAW_PART) 724 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) { 725 db1_printf(("Bounds check failed!!:%d %d\n", 726 (int) bp->b_blkno, (int) wlabel)); 727 biodone(bp); 728 return; 729 } 730 s = splbio(); 731 732 bp->b_resid = 0; 733 734 /* stuff it onto our queue */ 735 BUFQ_PUT(&rs->buf_queue, bp); 736 737 raidstart(raidPtrs[raidID]); 738 739 splx(s); 740 } 741 /* ARGSUSED */ 742 int 743 raidread(dev_t dev, struct uio *uio, int flags) 744 { 745 int unit = raidunit(dev); 746 struct raid_softc *rs; 747 748 if (unit >= numraid) 749 return (ENXIO); 750 rs = &raid_softc[unit]; 751 752 if ((rs->sc_flags & RAIDF_INITED) == 0) 753 return (ENXIO); 754 755 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio)); 756 757 } 758 /* ARGSUSED */ 759 int 760 raidwrite(dev_t dev, struct uio *uio, int flags) 761 { 762 int unit = raidunit(dev); 763 struct raid_softc *rs; 764 765 if (unit >= numraid) 766 return (ENXIO); 767 rs = &raid_softc[unit]; 768 769 if ((rs->sc_flags & RAIDF_INITED) == 0) 770 return (ENXIO); 771 772 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio)); 773 774 } 775 776 int 777 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) 778 { 779 int unit = raidunit(dev); 780 int error = 0; 781 int part, pmask; 782 struct raid_softc *rs; 783 RF_Config_t *k_cfg, *u_cfg; 784 RF_Raid_t *raidPtr; 785 RF_RaidDisk_t *diskPtr; 786 RF_AccTotals_t *totals; 787 RF_DeviceConfig_t *d_cfg, **ucfgp; 788 u_char *specific_buf; 789 int retcode = 0; 790 int column; 791 int raidid; 792 struct rf_recon_req *rrcopy, *rr; 793 RF_ComponentLabel_t *clabel; 794 RF_ComponentLabel_t ci_label; 795 RF_ComponentLabel_t **clabel_ptr; 796 RF_SingleComponent_t *sparePtr,*componentPtr; 797 RF_SingleComponent_t hot_spare; 798 RF_SingleComponent_t component; 799 RF_ProgressInfo_t progressInfo, **progressInfoPtr; 800 int i, j, d; 801 #ifdef 
__HAVE_OLD_DISKLABEL 802 struct disklabel newlabel; 803 #endif 804 805 if (unit >= numraid) 806 return (ENXIO); 807 rs = &raid_softc[unit]; 808 raidPtr = raidPtrs[unit]; 809 810 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev, 811 (int) DISKPART(dev), (int) unit, (int) cmd)); 812 813 /* Must be open for writes for these commands... */ 814 switch (cmd) { 815 case DIOCSDINFO: 816 case DIOCWDINFO: 817 #ifdef __HAVE_OLD_DISKLABEL 818 case ODIOCWDINFO: 819 case ODIOCSDINFO: 820 #endif 821 case DIOCWLABEL: 822 if ((flag & FWRITE) == 0) 823 return (EBADF); 824 } 825 826 /* Must be initialized for these... */ 827 switch (cmd) { 828 case DIOCGDINFO: 829 case DIOCSDINFO: 830 case DIOCWDINFO: 831 #ifdef __HAVE_OLD_DISKLABEL 832 case ODIOCGDINFO: 833 case ODIOCWDINFO: 834 case ODIOCSDINFO: 835 case ODIOCGDEFLABEL: 836 #endif 837 case DIOCGPART: 838 case DIOCWLABEL: 839 case DIOCGDEFLABEL: 840 case RAIDFRAME_SHUTDOWN: 841 case RAIDFRAME_REWRITEPARITY: 842 case RAIDFRAME_GET_INFO: 843 case RAIDFRAME_RESET_ACCTOTALS: 844 case RAIDFRAME_GET_ACCTOTALS: 845 case RAIDFRAME_KEEP_ACCTOTALS: 846 case RAIDFRAME_GET_SIZE: 847 case RAIDFRAME_FAIL_DISK: 848 case RAIDFRAME_COPYBACK: 849 case RAIDFRAME_CHECK_RECON_STATUS: 850 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 851 case RAIDFRAME_GET_COMPONENT_LABEL: 852 case RAIDFRAME_SET_COMPONENT_LABEL: 853 case RAIDFRAME_ADD_HOT_SPARE: 854 case RAIDFRAME_REMOVE_HOT_SPARE: 855 case RAIDFRAME_INIT_LABELS: 856 case RAIDFRAME_REBUILD_IN_PLACE: 857 case RAIDFRAME_CHECK_PARITY: 858 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 859 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 860 case RAIDFRAME_CHECK_COPYBACK_STATUS: 861 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 862 case RAIDFRAME_SET_AUTOCONFIG: 863 case RAIDFRAME_SET_ROOT: 864 case RAIDFRAME_DELETE_COMPONENT: 865 case RAIDFRAME_INCORPORATE_HOT_SPARE: 866 if ((rs->sc_flags & RAIDF_INITED) == 0) 867 return (ENXIO); 868 } 869 870 switch (cmd) { 871 872 /* configure the system */ 873 case RAIDFRAME_CONFIGURE: 
874 875 if (raidPtr->valid) { 876 /* There is a valid RAID set running on this unit! */ 877 printf("raid%d: Device already configured!\n",unit); 878 return(EINVAL); 879 } 880 881 /* copy-in the configuration information */ 882 /* data points to a pointer to the configuration structure */ 883 884 u_cfg = *((RF_Config_t **) data); 885 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); 886 if (k_cfg == NULL) { 887 return (ENOMEM); 888 } 889 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t)); 890 if (retcode) { 891 RF_Free(k_cfg, sizeof(RF_Config_t)); 892 db1_printf(("rf_ioctl: retcode=%d copyin.1\n", 893 retcode)); 894 return (retcode); 895 } 896 /* allocate a buffer for the layout-specific data, and copy it 897 * in */ 898 if (k_cfg->layoutSpecificSize) { 899 if (k_cfg->layoutSpecificSize > 10000) { 900 /* sanity check */ 901 RF_Free(k_cfg, sizeof(RF_Config_t)); 902 return (EINVAL); 903 } 904 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, 905 (u_char *)); 906 if (specific_buf == NULL) { 907 RF_Free(k_cfg, sizeof(RF_Config_t)); 908 return (ENOMEM); 909 } 910 retcode = copyin(k_cfg->layoutSpecific, specific_buf, 911 k_cfg->layoutSpecificSize); 912 if (retcode) { 913 RF_Free(k_cfg, sizeof(RF_Config_t)); 914 RF_Free(specific_buf, 915 k_cfg->layoutSpecificSize); 916 db1_printf(("rf_ioctl: retcode=%d copyin.2\n", 917 retcode)); 918 return (retcode); 919 } 920 } else 921 specific_buf = NULL; 922 k_cfg->layoutSpecific = specific_buf; 923 924 /* should do some kind of sanity check on the configuration. 925 * Store the sum of all the bytes in the last byte? 
*/ 926 927 /* configure the system */ 928 929 /* 930 * Clear the entire RAID descriptor, just to make sure 931 * there is no stale data left in the case of a 932 * reconfiguration 933 */ 934 memset((char *) raidPtr, 0, sizeof(RF_Raid_t)); 935 raidPtr->raidid = unit; 936 937 retcode = rf_Configure(raidPtr, k_cfg, NULL); 938 939 if (retcode == 0) { 940 941 /* allow this many simultaneous IO's to 942 this RAID device */ 943 raidPtr->openings = RAIDOUTSTANDING; 944 945 raidinit(raidPtr); 946 rf_markalldirty(raidPtr); 947 } 948 /* free the buffers. No return code here. */ 949 if (k_cfg->layoutSpecificSize) { 950 RF_Free(specific_buf, k_cfg->layoutSpecificSize); 951 } 952 RF_Free(k_cfg, sizeof(RF_Config_t)); 953 954 return (retcode); 955 956 /* shutdown the system */ 957 case RAIDFRAME_SHUTDOWN: 958 959 if ((error = raidlock(rs)) != 0) 960 return (error); 961 962 /* 963 * If somebody has a partition mounted, we shouldn't 964 * shutdown. 965 */ 966 967 part = DISKPART(dev); 968 pmask = (1 << part); 969 if ((rs->sc_dkdev.dk_openmask & ~pmask) || 970 ((rs->sc_dkdev.dk_bopenmask & pmask) && 971 (rs->sc_dkdev.dk_copenmask & pmask))) { 972 raidunlock(rs); 973 return (EBUSY); 974 } 975 976 retcode = rf_Shutdown(raidPtr); 977 978 /* It's no longer initialized... */ 979 rs->sc_flags &= ~RAIDF_INITED; 980 981 /* Detach the disk. 
*/ 982 disk_detach(&rs->sc_dkdev); 983 984 raidunlock(rs); 985 986 return (retcode); 987 case RAIDFRAME_GET_COMPONENT_LABEL: 988 clabel_ptr = (RF_ComponentLabel_t **) data; 989 /* need to read the component label for the disk indicated 990 by row,column in clabel */ 991 992 /* For practice, let's get it directly fromdisk, rather 993 than from the in-core copy */ 994 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ), 995 (RF_ComponentLabel_t *)); 996 if (clabel == NULL) 997 return (ENOMEM); 998 999 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t)); 1000 1001 retcode = copyin( *clabel_ptr, clabel, 1002 sizeof(RF_ComponentLabel_t)); 1003 1004 if (retcode) { 1005 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1006 return(retcode); 1007 } 1008 1009 clabel->row = 0; /* Don't allow looking at anything else.*/ 1010 1011 column = clabel->column; 1012 1013 if ((column < 0) || (column >= raidPtr->numCol + 1014 raidPtr->numSpare)) { 1015 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1016 return(EINVAL); 1017 } 1018 1019 raidread_component_label(raidPtr->Disks[column].dev, 1020 raidPtr->raid_cinfo[column].ci_vp, 1021 clabel ); 1022 1023 retcode = copyout(clabel, *clabel_ptr, 1024 sizeof(RF_ComponentLabel_t)); 1025 RF_Free(clabel, sizeof(RF_ComponentLabel_t)); 1026 return (retcode); 1027 1028 case RAIDFRAME_SET_COMPONENT_LABEL: 1029 clabel = (RF_ComponentLabel_t *) data; 1030 1031 /* XXX check the label for valid stuff... */ 1032 /* Note that some things *should not* get modified -- 1033 the user should be re-initing the labels instead of 1034 trying to patch things. 
1035 */ 1036 1037 raidid = raidPtr->raidid; 1038 #if DEBUG 1039 printf("raid%d: Got component label:\n", raidid); 1040 printf("raid%d: Version: %d\n", raidid, clabel->version); 1041 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number); 1042 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter); 1043 printf("raid%d: Column: %d\n", raidid, clabel->column); 1044 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns); 1045 printf("raid%d: Clean: %d\n", raidid, clabel->clean); 1046 printf("raid%d: Status: %d\n", raidid, clabel->status); 1047 #endif 1048 clabel->row = 0; 1049 column = clabel->column; 1050 1051 if ((column < 0) || (column >= raidPtr->numCol)) { 1052 return(EINVAL); 1053 } 1054 1055 /* XXX this isn't allowed to do anything for now :-) */ 1056 1057 /* XXX and before it is, we need to fill in the rest 1058 of the fields!?!?!?! */ 1059 #if 0 1060 raidwrite_component_label( 1061 raidPtr->Disks[column].dev, 1062 raidPtr->raid_cinfo[column].ci_vp, 1063 clabel ); 1064 #endif 1065 return (0); 1066 1067 case RAIDFRAME_INIT_LABELS: 1068 clabel = (RF_ComponentLabel_t *) data; 1069 /* 1070 we only want the serial number from 1071 the above. We get all the rest of the information 1072 from the config that was used to create this RAID 1073 set. 
1074 */ 1075 1076 raidPtr->serial_number = clabel->serial_number; 1077 1078 raid_init_component_label(raidPtr, &ci_label); 1079 ci_label.serial_number = clabel->serial_number; 1080 ci_label.row = 0; /* we dont' pretend to support more */ 1081 1082 for(column=0;column<raidPtr->numCol;column++) { 1083 diskPtr = &raidPtr->Disks[column]; 1084 if (!RF_DEAD_DISK(diskPtr->status)) { 1085 ci_label.partitionSize = diskPtr->partitionSize; 1086 ci_label.column = column; 1087 raidwrite_component_label( 1088 raidPtr->Disks[column].dev, 1089 raidPtr->raid_cinfo[column].ci_vp, 1090 &ci_label ); 1091 } 1092 } 1093 1094 return (retcode); 1095 case RAIDFRAME_SET_AUTOCONFIG: 1096 d = rf_set_autoconfig(raidPtr, *(int *) data); 1097 printf("raid%d: New autoconfig value is: %d\n", 1098 raidPtr->raidid, d); 1099 *(int *) data = d; 1100 return (retcode); 1101 1102 case RAIDFRAME_SET_ROOT: 1103 d = rf_set_rootpartition(raidPtr, *(int *) data); 1104 printf("raid%d: New rootpartition value is: %d\n", 1105 raidPtr->raidid, d); 1106 *(int *) data = d; 1107 return (retcode); 1108 1109 /* initialize all parity */ 1110 case RAIDFRAME_REWRITEPARITY: 1111 1112 if (raidPtr->Layout.map->faultsTolerated == 0) { 1113 /* Parity for RAID 0 is trivially correct */ 1114 raidPtr->parity_good = RF_RAID_CLEAN; 1115 return(0); 1116 } 1117 1118 if (raidPtr->parity_rewrite_in_progress == 1) { 1119 /* Re-write is already in progress! 
*/ 1120 return(EINVAL); 1121 } 1122 1123 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, 1124 rf_RewriteParityThread, 1125 raidPtr,"raid_parity"); 1126 return (retcode); 1127 1128 1129 case RAIDFRAME_ADD_HOT_SPARE: 1130 sparePtr = (RF_SingleComponent_t *) data; 1131 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t)); 1132 retcode = rf_add_hot_spare(raidPtr, &hot_spare); 1133 return(retcode); 1134 1135 case RAIDFRAME_REMOVE_HOT_SPARE: 1136 return(retcode); 1137 1138 case RAIDFRAME_DELETE_COMPONENT: 1139 componentPtr = (RF_SingleComponent_t *)data; 1140 memcpy( &component, componentPtr, 1141 sizeof(RF_SingleComponent_t)); 1142 retcode = rf_delete_component(raidPtr, &component); 1143 return(retcode); 1144 1145 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1146 componentPtr = (RF_SingleComponent_t *)data; 1147 memcpy( &component, componentPtr, 1148 sizeof(RF_SingleComponent_t)); 1149 retcode = rf_incorporate_hot_spare(raidPtr, &component); 1150 return(retcode); 1151 1152 case RAIDFRAME_REBUILD_IN_PLACE: 1153 1154 if (raidPtr->Layout.map->faultsTolerated == 0) { 1155 /* Can't do this on a RAID 0!! */ 1156 return(EINVAL); 1157 } 1158 1159 if (raidPtr->recon_in_progress == 1) { 1160 /* a reconstruct is already in progress! */ 1161 return(EINVAL); 1162 } 1163 1164 componentPtr = (RF_SingleComponent_t *) data; 1165 memcpy( &component, componentPtr, 1166 sizeof(RF_SingleComponent_t)); 1167 component.row = 0; /* we don't support any more */ 1168 column = component.column; 1169 1170 if ((column < 0) || (column >= raidPtr->numCol)) { 1171 return(EINVAL); 1172 } 1173 1174 RF_LOCK_MUTEX(raidPtr->mutex); 1175 if ((raidPtr->Disks[column].status == rf_ds_optimal) && 1176 (raidPtr->numFailures > 0)) { 1177 /* XXX 0 above shouldn't be constant!!! */ 1178 /* some component other than this has failed. 1179 Let's not make things worse than they already 1180 are... 
*/ 1181 printf("raid%d: Unable to reconstruct to disk at:\n", 1182 raidPtr->raidid); 1183 printf("raid%d: Col: %d Too many failures.\n", 1184 raidPtr->raidid, column); 1185 RF_UNLOCK_MUTEX(raidPtr->mutex); 1186 return (EINVAL); 1187 } 1188 if (raidPtr->Disks[column].status == 1189 rf_ds_reconstructing) { 1190 printf("raid%d: Unable to reconstruct to disk at:\n", 1191 raidPtr->raidid); 1192 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column); 1193 1194 RF_UNLOCK_MUTEX(raidPtr->mutex); 1195 return (EINVAL); 1196 } 1197 if (raidPtr->Disks[column].status == rf_ds_spared) { 1198 RF_UNLOCK_MUTEX(raidPtr->mutex); 1199 return (EINVAL); 1200 } 1201 RF_UNLOCK_MUTEX(raidPtr->mutex); 1202 1203 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1204 if (rrcopy == NULL) 1205 return(ENOMEM); 1206 1207 rrcopy->raidPtr = (void *) raidPtr; 1208 rrcopy->col = column; 1209 1210 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1211 rf_ReconstructInPlaceThread, 1212 rrcopy,"raid_reconip"); 1213 return(retcode); 1214 1215 case RAIDFRAME_GET_INFO: 1216 if (!raidPtr->valid) 1217 return (ENODEV); 1218 ucfgp = (RF_DeviceConfig_t **) data; 1219 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), 1220 (RF_DeviceConfig_t *)); 1221 if (d_cfg == NULL) 1222 return (ENOMEM); 1223 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t)); 1224 d_cfg->rows = 1; /* there is only 1 row now */ 1225 d_cfg->cols = raidPtr->numCol; 1226 d_cfg->ndevs = raidPtr->numCol; 1227 if (d_cfg->ndevs >= RF_MAX_DISKS) { 1228 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1229 return (ENOMEM); 1230 } 1231 d_cfg->nspares = raidPtr->numSpare; 1232 if (d_cfg->nspares >= RF_MAX_DISKS) { 1233 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1234 return (ENOMEM); 1235 } 1236 d_cfg->maxqdepth = raidPtr->maxQueueDepth; 1237 d = 0; 1238 for (j = 0; j < d_cfg->cols; j++) { 1239 d_cfg->devs[d] = raidPtr->Disks[j]; 1240 d++; 1241 } 1242 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { 1243 
d_cfg->spares[i] = raidPtr->Disks[j]; 1244 } 1245 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t)); 1246 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1247 1248 return (retcode); 1249 1250 case RAIDFRAME_CHECK_PARITY: 1251 *(int *) data = raidPtr->parity_good; 1252 return (0); 1253 1254 case RAIDFRAME_RESET_ACCTOTALS: 1255 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals)); 1256 return (0); 1257 1258 case RAIDFRAME_GET_ACCTOTALS: 1259 totals = (RF_AccTotals_t *) data; 1260 *totals = raidPtr->acc_totals; 1261 return (0); 1262 1263 case RAIDFRAME_KEEP_ACCTOTALS: 1264 raidPtr->keep_acc_totals = *(int *)data; 1265 return (0); 1266 1267 case RAIDFRAME_GET_SIZE: 1268 *(int *) data = raidPtr->totalSectors; 1269 return (0); 1270 1271 /* fail a disk & optionally start reconstruction */ 1272 case RAIDFRAME_FAIL_DISK: 1273 1274 if (raidPtr->Layout.map->faultsTolerated == 0) { 1275 /* Can't do this on a RAID 0!! */ 1276 return(EINVAL); 1277 } 1278 1279 rr = (struct rf_recon_req *) data; 1280 rr->row = 0; 1281 if (rr->col < 0 || rr->col >= raidPtr->numCol) 1282 return (EINVAL); 1283 1284 1285 RF_LOCK_MUTEX(raidPtr->mutex); 1286 if (raidPtr->status == rf_rs_reconstructing) { 1287 /* you can't fail a disk while we're reconstructing! */ 1288 /* XXX wrong for RAID6 */ 1289 RF_UNLOCK_MUTEX(raidPtr->mutex); 1290 return (EINVAL); 1291 } 1292 if ((raidPtr->Disks[rr->col].status == 1293 rf_ds_optimal) && (raidPtr->numFailures > 0)) { 1294 /* some other component has failed. Let's not make 1295 things worse. XXX wrong for RAID6 */ 1296 RF_UNLOCK_MUTEX(raidPtr->mutex); 1297 return (EINVAL); 1298 } 1299 if (raidPtr->Disks[rr->col].status == rf_ds_spared) { 1300 /* Can't fail a spared disk! 
*/ 1301 RF_UNLOCK_MUTEX(raidPtr->mutex); 1302 return (EINVAL); 1303 } 1304 RF_UNLOCK_MUTEX(raidPtr->mutex); 1305 1306 /* make a copy of the recon request so that we don't rely on 1307 * the user's buffer */ 1308 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1309 if (rrcopy == NULL) 1310 return(ENOMEM); 1311 memcpy(rrcopy, rr, sizeof(*rr)); 1312 rrcopy->raidPtr = (void *) raidPtr; 1313 1314 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1315 rf_ReconThread, 1316 rrcopy,"raid_recon"); 1317 return (0); 1318 1319 /* invoke a copyback operation after recon on whatever disk 1320 * needs it, if any */ 1321 case RAIDFRAME_COPYBACK: 1322 1323 if (raidPtr->Layout.map->faultsTolerated == 0) { 1324 /* This makes no sense on a RAID 0!! */ 1325 return(EINVAL); 1326 } 1327 1328 if (raidPtr->copyback_in_progress == 1) { 1329 /* Copyback is already in progress! */ 1330 return(EINVAL); 1331 } 1332 1333 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, 1334 rf_CopybackThread, 1335 raidPtr,"raid_copyback"); 1336 return (retcode); 1337 1338 /* return the percentage completion of reconstruction */ 1339 case RAIDFRAME_CHECK_RECON_STATUS: 1340 if (raidPtr->Layout.map->faultsTolerated == 0) { 1341 /* This makes no sense on a RAID 0, so tell the 1342 user it's done. 
*/ 1343 *(int *) data = 100; 1344 return(0); 1345 } 1346 if (raidPtr->status != rf_rs_reconstructing) 1347 *(int *) data = 100; 1348 else { 1349 if (raidPtr->reconControl->numRUsTotal > 0) { 1350 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal); 1351 } else { 1352 *(int *) data = 0; 1353 } 1354 } 1355 return (0); 1356 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1357 progressInfoPtr = (RF_ProgressInfo_t **) data; 1358 if (raidPtr->status != rf_rs_reconstructing) { 1359 progressInfo.remaining = 0; 1360 progressInfo.completed = 100; 1361 progressInfo.total = 100; 1362 } else { 1363 progressInfo.total = 1364 raidPtr->reconControl->numRUsTotal; 1365 progressInfo.completed = 1366 raidPtr->reconControl->numRUsComplete; 1367 progressInfo.remaining = progressInfo.total - 1368 progressInfo.completed; 1369 } 1370 retcode = copyout(&progressInfo, *progressInfoPtr, 1371 sizeof(RF_ProgressInfo_t)); 1372 return (retcode); 1373 1374 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1375 if (raidPtr->Layout.map->faultsTolerated == 0) { 1376 /* This makes no sense on a RAID 0, so tell the 1377 user it's done. 
*/ 1378 *(int *) data = 100; 1379 return(0); 1380 } 1381 if (raidPtr->parity_rewrite_in_progress == 1) { 1382 *(int *) data = 100 * 1383 raidPtr->parity_rewrite_stripes_done / 1384 raidPtr->Layout.numStripe; 1385 } else { 1386 *(int *) data = 100; 1387 } 1388 return (0); 1389 1390 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1391 progressInfoPtr = (RF_ProgressInfo_t **) data; 1392 if (raidPtr->parity_rewrite_in_progress == 1) { 1393 progressInfo.total = raidPtr->Layout.numStripe; 1394 progressInfo.completed = 1395 raidPtr->parity_rewrite_stripes_done; 1396 progressInfo.remaining = progressInfo.total - 1397 progressInfo.completed; 1398 } else { 1399 progressInfo.remaining = 0; 1400 progressInfo.completed = 100; 1401 progressInfo.total = 100; 1402 } 1403 retcode = copyout(&progressInfo, *progressInfoPtr, 1404 sizeof(RF_ProgressInfo_t)); 1405 return (retcode); 1406 1407 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1408 if (raidPtr->Layout.map->faultsTolerated == 0) { 1409 /* This makes no sense on a RAID 0 */ 1410 *(int *) data = 100; 1411 return(0); 1412 } 1413 if (raidPtr->copyback_in_progress == 1) { 1414 *(int *) data = 100 * raidPtr->copyback_stripes_done / 1415 raidPtr->Layout.numStripe; 1416 } else { 1417 *(int *) data = 100; 1418 } 1419 return (0); 1420 1421 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1422 progressInfoPtr = (RF_ProgressInfo_t **) data; 1423 if (raidPtr->copyback_in_progress == 1) { 1424 progressInfo.total = raidPtr->Layout.numStripe; 1425 progressInfo.completed = 1426 raidPtr->copyback_stripes_done; 1427 progressInfo.remaining = progressInfo.total - 1428 progressInfo.completed; 1429 } else { 1430 progressInfo.remaining = 0; 1431 progressInfo.completed = 100; 1432 progressInfo.total = 100; 1433 } 1434 retcode = copyout(&progressInfo, *progressInfoPtr, 1435 sizeof(RF_ProgressInfo_t)); 1436 return (retcode); 1437 1438 /* the sparetable daemon calls this to wait for the kernel to 1439 * need a spare table. 
this ioctl does not return until a 1440 * spare table is needed. XXX -- calling mpsleep here in the 1441 * ioctl code is almost certainly wrong and evil. -- XXX XXX 1442 * -- I should either compute the spare table in the kernel, 1443 * or have a different -- XXX XXX -- interface (a different 1444 * character device) for delivering the table -- XXX */ 1445 #if 0 1446 case RAIDFRAME_SPARET_WAIT: 1447 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1448 while (!rf_sparet_wait_queue) 1449 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); 1450 waitreq = rf_sparet_wait_queue; 1451 rf_sparet_wait_queue = rf_sparet_wait_queue->next; 1452 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1453 1454 /* structure assignment */ 1455 *((RF_SparetWait_t *) data) = *waitreq; 1456 1457 RF_Free(waitreq, sizeof(*waitreq)); 1458 return (0); 1459 1460 /* wakes up a process waiting on SPARET_WAIT and puts an error 1461 * code in it that will cause the dameon to exit */ 1462 case RAIDFRAME_ABORT_SPARET_WAIT: 1463 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1464 waitreq->fcol = -1; 1465 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1466 waitreq->next = rf_sparet_wait_queue; 1467 rf_sparet_wait_queue = waitreq; 1468 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1469 wakeup(&rf_sparet_wait_queue); 1470 return (0); 1471 1472 /* used by the spare table daemon to deliver a spare table 1473 * into the kernel */ 1474 case RAIDFRAME_SEND_SPARET: 1475 1476 /* install the spare table */ 1477 retcode = rf_SetSpareTable(raidPtr, *(void **) data); 1478 1479 /* respond to the requestor. 
the return status of the spare 1480 * table installation is passed in the "fcol" field */ 1481 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1482 waitreq->fcol = retcode; 1483 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1484 waitreq->next = rf_sparet_resp_queue; 1485 rf_sparet_resp_queue = waitreq; 1486 wakeup(&rf_sparet_resp_queue); 1487 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1488 1489 return (retcode); 1490 #endif 1491 1492 default: 1493 break; /* fall through to the os-specific code below */ 1494 1495 } 1496 1497 if (!raidPtr->valid) 1498 return (EINVAL); 1499 1500 /* 1501 * Add support for "regular" device ioctls here. 1502 */ 1503 1504 switch (cmd) { 1505 case DIOCGDINFO: 1506 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label); 1507 break; 1508 #ifdef __HAVE_OLD_DISKLABEL 1509 case ODIOCGDINFO: 1510 newlabel = *(rs->sc_dkdev.dk_label); 1511 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1512 return ENOTTY; 1513 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1514 break; 1515 #endif 1516 1517 case DIOCGPART: 1518 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label; 1519 ((struct partinfo *) data)->part = 1520 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1521 break; 1522 1523 case DIOCWDINFO: 1524 case DIOCSDINFO: 1525 #ifdef __HAVE_OLD_DISKLABEL 1526 case ODIOCWDINFO: 1527 case ODIOCSDINFO: 1528 #endif 1529 { 1530 struct disklabel *lp; 1531 #ifdef __HAVE_OLD_DISKLABEL 1532 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1533 memset(&newlabel, 0, sizeof newlabel); 1534 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1535 lp = &newlabel; 1536 } else 1537 #endif 1538 lp = (struct disklabel *)data; 1539 1540 if ((error = raidlock(rs)) != 0) 1541 return (error); 1542 1543 rs->sc_flags |= RAIDF_LABELLING; 1544 1545 error = setdisklabel(rs->sc_dkdev.dk_label, 1546 lp, 0, rs->sc_dkdev.dk_cpulabel); 1547 if (error == 0) { 1548 if (cmd == DIOCWDINFO 1549 #ifdef __HAVE_OLD_DISKLABEL 1550 || cmd == ODIOCWDINFO 1551 #endif 1552 ) 1553 
error = writedisklabel(RAIDLABELDEV(dev), 1554 raidstrategy, rs->sc_dkdev.dk_label, 1555 rs->sc_dkdev.dk_cpulabel); 1556 } 1557 rs->sc_flags &= ~RAIDF_LABELLING; 1558 1559 raidunlock(rs); 1560 1561 if (error) 1562 return (error); 1563 break; 1564 } 1565 1566 case DIOCWLABEL: 1567 if (*(int *) data != 0) 1568 rs->sc_flags |= RAIDF_WLABEL; 1569 else 1570 rs->sc_flags &= ~RAIDF_WLABEL; 1571 break; 1572 1573 case DIOCGDEFLABEL: 1574 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data); 1575 break; 1576 1577 #ifdef __HAVE_OLD_DISKLABEL 1578 case ODIOCGDEFLABEL: 1579 raidgetdefaultlabel(raidPtr, rs, &newlabel); 1580 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1581 return ENOTTY; 1582 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1583 break; 1584 #endif 1585 1586 default: 1587 retcode = ENOTTY; 1588 } 1589 return (retcode); 1590 1591 } 1592 1593 1594 /* raidinit -- complete the rest of the initialization for the 1595 RAIDframe device. */ 1596 1597 1598 static void 1599 raidinit(RF_Raid_t *raidPtr) 1600 { 1601 struct raid_softc *rs; 1602 int unit; 1603 1604 unit = raidPtr->raidid; 1605 1606 rs = &raid_softc[unit]; 1607 1608 /* XXX should check return code first... */ 1609 rs->sc_flags |= RAIDF_INITED; 1610 1611 /* XXX doesn't check bounds. */ 1612 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit); 1613 1614 rs->sc_dkdev.dk_name = rs->sc_xname; 1615 1616 /* disk_attach actually creates space for the CPU disklabel, among 1617 * other things, so it's critical to call this *BEFORE* we try putzing 1618 * with disklabels. */ 1619 1620 disk_attach(&rs->sc_dkdev); 1621 1622 /* XXX There may be a weird interaction here between this, and 1623 * protectedSectors, as used in RAIDframe. 
*/ 1624 1625 rs->sc_size = raidPtr->totalSectors; 1626 } 1627 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 1628 /* wake up the daemon & tell it to get us a spare table 1629 * XXX 1630 * the entries in the queues should be tagged with the raidPtr 1631 * so that in the extremely rare case that two recons happen at once, 1632 * we know for which device were requesting a spare table 1633 * XXX 1634 * 1635 * XXX This code is not currently used. GO 1636 */ 1637 int 1638 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req) 1639 { 1640 int retcode; 1641 1642 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1643 req->next = rf_sparet_wait_queue; 1644 rf_sparet_wait_queue = req; 1645 wakeup(&rf_sparet_wait_queue); 1646 1647 /* mpsleep unlocks the mutex */ 1648 while (!rf_sparet_resp_queue) { 1649 tsleep(&rf_sparet_resp_queue, PRIBIO, 1650 "raidframe getsparetable", 0); 1651 } 1652 req = rf_sparet_resp_queue; 1653 rf_sparet_resp_queue = req->next; 1654 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1655 1656 retcode = req->fcol; 1657 RF_Free(req, sizeof(*req)); /* this is not the same req as we 1658 * alloc'd */ 1659 return (retcode); 1660 } 1661 #endif 1662 1663 /* a wrapper around rf_DoAccess that extracts appropriate info from the 1664 * bp & passes it down. 
1665 * any calls originating in the kernel must use non-blocking I/O 1666 * do some extra sanity checking to return "appropriate" error values for 1667 * certain conditions (to make some standard utilities work) 1668 * 1669 * Formerly known as: rf_DoAccessKernel 1670 */ 1671 void 1672 raidstart(RF_Raid_t *raidPtr) 1673 { 1674 RF_SectorCount_t num_blocks, pb, sum; 1675 RF_RaidAddr_t raid_addr; 1676 struct partition *pp; 1677 daddr_t blocknum; 1678 int unit; 1679 struct raid_softc *rs; 1680 int do_async; 1681 struct buf *bp; 1682 int rc; 1683 1684 unit = raidPtr->raidid; 1685 rs = &raid_softc[unit]; 1686 1687 /* quick check to see if anything has died recently */ 1688 RF_LOCK_MUTEX(raidPtr->mutex); 1689 if (raidPtr->numNewFailures > 0) { 1690 RF_UNLOCK_MUTEX(raidPtr->mutex); 1691 rf_update_component_labels(raidPtr, 1692 RF_NORMAL_COMPONENT_UPDATE); 1693 RF_LOCK_MUTEX(raidPtr->mutex); 1694 raidPtr->numNewFailures--; 1695 } 1696 1697 /* Check to see if we're at the limit... */ 1698 while (raidPtr->openings > 0) { 1699 RF_UNLOCK_MUTEX(raidPtr->mutex); 1700 1701 /* get the next item, if any, from the queue */ 1702 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) { 1703 /* nothing more to do */ 1704 return; 1705 } 1706 1707 /* Ok, for the bp we have here, bp->b_blkno is relative to the 1708 * partition.. Need to make it absolute to the underlying 1709 * device.. */ 1710 1711 blocknum = bp->b_blkno; 1712 if (DISKPART(bp->b_dev) != RAW_PART) { 1713 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; 1714 blocknum += pp->p_offset; 1715 } 1716 1717 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, 1718 (int) blocknum)); 1719 1720 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount)); 1721 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid)); 1722 1723 /* *THIS* is where we adjust what block we're going to... 1724 * but DO NOT TOUCH bp->b_blkno!!! 
*/ 1725 raid_addr = blocknum; 1726 1727 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector; 1728 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0; 1729 sum = raid_addr + num_blocks + pb; 1730 if (1 || rf_debugKernelAccess) { 1731 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n", 1732 (int) raid_addr, (int) sum, (int) num_blocks, 1733 (int) pb, (int) bp->b_resid)); 1734 } 1735 if ((sum > raidPtr->totalSectors) || (sum < raid_addr) 1736 || (sum < num_blocks) || (sum < pb)) { 1737 bp->b_error = ENOSPC; 1738 bp->b_flags |= B_ERROR; 1739 bp->b_resid = bp->b_bcount; 1740 biodone(bp); 1741 RF_LOCK_MUTEX(raidPtr->mutex); 1742 continue; 1743 } 1744 /* 1745 * XXX rf_DoAccess() should do this, not just DoAccessKernel() 1746 */ 1747 1748 if (bp->b_bcount & raidPtr->sectorMask) { 1749 bp->b_error = EINVAL; 1750 bp->b_flags |= B_ERROR; 1751 bp->b_resid = bp->b_bcount; 1752 biodone(bp); 1753 RF_LOCK_MUTEX(raidPtr->mutex); 1754 continue; 1755 1756 } 1757 db1_printf(("Calling DoAccess..\n")); 1758 1759 1760 RF_LOCK_MUTEX(raidPtr->mutex); 1761 raidPtr->openings--; 1762 RF_UNLOCK_MUTEX(raidPtr->mutex); 1763 1764 /* 1765 * Everything is async. 1766 */ 1767 do_async = 1; 1768 1769 disk_busy(&rs->sc_dkdev); 1770 1771 /* XXX we're still at splbio() here... do we *really* 1772 need to be? */ 1773 1774 /* don't ever condition on bp->b_flags & B_WRITE. 1775 * always condition on B_READ instead */ 1776 1777 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? 1778 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, 1779 do_async, raid_addr, num_blocks, 1780 bp->b_data, bp, RF_DAG_NONBLOCKING_IO); 1781 1782 if (rc) { 1783 bp->b_error = rc; 1784 bp->b_flags |= B_ERROR; 1785 bp->b_resid = bp->b_bcount; 1786 biodone(bp); 1787 /* continue loop */ 1788 } 1789 1790 RF_LOCK_MUTEX(raidPtr->mutex); 1791 } 1792 RF_UNLOCK_MUTEX(raidPtr->mutex); 1793 } 1794 1795 1796 1797 1798 /* invoke an I/O from kernel mode. 
Disk queue should be locked upon entry */ 1799 1800 int 1801 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req) 1802 { 1803 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE; 1804 struct buf *bp; 1805 struct raidbuf *raidbp = NULL; 1806 1807 req->queue = queue; 1808 1809 #if DIAGNOSTIC 1810 if (queue->raidPtr->raidid >= numraid) { 1811 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid, 1812 numraid); 1813 panic("Invalid Unit number in rf_DispatchKernelIO"); 1814 } 1815 #endif 1816 1817 bp = req->bp; 1818 #if 1 1819 /* XXX when there is a physical disk failure, someone is passing us a 1820 * buffer that contains old stuff!! Attempt to deal with this problem 1821 * without taking a performance hit... (not sure where the real bug 1822 * is. It's buried in RAIDframe somewhere) :-( GO ) */ 1823 1824 if (bp->b_flags & B_ERROR) { 1825 bp->b_flags &= ~B_ERROR; 1826 } 1827 if (bp->b_error != 0) { 1828 bp->b_error = 0; 1829 } 1830 #endif 1831 raidbp = pool_get(&rf_pools.cbuf, PR_NOWAIT); 1832 if (raidbp == NULL) { 1833 bp->b_flags |= B_ERROR; 1834 bp->b_error = ENOMEM; 1835 return (ENOMEM); 1836 } 1837 BUF_INIT(&raidbp->rf_buf); 1838 1839 /* 1840 * context for raidiodone 1841 */ 1842 raidbp->rf_obp = bp; 1843 raidbp->req = req; 1844 1845 BIO_COPYPRIO(&raidbp->rf_buf, bp); 1846 1847 switch (req->type) { 1848 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ 1849 /* XXX need to do something extra here.. */ 1850 /* I'm leaving this in, as I've never actually seen it used, 1851 * and I'd like folks to report it... GO */ 1852 printf(("WAKEUP CALLED\n")); 1853 queue->numOutstanding++; 1854 1855 /* XXX need to glue the original buffer into this?? 
*/ 1856 1857 KernelWakeupFunc(&raidbp->rf_buf); 1858 break; 1859 1860 case RF_IO_TYPE_READ: 1861 case RF_IO_TYPE_WRITE: 1862 #if RF_ACC_TRACE > 0 1863 if (req->tracerec) { 1864 RF_ETIMER_START(req->tracerec->timer); 1865 } 1866 #endif 1867 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp, 1868 op | bp->b_flags, queue->rf_cinfo->ci_dev, 1869 req->sectorOffset, req->numSector, 1870 req->buf, KernelWakeupFunc, (void *) req, 1871 queue->raidPtr->logBytesPerSector, req->b_proc); 1872 1873 if (rf_debugKernelAccess) { 1874 db1_printf(("dispatch: bp->b_blkno = %ld\n", 1875 (long) bp->b_blkno)); 1876 } 1877 queue->numOutstanding++; 1878 queue->last_deq_sector = req->sectorOffset; 1879 /* acc wouldn't have been let in if there were any pending 1880 * reqs at any other priority */ 1881 queue->curPriority = req->priority; 1882 1883 db1_printf(("Going for %c to unit %d col %d\n", 1884 req->type, queue->raidPtr->raidid, 1885 queue->col)); 1886 db1_printf(("sector %d count %d (%d bytes) %d\n", 1887 (int) req->sectorOffset, (int) req->numSector, 1888 (int) (req->numSector << 1889 queue->raidPtr->logBytesPerSector), 1890 (int) queue->raidPtr->logBytesPerSector)); 1891 if ((raidbp->rf_buf.b_flags & B_READ) == 0) { 1892 raidbp->rf_buf.b_vp->v_numoutput++; 1893 } 1894 VOP_STRATEGY(raidbp->rf_buf.b_vp, &raidbp->rf_buf); 1895 1896 break; 1897 1898 default: 1899 panic("bad req->type in rf_DispatchKernelIO"); 1900 } 1901 db1_printf(("Exiting from DispatchKernelIO\n")); 1902 1903 return (0); 1904 } 1905 /* this is the callback function associated with a I/O invoked from 1906 kernel code. 
1907 */ 1908 static void 1909 KernelWakeupFunc(struct buf *vbp) 1910 { 1911 RF_DiskQueueData_t *req = NULL; 1912 RF_DiskQueue_t *queue; 1913 struct raidbuf *raidbp = (struct raidbuf *) vbp; 1914 struct buf *bp; 1915 int s; 1916 1917 s = splbio(); 1918 db1_printf(("recovering the request queue:\n")); 1919 req = raidbp->req; 1920 1921 bp = raidbp->rf_obp; 1922 1923 queue = (RF_DiskQueue_t *) req->queue; 1924 1925 if (raidbp->rf_buf.b_flags & B_ERROR) { 1926 bp->b_flags |= B_ERROR; 1927 bp->b_error = raidbp->rf_buf.b_error ? 1928 raidbp->rf_buf.b_error : EIO; 1929 } 1930 1931 /* XXX methinks this could be wrong... */ 1932 #if 1 1933 bp->b_resid = raidbp->rf_buf.b_resid; 1934 #endif 1935 #if RF_ACC_TRACE > 0 1936 if (req->tracerec) { 1937 RF_ETIMER_STOP(req->tracerec->timer); 1938 RF_ETIMER_EVAL(req->tracerec->timer); 1939 RF_LOCK_MUTEX(rf_tracing_mutex); 1940 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); 1941 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); 1942 req->tracerec->num_phys_ios++; 1943 RF_UNLOCK_MUTEX(rf_tracing_mutex); 1944 } 1945 #endif 1946 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */ 1947 1948 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go 1949 * ballistic, and mark the component as hosed... */ 1950 1951 if (bp->b_flags & B_ERROR) { 1952 /* Mark the disk as dead */ 1953 /* but only mark it once... */ 1954 /* and only if it wouldn't leave this RAID set 1955 completely broken */ 1956 if ((queue->raidPtr->Disks[queue->col].status == 1957 rf_ds_optimal) && (queue->raidPtr->numFailures < 1958 queue->raidPtr->Layout.map->faultsTolerated)) { 1959 printf("raid%d: IO Error. 
Marking %s as failed.\n", 1960 queue->raidPtr->raidid, 1961 queue->raidPtr->Disks[queue->col].devname); 1962 queue->raidPtr->Disks[queue->col].status = 1963 rf_ds_failed; 1964 queue->raidPtr->status = rf_rs_degraded; 1965 queue->raidPtr->numFailures++; 1966 queue->raidPtr->numNewFailures++; 1967 } else { /* Disk is already dead... */ 1968 /* printf("Disk already marked as dead!\n"); */ 1969 } 1970 1971 } 1972 1973 pool_put(&rf_pools.cbuf, raidbp); 1974 1975 /* Fill in the error value */ 1976 1977 req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0; 1978 1979 simple_lock(&queue->raidPtr->iodone_lock); 1980 1981 /* Drop this one on the "finished" queue... */ 1982 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries); 1983 1984 /* Let the raidio thread know there is work to be done. */ 1985 wakeup(&(queue->raidPtr->iodone)); 1986 1987 simple_unlock(&queue->raidPtr->iodone_lock); 1988 1989 splx(s); 1990 } 1991 1992 1993 1994 /* 1995 * initialize a buf structure for doing an I/O in the kernel. 1996 */ 1997 static void 1998 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev, 1999 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf, 2000 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector, 2001 struct proc *b_proc) 2002 { 2003 /* bp->b_flags = B_PHYS | rw_flag; */ 2004 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */ 2005 bp->b_bcount = numSect << logBytesPerSector; 2006 bp->b_bufsize = bp->b_bcount; 2007 bp->b_error = 0; 2008 bp->b_dev = dev; 2009 bp->b_data = bf; 2010 bp->b_blkno = startSect; 2011 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! 
*/ 2012 if (bp->b_bcount == 0) { 2013 panic("bp->b_bcount is zero in InitBP!!"); 2014 } 2015 bp->b_proc = b_proc; 2016 bp->b_iodone = cbFunc; 2017 bp->b_vp = b_vp; 2018 2019 } 2020 2021 static void 2022 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs, 2023 struct disklabel *lp) 2024 { 2025 memset(lp, 0, sizeof(*lp)); 2026 2027 /* fabricate a label... */ 2028 lp->d_secperunit = raidPtr->totalSectors; 2029 lp->d_secsize = raidPtr->bytesPerSector; 2030 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe; 2031 lp->d_ntracks = 4 * raidPtr->numCol; 2032 lp->d_ncylinders = raidPtr->totalSectors / 2033 (lp->d_nsectors * lp->d_ntracks); 2034 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 2035 2036 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename)); 2037 lp->d_type = DTYPE_RAID; 2038 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 2039 lp->d_rpm = 3600; 2040 lp->d_interleave = 1; 2041 lp->d_flags = 0; 2042 2043 lp->d_partitions[RAW_PART].p_offset = 0; 2044 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors; 2045 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 2046 lp->d_npartitions = RAW_PART + 1; 2047 2048 lp->d_magic = DISKMAGIC; 2049 lp->d_magic2 = DISKMAGIC; 2050 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label); 2051 2052 } 2053 /* 2054 * Read the disklabel from the raid device. If one is not present, fake one 2055 * up. 2056 */ 2057 static void 2058 raidgetdisklabel(dev_t dev) 2059 { 2060 int unit = raidunit(dev); 2061 struct raid_softc *rs = &raid_softc[unit]; 2062 const char *errstring; 2063 struct disklabel *lp = rs->sc_dkdev.dk_label; 2064 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel; 2065 RF_Raid_t *raidPtr; 2066 2067 db1_printf(("Getting the disklabel...\n")); 2068 2069 memset(clp, 0, sizeof(*clp)); 2070 2071 raidPtr = raidPtrs[unit]; 2072 2073 raidgetdefaultlabel(raidPtr, rs, lp); 2074 2075 /* 2076 * Call the generic disklabel extraction routine. 
2077 */ 2078 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, 2079 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel); 2080 if (errstring) 2081 raidmakedisklabel(rs); 2082 else { 2083 int i; 2084 struct partition *pp; 2085 2086 /* 2087 * Sanity check whether the found disklabel is valid. 2088 * 2089 * This is necessary since total size of the raid device 2090 * may vary when an interleave is changed even though exactly 2091 * same componets are used, and old disklabel may used 2092 * if that is found. 2093 */ 2094 if (lp->d_secperunit != rs->sc_size) 2095 printf("raid%d: WARNING: %s: " 2096 "total sector size in disklabel (%d) != " 2097 "the size of raid (%ld)\n", unit, rs->sc_xname, 2098 lp->d_secperunit, (long) rs->sc_size); 2099 for (i = 0; i < lp->d_npartitions; i++) { 2100 pp = &lp->d_partitions[i]; 2101 if (pp->p_offset + pp->p_size > rs->sc_size) 2102 printf("raid%d: WARNING: %s: end of partition `%c' " 2103 "exceeds the size of raid (%ld)\n", 2104 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size); 2105 } 2106 } 2107 2108 } 2109 /* 2110 * Take care of things one might want to take care of in the event 2111 * that a disklabel isn't present. 2112 */ 2113 static void 2114 raidmakedisklabel(struct raid_softc *rs) 2115 { 2116 struct disklabel *lp = rs->sc_dkdev.dk_label; 2117 db1_printf(("Making a label..\n")); 2118 2119 /* 2120 * For historical reasons, if there's no disklabel present 2121 * the raw partition must be marked FS_BSDFFS. 2122 */ 2123 2124 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 2125 2126 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 2127 2128 lp->d_checksum = dkcksum(lp); 2129 } 2130 /* 2131 * Lookup the provided name in the filesystem. If the file exists, 2132 * is a valid block device, and isn't being used by anyone else, 2133 * set *vpp to the file's vnode. 
 * You'll find the original of this in ccd.c
 */
int
raidlookup(char *path, struct proc *p, struct vnode **vpp)
{
    struct nameidata nd;
    struct vnode *vp;
    struct vattr va;
    int error;

    NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
    if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
        return (error);
    }
    vp = nd.ni_vp;
    /* Reject a vnode somebody else already has open/referenced. */
    if (vp->v_usecount > 1) {
        VOP_UNLOCK(vp, 0);
        (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
        return (EBUSY);
    }
    if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
        VOP_UNLOCK(vp, 0);
        (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
        return (error);
    }
    /* XXX: eventually we should handle VREG, too. */
    if (va.va_type != VBLK) {
        VOP_UNLOCK(vp, 0);
        (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
        return (ENOTBLK);
    }
    /* Success: return the vnode unlocked but still open/referenced. */
    VOP_UNLOCK(vp, 0);
    *vpp = vp;
    return (0);
}
/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 * (Hmm... where have we seen this warning before :-> GO )
 */
static int
raidlock(struct raid_softc *rs)
{
    int error;

    /* Sleep until the holder drops RAIDF_LOCKED; PCATCH makes the
       wait interruptible by signals. */
    while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
        rs->sc_flags |= RAIDF_WANTED;
        if ((error =
            tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
            return (error);
    }
    rs->sc_flags |= RAIDF_LOCKED;
    return (0);
}
/*
 * Unlock and wake up any waiters.
2192 */ 2193 static void 2194 raidunlock(struct raid_softc *rs) 2195 { 2196 2197 rs->sc_flags &= ~RAIDF_LOCKED; 2198 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2199 rs->sc_flags &= ~RAIDF_WANTED; 2200 wakeup(rs); 2201 } 2202 } 2203 2204 2205 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2206 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2207 2208 int 2209 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) 2210 { 2211 RF_ComponentLabel_t clabel; 2212 raidread_component_label(dev, b_vp, &clabel); 2213 clabel.mod_counter = mod_counter; 2214 clabel.clean = RF_RAID_CLEAN; 2215 raidwrite_component_label(dev, b_vp, &clabel); 2216 return(0); 2217 } 2218 2219 2220 int 2221 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) 2222 { 2223 RF_ComponentLabel_t clabel; 2224 raidread_component_label(dev, b_vp, &clabel); 2225 clabel.mod_counter = mod_counter; 2226 clabel.clean = RF_RAID_DIRTY; 2227 raidwrite_component_label(dev, b_vp, &clabel); 2228 return(0); 2229 } 2230 2231 /* ARGSUSED */ 2232 int 2233 raidread_component_label(dev_t dev, struct vnode *b_vp, 2234 RF_ComponentLabel_t *clabel) 2235 { 2236 struct buf *bp; 2237 const struct bdevsw *bdev; 2238 int error; 2239 2240 /* XXX should probably ensure that we don't try to do this if 2241 someone has changed rf_protected_sectors. */ 2242 2243 if (b_vp == NULL) { 2244 /* For whatever reason, this component is not valid. 2245 Don't try to read a component label from it. */ 2246 return(EINVAL); 2247 } 2248 2249 /* get a block of the appropriate size... 
*/ 2250 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2251 bp->b_dev = dev; 2252 2253 /* get our ducks in a row for the read */ 2254 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2255 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2256 bp->b_flags |= B_READ; 2257 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2258 2259 bdev = bdevsw_lookup(bp->b_dev); 2260 if (bdev == NULL) 2261 return (ENXIO); 2262 (*bdev->d_strategy)(bp); 2263 2264 error = biowait(bp); 2265 2266 if (!error) { 2267 memcpy(clabel, bp->b_data, 2268 sizeof(RF_ComponentLabel_t)); 2269 } 2270 2271 brelse(bp); 2272 return(error); 2273 } 2274 /* ARGSUSED */ 2275 int 2276 raidwrite_component_label(dev_t dev, struct vnode *b_vp, 2277 RF_ComponentLabel_t *clabel) 2278 { 2279 struct buf *bp; 2280 const struct bdevsw *bdev; 2281 int error; 2282 2283 /* get a block of the appropriate size... */ 2284 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2285 bp->b_dev = dev; 2286 2287 /* get our ducks in a row for the write */ 2288 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2289 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2290 bp->b_flags |= B_WRITE; 2291 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2292 2293 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); 2294 2295 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); 2296 2297 bdev = bdevsw_lookup(bp->b_dev); 2298 if (bdev == NULL) 2299 return (ENXIO); 2300 (*bdev->d_strategy)(bp); 2301 error = biowait(bp); 2302 brelse(bp); 2303 if (error) { 2304 #if 1 2305 printf("Failed to write RAID component info!\n"); 2306 #endif 2307 } 2308 2309 return(error); 2310 } 2311 2312 void 2313 rf_markalldirty(RF_Raid_t *raidPtr) 2314 { 2315 RF_ComponentLabel_t clabel; 2316 int sparecol; 2317 int c; 2318 int j; 2319 int scol = -1; 2320 2321 raidPtr->mod_counter++; 2322 for (c = 0; c < raidPtr->numCol; c++) { 2323 /* we don't want to touch (at all) a disk that has 2324 failed */ 2325 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) { 2326 raidread_component_label( 2327 
                raidPtr->Disks[c].dev,
                raidPtr->raid_cinfo[c].ci_vp,
                &clabel);
            if (clabel.status == rf_ds_spared) {
                /* XXX do something special...
                   but whatever you do, don't
                   try to access it!! */
            } else {
                raidmarkdirty(
                    raidPtr->Disks[c].dev,
                    raidPtr->raid_cinfo[c].ci_vp,
                    raidPtr->mod_counter);
            }
        }
    }

    for( c = 0; c < raidPtr->numSpare ; c++) {
        sparecol = raidPtr->numCol + c;
        if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
            /*

               we claim this disk is "optimal" if it's
               rf_ds_used_spare, as that means it should be
               directly substitutable for the disk it replaced.
               We note that too...

             */

            /* find the column this spare replaced */
            for(j=0;j<raidPtr->numCol;j++) {
                if (raidPtr->Disks[j].spareCol == sparecol) {
                    scol = j;
                    break;
                }
            }

            raidread_component_label(
                raidPtr->Disks[sparecol].dev,
                raidPtr->raid_cinfo[sparecol].ci_vp,
                &clabel);
            /* make sure status is noted */

            raid_init_component_label(raidPtr, &clabel);

            clabel.row = 0;
            clabel.column = scol;
            /* Note: we *don't* change status from rf_ds_used_spare
               to rf_ds_optimal */
            /* clabel.status = rf_ds_optimal; */

            raidmarkdirty(raidPtr->Disks[sparecol].dev,
                raidPtr->raid_cinfo[sparecol].ci_vp,
                raidPtr->mod_counter);
        }
    }
}


/*
 * Refresh the component labels of all optimal components and used
 * spares with the current array state; if this is the final update
 * (shutdown) and parity is known good, mark the labels clean too.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
    RF_ComponentLabel_t clabel;
    int sparecol;
    int c;
    int j;
    int scol;

    scol = -1;

    /* XXX should do extra checks to make sure things really are clean,
       rather than blindly setting the clean bit...
 */

    raidPtr->mod_counter++;

    /* First pass: all optimal data/parity columns. */
    for (c = 0; c < raidPtr->numCol; c++) {
        if (raidPtr->Disks[c].status == rf_ds_optimal) {
            raidread_component_label(
                raidPtr->Disks[c].dev,
                raidPtr->raid_cinfo[c].ci_vp,
                &clabel);
            /* make sure status is noted */
            clabel.status = rf_ds_optimal;
            /* bump the counter */
            clabel.mod_counter = raidPtr->mod_counter;

            raidwrite_component_label(
                raidPtr->Disks[c].dev,
                raidPtr->raid_cinfo[c].ci_vp,
                &clabel);
            if (final == RF_FINAL_COMPONENT_UPDATE) {
                if (raidPtr->parity_good == RF_RAID_CLEAN) {
                    raidmarkclean(
                        raidPtr->Disks[c].dev,
                        raidPtr->raid_cinfo[c].ci_vp,
                        raidPtr->mod_counter);
                }
            }
        }
        /* else we don't touch it.. */
    }

    /* Second pass: spares that are actually in use. */
    for( c = 0; c < raidPtr->numSpare ; c++) {
        sparecol = raidPtr->numCol + c;
        /* Need to ensure that the reconstruct actually completed! */
        if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
            /*

               we claim this disk is "optimal" if it's
               rf_ds_used_spare, as that means it should be
               directly substitutable for the disk it replaced.
               We note that too...

             */

            /* find the column this spare replaced */
            for(j=0;j<raidPtr->numCol;j++) {
                if (raidPtr->Disks[j].spareCol == sparecol) {
                    scol = j;
                    break;
                }
            }

            /* XXX shouldn't *really* need this...
 */
            raidread_component_label(
                raidPtr->Disks[sparecol].dev,
                raidPtr->raid_cinfo[sparecol].ci_vp,
                &clabel);
            /* make sure status is noted */

            raid_init_component_label(raidPtr, &clabel);

            clabel.mod_counter = raidPtr->mod_counter;
            clabel.column = scol;
            clabel.status = rf_ds_optimal;

            raidwrite_component_label(
                raidPtr->Disks[sparecol].dev,
                raidPtr->raid_cinfo[sparecol].ci_vp,
                &clabel);
            if (final == RF_FINAL_COMPONENT_UPDATE) {
                if (raidPtr->parity_good == RF_RAID_CLEAN) {
                    raidmarkclean( raidPtr->Disks[sparecol].dev,
                        raidPtr->raid_cinfo[sparecol].ci_vp,
                        raidPtr->mod_counter);
                }
            }
        }
    }
}

/*
 * Close one component vnode.  Auto-configured components were opened
 * via bdevvp()/VOP_OPEN and are released with VOP_CLOSE+vput; manually
 * configured ones came from vn_open() and are released with vn_close
 * using the engine thread's credentials.
 */
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{
    struct proc *p;

    p = raidPtr->engine_thread;

    if (vp != NULL) {
        if (auto_configured == 1) {
            vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
            VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
            vput(vp);

        } else {
            (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
        }
    }
}


/*
 * Close and forget the vnodes of all components and spares of the array.
 */
void
rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
{
    int r,c;
    struct vnode *vp;
    int acd;


    /* We take this opportunity to close the vnodes like we should..
 */

    for (c = 0; c < raidPtr->numCol; c++) {
        vp = raidPtr->raid_cinfo[c].ci_vp;
        acd = raidPtr->Disks[c].auto_configured;
        rf_close_component(raidPtr, vp, acd);
        raidPtr->raid_cinfo[c].ci_vp = NULL;
        raidPtr->Disks[c].auto_configured = 0;
    }

    /* ...and the same for the spares. */
    for (r = 0; r < raidPtr->numSpare; r++) {
        vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
        acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
        rf_close_component(raidPtr, vp, acd);
        raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
        raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
    }
}


/*
 * Kernel-thread body: fail a disk (optionally starting reconstruction),
 * then exit.  req is freed here.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
    int s;
    RF_Raid_t *raidPtr;

    s = splbio();
    raidPtr = (RF_Raid_t *) req->raidPtr;
    raidPtr->recon_in_progress = 1;

    rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
        ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

    RF_Free(req, sizeof(*req));

    raidPtr->recon_in_progress = 0;
    splx(s);

    /* That's all... */
    kthread_exit(0);    /* does not return */
}

/*
 * Kernel-thread body: rewrite all parity, record whether the array is
 * now clean, and notify anyone waiting for the rewrite to finish.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
    int retcode;
    int s;

    raidPtr->parity_rewrite_stripes_done = 0;
    raidPtr->parity_rewrite_in_progress = 1;
    s = splbio();
    retcode = rf_RewriteParity(raidPtr);
    splx(s);
    if (retcode) {
        printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
    } else {
        /* set the clean bit!  If we shutdown correctly,
           the clean bit on each component label will get
           set */
        raidPtr->parity_good = RF_RAID_CLEAN;
    }
    raidPtr->parity_rewrite_in_progress = 0;

    /* Anyone waiting for us to stop?  If so, inform them... */
    if (raidPtr->waitShutdown) {
        wakeup(&raidPtr->parity_rewrite_in_progress);
    }

    /* That's all...
 */
    kthread_exit(0);    /* does not return */
}


/*
 * Kernel-thread body: copy reconstructed data back from a spare to the
 * replaced component, then exit.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
    int s;

    raidPtr->copyback_in_progress = 1;
    s = splbio();
    rf_CopybackReconstructedData(raidPtr);
    splx(s);
    raidPtr->copyback_in_progress = 0;

    /* That's all... */
    kthread_exit(0);    /* does not return */
}


/*
 * Kernel-thread body: reconstruct a failed component in place, then
 * exit.  req is freed here.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req *req)
{
    int s;
    RF_Raid_t *raidPtr;

    s = splbio();
    raidPtr = req->raidPtr;
    raidPtr->recon_in_progress = 1;
    rf_ReconstructInPlace(raidPtr, req->col);
    RF_Free(req, sizeof(*req));
    raidPtr->recon_in_progress = 0;
    splx(s);

    /* That's all... */
    kthread_exit(0);    /* does not return */
}

/*
 * Scan every disk device in the system for partitions of type FS_RAID
 * carrying a plausible RAIDframe component label, and return a linked
 * list of RF_AutoConfig_t entries (NULL if none, or on allocation
 * failure).  Caller owns the returned list and the vnodes it holds.
 */
RF_AutoConfig_t *
rf_find_raid_components()
{
    struct vnode *vp;
    struct disklabel label;
    struct device *dv;
    dev_t dev;
    int bmajor;
    int error;
    int i;
    int good_one;
    RF_ComponentLabel_t *clabel;
    RF_AutoConfig_t *ac_list;
    RF_AutoConfig_t *ac;


    /* initialize the AutoConfig list */
    ac_list = NULL;

    /* we begin by trolling through *all* the devices on the system */

    for (dv = alldevs.tqh_first; dv != NULL;
        dv = dv->dv_list.tqe_next) {

        /* we are only interested in disks... */
        if (dv->dv_class != DV_DISK)
            continue;

        /* we don't care about floppies... */
        if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
            continue;
        }

        /* we don't care about CD's...
 */
        if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
            continue;
        }

        /* hdfd is the Atari/Hades floppy driver */
        if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
            continue;
        }
        /* fdisa is the Atari/Milan floppy driver */
        if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
            continue;
        }

        /* need to find the device_name_to_block_device_major stuff */
        bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);

        /* get a vnode for the raw partition of this disk */

        dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
        if (bdevvp(dev, &vp))
            panic("RAID can't alloc vnode");

        error = VOP_OPEN(vp, FREAD, NOCRED, 0);

        if (error) {
            /* "Who cares."  Continue looking
               for something that exists*/
            vput(vp);
            continue;
        }

        /* Ok, the disk exists.  Go get the disklabel. */
        error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
        if (error) {
            /*
             * XXX can't happen - open() would
             * have errored out (or faked up one)
             */
            if (error != ENOTTY)
                printf("RAIDframe: can't get label for dev "
                    "%s (%d)\n", dv->dv_xname, error);
        }

        /* don't need this any more.  We'll allocate it again
           a little later if we really do... */
        /* NOTE(review): opened with FREAD above but closed with
           FREAD|FWRITE here -- looks inconsistent; confirm against
           the VOP_CLOSE contract before changing. */
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
        VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
        vput(vp);

        if (error)
            continue;

        for (i=0; i < label.d_npartitions; i++) {
            /* We only support partitions marked as RAID */
            if (label.d_partitions[i].p_fstype != FS_RAID)
                continue;

            dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
            if (bdevvp(dev, &vp))
                panic("RAID can't alloc vnode");

            error = VOP_OPEN(vp, FREAD, NOCRED, 0);
            if (error) {
                /* Whatever...
*/ 2708 vput(vp); 2709 continue; 2710 } 2711 2712 good_one = 0; 2713 2714 clabel = (RF_ComponentLabel_t *) 2715 malloc(sizeof(RF_ComponentLabel_t), 2716 M_RAIDFRAME, M_NOWAIT); 2717 if (clabel == NULL) { 2718 /* XXX CLEANUP HERE */ 2719 printf("RAID auto config: out of memory!\n"); 2720 return(NULL); /* XXX probably should panic? */ 2721 } 2722 2723 if (!raidread_component_label(dev, vp, clabel)) { 2724 /* Got the label. Does it look reasonable? */ 2725 if (rf_reasonable_label(clabel) && 2726 (clabel->partitionSize <= 2727 label.d_partitions[i].p_size)) { 2728 #if DEBUG 2729 printf("Component on: %s%c: %d\n", 2730 dv->dv_xname, 'a'+i, 2731 label.d_partitions[i].p_size); 2732 rf_print_component_label(clabel); 2733 #endif 2734 /* if it's reasonable, add it, 2735 else ignore it. */ 2736 ac = (RF_AutoConfig_t *) 2737 malloc(sizeof(RF_AutoConfig_t), 2738 M_RAIDFRAME, 2739 M_NOWAIT); 2740 if (ac == NULL) { 2741 /* XXX should panic?? */ 2742 return(NULL); 2743 } 2744 2745 snprintf(ac->devname, 2746 sizeof(ac->devname), "%s%c", 2747 dv->dv_xname, 'a'+i); 2748 ac->dev = dev; 2749 ac->vp = vp; 2750 ac->clabel = clabel; 2751 ac->next = ac_list; 2752 ac_list = ac; 2753 good_one = 1; 2754 } 2755 } 2756 if (!good_one) { 2757 /* cleanup */ 2758 free(clabel, M_RAIDFRAME); 2759 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2760 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2761 vput(vp); 2762 } 2763 } 2764 } 2765 return(ac_list); 2766 } 2767 2768 static int 2769 rf_reasonable_label(RF_ComponentLabel_t *clabel) 2770 { 2771 2772 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || 2773 (clabel->version==RF_COMPONENT_LABEL_VERSION)) && 2774 ((clabel->clean == RF_RAID_CLEAN) || 2775 (clabel->clean == RF_RAID_DIRTY)) && 2776 clabel->row >=0 && 2777 clabel->column >= 0 && 2778 clabel->num_rows > 0 && 2779 clabel->num_columns > 0 && 2780 clabel->row < clabel->num_rows && 2781 clabel->column < clabel->num_columns && 2782 clabel->blockSize > 0 && 2783 clabel->numBlocks > 0) { 2784 /* label looks 
           reasonable enough... */
        return(1);
    }
    return(0);
}


#if DEBUG
/* Pretty-print a component label to the console (debug builds only). */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
    printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
        clabel->row, clabel->column,
        clabel->num_rows, clabel->num_columns);
    printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
        clabel->version, clabel->serial_number,
        clabel->mod_counter);
    printf(" Clean: %s Status: %d\n",
        clabel->clean ? "Yes" : "No", clabel->status );
    printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
        clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
    printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
        (char) clabel->parityConfig, clabel->blockSize,
        clabel->numBlocks);
    printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
    printf(" Contains root partition: %s\n",
        clabel->root_partition ? "Yes" : "No" );
    printf(" Last configured as: raid%d\n", clabel->last_unit );
#if 0
    printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif

/*
 * Partition the flat AutoConfig list into per-array configuration
 * sets, grouping components whose labels match (see rf_does_it_fit).
 */
RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t *ac_list)
{
    RF_AutoConfig_t *ac;
    RF_ConfigSet_t *config_sets;
    RF_ConfigSet_t *cset;
    RF_AutoConfig_t *ac_next;


    config_sets = NULL;

    /* Go through the AutoConfig list, and figure out which components
       belong to what sets. */
    ac = ac_list;
    while(ac!=NULL) {
        /* we're going to putz with ac->next, so save it here
           for use at the end of the loop */
        ac_next = ac->next;

        if (config_sets == NULL) {
            /* will need at least this one...
 */
            config_sets = (RF_ConfigSet_t *)
                malloc(sizeof(RF_ConfigSet_t),
                       M_RAIDFRAME, M_NOWAIT);
            if (config_sets == NULL) {
                panic("rf_create_auto_sets: No memory!");
            }
            /* this one is easy :) */
            config_sets->ac = ac;
            config_sets->next = NULL;
            config_sets->rootable = 0;
            ac->next = NULL;
        } else {
            /* which set does this component fit into? */
            cset = config_sets;
            while(cset!=NULL) {
                if (rf_does_it_fit(cset, ac)) {
                    /* looks like it matches... */
                    ac->next = cset->ac;
                    cset->ac = ac;
                    break;
                }
                cset = cset->next;
            }
            if (cset==NULL) {
                /* didn't find a match above... new set..*/
                cset = (RF_ConfigSet_t *)
                    malloc(sizeof(RF_ConfigSet_t),
                           M_RAIDFRAME, M_NOWAIT);
                if (cset == NULL) {
                    panic("rf_create_auto_sets: No memory!");
                }
                cset->ac = ac;
                ac->next = NULL;
                cset->next = config_sets;
                cset->rootable = 0;
                config_sets = cset;
            }
        }
        ac = ac_next;
    }


    return(config_sets);
}

/*
 * Does component ac belong to configuration set cset?  Compares its
 * label against the label of the set's first member.
 */
static int
rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
{
    RF_ComponentLabel_t *clabel1, *clabel2;

    /* If this one matches the *first* one in the set, that's good
       enough, since the other members of the set would have been
       through here too... */
    /* note that we are not checking partitionSize here..

       Note that we are also not checking the mod_counters here.
       If everything else matches execpt the mod_counter, that's
       good enough for this test.  We will deal with the mod_counters
       a little later in the autoconfiguration process.

        (clabel1->mod_counter == clabel2->mod_counter) &&

       The reason we don't check for this is that failed disks
       will have lower modification counts.  If those disks are
       not added to the set they used to belong to, then they will
       form their own set, which may result in 2 different sets,
       for example, competing to be configured at raid0, and
       perhaps competing to be the root filesystem set.  If the
       wrong ones get configured, or both attempt to become /,
       weird behaviour and or serious lossage will occur.  Thus we
       need to bring them into the fold here, and kick them out at
       a later point.

    */

    clabel1 = cset->ac->clabel;
    clabel2 = ac->clabel;
    if ((clabel1->version == clabel2->version) &&
        (clabel1->serial_number == clabel2->serial_number) &&
        (clabel1->num_rows == clabel2->num_rows) &&
        (clabel1->num_columns == clabel2->num_columns) &&
        (clabel1->sectPerSU == clabel2->sectPerSU) &&
        (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
        (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
        (clabel1->parityConfig == clabel2->parityConfig) &&
        (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
        (clabel1->blockSize == clabel2->blockSize) &&
        (clabel1->numBlocks == clabel2->numBlocks) &&
        (clabel1->autoconfigure == clabel2->autoconfigure) &&
        (clabel1->root_partition == clabel2->root_partition) &&
        (clabel1->last_unit == clabel2->last_unit) &&
        (clabel1->config_order == clabel2->config_order)) {
        /* if it get's here, it almost *has* to be a match */
    } else {
        /* it's not consistent with somebody in the set..
           punt */
        return(0);
    }
    /* all was fine.. it must fit...
 */
    return(1);
}

/*
 * Decide whether the configuration set has enough live components at
 * the winning mod_counter to be configured.  Returns 1 if so, 0 if
 * too many components are missing for the RAID level.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
    RF_AutoConfig_t *ac;
    RF_AutoConfig_t *auto_config;
    RF_ComponentLabel_t *clabel;
    int c;
    int num_cols;
    int num_missing;
    int mod_counter;
    int mod_counter_found;
    int even_pair_failed;
    char parity_type;


    /* check to see that we have enough 'live' components
       of this set.  If so, we can configure it if necessary */

    num_cols = cset->ac->clabel->num_columns;
    parity_type = cset->ac->clabel->parityConfig;

    /* XXX Check for duplicate components!?!?!? */

    /* Determine what the mod_counter is supposed to be for this set. */
    /* (the highest mod_counter seen among the members wins) */

    mod_counter_found = 0;
    mod_counter = 0;
    ac = cset->ac;
    while(ac!=NULL) {
        if (mod_counter_found==0) {
            mod_counter = ac->clabel->mod_counter;
            mod_counter_found = 1;
        } else {
            if (ac->clabel->mod_counter > mod_counter) {
                mod_counter = ac->clabel->mod_counter;
            }
        }
        ac = ac->next;
    }

    num_missing = 0;
    auto_config = cset->ac;

    even_pair_failed = 0;
    for(c=0; c<num_cols; c++) {
        /* look for a component claiming column c at the winning
           mod_counter */
        ac = auto_config;
        while(ac!=NULL) {
            if ((ac->clabel->column == c) &&
                (ac->clabel->mod_counter == mod_counter)) {
                /* it's this one... */
#if DEBUG
                printf("Found: %s at %d\n",
                    ac->devname,c);
#endif
                break;
            }
            ac=ac->next;
        }
        if (ac==NULL) {
                /* Didn't find one here! */
                /* special case for RAID 1, especially
                   where there are more than 2
                   components (where RAIDframe treats
                   things a little differently :( ) */
            if (parity_type == '1') {
                if (c%2 == 0) { /* even component */
                    even_pair_failed = 1;
                } else { /* odd component.  If
                            we're failed, and
                            so is the even
                            component, it's
                            "Good Night, Charlie" */
                    if (even_pair_failed == 1) {
                        return(0);
                    }
                }
            } else {
                /* normal accounting */
                num_missing++;
            }
        }
        if ((parity_type == '1') && (c%2 == 1)) {
            /* Just did an even component, and we didn't
               bail.. reset the even_pair_failed flag,
               and go on to the next component.... */
            even_pair_failed = 0;
        }
    }

    clabel = cset->ac->clabel;

    if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
        ((clabel->parityConfig == '4') && (num_missing > 1)) ||
        ((clabel->parityConfig == '5') && (num_missing > 1))) {
        /* XXX this needs to be made *much* more general */
        /* Too many failures */
        return(0);
    }
    /* otherwise, all is well, and we've got enough to take a kick
       at autoconfiguring this set */
    return(1);
}

/*
 * Build an RF_Config_t for the set whose first member is ac, taking
 * all parameters from the component labels.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
    RF_Raid_t *raidPtr)
{
    RF_ComponentLabel_t *clabel;
    int i;

    clabel = ac->clabel;

    /* 1. Fill in the common stuff */
    config->numRow = clabel->num_rows = 1;
    config->numCol = clabel->num_columns;
    config->numSpare = 0; /* XXX should this be set here? */
    config->sectPerSU = clabel->sectPerSU;
    config->SUsPerPU = clabel->SUsPerPU;
    config->SUsPerRU = clabel->SUsPerRU;
    config->parityConfig = clabel->parityConfig;
    /* XXX... */
    strcpy(config->diskQueueType,"fifo");
    config->maxOutstandingDiskReqs = clabel->maxOutstanding;
    config->layoutSpecificSize = 0; /* XXX ??
 */

    /* fill in the device name for each surviving column */
    while(ac!=NULL) {
        /* row/col values will be in range due to the checks
           in reasonable_label() */
        strcpy(config->devnames[0][ac->clabel->column],
            ac->devname);
        ac = ac->next;
    }

    for(i=0;i<RF_MAXDBGV;i++) {
        config->debugVars[i][0] = 0;
    }
}

/*
 * Set the autoconfigure flag on the array and persist it to the
 * component label of every optimal component and used spare.
 * Returns new_value.
 */
int
rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
{
    RF_ComponentLabel_t clabel;
    struct vnode *vp;
    dev_t dev;
    int column;
    int sparecol;

    raidPtr->autoconfigure = new_value;

    for(column=0; column<raidPtr->numCol; column++) {
        if (raidPtr->Disks[column].status == rf_ds_optimal) {
            dev = raidPtr->Disks[column].dev;
            vp = raidPtr->raid_cinfo[column].ci_vp;
            raidread_component_label(dev, vp, &clabel);
            clabel.autoconfigure = new_value;
            raidwrite_component_label(dev, vp, &clabel);
        }
    }
    for(column = 0; column < raidPtr->numSpare ; column++) {
        sparecol = raidPtr->numCol + column;
        if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
            dev = raidPtr->Disks[sparecol].dev;
            vp = raidPtr->raid_cinfo[sparecol].ci_vp;
            raidread_component_label(dev, vp, &clabel);
            clabel.autoconfigure = new_value;
            raidwrite_component_label(dev, vp, &clabel);
        }
    }
    return(new_value);
}

/*
 * Set the root_partition flag on the array and persist it to the
 * component label of every optimal component and used spare.
 * Returns new_value.
 */
int
rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
{
    RF_ComponentLabel_t clabel;
    struct vnode *vp;
    dev_t dev;
    int column;
    int sparecol;

    raidPtr->root_partition = new_value;
    for(column=0; column<raidPtr->numCol; column++) {
        if (raidPtr->Disks[column].status == rf_ds_optimal) {
            dev = raidPtr->Disks[column].dev;
            vp = raidPtr->raid_cinfo[column].ci_vp;
            raidread_component_label(dev, vp, &clabel);
            clabel.root_partition = new_value;
            raidwrite_component_label(dev, vp, &clabel);
        }
    }
    for(column = 0; column < raidPtr->numSpare ; column++) {
        sparecol =
            raidPtr->numCol + column;
        if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
            dev = raidPtr->Disks[sparecol].dev;
            vp = raidPtr->raid_cinfo[sparecol].ci_vp;
            raidread_component_label(dev, vp, &clabel);
            clabel.root_partition = new_value;
            raidwrite_component_label(dev, vp, &clabel);
        }
    }
    return(new_value);
}

/*
 * Close and release every vnode held by the members of a config set.
 */
void
rf_release_all_vps(RF_ConfigSet_t *cset)
{
    RF_AutoConfig_t *ac;

    ac = cset->ac;
    while(ac!=NULL) {
        /* Close the vp, and give it back */
        if (ac->vp) {
            vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
            VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
            vput(ac->vp);
            ac->vp = NULL;
        }
        ac = ac->next;
    }
}


/*
 * Free a config set: every member's label, every member, and the set
 * itself.  Vnodes are NOT released here -- see rf_release_all_vps().
 */
void
rf_cleanup_config_set(RF_ConfigSet_t *cset)
{
    RF_AutoConfig_t *ac;
    RF_AutoConfig_t *next_ac;

    ac = cset->ac;
    while(ac!=NULL) {
        next_ac = ac->next;
        /* nuke the label */
        free(ac->clabel, M_RAIDFRAME);
        /* cleanup the config structure */
        free(ac, M_RAIDFRAME);
        /* "next.." */
        ac = next_ac;
    }
    /* and, finally, nuke the config set */
    free(cset, M_RAIDFRAME);
}


/*
 * Populate *clabel with the array's current parameters.  The label is
 * initialized dirty; callers that know better adjust the fields
 * (status, column, mod_counter, ...) afterwards.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
    /* current version number */
    clabel->version = RF_COMPONENT_LABEL_VERSION;
    clabel->serial_number = raidPtr->serial_number;
    clabel->mod_counter = raidPtr->mod_counter;
    clabel->num_rows = 1;
    clabel->num_columns = raidPtr->numCol;
    clabel->clean = RF_RAID_DIRTY; /* not clean */
    clabel->status = rf_ds_optimal; /* "It's good!" */

    clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
    clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
    clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

    clabel->blockSize = raidPtr->bytesPerSector;
    clabel->numBlocks = raidPtr->sectorsPerDisk;

    /* XXX not portable */
    clabel->parityConfig = raidPtr->Layout.map->parityConfig;
    clabel->maxOutstanding = raidPtr->maxOutstanding;
    clabel->autoconfigure = raidPtr->autoconfigure;
    clabel->root_partition = raidPtr->root_partition;
    clabel->last_unit = raidPtr->raidid;
    clabel->config_order = raidPtr->config_order;
}

/*
 * Configure one auto-detected set.  Picks a RAID unit (preferring the
 * unit it was last configured as), builds the RF_Config_t, and runs
 * rf_Configure().  On success *unit is the chosen unit; returns 0 on
 * success, non-zero on failure.
 */
int
rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
{
    RF_Raid_t *raidPtr;
    RF_Config_t *config;
    int raidID;
    int retcode;

#if DEBUG
    printf("RAID autoconfigure\n");
#endif

    retcode = 0;
    *unit = -1;

    /* 1. Create a config structure */

    config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
                M_RAIDFRAME,
                M_NOWAIT);
    if (config==NULL) {
        printf("Out of mem!?!?\n");
                /* XXX do something more intelligent here. */
        return(1);
    }

    memset(config, 0, sizeof(RF_Config_t));

    /*
       2. Figure out what RAID ID this one is supposed to live at
       See if we can get the same RAID dev that it was configured
       on last time..
    */

    raidID = cset->ac->clabel->last_unit;
    if ((raidID < 0) || (raidID >= numraid)) {
        /* let's not wander off into lala land. */
        raidID = numraid - 1;
    }
    if (raidPtrs[raidID]->valid != 0) {

        /*
           Nope... Go looking for an alternative...
           Start high so we don't immediately use raid0 if that's
           not taken.
        */

        for(raidID = numraid - 1; raidID >= 0; raidID--) {
            if (raidPtrs[raidID]->valid == 0) {
                /* can use this one! */
                break;
            }
        }
    }

    if (raidID < 0) {
        /* punt...
*/ 3270 printf("Unable to auto configure this set!\n"); 3271 printf("(Out of RAID devs!)\n"); 3272 return(1); 3273 } 3274 3275 #if DEBUG 3276 printf("Configuring raid%d:\n",raidID); 3277 #endif 3278 3279 raidPtr = raidPtrs[raidID]; 3280 3281 /* XXX all this stuff should be done SOMEWHERE ELSE! */ 3282 raidPtr->raidid = raidID; 3283 raidPtr->openings = RAIDOUTSTANDING; 3284 3285 /* 3. Build the configuration structure */ 3286 rf_create_configuration(cset->ac, config, raidPtr); 3287 3288 /* 4. Do the configuration */ 3289 retcode = rf_Configure(raidPtr, config, cset->ac); 3290 3291 if (retcode == 0) { 3292 3293 raidinit(raidPtrs[raidID]); 3294 3295 rf_markalldirty(raidPtrs[raidID]); 3296 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */ 3297 if (cset->ac->clabel->root_partition==1) { 3298 /* everything configured just fine. Make a note 3299 that this set is eligible to be root. */ 3300 cset->rootable = 1; 3301 /* XXX do this here? */ 3302 raidPtrs[raidID]->root_partition = 1; 3303 } 3304 } 3305 3306 /* 5. Cleanup */ 3307 free(config, M_RAIDFRAME); 3308 3309 *unit = raidID; 3310 return(retcode); 3311 } 3312 3313 void 3314 rf_disk_unbusy(RF_RaidAccessDesc_t *desc) 3315 { 3316 struct buf *bp; 3317 3318 bp = (struct buf *)desc->bp; 3319 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev, 3320 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ)); 3321 } 3322 3323 void 3324 rf_pool_init(struct pool *p, size_t size, const char *w_chan, 3325 size_t xmin, size_t xmax) 3326 { 3327 pool_init(p, size, 0, 0, 0, w_chan, NULL); 3328 pool_sethiwat(p, xmax); 3329 pool_prime(p, xmin); 3330 pool_setlowat(p, xmin); 3331 } 3332