1 /* $NetBSD: rf_netbsdkintf.c,v 1.199 2006/01/08 22:26:30 oster Exp $ */ 2 /*- 3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster; Jason R. Thorpe. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the NetBSD 20 * Foundation, Inc. and its contributors. 21 * 4. Neither the name of The NetBSD Foundation nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1990, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * This code is derived from software contributed to Berkeley by 43 * the Systems Programming Group of the University of Utah Computer 44 * Science Department. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 * 70 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 71 * 72 * @(#)cd.c 8.2 (Berkeley) 11/16/93 73 */ 74 75 /* 76 * Copyright (c) 1988 University of Utah. 77 * 78 * This code is derived from software contributed to Berkeley by 79 * the Systems Programming Group of the University of Utah Computer 80 * Science Department. 81 * 82 * Redistribution and use in source and binary forms, with or without 83 * modification, are permitted provided that the following conditions 84 * are met: 85 * 1. Redistributions of source code must retain the above copyright 86 * notice, this list of conditions and the following disclaimer. 87 * 2. Redistributions in binary form must reproduce the above copyright 88 * notice, this list of conditions and the following disclaimer in the 89 * documentation and/or other materials provided with the distribution. 90 * 3. All advertising materials mentioning features or use of this software 91 * must display the following acknowledgement: 92 * This product includes software developed by the University of 93 * California, Berkeley and its contributors. 94 * 4. Neither the name of the University nor the names of its contributors 95 * may be used to endorse or promote products derived from this software 96 * without specific prior written permission. 97 * 98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 108 * SUCH DAMAGE. 109 * 110 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 111 * 112 * @(#)cd.c 8.2 (Berkeley) 11/16/93 113 */ 114 115 /* 116 * Copyright (c) 1995 Carnegie-Mellon University. 117 * All rights reserved. 118 * 119 * Authors: Mark Holland, Jim Zelenka 120 * 121 * Permission to use, copy, modify and distribute this software and 122 * its documentation is hereby granted, provided that both the copyright 123 * notice and this permission notice appear in all copies of the 124 * software, derivative works or modified versions, and any portions 125 * thereof, and that both notices appear in supporting documentation. 126 * 127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 130 * 131 * Carnegie Mellon requests users of this software to return to 132 * 133 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 134 * School of Computer Science 135 * Carnegie Mellon University 136 * Pittsburgh PA 15213-3890 137 * 138 * any improvements or extensions that they make and grant Carnegie the 139 * rights to redistribute these changes. 140 */ 141 142 /*********************************************************** 143 * 144 * rf_kintf.c -- the kernel interface routines for RAIDframe 145 * 146 ***********************************************************/ 147 148 #include <sys/cdefs.h> 149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.199 2006/01/08 22:26:30 oster Exp $"); 150 151 #include <sys/param.h> 152 #include <sys/errno.h> 153 #include <sys/pool.h> 154 #include <sys/proc.h> 155 #include <sys/queue.h> 156 #include <sys/disk.h> 157 #include <sys/device.h> 158 #include <sys/stat.h> 159 #include <sys/ioctl.h> 160 #include <sys/fcntl.h> 161 #include <sys/systm.h> 162 #include <sys/namei.h> 163 #include <sys/vnode.h> 164 #include <sys/disklabel.h> 165 #include <sys/conf.h> 166 #include <sys/lock.h> 167 #include <sys/buf.h> 168 #include <sys/bufq.h> 169 #include <sys/user.h> 170 #include <sys/reboot.h> 171 172 #include <dev/raidframe/raidframevar.h> 173 #include <dev/raidframe/raidframeio.h> 174 #include "raid.h" 175 #include "opt_raid_autoconfig.h" 176 #include "rf_raid.h" 177 #include "rf_copyback.h" 178 #include "rf_dag.h" 179 #include "rf_dagflags.h" 180 #include "rf_desc.h" 181 #include "rf_diskqueue.h" 182 #include "rf_etimer.h" 183 #include "rf_general.h" 184 #include "rf_kintf.h" 185 #include "rf_options.h" 186 #include "rf_driver.h" 187 #include "rf_parityscan.h" 188 #include "rf_threadstuff.h" 189 190 #ifdef DEBUG 191 int rf_kdebug_level = 0; 192 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 193 #else /* DEBUG */ 194 #define db1_printf(a) { } 195 #endif /* DEBUG */ 196 197 static RF_Raid_t **raidPtrs; /* global raid device descriptors */ 198 199 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex) 200 201 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a 202 * spare table */ 203 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from 204 * installation process */ 205 206 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures"); 207 208 /* prototypes */ 209 static void KernelWakeupFunc(struct buf *); 210 static void InitBP(struct buf *, struct vnode *, unsigned, 211 dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *), 212 void *, int, struct proc *); 213 static void raidinit(RF_Raid_t *); 214 215 void raidattach(int); 216 217 dev_type_open(raidopen); 218 dev_type_close(raidclose); 219 dev_type_read(raidread); 220 dev_type_write(raidwrite); 221 dev_type_ioctl(raidioctl); 222 dev_type_strategy(raidstrategy); 223 dev_type_dump(raiddump); 224 dev_type_size(raidsize); 225 226 const struct bdevsw raid_bdevsw = { 227 raidopen, raidclose, raidstrategy, raidioctl, 228 raiddump, raidsize, D_DISK 229 }; 230 231 const struct cdevsw raid_cdevsw = { 232 raidopen, raidclose, raidread, raidwrite, raidioctl, 233 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 234 }; 235 236 /* XXX Not sure if the following should be replacing the raidPtrs above, 237 or if it should be used in conjunction with that... 238 */ 239 240 struct raid_softc { 241 int sc_flags; /* flags */ 242 int sc_cflags; /* configuration flags */ 243 size_t sc_size; /* size of the raid device */ 244 char sc_xname[20]; /* XXX external name */ 245 struct disk sc_dkdev; /* generic disk device info */ 246 struct bufq_state *buf_queue; /* used for the device queue */ 247 }; 248 /* sc_flags */ 249 #define RAIDF_INITED 0x01 /* unit has been initialized */ 250 #define RAIDF_WLABEL 0x02 /* label area is writable */ 251 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */ 252 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */ 253 #define RAIDF_LOCKED 0x80 /* unit is locked */ 254 255 #define raidunit(x) DISKUNIT(x) 256 int numraid = 0; 257 258 /* 259 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. 260 * Be aware that large numbers can allow the driver to consume a lot of 261 * kernel memory, especially on writes, and in degraded mode reads. 262 * 263 * For example: with a stripe width of 64 blocks (32k) and 5 disks, 264 * a single 64K write will typically require 64K for the old data, 265 * 64K for the old parity, and 64K for the new parity, for a total 266 * of 192K (if the parity buffer is not re-used immediately). 267 * Even it if is used immediately, that's still 128K, which when multiplied 268 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data. 269 * 270 * Now in degraded mode, for example, a 64K read on the above setup may 271 * require data reconstruction, which will require *all* of the 4 remaining 272 * disks to participate -- 4 * 32K/disk == 128K again. 273 */ 274 275 #ifndef RAIDOUTSTANDING 276 #define RAIDOUTSTANDING 6 277 #endif 278 279 #define RAIDLABELDEV(dev) \ 280 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART)) 281 282 /* declared here, and made public, for the benefit of KVM stuff.. */ 283 struct raid_softc *raid_softc; 284 285 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, 286 struct disklabel *); 287 static void raidgetdisklabel(dev_t); 288 static void raidmakedisklabel(struct raid_softc *); 289 290 static int raidlock(struct raid_softc *); 291 static void raidunlock(struct raid_softc *); 292 293 static void rf_markalldirty(RF_Raid_t *); 294 295 struct device *raidrootdev; 296 297 void rf_ReconThread(struct rf_recon_req *); 298 void rf_RewriteParityThread(RF_Raid_t *raidPtr); 299 void rf_CopybackThread(RF_Raid_t *raidPtr); 300 void rf_ReconstructInPlaceThread(struct rf_recon_req *); 301 int rf_autoconfig(struct device *self); 302 void rf_buildroothack(RF_ConfigSet_t *); 303 304 RF_AutoConfig_t *rf_find_raid_components(void); 305 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *); 306 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); 307 static int rf_reasonable_label(RF_ComponentLabel_t *); 308 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *); 309 int rf_set_autoconfig(RF_Raid_t *, int); 310 int rf_set_rootpartition(RF_Raid_t *, int); 311 void rf_release_all_vps(RF_ConfigSet_t *); 312 void rf_cleanup_config_set(RF_ConfigSet_t *); 313 int rf_have_enough_components(RF_ConfigSet_t *); 314 int rf_auto_config_set(RF_ConfigSet_t *, int *); 315 316 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not 317 allow autoconfig to take place. 318 Note that this is overridden by having 319 RAID_AUTOCONFIG as an option in the 320 kernel config file. */ 321 322 struct RF_Pools_s rf_pools; 323 324 void 325 raidattach(int num) 326 { 327 int raidID; 328 int i, rc; 329 330 #ifdef DEBUG 331 printf("raidattach: Asked for %d units\n", num); 332 #endif 333 334 if (num <= 0) { 335 #ifdef DIAGNOSTIC 336 panic("raidattach: count <= 0"); 337 #endif 338 return; 339 } 340 /* This is where all the initialization stuff gets done. */ 341 342 numraid = num; 343 344 /* Make some space for requested number of units... */ 345 346 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **)); 347 if (raidPtrs == NULL) { 348 panic("raidPtrs is NULL!!"); 349 } 350 351 rf_mutex_init(&rf_sparet_wait_mutex); 352 353 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; 354 355 for (i = 0; i < num; i++) 356 raidPtrs[i] = NULL; 357 rc = rf_BootRaidframe(); 358 if (rc == 0) 359 printf("Kernelized RAIDframe activated\n"); 360 else 361 panic("Serious error booting RAID!!"); 362 363 /* put together some datastructures like the CCD device does.. This 364 * lets us lock the device and what-not when it gets opened. */ 365 366 raid_softc = (struct raid_softc *) 367 malloc(num * sizeof(struct raid_softc), 368 M_RAIDFRAME, M_NOWAIT); 369 if (raid_softc == NULL) { 370 printf("WARNING: no memory for RAIDframe driver\n"); 371 return; 372 } 373 374 memset(raid_softc, 0, num * sizeof(struct raid_softc)); 375 376 raidrootdev = (struct device *)malloc(num * sizeof(struct device), 377 M_RAIDFRAME, M_NOWAIT); 378 if (raidrootdev == NULL) { 379 panic("No memory for RAIDframe driver!!?!?!"); 380 } 381 382 for (raidID = 0; raidID < num; raidID++) { 383 bufq_alloc(&raid_softc[raidID].buf_queue, "fcfs", 0); 384 pseudo_disk_init(&raid_softc[raidID].sc_dkdev); 385 386 raidrootdev[raidID].dv_class = DV_DISK; 387 raidrootdev[raidID].dv_cfdata = NULL; 388 raidrootdev[raidID].dv_unit = raidID; 389 raidrootdev[raidID].dv_parent = NULL; 390 raidrootdev[raidID].dv_flags = 0; 391 snprintf(raidrootdev[raidID].dv_xname, 392 sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID); 393 394 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t), 395 (RF_Raid_t *)); 396 if (raidPtrs[raidID] == NULL) { 397 printf("WARNING: raidPtrs[%d] is NULL\n", raidID); 398 numraid = raidID; 399 return; 400 } 401 } 402 403 #ifdef RAID_AUTOCONFIG 404 raidautoconfig = 1; 405 #endif 406 407 /* 408 * Register a finalizer which will be used to auto-config RAID 409 * sets once all real hardware devices have been found. 410 */ 411 if (config_finalize_register(NULL, rf_autoconfig) != 0) 412 printf("WARNING: unable to register RAIDframe finalizer\n"); 413 } 414 415 int 416 rf_autoconfig(struct device *self) 417 { 418 RF_AutoConfig_t *ac_list; 419 RF_ConfigSet_t *config_sets; 420 421 if (raidautoconfig == 0) 422 return (0); 423 424 /* XXX This code can only be run once. */ 425 raidautoconfig = 0; 426 427 /* 1. locate all RAID components on the system */ 428 #ifdef DEBUG 429 printf("Searching for RAID components...\n"); 430 #endif 431 ac_list = rf_find_raid_components(); 432 433 /* 2. Sort them into their respective sets. */ 434 config_sets = rf_create_auto_sets(ac_list); 435 436 /* 437 * 3. Evaluate each set andconfigure the valid ones. 438 * This gets done in rf_buildroothack(). 439 */ 440 rf_buildroothack(config_sets); 441 442 return (1); 443 } 444 445 void 446 rf_buildroothack(RF_ConfigSet_t *config_sets) 447 { 448 RF_ConfigSet_t *cset; 449 RF_ConfigSet_t *next_cset; 450 int retcode; 451 int raidID; 452 int rootID; 453 int num_root; 454 455 rootID = 0; 456 num_root = 0; 457 cset = config_sets; 458 while(cset != NULL ) { 459 next_cset = cset->next; 460 if (rf_have_enough_components(cset) && 461 cset->ac->clabel->autoconfigure==1) { 462 retcode = rf_auto_config_set(cset,&raidID); 463 if (!retcode) { 464 if (cset->rootable) { 465 rootID = raidID; 466 num_root++; 467 } 468 } else { 469 /* The autoconfig didn't work :( */ 470 #if DEBUG 471 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID); 472 #endif 473 rf_release_all_vps(cset); 474 } 475 } else { 476 /* we're not autoconfiguring this set... 477 release the associated resources */ 478 rf_release_all_vps(cset); 479 } 480 /* cleanup */ 481 rf_cleanup_config_set(cset); 482 cset = next_cset; 483 } 484 485 /* we found something bootable... */ 486 487 if (num_root == 1) { 488 booted_device = &raidrootdev[rootID]; 489 } else if (num_root > 1) { 490 /* we can't guess.. require the user to answer... */ 491 boothowto |= RB_ASKNAME; 492 } 493 } 494 495 496 int 497 raidsize(dev_t dev) 498 { 499 struct raid_softc *rs; 500 struct disklabel *lp; 501 int part, unit, omask, size; 502 503 unit = raidunit(dev); 504 if (unit >= numraid) 505 return (-1); 506 rs = &raid_softc[unit]; 507 508 if ((rs->sc_flags & RAIDF_INITED) == 0) 509 return (-1); 510 511 part = DISKPART(dev); 512 omask = rs->sc_dkdev.dk_openmask & (1 << part); 513 lp = rs->sc_dkdev.dk_label; 514 515 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp)) 516 return (-1); 517 518 if (lp->d_partitions[part].p_fstype != FS_SWAP) 519 size = -1; 520 else 521 size = lp->d_partitions[part].p_size * 522 (lp->d_secsize / DEV_BSIZE); 523 524 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp)) 525 return (-1); 526 527 return (size); 528 529 } 530 531 int 532 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size) 533 { 534 /* Not implemented. */ 535 return ENXIO; 536 } 537 /* ARGSUSED */ 538 int 539 raidopen(dev_t dev, int flags, int fmt, struct lwp *l) 540 { 541 int unit = raidunit(dev); 542 struct raid_softc *rs; 543 struct disklabel *lp; 544 int part, pmask; 545 int error = 0; 546 547 if (unit >= numraid) 548 return (ENXIO); 549 rs = &raid_softc[unit]; 550 551 if ((error = raidlock(rs)) != 0) 552 return (error); 553 lp = rs->sc_dkdev.dk_label; 554 555 part = DISKPART(dev); 556 pmask = (1 << part); 557 558 if ((rs->sc_flags & RAIDF_INITED) && 559 (rs->sc_dkdev.dk_openmask == 0)) 560 raidgetdisklabel(dev); 561 562 /* make sure that this partition exists */ 563 564 if (part != RAW_PART) { 565 if (((rs->sc_flags & RAIDF_INITED) == 0) || 566 ((part >= lp->d_npartitions) || 567 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 568 error = ENXIO; 569 raidunlock(rs); 570 return (error); 571 } 572 } 573 /* Prevent this unit from being unconfigured while open. */ 574 switch (fmt) { 575 case S_IFCHR: 576 rs->sc_dkdev.dk_copenmask |= pmask; 577 break; 578 579 case S_IFBLK: 580 rs->sc_dkdev.dk_bopenmask |= pmask; 581 break; 582 } 583 584 if ((rs->sc_dkdev.dk_openmask == 0) && 585 ((rs->sc_flags & RAIDF_INITED) != 0)) { 586 /* First one... mark things as dirty... Note that we *MUST* 587 have done a configure before this. I DO NOT WANT TO BE 588 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED 589 THAT THEY BELONG TOGETHER!!!!! */ 590 /* XXX should check to see if we're only open for reading 591 here... If so, we needn't do this, but then need some 592 other way of keeping track of what's happened.. */ 593 594 rf_markalldirty( raidPtrs[unit] ); 595 } 596 597 598 rs->sc_dkdev.dk_openmask = 599 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 600 601 raidunlock(rs); 602 603 return (error); 604 605 606 } 607 /* ARGSUSED */ 608 int 609 raidclose(dev_t dev, int flags, int fmt, struct lwp *l) 610 { 611 int unit = raidunit(dev); 612 struct raid_softc *rs; 613 int error = 0; 614 int part; 615 616 if (unit >= numraid) 617 return (ENXIO); 618 rs = &raid_softc[unit]; 619 620 if ((error = raidlock(rs)) != 0) 621 return (error); 622 623 part = DISKPART(dev); 624 625 /* ...that much closer to allowing unconfiguration... */ 626 switch (fmt) { 627 case S_IFCHR: 628 rs->sc_dkdev.dk_copenmask &= ~(1 << part); 629 break; 630 631 case S_IFBLK: 632 rs->sc_dkdev.dk_bopenmask &= ~(1 << part); 633 break; 634 } 635 rs->sc_dkdev.dk_openmask = 636 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 637 638 if ((rs->sc_dkdev.dk_openmask == 0) && 639 ((rs->sc_flags & RAIDF_INITED) != 0)) { 640 /* Last one... device is not unconfigured yet. 641 Device shutdown has taken care of setting the 642 clean bits if RAIDF_INITED is not set 643 mark things as clean... */ 644 645 rf_update_component_labels(raidPtrs[unit], 646 RF_FINAL_COMPONENT_UPDATE); 647 if (doing_shutdown) { 648 /* last one, and we're going down, so 649 lights out for this RAID set too. */ 650 error = rf_Shutdown(raidPtrs[unit]); 651 652 /* It's no longer initialized... */ 653 rs->sc_flags &= ~RAIDF_INITED; 654 655 /* Detach the disk. */ 656 pseudo_disk_detach(&rs->sc_dkdev); 657 } 658 } 659 660 raidunlock(rs); 661 return (0); 662 663 } 664 665 void 666 raidstrategy(struct buf *bp) 667 { 668 int s; 669 670 unsigned int raidID = raidunit(bp->b_dev); 671 RF_Raid_t *raidPtr; 672 struct raid_softc *rs = &raid_softc[raidID]; 673 int wlabel; 674 675 if ((rs->sc_flags & RAIDF_INITED) ==0) { 676 bp->b_error = ENXIO; 677 bp->b_flags |= B_ERROR; 678 goto done; 679 } 680 if (raidID >= numraid || !raidPtrs[raidID]) { 681 bp->b_error = ENODEV; 682 bp->b_flags |= B_ERROR; 683 goto done; 684 } 685 raidPtr = raidPtrs[raidID]; 686 if (!raidPtr->valid) { 687 bp->b_error = ENODEV; 688 bp->b_flags |= B_ERROR; 689 goto done; 690 } 691 if (bp->b_bcount == 0) { 692 db1_printf(("b_bcount is zero..\n")); 693 goto done; 694 } 695 696 /* 697 * Do bounds checking and adjust transfer. If there's an 698 * error, the bounds check will flag that for us. 699 */ 700 701 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING); 702 if (DISKPART(bp->b_dev) == RAW_PART) { 703 uint64_t size; /* device size in DEV_BSIZE unit */ 704 705 if (raidPtr->logBytesPerSector > DEV_BSHIFT) { 706 size = raidPtr->totalSectors << 707 (raidPtr->logBytesPerSector - DEV_BSHIFT); 708 } else { 709 size = raidPtr->totalSectors >> 710 (DEV_BSHIFT - raidPtr->logBytesPerSector); 711 } 712 if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) { 713 goto done; 714 } 715 } else { 716 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) { 717 db1_printf(("Bounds check failed!!:%d %d\n", 718 (int) bp->b_blkno, (int) wlabel)); 719 goto done; 720 } 721 } 722 s = splbio(); 723 724 bp->b_resid = 0; 725 726 /* stuff it onto our queue */ 727 BUFQ_PUT(rs->buf_queue, bp); 728 729 /* scheduled the IO to happen at the next convenient time */ 730 wakeup(&(raidPtrs[raidID]->iodone)); 731 732 splx(s); 733 return; 734 735 done: 736 bp->b_resid = bp->b_bcount; 737 biodone(bp); 738 } 739 /* ARGSUSED */ 740 int 741 raidread(dev_t dev, struct uio *uio, int flags) 742 { 743 int unit = raidunit(dev); 744 struct raid_softc *rs; 745 746 if (unit >= numraid) 747 return (ENXIO); 748 rs = &raid_softc[unit]; 749 750 if ((rs->sc_flags & RAIDF_INITED) == 0) 751 return (ENXIO); 752 753 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio)); 754 755 } 756 /* ARGSUSED */ 757 int 758 raidwrite(dev_t dev, struct uio *uio, int flags) 759 { 760 int unit = raidunit(dev); 761 struct raid_softc *rs; 762 763 if (unit >= numraid) 764 return (ENXIO); 765 rs = &raid_softc[unit]; 766 767 if ((rs->sc_flags & RAIDF_INITED) == 0) 768 return (ENXIO); 769 770 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio)); 771 772 } 773 774 int 775 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l) 776 { 777 int unit = raidunit(dev); 778 int error = 0; 779 int part, pmask; 780 struct raid_softc *rs; 781 RF_Config_t *k_cfg, *u_cfg; 782 RF_Raid_t *raidPtr; 783 RF_RaidDisk_t *diskPtr; 784 RF_AccTotals_t *totals; 785 RF_DeviceConfig_t *d_cfg, **ucfgp; 786 u_char *specific_buf; 787 int retcode = 0; 788 int column; 789 int raidid; 790 struct rf_recon_req *rrcopy, *rr; 791 RF_ComponentLabel_t *clabel; 792 RF_ComponentLabel_t ci_label; 793 RF_ComponentLabel_t **clabel_ptr; 794 RF_SingleComponent_t *sparePtr,*componentPtr; 795 RF_SingleComponent_t hot_spare; 796 RF_SingleComponent_t component; 797 RF_ProgressInfo_t progressInfo, **progressInfoPtr; 798 int i, j, d; 799 #ifdef __HAVE_OLD_DISKLABEL 800 struct disklabel newlabel; 801 #endif 802 803 if (unit >= numraid) 804 return (ENXIO); 805 rs = &raid_softc[unit]; 806 raidPtr = raidPtrs[unit]; 807 808 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev, 809 (int) DISKPART(dev), (int) unit, (int) cmd)); 810 811 /* Must be open for writes for these commands... */ 812 switch (cmd) { 813 case DIOCSDINFO: 814 case DIOCWDINFO: 815 #ifdef __HAVE_OLD_DISKLABEL 816 case ODIOCWDINFO: 817 case ODIOCSDINFO: 818 #endif 819 case DIOCWLABEL: 820 if ((flag & FWRITE) == 0) 821 return (EBADF); 822 } 823 824 /* Must be initialized for these... */ 825 switch (cmd) { 826 case DIOCGDINFO: 827 case DIOCSDINFO: 828 case DIOCWDINFO: 829 #ifdef __HAVE_OLD_DISKLABEL 830 case ODIOCGDINFO: 831 case ODIOCWDINFO: 832 case ODIOCSDINFO: 833 case ODIOCGDEFLABEL: 834 #endif 835 case DIOCGPART: 836 case DIOCWLABEL: 837 case DIOCGDEFLABEL: 838 case RAIDFRAME_SHUTDOWN: 839 case RAIDFRAME_REWRITEPARITY: 840 case RAIDFRAME_GET_INFO: 841 case RAIDFRAME_RESET_ACCTOTALS: 842 case RAIDFRAME_GET_ACCTOTALS: 843 case RAIDFRAME_KEEP_ACCTOTALS: 844 case RAIDFRAME_GET_SIZE: 845 case RAIDFRAME_FAIL_DISK: 846 case RAIDFRAME_COPYBACK: 847 case RAIDFRAME_CHECK_RECON_STATUS: 848 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 849 case RAIDFRAME_GET_COMPONENT_LABEL: 850 case RAIDFRAME_SET_COMPONENT_LABEL: 851 case RAIDFRAME_ADD_HOT_SPARE: 852 case RAIDFRAME_REMOVE_HOT_SPARE: 853 case RAIDFRAME_INIT_LABELS: 854 case RAIDFRAME_REBUILD_IN_PLACE: 855 case RAIDFRAME_CHECK_PARITY: 856 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 857 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 858 case RAIDFRAME_CHECK_COPYBACK_STATUS: 859 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 860 case RAIDFRAME_SET_AUTOCONFIG: 861 case RAIDFRAME_SET_ROOT: 862 case RAIDFRAME_DELETE_COMPONENT: 863 case RAIDFRAME_INCORPORATE_HOT_SPARE: 864 if ((rs->sc_flags & RAIDF_INITED) == 0) 865 return (ENXIO); 866 } 867 868 switch (cmd) { 869 870 /* configure the system */ 871 case RAIDFRAME_CONFIGURE: 872 873 if (raidPtr->valid) { 874 /* There is a valid RAID set running on this unit! */ 875 printf("raid%d: Device already configured!\n",unit); 876 return(EINVAL); 877 } 878 879 /* copy-in the configuration information */ 880 /* data points to a pointer to the configuration structure */ 881 882 u_cfg = *((RF_Config_t **) data); 883 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); 884 if (k_cfg == NULL) { 885 return (ENOMEM); 886 } 887 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t)); 888 if (retcode) { 889 RF_Free(k_cfg, sizeof(RF_Config_t)); 890 db1_printf(("rf_ioctl: retcode=%d copyin.1\n", 891 retcode)); 892 return (retcode); 893 } 894 /* allocate a buffer for the layout-specific data, and copy it 895 * in */ 896 if (k_cfg->layoutSpecificSize) { 897 if (k_cfg->layoutSpecificSize > 10000) { 898 /* sanity check */ 899 RF_Free(k_cfg, sizeof(RF_Config_t)); 900 return (EINVAL); 901 } 902 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, 903 (u_char *)); 904 if (specific_buf == NULL) { 905 RF_Free(k_cfg, sizeof(RF_Config_t)); 906 return (ENOMEM); 907 } 908 retcode = copyin(k_cfg->layoutSpecific, specific_buf, 909 k_cfg->layoutSpecificSize); 910 if (retcode) { 911 RF_Free(k_cfg, sizeof(RF_Config_t)); 912 RF_Free(specific_buf, 913 k_cfg->layoutSpecificSize); 914 db1_printf(("rf_ioctl: retcode=%d copyin.2\n", 915 retcode)); 916 return (retcode); 917 } 918 } else 919 specific_buf = NULL; 920 k_cfg->layoutSpecific = specific_buf; 921 922 /* should do some kind of sanity check on the configuration. 923 * Store the sum of all the bytes in the last byte? */ 924 925 /* configure the system */ 926 927 /* 928 * Clear the entire RAID descriptor, just to make sure 929 * there is no stale data left in the case of a 930 * reconfiguration 931 */ 932 memset((char *) raidPtr, 0, sizeof(RF_Raid_t)); 933 raidPtr->raidid = unit; 934 935 retcode = rf_Configure(raidPtr, k_cfg, NULL); 936 937 if (retcode == 0) { 938 939 /* allow this many simultaneous IO's to 940 this RAID device */ 941 raidPtr->openings = RAIDOUTSTANDING; 942 943 raidinit(raidPtr); 944 rf_markalldirty(raidPtr); 945 } 946 /* free the buffers. No return code here. */ 947 if (k_cfg->layoutSpecificSize) { 948 RF_Free(specific_buf, k_cfg->layoutSpecificSize); 949 } 950 RF_Free(k_cfg, sizeof(RF_Config_t)); 951 952 return (retcode); 953 954 /* shutdown the system */ 955 case RAIDFRAME_SHUTDOWN: 956 957 if ((error = raidlock(rs)) != 0) 958 return (error); 959 960 /* 961 * If somebody has a partition mounted, we shouldn't 962 * shutdown. 963 */ 964 965 part = DISKPART(dev); 966 pmask = (1 << part); 967 if ((rs->sc_dkdev.dk_openmask & ~pmask) || 968 ((rs->sc_dkdev.dk_bopenmask & pmask) && 969 (rs->sc_dkdev.dk_copenmask & pmask))) { 970 raidunlock(rs); 971 return (EBUSY); 972 } 973 974 retcode = rf_Shutdown(raidPtr); 975 976 /* It's no longer initialized... */ 977 rs->sc_flags &= ~RAIDF_INITED; 978 979 /* Detach the disk. */ 980 pseudo_disk_detach(&rs->sc_dkdev); 981 982 raidunlock(rs); 983 984 return (retcode); 985 case RAIDFRAME_GET_COMPONENT_LABEL: 986 clabel_ptr = (RF_ComponentLabel_t **) data; 987 /* need to read the component label for the disk indicated 988 by row,column in clabel */ 989 990 /* For practice, let's get it directly fromdisk, rather 991 than from the in-core copy */ 992 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ), 993 (RF_ComponentLabel_t *)); 994 if (clabel == NULL) 995 return (ENOMEM); 996 997 retcode = copyin( *clabel_ptr, clabel, 998 sizeof(RF_ComponentLabel_t)); 999 1000 if (retcode) { 1001 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1002 return(retcode); 1003 } 1004 1005 clabel->row = 0; /* Don't allow looking at anything else.*/ 1006 1007 column = clabel->column; 1008 1009 if ((column < 0) || (column >= raidPtr->numCol + 1010 raidPtr->numSpare)) { 1011 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1012 return(EINVAL); 1013 } 1014 1015 retcode = raidread_component_label(raidPtr->Disks[column].dev, 1016 raidPtr->raid_cinfo[column].ci_vp, 1017 clabel ); 1018 1019 if (retcode == 0) { 1020 retcode = copyout(clabel, *clabel_ptr, 1021 sizeof(RF_ComponentLabel_t)); 1022 } 1023 RF_Free(clabel, sizeof(RF_ComponentLabel_t)); 1024 return (retcode); 1025 1026 case RAIDFRAME_SET_COMPONENT_LABEL: 1027 clabel = (RF_ComponentLabel_t *) data; 1028 1029 /* XXX check the label for valid stuff... */ 1030 /* Note that some things *should not* get modified -- 1031 the user should be re-initing the labels instead of 1032 trying to patch things. 1033 */ 1034 1035 raidid = raidPtr->raidid; 1036 #if DEBUG 1037 printf("raid%d: Got component label:\n", raidid); 1038 printf("raid%d: Version: %d\n", raidid, clabel->version); 1039 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number); 1040 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter); 1041 printf("raid%d: Column: %d\n", raidid, clabel->column); 1042 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns); 1043 printf("raid%d: Clean: %d\n", raidid, clabel->clean); 1044 printf("raid%d: Status: %d\n", raidid, clabel->status); 1045 #endif 1046 clabel->row = 0; 1047 column = clabel->column; 1048 1049 if ((column < 0) || (column >= raidPtr->numCol)) { 1050 return(EINVAL); 1051 } 1052 1053 /* XXX this isn't allowed to do anything for now :-) */ 1054 1055 /* XXX and before it is, we need to fill in the rest 1056 of the fields!?!?!?! */ 1057 #if 0 1058 raidwrite_component_label( 1059 raidPtr->Disks[column].dev, 1060 raidPtr->raid_cinfo[column].ci_vp, 1061 clabel ); 1062 #endif 1063 return (0); 1064 1065 case RAIDFRAME_INIT_LABELS: 1066 clabel = (RF_ComponentLabel_t *) data; 1067 /* 1068 we only want the serial number from 1069 the above. We get all the rest of the information 1070 from the config that was used to create this RAID 1071 set. 1072 */ 1073 1074 raidPtr->serial_number = clabel->serial_number; 1075 1076 raid_init_component_label(raidPtr, &ci_label); 1077 ci_label.serial_number = clabel->serial_number; 1078 ci_label.row = 0; /* we dont' pretend to support more */ 1079 1080 for(column=0;column<raidPtr->numCol;column++) { 1081 diskPtr = &raidPtr->Disks[column]; 1082 if (!RF_DEAD_DISK(diskPtr->status)) { 1083 ci_label.partitionSize = diskPtr->partitionSize; 1084 ci_label.column = column; 1085 raidwrite_component_label( 1086 raidPtr->Disks[column].dev, 1087 raidPtr->raid_cinfo[column].ci_vp, 1088 &ci_label ); 1089 } 1090 } 1091 1092 return (retcode); 1093 case RAIDFRAME_SET_AUTOCONFIG: 1094 d = rf_set_autoconfig(raidPtr, *(int *) data); 1095 printf("raid%d: New autoconfig value is: %d\n", 1096 raidPtr->raidid, d); 1097 *(int *) data = d; 1098 return (retcode); 1099 1100 case RAIDFRAME_SET_ROOT: 1101 d = rf_set_rootpartition(raidPtr, *(int *) data); 1102 printf("raid%d: New rootpartition value is: %d\n", 1103 raidPtr->raidid, d); 1104 *(int *) data = d; 1105 return (retcode); 1106 1107 /* initialize all parity */ 1108 case RAIDFRAME_REWRITEPARITY: 1109 1110 if (raidPtr->Layout.map->faultsTolerated == 0) { 1111 /* Parity for RAID 0 is trivially correct */ 1112 raidPtr->parity_good = RF_RAID_CLEAN; 1113 return(0); 1114 } 1115 1116 if (raidPtr->parity_rewrite_in_progress == 1) { 1117 /* Re-write is already in progress! */ 1118 return(EINVAL); 1119 } 1120 1121 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, 1122 rf_RewriteParityThread, 1123 raidPtr,"raid_parity"); 1124 return (retcode); 1125 1126 1127 case RAIDFRAME_ADD_HOT_SPARE: 1128 sparePtr = (RF_SingleComponent_t *) data; 1129 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t)); 1130 retcode = rf_add_hot_spare(raidPtr, &hot_spare); 1131 return(retcode); 1132 1133 case RAIDFRAME_REMOVE_HOT_SPARE: 1134 return(retcode); 1135 1136 case RAIDFRAME_DELETE_COMPONENT: 1137 componentPtr = (RF_SingleComponent_t *)data; 1138 memcpy( &component, componentPtr, 1139 sizeof(RF_SingleComponent_t)); 1140 retcode = rf_delete_component(raidPtr, &component); 1141 return(retcode); 1142 1143 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1144 componentPtr = (RF_SingleComponent_t *)data; 1145 memcpy( &component, componentPtr, 1146 sizeof(RF_SingleComponent_t)); 1147 retcode = rf_incorporate_hot_spare(raidPtr, &component); 1148 return(retcode); 1149 1150 case RAIDFRAME_REBUILD_IN_PLACE: 1151 1152 if (raidPtr->Layout.map->faultsTolerated == 0) { 1153 /* Can't do this on a RAID 0!! */ 1154 return(EINVAL); 1155 } 1156 1157 if (raidPtr->recon_in_progress == 1) { 1158 /* a reconstruct is already in progress! */ 1159 return(EINVAL); 1160 } 1161 1162 componentPtr = (RF_SingleComponent_t *) data; 1163 memcpy( &component, componentPtr, 1164 sizeof(RF_SingleComponent_t)); 1165 component.row = 0; /* we don't support any more */ 1166 column = component.column; 1167 1168 if ((column < 0) || (column >= raidPtr->numCol)) { 1169 return(EINVAL); 1170 } 1171 1172 RF_LOCK_MUTEX(raidPtr->mutex); 1173 if ((raidPtr->Disks[column].status == rf_ds_optimal) && 1174 (raidPtr->numFailures > 0)) { 1175 /* XXX 0 above shouldn't be constant!!! */ 1176 /* some component other than this has failed. 1177 Let's not make things worse than they already 1178 are... */ 1179 printf("raid%d: Unable to reconstruct to disk at:\n", 1180 raidPtr->raidid); 1181 printf("raid%d: Col: %d Too many failures.\n", 1182 raidPtr->raidid, column); 1183 RF_UNLOCK_MUTEX(raidPtr->mutex); 1184 return (EINVAL); 1185 } 1186 if (raidPtr->Disks[column].status == 1187 rf_ds_reconstructing) { 1188 printf("raid%d: Unable to reconstruct to disk at:\n", 1189 raidPtr->raidid); 1190 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column); 1191 1192 RF_UNLOCK_MUTEX(raidPtr->mutex); 1193 return (EINVAL); 1194 } 1195 if (raidPtr->Disks[column].status == rf_ds_spared) { 1196 RF_UNLOCK_MUTEX(raidPtr->mutex); 1197 return (EINVAL); 1198 } 1199 RF_UNLOCK_MUTEX(raidPtr->mutex); 1200 1201 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1202 if (rrcopy == NULL) 1203 return(ENOMEM); 1204 1205 rrcopy->raidPtr = (void *) raidPtr; 1206 rrcopy->col = column; 1207 1208 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1209 rf_ReconstructInPlaceThread, 1210 rrcopy,"raid_reconip"); 1211 return(retcode); 1212 1213 case RAIDFRAME_GET_INFO: 1214 if (!raidPtr->valid) 1215 return (ENODEV); 1216 ucfgp = (RF_DeviceConfig_t **) data; 1217 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), 1218 (RF_DeviceConfig_t *)); 1219 if (d_cfg == NULL) 1220 return (ENOMEM); 1221 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t)); 1222 d_cfg->rows = 1; /* there is only 1 row now */ 1223 d_cfg->cols = raidPtr->numCol; 1224 d_cfg->ndevs = raidPtr->numCol; 1225 if (d_cfg->ndevs >= RF_MAX_DISKS) { 1226 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1227 return (ENOMEM); 1228 } 1229 d_cfg->nspares = raidPtr->numSpare; 1230 if (d_cfg->nspares >= RF_MAX_DISKS) { 1231 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1232 return (ENOMEM); 1233 } 1234 d_cfg->maxqdepth = raidPtr->maxQueueDepth; 1235 d = 0; 1236 for (j = 0; j < d_cfg->cols; j++) { 1237 d_cfg->devs[d] = raidPtr->Disks[j]; 1238 d++; 1239 } 1240 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { 1241 d_cfg->spares[i] = raidPtr->Disks[j]; 1242 } 1243 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t)); 1244 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1245 1246 return (retcode); 1247 1248 case RAIDFRAME_CHECK_PARITY: 1249 *(int *) data = raidPtr->parity_good; 1250 return (0); 1251 1252 case RAIDFRAME_RESET_ACCTOTALS: 1253 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals)); 1254 return (0); 1255 1256 case RAIDFRAME_GET_ACCTOTALS: 1257 totals = (RF_AccTotals_t *) data; 1258 *totals = raidPtr->acc_totals; 1259 return (0); 1260 1261 case RAIDFRAME_KEEP_ACCTOTALS: 1262 raidPtr->keep_acc_totals = *(int *)data; 1263 return (0); 1264 1265 case RAIDFRAME_GET_SIZE: 1266 *(int *) data = raidPtr->totalSectors; 1267 return (0); 1268 1269 /* fail a disk & optionally start reconstruction */ 1270 case RAIDFRAME_FAIL_DISK: 1271 1272 if (raidPtr->Layout.map->faultsTolerated == 0) { 1273 /* Can't do this on a RAID 0!! */ 1274 return(EINVAL); 1275 } 1276 1277 rr = (struct rf_recon_req *) data; 1278 rr->row = 0; 1279 if (rr->col < 0 || rr->col >= raidPtr->numCol) 1280 return (EINVAL); 1281 1282 1283 RF_LOCK_MUTEX(raidPtr->mutex); 1284 if (raidPtr->status == rf_rs_reconstructing) { 1285 /* you can't fail a disk while we're reconstructing! */ 1286 /* XXX wrong for RAID6 */ 1287 RF_UNLOCK_MUTEX(raidPtr->mutex); 1288 return (EINVAL); 1289 } 1290 if ((raidPtr->Disks[rr->col].status == 1291 rf_ds_optimal) && (raidPtr->numFailures > 0)) { 1292 /* some other component has failed. Let's not make 1293 things worse. XXX wrong for RAID6 */ 1294 RF_UNLOCK_MUTEX(raidPtr->mutex); 1295 return (EINVAL); 1296 } 1297 if (raidPtr->Disks[rr->col].status == rf_ds_spared) { 1298 /* Can't fail a spared disk! */ 1299 RF_UNLOCK_MUTEX(raidPtr->mutex); 1300 return (EINVAL); 1301 } 1302 RF_UNLOCK_MUTEX(raidPtr->mutex); 1303 1304 /* make a copy of the recon request so that we don't rely on 1305 * the user's buffer */ 1306 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1307 if (rrcopy == NULL) 1308 return(ENOMEM); 1309 memcpy(rrcopy, rr, sizeof(*rr)); 1310 rrcopy->raidPtr = (void *) raidPtr; 1311 1312 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1313 rf_ReconThread, 1314 rrcopy,"raid_recon"); 1315 return (0); 1316 1317 /* invoke a copyback operation after recon on whatever disk 1318 * needs it, if any */ 1319 case RAIDFRAME_COPYBACK: 1320 1321 if (raidPtr->Layout.map->faultsTolerated == 0) { 1322 /* This makes no sense on a RAID 0!! */ 1323 return(EINVAL); 1324 } 1325 1326 if (raidPtr->copyback_in_progress == 1) { 1327 /* Copyback is already in progress! */ 1328 return(EINVAL); 1329 } 1330 1331 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, 1332 rf_CopybackThread, 1333 raidPtr,"raid_copyback"); 1334 return (retcode); 1335 1336 /* return the percentage completion of reconstruction */ 1337 case RAIDFRAME_CHECK_RECON_STATUS: 1338 if (raidPtr->Layout.map->faultsTolerated == 0) { 1339 /* This makes no sense on a RAID 0, so tell the 1340 user it's done. */ 1341 *(int *) data = 100; 1342 return(0); 1343 } 1344 if (raidPtr->status != rf_rs_reconstructing) 1345 *(int *) data = 100; 1346 else { 1347 if (raidPtr->reconControl->numRUsTotal > 0) { 1348 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal); 1349 } else { 1350 *(int *) data = 0; 1351 } 1352 } 1353 return (0); 1354 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1355 progressInfoPtr = (RF_ProgressInfo_t **) data; 1356 if (raidPtr->status != rf_rs_reconstructing) { 1357 progressInfo.remaining = 0; 1358 progressInfo.completed = 100; 1359 progressInfo.total = 100; 1360 } else { 1361 progressInfo.total = 1362 raidPtr->reconControl->numRUsTotal; 1363 progressInfo.completed = 1364 raidPtr->reconControl->numRUsComplete; 1365 progressInfo.remaining = progressInfo.total - 1366 progressInfo.completed; 1367 } 1368 retcode = copyout(&progressInfo, *progressInfoPtr, 1369 sizeof(RF_ProgressInfo_t)); 1370 return (retcode); 1371 1372 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1373 if (raidPtr->Layout.map->faultsTolerated == 0) { 1374 /* This makes no sense on a RAID 0, so tell the 1375 user it's done. */ 1376 *(int *) data = 100; 1377 return(0); 1378 } 1379 if (raidPtr->parity_rewrite_in_progress == 1) { 1380 *(int *) data = 100 * 1381 raidPtr->parity_rewrite_stripes_done / 1382 raidPtr->Layout.numStripe; 1383 } else { 1384 *(int *) data = 100; 1385 } 1386 return (0); 1387 1388 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1389 progressInfoPtr = (RF_ProgressInfo_t **) data; 1390 if (raidPtr->parity_rewrite_in_progress == 1) { 1391 progressInfo.total = raidPtr->Layout.numStripe; 1392 progressInfo.completed = 1393 raidPtr->parity_rewrite_stripes_done; 1394 progressInfo.remaining = progressInfo.total - 1395 progressInfo.completed; 1396 } else { 1397 progressInfo.remaining = 0; 1398 progressInfo.completed = 100; 1399 progressInfo.total = 100; 1400 } 1401 retcode = copyout(&progressInfo, *progressInfoPtr, 1402 sizeof(RF_ProgressInfo_t)); 1403 return (retcode); 1404 1405 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1406 if (raidPtr->Layout.map->faultsTolerated == 0) { 1407 /* This makes no sense on a RAID 0 */ 1408 *(int *) data = 100; 1409 return(0); 1410 } 1411 if (raidPtr->copyback_in_progress == 1) { 1412 *(int *) data = 100 * raidPtr->copyback_stripes_done / 1413 raidPtr->Layout.numStripe; 1414 } else { 1415 *(int *) data = 100; 1416 } 1417 return (0); 1418 1419 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1420 progressInfoPtr = (RF_ProgressInfo_t **) data; 1421 if (raidPtr->copyback_in_progress == 1) { 1422 progressInfo.total = raidPtr->Layout.numStripe; 1423 progressInfo.completed = 1424 raidPtr->copyback_stripes_done; 1425 progressInfo.remaining = progressInfo.total - 1426 progressInfo.completed; 1427 } else { 1428 progressInfo.remaining = 0; 1429 progressInfo.completed = 100; 1430 progressInfo.total = 100; 1431 } 1432 retcode = copyout(&progressInfo, *progressInfoPtr, 1433 sizeof(RF_ProgressInfo_t)); 1434 return (retcode); 1435 1436 /* the sparetable daemon calls this to wait for the kernel to 1437 * need a spare table. this ioctl does not return until a 1438 * spare table is needed. XXX -- calling mpsleep here in the 1439 * ioctl code is almost certainly wrong and evil. -- XXX XXX 1440 * -- I should either compute the spare table in the kernel, 1441 * or have a different -- XXX XXX -- interface (a different 1442 * character device) for delivering the table -- XXX */ 1443 #if 0 1444 case RAIDFRAME_SPARET_WAIT: 1445 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1446 while (!rf_sparet_wait_queue) 1447 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); 1448 waitreq = rf_sparet_wait_queue; 1449 rf_sparet_wait_queue = rf_sparet_wait_queue->next; 1450 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1451 1452 /* structure assignment */ 1453 *((RF_SparetWait_t *) data) = *waitreq; 1454 1455 RF_Free(waitreq, sizeof(*waitreq)); 1456 return (0); 1457 1458 /* wakes up a process waiting on SPARET_WAIT and puts an error 1459 * code in it that will cause the dameon to exit */ 1460 case RAIDFRAME_ABORT_SPARET_WAIT: 1461 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1462 waitreq->fcol = -1; 1463 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1464 waitreq->next = rf_sparet_wait_queue; 1465 rf_sparet_wait_queue = waitreq; 1466 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1467 wakeup(&rf_sparet_wait_queue); 1468 return (0); 1469 1470 /* used by the spare table daemon to deliver a spare table 1471 * into the kernel */ 1472 case RAIDFRAME_SEND_SPARET: 1473 1474 /* install the spare table */ 1475 retcode = rf_SetSpareTable(raidPtr, *(void **) data); 1476 1477 /* respond to the requestor. the return status of the spare 1478 * table installation is passed in the "fcol" field */ 1479 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1480 waitreq->fcol = retcode; 1481 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1482 waitreq->next = rf_sparet_resp_queue; 1483 rf_sparet_resp_queue = waitreq; 1484 wakeup(&rf_sparet_resp_queue); 1485 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1486 1487 return (retcode); 1488 #endif 1489 1490 default: 1491 break; /* fall through to the os-specific code below */ 1492 1493 } 1494 1495 if (!raidPtr->valid) 1496 return (EINVAL); 1497 1498 /* 1499 * Add support for "regular" device ioctls here. 1500 */ 1501 1502 switch (cmd) { 1503 case DIOCGDINFO: 1504 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label); 1505 break; 1506 #ifdef __HAVE_OLD_DISKLABEL 1507 case ODIOCGDINFO: 1508 newlabel = *(rs->sc_dkdev.dk_label); 1509 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1510 return ENOTTY; 1511 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1512 break; 1513 #endif 1514 1515 case DIOCGPART: 1516 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label; 1517 ((struct partinfo *) data)->part = 1518 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1519 break; 1520 1521 case DIOCWDINFO: 1522 case DIOCSDINFO: 1523 #ifdef __HAVE_OLD_DISKLABEL 1524 case ODIOCWDINFO: 1525 case ODIOCSDINFO: 1526 #endif 1527 { 1528 struct disklabel *lp; 1529 #ifdef __HAVE_OLD_DISKLABEL 1530 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1531 memset(&newlabel, 0, sizeof newlabel); 1532 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1533 lp = &newlabel; 1534 } else 1535 #endif 1536 lp = (struct disklabel *)data; 1537 1538 if ((error = raidlock(rs)) != 0) 1539 return (error); 1540 1541 rs->sc_flags |= RAIDF_LABELLING; 1542 1543 error = setdisklabel(rs->sc_dkdev.dk_label, 1544 lp, 0, rs->sc_dkdev.dk_cpulabel); 1545 if (error == 0) { 1546 if (cmd == DIOCWDINFO 1547 #ifdef __HAVE_OLD_DISKLABEL 1548 || cmd == ODIOCWDINFO 1549 #endif 1550 ) 1551 error = writedisklabel(RAIDLABELDEV(dev), 1552 raidstrategy, rs->sc_dkdev.dk_label, 1553 rs->sc_dkdev.dk_cpulabel); 1554 } 1555 rs->sc_flags &= ~RAIDF_LABELLING; 1556 1557 raidunlock(rs); 1558 1559 if (error) 1560 return (error); 1561 break; 1562 } 1563 1564 case DIOCWLABEL: 1565 if (*(int *) data != 0) 1566 rs->sc_flags |= RAIDF_WLABEL; 1567 else 1568 rs->sc_flags &= ~RAIDF_WLABEL; 1569 break; 1570 1571 case DIOCGDEFLABEL: 1572 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data); 1573 break; 1574 1575 #ifdef __HAVE_OLD_DISKLABEL 1576 case ODIOCGDEFLABEL: 1577 raidgetdefaultlabel(raidPtr, rs, &newlabel); 1578 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1579 return ENOTTY; 1580 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1581 break; 1582 #endif 1583 1584 default: 1585 retcode = ENOTTY; 1586 } 1587 return (retcode); 1588 1589 } 1590 1591 1592 /* raidinit -- complete the rest of the initialization for the 1593 RAIDframe device. */ 1594 1595 1596 static void 1597 raidinit(RF_Raid_t *raidPtr) 1598 { 1599 struct raid_softc *rs; 1600 int unit; 1601 1602 unit = raidPtr->raidid; 1603 1604 rs = &raid_softc[unit]; 1605 1606 /* XXX should check return code first... */ 1607 rs->sc_flags |= RAIDF_INITED; 1608 1609 /* XXX doesn't check bounds. */ 1610 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit); 1611 1612 rs->sc_dkdev.dk_name = rs->sc_xname; 1613 1614 /* disk_attach actually creates space for the CPU disklabel, among 1615 * other things, so it's critical to call this *BEFORE* we try putzing 1616 * with disklabels. */ 1617 1618 pseudo_disk_attach(&rs->sc_dkdev); 1619 1620 /* XXX There may be a weird interaction here between this, and 1621 * protectedSectors, as used in RAIDframe. */ 1622 1623 rs->sc_size = raidPtr->totalSectors; 1624 } 1625 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 1626 /* wake up the daemon & tell it to get us a spare table 1627 * XXX 1628 * the entries in the queues should be tagged with the raidPtr 1629 * so that in the extremely rare case that two recons happen at once, 1630 * we know for which device were requesting a spare table 1631 * XXX 1632 * 1633 * XXX This code is not currently used. GO 1634 */ 1635 int 1636 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req) 1637 { 1638 int retcode; 1639 1640 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1641 req->next = rf_sparet_wait_queue; 1642 rf_sparet_wait_queue = req; 1643 wakeup(&rf_sparet_wait_queue); 1644 1645 /* mpsleep unlocks the mutex */ 1646 while (!rf_sparet_resp_queue) { 1647 tsleep(&rf_sparet_resp_queue, PRIBIO, 1648 "raidframe getsparetable", 0); 1649 } 1650 req = rf_sparet_resp_queue; 1651 rf_sparet_resp_queue = req->next; 1652 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1653 1654 retcode = req->fcol; 1655 RF_Free(req, sizeof(*req)); /* this is not the same req as we 1656 * alloc'd */ 1657 return (retcode); 1658 } 1659 #endif 1660 1661 /* a wrapper around rf_DoAccess that extracts appropriate info from the 1662 * bp & passes it down. 1663 * any calls originating in the kernel must use non-blocking I/O 1664 * do some extra sanity checking to return "appropriate" error values for 1665 * certain conditions (to make some standard utilities work) 1666 * 1667 * Formerly known as: rf_DoAccessKernel 1668 */ 1669 void 1670 raidstart(RF_Raid_t *raidPtr) 1671 { 1672 RF_SectorCount_t num_blocks, pb, sum; 1673 RF_RaidAddr_t raid_addr; 1674 struct partition *pp; 1675 daddr_t blocknum; 1676 int unit; 1677 struct raid_softc *rs; 1678 int do_async; 1679 struct buf *bp; 1680 int rc; 1681 1682 unit = raidPtr->raidid; 1683 rs = &raid_softc[unit]; 1684 1685 /* quick check to see if anything has died recently */ 1686 RF_LOCK_MUTEX(raidPtr->mutex); 1687 if (raidPtr->numNewFailures > 0) { 1688 RF_UNLOCK_MUTEX(raidPtr->mutex); 1689 rf_update_component_labels(raidPtr, 1690 RF_NORMAL_COMPONENT_UPDATE); 1691 RF_LOCK_MUTEX(raidPtr->mutex); 1692 raidPtr->numNewFailures--; 1693 } 1694 1695 /* Check to see if we're at the limit... */ 1696 while (raidPtr->openings > 0) { 1697 RF_UNLOCK_MUTEX(raidPtr->mutex); 1698 1699 /* get the next item, if any, from the queue */ 1700 if ((bp = BUFQ_GET(rs->buf_queue)) == NULL) { 1701 /* nothing more to do */ 1702 return; 1703 } 1704 1705 /* Ok, for the bp we have here, bp->b_blkno is relative to the 1706 * partition.. Need to make it absolute to the underlying 1707 * device.. */ 1708 1709 blocknum = bp->b_blkno; 1710 if (DISKPART(bp->b_dev) != RAW_PART) { 1711 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; 1712 blocknum += pp->p_offset; 1713 } 1714 1715 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, 1716 (int) blocknum)); 1717 1718 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount)); 1719 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid)); 1720 1721 /* *THIS* is where we adjust what block we're going to... 1722 * but DO NOT TOUCH bp->b_blkno!!! */ 1723 raid_addr = blocknum; 1724 1725 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector; 1726 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0; 1727 sum = raid_addr + num_blocks + pb; 1728 if (1 || rf_debugKernelAccess) { 1729 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n", 1730 (int) raid_addr, (int) sum, (int) num_blocks, 1731 (int) pb, (int) bp->b_resid)); 1732 } 1733 if ((sum > raidPtr->totalSectors) || (sum < raid_addr) 1734 || (sum < num_blocks) || (sum < pb)) { 1735 bp->b_error = ENOSPC; 1736 bp->b_flags |= B_ERROR; 1737 bp->b_resid = bp->b_bcount; 1738 biodone(bp); 1739 RF_LOCK_MUTEX(raidPtr->mutex); 1740 continue; 1741 } 1742 /* 1743 * XXX rf_DoAccess() should do this, not just DoAccessKernel() 1744 */ 1745 1746 if (bp->b_bcount & raidPtr->sectorMask) { 1747 bp->b_error = EINVAL; 1748 bp->b_flags |= B_ERROR; 1749 bp->b_resid = bp->b_bcount; 1750 biodone(bp); 1751 RF_LOCK_MUTEX(raidPtr->mutex); 1752 continue; 1753 1754 } 1755 db1_printf(("Calling DoAccess..\n")); 1756 1757 1758 RF_LOCK_MUTEX(raidPtr->mutex); 1759 raidPtr->openings--; 1760 RF_UNLOCK_MUTEX(raidPtr->mutex); 1761 1762 /* 1763 * Everything is async. 1764 */ 1765 do_async = 1; 1766 1767 disk_busy(&rs->sc_dkdev); 1768 1769 /* XXX we're still at splbio() here... do we *really* 1770 need to be? */ 1771 1772 /* don't ever condition on bp->b_flags & B_WRITE. 1773 * always condition on B_READ instead */ 1774 1775 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? 1776 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, 1777 do_async, raid_addr, num_blocks, 1778 bp->b_data, bp, RF_DAG_NONBLOCKING_IO); 1779 1780 if (rc) { 1781 bp->b_error = rc; 1782 bp->b_flags |= B_ERROR; 1783 bp->b_resid = bp->b_bcount; 1784 biodone(bp); 1785 /* continue loop */ 1786 } 1787 1788 RF_LOCK_MUTEX(raidPtr->mutex); 1789 } 1790 RF_UNLOCK_MUTEX(raidPtr->mutex); 1791 } 1792 1793 1794 1795 1796 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */ 1797 1798 int 1799 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req) 1800 { 1801 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE; 1802 struct buf *bp; 1803 1804 req->queue = queue; 1805 1806 #if DIAGNOSTIC 1807 if (queue->raidPtr->raidid >= numraid) { 1808 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid, 1809 numraid); 1810 panic("Invalid Unit number in rf_DispatchKernelIO"); 1811 } 1812 #endif 1813 1814 bp = req->bp; 1815 1816 switch (req->type) { 1817 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ 1818 /* XXX need to do something extra here.. */ 1819 /* I'm leaving this in, as I've never actually seen it used, 1820 * and I'd like folks to report it... GO */ 1821 printf(("WAKEUP CALLED\n")); 1822 queue->numOutstanding++; 1823 1824 bp->b_flags = 0; 1825 bp->b_fspriv.bf_private = req; 1826 1827 KernelWakeupFunc(bp); 1828 break; 1829 1830 case RF_IO_TYPE_READ: 1831 case RF_IO_TYPE_WRITE: 1832 #if RF_ACC_TRACE > 0 1833 if (req->tracerec) { 1834 RF_ETIMER_START(req->tracerec->timer); 1835 } 1836 #endif 1837 InitBP(bp, queue->rf_cinfo->ci_vp, 1838 op, queue->rf_cinfo->ci_dev, 1839 req->sectorOffset, req->numSector, 1840 req->buf, KernelWakeupFunc, (void *) req, 1841 queue->raidPtr->logBytesPerSector, req->b_proc); 1842 1843 if (rf_debugKernelAccess) { 1844 db1_printf(("dispatch: bp->b_blkno = %ld\n", 1845 (long) bp->b_blkno)); 1846 } 1847 queue->numOutstanding++; 1848 queue->last_deq_sector = req->sectorOffset; 1849 /* acc wouldn't have been let in if there were any pending 1850 * reqs at any other priority */ 1851 queue->curPriority = req->priority; 1852 1853 db1_printf(("Going for %c to unit %d col %d\n", 1854 req->type, queue->raidPtr->raidid, 1855 queue->col)); 1856 db1_printf(("sector %d count %d (%d bytes) %d\n", 1857 (int) req->sectorOffset, (int) req->numSector, 1858 (int) (req->numSector << 1859 queue->raidPtr->logBytesPerSector), 1860 (int) queue->raidPtr->logBytesPerSector)); 1861 VOP_STRATEGY(bp->b_vp, bp); 1862 1863 break; 1864 1865 default: 1866 panic("bad req->type in rf_DispatchKernelIO"); 1867 } 1868 db1_printf(("Exiting from DispatchKernelIO\n")); 1869 1870 return (0); 1871 } 1872 /* this is the callback function associated with a I/O invoked from 1873 kernel code. 1874 */ 1875 static void 1876 KernelWakeupFunc(struct buf *bp) 1877 { 1878 RF_DiskQueueData_t *req = NULL; 1879 RF_DiskQueue_t *queue; 1880 int s; 1881 1882 s = splbio(); 1883 db1_printf(("recovering the request queue:\n")); 1884 req = bp->b_fspriv.bf_private; 1885 1886 queue = (RF_DiskQueue_t *) req->queue; 1887 1888 #if RF_ACC_TRACE > 0 1889 if (req->tracerec) { 1890 RF_ETIMER_STOP(req->tracerec->timer); 1891 RF_ETIMER_EVAL(req->tracerec->timer); 1892 RF_LOCK_MUTEX(rf_tracing_mutex); 1893 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); 1894 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); 1895 req->tracerec->num_phys_ios++; 1896 RF_UNLOCK_MUTEX(rf_tracing_mutex); 1897 } 1898 #endif 1899 1900 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go 1901 * ballistic, and mark the component as hosed... */ 1902 1903 if (bp->b_flags & B_ERROR) { 1904 /* Mark the disk as dead */ 1905 /* but only mark it once... */ 1906 /* and only if it wouldn't leave this RAID set 1907 completely broken */ 1908 if (((queue->raidPtr->Disks[queue->col].status == 1909 rf_ds_optimal) || 1910 (queue->raidPtr->Disks[queue->col].status == 1911 rf_ds_used_spare)) && 1912 (queue->raidPtr->numFailures < 1913 queue->raidPtr->Layout.map->faultsTolerated)) { 1914 printf("raid%d: IO Error. Marking %s as failed.\n", 1915 queue->raidPtr->raidid, 1916 queue->raidPtr->Disks[queue->col].devname); 1917 queue->raidPtr->Disks[queue->col].status = 1918 rf_ds_failed; 1919 queue->raidPtr->status = rf_rs_degraded; 1920 queue->raidPtr->numFailures++; 1921 queue->raidPtr->numNewFailures++; 1922 } else { /* Disk is already dead... */ 1923 /* printf("Disk already marked as dead!\n"); */ 1924 } 1925 1926 } 1927 1928 /* Fill in the error value */ 1929 1930 req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0; 1931 1932 simple_lock(&queue->raidPtr->iodone_lock); 1933 1934 /* Drop this one on the "finished" queue... */ 1935 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries); 1936 1937 /* Let the raidio thread know there is work to be done. */ 1938 wakeup(&(queue->raidPtr->iodone)); 1939 1940 simple_unlock(&queue->raidPtr->iodone_lock); 1941 1942 splx(s); 1943 } 1944 1945 1946 1947 /* 1948 * initialize a buf structure for doing an I/O in the kernel. 1949 */ 1950 static void 1951 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev, 1952 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf, 1953 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector, 1954 struct proc *b_proc) 1955 { 1956 /* bp->b_flags = B_PHYS | rw_flag; */ 1957 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */ 1958 bp->b_bcount = numSect << logBytesPerSector; 1959 bp->b_bufsize = bp->b_bcount; 1960 bp->b_error = 0; 1961 bp->b_dev = dev; 1962 bp->b_data = bf; 1963 bp->b_blkno = startSect; 1964 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */ 1965 if (bp->b_bcount == 0) { 1966 panic("bp->b_bcount is zero in InitBP!!"); 1967 } 1968 bp->b_proc = b_proc; 1969 bp->b_iodone = cbFunc; 1970 bp->b_fspriv.bf_private = cbArg; 1971 bp->b_vp = b_vp; 1972 if ((bp->b_flags & B_READ) == 0) { 1973 bp->b_vp->v_numoutput++; 1974 } 1975 1976 } 1977 1978 static void 1979 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs, 1980 struct disklabel *lp) 1981 { 1982 memset(lp, 0, sizeof(*lp)); 1983 1984 /* fabricate a label... */ 1985 lp->d_secperunit = raidPtr->totalSectors; 1986 lp->d_secsize = raidPtr->bytesPerSector; 1987 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe; 1988 lp->d_ntracks = 4 * raidPtr->numCol; 1989 lp->d_ncylinders = raidPtr->totalSectors / 1990 (lp->d_nsectors * lp->d_ntracks); 1991 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1992 1993 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename)); 1994 lp->d_type = DTYPE_RAID; 1995 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1996 lp->d_rpm = 3600; 1997 lp->d_interleave = 1; 1998 lp->d_flags = 0; 1999 2000 lp->d_partitions[RAW_PART].p_offset = 0; 2001 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors; 2002 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 2003 lp->d_npartitions = RAW_PART + 1; 2004 2005 lp->d_magic = DISKMAGIC; 2006 lp->d_magic2 = DISKMAGIC; 2007 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label); 2008 2009 } 2010 /* 2011 * Read the disklabel from the raid device. If one is not present, fake one 2012 * up. 2013 */ 2014 static void 2015 raidgetdisklabel(dev_t dev) 2016 { 2017 int unit = raidunit(dev); 2018 struct raid_softc *rs = &raid_softc[unit]; 2019 const char *errstring; 2020 struct disklabel *lp = rs->sc_dkdev.dk_label; 2021 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel; 2022 RF_Raid_t *raidPtr; 2023 2024 db1_printf(("Getting the disklabel...\n")); 2025 2026 memset(clp, 0, sizeof(*clp)); 2027 2028 raidPtr = raidPtrs[unit]; 2029 2030 raidgetdefaultlabel(raidPtr, rs, lp); 2031 2032 /* 2033 * Call the generic disklabel extraction routine. 2034 */ 2035 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, 2036 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel); 2037 if (errstring) 2038 raidmakedisklabel(rs); 2039 else { 2040 int i; 2041 struct partition *pp; 2042 2043 /* 2044 * Sanity check whether the found disklabel is valid. 2045 * 2046 * This is necessary since total size of the raid device 2047 * may vary when an interleave is changed even though exactly 2048 * same componets are used, and old disklabel may used 2049 * if that is found. 2050 */ 2051 if (lp->d_secperunit != rs->sc_size) 2052 printf("raid%d: WARNING: %s: " 2053 "total sector size in disklabel (%d) != " 2054 "the size of raid (%ld)\n", unit, rs->sc_xname, 2055 lp->d_secperunit, (long) rs->sc_size); 2056 for (i = 0; i < lp->d_npartitions; i++) { 2057 pp = &lp->d_partitions[i]; 2058 if (pp->p_offset + pp->p_size > rs->sc_size) 2059 printf("raid%d: WARNING: %s: end of partition `%c' " 2060 "exceeds the size of raid (%ld)\n", 2061 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size); 2062 } 2063 } 2064 2065 } 2066 /* 2067 * Take care of things one might want to take care of in the event 2068 * that a disklabel isn't present. 2069 */ 2070 static void 2071 raidmakedisklabel(struct raid_softc *rs) 2072 { 2073 struct disklabel *lp = rs->sc_dkdev.dk_label; 2074 db1_printf(("Making a label..\n")); 2075 2076 /* 2077 * For historical reasons, if there's no disklabel present 2078 * the raw partition must be marked FS_BSDFFS. 2079 */ 2080 2081 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 2082 2083 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 2084 2085 lp->d_checksum = dkcksum(lp); 2086 } 2087 /* 2088 * Lookup the provided name in the filesystem. If the file exists, 2089 * is a valid block device, and isn't being used by anyone else, 2090 * set *vpp to the file's vnode. 2091 * You'll find the original of this in ccd.c 2092 */ 2093 int 2094 raidlookup(char *path, struct lwp *l, struct vnode **vpp) 2095 { 2096 struct nameidata nd; 2097 struct vnode *vp; 2098 struct proc *p; 2099 struct vattr va; 2100 int error; 2101 2102 p = l ? l->l_proc : NULL; 2103 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, l); 2104 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) { 2105 return (error); 2106 } 2107 vp = nd.ni_vp; 2108 if (vp->v_usecount > 1) { 2109 VOP_UNLOCK(vp, 0); 2110 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l); 2111 return (EBUSY); 2112 } 2113 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, l)) != 0) { 2114 VOP_UNLOCK(vp, 0); 2115 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l); 2116 return (error); 2117 } 2118 /* XXX: eventually we should handle VREG, too. */ 2119 if (va.va_type != VBLK) { 2120 VOP_UNLOCK(vp, 0); 2121 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l); 2122 return (ENOTBLK); 2123 } 2124 VOP_UNLOCK(vp, 0); 2125 *vpp = vp; 2126 return (0); 2127 } 2128 /* 2129 * Wait interruptibly for an exclusive lock. 2130 * 2131 * XXX 2132 * Several drivers do this; it should be abstracted and made MP-safe. 2133 * (Hmm... where have we seen this warning before :-> GO ) 2134 */ 2135 static int 2136 raidlock(struct raid_softc *rs) 2137 { 2138 int error; 2139 2140 while ((rs->sc_flags & RAIDF_LOCKED) != 0) { 2141 rs->sc_flags |= RAIDF_WANTED; 2142 if ((error = 2143 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0) 2144 return (error); 2145 } 2146 rs->sc_flags |= RAIDF_LOCKED; 2147 return (0); 2148 } 2149 /* 2150 * Unlock and wake up any waiters. 2151 */ 2152 static void 2153 raidunlock(struct raid_softc *rs) 2154 { 2155 2156 rs->sc_flags &= ~RAIDF_LOCKED; 2157 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2158 rs->sc_flags &= ~RAIDF_WANTED; 2159 wakeup(rs); 2160 } 2161 } 2162 2163 2164 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2165 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2166 2167 int 2168 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) 2169 { 2170 RF_ComponentLabel_t clabel; 2171 raidread_component_label(dev, b_vp, &clabel); 2172 clabel.mod_counter = mod_counter; 2173 clabel.clean = RF_RAID_CLEAN; 2174 raidwrite_component_label(dev, b_vp, &clabel); 2175 return(0); 2176 } 2177 2178 2179 int 2180 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) 2181 { 2182 RF_ComponentLabel_t clabel; 2183 raidread_component_label(dev, b_vp, &clabel); 2184 clabel.mod_counter = mod_counter; 2185 clabel.clean = RF_RAID_DIRTY; 2186 raidwrite_component_label(dev, b_vp, &clabel); 2187 return(0); 2188 } 2189 2190 /* ARGSUSED */ 2191 int 2192 raidread_component_label(dev_t dev, struct vnode *b_vp, 2193 RF_ComponentLabel_t *clabel) 2194 { 2195 struct buf *bp; 2196 const struct bdevsw *bdev; 2197 int error; 2198 2199 /* XXX should probably ensure that we don't try to do this if 2200 someone has changed rf_protected_sectors. */ 2201 2202 if (b_vp == NULL) { 2203 /* For whatever reason, this component is not valid. 2204 Don't try to read a component label from it. */ 2205 return(EINVAL); 2206 } 2207 2208 /* get a block of the appropriate size... */ 2209 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2210 bp->b_dev = dev; 2211 2212 /* get our ducks in a row for the read */ 2213 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2214 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2215 bp->b_flags |= B_READ; 2216 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2217 2218 bdev = bdevsw_lookup(bp->b_dev); 2219 if (bdev == NULL) 2220 return (ENXIO); 2221 (*bdev->d_strategy)(bp); 2222 2223 error = biowait(bp); 2224 2225 if (!error) { 2226 memcpy(clabel, bp->b_data, 2227 sizeof(RF_ComponentLabel_t)); 2228 } 2229 2230 brelse(bp); 2231 return(error); 2232 } 2233 /* ARGSUSED */ 2234 int 2235 raidwrite_component_label(dev_t dev, struct vnode *b_vp, 2236 RF_ComponentLabel_t *clabel) 2237 { 2238 struct buf *bp; 2239 const struct bdevsw *bdev; 2240 int error; 2241 2242 /* get a block of the appropriate size... */ 2243 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2244 bp->b_dev = dev; 2245 2246 /* get our ducks in a row for the write */ 2247 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2248 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2249 bp->b_flags |= B_WRITE; 2250 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2251 2252 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); 2253 2254 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); 2255 2256 bdev = bdevsw_lookup(bp->b_dev); 2257 if (bdev == NULL) 2258 return (ENXIO); 2259 (*bdev->d_strategy)(bp); 2260 error = biowait(bp); 2261 brelse(bp); 2262 if (error) { 2263 #if 1 2264 printf("Failed to write RAID component info!\n"); 2265 #endif 2266 } 2267 2268 return(error); 2269 } 2270 2271 void 2272 rf_markalldirty(RF_Raid_t *raidPtr) 2273 { 2274 RF_ComponentLabel_t clabel; 2275 int sparecol; 2276 int c; 2277 int j; 2278 int scol = -1; 2279 2280 raidPtr->mod_counter++; 2281 for (c = 0; c < raidPtr->numCol; c++) { 2282 /* we don't want to touch (at all) a disk that has 2283 failed */ 2284 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) { 2285 raidread_component_label( 2286 raidPtr->Disks[c].dev, 2287 raidPtr->raid_cinfo[c].ci_vp, 2288 &clabel); 2289 if (clabel.status == rf_ds_spared) { 2290 /* XXX do something special... 2291 but whatever you do, don't 2292 try to access it!! */ 2293 } else { 2294 raidmarkdirty( 2295 raidPtr->Disks[c].dev, 2296 raidPtr->raid_cinfo[c].ci_vp, 2297 raidPtr->mod_counter); 2298 } 2299 } 2300 } 2301 2302 for( c = 0; c < raidPtr->numSpare ; c++) { 2303 sparecol = raidPtr->numCol + c; 2304 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2305 /* 2306 2307 we claim this disk is "optimal" if it's 2308 rf_ds_used_spare, as that means it should be 2309 directly substitutable for the disk it replaced. 2310 We note that too... 2311 2312 */ 2313 2314 for(j=0;j<raidPtr->numCol;j++) { 2315 if (raidPtr->Disks[j].spareCol == sparecol) { 2316 scol = j; 2317 break; 2318 } 2319 } 2320 2321 raidread_component_label( 2322 raidPtr->Disks[sparecol].dev, 2323 raidPtr->raid_cinfo[sparecol].ci_vp, 2324 &clabel); 2325 /* make sure status is noted */ 2326 2327 raid_init_component_label(raidPtr, &clabel); 2328 2329 clabel.row = 0; 2330 clabel.column = scol; 2331 /* Note: we *don't* change status from rf_ds_used_spare 2332 to rf_ds_optimal */ 2333 /* clabel.status = rf_ds_optimal; */ 2334 2335 raidmarkdirty(raidPtr->Disks[sparecol].dev, 2336 raidPtr->raid_cinfo[sparecol].ci_vp, 2337 raidPtr->mod_counter); 2338 } 2339 } 2340 } 2341 2342 2343 void 2344 rf_update_component_labels(RF_Raid_t *raidPtr, int final) 2345 { 2346 RF_ComponentLabel_t clabel; 2347 int sparecol; 2348 int c; 2349 int j; 2350 int scol; 2351 2352 scol = -1; 2353 2354 /* XXX should do extra checks to make sure things really are clean, 2355 rather than blindly setting the clean bit... */ 2356 2357 raidPtr->mod_counter++; 2358 2359 for (c = 0; c < raidPtr->numCol; c++) { 2360 if (raidPtr->Disks[c].status == rf_ds_optimal) { 2361 raidread_component_label( 2362 raidPtr->Disks[c].dev, 2363 raidPtr->raid_cinfo[c].ci_vp, 2364 &clabel); 2365 /* make sure status is noted */ 2366 clabel.status = rf_ds_optimal; 2367 /* bump the counter */ 2368 clabel.mod_counter = raidPtr->mod_counter; 2369 2370 raidwrite_component_label( 2371 raidPtr->Disks[c].dev, 2372 raidPtr->raid_cinfo[c].ci_vp, 2373 &clabel); 2374 if (final == RF_FINAL_COMPONENT_UPDATE) { 2375 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2376 raidmarkclean( 2377 raidPtr->Disks[c].dev, 2378 raidPtr->raid_cinfo[c].ci_vp, 2379 raidPtr->mod_counter); 2380 } 2381 } 2382 } 2383 /* else we don't touch it.. */ 2384 } 2385 2386 for( c = 0; c < raidPtr->numSpare ; c++) { 2387 sparecol = raidPtr->numCol + c; 2388 /* Need to ensure that the reconstruct actually completed! */ 2389 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2390 /* 2391 2392 we claim this disk is "optimal" if it's 2393 rf_ds_used_spare, as that means it should be 2394 directly substitutable for the disk it replaced. 2395 We note that too... 2396 2397 */ 2398 2399 for(j=0;j<raidPtr->numCol;j++) { 2400 if (raidPtr->Disks[j].spareCol == sparecol) { 2401 scol = j; 2402 break; 2403 } 2404 } 2405 2406 /* XXX shouldn't *really* need this... */ 2407 raidread_component_label( 2408 raidPtr->Disks[sparecol].dev, 2409 raidPtr->raid_cinfo[sparecol].ci_vp, 2410 &clabel); 2411 /* make sure status is noted */ 2412 2413 raid_init_component_label(raidPtr, &clabel); 2414 2415 clabel.mod_counter = raidPtr->mod_counter; 2416 clabel.column = scol; 2417 clabel.status = rf_ds_optimal; 2418 2419 raidwrite_component_label( 2420 raidPtr->Disks[sparecol].dev, 2421 raidPtr->raid_cinfo[sparecol].ci_vp, 2422 &clabel); 2423 if (final == RF_FINAL_COMPONENT_UPDATE) { 2424 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2425 raidmarkclean( raidPtr->Disks[sparecol].dev, 2426 raidPtr->raid_cinfo[sparecol].ci_vp, 2427 raidPtr->mod_counter); 2428 } 2429 } 2430 } 2431 } 2432 } 2433 2434 void 2435 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured) 2436 { 2437 struct proc *p; 2438 struct lwp *l; 2439 2440 p = raidPtr->engine_thread; 2441 l = LIST_FIRST(&p->p_lwps); 2442 2443 if (vp != NULL) { 2444 if (auto_configured == 1) { 2445 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2446 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2447 vput(vp); 2448 2449 } else { 2450 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, l); 2451 } 2452 } 2453 } 2454 2455 2456 void 2457 rf_UnconfigureVnodes(RF_Raid_t *raidPtr) 2458 { 2459 int r,c; 2460 struct vnode *vp; 2461 int acd; 2462 2463 2464 /* We take this opportunity to close the vnodes like we should.. */ 2465 2466 for (c = 0; c < raidPtr->numCol; c++) { 2467 vp = raidPtr->raid_cinfo[c].ci_vp; 2468 acd = raidPtr->Disks[c].auto_configured; 2469 rf_close_component(raidPtr, vp, acd); 2470 raidPtr->raid_cinfo[c].ci_vp = NULL; 2471 raidPtr->Disks[c].auto_configured = 0; 2472 } 2473 2474 for (r = 0; r < raidPtr->numSpare; r++) { 2475 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp; 2476 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured; 2477 rf_close_component(raidPtr, vp, acd); 2478 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL; 2479 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0; 2480 } 2481 } 2482 2483 2484 void 2485 rf_ReconThread(struct rf_recon_req *req) 2486 { 2487 int s; 2488 RF_Raid_t *raidPtr; 2489 2490 s = splbio(); 2491 raidPtr = (RF_Raid_t *) req->raidPtr; 2492 raidPtr->recon_in_progress = 1; 2493 2494 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col, 2495 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); 2496 2497 RF_Free(req, sizeof(*req)); 2498 2499 raidPtr->recon_in_progress = 0; 2500 splx(s); 2501 2502 /* That's all... */ 2503 kthread_exit(0); /* does not return */ 2504 } 2505 2506 void 2507 rf_RewriteParityThread(RF_Raid_t *raidPtr) 2508 { 2509 int retcode; 2510 int s; 2511 2512 raidPtr->parity_rewrite_stripes_done = 0; 2513 raidPtr->parity_rewrite_in_progress = 1; 2514 s = splbio(); 2515 retcode = rf_RewriteParity(raidPtr); 2516 splx(s); 2517 if (retcode) { 2518 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid); 2519 } else { 2520 /* set the clean bit! If we shutdown correctly, 2521 the clean bit on each component label will get 2522 set */ 2523 raidPtr->parity_good = RF_RAID_CLEAN; 2524 } 2525 raidPtr->parity_rewrite_in_progress = 0; 2526 2527 /* Anyone waiting for us to stop? If so, inform them... */ 2528 if (raidPtr->waitShutdown) { 2529 wakeup(&raidPtr->parity_rewrite_in_progress); 2530 } 2531 2532 /* That's all... */ 2533 kthread_exit(0); /* does not return */ 2534 } 2535 2536 2537 void 2538 rf_CopybackThread(RF_Raid_t *raidPtr) 2539 { 2540 int s; 2541 2542 raidPtr->copyback_in_progress = 1; 2543 s = splbio(); 2544 rf_CopybackReconstructedData(raidPtr); 2545 splx(s); 2546 raidPtr->copyback_in_progress = 0; 2547 2548 /* That's all... */ 2549 kthread_exit(0); /* does not return */ 2550 } 2551 2552 2553 void 2554 rf_ReconstructInPlaceThread(struct rf_recon_req *req) 2555 { 2556 int s; 2557 RF_Raid_t *raidPtr; 2558 2559 s = splbio(); 2560 raidPtr = req->raidPtr; 2561 raidPtr->recon_in_progress = 1; 2562 rf_ReconstructInPlace(raidPtr, req->col); 2563 RF_Free(req, sizeof(*req)); 2564 raidPtr->recon_in_progress = 0; 2565 splx(s); 2566 2567 /* That's all... */ 2568 kthread_exit(0); /* does not return */ 2569 } 2570 2571 RF_AutoConfig_t * 2572 rf_find_raid_components() 2573 { 2574 struct vnode *vp; 2575 struct disklabel label; 2576 struct device *dv; 2577 dev_t dev; 2578 int bmajor; 2579 int error; 2580 int i; 2581 int good_one; 2582 RF_ComponentLabel_t *clabel; 2583 RF_AutoConfig_t *ac_list; 2584 RF_AutoConfig_t *ac; 2585 2586 2587 /* initialize the AutoConfig list */ 2588 ac_list = NULL; 2589 2590 /* we begin by trolling through *all* the devices on the system */ 2591 2592 for (dv = alldevs.tqh_first; dv != NULL; 2593 dv = dv->dv_list.tqe_next) { 2594 2595 /* we are only interested in disks... */ 2596 if (dv->dv_class != DV_DISK) 2597 continue; 2598 2599 /* we don't care about floppies... */ 2600 if (!strcmp(dv->dv_cfdata->cf_name,"fd")) { 2601 continue; 2602 } 2603 2604 /* we don't care about CD's... */ 2605 if (!strcmp(dv->dv_cfdata->cf_name,"cd")) { 2606 continue; 2607 } 2608 2609 /* hdfd is the Atari/Hades floppy driver */ 2610 if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) { 2611 continue; 2612 } 2613 /* fdisa is the Atari/Milan floppy driver */ 2614 if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) { 2615 continue; 2616 } 2617 2618 /* need to find the device_name_to_block_device_major stuff */ 2619 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0); 2620 2621 /* get a vnode for the raw partition of this disk */ 2622 2623 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART); 2624 if (bdevvp(dev, &vp)) 2625 panic("RAID can't alloc vnode"); 2626 2627 error = VOP_OPEN(vp, FREAD, NOCRED, 0); 2628 2629 if (error) { 2630 /* "Who cares." Continue looking 2631 for something that exists*/ 2632 vput(vp); 2633 continue; 2634 } 2635 2636 /* Ok, the disk exists. Go get the disklabel. */ 2637 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0); 2638 if (error) { 2639 /* 2640 * XXX can't happen - open() would 2641 * have errored out (or faked up one) 2642 */ 2643 if (error != ENOTTY) 2644 printf("RAIDframe: can't get label for dev " 2645 "%s (%d)\n", dv->dv_xname, error); 2646 } 2647 2648 /* don't need this any more. We'll allocate it again 2649 a little later if we really do... */ 2650 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2651 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2652 vput(vp); 2653 2654 if (error) 2655 continue; 2656 2657 for (i=0; i < label.d_npartitions; i++) { 2658 /* We only support partitions marked as RAID */ 2659 if (label.d_partitions[i].p_fstype != FS_RAID) 2660 continue; 2661 2662 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i); 2663 if (bdevvp(dev, &vp)) 2664 panic("RAID can't alloc vnode"); 2665 2666 error = VOP_OPEN(vp, FREAD, NOCRED, 0); 2667 if (error) { 2668 /* Whatever... */ 2669 vput(vp); 2670 continue; 2671 } 2672 2673 good_one = 0; 2674 2675 clabel = (RF_ComponentLabel_t *) 2676 malloc(sizeof(RF_ComponentLabel_t), 2677 M_RAIDFRAME, M_NOWAIT); 2678 if (clabel == NULL) { 2679 /* XXX CLEANUP HERE */ 2680 printf("RAID auto config: out of memory!\n"); 2681 return(NULL); /* XXX probably should panic? */ 2682 } 2683 2684 if (!raidread_component_label(dev, vp, clabel)) { 2685 /* Got the label. Does it look reasonable? */ 2686 if (rf_reasonable_label(clabel) && 2687 (clabel->partitionSize <= 2688 label.d_partitions[i].p_size)) { 2689 #if DEBUG 2690 printf("Component on: %s%c: %d\n", 2691 dv->dv_xname, 'a'+i, 2692 label.d_partitions[i].p_size); 2693 rf_print_component_label(clabel); 2694 #endif 2695 /* if it's reasonable, add it, 2696 else ignore it. */ 2697 ac = (RF_AutoConfig_t *) 2698 malloc(sizeof(RF_AutoConfig_t), 2699 M_RAIDFRAME, 2700 M_NOWAIT); 2701 if (ac == NULL) { 2702 /* XXX should panic?? */ 2703 return(NULL); 2704 } 2705 2706 snprintf(ac->devname, 2707 sizeof(ac->devname), "%s%c", 2708 dv->dv_xname, 'a'+i); 2709 ac->dev = dev; 2710 ac->vp = vp; 2711 ac->clabel = clabel; 2712 ac->next = ac_list; 2713 ac_list = ac; 2714 good_one = 1; 2715 } 2716 } 2717 if (!good_one) { 2718 /* cleanup */ 2719 free(clabel, M_RAIDFRAME); 2720 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2721 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2722 vput(vp); 2723 } 2724 } 2725 } 2726 return(ac_list); 2727 } 2728 2729 static int 2730 rf_reasonable_label(RF_ComponentLabel_t *clabel) 2731 { 2732 2733 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || 2734 (clabel->version==RF_COMPONENT_LABEL_VERSION)) && 2735 ((clabel->clean == RF_RAID_CLEAN) || 2736 (clabel->clean == RF_RAID_DIRTY)) && 2737 clabel->row >=0 && 2738 clabel->column >= 0 && 2739 clabel->num_rows > 0 && 2740 clabel->num_columns > 0 && 2741 clabel->row < clabel->num_rows && 2742 clabel->column < clabel->num_columns && 2743 clabel->blockSize > 0 && 2744 clabel->numBlocks > 0) { 2745 /* label looks reasonable enough... */ 2746 return(1); 2747 } 2748 return(0); 2749 } 2750 2751 2752 #if DEBUG 2753 void 2754 rf_print_component_label(RF_ComponentLabel_t *clabel) 2755 { 2756 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 2757 clabel->row, clabel->column, 2758 clabel->num_rows, clabel->num_columns); 2759 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 2760 clabel->version, clabel->serial_number, 2761 clabel->mod_counter); 2762 printf(" Clean: %s Status: %d\n", 2763 clabel->clean ? "Yes" : "No", clabel->status ); 2764 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", 2765 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); 2766 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n", 2767 (char) clabel->parityConfig, clabel->blockSize, 2768 clabel->numBlocks); 2769 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" ); 2770 printf(" Contains root partition: %s\n", 2771 clabel->root_partition ? "Yes" : "No" ); 2772 printf(" Last configured as: raid%d\n", clabel->last_unit ); 2773 #if 0 2774 printf(" Config order: %d\n", clabel->config_order); 2775 #endif 2776 2777 } 2778 #endif 2779 2780 RF_ConfigSet_t * 2781 rf_create_auto_sets(RF_AutoConfig_t *ac_list) 2782 { 2783 RF_AutoConfig_t *ac; 2784 RF_ConfigSet_t *config_sets; 2785 RF_ConfigSet_t *cset; 2786 RF_AutoConfig_t *ac_next; 2787 2788 2789 config_sets = NULL; 2790 2791 /* Go through the AutoConfig list, and figure out which components 2792 belong to what sets. */ 2793 ac = ac_list; 2794 while(ac!=NULL) { 2795 /* we're going to putz with ac->next, so save it here 2796 for use at the end of the loop */ 2797 ac_next = ac->next; 2798 2799 if (config_sets == NULL) { 2800 /* will need at least this one... */ 2801 config_sets = (RF_ConfigSet_t *) 2802 malloc(sizeof(RF_ConfigSet_t), 2803 M_RAIDFRAME, M_NOWAIT); 2804 if (config_sets == NULL) { 2805 panic("rf_create_auto_sets: No memory!"); 2806 } 2807 /* this one is easy :) */ 2808 config_sets->ac = ac; 2809 config_sets->next = NULL; 2810 config_sets->rootable = 0; 2811 ac->next = NULL; 2812 } else { 2813 /* which set does this component fit into? */ 2814 cset = config_sets; 2815 while(cset!=NULL) { 2816 if (rf_does_it_fit(cset, ac)) { 2817 /* looks like it matches... */ 2818 ac->next = cset->ac; 2819 cset->ac = ac; 2820 break; 2821 } 2822 cset = cset->next; 2823 } 2824 if (cset==NULL) { 2825 /* didn't find a match above... new set..*/ 2826 cset = (RF_ConfigSet_t *) 2827 malloc(sizeof(RF_ConfigSet_t), 2828 M_RAIDFRAME, M_NOWAIT); 2829 if (cset == NULL) { 2830 panic("rf_create_auto_sets: No memory!"); 2831 } 2832 cset->ac = ac; 2833 ac->next = NULL; 2834 cset->next = config_sets; 2835 cset->rootable = 0; 2836 config_sets = cset; 2837 } 2838 } 2839 ac = ac_next; 2840 } 2841 2842 2843 return(config_sets); 2844 } 2845 2846 static int 2847 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac) 2848 { 2849 RF_ComponentLabel_t *clabel1, *clabel2; 2850 2851 /* If this one matches the *first* one in the set, that's good 2852 enough, since the other members of the set would have been 2853 through here too... */ 2854 /* note that we are not checking partitionSize here.. 2855 2856 Note that we are also not checking the mod_counters here. 2857 If everything else matches execpt the mod_counter, that's 2858 good enough for this test. We will deal with the mod_counters 2859 a little later in the autoconfiguration process. 2860 2861 (clabel1->mod_counter == clabel2->mod_counter) && 2862 2863 The reason we don't check for this is that failed disks 2864 will have lower modification counts. If those disks are 2865 not added to the set they used to belong to, then they will 2866 form their own set, which may result in 2 different sets, 2867 for example, competing to be configured at raid0, and 2868 perhaps competing to be the root filesystem set. If the 2869 wrong ones get configured, or both attempt to become /, 2870 weird behaviour and or serious lossage will occur. Thus we 2871 need to bring them into the fold here, and kick them out at 2872 a later point. 2873 2874 */ 2875 2876 clabel1 = cset->ac->clabel; 2877 clabel2 = ac->clabel; 2878 if ((clabel1->version == clabel2->version) && 2879 (clabel1->serial_number == clabel2->serial_number) && 2880 (clabel1->num_rows == clabel2->num_rows) && 2881 (clabel1->num_columns == clabel2->num_columns) && 2882 (clabel1->sectPerSU == clabel2->sectPerSU) && 2883 (clabel1->SUsPerPU == clabel2->SUsPerPU) && 2884 (clabel1->SUsPerRU == clabel2->SUsPerRU) && 2885 (clabel1->parityConfig == clabel2->parityConfig) && 2886 (clabel1->maxOutstanding == clabel2->maxOutstanding) && 2887 (clabel1->blockSize == clabel2->blockSize) && 2888 (clabel1->numBlocks == clabel2->numBlocks) && 2889 (clabel1->autoconfigure == clabel2->autoconfigure) && 2890 (clabel1->root_partition == clabel2->root_partition) && 2891 (clabel1->last_unit == clabel2->last_unit) && 2892 (clabel1->config_order == clabel2->config_order)) { 2893 /* if it get's here, it almost *has* to be a match */ 2894 } else { 2895 /* it's not consistent with somebody in the set.. 2896 punt */ 2897 return(0); 2898 } 2899 /* all was fine.. it must fit... */ 2900 return(1); 2901 } 2902 2903 int 2904 rf_have_enough_components(RF_ConfigSet_t *cset) 2905 { 2906 RF_AutoConfig_t *ac; 2907 RF_AutoConfig_t *auto_config; 2908 RF_ComponentLabel_t *clabel; 2909 int c; 2910 int num_cols; 2911 int num_missing; 2912 int mod_counter; 2913 int mod_counter_found; 2914 int even_pair_failed; 2915 char parity_type; 2916 2917 2918 /* check to see that we have enough 'live' components 2919 of this set. If so, we can configure it if necessary */ 2920 2921 num_cols = cset->ac->clabel->num_columns; 2922 parity_type = cset->ac->clabel->parityConfig; 2923 2924 /* XXX Check for duplicate components!?!?!? */ 2925 2926 /* Determine what the mod_counter is supposed to be for this set. */ 2927 2928 mod_counter_found = 0; 2929 mod_counter = 0; 2930 ac = cset->ac; 2931 while(ac!=NULL) { 2932 if (mod_counter_found==0) { 2933 mod_counter = ac->clabel->mod_counter; 2934 mod_counter_found = 1; 2935 } else { 2936 if (ac->clabel->mod_counter > mod_counter) { 2937 mod_counter = ac->clabel->mod_counter; 2938 } 2939 } 2940 ac = ac->next; 2941 } 2942 2943 num_missing = 0; 2944 auto_config = cset->ac; 2945 2946 even_pair_failed = 0; 2947 for(c=0; c<num_cols; c++) { 2948 ac = auto_config; 2949 while(ac!=NULL) { 2950 if ((ac->clabel->column == c) && 2951 (ac->clabel->mod_counter == mod_counter)) { 2952 /* it's this one... */ 2953 #if DEBUG 2954 printf("Found: %s at %d\n", 2955 ac->devname,c); 2956 #endif 2957 break; 2958 } 2959 ac=ac->next; 2960 } 2961 if (ac==NULL) { 2962 /* Didn't find one here! */ 2963 /* special case for RAID 1, especially 2964 where there are more than 2 2965 components (where RAIDframe treats 2966 things a little differently :( ) */ 2967 if (parity_type == '1') { 2968 if (c%2 == 0) { /* even component */ 2969 even_pair_failed = 1; 2970 } else { /* odd component. If 2971 we're failed, and 2972 so is the even 2973 component, it's 2974 "Good Night, Charlie" */ 2975 if (even_pair_failed == 1) { 2976 return(0); 2977 } 2978 } 2979 } else { 2980 /* normal accounting */ 2981 num_missing++; 2982 } 2983 } 2984 if ((parity_type == '1') && (c%2 == 1)) { 2985 /* Just did an even component, and we didn't 2986 bail.. reset the even_pair_failed flag, 2987 and go on to the next component.... */ 2988 even_pair_failed = 0; 2989 } 2990 } 2991 2992 clabel = cset->ac->clabel; 2993 2994 if (((clabel->parityConfig == '0') && (num_missing > 0)) || 2995 ((clabel->parityConfig == '4') && (num_missing > 1)) || 2996 ((clabel->parityConfig == '5') && (num_missing > 1))) { 2997 /* XXX this needs to be made *much* more general */ 2998 /* Too many failures */ 2999 return(0); 3000 } 3001 /* otherwise, all is well, and we've got enough to take a kick 3002 at autoconfiguring this set */ 3003 return(1); 3004 } 3005 3006 void 3007 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config, 3008 RF_Raid_t *raidPtr) 3009 { 3010 RF_ComponentLabel_t *clabel; 3011 int i; 3012 3013 clabel = ac->clabel; 3014 3015 /* 1. Fill in the common stuff */ 3016 config->numRow = clabel->num_rows = 1; 3017 config->numCol = clabel->num_columns; 3018 config->numSpare = 0; /* XXX should this be set here? */ 3019 config->sectPerSU = clabel->sectPerSU; 3020 config->SUsPerPU = clabel->SUsPerPU; 3021 config->SUsPerRU = clabel->SUsPerRU; 3022 config->parityConfig = clabel->parityConfig; 3023 /* XXX... */ 3024 strcpy(config->diskQueueType,"fifo"); 3025 config->maxOutstandingDiskReqs = clabel->maxOutstanding; 3026 config->layoutSpecificSize = 0; /* XXX ?? */ 3027 3028 while(ac!=NULL) { 3029 /* row/col values will be in range due to the checks 3030 in reasonable_label() */ 3031 strcpy(config->devnames[0][ac->clabel->column], 3032 ac->devname); 3033 ac = ac->next; 3034 } 3035 3036 for(i=0;i<RF_MAXDBGV;i++) { 3037 config->debugVars[i][0] = 0; 3038 } 3039 } 3040 3041 int 3042 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value) 3043 { 3044 RF_ComponentLabel_t clabel; 3045 struct vnode *vp; 3046 dev_t dev; 3047 int column; 3048 int sparecol; 3049 3050 raidPtr->autoconfigure = new_value; 3051 3052 for(column=0; column<raidPtr->numCol; column++) { 3053 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3054 dev = raidPtr->Disks[column].dev; 3055 vp = raidPtr->raid_cinfo[column].ci_vp; 3056 raidread_component_label(dev, vp, &clabel); 3057 clabel.autoconfigure = new_value; 3058 raidwrite_component_label(dev, vp, &clabel); 3059 } 3060 } 3061 for(column = 0; column < raidPtr->numSpare ; column++) { 3062 sparecol = raidPtr->numCol + column; 3063 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3064 dev = raidPtr->Disks[sparecol].dev; 3065 vp = raidPtr->raid_cinfo[sparecol].ci_vp; 3066 raidread_component_label(dev, vp, &clabel); 3067 clabel.autoconfigure = new_value; 3068 raidwrite_component_label(dev, vp, &clabel); 3069 } 3070 } 3071 return(new_value); 3072 } 3073 3074 int 3075 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value) 3076 { 3077 RF_ComponentLabel_t clabel; 3078 struct vnode *vp; 3079 dev_t dev; 3080 int column; 3081 int sparecol; 3082 3083 raidPtr->root_partition = new_value; 3084 for(column=0; column<raidPtr->numCol; column++) { 3085 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3086 dev = raidPtr->Disks[column].dev; 3087 vp = raidPtr->raid_cinfo[column].ci_vp; 3088 raidread_component_label(dev, vp, &clabel); 3089 clabel.root_partition = new_value; 3090 raidwrite_component_label(dev, vp, &clabel); 3091 } 3092 } 3093 for(column = 0; column < raidPtr->numSpare ; column++) { 3094 sparecol = raidPtr->numCol + column; 3095 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3096 dev = raidPtr->Disks[sparecol].dev; 3097 vp = raidPtr->raid_cinfo[sparecol].ci_vp; 3098 raidread_component_label(dev, vp, &clabel); 3099 clabel.root_partition = new_value; 3100 raidwrite_component_label(dev, vp, &clabel); 3101 } 3102 } 3103 return(new_value); 3104 } 3105 3106 void 3107 rf_release_all_vps(RF_ConfigSet_t *cset) 3108 { 3109 RF_AutoConfig_t *ac; 3110 3111 ac = cset->ac; 3112 while(ac!=NULL) { 3113 /* Close the vp, and give it back */ 3114 if (ac->vp) { 3115 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 3116 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0); 3117 vput(ac->vp); 3118 ac->vp = NULL; 3119 } 3120 ac = ac->next; 3121 } 3122 } 3123 3124 3125 void 3126 rf_cleanup_config_set(RF_ConfigSet_t *cset) 3127 { 3128 RF_AutoConfig_t *ac; 3129 RF_AutoConfig_t *next_ac; 3130 3131 ac = cset->ac; 3132 while(ac!=NULL) { 3133 next_ac = ac->next; 3134 /* nuke the label */ 3135 free(ac->clabel, M_RAIDFRAME); 3136 /* cleanup the config structure */ 3137 free(ac, M_RAIDFRAME); 3138 /* "next.." */ 3139 ac = next_ac; 3140 } 3141 /* and, finally, nuke the config set */ 3142 free(cset, M_RAIDFRAME); 3143 } 3144 3145 3146 void 3147 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel) 3148 { 3149 /* current version number */ 3150 clabel->version = RF_COMPONENT_LABEL_VERSION; 3151 clabel->serial_number = raidPtr->serial_number; 3152 clabel->mod_counter = raidPtr->mod_counter; 3153 clabel->num_rows = 1; 3154 clabel->num_columns = raidPtr->numCol; 3155 clabel->clean = RF_RAID_DIRTY; /* not clean */ 3156 clabel->status = rf_ds_optimal; /* "It's good!" */ 3157 3158 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; 3159 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; 3160 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; 3161 3162 clabel->blockSize = raidPtr->bytesPerSector; 3163 clabel->numBlocks = raidPtr->sectorsPerDisk; 3164 3165 /* XXX not portable */ 3166 clabel->parityConfig = raidPtr->Layout.map->parityConfig; 3167 clabel->maxOutstanding = raidPtr->maxOutstanding; 3168 clabel->autoconfigure = raidPtr->autoconfigure; 3169 clabel->root_partition = raidPtr->root_partition; 3170 clabel->last_unit = raidPtr->raidid; 3171 clabel->config_order = raidPtr->config_order; 3172 } 3173 3174 int 3175 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit) 3176 { 3177 RF_Raid_t *raidPtr; 3178 RF_Config_t *config; 3179 int raidID; 3180 int retcode; 3181 3182 #if DEBUG 3183 printf("RAID autoconfigure\n"); 3184 #endif 3185 3186 retcode = 0; 3187 *unit = -1; 3188 3189 /* 1. Create a config structure */ 3190 3191 config = (RF_Config_t *)malloc(sizeof(RF_Config_t), 3192 M_RAIDFRAME, 3193 M_NOWAIT); 3194 if (config==NULL) { 3195 printf("Out of mem!?!?\n"); 3196 /* XXX do something more intelligent here. */ 3197 return(1); 3198 } 3199 3200 memset(config, 0, sizeof(RF_Config_t)); 3201 3202 /* 3203 2. Figure out what RAID ID this one is supposed to live at 3204 See if we can get the same RAID dev that it was configured 3205 on last time.. 3206 */ 3207 3208 raidID = cset->ac->clabel->last_unit; 3209 if ((raidID < 0) || (raidID >= numraid)) { 3210 /* let's not wander off into lala land. */ 3211 raidID = numraid - 1; 3212 } 3213 if (raidPtrs[raidID]->valid != 0) { 3214 3215 /* 3216 Nope... Go looking for an alternative... 3217 Start high so we don't immediately use raid0 if that's 3218 not taken. 3219 */ 3220 3221 for(raidID = numraid - 1; raidID >= 0; raidID--) { 3222 if (raidPtrs[raidID]->valid == 0) { 3223 /* can use this one! */ 3224 break; 3225 } 3226 } 3227 } 3228 3229 if (raidID < 0) { 3230 /* punt... */ 3231 printf("Unable to auto configure this set!\n"); 3232 printf("(Out of RAID devs!)\n"); 3233 return(1); 3234 } 3235 3236 #if DEBUG 3237 printf("Configuring raid%d:\n",raidID); 3238 #endif 3239 3240 raidPtr = raidPtrs[raidID]; 3241 3242 /* XXX all this stuff should be done SOMEWHERE ELSE! */ 3243 raidPtr->raidid = raidID; 3244 raidPtr->openings = RAIDOUTSTANDING; 3245 3246 /* 3. Build the configuration structure */ 3247 rf_create_configuration(cset->ac, config, raidPtr); 3248 3249 /* 4. Do the configuration */ 3250 retcode = rf_Configure(raidPtr, config, cset->ac); 3251 3252 if (retcode == 0) { 3253 3254 raidinit(raidPtrs[raidID]); 3255 3256 rf_markalldirty(raidPtrs[raidID]); 3257 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */ 3258 if (cset->ac->clabel->root_partition==1) { 3259 /* everything configured just fine. Make a note 3260 that this set is eligible to be root. */ 3261 cset->rootable = 1; 3262 /* XXX do this here? */ 3263 raidPtrs[raidID]->root_partition = 1; 3264 } 3265 } 3266 3267 /* 5. Cleanup */ 3268 free(config, M_RAIDFRAME); 3269 3270 *unit = raidID; 3271 return(retcode); 3272 } 3273 3274 void 3275 rf_disk_unbusy(RF_RaidAccessDesc_t *desc) 3276 { 3277 struct buf *bp; 3278 3279 bp = (struct buf *)desc->bp; 3280 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev, 3281 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ)); 3282 } 3283 3284 void 3285 rf_pool_init(struct pool *p, size_t size, const char *w_chan, 3286 size_t xmin, size_t xmax) 3287 { 3288 pool_init(p, size, 0, 0, 0, w_chan, NULL); 3289 pool_sethiwat(p, xmax); 3290 pool_prime(p, xmin); 3291 pool_setlowat(p, xmin); 3292 } 3293 3294 /* 3295 * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see 3296 * if there is IO pending and if that IO could possibly be done for a 3297 * given RAID set. Returns 0 if IO is waiting and can be done, 1 3298 * otherwise. 3299 * 3300 */ 3301 3302 int 3303 rf_buf_queue_check(int raidid) 3304 { 3305 if ((BUFQ_PEEK(raid_softc[raidid].buf_queue) != NULL) && 3306 raidPtrs[raidid]->openings > 0) { 3307 /* there is work to do */ 3308 return 0; 3309 } 3310 /* default is nothing to do */ 3311 return 1; 3312 } 3313