/*	$NetBSD: rf_netbsdkintf.c,v 1.112 2001/11/13 07:11:14 lukem Exp $	*/
/*-
 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Greg Oster; Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *
 *      @(#)cd.c        8.2 (Berkeley) 11/16/93
 */


/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Mark Holland, Jim Zelenka
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/***********************************************************
 *
 * rf_netbsdkintf.c -- the kernel interface routines for RAIDframe
 *
 ***********************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.112 2001/11/13 07:11:14 lukem Exp $");

#include <sys/errno.h>
#include <sys/param.h>
#include <sys/pool.h>
#include <sys/queue.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/types.h>
#include <machine/types.h>
#include <sys/disklabel.h>
#include <sys/conf.h>
#include <sys/lock.h>
#include <sys/buf.h>
#include <sys/user.h>
#include <sys/reboot.h>

#include <dev/raidframe/raidframevar.h>
#include <dev/raidframe/raidframeio.h>
#include "raid.h"
#include "opt_raid_autoconfig.h"
#include "rf_raid.h"
#include "rf_copyback.h"
#include "rf_dag.h"
#include "rf_dagflags.h"
#include "rf_desc.h"
#include "rf_diskqueue.h"
#include "rf_acctrace.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_debugMem.h"
#include "rf_kintf.h"
#include "rf_options.h"
#include "rf_driver.h"
#include "rf_parityscan.h"
#include "rf_debugprint.h"
#include "rf_threadstuff.h"

int rf_kdebug_level = 0;

#ifdef DEBUG
#define db1_printf(a) if (rf_kdebug_level > 0) printf a
#else   /* DEBUG */
#define db1_printf(a) { }
#endif  /* DEBUG */

static RF_Raid_t **raidPtrs;    /* global raid device descriptors */

RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;   /* requests to install a
                                                 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;   /* responses from
                                                 * installation process */

/* prototypes */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
        dev_t dev, RF_SectorNum_t startSect,
        RF_SectorCount_t numSect, caddr_t buf,
        void (*cbFunc) (struct buf *), void *cbArg,
        int logBytesPerSector, struct proc * b_proc);
static void raidinit(RF_Raid_t *);

void raidattach(int);
int raidsize(dev_t);
int raidopen(dev_t, int, int, struct proc *);
int raidclose(dev_t, int, int, struct proc *);
int raidioctl(dev_t, u_long, caddr_t, int, struct proc *);
int raidwrite(dev_t, struct uio *, int);
int raidread(dev_t, struct uio *, int);
void raidstrategy(struct buf *);
int raiddump(dev_t, daddr_t, caddr_t, size_t);

/*
 * Pilfered from ccd.c
 */

struct raidbuf {
    struct buf rf_buf;      /* new I/O buf.  MUST BE FIRST!!! */
    struct buf *rf_obp;     /* ptr. to original I/O buf */
    int rf_flags;           /* misc. flags */
    RF_DiskQueueData_t *req;        /* the request that this was part of.. */
};

#define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
#define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
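
/*
 * Note: the embedded struct buf must stay the first member of struct
 * raidbuf; KernelWakeupFunc() below casts the struct buf * it receives
 * back to a struct raidbuf * and would break otherwise.
 */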

/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

struct raid_softc {
    int sc_flags;                   /* flags */
    int sc_cflags;                  /* configuration flags */
    size_t sc_size;                 /* size of the raid device */
    char sc_xname[20];              /* XXX external name */
    struct disk sc_dkdev;           /* generic disk device info */
    struct pool sc_cbufpool;        /* component buffer pool */
    struct buf_queue buf_queue;     /* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED    0x01    /* unit has been initialized */
#define RAIDF_WLABEL    0x02    /* label area is writable */
#define RAIDF_LABELLING 0x04    /* unit is currently being labelled */
#define RAIDF_WANTED    0x40    /* someone is waiting to obtain a lock */
#define RAIDF_LOCKED    0x80    /* unit is locked */

#define raidunit(x)     DISKUNIT(x)
int numraid = 0;

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

#define RAIDLABELDEV(dev)       \
        (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
        struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
void rf_mountroot_hook(struct device *);

struct device *raidrootdev;

void rf_ReconThread(struct rf_recon_req *);
/* XXX what I want is: */
/* void rf_ReconThread(RF_Raid_t *raidPtr); */
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
void rf_buildroothack(void *);

RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *, RF_AutoConfig_t *);
static int rf_reasonable_label(RF_ComponentLabel_t *);
void rf_create_configuration(RF_AutoConfig_t *, RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
int rf_auto_config_set(RF_ConfigSet_t *, int *);

/* Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
   Note that this is overridden by having RAID_AUTOCONFIG as an option
   in the kernel config file. */
static int raidautoconfig = 0;

void
raidattach(num)
    int num;
{
    int raidID;
    int i, rc;
    RF_AutoConfig_t *ac_list;       /* autoconfig list */
    RF_ConfigSet_t *config_sets;

#ifdef DEBUG
    printf("raidattach: Asked for %d units\n", num);
#endif

    if (num <= 0) {
#ifdef DIAGNOSTIC
        panic("raidattach: count <= 0");
#endif
        return;
    }
    /* This is where all the initialization stuff gets done. */

    numraid = num;

    /* Make some space for requested number of units... */

    RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
    if (raidPtrs == NULL) {
        panic("raidPtrs is NULL!!\n");
    }

    rc = rf_mutex_init(&rf_sparet_wait_mutex);
    if (rc) {
        RF_PANIC();
    }

    rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

    for (i = 0; i < num; i++)
        raidPtrs[i] = NULL;
    rc = rf_BootRaidframe();
    if (rc == 0)
        printf("Kernelized RAIDframe activated\n");
    else
        panic("Serious error booting RAID!!\n");

    /* put together some datastructures like the CCD device does.. This
     * lets us lock the device and what-not when it gets opened. */

    raid_softc = (struct raid_softc *)
        malloc(num * sizeof(struct raid_softc),
            M_RAIDFRAME, M_NOWAIT);
    if (raid_softc == NULL) {
        printf("WARNING: no memory for RAIDframe driver\n");
        return;
    }

    memset(raid_softc, 0, num * sizeof(struct raid_softc));

    raidrootdev = (struct device *)malloc(num * sizeof(struct device),
        M_RAIDFRAME, M_NOWAIT);
    if (raidrootdev == NULL) {
        panic("No memory for RAIDframe driver!!?!?!\n");
    }

    for (raidID = 0; raidID < num; raidID++) {
        BUFQ_INIT(&raid_softc[raidID].buf_queue);

        raidrootdev[raidID].dv_class = DV_DISK;
        raidrootdev[raidID].dv_cfdata = NULL;
        raidrootdev[raidID].dv_unit = raidID;
        raidrootdev[raidID].dv_parent = NULL;
        raidrootdev[raidID].dv_flags = 0;
        sprintf(raidrootdev[raidID].dv_xname, "raid%d", raidID);

        RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
            (RF_Raid_t *));
        if (raidPtrs[raidID] == NULL) {
            printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
            numraid = raidID;
            return;
        }
    }

#if RAID_AUTOCONFIG
    raidautoconfig = 1;
#endif

    if (raidautoconfig) {
        /* 1. locate all RAID components on the system */

#if DEBUG
        printf("Searching for raid components...\n");
#endif
        ac_list = rf_find_raid_components();

        /* 2. sort them into their respective sets */

        config_sets = rf_create_auto_sets(ac_list);

        /* 3. evaluate each set and configure the valid ones.
           This gets done in rf_buildroothack() */

        /* schedule the creation of the thread to do the
           "/ on RAID" stuff */

        kthread_create(rf_buildroothack, config_sets);

#if 0
        mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
#endif
    }

}

void
rf_buildroothack(arg)
    void *arg;
{
    RF_ConfigSet_t *config_sets = arg;
    RF_ConfigSet_t *cset;
    RF_ConfigSet_t *next_cset;
    int retcode;
    int raidID;
    int rootID;
    int num_root;

    rootID = 0;
    num_root = 0;
    cset = config_sets;
    while (cset != NULL) {
        next_cset = cset->next;
        if (rf_have_enough_components(cset) &&
            cset->ac->clabel->autoconfigure == 1) {
            retcode = rf_auto_config_set(cset, &raidID);
            if (!retcode) {
                if (cset->rootable) {
                    rootID = raidID;
                    num_root++;
                }
            } else {
                /* The autoconfig didn't work :( */
#if DEBUG
                printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
#endif
                rf_release_all_vps(cset);
            }
        } else {
            /* we're not autoconfiguring this set...
               release the associated resources */
            rf_release_all_vps(cset);
        }
        /* cleanup */
        rf_cleanup_config_set(cset);
        cset = next_cset;
    }
    if (boothowto & RB_ASKNAME) {
        /* We don't auto-config... */
    } else {
        /* They didn't ask, and we found something bootable... */

        if (num_root == 1) {
            booted_device = &raidrootdev[rootID];
        } else if (num_root > 1) {
            /* we can't guess.. require the user to answer... */
            boothowto |= RB_ASKNAME;
        }
    }
}


int
raidsize(dev)
    dev_t dev;
{
    struct raid_softc *rs;
    struct disklabel *lp;
    int part, unit, omask, size;

    unit = raidunit(dev);
    if (unit >= numraid)
        return (-1);
    rs = &raid_softc[unit];

    if ((rs->sc_flags & RAIDF_INITED) == 0)
        return (-1);

    part = DISKPART(dev);
    omask = rs->sc_dkdev.dk_openmask & (1 << part);
    lp = rs->sc_dkdev.dk_label;

    if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
        return (-1);

    if (lp->d_partitions[part].p_fstype != FS_SWAP)
        size = -1;
    else
        size = lp->d_partitions[part].p_size *
            (lp->d_secsize / DEV_BSIZE);

    if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
        return (-1);

    return (size);

}

int
raiddump(dev, blkno, va, size)
    dev_t dev;
    daddr_t blkno;
    caddr_t va;
    size_t size;
{
    /* Not implemented. */
    return ENXIO;
}
/* ARGSUSED */
int
raidopen(dev, flags, fmt, p)
    dev_t dev;
    int flags, fmt;
    struct proc *p;
{
    int unit = raidunit(dev);
    struct raid_softc *rs;
    struct disklabel *lp;
    int part, pmask;
    int error = 0;

    if (unit >= numraid)
        return (ENXIO);
    rs = &raid_softc[unit];

    if ((error = raidlock(rs)) != 0)
        return (error);
    lp = rs->sc_dkdev.dk_label;

    part = DISKPART(dev);
    pmask = (1 << part);

    db1_printf(("Opening raid device number: %d partition: %d\n",
        unit, part));


    if ((rs->sc_flags & RAIDF_INITED) &&
        (rs->sc_dkdev.dk_openmask == 0))
        raidgetdisklabel(dev);

    /* make sure that this partition exists */

    if (part != RAW_PART) {
        db1_printf(("Not a raw partition..\n"));
        if (((rs->sc_flags & RAIDF_INITED) == 0) ||
            ((part >= lp->d_npartitions) ||
             (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
            error = ENXIO;
            raidunlock(rs);
            db1_printf(("Bailing out...\n"));
            return (error);
        }
    }
    /* Prevent this unit from being unconfigured while open. */
    switch (fmt) {
    case S_IFCHR:
        rs->sc_dkdev.dk_copenmask |= pmask;
        break;

    case S_IFBLK:
        rs->sc_dkdev.dk_bopenmask |= pmask;
        break;
    }

    if ((rs->sc_dkdev.dk_openmask == 0) &&
        ((rs->sc_flags & RAIDF_INITED) != 0)) {
        /* First one...  mark things as dirty...  Note that we *MUST*
           have done a configure before this.  I DO NOT WANT TO BE
           SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
           THAT THEY BELONG TOGETHER!!!!! */
        /* XXX should check to see if we're only open for reading
           here... If so, we needn't do this, but then need some
           other way of keeping track of what's happened.. */

        rf_markalldirty(raidPtrs[unit]);
    }


    rs->sc_dkdev.dk_openmask =
        rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

    raidunlock(rs);

    return (error);


}
/* ARGSUSED */
int
raidclose(dev, flags, fmt, p)
    dev_t dev;
    int flags, fmt;
    struct proc *p;
{
    int unit = raidunit(dev);
    struct raid_softc *rs;
    int error = 0;
    int part;

    if (unit >= numraid)
        return (ENXIO);
    rs = &raid_softc[unit];

    if ((error = raidlock(rs)) != 0)
        return (error);

    part = DISKPART(dev);

    /* ...that much closer to allowing unconfiguration... */
    switch (fmt) {
    case S_IFCHR:
        rs->sc_dkdev.dk_copenmask &= ~(1 << part);
        break;

    case S_IFBLK:
        rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
        break;
    }
    rs->sc_dkdev.dk_openmask =
        rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

    if ((rs->sc_dkdev.dk_openmask == 0) &&
        ((rs->sc_flags & RAIDF_INITED) != 0)) {
        /* Last one...  device is not unconfigured yet.
           Device shutdown has taken care of setting the
           clean bits if RAIDF_INITED is not set
           mark things as clean... */
#if 0
        printf("Last one on raid%d.  Updating status.\n", unit);
#endif
        rf_update_component_labels(raidPtrs[unit],
            RF_FINAL_COMPONENT_UPDATE);
        if (doing_shutdown) {
            /* last one, and we're going down, so
               lights out for this RAID set too. */
            error = rf_Shutdown(raidPtrs[unit]);
            pool_destroy(&rs->sc_cbufpool);

            /* It's no longer initialized... */
            rs->sc_flags &= ~RAIDF_INITED;

            /* Detach the disk. */
            disk_detach(&rs->sc_dkdev);
        }
    }

    raidunlock(rs);
    return (0);

}
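
/*
 * Overview of the kernel I/O path as implemented below: raidstrategy()
 * bounds-checks each buf and queues it on the per-unit buf_queue;
 * raidstart() drains that queue and hands requests to rf_DoAccess() as
 * non-blocking I/O; the individual component I/Os are then issued by
 * rf_DispatchKernelIO() and completed via KernelWakeupFunc().
 */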

void
raidstrategy(bp)
    struct buf *bp;
{
    int s;

    unsigned int raidID = raidunit(bp->b_dev);
    RF_Raid_t *raidPtr;
    struct raid_softc *rs = &raid_softc[raidID];
    struct disklabel *lp;
    int wlabel;

    if ((rs->sc_flags & RAIDF_INITED) == 0) {
        bp->b_error = ENXIO;
        bp->b_flags |= B_ERROR;
        bp->b_resid = bp->b_bcount;
        biodone(bp);
        return;
    }
    if (raidID >= numraid || !raidPtrs[raidID]) {
        bp->b_error = ENODEV;
        bp->b_flags |= B_ERROR;
        bp->b_resid = bp->b_bcount;
        biodone(bp);
        return;
    }
    raidPtr = raidPtrs[raidID];
    if (!raidPtr->valid) {
        bp->b_error = ENODEV;
        bp->b_flags |= B_ERROR;
        bp->b_resid = bp->b_bcount;
        biodone(bp);
        return;
    }
    if (bp->b_bcount == 0) {
        db1_printf(("b_bcount is zero..\n"));
        biodone(bp);
        return;
    }
    lp = rs->sc_dkdev.dk_label;

    /*
     * Do bounds checking and adjust transfer.  If there's an
     * error, the bounds check will flag that for us.
     */

    wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
    if (DISKPART(bp->b_dev) != RAW_PART)
        if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
            db1_printf(("Bounds check failed!!:%d %d\n",
                (int) bp->b_blkno, (int) wlabel));
            biodone(bp);
            return;
        }
    s = splbio();

    bp->b_resid = 0;

    /* stuff it onto our queue */
    BUFQ_INSERT_TAIL(&rs->buf_queue, bp);

    raidstart(raidPtrs[raidID]);

    splx(s);
}
/* ARGSUSED */
int
raidread(dev, uio, flags)
    dev_t dev;
    struct uio *uio;
    int flags;
{
    int unit = raidunit(dev);
    struct raid_softc *rs;
    int part;

    if (unit >= numraid)
        return (ENXIO);
    rs = &raid_softc[unit];

    if ((rs->sc_flags & RAIDF_INITED) == 0)
        return (ENXIO);
    part = DISKPART(dev);

    db1_printf(("raidread: unit: %d partition: %d\n", unit, part));

    return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));

}
/* ARGSUSED */
int
raidwrite(dev, uio, flags)
    dev_t dev;
    struct uio *uio;
    int flags;
{
    int unit = raidunit(dev);
    struct raid_softc *rs;

    if (unit >= numraid)
        return (ENXIO);
    rs = &raid_softc[unit];

    if ((rs->sc_flags & RAIDF_INITED) == 0)
        return (ENXIO);
    db1_printf(("raidwrite\n"));
    return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));

}

int
raidioctl(dev, cmd, data, flag, p)
    dev_t dev;
    u_long cmd;
    caddr_t data;
    int flag;
    struct proc *p;
{
    int unit = raidunit(dev);
    int error = 0;
    int part, pmask;
    struct raid_softc *rs;
    RF_Config_t *k_cfg, *u_cfg;
    RF_Raid_t *raidPtr;
    RF_RaidDisk_t *diskPtr;
    RF_AccTotals_t *totals;
    RF_DeviceConfig_t *d_cfg, **ucfgp;
    u_char *specific_buf;
    int retcode = 0;
    int row;
    int column;
    struct rf_recon_req *rrcopy, *rr;
    RF_ComponentLabel_t *clabel;
    RF_ComponentLabel_t ci_label;
    RF_ComponentLabel_t **clabel_ptr;
    RF_SingleComponent_t *sparePtr, *componentPtr;
    RF_SingleComponent_t hot_spare;
    RF_SingleComponent_t component;
    RF_ProgressInfo_t progressInfo, **progressInfoPtr;
    int i, j, d;
#ifdef __HAVE_OLD_DISKLABEL
    struct disklabel newlabel;
#endif

    if (unit >= numraid)
        return (ENXIO);
    rs = &raid_softc[unit];
    raidPtr = raidPtrs[unit];

%d\n", (int) dev, 811 (int) DISKPART(dev), (int) unit, (int) cmd)); 812 813 /* Must be open for writes for these commands... */ 814 switch (cmd) { 815 case DIOCSDINFO: 816 case DIOCWDINFO: 817 #ifdef __HAVE_OLD_DISKLABEL 818 case ODIOCWDINFO: 819 case ODIOCSDINFO: 820 #endif 821 case DIOCWLABEL: 822 if ((flag & FWRITE) == 0) 823 return (EBADF); 824 } 825 826 /* Must be initialized for these... */ 827 switch (cmd) { 828 case DIOCGDINFO: 829 case DIOCSDINFO: 830 case DIOCWDINFO: 831 #ifdef __HAVE_OLD_DISKLABEL 832 case ODIOCGDINFO: 833 case ODIOCWDINFO: 834 case ODIOCSDINFO: 835 case ODIOCGDEFLABEL: 836 #endif 837 case DIOCGPART: 838 case DIOCWLABEL: 839 case DIOCGDEFLABEL: 840 case RAIDFRAME_SHUTDOWN: 841 case RAIDFRAME_REWRITEPARITY: 842 case RAIDFRAME_GET_INFO: 843 case RAIDFRAME_RESET_ACCTOTALS: 844 case RAIDFRAME_GET_ACCTOTALS: 845 case RAIDFRAME_KEEP_ACCTOTALS: 846 case RAIDFRAME_GET_SIZE: 847 case RAIDFRAME_FAIL_DISK: 848 case RAIDFRAME_COPYBACK: 849 case RAIDFRAME_CHECK_RECON_STATUS: 850 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 851 case RAIDFRAME_GET_COMPONENT_LABEL: 852 case RAIDFRAME_SET_COMPONENT_LABEL: 853 case RAIDFRAME_ADD_HOT_SPARE: 854 case RAIDFRAME_REMOVE_HOT_SPARE: 855 case RAIDFRAME_INIT_LABELS: 856 case RAIDFRAME_REBUILD_IN_PLACE: 857 case RAIDFRAME_CHECK_PARITY: 858 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 859 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 860 case RAIDFRAME_CHECK_COPYBACK_STATUS: 861 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 862 case RAIDFRAME_SET_AUTOCONFIG: 863 case RAIDFRAME_SET_ROOT: 864 case RAIDFRAME_DELETE_COMPONENT: 865 case RAIDFRAME_INCORPORATE_HOT_SPARE: 866 if ((rs->sc_flags & RAIDF_INITED) == 0) 867 return (ENXIO); 868 } 869 870 switch (cmd) { 871 872 /* configure the system */ 873 case RAIDFRAME_CONFIGURE: 874 875 if (raidPtr->valid) { 876 /* There is a valid RAID set running on this unit! */ 877 printf("raid%d: Device already configured!\n",unit); 878 return(EINVAL); 879 } 880 881 /* copy-in the configuration information */ 882 /* data points to a pointer to the configuration structure */ 883 884 u_cfg = *((RF_Config_t **) data); 885 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); 886 if (k_cfg == NULL) { 887 return (ENOMEM); 888 } 889 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg, 890 sizeof(RF_Config_t)); 891 if (retcode) { 892 RF_Free(k_cfg, sizeof(RF_Config_t)); 893 db1_printf(("rf_ioctl: retcode=%d copyin.1\n", 894 retcode)); 895 return (retcode); 896 } 897 /* allocate a buffer for the layout-specific data, and copy it 898 * in */ 899 if (k_cfg->layoutSpecificSize) { 900 if (k_cfg->layoutSpecificSize > 10000) { 901 /* sanity check */ 902 RF_Free(k_cfg, sizeof(RF_Config_t)); 903 return (EINVAL); 904 } 905 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, 906 (u_char *)); 907 if (specific_buf == NULL) { 908 RF_Free(k_cfg, sizeof(RF_Config_t)); 909 return (ENOMEM); 910 } 911 retcode = copyin(k_cfg->layoutSpecific, 912 (caddr_t) specific_buf, 913 k_cfg->layoutSpecificSize); 914 if (retcode) { 915 RF_Free(k_cfg, sizeof(RF_Config_t)); 916 RF_Free(specific_buf, 917 k_cfg->layoutSpecificSize); 918 db1_printf(("rf_ioctl: retcode=%d copyin.2\n", 919 retcode)); 920 return (retcode); 921 } 922 } else 923 specific_buf = NULL; 924 k_cfg->layoutSpecific = specific_buf; 925 926 /* should do some kind of sanity check on the configuration. 927 * Store the sum of all the bytes in the last byte? 

        /* configure the system */

        /*
         * Clear the entire RAID descriptor, just to make sure
         * there is no stale data left in the case of a
         * reconfiguration
         */
        memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
        raidPtr->raidid = unit;

        retcode = rf_Configure(raidPtr, k_cfg, NULL);

        if (retcode == 0) {

            /* allow this many simultaneous IO's to
               this RAID device */
            raidPtr->openings = RAIDOUTSTANDING;

            raidinit(raidPtr);
            rf_markalldirty(raidPtr);
        }
        /* free the buffers.  No return code here. */
        if (k_cfg->layoutSpecificSize) {
            RF_Free(specific_buf, k_cfg->layoutSpecificSize);
        }
        RF_Free(k_cfg, sizeof(RF_Config_t));

        return (retcode);

        /* shutdown the system */
    case RAIDFRAME_SHUTDOWN:

        if ((error = raidlock(rs)) != 0)
            return (error);

        /*
         * If somebody has a partition mounted, we shouldn't
         * shutdown.
         */

        part = DISKPART(dev);
        pmask = (1 << part);
        if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
            ((rs->sc_dkdev.dk_bopenmask & pmask) &&
             (rs->sc_dkdev.dk_copenmask & pmask))) {
            raidunlock(rs);
            return (EBUSY);
        }

        retcode = rf_Shutdown(raidPtr);

        pool_destroy(&rs->sc_cbufpool);

        /* It's no longer initialized... */
        rs->sc_flags &= ~RAIDF_INITED;

        /* Detach the disk. */
        disk_detach(&rs->sc_dkdev);

        raidunlock(rs);

        return (retcode);
    case RAIDFRAME_GET_COMPONENT_LABEL:
        clabel_ptr = (RF_ComponentLabel_t **) data;
        /* need to read the component label for the disk indicated
           by row,column in clabel */

        /* For practice, let's get it directly from disk, rather
           than from the in-core copy */
        RF_Malloc(clabel, sizeof(RF_ComponentLabel_t),
            (RF_ComponentLabel_t *));
        if (clabel == NULL)
            return (ENOMEM);

        memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));

        retcode = copyin(*clabel_ptr, clabel,
            sizeof(RF_ComponentLabel_t));

        if (retcode) {
            RF_Free(clabel, sizeof(RF_ComponentLabel_t));
            return (retcode);
        }

        row = clabel->row;
        column = clabel->column;

        if ((row < 0) || (row >= raidPtr->numRow) ||
            (column < 0) || (column >= raidPtr->numCol +
             raidPtr->numSpare)) {
            RF_Free(clabel, sizeof(RF_ComponentLabel_t));
            return (EINVAL);
        }

        raidread_component_label(raidPtr->Disks[row][column].dev,
            raidPtr->raid_cinfo[row][column].ci_vp,
            clabel);

        retcode = copyout((caddr_t) clabel,
            (caddr_t) *clabel_ptr,
            sizeof(RF_ComponentLabel_t));
        RF_Free(clabel, sizeof(RF_ComponentLabel_t));
        return (retcode);

    case RAIDFRAME_SET_COMPONENT_LABEL:
        clabel = (RF_ComponentLabel_t *) data;

        /* XXX check the label for valid stuff... */
        /* Note that some things *should not* get modified --
           the user should be re-initing the labels instead of
           trying to patch things. */

        printf("Got component label:\n");
        printf("Version: %d\n", clabel->version);
        printf("Serial Number: %d\n", clabel->serial_number);
        printf("Mod counter: %d\n", clabel->mod_counter);
        printf("Row: %d\n", clabel->row);
        printf("Column: %d\n", clabel->column);
        printf("Num Rows: %d\n", clabel->num_rows);
        printf("Num Columns: %d\n", clabel->num_columns);
        printf("Clean: %d\n", clabel->clean);
        printf("Status: %d\n", clabel->status);

        row = clabel->row;
        column = clabel->column;

        if ((row < 0) || (row >= raidPtr->numRow) ||
            (column < 0) || (column >= raidPtr->numCol)) {
            return (EINVAL);
        }

        /* XXX this isn't allowed to do anything for now :-) */

        /* XXX and before it is, we need to fill in the rest
           of the fields!?!?!?! */
#if 0
        raidwrite_component_label(
            raidPtr->Disks[row][column].dev,
            raidPtr->raid_cinfo[row][column].ci_vp,
            clabel);
#endif
        return (0);

    case RAIDFRAME_INIT_LABELS:
        clabel = (RF_ComponentLabel_t *) data;
        /*
           we only want the serial number from
           the above.  We get all the rest of the information
           from the config that was used to create this RAID
           set.
         */

        raidPtr->serial_number = clabel->serial_number;

        raid_init_component_label(raidPtr, &ci_label);
        ci_label.serial_number = clabel->serial_number;

        for (row = 0; row < raidPtr->numRow; row++) {
            ci_label.row = row;
            for (column = 0; column < raidPtr->numCol; column++) {
                diskPtr = &raidPtr->Disks[row][column];
                if (!RF_DEAD_DISK(diskPtr->status)) {
                    ci_label.partitionSize = diskPtr->partitionSize;
                    ci_label.column = column;
                    raidwrite_component_label(
                        raidPtr->Disks[row][column].dev,
                        raidPtr->raid_cinfo[row][column].ci_vp,
                        &ci_label);
                }
            }
        }

        return (retcode);
    case RAIDFRAME_SET_AUTOCONFIG:
        d = rf_set_autoconfig(raidPtr, *(int *) data);
        printf("New autoconfig value is: %d\n", d);
        *(int *) data = d;
        return (retcode);

    case RAIDFRAME_SET_ROOT:
        d = rf_set_rootpartition(raidPtr, *(int *) data);
        printf("New rootpartition value is: %d\n", d);
        *(int *) data = d;
        return (retcode);

        /* initialize all parity */
    case RAIDFRAME_REWRITEPARITY:

        if (raidPtr->Layout.map->faultsTolerated == 0) {
            /* Parity for RAID 0 is trivially correct */
            raidPtr->parity_good = RF_RAID_CLEAN;
            return (0);
        }

        if (raidPtr->parity_rewrite_in_progress == 1) {
            /* Re-write is already in progress! */
            return (EINVAL);
        }

        retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
            rf_RewriteParityThread,
            raidPtr, "raid_parity");
        return (retcode);


    case RAIDFRAME_ADD_HOT_SPARE:
        sparePtr = (RF_SingleComponent_t *) data;
        memcpy(&hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
        retcode = rf_add_hot_spare(raidPtr, &hot_spare);
        return (retcode);

    case RAIDFRAME_REMOVE_HOT_SPARE:
        return (retcode);

    case RAIDFRAME_DELETE_COMPONENT:
        componentPtr = (RF_SingleComponent_t *) data;
        memcpy(&component, componentPtr,
            sizeof(RF_SingleComponent_t));
        retcode = rf_delete_component(raidPtr, &component);
        return (retcode);

    case RAIDFRAME_INCORPORATE_HOT_SPARE:
        componentPtr = (RF_SingleComponent_t *) data;
        memcpy(&component, componentPtr,
            sizeof(RF_SingleComponent_t));
        retcode = rf_incorporate_hot_spare(raidPtr, &component);
        return (retcode);

    case RAIDFRAME_REBUILD_IN_PLACE:

        if (raidPtr->Layout.map->faultsTolerated == 0) {
            /* Can't do this on a RAID 0!! */
            return (EINVAL);
        }

        if (raidPtr->recon_in_progress == 1) {
            /* a reconstruct is already in progress! */
            return (EINVAL);
        }

        componentPtr = (RF_SingleComponent_t *) data;
        memcpy(&component, componentPtr,
            sizeof(RF_SingleComponent_t));
        row = component.row;
        column = component.column;
        printf("Rebuild: %d %d\n", row, column);
        if ((row < 0) || (row >= raidPtr->numRow) ||
            (column < 0) || (column >= raidPtr->numCol)) {
            return (EINVAL);
        }

        RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
        if (rrcopy == NULL)
            return (ENOMEM);

        rrcopy->raidPtr = (void *) raidPtr;
        rrcopy->row = row;
        rrcopy->col = column;

        retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
            rf_ReconstructInPlaceThread,
            rrcopy, "raid_reconip");
        return (retcode);

    case RAIDFRAME_GET_INFO:
        if (!raidPtr->valid)
            return (ENODEV);
        ucfgp = (RF_DeviceConfig_t **) data;
        RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
            (RF_DeviceConfig_t *));
        if (d_cfg == NULL)
            return (ENOMEM);
        memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
        d_cfg->rows = raidPtr->numRow;
        d_cfg->cols = raidPtr->numCol;
        d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
        if (d_cfg->ndevs >= RF_MAX_DISKS) {
            RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
            return (ENOMEM);
        }
        d_cfg->nspares = raidPtr->numSpare;
        if (d_cfg->nspares >= RF_MAX_DISKS) {
            RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
            return (ENOMEM);
        }
        d_cfg->maxqdepth = raidPtr->maxQueueDepth;
        d = 0;
        for (i = 0; i < d_cfg->rows; i++) {
            for (j = 0; j < d_cfg->cols; j++) {
                d_cfg->devs[d] = raidPtr->Disks[i][j];
                d++;
            }
        }
        for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
            d_cfg->spares[i] = raidPtr->Disks[0][j];
        }
        retcode = copyout((caddr_t) d_cfg, (caddr_t) *ucfgp,
            sizeof(RF_DeviceConfig_t));
        RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));

        return (retcode);

    case RAIDFRAME_CHECK_PARITY:
        *(int *) data = raidPtr->parity_good;
        return (0);

    case RAIDFRAME_RESET_ACCTOTALS:
        memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
        return (0);

    case RAIDFRAME_GET_ACCTOTALS:
        totals = (RF_AccTotals_t *) data;
        *totals = raidPtr->acc_totals;
        return (0);

    case RAIDFRAME_KEEP_ACCTOTALS:
        raidPtr->keep_acc_totals = *(int *) data;
        return (0);

    case RAIDFRAME_GET_SIZE:
        *(int *) data = raidPtr->totalSectors;
        return (0);

        /* fail a disk & optionally start reconstruction */
    case RAIDFRAME_FAIL_DISK:

        if (raidPtr->Layout.map->faultsTolerated == 0) {
            /* Can't do this on a RAID 0!! */
            return (EINVAL);
        }

        rr = (struct rf_recon_req *) data;

        if (rr->row < 0 || rr->row >= raidPtr->numRow
            || rr->col < 0 || rr->col >= raidPtr->numCol)
            return (EINVAL);

        printf("raid%d: Failing the disk: row: %d col: %d\n",
            unit, rr->row, rr->col);

        /* make a copy of the recon request so that we don't rely on
         * the user's buffer */
        RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
        if (rrcopy == NULL)
            return (ENOMEM);
        bcopy(rr, rrcopy, sizeof(*rr));
        rrcopy->raidPtr = (void *) raidPtr;

        retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
            rf_ReconThread,
            rrcopy, "raid_recon");
        return (0);

        /* invoke a copyback operation after recon on whatever disk
         * needs it, if any */
    case RAIDFRAME_COPYBACK:

        if (raidPtr->Layout.map->faultsTolerated == 0) {
            /* This makes no sense on a RAID 0!! */
            return (EINVAL);
        }

        if (raidPtr->copyback_in_progress == 1) {
            /* Copyback is already in progress! */
            return (EINVAL);
        }

        retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
            rf_CopybackThread,
            raidPtr, "raid_copyback");
        return (retcode);

        /* return the percentage completion of reconstruction */
    case RAIDFRAME_CHECK_RECON_STATUS:
        if (raidPtr->Layout.map->faultsTolerated == 0) {
            /* This makes no sense on a RAID 0, so tell the
               user it's done. */
            *(int *) data = 100;
            return (0);
        }
        row = 0;        /* XXX we only consider a single row... */
        if (raidPtr->status[row] != rf_rs_reconstructing)
            *(int *) data = 100;
        else
            *(int *) data = raidPtr->reconControl[row]->percentComplete;
        return (0);
    case RAIDFRAME_CHECK_RECON_STATUS_EXT:
        progressInfoPtr = (RF_ProgressInfo_t **) data;
        row = 0;        /* XXX we only consider a single row... */
        if (raidPtr->status[row] != rf_rs_reconstructing) {
            progressInfo.remaining = 0;
            progressInfo.completed = 100;
            progressInfo.total = 100;
        } else {
            progressInfo.total =
                raidPtr->reconControl[row]->numRUsTotal;
            progressInfo.completed =
                raidPtr->reconControl[row]->numRUsComplete;
            progressInfo.remaining = progressInfo.total -
                progressInfo.completed;
        }
        retcode = copyout((caddr_t) &progressInfo,
            (caddr_t) *progressInfoPtr,
            sizeof(RF_ProgressInfo_t));
        return (retcode);

    case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
        if (raidPtr->Layout.map->faultsTolerated == 0) {
            /* This makes no sense on a RAID 0, so tell the
               user it's done. */
            *(int *) data = 100;
            return (0);
        }
        if (raidPtr->parity_rewrite_in_progress == 1) {
            *(int *) data = 100 *
                raidPtr->parity_rewrite_stripes_done /
                raidPtr->Layout.numStripe;
        } else {
            *(int *) data = 100;
        }
        return (0);

    case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
        progressInfoPtr = (RF_ProgressInfo_t **) data;
        if (raidPtr->parity_rewrite_in_progress == 1) {
            progressInfo.total = raidPtr->Layout.numStripe;
            progressInfo.completed =
                raidPtr->parity_rewrite_stripes_done;
            progressInfo.remaining = progressInfo.total -
                progressInfo.completed;
        } else {
            progressInfo.remaining = 0;
            progressInfo.completed = 100;
            progressInfo.total = 100;
        }
        retcode = copyout((caddr_t) &progressInfo,
            (caddr_t) *progressInfoPtr,
            sizeof(RF_ProgressInfo_t));
        return (retcode);

    case RAIDFRAME_CHECK_COPYBACK_STATUS:
        if (raidPtr->Layout.map->faultsTolerated == 0) {
            /* This makes no sense on a RAID 0 */
            *(int *) data = 100;
            return (0);
        }
        if (raidPtr->copyback_in_progress == 1) {
            *(int *) data = 100 * raidPtr->copyback_stripes_done /
                raidPtr->Layout.numStripe;
        } else {
            *(int *) data = 100;
        }
        return (0);

    case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
        progressInfoPtr = (RF_ProgressInfo_t **) data;
        if (raidPtr->copyback_in_progress == 1) {
            progressInfo.total = raidPtr->Layout.numStripe;
            progressInfo.completed =
                raidPtr->copyback_stripes_done;
            progressInfo.remaining = progressInfo.total -
                progressInfo.completed;
        } else {
            progressInfo.remaining = 0;
            progressInfo.completed = 100;
            progressInfo.total = 100;
        }
        retcode = copyout((caddr_t) &progressInfo,
            (caddr_t) *progressInfoPtr,
            sizeof(RF_ProgressInfo_t));
        return (retcode);

        /* the sparetable daemon calls this to wait for the kernel to
         * need a spare table.  this ioctl does not return until a
         * spare table is needed.  XXX -- calling mpsleep here in the
         * ioctl code is almost certainly wrong and evil. -- XXX XXX
         * -- I should either compute the spare table in the kernel,
         * or have a different -- XXX XXX -- interface (a different
         * character device) for delivering the table -- XXX */
#if 0
    case RAIDFRAME_SPARET_WAIT:
        RF_LOCK_MUTEX(rf_sparet_wait_mutex);
        while (!rf_sparet_wait_queue)
            mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
        waitreq = rf_sparet_wait_queue;
        rf_sparet_wait_queue = rf_sparet_wait_queue->next;
        RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

        /* structure assignment */
        *((RF_SparetWait_t *) data) = *waitreq;

        RF_Free(waitreq, sizeof(*waitreq));
        return (0);

        /* wakes up a process waiting on SPARET_WAIT and puts an error
         * code in it that will cause the daemon to exit */
    case RAIDFRAME_ABORT_SPARET_WAIT:
        RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
        waitreq->fcol = -1;
        RF_LOCK_MUTEX(rf_sparet_wait_mutex);
        waitreq->next = rf_sparet_wait_queue;
        rf_sparet_wait_queue = waitreq;
        RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
        wakeup(&rf_sparet_wait_queue);
        return (0);

        /* used by the spare table daemon to deliver a spare table
         * into the kernel */
    case RAIDFRAME_SEND_SPARET:

        /* install the spare table */
        retcode = rf_SetSpareTable(raidPtr, *(void **) data);

        /* respond to the requestor.  the return status of the spare
         * table installation is passed in the "fcol" field */
        RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
        waitreq->fcol = retcode;
        RF_LOCK_MUTEX(rf_sparet_wait_mutex);
        waitreq->next = rf_sparet_resp_queue;
        rf_sparet_resp_queue = waitreq;
        wakeup(&rf_sparet_resp_queue);
        RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

        return (retcode);
#endif

    default:
        break;  /* fall through to the os-specific code below */

    }

    if (!raidPtr->valid)
        return (EINVAL);

    /*
     * Add support for "regular" device ioctls here.
     */

    switch (cmd) {
    case DIOCGDINFO:
        *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
        break;
#ifdef __HAVE_OLD_DISKLABEL
    case ODIOCGDINFO:
        newlabel = *(rs->sc_dkdev.dk_label);
        if (newlabel.d_npartitions > OLDMAXPARTITIONS)
            return ENOTTY;
        memcpy(data, &newlabel, sizeof(struct olddisklabel));
        break;
#endif

    case DIOCGPART:
        ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
        ((struct partinfo *) data)->part =
            &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
        break;

    case DIOCWDINFO:
    case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
    case ODIOCWDINFO:
    case ODIOCSDINFO:
#endif
    {
        struct disklabel *lp;
#ifdef __HAVE_OLD_DISKLABEL
        if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
            memset(&newlabel, 0, sizeof newlabel);
            memcpy(&newlabel, data, sizeof(struct olddisklabel));
            lp = &newlabel;
        } else
#endif
            lp = (struct disklabel *) data;

        if ((error = raidlock(rs)) != 0)
            return (error);

        rs->sc_flags |= RAIDF_LABELLING;

        error = setdisklabel(rs->sc_dkdev.dk_label,
            lp, 0, rs->sc_dkdev.dk_cpulabel);
        if (error == 0) {
            if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
                || cmd == ODIOCWDINFO
#endif
                )
                error = writedisklabel(RAIDLABELDEV(dev),
                    raidstrategy, rs->sc_dkdev.dk_label,
                    rs->sc_dkdev.dk_cpulabel);
        }
        rs->sc_flags &= ~RAIDF_LABELLING;

        raidunlock(rs);

        if (error)
            return (error);
        break;
    }

    case DIOCWLABEL:
        if (*(int *) data != 0)
            rs->sc_flags |= RAIDF_WLABEL;
        else
            rs->sc_flags &= ~RAIDF_WLABEL;
        break;

    case DIOCGDEFLABEL:
        raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
        break;

#ifdef __HAVE_OLD_DISKLABEL
    case ODIOCGDEFLABEL:
        raidgetdefaultlabel(raidPtr, rs, &newlabel);
        if (newlabel.d_npartitions > OLDMAXPARTITIONS)
            return ENOTTY;
        memcpy(data, &newlabel, sizeof(struct olddisklabel));
        break;
#endif

    default:
        retcode = ENOTTY;
    }
    return (retcode);

}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device.  */


static void
raidinit(raidPtr)
    RF_Raid_t *raidPtr;
{
    struct raid_softc *rs;
    int unit;

    unit = raidPtr->raidid;

    rs = &raid_softc[unit];
    pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
        0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);


    /* XXX should check return code first... */
    rs->sc_flags |= RAIDF_INITED;

    sprintf(rs->sc_xname, "raid%d", unit);  /* XXX doesn't check bounds. */

    rs->sc_dkdev.dk_name = rs->sc_xname;

    /* disk_attach actually creates space for the CPU disklabel, among
     * other things, so it's critical to call this *BEFORE* we try putzing
     * with disklabels. */

    disk_attach(&rs->sc_dkdev);

    /* XXX There may be a weird interaction here between this, and
     * protectedSectors, as used in RAIDframe. */

    rs->sc_size = raidPtr->totalSectors;

}

/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(req)
    RF_SparetWait_t *req;
{
    int retcode;

    RF_LOCK_MUTEX(rf_sparet_wait_mutex);
    req->next = rf_sparet_wait_queue;
    rf_sparet_wait_queue = req;
    wakeup(&rf_sparet_wait_queue);

    /* mpsleep unlocks the mutex */
    while (!rf_sparet_resp_queue) {
        tsleep(&rf_sparet_resp_queue, PRIBIO,
            "raidframe getsparetable", 0);
    }
    req = rf_sparet_resp_queue;
    rf_sparet_resp_queue = req->next;
    RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

    retcode = req->fcol;
    RF_Free(req, sizeof(*req));     /* this is not the same req as we
                                     * alloc'd */
    return (retcode);
}

/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(raidPtr)
    RF_Raid_t *raidPtr;
{
    RF_SectorCount_t num_blocks, pb, sum;
    RF_RaidAddr_t raid_addr;
    int retcode;
    struct partition *pp;
    daddr_t blocknum;
    int unit;
    struct raid_softc *rs;
    int do_async;
    struct buf *bp;

    unit = raidPtr->raidid;
    rs = &raid_softc[unit];

    /* quick check to see if anything has died recently */
    RF_LOCK_MUTEX(raidPtr->mutex);
    if (raidPtr->numNewFailures > 0) {
        rf_update_component_labels(raidPtr,
            RF_NORMAL_COMPONENT_UPDATE);
        raidPtr->numNewFailures--;
    }
    RF_UNLOCK_MUTEX(raidPtr->mutex);

    /* Check to see if we're at the limit... */
    RF_LOCK_MUTEX(raidPtr->mutex);
    while (raidPtr->openings > 0) {
        RF_UNLOCK_MUTEX(raidPtr->mutex);

        /* get the next item, if any, from the queue */
        if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
            /* nothing more to do */
            return;
        }
        BUFQ_REMOVE(&rs->buf_queue, bp);

        /* Ok, for the bp we have here, bp->b_blkno is relative to the
         * partition.. Need to make it absolute to the underlying
         * device.. */

        blocknum = bp->b_blkno;
        if (DISKPART(bp->b_dev) != RAW_PART) {
            pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
            blocknum += pp->p_offset;
        }

        db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
            (int) blocknum));

        db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
        db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

        /* *THIS* is where we adjust what block we're going to...
         * but DO NOT TOUCH bp->b_blkno!!! */
        raid_addr = blocknum;

        num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
        pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
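        /*
         * pb accounts for a request that does not end on a sector
         * boundary; it is folded into the overflow/bounds check below,
         * and such partial-sector requests are then rejected with
         * EINVAL since only whole sectors are handled here.
         */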
        sum = raid_addr + num_blocks + pb;
        if (1 || rf_debugKernelAccess) {
            db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
                (int) raid_addr, (int) sum, (int) num_blocks,
                (int) pb, (int) bp->b_resid));
        }
        if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
            || (sum < num_blocks) || (sum < pb)) {
            bp->b_error = ENOSPC;
            bp->b_flags |= B_ERROR;
            bp->b_resid = bp->b_bcount;
            biodone(bp);
            RF_LOCK_MUTEX(raidPtr->mutex);
            continue;
        }
        /*
         * XXX rf_DoAccess() should do this, not just DoAccessKernel()
         */

        if (bp->b_bcount & raidPtr->sectorMask) {
            bp->b_error = EINVAL;
            bp->b_flags |= B_ERROR;
            bp->b_resid = bp->b_bcount;
            biodone(bp);
            RF_LOCK_MUTEX(raidPtr->mutex);
            continue;

        }
        db1_printf(("Calling DoAccess..\n"));


        RF_LOCK_MUTEX(raidPtr->mutex);
        raidPtr->openings--;
        RF_UNLOCK_MUTEX(raidPtr->mutex);

        /*
         * Everything is async.
         */
        do_async = 1;

        disk_busy(&rs->sc_dkdev);

        /* XXX we're still at splbio() here... do we *really*
           need to be? */

        /* don't ever condition on bp->b_flags & B_WRITE.
         * always condition on B_READ instead */

        retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
            RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
            do_async, raid_addr, num_blocks,
            bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

        RF_LOCK_MUTEX(raidPtr->mutex);
    }
    RF_UNLOCK_MUTEX(raidPtr->mutex);
}




/* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(queue, req)
    RF_DiskQueue_t *queue;
    RF_DiskQueueData_t *req;
{
    int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
    struct buf *bp;
    struct raidbuf *raidbp = NULL;
    struct raid_softc *rs;
    int unit;
    int s;

    s = 0;
    /* s = splbio(); */ /* want to test this */
    /* XXX along with the vnode, we also need the softc associated with
     * this device.. */

    req->queue = queue;

    unit = queue->raidPtr->raidid;

    db1_printf(("DispatchKernelIO unit: %d\n", unit));

    if (unit >= numraid) {
        printf("Invalid unit number: %d %d\n", unit, numraid);
        panic("Invalid Unit number in rf_DispatchKernelIO\n");
    }
    rs = &raid_softc[unit];

    bp = req->bp;
#if 1
    /* XXX when there is a physical disk failure, someone is passing us a
     * buffer that contains old stuff!!  Attempt to deal with this problem
     * without taking a performance hit... (not sure where the real bug
     * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

    if (bp->b_flags & B_ERROR) {
        bp->b_flags &= ~B_ERROR;
    }
    if (bp->b_error != 0) {
        bp->b_error = 0;
    }
#endif
    raidbp = RAIDGETBUF(rs);

    raidbp->rf_flags = 0;   /* XXX not really used anywhere... */

    /*
     * context for raidiodone
     */
    raidbp->rf_obp = bp;
    raidbp->req = req;

    LIST_INIT(&raidbp->rf_buf.b_dep);

    switch (req->type) {
    case RF_IO_TYPE_NOP:    /* used primarily to unlock a locked queue */
        /* XXX need to do something extra here.. */
        /* I'm leaving this in, as I've never actually seen it used,
         * and I'd like folks to report it... GO */
        printf(("WAKEUP CALLED\n"));
        queue->numOutstanding++;

        /* XXX need to glue the original buffer into this?? */

        KernelWakeupFunc(&raidbp->rf_buf);
        break;

    case RF_IO_TYPE_READ:
    case RF_IO_TYPE_WRITE:

        if (req->tracerec) {
            RF_ETIMER_START(req->tracerec->timer);
        }
        InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
            op | bp->b_flags, queue->rf_cinfo->ci_dev,
            req->sectorOffset, req->numSector,
            req->buf, KernelWakeupFunc, (void *) req,
            queue->raidPtr->logBytesPerSector, req->b_proc);

        if (rf_debugKernelAccess) {
            db1_printf(("dispatch: bp->b_blkno = %ld\n",
                (long) bp->b_blkno));
        }
        queue->numOutstanding++;
        queue->last_deq_sector = req->sectorOffset;
        /* acc wouldn't have been let in if there were any pending
         * reqs at any other priority */
        queue->curPriority = req->priority;

        db1_printf(("Going for %c to unit %d row %d col %d\n",
            req->type, unit, queue->row, queue->col));
        db1_printf(("sector %d count %d (%d bytes) %d\n",
            (int) req->sectorOffset, (int) req->numSector,
            (int) (req->numSector <<
                queue->raidPtr->logBytesPerSector),
            (int) queue->raidPtr->logBytesPerSector));
        if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
            raidbp->rf_buf.b_vp->v_numoutput++;
        }
        VOP_STRATEGY(&raidbp->rf_buf);

        break;

    default:
        panic("bad req->type in rf_DispatchKernelIO");
    }
    db1_printf(("Exiting from DispatchKernelIO\n"));
    /* splx(s); */ /* want to test this */
    return (0);
}
/* this is the callback function associated with an I/O invoked from
   kernel code.
 */
static void
KernelWakeupFunc(vbp)
    struct buf *vbp;
{
    RF_DiskQueueData_t *req = NULL;
    RF_DiskQueue_t *queue;
    struct raidbuf *raidbp = (struct raidbuf *) vbp;
    struct buf *bp;
    struct raid_softc *rs;
    int unit;
    int s;

    s = splbio();
    db1_printf(("recovering the request queue:\n"));
    req = raidbp->req;

    bp = raidbp->rf_obp;

    queue = (RF_DiskQueue_t *) req->queue;

    if (raidbp->rf_buf.b_flags & B_ERROR) {
        bp->b_flags |= B_ERROR;
        bp->b_error = raidbp->rf_buf.b_error ?
            raidbp->rf_buf.b_error : EIO;
    }

    /* XXX methinks this could be wrong... */
#if 1
    bp->b_resid = raidbp->rf_buf.b_resid;
#endif

    if (req->tracerec) {
        RF_ETIMER_STOP(req->tracerec->timer);
        RF_ETIMER_EVAL(req->tracerec->timer);
        RF_LOCK_MUTEX(rf_tracing_mutex);
        req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
        req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
        req->tracerec->num_phys_ios++;
        RF_UNLOCK_MUTEX(rf_tracing_mutex);
    }
    bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */

    unit = queue->raidPtr->raidid;  /* *Much* simpler :-> */


    /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
     * ballistic, and mark the component as hosed... */

    if (bp->b_flags & B_ERROR) {
        /* Mark the disk as dead */
        /* but only mark it once... */
        if (queue->raidPtr->Disks[queue->row][queue->col].status ==
            rf_ds_optimal) {
            printf("raid%d: IO Error.  Marking %s as failed.\n",
                unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
            queue->raidPtr->Disks[queue->row][queue->col].status =
                rf_ds_failed;
            queue->raidPtr->status[queue->row] = rf_rs_degraded;
            queue->raidPtr->numFailures++;
            queue->raidPtr->numNewFailures++;
        } else {        /* Disk is already dead... */
*/ 1937 /* printf("Disk already marked as dead!\n"); */ 1938 } 1939 1940 } 1941 1942 rs = &raid_softc[unit]; 1943 RAIDPUTBUF(rs, raidbp); 1944 1945 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0); 1946 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0); 1947 1948 splx(s); 1949 } 1950 1951 1952 1953 /* 1954 * initialize a buf structure for doing an I/O in the kernel. 1955 */ 1956 static void 1957 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg, 1958 logBytesPerSector, b_proc) 1959 struct buf *bp; 1960 struct vnode *b_vp; 1961 unsigned rw_flag; 1962 dev_t dev; 1963 RF_SectorNum_t startSect; 1964 RF_SectorCount_t numSect; 1965 caddr_t buf; 1966 void (*cbFunc) (struct buf *); 1967 void *cbArg; 1968 int logBytesPerSector; 1969 struct proc *b_proc; 1970 { 1971 /* bp->b_flags = B_PHYS | rw_flag; */ 1972 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */ 1973 bp->b_bcount = numSect << logBytesPerSector; 1974 bp->b_bufsize = bp->b_bcount; 1975 bp->b_error = 0; 1976 bp->b_dev = dev; 1977 bp->b_data = buf; 1978 bp->b_blkno = startSect; 1979 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */ 1980 if (bp->b_bcount == 0) { 1981 panic("bp->b_bcount is zero in InitBP!!\n"); 1982 } 1983 bp->b_proc = b_proc; 1984 bp->b_iodone = cbFunc; 1985 bp->b_vp = b_vp; 1986 1987 } 1988 1989 static void 1990 raidgetdefaultlabel(raidPtr, rs, lp) 1991 RF_Raid_t *raidPtr; 1992 struct raid_softc *rs; 1993 struct disklabel *lp; 1994 { 1995 db1_printf(("Building a default label...\n")); 1996 memset(lp, 0, sizeof(*lp)); 1997 1998 /* fabricate a label... */ 1999 lp->d_secperunit = raidPtr->totalSectors; 2000 lp->d_secsize = raidPtr->bytesPerSector; 2001 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe; 2002 lp->d_ntracks = 4 * raidPtr->numCol; 2003 lp->d_ncylinders = raidPtr->totalSectors / 2004 (lp->d_nsectors * lp->d_ntracks); 2005 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 2006 2007 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename)); 2008 lp->d_type = DTYPE_RAID; 2009 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 2010 lp->d_rpm = 3600; 2011 lp->d_interleave = 1; 2012 lp->d_flags = 0; 2013 2014 lp->d_partitions[RAW_PART].p_offset = 0; 2015 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors; 2016 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 2017 lp->d_npartitions = RAW_PART + 1; 2018 2019 lp->d_magic = DISKMAGIC; 2020 lp->d_magic2 = DISKMAGIC; 2021 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label); 2022 2023 } 2024 /* 2025 * Read the disklabel from the raid device. If one is not present, fake one 2026 * up. 2027 */ 2028 static void 2029 raidgetdisklabel(dev) 2030 dev_t dev; 2031 { 2032 int unit = raidunit(dev); 2033 struct raid_softc *rs = &raid_softc[unit]; 2034 char *errstring; 2035 struct disklabel *lp = rs->sc_dkdev.dk_label; 2036 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel; 2037 RF_Raid_t *raidPtr; 2038 2039 db1_printf(("Getting the disklabel...\n")); 2040 2041 memset(clp, 0, sizeof(*clp)); 2042 2043 raidPtr = raidPtrs[unit]; 2044 2045 raidgetdefaultlabel(raidPtr, rs, lp); 2046 2047 /* 2048 * Call the generic disklabel extraction routine. 2049 */ 2050 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, 2051 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel); 2052 if (errstring) 2053 raidmakedisklabel(rs); 2054 else { 2055 int i; 2056 struct partition *pp; 2057 2058 /* 2059 * Sanity check whether the found disklabel is valid. 
		 *
		 * This is necessary since the total size of the raid
		 * device may vary when the interleave is changed, even
		 * though exactly the same components are used, and an
		 * old disklabel may be used if one is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
/*
 * Take care of things one might want to take care of in the event
 * that a disklabel isn't present.
 */
static void
raidmakedisklabel(rs)
	struct raid_softc *rs;
{
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	db1_printf(("Making a label..\n"));

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */

	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	lp->d_checksum = dkcksum(lp);
}
/*
 * Lookup the provided name in the filesystem.  If the file exists,
 * is a valid block device, and isn't being used by anyone else,
 * set *vpp to the file's vnode.
 * You'll find the original of this in ccd.c
 */
int
raidlookup(path, p, vpp)
	char *path;
	struct proc *p;
	struct vnode **vpp;	/* result */
{
	struct nameidata nd;
	struct vnode *vp;
	struct vattr va;
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
#ifdef DEBUG
		printf("RAIDframe: vn_open returned %d\n", error);
#endif
		return (error);
	}
	vp = nd.ni_vp;
	if (vp->v_usecount > 1) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (EBUSY);
	}
	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (error);
	}
	/* XXX: eventually we should handle VREG, too. */
	if (va.va_type != VBLK) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (ENOTBLK);
	}
	VOP_UNLOCK(vp, 0);
	*vpp = vp;
	return (0);
}
/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 * (Hmm... where have we seen this warning before :->  GO )
 */
static int
raidlock(rs)
	struct raid_softc *rs;
{
	int error;

	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
		rs->sc_flags |= RAIDF_WANTED;
		if ((error =
		    tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
			return (error);
	}
	rs->sc_flags |= RAIDF_LOCKED;
	return (0);
}
/*
 * Unlock and wake up any waiters.
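 *
 * Illustrative usage sketch (not part of the driver): a caller would
 * typically bracket per-unit state changes with the pair above, e.g.
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return (error);
 *	... modify rs->sc_flags or the unit's configuration ...
 *	raidunlock(rs);
 *
 * This presumes the usual open/close/ioctl-style caller; only the
 * helper names raidlock() and raidunlock() are real, the rest is a
 * sketch.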
2172 */ 2173 static void 2174 raidunlock(rs) 2175 struct raid_softc *rs; 2176 { 2177 2178 rs->sc_flags &= ~RAIDF_LOCKED; 2179 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2180 rs->sc_flags &= ~RAIDF_WANTED; 2181 wakeup(rs); 2182 } 2183 } 2184 2185 2186 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2187 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2188 2189 int 2190 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) 2191 { 2192 RF_ComponentLabel_t clabel; 2193 raidread_component_label(dev, b_vp, &clabel); 2194 clabel.mod_counter = mod_counter; 2195 clabel.clean = RF_RAID_CLEAN; 2196 raidwrite_component_label(dev, b_vp, &clabel); 2197 return(0); 2198 } 2199 2200 2201 int 2202 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) 2203 { 2204 RF_ComponentLabel_t clabel; 2205 raidread_component_label(dev, b_vp, &clabel); 2206 clabel.mod_counter = mod_counter; 2207 clabel.clean = RF_RAID_DIRTY; 2208 raidwrite_component_label(dev, b_vp, &clabel); 2209 return(0); 2210 } 2211 2212 /* ARGSUSED */ 2213 int 2214 raidread_component_label(dev, b_vp, clabel) 2215 dev_t dev; 2216 struct vnode *b_vp; 2217 RF_ComponentLabel_t *clabel; 2218 { 2219 struct buf *bp; 2220 int error; 2221 2222 /* XXX should probably ensure that we don't try to do this if 2223 someone has changed rf_protected_sectors. */ 2224 2225 if (b_vp == NULL) { 2226 /* For whatever reason, this component is not valid. 2227 Don't try to read a component label from it. */ 2228 return(EINVAL); 2229 } 2230 2231 /* get a block of the appropriate size... */ 2232 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2233 bp->b_dev = dev; 2234 2235 /* get our ducks in a row for the read */ 2236 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2237 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2238 bp->b_flags |= B_READ; 2239 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2240 2241 (*bdevsw[major(bp->b_dev)].d_strategy)(bp); 2242 2243 error = biowait(bp); 2244 2245 if (!error) { 2246 memcpy(clabel, bp->b_data, 2247 sizeof(RF_ComponentLabel_t)); 2248 #if 0 2249 rf_print_component_label( clabel ); 2250 #endif 2251 } else { 2252 #if 0 2253 printf("Failed to read RAID component label!\n"); 2254 #endif 2255 } 2256 2257 brelse(bp); 2258 return(error); 2259 } 2260 /* ARGSUSED */ 2261 int 2262 raidwrite_component_label(dev, b_vp, clabel) 2263 dev_t dev; 2264 struct vnode *b_vp; 2265 RF_ComponentLabel_t *clabel; 2266 { 2267 struct buf *bp; 2268 int error; 2269 2270 /* get a block of the appropriate size... 
*/ 2271 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2272 bp->b_dev = dev; 2273 2274 /* get our ducks in a row for the write */ 2275 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2276 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2277 bp->b_flags |= B_WRITE; 2278 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2279 2280 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); 2281 2282 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); 2283 2284 (*bdevsw[major(bp->b_dev)].d_strategy)(bp); 2285 error = biowait(bp); 2286 brelse(bp); 2287 if (error) { 2288 #if 1 2289 printf("Failed to write RAID component info!\n"); 2290 #endif 2291 } 2292 2293 return(error); 2294 } 2295 2296 void 2297 rf_markalldirty(raidPtr) 2298 RF_Raid_t *raidPtr; 2299 { 2300 RF_ComponentLabel_t clabel; 2301 int r,c; 2302 2303 raidPtr->mod_counter++; 2304 for (r = 0; r < raidPtr->numRow; r++) { 2305 for (c = 0; c < raidPtr->numCol; c++) { 2306 /* we don't want to touch (at all) a disk that has 2307 failed */ 2308 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) { 2309 raidread_component_label( 2310 raidPtr->Disks[r][c].dev, 2311 raidPtr->raid_cinfo[r][c].ci_vp, 2312 &clabel); 2313 if (clabel.status == rf_ds_spared) { 2314 /* XXX do something special... 2315 but whatever you do, don't 2316 try to access it!! */ 2317 } else { 2318 #if 0 2319 clabel.status = 2320 raidPtr->Disks[r][c].status; 2321 raidwrite_component_label( 2322 raidPtr->Disks[r][c].dev, 2323 raidPtr->raid_cinfo[r][c].ci_vp, 2324 &clabel); 2325 #endif 2326 raidmarkdirty( 2327 raidPtr->Disks[r][c].dev, 2328 raidPtr->raid_cinfo[r][c].ci_vp, 2329 raidPtr->mod_counter); 2330 } 2331 } 2332 } 2333 } 2334 /* printf("Component labels marked dirty.\n"); */ 2335 #if 0 2336 for( c = 0; c < raidPtr->numSpare ; c++) { 2337 sparecol = raidPtr->numCol + c; 2338 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) { 2339 /* 2340 2341 XXX this is where we get fancy and map this spare 2342 into it's correct spot in the array. 2343 2344 */ 2345 /* 2346 2347 we claim this disk is "optimal" if it's 2348 rf_ds_used_spare, as that means it should be 2349 directly substitutable for the disk it replaced. 2350 We note that too... 
2351 2352 */ 2353 2354 for(i=0;i<raidPtr->numRow;i++) { 2355 for(j=0;j<raidPtr->numCol;j++) { 2356 if ((raidPtr->Disks[i][j].spareRow == 2357 r) && 2358 (raidPtr->Disks[i][j].spareCol == 2359 sparecol)) { 2360 srow = r; 2361 scol = sparecol; 2362 break; 2363 } 2364 } 2365 } 2366 2367 raidread_component_label( 2368 raidPtr->Disks[r][sparecol].dev, 2369 raidPtr->raid_cinfo[r][sparecol].ci_vp, 2370 &clabel); 2371 /* make sure status is noted */ 2372 clabel.version = RF_COMPONENT_LABEL_VERSION; 2373 clabel.mod_counter = raidPtr->mod_counter; 2374 clabel.serial_number = raidPtr->serial_number; 2375 clabel.row = srow; 2376 clabel.column = scol; 2377 clabel.num_rows = raidPtr->numRow; 2378 clabel.num_columns = raidPtr->numCol; 2379 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/ 2380 clabel.status = rf_ds_optimal; 2381 raidwrite_component_label( 2382 raidPtr->Disks[r][sparecol].dev, 2383 raidPtr->raid_cinfo[r][sparecol].ci_vp, 2384 &clabel); 2385 raidmarkclean( raidPtr->Disks[r][sparecol].dev, 2386 raidPtr->raid_cinfo[r][sparecol].ci_vp); 2387 } 2388 } 2389 2390 #endif 2391 } 2392 2393 2394 void 2395 rf_update_component_labels(raidPtr, final) 2396 RF_Raid_t *raidPtr; 2397 int final; 2398 { 2399 RF_ComponentLabel_t clabel; 2400 int sparecol; 2401 int r,c; 2402 int i,j; 2403 int srow, scol; 2404 2405 srow = -1; 2406 scol = -1; 2407 2408 /* XXX should do extra checks to make sure things really are clean, 2409 rather than blindly setting the clean bit... */ 2410 2411 raidPtr->mod_counter++; 2412 2413 for (r = 0; r < raidPtr->numRow; r++) { 2414 for (c = 0; c < raidPtr->numCol; c++) { 2415 if (raidPtr->Disks[r][c].status == rf_ds_optimal) { 2416 raidread_component_label( 2417 raidPtr->Disks[r][c].dev, 2418 raidPtr->raid_cinfo[r][c].ci_vp, 2419 &clabel); 2420 /* make sure status is noted */ 2421 clabel.status = rf_ds_optimal; 2422 /* bump the counter */ 2423 clabel.mod_counter = raidPtr->mod_counter; 2424 2425 raidwrite_component_label( 2426 raidPtr->Disks[r][c].dev, 2427 raidPtr->raid_cinfo[r][c].ci_vp, 2428 &clabel); 2429 if (final == RF_FINAL_COMPONENT_UPDATE) { 2430 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2431 raidmarkclean( 2432 raidPtr->Disks[r][c].dev, 2433 raidPtr->raid_cinfo[r][c].ci_vp, 2434 raidPtr->mod_counter); 2435 } 2436 } 2437 } 2438 /* else we don't touch it.. */ 2439 } 2440 } 2441 2442 for( c = 0; c < raidPtr->numSpare ; c++) { 2443 sparecol = raidPtr->numCol + c; 2444 /* Need to ensure that the reconstruct actually completed! */ 2445 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) { 2446 /* 2447 2448 we claim this disk is "optimal" if it's 2449 rf_ds_used_spare, as that means it should be 2450 directly substitutable for the disk it replaced. 2451 We note that too... 2452 2453 */ 2454 2455 for(i=0;i<raidPtr->numRow;i++) { 2456 for(j=0;j<raidPtr->numCol;j++) { 2457 if ((raidPtr->Disks[i][j].spareRow == 2458 0) && 2459 (raidPtr->Disks[i][j].spareCol == 2460 sparecol)) { 2461 srow = i; 2462 scol = j; 2463 break; 2464 } 2465 } 2466 } 2467 2468 /* XXX shouldn't *really* need this... 
*/ 2469 raidread_component_label( 2470 raidPtr->Disks[0][sparecol].dev, 2471 raidPtr->raid_cinfo[0][sparecol].ci_vp, 2472 &clabel); 2473 /* make sure status is noted */ 2474 2475 raid_init_component_label(raidPtr, &clabel); 2476 2477 clabel.mod_counter = raidPtr->mod_counter; 2478 clabel.row = srow; 2479 clabel.column = scol; 2480 clabel.status = rf_ds_optimal; 2481 2482 raidwrite_component_label( 2483 raidPtr->Disks[0][sparecol].dev, 2484 raidPtr->raid_cinfo[0][sparecol].ci_vp, 2485 &clabel); 2486 if (final == RF_FINAL_COMPONENT_UPDATE) { 2487 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2488 raidmarkclean( raidPtr->Disks[0][sparecol].dev, 2489 raidPtr->raid_cinfo[0][sparecol].ci_vp, 2490 raidPtr->mod_counter); 2491 } 2492 } 2493 } 2494 } 2495 /* printf("Component labels updated\n"); */ 2496 } 2497 2498 void 2499 rf_close_component(raidPtr, vp, auto_configured) 2500 RF_Raid_t *raidPtr; 2501 struct vnode *vp; 2502 int auto_configured; 2503 { 2504 struct proc *p; 2505 2506 p = raidPtr->engine_thread; 2507 2508 if (vp != NULL) { 2509 if (auto_configured == 1) { 2510 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2511 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2512 vput(vp); 2513 2514 } else { 2515 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); 2516 } 2517 } else { 2518 printf("vnode was NULL\n"); 2519 } 2520 } 2521 2522 2523 void 2524 rf_UnconfigureVnodes(raidPtr) 2525 RF_Raid_t *raidPtr; 2526 { 2527 int r,c; 2528 struct proc *p; 2529 struct vnode *vp; 2530 int acd; 2531 2532 2533 /* We take this opportunity to close the vnodes like we should.. */ 2534 2535 p = raidPtr->engine_thread; 2536 2537 for (r = 0; r < raidPtr->numRow; r++) { 2538 for (c = 0; c < raidPtr->numCol; c++) { 2539 printf("Closing vnode for row: %d col: %d\n", r, c); 2540 vp = raidPtr->raid_cinfo[r][c].ci_vp; 2541 acd = raidPtr->Disks[r][c].auto_configured; 2542 rf_close_component(raidPtr, vp, acd); 2543 raidPtr->raid_cinfo[r][c].ci_vp = NULL; 2544 raidPtr->Disks[r][c].auto_configured = 0; 2545 } 2546 } 2547 for (r = 0; r < raidPtr->numSpare; r++) { 2548 printf("Closing vnode for spare: %d\n", r); 2549 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp; 2550 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured; 2551 rf_close_component(raidPtr, vp, acd); 2552 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL; 2553 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0; 2554 } 2555 } 2556 2557 2558 void 2559 rf_ReconThread(req) 2560 struct rf_recon_req *req; 2561 { 2562 int s; 2563 RF_Raid_t *raidPtr; 2564 2565 s = splbio(); 2566 raidPtr = (RF_Raid_t *) req->raidPtr; 2567 raidPtr->recon_in_progress = 1; 2568 2569 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col, 2570 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); 2571 2572 /* XXX get rid of this! we don't need it at all.. */ 2573 RF_Free(req, sizeof(*req)); 2574 2575 raidPtr->recon_in_progress = 0; 2576 splx(s); 2577 2578 /* That's all... */ 2579 kthread_exit(0); /* does not return */ 2580 } 2581 2582 void 2583 rf_RewriteParityThread(raidPtr) 2584 RF_Raid_t *raidPtr; 2585 { 2586 int retcode; 2587 int s; 2588 2589 raidPtr->parity_rewrite_in_progress = 1; 2590 s = splbio(); 2591 retcode = rf_RewriteParity(raidPtr); 2592 splx(s); 2593 if (retcode) { 2594 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid); 2595 } else { 2596 /* set the clean bit! 
If we shutdown correctly, 2597 the clean bit on each component label will get 2598 set */ 2599 raidPtr->parity_good = RF_RAID_CLEAN; 2600 } 2601 raidPtr->parity_rewrite_in_progress = 0; 2602 2603 /* Anyone waiting for us to stop? If so, inform them... */ 2604 if (raidPtr->waitShutdown) { 2605 wakeup(&raidPtr->parity_rewrite_in_progress); 2606 } 2607 2608 /* That's all... */ 2609 kthread_exit(0); /* does not return */ 2610 } 2611 2612 2613 void 2614 rf_CopybackThread(raidPtr) 2615 RF_Raid_t *raidPtr; 2616 { 2617 int s; 2618 2619 raidPtr->copyback_in_progress = 1; 2620 s = splbio(); 2621 rf_CopybackReconstructedData(raidPtr); 2622 splx(s); 2623 raidPtr->copyback_in_progress = 0; 2624 2625 /* That's all... */ 2626 kthread_exit(0); /* does not return */ 2627 } 2628 2629 2630 void 2631 rf_ReconstructInPlaceThread(req) 2632 struct rf_recon_req *req; 2633 { 2634 int retcode; 2635 int s; 2636 RF_Raid_t *raidPtr; 2637 2638 s = splbio(); 2639 raidPtr = req->raidPtr; 2640 raidPtr->recon_in_progress = 1; 2641 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col); 2642 RF_Free(req, sizeof(*req)); 2643 raidPtr->recon_in_progress = 0; 2644 splx(s); 2645 2646 /* That's all... */ 2647 kthread_exit(0); /* does not return */ 2648 } 2649 2650 void 2651 rf_mountroot_hook(dev) 2652 struct device *dev; 2653 { 2654 2655 } 2656 2657 2658 RF_AutoConfig_t * 2659 rf_find_raid_components() 2660 { 2661 struct devnametobdevmaj *dtobdm; 2662 struct vnode *vp; 2663 struct disklabel label; 2664 struct device *dv; 2665 char *cd_name; 2666 dev_t dev; 2667 int error; 2668 int i; 2669 int good_one; 2670 RF_ComponentLabel_t *clabel; 2671 RF_AutoConfig_t *ac_list; 2672 RF_AutoConfig_t *ac; 2673 2674 2675 /* initialize the AutoConfig list */ 2676 ac_list = NULL; 2677 2678 /* we begin by trolling through *all* the devices on the system */ 2679 2680 for (dv = alldevs.tqh_first; dv != NULL; 2681 dv = dv->dv_list.tqe_next) { 2682 2683 /* we are only interested in disks... */ 2684 if (dv->dv_class != DV_DISK) 2685 continue; 2686 2687 /* we don't care about floppies... */ 2688 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) { 2689 continue; 2690 } 2691 2692 /* need to find the device_name_to_block_device_major stuff */ 2693 cd_name = dv->dv_cfdata->cf_driver->cd_name; 2694 dtobdm = dev_name2blk; 2695 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) { 2696 dtobdm++; 2697 } 2698 2699 /* get a vnode for the raw partition of this disk */ 2700 2701 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART); 2702 if (bdevvp(dev, &vp)) 2703 panic("RAID can't alloc vnode"); 2704 2705 error = VOP_OPEN(vp, FREAD, NOCRED, 0); 2706 2707 if (error) { 2708 /* "Who cares." Continue looking 2709 for something that exists*/ 2710 vput(vp); 2711 continue; 2712 } 2713 2714 /* Ok, the disk exists. Go get the disklabel. */ 2715 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label, 2716 FREAD, NOCRED, 0); 2717 if (error) { 2718 /* 2719 * XXX can't happen - open() would 2720 * have errored out (or faked up one) 2721 */ 2722 printf("can't get label for dev %s%c (%d)!?!?\n", 2723 dv->dv_xname, 'a' + RAW_PART, error); 2724 } 2725 2726 /* don't need this any more. We'll allocate it again 2727 a little later if we really do... 
*/ 2728 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2729 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2730 vput(vp); 2731 2732 for (i=0; i < label.d_npartitions; i++) { 2733 /* We only support partitions marked as RAID */ 2734 if (label.d_partitions[i].p_fstype != FS_RAID) 2735 continue; 2736 2737 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i); 2738 if (bdevvp(dev, &vp)) 2739 panic("RAID can't alloc vnode"); 2740 2741 error = VOP_OPEN(vp, FREAD, NOCRED, 0); 2742 if (error) { 2743 /* Whatever... */ 2744 vput(vp); 2745 continue; 2746 } 2747 2748 good_one = 0; 2749 2750 clabel = (RF_ComponentLabel_t *) 2751 malloc(sizeof(RF_ComponentLabel_t), 2752 M_RAIDFRAME, M_NOWAIT); 2753 if (clabel == NULL) { 2754 /* XXX CLEANUP HERE */ 2755 printf("RAID auto config: out of memory!\n"); 2756 return(NULL); /* XXX probably should panic? */ 2757 } 2758 2759 if (!raidread_component_label(dev, vp, clabel)) { 2760 /* Got the label. Does it look reasonable? */ 2761 if (rf_reasonable_label(clabel) && 2762 (clabel->partitionSize <= 2763 label.d_partitions[i].p_size)) { 2764 #if DEBUG 2765 printf("Component on: %s%c: %d\n", 2766 dv->dv_xname, 'a'+i, 2767 label.d_partitions[i].p_size); 2768 rf_print_component_label(clabel); 2769 #endif 2770 /* if it's reasonable, add it, 2771 else ignore it. */ 2772 ac = (RF_AutoConfig_t *) 2773 malloc(sizeof(RF_AutoConfig_t), 2774 M_RAIDFRAME, 2775 M_NOWAIT); 2776 if (ac == NULL) { 2777 /* XXX should panic?? */ 2778 return(NULL); 2779 } 2780 2781 sprintf(ac->devname, "%s%c", 2782 dv->dv_xname, 'a'+i); 2783 ac->dev = dev; 2784 ac->vp = vp; 2785 ac->clabel = clabel; 2786 ac->next = ac_list; 2787 ac_list = ac; 2788 good_one = 1; 2789 } 2790 } 2791 if (!good_one) { 2792 /* cleanup */ 2793 free(clabel, M_RAIDFRAME); 2794 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2795 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2796 vput(vp); 2797 } 2798 } 2799 } 2800 return(ac_list); 2801 } 2802 2803 static int 2804 rf_reasonable_label(clabel) 2805 RF_ComponentLabel_t *clabel; 2806 { 2807 2808 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || 2809 (clabel->version==RF_COMPONENT_LABEL_VERSION)) && 2810 ((clabel->clean == RF_RAID_CLEAN) || 2811 (clabel->clean == RF_RAID_DIRTY)) && 2812 clabel->row >=0 && 2813 clabel->column >= 0 && 2814 clabel->num_rows > 0 && 2815 clabel->num_columns > 0 && 2816 clabel->row < clabel->num_rows && 2817 clabel->column < clabel->num_columns && 2818 clabel->blockSize > 0 && 2819 clabel->numBlocks > 0) { 2820 /* label looks reasonable enough... */ 2821 return(1); 2822 } 2823 return(0); 2824 } 2825 2826 2827 void 2828 rf_print_component_label(clabel) 2829 RF_ComponentLabel_t *clabel; 2830 { 2831 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 2832 clabel->row, clabel->column, 2833 clabel->num_rows, clabel->num_columns); 2834 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 2835 clabel->version, clabel->serial_number, 2836 clabel->mod_counter); 2837 printf(" Clean: %s Status: %d\n", 2838 clabel->clean ? "Yes" : "No", clabel->status ); 2839 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", 2840 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); 2841 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n", 2842 (char) clabel->parityConfig, clabel->blockSize, 2843 clabel->numBlocks); 2844 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" ); 2845 printf(" Contains root partition: %s\n", 2846 clabel->root_partition ? 
"Yes" : "No" ); 2847 printf(" Last configured as: raid%d\n", clabel->last_unit ); 2848 #if 0 2849 printf(" Config order: %d\n", clabel->config_order); 2850 #endif 2851 2852 } 2853 2854 RF_ConfigSet_t * 2855 rf_create_auto_sets(ac_list) 2856 RF_AutoConfig_t *ac_list; 2857 { 2858 RF_AutoConfig_t *ac; 2859 RF_ConfigSet_t *config_sets; 2860 RF_ConfigSet_t *cset; 2861 RF_AutoConfig_t *ac_next; 2862 2863 2864 config_sets = NULL; 2865 2866 /* Go through the AutoConfig list, and figure out which components 2867 belong to what sets. */ 2868 ac = ac_list; 2869 while(ac!=NULL) { 2870 /* we're going to putz with ac->next, so save it here 2871 for use at the end of the loop */ 2872 ac_next = ac->next; 2873 2874 if (config_sets == NULL) { 2875 /* will need at least this one... */ 2876 config_sets = (RF_ConfigSet_t *) 2877 malloc(sizeof(RF_ConfigSet_t), 2878 M_RAIDFRAME, M_NOWAIT); 2879 if (config_sets == NULL) { 2880 panic("rf_create_auto_sets: No memory!\n"); 2881 } 2882 /* this one is easy :) */ 2883 config_sets->ac = ac; 2884 config_sets->next = NULL; 2885 config_sets->rootable = 0; 2886 ac->next = NULL; 2887 } else { 2888 /* which set does this component fit into? */ 2889 cset = config_sets; 2890 while(cset!=NULL) { 2891 if (rf_does_it_fit(cset, ac)) { 2892 /* looks like it matches... */ 2893 ac->next = cset->ac; 2894 cset->ac = ac; 2895 break; 2896 } 2897 cset = cset->next; 2898 } 2899 if (cset==NULL) { 2900 /* didn't find a match above... new set..*/ 2901 cset = (RF_ConfigSet_t *) 2902 malloc(sizeof(RF_ConfigSet_t), 2903 M_RAIDFRAME, M_NOWAIT); 2904 if (cset == NULL) { 2905 panic("rf_create_auto_sets: No memory!\n"); 2906 } 2907 cset->ac = ac; 2908 ac->next = NULL; 2909 cset->next = config_sets; 2910 cset->rootable = 0; 2911 config_sets = cset; 2912 } 2913 } 2914 ac = ac_next; 2915 } 2916 2917 2918 return(config_sets); 2919 } 2920 2921 static int 2922 rf_does_it_fit(cset, ac) 2923 RF_ConfigSet_t *cset; 2924 RF_AutoConfig_t *ac; 2925 { 2926 RF_ComponentLabel_t *clabel1, *clabel2; 2927 2928 /* If this one matches the *first* one in the set, that's good 2929 enough, since the other members of the set would have been 2930 through here too... */ 2931 /* note that we are not checking partitionSize here.. 2932 2933 Note that we are also not checking the mod_counters here. 2934 If everything else matches execpt the mod_counter, that's 2935 good enough for this test. We will deal with the mod_counters 2936 a little later in the autoconfiguration process. 2937 2938 (clabel1->mod_counter == clabel2->mod_counter) && 2939 2940 The reason we don't check for this is that failed disks 2941 will have lower modification counts. If those disks are 2942 not added to the set they used to belong to, then they will 2943 form their own set, which may result in 2 different sets, 2944 for example, competing to be configured at raid0, and 2945 perhaps competing to be the root filesystem set. If the 2946 wrong ones get configured, or both attempt to become /, 2947 weird behaviour and or serious lossage will occur. Thus we 2948 need to bring them into the fold here, and kick them out at 2949 a later point. 

	*/

	clabel1 = cset->ac->clabel;
	clabel2 = ac->clabel;
	if ((clabel1->version == clabel2->version) &&
	    (clabel1->serial_number == clabel2->serial_number) &&
	    (clabel1->num_rows == clabel2->num_rows) &&
	    (clabel1->num_columns == clabel2->num_columns) &&
	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
	    (clabel1->parityConfig == clabel2->parityConfig) &&
	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
	    (clabel1->blockSize == clabel2->blockSize) &&
	    (clabel1->numBlocks == clabel2->numBlocks) &&
	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
	    (clabel1->root_partition == clabel2->root_partition) &&
	    (clabel1->last_unit == clabel2->last_unit) &&
	    (clabel1->config_order == clabel2->config_order)) {
		/* if it gets here, it almost *has* to be a match */
	} else {
		/* it's not consistent with somebody in the set..
		   punt */
		return(0);
	}
	/* all was fine.. it must fit... */
	return(1);
}

int
rf_have_enough_components(cset)
	RF_ConfigSet_t *cset;
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int r,c;
	int num_rows;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_rows = cset->ac->clabel->num_rows;
	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	for(r=0; r<num_rows; r++) {
		even_pair_failed = 0;
		for(c=0; c<num_cols; c++) {
			ac = auto_config;
			while(ac!=NULL) {
				if ((ac->clabel->row == r) &&
				    (ac->clabel->column == c) &&
				    (ac->clabel->mod_counter == mod_counter)) {
					/* it's this one... */
#if DEBUG
					printf("Found: %s at %d,%d\n",
					       ac->devname,r,c);
#endif
					break;
				}
				ac=ac->next;
			}
			if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
				if (parity_type == '1') {
					if (c%2 == 0) { /* even component */
						even_pair_failed = 1;
					} else { /* odd component.  If
						    we're failed, and
						    so is the even
						    component, it's
						    "Good Night, Charlie" */
						if (even_pair_failed == 1) {
							return(0);
						}
					}
				} else {
					/* normal accounting */
					num_missing++;
				}
			}
			if ((parity_type == '1') && (c%2 == 1)) {
				/* Just finished the odd half of a
				   mirror pair without bailing out..
				   reset the even_pair_failed flag,
				   and go on to the next pair....
				 */
				even_pair_failed = 0;
			}
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}

void
rf_create_configuration(ac,config,raidPtr)
	RF_AutoConfig_t *ac;
	RF_Config_t *config;
	RF_Raid_t *raidPtr;
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numRow = clabel->num_rows;
	config->numCol = clabel->num_columns;
	config->numSpare = 0; /* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0; /* XXX ?? */

	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
		       ac->devname);
		ac = ac->next;
	}

	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = NULL;
	}
}

int
rf_set_autoconfig(raidPtr, new_value)
	RF_Raid_t *raidPtr;
	int new_value;
{
	RF_ComponentLabel_t clabel;
	struct vnode *vp;
	dev_t dev;
	int row, column;

	raidPtr->autoconfigure = new_value;
	for(row=0; row<raidPtr->numRow; row++) {
		for(column=0; column<raidPtr->numCol; column++) {
			if (raidPtr->Disks[row][column].status ==
			    rf_ds_optimal) {
				dev = raidPtr->Disks[row][column].dev;
				vp = raidPtr->raid_cinfo[row][column].ci_vp;
				raidread_component_label(dev, vp, &clabel);
				clabel.autoconfigure = new_value;
				raidwrite_component_label(dev, vp, &clabel);
			}
		}
	}
	return(new_value);
}

int
rf_set_rootpartition(raidPtr, new_value)
	RF_Raid_t *raidPtr;
	int new_value;
{
	RF_ComponentLabel_t clabel;
	struct vnode *vp;
	dev_t dev;
	int row, column;

	raidPtr->root_partition = new_value;
	for(row=0; row<raidPtr->numRow; row++) {
		for(column=0; column<raidPtr->numCol; column++) {
			if (raidPtr->Disks[row][column].status ==
			    rf_ds_optimal) {
				dev = raidPtr->Disks[row][column].dev;
				vp = raidPtr->raid_cinfo[row][column].ci_vp;
				raidread_component_label(dev, vp, &clabel);
				clabel.root_partition = new_value;
				raidwrite_component_label(dev, vp, &clabel);
			}
		}
	}
	return(new_value);
}

void
rf_release_all_vps(cset)
	RF_ConfigSet_t *cset;
{
	RF_AutoConfig_t *ac;

	ac = cset->ac;
	while(ac!=NULL) {
		/* Close the vp, and give it back */
		if (ac->vp) {
			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
			vput(ac->vp);
			ac->vp = NULL;
		}
		ac = ac->next;
	}
}


void
rf_cleanup_config_set(cset)
	RF_ConfigSet_t *cset;
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *next_ac;

	ac =
	    cset->ac;
	while(ac!=NULL) {
		next_ac = ac->next;
		/* nuke the label */
		free(ac->clabel, M_RAIDFRAME);
		/* cleanup the config structure */
		free(ac, M_RAIDFRAME);
		/* "next.." */
		ac = next_ac;
	}
	/* and, finally, nuke the config set */
	free(cset, M_RAIDFRAME);
}


void
raid_init_component_label(raidPtr, clabel)
	RF_Raid_t *raidPtr;
	RF_ComponentLabel_t *clabel;
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;
	clabel->num_rows = raidPtr->numRow;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	clabel->numBlocks = raidPtr->sectorsPerDisk;

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;
}

int
rf_auto_config_set(cset,unit)
	RF_ConfigSet_t *cset;
	int *unit;
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	int retcode;

	printf("RAID autoconfigure\n");

	retcode = 0;
	*unit = -1;

	/* 1. Create a config structure */

	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
				       M_RAIDFRAME,
				       M_NOWAIT);
	if (config==NULL) {
		printf("Out of mem!?!?\n");
		/* XXX do something more intelligent here. */
		return(1);
	}

	memset(config, 0, sizeof(RF_Config_t));

	/* XXX raidID needs to be set correctly.. */

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	 */

	raidID = cset->ac->clabel->last_unit;
	if ((raidID < 0) || (raidID >= numraid)) {
		/* let's not wander off into lala land. */
		raidID = numraid - 1;
	}
	if (raidPtrs[raidID]->valid != 0) {

		/*
		   Nope... Go looking for an alternative...
		   Start high so we don't immediately use raid0 if that's
		   not taken.
		 */

		/* raidPtrs[] has numraid entries, so the highest valid
		   unit number is numraid - 1 */
		for(raidID = numraid - 1; raidID >= 0; raidID--) {
			if (raidPtrs[raidID]->valid == 0) {
				/* can use this one! */
				break;
			}
		}
	}

	if (raidID < 0) {
		/* punt... */
		printf("Unable to auto configure this set!\n");
		printf("(Out of RAID devs!)\n");
		/* don't leak the config structure allocated above */
		free(config, M_RAIDFRAME);
		return(1);
	}
	printf("Configuring raid%d:\n",raidID);
	raidPtr = raidPtrs[raidID];

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	retcode = rf_Configure(raidPtr, config, cset->ac);

	if (retcode == 0) {

		raidinit(raidPtrs[raidID]);

		rf_markalldirty(raidPtrs[raidID]);
		raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
		if (cset->ac->clabel->root_partition==1) {
			/* everything configured just fine.  Make a note
			   that this set is eligible to be root. */
			cset->rootable = 1;
			/* XXX do this here? */
			raidPtrs[raidID]->root_partition = 1;
		}
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);

	*unit = raidID;
	return(retcode);
}

void
rf_disk_unbusy(desc)
	RF_RaidAccessDesc_t *desc;
{
	struct buf *bp;

	bp = (struct buf *)desc->bp;
	disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
		    (bp->b_bcount - bp->b_resid));
}
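
/*
 * Illustrative sketch only (kept inside #if 0, never compiled): one
 * plausible way the autoconfiguration helpers defined above fit
 * together.  The real boot-time glue lives elsewhere in this driver;
 * the function name below is hypothetical, but every helper it calls
 * is defined in this file with the signature shown.
 */
#if 0
static void
rf_autoconfig_sketch(void)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *cset, *next_cset;
	int unit;

	/* 1. Scan all disks for components whose partitions are FS_RAID. */
	ac_list = rf_find_raid_components();

	/* 2. Sort those components into candidate configuration sets. */
	cset = rf_create_auto_sets(ac_list);

	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			/* 3. Enough live components: try to configure it. */
			if (rf_auto_config_set(cset, &unit) == 0)
				printf("raid%d: auto-configured\n", unit);
			else
				rf_release_all_vps(cset);
		} else {
			/* Not configurable; give the vnodes back. */
			rf_release_all_vps(cset);
		}
		/* 4. Free the RF_AutoConfig_t/RF_ConfigSet_t bookkeeping. */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}
}
#endif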