1 /* $NetBSD: rf_netbsdkintf.c,v 1.114 2001/11/28 05:39:13 lukem Exp $ */ 2 /*- 3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster; Jason R. Thorpe. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the NetBSD 20 * Foundation, Inc. and its contributors. 21 * 4. Neither the name of The NetBSD Foundation nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1988 University of Utah. 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. All advertising materials mentioning features or use of this software 56 * must display the following acknowledgement: 57 * This product includes software developed by the University of 58 * California, Berkeley and its contributors. 59 * 4. Neither the name of the University nor the names of its contributors 60 * may be used to endorse or promote products derived from this software 61 * without specific prior written permission. 62 * 63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 73 * SUCH DAMAGE. 74 * 75 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 76 * 77 * @(#)cd.c 8.2 (Berkeley) 11/16/93 78 */ 79 80 81 82 83 /* 84 * Copyright (c) 1995 Carnegie-Mellon University. 85 * All rights reserved. 86 * 87 * Authors: Mark Holland, Jim Zelenka 88 * 89 * Permission to use, copy, modify and distribute this software and 90 * its documentation is hereby granted, provided that both the copyright 91 * notice and this permission notice appear in all copies of the 92 * software, derivative works or modified versions, and any portions 93 * thereof, and that both notices appear in supporting documentation. 94 * 95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 98 * 99 * Carnegie Mellon requests users of this software to return to 100 * 101 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 102 * School of Computer Science 103 * Carnegie Mellon University 104 * Pittsburgh PA 15213-3890 105 * 106 * any improvements or extensions that they make and grant Carnegie the 107 * rights to redistribute these changes. 108 */ 109 110 /*********************************************************** 111 * 112 * rf_kintf.c -- the kernel interface routines for RAIDframe 113 * 114 ***********************************************************/ 115 116 #include <sys/cdefs.h> 117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.114 2001/11/28 05:39:13 lukem Exp $"); 118 119 #include <sys/param.h> 120 #include <sys/errno.h> 121 #include <sys/pool.h> 122 #include <sys/queue.h> 123 #include <sys/disk.h> 124 #include <sys/device.h> 125 #include <sys/stat.h> 126 #include <sys/ioctl.h> 127 #include <sys/fcntl.h> 128 #include <sys/systm.h> 129 #include <sys/namei.h> 130 #include <sys/vnode.h> 131 #include <sys/disklabel.h> 132 #include <sys/conf.h> 133 #include <sys/lock.h> 134 #include <sys/buf.h> 135 #include <sys/user.h> 136 #include <sys/reboot.h> 137 138 #include <dev/raidframe/raidframevar.h> 139 #include <dev/raidframe/raidframeio.h> 140 #include "raid.h" 141 #include "opt_raid_autoconfig.h" 142 #include "rf_raid.h" 143 #include "rf_copyback.h" 144 #include "rf_dag.h" 145 #include "rf_dagflags.h" 146 #include "rf_desc.h" 147 #include "rf_diskqueue.h" 148 #include "rf_acctrace.h" 149 #include "rf_etimer.h" 150 #include "rf_general.h" 151 #include "rf_debugMem.h" 152 #include "rf_kintf.h" 153 #include "rf_options.h" 154 #include "rf_driver.h" 155 #include "rf_parityscan.h" 156 #include "rf_debugprint.h" 157 #include "rf_threadstuff.h" 158 159 int rf_kdebug_level = 0; 160 161 #ifdef DEBUG 162 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 163 #else /* DEBUG */ 164 #define db1_printf(a) { } 165 #endif /* DEBUG */ 166 167 static RF_Raid_t **raidPtrs; /* global raid device descriptors */ 168 169 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex) 170 171 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a 172 * spare table */ 173 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from 174 * installation process */ 175 176 /* prototypes */ 177 static void KernelWakeupFunc(struct buf * bp); 178 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag, 179 dev_t dev, RF_SectorNum_t startSect, 180 RF_SectorCount_t numSect, caddr_t buf, 181 void (*cbFunc) (struct buf *), void *cbArg, 182 int logBytesPerSector, struct proc * b_proc); 183 static void raidinit(RF_Raid_t *); 184 185 void raidattach(int); 186 int raidsize(dev_t); 187 int raidopen(dev_t, int, int, struct proc *); 188 int raidclose(dev_t, int, int, struct proc *); 189 int raidioctl(dev_t, u_long, caddr_t, int, struct proc *); 190 int raidwrite(dev_t, struct uio *, int); 191 int raidread(dev_t, struct uio *, int); 192 void raidstrategy(struct buf *); 193 int raiddump(dev_t, daddr_t, caddr_t, size_t); 194 195 /* 196 * Pilfered from ccd.c 197 */ 198 199 struct raidbuf { 200 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */ 201 struct buf *rf_obp; /* ptr. to original I/O buf */ 202 int rf_flags; /* misc. flags */ 203 RF_DiskQueueData_t *req;/* the request that this was part of.. */ 204 }; 205 206 207 #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT) 208 #define RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp) 209 210 /* XXX Not sure if the following should be replacing the raidPtrs above, 211 or if it should be used in conjunction with that... 212 */ 213 214 struct raid_softc { 215 int sc_flags; /* flags */ 216 int sc_cflags; /* configuration flags */ 217 size_t sc_size; /* size of the raid device */ 218 char sc_xname[20]; /* XXX external name */ 219 struct disk sc_dkdev; /* generic disk device info */ 220 struct pool sc_cbufpool; /* component buffer pool */ 221 struct buf_queue buf_queue; /* used for the device queue */ 222 }; 223 /* sc_flags */ 224 #define RAIDF_INITED 0x01 /* unit has been initialized */ 225 #define RAIDF_WLABEL 0x02 /* label area is writable */ 226 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */ 227 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */ 228 #define RAIDF_LOCKED 0x80 /* unit is locked */ 229 230 #define raidunit(x) DISKUNIT(x) 231 int numraid = 0; 232 233 /* 234 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. 235 * Be aware that large numbers can allow the driver to consume a lot of 236 * kernel memory, especially on writes, and in degraded mode reads. 237 * 238 * For example: with a stripe width of 64 blocks (32k) and 5 disks, 239 * a single 64K write will typically require 64K for the old data, 240 * 64K for the old parity, and 64K for the new parity, for a total 241 * of 192K (if the parity buffer is not re-used immediately). 242 * Even it if is used immediately, that's still 128K, which when multiplied 243 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data. 244 * 245 * Now in degraded mode, for example, a 64K read on the above setup may 246 * require data reconstruction, which will require *all* of the 4 remaining 247 * disks to participate -- 4 * 32K/disk == 128K again. 248 */ 249 250 #ifndef RAIDOUTSTANDING 251 #define RAIDOUTSTANDING 6 252 #endif 253 254 #define RAIDLABELDEV(dev) \ 255 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART)) 256 257 /* declared here, and made public, for the benefit of KVM stuff.. */ 258 struct raid_softc *raid_softc; 259 260 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, 261 struct disklabel *); 262 static void raidgetdisklabel(dev_t); 263 static void raidmakedisklabel(struct raid_softc *); 264 265 static int raidlock(struct raid_softc *); 266 static void raidunlock(struct raid_softc *); 267 268 static void rf_markalldirty(RF_Raid_t *); 269 void rf_mountroot_hook(struct device *); 270 271 struct device *raidrootdev; 272 273 void rf_ReconThread(struct rf_recon_req *); 274 /* XXX what I want is: */ 275 /*void rf_ReconThread(RF_Raid_t *raidPtr); */ 276 void rf_RewriteParityThread(RF_Raid_t *raidPtr); 277 void rf_CopybackThread(RF_Raid_t *raidPtr); 278 void rf_ReconstructInPlaceThread(struct rf_recon_req *); 279 void rf_buildroothack(void *); 280 281 RF_AutoConfig_t *rf_find_raid_components(void); 282 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *); 283 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); 284 static int rf_reasonable_label(RF_ComponentLabel_t *); 285 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *); 286 int rf_set_autoconfig(RF_Raid_t *, int); 287 int rf_set_rootpartition(RF_Raid_t *, int); 288 void rf_release_all_vps(RF_ConfigSet_t *); 289 void rf_cleanup_config_set(RF_ConfigSet_t *); 290 int rf_have_enough_components(RF_ConfigSet_t *); 291 int rf_auto_config_set(RF_ConfigSet_t *, int *); 292 293 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not 294 allow autoconfig to take place. 295 Note that this is overridden by having 296 RAID_AUTOCONFIG as an option in the 297 kernel config file. */ 298 299 void 300 raidattach(num) 301 int num; 302 { 303 int raidID; 304 int i, rc; 305 RF_AutoConfig_t *ac_list; /* autoconfig list */ 306 RF_ConfigSet_t *config_sets; 307 308 #ifdef DEBUG 309 printf("raidattach: Asked for %d units\n", num); 310 #endif 311 312 if (num <= 0) { 313 #ifdef DIAGNOSTIC 314 panic("raidattach: count <= 0"); 315 #endif 316 return; 317 } 318 /* This is where all the initialization stuff gets done. */ 319 320 numraid = num; 321 322 /* Make some space for requested number of units... */ 323 324 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **)); 325 if (raidPtrs == NULL) { 326 panic("raidPtrs is NULL!!\n"); 327 } 328 329 rc = rf_mutex_init(&rf_sparet_wait_mutex); 330 if (rc) { 331 RF_PANIC(); 332 } 333 334 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; 335 336 for (i = 0; i < num; i++) 337 raidPtrs[i] = NULL; 338 rc = rf_BootRaidframe(); 339 if (rc == 0) 340 printf("Kernelized RAIDframe activated\n"); 341 else 342 panic("Serious error booting RAID!!\n"); 343 344 /* put together some datastructures like the CCD device does.. This 345 * lets us lock the device and what-not when it gets opened. */ 346 347 raid_softc = (struct raid_softc *) 348 malloc(num * sizeof(struct raid_softc), 349 M_RAIDFRAME, M_NOWAIT); 350 if (raid_softc == NULL) { 351 printf("WARNING: no memory for RAIDframe driver\n"); 352 return; 353 } 354 355 memset(raid_softc, 0, num * sizeof(struct raid_softc)); 356 357 raidrootdev = (struct device *)malloc(num * sizeof(struct device), 358 M_RAIDFRAME, M_NOWAIT); 359 if (raidrootdev == NULL) { 360 panic("No memory for RAIDframe driver!!?!?!\n"); 361 } 362 363 for (raidID = 0; raidID < num; raidID++) { 364 BUFQ_INIT(&raid_softc[raidID].buf_queue); 365 366 raidrootdev[raidID].dv_class = DV_DISK; 367 raidrootdev[raidID].dv_cfdata = NULL; 368 raidrootdev[raidID].dv_unit = raidID; 369 raidrootdev[raidID].dv_parent = NULL; 370 raidrootdev[raidID].dv_flags = 0; 371 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID); 372 373 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t), 374 (RF_Raid_t *)); 375 if (raidPtrs[raidID] == NULL) { 376 printf("WARNING: raidPtrs[%d] is NULL\n", raidID); 377 numraid = raidID; 378 return; 379 } 380 } 381 382 #ifdef RAID_AUTOCONFIG 383 raidautoconfig = 1; 384 #endif 385 386 if (raidautoconfig) { 387 /* 1. locate all RAID components on the system */ 388 389 #if DEBUG 390 printf("Searching for raid components...\n"); 391 #endif 392 ac_list = rf_find_raid_components(); 393 394 /* 2. sort them into their respective sets */ 395 396 config_sets = rf_create_auto_sets(ac_list); 397 398 /* 3. evaluate each set and configure the valid ones 399 This gets done in rf_buildroothack() */ 400 401 /* schedule the creation of the thread to do the 402 "/ on RAID" stuff */ 403 404 kthread_create(rf_buildroothack,config_sets); 405 406 #if 0 407 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]); 408 #endif 409 } 410 411 } 412 413 void 414 rf_buildroothack(arg) 415 void *arg; 416 { 417 RF_ConfigSet_t *config_sets = arg; 418 RF_ConfigSet_t *cset; 419 RF_ConfigSet_t *next_cset; 420 int retcode; 421 int raidID; 422 int rootID; 423 int num_root; 424 425 rootID = 0; 426 num_root = 0; 427 cset = config_sets; 428 while(cset != NULL ) { 429 next_cset = cset->next; 430 if (rf_have_enough_components(cset) && 431 cset->ac->clabel->autoconfigure==1) { 432 retcode = rf_auto_config_set(cset,&raidID); 433 if (!retcode) { 434 if (cset->rootable) { 435 rootID = raidID; 436 num_root++; 437 } 438 } else { 439 /* The autoconfig didn't work :( */ 440 #if DEBUG 441 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID); 442 #endif 443 rf_release_all_vps(cset); 444 } 445 } else { 446 /* we're not autoconfiguring this set... 447 release the associated resources */ 448 rf_release_all_vps(cset); 449 } 450 /* cleanup */ 451 rf_cleanup_config_set(cset); 452 cset = next_cset; 453 } 454 if (boothowto & RB_ASKNAME) { 455 /* We don't auto-config... */ 456 } else { 457 /* They didn't ask, and we found something bootable... */ 458 459 if (num_root == 1) { 460 booted_device = &raidrootdev[rootID]; 461 } else if (num_root > 1) { 462 /* we can't guess.. require the user to answer... */ 463 boothowto |= RB_ASKNAME; 464 } 465 } 466 } 467 468 469 int 470 raidsize(dev) 471 dev_t dev; 472 { 473 struct raid_softc *rs; 474 struct disklabel *lp; 475 int part, unit, omask, size; 476 477 unit = raidunit(dev); 478 if (unit >= numraid) 479 return (-1); 480 rs = &raid_softc[unit]; 481 482 if ((rs->sc_flags & RAIDF_INITED) == 0) 483 return (-1); 484 485 part = DISKPART(dev); 486 omask = rs->sc_dkdev.dk_openmask & (1 << part); 487 lp = rs->sc_dkdev.dk_label; 488 489 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc)) 490 return (-1); 491 492 if (lp->d_partitions[part].p_fstype != FS_SWAP) 493 size = -1; 494 else 495 size = lp->d_partitions[part].p_size * 496 (lp->d_secsize / DEV_BSIZE); 497 498 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc)) 499 return (-1); 500 501 return (size); 502 503 } 504 505 int 506 raiddump(dev, blkno, va, size) 507 dev_t dev; 508 daddr_t blkno; 509 caddr_t va; 510 size_t size; 511 { 512 /* Not implemented. */ 513 return ENXIO; 514 } 515 /* ARGSUSED */ 516 int 517 raidopen(dev, flags, fmt, p) 518 dev_t dev; 519 int flags, fmt; 520 struct proc *p; 521 { 522 int unit = raidunit(dev); 523 struct raid_softc *rs; 524 struct disklabel *lp; 525 int part, pmask; 526 int error = 0; 527 528 if (unit >= numraid) 529 return (ENXIO); 530 rs = &raid_softc[unit]; 531 532 if ((error = raidlock(rs)) != 0) 533 return (error); 534 lp = rs->sc_dkdev.dk_label; 535 536 part = DISKPART(dev); 537 pmask = (1 << part); 538 539 db1_printf(("Opening raid device number: %d partition: %d\n", 540 unit, part)); 541 542 543 if ((rs->sc_flags & RAIDF_INITED) && 544 (rs->sc_dkdev.dk_openmask == 0)) 545 raidgetdisklabel(dev); 546 547 /* make sure that this partition exists */ 548 549 if (part != RAW_PART) { 550 db1_printf(("Not a raw partition..\n")); 551 if (((rs->sc_flags & RAIDF_INITED) == 0) || 552 ((part >= lp->d_npartitions) || 553 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 554 error = ENXIO; 555 raidunlock(rs); 556 db1_printf(("Bailing out...\n")); 557 return (error); 558 } 559 } 560 /* Prevent this unit from being unconfigured while open. */ 561 switch (fmt) { 562 case S_IFCHR: 563 rs->sc_dkdev.dk_copenmask |= pmask; 564 break; 565 566 case S_IFBLK: 567 rs->sc_dkdev.dk_bopenmask |= pmask; 568 break; 569 } 570 571 if ((rs->sc_dkdev.dk_openmask == 0) && 572 ((rs->sc_flags & RAIDF_INITED) != 0)) { 573 /* First one... mark things as dirty... Note that we *MUST* 574 have done a configure before this. I DO NOT WANT TO BE 575 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED 576 THAT THEY BELONG TOGETHER!!!!! */ 577 /* XXX should check to see if we're only open for reading 578 here... If so, we needn't do this, but then need some 579 other way of keeping track of what's happened.. */ 580 581 rf_markalldirty( raidPtrs[unit] ); 582 } 583 584 585 rs->sc_dkdev.dk_openmask = 586 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 587 588 raidunlock(rs); 589 590 return (error); 591 592 593 } 594 /* ARGSUSED */ 595 int 596 raidclose(dev, flags, fmt, p) 597 dev_t dev; 598 int flags, fmt; 599 struct proc *p; 600 { 601 int unit = raidunit(dev); 602 struct raid_softc *rs; 603 int error = 0; 604 int part; 605 606 if (unit >= numraid) 607 return (ENXIO); 608 rs = &raid_softc[unit]; 609 610 if ((error = raidlock(rs)) != 0) 611 return (error); 612 613 part = DISKPART(dev); 614 615 /* ...that much closer to allowing unconfiguration... */ 616 switch (fmt) { 617 case S_IFCHR: 618 rs->sc_dkdev.dk_copenmask &= ~(1 << part); 619 break; 620 621 case S_IFBLK: 622 rs->sc_dkdev.dk_bopenmask &= ~(1 << part); 623 break; 624 } 625 rs->sc_dkdev.dk_openmask = 626 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 627 628 if ((rs->sc_dkdev.dk_openmask == 0) && 629 ((rs->sc_flags & RAIDF_INITED) != 0)) { 630 /* Last one... device is not unconfigured yet. 631 Device shutdown has taken care of setting the 632 clean bits if RAIDF_INITED is not set 633 mark things as clean... */ 634 #if 0 635 printf("Last one on raid%d. Updating status.\n",unit); 636 #endif 637 rf_update_component_labels(raidPtrs[unit], 638 RF_FINAL_COMPONENT_UPDATE); 639 if (doing_shutdown) { 640 /* last one, and we're going down, so 641 lights out for this RAID set too. */ 642 error = rf_Shutdown(raidPtrs[unit]); 643 pool_destroy(&rs->sc_cbufpool); 644 645 /* It's no longer initialized... */ 646 rs->sc_flags &= ~RAIDF_INITED; 647 648 /* Detach the disk. */ 649 disk_detach(&rs->sc_dkdev); 650 } 651 } 652 653 raidunlock(rs); 654 return (0); 655 656 } 657 658 void 659 raidstrategy(bp) 660 struct buf *bp; 661 { 662 int s; 663 664 unsigned int raidID = raidunit(bp->b_dev); 665 RF_Raid_t *raidPtr; 666 struct raid_softc *rs = &raid_softc[raidID]; 667 struct disklabel *lp; 668 int wlabel; 669 670 if ((rs->sc_flags & RAIDF_INITED) ==0) { 671 bp->b_error = ENXIO; 672 bp->b_flags |= B_ERROR; 673 bp->b_resid = bp->b_bcount; 674 biodone(bp); 675 return; 676 } 677 if (raidID >= numraid || !raidPtrs[raidID]) { 678 bp->b_error = ENODEV; 679 bp->b_flags |= B_ERROR; 680 bp->b_resid = bp->b_bcount; 681 biodone(bp); 682 return; 683 } 684 raidPtr = raidPtrs[raidID]; 685 if (!raidPtr->valid) { 686 bp->b_error = ENODEV; 687 bp->b_flags |= B_ERROR; 688 bp->b_resid = bp->b_bcount; 689 biodone(bp); 690 return; 691 } 692 if (bp->b_bcount == 0) { 693 db1_printf(("b_bcount is zero..\n")); 694 biodone(bp); 695 return; 696 } 697 lp = rs->sc_dkdev.dk_label; 698 699 /* 700 * Do bounds checking and adjust transfer. If there's an 701 * error, the bounds check will flag that for us. 702 */ 703 704 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING); 705 if (DISKPART(bp->b_dev) != RAW_PART) 706 if (bounds_check_with_label(bp, lp, wlabel) <= 0) { 707 db1_printf(("Bounds check failed!!:%d %d\n", 708 (int) bp->b_blkno, (int) wlabel)); 709 biodone(bp); 710 return; 711 } 712 s = splbio(); 713 714 bp->b_resid = 0; 715 716 /* stuff it onto our queue */ 717 BUFQ_INSERT_TAIL(&rs->buf_queue, bp); 718 719 raidstart(raidPtrs[raidID]); 720 721 splx(s); 722 } 723 /* ARGSUSED */ 724 int 725 raidread(dev, uio, flags) 726 dev_t dev; 727 struct uio *uio; 728 int flags; 729 { 730 int unit = raidunit(dev); 731 struct raid_softc *rs; 732 int part; 733 734 if (unit >= numraid) 735 return (ENXIO); 736 rs = &raid_softc[unit]; 737 738 if ((rs->sc_flags & RAIDF_INITED) == 0) 739 return (ENXIO); 740 part = DISKPART(dev); 741 742 db1_printf(("raidread: unit: %d partition: %d\n", unit, part)); 743 744 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio)); 745 746 } 747 /* ARGSUSED */ 748 int 749 raidwrite(dev, uio, flags) 750 dev_t dev; 751 struct uio *uio; 752 int flags; 753 { 754 int unit = raidunit(dev); 755 struct raid_softc *rs; 756 757 if (unit >= numraid) 758 return (ENXIO); 759 rs = &raid_softc[unit]; 760 761 if ((rs->sc_flags & RAIDF_INITED) == 0) 762 return (ENXIO); 763 db1_printf(("raidwrite\n")); 764 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio)); 765 766 } 767 768 int 769 raidioctl(dev, cmd, data, flag, p) 770 dev_t dev; 771 u_long cmd; 772 caddr_t data; 773 int flag; 774 struct proc *p; 775 { 776 int unit = raidunit(dev); 777 int error = 0; 778 int part, pmask; 779 struct raid_softc *rs; 780 RF_Config_t *k_cfg, *u_cfg; 781 RF_Raid_t *raidPtr; 782 RF_RaidDisk_t *diskPtr; 783 RF_AccTotals_t *totals; 784 RF_DeviceConfig_t *d_cfg, **ucfgp; 785 u_char *specific_buf; 786 int retcode = 0; 787 int row; 788 int column; 789 struct rf_recon_req *rrcopy, *rr; 790 RF_ComponentLabel_t *clabel; 791 RF_ComponentLabel_t ci_label; 792 RF_ComponentLabel_t **clabel_ptr; 793 RF_SingleComponent_t *sparePtr,*componentPtr; 794 RF_SingleComponent_t hot_spare; 795 RF_SingleComponent_t component; 796 RF_ProgressInfo_t progressInfo, **progressInfoPtr; 797 int i, j, d; 798 #ifdef __HAVE_OLD_DISKLABEL 799 struct disklabel newlabel; 800 #endif 801 802 if (unit >= numraid) 803 return (ENXIO); 804 rs = &raid_softc[unit]; 805 raidPtr = raidPtrs[unit]; 806 807 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev, 808 (int) DISKPART(dev), (int) unit, (int) cmd)); 809 810 /* Must be open for writes for these commands... */ 811 switch (cmd) { 812 case DIOCSDINFO: 813 case DIOCWDINFO: 814 #ifdef __HAVE_OLD_DISKLABEL 815 case ODIOCWDINFO: 816 case ODIOCSDINFO: 817 #endif 818 case DIOCWLABEL: 819 if ((flag & FWRITE) == 0) 820 return (EBADF); 821 } 822 823 /* Must be initialized for these... */ 824 switch (cmd) { 825 case DIOCGDINFO: 826 case DIOCSDINFO: 827 case DIOCWDINFO: 828 #ifdef __HAVE_OLD_DISKLABEL 829 case ODIOCGDINFO: 830 case ODIOCWDINFO: 831 case ODIOCSDINFO: 832 case ODIOCGDEFLABEL: 833 #endif 834 case DIOCGPART: 835 case DIOCWLABEL: 836 case DIOCGDEFLABEL: 837 case RAIDFRAME_SHUTDOWN: 838 case RAIDFRAME_REWRITEPARITY: 839 case RAIDFRAME_GET_INFO: 840 case RAIDFRAME_RESET_ACCTOTALS: 841 case RAIDFRAME_GET_ACCTOTALS: 842 case RAIDFRAME_KEEP_ACCTOTALS: 843 case RAIDFRAME_GET_SIZE: 844 case RAIDFRAME_FAIL_DISK: 845 case RAIDFRAME_COPYBACK: 846 case RAIDFRAME_CHECK_RECON_STATUS: 847 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 848 case RAIDFRAME_GET_COMPONENT_LABEL: 849 case RAIDFRAME_SET_COMPONENT_LABEL: 850 case RAIDFRAME_ADD_HOT_SPARE: 851 case RAIDFRAME_REMOVE_HOT_SPARE: 852 case RAIDFRAME_INIT_LABELS: 853 case RAIDFRAME_REBUILD_IN_PLACE: 854 case RAIDFRAME_CHECK_PARITY: 855 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 856 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 857 case RAIDFRAME_CHECK_COPYBACK_STATUS: 858 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 859 case RAIDFRAME_SET_AUTOCONFIG: 860 case RAIDFRAME_SET_ROOT: 861 case RAIDFRAME_DELETE_COMPONENT: 862 case RAIDFRAME_INCORPORATE_HOT_SPARE: 863 if ((rs->sc_flags & RAIDF_INITED) == 0) 864 return (ENXIO); 865 } 866 867 switch (cmd) { 868 869 /* configure the system */ 870 case RAIDFRAME_CONFIGURE: 871 872 if (raidPtr->valid) { 873 /* There is a valid RAID set running on this unit! */ 874 printf("raid%d: Device already configured!\n",unit); 875 return(EINVAL); 876 } 877 878 /* copy-in the configuration information */ 879 /* data points to a pointer to the configuration structure */ 880 881 u_cfg = *((RF_Config_t **) data); 882 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); 883 if (k_cfg == NULL) { 884 return (ENOMEM); 885 } 886 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg, 887 sizeof(RF_Config_t)); 888 if (retcode) { 889 RF_Free(k_cfg, sizeof(RF_Config_t)); 890 db1_printf(("rf_ioctl: retcode=%d copyin.1\n", 891 retcode)); 892 return (retcode); 893 } 894 /* allocate a buffer for the layout-specific data, and copy it 895 * in */ 896 if (k_cfg->layoutSpecificSize) { 897 if (k_cfg->layoutSpecificSize > 10000) { 898 /* sanity check */ 899 RF_Free(k_cfg, sizeof(RF_Config_t)); 900 return (EINVAL); 901 } 902 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, 903 (u_char *)); 904 if (specific_buf == NULL) { 905 RF_Free(k_cfg, sizeof(RF_Config_t)); 906 return (ENOMEM); 907 } 908 retcode = copyin(k_cfg->layoutSpecific, 909 (caddr_t) specific_buf, 910 k_cfg->layoutSpecificSize); 911 if (retcode) { 912 RF_Free(k_cfg, sizeof(RF_Config_t)); 913 RF_Free(specific_buf, 914 k_cfg->layoutSpecificSize); 915 db1_printf(("rf_ioctl: retcode=%d copyin.2\n", 916 retcode)); 917 return (retcode); 918 } 919 } else 920 specific_buf = NULL; 921 k_cfg->layoutSpecific = specific_buf; 922 923 /* should do some kind of sanity check on the configuration. 924 * Store the sum of all the bytes in the last byte? */ 925 926 /* configure the system */ 927 928 /* 929 * Clear the entire RAID descriptor, just to make sure 930 * there is no stale data left in the case of a 931 * reconfiguration 932 */ 933 memset((char *) raidPtr, 0, sizeof(RF_Raid_t)); 934 raidPtr->raidid = unit; 935 936 retcode = rf_Configure(raidPtr, k_cfg, NULL); 937 938 if (retcode == 0) { 939 940 /* allow this many simultaneous IO's to 941 this RAID device */ 942 raidPtr->openings = RAIDOUTSTANDING; 943 944 raidinit(raidPtr); 945 rf_markalldirty(raidPtr); 946 } 947 /* free the buffers. No return code here. */ 948 if (k_cfg->layoutSpecificSize) { 949 RF_Free(specific_buf, k_cfg->layoutSpecificSize); 950 } 951 RF_Free(k_cfg, sizeof(RF_Config_t)); 952 953 return (retcode); 954 955 /* shutdown the system */ 956 case RAIDFRAME_SHUTDOWN: 957 958 if ((error = raidlock(rs)) != 0) 959 return (error); 960 961 /* 962 * If somebody has a partition mounted, we shouldn't 963 * shutdown. 964 */ 965 966 part = DISKPART(dev); 967 pmask = (1 << part); 968 if ((rs->sc_dkdev.dk_openmask & ~pmask) || 969 ((rs->sc_dkdev.dk_bopenmask & pmask) && 970 (rs->sc_dkdev.dk_copenmask & pmask))) { 971 raidunlock(rs); 972 return (EBUSY); 973 } 974 975 retcode = rf_Shutdown(raidPtr); 976 977 pool_destroy(&rs->sc_cbufpool); 978 979 /* It's no longer initialized... */ 980 rs->sc_flags &= ~RAIDF_INITED; 981 982 /* Detach the disk. */ 983 disk_detach(&rs->sc_dkdev); 984 985 raidunlock(rs); 986 987 return (retcode); 988 case RAIDFRAME_GET_COMPONENT_LABEL: 989 clabel_ptr = (RF_ComponentLabel_t **) data; 990 /* need to read the component label for the disk indicated 991 by row,column in clabel */ 992 993 /* For practice, let's get it directly fromdisk, rather 994 than from the in-core copy */ 995 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ), 996 (RF_ComponentLabel_t *)); 997 if (clabel == NULL) 998 return (ENOMEM); 999 1000 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t)); 1001 1002 retcode = copyin( *clabel_ptr, clabel, 1003 sizeof(RF_ComponentLabel_t)); 1004 1005 if (retcode) { 1006 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1007 return(retcode); 1008 } 1009 1010 row = clabel->row; 1011 column = clabel->column; 1012 1013 if ((row < 0) || (row >= raidPtr->numRow) || 1014 (column < 0) || (column >= raidPtr->numCol + 1015 raidPtr->numSpare)) { 1016 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1017 return(EINVAL); 1018 } 1019 1020 raidread_component_label(raidPtr->Disks[row][column].dev, 1021 raidPtr->raid_cinfo[row][column].ci_vp, 1022 clabel ); 1023 1024 retcode = copyout((caddr_t) clabel, 1025 (caddr_t) *clabel_ptr, 1026 sizeof(RF_ComponentLabel_t)); 1027 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1028 return (retcode); 1029 1030 case RAIDFRAME_SET_COMPONENT_LABEL: 1031 clabel = (RF_ComponentLabel_t *) data; 1032 1033 /* XXX check the label for valid stuff... */ 1034 /* Note that some things *should not* get modified -- 1035 the user should be re-initing the labels instead of 1036 trying to patch things. 1037 */ 1038 1039 printf("Got component label:\n"); 1040 printf("Version: %d\n",clabel->version); 1041 printf("Serial Number: %d\n",clabel->serial_number); 1042 printf("Mod counter: %d\n",clabel->mod_counter); 1043 printf("Row: %d\n", clabel->row); 1044 printf("Column: %d\n", clabel->column); 1045 printf("Num Rows: %d\n", clabel->num_rows); 1046 printf("Num Columns: %d\n", clabel->num_columns); 1047 printf("Clean: %d\n", clabel->clean); 1048 printf("Status: %d\n", clabel->status); 1049 1050 row = clabel->row; 1051 column = clabel->column; 1052 1053 if ((row < 0) || (row >= raidPtr->numRow) || 1054 (column < 0) || (column >= raidPtr->numCol)) { 1055 return(EINVAL); 1056 } 1057 1058 /* XXX this isn't allowed to do anything for now :-) */ 1059 1060 /* XXX and before it is, we need to fill in the rest 1061 of the fields!?!?!?! */ 1062 #if 0 1063 raidwrite_component_label( 1064 raidPtr->Disks[row][column].dev, 1065 raidPtr->raid_cinfo[row][column].ci_vp, 1066 clabel ); 1067 #endif 1068 return (0); 1069 1070 case RAIDFRAME_INIT_LABELS: 1071 clabel = (RF_ComponentLabel_t *) data; 1072 /* 1073 we only want the serial number from 1074 the above. We get all the rest of the information 1075 from the config that was used to create this RAID 1076 set. 1077 */ 1078 1079 raidPtr->serial_number = clabel->serial_number; 1080 1081 raid_init_component_label(raidPtr, &ci_label); 1082 ci_label.serial_number = clabel->serial_number; 1083 1084 for(row=0;row<raidPtr->numRow;row++) { 1085 ci_label.row = row; 1086 for(column=0;column<raidPtr->numCol;column++) { 1087 diskPtr = &raidPtr->Disks[row][column]; 1088 if (!RF_DEAD_DISK(diskPtr->status)) { 1089 ci_label.partitionSize = diskPtr->partitionSize; 1090 ci_label.column = column; 1091 raidwrite_component_label( 1092 raidPtr->Disks[row][column].dev, 1093 raidPtr->raid_cinfo[row][column].ci_vp, 1094 &ci_label ); 1095 } 1096 } 1097 } 1098 1099 return (retcode); 1100 case RAIDFRAME_SET_AUTOCONFIG: 1101 d = rf_set_autoconfig(raidPtr, *(int *) data); 1102 printf("New autoconfig value is: %d\n", d); 1103 *(int *) data = d; 1104 return (retcode); 1105 1106 case RAIDFRAME_SET_ROOT: 1107 d = rf_set_rootpartition(raidPtr, *(int *) data); 1108 printf("New rootpartition value is: %d\n", d); 1109 *(int *) data = d; 1110 return (retcode); 1111 1112 /* initialize all parity */ 1113 case RAIDFRAME_REWRITEPARITY: 1114 1115 if (raidPtr->Layout.map->faultsTolerated == 0) { 1116 /* Parity for RAID 0 is trivially correct */ 1117 raidPtr->parity_good = RF_RAID_CLEAN; 1118 return(0); 1119 } 1120 1121 if (raidPtr->parity_rewrite_in_progress == 1) { 1122 /* Re-write is already in progress! */ 1123 return(EINVAL); 1124 } 1125 1126 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, 1127 rf_RewriteParityThread, 1128 raidPtr,"raid_parity"); 1129 return (retcode); 1130 1131 1132 case RAIDFRAME_ADD_HOT_SPARE: 1133 sparePtr = (RF_SingleComponent_t *) data; 1134 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t)); 1135 retcode = rf_add_hot_spare(raidPtr, &hot_spare); 1136 return(retcode); 1137 1138 case RAIDFRAME_REMOVE_HOT_SPARE: 1139 return(retcode); 1140 1141 case RAIDFRAME_DELETE_COMPONENT: 1142 componentPtr = (RF_SingleComponent_t *)data; 1143 memcpy( &component, componentPtr, 1144 sizeof(RF_SingleComponent_t)); 1145 retcode = rf_delete_component(raidPtr, &component); 1146 return(retcode); 1147 1148 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1149 componentPtr = (RF_SingleComponent_t *)data; 1150 memcpy( &component, componentPtr, 1151 sizeof(RF_SingleComponent_t)); 1152 retcode = rf_incorporate_hot_spare(raidPtr, &component); 1153 return(retcode); 1154 1155 case RAIDFRAME_REBUILD_IN_PLACE: 1156 1157 if (raidPtr->Layout.map->faultsTolerated == 0) { 1158 /* Can't do this on a RAID 0!! */ 1159 return(EINVAL); 1160 } 1161 1162 if (raidPtr->recon_in_progress == 1) { 1163 /* a reconstruct is already in progress! */ 1164 return(EINVAL); 1165 } 1166 1167 componentPtr = (RF_SingleComponent_t *) data; 1168 memcpy( &component, componentPtr, 1169 sizeof(RF_SingleComponent_t)); 1170 row = component.row; 1171 column = component.column; 1172 printf("Rebuild: %d %d\n",row, column); 1173 if ((row < 0) || (row >= raidPtr->numRow) || 1174 (column < 0) || (column >= raidPtr->numCol)) { 1175 return(EINVAL); 1176 } 1177 1178 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1179 if (rrcopy == NULL) 1180 return(ENOMEM); 1181 1182 rrcopy->raidPtr = (void *) raidPtr; 1183 rrcopy->row = row; 1184 rrcopy->col = column; 1185 1186 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1187 rf_ReconstructInPlaceThread, 1188 rrcopy,"raid_reconip"); 1189 return(retcode); 1190 1191 case RAIDFRAME_GET_INFO: 1192 if (!raidPtr->valid) 1193 return (ENODEV); 1194 ucfgp = (RF_DeviceConfig_t **) data; 1195 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), 1196 (RF_DeviceConfig_t *)); 1197 if (d_cfg == NULL) 1198 return (ENOMEM); 1199 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t)); 1200 d_cfg->rows = raidPtr->numRow; 1201 d_cfg->cols = raidPtr->numCol; 1202 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol; 1203 if (d_cfg->ndevs >= RF_MAX_DISKS) { 1204 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1205 return (ENOMEM); 1206 } 1207 d_cfg->nspares = raidPtr->numSpare; 1208 if (d_cfg->nspares >= RF_MAX_DISKS) { 1209 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1210 return (ENOMEM); 1211 } 1212 d_cfg->maxqdepth = raidPtr->maxQueueDepth; 1213 d = 0; 1214 for (i = 0; i < d_cfg->rows; i++) { 1215 for (j = 0; j < d_cfg->cols; j++) { 1216 d_cfg->devs[d] = raidPtr->Disks[i][j]; 1217 d++; 1218 } 1219 } 1220 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { 1221 d_cfg->spares[i] = raidPtr->Disks[0][j]; 1222 } 1223 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp, 1224 sizeof(RF_DeviceConfig_t)); 1225 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1226 1227 return (retcode); 1228 1229 case RAIDFRAME_CHECK_PARITY: 1230 *(int *) data = raidPtr->parity_good; 1231 return (0); 1232 1233 case RAIDFRAME_RESET_ACCTOTALS: 1234 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals)); 1235 return (0); 1236 1237 case RAIDFRAME_GET_ACCTOTALS: 1238 totals = (RF_AccTotals_t *) data; 1239 *totals = raidPtr->acc_totals; 1240 return (0); 1241 1242 case RAIDFRAME_KEEP_ACCTOTALS: 1243 raidPtr->keep_acc_totals = *(int *)data; 1244 return (0); 1245 1246 case RAIDFRAME_GET_SIZE: 1247 *(int *) data = raidPtr->totalSectors; 1248 return (0); 1249 1250 /* fail a disk & optionally start reconstruction */ 1251 case RAIDFRAME_FAIL_DISK: 1252 1253 if (raidPtr->Layout.map->faultsTolerated == 0) { 1254 /* Can't do this on a RAID 0!! */ 1255 return(EINVAL); 1256 } 1257 1258 rr = (struct rf_recon_req *) data; 1259 1260 if (rr->row < 0 || rr->row >= raidPtr->numRow 1261 || rr->col < 0 || rr->col >= raidPtr->numCol) 1262 return (EINVAL); 1263 1264 printf("raid%d: Failing the disk: row: %d col: %d\n", 1265 unit, rr->row, rr->col); 1266 1267 /* make a copy of the recon request so that we don't rely on 1268 * the user's buffer */ 1269 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1270 if (rrcopy == NULL) 1271 return(ENOMEM); 1272 bcopy(rr, rrcopy, sizeof(*rr)); 1273 rrcopy->raidPtr = (void *) raidPtr; 1274 1275 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1276 rf_ReconThread, 1277 rrcopy,"raid_recon"); 1278 return (0); 1279 1280 /* invoke a copyback operation after recon on whatever disk 1281 * needs it, if any */ 1282 case RAIDFRAME_COPYBACK: 1283 1284 if (raidPtr->Layout.map->faultsTolerated == 0) { 1285 /* This makes no sense on a RAID 0!! */ 1286 return(EINVAL); 1287 } 1288 1289 if (raidPtr->copyback_in_progress == 1) { 1290 /* Copyback is already in progress! */ 1291 return(EINVAL); 1292 } 1293 1294 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, 1295 rf_CopybackThread, 1296 raidPtr,"raid_copyback"); 1297 return (retcode); 1298 1299 /* return the percentage completion of reconstruction */ 1300 case RAIDFRAME_CHECK_RECON_STATUS: 1301 if (raidPtr->Layout.map->faultsTolerated == 0) { 1302 /* This makes no sense on a RAID 0, so tell the 1303 user it's done. */ 1304 *(int *) data = 100; 1305 return(0); 1306 } 1307 row = 0; /* XXX we only consider a single row... */ 1308 if (raidPtr->status[row] != rf_rs_reconstructing) 1309 *(int *) data = 100; 1310 else 1311 *(int *) data = raidPtr->reconControl[row]->percentComplete; 1312 return (0); 1313 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1314 progressInfoPtr = (RF_ProgressInfo_t **) data; 1315 row = 0; /* XXX we only consider a single row... */ 1316 if (raidPtr->status[row] != rf_rs_reconstructing) { 1317 progressInfo.remaining = 0; 1318 progressInfo.completed = 100; 1319 progressInfo.total = 100; 1320 } else { 1321 progressInfo.total = 1322 raidPtr->reconControl[row]->numRUsTotal; 1323 progressInfo.completed = 1324 raidPtr->reconControl[row]->numRUsComplete; 1325 progressInfo.remaining = progressInfo.total - 1326 progressInfo.completed; 1327 } 1328 retcode = copyout((caddr_t) &progressInfo, 1329 (caddr_t) *progressInfoPtr, 1330 sizeof(RF_ProgressInfo_t)); 1331 return (retcode); 1332 1333 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1334 if (raidPtr->Layout.map->faultsTolerated == 0) { 1335 /* This makes no sense on a RAID 0, so tell the 1336 user it's done. */ 1337 *(int *) data = 100; 1338 return(0); 1339 } 1340 if (raidPtr->parity_rewrite_in_progress == 1) { 1341 *(int *) data = 100 * 1342 raidPtr->parity_rewrite_stripes_done / 1343 raidPtr->Layout.numStripe; 1344 } else { 1345 *(int *) data = 100; 1346 } 1347 return (0); 1348 1349 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1350 progressInfoPtr = (RF_ProgressInfo_t **) data; 1351 if (raidPtr->parity_rewrite_in_progress == 1) { 1352 progressInfo.total = raidPtr->Layout.numStripe; 1353 progressInfo.completed = 1354 raidPtr->parity_rewrite_stripes_done; 1355 progressInfo.remaining = progressInfo.total - 1356 progressInfo.completed; 1357 } else { 1358 progressInfo.remaining = 0; 1359 progressInfo.completed = 100; 1360 progressInfo.total = 100; 1361 } 1362 retcode = copyout((caddr_t) &progressInfo, 1363 (caddr_t) *progressInfoPtr, 1364 sizeof(RF_ProgressInfo_t)); 1365 return (retcode); 1366 1367 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1368 if (raidPtr->Layout.map->faultsTolerated == 0) { 1369 /* This makes no sense on a RAID 0 */ 1370 *(int *) data = 100; 1371 return(0); 1372 } 1373 if (raidPtr->copyback_in_progress == 1) { 1374 *(int *) data = 100 * raidPtr->copyback_stripes_done / 1375 raidPtr->Layout.numStripe; 1376 } else { 1377 *(int *) data = 100; 1378 } 1379 return (0); 1380 1381 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1382 progressInfoPtr = (RF_ProgressInfo_t **) data; 1383 if (raidPtr->copyback_in_progress == 1) { 1384 progressInfo.total = raidPtr->Layout.numStripe; 1385 progressInfo.completed = 1386 raidPtr->copyback_stripes_done; 1387 progressInfo.remaining = progressInfo.total - 1388 progressInfo.completed; 1389 } else { 1390 progressInfo.remaining = 0; 1391 progressInfo.completed = 100; 1392 progressInfo.total = 100; 1393 } 1394 retcode = copyout((caddr_t) &progressInfo, 1395 (caddr_t) *progressInfoPtr, 1396 sizeof(RF_ProgressInfo_t)); 1397 return (retcode); 1398 1399 /* the sparetable daemon calls this to wait for the kernel to 1400 * need a spare table. this ioctl does not return until a 1401 * spare table is needed. XXX -- calling mpsleep here in the 1402 * ioctl code is almost certainly wrong and evil. -- XXX XXX 1403 * -- I should either compute the spare table in the kernel, 1404 * or have a different -- XXX XXX -- interface (a different 1405 * character device) for delivering the table -- XXX */ 1406 #if 0 1407 case RAIDFRAME_SPARET_WAIT: 1408 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1409 while (!rf_sparet_wait_queue) 1410 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); 1411 waitreq = rf_sparet_wait_queue; 1412 rf_sparet_wait_queue = rf_sparet_wait_queue->next; 1413 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1414 1415 /* structure assignment */ 1416 *((RF_SparetWait_t *) data) = *waitreq; 1417 1418 RF_Free(waitreq, sizeof(*waitreq)); 1419 return (0); 1420 1421 /* wakes up a process waiting on SPARET_WAIT and puts an error 1422 * code in it that will cause the dameon to exit */ 1423 case RAIDFRAME_ABORT_SPARET_WAIT: 1424 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1425 waitreq->fcol = -1; 1426 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1427 waitreq->next = rf_sparet_wait_queue; 1428 rf_sparet_wait_queue = waitreq; 1429 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1430 wakeup(&rf_sparet_wait_queue); 1431 return (0); 1432 1433 /* used by the spare table daemon to deliver a spare table 1434 * into the kernel */ 1435 case RAIDFRAME_SEND_SPARET: 1436 1437 /* install the spare table */ 1438 retcode = rf_SetSpareTable(raidPtr, *(void **) data); 1439 1440 /* respond to the requestor. the return status of the spare 1441 * table installation is passed in the "fcol" field */ 1442 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1443 waitreq->fcol = retcode; 1444 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1445 waitreq->next = rf_sparet_resp_queue; 1446 rf_sparet_resp_queue = waitreq; 1447 wakeup(&rf_sparet_resp_queue); 1448 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1449 1450 return (retcode); 1451 #endif 1452 1453 default: 1454 break; /* fall through to the os-specific code below */ 1455 1456 } 1457 1458 if (!raidPtr->valid) 1459 return (EINVAL); 1460 1461 /* 1462 * Add support for "regular" device ioctls here. 1463 */ 1464 1465 switch (cmd) { 1466 case DIOCGDINFO: 1467 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label); 1468 break; 1469 #ifdef __HAVE_OLD_DISKLABEL 1470 case ODIOCGDINFO: 1471 newlabel = *(rs->sc_dkdev.dk_label); 1472 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1473 return ENOTTY; 1474 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1475 break; 1476 #endif 1477 1478 case DIOCGPART: 1479 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label; 1480 ((struct partinfo *) data)->part = 1481 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1482 break; 1483 1484 case DIOCWDINFO: 1485 case DIOCSDINFO: 1486 #ifdef __HAVE_OLD_DISKLABEL 1487 case ODIOCWDINFO: 1488 case ODIOCSDINFO: 1489 #endif 1490 { 1491 struct disklabel *lp; 1492 #ifdef __HAVE_OLD_DISKLABEL 1493 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1494 memset(&newlabel, 0, sizeof newlabel); 1495 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1496 lp = &newlabel; 1497 } else 1498 #endif 1499 lp = (struct disklabel *)data; 1500 1501 if ((error = raidlock(rs)) != 0) 1502 return (error); 1503 1504 rs->sc_flags |= RAIDF_LABELLING; 1505 1506 error = setdisklabel(rs->sc_dkdev.dk_label, 1507 lp, 0, rs->sc_dkdev.dk_cpulabel); 1508 if (error == 0) { 1509 if (cmd == DIOCWDINFO 1510 #ifdef __HAVE_OLD_DISKLABEL 1511 || cmd == ODIOCWDINFO 1512 #endif 1513 ) 1514 error = writedisklabel(RAIDLABELDEV(dev), 1515 raidstrategy, rs->sc_dkdev.dk_label, 1516 rs->sc_dkdev.dk_cpulabel); 1517 } 1518 rs->sc_flags &= ~RAIDF_LABELLING; 1519 1520 raidunlock(rs); 1521 1522 if (error) 1523 return (error); 1524 break; 1525 } 1526 1527 case DIOCWLABEL: 1528 if (*(int *) data != 0) 1529 rs->sc_flags |= RAIDF_WLABEL; 1530 else 1531 rs->sc_flags &= ~RAIDF_WLABEL; 1532 break; 1533 1534 case DIOCGDEFLABEL: 1535 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data); 1536 break; 1537 1538 #ifdef __HAVE_OLD_DISKLABEL 1539 case ODIOCGDEFLABEL: 1540 raidgetdefaultlabel(raidPtr, rs, &newlabel); 1541 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1542 return ENOTTY; 1543 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1544 break; 1545 #endif 1546 1547 default: 1548 retcode = ENOTTY; 1549 } 1550 return (retcode); 1551 1552 } 1553 1554 1555 /* raidinit -- complete the rest of the initialization for the 1556 RAIDframe device. */ 1557 1558 1559 static void 1560 raidinit(raidPtr) 1561 RF_Raid_t *raidPtr; 1562 { 1563 struct raid_softc *rs; 1564 int unit; 1565 1566 unit = raidPtr->raidid; 1567 1568 rs = &raid_softc[unit]; 1569 pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0, 1570 0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME); 1571 1572 1573 /* XXX should check return code first... */ 1574 rs->sc_flags |= RAIDF_INITED; 1575 1576 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */ 1577 1578 rs->sc_dkdev.dk_name = rs->sc_xname; 1579 1580 /* disk_attach actually creates space for the CPU disklabel, among 1581 * other things, so it's critical to call this *BEFORE* we try putzing 1582 * with disklabels. */ 1583 1584 disk_attach(&rs->sc_dkdev); 1585 1586 /* XXX There may be a weird interaction here between this, and 1587 * protectedSectors, as used in RAIDframe. */ 1588 1589 rs->sc_size = raidPtr->totalSectors; 1590 1591 } 1592 1593 /* wake up the daemon & tell it to get us a spare table 1594 * XXX 1595 * the entries in the queues should be tagged with the raidPtr 1596 * so that in the extremely rare case that two recons happen at once, 1597 * we know for which device were requesting a spare table 1598 * XXX 1599 * 1600 * XXX This code is not currently used. GO 1601 */ 1602 int 1603 rf_GetSpareTableFromDaemon(req) 1604 RF_SparetWait_t *req; 1605 { 1606 int retcode; 1607 1608 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1609 req->next = rf_sparet_wait_queue; 1610 rf_sparet_wait_queue = req; 1611 wakeup(&rf_sparet_wait_queue); 1612 1613 /* mpsleep unlocks the mutex */ 1614 while (!rf_sparet_resp_queue) { 1615 tsleep(&rf_sparet_resp_queue, PRIBIO, 1616 "raidframe getsparetable", 0); 1617 } 1618 req = rf_sparet_resp_queue; 1619 rf_sparet_resp_queue = req->next; 1620 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1621 1622 retcode = req->fcol; 1623 RF_Free(req, sizeof(*req)); /* this is not the same req as we 1624 * alloc'd */ 1625 return (retcode); 1626 } 1627 1628 /* a wrapper around rf_DoAccess that extracts appropriate info from the 1629 * bp & passes it down. 1630 * any calls originating in the kernel must use non-blocking I/O 1631 * do some extra sanity checking to return "appropriate" error values for 1632 * certain conditions (to make some standard utilities work) 1633 * 1634 * Formerly known as: rf_DoAccessKernel 1635 */ 1636 void 1637 raidstart(raidPtr) 1638 RF_Raid_t *raidPtr; 1639 { 1640 RF_SectorCount_t num_blocks, pb, sum; 1641 RF_RaidAddr_t raid_addr; 1642 int retcode; 1643 struct partition *pp; 1644 daddr_t blocknum; 1645 int unit; 1646 struct raid_softc *rs; 1647 int do_async; 1648 struct buf *bp; 1649 1650 unit = raidPtr->raidid; 1651 rs = &raid_softc[unit]; 1652 1653 /* quick check to see if anything has died recently */ 1654 RF_LOCK_MUTEX(raidPtr->mutex); 1655 if (raidPtr->numNewFailures > 0) { 1656 rf_update_component_labels(raidPtr, 1657 RF_NORMAL_COMPONENT_UPDATE); 1658 raidPtr->numNewFailures--; 1659 } 1660 RF_UNLOCK_MUTEX(raidPtr->mutex); 1661 1662 /* Check to see if we're at the limit... */ 1663 RF_LOCK_MUTEX(raidPtr->mutex); 1664 while (raidPtr->openings > 0) { 1665 RF_UNLOCK_MUTEX(raidPtr->mutex); 1666 1667 /* get the next item, if any, from the queue */ 1668 if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) { 1669 /* nothing more to do */ 1670 return; 1671 } 1672 BUFQ_REMOVE(&rs->buf_queue, bp); 1673 1674 /* Ok, for the bp we have here, bp->b_blkno is relative to the 1675 * partition.. Need to make it absolute to the underlying 1676 * device.. */ 1677 1678 blocknum = bp->b_blkno; 1679 if (DISKPART(bp->b_dev) != RAW_PART) { 1680 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; 1681 blocknum += pp->p_offset; 1682 } 1683 1684 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, 1685 (int) blocknum)); 1686 1687 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount)); 1688 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid)); 1689 1690 /* *THIS* is where we adjust what block we're going to... 1691 * but DO NOT TOUCH bp->b_blkno!!! */ 1692 raid_addr = blocknum; 1693 1694 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector; 1695 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0; 1696 sum = raid_addr + num_blocks + pb; 1697 if (1 || rf_debugKernelAccess) { 1698 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n", 1699 (int) raid_addr, (int) sum, (int) num_blocks, 1700 (int) pb, (int) bp->b_resid)); 1701 } 1702 if ((sum > raidPtr->totalSectors) || (sum < raid_addr) 1703 || (sum < num_blocks) || (sum < pb)) { 1704 bp->b_error = ENOSPC; 1705 bp->b_flags |= B_ERROR; 1706 bp->b_resid = bp->b_bcount; 1707 biodone(bp); 1708 RF_LOCK_MUTEX(raidPtr->mutex); 1709 continue; 1710 } 1711 /* 1712 * XXX rf_DoAccess() should do this, not just DoAccessKernel() 1713 */ 1714 1715 if (bp->b_bcount & raidPtr->sectorMask) { 1716 bp->b_error = EINVAL; 1717 bp->b_flags |= B_ERROR; 1718 bp->b_resid = bp->b_bcount; 1719 biodone(bp); 1720 RF_LOCK_MUTEX(raidPtr->mutex); 1721 continue; 1722 1723 } 1724 db1_printf(("Calling DoAccess..\n")); 1725 1726 1727 RF_LOCK_MUTEX(raidPtr->mutex); 1728 raidPtr->openings--; 1729 RF_UNLOCK_MUTEX(raidPtr->mutex); 1730 1731 /* 1732 * Everything is async. 1733 */ 1734 do_async = 1; 1735 1736 disk_busy(&rs->sc_dkdev); 1737 1738 /* XXX we're still at splbio() here... do we *really* 1739 need to be? */ 1740 1741 /* don't ever condition on bp->b_flags & B_WRITE. 1742 * always condition on B_READ instead */ 1743 1744 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? 1745 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, 1746 do_async, raid_addr, num_blocks, 1747 bp->b_data, bp, RF_DAG_NONBLOCKING_IO); 1748 1749 RF_LOCK_MUTEX(raidPtr->mutex); 1750 } 1751 RF_UNLOCK_MUTEX(raidPtr->mutex); 1752 } 1753 1754 1755 1756 1757 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */ 1758 1759 int 1760 rf_DispatchKernelIO(queue, req) 1761 RF_DiskQueue_t *queue; 1762 RF_DiskQueueData_t *req; 1763 { 1764 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE; 1765 struct buf *bp; 1766 struct raidbuf *raidbp = NULL; 1767 struct raid_softc *rs; 1768 int unit; 1769 int s; 1770 1771 s=0; 1772 /* s = splbio();*/ /* want to test this */ 1773 /* XXX along with the vnode, we also need the softc associated with 1774 * this device.. */ 1775 1776 req->queue = queue; 1777 1778 unit = queue->raidPtr->raidid; 1779 1780 db1_printf(("DispatchKernelIO unit: %d\n", unit)); 1781 1782 if (unit >= numraid) { 1783 printf("Invalid unit number: %d %d\n", unit, numraid); 1784 panic("Invalid Unit number in rf_DispatchKernelIO\n"); 1785 } 1786 rs = &raid_softc[unit]; 1787 1788 bp = req->bp; 1789 #if 1 1790 /* XXX when there is a physical disk failure, someone is passing us a 1791 * buffer that contains old stuff!! Attempt to deal with this problem 1792 * without taking a performance hit... (not sure where the real bug 1793 * is. It's buried in RAIDframe somewhere) :-( GO ) */ 1794 1795 if (bp->b_flags & B_ERROR) { 1796 bp->b_flags &= ~B_ERROR; 1797 } 1798 if (bp->b_error != 0) { 1799 bp->b_error = 0; 1800 } 1801 #endif 1802 raidbp = RAIDGETBUF(rs); 1803 1804 raidbp->rf_flags = 0; /* XXX not really used anywhere... */ 1805 1806 /* 1807 * context for raidiodone 1808 */ 1809 raidbp->rf_obp = bp; 1810 raidbp->req = req; 1811 1812 LIST_INIT(&raidbp->rf_buf.b_dep); 1813 1814 switch (req->type) { 1815 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ 1816 /* XXX need to do something extra here.. */ 1817 /* I'm leaving this in, as I've never actually seen it used, 1818 * and I'd like folks to report it... GO */ 1819 printf(("WAKEUP CALLED\n")); 1820 queue->numOutstanding++; 1821 1822 /* XXX need to glue the original buffer into this?? */ 1823 1824 KernelWakeupFunc(&raidbp->rf_buf); 1825 break; 1826 1827 case RF_IO_TYPE_READ: 1828 case RF_IO_TYPE_WRITE: 1829 1830 if (req->tracerec) { 1831 RF_ETIMER_START(req->tracerec->timer); 1832 } 1833 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp, 1834 op | bp->b_flags, queue->rf_cinfo->ci_dev, 1835 req->sectorOffset, req->numSector, 1836 req->buf, KernelWakeupFunc, (void *) req, 1837 queue->raidPtr->logBytesPerSector, req->b_proc); 1838 1839 if (rf_debugKernelAccess) { 1840 db1_printf(("dispatch: bp->b_blkno = %ld\n", 1841 (long) bp->b_blkno)); 1842 } 1843 queue->numOutstanding++; 1844 queue->last_deq_sector = req->sectorOffset; 1845 /* acc wouldn't have been let in if there were any pending 1846 * reqs at any other priority */ 1847 queue->curPriority = req->priority; 1848 1849 db1_printf(("Going for %c to unit %d row %d col %d\n", 1850 req->type, unit, queue->row, queue->col)); 1851 db1_printf(("sector %d count %d (%d bytes) %d\n", 1852 (int) req->sectorOffset, (int) req->numSector, 1853 (int) (req->numSector << 1854 queue->raidPtr->logBytesPerSector), 1855 (int) queue->raidPtr->logBytesPerSector)); 1856 if ((raidbp->rf_buf.b_flags & B_READ) == 0) { 1857 raidbp->rf_buf.b_vp->v_numoutput++; 1858 } 1859 VOP_STRATEGY(&raidbp->rf_buf); 1860 1861 break; 1862 1863 default: 1864 panic("bad req->type in rf_DispatchKernelIO"); 1865 } 1866 db1_printf(("Exiting from DispatchKernelIO\n")); 1867 /* splx(s); */ /* want to test this */ 1868 return (0); 1869 } 1870 /* this is the callback function associated with a I/O invoked from 1871 kernel code. 1872 */ 1873 static void 1874 KernelWakeupFunc(vbp) 1875 struct buf *vbp; 1876 { 1877 RF_DiskQueueData_t *req = NULL; 1878 RF_DiskQueue_t *queue; 1879 struct raidbuf *raidbp = (struct raidbuf *) vbp; 1880 struct buf *bp; 1881 struct raid_softc *rs; 1882 int unit; 1883 int s; 1884 1885 s = splbio(); 1886 db1_printf(("recovering the request queue:\n")); 1887 req = raidbp->req; 1888 1889 bp = raidbp->rf_obp; 1890 1891 queue = (RF_DiskQueue_t *) req->queue; 1892 1893 if (raidbp->rf_buf.b_flags & B_ERROR) { 1894 bp->b_flags |= B_ERROR; 1895 bp->b_error = raidbp->rf_buf.b_error ? 1896 raidbp->rf_buf.b_error : EIO; 1897 } 1898 1899 /* XXX methinks this could be wrong... */ 1900 #if 1 1901 bp->b_resid = raidbp->rf_buf.b_resid; 1902 #endif 1903 1904 if (req->tracerec) { 1905 RF_ETIMER_STOP(req->tracerec->timer); 1906 RF_ETIMER_EVAL(req->tracerec->timer); 1907 RF_LOCK_MUTEX(rf_tracing_mutex); 1908 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); 1909 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); 1910 req->tracerec->num_phys_ios++; 1911 RF_UNLOCK_MUTEX(rf_tracing_mutex); 1912 } 1913 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */ 1914 1915 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */ 1916 1917 1918 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go 1919 * ballistic, and mark the component as hosed... */ 1920 1921 if (bp->b_flags & B_ERROR) { 1922 /* Mark the disk as dead */ 1923 /* but only mark it once... */ 1924 if (queue->raidPtr->Disks[queue->row][queue->col].status == 1925 rf_ds_optimal) { 1926 printf("raid%d: IO Error. Marking %s as failed.\n", 1927 unit, queue->raidPtr->Disks[queue->row][queue->col].devname); 1928 queue->raidPtr->Disks[queue->row][queue->col].status = 1929 rf_ds_failed; 1930 queue->raidPtr->status[queue->row] = rf_rs_degraded; 1931 queue->raidPtr->numFailures++; 1932 queue->raidPtr->numNewFailures++; 1933 } else { /* Disk is already dead... */ 1934 /* printf("Disk already marked as dead!\n"); */ 1935 } 1936 1937 } 1938 1939 rs = &raid_softc[unit]; 1940 RAIDPUTBUF(rs, raidbp); 1941 1942 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0); 1943 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0); 1944 1945 splx(s); 1946 } 1947 1948 1949 1950 /* 1951 * initialize a buf structure for doing an I/O in the kernel. 1952 */ 1953 static void 1954 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg, 1955 logBytesPerSector, b_proc) 1956 struct buf *bp; 1957 struct vnode *b_vp; 1958 unsigned rw_flag; 1959 dev_t dev; 1960 RF_SectorNum_t startSect; 1961 RF_SectorCount_t numSect; 1962 caddr_t buf; 1963 void (*cbFunc) (struct buf *); 1964 void *cbArg; 1965 int logBytesPerSector; 1966 struct proc *b_proc; 1967 { 1968 /* bp->b_flags = B_PHYS | rw_flag; */ 1969 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */ 1970 bp->b_bcount = numSect << logBytesPerSector; 1971 bp->b_bufsize = bp->b_bcount; 1972 bp->b_error = 0; 1973 bp->b_dev = dev; 1974 bp->b_data = buf; 1975 bp->b_blkno = startSect; 1976 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */ 1977 if (bp->b_bcount == 0) { 1978 panic("bp->b_bcount is zero in InitBP!!\n"); 1979 } 1980 bp->b_proc = b_proc; 1981 bp->b_iodone = cbFunc; 1982 bp->b_vp = b_vp; 1983 1984 } 1985 1986 static void 1987 raidgetdefaultlabel(raidPtr, rs, lp) 1988 RF_Raid_t *raidPtr; 1989 struct raid_softc *rs; 1990 struct disklabel *lp; 1991 { 1992 db1_printf(("Building a default label...\n")); 1993 memset(lp, 0, sizeof(*lp)); 1994 1995 /* fabricate a label... */ 1996 lp->d_secperunit = raidPtr->totalSectors; 1997 lp->d_secsize = raidPtr->bytesPerSector; 1998 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe; 1999 lp->d_ntracks = 4 * raidPtr->numCol; 2000 lp->d_ncylinders = raidPtr->totalSectors / 2001 (lp->d_nsectors * lp->d_ntracks); 2002 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 2003 2004 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename)); 2005 lp->d_type = DTYPE_RAID; 2006 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 2007 lp->d_rpm = 3600; 2008 lp->d_interleave = 1; 2009 lp->d_flags = 0; 2010 2011 lp->d_partitions[RAW_PART].p_offset = 0; 2012 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors; 2013 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 2014 lp->d_npartitions = RAW_PART + 1; 2015 2016 lp->d_magic = DISKMAGIC; 2017 lp->d_magic2 = DISKMAGIC; 2018 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label); 2019 2020 } 2021 /* 2022 * Read the disklabel from the raid device. If one is not present, fake one 2023 * up. 2024 */ 2025 static void 2026 raidgetdisklabel(dev) 2027 dev_t dev; 2028 { 2029 int unit = raidunit(dev); 2030 struct raid_softc *rs = &raid_softc[unit]; 2031 char *errstring; 2032 struct disklabel *lp = rs->sc_dkdev.dk_label; 2033 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel; 2034 RF_Raid_t *raidPtr; 2035 2036 db1_printf(("Getting the disklabel...\n")); 2037 2038 memset(clp, 0, sizeof(*clp)); 2039 2040 raidPtr = raidPtrs[unit]; 2041 2042 raidgetdefaultlabel(raidPtr, rs, lp); 2043 2044 /* 2045 * Call the generic disklabel extraction routine. 2046 */ 2047 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, 2048 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel); 2049 if (errstring) 2050 raidmakedisklabel(rs); 2051 else { 2052 int i; 2053 struct partition *pp; 2054 2055 /* 2056 * Sanity check whether the found disklabel is valid. 2057 * 2058 * This is necessary since total size of the raid device 2059 * may vary when an interleave is changed even though exactly 2060 * same componets are used, and old disklabel may used 2061 * if that is found. 2062 */ 2063 if (lp->d_secperunit != rs->sc_size) 2064 printf("WARNING: %s: " 2065 "total sector size in disklabel (%d) != " 2066 "the size of raid (%ld)\n", rs->sc_xname, 2067 lp->d_secperunit, (long) rs->sc_size); 2068 for (i = 0; i < lp->d_npartitions; i++) { 2069 pp = &lp->d_partitions[i]; 2070 if (pp->p_offset + pp->p_size > rs->sc_size) 2071 printf("WARNING: %s: end of partition `%c' " 2072 "exceeds the size of raid (%ld)\n", 2073 rs->sc_xname, 'a' + i, (long) rs->sc_size); 2074 } 2075 } 2076 2077 } 2078 /* 2079 * Take care of things one might want to take care of in the event 2080 * that a disklabel isn't present. 2081 */ 2082 static void 2083 raidmakedisklabel(rs) 2084 struct raid_softc *rs; 2085 { 2086 struct disklabel *lp = rs->sc_dkdev.dk_label; 2087 db1_printf(("Making a label..\n")); 2088 2089 /* 2090 * For historical reasons, if there's no disklabel present 2091 * the raw partition must be marked FS_BSDFFS. 2092 */ 2093 2094 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 2095 2096 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 2097 2098 lp->d_checksum = dkcksum(lp); 2099 } 2100 /* 2101 * Lookup the provided name in the filesystem. If the file exists, 2102 * is a valid block device, and isn't being used by anyone else, 2103 * set *vpp to the file's vnode. 2104 * You'll find the original of this in ccd.c 2105 */ 2106 int 2107 raidlookup(path, p, vpp) 2108 char *path; 2109 struct proc *p; 2110 struct vnode **vpp; /* result */ 2111 { 2112 struct nameidata nd; 2113 struct vnode *vp; 2114 struct vattr va; 2115 int error; 2116 2117 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p); 2118 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) { 2119 #ifdef DEBUG 2120 printf("RAIDframe: vn_open returned %d\n", error); 2121 #endif 2122 return (error); 2123 } 2124 vp = nd.ni_vp; 2125 if (vp->v_usecount > 1) { 2126 VOP_UNLOCK(vp, 0); 2127 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); 2128 return (EBUSY); 2129 } 2130 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) { 2131 VOP_UNLOCK(vp, 0); 2132 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); 2133 return (error); 2134 } 2135 /* XXX: eventually we should handle VREG, too. */ 2136 if (va.va_type != VBLK) { 2137 VOP_UNLOCK(vp, 0); 2138 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); 2139 return (ENOTBLK); 2140 } 2141 VOP_UNLOCK(vp, 0); 2142 *vpp = vp; 2143 return (0); 2144 } 2145 /* 2146 * Wait interruptibly for an exclusive lock. 2147 * 2148 * XXX 2149 * Several drivers do this; it should be abstracted and made MP-safe. 2150 * (Hmm... where have we seen this warning before :-> GO ) 2151 */ 2152 static int 2153 raidlock(rs) 2154 struct raid_softc *rs; 2155 { 2156 int error; 2157 2158 while ((rs->sc_flags & RAIDF_LOCKED) != 0) { 2159 rs->sc_flags |= RAIDF_WANTED; 2160 if ((error = 2161 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0) 2162 return (error); 2163 } 2164 rs->sc_flags |= RAIDF_LOCKED; 2165 return (0); 2166 } 2167 /* 2168 * Unlock and wake up any waiters. 2169 */ 2170 static void 2171 raidunlock(rs) 2172 struct raid_softc *rs; 2173 { 2174 2175 rs->sc_flags &= ~RAIDF_LOCKED; 2176 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2177 rs->sc_flags &= ~RAIDF_WANTED; 2178 wakeup(rs); 2179 } 2180 } 2181 2182 2183 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2184 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2185 2186 int 2187 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) 2188 { 2189 RF_ComponentLabel_t clabel; 2190 raidread_component_label(dev, b_vp, &clabel); 2191 clabel.mod_counter = mod_counter; 2192 clabel.clean = RF_RAID_CLEAN; 2193 raidwrite_component_label(dev, b_vp, &clabel); 2194 return(0); 2195 } 2196 2197 2198 int 2199 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) 2200 { 2201 RF_ComponentLabel_t clabel; 2202 raidread_component_label(dev, b_vp, &clabel); 2203 clabel.mod_counter = mod_counter; 2204 clabel.clean = RF_RAID_DIRTY; 2205 raidwrite_component_label(dev, b_vp, &clabel); 2206 return(0); 2207 } 2208 2209 /* ARGSUSED */ 2210 int 2211 raidread_component_label(dev, b_vp, clabel) 2212 dev_t dev; 2213 struct vnode *b_vp; 2214 RF_ComponentLabel_t *clabel; 2215 { 2216 struct buf *bp; 2217 int error; 2218 2219 /* XXX should probably ensure that we don't try to do this if 2220 someone has changed rf_protected_sectors. */ 2221 2222 if (b_vp == NULL) { 2223 /* For whatever reason, this component is not valid. 2224 Don't try to read a component label from it. */ 2225 return(EINVAL); 2226 } 2227 2228 /* get a block of the appropriate size... */ 2229 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2230 bp->b_dev = dev; 2231 2232 /* get our ducks in a row for the read */ 2233 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2234 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2235 bp->b_flags |= B_READ; 2236 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2237 2238 (*bdevsw[major(bp->b_dev)].d_strategy)(bp); 2239 2240 error = biowait(bp); 2241 2242 if (!error) { 2243 memcpy(clabel, bp->b_data, 2244 sizeof(RF_ComponentLabel_t)); 2245 #if 0 2246 rf_print_component_label( clabel ); 2247 #endif 2248 } else { 2249 #if 0 2250 printf("Failed to read RAID component label!\n"); 2251 #endif 2252 } 2253 2254 brelse(bp); 2255 return(error); 2256 } 2257 /* ARGSUSED */ 2258 int 2259 raidwrite_component_label(dev, b_vp, clabel) 2260 dev_t dev; 2261 struct vnode *b_vp; 2262 RF_ComponentLabel_t *clabel; 2263 { 2264 struct buf *bp; 2265 int error; 2266 2267 /* get a block of the appropriate size... */ 2268 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2269 bp->b_dev = dev; 2270 2271 /* get our ducks in a row for the write */ 2272 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2273 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2274 bp->b_flags |= B_WRITE; 2275 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2276 2277 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); 2278 2279 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); 2280 2281 (*bdevsw[major(bp->b_dev)].d_strategy)(bp); 2282 error = biowait(bp); 2283 brelse(bp); 2284 if (error) { 2285 #if 1 2286 printf("Failed to write RAID component info!\n"); 2287 #endif 2288 } 2289 2290 return(error); 2291 } 2292 2293 void 2294 rf_markalldirty(raidPtr) 2295 RF_Raid_t *raidPtr; 2296 { 2297 RF_ComponentLabel_t clabel; 2298 int r,c; 2299 2300 raidPtr->mod_counter++; 2301 for (r = 0; r < raidPtr->numRow; r++) { 2302 for (c = 0; c < raidPtr->numCol; c++) { 2303 /* we don't want to touch (at all) a disk that has 2304 failed */ 2305 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) { 2306 raidread_component_label( 2307 raidPtr->Disks[r][c].dev, 2308 raidPtr->raid_cinfo[r][c].ci_vp, 2309 &clabel); 2310 if (clabel.status == rf_ds_spared) { 2311 /* XXX do something special... 2312 but whatever you do, don't 2313 try to access it!! */ 2314 } else { 2315 #if 0 2316 clabel.status = 2317 raidPtr->Disks[r][c].status; 2318 raidwrite_component_label( 2319 raidPtr->Disks[r][c].dev, 2320 raidPtr->raid_cinfo[r][c].ci_vp, 2321 &clabel); 2322 #endif 2323 raidmarkdirty( 2324 raidPtr->Disks[r][c].dev, 2325 raidPtr->raid_cinfo[r][c].ci_vp, 2326 raidPtr->mod_counter); 2327 } 2328 } 2329 } 2330 } 2331 /* printf("Component labels marked dirty.\n"); */ 2332 #if 0 2333 for( c = 0; c < raidPtr->numSpare ; c++) { 2334 sparecol = raidPtr->numCol + c; 2335 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) { 2336 /* 2337 2338 XXX this is where we get fancy and map this spare 2339 into it's correct spot in the array. 2340 2341 */ 2342 /* 2343 2344 we claim this disk is "optimal" if it's 2345 rf_ds_used_spare, as that means it should be 2346 directly substitutable for the disk it replaced. 2347 We note that too... 2348 2349 */ 2350 2351 for(i=0;i<raidPtr->numRow;i++) { 2352 for(j=0;j<raidPtr->numCol;j++) { 2353 if ((raidPtr->Disks[i][j].spareRow == 2354 r) && 2355 (raidPtr->Disks[i][j].spareCol == 2356 sparecol)) { 2357 srow = r; 2358 scol = sparecol; 2359 break; 2360 } 2361 } 2362 } 2363 2364 raidread_component_label( 2365 raidPtr->Disks[r][sparecol].dev, 2366 raidPtr->raid_cinfo[r][sparecol].ci_vp, 2367 &clabel); 2368 /* make sure status is noted */ 2369 clabel.version = RF_COMPONENT_LABEL_VERSION; 2370 clabel.mod_counter = raidPtr->mod_counter; 2371 clabel.serial_number = raidPtr->serial_number; 2372 clabel.row = srow; 2373 clabel.column = scol; 2374 clabel.num_rows = raidPtr->numRow; 2375 clabel.num_columns = raidPtr->numCol; 2376 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/ 2377 clabel.status = rf_ds_optimal; 2378 raidwrite_component_label( 2379 raidPtr->Disks[r][sparecol].dev, 2380 raidPtr->raid_cinfo[r][sparecol].ci_vp, 2381 &clabel); 2382 raidmarkclean( raidPtr->Disks[r][sparecol].dev, 2383 raidPtr->raid_cinfo[r][sparecol].ci_vp); 2384 } 2385 } 2386 2387 #endif 2388 } 2389 2390 2391 void 2392 rf_update_component_labels(raidPtr, final) 2393 RF_Raid_t *raidPtr; 2394 int final; 2395 { 2396 RF_ComponentLabel_t clabel; 2397 int sparecol; 2398 int r,c; 2399 int i,j; 2400 int srow, scol; 2401 2402 srow = -1; 2403 scol = -1; 2404 2405 /* XXX should do extra checks to make sure things really are clean, 2406 rather than blindly setting the clean bit... */ 2407 2408 raidPtr->mod_counter++; 2409 2410 for (r = 0; r < raidPtr->numRow; r++) { 2411 for (c = 0; c < raidPtr->numCol; c++) { 2412 if (raidPtr->Disks[r][c].status == rf_ds_optimal) { 2413 raidread_component_label( 2414 raidPtr->Disks[r][c].dev, 2415 raidPtr->raid_cinfo[r][c].ci_vp, 2416 &clabel); 2417 /* make sure status is noted */ 2418 clabel.status = rf_ds_optimal; 2419 /* bump the counter */ 2420 clabel.mod_counter = raidPtr->mod_counter; 2421 2422 raidwrite_component_label( 2423 raidPtr->Disks[r][c].dev, 2424 raidPtr->raid_cinfo[r][c].ci_vp, 2425 &clabel); 2426 if (final == RF_FINAL_COMPONENT_UPDATE) { 2427 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2428 raidmarkclean( 2429 raidPtr->Disks[r][c].dev, 2430 raidPtr->raid_cinfo[r][c].ci_vp, 2431 raidPtr->mod_counter); 2432 } 2433 } 2434 } 2435 /* else we don't touch it.. */ 2436 } 2437 } 2438 2439 for( c = 0; c < raidPtr->numSpare ; c++) { 2440 sparecol = raidPtr->numCol + c; 2441 /* Need to ensure that the reconstruct actually completed! */ 2442 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) { 2443 /* 2444 2445 we claim this disk is "optimal" if it's 2446 rf_ds_used_spare, as that means it should be 2447 directly substitutable for the disk it replaced. 2448 We note that too... 2449 2450 */ 2451 2452 for(i=0;i<raidPtr->numRow;i++) { 2453 for(j=0;j<raidPtr->numCol;j++) { 2454 if ((raidPtr->Disks[i][j].spareRow == 2455 0) && 2456 (raidPtr->Disks[i][j].spareCol == 2457 sparecol)) { 2458 srow = i; 2459 scol = j; 2460 break; 2461 } 2462 } 2463 } 2464 2465 /* XXX shouldn't *really* need this... */ 2466 raidread_component_label( 2467 raidPtr->Disks[0][sparecol].dev, 2468 raidPtr->raid_cinfo[0][sparecol].ci_vp, 2469 &clabel); 2470 /* make sure status is noted */ 2471 2472 raid_init_component_label(raidPtr, &clabel); 2473 2474 clabel.mod_counter = raidPtr->mod_counter; 2475 clabel.row = srow; 2476 clabel.column = scol; 2477 clabel.status = rf_ds_optimal; 2478 2479 raidwrite_component_label( 2480 raidPtr->Disks[0][sparecol].dev, 2481 raidPtr->raid_cinfo[0][sparecol].ci_vp, 2482 &clabel); 2483 if (final == RF_FINAL_COMPONENT_UPDATE) { 2484 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2485 raidmarkclean( raidPtr->Disks[0][sparecol].dev, 2486 raidPtr->raid_cinfo[0][sparecol].ci_vp, 2487 raidPtr->mod_counter); 2488 } 2489 } 2490 } 2491 } 2492 /* printf("Component labels updated\n"); */ 2493 } 2494 2495 void 2496 rf_close_component(raidPtr, vp, auto_configured) 2497 RF_Raid_t *raidPtr; 2498 struct vnode *vp; 2499 int auto_configured; 2500 { 2501 struct proc *p; 2502 2503 p = raidPtr->engine_thread; 2504 2505 if (vp != NULL) { 2506 if (auto_configured == 1) { 2507 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2508 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2509 vput(vp); 2510 2511 } else { 2512 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); 2513 } 2514 } else { 2515 printf("vnode was NULL\n"); 2516 } 2517 } 2518 2519 2520 void 2521 rf_UnconfigureVnodes(raidPtr) 2522 RF_Raid_t *raidPtr; 2523 { 2524 int r,c; 2525 struct proc *p; 2526 struct vnode *vp; 2527 int acd; 2528 2529 2530 /* We take this opportunity to close the vnodes like we should.. */ 2531 2532 p = raidPtr->engine_thread; 2533 2534 for (r = 0; r < raidPtr->numRow; r++) { 2535 for (c = 0; c < raidPtr->numCol; c++) { 2536 printf("Closing vnode for row: %d col: %d\n", r, c); 2537 vp = raidPtr->raid_cinfo[r][c].ci_vp; 2538 acd = raidPtr->Disks[r][c].auto_configured; 2539 rf_close_component(raidPtr, vp, acd); 2540 raidPtr->raid_cinfo[r][c].ci_vp = NULL; 2541 raidPtr->Disks[r][c].auto_configured = 0; 2542 } 2543 } 2544 for (r = 0; r < raidPtr->numSpare; r++) { 2545 printf("Closing vnode for spare: %d\n", r); 2546 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp; 2547 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured; 2548 rf_close_component(raidPtr, vp, acd); 2549 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL; 2550 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0; 2551 } 2552 } 2553 2554 2555 void 2556 rf_ReconThread(req) 2557 struct rf_recon_req *req; 2558 { 2559 int s; 2560 RF_Raid_t *raidPtr; 2561 2562 s = splbio(); 2563 raidPtr = (RF_Raid_t *) req->raidPtr; 2564 raidPtr->recon_in_progress = 1; 2565 2566 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col, 2567 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); 2568 2569 /* XXX get rid of this! we don't need it at all.. */ 2570 RF_Free(req, sizeof(*req)); 2571 2572 raidPtr->recon_in_progress = 0; 2573 splx(s); 2574 2575 /* That's all... */ 2576 kthread_exit(0); /* does not return */ 2577 } 2578 2579 void 2580 rf_RewriteParityThread(raidPtr) 2581 RF_Raid_t *raidPtr; 2582 { 2583 int retcode; 2584 int s; 2585 2586 raidPtr->parity_rewrite_in_progress = 1; 2587 s = splbio(); 2588 retcode = rf_RewriteParity(raidPtr); 2589 splx(s); 2590 if (retcode) { 2591 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid); 2592 } else { 2593 /* set the clean bit! If we shutdown correctly, 2594 the clean bit on each component label will get 2595 set */ 2596 raidPtr->parity_good = RF_RAID_CLEAN; 2597 } 2598 raidPtr->parity_rewrite_in_progress = 0; 2599 2600 /* Anyone waiting for us to stop? If so, inform them... */ 2601 if (raidPtr->waitShutdown) { 2602 wakeup(&raidPtr->parity_rewrite_in_progress); 2603 } 2604 2605 /* That's all... */ 2606 kthread_exit(0); /* does not return */ 2607 } 2608 2609 2610 void 2611 rf_CopybackThread(raidPtr) 2612 RF_Raid_t *raidPtr; 2613 { 2614 int s; 2615 2616 raidPtr->copyback_in_progress = 1; 2617 s = splbio(); 2618 rf_CopybackReconstructedData(raidPtr); 2619 splx(s); 2620 raidPtr->copyback_in_progress = 0; 2621 2622 /* That's all... */ 2623 kthread_exit(0); /* does not return */ 2624 } 2625 2626 2627 void 2628 rf_ReconstructInPlaceThread(req) 2629 struct rf_recon_req *req; 2630 { 2631 int retcode; 2632 int s; 2633 RF_Raid_t *raidPtr; 2634 2635 s = splbio(); 2636 raidPtr = req->raidPtr; 2637 raidPtr->recon_in_progress = 1; 2638 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col); 2639 RF_Free(req, sizeof(*req)); 2640 raidPtr->recon_in_progress = 0; 2641 splx(s); 2642 2643 /* That's all... */ 2644 kthread_exit(0); /* does not return */ 2645 } 2646 2647 void 2648 rf_mountroot_hook(dev) 2649 struct device *dev; 2650 { 2651 2652 } 2653 2654 2655 RF_AutoConfig_t * 2656 rf_find_raid_components() 2657 { 2658 struct devnametobdevmaj *dtobdm; 2659 struct vnode *vp; 2660 struct disklabel label; 2661 struct device *dv; 2662 char *cd_name; 2663 dev_t dev; 2664 int error; 2665 int i; 2666 int good_one; 2667 RF_ComponentLabel_t *clabel; 2668 RF_AutoConfig_t *ac_list; 2669 RF_AutoConfig_t *ac; 2670 2671 2672 /* initialize the AutoConfig list */ 2673 ac_list = NULL; 2674 2675 /* we begin by trolling through *all* the devices on the system */ 2676 2677 for (dv = alldevs.tqh_first; dv != NULL; 2678 dv = dv->dv_list.tqe_next) { 2679 2680 /* we are only interested in disks... */ 2681 if (dv->dv_class != DV_DISK) 2682 continue; 2683 2684 /* we don't care about floppies... */ 2685 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) { 2686 continue; 2687 } 2688 2689 /* need to find the device_name_to_block_device_major stuff */ 2690 cd_name = dv->dv_cfdata->cf_driver->cd_name; 2691 dtobdm = dev_name2blk; 2692 while (dtobdm->d_name && strcmp(dtobdm->d_name, cd_name)) { 2693 dtobdm++; 2694 } 2695 2696 /* get a vnode for the raw partition of this disk */ 2697 2698 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, RAW_PART); 2699 if (bdevvp(dev, &vp)) 2700 panic("RAID can't alloc vnode"); 2701 2702 error = VOP_OPEN(vp, FREAD, NOCRED, 0); 2703 2704 if (error) { 2705 /* "Who cares." Continue looking 2706 for something that exists*/ 2707 vput(vp); 2708 continue; 2709 } 2710 2711 /* Ok, the disk exists. Go get the disklabel. */ 2712 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label, 2713 FREAD, NOCRED, 0); 2714 if (error) { 2715 /* 2716 * XXX can't happen - open() would 2717 * have errored out (or faked up one) 2718 */ 2719 printf("can't get label for dev %s%c (%d)!?!?\n", 2720 dv->dv_xname, 'a' + RAW_PART, error); 2721 } 2722 2723 /* don't need this any more. We'll allocate it again 2724 a little later if we really do... */ 2725 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2726 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2727 vput(vp); 2728 2729 for (i=0; i < label.d_npartitions; i++) { 2730 /* We only support partitions marked as RAID */ 2731 if (label.d_partitions[i].p_fstype != FS_RAID) 2732 continue; 2733 2734 dev = MAKEDISKDEV(dtobdm->d_maj, dv->dv_unit, i); 2735 if (bdevvp(dev, &vp)) 2736 panic("RAID can't alloc vnode"); 2737 2738 error = VOP_OPEN(vp, FREAD, NOCRED, 0); 2739 if (error) { 2740 /* Whatever... */ 2741 vput(vp); 2742 continue; 2743 } 2744 2745 good_one = 0; 2746 2747 clabel = (RF_ComponentLabel_t *) 2748 malloc(sizeof(RF_ComponentLabel_t), 2749 M_RAIDFRAME, M_NOWAIT); 2750 if (clabel == NULL) { 2751 /* XXX CLEANUP HERE */ 2752 printf("RAID auto config: out of memory!\n"); 2753 return(NULL); /* XXX probably should panic? */ 2754 } 2755 2756 if (!raidread_component_label(dev, vp, clabel)) { 2757 /* Got the label. Does it look reasonable? */ 2758 if (rf_reasonable_label(clabel) && 2759 (clabel->partitionSize <= 2760 label.d_partitions[i].p_size)) { 2761 #if DEBUG 2762 printf("Component on: %s%c: %d\n", 2763 dv->dv_xname, 'a'+i, 2764 label.d_partitions[i].p_size); 2765 rf_print_component_label(clabel); 2766 #endif 2767 /* if it's reasonable, add it, 2768 else ignore it. */ 2769 ac = (RF_AutoConfig_t *) 2770 malloc(sizeof(RF_AutoConfig_t), 2771 M_RAIDFRAME, 2772 M_NOWAIT); 2773 if (ac == NULL) { 2774 /* XXX should panic?? */ 2775 return(NULL); 2776 } 2777 2778 sprintf(ac->devname, "%s%c", 2779 dv->dv_xname, 'a'+i); 2780 ac->dev = dev; 2781 ac->vp = vp; 2782 ac->clabel = clabel; 2783 ac->next = ac_list; 2784 ac_list = ac; 2785 good_one = 1; 2786 } 2787 } 2788 if (!good_one) { 2789 /* cleanup */ 2790 free(clabel, M_RAIDFRAME); 2791 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2792 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2793 vput(vp); 2794 } 2795 } 2796 } 2797 return(ac_list); 2798 } 2799 2800 static int 2801 rf_reasonable_label(clabel) 2802 RF_ComponentLabel_t *clabel; 2803 { 2804 2805 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || 2806 (clabel->version==RF_COMPONENT_LABEL_VERSION)) && 2807 ((clabel->clean == RF_RAID_CLEAN) || 2808 (clabel->clean == RF_RAID_DIRTY)) && 2809 clabel->row >=0 && 2810 clabel->column >= 0 && 2811 clabel->num_rows > 0 && 2812 clabel->num_columns > 0 && 2813 clabel->row < clabel->num_rows && 2814 clabel->column < clabel->num_columns && 2815 clabel->blockSize > 0 && 2816 clabel->numBlocks > 0) { 2817 /* label looks reasonable enough... */ 2818 return(1); 2819 } 2820 return(0); 2821 } 2822 2823 2824 void 2825 rf_print_component_label(clabel) 2826 RF_ComponentLabel_t *clabel; 2827 { 2828 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 2829 clabel->row, clabel->column, 2830 clabel->num_rows, clabel->num_columns); 2831 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 2832 clabel->version, clabel->serial_number, 2833 clabel->mod_counter); 2834 printf(" Clean: %s Status: %d\n", 2835 clabel->clean ? "Yes" : "No", clabel->status ); 2836 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", 2837 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); 2838 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n", 2839 (char) clabel->parityConfig, clabel->blockSize, 2840 clabel->numBlocks); 2841 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" ); 2842 printf(" Contains root partition: %s\n", 2843 clabel->root_partition ? "Yes" : "No" ); 2844 printf(" Last configured as: raid%d\n", clabel->last_unit ); 2845 #if 0 2846 printf(" Config order: %d\n", clabel->config_order); 2847 #endif 2848 2849 } 2850 2851 RF_ConfigSet_t * 2852 rf_create_auto_sets(ac_list) 2853 RF_AutoConfig_t *ac_list; 2854 { 2855 RF_AutoConfig_t *ac; 2856 RF_ConfigSet_t *config_sets; 2857 RF_ConfigSet_t *cset; 2858 RF_AutoConfig_t *ac_next; 2859 2860 2861 config_sets = NULL; 2862 2863 /* Go through the AutoConfig list, and figure out which components 2864 belong to what sets. */ 2865 ac = ac_list; 2866 while(ac!=NULL) { 2867 /* we're going to putz with ac->next, so save it here 2868 for use at the end of the loop */ 2869 ac_next = ac->next; 2870 2871 if (config_sets == NULL) { 2872 /* will need at least this one... */ 2873 config_sets = (RF_ConfigSet_t *) 2874 malloc(sizeof(RF_ConfigSet_t), 2875 M_RAIDFRAME, M_NOWAIT); 2876 if (config_sets == NULL) { 2877 panic("rf_create_auto_sets: No memory!\n"); 2878 } 2879 /* this one is easy :) */ 2880 config_sets->ac = ac; 2881 config_sets->next = NULL; 2882 config_sets->rootable = 0; 2883 ac->next = NULL; 2884 } else { 2885 /* which set does this component fit into? */ 2886 cset = config_sets; 2887 while(cset!=NULL) { 2888 if (rf_does_it_fit(cset, ac)) { 2889 /* looks like it matches... */ 2890 ac->next = cset->ac; 2891 cset->ac = ac; 2892 break; 2893 } 2894 cset = cset->next; 2895 } 2896 if (cset==NULL) { 2897 /* didn't find a match above... new set..*/ 2898 cset = (RF_ConfigSet_t *) 2899 malloc(sizeof(RF_ConfigSet_t), 2900 M_RAIDFRAME, M_NOWAIT); 2901 if (cset == NULL) { 2902 panic("rf_create_auto_sets: No memory!\n"); 2903 } 2904 cset->ac = ac; 2905 ac->next = NULL; 2906 cset->next = config_sets; 2907 cset->rootable = 0; 2908 config_sets = cset; 2909 } 2910 } 2911 ac = ac_next; 2912 } 2913 2914 2915 return(config_sets); 2916 } 2917 2918 static int 2919 rf_does_it_fit(cset, ac) 2920 RF_ConfigSet_t *cset; 2921 RF_AutoConfig_t *ac; 2922 { 2923 RF_ComponentLabel_t *clabel1, *clabel2; 2924 2925 /* If this one matches the *first* one in the set, that's good 2926 enough, since the other members of the set would have been 2927 through here too... */ 2928 /* note that we are not checking partitionSize here.. 2929 2930 Note that we are also not checking the mod_counters here. 2931 If everything else matches execpt the mod_counter, that's 2932 good enough for this test. We will deal with the mod_counters 2933 a little later in the autoconfiguration process. 2934 2935 (clabel1->mod_counter == clabel2->mod_counter) && 2936 2937 The reason we don't check for this is that failed disks 2938 will have lower modification counts. If those disks are 2939 not added to the set they used to belong to, then they will 2940 form their own set, which may result in 2 different sets, 2941 for example, competing to be configured at raid0, and 2942 perhaps competing to be the root filesystem set. If the 2943 wrong ones get configured, or both attempt to become /, 2944 weird behaviour and or serious lossage will occur. Thus we 2945 need to bring them into the fold here, and kick them out at 2946 a later point. 2947 2948 */ 2949 2950 clabel1 = cset->ac->clabel; 2951 clabel2 = ac->clabel; 2952 if ((clabel1->version == clabel2->version) && 2953 (clabel1->serial_number == clabel2->serial_number) && 2954 (clabel1->num_rows == clabel2->num_rows) && 2955 (clabel1->num_columns == clabel2->num_columns) && 2956 (clabel1->sectPerSU == clabel2->sectPerSU) && 2957 (clabel1->SUsPerPU == clabel2->SUsPerPU) && 2958 (clabel1->SUsPerRU == clabel2->SUsPerRU) && 2959 (clabel1->parityConfig == clabel2->parityConfig) && 2960 (clabel1->maxOutstanding == clabel2->maxOutstanding) && 2961 (clabel1->blockSize == clabel2->blockSize) && 2962 (clabel1->numBlocks == clabel2->numBlocks) && 2963 (clabel1->autoconfigure == clabel2->autoconfigure) && 2964 (clabel1->root_partition == clabel2->root_partition) && 2965 (clabel1->last_unit == clabel2->last_unit) && 2966 (clabel1->config_order == clabel2->config_order)) { 2967 /* if it get's here, it almost *has* to be a match */ 2968 } else { 2969 /* it's not consistent with somebody in the set.. 2970 punt */ 2971 return(0); 2972 } 2973 /* all was fine.. it must fit... */ 2974 return(1); 2975 } 2976 2977 int 2978 rf_have_enough_components(cset) 2979 RF_ConfigSet_t *cset; 2980 { 2981 RF_AutoConfig_t *ac; 2982 RF_AutoConfig_t *auto_config; 2983 RF_ComponentLabel_t *clabel; 2984 int r,c; 2985 int num_rows; 2986 int num_cols; 2987 int num_missing; 2988 int mod_counter; 2989 int mod_counter_found; 2990 int even_pair_failed; 2991 char parity_type; 2992 2993 2994 /* check to see that we have enough 'live' components 2995 of this set. If so, we can configure it if necessary */ 2996 2997 num_rows = cset->ac->clabel->num_rows; 2998 num_cols = cset->ac->clabel->num_columns; 2999 parity_type = cset->ac->clabel->parityConfig; 3000 3001 /* XXX Check for duplicate components!?!?!? */ 3002 3003 /* Determine what the mod_counter is supposed to be for this set. */ 3004 3005 mod_counter_found = 0; 3006 mod_counter = 0; 3007 ac = cset->ac; 3008 while(ac!=NULL) { 3009 if (mod_counter_found==0) { 3010 mod_counter = ac->clabel->mod_counter; 3011 mod_counter_found = 1; 3012 } else { 3013 if (ac->clabel->mod_counter > mod_counter) { 3014 mod_counter = ac->clabel->mod_counter; 3015 } 3016 } 3017 ac = ac->next; 3018 } 3019 3020 num_missing = 0; 3021 auto_config = cset->ac; 3022 3023 for(r=0; r<num_rows; r++) { 3024 even_pair_failed = 0; 3025 for(c=0; c<num_cols; c++) { 3026 ac = auto_config; 3027 while(ac!=NULL) { 3028 if ((ac->clabel->row == r) && 3029 (ac->clabel->column == c) && 3030 (ac->clabel->mod_counter == mod_counter)) { 3031 /* it's this one... */ 3032 #if DEBUG 3033 printf("Found: %s at %d,%d\n", 3034 ac->devname,r,c); 3035 #endif 3036 break; 3037 } 3038 ac=ac->next; 3039 } 3040 if (ac==NULL) { 3041 /* Didn't find one here! */ 3042 /* special case for RAID 1, especially 3043 where there are more than 2 3044 components (where RAIDframe treats 3045 things a little differently :( ) */ 3046 if (parity_type == '1') { 3047 if (c%2 == 0) { /* even component */ 3048 even_pair_failed = 1; 3049 } else { /* odd component. If 3050 we're failed, and 3051 so is the even 3052 component, it's 3053 "Good Night, Charlie" */ 3054 if (even_pair_failed == 1) { 3055 return(0); 3056 } 3057 } 3058 } else { 3059 /* normal accounting */ 3060 num_missing++; 3061 } 3062 } 3063 if ((parity_type == '1') && (c%2 == 1)) { 3064 /* Just did an even component, and we didn't 3065 bail.. reset the even_pair_failed flag, 3066 and go on to the next component.... */ 3067 even_pair_failed = 0; 3068 } 3069 } 3070 } 3071 3072 clabel = cset->ac->clabel; 3073 3074 if (((clabel->parityConfig == '0') && (num_missing > 0)) || 3075 ((clabel->parityConfig == '4') && (num_missing > 1)) || 3076 ((clabel->parityConfig == '5') && (num_missing > 1))) { 3077 /* XXX this needs to be made *much* more general */ 3078 /* Too many failures */ 3079 return(0); 3080 } 3081 /* otherwise, all is well, and we've got enough to take a kick 3082 at autoconfiguring this set */ 3083 return(1); 3084 } 3085 3086 void 3087 rf_create_configuration(ac,config,raidPtr) 3088 RF_AutoConfig_t *ac; 3089 RF_Config_t *config; 3090 RF_Raid_t *raidPtr; 3091 { 3092 RF_ComponentLabel_t *clabel; 3093 int i; 3094 3095 clabel = ac->clabel; 3096 3097 /* 1. Fill in the common stuff */ 3098 config->numRow = clabel->num_rows; 3099 config->numCol = clabel->num_columns; 3100 config->numSpare = 0; /* XXX should this be set here? */ 3101 config->sectPerSU = clabel->sectPerSU; 3102 config->SUsPerPU = clabel->SUsPerPU; 3103 config->SUsPerRU = clabel->SUsPerRU; 3104 config->parityConfig = clabel->parityConfig; 3105 /* XXX... */ 3106 strcpy(config->diskQueueType,"fifo"); 3107 config->maxOutstandingDiskReqs = clabel->maxOutstanding; 3108 config->layoutSpecificSize = 0; /* XXX ?? */ 3109 3110 while(ac!=NULL) { 3111 /* row/col values will be in range due to the checks 3112 in reasonable_label() */ 3113 strcpy(config->devnames[ac->clabel->row][ac->clabel->column], 3114 ac->devname); 3115 ac = ac->next; 3116 } 3117 3118 for(i=0;i<RF_MAXDBGV;i++) { 3119 config->debugVars[i][0] = NULL; 3120 } 3121 } 3122 3123 int 3124 rf_set_autoconfig(raidPtr, new_value) 3125 RF_Raid_t *raidPtr; 3126 int new_value; 3127 { 3128 RF_ComponentLabel_t clabel; 3129 struct vnode *vp; 3130 dev_t dev; 3131 int row, column; 3132 3133 raidPtr->autoconfigure = new_value; 3134 for(row=0; row<raidPtr->numRow; row++) { 3135 for(column=0; column<raidPtr->numCol; column++) { 3136 if (raidPtr->Disks[row][column].status == 3137 rf_ds_optimal) { 3138 dev = raidPtr->Disks[row][column].dev; 3139 vp = raidPtr->raid_cinfo[row][column].ci_vp; 3140 raidread_component_label(dev, vp, &clabel); 3141 clabel.autoconfigure = new_value; 3142 raidwrite_component_label(dev, vp, &clabel); 3143 } 3144 } 3145 } 3146 return(new_value); 3147 } 3148 3149 int 3150 rf_set_rootpartition(raidPtr, new_value) 3151 RF_Raid_t *raidPtr; 3152 int new_value; 3153 { 3154 RF_ComponentLabel_t clabel; 3155 struct vnode *vp; 3156 dev_t dev; 3157 int row, column; 3158 3159 raidPtr->root_partition = new_value; 3160 for(row=0; row<raidPtr->numRow; row++) { 3161 for(column=0; column<raidPtr->numCol; column++) { 3162 if (raidPtr->Disks[row][column].status == 3163 rf_ds_optimal) { 3164 dev = raidPtr->Disks[row][column].dev; 3165 vp = raidPtr->raid_cinfo[row][column].ci_vp; 3166 raidread_component_label(dev, vp, &clabel); 3167 clabel.root_partition = new_value; 3168 raidwrite_component_label(dev, vp, &clabel); 3169 } 3170 } 3171 } 3172 return(new_value); 3173 } 3174 3175 void 3176 rf_release_all_vps(cset) 3177 RF_ConfigSet_t *cset; 3178 { 3179 RF_AutoConfig_t *ac; 3180 3181 ac = cset->ac; 3182 while(ac!=NULL) { 3183 /* Close the vp, and give it back */ 3184 if (ac->vp) { 3185 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 3186 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0); 3187 vput(ac->vp); 3188 ac->vp = NULL; 3189 } 3190 ac = ac->next; 3191 } 3192 } 3193 3194 3195 void 3196 rf_cleanup_config_set(cset) 3197 RF_ConfigSet_t *cset; 3198 { 3199 RF_AutoConfig_t *ac; 3200 RF_AutoConfig_t *next_ac; 3201 3202 ac = cset->ac; 3203 while(ac!=NULL) { 3204 next_ac = ac->next; 3205 /* nuke the label */ 3206 free(ac->clabel, M_RAIDFRAME); 3207 /* cleanup the config structure */ 3208 free(ac, M_RAIDFRAME); 3209 /* "next.." */ 3210 ac = next_ac; 3211 } 3212 /* and, finally, nuke the config set */ 3213 free(cset, M_RAIDFRAME); 3214 } 3215 3216 3217 void 3218 raid_init_component_label(raidPtr, clabel) 3219 RF_Raid_t *raidPtr; 3220 RF_ComponentLabel_t *clabel; 3221 { 3222 /* current version number */ 3223 clabel->version = RF_COMPONENT_LABEL_VERSION; 3224 clabel->serial_number = raidPtr->serial_number; 3225 clabel->mod_counter = raidPtr->mod_counter; 3226 clabel->num_rows = raidPtr->numRow; 3227 clabel->num_columns = raidPtr->numCol; 3228 clabel->clean = RF_RAID_DIRTY; /* not clean */ 3229 clabel->status = rf_ds_optimal; /* "It's good!" */ 3230 3231 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; 3232 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; 3233 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; 3234 3235 clabel->blockSize = raidPtr->bytesPerSector; 3236 clabel->numBlocks = raidPtr->sectorsPerDisk; 3237 3238 /* XXX not portable */ 3239 clabel->parityConfig = raidPtr->Layout.map->parityConfig; 3240 clabel->maxOutstanding = raidPtr->maxOutstanding; 3241 clabel->autoconfigure = raidPtr->autoconfigure; 3242 clabel->root_partition = raidPtr->root_partition; 3243 clabel->last_unit = raidPtr->raidid; 3244 clabel->config_order = raidPtr->config_order; 3245 } 3246 3247 int 3248 rf_auto_config_set(cset,unit) 3249 RF_ConfigSet_t *cset; 3250 int *unit; 3251 { 3252 RF_Raid_t *raidPtr; 3253 RF_Config_t *config; 3254 int raidID; 3255 int retcode; 3256 3257 printf("RAID autoconfigure\n"); 3258 3259 retcode = 0; 3260 *unit = -1; 3261 3262 /* 1. Create a config structure */ 3263 3264 config = (RF_Config_t *)malloc(sizeof(RF_Config_t), 3265 M_RAIDFRAME, 3266 M_NOWAIT); 3267 if (config==NULL) { 3268 printf("Out of mem!?!?\n"); 3269 /* XXX do something more intelligent here. */ 3270 return(1); 3271 } 3272 3273 memset(config, 0, sizeof(RF_Config_t)); 3274 3275 /* XXX raidID needs to be set correctly.. */ 3276 3277 /* 3278 2. Figure out what RAID ID this one is supposed to live at 3279 See if we can get the same RAID dev that it was configured 3280 on last time.. 3281 */ 3282 3283 raidID = cset->ac->clabel->last_unit; 3284 if ((raidID < 0) || (raidID >= numraid)) { 3285 /* let's not wander off into lala land. */ 3286 raidID = numraid - 1; 3287 } 3288 if (raidPtrs[raidID]->valid != 0) { 3289 3290 /* 3291 Nope... Go looking for an alternative... 3292 Start high so we don't immediately use raid0 if that's 3293 not taken. 3294 */ 3295 3296 for(raidID = numraid; raidID >= 0; raidID--) { 3297 if (raidPtrs[raidID]->valid == 0) { 3298 /* can use this one! */ 3299 break; 3300 } 3301 } 3302 } 3303 3304 if (raidID < 0) { 3305 /* punt... */ 3306 printf("Unable to auto configure this set!\n"); 3307 printf("(Out of RAID devs!)\n"); 3308 return(1); 3309 } 3310 printf("Configuring raid%d:\n",raidID); 3311 raidPtr = raidPtrs[raidID]; 3312 3313 /* XXX all this stuff should be done SOMEWHERE ELSE! */ 3314 raidPtr->raidid = raidID; 3315 raidPtr->openings = RAIDOUTSTANDING; 3316 3317 /* 3. Build the configuration structure */ 3318 rf_create_configuration(cset->ac, config, raidPtr); 3319 3320 /* 4. Do the configuration */ 3321 retcode = rf_Configure(raidPtr, config, cset->ac); 3322 3323 if (retcode == 0) { 3324 3325 raidinit(raidPtrs[raidID]); 3326 3327 rf_markalldirty(raidPtrs[raidID]); 3328 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */ 3329 if (cset->ac->clabel->root_partition==1) { 3330 /* everything configured just fine. Make a note 3331 that this set is eligible to be root. */ 3332 cset->rootable = 1; 3333 /* XXX do this here? */ 3334 raidPtrs[raidID]->root_partition = 1; 3335 } 3336 } 3337 3338 /* 5. Cleanup */ 3339 free(config, M_RAIDFRAME); 3340 3341 *unit = raidID; 3342 return(retcode); 3343 } 3344 3345 void 3346 rf_disk_unbusy(desc) 3347 RF_RaidAccessDesc_t *desc; 3348 { 3349 struct buf *bp; 3350 3351 bp = (struct buf *)desc->bp; 3352 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev, 3353 (bp->b_bcount - bp->b_resid)); 3354 } 3355