1 /* $NetBSD: rf_netbsdkintf.c,v 1.130 2002/09/06 13:18:43 gehenna Exp $ */ 2 /*- 3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster; Jason R. Thorpe. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the NetBSD 20 * Foundation, Inc. and its contributors. 21 * 4. Neither the name of The NetBSD Foundation nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1988 University of Utah. 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. All advertising materials mentioning features or use of this software 56 * must display the following acknowledgement: 57 * This product includes software developed by the University of 58 * California, Berkeley and its contributors. 59 * 4. Neither the name of the University nor the names of its contributors 60 * may be used to endorse or promote products derived from this software 61 * without specific prior written permission. 
62 * 63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 73 * SUCH DAMAGE. 74 * 75 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 76 * 77 * @(#)cd.c 8.2 (Berkeley) 11/16/93 78 */ 79 80 81 82 83 /* 84 * Copyright (c) 1995 Carnegie-Mellon University. 85 * All rights reserved. 86 * 87 * Authors: Mark Holland, Jim Zelenka 88 * 89 * Permission to use, copy, modify and distribute this software and 90 * its documentation is hereby granted, provided that both the copyright 91 * notice and this permission notice appear in all copies of the 92 * software, derivative works or modified versions, and any portions 93 * thereof, and that both notices appear in supporting documentation. 94 * 95 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 96 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 97 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 98 * 99 * Carnegie Mellon requests users of this software to return to 100 * 101 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 102 * School of Computer Science 103 * Carnegie Mellon University 104 * Pittsburgh PA 15213-3890 105 * 106 * any improvements or extensions that they make and grant Carnegie the 107 * rights to redistribute these changes. 
108 */ 109 110 /*********************************************************** 111 * 112 * rf_kintf.c -- the kernel interface routines for RAIDframe 113 * 114 ***********************************************************/ 115 116 #include <sys/cdefs.h> 117 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.130 2002/09/06 13:18:43 gehenna Exp $"); 118 119 #include <sys/param.h> 120 #include <sys/errno.h> 121 #include <sys/pool.h> 122 #include <sys/queue.h> 123 #include <sys/disk.h> 124 #include <sys/device.h> 125 #include <sys/stat.h> 126 #include <sys/ioctl.h> 127 #include <sys/fcntl.h> 128 #include <sys/systm.h> 129 #include <sys/namei.h> 130 #include <sys/vnode.h> 131 #include <sys/disklabel.h> 132 #include <sys/conf.h> 133 #include <sys/lock.h> 134 #include <sys/buf.h> 135 #include <sys/user.h> 136 #include <sys/reboot.h> 137 138 #include <dev/raidframe/raidframevar.h> 139 #include <dev/raidframe/raidframeio.h> 140 #include "raid.h" 141 #include "opt_raid_autoconfig.h" 142 #include "rf_raid.h" 143 #include "rf_copyback.h" 144 #include "rf_dag.h" 145 #include "rf_dagflags.h" 146 #include "rf_desc.h" 147 #include "rf_diskqueue.h" 148 #include "rf_etimer.h" 149 #include "rf_general.h" 150 #include "rf_kintf.h" 151 #include "rf_options.h" 152 #include "rf_driver.h" 153 #include "rf_parityscan.h" 154 #include "rf_threadstuff.h" 155 156 int rf_kdebug_level = 0; 157 158 #ifdef DEBUG 159 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 160 #else /* DEBUG */ 161 #define db1_printf(a) { } 162 #endif /* DEBUG */ 163 164 static RF_Raid_t **raidPtrs; /* global raid device descriptors */ 165 166 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex) 167 168 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a 169 * spare table */ 170 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from 171 * installation process */ 172 173 /* prototypes */ 174 static void KernelWakeupFunc(struct buf * bp); 175 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag, 176 dev_t dev, RF_SectorNum_t startSect, 177 RF_SectorCount_t numSect, caddr_t buf, 178 void (*cbFunc) (struct buf *), void *cbArg, 179 int logBytesPerSector, struct proc * b_proc); 180 static void raidinit(RF_Raid_t *); 181 182 void raidattach(int); 183 184 dev_type_open(raidopen); 185 dev_type_close(raidclose); 186 dev_type_read(raidread); 187 dev_type_write(raidwrite); 188 dev_type_ioctl(raidioctl); 189 dev_type_strategy(raidstrategy); 190 dev_type_dump(raiddump); 191 dev_type_size(raidsize); 192 193 const struct bdevsw raid_bdevsw = { 194 raidopen, raidclose, raidstrategy, raidioctl, 195 raiddump, raidsize, D_DISK 196 }; 197 198 const struct cdevsw raid_cdevsw = { 199 raidopen, raidclose, raidread, raidwrite, raidioctl, 200 nostop, notty, nopoll, nommap, D_DISK 201 }; 202 203 /* 204 * Pilfered from ccd.c 205 */ 206 207 struct raidbuf { 208 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */ 209 struct buf *rf_obp; /* ptr. to original I/O buf */ 210 int rf_flags; /* misc. flags */ 211 RF_DiskQueueData_t *req;/* the request that this was part of.. */ 212 }; 213 214 /* component buffer pool */ 215 struct pool raidframe_cbufpool; 216 217 #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT) 218 #define RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp) 219 220 /* XXX Not sure if the following should be replacing the raidPtrs above, 221 or if it should be used in conjunction with that... 
*/

struct raid_softc {
	int     sc_flags;		/* flags */
	int     sc_cflags;		/* configuration flags */
	size_t  sc_size;		/* size of the raid device */
	char    sc_xname[20];		/* XXX external name */
	struct disk sc_dkdev;		/* generic disk device info */
	struct bufq_state buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

#define	raidunit(x)	DISKUNIT(x)
int numraid = 0;

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
				struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);
void rf_mountroot_hook(struct device *);

struct device *raidrootdev;

void rf_ReconThread(struct rf_recon_req *);
/* XXX what I want is: */
/*void rf_ReconThread(RF_Raid_t *raidPtr);  */
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
void rf_buildroothack(void *);

RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *, RF_AutoConfig_t *);
static int rf_reasonable_label(RF_ComponentLabel_t *);
void rf_create_configuration(RF_AutoConfig_t *, RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
int rf_auto_config_set(RF_ConfigSet_t *, int *);

static int raidautoconfig = 0;	/* Debugging, mostly.  Set to 0 to not
				   allow autoconfig to take place.
				   Note that this is overridden by having
				   RAID_AUTOCONFIG as an option in the
				   kernel config file.
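
				   For example (a sketch of the usual
				   kernel-option mechanism, not anything
				   specific to this driver), a kernel built
				   with

					options RAID_AUTOCONFIG

				   in its config file gets raidautoconfig
				   set to 1 by the #ifdef in raidattach()
				   below, so RAID components are searched
				   for at attach time.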
*/ 307 308 void 309 raidattach(num) 310 int num; 311 { 312 int raidID; 313 int i, rc; 314 RF_AutoConfig_t *ac_list; /* autoconfig list */ 315 RF_ConfigSet_t *config_sets; 316 317 #ifdef DEBUG 318 printf("raidattach: Asked for %d units\n", num); 319 #endif 320 321 if (num <= 0) { 322 #ifdef DIAGNOSTIC 323 panic("raidattach: count <= 0"); 324 #endif 325 return; 326 } 327 /* This is where all the initialization stuff gets done. */ 328 329 numraid = num; 330 331 /* Make some space for requested number of units... */ 332 333 RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **)); 334 if (raidPtrs == NULL) { 335 panic("raidPtrs is NULL!!\n"); 336 } 337 338 /* Initialize the component buffer pool. */ 339 pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0, 340 0, 0, "raidpl", NULL); 341 342 rc = rf_mutex_init(&rf_sparet_wait_mutex); 343 if (rc) { 344 RF_PANIC(); 345 } 346 347 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; 348 349 for (i = 0; i < num; i++) 350 raidPtrs[i] = NULL; 351 rc = rf_BootRaidframe(); 352 if (rc == 0) 353 printf("Kernelized RAIDframe activated\n"); 354 else 355 panic("Serious error booting RAID!!\n"); 356 357 /* put together some datastructures like the CCD device does.. This 358 * lets us lock the device and what-not when it gets opened. */ 359 360 raid_softc = (struct raid_softc *) 361 malloc(num * sizeof(struct raid_softc), 362 M_RAIDFRAME, M_NOWAIT); 363 if (raid_softc == NULL) { 364 printf("WARNING: no memory for RAIDframe driver\n"); 365 return; 366 } 367 368 memset(raid_softc, 0, num * sizeof(struct raid_softc)); 369 370 raidrootdev = (struct device *)malloc(num * sizeof(struct device), 371 M_RAIDFRAME, M_NOWAIT); 372 if (raidrootdev == NULL) { 373 panic("No memory for RAIDframe driver!!?!?!\n"); 374 } 375 376 for (raidID = 0; raidID < num; raidID++) { 377 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS); 378 379 raidrootdev[raidID].dv_class = DV_DISK; 380 raidrootdev[raidID].dv_cfdata = NULL; 381 raidrootdev[raidID].dv_unit = raidID; 382 raidrootdev[raidID].dv_parent = NULL; 383 raidrootdev[raidID].dv_flags = 0; 384 sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID); 385 386 RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t), 387 (RF_Raid_t *)); 388 if (raidPtrs[raidID] == NULL) { 389 printf("WARNING: raidPtrs[%d] is NULL\n", raidID); 390 numraid = raidID; 391 return; 392 } 393 } 394 395 #ifdef RAID_AUTOCONFIG 396 raidautoconfig = 1; 397 #endif 398 399 if (raidautoconfig) { 400 /* 1. locate all RAID components on the system */ 401 402 #if DEBUG 403 printf("Searching for raid components...\n"); 404 #endif 405 ac_list = rf_find_raid_components(); 406 407 /* 2. sort them into their respective sets */ 408 409 config_sets = rf_create_auto_sets(ac_list); 410 411 /* 3. 
evaluate each set and configure the valid ones 412 This gets done in rf_buildroothack() */ 413 414 /* schedule the creation of the thread to do the 415 "/ on RAID" stuff */ 416 417 kthread_create(rf_buildroothack,config_sets); 418 419 #if 0 420 mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]); 421 #endif 422 } 423 424 } 425 426 void 427 rf_buildroothack(arg) 428 void *arg; 429 { 430 RF_ConfigSet_t *config_sets = arg; 431 RF_ConfigSet_t *cset; 432 RF_ConfigSet_t *next_cset; 433 int retcode; 434 int raidID; 435 int rootID; 436 int num_root; 437 438 rootID = 0; 439 num_root = 0; 440 cset = config_sets; 441 while(cset != NULL ) { 442 next_cset = cset->next; 443 if (rf_have_enough_components(cset) && 444 cset->ac->clabel->autoconfigure==1) { 445 retcode = rf_auto_config_set(cset,&raidID); 446 if (!retcode) { 447 if (cset->rootable) { 448 rootID = raidID; 449 num_root++; 450 } 451 } else { 452 /* The autoconfig didn't work :( */ 453 #if DEBUG 454 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID); 455 #endif 456 rf_release_all_vps(cset); 457 } 458 } else { 459 /* we're not autoconfiguring this set... 460 release the associated resources */ 461 rf_release_all_vps(cset); 462 } 463 /* cleanup */ 464 rf_cleanup_config_set(cset); 465 cset = next_cset; 466 } 467 468 /* we found something bootable... */ 469 470 if (num_root == 1) { 471 booted_device = &raidrootdev[rootID]; 472 } else if (num_root > 1) { 473 /* we can't guess.. require the user to answer... */ 474 boothowto |= RB_ASKNAME; 475 } 476 } 477 478 479 int 480 raidsize(dev) 481 dev_t dev; 482 { 483 struct raid_softc *rs; 484 struct disklabel *lp; 485 int part, unit, omask, size; 486 487 unit = raidunit(dev); 488 if (unit >= numraid) 489 return (-1); 490 rs = &raid_softc[unit]; 491 492 if ((rs->sc_flags & RAIDF_INITED) == 0) 493 return (-1); 494 495 part = DISKPART(dev); 496 omask = rs->sc_dkdev.dk_openmask & (1 << part); 497 lp = rs->sc_dkdev.dk_label; 498 499 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc)) 500 return (-1); 501 502 if (lp->d_partitions[part].p_fstype != FS_SWAP) 503 size = -1; 504 else 505 size = lp->d_partitions[part].p_size * 506 (lp->d_secsize / DEV_BSIZE); 507 508 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc)) 509 return (-1); 510 511 return (size); 512 513 } 514 515 int 516 raiddump(dev, blkno, va, size) 517 dev_t dev; 518 daddr_t blkno; 519 caddr_t va; 520 size_t size; 521 { 522 /* Not implemented. 
*/ 523 return ENXIO; 524 } 525 /* ARGSUSED */ 526 int 527 raidopen(dev, flags, fmt, p) 528 dev_t dev; 529 int flags, fmt; 530 struct proc *p; 531 { 532 int unit = raidunit(dev); 533 struct raid_softc *rs; 534 struct disklabel *lp; 535 int part, pmask; 536 int error = 0; 537 538 if (unit >= numraid) 539 return (ENXIO); 540 rs = &raid_softc[unit]; 541 542 if ((error = raidlock(rs)) != 0) 543 return (error); 544 lp = rs->sc_dkdev.dk_label; 545 546 part = DISKPART(dev); 547 pmask = (1 << part); 548 549 db1_printf(("Opening raid device number: %d partition: %d\n", 550 unit, part)); 551 552 553 if ((rs->sc_flags & RAIDF_INITED) && 554 (rs->sc_dkdev.dk_openmask == 0)) 555 raidgetdisklabel(dev); 556 557 /* make sure that this partition exists */ 558 559 if (part != RAW_PART) { 560 db1_printf(("Not a raw partition..\n")); 561 if (((rs->sc_flags & RAIDF_INITED) == 0) || 562 ((part >= lp->d_npartitions) || 563 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 564 error = ENXIO; 565 raidunlock(rs); 566 db1_printf(("Bailing out...\n")); 567 return (error); 568 } 569 } 570 /* Prevent this unit from being unconfigured while open. */ 571 switch (fmt) { 572 case S_IFCHR: 573 rs->sc_dkdev.dk_copenmask |= pmask; 574 break; 575 576 case S_IFBLK: 577 rs->sc_dkdev.dk_bopenmask |= pmask; 578 break; 579 } 580 581 if ((rs->sc_dkdev.dk_openmask == 0) && 582 ((rs->sc_flags & RAIDF_INITED) != 0)) { 583 /* First one... mark things as dirty... Note that we *MUST* 584 have done a configure before this. I DO NOT WANT TO BE 585 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED 586 THAT THEY BELONG TOGETHER!!!!! */ 587 /* XXX should check to see if we're only open for reading 588 here... If so, we needn't do this, but then need some 589 other way of keeping track of what's happened.. */ 590 591 rf_markalldirty( raidPtrs[unit] ); 592 } 593 594 595 rs->sc_dkdev.dk_openmask = 596 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 597 598 raidunlock(rs); 599 600 return (error); 601 602 603 } 604 /* ARGSUSED */ 605 int 606 raidclose(dev, flags, fmt, p) 607 dev_t dev; 608 int flags, fmt; 609 struct proc *p; 610 { 611 int unit = raidunit(dev); 612 struct raid_softc *rs; 613 int error = 0; 614 int part; 615 616 if (unit >= numraid) 617 return (ENXIO); 618 rs = &raid_softc[unit]; 619 620 if ((error = raidlock(rs)) != 0) 621 return (error); 622 623 part = DISKPART(dev); 624 625 /* ...that much closer to allowing unconfiguration... */ 626 switch (fmt) { 627 case S_IFCHR: 628 rs->sc_dkdev.dk_copenmask &= ~(1 << part); 629 break; 630 631 case S_IFBLK: 632 rs->sc_dkdev.dk_bopenmask &= ~(1 << part); 633 break; 634 } 635 rs->sc_dkdev.dk_openmask = 636 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 637 638 if ((rs->sc_dkdev.dk_openmask == 0) && 639 ((rs->sc_flags & RAIDF_INITED) != 0)) { 640 /* Last one... device is not unconfigured yet. 641 Device shutdown has taken care of setting the 642 clean bits if RAIDF_INITED is not set 643 mark things as clean... */ 644 #if 0 645 printf("Last one on raid%d. Updating status.\n",unit); 646 #endif 647 rf_update_component_labels(raidPtrs[unit], 648 RF_FINAL_COMPONENT_UPDATE); 649 if (doing_shutdown) { 650 /* last one, and we're going down, so 651 lights out for this RAID set too. */ 652 error = rf_Shutdown(raidPtrs[unit]); 653 654 /* It's no longer initialized... */ 655 rs->sc_flags &= ~RAIDF_INITED; 656 657 /* Detach the disk. 
*/ 658 disk_detach(&rs->sc_dkdev); 659 } 660 } 661 662 raidunlock(rs); 663 return (0); 664 665 } 666 667 void 668 raidstrategy(bp) 669 struct buf *bp; 670 { 671 int s; 672 673 unsigned int raidID = raidunit(bp->b_dev); 674 RF_Raid_t *raidPtr; 675 struct raid_softc *rs = &raid_softc[raidID]; 676 struct disklabel *lp; 677 int wlabel; 678 679 if ((rs->sc_flags & RAIDF_INITED) ==0) { 680 bp->b_error = ENXIO; 681 bp->b_flags |= B_ERROR; 682 bp->b_resid = bp->b_bcount; 683 biodone(bp); 684 return; 685 } 686 if (raidID >= numraid || !raidPtrs[raidID]) { 687 bp->b_error = ENODEV; 688 bp->b_flags |= B_ERROR; 689 bp->b_resid = bp->b_bcount; 690 biodone(bp); 691 return; 692 } 693 raidPtr = raidPtrs[raidID]; 694 if (!raidPtr->valid) { 695 bp->b_error = ENODEV; 696 bp->b_flags |= B_ERROR; 697 bp->b_resid = bp->b_bcount; 698 biodone(bp); 699 return; 700 } 701 if (bp->b_bcount == 0) { 702 db1_printf(("b_bcount is zero..\n")); 703 biodone(bp); 704 return; 705 } 706 lp = rs->sc_dkdev.dk_label; 707 708 /* 709 * Do bounds checking and adjust transfer. If there's an 710 * error, the bounds check will flag that for us. 711 */ 712 713 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING); 714 if (DISKPART(bp->b_dev) != RAW_PART) 715 if (bounds_check_with_label(bp, lp, wlabel) <= 0) { 716 db1_printf(("Bounds check failed!!:%d %d\n", 717 (int) bp->b_blkno, (int) wlabel)); 718 biodone(bp); 719 return; 720 } 721 s = splbio(); 722 723 bp->b_resid = 0; 724 725 /* stuff it onto our queue */ 726 BUFQ_PUT(&rs->buf_queue, bp); 727 728 raidstart(raidPtrs[raidID]); 729 730 splx(s); 731 } 732 /* ARGSUSED */ 733 int 734 raidread(dev, uio, flags) 735 dev_t dev; 736 struct uio *uio; 737 int flags; 738 { 739 int unit = raidunit(dev); 740 struct raid_softc *rs; 741 int part; 742 743 if (unit >= numraid) 744 return (ENXIO); 745 rs = &raid_softc[unit]; 746 747 if ((rs->sc_flags & RAIDF_INITED) == 0) 748 return (ENXIO); 749 part = DISKPART(dev); 750 751 db1_printf(("raidread: unit: %d partition: %d\n", unit, part)); 752 753 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio)); 754 755 } 756 /* ARGSUSED */ 757 int 758 raidwrite(dev, uio, flags) 759 dev_t dev; 760 struct uio *uio; 761 int flags; 762 { 763 int unit = raidunit(dev); 764 struct raid_softc *rs; 765 766 if (unit >= numraid) 767 return (ENXIO); 768 rs = &raid_softc[unit]; 769 770 if ((rs->sc_flags & RAIDF_INITED) == 0) 771 return (ENXIO); 772 db1_printf(("raidwrite\n")); 773 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio)); 774 775 } 776 777 int 778 raidioctl(dev, cmd, data, flag, p) 779 dev_t dev; 780 u_long cmd; 781 caddr_t data; 782 int flag; 783 struct proc *p; 784 { 785 int unit = raidunit(dev); 786 int error = 0; 787 int part, pmask; 788 struct raid_softc *rs; 789 RF_Config_t *k_cfg, *u_cfg; 790 RF_Raid_t *raidPtr; 791 RF_RaidDisk_t *diskPtr; 792 RF_AccTotals_t *totals; 793 RF_DeviceConfig_t *d_cfg, **ucfgp; 794 u_char *specific_buf; 795 int retcode = 0; 796 int row; 797 int column; 798 int raidid; 799 struct rf_recon_req *rrcopy, *rr; 800 RF_ComponentLabel_t *clabel; 801 RF_ComponentLabel_t ci_label; 802 RF_ComponentLabel_t **clabel_ptr; 803 RF_SingleComponent_t *sparePtr,*componentPtr; 804 RF_SingleComponent_t hot_spare; 805 RF_SingleComponent_t component; 806 RF_ProgressInfo_t progressInfo, **progressInfoPtr; 807 int i, j, d; 808 #ifdef __HAVE_OLD_DISKLABEL 809 struct disklabel newlabel; 810 #endif 811 812 if (unit >= numraid) 813 return (ENXIO); 814 rs = &raid_softc[unit]; 815 raidPtr = raidPtrs[unit]; 816 817 db1_printf(("raidioctl: 
%d %d %d %d\n", (int) dev, 818 (int) DISKPART(dev), (int) unit, (int) cmd)); 819 820 /* Must be open for writes for these commands... */ 821 switch (cmd) { 822 case DIOCSDINFO: 823 case DIOCWDINFO: 824 #ifdef __HAVE_OLD_DISKLABEL 825 case ODIOCWDINFO: 826 case ODIOCSDINFO: 827 #endif 828 case DIOCWLABEL: 829 if ((flag & FWRITE) == 0) 830 return (EBADF); 831 } 832 833 /* Must be initialized for these... */ 834 switch (cmd) { 835 case DIOCGDINFO: 836 case DIOCSDINFO: 837 case DIOCWDINFO: 838 #ifdef __HAVE_OLD_DISKLABEL 839 case ODIOCGDINFO: 840 case ODIOCWDINFO: 841 case ODIOCSDINFO: 842 case ODIOCGDEFLABEL: 843 #endif 844 case DIOCGPART: 845 case DIOCWLABEL: 846 case DIOCGDEFLABEL: 847 case RAIDFRAME_SHUTDOWN: 848 case RAIDFRAME_REWRITEPARITY: 849 case RAIDFRAME_GET_INFO: 850 case RAIDFRAME_RESET_ACCTOTALS: 851 case RAIDFRAME_GET_ACCTOTALS: 852 case RAIDFRAME_KEEP_ACCTOTALS: 853 case RAIDFRAME_GET_SIZE: 854 case RAIDFRAME_FAIL_DISK: 855 case RAIDFRAME_COPYBACK: 856 case RAIDFRAME_CHECK_RECON_STATUS: 857 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 858 case RAIDFRAME_GET_COMPONENT_LABEL: 859 case RAIDFRAME_SET_COMPONENT_LABEL: 860 case RAIDFRAME_ADD_HOT_SPARE: 861 case RAIDFRAME_REMOVE_HOT_SPARE: 862 case RAIDFRAME_INIT_LABELS: 863 case RAIDFRAME_REBUILD_IN_PLACE: 864 case RAIDFRAME_CHECK_PARITY: 865 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 866 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 867 case RAIDFRAME_CHECK_COPYBACK_STATUS: 868 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 869 case RAIDFRAME_SET_AUTOCONFIG: 870 case RAIDFRAME_SET_ROOT: 871 case RAIDFRAME_DELETE_COMPONENT: 872 case RAIDFRAME_INCORPORATE_HOT_SPARE: 873 if ((rs->sc_flags & RAIDF_INITED) == 0) 874 return (ENXIO); 875 } 876 877 switch (cmd) { 878 879 /* configure the system */ 880 case RAIDFRAME_CONFIGURE: 881 882 if (raidPtr->valid) { 883 /* There is a valid RAID set running on this unit! */ 884 printf("raid%d: Device already configured!\n",unit); 885 return(EINVAL); 886 } 887 888 /* copy-in the configuration information */ 889 /* data points to a pointer to the configuration structure */ 890 891 u_cfg = *((RF_Config_t **) data); 892 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); 893 if (k_cfg == NULL) { 894 return (ENOMEM); 895 } 896 retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg, 897 sizeof(RF_Config_t)); 898 if (retcode) { 899 RF_Free(k_cfg, sizeof(RF_Config_t)); 900 db1_printf(("rf_ioctl: retcode=%d copyin.1\n", 901 retcode)); 902 return (retcode); 903 } 904 /* allocate a buffer for the layout-specific data, and copy it 905 * in */ 906 if (k_cfg->layoutSpecificSize) { 907 if (k_cfg->layoutSpecificSize > 10000) { 908 /* sanity check */ 909 RF_Free(k_cfg, sizeof(RF_Config_t)); 910 return (EINVAL); 911 } 912 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, 913 (u_char *)); 914 if (specific_buf == NULL) { 915 RF_Free(k_cfg, sizeof(RF_Config_t)); 916 return (ENOMEM); 917 } 918 retcode = copyin(k_cfg->layoutSpecific, 919 (caddr_t) specific_buf, 920 k_cfg->layoutSpecificSize); 921 if (retcode) { 922 RF_Free(k_cfg, sizeof(RF_Config_t)); 923 RF_Free(specific_buf, 924 k_cfg->layoutSpecificSize); 925 db1_printf(("rf_ioctl: retcode=%d copyin.2\n", 926 retcode)); 927 return (retcode); 928 } 929 } else 930 specific_buf = NULL; 931 k_cfg->layoutSpecific = specific_buf; 932 933 /* should do some kind of sanity check on the configuration. 934 * Store the sum of all the bytes in the last byte? 
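		 *
		 * (For reference, a sketch of the userland side of this
		 * ioctl: the caller -- raidctl(8), typically -- hands in
		 * the address of a pointer to its own RF_Config_t, roughly:
		 *
		 *	RF_Config_t cfg;
		 *	RF_Config_t *cfgp = &cfg;
		 *
		 *	... fill in cfg ...
		 *	ioctl(fd, RAIDFRAME_CONFIGURE, &cfgp);
		 *
		 * which is why "data" is treated as an RF_Config_t ** in the
		 * copyin code above.)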
		 */

		/* configure the system */

		/*
		 * Clear the entire RAID descriptor, just to make sure
		 * there is no stale data left in the case of a
		 * reconfiguration
		 */
		memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
		raidPtr->raidid = unit;

		retcode = rf_Configure(raidPtr, k_cfg, NULL);

		if (retcode == 0) {

			/* allow this many simultaneous IO's to
			   this RAID device */
			raidPtr->openings = RAIDOUTSTANDING;

			raidinit(raidPtr);
			rf_markalldirty(raidPtr);
		}
		/* free the buffers.  No return code here. */
		if (k_cfg->layoutSpecificSize) {
			RF_Free(specific_buf, k_cfg->layoutSpecificSize);
		}
		RF_Free(k_cfg, sizeof(RF_Config_t));

		return (retcode);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		if ((error = raidlock(rs)) != 0)
			return (error);

		/*
		 * If somebody has a partition mounted, we shouldn't
		 * shutdown.
		 */

		part = DISKPART(dev);
		pmask = (1 << part);
		if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
		     (rs->sc_dkdev.dk_copenmask & pmask))) {
			raidunlock(rs);
			return (EBUSY);
		}

		retcode = rf_Shutdown(raidPtr);

		/* It's no longer initialized... */
		rs->sc_flags &= ~RAIDF_INITED;

		/* Detach the disk. */
		disk_detach(&rs->sc_dkdev);

		raidunlock(rs);

		return (retcode);
	case RAIDFRAME_GET_COMPONENT_LABEL:
		clabel_ptr = (RF_ComponentLabel_t **) data;
		/* need to read the component label for the disk indicated
		   by row,column in clabel */

		/* For practice, let's get it directly from disk, rather
		   than from the in-core copy */
		RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
			   (RF_ComponentLabel_t *));
		if (clabel == NULL)
			return (ENOMEM);

		memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));

		retcode = copyin( *clabel_ptr, clabel,
				  sizeof(RF_ComponentLabel_t));

		if (retcode) {
			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
			return(retcode);
		}

		row = clabel->row;
		column = clabel->column;

		if ((row < 0) || (row >= raidPtr->numRow) ||
		    (column < 0) || (column >= raidPtr->numCol +
				     raidPtr->numSpare)) {
			RF_Free( clabel, sizeof(RF_ComponentLabel_t));
			return(EINVAL);
		}

		raidread_component_label(raidPtr->Disks[row][column].dev,
			raidPtr->raid_cinfo[row][column].ci_vp,
			clabel );

		retcode = copyout((caddr_t) clabel,
				  (caddr_t) *clabel_ptr,
				  sizeof(RF_ComponentLabel_t));
		RF_Free( clabel, sizeof(RF_ComponentLabel_t));
		return (retcode);

	case RAIDFRAME_SET_COMPONENT_LABEL:
		clabel = (RF_ComponentLabel_t *) data;

		/* XXX check the label for valid stuff... */
		/* Note that some things *should not* get modified --
		   the user should be re-initing the labels instead of
		   trying to patch things.
1045 */ 1046 1047 raidid = raidPtr->raidid; 1048 printf("raid%d: Got component label:\n", raidid); 1049 printf("raid%d: Version: %d\n", raidid, clabel->version); 1050 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number); 1051 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter); 1052 printf("raid%d: Row: %d\n", raidid, clabel->row); 1053 printf("raid%d: Column: %d\n", raidid, clabel->column); 1054 printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows); 1055 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns); 1056 printf("raid%d: Clean: %d\n", raidid, clabel->clean); 1057 printf("raid%d: Status: %d\n", raidid, clabel->status); 1058 1059 row = clabel->row; 1060 column = clabel->column; 1061 1062 if ((row < 0) || (row >= raidPtr->numRow) || 1063 (column < 0) || (column >= raidPtr->numCol)) { 1064 return(EINVAL); 1065 } 1066 1067 /* XXX this isn't allowed to do anything for now :-) */ 1068 1069 /* XXX and before it is, we need to fill in the rest 1070 of the fields!?!?!?! */ 1071 #if 0 1072 raidwrite_component_label( 1073 raidPtr->Disks[row][column].dev, 1074 raidPtr->raid_cinfo[row][column].ci_vp, 1075 clabel ); 1076 #endif 1077 return (0); 1078 1079 case RAIDFRAME_INIT_LABELS: 1080 clabel = (RF_ComponentLabel_t *) data; 1081 /* 1082 we only want the serial number from 1083 the above. We get all the rest of the information 1084 from the config that was used to create this RAID 1085 set. 1086 */ 1087 1088 raidPtr->serial_number = clabel->serial_number; 1089 1090 raid_init_component_label(raidPtr, &ci_label); 1091 ci_label.serial_number = clabel->serial_number; 1092 1093 for(row=0;row<raidPtr->numRow;row++) { 1094 ci_label.row = row; 1095 for(column=0;column<raidPtr->numCol;column++) { 1096 diskPtr = &raidPtr->Disks[row][column]; 1097 if (!RF_DEAD_DISK(diskPtr->status)) { 1098 ci_label.partitionSize = diskPtr->partitionSize; 1099 ci_label.column = column; 1100 raidwrite_component_label( 1101 raidPtr->Disks[row][column].dev, 1102 raidPtr->raid_cinfo[row][column].ci_vp, 1103 &ci_label ); 1104 } 1105 } 1106 } 1107 1108 return (retcode); 1109 case RAIDFRAME_SET_AUTOCONFIG: 1110 d = rf_set_autoconfig(raidPtr, *(int *) data); 1111 printf("raid%d: New autoconfig value is: %d\n", 1112 raidPtr->raidid, d); 1113 *(int *) data = d; 1114 return (retcode); 1115 1116 case RAIDFRAME_SET_ROOT: 1117 d = rf_set_rootpartition(raidPtr, *(int *) data); 1118 printf("raid%d: New rootpartition value is: %d\n", 1119 raidPtr->raidid, d); 1120 *(int *) data = d; 1121 return (retcode); 1122 1123 /* initialize all parity */ 1124 case RAIDFRAME_REWRITEPARITY: 1125 1126 if (raidPtr->Layout.map->faultsTolerated == 0) { 1127 /* Parity for RAID 0 is trivially correct */ 1128 raidPtr->parity_good = RF_RAID_CLEAN; 1129 return(0); 1130 } 1131 1132 if (raidPtr->parity_rewrite_in_progress == 1) { 1133 /* Re-write is already in progress! 
*/ 1134 return(EINVAL); 1135 } 1136 1137 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, 1138 rf_RewriteParityThread, 1139 raidPtr,"raid_parity"); 1140 return (retcode); 1141 1142 1143 case RAIDFRAME_ADD_HOT_SPARE: 1144 sparePtr = (RF_SingleComponent_t *) data; 1145 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t)); 1146 retcode = rf_add_hot_spare(raidPtr, &hot_spare); 1147 return(retcode); 1148 1149 case RAIDFRAME_REMOVE_HOT_SPARE: 1150 return(retcode); 1151 1152 case RAIDFRAME_DELETE_COMPONENT: 1153 componentPtr = (RF_SingleComponent_t *)data; 1154 memcpy( &component, componentPtr, 1155 sizeof(RF_SingleComponent_t)); 1156 retcode = rf_delete_component(raidPtr, &component); 1157 return(retcode); 1158 1159 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1160 componentPtr = (RF_SingleComponent_t *)data; 1161 memcpy( &component, componentPtr, 1162 sizeof(RF_SingleComponent_t)); 1163 retcode = rf_incorporate_hot_spare(raidPtr, &component); 1164 return(retcode); 1165 1166 case RAIDFRAME_REBUILD_IN_PLACE: 1167 1168 if (raidPtr->Layout.map->faultsTolerated == 0) { 1169 /* Can't do this on a RAID 0!! */ 1170 return(EINVAL); 1171 } 1172 1173 if (raidPtr->recon_in_progress == 1) { 1174 /* a reconstruct is already in progress! */ 1175 return(EINVAL); 1176 } 1177 1178 componentPtr = (RF_SingleComponent_t *) data; 1179 memcpy( &component, componentPtr, 1180 sizeof(RF_SingleComponent_t)); 1181 row = component.row; 1182 column = component.column; 1183 printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid, 1184 row, column); 1185 if ((row < 0) || (row >= raidPtr->numRow) || 1186 (column < 0) || (column >= raidPtr->numCol)) { 1187 return(EINVAL); 1188 } 1189 1190 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1191 if (rrcopy == NULL) 1192 return(ENOMEM); 1193 1194 rrcopy->raidPtr = (void *) raidPtr; 1195 rrcopy->row = row; 1196 rrcopy->col = column; 1197 1198 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1199 rf_ReconstructInPlaceThread, 1200 rrcopy,"raid_reconip"); 1201 return(retcode); 1202 1203 case RAIDFRAME_GET_INFO: 1204 if (!raidPtr->valid) 1205 return (ENODEV); 1206 ucfgp = (RF_DeviceConfig_t **) data; 1207 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), 1208 (RF_DeviceConfig_t *)); 1209 if (d_cfg == NULL) 1210 return (ENOMEM); 1211 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t)); 1212 d_cfg->rows = raidPtr->numRow; 1213 d_cfg->cols = raidPtr->numCol; 1214 d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol; 1215 if (d_cfg->ndevs >= RF_MAX_DISKS) { 1216 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1217 return (ENOMEM); 1218 } 1219 d_cfg->nspares = raidPtr->numSpare; 1220 if (d_cfg->nspares >= RF_MAX_DISKS) { 1221 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1222 return (ENOMEM); 1223 } 1224 d_cfg->maxqdepth = raidPtr->maxQueueDepth; 1225 d = 0; 1226 for (i = 0; i < d_cfg->rows; i++) { 1227 for (j = 0; j < d_cfg->cols; j++) { 1228 d_cfg->devs[d] = raidPtr->Disks[i][j]; 1229 d++; 1230 } 1231 } 1232 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { 1233 d_cfg->spares[i] = raidPtr->Disks[0][j]; 1234 } 1235 retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp, 1236 sizeof(RF_DeviceConfig_t)); 1237 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1238 1239 return (retcode); 1240 1241 case RAIDFRAME_CHECK_PARITY: 1242 *(int *) data = raidPtr->parity_good; 1243 return (0); 1244 1245 case RAIDFRAME_RESET_ACCTOTALS: 1246 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals)); 1247 return (0); 1248 1249 case RAIDFRAME_GET_ACCTOTALS: 1250 totals = (RF_AccTotals_t *) data; 1251 
*totals = raidPtr->acc_totals; 1252 return (0); 1253 1254 case RAIDFRAME_KEEP_ACCTOTALS: 1255 raidPtr->keep_acc_totals = *(int *)data; 1256 return (0); 1257 1258 case RAIDFRAME_GET_SIZE: 1259 *(int *) data = raidPtr->totalSectors; 1260 return (0); 1261 1262 /* fail a disk & optionally start reconstruction */ 1263 case RAIDFRAME_FAIL_DISK: 1264 1265 if (raidPtr->Layout.map->faultsTolerated == 0) { 1266 /* Can't do this on a RAID 0!! */ 1267 return(EINVAL); 1268 } 1269 1270 rr = (struct rf_recon_req *) data; 1271 1272 if (rr->row < 0 || rr->row >= raidPtr->numRow 1273 || rr->col < 0 || rr->col >= raidPtr->numCol) 1274 return (EINVAL); 1275 1276 printf("raid%d: Failing the disk: row: %d col: %d\n", 1277 unit, rr->row, rr->col); 1278 1279 /* make a copy of the recon request so that we don't rely on 1280 * the user's buffer */ 1281 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1282 if (rrcopy == NULL) 1283 return(ENOMEM); 1284 memcpy(rrcopy, rr, sizeof(*rr)); 1285 rrcopy->raidPtr = (void *) raidPtr; 1286 1287 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1288 rf_ReconThread, 1289 rrcopy,"raid_recon"); 1290 return (0); 1291 1292 /* invoke a copyback operation after recon on whatever disk 1293 * needs it, if any */ 1294 case RAIDFRAME_COPYBACK: 1295 1296 if (raidPtr->Layout.map->faultsTolerated == 0) { 1297 /* This makes no sense on a RAID 0!! */ 1298 return(EINVAL); 1299 } 1300 1301 if (raidPtr->copyback_in_progress == 1) { 1302 /* Copyback is already in progress! */ 1303 return(EINVAL); 1304 } 1305 1306 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, 1307 rf_CopybackThread, 1308 raidPtr,"raid_copyback"); 1309 return (retcode); 1310 1311 /* return the percentage completion of reconstruction */ 1312 case RAIDFRAME_CHECK_RECON_STATUS: 1313 if (raidPtr->Layout.map->faultsTolerated == 0) { 1314 /* This makes no sense on a RAID 0, so tell the 1315 user it's done. */ 1316 *(int *) data = 100; 1317 return(0); 1318 } 1319 row = 0; /* XXX we only consider a single row... */ 1320 if (raidPtr->status[row] != rf_rs_reconstructing) 1321 *(int *) data = 100; 1322 else 1323 *(int *) data = raidPtr->reconControl[row]->percentComplete; 1324 return (0); 1325 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1326 progressInfoPtr = (RF_ProgressInfo_t **) data; 1327 row = 0; /* XXX we only consider a single row... */ 1328 if (raidPtr->status[row] != rf_rs_reconstructing) { 1329 progressInfo.remaining = 0; 1330 progressInfo.completed = 100; 1331 progressInfo.total = 100; 1332 } else { 1333 progressInfo.total = 1334 raidPtr->reconControl[row]->numRUsTotal; 1335 progressInfo.completed = 1336 raidPtr->reconControl[row]->numRUsComplete; 1337 progressInfo.remaining = progressInfo.total - 1338 progressInfo.completed; 1339 } 1340 retcode = copyout((caddr_t) &progressInfo, 1341 (caddr_t) *progressInfoPtr, 1342 sizeof(RF_ProgressInfo_t)); 1343 return (retcode); 1344 1345 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1346 if (raidPtr->Layout.map->faultsTolerated == 0) { 1347 /* This makes no sense on a RAID 0, so tell the 1348 user it's done. 
*/ 1349 *(int *) data = 100; 1350 return(0); 1351 } 1352 if (raidPtr->parity_rewrite_in_progress == 1) { 1353 *(int *) data = 100 * 1354 raidPtr->parity_rewrite_stripes_done / 1355 raidPtr->Layout.numStripe; 1356 } else { 1357 *(int *) data = 100; 1358 } 1359 return (0); 1360 1361 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1362 progressInfoPtr = (RF_ProgressInfo_t **) data; 1363 if (raidPtr->parity_rewrite_in_progress == 1) { 1364 progressInfo.total = raidPtr->Layout.numStripe; 1365 progressInfo.completed = 1366 raidPtr->parity_rewrite_stripes_done; 1367 progressInfo.remaining = progressInfo.total - 1368 progressInfo.completed; 1369 } else { 1370 progressInfo.remaining = 0; 1371 progressInfo.completed = 100; 1372 progressInfo.total = 100; 1373 } 1374 retcode = copyout((caddr_t) &progressInfo, 1375 (caddr_t) *progressInfoPtr, 1376 sizeof(RF_ProgressInfo_t)); 1377 return (retcode); 1378 1379 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1380 if (raidPtr->Layout.map->faultsTolerated == 0) { 1381 /* This makes no sense on a RAID 0 */ 1382 *(int *) data = 100; 1383 return(0); 1384 } 1385 if (raidPtr->copyback_in_progress == 1) { 1386 *(int *) data = 100 * raidPtr->copyback_stripes_done / 1387 raidPtr->Layout.numStripe; 1388 } else { 1389 *(int *) data = 100; 1390 } 1391 return (0); 1392 1393 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1394 progressInfoPtr = (RF_ProgressInfo_t **) data; 1395 if (raidPtr->copyback_in_progress == 1) { 1396 progressInfo.total = raidPtr->Layout.numStripe; 1397 progressInfo.completed = 1398 raidPtr->copyback_stripes_done; 1399 progressInfo.remaining = progressInfo.total - 1400 progressInfo.completed; 1401 } else { 1402 progressInfo.remaining = 0; 1403 progressInfo.completed = 100; 1404 progressInfo.total = 100; 1405 } 1406 retcode = copyout((caddr_t) &progressInfo, 1407 (caddr_t) *progressInfoPtr, 1408 sizeof(RF_ProgressInfo_t)); 1409 return (retcode); 1410 1411 /* the sparetable daemon calls this to wait for the kernel to 1412 * need a spare table. this ioctl does not return until a 1413 * spare table is needed. XXX -- calling mpsleep here in the 1414 * ioctl code is almost certainly wrong and evil. 
-- XXX  XXX
	 * -- I should either compute the spare table in the kernel,
	 * or have a different -- XXX XXX -- interface (a different
	 * character device) for delivering the table        -- XXX */
#if 0
	case RAIDFRAME_SPARET_WAIT:
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
		waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return (0);

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
		wakeup(&rf_sparet_wait_queue);
		return (0);

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		RF_LOCK_MUTEX(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		wakeup(&rf_sparet_resp_queue);
		RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

		return (retcode);
#endif

	default:
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
1475 */ 1476 1477 switch (cmd) { 1478 case DIOCGDINFO: 1479 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label); 1480 break; 1481 #ifdef __HAVE_OLD_DISKLABEL 1482 case ODIOCGDINFO: 1483 newlabel = *(rs->sc_dkdev.dk_label); 1484 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1485 return ENOTTY; 1486 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1487 break; 1488 #endif 1489 1490 case DIOCGPART: 1491 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label; 1492 ((struct partinfo *) data)->part = 1493 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1494 break; 1495 1496 case DIOCWDINFO: 1497 case DIOCSDINFO: 1498 #ifdef __HAVE_OLD_DISKLABEL 1499 case ODIOCWDINFO: 1500 case ODIOCSDINFO: 1501 #endif 1502 { 1503 struct disklabel *lp; 1504 #ifdef __HAVE_OLD_DISKLABEL 1505 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1506 memset(&newlabel, 0, sizeof newlabel); 1507 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1508 lp = &newlabel; 1509 } else 1510 #endif 1511 lp = (struct disklabel *)data; 1512 1513 if ((error = raidlock(rs)) != 0) 1514 return (error); 1515 1516 rs->sc_flags |= RAIDF_LABELLING; 1517 1518 error = setdisklabel(rs->sc_dkdev.dk_label, 1519 lp, 0, rs->sc_dkdev.dk_cpulabel); 1520 if (error == 0) { 1521 if (cmd == DIOCWDINFO 1522 #ifdef __HAVE_OLD_DISKLABEL 1523 || cmd == ODIOCWDINFO 1524 #endif 1525 ) 1526 error = writedisklabel(RAIDLABELDEV(dev), 1527 raidstrategy, rs->sc_dkdev.dk_label, 1528 rs->sc_dkdev.dk_cpulabel); 1529 } 1530 rs->sc_flags &= ~RAIDF_LABELLING; 1531 1532 raidunlock(rs); 1533 1534 if (error) 1535 return (error); 1536 break; 1537 } 1538 1539 case DIOCWLABEL: 1540 if (*(int *) data != 0) 1541 rs->sc_flags |= RAIDF_WLABEL; 1542 else 1543 rs->sc_flags &= ~RAIDF_WLABEL; 1544 break; 1545 1546 case DIOCGDEFLABEL: 1547 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data); 1548 break; 1549 1550 #ifdef __HAVE_OLD_DISKLABEL 1551 case ODIOCGDEFLABEL: 1552 raidgetdefaultlabel(raidPtr, rs, &newlabel); 1553 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1554 return ENOTTY; 1555 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1556 break; 1557 #endif 1558 1559 default: 1560 retcode = ENOTTY; 1561 } 1562 return (retcode); 1563 1564 } 1565 1566 1567 /* raidinit -- complete the rest of the initialization for the 1568 RAIDframe device. */ 1569 1570 1571 static void 1572 raidinit(raidPtr) 1573 RF_Raid_t *raidPtr; 1574 { 1575 struct raid_softc *rs; 1576 int unit; 1577 1578 unit = raidPtr->raidid; 1579 1580 rs = &raid_softc[unit]; 1581 1582 /* XXX should check return code first... */ 1583 rs->sc_flags |= RAIDF_INITED; 1584 1585 sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */ 1586 1587 rs->sc_dkdev.dk_name = rs->sc_xname; 1588 1589 /* disk_attach actually creates space for the CPU disklabel, among 1590 * other things, so it's critical to call this *BEFORE* we try putzing 1591 * with disklabels. */ 1592 1593 disk_attach(&rs->sc_dkdev); 1594 1595 /* XXX There may be a weird interaction here between this, and 1596 * protectedSectors, as used in RAIDframe. */ 1597 1598 rs->sc_size = raidPtr->totalSectors; 1599 1600 } 1601 1602 /* wake up the daemon & tell it to get us a spare table 1603 * XXX 1604 * the entries in the queues should be tagged with the raidPtr 1605 * so that in the extremely rare case that two recons happen at once, 1606 * we know for which device were requesting a spare table 1607 * XXX 1608 * 1609 * XXX This code is not currently used. 
GO 1610 */ 1611 int 1612 rf_GetSpareTableFromDaemon(req) 1613 RF_SparetWait_t *req; 1614 { 1615 int retcode; 1616 1617 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1618 req->next = rf_sparet_wait_queue; 1619 rf_sparet_wait_queue = req; 1620 wakeup(&rf_sparet_wait_queue); 1621 1622 /* mpsleep unlocks the mutex */ 1623 while (!rf_sparet_resp_queue) { 1624 tsleep(&rf_sparet_resp_queue, PRIBIO, 1625 "raidframe getsparetable", 0); 1626 } 1627 req = rf_sparet_resp_queue; 1628 rf_sparet_resp_queue = req->next; 1629 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1630 1631 retcode = req->fcol; 1632 RF_Free(req, sizeof(*req)); /* this is not the same req as we 1633 * alloc'd */ 1634 return (retcode); 1635 } 1636 1637 /* a wrapper around rf_DoAccess that extracts appropriate info from the 1638 * bp & passes it down. 1639 * any calls originating in the kernel must use non-blocking I/O 1640 * do some extra sanity checking to return "appropriate" error values for 1641 * certain conditions (to make some standard utilities work) 1642 * 1643 * Formerly known as: rf_DoAccessKernel 1644 */ 1645 void 1646 raidstart(raidPtr) 1647 RF_Raid_t *raidPtr; 1648 { 1649 RF_SectorCount_t num_blocks, pb, sum; 1650 RF_RaidAddr_t raid_addr; 1651 int retcode; 1652 struct partition *pp; 1653 daddr_t blocknum; 1654 int unit; 1655 struct raid_softc *rs; 1656 int do_async; 1657 struct buf *bp; 1658 1659 unit = raidPtr->raidid; 1660 rs = &raid_softc[unit]; 1661 1662 /* quick check to see if anything has died recently */ 1663 RF_LOCK_MUTEX(raidPtr->mutex); 1664 if (raidPtr->numNewFailures > 0) { 1665 rf_update_component_labels(raidPtr, 1666 RF_NORMAL_COMPONENT_UPDATE); 1667 raidPtr->numNewFailures--; 1668 } 1669 1670 /* Check to see if we're at the limit... */ 1671 while (raidPtr->openings > 0) { 1672 RF_UNLOCK_MUTEX(raidPtr->mutex); 1673 1674 /* get the next item, if any, from the queue */ 1675 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) { 1676 /* nothing more to do */ 1677 return; 1678 } 1679 1680 /* Ok, for the bp we have here, bp->b_blkno is relative to the 1681 * partition.. Need to make it absolute to the underlying 1682 * device.. */ 1683 1684 blocknum = bp->b_blkno; 1685 if (DISKPART(bp->b_dev) != RAW_PART) { 1686 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; 1687 blocknum += pp->p_offset; 1688 } 1689 1690 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, 1691 (int) blocknum)); 1692 1693 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount)); 1694 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid)); 1695 1696 /* *THIS* is where we adjust what block we're going to... 1697 * but DO NOT TOUCH bp->b_blkno!!! */ 1698 raid_addr = blocknum; 1699 1700 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector; 1701 pb = (bp->b_bcount & raidPtr->sectorMask) ? 
1 : 0; 1702 sum = raid_addr + num_blocks + pb; 1703 if (1 || rf_debugKernelAccess) { 1704 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n", 1705 (int) raid_addr, (int) sum, (int) num_blocks, 1706 (int) pb, (int) bp->b_resid)); 1707 } 1708 if ((sum > raidPtr->totalSectors) || (sum < raid_addr) 1709 || (sum < num_blocks) || (sum < pb)) { 1710 bp->b_error = ENOSPC; 1711 bp->b_flags |= B_ERROR; 1712 bp->b_resid = bp->b_bcount; 1713 biodone(bp); 1714 RF_LOCK_MUTEX(raidPtr->mutex); 1715 continue; 1716 } 1717 /* 1718 * XXX rf_DoAccess() should do this, not just DoAccessKernel() 1719 */ 1720 1721 if (bp->b_bcount & raidPtr->sectorMask) { 1722 bp->b_error = EINVAL; 1723 bp->b_flags |= B_ERROR; 1724 bp->b_resid = bp->b_bcount; 1725 biodone(bp); 1726 RF_LOCK_MUTEX(raidPtr->mutex); 1727 continue; 1728 1729 } 1730 db1_printf(("Calling DoAccess..\n")); 1731 1732 1733 RF_LOCK_MUTEX(raidPtr->mutex); 1734 raidPtr->openings--; 1735 RF_UNLOCK_MUTEX(raidPtr->mutex); 1736 1737 /* 1738 * Everything is async. 1739 */ 1740 do_async = 1; 1741 1742 disk_busy(&rs->sc_dkdev); 1743 1744 /* XXX we're still at splbio() here... do we *really* 1745 need to be? */ 1746 1747 /* don't ever condition on bp->b_flags & B_WRITE. 1748 * always condition on B_READ instead */ 1749 1750 retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? 1751 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, 1752 do_async, raid_addr, num_blocks, 1753 bp->b_data, bp, RF_DAG_NONBLOCKING_IO); 1754 1755 RF_LOCK_MUTEX(raidPtr->mutex); 1756 } 1757 RF_UNLOCK_MUTEX(raidPtr->mutex); 1758 } 1759 1760 1761 1762 1763 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */ 1764 1765 int 1766 rf_DispatchKernelIO(queue, req) 1767 RF_DiskQueue_t *queue; 1768 RF_DiskQueueData_t *req; 1769 { 1770 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE; 1771 struct buf *bp; 1772 struct raidbuf *raidbp = NULL; 1773 struct raid_softc *rs; 1774 int unit; 1775 int s; 1776 1777 s=0; 1778 /* s = splbio();*/ /* want to test this */ 1779 /* XXX along with the vnode, we also need the softc associated with 1780 * this device.. */ 1781 1782 req->queue = queue; 1783 1784 unit = queue->raidPtr->raidid; 1785 1786 db1_printf(("DispatchKernelIO unit: %d\n", unit)); 1787 1788 if (unit >= numraid) { 1789 printf("Invalid unit number: %d %d\n", unit, numraid); 1790 panic("Invalid Unit number in rf_DispatchKernelIO\n"); 1791 } 1792 rs = &raid_softc[unit]; 1793 1794 bp = req->bp; 1795 #if 1 1796 /* XXX when there is a physical disk failure, someone is passing us a 1797 * buffer that contains old stuff!! Attempt to deal with this problem 1798 * without taking a performance hit... (not sure where the real bug 1799 * is. It's buried in RAIDframe somewhere) :-( GO ) */ 1800 1801 if (bp->b_flags & B_ERROR) { 1802 bp->b_flags &= ~B_ERROR; 1803 } 1804 if (bp->b_error != 0) { 1805 bp->b_error = 0; 1806 } 1807 #endif 1808 raidbp = RAIDGETBUF(rs); 1809 1810 raidbp->rf_flags = 0; /* XXX not really used anywhere... */ 1811 1812 /* 1813 * context for raidiodone 1814 */ 1815 raidbp->rf_obp = bp; 1816 raidbp->req = req; 1817 1818 LIST_INIT(&raidbp->rf_buf.b_dep); 1819 1820 switch (req->type) { 1821 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ 1822 /* XXX need to do something extra here.. */ 1823 /* I'm leaving this in, as I've never actually seen it used, 1824 * and I'd like folks to report it... GO */ 1825 printf(("WAKEUP CALLED\n")); 1826 queue->numOutstanding++; 1827 1828 /* XXX need to glue the original buffer into this?? 
*/ 1829 1830 KernelWakeupFunc(&raidbp->rf_buf); 1831 break; 1832 1833 case RF_IO_TYPE_READ: 1834 case RF_IO_TYPE_WRITE: 1835 1836 if (req->tracerec) { 1837 RF_ETIMER_START(req->tracerec->timer); 1838 } 1839 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp, 1840 op | bp->b_flags, queue->rf_cinfo->ci_dev, 1841 req->sectorOffset, req->numSector, 1842 req->buf, KernelWakeupFunc, (void *) req, 1843 queue->raidPtr->logBytesPerSector, req->b_proc); 1844 1845 if (rf_debugKernelAccess) { 1846 db1_printf(("dispatch: bp->b_blkno = %ld\n", 1847 (long) bp->b_blkno)); 1848 } 1849 queue->numOutstanding++; 1850 queue->last_deq_sector = req->sectorOffset; 1851 /* acc wouldn't have been let in if there were any pending 1852 * reqs at any other priority */ 1853 queue->curPriority = req->priority; 1854 1855 db1_printf(("Going for %c to unit %d row %d col %d\n", 1856 req->type, unit, queue->row, queue->col)); 1857 db1_printf(("sector %d count %d (%d bytes) %d\n", 1858 (int) req->sectorOffset, (int) req->numSector, 1859 (int) (req->numSector << 1860 queue->raidPtr->logBytesPerSector), 1861 (int) queue->raidPtr->logBytesPerSector)); 1862 if ((raidbp->rf_buf.b_flags & B_READ) == 0) { 1863 raidbp->rf_buf.b_vp->v_numoutput++; 1864 } 1865 VOP_STRATEGY(&raidbp->rf_buf); 1866 1867 break; 1868 1869 default: 1870 panic("bad req->type in rf_DispatchKernelIO"); 1871 } 1872 db1_printf(("Exiting from DispatchKernelIO\n")); 1873 /* splx(s); */ /* want to test this */ 1874 return (0); 1875 } 1876 /* this is the callback function associated with a I/O invoked from 1877 kernel code. 1878 */ 1879 static void 1880 KernelWakeupFunc(vbp) 1881 struct buf *vbp; 1882 { 1883 RF_DiskQueueData_t *req = NULL; 1884 RF_DiskQueue_t *queue; 1885 struct raidbuf *raidbp = (struct raidbuf *) vbp; 1886 struct buf *bp; 1887 struct raid_softc *rs; 1888 int unit; 1889 int s; 1890 1891 s = splbio(); 1892 db1_printf(("recovering the request queue:\n")); 1893 req = raidbp->req; 1894 1895 bp = raidbp->rf_obp; 1896 1897 queue = (RF_DiskQueue_t *) req->queue; 1898 1899 if (raidbp->rf_buf.b_flags & B_ERROR) { 1900 bp->b_flags |= B_ERROR; 1901 bp->b_error = raidbp->rf_buf.b_error ? 1902 raidbp->rf_buf.b_error : EIO; 1903 } 1904 1905 /* XXX methinks this could be wrong... */ 1906 #if 1 1907 bp->b_resid = raidbp->rf_buf.b_resid; 1908 #endif 1909 1910 if (req->tracerec) { 1911 RF_ETIMER_STOP(req->tracerec->timer); 1912 RF_ETIMER_EVAL(req->tracerec->timer); 1913 RF_LOCK_MUTEX(rf_tracing_mutex); 1914 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); 1915 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); 1916 req->tracerec->num_phys_ios++; 1917 RF_UNLOCK_MUTEX(rf_tracing_mutex); 1918 } 1919 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */ 1920 1921 unit = queue->raidPtr->raidid; /* *Much* simpler :-> */ 1922 1923 1924 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go 1925 * ballistic, and mark the component as hosed... */ 1926 1927 if (bp->b_flags & B_ERROR) { 1928 /* Mark the disk as dead */ 1929 /* but only mark it once... */ 1930 if (queue->raidPtr->Disks[queue->row][queue->col].status == 1931 rf_ds_optimal) { 1932 printf("raid%d: IO Error. Marking %s as failed.\n", 1933 unit, queue->raidPtr->Disks[queue->row][queue->col].devname); 1934 queue->raidPtr->Disks[queue->row][queue->col].status = 1935 rf_ds_failed; 1936 queue->raidPtr->status[queue->row] = rf_rs_degraded; 1937 queue->raidPtr->numFailures++; 1938 queue->raidPtr->numNewFailures++; 1939 } else { /* Disk is already dead... 
*/ 1940 /* printf("Disk already marked as dead!\n"); */ 1941 } 1942 1943 } 1944 1945 rs = &raid_softc[unit]; 1946 RAIDPUTBUF(rs, raidbp); 1947 1948 rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0); 1949 (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0); 1950 1951 splx(s); 1952 } 1953 1954 1955 1956 /* 1957 * initialize a buf structure for doing an I/O in the kernel. 1958 */ 1959 static void 1960 InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg, 1961 logBytesPerSector, b_proc) 1962 struct buf *bp; 1963 struct vnode *b_vp; 1964 unsigned rw_flag; 1965 dev_t dev; 1966 RF_SectorNum_t startSect; 1967 RF_SectorCount_t numSect; 1968 caddr_t buf; 1969 void (*cbFunc) (struct buf *); 1970 void *cbArg; 1971 int logBytesPerSector; 1972 struct proc *b_proc; 1973 { 1974 /* bp->b_flags = B_PHYS | rw_flag; */ 1975 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */ 1976 bp->b_bcount = numSect << logBytesPerSector; 1977 bp->b_bufsize = bp->b_bcount; 1978 bp->b_error = 0; 1979 bp->b_dev = dev; 1980 bp->b_data = buf; 1981 bp->b_blkno = startSect; 1982 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */ 1983 if (bp->b_bcount == 0) { 1984 panic("bp->b_bcount is zero in InitBP!!\n"); 1985 } 1986 bp->b_proc = b_proc; 1987 bp->b_iodone = cbFunc; 1988 bp->b_vp = b_vp; 1989 1990 } 1991 1992 static void 1993 raidgetdefaultlabel(raidPtr, rs, lp) 1994 RF_Raid_t *raidPtr; 1995 struct raid_softc *rs; 1996 struct disklabel *lp; 1997 { 1998 db1_printf(("Building a default label...\n")); 1999 memset(lp, 0, sizeof(*lp)); 2000 2001 /* fabricate a label... */ 2002 lp->d_secperunit = raidPtr->totalSectors; 2003 lp->d_secsize = raidPtr->bytesPerSector; 2004 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe; 2005 lp->d_ntracks = 4 * raidPtr->numCol; 2006 lp->d_ncylinders = raidPtr->totalSectors / 2007 (lp->d_nsectors * lp->d_ntracks); 2008 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 2009 2010 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename)); 2011 lp->d_type = DTYPE_RAID; 2012 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 2013 lp->d_rpm = 3600; 2014 lp->d_interleave = 1; 2015 lp->d_flags = 0; 2016 2017 lp->d_partitions[RAW_PART].p_offset = 0; 2018 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors; 2019 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 2020 lp->d_npartitions = RAW_PART + 1; 2021 2022 lp->d_magic = DISKMAGIC; 2023 lp->d_magic2 = DISKMAGIC; 2024 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label); 2025 2026 } 2027 /* 2028 * Read the disklabel from the raid device. If one is not present, fake one 2029 * up. 2030 */ 2031 static void 2032 raidgetdisklabel(dev) 2033 dev_t dev; 2034 { 2035 int unit = raidunit(dev); 2036 struct raid_softc *rs = &raid_softc[unit]; 2037 char *errstring; 2038 struct disklabel *lp = rs->sc_dkdev.dk_label; 2039 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel; 2040 RF_Raid_t *raidPtr; 2041 2042 db1_printf(("Getting the disklabel...\n")); 2043 2044 memset(clp, 0, sizeof(*clp)); 2045 2046 raidPtr = raidPtrs[unit]; 2047 2048 raidgetdefaultlabel(raidPtr, rs, lp); 2049 2050 /* 2051 * Call the generic disklabel extraction routine. 2052 */ 2053 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, 2054 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel); 2055 if (errstring) 2056 raidmakedisklabel(rs); 2057 else { 2058 int i; 2059 struct partition *pp; 2060 2061 /* 2062 * Sanity check whether the found disklabel is valid. 
2063 * 2064 * This is necessary since the total size of the raid device 2065 * may vary when an interleave is changed even though exactly 2066 * the same components are used, and an old disklabel may be used 2067 * if that is found. 2068 */ 2069 if (lp->d_secperunit != rs->sc_size) 2070 printf("raid%d: WARNING: %s: " 2071 "total sector size in disklabel (%d) != " 2072 "the size of raid (%ld)\n", unit, rs->sc_xname, 2073 lp->d_secperunit, (long) rs->sc_size); 2074 for (i = 0; i < lp->d_npartitions; i++) { 2075 pp = &lp->d_partitions[i]; 2076 if (pp->p_offset + pp->p_size > rs->sc_size) 2077 printf("raid%d: WARNING: %s: end of partition `%c' " 2078 "exceeds the size of raid (%ld)\n", 2079 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size); 2080 } 2081 } 2082 2083 } 2084 /* 2085 * Take care of things one might want to take care of in the event 2086 * that a disklabel isn't present. 2087 */ 2088 static void 2089 raidmakedisklabel(rs) 2090 struct raid_softc *rs; 2091 { 2092 struct disklabel *lp = rs->sc_dkdev.dk_label; 2093 db1_printf(("Making a label..\n")); 2094 2095 /* 2096 * For historical reasons, if there's no disklabel present 2097 * the raw partition must be marked FS_BSDFFS. 2098 */ 2099 2100 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 2101 2102 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 2103 2104 lp->d_checksum = dkcksum(lp); 2105 } 2106 /* 2107 * Lookup the provided name in the filesystem. If the file exists, 2108 * is a valid block device, and isn't being used by anyone else, 2109 * set *vpp to the file's vnode. 2110 * You'll find the original of this in ccd.c 2111 */ 2112 int 2113 raidlookup(path, p, vpp) 2114 char *path; 2115 struct proc *p; 2116 struct vnode **vpp; /* result */ 2117 { 2118 struct nameidata nd; 2119 struct vnode *vp; 2120 struct vattr va; 2121 int error; 2122 2123 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p); 2124 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) { 2125 #if 0 2126 printf("RAIDframe: vn_open returned %d\n", error); 2127 #endif 2128 return (error); 2129 } 2130 vp = nd.ni_vp; 2131 if (vp->v_usecount > 1) { 2132 VOP_UNLOCK(vp, 0); 2133 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); 2134 return (EBUSY); 2135 } 2136 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) { 2137 VOP_UNLOCK(vp, 0); 2138 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); 2139 return (error); 2140 } 2141 /* XXX: eventually we should handle VREG, too. */ 2142 if (va.va_type != VBLK) { 2143 VOP_UNLOCK(vp, 0); 2144 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); 2145 return (ENOTBLK); 2146 } 2147 VOP_UNLOCK(vp, 0); 2148 *vpp = vp; 2149 return (0); 2150 } 2151 /* 2152 * Wait interruptibly for an exclusive lock. 2153 * 2154 * XXX 2155 * Several drivers do this; it should be abstracted and made MP-safe. 2156 * (Hmm... where have we seen this warning before :-> GO ) 2157 */ 2158 static int 2159 raidlock(rs) 2160 struct raid_softc *rs; 2161 { 2162 int error; 2163 2164 while ((rs->sc_flags & RAIDF_LOCKED) != 0) { 2165 rs->sc_flags |= RAIDF_WANTED; 2166 if ((error = 2167 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0) 2168 return (error); 2169 } 2170 rs->sc_flags |= RAIDF_LOCKED; 2171 return (0); 2172 } 2173 /* 2174 * Unlock and wake up any waiters.
2175 */ 2176 static void 2177 raidunlock(rs) 2178 struct raid_softc *rs; 2179 { 2180 2181 rs->sc_flags &= ~RAIDF_LOCKED; 2182 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2183 rs->sc_flags &= ~RAIDF_WANTED; 2184 wakeup(rs); 2185 } 2186 } 2187 2188 2189 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2190 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2191 2192 int 2193 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) 2194 { 2195 RF_ComponentLabel_t clabel; 2196 raidread_component_label(dev, b_vp, &clabel); 2197 clabel.mod_counter = mod_counter; 2198 clabel.clean = RF_RAID_CLEAN; 2199 raidwrite_component_label(dev, b_vp, &clabel); 2200 return(0); 2201 } 2202 2203 2204 int 2205 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) 2206 { 2207 RF_ComponentLabel_t clabel; 2208 raidread_component_label(dev, b_vp, &clabel); 2209 clabel.mod_counter = mod_counter; 2210 clabel.clean = RF_RAID_DIRTY; 2211 raidwrite_component_label(dev, b_vp, &clabel); 2212 return(0); 2213 } 2214 2215 /* ARGSUSED */ 2216 int 2217 raidread_component_label(dev, b_vp, clabel) 2218 dev_t dev; 2219 struct vnode *b_vp; 2220 RF_ComponentLabel_t *clabel; 2221 { 2222 struct buf *bp; 2223 const struct bdevsw *bdev; 2224 int error; 2225 2226 /* XXX should probably ensure that we don't try to do this if 2227 someone has changed rf_protected_sectors. */ 2228 2229 if (b_vp == NULL) { 2230 /* For whatever reason, this component is not valid. 2231 Don't try to read a component label from it. */ 2232 return(EINVAL); 2233 } 2234 2235 /* get a block of the appropriate size... */ 2236 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2237 bp->b_dev = dev; 2238 2239 /* get our ducks in a row for the read */ 2240 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2241 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2242 bp->b_flags |= B_READ; 2243 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2244 2245 bdev = bdevsw_lookup(bp->b_dev); 2246 if (bdev == NULL) 2247 return (ENXIO); 2248 (*bdev->d_strategy)(bp); 2249 2250 error = biowait(bp); 2251 2252 if (!error) { 2253 memcpy(clabel, bp->b_data, 2254 sizeof(RF_ComponentLabel_t)); 2255 #if 0 2256 rf_print_component_label( clabel ); 2257 #endif 2258 } else { 2259 #if 0 2260 printf("Failed to read RAID component label!\n"); 2261 #endif 2262 } 2263 2264 brelse(bp); 2265 return(error); 2266 } 2267 /* ARGSUSED */ 2268 int 2269 raidwrite_component_label(dev, b_vp, clabel) 2270 dev_t dev; 2271 struct vnode *b_vp; 2272 RF_ComponentLabel_t *clabel; 2273 { 2274 struct buf *bp; 2275 const struct bdevsw *bdev; 2276 int error; 2277 2278 /* get a block of the appropriate size... 
*/ 2279 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2280 bp->b_dev = dev; 2281 2282 /* get our ducks in a row for the write */ 2283 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2284 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2285 bp->b_flags |= B_WRITE; 2286 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2287 2288 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); 2289 2290 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); 2291 2292 bdev = bdevsw_lookup(bp->b_dev); 2293 if (bdev == NULL) 2294 return (ENXIO); 2295 (*bdev->d_strategy)(bp); 2296 error = biowait(bp); 2297 brelse(bp); 2298 if (error) { 2299 #if 1 2300 printf("Failed to write RAID component info!\n"); 2301 #endif 2302 } 2303 2304 return(error); 2305 } 2306 2307 void 2308 rf_markalldirty(raidPtr) 2309 RF_Raid_t *raidPtr; 2310 { 2311 RF_ComponentLabel_t clabel; 2312 int r,c; 2313 2314 raidPtr->mod_counter++; 2315 for (r = 0; r < raidPtr->numRow; r++) { 2316 for (c = 0; c < raidPtr->numCol; c++) { 2317 /* we don't want to touch (at all) a disk that has 2318 failed */ 2319 if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) { 2320 raidread_component_label( 2321 raidPtr->Disks[r][c].dev, 2322 raidPtr->raid_cinfo[r][c].ci_vp, 2323 &clabel); 2324 if (clabel.status == rf_ds_spared) { 2325 /* XXX do something special... 2326 but whatever you do, don't 2327 try to access it!! */ 2328 } else { 2329 #if 0 2330 clabel.status = 2331 raidPtr->Disks[r][c].status; 2332 raidwrite_component_label( 2333 raidPtr->Disks[r][c].dev, 2334 raidPtr->raid_cinfo[r][c].ci_vp, 2335 &clabel); 2336 #endif 2337 raidmarkdirty( 2338 raidPtr->Disks[r][c].dev, 2339 raidPtr->raid_cinfo[r][c].ci_vp, 2340 raidPtr->mod_counter); 2341 } 2342 } 2343 } 2344 } 2345 /* printf("Component labels marked dirty.\n"); */ 2346 #if 0 2347 for( c = 0; c < raidPtr->numSpare ; c++) { 2348 sparecol = raidPtr->numCol + c; 2349 if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) { 2350 /* 2351 2352 XXX this is where we get fancy and map this spare 2353 into it's correct spot in the array. 2354 2355 */ 2356 /* 2357 2358 we claim this disk is "optimal" if it's 2359 rf_ds_used_spare, as that means it should be 2360 directly substitutable for the disk it replaced. 2361 We note that too... 
2362 2363 */ 2364 2365 for(i=0;i<raidPtr->numRow;i++) { 2366 for(j=0;j<raidPtr->numCol;j++) { 2367 if ((raidPtr->Disks[i][j].spareRow == 2368 r) && 2369 (raidPtr->Disks[i][j].spareCol == 2370 sparecol)) { 2371 srow = r; 2372 scol = sparecol; 2373 break; 2374 } 2375 } 2376 } 2377 2378 raidread_component_label( 2379 raidPtr->Disks[r][sparecol].dev, 2380 raidPtr->raid_cinfo[r][sparecol].ci_vp, 2381 &clabel); 2382 /* make sure status is noted */ 2383 clabel.version = RF_COMPONENT_LABEL_VERSION; 2384 clabel.mod_counter = raidPtr->mod_counter; 2385 clabel.serial_number = raidPtr->serial_number; 2386 clabel.row = srow; 2387 clabel.column = scol; 2388 clabel.num_rows = raidPtr->numRow; 2389 clabel.num_columns = raidPtr->numCol; 2390 clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/ 2391 clabel.status = rf_ds_optimal; 2392 raidwrite_component_label( 2393 raidPtr->Disks[r][sparecol].dev, 2394 raidPtr->raid_cinfo[r][sparecol].ci_vp, 2395 &clabel); 2396 raidmarkclean( raidPtr->Disks[r][sparecol].dev, 2397 raidPtr->raid_cinfo[r][sparecol].ci_vp); 2398 } 2399 } 2400 2401 #endif 2402 } 2403 2404 2405 void 2406 rf_update_component_labels(raidPtr, final) 2407 RF_Raid_t *raidPtr; 2408 int final; 2409 { 2410 RF_ComponentLabel_t clabel; 2411 int sparecol; 2412 int r,c; 2413 int i,j; 2414 int srow, scol; 2415 2416 srow = -1; 2417 scol = -1; 2418 2419 /* XXX should do extra checks to make sure things really are clean, 2420 rather than blindly setting the clean bit... */ 2421 2422 raidPtr->mod_counter++; 2423 2424 for (r = 0; r < raidPtr->numRow; r++) { 2425 for (c = 0; c < raidPtr->numCol; c++) { 2426 if (raidPtr->Disks[r][c].status == rf_ds_optimal) { 2427 raidread_component_label( 2428 raidPtr->Disks[r][c].dev, 2429 raidPtr->raid_cinfo[r][c].ci_vp, 2430 &clabel); 2431 /* make sure status is noted */ 2432 clabel.status = rf_ds_optimal; 2433 /* bump the counter */ 2434 clabel.mod_counter = raidPtr->mod_counter; 2435 2436 raidwrite_component_label( 2437 raidPtr->Disks[r][c].dev, 2438 raidPtr->raid_cinfo[r][c].ci_vp, 2439 &clabel); 2440 if (final == RF_FINAL_COMPONENT_UPDATE) { 2441 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2442 raidmarkclean( 2443 raidPtr->Disks[r][c].dev, 2444 raidPtr->raid_cinfo[r][c].ci_vp, 2445 raidPtr->mod_counter); 2446 } 2447 } 2448 } 2449 /* else we don't touch it.. */ 2450 } 2451 } 2452 2453 for( c = 0; c < raidPtr->numSpare ; c++) { 2454 sparecol = raidPtr->numCol + c; 2455 /* Need to ensure that the reconstruct actually completed! */ 2456 if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) { 2457 /* 2458 2459 we claim this disk is "optimal" if it's 2460 rf_ds_used_spare, as that means it should be 2461 directly substitutable for the disk it replaced. 2462 We note that too... 2463 2464 */ 2465 2466 for(i=0;i<raidPtr->numRow;i++) { 2467 for(j=0;j<raidPtr->numCol;j++) { 2468 if ((raidPtr->Disks[i][j].spareRow == 2469 0) && 2470 (raidPtr->Disks[i][j].spareCol == 2471 sparecol)) { 2472 srow = i; 2473 scol = j; 2474 break; 2475 } 2476 } 2477 } 2478 2479 /* XXX shouldn't *really* need this... 
*/ 2480 raidread_component_label( 2481 raidPtr->Disks[0][sparecol].dev, 2482 raidPtr->raid_cinfo[0][sparecol].ci_vp, 2483 &clabel); 2484 /* make sure status is noted */ 2485 2486 raid_init_component_label(raidPtr, &clabel); 2487 2488 clabel.mod_counter = raidPtr->mod_counter; 2489 clabel.row = srow; 2490 clabel.column = scol; 2491 clabel.status = rf_ds_optimal; 2492 2493 raidwrite_component_label( 2494 raidPtr->Disks[0][sparecol].dev, 2495 raidPtr->raid_cinfo[0][sparecol].ci_vp, 2496 &clabel); 2497 if (final == RF_FINAL_COMPONENT_UPDATE) { 2498 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2499 raidmarkclean( raidPtr->Disks[0][sparecol].dev, 2500 raidPtr->raid_cinfo[0][sparecol].ci_vp, 2501 raidPtr->mod_counter); 2502 } 2503 } 2504 } 2505 } 2506 /* printf("Component labels updated\n"); */ 2507 } 2508 2509 void 2510 rf_close_component(raidPtr, vp, auto_configured) 2511 RF_Raid_t *raidPtr; 2512 struct vnode *vp; 2513 int auto_configured; 2514 { 2515 struct proc *p; 2516 2517 p = raidPtr->engine_thread; 2518 2519 if (vp != NULL) { 2520 if (auto_configured == 1) { 2521 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2522 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2523 vput(vp); 2524 2525 } else { 2526 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); 2527 } 2528 } else { 2529 #if 0 2530 printf("vnode was NULL\n"); 2531 #endif 2532 } 2533 } 2534 2535 2536 void 2537 rf_UnconfigureVnodes(raidPtr) 2538 RF_Raid_t *raidPtr; 2539 { 2540 int r,c; 2541 struct proc *p; 2542 struct vnode *vp; 2543 int acd; 2544 2545 2546 /* We take this opportunity to close the vnodes like we should.. */ 2547 2548 p = raidPtr->engine_thread; 2549 2550 for (r = 0; r < raidPtr->numRow; r++) { 2551 for (c = 0; c < raidPtr->numCol; c++) { 2552 #if 0 2553 printf("raid%d: Closing vnode for row: %d col: %d\n", 2554 raidPtr->raidid, r, c); 2555 #endif 2556 vp = raidPtr->raid_cinfo[r][c].ci_vp; 2557 acd = raidPtr->Disks[r][c].auto_configured; 2558 rf_close_component(raidPtr, vp, acd); 2559 raidPtr->raid_cinfo[r][c].ci_vp = NULL; 2560 raidPtr->Disks[r][c].auto_configured = 0; 2561 } 2562 } 2563 for (r = 0; r < raidPtr->numSpare; r++) { 2564 #if 0 2565 printf("raid%d: Closing vnode for spare: %d\n", 2566 raidPtr->raidid, r); 2567 #endif 2568 vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp; 2569 acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured; 2570 rf_close_component(raidPtr, vp, acd); 2571 raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL; 2572 raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0; 2573 } 2574 } 2575 2576 2577 void 2578 rf_ReconThread(req) 2579 struct rf_recon_req *req; 2580 { 2581 int s; 2582 RF_Raid_t *raidPtr; 2583 2584 s = splbio(); 2585 raidPtr = (RF_Raid_t *) req->raidPtr; 2586 raidPtr->recon_in_progress = 1; 2587 2588 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col, 2589 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); 2590 2591 /* XXX get rid of this! we don't need it at all.. */ 2592 RF_Free(req, sizeof(*req)); 2593 2594 raidPtr->recon_in_progress = 0; 2595 splx(s); 2596 2597 /* That's all... */ 2598 kthread_exit(0); /* does not return */ 2599 } 2600 2601 void 2602 rf_RewriteParityThread(raidPtr) 2603 RF_Raid_t *raidPtr; 2604 { 2605 int retcode; 2606 int s; 2607 2608 raidPtr->parity_rewrite_in_progress = 1; 2609 s = splbio(); 2610 retcode = rf_RewriteParity(raidPtr); 2611 splx(s); 2612 if (retcode) { 2613 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid); 2614 } else { 2615 /* set the clean bit! 
If we shutdown correctly, 2616 the clean bit on each component label will get 2617 set */ 2618 raidPtr->parity_good = RF_RAID_CLEAN; 2619 } 2620 raidPtr->parity_rewrite_in_progress = 0; 2621 2622 /* Anyone waiting for us to stop? If so, inform them... */ 2623 if (raidPtr->waitShutdown) { 2624 wakeup(&raidPtr->parity_rewrite_in_progress); 2625 } 2626 2627 /* That's all... */ 2628 kthread_exit(0); /* does not return */ 2629 } 2630 2631 2632 void 2633 rf_CopybackThread(raidPtr) 2634 RF_Raid_t *raidPtr; 2635 { 2636 int s; 2637 2638 raidPtr->copyback_in_progress = 1; 2639 s = splbio(); 2640 rf_CopybackReconstructedData(raidPtr); 2641 splx(s); 2642 raidPtr->copyback_in_progress = 0; 2643 2644 /* That's all... */ 2645 kthread_exit(0); /* does not return */ 2646 } 2647 2648 2649 void 2650 rf_ReconstructInPlaceThread(req) 2651 struct rf_recon_req *req; 2652 { 2653 int retcode; 2654 int s; 2655 RF_Raid_t *raidPtr; 2656 2657 s = splbio(); 2658 raidPtr = req->raidPtr; 2659 raidPtr->recon_in_progress = 1; 2660 retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col); 2661 RF_Free(req, sizeof(*req)); 2662 raidPtr->recon_in_progress = 0; 2663 splx(s); 2664 2665 /* That's all... */ 2666 kthread_exit(0); /* does not return */ 2667 } 2668 2669 void 2670 rf_mountroot_hook(dev) 2671 struct device *dev; 2672 { 2673 2674 } 2675 2676 2677 RF_AutoConfig_t * 2678 rf_find_raid_components() 2679 { 2680 struct vnode *vp; 2681 struct disklabel label; 2682 struct device *dv; 2683 dev_t dev; 2684 int bmajor; 2685 int error; 2686 int i; 2687 int good_one; 2688 RF_ComponentLabel_t *clabel; 2689 RF_AutoConfig_t *ac_list; 2690 RF_AutoConfig_t *ac; 2691 2692 2693 /* initialize the AutoConfig list */ 2694 ac_list = NULL; 2695 2696 /* we begin by trolling through *all* the devices on the system */ 2697 2698 for (dv = alldevs.tqh_first; dv != NULL; 2699 dv = dv->dv_list.tqe_next) { 2700 2701 /* we are only interested in disks... */ 2702 if (dv->dv_class != DV_DISK) 2703 continue; 2704 2705 /* we don't care about floppies... */ 2706 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) { 2707 continue; 2708 } 2709 2710 /* we don't care about CD's... */ 2711 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"cd")) { 2712 continue; 2713 } 2714 2715 /* hdfd is the Atari/Hades floppy driver */ 2716 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) { 2717 continue; 2718 } 2719 /* fdisa is the Atari/Milan floppy driver */ 2720 if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) { 2721 continue; 2722 } 2723 2724 /* need to find the device_name_to_block_device_major stuff */ 2725 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0); 2726 2727 /* get a vnode for the raw partition of this disk */ 2728 2729 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART); 2730 if (bdevvp(dev, &vp)) 2731 panic("RAID can't alloc vnode"); 2732 2733 error = VOP_OPEN(vp, FREAD, NOCRED, 0); 2734 2735 if (error) { 2736 /* "Who cares." Continue looking 2737 for something that exists*/ 2738 vput(vp); 2739 continue; 2740 } 2741 2742 /* Ok, the disk exists. Go get the disklabel. */ 2743 error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label, 2744 FREAD, NOCRED, 0); 2745 if (error) { 2746 /* 2747 * XXX can't happen - open() would 2748 * have errored out (or faked up one) 2749 */ 2750 printf("can't get label for dev %s%c (%d)!?!?\n", 2751 dv->dv_xname, 'a' + RAW_PART, error); 2752 } 2753 2754 /* don't need this any more. We'll allocate it again 2755 a little later if we really do... 
*/ 2756 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2757 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2758 vput(vp); 2759 2760 for (i=0; i < label.d_npartitions; i++) { 2761 /* We only support partitions marked as RAID */ 2762 if (label.d_partitions[i].p_fstype != FS_RAID) 2763 continue; 2764 2765 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i); 2766 if (bdevvp(dev, &vp)) 2767 panic("RAID can't alloc vnode"); 2768 2769 error = VOP_OPEN(vp, FREAD, NOCRED, 0); 2770 if (error) { 2771 /* Whatever... */ 2772 vput(vp); 2773 continue; 2774 } 2775 2776 good_one = 0; 2777 2778 clabel = (RF_ComponentLabel_t *) 2779 malloc(sizeof(RF_ComponentLabel_t), 2780 M_RAIDFRAME, M_NOWAIT); 2781 if (clabel == NULL) { 2782 /* XXX CLEANUP HERE */ 2783 printf("RAID auto config: out of memory!\n"); 2784 return(NULL); /* XXX probably should panic? */ 2785 } 2786 2787 if (!raidread_component_label(dev, vp, clabel)) { 2788 /* Got the label. Does it look reasonable? */ 2789 if (rf_reasonable_label(clabel) && 2790 (clabel->partitionSize <= 2791 label.d_partitions[i].p_size)) { 2792 #if DEBUG 2793 printf("Component on: %s%c: %d\n", 2794 dv->dv_xname, 'a'+i, 2795 label.d_partitions[i].p_size); 2796 rf_print_component_label(clabel); 2797 #endif 2798 /* if it's reasonable, add it, 2799 else ignore it. */ 2800 ac = (RF_AutoConfig_t *) 2801 malloc(sizeof(RF_AutoConfig_t), 2802 M_RAIDFRAME, 2803 M_NOWAIT); 2804 if (ac == NULL) { 2805 /* XXX should panic?? */ 2806 return(NULL); 2807 } 2808 2809 sprintf(ac->devname, "%s%c", 2810 dv->dv_xname, 'a'+i); 2811 ac->dev = dev; 2812 ac->vp = vp; 2813 ac->clabel = clabel; 2814 ac->next = ac_list; 2815 ac_list = ac; 2816 good_one = 1; 2817 } 2818 } 2819 if (!good_one) { 2820 /* cleanup */ 2821 free(clabel, M_RAIDFRAME); 2822 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2823 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2824 vput(vp); 2825 } 2826 } 2827 } 2828 return(ac_list); 2829 } 2830 2831 static int 2832 rf_reasonable_label(clabel) 2833 RF_ComponentLabel_t *clabel; 2834 { 2835 2836 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || 2837 (clabel->version==RF_COMPONENT_LABEL_VERSION)) && 2838 ((clabel->clean == RF_RAID_CLEAN) || 2839 (clabel->clean == RF_RAID_DIRTY)) && 2840 clabel->row >=0 && 2841 clabel->column >= 0 && 2842 clabel->num_rows > 0 && 2843 clabel->num_columns > 0 && 2844 clabel->row < clabel->num_rows && 2845 clabel->column < clabel->num_columns && 2846 clabel->blockSize > 0 && 2847 clabel->numBlocks > 0) { 2848 /* label looks reasonable enough... */ 2849 return(1); 2850 } 2851 return(0); 2852 } 2853 2854 2855 void 2856 rf_print_component_label(clabel) 2857 RF_ComponentLabel_t *clabel; 2858 { 2859 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 2860 clabel->row, clabel->column, 2861 clabel->num_rows, clabel->num_columns); 2862 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 2863 clabel->version, clabel->serial_number, 2864 clabel->mod_counter); 2865 printf(" Clean: %s Status: %d\n", 2866 clabel->clean ? "Yes" : "No", clabel->status ); 2867 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", 2868 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); 2869 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n", 2870 (char) clabel->parityConfig, clabel->blockSize, 2871 clabel->numBlocks); 2872 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" ); 2873 printf(" Contains root partition: %s\n", 2874 clabel->root_partition ? 
"Yes" : "No" ); 2875 printf(" Last configured as: raid%d\n", clabel->last_unit ); 2876 #if 0 2877 printf(" Config order: %d\n", clabel->config_order); 2878 #endif 2879 2880 } 2881 2882 RF_ConfigSet_t * 2883 rf_create_auto_sets(ac_list) 2884 RF_AutoConfig_t *ac_list; 2885 { 2886 RF_AutoConfig_t *ac; 2887 RF_ConfigSet_t *config_sets; 2888 RF_ConfigSet_t *cset; 2889 RF_AutoConfig_t *ac_next; 2890 2891 2892 config_sets = NULL; 2893 2894 /* Go through the AutoConfig list, and figure out which components 2895 belong to what sets. */ 2896 ac = ac_list; 2897 while(ac!=NULL) { 2898 /* we're going to putz with ac->next, so save it here 2899 for use at the end of the loop */ 2900 ac_next = ac->next; 2901 2902 if (config_sets == NULL) { 2903 /* will need at least this one... */ 2904 config_sets = (RF_ConfigSet_t *) 2905 malloc(sizeof(RF_ConfigSet_t), 2906 M_RAIDFRAME, M_NOWAIT); 2907 if (config_sets == NULL) { 2908 panic("rf_create_auto_sets: No memory!\n"); 2909 } 2910 /* this one is easy :) */ 2911 config_sets->ac = ac; 2912 config_sets->next = NULL; 2913 config_sets->rootable = 0; 2914 ac->next = NULL; 2915 } else { 2916 /* which set does this component fit into? */ 2917 cset = config_sets; 2918 while(cset!=NULL) { 2919 if (rf_does_it_fit(cset, ac)) { 2920 /* looks like it matches... */ 2921 ac->next = cset->ac; 2922 cset->ac = ac; 2923 break; 2924 } 2925 cset = cset->next; 2926 } 2927 if (cset==NULL) { 2928 /* didn't find a match above... new set..*/ 2929 cset = (RF_ConfigSet_t *) 2930 malloc(sizeof(RF_ConfigSet_t), 2931 M_RAIDFRAME, M_NOWAIT); 2932 if (cset == NULL) { 2933 panic("rf_create_auto_sets: No memory!\n"); 2934 } 2935 cset->ac = ac; 2936 ac->next = NULL; 2937 cset->next = config_sets; 2938 cset->rootable = 0; 2939 config_sets = cset; 2940 } 2941 } 2942 ac = ac_next; 2943 } 2944 2945 2946 return(config_sets); 2947 } 2948 2949 static int 2950 rf_does_it_fit(cset, ac) 2951 RF_ConfigSet_t *cset; 2952 RF_AutoConfig_t *ac; 2953 { 2954 RF_ComponentLabel_t *clabel1, *clabel2; 2955 2956 /* If this one matches the *first* one in the set, that's good 2957 enough, since the other members of the set would have been 2958 through here too... */ 2959 /* note that we are not checking partitionSize here.. 2960 2961 Note that we are also not checking the mod_counters here. 2962 If everything else matches execpt the mod_counter, that's 2963 good enough for this test. We will deal with the mod_counters 2964 a little later in the autoconfiguration process. 2965 2966 (clabel1->mod_counter == clabel2->mod_counter) && 2967 2968 The reason we don't check for this is that failed disks 2969 will have lower modification counts. If those disks are 2970 not added to the set they used to belong to, then they will 2971 form their own set, which may result in 2 different sets, 2972 for example, competing to be configured at raid0, and 2973 perhaps competing to be the root filesystem set. If the 2974 wrong ones get configured, or both attempt to become /, 2975 weird behaviour and or serious lossage will occur. Thus we 2976 need to bring them into the fold here, and kick them out at 2977 a later point. 
2978 2979 */ 2980 2981 clabel1 = cset->ac->clabel; 2982 clabel2 = ac->clabel; 2983 if ((clabel1->version == clabel2->version) && 2984 (clabel1->serial_number == clabel2->serial_number) && 2985 (clabel1->num_rows == clabel2->num_rows) && 2986 (clabel1->num_columns == clabel2->num_columns) && 2987 (clabel1->sectPerSU == clabel2->sectPerSU) && 2988 (clabel1->SUsPerPU == clabel2->SUsPerPU) && 2989 (clabel1->SUsPerRU == clabel2->SUsPerRU) && 2990 (clabel1->parityConfig == clabel2->parityConfig) && 2991 (clabel1->maxOutstanding == clabel2->maxOutstanding) && 2992 (clabel1->blockSize == clabel2->blockSize) && 2993 (clabel1->numBlocks == clabel2->numBlocks) && 2994 (clabel1->autoconfigure == clabel2->autoconfigure) && 2995 (clabel1->root_partition == clabel2->root_partition) && 2996 (clabel1->last_unit == clabel2->last_unit) && 2997 (clabel1->config_order == clabel2->config_order)) { 2998 /* if it gets here, it almost *has* to be a match */ 2999 } else { 3000 /* it's not consistent with somebody in the set.. 3001 punt */ 3002 return(0); 3003 } 3004 /* all was fine.. it must fit... */ 3005 return(1); 3006 } 3007 3008 int 3009 rf_have_enough_components(cset) 3010 RF_ConfigSet_t *cset; 3011 { 3012 RF_AutoConfig_t *ac; 3013 RF_AutoConfig_t *auto_config; 3014 RF_ComponentLabel_t *clabel; 3015 int r,c; 3016 int num_rows; 3017 int num_cols; 3018 int num_missing; 3019 int mod_counter; 3020 int mod_counter_found; 3021 int even_pair_failed; 3022 char parity_type; 3023 3024 3025 /* check to see that we have enough 'live' components 3026 of this set. If so, we can configure it if necessary */ 3027 3028 num_rows = cset->ac->clabel->num_rows; 3029 num_cols = cset->ac->clabel->num_columns; 3030 parity_type = cset->ac->clabel->parityConfig; 3031 3032 /* XXX Check for duplicate components!?!?!? */ 3033 3034 /* Determine what the mod_counter is supposed to be for this set. */ 3035 3036 mod_counter_found = 0; 3037 mod_counter = 0; 3038 ac = cset->ac; 3039 while(ac!=NULL) { 3040 if (mod_counter_found==0) { 3041 mod_counter = ac->clabel->mod_counter; 3042 mod_counter_found = 1; 3043 } else { 3044 if (ac->clabel->mod_counter > mod_counter) { 3045 mod_counter = ac->clabel->mod_counter; 3046 } 3047 } 3048 ac = ac->next; 3049 } 3050 3051 num_missing = 0; 3052 auto_config = cset->ac; 3053 3054 for(r=0; r<num_rows; r++) { 3055 even_pair_failed = 0; 3056 for(c=0; c<num_cols; c++) { 3057 ac = auto_config; 3058 while(ac!=NULL) { 3059 if ((ac->clabel->row == r) && 3060 (ac->clabel->column == c) && 3061 (ac->clabel->mod_counter == mod_counter)) { 3062 /* it's this one... */ 3063 #if DEBUG 3064 printf("Found: %s at %d,%d\n", 3065 ac->devname,r,c); 3066 #endif 3067 break; 3068 } 3069 ac=ac->next; 3070 } 3071 if (ac==NULL) { 3072 /* Didn't find one here! */ 3073 /* special case for RAID 1, especially 3074 where there are more than 2 3075 components (where RAIDframe treats 3076 things a little differently :( ) */ 3077 if (parity_type == '1') { 3078 if (c%2 == 0) { /* even component */ 3079 even_pair_failed = 1; 3080 } else { /* odd component. If 3081 we're failed, and 3082 so is the even 3083 component, it's 3084 "Good Night, Charlie" */ 3085 if (even_pair_failed == 1) { 3086 return(0); 3087 } 3088 } 3089 } else { 3090 /* normal accounting */ 3091 num_missing++; 3092 } 3093 } 3094 if ((parity_type == '1') && (c%2 == 1)) { 3095 /* Just did an even component, and we didn't 3096 bail.. reset the even_pair_failed flag, 3097 and go on to the next component....
*/ 3098 even_pair_failed = 0; 3099 } 3100 } 3101 } 3102 3103 clabel = cset->ac->clabel; 3104 3105 if (((clabel->parityConfig == '0') && (num_missing > 0)) || 3106 ((clabel->parityConfig == '4') && (num_missing > 1)) || 3107 ((clabel->parityConfig == '5') && (num_missing > 1))) { 3108 /* XXX this needs to be made *much* more general */ 3109 /* Too many failures */ 3110 return(0); 3111 } 3112 /* otherwise, all is well, and we've got enough to take a kick 3113 at autoconfiguring this set */ 3114 return(1); 3115 } 3116 3117 void 3118 rf_create_configuration(ac,config,raidPtr) 3119 RF_AutoConfig_t *ac; 3120 RF_Config_t *config; 3121 RF_Raid_t *raidPtr; 3122 { 3123 RF_ComponentLabel_t *clabel; 3124 int i; 3125 3126 clabel = ac->clabel; 3127 3128 /* 1. Fill in the common stuff */ 3129 config->numRow = clabel->num_rows; 3130 config->numCol = clabel->num_columns; 3131 config->numSpare = 0; /* XXX should this be set here? */ 3132 config->sectPerSU = clabel->sectPerSU; 3133 config->SUsPerPU = clabel->SUsPerPU; 3134 config->SUsPerRU = clabel->SUsPerRU; 3135 config->parityConfig = clabel->parityConfig; 3136 /* XXX... */ 3137 strcpy(config->diskQueueType,"fifo"); 3138 config->maxOutstandingDiskReqs = clabel->maxOutstanding; 3139 config->layoutSpecificSize = 0; /* XXX ?? */ 3140 3141 while(ac!=NULL) { 3142 /* row/col values will be in range due to the checks 3143 in reasonable_label() */ 3144 strcpy(config->devnames[ac->clabel->row][ac->clabel->column], 3145 ac->devname); 3146 ac = ac->next; 3147 } 3148 3149 for(i=0;i<RF_MAXDBGV;i++) { 3150 config->debugVars[i][0] = NULL; 3151 } 3152 } 3153 3154 int 3155 rf_set_autoconfig(raidPtr, new_value) 3156 RF_Raid_t *raidPtr; 3157 int new_value; 3158 { 3159 RF_ComponentLabel_t clabel; 3160 struct vnode *vp; 3161 dev_t dev; 3162 int row, column; 3163 3164 raidPtr->autoconfigure = new_value; 3165 for(row=0; row<raidPtr->numRow; row++) { 3166 for(column=0; column<raidPtr->numCol; column++) { 3167 if (raidPtr->Disks[row][column].status == 3168 rf_ds_optimal) { 3169 dev = raidPtr->Disks[row][column].dev; 3170 vp = raidPtr->raid_cinfo[row][column].ci_vp; 3171 raidread_component_label(dev, vp, &clabel); 3172 clabel.autoconfigure = new_value; 3173 raidwrite_component_label(dev, vp, &clabel); 3174 } 3175 } 3176 } 3177 return(new_value); 3178 } 3179 3180 int 3181 rf_set_rootpartition(raidPtr, new_value) 3182 RF_Raid_t *raidPtr; 3183 int new_value; 3184 { 3185 RF_ComponentLabel_t clabel; 3186 struct vnode *vp; 3187 dev_t dev; 3188 int row, column; 3189 3190 raidPtr->root_partition = new_value; 3191 for(row=0; row<raidPtr->numRow; row++) { 3192 for(column=0; column<raidPtr->numCol; column++) { 3193 if (raidPtr->Disks[row][column].status == 3194 rf_ds_optimal) { 3195 dev = raidPtr->Disks[row][column].dev; 3196 vp = raidPtr->raid_cinfo[row][column].ci_vp; 3197 raidread_component_label(dev, vp, &clabel); 3198 clabel.root_partition = new_value; 3199 raidwrite_component_label(dev, vp, &clabel); 3200 } 3201 } 3202 } 3203 return(new_value); 3204 } 3205 3206 void 3207 rf_release_all_vps(cset) 3208 RF_ConfigSet_t *cset; 3209 { 3210 RF_AutoConfig_t *ac; 3211 3212 ac = cset->ac; 3213 while(ac!=NULL) { 3214 /* Close the vp, and give it back */ 3215 if (ac->vp) { 3216 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 3217 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0); 3218 vput(ac->vp); 3219 ac->vp = NULL; 3220 } 3221 ac = ac->next; 3222 } 3223 } 3224 3225 3226 void 3227 rf_cleanup_config_set(cset) 3228 RF_ConfigSet_t *cset; 3229 { 3230 RF_AutoConfig_t *ac; 3231 RF_AutoConfig_t *next_ac; 3232 3233 ac = 
cset->ac; 3234 while(ac!=NULL) { 3235 next_ac = ac->next; 3236 /* nuke the label */ 3237 free(ac->clabel, M_RAIDFRAME); 3238 /* cleanup the config structure */ 3239 free(ac, M_RAIDFRAME); 3240 /* "next.." */ 3241 ac = next_ac; 3242 } 3243 /* and, finally, nuke the config set */ 3244 free(cset, M_RAIDFRAME); 3245 } 3246 3247 3248 void 3249 raid_init_component_label(raidPtr, clabel) 3250 RF_Raid_t *raidPtr; 3251 RF_ComponentLabel_t *clabel; 3252 { 3253 /* current version number */ 3254 clabel->version = RF_COMPONENT_LABEL_VERSION; 3255 clabel->serial_number = raidPtr->serial_number; 3256 clabel->mod_counter = raidPtr->mod_counter; 3257 clabel->num_rows = raidPtr->numRow; 3258 clabel->num_columns = raidPtr->numCol; 3259 clabel->clean = RF_RAID_DIRTY; /* not clean */ 3260 clabel->status = rf_ds_optimal; /* "It's good!" */ 3261 3262 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; 3263 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; 3264 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; 3265 3266 clabel->blockSize = raidPtr->bytesPerSector; 3267 clabel->numBlocks = raidPtr->sectorsPerDisk; 3268 3269 /* XXX not portable */ 3270 clabel->parityConfig = raidPtr->Layout.map->parityConfig; 3271 clabel->maxOutstanding = raidPtr->maxOutstanding; 3272 clabel->autoconfigure = raidPtr->autoconfigure; 3273 clabel->root_partition = raidPtr->root_partition; 3274 clabel->last_unit = raidPtr->raidid; 3275 clabel->config_order = raidPtr->config_order; 3276 } 3277 3278 int 3279 rf_auto_config_set(cset,unit) 3280 RF_ConfigSet_t *cset; 3281 int *unit; 3282 { 3283 RF_Raid_t *raidPtr; 3284 RF_Config_t *config; 3285 int raidID; 3286 int retcode; 3287 3288 #if DEBUG 3289 printf("RAID autoconfigure\n"); 3290 #endif 3291 3292 retcode = 0; 3293 *unit = -1; 3294 3295 /* 1. Create a config structure */ 3296 3297 config = (RF_Config_t *)malloc(sizeof(RF_Config_t), 3298 M_RAIDFRAME, 3299 M_NOWAIT); 3300 if (config==NULL) { 3301 printf("Out of mem!?!?\n"); 3302 /* XXX do something more intelligent here. */ 3303 return(1); 3304 } 3305 3306 memset(config, 0, sizeof(RF_Config_t)); 3307 3308 /* XXX raidID needs to be set correctly.. */ 3309 3310 /* 3311 2. Figure out what RAID ID this one is supposed to live at 3312 See if we can get the same RAID dev that it was configured 3313 on last time.. 3314 */ 3315 3316 raidID = cset->ac->clabel->last_unit; 3317 if ((raidID < 0) || (raidID >= numraid)) { 3318 /* let's not wander off into lala land. */ 3319 raidID = numraid - 1; 3320 } 3321 if (raidPtrs[raidID]->valid != 0) { 3322 3323 /* 3324 Nope... Go looking for an alternative... 3325 Start high so we don't immediately use raid0 if that's 3326 not taken. 3327 */ 3328 3329 for(raidID = numraid - 1; raidID >= 0; raidID--) { 3330 if (raidPtrs[raidID]->valid == 0) { 3331 /* can use this one! */ 3332 break; 3333 } 3334 } 3335 } 3336 3337 if (raidID < 0) { 3338 /* punt... */ 3339 printf("Unable to auto configure this set!\n"); 3340 printf("(Out of RAID devs!)\n"); 3341 return(1); 3342 } 3343 3344 #if DEBUG 3345 printf("Configuring raid%d:\n",raidID); 3346 #endif 3347 3348 raidPtr = raidPtrs[raidID]; 3349 3350 /* XXX all this stuff should be done SOMEWHERE ELSE! */ 3351 raidPtr->raidid = raidID; 3352 raidPtr->openings = RAIDOUTSTANDING; 3353 3354 /* 3. Build the configuration structure */ 3355 rf_create_configuration(cset->ac, config, raidPtr); 3356 3357 /* 4. 
Do the configuration */ 3358 retcode = rf_Configure(raidPtr, config, cset->ac); 3359 3360 if (retcode == 0) { 3361 3362 raidinit(raidPtrs[raidID]); 3363 3364 rf_markalldirty(raidPtrs[raidID]); 3365 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */ 3366 if (cset->ac->clabel->root_partition==1) { 3367 /* everything configured just fine. Make a note 3368 that this set is eligible to be root. */ 3369 cset->rootable = 1; 3370 /* XXX do this here? */ 3371 raidPtrs[raidID]->root_partition = 1; 3372 } 3373 } 3374 3375 /* 5. Cleanup */ 3376 free(config, M_RAIDFRAME); 3377 3378 *unit = raidID; 3379 return(retcode); 3380 } 3381 3382 void 3383 rf_disk_unbusy(desc) 3384 RF_RaidAccessDesc_t *desc; 3385 { 3386 struct buf *bp; 3387 3388 bp = (struct buf *)desc->bp; 3389 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev, 3390 (bp->b_bcount - bp->b_resid)); 3391 } 3392