1 /* $NetBSD: rf_netbsdkintf.c,v 1.180 2004/07/01 17:48:45 oster Exp $ */ 2 /*- 3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster; Jason R. Thorpe. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the NetBSD 20 * Foundation, Inc. and its contributors. 21 * 4. Neither the name of The NetBSD Foundation nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1990, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * This code is derived from software contributed to Berkeley by 43 * the Systems Programming Group of the University of Utah Computer 44 * Science Department. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 * 70 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 71 * 72 * @(#)cd.c 8.2 (Berkeley) 11/16/93 73 */ 74 75 /* 76 * Copyright (c) 1988 University of Utah. 77 * 78 * This code is derived from software contributed to Berkeley by 79 * the Systems Programming Group of the University of Utah Computer 80 * Science Department. 81 * 82 * Redistribution and use in source and binary forms, with or without 83 * modification, are permitted provided that the following conditions 84 * are met: 85 * 1. Redistributions of source code must retain the above copyright 86 * notice, this list of conditions and the following disclaimer. 87 * 2. Redistributions in binary form must reproduce the above copyright 88 * notice, this list of conditions and the following disclaimer in the 89 * documentation and/or other materials provided with the distribution. 90 * 3. All advertising materials mentioning features or use of this software 91 * must display the following acknowledgement: 92 * This product includes software developed by the University of 93 * California, Berkeley and its contributors. 94 * 4. Neither the name of the University nor the names of its contributors 95 * may be used to endorse or promote products derived from this software 96 * without specific prior written permission. 97 * 98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 108 * SUCH DAMAGE. 109 * 110 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 111 * 112 * @(#)cd.c 8.2 (Berkeley) 11/16/93 113 */ 114 115 /* 116 * Copyright (c) 1995 Carnegie-Mellon University. 117 * All rights reserved. 118 * 119 * Authors: Mark Holland, Jim Zelenka 120 * 121 * Permission to use, copy, modify and distribute this software and 122 * its documentation is hereby granted, provided that both the copyright 123 * notice and this permission notice appear in all copies of the 124 * software, derivative works or modified versions, and any portions 125 * thereof, and that both notices appear in supporting documentation. 126 * 127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 130 * 131 * Carnegie Mellon requests users of this software to return to 132 * 133 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 134 * School of Computer Science 135 * Carnegie Mellon University 136 * Pittsburgh PA 15213-3890 137 * 138 * any improvements or extensions that they make and grant Carnegie the 139 * rights to redistribute these changes. 140 */ 141 142 /*********************************************************** 143 * 144 * rf_kintf.c -- the kernel interface routines for RAIDframe 145 * 146 ***********************************************************/ 147 148 #include <sys/cdefs.h> 149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.180 2004/07/01 17:48:45 oster Exp $"); 150 151 #include <sys/param.h> 152 #include <sys/errno.h> 153 #include <sys/pool.h> 154 #include <sys/proc.h> 155 #include <sys/queue.h> 156 #include <sys/disk.h> 157 #include <sys/device.h> 158 #include <sys/stat.h> 159 #include <sys/ioctl.h> 160 #include <sys/fcntl.h> 161 #include <sys/systm.h> 162 #include <sys/namei.h> 163 #include <sys/vnode.h> 164 #include <sys/disklabel.h> 165 #include <sys/conf.h> 166 #include <sys/lock.h> 167 #include <sys/buf.h> 168 #include <sys/user.h> 169 #include <sys/reboot.h> 170 171 #include <dev/raidframe/raidframevar.h> 172 #include <dev/raidframe/raidframeio.h> 173 #include "raid.h" 174 #include "opt_raid_autoconfig.h" 175 #include "rf_raid.h" 176 #include "rf_copyback.h" 177 #include "rf_dag.h" 178 #include "rf_dagflags.h" 179 #include "rf_desc.h" 180 #include "rf_diskqueue.h" 181 #include "rf_etimer.h" 182 #include "rf_general.h" 183 #include "rf_kintf.h" 184 #include "rf_options.h" 185 #include "rf_driver.h" 186 #include "rf_parityscan.h" 187 #include "rf_threadstuff.h" 188 189 #ifdef DEBUG 190 int rf_kdebug_level = 0; 191 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 192 #else /* DEBUG */ 193 #define db1_printf(a) { } 194 #endif /* DEBUG */ 195 196 static RF_Raid_t **raidPtrs; /* global raid device descriptors */ 197 198 RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex) 199 200 static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a 201 * spare table */ 202 static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from 203 * installation process */ 204 205 MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures"); 206 207 /* prototypes */ 208 static void KernelWakeupFunc(struct buf * bp); 209 static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag, 210 dev_t dev, RF_SectorNum_t startSect, 211 RF_SectorCount_t numSect, caddr_t buf, 212 void (*cbFunc) (struct buf *), void *cbArg, 213 int logBytesPerSector, struct proc * b_proc); 214 static void raidinit(RF_Raid_t *); 215 216 void raidattach(int); 217 218 dev_type_open(raidopen); 219 dev_type_close(raidclose); 220 dev_type_read(raidread); 221 dev_type_write(raidwrite); 222 dev_type_ioctl(raidioctl); 223 dev_type_strategy(raidstrategy); 224 dev_type_dump(raiddump); 225 dev_type_size(raidsize); 226 227 const struct bdevsw raid_bdevsw = { 228 raidopen, raidclose, raidstrategy, raidioctl, 229 raiddump, raidsize, D_DISK 230 }; 231 232 const struct cdevsw raid_cdevsw = { 233 raidopen, raidclose, raidread, raidwrite, raidioctl, 234 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 235 }; 236 237 /* 238 * Pilfered from ccd.c 239 */ 240 241 struct raidbuf { 242 struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */ 243 struct buf *rf_obp; /* ptr. to original I/O buf */ 244 RF_DiskQueueData_t *req;/* the request that this was part of.. */ 245 }; 246 247 /* XXX Not sure if the following should be replacing the raidPtrs above, 248 or if it should be used in conjunction with that... 249 */ 250 251 struct raid_softc { 252 int sc_flags; /* flags */ 253 int sc_cflags; /* configuration flags */ 254 size_t sc_size; /* size of the raid device */ 255 char sc_xname[20]; /* XXX external name */ 256 struct disk sc_dkdev; /* generic disk device info */ 257 struct bufq_state buf_queue; /* used for the device queue */ 258 }; 259 /* sc_flags */ 260 #define RAIDF_INITED 0x01 /* unit has been initialized */ 261 #define RAIDF_WLABEL 0x02 /* label area is writable */ 262 #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */ 263 #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */ 264 #define RAIDF_LOCKED 0x80 /* unit is locked */ 265 266 #define raidunit(x) DISKUNIT(x) 267 int numraid = 0; 268 269 /* 270 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device. 271 * Be aware that large numbers can allow the driver to consume a lot of 272 * kernel memory, especially on writes, and in degraded mode reads. 273 * 274 * For example: with a stripe width of 64 blocks (32k) and 5 disks, 275 * a single 64K write will typically require 64K for the old data, 276 * 64K for the old parity, and 64K for the new parity, for a total 277 * of 192K (if the parity buffer is not re-used immediately). 278 * Even it if is used immediately, that's still 128K, which when multiplied 279 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data. 280 * 281 * Now in degraded mode, for example, a 64K read on the above setup may 282 * require data reconstruction, which will require *all* of the 4 remaining 283 * disks to participate -- 4 * 32K/disk == 128K again. 284 */ 285 286 #ifndef RAIDOUTSTANDING 287 #define RAIDOUTSTANDING 6 288 #endif 289 290 #define RAIDLABELDEV(dev) \ 291 (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART)) 292 293 /* declared here, and made public, for the benefit of KVM stuff.. */ 294 struct raid_softc *raid_softc; 295 296 static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *, 297 struct disklabel *); 298 static void raidgetdisklabel(dev_t); 299 static void raidmakedisklabel(struct raid_softc *); 300 301 static int raidlock(struct raid_softc *); 302 static void raidunlock(struct raid_softc *); 303 304 static void rf_markalldirty(RF_Raid_t *); 305 306 struct device *raidrootdev; 307 308 void rf_ReconThread(struct rf_recon_req *); 309 void rf_RewriteParityThread(RF_Raid_t *raidPtr); 310 void rf_CopybackThread(RF_Raid_t *raidPtr); 311 void rf_ReconstructInPlaceThread(struct rf_recon_req *); 312 int rf_autoconfig(struct device *self); 313 void rf_buildroothack(RF_ConfigSet_t *); 314 315 RF_AutoConfig_t *rf_find_raid_components(void); 316 RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *); 317 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *); 318 static int rf_reasonable_label(RF_ComponentLabel_t *); 319 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *); 320 int rf_set_autoconfig(RF_Raid_t *, int); 321 int rf_set_rootpartition(RF_Raid_t *, int); 322 void rf_release_all_vps(RF_ConfigSet_t *); 323 void rf_cleanup_config_set(RF_ConfigSet_t *); 324 int rf_have_enough_components(RF_ConfigSet_t *); 325 int rf_auto_config_set(RF_ConfigSet_t *, int *); 326 327 static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not 328 allow autoconfig to take place. 329 Note that this is overridden by having 330 RAID_AUTOCONFIG as an option in the 331 kernel config file. */ 332 333 struct RF_Pools_s rf_pools; 334 335 void 336 raidattach(int num) 337 { 338 int raidID; 339 int i, rc; 340 341 #ifdef DEBUG 342 printf("raidattach: Asked for %d units\n", num); 343 #endif 344 345 if (num <= 0) { 346 #ifdef DIAGNOSTIC 347 panic("raidattach: count <= 0"); 348 #endif 349 return; 350 } 351 /* This is where all the initialization stuff gets done. */ 352 353 numraid = num; 354 355 /* Make some space for requested number of units... */ 356 357 RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **)); 358 if (raidPtrs == NULL) { 359 panic("raidPtrs is NULL!!"); 360 } 361 362 /* Initialize the component buffer pool. */ 363 rf_pool_init(&rf_pools.cbuf, sizeof(struct raidbuf), 364 "raidpl", num * RAIDOUTSTANDING, 365 2 * num * RAIDOUTSTANDING); 366 367 rf_mutex_init(&rf_sparet_wait_mutex); 368 369 rf_sparet_wait_queue = rf_sparet_resp_queue = NULL; 370 371 for (i = 0; i < num; i++) 372 raidPtrs[i] = NULL; 373 rc = rf_BootRaidframe(); 374 if (rc == 0) 375 printf("Kernelized RAIDframe activated\n"); 376 else 377 panic("Serious error booting RAID!!"); 378 379 /* put together some datastructures like the CCD device does.. This 380 * lets us lock the device and what-not when it gets opened. */ 381 382 raid_softc = (struct raid_softc *) 383 malloc(num * sizeof(struct raid_softc), 384 M_RAIDFRAME, M_NOWAIT); 385 if (raid_softc == NULL) { 386 printf("WARNING: no memory for RAIDframe driver\n"); 387 return; 388 } 389 390 memset(raid_softc, 0, num * sizeof(struct raid_softc)); 391 392 raidrootdev = (struct device *)malloc(num * sizeof(struct device), 393 M_RAIDFRAME, M_NOWAIT); 394 if (raidrootdev == NULL) { 395 panic("No memory for RAIDframe driver!!?!?!"); 396 } 397 398 for (raidID = 0; raidID < num; raidID++) { 399 bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS); 400 401 raidrootdev[raidID].dv_class = DV_DISK; 402 raidrootdev[raidID].dv_cfdata = NULL; 403 raidrootdev[raidID].dv_unit = raidID; 404 raidrootdev[raidID].dv_parent = NULL; 405 raidrootdev[raidID].dv_flags = 0; 406 snprintf(raidrootdev[raidID].dv_xname, 407 sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID); 408 409 RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t), 410 (RF_Raid_t *)); 411 if (raidPtrs[raidID] == NULL) { 412 printf("WARNING: raidPtrs[%d] is NULL\n", raidID); 413 numraid = raidID; 414 return; 415 } 416 } 417 418 #ifdef RAID_AUTOCONFIG 419 raidautoconfig = 1; 420 #endif 421 422 /* 423 * Register a finalizer which will be used to auto-config RAID 424 * sets once all real hardware devices have been found. 425 */ 426 if (config_finalize_register(NULL, rf_autoconfig) != 0) 427 printf("WARNING: unable to register RAIDframe finalizer\n"); 428 } 429 430 int 431 rf_autoconfig(struct device *self) 432 { 433 RF_AutoConfig_t *ac_list; 434 RF_ConfigSet_t *config_sets; 435 436 if (raidautoconfig == 0) 437 return (0); 438 439 /* XXX This code can only be run once. */ 440 raidautoconfig = 0; 441 442 /* 1. locate all RAID components on the system */ 443 #ifdef DEBUG 444 printf("Searching for RAID components...\n"); 445 #endif 446 ac_list = rf_find_raid_components(); 447 448 /* 2. Sort them into their respective sets. */ 449 config_sets = rf_create_auto_sets(ac_list); 450 451 /* 452 * 3. Evaluate each set andconfigure the valid ones. 453 * This gets done in rf_buildroothack(). 454 */ 455 rf_buildroothack(config_sets); 456 457 return (1); 458 } 459 460 void 461 rf_buildroothack(RF_ConfigSet_t *config_sets) 462 { 463 RF_ConfigSet_t *cset; 464 RF_ConfigSet_t *next_cset; 465 int retcode; 466 int raidID; 467 int rootID; 468 int num_root; 469 470 rootID = 0; 471 num_root = 0; 472 cset = config_sets; 473 while(cset != NULL ) { 474 next_cset = cset->next; 475 if (rf_have_enough_components(cset) && 476 cset->ac->clabel->autoconfigure==1) { 477 retcode = rf_auto_config_set(cset,&raidID); 478 if (!retcode) { 479 if (cset->rootable) { 480 rootID = raidID; 481 num_root++; 482 } 483 } else { 484 /* The autoconfig didn't work :( */ 485 #if DEBUG 486 printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID); 487 #endif 488 rf_release_all_vps(cset); 489 } 490 } else { 491 /* we're not autoconfiguring this set... 492 release the associated resources */ 493 rf_release_all_vps(cset); 494 } 495 /* cleanup */ 496 rf_cleanup_config_set(cset); 497 cset = next_cset; 498 } 499 500 /* we found something bootable... */ 501 502 if (num_root == 1) { 503 booted_device = &raidrootdev[rootID]; 504 } else if (num_root > 1) { 505 /* we can't guess.. require the user to answer... */ 506 boothowto |= RB_ASKNAME; 507 } 508 } 509 510 511 int 512 raidsize(dev_t dev) 513 { 514 struct raid_softc *rs; 515 struct disklabel *lp; 516 int part, unit, omask, size; 517 518 unit = raidunit(dev); 519 if (unit >= numraid) 520 return (-1); 521 rs = &raid_softc[unit]; 522 523 if ((rs->sc_flags & RAIDF_INITED) == 0) 524 return (-1); 525 526 part = DISKPART(dev); 527 omask = rs->sc_dkdev.dk_openmask & (1 << part); 528 lp = rs->sc_dkdev.dk_label; 529 530 if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc)) 531 return (-1); 532 533 if (lp->d_partitions[part].p_fstype != FS_SWAP) 534 size = -1; 535 else 536 size = lp->d_partitions[part].p_size * 537 (lp->d_secsize / DEV_BSIZE); 538 539 if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc)) 540 return (-1); 541 542 return (size); 543 544 } 545 546 int 547 raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size) 548 { 549 /* Not implemented. */ 550 return ENXIO; 551 } 552 /* ARGSUSED */ 553 int 554 raidopen(dev_t dev, int flags, int fmt, struct proc *p) 555 { 556 int unit = raidunit(dev); 557 struct raid_softc *rs; 558 struct disklabel *lp; 559 int part, pmask; 560 int error = 0; 561 562 if (unit >= numraid) 563 return (ENXIO); 564 rs = &raid_softc[unit]; 565 566 if ((error = raidlock(rs)) != 0) 567 return (error); 568 lp = rs->sc_dkdev.dk_label; 569 570 part = DISKPART(dev); 571 pmask = (1 << part); 572 573 if ((rs->sc_flags & RAIDF_INITED) && 574 (rs->sc_dkdev.dk_openmask == 0)) 575 raidgetdisklabel(dev); 576 577 /* make sure that this partition exists */ 578 579 if (part != RAW_PART) { 580 if (((rs->sc_flags & RAIDF_INITED) == 0) || 581 ((part >= lp->d_npartitions) || 582 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 583 error = ENXIO; 584 raidunlock(rs); 585 return (error); 586 } 587 } 588 /* Prevent this unit from being unconfigured while open. */ 589 switch (fmt) { 590 case S_IFCHR: 591 rs->sc_dkdev.dk_copenmask |= pmask; 592 break; 593 594 case S_IFBLK: 595 rs->sc_dkdev.dk_bopenmask |= pmask; 596 break; 597 } 598 599 if ((rs->sc_dkdev.dk_openmask == 0) && 600 ((rs->sc_flags & RAIDF_INITED) != 0)) { 601 /* First one... mark things as dirty... Note that we *MUST* 602 have done a configure before this. I DO NOT WANT TO BE 603 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED 604 THAT THEY BELONG TOGETHER!!!!! */ 605 /* XXX should check to see if we're only open for reading 606 here... If so, we needn't do this, but then need some 607 other way of keeping track of what's happened.. */ 608 609 rf_markalldirty( raidPtrs[unit] ); 610 } 611 612 613 rs->sc_dkdev.dk_openmask = 614 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 615 616 raidunlock(rs); 617 618 return (error); 619 620 621 } 622 /* ARGSUSED */ 623 int 624 raidclose(dev_t dev, int flags, int fmt, struct proc *p) 625 { 626 int unit = raidunit(dev); 627 struct raid_softc *rs; 628 int error = 0; 629 int part; 630 631 if (unit >= numraid) 632 return (ENXIO); 633 rs = &raid_softc[unit]; 634 635 if ((error = raidlock(rs)) != 0) 636 return (error); 637 638 part = DISKPART(dev); 639 640 /* ...that much closer to allowing unconfiguration... */ 641 switch (fmt) { 642 case S_IFCHR: 643 rs->sc_dkdev.dk_copenmask &= ~(1 << part); 644 break; 645 646 case S_IFBLK: 647 rs->sc_dkdev.dk_bopenmask &= ~(1 << part); 648 break; 649 } 650 rs->sc_dkdev.dk_openmask = 651 rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask; 652 653 if ((rs->sc_dkdev.dk_openmask == 0) && 654 ((rs->sc_flags & RAIDF_INITED) != 0)) { 655 /* Last one... device is not unconfigured yet. 656 Device shutdown has taken care of setting the 657 clean bits if RAIDF_INITED is not set 658 mark things as clean... */ 659 660 rf_update_component_labels(raidPtrs[unit], 661 RF_FINAL_COMPONENT_UPDATE); 662 if (doing_shutdown) { 663 /* last one, and we're going down, so 664 lights out for this RAID set too. */ 665 error = rf_Shutdown(raidPtrs[unit]); 666 667 /* It's no longer initialized... */ 668 rs->sc_flags &= ~RAIDF_INITED; 669 670 /* Detach the disk. */ 671 disk_detach(&rs->sc_dkdev); 672 } 673 } 674 675 raidunlock(rs); 676 return (0); 677 678 } 679 680 void 681 raidstrategy(struct buf *bp) 682 { 683 int s; 684 685 unsigned int raidID = raidunit(bp->b_dev); 686 RF_Raid_t *raidPtr; 687 struct raid_softc *rs = &raid_softc[raidID]; 688 int wlabel; 689 690 if ((rs->sc_flags & RAIDF_INITED) ==0) { 691 bp->b_error = ENXIO; 692 bp->b_flags |= B_ERROR; 693 bp->b_resid = bp->b_bcount; 694 biodone(bp); 695 return; 696 } 697 if (raidID >= numraid || !raidPtrs[raidID]) { 698 bp->b_error = ENODEV; 699 bp->b_flags |= B_ERROR; 700 bp->b_resid = bp->b_bcount; 701 biodone(bp); 702 return; 703 } 704 raidPtr = raidPtrs[raidID]; 705 if (!raidPtr->valid) { 706 bp->b_error = ENODEV; 707 bp->b_flags |= B_ERROR; 708 bp->b_resid = bp->b_bcount; 709 biodone(bp); 710 return; 711 } 712 if (bp->b_bcount == 0) { 713 db1_printf(("b_bcount is zero..\n")); 714 biodone(bp); 715 return; 716 } 717 718 /* 719 * Do bounds checking and adjust transfer. If there's an 720 * error, the bounds check will flag that for us. 721 */ 722 723 wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING); 724 if (DISKPART(bp->b_dev) != RAW_PART) 725 if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) { 726 db1_printf(("Bounds check failed!!:%d %d\n", 727 (int) bp->b_blkno, (int) wlabel)); 728 biodone(bp); 729 return; 730 } 731 s = splbio(); 732 733 bp->b_resid = 0; 734 735 /* stuff it onto our queue */ 736 BUFQ_PUT(&rs->buf_queue, bp); 737 738 raidstart(raidPtrs[raidID]); 739 740 splx(s); 741 } 742 /* ARGSUSED */ 743 int 744 raidread(dev_t dev, struct uio *uio, int flags) 745 { 746 int unit = raidunit(dev); 747 struct raid_softc *rs; 748 749 if (unit >= numraid) 750 return (ENXIO); 751 rs = &raid_softc[unit]; 752 753 if ((rs->sc_flags & RAIDF_INITED) == 0) 754 return (ENXIO); 755 756 return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio)); 757 758 } 759 /* ARGSUSED */ 760 int 761 raidwrite(dev_t dev, struct uio *uio, int flags) 762 { 763 int unit = raidunit(dev); 764 struct raid_softc *rs; 765 766 if (unit >= numraid) 767 return (ENXIO); 768 rs = &raid_softc[unit]; 769 770 if ((rs->sc_flags & RAIDF_INITED) == 0) 771 return (ENXIO); 772 773 return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio)); 774 775 } 776 777 int 778 raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) 779 { 780 int unit = raidunit(dev); 781 int error = 0; 782 int part, pmask; 783 struct raid_softc *rs; 784 RF_Config_t *k_cfg, *u_cfg; 785 RF_Raid_t *raidPtr; 786 RF_RaidDisk_t *diskPtr; 787 RF_AccTotals_t *totals; 788 RF_DeviceConfig_t *d_cfg, **ucfgp; 789 u_char *specific_buf; 790 int retcode = 0; 791 int column; 792 int raidid; 793 struct rf_recon_req *rrcopy, *rr; 794 RF_ComponentLabel_t *clabel; 795 RF_ComponentLabel_t ci_label; 796 RF_ComponentLabel_t **clabel_ptr; 797 RF_SingleComponent_t *sparePtr,*componentPtr; 798 RF_SingleComponent_t hot_spare; 799 RF_SingleComponent_t component; 800 RF_ProgressInfo_t progressInfo, **progressInfoPtr; 801 int i, j, d; 802 #ifdef __HAVE_OLD_DISKLABEL 803 struct disklabel newlabel; 804 #endif 805 806 if (unit >= numraid) 807 return (ENXIO); 808 rs = &raid_softc[unit]; 809 raidPtr = raidPtrs[unit]; 810 811 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev, 812 (int) DISKPART(dev), (int) unit, (int) cmd)); 813 814 /* Must be open for writes for these commands... */ 815 switch (cmd) { 816 case DIOCSDINFO: 817 case DIOCWDINFO: 818 #ifdef __HAVE_OLD_DISKLABEL 819 case ODIOCWDINFO: 820 case ODIOCSDINFO: 821 #endif 822 case DIOCWLABEL: 823 if ((flag & FWRITE) == 0) 824 return (EBADF); 825 } 826 827 /* Must be initialized for these... */ 828 switch (cmd) { 829 case DIOCGDINFO: 830 case DIOCSDINFO: 831 case DIOCWDINFO: 832 #ifdef __HAVE_OLD_DISKLABEL 833 case ODIOCGDINFO: 834 case ODIOCWDINFO: 835 case ODIOCSDINFO: 836 case ODIOCGDEFLABEL: 837 #endif 838 case DIOCGPART: 839 case DIOCWLABEL: 840 case DIOCGDEFLABEL: 841 case RAIDFRAME_SHUTDOWN: 842 case RAIDFRAME_REWRITEPARITY: 843 case RAIDFRAME_GET_INFO: 844 case RAIDFRAME_RESET_ACCTOTALS: 845 case RAIDFRAME_GET_ACCTOTALS: 846 case RAIDFRAME_KEEP_ACCTOTALS: 847 case RAIDFRAME_GET_SIZE: 848 case RAIDFRAME_FAIL_DISK: 849 case RAIDFRAME_COPYBACK: 850 case RAIDFRAME_CHECK_RECON_STATUS: 851 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 852 case RAIDFRAME_GET_COMPONENT_LABEL: 853 case RAIDFRAME_SET_COMPONENT_LABEL: 854 case RAIDFRAME_ADD_HOT_SPARE: 855 case RAIDFRAME_REMOVE_HOT_SPARE: 856 case RAIDFRAME_INIT_LABELS: 857 case RAIDFRAME_REBUILD_IN_PLACE: 858 case RAIDFRAME_CHECK_PARITY: 859 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 860 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 861 case RAIDFRAME_CHECK_COPYBACK_STATUS: 862 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 863 case RAIDFRAME_SET_AUTOCONFIG: 864 case RAIDFRAME_SET_ROOT: 865 case RAIDFRAME_DELETE_COMPONENT: 866 case RAIDFRAME_INCORPORATE_HOT_SPARE: 867 if ((rs->sc_flags & RAIDF_INITED) == 0) 868 return (ENXIO); 869 } 870 871 switch (cmd) { 872 873 /* configure the system */ 874 case RAIDFRAME_CONFIGURE: 875 876 if (raidPtr->valid) { 877 /* There is a valid RAID set running on this unit! */ 878 printf("raid%d: Device already configured!\n",unit); 879 return(EINVAL); 880 } 881 882 /* copy-in the configuration information */ 883 /* data points to a pointer to the configuration structure */ 884 885 u_cfg = *((RF_Config_t **) data); 886 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); 887 if (k_cfg == NULL) { 888 return (ENOMEM); 889 } 890 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t)); 891 if (retcode) { 892 RF_Free(k_cfg, sizeof(RF_Config_t)); 893 db1_printf(("rf_ioctl: retcode=%d copyin.1\n", 894 retcode)); 895 return (retcode); 896 } 897 /* allocate a buffer for the layout-specific data, and copy it 898 * in */ 899 if (k_cfg->layoutSpecificSize) { 900 if (k_cfg->layoutSpecificSize > 10000) { 901 /* sanity check */ 902 RF_Free(k_cfg, sizeof(RF_Config_t)); 903 return (EINVAL); 904 } 905 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, 906 (u_char *)); 907 if (specific_buf == NULL) { 908 RF_Free(k_cfg, sizeof(RF_Config_t)); 909 return (ENOMEM); 910 } 911 retcode = copyin(k_cfg->layoutSpecific, specific_buf, 912 k_cfg->layoutSpecificSize); 913 if (retcode) { 914 RF_Free(k_cfg, sizeof(RF_Config_t)); 915 RF_Free(specific_buf, 916 k_cfg->layoutSpecificSize); 917 db1_printf(("rf_ioctl: retcode=%d copyin.2\n", 918 retcode)); 919 return (retcode); 920 } 921 } else 922 specific_buf = NULL; 923 k_cfg->layoutSpecific = specific_buf; 924 925 /* should do some kind of sanity check on the configuration. 926 * Store the sum of all the bytes in the last byte? */ 927 928 /* configure the system */ 929 930 /* 931 * Clear the entire RAID descriptor, just to make sure 932 * there is no stale data left in the case of a 933 * reconfiguration 934 */ 935 memset((char *) raidPtr, 0, sizeof(RF_Raid_t)); 936 raidPtr->raidid = unit; 937 938 retcode = rf_Configure(raidPtr, k_cfg, NULL); 939 940 if (retcode == 0) { 941 942 /* allow this many simultaneous IO's to 943 this RAID device */ 944 raidPtr->openings = RAIDOUTSTANDING; 945 946 raidinit(raidPtr); 947 rf_markalldirty(raidPtr); 948 } 949 /* free the buffers. No return code here. */ 950 if (k_cfg->layoutSpecificSize) { 951 RF_Free(specific_buf, k_cfg->layoutSpecificSize); 952 } 953 RF_Free(k_cfg, sizeof(RF_Config_t)); 954 955 return (retcode); 956 957 /* shutdown the system */ 958 case RAIDFRAME_SHUTDOWN: 959 960 if ((error = raidlock(rs)) != 0) 961 return (error); 962 963 /* 964 * If somebody has a partition mounted, we shouldn't 965 * shutdown. 966 */ 967 968 part = DISKPART(dev); 969 pmask = (1 << part); 970 if ((rs->sc_dkdev.dk_openmask & ~pmask) || 971 ((rs->sc_dkdev.dk_bopenmask & pmask) && 972 (rs->sc_dkdev.dk_copenmask & pmask))) { 973 raidunlock(rs); 974 return (EBUSY); 975 } 976 977 retcode = rf_Shutdown(raidPtr); 978 979 /* It's no longer initialized... */ 980 rs->sc_flags &= ~RAIDF_INITED; 981 982 /* Detach the disk. */ 983 disk_detach(&rs->sc_dkdev); 984 985 raidunlock(rs); 986 987 return (retcode); 988 case RAIDFRAME_GET_COMPONENT_LABEL: 989 clabel_ptr = (RF_ComponentLabel_t **) data; 990 /* need to read the component label for the disk indicated 991 by row,column in clabel */ 992 993 /* For practice, let's get it directly fromdisk, rather 994 than from the in-core copy */ 995 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ), 996 (RF_ComponentLabel_t *)); 997 if (clabel == NULL) 998 return (ENOMEM); 999 1000 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t)); 1001 1002 retcode = copyin( *clabel_ptr, clabel, 1003 sizeof(RF_ComponentLabel_t)); 1004 1005 if (retcode) { 1006 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1007 return(retcode); 1008 } 1009 1010 clabel->row = 0; /* Don't allow looking at anything else.*/ 1011 1012 column = clabel->column; 1013 1014 if ((column < 0) || (column >= raidPtr->numCol + 1015 raidPtr->numSpare)) { 1016 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1017 return(EINVAL); 1018 } 1019 1020 raidread_component_label(raidPtr->Disks[column].dev, 1021 raidPtr->raid_cinfo[column].ci_vp, 1022 clabel ); 1023 1024 retcode = copyout(clabel, *clabel_ptr, 1025 sizeof(RF_ComponentLabel_t)); 1026 RF_Free(clabel, sizeof(RF_ComponentLabel_t)); 1027 return (retcode); 1028 1029 case RAIDFRAME_SET_COMPONENT_LABEL: 1030 clabel = (RF_ComponentLabel_t *) data; 1031 1032 /* XXX check the label for valid stuff... */ 1033 /* Note that some things *should not* get modified -- 1034 the user should be re-initing the labels instead of 1035 trying to patch things. 1036 */ 1037 1038 raidid = raidPtr->raidid; 1039 #if DEBUG 1040 printf("raid%d: Got component label:\n", raidid); 1041 printf("raid%d: Version: %d\n", raidid, clabel->version); 1042 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number); 1043 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter); 1044 printf("raid%d: Column: %d\n", raidid, clabel->column); 1045 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns); 1046 printf("raid%d: Clean: %d\n", raidid, clabel->clean); 1047 printf("raid%d: Status: %d\n", raidid, clabel->status); 1048 #endif 1049 clabel->row = 0; 1050 column = clabel->column; 1051 1052 if ((column < 0) || (column >= raidPtr->numCol)) { 1053 return(EINVAL); 1054 } 1055 1056 /* XXX this isn't allowed to do anything for now :-) */ 1057 1058 /* XXX and before it is, we need to fill in the rest 1059 of the fields!?!?!?! */ 1060 #if 0 1061 raidwrite_component_label( 1062 raidPtr->Disks[column].dev, 1063 raidPtr->raid_cinfo[column].ci_vp, 1064 clabel ); 1065 #endif 1066 return (0); 1067 1068 case RAIDFRAME_INIT_LABELS: 1069 clabel = (RF_ComponentLabel_t *) data; 1070 /* 1071 we only want the serial number from 1072 the above. We get all the rest of the information 1073 from the config that was used to create this RAID 1074 set. 1075 */ 1076 1077 raidPtr->serial_number = clabel->serial_number; 1078 1079 raid_init_component_label(raidPtr, &ci_label); 1080 ci_label.serial_number = clabel->serial_number; 1081 ci_label.row = 0; /* we dont' pretend to support more */ 1082 1083 for(column=0;column<raidPtr->numCol;column++) { 1084 diskPtr = &raidPtr->Disks[column]; 1085 if (!RF_DEAD_DISK(diskPtr->status)) { 1086 ci_label.partitionSize = diskPtr->partitionSize; 1087 ci_label.column = column; 1088 raidwrite_component_label( 1089 raidPtr->Disks[column].dev, 1090 raidPtr->raid_cinfo[column].ci_vp, 1091 &ci_label ); 1092 } 1093 } 1094 1095 return (retcode); 1096 case RAIDFRAME_SET_AUTOCONFIG: 1097 d = rf_set_autoconfig(raidPtr, *(int *) data); 1098 printf("raid%d: New autoconfig value is: %d\n", 1099 raidPtr->raidid, d); 1100 *(int *) data = d; 1101 return (retcode); 1102 1103 case RAIDFRAME_SET_ROOT: 1104 d = rf_set_rootpartition(raidPtr, *(int *) data); 1105 printf("raid%d: New rootpartition value is: %d\n", 1106 raidPtr->raidid, d); 1107 *(int *) data = d; 1108 return (retcode); 1109 1110 /* initialize all parity */ 1111 case RAIDFRAME_REWRITEPARITY: 1112 1113 if (raidPtr->Layout.map->faultsTolerated == 0) { 1114 /* Parity for RAID 0 is trivially correct */ 1115 raidPtr->parity_good = RF_RAID_CLEAN; 1116 return(0); 1117 } 1118 1119 if (raidPtr->parity_rewrite_in_progress == 1) { 1120 /* Re-write is already in progress! */ 1121 return(EINVAL); 1122 } 1123 1124 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, 1125 rf_RewriteParityThread, 1126 raidPtr,"raid_parity"); 1127 return (retcode); 1128 1129 1130 case RAIDFRAME_ADD_HOT_SPARE: 1131 sparePtr = (RF_SingleComponent_t *) data; 1132 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t)); 1133 retcode = rf_add_hot_spare(raidPtr, &hot_spare); 1134 return(retcode); 1135 1136 case RAIDFRAME_REMOVE_HOT_SPARE: 1137 return(retcode); 1138 1139 case RAIDFRAME_DELETE_COMPONENT: 1140 componentPtr = (RF_SingleComponent_t *)data; 1141 memcpy( &component, componentPtr, 1142 sizeof(RF_SingleComponent_t)); 1143 retcode = rf_delete_component(raidPtr, &component); 1144 return(retcode); 1145 1146 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1147 componentPtr = (RF_SingleComponent_t *)data; 1148 memcpy( &component, componentPtr, 1149 sizeof(RF_SingleComponent_t)); 1150 retcode = rf_incorporate_hot_spare(raidPtr, &component); 1151 return(retcode); 1152 1153 case RAIDFRAME_REBUILD_IN_PLACE: 1154 1155 if (raidPtr->Layout.map->faultsTolerated == 0) { 1156 /* Can't do this on a RAID 0!! */ 1157 return(EINVAL); 1158 } 1159 1160 if (raidPtr->recon_in_progress == 1) { 1161 /* a reconstruct is already in progress! */ 1162 return(EINVAL); 1163 } 1164 1165 componentPtr = (RF_SingleComponent_t *) data; 1166 memcpy( &component, componentPtr, 1167 sizeof(RF_SingleComponent_t)); 1168 component.row = 0; /* we don't support any more */ 1169 column = component.column; 1170 1171 if ((column < 0) || (column >= raidPtr->numCol)) { 1172 return(EINVAL); 1173 } 1174 1175 RF_LOCK_MUTEX(raidPtr->mutex); 1176 if ((raidPtr->Disks[column].status == rf_ds_optimal) && 1177 (raidPtr->numFailures > 0)) { 1178 /* XXX 0 above shouldn't be constant!!! */ 1179 /* some component other than this has failed. 1180 Let's not make things worse than they already 1181 are... */ 1182 printf("raid%d: Unable to reconstruct to disk at:\n", 1183 raidPtr->raidid); 1184 printf("raid%d: Col: %d Too many failures.\n", 1185 raidPtr->raidid, column); 1186 RF_UNLOCK_MUTEX(raidPtr->mutex); 1187 return (EINVAL); 1188 } 1189 if (raidPtr->Disks[column].status == 1190 rf_ds_reconstructing) { 1191 printf("raid%d: Unable to reconstruct to disk at:\n", 1192 raidPtr->raidid); 1193 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column); 1194 1195 RF_UNLOCK_MUTEX(raidPtr->mutex); 1196 return (EINVAL); 1197 } 1198 if (raidPtr->Disks[column].status == rf_ds_spared) { 1199 RF_UNLOCK_MUTEX(raidPtr->mutex); 1200 return (EINVAL); 1201 } 1202 RF_UNLOCK_MUTEX(raidPtr->mutex); 1203 1204 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1205 if (rrcopy == NULL) 1206 return(ENOMEM); 1207 1208 rrcopy->raidPtr = (void *) raidPtr; 1209 rrcopy->col = column; 1210 1211 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1212 rf_ReconstructInPlaceThread, 1213 rrcopy,"raid_reconip"); 1214 return(retcode); 1215 1216 case RAIDFRAME_GET_INFO: 1217 if (!raidPtr->valid) 1218 return (ENODEV); 1219 ucfgp = (RF_DeviceConfig_t **) data; 1220 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), 1221 (RF_DeviceConfig_t *)); 1222 if (d_cfg == NULL) 1223 return (ENOMEM); 1224 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t)); 1225 d_cfg->rows = 1; /* there is only 1 row now */ 1226 d_cfg->cols = raidPtr->numCol; 1227 d_cfg->ndevs = raidPtr->numCol; 1228 if (d_cfg->ndevs >= RF_MAX_DISKS) { 1229 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1230 return (ENOMEM); 1231 } 1232 d_cfg->nspares = raidPtr->numSpare; 1233 if (d_cfg->nspares >= RF_MAX_DISKS) { 1234 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1235 return (ENOMEM); 1236 } 1237 d_cfg->maxqdepth = raidPtr->maxQueueDepth; 1238 d = 0; 1239 for (j = 0; j < d_cfg->cols; j++) { 1240 d_cfg->devs[d] = raidPtr->Disks[j]; 1241 d++; 1242 } 1243 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { 1244 d_cfg->spares[i] = raidPtr->Disks[j]; 1245 } 1246 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t)); 1247 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1248 1249 return (retcode); 1250 1251 case RAIDFRAME_CHECK_PARITY: 1252 *(int *) data = raidPtr->parity_good; 1253 return (0); 1254 1255 case RAIDFRAME_RESET_ACCTOTALS: 1256 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals)); 1257 return (0); 1258 1259 case RAIDFRAME_GET_ACCTOTALS: 1260 totals = (RF_AccTotals_t *) data; 1261 *totals = raidPtr->acc_totals; 1262 return (0); 1263 1264 case RAIDFRAME_KEEP_ACCTOTALS: 1265 raidPtr->keep_acc_totals = *(int *)data; 1266 return (0); 1267 1268 case RAIDFRAME_GET_SIZE: 1269 *(int *) data = raidPtr->totalSectors; 1270 return (0); 1271 1272 /* fail a disk & optionally start reconstruction */ 1273 case RAIDFRAME_FAIL_DISK: 1274 1275 if (raidPtr->Layout.map->faultsTolerated == 0) { 1276 /* Can't do this on a RAID 0!! */ 1277 return(EINVAL); 1278 } 1279 1280 rr = (struct rf_recon_req *) data; 1281 rr->row = 0; 1282 if (rr->col < 0 || rr->col >= raidPtr->numCol) 1283 return (EINVAL); 1284 1285 1286 RF_LOCK_MUTEX(raidPtr->mutex); 1287 if ((raidPtr->Disks[rr->col].status == 1288 rf_ds_optimal) && (raidPtr->numFailures > 0)) { 1289 /* some other component has failed. Let's not make 1290 things worse. XXX wrong for RAID6 */ 1291 RF_UNLOCK_MUTEX(raidPtr->mutex); 1292 return (EINVAL); 1293 } 1294 if (raidPtr->Disks[rr->col].status == rf_ds_spared) { 1295 /* Can't fail a spared disk! */ 1296 RF_UNLOCK_MUTEX(raidPtr->mutex); 1297 return (EINVAL); 1298 } 1299 RF_UNLOCK_MUTEX(raidPtr->mutex); 1300 1301 /* make a copy of the recon request so that we don't rely on 1302 * the user's buffer */ 1303 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1304 if (rrcopy == NULL) 1305 return(ENOMEM); 1306 memcpy(rrcopy, rr, sizeof(*rr)); 1307 rrcopy->raidPtr = (void *) raidPtr; 1308 1309 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1310 rf_ReconThread, 1311 rrcopy,"raid_recon"); 1312 return (0); 1313 1314 /* invoke a copyback operation after recon on whatever disk 1315 * needs it, if any */ 1316 case RAIDFRAME_COPYBACK: 1317 1318 if (raidPtr->Layout.map->faultsTolerated == 0) { 1319 /* This makes no sense on a RAID 0!! */ 1320 return(EINVAL); 1321 } 1322 1323 if (raidPtr->copyback_in_progress == 1) { 1324 /* Copyback is already in progress! */ 1325 return(EINVAL); 1326 } 1327 1328 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, 1329 rf_CopybackThread, 1330 raidPtr,"raid_copyback"); 1331 return (retcode); 1332 1333 /* return the percentage completion of reconstruction */ 1334 case RAIDFRAME_CHECK_RECON_STATUS: 1335 if (raidPtr->Layout.map->faultsTolerated == 0) { 1336 /* This makes no sense on a RAID 0, so tell the 1337 user it's done. */ 1338 *(int *) data = 100; 1339 return(0); 1340 } 1341 if (raidPtr->status != rf_rs_reconstructing) 1342 *(int *) data = 100; 1343 else { 1344 if (raidPtr->reconControl->numRUsTotal > 0) { 1345 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal); 1346 } else { 1347 *(int *) data = 0; 1348 } 1349 } 1350 return (0); 1351 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1352 progressInfoPtr = (RF_ProgressInfo_t **) data; 1353 if (raidPtr->status != rf_rs_reconstructing) { 1354 progressInfo.remaining = 0; 1355 progressInfo.completed = 100; 1356 progressInfo.total = 100; 1357 } else { 1358 progressInfo.total = 1359 raidPtr->reconControl->numRUsTotal; 1360 progressInfo.completed = 1361 raidPtr->reconControl->numRUsComplete; 1362 progressInfo.remaining = progressInfo.total - 1363 progressInfo.completed; 1364 } 1365 retcode = copyout(&progressInfo, *progressInfoPtr, 1366 sizeof(RF_ProgressInfo_t)); 1367 return (retcode); 1368 1369 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1370 if (raidPtr->Layout.map->faultsTolerated == 0) { 1371 /* This makes no sense on a RAID 0, so tell the 1372 user it's done. */ 1373 *(int *) data = 100; 1374 return(0); 1375 } 1376 if (raidPtr->parity_rewrite_in_progress == 1) { 1377 *(int *) data = 100 * 1378 raidPtr->parity_rewrite_stripes_done / 1379 raidPtr->Layout.numStripe; 1380 } else { 1381 *(int *) data = 100; 1382 } 1383 return (0); 1384 1385 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1386 progressInfoPtr = (RF_ProgressInfo_t **) data; 1387 if (raidPtr->parity_rewrite_in_progress == 1) { 1388 progressInfo.total = raidPtr->Layout.numStripe; 1389 progressInfo.completed = 1390 raidPtr->parity_rewrite_stripes_done; 1391 progressInfo.remaining = progressInfo.total - 1392 progressInfo.completed; 1393 } else { 1394 progressInfo.remaining = 0; 1395 progressInfo.completed = 100; 1396 progressInfo.total = 100; 1397 } 1398 retcode = copyout(&progressInfo, *progressInfoPtr, 1399 sizeof(RF_ProgressInfo_t)); 1400 return (retcode); 1401 1402 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1403 if (raidPtr->Layout.map->faultsTolerated == 0) { 1404 /* This makes no sense on a RAID 0 */ 1405 *(int *) data = 100; 1406 return(0); 1407 } 1408 if (raidPtr->copyback_in_progress == 1) { 1409 *(int *) data = 100 * raidPtr->copyback_stripes_done / 1410 raidPtr->Layout.numStripe; 1411 } else { 1412 *(int *) data = 100; 1413 } 1414 return (0); 1415 1416 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1417 progressInfoPtr = (RF_ProgressInfo_t **) data; 1418 if (raidPtr->copyback_in_progress == 1) { 1419 progressInfo.total = raidPtr->Layout.numStripe; 1420 progressInfo.completed = 1421 raidPtr->copyback_stripes_done; 1422 progressInfo.remaining = progressInfo.total - 1423 progressInfo.completed; 1424 } else { 1425 progressInfo.remaining = 0; 1426 progressInfo.completed = 100; 1427 progressInfo.total = 100; 1428 } 1429 retcode = copyout(&progressInfo, *progressInfoPtr, 1430 sizeof(RF_ProgressInfo_t)); 1431 return (retcode); 1432 1433 /* the sparetable daemon calls this to wait for the kernel to 1434 * need a spare table. this ioctl does not return until a 1435 * spare table is needed. XXX -- calling mpsleep here in the 1436 * ioctl code is almost certainly wrong and evil. -- XXX XXX 1437 * -- I should either compute the spare table in the kernel, 1438 * or have a different -- XXX XXX -- interface (a different 1439 * character device) for delivering the table -- XXX */ 1440 #if 0 1441 case RAIDFRAME_SPARET_WAIT: 1442 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1443 while (!rf_sparet_wait_queue) 1444 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); 1445 waitreq = rf_sparet_wait_queue; 1446 rf_sparet_wait_queue = rf_sparet_wait_queue->next; 1447 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1448 1449 /* structure assignment */ 1450 *((RF_SparetWait_t *) data) = *waitreq; 1451 1452 RF_Free(waitreq, sizeof(*waitreq)); 1453 return (0); 1454 1455 /* wakes up a process waiting on SPARET_WAIT and puts an error 1456 * code in it that will cause the dameon to exit */ 1457 case RAIDFRAME_ABORT_SPARET_WAIT: 1458 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1459 waitreq->fcol = -1; 1460 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1461 waitreq->next = rf_sparet_wait_queue; 1462 rf_sparet_wait_queue = waitreq; 1463 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1464 wakeup(&rf_sparet_wait_queue); 1465 return (0); 1466 1467 /* used by the spare table daemon to deliver a spare table 1468 * into the kernel */ 1469 case RAIDFRAME_SEND_SPARET: 1470 1471 /* install the spare table */ 1472 retcode = rf_SetSpareTable(raidPtr, *(void **) data); 1473 1474 /* respond to the requestor. the return status of the spare 1475 * table installation is passed in the "fcol" field */ 1476 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1477 waitreq->fcol = retcode; 1478 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1479 waitreq->next = rf_sparet_resp_queue; 1480 rf_sparet_resp_queue = waitreq; 1481 wakeup(&rf_sparet_resp_queue); 1482 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1483 1484 return (retcode); 1485 #endif 1486 1487 default: 1488 break; /* fall through to the os-specific code below */ 1489 1490 } 1491 1492 if (!raidPtr->valid) 1493 return (EINVAL); 1494 1495 /* 1496 * Add support for "regular" device ioctls here. 1497 */ 1498 1499 switch (cmd) { 1500 case DIOCGDINFO: 1501 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label); 1502 break; 1503 #ifdef __HAVE_OLD_DISKLABEL 1504 case ODIOCGDINFO: 1505 newlabel = *(rs->sc_dkdev.dk_label); 1506 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1507 return ENOTTY; 1508 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1509 break; 1510 #endif 1511 1512 case DIOCGPART: 1513 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label; 1514 ((struct partinfo *) data)->part = 1515 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1516 break; 1517 1518 case DIOCWDINFO: 1519 case DIOCSDINFO: 1520 #ifdef __HAVE_OLD_DISKLABEL 1521 case ODIOCWDINFO: 1522 case ODIOCSDINFO: 1523 #endif 1524 { 1525 struct disklabel *lp; 1526 #ifdef __HAVE_OLD_DISKLABEL 1527 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1528 memset(&newlabel, 0, sizeof newlabel); 1529 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1530 lp = &newlabel; 1531 } else 1532 #endif 1533 lp = (struct disklabel *)data; 1534 1535 if ((error = raidlock(rs)) != 0) 1536 return (error); 1537 1538 rs->sc_flags |= RAIDF_LABELLING; 1539 1540 error = setdisklabel(rs->sc_dkdev.dk_label, 1541 lp, 0, rs->sc_dkdev.dk_cpulabel); 1542 if (error == 0) { 1543 if (cmd == DIOCWDINFO 1544 #ifdef __HAVE_OLD_DISKLABEL 1545 || cmd == ODIOCWDINFO 1546 #endif 1547 ) 1548 error = writedisklabel(RAIDLABELDEV(dev), 1549 raidstrategy, rs->sc_dkdev.dk_label, 1550 rs->sc_dkdev.dk_cpulabel); 1551 } 1552 rs->sc_flags &= ~RAIDF_LABELLING; 1553 1554 raidunlock(rs); 1555 1556 if (error) 1557 return (error); 1558 break; 1559 } 1560 1561 case DIOCWLABEL: 1562 if (*(int *) data != 0) 1563 rs->sc_flags |= RAIDF_WLABEL; 1564 else 1565 rs->sc_flags &= ~RAIDF_WLABEL; 1566 break; 1567 1568 case DIOCGDEFLABEL: 1569 raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data); 1570 break; 1571 1572 #ifdef __HAVE_OLD_DISKLABEL 1573 case ODIOCGDEFLABEL: 1574 raidgetdefaultlabel(raidPtr, rs, &newlabel); 1575 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1576 return ENOTTY; 1577 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1578 break; 1579 #endif 1580 1581 default: 1582 retcode = ENOTTY; 1583 } 1584 return (retcode); 1585 1586 } 1587 1588 1589 /* raidinit -- complete the rest of the initialization for the 1590 RAIDframe device. */ 1591 1592 1593 static void 1594 raidinit(RF_Raid_t *raidPtr) 1595 { 1596 struct raid_softc *rs; 1597 int unit; 1598 1599 unit = raidPtr->raidid; 1600 1601 rs = &raid_softc[unit]; 1602 1603 /* XXX should check return code first... */ 1604 rs->sc_flags |= RAIDF_INITED; 1605 1606 /* XXX doesn't check bounds. */ 1607 snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit); 1608 1609 rs->sc_dkdev.dk_name = rs->sc_xname; 1610 1611 /* disk_attach actually creates space for the CPU disklabel, among 1612 * other things, so it's critical to call this *BEFORE* we try putzing 1613 * with disklabels. */ 1614 1615 disk_attach(&rs->sc_dkdev); 1616 1617 /* XXX There may be a weird interaction here between this, and 1618 * protectedSectors, as used in RAIDframe. */ 1619 1620 rs->sc_size = raidPtr->totalSectors; 1621 } 1622 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 1623 /* wake up the daemon & tell it to get us a spare table 1624 * XXX 1625 * the entries in the queues should be tagged with the raidPtr 1626 * so that in the extremely rare case that two recons happen at once, 1627 * we know for which device were requesting a spare table 1628 * XXX 1629 * 1630 * XXX This code is not currently used. GO 1631 */ 1632 int 1633 rf_GetSpareTableFromDaemon(RF_SparetWait_t *req) 1634 { 1635 int retcode; 1636 1637 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1638 req->next = rf_sparet_wait_queue; 1639 rf_sparet_wait_queue = req; 1640 wakeup(&rf_sparet_wait_queue); 1641 1642 /* mpsleep unlocks the mutex */ 1643 while (!rf_sparet_resp_queue) { 1644 tsleep(&rf_sparet_resp_queue, PRIBIO, 1645 "raidframe getsparetable", 0); 1646 } 1647 req = rf_sparet_resp_queue; 1648 rf_sparet_resp_queue = req->next; 1649 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1650 1651 retcode = req->fcol; 1652 RF_Free(req, sizeof(*req)); /* this is not the same req as we 1653 * alloc'd */ 1654 return (retcode); 1655 } 1656 #endif 1657 1658 /* a wrapper around rf_DoAccess that extracts appropriate info from the 1659 * bp & passes it down. 1660 * any calls originating in the kernel must use non-blocking I/O 1661 * do some extra sanity checking to return "appropriate" error values for 1662 * certain conditions (to make some standard utilities work) 1663 * 1664 * Formerly known as: rf_DoAccessKernel 1665 */ 1666 void 1667 raidstart(RF_Raid_t *raidPtr) 1668 { 1669 RF_SectorCount_t num_blocks, pb, sum; 1670 RF_RaidAddr_t raid_addr; 1671 struct partition *pp; 1672 daddr_t blocknum; 1673 int unit; 1674 struct raid_softc *rs; 1675 int do_async; 1676 struct buf *bp; 1677 int rc; 1678 1679 unit = raidPtr->raidid; 1680 rs = &raid_softc[unit]; 1681 1682 /* quick check to see if anything has died recently */ 1683 RF_LOCK_MUTEX(raidPtr->mutex); 1684 if (raidPtr->numNewFailures > 0) { 1685 RF_UNLOCK_MUTEX(raidPtr->mutex); 1686 rf_update_component_labels(raidPtr, 1687 RF_NORMAL_COMPONENT_UPDATE); 1688 RF_LOCK_MUTEX(raidPtr->mutex); 1689 raidPtr->numNewFailures--; 1690 } 1691 1692 /* Check to see if we're at the limit... */ 1693 while (raidPtr->openings > 0) { 1694 RF_UNLOCK_MUTEX(raidPtr->mutex); 1695 1696 /* get the next item, if any, from the queue */ 1697 if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) { 1698 /* nothing more to do */ 1699 return; 1700 } 1701 1702 /* Ok, for the bp we have here, bp->b_blkno is relative to the 1703 * partition.. Need to make it absolute to the underlying 1704 * device.. */ 1705 1706 blocknum = bp->b_blkno; 1707 if (DISKPART(bp->b_dev) != RAW_PART) { 1708 pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; 1709 blocknum += pp->p_offset; 1710 } 1711 1712 db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, 1713 (int) blocknum)); 1714 1715 db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount)); 1716 db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid)); 1717 1718 /* *THIS* is where we adjust what block we're going to... 1719 * but DO NOT TOUCH bp->b_blkno!!! */ 1720 raid_addr = blocknum; 1721 1722 num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector; 1723 pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0; 1724 sum = raid_addr + num_blocks + pb; 1725 if (1 || rf_debugKernelAccess) { 1726 db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n", 1727 (int) raid_addr, (int) sum, (int) num_blocks, 1728 (int) pb, (int) bp->b_resid)); 1729 } 1730 if ((sum > raidPtr->totalSectors) || (sum < raid_addr) 1731 || (sum < num_blocks) || (sum < pb)) { 1732 bp->b_error = ENOSPC; 1733 bp->b_flags |= B_ERROR; 1734 bp->b_resid = bp->b_bcount; 1735 biodone(bp); 1736 RF_LOCK_MUTEX(raidPtr->mutex); 1737 continue; 1738 } 1739 /* 1740 * XXX rf_DoAccess() should do this, not just DoAccessKernel() 1741 */ 1742 1743 if (bp->b_bcount & raidPtr->sectorMask) { 1744 bp->b_error = EINVAL; 1745 bp->b_flags |= B_ERROR; 1746 bp->b_resid = bp->b_bcount; 1747 biodone(bp); 1748 RF_LOCK_MUTEX(raidPtr->mutex); 1749 continue; 1750 1751 } 1752 db1_printf(("Calling DoAccess..\n")); 1753 1754 1755 RF_LOCK_MUTEX(raidPtr->mutex); 1756 raidPtr->openings--; 1757 RF_UNLOCK_MUTEX(raidPtr->mutex); 1758 1759 /* 1760 * Everything is async. 1761 */ 1762 do_async = 1; 1763 1764 disk_busy(&rs->sc_dkdev); 1765 1766 /* XXX we're still at splbio() here... do we *really* 1767 need to be? */ 1768 1769 /* don't ever condition on bp->b_flags & B_WRITE. 1770 * always condition on B_READ instead */ 1771 1772 rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ? 1773 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE, 1774 do_async, raid_addr, num_blocks, 1775 bp->b_data, bp, RF_DAG_NONBLOCKING_IO); 1776 1777 if (rc) { 1778 bp->b_error = rc; 1779 bp->b_flags |= B_ERROR; 1780 bp->b_resid = bp->b_bcount; 1781 biodone(bp); 1782 /* continue loop */ 1783 } 1784 1785 RF_LOCK_MUTEX(raidPtr->mutex); 1786 } 1787 RF_UNLOCK_MUTEX(raidPtr->mutex); 1788 } 1789 1790 1791 1792 1793 /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */ 1794 1795 int 1796 rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req) 1797 { 1798 int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE; 1799 struct buf *bp; 1800 struct raidbuf *raidbp = NULL; 1801 1802 req->queue = queue; 1803 1804 #if DIAGNOSTIC 1805 if (queue->raidPtr->raidid >= numraid) { 1806 printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid, 1807 numraid); 1808 panic("Invalid Unit number in rf_DispatchKernelIO"); 1809 } 1810 #endif 1811 1812 bp = req->bp; 1813 #if 1 1814 /* XXX when there is a physical disk failure, someone is passing us a 1815 * buffer that contains old stuff!! Attempt to deal with this problem 1816 * without taking a performance hit... (not sure where the real bug 1817 * is. It's buried in RAIDframe somewhere) :-( GO ) */ 1818 1819 if (bp->b_flags & B_ERROR) { 1820 bp->b_flags &= ~B_ERROR; 1821 } 1822 if (bp->b_error != 0) { 1823 bp->b_error = 0; 1824 } 1825 #endif 1826 raidbp = pool_get(&rf_pools.cbuf, PR_NOWAIT); 1827 if (raidbp == NULL) { 1828 bp->b_flags |= B_ERROR; 1829 bp->b_error = ENOMEM; 1830 return (ENOMEM); 1831 } 1832 BUF_INIT(&raidbp->rf_buf); 1833 1834 /* 1835 * context for raidiodone 1836 */ 1837 raidbp->rf_obp = bp; 1838 raidbp->req = req; 1839 1840 BIO_COPYPRIO(&raidbp->rf_buf, bp); 1841 1842 switch (req->type) { 1843 case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */ 1844 /* XXX need to do something extra here.. */ 1845 /* I'm leaving this in, as I've never actually seen it used, 1846 * and I'd like folks to report it... GO */ 1847 printf(("WAKEUP CALLED\n")); 1848 queue->numOutstanding++; 1849 1850 /* XXX need to glue the original buffer into this?? */ 1851 1852 KernelWakeupFunc(&raidbp->rf_buf); 1853 break; 1854 1855 case RF_IO_TYPE_READ: 1856 case RF_IO_TYPE_WRITE: 1857 #if RF_ACC_TRACE > 0 1858 if (req->tracerec) { 1859 RF_ETIMER_START(req->tracerec->timer); 1860 } 1861 #endif 1862 InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp, 1863 op | bp->b_flags, queue->rf_cinfo->ci_dev, 1864 req->sectorOffset, req->numSector, 1865 req->buf, KernelWakeupFunc, (void *) req, 1866 queue->raidPtr->logBytesPerSector, req->b_proc); 1867 1868 if (rf_debugKernelAccess) { 1869 db1_printf(("dispatch: bp->b_blkno = %ld\n", 1870 (long) bp->b_blkno)); 1871 } 1872 queue->numOutstanding++; 1873 queue->last_deq_sector = req->sectorOffset; 1874 /* acc wouldn't have been let in if there were any pending 1875 * reqs at any other priority */ 1876 queue->curPriority = req->priority; 1877 1878 db1_printf(("Going for %c to unit %d col %d\n", 1879 req->type, queue->raidPtr->raidid, 1880 queue->col)); 1881 db1_printf(("sector %d count %d (%d bytes) %d\n", 1882 (int) req->sectorOffset, (int) req->numSector, 1883 (int) (req->numSector << 1884 queue->raidPtr->logBytesPerSector), 1885 (int) queue->raidPtr->logBytesPerSector)); 1886 if ((raidbp->rf_buf.b_flags & B_READ) == 0) { 1887 raidbp->rf_buf.b_vp->v_numoutput++; 1888 } 1889 VOP_STRATEGY(raidbp->rf_buf.b_vp, &raidbp->rf_buf); 1890 1891 break; 1892 1893 default: 1894 panic("bad req->type in rf_DispatchKernelIO"); 1895 } 1896 db1_printf(("Exiting from DispatchKernelIO\n")); 1897 1898 return (0); 1899 } 1900 /* this is the callback function associated with a I/O invoked from 1901 kernel code. 1902 */ 1903 static void 1904 KernelWakeupFunc(struct buf *vbp) 1905 { 1906 RF_DiskQueueData_t *req = NULL; 1907 RF_DiskQueue_t *queue; 1908 struct raidbuf *raidbp = (struct raidbuf *) vbp; 1909 struct buf *bp; 1910 int s; 1911 1912 s = splbio(); 1913 db1_printf(("recovering the request queue:\n")); 1914 req = raidbp->req; 1915 1916 bp = raidbp->rf_obp; 1917 1918 queue = (RF_DiskQueue_t *) req->queue; 1919 1920 if (raidbp->rf_buf.b_flags & B_ERROR) { 1921 bp->b_flags |= B_ERROR; 1922 bp->b_error = raidbp->rf_buf.b_error ? 1923 raidbp->rf_buf.b_error : EIO; 1924 } 1925 1926 /* XXX methinks this could be wrong... */ 1927 #if 1 1928 bp->b_resid = raidbp->rf_buf.b_resid; 1929 #endif 1930 #if RF_ACC_TRACE > 0 1931 if (req->tracerec) { 1932 RF_ETIMER_STOP(req->tracerec->timer); 1933 RF_ETIMER_EVAL(req->tracerec->timer); 1934 RF_LOCK_MUTEX(rf_tracing_mutex); 1935 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); 1936 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); 1937 req->tracerec->num_phys_ios++; 1938 RF_UNLOCK_MUTEX(rf_tracing_mutex); 1939 } 1940 #endif 1941 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */ 1942 1943 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go 1944 * ballistic, and mark the component as hosed... */ 1945 1946 if (bp->b_flags & B_ERROR) { 1947 /* Mark the disk as dead */ 1948 /* but only mark it once... */ 1949 if (queue->raidPtr->Disks[queue->col].status == 1950 rf_ds_optimal) { 1951 printf("raid%d: IO Error. Marking %s as failed.\n", 1952 queue->raidPtr->raidid, 1953 queue->raidPtr->Disks[queue->col].devname); 1954 queue->raidPtr->Disks[queue->col].status = 1955 rf_ds_failed; 1956 queue->raidPtr->status = rf_rs_degraded; 1957 queue->raidPtr->numFailures++; 1958 queue->raidPtr->numNewFailures++; 1959 } else { /* Disk is already dead... */ 1960 /* printf("Disk already marked as dead!\n"); */ 1961 } 1962 1963 } 1964 1965 pool_put(&rf_pools.cbuf, raidbp); 1966 1967 /* Fill in the error value */ 1968 1969 req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0; 1970 1971 simple_lock(&queue->raidPtr->iodone_lock); 1972 1973 /* Drop this one on the "finished" queue... */ 1974 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries); 1975 1976 /* Let the raidio thread know there is work to be done. */ 1977 wakeup(&(queue->raidPtr->iodone)); 1978 1979 simple_unlock(&queue->raidPtr->iodone_lock); 1980 1981 splx(s); 1982 } 1983 1984 1985 1986 /* 1987 * initialize a buf structure for doing an I/O in the kernel. 1988 */ 1989 static void 1990 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev, 1991 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t buf, 1992 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector, 1993 struct proc *b_proc) 1994 { 1995 /* bp->b_flags = B_PHYS | rw_flag; */ 1996 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */ 1997 bp->b_bcount = numSect << logBytesPerSector; 1998 bp->b_bufsize = bp->b_bcount; 1999 bp->b_error = 0; 2000 bp->b_dev = dev; 2001 bp->b_data = buf; 2002 bp->b_blkno = startSect; 2003 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */ 2004 if (bp->b_bcount == 0) { 2005 panic("bp->b_bcount is zero in InitBP!!"); 2006 } 2007 bp->b_proc = b_proc; 2008 bp->b_iodone = cbFunc; 2009 bp->b_vp = b_vp; 2010 2011 } 2012 2013 static void 2014 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs, 2015 struct disklabel *lp) 2016 { 2017 memset(lp, 0, sizeof(*lp)); 2018 2019 /* fabricate a label... */ 2020 lp->d_secperunit = raidPtr->totalSectors; 2021 lp->d_secsize = raidPtr->bytesPerSector; 2022 lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe; 2023 lp->d_ntracks = 4 * raidPtr->numCol; 2024 lp->d_ncylinders = raidPtr->totalSectors / 2025 (lp->d_nsectors * lp->d_ntracks); 2026 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 2027 2028 strncpy(lp->d_typename, "raid", sizeof(lp->d_typename)); 2029 lp->d_type = DTYPE_RAID; 2030 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 2031 lp->d_rpm = 3600; 2032 lp->d_interleave = 1; 2033 lp->d_flags = 0; 2034 2035 lp->d_partitions[RAW_PART].p_offset = 0; 2036 lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors; 2037 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 2038 lp->d_npartitions = RAW_PART + 1; 2039 2040 lp->d_magic = DISKMAGIC; 2041 lp->d_magic2 = DISKMAGIC; 2042 lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label); 2043 2044 } 2045 /* 2046 * Read the disklabel from the raid device. If one is not present, fake one 2047 * up. 2048 */ 2049 static void 2050 raidgetdisklabel(dev_t dev) 2051 { 2052 int unit = raidunit(dev); 2053 struct raid_softc *rs = &raid_softc[unit]; 2054 const char *errstring; 2055 struct disklabel *lp = rs->sc_dkdev.dk_label; 2056 struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel; 2057 RF_Raid_t *raidPtr; 2058 2059 db1_printf(("Getting the disklabel...\n")); 2060 2061 memset(clp, 0, sizeof(*clp)); 2062 2063 raidPtr = raidPtrs[unit]; 2064 2065 raidgetdefaultlabel(raidPtr, rs, lp); 2066 2067 /* 2068 * Call the generic disklabel extraction routine. 2069 */ 2070 errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy, 2071 rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel); 2072 if (errstring) 2073 raidmakedisklabel(rs); 2074 else { 2075 int i; 2076 struct partition *pp; 2077 2078 /* 2079 * Sanity check whether the found disklabel is valid. 2080 * 2081 * This is necessary since total size of the raid device 2082 * may vary when an interleave is changed even though exactly 2083 * same componets are used, and old disklabel may used 2084 * if that is found. 2085 */ 2086 if (lp->d_secperunit != rs->sc_size) 2087 printf("raid%d: WARNING: %s: " 2088 "total sector size in disklabel (%d) != " 2089 "the size of raid (%ld)\n", unit, rs->sc_xname, 2090 lp->d_secperunit, (long) rs->sc_size); 2091 for (i = 0; i < lp->d_npartitions; i++) { 2092 pp = &lp->d_partitions[i]; 2093 if (pp->p_offset + pp->p_size > rs->sc_size) 2094 printf("raid%d: WARNING: %s: end of partition `%c' " 2095 "exceeds the size of raid (%ld)\n", 2096 unit, rs->sc_xname, 'a' + i, (long) rs->sc_size); 2097 } 2098 } 2099 2100 } 2101 /* 2102 * Take care of things one might want to take care of in the event 2103 * that a disklabel isn't present. 2104 */ 2105 static void 2106 raidmakedisklabel(struct raid_softc *rs) 2107 { 2108 struct disklabel *lp = rs->sc_dkdev.dk_label; 2109 db1_printf(("Making a label..\n")); 2110 2111 /* 2112 * For historical reasons, if there's no disklabel present 2113 * the raw partition must be marked FS_BSDFFS. 2114 */ 2115 2116 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 2117 2118 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 2119 2120 lp->d_checksum = dkcksum(lp); 2121 } 2122 /* 2123 * Lookup the provided name in the filesystem. If the file exists, 2124 * is a valid block device, and isn't being used by anyone else, 2125 * set *vpp to the file's vnode. 2126 * You'll find the original of this in ccd.c 2127 */ 2128 int 2129 raidlookup(char *path, struct proc *p, struct vnode **vpp) 2130 { 2131 struct nameidata nd; 2132 struct vnode *vp; 2133 struct vattr va; 2134 int error; 2135 2136 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p); 2137 if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) { 2138 return (error); 2139 } 2140 vp = nd.ni_vp; 2141 if (vp->v_usecount > 1) { 2142 VOP_UNLOCK(vp, 0); 2143 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); 2144 return (EBUSY); 2145 } 2146 if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) { 2147 VOP_UNLOCK(vp, 0); 2148 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); 2149 return (error); 2150 } 2151 /* XXX: eventually we should handle VREG, too. */ 2152 if (va.va_type != VBLK) { 2153 VOP_UNLOCK(vp, 0); 2154 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); 2155 return (ENOTBLK); 2156 } 2157 VOP_UNLOCK(vp, 0); 2158 *vpp = vp; 2159 return (0); 2160 } 2161 /* 2162 * Wait interruptibly for an exclusive lock. 2163 * 2164 * XXX 2165 * Several drivers do this; it should be abstracted and made MP-safe. 2166 * (Hmm... where have we seen this warning before :-> GO ) 2167 */ 2168 static int 2169 raidlock(struct raid_softc *rs) 2170 { 2171 int error; 2172 2173 while ((rs->sc_flags & RAIDF_LOCKED) != 0) { 2174 rs->sc_flags |= RAIDF_WANTED; 2175 if ((error = 2176 tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0) 2177 return (error); 2178 } 2179 rs->sc_flags |= RAIDF_LOCKED; 2180 return (0); 2181 } 2182 /* 2183 * Unlock and wake up any waiters. 2184 */ 2185 static void 2186 raidunlock(struct raid_softc *rs) 2187 { 2188 2189 rs->sc_flags &= ~RAIDF_LOCKED; 2190 if ((rs->sc_flags & RAIDF_WANTED) != 0) { 2191 rs->sc_flags &= ~RAIDF_WANTED; 2192 wakeup(rs); 2193 } 2194 } 2195 2196 2197 #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */ 2198 #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */ 2199 2200 int 2201 raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter) 2202 { 2203 RF_ComponentLabel_t clabel; 2204 raidread_component_label(dev, b_vp, &clabel); 2205 clabel.mod_counter = mod_counter; 2206 clabel.clean = RF_RAID_CLEAN; 2207 raidwrite_component_label(dev, b_vp, &clabel); 2208 return(0); 2209 } 2210 2211 2212 int 2213 raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter) 2214 { 2215 RF_ComponentLabel_t clabel; 2216 raidread_component_label(dev, b_vp, &clabel); 2217 clabel.mod_counter = mod_counter; 2218 clabel.clean = RF_RAID_DIRTY; 2219 raidwrite_component_label(dev, b_vp, &clabel); 2220 return(0); 2221 } 2222 2223 /* ARGSUSED */ 2224 int 2225 raidread_component_label(dev_t dev, struct vnode *b_vp, 2226 RF_ComponentLabel_t *clabel) 2227 { 2228 struct buf *bp; 2229 const struct bdevsw *bdev; 2230 int error; 2231 2232 /* XXX should probably ensure that we don't try to do this if 2233 someone has changed rf_protected_sectors. */ 2234 2235 if (b_vp == NULL) { 2236 /* For whatever reason, this component is not valid. 2237 Don't try to read a component label from it. */ 2238 return(EINVAL); 2239 } 2240 2241 /* get a block of the appropriate size... */ 2242 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2243 bp->b_dev = dev; 2244 2245 /* get our ducks in a row for the read */ 2246 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2247 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2248 bp->b_flags |= B_READ; 2249 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2250 2251 bdev = bdevsw_lookup(bp->b_dev); 2252 if (bdev == NULL) 2253 return (ENXIO); 2254 (*bdev->d_strategy)(bp); 2255 2256 error = biowait(bp); 2257 2258 if (!error) { 2259 memcpy(clabel, bp->b_data, 2260 sizeof(RF_ComponentLabel_t)); 2261 } 2262 2263 brelse(bp); 2264 return(error); 2265 } 2266 /* ARGSUSED */ 2267 int 2268 raidwrite_component_label(dev_t dev, struct vnode *b_vp, 2269 RF_ComponentLabel_t *clabel) 2270 { 2271 struct buf *bp; 2272 const struct bdevsw *bdev; 2273 int error; 2274 2275 /* get a block of the appropriate size... */ 2276 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2277 bp->b_dev = dev; 2278 2279 /* get our ducks in a row for the write */ 2280 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2281 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2282 bp->b_flags |= B_WRITE; 2283 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2284 2285 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); 2286 2287 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); 2288 2289 bdev = bdevsw_lookup(bp->b_dev); 2290 if (bdev == NULL) 2291 return (ENXIO); 2292 (*bdev->d_strategy)(bp); 2293 error = biowait(bp); 2294 brelse(bp); 2295 if (error) { 2296 #if 1 2297 printf("Failed to write RAID component info!\n"); 2298 #endif 2299 } 2300 2301 return(error); 2302 } 2303 2304 void 2305 rf_markalldirty(RF_Raid_t *raidPtr) 2306 { 2307 RF_ComponentLabel_t clabel; 2308 int sparecol; 2309 int c; 2310 int j; 2311 int scol = -1; 2312 2313 raidPtr->mod_counter++; 2314 for (c = 0; c < raidPtr->numCol; c++) { 2315 /* we don't want to touch (at all) a disk that has 2316 failed */ 2317 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) { 2318 raidread_component_label( 2319 raidPtr->Disks[c].dev, 2320 raidPtr->raid_cinfo[c].ci_vp, 2321 &clabel); 2322 if (clabel.status == rf_ds_spared) { 2323 /* XXX do something special... 2324 but whatever you do, don't 2325 try to access it!! */ 2326 } else { 2327 raidmarkdirty( 2328 raidPtr->Disks[c].dev, 2329 raidPtr->raid_cinfo[c].ci_vp, 2330 raidPtr->mod_counter); 2331 } 2332 } 2333 } 2334 2335 for( c = 0; c < raidPtr->numSpare ; c++) { 2336 sparecol = raidPtr->numCol + c; 2337 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2338 /* 2339 2340 we claim this disk is "optimal" if it's 2341 rf_ds_used_spare, as that means it should be 2342 directly substitutable for the disk it replaced. 2343 We note that too... 2344 2345 */ 2346 2347 for(j=0;j<raidPtr->numCol;j++) { 2348 if (raidPtr->Disks[j].spareCol == sparecol) { 2349 scol = j; 2350 break; 2351 } 2352 } 2353 2354 raidread_component_label( 2355 raidPtr->Disks[sparecol].dev, 2356 raidPtr->raid_cinfo[sparecol].ci_vp, 2357 &clabel); 2358 /* make sure status is noted */ 2359 2360 raid_init_component_label(raidPtr, &clabel); 2361 2362 clabel.row = 0; 2363 clabel.column = scol; 2364 /* Note: we *don't* change status from rf_ds_used_spare 2365 to rf_ds_optimal */ 2366 /* clabel.status = rf_ds_optimal; */ 2367 2368 raidmarkdirty(raidPtr->Disks[sparecol].dev, 2369 raidPtr->raid_cinfo[sparecol].ci_vp, 2370 raidPtr->mod_counter); 2371 } 2372 } 2373 } 2374 2375 2376 void 2377 rf_update_component_labels(RF_Raid_t *raidPtr, int final) 2378 { 2379 RF_ComponentLabel_t clabel; 2380 int sparecol; 2381 int c; 2382 int j; 2383 int scol; 2384 2385 scol = -1; 2386 2387 /* XXX should do extra checks to make sure things really are clean, 2388 rather than blindly setting the clean bit... */ 2389 2390 raidPtr->mod_counter++; 2391 2392 for (c = 0; c < raidPtr->numCol; c++) { 2393 if (raidPtr->Disks[c].status == rf_ds_optimal) { 2394 raidread_component_label( 2395 raidPtr->Disks[c].dev, 2396 raidPtr->raid_cinfo[c].ci_vp, 2397 &clabel); 2398 /* make sure status is noted */ 2399 clabel.status = rf_ds_optimal; 2400 /* bump the counter */ 2401 clabel.mod_counter = raidPtr->mod_counter; 2402 2403 raidwrite_component_label( 2404 raidPtr->Disks[c].dev, 2405 raidPtr->raid_cinfo[c].ci_vp, 2406 &clabel); 2407 if (final == RF_FINAL_COMPONENT_UPDATE) { 2408 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2409 raidmarkclean( 2410 raidPtr->Disks[c].dev, 2411 raidPtr->raid_cinfo[c].ci_vp, 2412 raidPtr->mod_counter); 2413 } 2414 } 2415 } 2416 /* else we don't touch it.. */ 2417 } 2418 2419 for( c = 0; c < raidPtr->numSpare ; c++) { 2420 sparecol = raidPtr->numCol + c; 2421 /* Need to ensure that the reconstruct actually completed! */ 2422 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 2423 /* 2424 2425 we claim this disk is "optimal" if it's 2426 rf_ds_used_spare, as that means it should be 2427 directly substitutable for the disk it replaced. 2428 We note that too... 2429 2430 */ 2431 2432 for(j=0;j<raidPtr->numCol;j++) { 2433 if (raidPtr->Disks[j].spareCol == sparecol) { 2434 scol = j; 2435 break; 2436 } 2437 } 2438 2439 /* XXX shouldn't *really* need this... */ 2440 raidread_component_label( 2441 raidPtr->Disks[sparecol].dev, 2442 raidPtr->raid_cinfo[sparecol].ci_vp, 2443 &clabel); 2444 /* make sure status is noted */ 2445 2446 raid_init_component_label(raidPtr, &clabel); 2447 2448 clabel.mod_counter = raidPtr->mod_counter; 2449 clabel.column = scol; 2450 clabel.status = rf_ds_optimal; 2451 2452 raidwrite_component_label( 2453 raidPtr->Disks[sparecol].dev, 2454 raidPtr->raid_cinfo[sparecol].ci_vp, 2455 &clabel); 2456 if (final == RF_FINAL_COMPONENT_UPDATE) { 2457 if (raidPtr->parity_good == RF_RAID_CLEAN) { 2458 raidmarkclean( raidPtr->Disks[sparecol].dev, 2459 raidPtr->raid_cinfo[sparecol].ci_vp, 2460 raidPtr->mod_counter); 2461 } 2462 } 2463 } 2464 } 2465 } 2466 2467 void 2468 rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured) 2469 { 2470 struct proc *p; 2471 2472 p = raidPtr->engine_thread; 2473 2474 if (vp != NULL) { 2475 if (auto_configured == 1) { 2476 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2477 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2478 vput(vp); 2479 2480 } else { 2481 (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p); 2482 } 2483 } 2484 } 2485 2486 2487 void 2488 rf_UnconfigureVnodes(RF_Raid_t *raidPtr) 2489 { 2490 int r,c; 2491 struct vnode *vp; 2492 int acd; 2493 2494 2495 /* We take this opportunity to close the vnodes like we should.. */ 2496 2497 for (c = 0; c < raidPtr->numCol; c++) { 2498 vp = raidPtr->raid_cinfo[c].ci_vp; 2499 acd = raidPtr->Disks[c].auto_configured; 2500 rf_close_component(raidPtr, vp, acd); 2501 raidPtr->raid_cinfo[c].ci_vp = NULL; 2502 raidPtr->Disks[c].auto_configured = 0; 2503 } 2504 2505 for (r = 0; r < raidPtr->numSpare; r++) { 2506 vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp; 2507 acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured; 2508 rf_close_component(raidPtr, vp, acd); 2509 raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL; 2510 raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0; 2511 } 2512 } 2513 2514 2515 void 2516 rf_ReconThread(struct rf_recon_req *req) 2517 { 2518 int s; 2519 RF_Raid_t *raidPtr; 2520 2521 s = splbio(); 2522 raidPtr = (RF_Raid_t *) req->raidPtr; 2523 raidPtr->recon_in_progress = 1; 2524 2525 rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col, 2526 ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0)); 2527 2528 RF_Free(req, sizeof(*req)); 2529 2530 raidPtr->recon_in_progress = 0; 2531 splx(s); 2532 2533 /* That's all... */ 2534 kthread_exit(0); /* does not return */ 2535 } 2536 2537 void 2538 rf_RewriteParityThread(RF_Raid_t *raidPtr) 2539 { 2540 int retcode; 2541 int s; 2542 2543 raidPtr->parity_rewrite_in_progress = 1; 2544 s = splbio(); 2545 retcode = rf_RewriteParity(raidPtr); 2546 splx(s); 2547 if (retcode) { 2548 printf("raid%d: Error re-writing parity!\n",raidPtr->raidid); 2549 } else { 2550 /* set the clean bit! If we shutdown correctly, 2551 the clean bit on each component label will get 2552 set */ 2553 raidPtr->parity_good = RF_RAID_CLEAN; 2554 } 2555 raidPtr->parity_rewrite_in_progress = 0; 2556 2557 /* Anyone waiting for us to stop? If so, inform them... */ 2558 if (raidPtr->waitShutdown) { 2559 wakeup(&raidPtr->parity_rewrite_in_progress); 2560 } 2561 2562 /* That's all... */ 2563 kthread_exit(0); /* does not return */ 2564 } 2565 2566 2567 void 2568 rf_CopybackThread(RF_Raid_t *raidPtr) 2569 { 2570 int s; 2571 2572 raidPtr->copyback_in_progress = 1; 2573 s = splbio(); 2574 rf_CopybackReconstructedData(raidPtr); 2575 splx(s); 2576 raidPtr->copyback_in_progress = 0; 2577 2578 /* That's all... */ 2579 kthread_exit(0); /* does not return */ 2580 } 2581 2582 2583 void 2584 rf_ReconstructInPlaceThread(struct rf_recon_req *req) 2585 { 2586 int s; 2587 RF_Raid_t *raidPtr; 2588 2589 s = splbio(); 2590 raidPtr = req->raidPtr; 2591 raidPtr->recon_in_progress = 1; 2592 rf_ReconstructInPlace(raidPtr, req->col); 2593 RF_Free(req, sizeof(*req)); 2594 raidPtr->recon_in_progress = 0; 2595 splx(s); 2596 2597 /* That's all... */ 2598 kthread_exit(0); /* does not return */ 2599 } 2600 2601 RF_AutoConfig_t * 2602 rf_find_raid_components() 2603 { 2604 struct vnode *vp; 2605 struct disklabel label; 2606 struct device *dv; 2607 dev_t dev; 2608 int bmajor; 2609 int error; 2610 int i; 2611 int good_one; 2612 RF_ComponentLabel_t *clabel; 2613 RF_AutoConfig_t *ac_list; 2614 RF_AutoConfig_t *ac; 2615 2616 2617 /* initialize the AutoConfig list */ 2618 ac_list = NULL; 2619 2620 /* we begin by trolling through *all* the devices on the system */ 2621 2622 for (dv = alldevs.tqh_first; dv != NULL; 2623 dv = dv->dv_list.tqe_next) { 2624 2625 /* we are only interested in disks... */ 2626 if (dv->dv_class != DV_DISK) 2627 continue; 2628 2629 /* we don't care about floppies... */ 2630 if (!strcmp(dv->dv_cfdata->cf_name,"fd")) { 2631 continue; 2632 } 2633 2634 /* we don't care about CD's... */ 2635 if (!strcmp(dv->dv_cfdata->cf_name,"cd")) { 2636 continue; 2637 } 2638 2639 /* hdfd is the Atari/Hades floppy driver */ 2640 if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) { 2641 continue; 2642 } 2643 /* fdisa is the Atari/Milan floppy driver */ 2644 if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) { 2645 continue; 2646 } 2647 2648 /* need to find the device_name_to_block_device_major stuff */ 2649 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0); 2650 2651 /* get a vnode for the raw partition of this disk */ 2652 2653 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART); 2654 if (bdevvp(dev, &vp)) 2655 panic("RAID can't alloc vnode"); 2656 2657 error = VOP_OPEN(vp, FREAD, NOCRED, 0); 2658 2659 if (error) { 2660 /* "Who cares." Continue looking 2661 for something that exists*/ 2662 vput(vp); 2663 continue; 2664 } 2665 2666 /* Ok, the disk exists. Go get the disklabel. */ 2667 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0); 2668 if (error) { 2669 /* 2670 * XXX can't happen - open() would 2671 * have errored out (or faked up one) 2672 */ 2673 printf("can't get label for dev %s%c (%d)!?!?\n", 2674 dv->dv_xname, 'a' + RAW_PART, error); 2675 } 2676 2677 /* don't need this any more. We'll allocate it again 2678 a little later if we really do... */ 2679 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2680 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2681 vput(vp); 2682 2683 for (i=0; i < label.d_npartitions; i++) { 2684 /* We only support partitions marked as RAID */ 2685 if (label.d_partitions[i].p_fstype != FS_RAID) 2686 continue; 2687 2688 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i); 2689 if (bdevvp(dev, &vp)) 2690 panic("RAID can't alloc vnode"); 2691 2692 error = VOP_OPEN(vp, FREAD, NOCRED, 0); 2693 if (error) { 2694 /* Whatever... */ 2695 vput(vp); 2696 continue; 2697 } 2698 2699 good_one = 0; 2700 2701 clabel = (RF_ComponentLabel_t *) 2702 malloc(sizeof(RF_ComponentLabel_t), 2703 M_RAIDFRAME, M_NOWAIT); 2704 if (clabel == NULL) { 2705 /* XXX CLEANUP HERE */ 2706 printf("RAID auto config: out of memory!\n"); 2707 return(NULL); /* XXX probably should panic? */ 2708 } 2709 2710 if (!raidread_component_label(dev, vp, clabel)) { 2711 /* Got the label. Does it look reasonable? */ 2712 if (rf_reasonable_label(clabel) && 2713 (clabel->partitionSize <= 2714 label.d_partitions[i].p_size)) { 2715 #if DEBUG 2716 printf("Component on: %s%c: %d\n", 2717 dv->dv_xname, 'a'+i, 2718 label.d_partitions[i].p_size); 2719 rf_print_component_label(clabel); 2720 #endif 2721 /* if it's reasonable, add it, 2722 else ignore it. */ 2723 ac = (RF_AutoConfig_t *) 2724 malloc(sizeof(RF_AutoConfig_t), 2725 M_RAIDFRAME, 2726 M_NOWAIT); 2727 if (ac == NULL) { 2728 /* XXX should panic?? */ 2729 return(NULL); 2730 } 2731 2732 snprintf(ac->devname, 2733 sizeof(ac->devname), "%s%c", 2734 dv->dv_xname, 'a'+i); 2735 ac->dev = dev; 2736 ac->vp = vp; 2737 ac->clabel = clabel; 2738 ac->next = ac_list; 2739 ac_list = ac; 2740 good_one = 1; 2741 } 2742 } 2743 if (!good_one) { 2744 /* cleanup */ 2745 free(clabel, M_RAIDFRAME); 2746 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2747 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2748 vput(vp); 2749 } 2750 } 2751 } 2752 return(ac_list); 2753 } 2754 2755 static int 2756 rf_reasonable_label(RF_ComponentLabel_t *clabel) 2757 { 2758 2759 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || 2760 (clabel->version==RF_COMPONENT_LABEL_VERSION)) && 2761 ((clabel->clean == RF_RAID_CLEAN) || 2762 (clabel->clean == RF_RAID_DIRTY)) && 2763 clabel->row >=0 && 2764 clabel->column >= 0 && 2765 clabel->num_rows > 0 && 2766 clabel->num_columns > 0 && 2767 clabel->row < clabel->num_rows && 2768 clabel->column < clabel->num_columns && 2769 clabel->blockSize > 0 && 2770 clabel->numBlocks > 0) { 2771 /* label looks reasonable enough... */ 2772 return(1); 2773 } 2774 return(0); 2775 } 2776 2777 2778 #if DEBUG 2779 void 2780 rf_print_component_label(RF_ComponentLabel_t *clabel) 2781 { 2782 printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n", 2783 clabel->row, clabel->column, 2784 clabel->num_rows, clabel->num_columns); 2785 printf(" Version: %d Serial Number: %d Mod Counter: %d\n", 2786 clabel->version, clabel->serial_number, 2787 clabel->mod_counter); 2788 printf(" Clean: %s Status: %d\n", 2789 clabel->clean ? "Yes" : "No", clabel->status ); 2790 printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n", 2791 clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU); 2792 printf(" RAID Level: %c blocksize: %d numBlocks: %d\n", 2793 (char) clabel->parityConfig, clabel->blockSize, 2794 clabel->numBlocks); 2795 printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" ); 2796 printf(" Contains root partition: %s\n", 2797 clabel->root_partition ? "Yes" : "No" ); 2798 printf(" Last configured as: raid%d\n", clabel->last_unit ); 2799 #if 0 2800 printf(" Config order: %d\n", clabel->config_order); 2801 #endif 2802 2803 } 2804 #endif 2805 2806 RF_ConfigSet_t * 2807 rf_create_auto_sets(RF_AutoConfig_t *ac_list) 2808 { 2809 RF_AutoConfig_t *ac; 2810 RF_ConfigSet_t *config_sets; 2811 RF_ConfigSet_t *cset; 2812 RF_AutoConfig_t *ac_next; 2813 2814 2815 config_sets = NULL; 2816 2817 /* Go through the AutoConfig list, and figure out which components 2818 belong to what sets. */ 2819 ac = ac_list; 2820 while(ac!=NULL) { 2821 /* we're going to putz with ac->next, so save it here 2822 for use at the end of the loop */ 2823 ac_next = ac->next; 2824 2825 if (config_sets == NULL) { 2826 /* will need at least this one... */ 2827 config_sets = (RF_ConfigSet_t *) 2828 malloc(sizeof(RF_ConfigSet_t), 2829 M_RAIDFRAME, M_NOWAIT); 2830 if (config_sets == NULL) { 2831 panic("rf_create_auto_sets: No memory!"); 2832 } 2833 /* this one is easy :) */ 2834 config_sets->ac = ac; 2835 config_sets->next = NULL; 2836 config_sets->rootable = 0; 2837 ac->next = NULL; 2838 } else { 2839 /* which set does this component fit into? */ 2840 cset = config_sets; 2841 while(cset!=NULL) { 2842 if (rf_does_it_fit(cset, ac)) { 2843 /* looks like it matches... */ 2844 ac->next = cset->ac; 2845 cset->ac = ac; 2846 break; 2847 } 2848 cset = cset->next; 2849 } 2850 if (cset==NULL) { 2851 /* didn't find a match above... new set..*/ 2852 cset = (RF_ConfigSet_t *) 2853 malloc(sizeof(RF_ConfigSet_t), 2854 M_RAIDFRAME, M_NOWAIT); 2855 if (cset == NULL) { 2856 panic("rf_create_auto_sets: No memory!"); 2857 } 2858 cset->ac = ac; 2859 ac->next = NULL; 2860 cset->next = config_sets; 2861 cset->rootable = 0; 2862 config_sets = cset; 2863 } 2864 } 2865 ac = ac_next; 2866 } 2867 2868 2869 return(config_sets); 2870 } 2871 2872 static int 2873 rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac) 2874 { 2875 RF_ComponentLabel_t *clabel1, *clabel2; 2876 2877 /* If this one matches the *first* one in the set, that's good 2878 enough, since the other members of the set would have been 2879 through here too... */ 2880 /* note that we are not checking partitionSize here.. 2881 2882 Note that we are also not checking the mod_counters here. 2883 If everything else matches execpt the mod_counter, that's 2884 good enough for this test. We will deal with the mod_counters 2885 a little later in the autoconfiguration process. 2886 2887 (clabel1->mod_counter == clabel2->mod_counter) && 2888 2889 The reason we don't check for this is that failed disks 2890 will have lower modification counts. If those disks are 2891 not added to the set they used to belong to, then they will 2892 form their own set, which may result in 2 different sets, 2893 for example, competing to be configured at raid0, and 2894 perhaps competing to be the root filesystem set. If the 2895 wrong ones get configured, or both attempt to become /, 2896 weird behaviour and or serious lossage will occur. Thus we 2897 need to bring them into the fold here, and kick them out at 2898 a later point. 2899 2900 */ 2901 2902 clabel1 = cset->ac->clabel; 2903 clabel2 = ac->clabel; 2904 if ((clabel1->version == clabel2->version) && 2905 (clabel1->serial_number == clabel2->serial_number) && 2906 (clabel1->num_rows == clabel2->num_rows) && 2907 (clabel1->num_columns == clabel2->num_columns) && 2908 (clabel1->sectPerSU == clabel2->sectPerSU) && 2909 (clabel1->SUsPerPU == clabel2->SUsPerPU) && 2910 (clabel1->SUsPerRU == clabel2->SUsPerRU) && 2911 (clabel1->parityConfig == clabel2->parityConfig) && 2912 (clabel1->maxOutstanding == clabel2->maxOutstanding) && 2913 (clabel1->blockSize == clabel2->blockSize) && 2914 (clabel1->numBlocks == clabel2->numBlocks) && 2915 (clabel1->autoconfigure == clabel2->autoconfigure) && 2916 (clabel1->root_partition == clabel2->root_partition) && 2917 (clabel1->last_unit == clabel2->last_unit) && 2918 (clabel1->config_order == clabel2->config_order)) { 2919 /* if it get's here, it almost *has* to be a match */ 2920 } else { 2921 /* it's not consistent with somebody in the set.. 2922 punt */ 2923 return(0); 2924 } 2925 /* all was fine.. it must fit... */ 2926 return(1); 2927 } 2928 2929 int 2930 rf_have_enough_components(RF_ConfigSet_t *cset) 2931 { 2932 RF_AutoConfig_t *ac; 2933 RF_AutoConfig_t *auto_config; 2934 RF_ComponentLabel_t *clabel; 2935 int c; 2936 int num_cols; 2937 int num_missing; 2938 int mod_counter; 2939 int mod_counter_found; 2940 int even_pair_failed; 2941 char parity_type; 2942 2943 2944 /* check to see that we have enough 'live' components 2945 of this set. If so, we can configure it if necessary */ 2946 2947 num_cols = cset->ac->clabel->num_columns; 2948 parity_type = cset->ac->clabel->parityConfig; 2949 2950 /* XXX Check for duplicate components!?!?!? */ 2951 2952 /* Determine what the mod_counter is supposed to be for this set. */ 2953 2954 mod_counter_found = 0; 2955 mod_counter = 0; 2956 ac = cset->ac; 2957 while(ac!=NULL) { 2958 if (mod_counter_found==0) { 2959 mod_counter = ac->clabel->mod_counter; 2960 mod_counter_found = 1; 2961 } else { 2962 if (ac->clabel->mod_counter > mod_counter) { 2963 mod_counter = ac->clabel->mod_counter; 2964 } 2965 } 2966 ac = ac->next; 2967 } 2968 2969 num_missing = 0; 2970 auto_config = cset->ac; 2971 2972 even_pair_failed = 0; 2973 for(c=0; c<num_cols; c++) { 2974 ac = auto_config; 2975 while(ac!=NULL) { 2976 if ((ac->clabel->column == c) && 2977 (ac->clabel->mod_counter == mod_counter)) { 2978 /* it's this one... */ 2979 #if DEBUG 2980 printf("Found: %s at %d\n", 2981 ac->devname,c); 2982 #endif 2983 break; 2984 } 2985 ac=ac->next; 2986 } 2987 if (ac==NULL) { 2988 /* Didn't find one here! */ 2989 /* special case for RAID 1, especially 2990 where there are more than 2 2991 components (where RAIDframe treats 2992 things a little differently :( ) */ 2993 if (parity_type == '1') { 2994 if (c%2 == 0) { /* even component */ 2995 even_pair_failed = 1; 2996 } else { /* odd component. If 2997 we're failed, and 2998 so is the even 2999 component, it's 3000 "Good Night, Charlie" */ 3001 if (even_pair_failed == 1) { 3002 return(0); 3003 } 3004 } 3005 } else { 3006 /* normal accounting */ 3007 num_missing++; 3008 } 3009 } 3010 if ((parity_type == '1') && (c%2 == 1)) { 3011 /* Just did an even component, and we didn't 3012 bail.. reset the even_pair_failed flag, 3013 and go on to the next component.... */ 3014 even_pair_failed = 0; 3015 } 3016 } 3017 3018 clabel = cset->ac->clabel; 3019 3020 if (((clabel->parityConfig == '0') && (num_missing > 0)) || 3021 ((clabel->parityConfig == '4') && (num_missing > 1)) || 3022 ((clabel->parityConfig == '5') && (num_missing > 1))) { 3023 /* XXX this needs to be made *much* more general */ 3024 /* Too many failures */ 3025 return(0); 3026 } 3027 /* otherwise, all is well, and we've got enough to take a kick 3028 at autoconfiguring this set */ 3029 return(1); 3030 } 3031 3032 void 3033 rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config, 3034 RF_Raid_t *raidPtr) 3035 { 3036 RF_ComponentLabel_t *clabel; 3037 int i; 3038 3039 clabel = ac->clabel; 3040 3041 /* 1. Fill in the common stuff */ 3042 config->numRow = clabel->num_rows = 1; 3043 config->numCol = clabel->num_columns; 3044 config->numSpare = 0; /* XXX should this be set here? */ 3045 config->sectPerSU = clabel->sectPerSU; 3046 config->SUsPerPU = clabel->SUsPerPU; 3047 config->SUsPerRU = clabel->SUsPerRU; 3048 config->parityConfig = clabel->parityConfig; 3049 /* XXX... */ 3050 strcpy(config->diskQueueType,"fifo"); 3051 config->maxOutstandingDiskReqs = clabel->maxOutstanding; 3052 config->layoutSpecificSize = 0; /* XXX ?? */ 3053 3054 while(ac!=NULL) { 3055 /* row/col values will be in range due to the checks 3056 in reasonable_label() */ 3057 strcpy(config->devnames[0][ac->clabel->column], 3058 ac->devname); 3059 ac = ac->next; 3060 } 3061 3062 for(i=0;i<RF_MAXDBGV;i++) { 3063 config->debugVars[i][0] = 0; 3064 } 3065 } 3066 3067 int 3068 rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value) 3069 { 3070 RF_ComponentLabel_t clabel; 3071 struct vnode *vp; 3072 dev_t dev; 3073 int column; 3074 int sparecol; 3075 3076 raidPtr->autoconfigure = new_value; 3077 3078 for(column=0; column<raidPtr->numCol; column++) { 3079 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3080 dev = raidPtr->Disks[column].dev; 3081 vp = raidPtr->raid_cinfo[column].ci_vp; 3082 raidread_component_label(dev, vp, &clabel); 3083 clabel.autoconfigure = new_value; 3084 raidwrite_component_label(dev, vp, &clabel); 3085 } 3086 } 3087 for(column = 0; column < raidPtr->numSpare ; column++) { 3088 sparecol = raidPtr->numCol + column; 3089 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3090 dev = raidPtr->Disks[sparecol].dev; 3091 vp = raidPtr->raid_cinfo[sparecol].ci_vp; 3092 raidread_component_label(dev, vp, &clabel); 3093 clabel.autoconfigure = new_value; 3094 raidwrite_component_label(dev, vp, &clabel); 3095 } 3096 } 3097 return(new_value); 3098 } 3099 3100 int 3101 rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value) 3102 { 3103 RF_ComponentLabel_t clabel; 3104 struct vnode *vp; 3105 dev_t dev; 3106 int column; 3107 int sparecol; 3108 3109 raidPtr->root_partition = new_value; 3110 for(column=0; column<raidPtr->numCol; column++) { 3111 if (raidPtr->Disks[column].status == rf_ds_optimal) { 3112 dev = raidPtr->Disks[column].dev; 3113 vp = raidPtr->raid_cinfo[column].ci_vp; 3114 raidread_component_label(dev, vp, &clabel); 3115 clabel.root_partition = new_value; 3116 raidwrite_component_label(dev, vp, &clabel); 3117 } 3118 } 3119 for(column = 0; column < raidPtr->numSpare ; column++) { 3120 sparecol = raidPtr->numCol + column; 3121 if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) { 3122 dev = raidPtr->Disks[sparecol].dev; 3123 vp = raidPtr->raid_cinfo[sparecol].ci_vp; 3124 raidread_component_label(dev, vp, &clabel); 3125 clabel.root_partition = new_value; 3126 raidwrite_component_label(dev, vp, &clabel); 3127 } 3128 } 3129 return(new_value); 3130 } 3131 3132 void 3133 rf_release_all_vps(RF_ConfigSet_t *cset) 3134 { 3135 RF_AutoConfig_t *ac; 3136 3137 ac = cset->ac; 3138 while(ac!=NULL) { 3139 /* Close the vp, and give it back */ 3140 if (ac->vp) { 3141 vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY); 3142 VOP_CLOSE(ac->vp, FREAD, NOCRED, 0); 3143 vput(ac->vp); 3144 ac->vp = NULL; 3145 } 3146 ac = ac->next; 3147 } 3148 } 3149 3150 3151 void 3152 rf_cleanup_config_set(RF_ConfigSet_t *cset) 3153 { 3154 RF_AutoConfig_t *ac; 3155 RF_AutoConfig_t *next_ac; 3156 3157 ac = cset->ac; 3158 while(ac!=NULL) { 3159 next_ac = ac->next; 3160 /* nuke the label */ 3161 free(ac->clabel, M_RAIDFRAME); 3162 /* cleanup the config structure */ 3163 free(ac, M_RAIDFRAME); 3164 /* "next.." */ 3165 ac = next_ac; 3166 } 3167 /* and, finally, nuke the config set */ 3168 free(cset, M_RAIDFRAME); 3169 } 3170 3171 3172 void 3173 raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel) 3174 { 3175 /* current version number */ 3176 clabel->version = RF_COMPONENT_LABEL_VERSION; 3177 clabel->serial_number = raidPtr->serial_number; 3178 clabel->mod_counter = raidPtr->mod_counter; 3179 clabel->num_rows = 1; 3180 clabel->num_columns = raidPtr->numCol; 3181 clabel->clean = RF_RAID_DIRTY; /* not clean */ 3182 clabel->status = rf_ds_optimal; /* "It's good!" */ 3183 3184 clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit; 3185 clabel->SUsPerPU = raidPtr->Layout.SUsPerPU; 3186 clabel->SUsPerRU = raidPtr->Layout.SUsPerRU; 3187 3188 clabel->blockSize = raidPtr->bytesPerSector; 3189 clabel->numBlocks = raidPtr->sectorsPerDisk; 3190 3191 /* XXX not portable */ 3192 clabel->parityConfig = raidPtr->Layout.map->parityConfig; 3193 clabel->maxOutstanding = raidPtr->maxOutstanding; 3194 clabel->autoconfigure = raidPtr->autoconfigure; 3195 clabel->root_partition = raidPtr->root_partition; 3196 clabel->last_unit = raidPtr->raidid; 3197 clabel->config_order = raidPtr->config_order; 3198 } 3199 3200 int 3201 rf_auto_config_set(RF_ConfigSet_t *cset, int *unit) 3202 { 3203 RF_Raid_t *raidPtr; 3204 RF_Config_t *config; 3205 int raidID; 3206 int retcode; 3207 3208 #if DEBUG 3209 printf("RAID autoconfigure\n"); 3210 #endif 3211 3212 retcode = 0; 3213 *unit = -1; 3214 3215 /* 1. Create a config structure */ 3216 3217 config = (RF_Config_t *)malloc(sizeof(RF_Config_t), 3218 M_RAIDFRAME, 3219 M_NOWAIT); 3220 if (config==NULL) { 3221 printf("Out of mem!?!?\n"); 3222 /* XXX do something more intelligent here. */ 3223 return(1); 3224 } 3225 3226 memset(config, 0, sizeof(RF_Config_t)); 3227 3228 /* 3229 2. Figure out what RAID ID this one is supposed to live at 3230 See if we can get the same RAID dev that it was configured 3231 on last time.. 3232 */ 3233 3234 raidID = cset->ac->clabel->last_unit; 3235 if ((raidID < 0) || (raidID >= numraid)) { 3236 /* let's not wander off into lala land. */ 3237 raidID = numraid - 1; 3238 } 3239 if (raidPtrs[raidID]->valid != 0) { 3240 3241 /* 3242 Nope... Go looking for an alternative... 3243 Start high so we don't immediately use raid0 if that's 3244 not taken. 3245 */ 3246 3247 for(raidID = numraid - 1; raidID >= 0; raidID--) { 3248 if (raidPtrs[raidID]->valid == 0) { 3249 /* can use this one! */ 3250 break; 3251 } 3252 } 3253 } 3254 3255 if (raidID < 0) { 3256 /* punt... */ 3257 printf("Unable to auto configure this set!\n"); 3258 printf("(Out of RAID devs!)\n"); 3259 return(1); 3260 } 3261 3262 #if DEBUG 3263 printf("Configuring raid%d:\n",raidID); 3264 #endif 3265 3266 raidPtr = raidPtrs[raidID]; 3267 3268 /* XXX all this stuff should be done SOMEWHERE ELSE! */ 3269 raidPtr->raidid = raidID; 3270 raidPtr->openings = RAIDOUTSTANDING; 3271 3272 /* 3. Build the configuration structure */ 3273 rf_create_configuration(cset->ac, config, raidPtr); 3274 3275 /* 4. Do the configuration */ 3276 retcode = rf_Configure(raidPtr, config, cset->ac); 3277 3278 if (retcode == 0) { 3279 3280 raidinit(raidPtrs[raidID]); 3281 3282 rf_markalldirty(raidPtrs[raidID]); 3283 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */ 3284 if (cset->ac->clabel->root_partition==1) { 3285 /* everything configured just fine. Make a note 3286 that this set is eligible to be root. */ 3287 cset->rootable = 1; 3288 /* XXX do this here? */ 3289 raidPtrs[raidID]->root_partition = 1; 3290 } 3291 } 3292 3293 /* 5. Cleanup */ 3294 free(config, M_RAIDFRAME); 3295 3296 *unit = raidID; 3297 return(retcode); 3298 } 3299 3300 void 3301 rf_disk_unbusy(RF_RaidAccessDesc_t *desc) 3302 { 3303 struct buf *bp; 3304 3305 bp = (struct buf *)desc->bp; 3306 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev, 3307 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ)); 3308 } 3309 3310 void 3311 rf_pool_init(struct pool *p, size_t size, char *w_chan, 3312 size_t min, size_t max) 3313 { 3314 pool_init(p, size, 0, 0, 0, w_chan, NULL); 3315 pool_sethiwat(p, max); 3316 pool_prime(p, min); 3317 pool_setlowat(p, min); 3318 } 3319