1 /* $NetBSD: rf_netbsdkintf.c,v 1.179 2004/04/22 00:17:13 itojun Exp $ */ 2 /*- 3 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Greg Oster; Jason R. Thorpe. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the NetBSD 20 * Foundation, Inc. and its contributors. 21 * 4. Neither the name of The NetBSD Foundation nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1990, 1993 40 * The Regents of the University of California. All rights reserved. 41 * 42 * This code is derived from software contributed to Berkeley by 43 * the Systems Programming Group of the University of Utah Computer 44 * Science Department. 45 * 46 * Redistribution and use in source and binary forms, with or without 47 * modification, are permitted provided that the following conditions 48 * are met: 49 * 1. Redistributions of source code must retain the above copyright 50 * notice, this list of conditions and the following disclaimer. 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 3. Neither the name of the University nor the names of its contributors 55 * may be used to endorse or promote products derived from this software 56 * without specific prior written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 61 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 * 70 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 71 * 72 * @(#)cd.c 8.2 (Berkeley) 11/16/93 73 */ 74 75 /* 76 * Copyright (c) 1988 University of Utah. 77 * 78 * This code is derived from software contributed to Berkeley by 79 * the Systems Programming Group of the University of Utah Computer 80 * Science Department. 81 * 82 * Redistribution and use in source and binary forms, with or without 83 * modification, are permitted provided that the following conditions 84 * are met: 85 * 1. Redistributions of source code must retain the above copyright 86 * notice, this list of conditions and the following disclaimer. 87 * 2. Redistributions in binary form must reproduce the above copyright 88 * notice, this list of conditions and the following disclaimer in the 89 * documentation and/or other materials provided with the distribution. 90 * 3. All advertising materials mentioning features or use of this software 91 * must display the following acknowledgement: 92 * This product includes software developed by the University of 93 * California, Berkeley and its contributors. 94 * 4. Neither the name of the University nor the names of its contributors 95 * may be used to endorse or promote products derived from this software 96 * without specific prior written permission. 
97 * 98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 108 * SUCH DAMAGE. 109 * 110 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 111 * 112 * @(#)cd.c 8.2 (Berkeley) 11/16/93 113 */ 114 115 /* 116 * Copyright (c) 1995 Carnegie-Mellon University. 117 * All rights reserved. 118 * 119 * Authors: Mark Holland, Jim Zelenka 120 * 121 * Permission to use, copy, modify and distribute this software and 122 * its documentation is hereby granted, provided that both the copyright 123 * notice and this permission notice appear in all copies of the 124 * software, derivative works or modified versions, and any portions 125 * thereof, and that both notices appear in supporting documentation. 126 * 127 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 128 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 129 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
130 * 131 * Carnegie Mellon requests users of this software to return to 132 * 133 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 134 * School of Computer Science 135 * Carnegie Mellon University 136 * Pittsburgh PA 15213-3890 137 * 138 * any improvements or extensions that they make and grant Carnegie the 139 * rights to redistribute these changes. 140 */ 141 142 /*********************************************************** 143 * 144 * rf_kintf.c -- the kernel interface routines for RAIDframe 145 * 146 ***********************************************************/ 147 148 #include <sys/cdefs.h> 149 __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.179 2004/04/22 00:17:13 itojun Exp $"); 150 151 #include <sys/param.h> 152 #include <sys/errno.h> 153 #include <sys/pool.h> 154 #include <sys/proc.h> 155 #include <sys/queue.h> 156 #include <sys/disk.h> 157 #include <sys/device.h> 158 #include <sys/stat.h> 159 #include <sys/ioctl.h> 160 #include <sys/fcntl.h> 161 #include <sys/systm.h> 162 #include <sys/namei.h> 163 #include <sys/vnode.h> 164 #include <sys/disklabel.h> 165 #include <sys/conf.h> 166 #include <sys/lock.h> 167 #include <sys/buf.h> 168 #include <sys/user.h> 169 #include <sys/reboot.h> 170 171 #include <dev/raidframe/raidframevar.h> 172 #include <dev/raidframe/raidframeio.h> 173 #include "raid.h" 174 #include "opt_raid_autoconfig.h" 175 #include "rf_raid.h" 176 #include "rf_copyback.h" 177 #include "rf_dag.h" 178 #include "rf_dagflags.h" 179 #include "rf_desc.h" 180 #include "rf_diskqueue.h" 181 #include "rf_etimer.h" 182 #include "rf_general.h" 183 #include "rf_kintf.h" 184 #include "rf_options.h" 185 #include "rf_driver.h" 186 #include "rf_parityscan.h" 187 #include "rf_threadstuff.h" 188 189 #ifdef DEBUG 190 int rf_kdebug_level = 0; 191 #define db1_printf(a) if (rf_kdebug_level > 0) printf a 192 #else /* DEBUG */ 193 #define db1_printf(a) { } 194 #endif /* DEBUG */ 195 196 static RF_Raid_t **raidPtrs; /* global raid device descriptors 
*/

/* Mutex protecting the spare-table request/response queues below. */
RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)

static RF_SparetWait_t *rf_sparet_wait_queue;	/* requests to install a
						 * spare table */
static RF_SparetWait_t *rf_sparet_resp_queue;	/* responses from
						 * installation process */

MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");

/* prototypes */
static void KernelWakeupFunc(struct buf * bp);
static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
		   dev_t dev, RF_SectorNum_t startSect,
		   RF_SectorCount_t numSect, caddr_t buf,
		   void (*cbFunc) (struct buf *), void *cbArg,
		   int logBytesPerSector, struct proc * b_proc);
static void raidinit(RF_Raid_t *);

void raidattach(int);

/* Block/character device entry points (see the devsw tables below). */
dev_type_open(raidopen);
dev_type_close(raidclose);
dev_type_read(raidread);
dev_type_write(raidwrite);
dev_type_ioctl(raidioctl);
dev_type_strategy(raidstrategy);
dev_type_dump(raiddump);
dev_type_size(raidsize);

const struct bdevsw raid_bdevsw = {
	raidopen, raidclose, raidstrategy, raidioctl,
	raiddump, raidsize, D_DISK
};

const struct cdevsw raid_cdevsw = {
	raidopen, raidclose, raidread, raidwrite, raidioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};

/*
 * Pilfered from ccd.c
 */

struct raidbuf {
	struct buf rf_buf;	/* new I/O buf.  MUST BE FIRST!!! */
	struct buf *rf_obp;	/* ptr. to original I/O buf */
	RF_DiskQueueData_t *req;/* the request that this was part of.. */
};

/* XXX Not sure if the following should be replacing the raidPtrs above,
   or if it should be used in conjunction with that...
*/

struct raid_softc {
	int sc_flags;		/* flags */
	int sc_cflags;		/* configuration flags */
	size_t sc_size;		/* size of the raid device */
	char sc_xname[20];	/* XXX external name */
	struct disk sc_dkdev;	/* generic disk device info */
	struct bufq_state buf_queue;	/* used for the device queue */
};
/* sc_flags */
#define RAIDF_INITED	0x01	/* unit has been initialized */
#define RAIDF_WLABEL	0x02	/* label area is writable */
#define RAIDF_LABELLING	0x04	/* unit is currently being labelled */
#define RAIDF_WANTED	0x40	/* someone is waiting to obtain a lock */
#define RAIDF_LOCKED	0x80	/* unit is locked */

#define raidunit(x)	DISKUNIT(x)
int numraid = 0;

/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even it if is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 */

#ifndef RAIDOUTSTANDING
#define RAIDOUTSTANDING   6
#endif

#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))

/* declared here, and made public, for the benefit of KVM stuff.. */
struct raid_softc *raid_softc;

static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
				struct disklabel *);
static void raidgetdisklabel(dev_t);
static void raidmakedisklabel(struct raid_softc *);

static int raidlock(struct raid_softc *);
static void raidunlock(struct raid_softc *);

static void rf_markalldirty(RF_Raid_t *);

struct device *raidrootdev;

void rf_ReconThread(struct rf_recon_req *);
void rf_RewriteParityThread(RF_Raid_t *raidPtr);
void rf_CopybackThread(RF_Raid_t *raidPtr);
void rf_ReconstructInPlaceThread(struct rf_recon_req *);
int rf_autoconfig(struct device *self);
void rf_buildroothack(RF_ConfigSet_t *);

RF_AutoConfig_t *rf_find_raid_components(void);
RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
static int rf_does_it_fit(RF_ConfigSet_t *, RF_AutoConfig_t *);
static int rf_reasonable_label(RF_ComponentLabel_t *);
void rf_create_configuration(RF_AutoConfig_t *, RF_Config_t *, RF_Raid_t *);
int rf_set_autoconfig(RF_Raid_t *, int);
int rf_set_rootpartition(RF_Raid_t *, int);
void rf_release_all_vps(RF_ConfigSet_t *);
void rf_cleanup_config_set(RF_ConfigSet_t *);
int rf_have_enough_components(RF_ConfigSet_t *);
int rf_auto_config_set(RF_ConfigSet_t *, int *);

static int raidautoconfig = 0;	/* Debugging, mostly.  Set to 0 to not
				   allow autoconfig to take place.
				   Note that this is overridden by having
				   RAID_AUTOCONFIG as an option in the
				   kernel config file. */

struct RF_Pools_s rf_pools;

/*
 * raidattach: pseudo-device attach routine.  Called once at boot with the
 * number of units requested in the kernel config.  Allocates the global
 * raidPtrs/raid_softc/raidrootdev arrays, boots the RAIDframe core, and
 * registers rf_autoconfig() as a config finalizer so RAID sets can be
 * auto-assembled after all real hardware has attached.
 */
void
raidattach(int num)
{
	int raidID;
	int i, rc;

#ifdef DEBUG
	printf("raidattach: Asked for %d units\n", num);
#endif

	if (num <= 0) {
#ifdef DIAGNOSTIC
		panic("raidattach: count <= 0");
#endif
		return;
	}
	/* This is where all the initialization stuff gets done. */

	numraid = num;

	/* Make some space for requested number of units... */

	RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
	if (raidPtrs == NULL) {
		panic("raidPtrs is NULL!!");
	}

	/* Initialize the component buffer pool. */
	rf_pool_init(&rf_pools.cbuf, sizeof(struct raidbuf),
		     "raidpl", num * RAIDOUTSTANDING,
		     2 * num * RAIDOUTSTANDING);

	rf_mutex_init(&rf_sparet_wait_mutex);

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;

	for (i = 0; i < num; i++)
		raidPtrs[i] = NULL;
	rc = rf_BootRaidframe();
	if (rc == 0)
		printf("Kernelized RAIDframe activated\n");
	else
		panic("Serious error booting RAID!!");

	/* put together some datastructures like the CCD device does.. This
	 * lets us lock the device and what-not when it gets opened. */

	raid_softc = (struct raid_softc *)
	    malloc(num * sizeof(struct raid_softc),
		   M_RAIDFRAME, M_NOWAIT);
	if (raid_softc == NULL) {
		printf("WARNING: no memory for RAIDframe driver\n");
		return;
	}

	memset(raid_softc, 0, num * sizeof(struct raid_softc));

	raidrootdev = (struct device *)malloc(num * sizeof(struct device),
					      M_RAIDFRAME, M_NOWAIT);
	if (raidrootdev == NULL) {
		panic("No memory for RAIDframe driver!!?!?!");
	}

	for (raidID = 0; raidID < num; raidID++) {
		bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);

		/* Fake up a struct device per unit so a raid set can be
		   used as the root device (see rf_buildroothack()). */
		raidrootdev[raidID].dv_class = DV_DISK;
		raidrootdev[raidID].dv_cfdata = NULL;
		raidrootdev[raidID].dv_unit = raidID;
		raidrootdev[raidID].dv_parent = NULL;
		raidrootdev[raidID].dv_flags = 0;
		snprintf(raidrootdev[raidID].dv_xname,
		    sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID);

		RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
			  (RF_Raid_t *));
		if (raidPtrs[raidID] == NULL) {
			/* Truncate the set of units to what we managed to
			   allocate, then bail. */
			printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
			numraid = raidID;
			return;
		}
	}

#ifdef RAID_AUTOCONFIG
	raidautoconfig = 1;
#endif

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	if (config_finalize_register(NULL, rf_autoconfig) != 0)
		printf("WARNING: unable to register RAIDframe finalizer\n");
}

/*
 * rf_autoconfig: config finalizer.  Scans all disks for RAID components,
 * groups them into configuration sets, and hands the sets to
 * rf_buildroothack() for evaluation/configuration.  Runs at most once
 * (raidautoconfig is cleared on first entry).  Returns 0 if autoconfig
 * is disabled, 1 after a scan was performed.
 */
int
rf_autoconfig(struct device *self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (raidautoconfig == 0)
		return (0);

	/* XXX This code can only be run once. */
	raidautoconfig = 0;

	/* 1. locate all RAID components on the system */
#ifdef DEBUG
	printf("Searching for RAID components...\n");
#endif
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return (1);
}

/*
 * rf_buildroothack: walk the list of configuration sets, auto-configure
 * every complete set whose component labels request it, and release the
 * resources of the rest.  If exactly one configured set is marked
 * rootable it becomes booted_device; with more than one candidate the
 * user is asked at boot (RB_ASKNAME).
 */
void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int retcode;
	int raidID;
	int rootID;
	int num_root;

	rootID = 0;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			retcode = rf_auto_config_set(cset, &raidID);
			if (!retcode) {
				if (cset->rootable) {
					rootID = raidID;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
#if DEBUG
				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
#endif
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* we found something bootable... */

	if (num_root == 1) {
		booted_device = &raidrootdev[rootID];
	} else if (num_root > 1) {
		/* we can't guess.. require the user to answer... */
		boothowto |= RB_ASKNAME;
	}
}

/*
 * raidsize: return the size (in DEV_BSIZE blocks) of the given partition,
 * for swap/dump purposes.  Only FS_SWAP partitions report a size; anything
 * else (or an unconfigured unit) yields -1.  Temporarily opens/closes the
 * block device if the partition was not already open.
 */
int
raidsize(dev_t dev)
{
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, unit, omask, size;

	unit = raidunit(dev);
	if (unit >= numraid)
		return (-1);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (-1);

	part = DISKPART(dev);
	omask = rs->sc_dkdev.dk_openmask & (1 << part);
	lp = rs->sc_dkdev.dk_label;

	if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
		return (-1);

	if (lp->d_partitions[part].p_fstype != FS_SWAP)
		size = -1;
	else
		size = lp->d_partitions[part].p_size *
		    (lp->d_secsize / DEV_BSIZE);

	if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
		return (-1);

	return (size);

}

/* raiddump: crash-dump entry point; dumping to a RAID set is unsupported. */
int
raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
{
	/* Not implemented. */
	return ENXIO;
}

/*
 * raidopen: open entry point for both block and character devices.
 * Validates the unit/partition, records the open in the appropriate
 * openmask, and on the first open of a configured unit marks all
 * components dirty (so an unclean shutdown is detectable).
 */
/* ARGSUSED */
int
raidopen(dev_t dev, int flags, int fmt, struct proc *p)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct disklabel *lp;
	int part, pmask;
	int error = 0;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);
	lp = rs->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/* Re-read the disklabel on the first open of a configured unit. */
	if ((rs->sc_flags & RAIDF_INITED) &&
	    (rs->sc_dkdev.dk_openmask == 0))
		raidgetdisklabel(dev);

	/* make sure that this partition exists */

	if (part != RAW_PART) {
		if (((rs->sc_flags & RAIDF_INITED) == 0) ||
		    ((part >= lp->d_npartitions) ||
			(lp->d_partitions[part].p_fstype == FS_UNUSED))) {
			error = ENXIO;
			raidunlock(rs);
			return (error);
		}
	}
	/* Prevent this unit from being unconfigured while open. */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask |= pmask;
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask |= pmask;
		break;
	}

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(raidPtrs[unit]);
	}


	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	raidunlock(rs);

	return (error);


}

/*
 * raidclose: close entry point.  Clears the partition's bit from the
 * openmask; on last close of a configured unit the component labels are
 * updated to "clean", and if the system is shutting down the RAID set is
 * shut down and the disk detached as well.
 */
/* ARGSUSED */
int
raidclose(dev_t dev, int flags, int fmt, struct proc *p)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	int error = 0;
	int part;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((error = raidlock(rs)) != 0)
		return (error);

	part = DISKPART(dev);

	/* ...that much closer to allowing unconfiguration... */
	switch (fmt) {
	case S_IFCHR:
		rs->sc_dkdev.dk_copenmask &= ~(1 << part);
		break;

	case S_IFBLK:
		rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
		break;
	}
	rs->sc_dkdev.dk_openmask =
	    rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;

	if ((rs->sc_dkdev.dk_openmask == 0) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* Last one... device is not unconfigured yet.
		   Device shutdown has taken care of setting the
		   clean bits if RAIDF_INITED is not set
		   mark things as clean... */

		rf_update_component_labels(raidPtrs[unit],
		    RF_FINAL_COMPONENT_UPDATE);
		if (doing_shutdown) {
			/* last one, and we're going down, so
			   lights out for this RAID set too. */
			error = rf_Shutdown(raidPtrs[unit]);

			/* It's no longer initialized... */
			rs->sc_flags &= ~RAIDF_INITED;

			/* Detach the disk. */
			disk_detach(&rs->sc_dkdev);
		}
	}

	raidunlock(rs);
	return (0);

}

/*
 * raidstrategy: queue a buf for I/O on the given unit.  Sanity-checks the
 * unit and transfer, bounds-checks against the disklabel for non-raw
 * partitions, then puts the buf on the per-unit queue and kicks
 * raidstart().  Errors are reported by biodone()ing the buf with B_ERROR.
 *
 * NOTE(review): rs (and rs->sc_flags) is computed and read from
 * raid_softc[raidID] *before* the "raidID >= numraid" bounds check below;
 * the two checks look out of order -- verify against a later revision.
 */
void
raidstrategy(struct buf *bp)
{
	int s;

	unsigned int raidID = raidunit(bp->b_dev);
	RF_Raid_t *raidPtr;
	struct raid_softc *rs = &raid_softc[raidID];
	int wlabel;

	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		bp->b_error = ENXIO;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}
	if (raidID >= numraid || !raidPtrs[raidID]) {
		bp->b_error = ENODEV;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}
	raidPtr = raidPtrs[raidID];
	if (!raidPtr->valid) {
		bp->b_error = ENODEV;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}
	if (bp->b_bcount == 0) {
		db1_printf(("b_bcount is zero..\n"));
		biodone(bp);
		return;
	}

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.
	 */

	wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
	if (DISKPART(bp->b_dev) != RAW_PART)
		if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
			db1_printf(("Bounds check failed!!:%d %d\n",
				(int) bp->b_blkno, (int) wlabel));
			biodone(bp);
			return;
		}
	s = splbio();

	bp->b_resid = 0;

	/* stuff it onto our queue */
	BUFQ_PUT(&rs->buf_queue, bp);

	raidstart(raidPtrs[raidID]);

	splx(s);
}

/* raidread: character-device read; hands off to physio()/raidstrategy(). */
/* ARGSUSED */
int
raidread(dev_t dev, struct uio *uio, int flags)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));

}

/* raidwrite: character-device write; hands off to physio()/raidstrategy(). */
/* ARGSUSED */
int
raidwrite(dev_t dev, struct uio *uio, int flags)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;

	if (unit >= numraid)
		return (ENXIO);
	rs = &raid_softc[unit];

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return (ENXIO);

	return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));

}

/*
 * raidioctl: ioctl entry point.  Handles both the standard disk ioctls
 * (disklabel manipulation) and the RAIDFRAME_* control operations
 * (configure/shutdown, component labels, spares, rebuilds, parity, ...).
 */
int
raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	int unit = raidunit(dev);
	int error = 0;
	int part, pmask;
	struct raid_softc *rs;
	RF_Config_t *k_cfg, *u_cfg;
	RF_Raid_t *raidPtr;
	RF_RaidDisk_t *diskPtr;
	RF_AccTotals_t *totals;
	RF_DeviceConfig_t *d_cfg, **ucfgp;
	u_char *specific_buf;
	int retcode = 0;
	int column;
	int raidid;
	struct rf_recon_req *rrcopy, *rr;
	RF_ComponentLabel_t *clabel;
	RF_ComponentLabel_t ci_label;
	RF_ComponentLabel_t **clabel_ptr;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	RF_SingleComponent_t hot_spare;
	RF_SingleComponent_t component;
	RF_ProgressInfo_t progressInfo, **progressInfoPtr;
	int i, j, d;
#ifdef
__HAVE_OLD_DISKLABEL 803 struct disklabel newlabel; 804 #endif 805 806 if (unit >= numraid) 807 return (ENXIO); 808 rs = &raid_softc[unit]; 809 raidPtr = raidPtrs[unit]; 810 811 db1_printf(("raidioctl: %d %d %d %d\n", (int) dev, 812 (int) DISKPART(dev), (int) unit, (int) cmd)); 813 814 /* Must be open for writes for these commands... */ 815 switch (cmd) { 816 case DIOCSDINFO: 817 case DIOCWDINFO: 818 #ifdef __HAVE_OLD_DISKLABEL 819 case ODIOCWDINFO: 820 case ODIOCSDINFO: 821 #endif 822 case DIOCWLABEL: 823 if ((flag & FWRITE) == 0) 824 return (EBADF); 825 } 826 827 /* Must be initialized for these... */ 828 switch (cmd) { 829 case DIOCGDINFO: 830 case DIOCSDINFO: 831 case DIOCWDINFO: 832 #ifdef __HAVE_OLD_DISKLABEL 833 case ODIOCGDINFO: 834 case ODIOCWDINFO: 835 case ODIOCSDINFO: 836 case ODIOCGDEFLABEL: 837 #endif 838 case DIOCGPART: 839 case DIOCWLABEL: 840 case DIOCGDEFLABEL: 841 case RAIDFRAME_SHUTDOWN: 842 case RAIDFRAME_REWRITEPARITY: 843 case RAIDFRAME_GET_INFO: 844 case RAIDFRAME_RESET_ACCTOTALS: 845 case RAIDFRAME_GET_ACCTOTALS: 846 case RAIDFRAME_KEEP_ACCTOTALS: 847 case RAIDFRAME_GET_SIZE: 848 case RAIDFRAME_FAIL_DISK: 849 case RAIDFRAME_COPYBACK: 850 case RAIDFRAME_CHECK_RECON_STATUS: 851 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 852 case RAIDFRAME_GET_COMPONENT_LABEL: 853 case RAIDFRAME_SET_COMPONENT_LABEL: 854 case RAIDFRAME_ADD_HOT_SPARE: 855 case RAIDFRAME_REMOVE_HOT_SPARE: 856 case RAIDFRAME_INIT_LABELS: 857 case RAIDFRAME_REBUILD_IN_PLACE: 858 case RAIDFRAME_CHECK_PARITY: 859 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 860 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 861 case RAIDFRAME_CHECK_COPYBACK_STATUS: 862 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 863 case RAIDFRAME_SET_AUTOCONFIG: 864 case RAIDFRAME_SET_ROOT: 865 case RAIDFRAME_DELETE_COMPONENT: 866 case RAIDFRAME_INCORPORATE_HOT_SPARE: 867 if ((rs->sc_flags & RAIDF_INITED) == 0) 868 return (ENXIO); 869 } 870 871 switch (cmd) { 872 873 /* configure the system */ 874 case RAIDFRAME_CONFIGURE: 
875 876 if (raidPtr->valid) { 877 /* There is a valid RAID set running on this unit! */ 878 printf("raid%d: Device already configured!\n",unit); 879 return(EINVAL); 880 } 881 882 /* copy-in the configuration information */ 883 /* data points to a pointer to the configuration structure */ 884 885 u_cfg = *((RF_Config_t **) data); 886 RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *)); 887 if (k_cfg == NULL) { 888 return (ENOMEM); 889 } 890 retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t)); 891 if (retcode) { 892 RF_Free(k_cfg, sizeof(RF_Config_t)); 893 db1_printf(("rf_ioctl: retcode=%d copyin.1\n", 894 retcode)); 895 return (retcode); 896 } 897 /* allocate a buffer for the layout-specific data, and copy it 898 * in */ 899 if (k_cfg->layoutSpecificSize) { 900 if (k_cfg->layoutSpecificSize > 10000) { 901 /* sanity check */ 902 RF_Free(k_cfg, sizeof(RF_Config_t)); 903 return (EINVAL); 904 } 905 RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, 906 (u_char *)); 907 if (specific_buf == NULL) { 908 RF_Free(k_cfg, sizeof(RF_Config_t)); 909 return (ENOMEM); 910 } 911 retcode = copyin(k_cfg->layoutSpecific, specific_buf, 912 k_cfg->layoutSpecificSize); 913 if (retcode) { 914 RF_Free(k_cfg, sizeof(RF_Config_t)); 915 RF_Free(specific_buf, 916 k_cfg->layoutSpecificSize); 917 db1_printf(("rf_ioctl: retcode=%d copyin.2\n", 918 retcode)); 919 return (retcode); 920 } 921 } else 922 specific_buf = NULL; 923 k_cfg->layoutSpecific = specific_buf; 924 925 /* should do some kind of sanity check on the configuration. 926 * Store the sum of all the bytes in the last byte? 
*/ 927 928 /* configure the system */ 929 930 /* 931 * Clear the entire RAID descriptor, just to make sure 932 * there is no stale data left in the case of a 933 * reconfiguration 934 */ 935 memset((char *) raidPtr, 0, sizeof(RF_Raid_t)); 936 raidPtr->raidid = unit; 937 938 retcode = rf_Configure(raidPtr, k_cfg, NULL); 939 940 if (retcode == 0) { 941 942 /* allow this many simultaneous IO's to 943 this RAID device */ 944 raidPtr->openings = RAIDOUTSTANDING; 945 946 raidinit(raidPtr); 947 rf_markalldirty(raidPtr); 948 } 949 /* free the buffers. No return code here. */ 950 if (k_cfg->layoutSpecificSize) { 951 RF_Free(specific_buf, k_cfg->layoutSpecificSize); 952 } 953 RF_Free(k_cfg, sizeof(RF_Config_t)); 954 955 return (retcode); 956 957 /* shutdown the system */ 958 case RAIDFRAME_SHUTDOWN: 959 960 if ((error = raidlock(rs)) != 0) 961 return (error); 962 963 /* 964 * If somebody has a partition mounted, we shouldn't 965 * shutdown. 966 */ 967 968 part = DISKPART(dev); 969 pmask = (1 << part); 970 if ((rs->sc_dkdev.dk_openmask & ~pmask) || 971 ((rs->sc_dkdev.dk_bopenmask & pmask) && 972 (rs->sc_dkdev.dk_copenmask & pmask))) { 973 raidunlock(rs); 974 return (EBUSY); 975 } 976 977 retcode = rf_Shutdown(raidPtr); 978 979 /* It's no longer initialized... */ 980 rs->sc_flags &= ~RAIDF_INITED; 981 982 /* Detach the disk. 
*/ 983 disk_detach(&rs->sc_dkdev); 984 985 raidunlock(rs); 986 987 return (retcode); 988 case RAIDFRAME_GET_COMPONENT_LABEL: 989 clabel_ptr = (RF_ComponentLabel_t **) data; 990 /* need to read the component label for the disk indicated 991 by row,column in clabel */ 992 993 /* For practice, let's get it directly fromdisk, rather 994 than from the in-core copy */ 995 RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ), 996 (RF_ComponentLabel_t *)); 997 if (clabel == NULL) 998 return (ENOMEM); 999 1000 memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t)); 1001 1002 retcode = copyin( *clabel_ptr, clabel, 1003 sizeof(RF_ComponentLabel_t)); 1004 1005 if (retcode) { 1006 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1007 return(retcode); 1008 } 1009 1010 clabel->row = 0; /* Don't allow looking at anything else.*/ 1011 1012 column = clabel->column; 1013 1014 if ((column < 0) || (column >= raidPtr->numCol + 1015 raidPtr->numSpare)) { 1016 RF_Free( clabel, sizeof(RF_ComponentLabel_t)); 1017 return(EINVAL); 1018 } 1019 1020 raidread_component_label(raidPtr->Disks[column].dev, 1021 raidPtr->raid_cinfo[column].ci_vp, 1022 clabel ); 1023 1024 retcode = copyout(clabel, *clabel_ptr, 1025 sizeof(RF_ComponentLabel_t)); 1026 RF_Free(clabel, sizeof(RF_ComponentLabel_t)); 1027 return (retcode); 1028 1029 case RAIDFRAME_SET_COMPONENT_LABEL: 1030 clabel = (RF_ComponentLabel_t *) data; 1031 1032 /* XXX check the label for valid stuff... */ 1033 /* Note that some things *should not* get modified -- 1034 the user should be re-initing the labels instead of 1035 trying to patch things. 
1036 */ 1037 1038 raidid = raidPtr->raidid; 1039 #if DEBUG 1040 printf("raid%d: Got component label:\n", raidid); 1041 printf("raid%d: Version: %d\n", raidid, clabel->version); 1042 printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number); 1043 printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter); 1044 printf("raid%d: Column: %d\n", raidid, clabel->column); 1045 printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns); 1046 printf("raid%d: Clean: %d\n", raidid, clabel->clean); 1047 printf("raid%d: Status: %d\n", raidid, clabel->status); 1048 #endif 1049 clabel->row = 0; 1050 column = clabel->column; 1051 1052 if ((column < 0) || (column >= raidPtr->numCol)) { 1053 return(EINVAL); 1054 } 1055 1056 /* XXX this isn't allowed to do anything for now :-) */ 1057 1058 /* XXX and before it is, we need to fill in the rest 1059 of the fields!?!?!?! */ 1060 #if 0 1061 raidwrite_component_label( 1062 raidPtr->Disks[column].dev, 1063 raidPtr->raid_cinfo[column].ci_vp, 1064 clabel ); 1065 #endif 1066 return (0); 1067 1068 case RAIDFRAME_INIT_LABELS: 1069 clabel = (RF_ComponentLabel_t *) data; 1070 /* 1071 we only want the serial number from 1072 the above. We get all the rest of the information 1073 from the config that was used to create this RAID 1074 set. 
1075 */ 1076 1077 raidPtr->serial_number = clabel->serial_number; 1078 1079 raid_init_component_label(raidPtr, &ci_label); 1080 ci_label.serial_number = clabel->serial_number; 1081 ci_label.row = 0; /* we dont' pretend to support more */ 1082 1083 for(column=0;column<raidPtr->numCol;column++) { 1084 diskPtr = &raidPtr->Disks[column]; 1085 if (!RF_DEAD_DISK(diskPtr->status)) { 1086 ci_label.partitionSize = diskPtr->partitionSize; 1087 ci_label.column = column; 1088 raidwrite_component_label( 1089 raidPtr->Disks[column].dev, 1090 raidPtr->raid_cinfo[column].ci_vp, 1091 &ci_label ); 1092 } 1093 } 1094 1095 return (retcode); 1096 case RAIDFRAME_SET_AUTOCONFIG: 1097 d = rf_set_autoconfig(raidPtr, *(int *) data); 1098 printf("raid%d: New autoconfig value is: %d\n", 1099 raidPtr->raidid, d); 1100 *(int *) data = d; 1101 return (retcode); 1102 1103 case RAIDFRAME_SET_ROOT: 1104 d = rf_set_rootpartition(raidPtr, *(int *) data); 1105 printf("raid%d: New rootpartition value is: %d\n", 1106 raidPtr->raidid, d); 1107 *(int *) data = d; 1108 return (retcode); 1109 1110 /* initialize all parity */ 1111 case RAIDFRAME_REWRITEPARITY: 1112 1113 if (raidPtr->Layout.map->faultsTolerated == 0) { 1114 /* Parity for RAID 0 is trivially correct */ 1115 raidPtr->parity_good = RF_RAID_CLEAN; 1116 return(0); 1117 } 1118 1119 if (raidPtr->parity_rewrite_in_progress == 1) { 1120 /* Re-write is already in progress! 
*/ 1121 return(EINVAL); 1122 } 1123 1124 retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread, 1125 rf_RewriteParityThread, 1126 raidPtr,"raid_parity"); 1127 return (retcode); 1128 1129 1130 case RAIDFRAME_ADD_HOT_SPARE: 1131 sparePtr = (RF_SingleComponent_t *) data; 1132 memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t)); 1133 retcode = rf_add_hot_spare(raidPtr, &hot_spare); 1134 return(retcode); 1135 1136 case RAIDFRAME_REMOVE_HOT_SPARE: 1137 return(retcode); 1138 1139 case RAIDFRAME_DELETE_COMPONENT: 1140 componentPtr = (RF_SingleComponent_t *)data; 1141 memcpy( &component, componentPtr, 1142 sizeof(RF_SingleComponent_t)); 1143 retcode = rf_delete_component(raidPtr, &component); 1144 return(retcode); 1145 1146 case RAIDFRAME_INCORPORATE_HOT_SPARE: 1147 componentPtr = (RF_SingleComponent_t *)data; 1148 memcpy( &component, componentPtr, 1149 sizeof(RF_SingleComponent_t)); 1150 retcode = rf_incorporate_hot_spare(raidPtr, &component); 1151 return(retcode); 1152 1153 case RAIDFRAME_REBUILD_IN_PLACE: 1154 1155 if (raidPtr->Layout.map->faultsTolerated == 0) { 1156 /* Can't do this on a RAID 0!! */ 1157 return(EINVAL); 1158 } 1159 1160 if (raidPtr->recon_in_progress == 1) { 1161 /* a reconstruct is already in progress! */ 1162 return(EINVAL); 1163 } 1164 1165 componentPtr = (RF_SingleComponent_t *) data; 1166 memcpy( &component, componentPtr, 1167 sizeof(RF_SingleComponent_t)); 1168 component.row = 0; /* we don't support any more */ 1169 column = component.column; 1170 1171 if ((column < 0) || (column >= raidPtr->numCol)) { 1172 return(EINVAL); 1173 } 1174 1175 RF_LOCK_MUTEX(raidPtr->mutex); 1176 if ((raidPtr->Disks[column].status == rf_ds_optimal) && 1177 (raidPtr->numFailures > 0)) { 1178 /* XXX 0 above shouldn't be constant!!! */ 1179 /* some component other than this has failed. 1180 Let's not make things worse than they already 1181 are... 
*/ 1182 printf("raid%d: Unable to reconstruct to disk at:\n", 1183 raidPtr->raidid); 1184 printf("raid%d: Col: %d Too many failures.\n", 1185 raidPtr->raidid, column); 1186 RF_UNLOCK_MUTEX(raidPtr->mutex); 1187 return (EINVAL); 1188 } 1189 if (raidPtr->Disks[column].status == 1190 rf_ds_reconstructing) { 1191 printf("raid%d: Unable to reconstruct to disk at:\n", 1192 raidPtr->raidid); 1193 printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column); 1194 1195 RF_UNLOCK_MUTEX(raidPtr->mutex); 1196 return (EINVAL); 1197 } 1198 if (raidPtr->Disks[column].status == rf_ds_spared) { 1199 RF_UNLOCK_MUTEX(raidPtr->mutex); 1200 return (EINVAL); 1201 } 1202 RF_UNLOCK_MUTEX(raidPtr->mutex); 1203 1204 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1205 if (rrcopy == NULL) 1206 return(ENOMEM); 1207 1208 rrcopy->raidPtr = (void *) raidPtr; 1209 rrcopy->col = column; 1210 1211 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1212 rf_ReconstructInPlaceThread, 1213 rrcopy,"raid_reconip"); 1214 return(retcode); 1215 1216 case RAIDFRAME_GET_INFO: 1217 if (!raidPtr->valid) 1218 return (ENODEV); 1219 ucfgp = (RF_DeviceConfig_t **) data; 1220 RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t), 1221 (RF_DeviceConfig_t *)); 1222 if (d_cfg == NULL) 1223 return (ENOMEM); 1224 memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t)); 1225 d_cfg->rows = 1; /* there is only 1 row now */ 1226 d_cfg->cols = raidPtr->numCol; 1227 d_cfg->ndevs = raidPtr->numCol; 1228 if (d_cfg->ndevs >= RF_MAX_DISKS) { 1229 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1230 return (ENOMEM); 1231 } 1232 d_cfg->nspares = raidPtr->numSpare; 1233 if (d_cfg->nspares >= RF_MAX_DISKS) { 1234 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1235 return (ENOMEM); 1236 } 1237 d_cfg->maxqdepth = raidPtr->maxQueueDepth; 1238 d = 0; 1239 for (j = 0; j < d_cfg->cols; j++) { 1240 d_cfg->devs[d] = raidPtr->Disks[j]; 1241 d++; 1242 } 1243 for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) { 1244 
d_cfg->spares[i] = raidPtr->Disks[j]; 1245 } 1246 retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t)); 1247 RF_Free(d_cfg, sizeof(RF_DeviceConfig_t)); 1248 1249 return (retcode); 1250 1251 case RAIDFRAME_CHECK_PARITY: 1252 *(int *) data = raidPtr->parity_good; 1253 return (0); 1254 1255 case RAIDFRAME_RESET_ACCTOTALS: 1256 memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals)); 1257 return (0); 1258 1259 case RAIDFRAME_GET_ACCTOTALS: 1260 totals = (RF_AccTotals_t *) data; 1261 *totals = raidPtr->acc_totals; 1262 return (0); 1263 1264 case RAIDFRAME_KEEP_ACCTOTALS: 1265 raidPtr->keep_acc_totals = *(int *)data; 1266 return (0); 1267 1268 case RAIDFRAME_GET_SIZE: 1269 *(int *) data = raidPtr->totalSectors; 1270 return (0); 1271 1272 /* fail a disk & optionally start reconstruction */ 1273 case RAIDFRAME_FAIL_DISK: 1274 1275 if (raidPtr->Layout.map->faultsTolerated == 0) { 1276 /* Can't do this on a RAID 0!! */ 1277 return(EINVAL); 1278 } 1279 1280 rr = (struct rf_recon_req *) data; 1281 rr->row = 0; 1282 if (rr->col < 0 || rr->col >= raidPtr->numCol) 1283 return (EINVAL); 1284 1285 1286 RF_LOCK_MUTEX(raidPtr->mutex); 1287 if ((raidPtr->Disks[rr->col].status == 1288 rf_ds_optimal) && (raidPtr->numFailures > 0)) { 1289 /* some other component has failed. Let's not make 1290 things worse. XXX wrong for RAID6 */ 1291 RF_UNLOCK_MUTEX(raidPtr->mutex); 1292 return (EINVAL); 1293 } 1294 if (raidPtr->Disks[rr->col].status == rf_ds_spared) { 1295 /* Can't fail a spared disk! 
*/ 1296 RF_UNLOCK_MUTEX(raidPtr->mutex); 1297 return (EINVAL); 1298 } 1299 RF_UNLOCK_MUTEX(raidPtr->mutex); 1300 1301 /* make a copy of the recon request so that we don't rely on 1302 * the user's buffer */ 1303 RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *)); 1304 if (rrcopy == NULL) 1305 return(ENOMEM); 1306 memcpy(rrcopy, rr, sizeof(*rr)); 1307 rrcopy->raidPtr = (void *) raidPtr; 1308 1309 retcode = RF_CREATE_THREAD(raidPtr->recon_thread, 1310 rf_ReconThread, 1311 rrcopy,"raid_recon"); 1312 return (0); 1313 1314 /* invoke a copyback operation after recon on whatever disk 1315 * needs it, if any */ 1316 case RAIDFRAME_COPYBACK: 1317 1318 if (raidPtr->Layout.map->faultsTolerated == 0) { 1319 /* This makes no sense on a RAID 0!! */ 1320 return(EINVAL); 1321 } 1322 1323 if (raidPtr->copyback_in_progress == 1) { 1324 /* Copyback is already in progress! */ 1325 return(EINVAL); 1326 } 1327 1328 retcode = RF_CREATE_THREAD(raidPtr->copyback_thread, 1329 rf_CopybackThread, 1330 raidPtr,"raid_copyback"); 1331 return (retcode); 1332 1333 /* return the percentage completion of reconstruction */ 1334 case RAIDFRAME_CHECK_RECON_STATUS: 1335 if (raidPtr->Layout.map->faultsTolerated == 0) { 1336 /* This makes no sense on a RAID 0, so tell the 1337 user it's done. 
*/ 1338 *(int *) data = 100; 1339 return(0); 1340 } 1341 if (raidPtr->status != rf_rs_reconstructing) 1342 *(int *) data = 100; 1343 else { 1344 if (raidPtr->reconControl->numRUsTotal > 0) { 1345 *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal); 1346 } else { 1347 *(int *) data = 0; 1348 } 1349 } 1350 return (0); 1351 case RAIDFRAME_CHECK_RECON_STATUS_EXT: 1352 progressInfoPtr = (RF_ProgressInfo_t **) data; 1353 if (raidPtr->status != rf_rs_reconstructing) { 1354 progressInfo.remaining = 0; 1355 progressInfo.completed = 100; 1356 progressInfo.total = 100; 1357 } else { 1358 progressInfo.total = 1359 raidPtr->reconControl->numRUsTotal; 1360 progressInfo.completed = 1361 raidPtr->reconControl->numRUsComplete; 1362 progressInfo.remaining = progressInfo.total - 1363 progressInfo.completed; 1364 } 1365 retcode = copyout(&progressInfo, *progressInfoPtr, 1366 sizeof(RF_ProgressInfo_t)); 1367 return (retcode); 1368 1369 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS: 1370 if (raidPtr->Layout.map->faultsTolerated == 0) { 1371 /* This makes no sense on a RAID 0, so tell the 1372 user it's done. 
*/ 1373 *(int *) data = 100; 1374 return(0); 1375 } 1376 if (raidPtr->parity_rewrite_in_progress == 1) { 1377 *(int *) data = 100 * 1378 raidPtr->parity_rewrite_stripes_done / 1379 raidPtr->Layout.numStripe; 1380 } else { 1381 *(int *) data = 100; 1382 } 1383 return (0); 1384 1385 case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT: 1386 progressInfoPtr = (RF_ProgressInfo_t **) data; 1387 if (raidPtr->parity_rewrite_in_progress == 1) { 1388 progressInfo.total = raidPtr->Layout.numStripe; 1389 progressInfo.completed = 1390 raidPtr->parity_rewrite_stripes_done; 1391 progressInfo.remaining = progressInfo.total - 1392 progressInfo.completed; 1393 } else { 1394 progressInfo.remaining = 0; 1395 progressInfo.completed = 100; 1396 progressInfo.total = 100; 1397 } 1398 retcode = copyout(&progressInfo, *progressInfoPtr, 1399 sizeof(RF_ProgressInfo_t)); 1400 return (retcode); 1401 1402 case RAIDFRAME_CHECK_COPYBACK_STATUS: 1403 if (raidPtr->Layout.map->faultsTolerated == 0) { 1404 /* This makes no sense on a RAID 0 */ 1405 *(int *) data = 100; 1406 return(0); 1407 } 1408 if (raidPtr->copyback_in_progress == 1) { 1409 *(int *) data = 100 * raidPtr->copyback_stripes_done / 1410 raidPtr->Layout.numStripe; 1411 } else { 1412 *(int *) data = 100; 1413 } 1414 return (0); 1415 1416 case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT: 1417 progressInfoPtr = (RF_ProgressInfo_t **) data; 1418 if (raidPtr->copyback_in_progress == 1) { 1419 progressInfo.total = raidPtr->Layout.numStripe; 1420 progressInfo.completed = 1421 raidPtr->copyback_stripes_done; 1422 progressInfo.remaining = progressInfo.total - 1423 progressInfo.completed; 1424 } else { 1425 progressInfo.remaining = 0; 1426 progressInfo.completed = 100; 1427 progressInfo.total = 100; 1428 } 1429 retcode = copyout(&progressInfo, *progressInfoPtr, 1430 sizeof(RF_ProgressInfo_t)); 1431 return (retcode); 1432 1433 /* the sparetable daemon calls this to wait for the kernel to 1434 * need a spare table. 
this ioctl does not return until a 1435 * spare table is needed. XXX -- calling mpsleep here in the 1436 * ioctl code is almost certainly wrong and evil. -- XXX XXX 1437 * -- I should either compute the spare table in the kernel, 1438 * or have a different -- XXX XXX -- interface (a different 1439 * character device) for delivering the table -- XXX */ 1440 #if 0 1441 case RAIDFRAME_SPARET_WAIT: 1442 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1443 while (!rf_sparet_wait_queue) 1444 mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE); 1445 waitreq = rf_sparet_wait_queue; 1446 rf_sparet_wait_queue = rf_sparet_wait_queue->next; 1447 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1448 1449 /* structure assignment */ 1450 *((RF_SparetWait_t *) data) = *waitreq; 1451 1452 RF_Free(waitreq, sizeof(*waitreq)); 1453 return (0); 1454 1455 /* wakes up a process waiting on SPARET_WAIT and puts an error 1456 * code in it that will cause the dameon to exit */ 1457 case RAIDFRAME_ABORT_SPARET_WAIT: 1458 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1459 waitreq->fcol = -1; 1460 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1461 waitreq->next = rf_sparet_wait_queue; 1462 rf_sparet_wait_queue = waitreq; 1463 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1464 wakeup(&rf_sparet_wait_queue); 1465 return (0); 1466 1467 /* used by the spare table daemon to deliver a spare table 1468 * into the kernel */ 1469 case RAIDFRAME_SEND_SPARET: 1470 1471 /* install the spare table */ 1472 retcode = rf_SetSpareTable(raidPtr, *(void **) data); 1473 1474 /* respond to the requestor. 
the return status of the spare 1475 * table installation is passed in the "fcol" field */ 1476 RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *)); 1477 waitreq->fcol = retcode; 1478 RF_LOCK_MUTEX(rf_sparet_wait_mutex); 1479 waitreq->next = rf_sparet_resp_queue; 1480 rf_sparet_resp_queue = waitreq; 1481 wakeup(&rf_sparet_resp_queue); 1482 RF_UNLOCK_MUTEX(rf_sparet_wait_mutex); 1483 1484 return (retcode); 1485 #endif 1486 1487 default: 1488 break; /* fall through to the os-specific code below */ 1489 1490 } 1491 1492 if (!raidPtr->valid) 1493 return (EINVAL); 1494 1495 /* 1496 * Add support for "regular" device ioctls here. 1497 */ 1498 1499 switch (cmd) { 1500 case DIOCGDINFO: 1501 *(struct disklabel *) data = *(rs->sc_dkdev.dk_label); 1502 break; 1503 #ifdef __HAVE_OLD_DISKLABEL 1504 case ODIOCGDINFO: 1505 newlabel = *(rs->sc_dkdev.dk_label); 1506 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1507 return ENOTTY; 1508 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1509 break; 1510 #endif 1511 1512 case DIOCGPART: 1513 ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label; 1514 ((struct partinfo *) data)->part = 1515 &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1516 break; 1517 1518 case DIOCWDINFO: 1519 case DIOCSDINFO: 1520 #ifdef __HAVE_OLD_DISKLABEL 1521 case ODIOCWDINFO: 1522 case ODIOCSDINFO: 1523 #endif 1524 { 1525 struct disklabel *lp; 1526 #ifdef __HAVE_OLD_DISKLABEL 1527 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1528 memset(&newlabel, 0, sizeof newlabel); 1529 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1530 lp = &newlabel; 1531 } else 1532 #endif 1533 lp = (struct disklabel *)data; 1534 1535 if ((error = raidlock(rs)) != 0) 1536 return (error); 1537 1538 rs->sc_flags |= RAIDF_LABELLING; 1539 1540 error = setdisklabel(rs->sc_dkdev.dk_label, 1541 lp, 0, rs->sc_dkdev.dk_cpulabel); 1542 if (error == 0) { 1543 if (cmd == DIOCWDINFO 1544 #ifdef __HAVE_OLD_DISKLABEL 1545 || cmd == ODIOCWDINFO 1546 #endif 1547 ) 1548 
				error = writedisklabel(RAIDLABELDEV(dev),
				    raidstrategy, rs->sc_dkdev.dk_label,
				    rs->sc_dkdev.dk_cpulabel);
		}
		rs->sc_flags &= ~RAIDF_LABELLING;

		raidunlock(rs);

		if (error)
			return (error);
		break;
	}

	case DIOCWLABEL:
		/* Enable or disable writing to the label area. */
		if (*(int *) data != 0)
			rs->sc_flags |= RAIDF_WLABEL;
		else
			rs->sc_flags &= ~RAIDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		/* Fabricate a default label for this RAID device. */
		raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		raidgetdefaultlabel(raidPtr, rs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	default:
		retcode = ENOTTY;
	}
	return (retcode);

}


/* raidinit -- complete the rest of the initialization for the
   RAIDframe device. */


static void
raidinit(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	int     unit;

	unit = raidPtr->raidid;

	rs = &raid_softc[unit];

	/* XXX should check return code first... */
	rs->sc_flags |= RAIDF_INITED;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);

	/* The disk(9) framework wants a name before disk_attach(). */
	rs->sc_dkdev.dk_name = rs->sc_xname;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */

	disk_attach(&rs->sc_dkdev);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;
}
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/* wake up the daemon & tell it to get us a spare table
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device were requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int     retcode;

	/* Post the request on the wait queue and poke the daemon. */
	RF_LOCK_MUTEX(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}
	/* Dequeue the response; note that 'req' now points at the
	 * daemon's reply, not the request queued above. */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	/* The daemon passes its status back in fcol. */
	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
#endif

/* a wrapper around rf_DoAccess that extracts appropriate info from the
 * bp & passes it down.
 * any calls originating in the kernel must use non-blocking I/O
 * do some extra sanity checking to return "appropriate" error values for
 * certain conditions (to make some standard utilities work)
 *
 * Formerly known as: rf_DoAccessKernel
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	struct partition *pp;
	daddr_t blocknum;
	int     unit;
	struct raid_softc *rs;
	int     do_async;
	struct buf *bp;

	unit = raidPtr->raidid;
	rs = &raid_softc[unit];

	/* quick check to see if anything has died recently */
	RF_LOCK_MUTEX(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* Label updates are done with the mutex dropped. */
		RF_UNLOCK_MUTEX(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}

	/* Check to see if we're at the limit... */
	/* Invariant: the mutex is held at the top of every iteration of
	 * this loop, and dropped while the request is prepared/sent. */
	while (raidPtr->openings > 0) {
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/* get the next item, if any, from the queue */
		if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
			/* nothing more to do */
			return;
		}

		/* Ok, for the bp we have here, bp->b_blkno is relative to the
		 * partition.. Need to make it absolute to the underlying
		 * device.. */

		blocknum = bp->b_blkno;
		if (DISKPART(bp->b_dev) != RAW_PART) {
			pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
			blocknum += pp->p_offset;
		}

		db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
			(int) blocknum));

		db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
		db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

		/* *THIS* is where we adjust what block we're going to...
		 * but DO NOT TOUCH bp->b_blkno!!! */
		raid_addr = blocknum;

		num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
		/* pb accounts for a trailing partial sector, if any */
		pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
		sum = raid_addr + num_blocks + pb;
		if (1 || rf_debugKernelAccess) {
			db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
				(int) raid_addr, (int) sum, (int) num_blocks,
				(int) pb, (int) bp->b_resid));
		}
		/* Reject requests past the end of the unit; the extra
		 * "sum < x" comparisons catch arithmetic wrap-around. */
		if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
		    || (sum < num_blocks) || (sum < pb)) {
			bp->b_error = ENOSPC;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;
		}
		/*
		 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
		 */

		/* Requests must be a whole number of sectors. */
		if (bp->b_bcount & raidPtr->sectorMask) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			RF_LOCK_MUTEX(raidPtr->mutex);
			continue;

		}
		db1_printf(("Calling DoAccess..\n"));


		/* Charge this request against the opening count. */
		RF_LOCK_MUTEX(raidPtr->mutex);
		raidPtr->openings--;
		RF_UNLOCK_MUTEX(raidPtr->mutex);

		/*
		 * Everything is async.
		 */
		do_async = 1;

		disk_busy(&rs->sc_dkdev);

		/* XXX we're still at splbio() here... do we *really*
		   need to be? */

		/* don't ever condition on bp->b_flags & B_WRITE.
		 * always condition on B_READ instead */

		bp->b_error = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
		    RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
		    do_async, raid_addr, num_blocks,
		    bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

		if (bp->b_error) {
			bp->b_flags |= B_ERROR;
		}

		RF_LOCK_MUTEX(raidPtr->mutex);
	}
	RF_UNLOCK_MUTEX(raidPtr->mutex);
}




/* invoke an I/O from kernel mode.
   Disk queue should be locked upon entry */

int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;
	struct raidbuf *raidbp = NULL;

	req->queue = queue;

#if DIAGNOSTIC
	if (queue->raidPtr->raidid >= numraid) {
		printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
		    numraid);
		panic("Invalid Unit number in rf_DispatchKernelIO");
	}
#endif

	bp = req->bp;
#if 1
	/* XXX when there is a physical disk failure, someone is passing us a
	 * buffer that contains old stuff!! Attempt to deal with this problem
	 * without taking a performance hit... (not sure where the real bug
	 * is.  It's buried in RAIDframe somewhere) :-(  GO ) */

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
	}
	if (bp->b_error != 0) {
		bp->b_error = 0;
	}
#endif
	/* PR_NOWAIT: fail with ENOMEM rather than sleep if the
	 * cluster-buffer pool is empty. */
	raidbp = pool_get(&rf_pools.cbuf, PR_NOWAIT);
	if (raidbp == NULL) {
		bp->b_flags |= B_ERROR;
		bp->b_error = ENOMEM;
		return (ENOMEM);
	}
	BUF_INIT(&raidbp->rf_buf);

	/*
	 * context for raidiodone
	 */
	raidbp->rf_obp = bp;
	raidbp->req = req;

	BIO_COPYPRIO(&raidbp->rf_buf, bp);

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		/* XXX need to glue the original buffer into this??  */

		KernelWakeupFunc(&raidbp->rf_buf);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		/* Set up the shadow buf and aim it at the component,
		 * with KernelWakeupFunc as the completion callback. */
		InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
		    op | bp->b_flags, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			req->type, queue->raidPtr->raidid,
			queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));
		if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
			raidbp->rf_buf.b_vp->v_numoutput++;
		}
		VOP_STRATEGY(raidbp->rf_buf.b_vp, &raidbp->rf_buf);

		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
/* this is the callback function associated with a I/O invoked from
   kernel code.
1897 */ 1898 static void 1899 KernelWakeupFunc(struct buf *vbp) 1900 { 1901 RF_DiskQueueData_t *req = NULL; 1902 RF_DiskQueue_t *queue; 1903 struct raidbuf *raidbp = (struct raidbuf *) vbp; 1904 struct buf *bp; 1905 int s; 1906 1907 s = splbio(); 1908 db1_printf(("recovering the request queue:\n")); 1909 req = raidbp->req; 1910 1911 bp = raidbp->rf_obp; 1912 1913 queue = (RF_DiskQueue_t *) req->queue; 1914 1915 if (raidbp->rf_buf.b_flags & B_ERROR) { 1916 bp->b_flags |= B_ERROR; 1917 bp->b_error = raidbp->rf_buf.b_error ? 1918 raidbp->rf_buf.b_error : EIO; 1919 } 1920 1921 /* XXX methinks this could be wrong... */ 1922 #if 1 1923 bp->b_resid = raidbp->rf_buf.b_resid; 1924 #endif 1925 #if RF_ACC_TRACE > 0 1926 if (req->tracerec) { 1927 RF_ETIMER_STOP(req->tracerec->timer); 1928 RF_ETIMER_EVAL(req->tracerec->timer); 1929 RF_LOCK_MUTEX(rf_tracing_mutex); 1930 req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer); 1931 req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer); 1932 req->tracerec->num_phys_ios++; 1933 RF_UNLOCK_MUTEX(rf_tracing_mutex); 1934 } 1935 #endif 1936 bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */ 1937 1938 /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go 1939 * ballistic, and mark the component as hosed... */ 1940 1941 if (bp->b_flags & B_ERROR) { 1942 /* Mark the disk as dead */ 1943 /* but only mark it once... */ 1944 if (queue->raidPtr->Disks[queue->col].status == 1945 rf_ds_optimal) { 1946 printf("raid%d: IO Error. Marking %s as failed.\n", 1947 queue->raidPtr->raidid, 1948 queue->raidPtr->Disks[queue->col].devname); 1949 queue->raidPtr->Disks[queue->col].status = 1950 rf_ds_failed; 1951 queue->raidPtr->status = rf_rs_degraded; 1952 queue->raidPtr->numFailures++; 1953 queue->raidPtr->numNewFailures++; 1954 } else { /* Disk is already dead... 
*/ 1955 /* printf("Disk already marked as dead!\n"); */ 1956 } 1957 1958 } 1959 1960 pool_put(&rf_pools.cbuf, raidbp); 1961 1962 /* Fill in the error value */ 1963 1964 req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0; 1965 1966 simple_lock(&queue->raidPtr->iodone_lock); 1967 1968 /* Drop this one on the "finished" queue... */ 1969 TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries); 1970 1971 /* Let the raidio thread know there is work to be done. */ 1972 wakeup(&(queue->raidPtr->iodone)); 1973 1974 simple_unlock(&queue->raidPtr->iodone_lock); 1975 1976 splx(s); 1977 } 1978 1979 1980 1981 /* 1982 * initialize a buf structure for doing an I/O in the kernel. 1983 */ 1984 static void 1985 InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev, 1986 RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t buf, 1987 void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector, 1988 struct proc *b_proc) 1989 { 1990 /* bp->b_flags = B_PHYS | rw_flag; */ 1991 bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */ 1992 bp->b_bcount = numSect << logBytesPerSector; 1993 bp->b_bufsize = bp->b_bcount; 1994 bp->b_error = 0; 1995 bp->b_dev = dev; 1996 bp->b_data = buf; 1997 bp->b_blkno = startSect; 1998 bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */ 1999 if (bp->b_bcount == 0) { 2000 panic("bp->b_bcount is zero in InitBP!!"); 2001 } 2002 bp->b_proc = b_proc; 2003 bp->b_iodone = cbFunc; 2004 bp->b_vp = b_vp; 2005 2006 } 2007 2008 static void 2009 raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs, 2010 struct disklabel *lp) 2011 { 2012 memset(lp, 0, sizeof(*lp)); 2013 2014 /* fabricate a label... 
 */
	lp->d_secperunit = raidPtr->totalSectors;
	lp->d_secsize = raidPtr->bytesPerSector;
	lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	lp->d_ntracks = 4 * raidPtr->numCol;
	lp->d_ncylinders = raidPtr->totalSectors /
	    (lp->d_nsectors * lp->d_ntracks);
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
	lp->d_type = DTYPE_RAID;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	/* One raw partition spanning the whole device. */
	lp->d_partitions[RAW_PART].p_offset = 0;
	lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);

}
/*
 * Read the disklabel from the raid device.  If one is not present, fake one
 * up.
 */
static void
raidgetdisklabel(dev_t dev)
{
	int     unit = raidunit(dev);
	struct raid_softc *rs = &raid_softc[unit];
	const char *errstring;
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
	RF_Raid_t *raidPtr;

	db1_printf(("Getting the disklabel...\n"));

	memset(clp, 0, sizeof(*clp));

	raidPtr = raidPtrs[unit];

	/* Start from a fabricated default in case no label is on disk. */
	raidgetdefaultlabel(raidPtr, rs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
	    rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
	if (errstring)
		raidmakedisklabel(rs);
	else {
		int     i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since total size of the raid device
		 * may vary when an interleave is changed even though exactly
		 * same components are used, and an old disklabel may be used
		 * if that is found.
		 */
		if (lp->d_secperunit != rs->sc_size)
			printf("raid%d: WARNING: %s: "
			    "total sector size in disklabel (%d) != "
			    "the size of raid (%ld)\n", unit, rs->sc_xname,
			    lp->d_secperunit, (long) rs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			if (pp->p_offset + pp->p_size > rs->sc_size)
				printf("raid%d: WARNING: %s: end of partition `%c' "
				    "exceeds the size of raid (%ld)\n",
				    unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
		}
	}

}
/*
 * Take care of things one might want to take care of in the event
 * that a disklabel isn't present.
 */
static void
raidmakedisklabel(struct raid_softc *rs)
{
	struct disklabel *lp = rs->sc_dkdev.dk_label;
	db1_printf(("Making a label..\n"));

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */

	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	lp->d_checksum = dkcksum(lp);
}
/*
 * Lookup the provided name in the filesystem.  If the file exists,
 * is a valid block device, and isn't being used by anyone else,
 * set *vpp to the file's vnode.
 * You'll find the original of this in ccd.c
 */
int
raidlookup(char *path, struct proc *p, struct vnode **vpp)
{
	struct nameidata nd;
	struct vnode *vp;
	struct vattr va;
	int     error;

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
	if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
		return (error);
	}
	vp = nd.ni_vp;
	/* Reject components that someone else already has open. */
	if (vp->v_usecount > 1) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (EBUSY);
	}
	if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (error);
	}
	/* XXX: eventually we should handle VREG, too. */
	if (va.va_type != VBLK) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		return (ENOTBLK);
	}
	/* Success: hand back the vnode, unlocked but still open. */
	VOP_UNLOCK(vp, 0);
	*vpp = vp;
	return (0);
}
/*
 * Wait interruptibly for an exclusive lock.
 *
 * XXX
 * Several drivers do this; it should be abstracted and made MP-safe.
 * (Hmm... where have we seen this warning before :-> GO )
 */
static int
raidlock(struct raid_softc *rs)
{
	int     error;

	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
		rs->sc_flags |= RAIDF_WANTED;
		if ((error =
			tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
			return (error);
	}
	rs->sc_flags |= RAIDF_LOCKED;
	return (0);
}
/*
 * Unlock and wake up any waiters.
 */
static void
raidunlock(struct raid_softc *rs)
{

	rs->sc_flags &= ~RAIDF_LOCKED;
	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
		rs->sc_flags &= ~RAIDF_WANTED;
		wakeup(rs);
	}
}


#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */

/* Rewrite the component's on-disk label with clean = RF_RAID_CLEAN and
 * the given mod counter. */
int
raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
{
	RF_ComponentLabel_t clabel;
	raidread_component_label(dev, b_vp, &clabel);
	clabel.mod_counter = mod_counter;
	clabel.clean = RF_RAID_CLEAN;
	raidwrite_component_label(dev, b_vp, &clabel);
	return(0);
}


/* Rewrite the component's on-disk label with clean = RF_RAID_DIRTY and
 * the given mod counter. */
int
raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
{
	RF_ComponentLabel_t clabel;
	raidread_component_label(dev, b_vp, &clabel);
	clabel.mod_counter = mod_counter;
	clabel.clean = RF_RAID_DIRTY;
	raidwrite_component_label(dev, b_vp, &clabel);
	return(0);
}

/* ARGSUSED */
int
raidread_component_label(dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	struct buf *bp;
	const struct bdevsw *bdev;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size...
*/ 2237 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2238 bp->b_dev = dev; 2239 2240 /* get our ducks in a row for the read */ 2241 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2242 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2243 bp->b_flags |= B_READ; 2244 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2245 2246 bdev = bdevsw_lookup(bp->b_dev); 2247 if (bdev == NULL) 2248 return (ENXIO); 2249 (*bdev->d_strategy)(bp); 2250 2251 error = biowait(bp); 2252 2253 if (!error) { 2254 memcpy(clabel, bp->b_data, 2255 sizeof(RF_ComponentLabel_t)); 2256 } 2257 2258 brelse(bp); 2259 return(error); 2260 } 2261 /* ARGSUSED */ 2262 int 2263 raidwrite_component_label(dev_t dev, struct vnode *b_vp, 2264 RF_ComponentLabel_t *clabel) 2265 { 2266 struct buf *bp; 2267 const struct bdevsw *bdev; 2268 int error; 2269 2270 /* get a block of the appropriate size... */ 2271 bp = geteblk((int)RF_COMPONENT_INFO_SIZE); 2272 bp->b_dev = dev; 2273 2274 /* get our ducks in a row for the write */ 2275 bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE; 2276 bp->b_bcount = RF_COMPONENT_INFO_SIZE; 2277 bp->b_flags |= B_WRITE; 2278 bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE; 2279 2280 memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE ); 2281 2282 memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t)); 2283 2284 bdev = bdevsw_lookup(bp->b_dev); 2285 if (bdev == NULL) 2286 return (ENXIO); 2287 (*bdev->d_strategy)(bp); 2288 error = biowait(bp); 2289 brelse(bp); 2290 if (error) { 2291 #if 1 2292 printf("Failed to write RAID component info!\n"); 2293 #endif 2294 } 2295 2296 return(error); 2297 } 2298 2299 void 2300 rf_markalldirty(RF_Raid_t *raidPtr) 2301 { 2302 RF_ComponentLabel_t clabel; 2303 int sparecol; 2304 int c; 2305 int j; 2306 int scol = -1; 2307 2308 raidPtr->mod_counter++; 2309 for (c = 0; c < raidPtr->numCol; c++) { 2310 /* we don't want to touch (at all) a disk that has 2311 failed */ 2312 if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) { 2313 raidread_component_label( 2314 
			    raidPtr->Disks[c].dev,
			    raidPtr->raid_cinfo[c].ci_vp,
			    &clabel);
			if (clabel.status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(
				    raidPtr->Disks[c].dev,
				    raidPtr->raid_cinfo[c].ci_vp,
				    raidPtr->mod_counter);
			}
		}
	}

	/* now the in-use spares: rebuild their labels and dirty them too */
	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			raidread_component_label(
			    raidPtr->Disks[sparecol].dev,
			    raidPtr->raid_cinfo[sparecol].ci_vp,
			    &clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			clabel.row = 0;
			clabel.column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr->Disks[sparecol].dev,
			    raidPtr->raid_cinfo[sparecol].ci_vp,
			    raidPtr->mod_counter);
		}
	}
}


/*
 * Rewrite the component labels of all optimal components and in-use
 * spares with a bumped mod_counter.  When final is
 * RF_FINAL_COMPONENT_UPDATE and parity is known clean, also mark the
 * labels clean (i.e. at clean shutdown time).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t clabel;
	int sparecol;
	int c;
	int j;
	int scol;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit...
	 */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			raidread_component_label(
			    raidPtr->Disks[c].dev,
			    raidPtr->raid_cinfo[c].ci_vp,
			    &clabel);
			/* make sure status is noted */
			clabel.status = rf_ds_optimal;
			/* bump the counter */
			clabel.mod_counter = raidPtr->mod_counter;

			raidwrite_component_label(
			    raidPtr->Disks[c].dev,
			    raidPtr->raid_cinfo[c].ci_vp,
			    &clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(
					    raidPtr->Disks[c].dev,
					    raidPtr->raid_cinfo[c].ci_vp,
					    raidPtr->mod_counter);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare replaced */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this...
			 */
			raidread_component_label(
			    raidPtr->Disks[sparecol].dev,
			    raidPtr->raid_cinfo[sparecol].ci_vp,
			    &clabel);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, &clabel);

			clabel.mod_counter = raidPtr->mod_counter;
			clabel.column = scol;
			clabel.status = rf_ds_optimal;

			raidwrite_component_label(
			    raidPtr->Disks[sparecol].dev,
			    raidPtr->raid_cinfo[sparecol].ci_vp,
			    &clabel);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean( raidPtr->Disks[sparecol].dev,
					    raidPtr->raid_cinfo[sparecol].ci_vp,
					    raidPtr->mod_counter);
				}
			}
		}
	}
}

/*
 * Close one component vnode.  Auto-configured components were opened
 * by the kernel (NOCRED) and are closed via VOP_CLOSE; manually
 * configured ones were opened on behalf of a process and are closed
 * with that process's credentials via vn_close.
 */
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{
	struct proc *p;

	p = raidPtr->engine_thread;

	if (vp != NULL) {
		if (auto_configured == 1) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
			vput(vp);

		} else {
			(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
		}
	}
}


/*
 * Close and forget the vnodes of all components and spares of this
 * array, clearing the auto_configured flags as we go.
 */
void
rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
{
	int r,c;
	struct vnode *vp;
	int acd;


	/* We take this opportunity to close the vnodes like we should..
	 */

	for (c = 0; c < raidPtr->numCol; c++) {
		vp = raidPtr->raid_cinfo[c].ci_vp;
		acd = raidPtr->Disks[c].auto_configured;
		rf_close_component(raidPtr, vp, acd);
		raidPtr->raid_cinfo[c].ci_vp = NULL;
		raidPtr->Disks[c].auto_configured = 0;
	}

	for (r = 0; r < raidPtr->numSpare; r++) {
		vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
		acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
		rf_close_component(raidPtr, vp, acd);
		raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
		raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
	}
}


/*
 * Kernel-thread entry point: fail a disk and (optionally) start
 * reconstruction to a spare.  Frees req and exits the thread.
 */
void
rf_ReconThread(struct rf_recon_req *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}

/*
 * Kernel-thread entry point: rewrite all parity; on success mark the
 * array's parity good so shutdown can set the clean bits.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		wakeup(&raidPtr->parity_rewrite_in_progress);
	}

	/* That's all...
*/ 2630 if (!strcmp(dv->dv_cfdata->cf_name,"cd")) { 2631 continue; 2632 } 2633 2634 /* hdfd is the Atari/Hades floppy driver */ 2635 if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) { 2636 continue; 2637 } 2638 /* fdisa is the Atari/Milan floppy driver */ 2639 if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) { 2640 continue; 2641 } 2642 2643 /* need to find the device_name_to_block_device_major stuff */ 2644 bmajor = devsw_name2blk(dv->dv_xname, NULL, 0); 2645 2646 /* get a vnode for the raw partition of this disk */ 2647 2648 dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART); 2649 if (bdevvp(dev, &vp)) 2650 panic("RAID can't alloc vnode"); 2651 2652 error = VOP_OPEN(vp, FREAD, NOCRED, 0); 2653 2654 if (error) { 2655 /* "Who cares." Continue looking 2656 for something that exists*/ 2657 vput(vp); 2658 continue; 2659 } 2660 2661 /* Ok, the disk exists. Go get the disklabel. */ 2662 error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0); 2663 if (error) { 2664 /* 2665 * XXX can't happen - open() would 2666 * have errored out (or faked up one) 2667 */ 2668 printf("can't get label for dev %s%c (%d)!?!?\n", 2669 dv->dv_xname, 'a' + RAW_PART, error); 2670 } 2671 2672 /* don't need this any more. We'll allocate it again 2673 a little later if we really do... */ 2674 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2675 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2676 vput(vp); 2677 2678 for (i=0; i < label.d_npartitions; i++) { 2679 /* We only support partitions marked as RAID */ 2680 if (label.d_partitions[i].p_fstype != FS_RAID) 2681 continue; 2682 2683 dev = MAKEDISKDEV(bmajor, dv->dv_unit, i); 2684 if (bdevvp(dev, &vp)) 2685 panic("RAID can't alloc vnode"); 2686 2687 error = VOP_OPEN(vp, FREAD, NOCRED, 0); 2688 if (error) { 2689 /* Whatever... 
*/ 2690 vput(vp); 2691 continue; 2692 } 2693 2694 good_one = 0; 2695 2696 clabel = (RF_ComponentLabel_t *) 2697 malloc(sizeof(RF_ComponentLabel_t), 2698 M_RAIDFRAME, M_NOWAIT); 2699 if (clabel == NULL) { 2700 /* XXX CLEANUP HERE */ 2701 printf("RAID auto config: out of memory!\n"); 2702 return(NULL); /* XXX probably should panic? */ 2703 } 2704 2705 if (!raidread_component_label(dev, vp, clabel)) { 2706 /* Got the label. Does it look reasonable? */ 2707 if (rf_reasonable_label(clabel) && 2708 (clabel->partitionSize <= 2709 label.d_partitions[i].p_size)) { 2710 #if DEBUG 2711 printf("Component on: %s%c: %d\n", 2712 dv->dv_xname, 'a'+i, 2713 label.d_partitions[i].p_size); 2714 rf_print_component_label(clabel); 2715 #endif 2716 /* if it's reasonable, add it, 2717 else ignore it. */ 2718 ac = (RF_AutoConfig_t *) 2719 malloc(sizeof(RF_AutoConfig_t), 2720 M_RAIDFRAME, 2721 M_NOWAIT); 2722 if (ac == NULL) { 2723 /* XXX should panic?? */ 2724 return(NULL); 2725 } 2726 2727 snprintf(ac->devname, 2728 sizeof(ac->devname), "%s%c", 2729 dv->dv_xname, 'a'+i); 2730 ac->dev = dev; 2731 ac->vp = vp; 2732 ac->clabel = clabel; 2733 ac->next = ac_list; 2734 ac_list = ac; 2735 good_one = 1; 2736 } 2737 } 2738 if (!good_one) { 2739 /* cleanup */ 2740 free(clabel, M_RAIDFRAME); 2741 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2742 VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0); 2743 vput(vp); 2744 } 2745 } 2746 } 2747 return(ac_list); 2748 } 2749 2750 static int 2751 rf_reasonable_label(RF_ComponentLabel_t *clabel) 2752 { 2753 2754 if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) || 2755 (clabel->version==RF_COMPONENT_LABEL_VERSION)) && 2756 ((clabel->clean == RF_RAID_CLEAN) || 2757 (clabel->clean == RF_RAID_DIRTY)) && 2758 clabel->row >=0 && 2759 clabel->column >= 0 && 2760 clabel->num_rows > 0 && 2761 clabel->num_columns > 0 && 2762 clabel->row < clabel->num_rows && 2763 clabel->column < clabel->num_columns && 2764 clabel->blockSize > 0 && 2765 clabel->numBlocks > 0) { 2766 /* label looks 
		   reasonable enough... */
		return(1);
	}
	return(0);
}


#if DEBUG
/*
 * Dump a component label to the console (debug builds only).
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf(" Clean: %s Status: %d\n",
	    clabel->clean ? "Yes" : "No", clabel->status );
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);
	printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
	printf(" Contains root partition: %s\n",
	    clabel->root_partition ? "Yes" : "No" );
	printf(" Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif

/*
 * Partition the flat list of discovered components into config sets:
 * components whose labels match (per rf_does_it_fit) end up on the
 * same set.  Consumes the ac_list linkage (ac->next is rewritten).
 */
RF_ConfigSet_t *
rf_create_auto_sets(RF_AutoConfig_t *ac_list)
{
	RF_AutoConfig_t *ac;
	RF_ConfigSet_t *config_sets;
	RF_ConfigSet_t *cset;
	RF_AutoConfig_t *ac_next;


	config_sets = NULL;

	/* Go through the AutoConfig list, and figure out which components
	   belong to what sets. */
	ac = ac_list;
	while(ac!=NULL) {
		/* we're going to putz with ac->next, so save it here
		   for use at the end of the loop */
		ac_next = ac->next;

		if (config_sets == NULL) {
			/* will need at least this one...
			 */
			config_sets = (RF_ConfigSet_t *)
			    malloc(sizeof(RF_ConfigSet_t),
			        M_RAIDFRAME, M_NOWAIT);
			if (config_sets == NULL) {
				panic("rf_create_auto_sets: No memory!");
			}
			/* this one is easy :) */
			config_sets->ac = ac;
			config_sets->next = NULL;
			config_sets->rootable = 0;
			ac->next = NULL;
		} else {
			/* which set does this component fit into? */
			cset = config_sets;
			while(cset!=NULL) {
				if (rf_does_it_fit(cset, ac)) {
					/* looks like it matches... */
					ac->next = cset->ac;
					cset->ac = ac;
					break;
				}
				cset = cset->next;
			}
			if (cset==NULL) {
				/* didn't find a match above... new set..*/
				cset = (RF_ConfigSet_t *)
				    malloc(sizeof(RF_ConfigSet_t),
				        M_RAIDFRAME, M_NOWAIT);
				if (cset == NULL) {
					panic("rf_create_auto_sets: No memory!");
				}
				cset->ac = ac;
				ac->next = NULL;
				cset->next = config_sets;
				cset->rootable = 0;
				config_sets = cset;
			}
		}
		ac = ac_next;
	}


	return(config_sets);
}

/*
 * Decide whether component ac belongs to config set cset by comparing
 * its label against the set's first member.  Returns 1 if it fits.
 */
static int
rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
{
	RF_ComponentLabel_t *clabel1, *clabel2;

	/* If this one matches the *first* one in the set, that's good
	   enough, since the other members of the set would have been
	   through here too... */
	/* note that we are not checking partitionSize here..

	   Note that we are also not checking the mod_counters here.
	   If everything else matches except the mod_counter, that's
	   good enough for this test.  We will deal with the mod_counters
	   a little later in the autoconfiguration process.

	    (clabel1->mod_counter == clabel2->mod_counter) &&

	   The reason we don't check for this is that failed disks
	   will have lower modification counts.  If those disks are
	   not added to the set they used to belong to, then they will
	   form their own set, which may result in 2 different sets,
	   for example, competing to be configured at raid0, and
	   perhaps competing to be the root filesystem set.  If the
	   wrong ones get configured, or both attempt to become /,
	   weird behaviour and or serious lossage will occur.  Thus we
	   need to bring them into the fold here, and kick them out at
	   a later point.

	*/

	clabel1 = cset->ac->clabel;
	clabel2 = ac->clabel;
	if ((clabel1->version == clabel2->version) &&
	    (clabel1->serial_number == clabel2->serial_number) &&
	    (clabel1->num_rows == clabel2->num_rows) &&
	    (clabel1->num_columns == clabel2->num_columns) &&
	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
	    (clabel1->parityConfig == clabel2->parityConfig) &&
	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
	    (clabel1->blockSize == clabel2->blockSize) &&
	    (clabel1->numBlocks == clabel2->numBlocks) &&
	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
	    (clabel1->root_partition == clabel2->root_partition) &&
	    (clabel1->last_unit == clabel2->last_unit) &&
	    (clabel1->config_order == clabel2->config_order)) {
		/* if it gets here, it almost *has* to be a match */
	} else {
		/* it's not consistent with somebody in the set..
		   punt */
		return(0);
	}
	/* all was fine.. it must fit...
	 */
	return(1);
}

/*
 * Check whether enough live components (at the set's highest
 * mod_counter) survive to configure this set.  RAID 1 gets special
 * treatment: a set only fails if both members of an even/odd pair
 * are missing.  Returns 1 if configurable, 0 otherwise.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set. */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#if DEBUG
				printf("Found: %s at %d\n",
				    ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
			/* Didn't find one here! */
			/* special case for RAID 1, especially
			   where there are more than 2
			   components (where RAIDframe treats
			   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) {	/* even component */
					even_pair_failed = 1;
				} else {	/* odd component.  If
						   we're failed, and
						   so is the even
						   component, it's
						   "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}

/*
 * Build an RF_Config_t for this auto-detected set from the component
 * labels, so rf_Configure can bring the array up.
 */
void
rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
    RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int i;

	clabel = ac->clabel;

	/* 1. Fill in the common stuff */
	config->numRow = clabel->num_rows = 1;
	config->numCol = clabel->num_columns;
	config->numSpare = 0;	/* XXX should this be set here? */
	config->sectPerSU = clabel->sectPerSU;
	config->SUsPerPU = clabel->SUsPerPU;
	config->SUsPerRU = clabel->SUsPerRU;
	config->parityConfig = clabel->parityConfig;
	/* XXX... */
	strcpy(config->diskQueueType,"fifo");
	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
	config->layoutSpecificSize = 0;	/* XXX ??
					 */

	while(ac!=NULL) {
		/* row/col values will be in range due to the checks
		   in reasonable_label() */
		strcpy(config->devnames[0][ac->clabel->column],
		    ac->devname);
		ac = ac->next;
	}

	for(i=0;i<RF_MAXDBGV;i++) {
		config->debugVars[i][0] = 0;
	}
}

/*
 * Set the autoconfigure flag on the array and persist it into the
 * component label of every optimal component and in-use spare.
 * Returns new_value.
 */
int
rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
{
	RF_ComponentLabel_t clabel;
	struct vnode *vp;
	dev_t dev;
	int column;
	int sparecol;

	raidPtr->autoconfigure = new_value;

	for(column=0; column<raidPtr->numCol; column++) {
		if (raidPtr->Disks[column].status == rf_ds_optimal) {
			dev = raidPtr->Disks[column].dev;
			vp = raidPtr->raid_cinfo[column].ci_vp;
			raidread_component_label(dev, vp, &clabel);
			clabel.autoconfigure = new_value;
			raidwrite_component_label(dev, vp, &clabel);
		}
	}
	for(column = 0; column < raidPtr->numSpare ; column++) {
		sparecol = raidPtr->numCol + column;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			dev = raidPtr->Disks[sparecol].dev;
			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
			raidread_component_label(dev, vp, &clabel);
			clabel.autoconfigure = new_value;
			raidwrite_component_label(dev, vp, &clabel);
		}
	}
	return(new_value);
}

/*
 * Set the root_partition flag on the array and persist it into the
 * component label of every optimal component and in-use spare.
 * Returns new_value.
 */
int
rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
{
	RF_ComponentLabel_t clabel;
	struct vnode *vp;
	dev_t dev;
	int column;
	int sparecol;

	raidPtr->root_partition = new_value;
	for(column=0; column<raidPtr->numCol; column++) {
		if (raidPtr->Disks[column].status == rf_ds_optimal) {
			dev = raidPtr->Disks[column].dev;
			vp = raidPtr->raid_cinfo[column].ci_vp;
			raidread_component_label(dev, vp, &clabel);
			clabel.root_partition = new_value;
			raidwrite_component_label(dev, vp, &clabel);
		}
	}
	for(column = 0; column < raidPtr->numSpare ; column++) {
		sparecol = raidPtr->numCol + column;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			dev = raidPtr->Disks[sparecol].dev;
			vp = raidPtr->raid_cinfo[sparecol].ci_vp;
			raidread_component_label(dev, vp, &clabel);
			clabel.root_partition = new_value;
			raidwrite_component_label(dev, vp, &clabel);
		}
	}
	return(new_value);
}

/*
 * Close and release the vnodes of every component in the set
 * (components were opened with FREAD during auto-detection).
 */
void
rf_release_all_vps(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;

	ac = cset->ac;
	while(ac!=NULL) {
		/* Close the vp, and give it back */
		if (ac->vp) {
			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
			vput(ac->vp);
			ac->vp = NULL;
		}
		ac = ac->next;
	}
}


/*
 * Free all memory associated with a config set: each component's
 * label, each RF_AutoConfig_t, and the set itself.  Does NOT close
 * vnodes -- use rf_release_all_vps first if they are still open.
 */
void
rf_cleanup_config_set(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *next_ac;

	ac = cset->ac;
	while(ac!=NULL) {
		next_ac = ac->next;
		/* nuke the label */
		free(ac->clabel, M_RAIDFRAME);
		/* cleanup the config structure */
		free(ac, M_RAIDFRAME);
		/* "next.." */
		ac = next_ac;
	}
	/* and, finally, nuke the config set */
	free(cset, M_RAIDFRAME);
}


/*
 * Initialize *clabel from the array's current geometry and settings.
 * Per-component fields (row/column) are left for the caller to fill.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY;	/* not clean */
	clabel->status = rf_ds_optimal;	/* "It's good!"
					 */

	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	clabel->numBlocks = raidPtr->sectorsPerDisk;

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;
}

/*
 * Auto-configure one config set: pick a raid unit (preferring the one
 * it was last configured at), build the config, and bring the array
 * up.  On success *unit is set to the chosen raid unit and 0 is
 * returned; on failure a non-zero value is returned.
 */
int
rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	int retcode;

#if DEBUG
	printf("RAID autoconfigure\n");
#endif

	retcode = 0;
	*unit = -1;

	/* 1. Create a config structure */

	config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
	    M_RAIDFRAME,
	    M_NOWAIT);
	if (config==NULL) {
		printf("Out of mem!?!?\n");
		/* XXX do something more intelligent here. */
		return(1);
	}

	memset(config, 0, sizeof(RF_Config_t));

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	 */

	raidID = cset->ac->clabel->last_unit;
	if ((raidID < 0) || (raidID >= numraid)) {
		/* let's not wander off into lala land. */
		raidID = numraid - 1;
	}
	if (raidPtrs[raidID]->valid != 0) {

		/*
		   Nope... Go looking for an alternative...
		   Start high so we don't immediately use raid0 if that's
		   not taken.
		 */

		for(raidID = numraid - 1; raidID >= 0; raidID--) {
			if (raidPtrs[raidID]->valid == 0) {
				/* can use this one! */
				break;
			}
		}
	}

	if (raidID < 0) {
		/* punt...
*/ 3252 printf("Unable to auto configure this set!\n"); 3253 printf("(Out of RAID devs!)\n"); 3254 return(1); 3255 } 3256 3257 #if DEBUG 3258 printf("Configuring raid%d:\n",raidID); 3259 #endif 3260 3261 raidPtr = raidPtrs[raidID]; 3262 3263 /* XXX all this stuff should be done SOMEWHERE ELSE! */ 3264 raidPtr->raidid = raidID; 3265 raidPtr->openings = RAIDOUTSTANDING; 3266 3267 /* 3. Build the configuration structure */ 3268 rf_create_configuration(cset->ac, config, raidPtr); 3269 3270 /* 4. Do the configuration */ 3271 retcode = rf_Configure(raidPtr, config, cset->ac); 3272 3273 if (retcode == 0) { 3274 3275 raidinit(raidPtrs[raidID]); 3276 3277 rf_markalldirty(raidPtrs[raidID]); 3278 raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */ 3279 if (cset->ac->clabel->root_partition==1) { 3280 /* everything configured just fine. Make a note 3281 that this set is eligible to be root. */ 3282 cset->rootable = 1; 3283 /* XXX do this here? */ 3284 raidPtrs[raidID]->root_partition = 1; 3285 } 3286 } 3287 3288 /* 5. Cleanup */ 3289 free(config, M_RAIDFRAME); 3290 3291 *unit = raidID; 3292 return(retcode); 3293 } 3294 3295 void 3296 rf_disk_unbusy(RF_RaidAccessDesc_t *desc) 3297 { 3298 struct buf *bp; 3299 3300 bp = (struct buf *)desc->bp; 3301 disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev, 3302 (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ)); 3303 } 3304 3305 void 3306 rf_pool_init(struct pool *p, size_t size, char *w_chan, 3307 size_t min, size_t max) 3308 { 3309 pool_init(p, size, 0, 0, 0, w_chan, NULL); 3310 pool_sethiwat(p, max); 3311 pool_prime(p, min); 3312 pool_setlowat(p, min); 3313 } 3314