1 /* $NetBSD: ld_ataraid.c,v 1.22 2007/11/26 19:01:36 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2003 Wasabi Systems, Inc. 5 * All rights reserved. 6 * 7 * Written by Jason R. Thorpe for Wasabi Systems, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project by 20 * Wasabi Systems, Inc. 21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 22 * or promote products derived from this software without specific prior 23 * written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 /* 39 * Support for ATA RAID logical disks. 40 * 41 * Note that all the RAID happens in software here; the ATA RAID 42 * controllers we're dealing with (Promise, etc.) only support 43 * configuration data on the component disks, with the BIOS supporting 44 * booting from the RAID volumes. 45 */ 46 47 #include <sys/cdefs.h> 48 __KERNEL_RCSID(0, "$NetBSD: ld_ataraid.c,v 1.22 2007/11/26 19:01:36 pooka Exp $"); 49 50 #include "rnd.h" 51 52 #include <sys/param.h> 53 #include <sys/systm.h> 54 #include <sys/conf.h> 55 #include <sys/kernel.h> 56 #include <sys/device.h> 57 #include <sys/buf.h> 58 #include <sys/bufq.h> 59 #include <sys/dkio.h> 60 #include <sys/disk.h> 61 #include <sys/disklabel.h> 62 #include <sys/fcntl.h> 63 #include <sys/malloc.h> 64 #include <sys/vnode.h> 65 #include <sys/kauth.h> 66 #if NRND > 0 67 #include <sys/rnd.h> 68 #endif 69 70 #include <miscfs/specfs/specdev.h> 71 72 #include <dev/ldvar.h> 73 74 #include <dev/ata/ata_raidvar.h> 75 76 struct ld_ataraid_softc { 77 struct ld_softc sc_ld; 78 79 struct ataraid_array_info *sc_aai; 80 struct vnode *sc_vnodes[ATA_RAID_MAX_DISKS]; 81 82 void (*sc_iodone)(struct buf *); 83 }; 84 85 static int ld_ataraid_match(struct device *, struct cfdata *, void *); 86 static void ld_ataraid_attach(struct device *, struct device *, void *); 87 88 static int ld_ataraid_dump(struct ld_softc *, void *, int, int); 89 90 static int ld_ataraid_start_span(struct ld_softc *, struct buf *); 91 92 static int ld_ataraid_start_raid0(struct ld_softc *, struct buf *); 93 static void ld_ataraid_iodone_raid0(struct buf *); 94 95 CFATTACH_DECL(ld_ataraid, sizeof(struct ld_ataraid_softc), 96 ld_ataraid_match, ld_ataraid_attach, NULL, NULL); 97 98 static int ld_ataraid_initialized; 99 static struct pool ld_ataraid_cbufpl; 100 101 struct cbuf { 102 struct buf cb_buf; /* new I/O buf */ 103 struct buf *cb_obp; /* ptr. to original I/O buf */ 104 struct ld_ataraid_softc *cb_sc; /* pointer to ld softc */ 105 u_int cb_comp; /* target component */ 106 SIMPLEQ_ENTRY(cbuf) cb_q; /* fifo of component buffers */ 107 struct cbuf *cb_other; /* other cbuf in case of mirror */ 108 int cb_flags; 109 #define CBUF_IODONE 0x00000001 /* I/O is already successfully done */ 110 }; 111 112 #define CBUF_GET() pool_get(&ld_ataraid_cbufpl, PR_NOWAIT); 113 #define CBUF_PUT(cbp) pool_put(&ld_ataraid_cbufpl, (cbp)) 114 115 static int 116 ld_ataraid_match(struct device *parent, 117 struct cfdata *match, void *aux) 118 { 119 120 return (1); 121 } 122 123 static void 124 ld_ataraid_attach(struct device *parent, struct device *self, 125 void *aux) 126 { 127 struct ld_ataraid_softc *sc = (void *) self; 128 struct ld_softc *ld = &sc->sc_ld; 129 struct ataraid_array_info *aai = aux; 130 const char *level; 131 struct vnode *vp; 132 char unklev[32]; 133 u_int i; 134 135 if (ld_ataraid_initialized == 0) { 136 ld_ataraid_initialized = 1; 137 pool_init(&ld_ataraid_cbufpl, sizeof(struct cbuf), 0, 138 0, 0, "ldcbuf", NULL, IPL_BIO); 139 } 140 141 sc->sc_aai = aai; /* this data persists */ 142 143 ld->sc_maxxfer = MAXPHYS * aai->aai_width; /* XXX */ 144 ld->sc_secperunit = aai->aai_capacity; 145 ld->sc_secsize = 512; /* XXX */ 146 ld->sc_maxqueuecnt = 128; /* XXX */ 147 ld->sc_dump = ld_ataraid_dump; 148 149 switch (aai->aai_level) { 150 case AAI_L_SPAN: 151 level = "SPAN"; 152 ld->sc_start = ld_ataraid_start_span; 153 sc->sc_iodone = ld_ataraid_iodone_raid0; 154 break; 155 156 case AAI_L_RAID0: 157 level = "RAID-0"; 158 ld->sc_start = ld_ataraid_start_raid0; 159 sc->sc_iodone = ld_ataraid_iodone_raid0; 160 break; 161 162 case AAI_L_RAID1: 163 level = "RAID-1"; 164 ld->sc_start = ld_ataraid_start_raid0; 165 sc->sc_iodone = ld_ataraid_iodone_raid0; 166 break; 167 168 case AAI_L_RAID0 | AAI_L_RAID1: 169 level = "RAID-10"; 170 ld->sc_start = ld_ataraid_start_raid0; 171 sc->sc_iodone = ld_ataraid_iodone_raid0; 172 break; 173 174 default: 175 snprintf(unklev, sizeof(unklev), "<unknown level 0x%x>", 176 aai->aai_level); 177 level = unklev; 178 } 179 180 aprint_naive(": ATA %s array\n", level); 181 aprint_normal(": %s ATA %s array\n", 182 ata_raid_type_name(aai->aai_type), level); 183 184 if (ld->sc_start == NULL) { 185 aprint_error("%s: unsupported array type\n", 186 ld->sc_dv.dv_xname); 187 return; 188 } 189 190 /* 191 * We get a geometry from the device; use it. 192 */ 193 ld->sc_nheads = aai->aai_heads; 194 ld->sc_nsectors = aai->aai_sectors; 195 ld->sc_ncylinders = aai->aai_cylinders; 196 197 /* 198 * Configure all the component disks. 199 */ 200 for (i = 0; i < aai->aai_ndisks; i++) { 201 struct ataraid_disk_info *adi = &aai->aai_disks[i]; 202 int bmajor, error; 203 dev_t dev; 204 205 bmajor = devsw_name2blk(adi->adi_dev->dv_xname, NULL, 0); 206 dev = MAKEDISKDEV(bmajor, device_unit(adi->adi_dev), RAW_PART); 207 error = bdevvp(dev, &vp); 208 if (error) 209 break; 210 error = VOP_OPEN(vp, FREAD|FWRITE, NOCRED); 211 if (error) { 212 vput(vp); 213 /* 214 * XXX This is bogus. We should just mark the 215 * XXX component as FAILED, and write-back new 216 * XXX config blocks. 217 */ 218 break; 219 } 220 221 VOP_UNLOCK(vp, 0); 222 sc->sc_vnodes[i] = vp; 223 } 224 if (i == aai->aai_ndisks) { 225 ld->sc_flags = LDF_ENABLED; 226 goto finish; 227 } 228 229 for (i = 0; i < aai->aai_ndisks; i++) { 230 vp = sc->sc_vnodes[i]; 231 sc->sc_vnodes[i] = NULL; 232 if (vp != NULL) 233 (void) vn_close(vp, FREAD|FWRITE, NOCRED, curlwp); 234 } 235 236 finish: 237 ldattach(ld); 238 } 239 240 static struct cbuf * 241 ld_ataraid_make_cbuf(struct ld_ataraid_softc *sc, struct buf *bp, 242 u_int comp, daddr_t bn, void *addr, long bcount) 243 { 244 struct cbuf *cbp; 245 246 cbp = CBUF_GET(); 247 if (cbp == NULL) 248 return (NULL); 249 BUF_INIT(&cbp->cb_buf); 250 cbp->cb_buf.b_flags = bp->b_flags | B_CALL; 251 cbp->cb_buf.b_iodone = sc->sc_iodone; 252 cbp->cb_buf.b_proc = bp->b_proc; 253 cbp->cb_buf.b_vp = sc->sc_vnodes[comp]; 254 cbp->cb_buf.b_blkno = bn + sc->sc_aai->aai_offset; 255 cbp->cb_buf.b_data = addr; 256 cbp->cb_buf.b_bcount = bcount; 257 258 /* Context for iodone */ 259 cbp->cb_obp = bp; 260 cbp->cb_sc = sc; 261 cbp->cb_comp = comp; 262 cbp->cb_other = NULL; 263 cbp->cb_flags = 0; 264 265 return (cbp); 266 } 267 268 static int 269 ld_ataraid_start_span(struct ld_softc *ld, struct buf *bp) 270 { 271 struct ld_ataraid_softc *sc = (void *) ld; 272 struct ataraid_array_info *aai = sc->sc_aai; 273 struct ataraid_disk_info *adi; 274 SIMPLEQ_HEAD(, cbuf) cbufq; 275 struct cbuf *cbp; 276 char *addr; 277 daddr_t bn; 278 long bcount, rcount; 279 u_int comp; 280 281 /* Allocate component buffers. */ 282 SIMPLEQ_INIT(&cbufq); 283 addr = bp->b_data; 284 285 /* Find the first component. */ 286 comp = 0; 287 adi = &aai->aai_disks[comp]; 288 bn = bp->b_rawblkno; 289 while (bn >= adi->adi_compsize) { 290 bn -= adi->adi_compsize; 291 adi = &aai->aai_disks[++comp]; 292 } 293 294 bp->b_resid = bp->b_bcount; 295 296 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 297 rcount = bp->b_bcount; 298 if ((adi->adi_compsize - bn) < btodb(rcount)) 299 rcount = dbtob(adi->adi_compsize - bn); 300 301 cbp = ld_ataraid_make_cbuf(sc, bp, comp, bn, addr, rcount); 302 if (cbp == NULL) { 303 /* Free the already allocated component buffers. */ 304 while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { 305 SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); 306 CBUF_PUT(cbp); 307 } 308 return (EAGAIN); 309 } 310 311 /* 312 * For a span, we always know we advance to the next disk, 313 * and always start at offset 0 on that disk. 314 */ 315 adi = &aai->aai_disks[++comp]; 316 bn = 0; 317 318 SIMPLEQ_INSERT_TAIL(&cbufq, cbp, cb_q); 319 addr += rcount; 320 } 321 322 /* Now fire off the requests. */ 323 while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { 324 SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); 325 if ((cbp->cb_buf.b_flags & B_READ) == 0) 326 cbp->cb_buf.b_vp->v_numoutput++; 327 VOP_STRATEGY(cbp->cb_buf.b_vp, &cbp->cb_buf); 328 } 329 330 return (0); 331 } 332 333 static int 334 ld_ataraid_start_raid0(struct ld_softc *ld, struct buf *bp) 335 { 336 struct ld_ataraid_softc *sc = (void *) ld; 337 struct ataraid_array_info *aai = sc->sc_aai; 338 struct ataraid_disk_info *adi; 339 SIMPLEQ_HEAD(, cbuf) cbufq; 340 struct cbuf *cbp, *other_cbp; 341 char *addr; 342 daddr_t bn, cbn, tbn, off; 343 long bcount, rcount; 344 u_int comp; 345 const int read = bp->b_flags & B_READ; 346 const int mirror = aai->aai_level & AAI_L_RAID1; 347 int error; 348 349 /* Allocate component buffers. */ 350 SIMPLEQ_INIT(&cbufq); 351 addr = bp->b_data; 352 bn = bp->b_rawblkno; 353 354 bp->b_resid = bp->b_bcount; 355 356 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 357 tbn = bn / aai->aai_interleave; 358 off = bn % aai->aai_interleave; 359 360 if (__predict_false(tbn == aai->aai_capacity / 361 aai->aai_interleave)) { 362 /* Last stripe. */ 363 daddr_t sz = (aai->aai_capacity - 364 (tbn * aai->aai_interleave)) / 365 aai->aai_width; 366 comp = off / sz; 367 cbn = ((tbn / aai->aai_width) * aai->aai_interleave) + 368 (off % sz); 369 rcount = min(bcount, dbtob(sz)); 370 } else { 371 comp = tbn % aai->aai_width; 372 cbn = ((tbn / aai->aai_width) * aai->aai_interleave) + 373 off; 374 rcount = min(bcount, dbtob(aai->aai_interleave - off)); 375 } 376 377 /* 378 * See if a component is valid. 379 */ 380 try_mirror: 381 adi = &aai->aai_disks[comp]; 382 if ((adi->adi_status & ADI_S_ONLINE) == 0) { 383 if (mirror && comp < aai->aai_width) { 384 comp += aai->aai_width; 385 goto try_mirror; 386 } 387 388 /* 389 * No component available. 390 */ 391 error = EIO; 392 goto free_and_exit; 393 } 394 395 cbp = ld_ataraid_make_cbuf(sc, bp, comp, cbn, addr, rcount); 396 if (cbp == NULL) { 397 resource_shortage: 398 error = EAGAIN; 399 free_and_exit: 400 /* Free the already allocated component buffers. */ 401 while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { 402 SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); 403 CBUF_PUT(cbp); 404 } 405 return (error); 406 } 407 SIMPLEQ_INSERT_TAIL(&cbufq, cbp, cb_q); 408 if (mirror && !read && comp < aai->aai_width) { 409 comp += aai->aai_width; 410 adi = &aai->aai_disks[comp]; 411 if (adi->adi_status & ADI_S_ONLINE) { 412 other_cbp = ld_ataraid_make_cbuf(sc, bp, 413 comp, cbn, addr, rcount); 414 if (other_cbp == NULL) 415 goto resource_shortage; 416 SIMPLEQ_INSERT_TAIL(&cbufq, other_cbp, cb_q); 417 other_cbp->cb_other = cbp; 418 cbp->cb_other = other_cbp; 419 } 420 } 421 bn += btodb(rcount); 422 addr += rcount; 423 } 424 425 /* Now fire off the requests. */ 426 while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { 427 SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); 428 if ((cbp->cb_buf.b_flags & B_READ) == 0) 429 cbp->cb_buf.b_vp->v_numoutput++; 430 VOP_STRATEGY(cbp->cb_buf.b_vp, &cbp->cb_buf); 431 } 432 433 return (0); 434 } 435 436 /* 437 * Called at interrupt time. Mark the component as done and if all 438 * components are done, take an "interrupt". 439 */ 440 static void 441 ld_ataraid_iodone_raid0(struct buf *vbp) 442 { 443 struct cbuf *cbp = (struct cbuf *) vbp, *other_cbp; 444 struct buf *bp = cbp->cb_obp; 445 struct ld_ataraid_softc *sc = cbp->cb_sc; 446 struct ataraid_array_info *aai = sc->sc_aai; 447 struct ataraid_disk_info *adi; 448 long count; 449 int s, iodone; 450 451 s = splbio(); 452 453 iodone = cbp->cb_flags & CBUF_IODONE; 454 other_cbp = cbp->cb_other; 455 if (other_cbp != NULL) 456 /* You are alone */ 457 other_cbp->cb_other = NULL; 458 459 if (cbp->cb_buf.b_error != 0) { 460 /* 461 * Mark this component broken. 462 */ 463 adi = &aai->aai_disks[cbp->cb_comp]; 464 adi->adi_status &= ~ADI_S_ONLINE; 465 466 printf("%s: error %d on component %d (%s)\n", 467 sc->sc_ld.sc_dv.dv_xname, bp->b_error, cbp->cb_comp, 468 adi->adi_dev->dv_xname); 469 470 /* 471 * If we didn't see an error yet and we are reading 472 * RAID1 disk, try another component. 473 */ 474 if (bp->b_error == 0 && 475 (cbp->cb_buf.b_flags & B_READ) != 0 && 476 (aai->aai_level & AAI_L_RAID1) != 0 && 477 cbp->cb_comp < aai->aai_width) { 478 cbp->cb_comp += aai->aai_width; 479 adi = &aai->aai_disks[cbp->cb_comp]; 480 if (adi->adi_status & ADI_S_ONLINE) { 481 cbp->cb_buf.b_error = 0; 482 VOP_STRATEGY(cbp->cb_buf.b_vp, &cbp->cb_buf); 483 goto out; 484 } 485 } 486 487 if (iodone || other_cbp != NULL) 488 /* 489 * If I/O on other component successfully done 490 * or the I/O is still in progress, no need 491 * to tell an error to upper layer. 492 */ 493 ; 494 else { 495 bp->b_error = cbp->cb_buf.b_error ? 496 cbp->cb_buf.b_error : EIO; 497 } 498 499 /* XXX Update component config blocks. */ 500 501 } else { 502 /* 503 * If other I/O is still in progress, tell it that 504 * our I/O is successfully done. 505 */ 506 if (other_cbp != NULL) 507 other_cbp->cb_flags |= CBUF_IODONE; 508 } 509 count = cbp->cb_buf.b_bcount; 510 CBUF_PUT(cbp); 511 512 if (other_cbp != NULL) 513 goto out; 514 515 /* If all done, "interrupt". */ 516 bp->b_resid -= count; 517 if (bp->b_resid < 0) 518 panic("ld_ataraid_iodone_raid0: count"); 519 if (bp->b_resid == 0) 520 lddone(&sc->sc_ld, bp); 521 522 out: 523 splx(s); 524 } 525 526 static int 527 ld_ataraid_dump(struct ld_softc *sc, void *data, 528 int blkno, int blkcnt) 529 { 530 531 return (EIO); 532 } 533