1 /* $NetBSD: ld_ataraid.c,v 1.14 2005/12/11 12:21:14 christos Exp $ */ 2 3 /* 4 * Copyright (c) 2003 Wasabi Systems, Inc. 5 * All rights reserved. 6 * 7 * Written by Jason R. Thorpe for Wasabi Systems, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project by 20 * Wasabi Systems, Inc. 21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 22 * or promote products derived from this software without specific prior 23 * written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 /* 39 * Support for ATA RAID logical disks. 40 * 41 * Note that all the RAID happens in software here; the ATA RAID 42 * controllers we're dealing with (Promise, etc.) only support 43 * configuration data on the component disks, with the BIOS supporting 44 * booting from the RAID volumes. 45 */ 46 47 #include <sys/cdefs.h> 48 __KERNEL_RCSID(0, "$NetBSD: ld_ataraid.c,v 1.14 2005/12/11 12:21:14 christos Exp $"); 49 50 #include "rnd.h" 51 52 #include <sys/param.h> 53 #include <sys/systm.h> 54 #include <sys/conf.h> 55 #include <sys/kernel.h> 56 #include <sys/device.h> 57 #include <sys/buf.h> 58 #include <sys/bufq.h> 59 #include <sys/dkio.h> 60 #include <sys/disk.h> 61 #include <sys/disklabel.h> 62 #include <sys/fcntl.h> 63 #include <sys/malloc.h> 64 #include <sys/vnode.h> 65 #if NRND > 0 66 #include <sys/rnd.h> 67 #endif 68 69 #include <miscfs/specfs/specdev.h> 70 71 #include <dev/ldvar.h> 72 73 #include <dev/ata/ata_raidvar.h> 74 75 struct ld_ataraid_softc { 76 struct ld_softc sc_ld; 77 78 struct ataraid_array_info *sc_aai; 79 struct vnode *sc_vnodes[ATA_RAID_MAX_DISKS]; 80 81 void (*sc_iodone)(struct buf *); 82 }; 83 84 static int ld_ataraid_match(struct device *, struct cfdata *, void *); 85 static void ld_ataraid_attach(struct device *, struct device *, void *); 86 87 static int ld_ataraid_dump(struct ld_softc *, void *, int, int); 88 89 static int ld_ataraid_start_span(struct ld_softc *, struct buf *); 90 91 static int ld_ataraid_start_raid0(struct ld_softc *, struct buf *); 92 static void ld_ataraid_iodone_raid0(struct buf *); 93 94 CFATTACH_DECL(ld_ataraid, sizeof(struct ld_ataraid_softc), 95 ld_ataraid_match, ld_ataraid_attach, NULL, NULL); 96 97 static int ld_ataraid_initialized; 98 static struct pool ld_ataraid_cbufpl; 99 100 struct cbuf { 101 struct buf cb_buf; /* new I/O buf */ 102 struct buf *cb_obp; /* ptr. to original I/O buf */ 103 struct ld_ataraid_softc *cb_sc; /* pointer to ld softc */ 104 u_int cb_comp; /* target component */ 105 SIMPLEQ_ENTRY(cbuf) cb_q; /* fifo of component buffers */ 106 struct cbuf *cb_other; /* other cbuf in case of mirror */ 107 int cb_flags; 108 #define CBUF_IODONE 0x00000001 /* I/O is already successfully done */ 109 }; 110 111 #define CBUF_GET() pool_get(&ld_ataraid_cbufpl, PR_NOWAIT); 112 #define CBUF_PUT(cbp) pool_put(&ld_ataraid_cbufpl, (cbp)) 113 114 static int 115 ld_ataraid_match(struct device *parent, struct cfdata *match, void *aux) 116 { 117 118 return (1); 119 } 120 121 static void 122 ld_ataraid_attach(struct device *parent, struct device *self, void *aux) 123 { 124 struct ld_ataraid_softc *sc = (void *) self; 125 struct ld_softc *ld = &sc->sc_ld; 126 struct ataraid_array_info *aai = aux; 127 const char *level; 128 struct vnode *vp; 129 char unklev[32]; 130 u_int i; 131 132 if (ld_ataraid_initialized == 0) { 133 ld_ataraid_initialized = 1; 134 pool_init(&ld_ataraid_cbufpl, sizeof(struct cbuf), 0, 135 0, 0, "ldcbuf", NULL); 136 } 137 138 sc->sc_aai = aai; /* this data persists */ 139 140 ld->sc_maxxfer = MAXPHYS * aai->aai_width; /* XXX */ 141 ld->sc_secperunit = aai->aai_capacity; 142 ld->sc_secsize = 512; /* XXX */ 143 ld->sc_maxqueuecnt = 128; /* XXX */ 144 ld->sc_dump = ld_ataraid_dump; 145 146 switch (aai->aai_level) { 147 case AAI_L_SPAN: 148 level = "SPAN"; 149 ld->sc_start = ld_ataraid_start_span; 150 sc->sc_iodone = ld_ataraid_iodone_raid0; 151 break; 152 153 case AAI_L_RAID0: 154 level = "RAID-0"; 155 ld->sc_start = ld_ataraid_start_raid0; 156 sc->sc_iodone = ld_ataraid_iodone_raid0; 157 break; 158 159 case AAI_L_RAID1: 160 level = "RAID-1"; 161 ld->sc_start = ld_ataraid_start_raid0; 162 sc->sc_iodone = ld_ataraid_iodone_raid0; 163 break; 164 165 case AAI_L_RAID0 | AAI_L_RAID1: 166 level = "RAID-10"; 167 ld->sc_start = ld_ataraid_start_raid0; 168 sc->sc_iodone = ld_ataraid_iodone_raid0; 169 break; 170 171 default: 172 snprintf(unklev, sizeof(unklev), "<unknown level 0x%x>", 173 aai->aai_level); 174 level = unklev; 175 } 176 177 aprint_naive(": ATA %s array\n", level); 178 aprint_normal(": %s ATA %s array\n", 179 ata_raid_type_name(aai->aai_type), level); 180 181 if (ld->sc_start == NULL) { 182 aprint_error("%s: unsupported array type\n", 183 ld->sc_dv.dv_xname); 184 return; 185 } 186 187 /* 188 * We get a geometry from the device; use it. 189 */ 190 ld->sc_nheads = aai->aai_heads; 191 ld->sc_nsectors = aai->aai_sectors; 192 ld->sc_ncylinders = aai->aai_cylinders; 193 194 /* 195 * Configure all the component disks. 196 */ 197 for (i = 0; i < aai->aai_ndisks; i++) { 198 struct ataraid_disk_info *adi = &aai->aai_disks[i]; 199 int bmajor, error; 200 dev_t dev; 201 202 bmajor = devsw_name2blk(adi->adi_dev->dv_xname, NULL, 0); 203 dev = MAKEDISKDEV(bmajor, adi->adi_dev->dv_unit, RAW_PART); 204 error = bdevvp(dev, &vp); 205 if (error) 206 break; 207 error = VOP_OPEN(vp, FREAD|FWRITE, NOCRED, 0); 208 if (error) { 209 vput(vp); 210 /* 211 * XXX This is bogus. We should just mark the 212 * XXX component as FAILED, and write-back new 213 * XXX config blocks. 214 */ 215 break; 216 } 217 218 VOP_UNLOCK(vp, 0); 219 sc->sc_vnodes[i] = vp; 220 } 221 if (i == aai->aai_ndisks) { 222 ld->sc_flags = LDF_ENABLED; 223 goto finish; 224 } 225 226 for (i = 0; i < aai->aai_ndisks; i++) { 227 vp = sc->sc_vnodes[i]; 228 sc->sc_vnodes[i] = NULL; 229 if (vp != NULL) 230 (void) vn_close(vp, FREAD|FWRITE, NOCRED, curlwp); 231 } 232 233 finish: 234 ldattach(ld); 235 } 236 237 static struct cbuf * 238 ld_ataraid_make_cbuf(struct ld_ataraid_softc *sc, struct buf *bp, 239 u_int comp, daddr_t bn, caddr_t addr, long bcount) 240 { 241 struct cbuf *cbp; 242 243 cbp = CBUF_GET(); 244 if (cbp == NULL) 245 return (NULL); 246 BUF_INIT(&cbp->cb_buf); 247 cbp->cb_buf.b_flags = bp->b_flags | B_CALL; 248 cbp->cb_buf.b_iodone = sc->sc_iodone; 249 cbp->cb_buf.b_proc = bp->b_proc; 250 cbp->cb_buf.b_vp = sc->sc_vnodes[comp]; 251 cbp->cb_buf.b_blkno = bn + sc->sc_aai->aai_offset; 252 cbp->cb_buf.b_data = addr; 253 cbp->cb_buf.b_bcount = bcount; 254 255 /* Context for iodone */ 256 cbp->cb_obp = bp; 257 cbp->cb_sc = sc; 258 cbp->cb_comp = comp; 259 cbp->cb_other = NULL; 260 cbp->cb_flags = 0; 261 262 return (cbp); 263 } 264 265 static int 266 ld_ataraid_start_span(struct ld_softc *ld, struct buf *bp) 267 { 268 struct ld_ataraid_softc *sc = (void *) ld; 269 struct ataraid_array_info *aai = sc->sc_aai; 270 struct ataraid_disk_info *adi; 271 SIMPLEQ_HEAD(, cbuf) cbufq; 272 struct cbuf *cbp; 273 caddr_t addr; 274 daddr_t bn; 275 long bcount, rcount; 276 u_int comp; 277 278 /* Allocate component buffers. */ 279 SIMPLEQ_INIT(&cbufq); 280 addr = bp->b_data; 281 282 /* Find the first component. */ 283 comp = 0; 284 adi = &aai->aai_disks[comp]; 285 bn = bp->b_rawblkno; 286 while (bn >= adi->adi_compsize) { 287 bn -= adi->adi_compsize; 288 adi = &aai->aai_disks[++comp]; 289 } 290 291 bp->b_resid = bp->b_bcount; 292 293 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 294 rcount = bp->b_bcount; 295 if ((adi->adi_compsize - bn) < btodb(rcount)) 296 rcount = dbtob(adi->adi_compsize - bn); 297 298 cbp = ld_ataraid_make_cbuf(sc, bp, comp, bn, addr, rcount); 299 if (cbp == NULL) { 300 /* Free the already allocated component buffers. */ 301 while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { 302 SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); 303 CBUF_PUT(cbp); 304 } 305 return (EAGAIN); 306 } 307 308 /* 309 * For a span, we always know we advance to the next disk, 310 * and always start at offset 0 on that disk. 311 */ 312 adi = &aai->aai_disks[++comp]; 313 bn = 0; 314 315 SIMPLEQ_INSERT_TAIL(&cbufq, cbp, cb_q); 316 addr += rcount; 317 } 318 319 /* Now fire off the requests. */ 320 while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { 321 SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); 322 if ((cbp->cb_buf.b_flags & B_READ) == 0) 323 cbp->cb_buf.b_vp->v_numoutput++; 324 VOP_STRATEGY(cbp->cb_buf.b_vp, &cbp->cb_buf); 325 } 326 327 return (0); 328 } 329 330 static int 331 ld_ataraid_start_raid0(struct ld_softc *ld, struct buf *bp) 332 { 333 struct ld_ataraid_softc *sc = (void *) ld; 334 struct ataraid_array_info *aai = sc->sc_aai; 335 struct ataraid_disk_info *adi; 336 SIMPLEQ_HEAD(, cbuf) cbufq; 337 struct cbuf *cbp, *other_cbp; 338 caddr_t addr; 339 daddr_t bn, cbn, tbn, off; 340 long bcount, rcount; 341 u_int comp; 342 const int read = bp->b_flags & B_READ; 343 const int mirror = aai->aai_level & AAI_L_RAID1; 344 int error; 345 346 /* Allocate component buffers. */ 347 SIMPLEQ_INIT(&cbufq); 348 addr = bp->b_data; 349 bn = bp->b_rawblkno; 350 351 bp->b_resid = bp->b_bcount; 352 353 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 354 tbn = bn / aai->aai_interleave; 355 off = bn % aai->aai_interleave; 356 357 if (__predict_false(tbn == aai->aai_capacity / 358 aai->aai_interleave)) { 359 /* Last stripe. */ 360 daddr_t sz = (aai->aai_capacity - 361 (tbn * aai->aai_interleave)) / 362 aai->aai_width; 363 comp = off / sz; 364 cbn = ((tbn / aai->aai_width) * aai->aai_interleave) + 365 (off % sz); 366 rcount = min(bcount, dbtob(sz)); 367 } else { 368 comp = tbn % aai->aai_width; 369 cbn = ((tbn / aai->aai_width) * aai->aai_interleave) + 370 off; 371 rcount = min(bcount, dbtob(aai->aai_interleave - off)); 372 } 373 374 /* 375 * See if a component is valid. 376 */ 377 try_mirror: 378 adi = &aai->aai_disks[comp]; 379 if ((adi->adi_status & ADI_S_ONLINE) == 0) { 380 if (mirror && comp < aai->aai_width) { 381 comp += aai->aai_width; 382 goto try_mirror; 383 } 384 385 /* 386 * No component available. 387 */ 388 error = EIO; 389 goto free_and_exit; 390 } 391 392 cbp = ld_ataraid_make_cbuf(sc, bp, comp, cbn, addr, rcount); 393 if (cbp == NULL) { 394 resource_shortage: 395 error = EAGAIN; 396 free_and_exit: 397 /* Free the already allocated component buffers. */ 398 while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { 399 SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); 400 CBUF_PUT(cbp); 401 } 402 return (error); 403 } 404 SIMPLEQ_INSERT_TAIL(&cbufq, cbp, cb_q); 405 if (mirror && !read && comp < aai->aai_width) { 406 comp += aai->aai_width; 407 adi = &aai->aai_disks[comp]; 408 if (adi->adi_status & ADI_S_ONLINE) { 409 other_cbp = ld_ataraid_make_cbuf(sc, bp, 410 comp, cbn, addr, rcount); 411 if (other_cbp == NULL) 412 goto resource_shortage; 413 SIMPLEQ_INSERT_TAIL(&cbufq, other_cbp, cb_q); 414 other_cbp->cb_other = cbp; 415 cbp->cb_other = other_cbp; 416 } 417 } 418 bn += btodb(rcount); 419 addr += rcount; 420 } 421 422 /* Now fire off the requests. */ 423 while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { 424 SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); 425 if ((cbp->cb_buf.b_flags & B_READ) == 0) 426 cbp->cb_buf.b_vp->v_numoutput++; 427 VOP_STRATEGY(cbp->cb_buf.b_vp, &cbp->cb_buf); 428 } 429 430 return (0); 431 } 432 433 /* 434 * Called at interrupt time. Mark the component as done and if all 435 * components are done, take an "interrupt". 436 */ 437 static void 438 ld_ataraid_iodone_raid0(struct buf *vbp) 439 { 440 struct cbuf *cbp = (struct cbuf *) vbp, *other_cbp; 441 struct buf *bp = cbp->cb_obp; 442 struct ld_ataraid_softc *sc = cbp->cb_sc; 443 struct ataraid_array_info *aai = sc->sc_aai; 444 struct ataraid_disk_info *adi; 445 long count; 446 int s, iodone; 447 448 s = splbio(); 449 450 iodone = cbp->cb_flags & CBUF_IODONE; 451 other_cbp = cbp->cb_other; 452 if (other_cbp != NULL) 453 /* You are alone */ 454 other_cbp->cb_other = NULL; 455 456 if (cbp->cb_buf.b_flags & B_ERROR) { 457 /* 458 * Mark this component broken. 459 */ 460 adi = &aai->aai_disks[cbp->cb_comp]; 461 adi->adi_status &= ~ADI_S_ONLINE; 462 463 printf("%s: error %d on component %d (%s)\n", 464 sc->sc_ld.sc_dv.dv_xname, bp->b_error, cbp->cb_comp, 465 adi->adi_dev->dv_xname); 466 467 /* 468 * If we didn't see an error yet and we are reading 469 * RAID1 disk, try another component. 470 */ 471 if ((bp->b_flags & B_ERROR) == 0 && 472 (cbp->cb_buf.b_flags & B_READ) != 0 && 473 (aai->aai_level & AAI_L_RAID1) != 0 && 474 cbp->cb_comp < aai->aai_width) { 475 cbp->cb_comp += aai->aai_width; 476 adi = &aai->aai_disks[cbp->cb_comp]; 477 if (adi->adi_status & ADI_S_ONLINE) { 478 cbp->cb_buf.b_flags &= ~B_ERROR; 479 VOP_STRATEGY(cbp->cb_buf.b_vp, &cbp->cb_buf); 480 goto out; 481 } 482 } 483 484 if (iodone || other_cbp != NULL) 485 /* 486 * If I/O on other component successfully done 487 * or the I/O is still in progress, no need 488 * to tell an error to upper layer. 489 */ 490 ; 491 else { 492 bp->b_flags |= B_ERROR; 493 bp->b_error = cbp->cb_buf.b_error ? 494 cbp->cb_buf.b_error : EIO; 495 } 496 497 /* XXX Update component config blocks. */ 498 499 } else { 500 /* 501 * If other I/O is still in progress, tell it that 502 * our I/O is successfully done. 503 */ 504 if (other_cbp != NULL) 505 other_cbp->cb_flags |= CBUF_IODONE; 506 } 507 count = cbp->cb_buf.b_bcount; 508 CBUF_PUT(cbp); 509 510 if (other_cbp != NULL) 511 goto out; 512 513 /* If all done, "interrupt". */ 514 bp->b_resid -= count; 515 if (bp->b_resid < 0) 516 panic("ld_ataraid_iodone_raid0: count"); 517 if (bp->b_resid == 0) 518 lddone(&sc->sc_ld, bp); 519 520 out: 521 splx(s); 522 } 523 524 static int 525 ld_ataraid_dump(struct ld_softc *sc, void *data, int blkno, int blkcnt) 526 { 527 528 return (EIO); 529 } 530