1 /* $NetBSD: ld_ataraid.c,v 1.5 2003/06/07 23:37:25 thorpej Exp $ */ 2 3 /* 4 * Copyright (c) 2003 Wasabi Systems, Inc. 5 * All rights reserved. 6 * 7 * Written by Jason R. Thorpe for Wasabi Systems, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed for the NetBSD Project by 20 * Wasabi Systems, Inc. 21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 22 * or promote products derived from this software without specific prior 23 * written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 38 /* 39 * Support for ATA RAID logical disks. 40 * 41 * Note that all the RAID happens in software here; the ATA RAID 42 * controllers we're dealing with (Promise, etc.) only support 43 * configuration data on the component disks, with the BIOS supporting 44 * booting from the RAID volumes. 45 */ 46 47 #include "rnd.h" 48 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/conf.h> 52 #include <sys/kernel.h> 53 #include <sys/device.h> 54 #include <sys/buf.h> 55 #include <sys/dkio.h> 56 #include <sys/disk.h> 57 #include <sys/disklabel.h> 58 #include <sys/fcntl.h> 59 #include <sys/malloc.h> 60 #include <sys/vnode.h> 61 #if NRND > 0 62 #include <sys/rnd.h> 63 #endif 64 65 #include <miscfs/specfs/specdev.h> 66 67 #include <dev/ldvar.h> 68 69 #include <dev/ata/ata_raidvar.h> 70 71 struct ld_ataraid_softc { 72 struct ld_softc sc_ld; 73 74 struct ataraid_array_info *sc_aai; 75 struct vnode *sc_vnodes[ATA_RAID_MAX_DISKS]; 76 77 void (*sc_iodone)(struct buf *); 78 }; 79 80 static int ld_ataraid_match(struct device *, struct cfdata *, void *); 81 static void ld_ataraid_attach(struct device *, struct device *, void *); 82 83 static int ld_ataraid_dump(struct ld_softc *, void *, int, int); 84 85 static int ld_ataraid_start_span(struct ld_softc *, struct buf *); 86 87 static int ld_ataraid_start_raid0(struct ld_softc *, struct buf *); 88 static void ld_ataraid_iodone_raid0(struct buf *); 89 90 CFATTACH_DECL(ld_ataraid, sizeof(struct ld_ataraid_softc), 91 ld_ataraid_match, ld_ataraid_attach, NULL, NULL); 92 93 static int ld_ataraid_initialized; 94 static struct pool ld_ataraid_cbufpl; 95 96 struct cbuf { 97 struct buf cb_buf; /* new I/O buf */ 98 struct buf *cb_obp; /* ptr. to original I/O buf */ 99 struct ld_ataraid_softc *cb_sc; /* pointer to ld softc */ 100 u_int cb_comp; /* target component */ 101 SIMPLEQ_ENTRY(cbuf) cb_q; /* fifo of component buffers */ 102 }; 103 104 #define CBUF_GET() pool_get(&ld_ataraid_cbufpl, PR_NOWAIT); 105 #define CBUF_PUT(cbp) pool_put(&ld_ataraid_cbufpl, (cbp)) 106 107 static int 108 ld_ataraid_match(struct device *parent, struct cfdata *match, void *aux) 109 { 110 111 return (1); 112 } 113 114 static void 115 ld_ataraid_attach(struct device *parent, struct device *self, void *aux) 116 { 117 struct ld_ataraid_softc *sc = (void *) self; 118 struct ld_softc *ld = &sc->sc_ld; 119 struct ataraid_array_info *aai = aux; 120 const char *level; 121 struct vnode *vp; 122 char unklev[32]; 123 u_int i; 124 125 if (ld_ataraid_initialized == 0) { 126 ld_ataraid_initialized = 1; 127 pool_init(&ld_ataraid_cbufpl, sizeof(struct cbuf), 0, 128 0, 0, "ldcbuf", NULL); 129 } 130 131 sc->sc_aai = aai; /* this data persists */ 132 133 ld->sc_maxxfer = MAXPHYS * aai->aai_width; /* XXX */ 134 ld->sc_secperunit = aai->aai_capacity; 135 ld->sc_secsize = 512; /* XXX */ 136 ld->sc_maxqueuecnt = 128; /* XXX */ 137 ld->sc_dump = ld_ataraid_dump; 138 139 switch (aai->aai_level) { 140 case AAI_L_SPAN: 141 level = "SPAN"; 142 ld->sc_start = ld_ataraid_start_span; 143 sc->sc_iodone = ld_ataraid_iodone_raid0; 144 break; 145 146 case AAI_L_RAID0: 147 level = "RAID0"; 148 ld->sc_start = ld_ataraid_start_raid0; 149 sc->sc_iodone = ld_ataraid_iodone_raid0; 150 break; 151 152 case AAI_L_RAID1: 153 level = "RAID1"; 154 break; 155 156 case AAI_L_RAID0 | AAI_L_RAID1: 157 level = "RAID0+1"; 158 break; 159 160 default: 161 sprintf(unklev, "<unknown level 0x%x>", aai->aai_level); 162 level = unklev; 163 } 164 165 aprint_naive(": ATA %s array\n", level); 166 aprint_normal(": %s ATA %s array\n", 167 ata_raid_type_name(aai->aai_type), level); 168 169 if (ld->sc_start == NULL) { 170 aprint_error("%s: unsupported array type\n", 171 ld->sc_dv.dv_xname); 172 return; 173 } 174 175 /* 176 * We get a geometry from the device; use it. 177 */ 178 ld->sc_nheads = aai->aai_heads; 179 ld->sc_nsectors = aai->aai_sectors; 180 ld->sc_ncylinders = aai->aai_cylinders; 181 182 /* 183 * Configure all the component disks. 184 */ 185 for (i = 0; i < aai->aai_ndisks; i++) { 186 struct ataraid_disk_info *adi = &aai->aai_disks[i]; 187 int bmajor, error; 188 dev_t dev; 189 190 bmajor = devsw_name2blk(adi->adi_dev->dv_xname, NULL, 0); 191 dev = MAKEDISKDEV(bmajor, adi->adi_dev->dv_unit, RAW_PART); 192 error = bdevvp(dev, &vp); 193 if (error) 194 break; 195 error = VOP_OPEN(vp, FREAD|FWRITE, NOCRED, 0); 196 if (error) { 197 vput(vp); 198 /* 199 * XXX This is bogus. We should just mark the 200 * XXX component as FAILED, and write-back new 201 * XXX config blocks. 202 */ 203 break; 204 } 205 206 VOP_UNLOCK(vp, 0); 207 sc->sc_vnodes[i] = vp; 208 } 209 if (i == aai->aai_ndisks) { 210 ld->sc_flags = LDF_ENABLED; 211 goto finish; 212 } 213 214 for (i = 0; i < aai->aai_ndisks; i++) { 215 vp = sc->sc_vnodes[i]; 216 sc->sc_vnodes[i] = NULL; 217 if (vp != NULL) 218 (void) vn_close(vp, FREAD|FWRITE, NOCRED, curproc); 219 } 220 221 finish: 222 ldattach(ld); 223 } 224 225 static struct cbuf * 226 ld_ataraid_make_cbuf(struct ld_ataraid_softc *sc, struct buf *bp, 227 u_int comp, daddr_t bn, caddr_t addr, long bcount) 228 { 229 struct cbuf *cbp; 230 231 cbp = CBUF_GET(); 232 if (cbp == NULL) 233 return (NULL); 234 BUF_INIT(&cbp->cb_buf); 235 cbp->cb_buf.b_flags = bp->b_flags | B_CALL; 236 cbp->cb_buf.b_iodone = sc->sc_iodone; 237 cbp->cb_buf.b_proc = bp->b_proc; 238 cbp->cb_buf.b_vp = sc->sc_vnodes[comp]; 239 cbp->cb_buf.b_dev = sc->sc_vnodes[comp]->v_rdev; 240 cbp->cb_buf.b_blkno = bn + sc->sc_aai->aai_offset; 241 cbp->cb_buf.b_data = addr; 242 cbp->cb_buf.b_bcount = bcount; 243 244 /* Context for iodone */ 245 cbp->cb_obp = bp; 246 cbp->cb_sc = sc; 247 cbp->cb_comp = comp; 248 249 return (cbp); 250 } 251 252 static int 253 ld_ataraid_start_span(struct ld_softc *ld, struct buf *bp) 254 { 255 struct ld_ataraid_softc *sc = (void *) ld; 256 struct ataraid_array_info *aai = sc->sc_aai; 257 struct ataraid_disk_info *adi; 258 SIMPLEQ_HEAD(, cbuf) cbufq; 259 struct cbuf *cbp; 260 caddr_t addr; 261 daddr_t bn; 262 long bcount, rcount; 263 u_int comp; 264 265 /* Allocate component buffers. */ 266 SIMPLEQ_INIT(&cbufq); 267 addr = bp->b_data; 268 269 /* Find the first component. */ 270 comp = 0; 271 adi = &aai->aai_disks[comp]; 272 bn = bp->b_rawblkno; 273 while (bn >= adi->adi_compsize) { 274 bn -= adi->adi_compsize; 275 adi = &aai->aai_disks[++comp]; 276 } 277 278 bp->b_resid = bp->b_bcount; 279 280 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 281 rcount = bp->b_bcount; 282 if ((adi->adi_compsize - bn) < btodb(rcount)) 283 rcount = dbtob(adi->adi_compsize - bn); 284 285 cbp = ld_ataraid_make_cbuf(sc, bp, comp, bn, addr, rcount); 286 if (cbp == NULL) { 287 /* Free the already allocated component buffers. */ 288 while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { 289 SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); 290 CBUF_PUT(cbp); 291 } 292 return (EAGAIN); 293 } 294 295 /* 296 * For a span, we always know we advance to the next disk, 297 * and always start at offset 0 on that disk. 298 */ 299 adi = &aai->aai_disks[++comp]; 300 bn = 0; 301 302 SIMPLEQ_INSERT_TAIL(&cbufq, cbp, cb_q); 303 addr += rcount; 304 } 305 306 /* Now fire off the requests. */ 307 while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { 308 SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); 309 if ((cbp->cb_buf.b_flags & B_READ) == 0) 310 cbp->cb_buf.b_vp->v_numoutput++; 311 VOP_STRATEGY(&cbp->cb_buf); 312 } 313 314 return (0); 315 } 316 317 static int 318 ld_ataraid_start_raid0(struct ld_softc *ld, struct buf *bp) 319 { 320 struct ld_ataraid_softc *sc = (void *) ld; 321 struct ataraid_array_info *aai = sc->sc_aai; 322 SIMPLEQ_HEAD(, cbuf) cbufq; 323 struct cbuf *cbp; 324 caddr_t addr; 325 daddr_t bn, cbn, tbn, off; 326 long bcount, rcount; 327 u_int comp; 328 329 /* Allocate component buffers. */ 330 SIMPLEQ_INIT(&cbufq); 331 addr = bp->b_data; 332 bn = bp->b_rawblkno; 333 334 bp->b_resid = bp->b_bcount; 335 336 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 337 tbn = bn / aai->aai_interleave; 338 off = bn % aai->aai_interleave; 339 340 if (__predict_false(tbn == aai->aai_capacity / 341 aai->aai_interleave)) { 342 /* Last stripe. */ 343 daddr_t sz = (aai->aai_capacity - 344 (tbn * aai->aai_interleave)) / 345 aai->aai_width; 346 comp = off / sz; 347 cbn = ((tbn / aai->aai_width) * aai->aai_interleave) + 348 (off % sz); 349 rcount = min(bcount, dbtob(sz)); 350 } else { 351 comp = tbn % aai->aai_width; 352 cbn = ((tbn / aai->aai_width) * aai->aai_interleave) + 353 off; 354 rcount = min(bcount, dbtob(aai->aai_interleave - off)); 355 } 356 357 cbp = ld_ataraid_make_cbuf(sc, bp, comp, cbn, addr, rcount); 358 if (cbp == NULL) { 359 /* Free the already allocated component buffers. */ 360 while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { 361 SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); 362 CBUF_PUT(cbp); 363 } 364 return (EAGAIN); 365 } 366 SIMPLEQ_INSERT_TAIL(&cbufq, cbp, cb_q); 367 bn += btodb(rcount); 368 addr += rcount; 369 } 370 371 /* Now fire off the requests. */ 372 while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { 373 SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); 374 if ((cbp->cb_buf.b_flags & B_READ) == 0) 375 cbp->cb_buf.b_vp->v_numoutput++; 376 VOP_STRATEGY(&cbp->cb_buf); 377 } 378 379 return (0); 380 } 381 382 /* 383 * Called at interrupt time. Mark the component as done and if all 384 * components are done, take an "interrupt". 385 */ 386 static void 387 ld_ataraid_iodone_raid0(struct buf *vbp) 388 { 389 struct cbuf *cbp = (struct cbuf *) vbp; 390 struct buf *bp = cbp->cb_obp; 391 struct ld_ataraid_softc *sc = cbp->cb_sc; 392 long count; 393 int s; 394 395 s = splbio(); 396 397 if (cbp->cb_buf.b_flags & B_ERROR) { 398 bp->b_flags |= B_ERROR; 399 bp->b_error = cbp->cb_buf.b_error ? 400 cbp->cb_buf.b_error : EIO; 401 402 /* XXX Update component config blocks. */ 403 404 printf("%s: error %d on component %d\n", 405 sc->sc_ld.sc_dv.dv_xname, bp->b_error, cbp->cb_comp); 406 } 407 count = cbp->cb_buf.b_bcount; 408 CBUF_PUT(cbp); 409 410 /* If all done, "interrupt". */ 411 bp->b_resid -= count; 412 if (bp->b_resid < 0) 413 panic("ld_ataraid_iodone_raid0: count"); 414 if (bp->b_resid == 0) 415 lddone(&sc->sc_ld, bp); 416 splx(s); 417 } 418 419 static int 420 ld_ataraid_dump(struct ld_softc *sc, void *data, int blkno, int blkcnt) 421 { 422 423 return (EIO); 424 } 425