1 /* $OpenBSD: softraid_raid0.c,v 1.16 2009/08/09 14:12:25 marco Exp $ */ 2 /* 3 * Copyright (c) 2008 Marco Peereboom <marco@peereboom.us> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include "bio.h" 19 20 #include <sys/param.h> 21 #include <sys/systm.h> 22 #include <sys/buf.h> 23 #include <sys/device.h> 24 #include <sys/ioctl.h> 25 #include <sys/proc.h> 26 #include <sys/malloc.h> 27 #include <sys/kernel.h> 28 #include <sys/disk.h> 29 #include <sys/rwlock.h> 30 #include <sys/queue.h> 31 #include <sys/fcntl.h> 32 #include <sys/disklabel.h> 33 #include <sys/mount.h> 34 #include <sys/sensors.h> 35 #include <sys/stat.h> 36 #include <sys/conf.h> 37 #include <sys/uio.h> 38 39 #include <scsi/scsi_all.h> 40 #include <scsi/scsiconf.h> 41 #include <scsi/scsi_disk.h> 42 43 #include <dev/softraidvar.h> 44 #include <dev/rndvar.h> 45 46 /* RAID 0 functions. */ 47 int sr_raid0_alloc_resources(struct sr_discipline *); 48 int sr_raid0_free_resources(struct sr_discipline *); 49 int sr_raid0_rw(struct sr_workunit *); 50 void sr_raid0_intr(struct buf *); 51 void sr_raid0_set_chunk_state(struct sr_discipline *, int, int); 52 void sr_raid0_set_vol_state(struct sr_discipline *); 53 54 /* Discipline initialisation. */ 55 void 56 sr_raid0_discipline_init(struct sr_discipline *sd) 57 { 58 59 /* Fill out discipline members. */ 60 sd->sd_type = SR_MD_RAID0; 61 sd->sd_max_ccb_per_wu = 62 (MAXPHYS / sd->sd_meta->ssdi.ssd_strip_size + 1) * 63 SR_RAID0_NOWU * sd->sd_meta->ssdi.ssd_chunk_no; 64 sd->sd_max_wu = SR_RAID0_NOWU; 65 66 /* Setup discipline pointers. */ 67 sd->sd_alloc_resources = sr_raid0_alloc_resources; 68 sd->sd_free_resources = sr_raid0_free_resources; 69 sd->sd_start_discipline = NULL; 70 sd->sd_scsi_inquiry = sr_raid_inquiry; 71 sd->sd_scsi_read_cap = sr_raid_read_cap; 72 sd->sd_scsi_tur = sr_raid_tur; 73 sd->sd_scsi_req_sense = sr_raid_request_sense; 74 sd->sd_scsi_start_stop = sr_raid_start_stop; 75 sd->sd_scsi_sync = sr_raid_sync; 76 sd->sd_scsi_rw = sr_raid0_rw; 77 sd->sd_set_chunk_state = sr_raid0_set_chunk_state; 78 sd->sd_set_vol_state = sr_raid0_set_vol_state; 79 } 80 81 int 82 sr_raid0_alloc_resources(struct sr_discipline *sd) 83 { 84 int rv = EINVAL; 85 86 if (!sd) 87 return (rv); 88 89 DNPRINTF(SR_D_DIS, "%s: sr_raid0_alloc_resources\n", 90 DEVNAME(sd->sd_sc)); 91 92 if (sr_wu_alloc(sd)) 93 goto bad; 94 if (sr_ccb_alloc(sd)) 95 goto bad; 96 97 /* setup runtime values */ 98 sd->mds.mdd_raid0.sr0_strip_bits = 99 sr_validate_stripsize(sd->sd_meta->ssdi.ssd_strip_size); 100 if (sd->mds.mdd_raid0.sr0_strip_bits == -1) 101 goto bad; 102 103 rv = 0; 104 bad: 105 return (rv); 106 } 107 108 int 109 sr_raid0_free_resources(struct sr_discipline *sd) 110 { 111 int rv = EINVAL; 112 113 if (!sd) 114 return (rv); 115 116 DNPRINTF(SR_D_DIS, "%s: sr_raid0_free_resources\n", 117 DEVNAME(sd->sd_sc)); 118 119 sr_wu_free(sd); 120 sr_ccb_free(sd); 121 122 rv = 0; 123 return (rv); 124 } 125 126 void 127 sr_raid0_set_chunk_state(struct sr_discipline *sd, int c, int new_state) 128 { 129 int old_state, s; 130 131 DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n", 132 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 133 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); 134 135 /* ok to go to splbio since this only happens in error path */ 136 s = splbio(); 137 old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status; 138 139 /* multiple IOs to the same chunk that fail will come through here */ 140 if (old_state == new_state) 141 goto done; 142 143 switch (old_state) { 144 case BIOC_SDONLINE: 145 if (new_state == BIOC_SDOFFLINE) 146 break; 147 else 148 goto die; 149 break; 150 151 case BIOC_SDOFFLINE: 152 goto die; 153 154 default: 155 die: 156 splx(s); /* XXX */ 157 panic("%s: %s: %s: invalid chunk state transition " 158 "%d -> %d\n", DEVNAME(sd->sd_sc), 159 sd->sd_meta->ssd_devname, 160 sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, 161 old_state, new_state); 162 /* NOTREACHED */ 163 } 164 165 sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state; 166 sd->sd_set_vol_state(sd); 167 168 sd->sd_must_flush = 1; 169 workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL); 170 done: 171 splx(s); 172 } 173 174 void 175 sr_raid0_set_vol_state(struct sr_discipline *sd) 176 { 177 int states[SR_MAX_STATES]; 178 int new_state, i, s, nd; 179 int old_state = sd->sd_vol_status; 180 181 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n", 182 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); 183 184 nd = sd->sd_meta->ssdi.ssd_chunk_no; 185 186 for (i = 0; i < SR_MAX_STATES; i++) 187 states[i] = 0; 188 189 for (i = 0; i < nd; i++) { 190 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status; 191 if (s >= SR_MAX_STATES) 192 panic("%s: %s: %s: invalid chunk state", 193 DEVNAME(sd->sd_sc), 194 sd->sd_meta->ssd_devname, 195 sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname); 196 states[s]++; 197 } 198 199 if (states[BIOC_SDONLINE] == nd) 200 new_state = BIOC_SVONLINE; 201 else 202 new_state = BIOC_SVOFFLINE; 203 204 DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n", 205 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 206 old_state, new_state); 207 208 switch (old_state) { 209 case BIOC_SVONLINE: 210 if (new_state == BIOC_SVOFFLINE || new_state == BIOC_SVONLINE) 211 break; 212 else 213 goto die; 214 break; 215 216 case BIOC_SVOFFLINE: 217 /* XXX this might be a little too much */ 218 goto die; 219 220 default: 221 die: 222 panic("%s: %s: invalid volume state transition " 223 "%d -> %d\n", DEVNAME(sd->sd_sc), 224 sd->sd_meta->ssd_devname, 225 old_state, new_state); 226 /* NOTREACHED */ 227 } 228 229 sd->sd_vol_status = new_state; 230 } 231 232 int 233 sr_raid0_rw(struct sr_workunit *wu) 234 { 235 struct sr_discipline *sd = wu->swu_dis; 236 struct scsi_xfer *xs = wu->swu_xs; 237 struct sr_ccb *ccb; 238 struct sr_chunk *scp; 239 int s; 240 daddr64_t blk, lbaoffs, strip_no, chunk, stripoffs; 241 daddr64_t strip_size, no_chunk, chunkoffs, physoffs; 242 daddr64_t strip_bits, length, leftover; 243 u_int8_t *data; 244 245 /* blk and scsi error will be handled by sr_validate_io */ 246 if (sr_validate_io(wu, &blk, "sr_raid0_rw")) 247 goto bad; 248 249 strip_size = sd->sd_meta->ssdi.ssd_strip_size; 250 strip_bits = sd->mds.mdd_raid0.sr0_strip_bits; 251 no_chunk = sd->sd_meta->ssdi.ssd_chunk_no; 252 253 DNPRINTF(SR_D_DIS, "%s: %s: front end io: lba %lld size %d\n", 254 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 255 blk, xs->datalen); 256 257 /* all offs are in bytes */ 258 lbaoffs = blk << DEV_BSHIFT; 259 strip_no = lbaoffs >> strip_bits; 260 chunk = strip_no % no_chunk; 261 stripoffs = lbaoffs & (strip_size - 1); 262 chunkoffs = (strip_no / no_chunk) << strip_bits; 263 physoffs = chunkoffs + stripoffs + 264 ((SR_META_OFFSET + SR_META_SIZE) << DEV_BSHIFT); 265 length = MIN(xs->datalen, strip_size - stripoffs); 266 leftover = xs->datalen; 267 data = xs->data; 268 for (wu->swu_io_count = 1;; wu->swu_io_count++) { 269 /* make sure chunk is online */ 270 scp = sd->sd_vol.sv_chunks[chunk]; 271 if (scp->src_meta.scm_status != BIOC_SDONLINE) { 272 goto bad; 273 } 274 275 ccb = sr_ccb_get(sd); 276 if (!ccb) { 277 /* should never happen but handle more gracefully */ 278 printf("%s: %s: too many ccbs queued\n", 279 DEVNAME(sd->sd_sc), 280 sd->sd_meta->ssd_devname); 281 goto bad; 282 } 283 284 DNPRINTF(SR_D_DIS, "%s: %s raid io: lbaoffs: %lld " 285 "strip_no: %lld chunk: %lld stripoffs: %lld " 286 "chunkoffs: %lld physoffs: %lld length: %lld " 287 "leftover: %lld data: %p\n", 288 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, lbaoffs, 289 strip_no, chunk, stripoffs, chunkoffs, physoffs, length, 290 leftover, data); 291 292 ccb->ccb_buf.b_flags = B_CALL | B_PHYS; 293 ccb->ccb_buf.b_iodone = sr_raid0_intr; 294 ccb->ccb_buf.b_blkno = physoffs >> DEV_BSHIFT; 295 ccb->ccb_buf.b_bcount = length; 296 ccb->ccb_buf.b_bufsize = length; 297 ccb->ccb_buf.b_resid = length; 298 ccb->ccb_buf.b_data = data; 299 ccb->ccb_buf.b_error = 0; 300 ccb->ccb_buf.b_proc = curproc; 301 ccb->ccb_wu = wu; 302 ccb->ccb_buf.b_flags |= xs->flags & SCSI_DATA_IN ? 303 B_READ : B_WRITE; 304 ccb->ccb_target = chunk; 305 ccb->ccb_buf.b_dev = sd->sd_vol.sv_chunks[chunk]->src_dev_mm; 306 ccb->ccb_buf.b_vp = sd->sd_vol.sv_chunks[chunk]->src_vn; 307 if ((ccb->ccb_buf.b_flags & B_READ) == 0) 308 ccb->ccb_buf.b_vp->v_numoutput++; 309 LIST_INIT(&ccb->ccb_buf.b_dep); 310 TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link); 311 312 DNPRINTF(SR_D_DIS, "%s: %s: sr_raid0: b_bcount: %d " 313 "b_blkno: %lld b_flags 0x%0x b_data %p\n", 314 DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, 315 ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_blkno, 316 ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data); 317 318 leftover -= length; 319 if (leftover == 0) 320 break; 321 322 data += length; 323 if (++chunk > no_chunk - 1) { 324 chunk = 0; 325 physoffs += length; 326 } else if (wu->swu_io_count == 1) 327 physoffs -= stripoffs; 328 length = MIN(leftover,strip_size); 329 } 330 331 s = splbio(); 332 333 if (sr_check_io_collision(wu)) 334 goto queued; 335 336 sr_raid_startwu(wu); 337 queued: 338 splx(s); 339 return (0); 340 bad: 341 /* wu is unwound by sr_wu_put */ 342 return (1); 343 } 344 345 void 346 sr_raid0_intr(struct buf *bp) 347 { 348 struct sr_ccb *ccb = (struct sr_ccb *)bp; 349 struct sr_workunit *wu = ccb->ccb_wu, *wup; 350 struct sr_discipline *sd = wu->swu_dis; 351 struct scsi_xfer *xs = wu->swu_xs; 352 struct sr_softc *sc = sd->sd_sc; 353 int s, pend; 354 355 DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n", 356 DEVNAME(sc), bp, xs); 357 358 DNPRINTF(SR_D_INTR, "%s: sr_intr: b_bcount: %d b_resid: %d" 359 " b_flags: 0x%0x block: %lld target: %d\n", DEVNAME(sc), 360 ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_resid, ccb->ccb_buf.b_flags, 361 ccb->ccb_buf.b_blkno, ccb->ccb_target); 362 363 s = splbio(); 364 365 if (ccb->ccb_buf.b_flags & B_ERROR) { 366 printf("%s: i/o error on block %lld target: %d b_error: %d\n", 367 DEVNAME(sc), ccb->ccb_buf.b_blkno, ccb->ccb_target, 368 ccb->ccb_buf.b_error); 369 DNPRINTF(SR_D_INTR, "%s: i/o error on block %lld target: %d\n", 370 DEVNAME(sc), ccb->ccb_buf.b_blkno, ccb->ccb_target); 371 wu->swu_ios_failed++; 372 ccb->ccb_state = SR_CCB_FAILED; 373 if (ccb->ccb_target != -1) 374 sd->sd_set_chunk_state(sd, ccb->ccb_target, 375 BIOC_SDOFFLINE); 376 else 377 panic("%s: invalid target on wu: %p", DEVNAME(sc), wu); 378 } else { 379 ccb->ccb_state = SR_CCB_OK; 380 wu->swu_ios_succeeded++; 381 } 382 wu->swu_ios_complete++; 383 384 DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n", 385 DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count, 386 wu->swu_ios_failed); 387 388 if (wu->swu_ios_complete >= wu->swu_io_count) { 389 if (wu->swu_ios_failed) 390 goto bad; 391 392 xs->error = XS_NOERROR; 393 xs->resid = 0; 394 xs->flags |= ITSDONE; 395 396 pend = 0; 397 TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) { 398 if (wu == wup) { 399 /* wu on pendq, remove */ 400 TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link); 401 pend = 1; 402 403 if (wu->swu_collider) { 404 /* restart deferred wu */ 405 wu->swu_collider->swu_state = 406 SR_WU_INPROGRESS; 407 TAILQ_REMOVE(&sd->sd_wu_defq, 408 wu->swu_collider, swu_link); 409 sr_raid_startwu(wu->swu_collider); 410 } 411 break; 412 } 413 } 414 415 if (!pend) 416 printf("%s: wu: %p not on pending queue\n", 417 DEVNAME(sc), wu); 418 419 /* do not change the order of these 2 functions */ 420 sr_wu_put(wu); 421 sr_scsi_done(sd, xs); 422 423 if (sd->sd_sync && sd->sd_wu_pending == 0) 424 wakeup(sd); 425 } 426 427 splx(s); 428 return; 429 bad: 430 xs->error = XS_DRIVER_STUFFUP; 431 xs->flags |= ITSDONE; 432 sr_wu_put(wu); 433 sr_scsi_done(sd, xs); 434 splx(s); 435 } 436