xref: /openbsd-src/sys/dev/softraid_raid0.c (revision 43003dfe3ad45d1698bed8a37f2b0f5b14f20d4f)
1 /* $OpenBSD: softraid_raid0.c,v 1.16 2009/08/09 14:12:25 marco Exp $ */
2 /*
3  * Copyright (c) 2008 Marco Peereboom <marco@peereboom.us>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include "bio.h"
19 
20 #include <sys/param.h>
21 #include <sys/systm.h>
22 #include <sys/buf.h>
23 #include <sys/device.h>
24 #include <sys/ioctl.h>
25 #include <sys/proc.h>
26 #include <sys/malloc.h>
27 #include <sys/kernel.h>
28 #include <sys/disk.h>
29 #include <sys/rwlock.h>
30 #include <sys/queue.h>
31 #include <sys/fcntl.h>
32 #include <sys/disklabel.h>
33 #include <sys/mount.h>
34 #include <sys/sensors.h>
35 #include <sys/stat.h>
36 #include <sys/conf.h>
37 #include <sys/uio.h>
38 
39 #include <scsi/scsi_all.h>
40 #include <scsi/scsiconf.h>
41 #include <scsi/scsi_disk.h>
42 
43 #include <dev/softraidvar.h>
44 #include <dev/rndvar.h>
45 
/*
 * RAID 0 functions.  All of these except sr_raid0_intr() are installed in
 * the discipline by sr_raid0_discipline_init(); sr_raid0_intr() is set as
 * the b_iodone handler of every buf issued by sr_raid0_rw().
 */

/* Resource management. */
int	sr_raid0_alloc_resources(struct sr_discipline *);
int	sr_raid0_free_resources(struct sr_discipline *);

/* I/O submission and completion. */
int	sr_raid0_rw(struct sr_workunit *);
void	sr_raid0_intr(struct buf *);

/* Chunk and volume state tracking. */
void	sr_raid0_set_chunk_state(struct sr_discipline *, int, int);
void	sr_raid0_set_vol_state(struct sr_discipline *);
53 
54 /* Discipline initialisation. */
55 void
56 sr_raid0_discipline_init(struct sr_discipline *sd)
57 {
58 
59 	/* Fill out discipline members. */
60 	sd->sd_type = SR_MD_RAID0;
61 	sd->sd_max_ccb_per_wu =
62 	    (MAXPHYS / sd->sd_meta->ssdi.ssd_strip_size + 1) *
63 	    SR_RAID0_NOWU * sd->sd_meta->ssdi.ssd_chunk_no;
64 	sd->sd_max_wu = SR_RAID0_NOWU;
65 
66 	/* Setup discipline pointers. */
67 	sd->sd_alloc_resources = sr_raid0_alloc_resources;
68 	sd->sd_free_resources = sr_raid0_free_resources;
69 	sd->sd_start_discipline = NULL;
70 	sd->sd_scsi_inquiry = sr_raid_inquiry;
71 	sd->sd_scsi_read_cap = sr_raid_read_cap;
72 	sd->sd_scsi_tur = sr_raid_tur;
73 	sd->sd_scsi_req_sense = sr_raid_request_sense;
74 	sd->sd_scsi_start_stop = sr_raid_start_stop;
75 	sd->sd_scsi_sync = sr_raid_sync;
76 	sd->sd_scsi_rw = sr_raid0_rw;
77 	sd->sd_set_chunk_state = sr_raid0_set_chunk_state;
78 	sd->sd_set_vol_state = sr_raid0_set_vol_state;
79 }
80 
81 int
82 sr_raid0_alloc_resources(struct sr_discipline *sd)
83 {
84 	int			rv = EINVAL;
85 
86 	if (!sd)
87 		return (rv);
88 
89 	DNPRINTF(SR_D_DIS, "%s: sr_raid0_alloc_resources\n",
90 	    DEVNAME(sd->sd_sc));
91 
92 	if (sr_wu_alloc(sd))
93 		goto bad;
94 	if (sr_ccb_alloc(sd))
95 		goto bad;
96 
97 	/* setup runtime values */
98 	sd->mds.mdd_raid0.sr0_strip_bits =
99 	    sr_validate_stripsize(sd->sd_meta->ssdi.ssd_strip_size);
100 	if (sd->mds.mdd_raid0.sr0_strip_bits == -1)
101 		goto bad;
102 
103 	rv = 0;
104 bad:
105 	return (rv);
106 }
107 
108 int
109 sr_raid0_free_resources(struct sr_discipline *sd)
110 {
111 	int			rv = EINVAL;
112 
113 	if (!sd)
114 		return (rv);
115 
116 	DNPRINTF(SR_D_DIS, "%s: sr_raid0_free_resources\n",
117 	    DEVNAME(sd->sd_sc));
118 
119 	sr_wu_free(sd);
120 	sr_ccb_free(sd);
121 
122 	rv = 0;
123 	return (rv);
124 }
125 
126 void
127 sr_raid0_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
128 {
129 	int			old_state, s;
130 
131 	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
132 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
133 	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
134 
135 	/* ok to go to splbio since this only happens in error path */
136 	s = splbio();
137 	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
138 
139 	/* multiple IOs to the same chunk that fail will come through here */
140 	if (old_state == new_state)
141 		goto done;
142 
143 	switch (old_state) {
144 	case BIOC_SDONLINE:
145 		if (new_state == BIOC_SDOFFLINE)
146 			break;
147 		else
148 			goto die;
149 		break;
150 
151 	case BIOC_SDOFFLINE:
152 		goto die;
153 
154 	default:
155 die:
156 		splx(s); /* XXX */
157 		panic("%s: %s: %s: invalid chunk state transition "
158 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
159 		    sd->sd_meta->ssd_devname,
160 		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
161 		    old_state, new_state);
162 		/* NOTREACHED */
163 	}
164 
165 	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
166 	sd->sd_set_vol_state(sd);
167 
168 	sd->sd_must_flush = 1;
169 	workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL);
170 done:
171 	splx(s);
172 }
173 
174 void
175 sr_raid0_set_vol_state(struct sr_discipline *sd)
176 {
177 	int			states[SR_MAX_STATES];
178 	int			new_state, i, s, nd;
179 	int			old_state = sd->sd_vol_status;
180 
181 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
182 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
183 
184 	nd = sd->sd_meta->ssdi.ssd_chunk_no;
185 
186 	for (i = 0; i < SR_MAX_STATES; i++)
187 		states[i] = 0;
188 
189 	for (i = 0; i < nd; i++) {
190 		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
191 		if (s >= SR_MAX_STATES)
192 			panic("%s: %s: %s: invalid chunk state",
193 			    DEVNAME(sd->sd_sc),
194 			    sd->sd_meta->ssd_devname,
195 			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
196 		states[s]++;
197 	}
198 
199 	if (states[BIOC_SDONLINE] == nd)
200 		new_state = BIOC_SVONLINE;
201 	else
202 		new_state = BIOC_SVOFFLINE;
203 
204 	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n",
205 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
206 	    old_state, new_state);
207 
208 	switch (old_state) {
209 	case BIOC_SVONLINE:
210 		if (new_state == BIOC_SVOFFLINE || new_state == BIOC_SVONLINE)
211 			break;
212 		else
213 			goto die;
214 		break;
215 
216 	case BIOC_SVOFFLINE:
217 		/* XXX this might be a little too much */
218 		goto die;
219 
220 	default:
221 die:
222 		panic("%s: %s: invalid volume state transition "
223 		    "%d -> %d\n", DEVNAME(sd->sd_sc),
224 		    sd->sd_meta->ssd_devname,
225 		    old_state, new_state);
226 		/* NOTREACHED */
227 	}
228 
229 	sd->sd_vol_status = new_state;
230 }
231 
232 int
233 sr_raid0_rw(struct sr_workunit *wu)
234 {
235 	struct sr_discipline	*sd = wu->swu_dis;
236 	struct scsi_xfer	*xs = wu->swu_xs;
237 	struct sr_ccb		*ccb;
238 	struct sr_chunk		*scp;
239 	int			s;
240 	daddr64_t		blk, lbaoffs, strip_no, chunk, stripoffs;
241 	daddr64_t		strip_size, no_chunk, chunkoffs, physoffs;
242 	daddr64_t		strip_bits, length, leftover;
243 	u_int8_t		*data;
244 
245 	/* blk and scsi error will be handled by sr_validate_io */
246 	if (sr_validate_io(wu, &blk, "sr_raid0_rw"))
247 		goto bad;
248 
249 	strip_size = sd->sd_meta->ssdi.ssd_strip_size;
250 	strip_bits = sd->mds.mdd_raid0.sr0_strip_bits;
251 	no_chunk = sd->sd_meta->ssdi.ssd_chunk_no;
252 
253 	DNPRINTF(SR_D_DIS, "%s: %s: front end io: lba %lld size %d\n",
254 	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
255 	    blk, xs->datalen);
256 
257 	/* all offs are in bytes */
258 	lbaoffs = blk << DEV_BSHIFT;
259 	strip_no = lbaoffs >> strip_bits;
260 	chunk = strip_no % no_chunk;
261 	stripoffs = lbaoffs & (strip_size - 1);
262 	chunkoffs = (strip_no / no_chunk) << strip_bits;
263 	physoffs = chunkoffs + stripoffs +
264 	    ((SR_META_OFFSET + SR_META_SIZE) << DEV_BSHIFT);
265 	length = MIN(xs->datalen, strip_size - stripoffs);
266 	leftover = xs->datalen;
267 	data = xs->data;
268 	for (wu->swu_io_count = 1;; wu->swu_io_count++) {
269 		/* make sure chunk is online */
270 		scp = sd->sd_vol.sv_chunks[chunk];
271 		if (scp->src_meta.scm_status != BIOC_SDONLINE) {
272 			goto bad;
273 		}
274 
275 		ccb = sr_ccb_get(sd);
276 		if (!ccb) {
277 			/* should never happen but handle more gracefully */
278 			printf("%s: %s: too many ccbs queued\n",
279 			    DEVNAME(sd->sd_sc),
280 			    sd->sd_meta->ssd_devname);
281 			goto bad;
282 		}
283 
284 		DNPRINTF(SR_D_DIS, "%s: %s raid io: lbaoffs: %lld "
285 		    "strip_no: %lld chunk: %lld stripoffs: %lld "
286 		    "chunkoffs: %lld physoffs: %lld length: %lld "
287 		    "leftover: %lld data: %p\n",
288 		    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, lbaoffs,
289 		    strip_no, chunk, stripoffs, chunkoffs, physoffs, length,
290 		    leftover, data);
291 
292 		ccb->ccb_buf.b_flags = B_CALL | B_PHYS;
293 		ccb->ccb_buf.b_iodone = sr_raid0_intr;
294 		ccb->ccb_buf.b_blkno = physoffs >> DEV_BSHIFT;
295 		ccb->ccb_buf.b_bcount = length;
296 		ccb->ccb_buf.b_bufsize = length;
297 		ccb->ccb_buf.b_resid = length;
298 		ccb->ccb_buf.b_data = data;
299 		ccb->ccb_buf.b_error = 0;
300 		ccb->ccb_buf.b_proc = curproc;
301 		ccb->ccb_wu = wu;
302 		ccb->ccb_buf.b_flags |= xs->flags & SCSI_DATA_IN ?
303 		    B_READ : B_WRITE;
304 		ccb->ccb_target = chunk;
305 		ccb->ccb_buf.b_dev = sd->sd_vol.sv_chunks[chunk]->src_dev_mm;
306 		ccb->ccb_buf.b_vp = sd->sd_vol.sv_chunks[chunk]->src_vn;
307 		if ((ccb->ccb_buf.b_flags & B_READ) == 0)
308 			ccb->ccb_buf.b_vp->v_numoutput++;
309 		LIST_INIT(&ccb->ccb_buf.b_dep);
310 		TAILQ_INSERT_TAIL(&wu->swu_ccb, ccb, ccb_link);
311 
312 		DNPRINTF(SR_D_DIS, "%s: %s: sr_raid0: b_bcount: %d "
313 		    "b_blkno: %lld b_flags 0x%0x b_data %p\n",
314 		    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
315 		    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_blkno,
316 		    ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data);
317 
318 		leftover -= length;
319 		if (leftover == 0)
320 			break;
321 
322 		data += length;
323 		if (++chunk > no_chunk - 1) {
324 			chunk = 0;
325 			physoffs += length;
326 		} else if (wu->swu_io_count == 1)
327 			physoffs -= stripoffs;
328 		length = MIN(leftover,strip_size);
329 	}
330 
331 	s = splbio();
332 
333 	if (sr_check_io_collision(wu))
334 		goto queued;
335 
336 	sr_raid_startwu(wu);
337 queued:
338 	splx(s);
339 	return (0);
340 bad:
341 	/* wu is unwound by sr_wu_put */
342 	return (1);
343 }
344 
345 void
346 sr_raid0_intr(struct buf *bp)
347 {
348 	struct sr_ccb		*ccb = (struct sr_ccb *)bp;
349 	struct sr_workunit	*wu = ccb->ccb_wu, *wup;
350 	struct sr_discipline	*sd = wu->swu_dis;
351 	struct scsi_xfer	*xs = wu->swu_xs;
352 	struct sr_softc		*sc = sd->sd_sc;
353 	int			s, pend;
354 
355 	DNPRINTF(SR_D_INTR, "%s: sr_intr bp %x xs %x\n",
356 	    DEVNAME(sc), bp, xs);
357 
358 	DNPRINTF(SR_D_INTR, "%s: sr_intr: b_bcount: %d b_resid: %d"
359 	    " b_flags: 0x%0x block: %lld target: %d\n", DEVNAME(sc),
360 	    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_resid, ccb->ccb_buf.b_flags,
361 	    ccb->ccb_buf.b_blkno, ccb->ccb_target);
362 
363 	s = splbio();
364 
365 	if (ccb->ccb_buf.b_flags & B_ERROR) {
366 		printf("%s: i/o error on block %lld target: %d b_error: %d\n",
367 		    DEVNAME(sc), ccb->ccb_buf.b_blkno, ccb->ccb_target,
368 		    ccb->ccb_buf.b_error);
369 		DNPRINTF(SR_D_INTR, "%s: i/o error on block %lld target: %d\n",
370 		    DEVNAME(sc), ccb->ccb_buf.b_blkno, ccb->ccb_target);
371 		wu->swu_ios_failed++;
372 		ccb->ccb_state = SR_CCB_FAILED;
373 		if (ccb->ccb_target != -1)
374 			sd->sd_set_chunk_state(sd, ccb->ccb_target,
375 			    BIOC_SDOFFLINE);
376 		else
377 			panic("%s: invalid target on wu: %p", DEVNAME(sc), wu);
378 	} else {
379 		ccb->ccb_state = SR_CCB_OK;
380 		wu->swu_ios_succeeded++;
381 	}
382 	wu->swu_ios_complete++;
383 
384 	DNPRINTF(SR_D_INTR, "%s: sr_intr: comp: %d count: %d failed: %d\n",
385 	    DEVNAME(sc), wu->swu_ios_complete, wu->swu_io_count,
386 	    wu->swu_ios_failed);
387 
388 	if (wu->swu_ios_complete >= wu->swu_io_count) {
389 		if (wu->swu_ios_failed)
390 			goto bad;
391 
392 		xs->error = XS_NOERROR;
393 		xs->resid = 0;
394 		xs->flags |= ITSDONE;
395 
396 		pend = 0;
397 		TAILQ_FOREACH(wup, &sd->sd_wu_pendq, swu_link) {
398 			if (wu == wup) {
399 				/* wu on pendq, remove */
400 				TAILQ_REMOVE(&sd->sd_wu_pendq, wu, swu_link);
401 				pend = 1;
402 
403 				if (wu->swu_collider) {
404 					/* restart deferred wu */
405 					wu->swu_collider->swu_state =
406 					    SR_WU_INPROGRESS;
407 					TAILQ_REMOVE(&sd->sd_wu_defq,
408 					    wu->swu_collider, swu_link);
409 					sr_raid_startwu(wu->swu_collider);
410 				}
411 				break;
412 			}
413 		}
414 
415 		if (!pend)
416 			printf("%s: wu: %p not on pending queue\n",
417 			    DEVNAME(sc), wu);
418 
419 		/* do not change the order of these 2 functions */
420 		sr_wu_put(wu);
421 		sr_scsi_done(sd, xs);
422 
423 		if (sd->sd_sync && sd->sd_wu_pending == 0)
424 			wakeup(sd);
425 	}
426 
427 	splx(s);
428 	return;
429 bad:
430 	xs->error = XS_DRIVER_STUFFUP;
431 	xs->flags |= ITSDONE;
432 	sr_wu_put(wu);
433 	sr_scsi_done(sd, xs);
434 	splx(s);
435 }
436