xref: /spdk/module/bdev/raid/raid0.c (revision 4036f95bf8ee340375616c31dd71fcab215e02cb)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "bdev_raid.h"
35 
36 #include "spdk/env.h"
37 #include "spdk/io_channel.h"
38 #include "spdk/string.h"
39 #include "spdk/util.h"
40 
41 #include "spdk_internal/log.h"
42 
43 /*
44  * brief:
45  * raid0_bdev_io_completion function is called by lower layers to notify raid
46  * module that particular bdev_io is completed.
47  * params:
48  * bdev_io - pointer to bdev io submitted to lower layers, like child io
49  * success - bdev_io status
50  * cb_arg - function callback context, like parent io pointer
51  * returns:
52  * none
53  */
54 static void
55 raid0_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
56 {
57 	struct spdk_bdev_io         *parent_io = cb_arg;
58 
59 	spdk_bdev_free_io(bdev_io);
60 
61 	if (success) {
62 		spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_SUCCESS);
63 	} else {
64 		spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_FAILED);
65 	}
66 }
67 
68 static void
69 raid0_waitq_io_process(void *ctx);
70 
71 /*
72  * brief:
73  * raid0_submit_rw_request function is used to submit I/O to the correct
74  * member disk for raid0 bdevs.
75  * params:
76  * raid_io
77  * returns:
78  * none
79  */
80 void
81 raid0_submit_rw_request(struct raid_bdev_io *raid_io)
82 {
83 	struct spdk_bdev_io		*bdev_io = spdk_bdev_io_from_ctx(raid_io);
84 	struct raid_bdev_io_channel	*raid_ch = raid_io->raid_ch;
85 	struct raid_bdev		*raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt;
86 	uint64_t			pd_strip;
87 	uint32_t			offset_in_strip;
88 	uint64_t			pd_lba;
89 	uint64_t			pd_blocks;
90 	uint8_t				pd_idx;
91 	int				ret = 0;
92 	uint64_t			start_strip;
93 	uint64_t			end_strip;
94 
95 	start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift;
96 	end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >>
97 		    raid_bdev->strip_size_shift;
98 	if (start_strip != end_strip && raid_bdev->num_base_bdevs > 1) {
99 		assert(false);
100 		SPDK_ERRLOG("I/O spans strip boundary!\n");
101 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
102 		return;
103 	}
104 
105 	pd_strip = start_strip / raid_bdev->num_base_bdevs;
106 	pd_idx = start_strip % raid_bdev->num_base_bdevs;
107 	offset_in_strip = bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1);
108 	pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip;
109 	pd_blocks = bdev_io->u.bdev.num_blocks;
110 	if (raid_bdev->base_bdev_info[pd_idx].desc == NULL) {
111 		SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
112 		assert(0);
113 	}
114 
115 	/*
116 	 * Submit child io to bdev layer with using base bdev descriptors, base
117 	 * bdev lba, base bdev child io length in blocks, buffer, completion
118 	 * function and function callback context
119 	 */
120 	assert(raid_ch != NULL);
121 	assert(raid_ch->base_channel);
122 	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
123 		ret = spdk_bdev_readv_blocks(raid_bdev->base_bdev_info[pd_idx].desc,
124 					     raid_ch->base_channel[pd_idx],
125 					     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
126 					     pd_lba, pd_blocks, raid0_bdev_io_completion,
127 					     bdev_io);
128 	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
129 		ret = spdk_bdev_writev_blocks(raid_bdev->base_bdev_info[pd_idx].desc,
130 					      raid_ch->base_channel[pd_idx],
131 					      bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
132 					      pd_lba, pd_blocks, raid0_bdev_io_completion,
133 					      bdev_io);
134 	} else {
135 		SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type);
136 		assert(0);
137 	}
138 
139 	if (ret) {
140 		raid_bdev_queue_io_wait(bdev_io, pd_idx, raid0_waitq_io_process, ret);
141 	}
142 }
143 
144 /*
145  * brief:
146  * raid0_waitq_io_process function is the callback function
147  * registered by raid bdev module to bdev when bdev_io was unavailable
148  * for raid0 bdevs.
149  * params:
150  * ctx - pointer to raid_bdev_io
151  * returns:
152  * none
153  */
154 static void
155 raid0_waitq_io_process(void *ctx)
156 {
157 	struct spdk_bdev_io     *bdev_io = ctx;
158 	struct raid_bdev_io	*raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
159 
160 	raid0_submit_rw_request(raid_io);
161 }
162 
/* raid0 IO range: describes how a null-payload request (UNMAP/FLUSH) maps
 * onto the member disks, as computed by _raid0_get_io_range().
 */
struct raid_bdev_io_range {
	uint64_t	strip_size;		/* strip size in blocks (copied from raid_bdev) */
	uint64_t	start_strip_in_disk;	/* strip index within the start disk */
	uint64_t	end_strip_in_disk;	/* strip index within the end disk */
	uint64_t	start_offset_in_strip;	/* block offset into the first strip */
	uint64_t	end_offset_in_strip;	/* block offset of the last block in the last strip */
	uint8_t		start_disk;		/* member disk holding the first strip */
	uint8_t		end_disk;		/* member disk holding the last strip */
	uint8_t		n_disks_involved;	/* number of member disks touched (1..num_base_bdevs) */
};
174 
175 static inline void
176 _raid0_get_io_range(struct raid_bdev_io_range *io_range,
177 		    uint8_t num_base_bdevs, uint64_t strip_size, uint64_t strip_size_shift,
178 		    uint64_t offset_blocks, uint64_t num_blocks)
179 {
180 	uint64_t	start_strip;
181 	uint64_t	end_strip;
182 
183 	io_range->strip_size = strip_size;
184 
185 	/* The start and end strip index in raid0 bdev scope */
186 	start_strip = offset_blocks >> strip_size_shift;
187 	end_strip = (offset_blocks + num_blocks - 1) >> strip_size_shift;
188 	io_range->start_strip_in_disk = start_strip / num_base_bdevs;
189 	io_range->end_strip_in_disk = end_strip / num_base_bdevs;
190 
191 	/* The first strip may have unaligned start LBA offset.
192 	 * The end strip may have unaligned end LBA offset.
193 	 * Strips between them certainly have aligned offset and length to boundaries.
194 	 */
195 	io_range->start_offset_in_strip = offset_blocks % strip_size;
196 	io_range->end_offset_in_strip = (offset_blocks + num_blocks - 1) % strip_size;
197 
198 	/* The base bdev indexes in which start and end strips are located */
199 	io_range->start_disk = start_strip % num_base_bdevs;
200 	io_range->end_disk = end_strip % num_base_bdevs;
201 
202 	/* Calculate how many base_bdevs are involved in io operation.
203 	 * Number of base bdevs involved is between 1 and num_base_bdevs.
204 	 * It will be 1 if the first strip and last strip are the same one.
205 	 */
206 	io_range->n_disks_involved = spdk_min((end_strip - start_strip + 1), num_base_bdevs);
207 }
208 
209 static inline void
210 _raid0_split_io_range(struct raid_bdev_io_range *io_range, uint8_t disk_idx,
211 		      uint64_t *_offset_in_disk, uint64_t *_nblocks_in_disk)
212 {
213 	uint64_t n_strips_in_disk;
214 	uint64_t start_offset_in_disk;
215 	uint64_t end_offset_in_disk;
216 	uint64_t offset_in_disk;
217 	uint64_t nblocks_in_disk;
218 	uint64_t start_strip_in_disk;
219 	uint64_t end_strip_in_disk;
220 
221 	start_strip_in_disk = io_range->start_strip_in_disk;
222 	if (disk_idx < io_range->start_disk) {
223 		start_strip_in_disk += 1;
224 	}
225 
226 	end_strip_in_disk = io_range->end_strip_in_disk;
227 	if (disk_idx > io_range->end_disk) {
228 		end_strip_in_disk -= 1;
229 	}
230 
231 	assert(end_strip_in_disk >= start_strip_in_disk);
232 	n_strips_in_disk = end_strip_in_disk - start_strip_in_disk + 1;
233 
234 	if (disk_idx == io_range->start_disk) {
235 		start_offset_in_disk = io_range->start_offset_in_strip;
236 	} else {
237 		start_offset_in_disk = 0;
238 	}
239 
240 	if (disk_idx == io_range->end_disk) {
241 		end_offset_in_disk = io_range->end_offset_in_strip;
242 	} else {
243 		end_offset_in_disk = io_range->strip_size - 1;
244 	}
245 
246 	offset_in_disk = start_offset_in_disk + start_strip_in_disk * io_range->strip_size;
247 	nblocks_in_disk = (n_strips_in_disk - 1) * io_range->strip_size
248 			  + end_offset_in_disk - start_offset_in_disk + 1;
249 
250 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID0,
251 		      "raid_bdev (strip_size 0x%lx) splits IO to base_bdev (%u) at (0x%lx, 0x%lx).\n",
252 		      io_range->strip_size, disk_idx, offset_in_disk, nblocks_in_disk);
253 
254 	*_offset_in_disk = offset_in_disk;
255 	*_nblocks_in_disk = nblocks_in_disk;
256 }
257 
258 static void
259 _raid0_submit_null_payload_request(void *_bdev_io)
260 {
261 	struct spdk_bdev_io *bdev_io = _bdev_io;
262 	struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
263 
264 	raid0_submit_null_payload_request(raid_io);
265 }
266 
/*
 * brief:
 * raid0_submit_null_payload_request function submits the next batch of
 * io requests with range but without payload, like FLUSH and UNMAP, to member disks;
 * it will submit as many as possible unless one base io request fails with -ENOMEM,
 * in which case it will queue itself for later submission.
 * params:
 * raid_io - raid I/O context embedded in the parent bdev_io on the raid bdev
 * returns:
 * none
 */
void
raid0_submit_null_payload_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io		*bdev_io;
	struct raid_bdev		*raid_bdev;
	struct raid_bdev_io_range	io_range;
	int				ret;

	bdev_io = spdk_bdev_io_from_ctx(raid_io);
	raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt;

	/* Map the whole request onto the member disks; recomputed on every
	 * (re-)entry since io_range lives on the stack. */
	_raid0_get_io_range(&io_range, raid_bdev->num_base_bdevs,
			    raid_bdev->strip_size, raid_bdev->strip_size_shift,
			    bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks);

	raid_io->base_bdev_io_expected = io_range.n_disks_involved;

	/* base_bdev_io_submitted doubles as the resume cursor: when this function
	 * is re-entered from the wait queue, already-submitted child I/Os are
	 * skipped and submission continues with the next disk. */
	while (raid_io->base_bdev_io_submitted < raid_io->base_bdev_io_expected) {
		uint8_t disk_idx;
		uint64_t offset_in_disk;
		uint64_t nblocks_in_disk;

		/* base_bdev is started from start_disk to end_disk.
		 * It is possible that index of start_disk is larger than end_disk's.
		 */
		disk_idx = (io_range.start_disk + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs;

		_raid0_split_io_range(&io_range, disk_idx, &offset_in_disk, &nblocks_in_disk);

		switch (bdev_io->type) {
		case SPDK_BDEV_IO_TYPE_UNMAP:
			ret = spdk_bdev_unmap_blocks(raid_bdev->base_bdev_info[disk_idx].desc,
						     raid_io->raid_ch->base_channel[disk_idx],
						     offset_in_disk, nblocks_in_disk,
						     raid_bdev_base_io_completion, bdev_io);
			break;

		case SPDK_BDEV_IO_TYPE_FLUSH:
			ret = spdk_bdev_flush_blocks(raid_bdev->base_bdev_info[disk_idx].desc,
						     raid_io->raid_ch->base_channel[disk_idx],
						     offset_in_disk, nblocks_in_disk,
						     raid_bdev_base_io_completion, bdev_io);
			break;

		default:
			SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", bdev_io->type);
			assert(false);
			ret = -EIO;
		}

		if (ret == 0) {
			raid_io->base_bdev_io_submitted++;
		} else {
			/* Could not submit (typically -ENOMEM): re-queue this function so
			 * the remaining child I/Os are submitted later. */
			raid_bdev_queue_io_wait(bdev_io, disk_idx,
						_raid0_submit_null_payload_request, ret);
			return;
		}
	}
}
337 
/* Descriptor registering this implementation as the handler for RAID0 level. */
static struct raid_bdev_module g_raid0_module = {
	.level = RAID0,
};
RAID_MODULE_REGISTER(&g_raid0_module)

/* Register the "bdev_raid0" log flag used by SPDK_DEBUGLOG in this file. */
SPDK_LOG_REGISTER_COMPONENT("bdev_raid0", SPDK_LOG_BDEV_RAID0)
344