/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (c) Peng Yu yupeng0921@gmail.com.
 *   All rights reserved.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk/log.h"

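/*
 * Per-member mapping table kept in raid_bdev->module_private: one entry per
 * base bdev, sorted by start. For example (illustrative values), with three
 * 100-block members the table is {0,100}, {100,100}, {200,100}, and the
 * concat bdev exposes blocks 0-299.
 */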
struct concat_block_range {
	uint64_t start;
	uint64_t length;
};

/*
 * brief:
 * concat_bdev_io_completion function is called by the lower layers to notify
 * the raid module that a particular bdev_io has completed.
 * params:
 * bdev_io - pointer to the bdev io submitted to the lower layers, i.e. the child io
 * success - bdev_io status
 * cb_arg - function callback context (parent raid_bdev_io)
 * returns:
 * none
 */
static void
concat_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	if (success) {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	} else {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void concat_submit_rw_request(struct raid_bdev_io *raid_io);

static void
_concat_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_rw_request(raid_io);
}

/*
 * brief:
 * concat_submit_rw_request function submits an I/O request to the correct
 * member disk of a concat bdev. A single child io is always enough because
 * the bdev layer splits requests on the strip boundary (see
 * split_on_optimal_io_boundary in concat_start), so a request never spans
 * two members.
 * params:
 * raid_io - pointer to the parent raid_bdev_io
 * returns:
 * none
 */
static void
concat_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io		*bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct raid_bdev_io_channel	*raid_ch = raid_io->raid_ch;
	struct raid_bdev		*raid_bdev = raid_io->raid_bdev;
	struct concat_block_range	*block_range = raid_bdev->module_private;
	uint64_t			pd_lba;
	uint64_t			pd_blocks;
	int				pd_idx;
	int				ret = 0;
	struct raid_base_bdev_info	*base_info;
	struct spdk_io_channel		*base_ch;
	int				i;

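	/*
	 * Find the member that contains offset_blocks: block_range[] is sorted
	 * by start, so pick the last entry whose start does not exceed the io
	 * offset. For example (illustrative values), with two 100-block members
	 * an io at offset 150 maps to pd_idx 1 and pd_lba 50.
	 */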
	pd_idx = -1;
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		if (block_range[i].start > bdev_io->u.bdev.offset_blocks) {
			break;
		}
		pd_idx = i;
	}
	assert(pd_idx >= 0);
	assert(bdev_io->u.bdev.offset_blocks >= block_range[pd_idx].start);
	pd_lba = bdev_io->u.bdev.offset_blocks - block_range[pd_idx].start;
	pd_blocks = bdev_io->u.bdev.num_blocks;
	base_info = &raid_bdev->base_bdev_info[pd_idx];
	if (base_info->desc == NULL) {
		SPDK_ERRLOG("base bdev desc null for pd_idx %d\n", pd_idx);
		assert(0);
	}

	/*
	 * Submit the child io to the bdev layer using the base bdev descriptor,
	 * base bdev lba, child io length in blocks, buffer, completion function
	 * and callback context.
	 */
	assert(raid_ch != NULL);
	assert(raid_ch->base_channel);
	base_ch = raid_ch->base_channel[pd_idx];
	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
		ret = spdk_bdev_readv_blocks(base_info->desc, base_ch,
					     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					     pd_lba, pd_blocks, concat_bdev_io_completion,
					     raid_io);
	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		ret = spdk_bdev_writev_blocks(base_info->desc, base_ch,
					      bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					      pd_lba, pd_blocks, concat_bdev_io_completion,
					      raid_io);
	} else {
		SPDK_ERRLOG("Received unsupported io type %u\n", bdev_io->type);
		assert(0);
	}

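	/*
	 * -ENOMEM means the base bdev could not allocate a child bdev_io right
	 * now. Queue this raid_io on the base bdev's io_wait queue so that it
	 * is resubmitted via _concat_submit_rw_request() once a bdev_io frees up.
	 */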
	if (ret == -ENOMEM) {
		raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
					_concat_submit_rw_request);
	} else if (ret != 0) {
		SPDK_ERRLOG("bdev io submit error not due to ENOMEM; this should not happen\n");
		assert(false);
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void concat_submit_null_payload_request(struct raid_bdev_io *raid_io);

static void
_concat_submit_null_payload_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_null_payload_request(raid_io);
}

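/*
 * Completion callback for each per-member child io of a null payload request.
 * raid_bdev_io_complete_part() counts the finished child against
 * base_bdev_io_remaining and completes the parent raid_io once all children
 * have finished, reporting failure if any child failed.
 */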
static void
concat_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	raid_bdev_io_complete_part(raid_io, 1, success ?
				   SPDK_BDEV_IO_STATUS_SUCCESS :
				   SPDK_BDEV_IO_STATUS_FAILED);

	spdk_bdev_free_io(bdev_io);
}

/*
 * brief:
 * concat_submit_null_payload_request function submits a batch of io requests
 * that carry a block range but no payload, such as FLUSH and UNMAP, to the
 * member disks. It submits as many child ios as possible unless one fails
 * with -ENOMEM, in which case it queues itself for later resubmission.
 * params:
 * raid_io - pointer to the parent raid_bdev_io
 * returns:
 * none
 */
static void
concat_submit_null_payload_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io		*bdev_io;
	struct raid_bdev		*raid_bdev;
	int				ret;
	struct raid_base_bdev_info	*base_info;
	struct spdk_io_channel		*base_ch;
	uint64_t			pd_lba;
	uint64_t			pd_blocks;
	uint64_t			offset_blocks;
	uint64_t			num_blocks;
	struct concat_block_range	*block_range;
	int				i, start_idx, stop_idx;

	bdev_io = spdk_bdev_io_from_ctx(raid_io);
	raid_bdev = raid_io->raid_bdev;
	block_range = raid_bdev->module_private;

	offset_blocks = bdev_io->u.bdev.offset_blocks;
	num_blocks = bdev_io->u.bdev.num_blocks;
	start_idx = -1;
	stop_idx = -1;
	/*
	 * Go through all base bdevs to find the first and the last members
	 * touched by this request.
	 */
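	/*
	 * Worked example (illustrative values): with three 100-block members
	 * and an unmap at offset 50 for 200 blocks, the loop picks
	 * start_idx = 0 (pd_lba 50, 50 blocks), covers all 100 blocks of
	 * member 1, and stops at stop_idx = 2 (50 blocks).
	 */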
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		/* skip the bdevs that end before offset_blocks */
		if (offset_blocks >= block_range[i].start + block_range[i].length) {
			continue;
		}
		if (start_idx == -1) {
			start_idx = i;
		} else {
			/*
			 * offset_blocks may fall in the middle of the first
			 * bdev; for every bdev after the first it must be
			 * exactly at the start of that bdev.
			 */
			assert(offset_blocks == block_range[i].start);
		}
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		if (num_blocks == 0) {
			stop_idx = i;
			break;
		}
	}
	assert(start_idx >= 0);
	assert(stop_idx >= 0);

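	/*
	 * base_bdev_io_remaining is set only on the first submission attempt;
	 * when this function is re-entered after an -ENOMEM retry, the count is
	 * preserved and base_bdev_io_submitted records how many children have
	 * already been sent.
	 */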
	if (raid_io->base_bdev_io_remaining == 0) {
		raid_io->base_bdev_io_remaining = stop_idx - start_idx + 1;
	}
	offset_blocks = bdev_io->u.bdev.offset_blocks;
	num_blocks = bdev_io->u.bdev.num_blocks;
	for (i = start_idx; i <= stop_idx; i++) {
		assert(offset_blocks >= block_range[i].start);
		assert(offset_blocks < block_range[i].start + block_range[i].length);
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		/*
		 * Skip the ios we have already submitted
		 */
		if (i < start_idx + raid_io->base_bdev_io_submitted) {
			continue;
		}
		base_info = &raid_bdev->base_bdev_info[i];
		base_ch = raid_io->raid_ch->base_channel[i];
		switch (bdev_io->type) {
		case SPDK_BDEV_IO_TYPE_UNMAP:
			ret = spdk_bdev_unmap_blocks(base_info->desc, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		case SPDK_BDEV_IO_TYPE_FLUSH:
			ret = spdk_bdev_flush_blocks(base_info->desc, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		default:
			SPDK_ERRLOG("submit request: invalid io type %u with null payload\n", bdev_io->type);
			assert(false);
			ret = -EIO;
		}
		if (ret == 0) {
			raid_io->base_bdev_io_submitted++;
		} else if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
						_concat_submit_null_payload_request);
			return;
		} else {
			SPDK_ERRLOG("bdev io submit error not due to ENOMEM; this should not happen\n");
			assert(false);
			raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
	}
}

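/*
 * brief:
 * concat_start function builds the block_range mapping table when the raid
 * bdev is started: each member contributes its block count rounded down to a
 * whole number of strips, and the members are laid out back to back.
 * params:
 * raid_bdev - pointer to the raid bdev being started
 * returns:
 * 0 on success, -ENOMEM if the mapping table cannot be allocated
 */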
static int
concat_start(struct raid_bdev *raid_bdev)
{
	uint64_t total_blockcnt = 0;
	struct raid_base_bdev_info *base_info;
	struct concat_block_range *block_range;

	block_range = calloc(raid_bdev->num_base_bdevs, sizeof(struct concat_block_range));
	if (!block_range) {
		SPDK_ERRLOG("Cannot allocate block_range, num_base_bdevs: %u\n",
			    raid_bdev->num_base_bdevs);
		return -ENOMEM;
	}

	int idx = 0;
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
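		/*
		 * Round each member's block count down to a whole number of
		 * strips so that member boundaries fall on strip boundaries.
		 * For example (illustrative values), a 1000-block member with
		 * strip_size_shift 7 (128-block strips) contributes
		 * 7 << 7 = 896 blocks.
		 */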
		uint64_t strip_cnt = base_info->bdev->blockcnt >> raid_bdev->strip_size_shift;
		uint64_t pd_block_cnt = strip_cnt << raid_bdev->strip_size_shift;

		block_range[idx].start = total_blockcnt;
		block_range[idx].length = pd_block_cnt;
		total_blockcnt += pd_block_cnt;
		idx++;
	}

	raid_bdev->module_private = block_range;

	SPDK_DEBUGLOG(bdev_concat, "total blockcount %" PRIu64 ", num_base_bdevs %u, strip size shift %u\n",
		      total_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
	raid_bdev->bdev.blockcnt = total_blockcnt;

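	/*
	 * Let the generic bdev layer split incoming requests on strip
	 * boundaries. Member boundaries are strip-aligned (see the rounding
	 * above), so after the split a rw request can never span two members,
	 * which lets concat_submit_rw_request() submit exactly one child io.
	 */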
	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev->bdev.split_on_optimal_io_boundary = true;

	return 0;
}

static void
concat_stop(struct raid_bdev *raid_bdev)
{
	struct concat_block_range *block_range = raid_bdev->module_private;

	free(block_range);
}

static struct raid_bdev_module g_concat_module = {
	.level = CONCAT,
	.base_bdevs_min = 1,
	.start = concat_start,
	.stop = concat_stop,
	.submit_rw_request = concat_submit_rw_request,
	.submit_null_payload_request = concat_submit_null_payload_request,
};
RAID_MODULE_REGISTER(&g_concat_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_concat)