/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (c) Peng Yu yupeng0921@gmail.com.
 *   All rights reserved.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk/log.h"

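/*
 * Each base bdev covers one contiguous range of the concatenated bdev's address
 * space: [start, start + length) in blocks. The table of these ranges is built
 * in concat_start() and stored in raid_bdev->module_private.
 */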
struct concat_block_range {
	uint64_t start;
	uint64_t length;
};

/*
 * brief:
 * concat_bdev_io_completion function is called by the lower layers to notify
 * the raid module that a particular bdev_io has completed.
 * params:
 * bdev_io - pointer to the bdev io submitted to the lower layers, i.e. the child io
 * success - bdev_io status
 * cb_arg - function callback context (parent raid_bdev_io)
 * returns:
 * none
 */
static void
concat_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	if (success) {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	} else {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void concat_submit_rw_request(struct raid_bdev_io *raid_io);

static void
_concat_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_rw_request(raid_io);
}

/*
 * brief:
 * concat_submit_rw_request function is used to submit I/O to the correct
 * member disk of a concat bdev.
 * params:
 * raid_io - pointer to the parent raid_bdev_io
 * returns:
 * none
 */
static void
concat_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io		*bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct raid_bdev_io_channel	*raid_ch = raid_io->raid_ch;
	struct raid_bdev		*raid_bdev = raid_io->raid_bdev;
	struct concat_block_range	*block_range = raid_bdev->module_private;
	uint64_t			pd_lba;
	uint64_t			pd_blocks;
	int				pd_idx;
	int				ret = 0;
	struct raid_base_bdev_info	*base_info;
	struct spdk_io_channel		*base_ch;
	int i;

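	/*
	 * Find the base bdev that contains offset_blocks: block_range[] is ordered
	 * by start, so the target is the last entry whose start is not past the
	 * I/O offset. The raid layer splits requests on the strip boundary
	 * (split_on_optimal_io_boundary), so a request never spans two base bdevs.
	 */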
	pd_idx = -1;
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		if (block_range[i].start > bdev_io->u.bdev.offset_blocks) {
			break;
		}
		pd_idx = i;
	}
	assert(pd_idx >= 0);
	assert(bdev_io->u.bdev.offset_blocks >= block_range[pd_idx].start);
	pd_lba = bdev_io->u.bdev.offset_blocks - block_range[pd_idx].start;
	pd_blocks = bdev_io->u.bdev.num_blocks;
	base_info = &raid_bdev->base_bdev_info[pd_idx];
	if (base_info->desc == NULL) {
		SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
		assert(0);
	}

	/*
	 * Submit the child io to the bdev layer using the base bdev descriptor, the
	 * base bdev lba, the child io length in blocks, the buffer, the completion
	 * function and the function callback context.
	 */
	assert(raid_ch != NULL);
	assert(raid_ch->base_channel);
	base_ch = raid_ch->base_channel[pd_idx];
	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
		ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch,
						 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
						 pd_lba, pd_blocks, concat_bdev_io_completion,
						 raid_io, bdev_io->u.bdev.ext_opts);
	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch,
						  bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
						  pd_lba, pd_blocks, concat_bdev_io_completion,
						  raid_io, bdev_io->u.bdev.ext_opts);
	} else {
		SPDK_ERRLOG("Received unsupported io type %u\n", bdev_io->type);
		assert(0);
	}

	if (ret == -ENOMEM) {
		raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
					_concat_submit_rw_request);
	} else if (ret != 0) {
		SPDK_ERRLOG("bdev io submit error not due to ENOMEM; this should not happen\n");
		assert(false);
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void concat_submit_null_payload_request(struct raid_bdev_io *raid_io);

static void
_concat_submit_null_payload_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_null_payload_request(raid_io);
}

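/*
 * brief:
 * concat_base_io_complete is the completion callback for each null payload io
 * submitted to a member disk; it counts the result against the parent raid_io
 * and completes the parent once all child ios have finished.
 * params:
 * bdev_io - pointer to the child bdev_io submitted to a member disk
 * success - bdev_io status
 * cb_arg - function callback context (parent raid_bdev_io)
 * returns:
 * none
 */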
static void
concat_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	raid_bdev_io_complete_part(raid_io, 1, success ?
				   SPDK_BDEV_IO_STATUS_SUCCESS :
				   SPDK_BDEV_IO_STATUS_FAILED);

	spdk_bdev_free_io(bdev_io);
}

/*
 * brief:
 * concat_submit_null_payload_request function submits the next batch of
 * io requests that carry a range but no payload, such as FLUSH and UNMAP, to the
 * member disks; it submits as many as possible unless a base io request fails with
 * -ENOMEM, in which case it queues itself for later submission.
 * params:
 * raid_io - pointer to the parent raid_bdev_io on the raid bdev device
 * returns:
 * none
 */
static void
concat_submit_null_payload_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io		*bdev_io;
	struct raid_bdev		*raid_bdev;
	int				ret;
	struct raid_base_bdev_info	*base_info;
	struct spdk_io_channel		*base_ch;
	uint64_t			pd_lba;
	uint64_t			pd_blocks;
	uint64_t			offset_blocks;
	uint64_t			num_blocks;
	struct concat_block_range	*block_range;
	int				i, start_idx, stop_idx;

	bdev_io = spdk_bdev_io_from_ctx(raid_io);
	raid_bdev = raid_io->raid_bdev;
	block_range = raid_bdev->module_private;

	offset_blocks = bdev_io->u.bdev.offset_blocks;
	num_blocks = bdev_io->u.bdev.num_blocks;
	start_idx = -1;
	stop_idx = -1;
	/*
	 * Go through all base bdevs and find the first and the last bdev touched by
	 * this request.
	 */
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		/* skip the bdevs that end before offset_blocks */
		if (offset_blocks >= block_range[i].start + block_range[i].length) {
			continue;
		}
		if (start_idx == -1) {
			start_idx = i;
		} else {
			/*
			 * offset_blocks may fall in the middle of the first bdev. For
			 * every bdev after the first one, offset_blocks must be exactly
			 * at the start of that bdev.
			 */
			assert(offset_blocks == block_range[i].start);
		}
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		if (num_blocks == 0) {
			stop_idx = i;
			break;
		}
	}
	assert(start_idx >= 0);
	assert(stop_idx >= 0);

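	/*
	 * base_bdev_io_remaining is set only on the first pass; if this function is
	 * re-entered after an -ENOMEM retry, the original child io count is kept and
	 * the already submitted ios are skipped below via base_bdev_io_submitted.
	 */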
	if (raid_io->base_bdev_io_remaining == 0) {
		raid_io->base_bdev_io_remaining = stop_idx - start_idx + 1;
	}
	offset_blocks = bdev_io->u.bdev.offset_blocks;
	num_blocks = bdev_io->u.bdev.num_blocks;
	for (i = start_idx; i <= stop_idx; i++) {
		assert(offset_blocks >= block_range[i].start);
		assert(offset_blocks < block_range[i].start + block_range[i].length);
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		/*
		 * Skip the IOs we have already submitted
		 */
		if (i < start_idx + raid_io->base_bdev_io_submitted) {
			continue;
		}
		base_info = &raid_bdev->base_bdev_info[i];
		base_ch = raid_io->raid_ch->base_channel[i];
		switch (bdev_io->type) {
		case SPDK_BDEV_IO_TYPE_UNMAP:
			ret = spdk_bdev_unmap_blocks(base_info->desc, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		case SPDK_BDEV_IO_TYPE_FLUSH:
			ret = spdk_bdev_flush_blocks(base_info->desc, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		default:
			SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", bdev_io->type);
			assert(false);
			ret = -EIO;
		}
		if (ret == 0) {
			raid_io->base_bdev_io_submitted++;
		} else if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
						_concat_submit_null_payload_request);
			return;
		} else {
			SPDK_ERRLOG("bdev io submit error not due to ENOMEM; this should not happen\n");
			assert(false);
			raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
	}
}

static int
concat_start(struct raid_bdev *raid_bdev)
{
	uint64_t total_blockcnt = 0;
	struct raid_base_bdev_info *base_info;
	struct concat_block_range *block_range;

	block_range = calloc(raid_bdev->num_base_bdevs, sizeof(struct concat_block_range));
	if (!block_range) {
		SPDK_ERRLOG("Cannot allocate block_range, num_base_bdevs: %u\n",
			    raid_bdev->num_base_bdevs);
		return -ENOMEM;
	}

	int idx = 0;
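	/*
	 * Build the cumulative block range table: each base bdev contributes a range
	 * that starts where the previous one ends, and its usable size is rounded
	 * down to a whole number of strips.
	 */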
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		uint64_t strip_cnt = base_info->bdev->blockcnt >> raid_bdev->strip_size_shift;
		uint64_t pd_block_cnt = strip_cnt << raid_bdev->strip_size_shift;

		block_range[idx].start = total_blockcnt;
		block_range[idx].length = pd_block_cnt;
		total_blockcnt += pd_block_cnt;
		idx++;
	}

	raid_bdev->module_private = block_range;

	SPDK_DEBUGLOG(bdev_concat, "total blockcount %" PRIu64 ", num base bdevs %u, strip size shift %u\n",
		      total_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
	raid_bdev->bdev.blockcnt = total_blockcnt;

	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev->bdev.split_on_optimal_io_boundary = true;

	return 0;
}

static bool
concat_stop(struct raid_bdev *raid_bdev)
{
	struct concat_block_range *block_range = raid_bdev->module_private;

	free(block_range);

	return true;
}

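/*
 * Module descriptor for the concat raid level. A concat bdev is created through
 * the generic raid bdev RPCs; for example (illustrative only, the exact options
 * depend on the SPDK version in use):
 *   scripts/rpc.py bdev_raid_create -n Concat0 -r concat -z 64 -b "Nvme0n1 Nvme1n1"
 */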
static struct raid_bdev_module g_concat_module = {
	.level = CONCAT,
	.base_bdevs_min = 1,
	.start = concat_start,
	.stop = concat_stop,
	.submit_rw_request = concat_submit_rw_request,
	.submit_null_payload_request = concat_submit_null_payload_request,
};
RAID_MODULE_REGISTER(&g_concat_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_concat)