/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2022 Intel Corporation.
 * Copyright (c) Peng Yu yupeng0921@gmail.com.
 * All rights reserved.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk/log.h"

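/*
 * One entry per base bdev, stored in raid_bdev->module_private: the range of
 * raid bdev blocks (start offset and length) that maps onto that base bdev.
 */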
struct concat_block_range {
	uint64_t start;
	uint64_t length;
};

/*
 * brief:
 * concat_bdev_io_completion function is called by lower layers to notify the
 * raid module that a particular bdev_io is completed.
 * params:
 * bdev_io - pointer to bdev io submitted to lower layers, like child io
 * success - bdev_io status
 * cb_arg - function callback context (parent raid_bdev_io)
 * returns:
 * none
 */
static void
concat_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	if (success) {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	} else {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void concat_submit_rw_request(struct raid_bdev_io *raid_io);

static void
_concat_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_rw_request(raid_io);
}

/*
 * brief:
 * concat_submit_rw_request function is used to submit I/O to the correct
 * member disk for concat bdevs.
 * params:
 * raid_io - pointer to the raid_bdev_io to be processed
 * returns:
 * none
 */
static void
concat_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct concat_block_range *block_range = raid_bdev->module_private;
	uint64_t pd_lba;
	uint64_t pd_blocks;
	int pd_idx;
	int ret = 0;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	struct spdk_bdev_ext_io_opts io_opts = {};
	int i;

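	/*
	 * Find the base bdev whose block range contains offset_blocks. Because
	 * the raid bdev is split on strip boundaries (see concat_start) and
	 * every base bdev holds a whole number of strips, a read/write request
	 * never spans more than one base bdev.
	 */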
	pd_idx = -1;
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		if (block_range[i].start > raid_io->offset_blocks) {
			break;
		}
		pd_idx = i;
	}
	assert(pd_idx >= 0);
	assert(raid_io->offset_blocks >= block_range[pd_idx].start);
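	/* Translate the raid bdev offset into the base bdev's own address space. */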
	pd_lba = raid_io->offset_blocks - block_range[pd_idx].start;
	pd_blocks = raid_io->num_blocks;
	base_info = &raid_bdev->base_bdev_info[pd_idx];
	if (base_info->desc == NULL) {
		SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
		assert(0);
	}

	/*
	 * Submit the child io to the bdev layer using the base bdev descriptor,
	 * base bdev lba, child io length in blocks, buffer, completion
	 * function and function callback context
	 */
	assert(raid_ch != NULL);
	base_ch = raid_bdev_channel_get_base_channel(raid_ch, pd_idx);

	io_opts.size = sizeof(io_opts);
	io_opts.memory_domain = raid_io->memory_domain;
	io_opts.memory_domain_ctx = raid_io->memory_domain_ctx;
	io_opts.metadata = raid_io->md_buf;

	if (raid_io->type == SPDK_BDEV_IO_TYPE_READ) {
		ret = raid_bdev_readv_blocks_ext(base_info, base_ch,
						 raid_io->iovs, raid_io->iovcnt,
						 pd_lba, pd_blocks, concat_bdev_io_completion,
						 raid_io, &io_opts);
	} else if (raid_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		ret = raid_bdev_writev_blocks_ext(base_info, base_ch,
						  raid_io->iovs, raid_io->iovcnt,
						  pd_lba, pd_blocks, concat_bdev_io_completion,
						  raid_io, &io_opts);
	} else {
		SPDK_ERRLOG("Received unsupported io type %u\n", raid_io->type);
		assert(0);
	}

	if (ret == -ENOMEM) {
		raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
					base_ch, _concat_submit_rw_request);
	} else if (ret != 0) {
		SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
		assert(false);
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void concat_submit_null_payload_request(struct raid_bdev_io *raid_io);

static void
_concat_submit_null_payload_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_null_payload_request(raid_io);
}

static void
concat_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

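	/*
	 * Each base io accounts for one part of the parent request;
	 * raid_bdev_io_complete_part() completes the parent raid_io once all
	 * outstanding base ios have finished, failing it if any part failed.
	 */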
	raid_bdev_io_complete_part(raid_io, 1, success ?
				   SPDK_BDEV_IO_STATUS_SUCCESS :
				   SPDK_BDEV_IO_STATUS_FAILED);

	spdk_bdev_free_io(bdev_io);
}

/*
 * brief:
 * concat_submit_null_payload_request function submits the next batch of
 * io requests with a block range but without payload, like FLUSH and UNMAP,
 * to the member disks; it submits as many as possible unless a base io request
 * fails with -ENOMEM, in which case it queues itself for later submission.
 * params:
 * raid_io - pointer to the raid_bdev_io to be processed
 * returns:
 * none
 */
static void
concat_submit_null_payload_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev *raid_bdev;
	int ret;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	uint64_t pd_lba;
	uint64_t pd_blocks;
	uint64_t offset_blocks;
	uint64_t num_blocks;
	struct concat_block_range *block_range;
	int i, start_idx, stop_idx;

	raid_bdev = raid_io->raid_bdev;
	block_range = raid_bdev->module_private;

	offset_blocks = raid_io->offset_blocks;
	num_blocks = raid_io->num_blocks;
	start_idx = -1;
	stop_idx = -1;
	/*
	 * Go through all base bdevs, find the first bdev and the last bdev
	 */
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		/* skip the bdevs before the offset_blocks */
		if (offset_blocks >= block_range[i].start + block_range[i].length) {
			continue;
		}
		if (start_idx == -1) {
			start_idx = i;
		} else {
			/*
			 * The offset_blocks might be in the middle of the first bdev.
			 * For every bdev after the first one, offset_blocks should
			 * always be at the start of that bdev.
			 */
			assert(offset_blocks == block_range[i].start);
		}
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		if (num_blocks == 0) {
			stop_idx = i;
			break;
		}
	}
	assert(start_idx >= 0);
	assert(stop_idx >= 0);

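	/*
	 * Set the total number of base ios for this request only on the first
	 * submission attempt; on an -ENOMEM retry, base_bdev_io_remaining has
	 * already been set and may have been partially decremented by
	 * completed base ios.
	 */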
	if (raid_io->base_bdev_io_remaining == 0) {
		raid_io->base_bdev_io_remaining = stop_idx - start_idx + 1;
	}
	offset_blocks = raid_io->offset_blocks;
	num_blocks = raid_io->num_blocks;
	for (i = start_idx; i <= stop_idx; i++) {
		assert(offset_blocks >= block_range[i].start);
		assert(offset_blocks < block_range[i].start + block_range[i].length);
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		/*
		 * Skip the IOs we have submitted
		 */
		if (i < start_idx + raid_io->base_bdev_io_submitted) {
			continue;
		}
		base_info = &raid_bdev->base_bdev_info[i];
		base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, i);
		switch (raid_io->type) {
		case SPDK_BDEV_IO_TYPE_UNMAP:
			ret = raid_bdev_unmap_blocks(base_info, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		case SPDK_BDEV_IO_TYPE_FLUSH:
			ret = raid_bdev_flush_blocks(base_info, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		default:
			SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", raid_io->type);
			assert(false);
			ret = -EIO;
		}
		if (ret == 0) {
			raid_io->base_bdev_io_submitted++;
		} else if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
						base_ch, _concat_submit_null_payload_request);
			return;
		} else {
			SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
			assert(false);
			raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
	}
}

static int
concat_start(struct raid_bdev *raid_bdev)
{
	uint64_t total_blockcnt = 0;
	struct raid_base_bdev_info *base_info;
	struct concat_block_range *block_range;

	block_range = calloc(raid_bdev->num_base_bdevs, sizeof(struct concat_block_range));
	if (!block_range) {
		SPDK_ERRLOG("Cannot allocate block_range, num_base_bdevs: %u\n",
			    raid_bdev->num_base_bdevs);
		return -ENOMEM;
	}

	int idx = 0;
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
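		/*
		 * Round the usable size of each base bdev down to a whole
		 * number of strips so that the boundary between two base bdevs
		 * always falls on a strip boundary.
		 */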
		uint64_t strip_cnt = base_info->data_size >> raid_bdev->strip_size_shift;
		uint64_t pd_block_cnt = strip_cnt << raid_bdev->strip_size_shift;

		base_info->data_size = pd_block_cnt;

		block_range[idx].start = total_blockcnt;
		block_range[idx].length = pd_block_cnt;
		total_blockcnt += pd_block_cnt;
		idx++;
	}

	raid_bdev->module_private = block_range;

	SPDK_DEBUGLOG(bdev_concat, "total blockcount %" PRIu64 ", numbasedev %u, strip size shift %u\n",
		      total_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
	raid_bdev->bdev.blockcnt = total_blockcnt;

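	/*
	 * Let the generic bdev layer split any read/write that crosses a strip
	 * boundary. Together with the strip-aligned base bdev sizes above, this
	 * guarantees that concat_submit_rw_request() never has to split an io
	 * across two base bdevs.
	 */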
	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev->bdev.split_on_optimal_io_boundary = true;

	return 0;
}

static bool
concat_stop(struct raid_bdev *raid_bdev)
{
	struct concat_block_range *block_range = raid_bdev->module_private;

	free(block_range);

	return true;
}

static struct raid_bdev_module g_concat_module = {
	.level = CONCAT,
	.base_bdevs_min = 1,
	.memory_domains_supported = true,
	.start = concat_start,
	.stop = concat_stop,
	.submit_rw_request = concat_submit_rw_request,
	.submit_null_payload_request = concat_submit_null_payload_request,
};
RAID_MODULE_REGISTER(&g_concat_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_concat)