/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2022 Intel Corporation.
 * Copyright (c) Peng Yu yupeng0921@gmail.com.
 * All rights reserved.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk/log.h"

struct concat_block_range {
	uint64_t start;
	uint64_t length;
};

/*
 * brief:
 * concat_bdev_io_completion function is called by lower layers to notify the raid
 * module that a particular bdev_io is completed.
 * params:
 * bdev_io - pointer to bdev io submitted to lower layers, like child io
 * success - bdev_io status
 * cb_arg - function callback context (parent raid_bdev_io)
 * returns:
 * none
 */
static void
concat_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	if (success) {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	} else {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void concat_submit_rw_request(struct raid_bdev_io *raid_io);

static void
_concat_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_rw_request(raid_io);
}

/*
 * brief:
 * concat_submit_rw_request function is used to submit I/O to the correct
 * member disk for concat bdevs.
 * params:
 * raid_io - pointer to parent raid_bdev_io
 * returns:
 * none
 */
static void
concat_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct concat_block_range *block_range = raid_bdev->module_private;
	uint64_t pd_lba;
	uint64_t pd_blocks;
	int pd_idx;
	int ret = 0;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	struct spdk_bdev_ext_io_opts io_opts = {};
	int i;

	pd_idx = -1;
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		if (block_range[i].start > raid_io->offset_blocks) {
			break;
		}
		pd_idx = i;
	}
	assert(pd_idx >= 0);
	assert(raid_io->offset_blocks >= block_range[pd_idx].start);
	pd_lba = raid_io->offset_blocks - block_range[pd_idx].start;
	pd_blocks = raid_io->num_blocks;
	base_info = &raid_bdev->base_bdev_info[pd_idx];
	if (base_info->desc == NULL) {
		SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
		assert(0);
	}

	/*
	 * Submit child io to the bdev layer using the base bdev descriptor, base
	 * bdev lba, base bdev child io length in blocks, buffer, completion
	 * function and function callback context
	 */
	assert(raid_ch != NULL);
	base_ch = raid_bdev_channel_get_base_channel(raid_ch, pd_idx);

	io_opts.size = sizeof(io_opts);
	io_opts.memory_domain = raid_io->memory_domain;
	io_opts.memory_domain_ctx = raid_io->memory_domain_ctx;
	io_opts.metadata = raid_io->md_buf;

	if (raid_io->type == SPDK_BDEV_IO_TYPE_READ) {
		ret = raid_bdev_readv_blocks_ext(base_info, base_ch,
						 raid_io->iovs, raid_io->iovcnt,
						 pd_lba, pd_blocks, concat_bdev_io_completion,
						 raid_io, &io_opts);
	} else if (raid_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		ret = raid_bdev_writev_blocks_ext(base_info, base_ch,
						  raid_io->iovs, raid_io->iovcnt,
						  pd_lba, pd_blocks, concat_bdev_io_completion,
						  raid_io, &io_opts);
	} else {
		SPDK_ERRLOG("Received unsupported io type %u\n", raid_io->type);
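		/* Only READ and WRITE are routed to submit_rw_request by the generic raid layer. */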
		assert(0);
	}

	if (ret == -ENOMEM) {
		raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
					base_ch, _concat_submit_rw_request);
	} else if (ret != 0) {
		SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
		assert(false);
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void concat_submit_null_payload_request(struct raid_bdev_io *raid_io);

static void
_concat_submit_null_payload_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_null_payload_request(raid_io);
}

static void
concat_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	raid_bdev_io_complete_part(raid_io, 1, success ?
				   SPDK_BDEV_IO_STATUS_SUCCESS :
				   SPDK_BDEV_IO_STATUS_FAILED);

	spdk_bdev_free_io(bdev_io);
}

/*
 * brief:
 * concat_submit_null_payload_request function submits the next batch of
 * io requests with range but without payload, like FLUSH and UNMAP, to member disks;
 * it will submit as many as possible unless one base io request fails with -ENOMEM,
 * in which case it will queue itself for later submission.
 * params:
 * raid_io - pointer to parent raid_bdev_io on the raid bdev device
 * returns:
 * none
 */
static void
concat_submit_null_payload_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev *raid_bdev;
	int ret;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	uint64_t pd_lba;
	uint64_t pd_blocks;
	uint64_t offset_blocks;
	uint64_t num_blocks;
	struct concat_block_range *block_range;
	int i, start_idx, stop_idx;

	raid_bdev = raid_io->raid_bdev;
	block_range = raid_bdev->module_private;

	offset_blocks = raid_io->offset_blocks;
	num_blocks = raid_io->num_blocks;
	start_idx = -1;
	stop_idx = -1;
	/*
	 * Go through all base bdevs, find the first bdev and the last bdev
	 */
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		/* skip the bdevs before the offset_blocks */
		if (offset_blocks >= block_range[i].start + block_range[i].length) {
			continue;
		}
		if (start_idx == -1) {
			start_idx = i;
		} else {
			/*
			 * The offset_blocks might be in the middle of the first bdev.
			 * Except for the first bdev, offset_blocks should always be
			 * at the start of the bdev.
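			 * This holds because concat lays the member data regions out
			 * back to back, with no gaps between them.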
			 */
			assert(offset_blocks == block_range[i].start);
		}
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		if (num_blocks == 0) {
			stop_idx = i;
			break;
		}
	}
	assert(start_idx >= 0);
	assert(stop_idx >= 0);

	if (raid_io->base_bdev_io_remaining == 0) {
		raid_io->base_bdev_io_remaining = stop_idx - start_idx + 1;
	}
	offset_blocks = raid_io->offset_blocks;
	num_blocks = raid_io->num_blocks;
	for (i = start_idx; i <= stop_idx; i++) {
		assert(offset_blocks >= block_range[i].start);
		assert(offset_blocks < block_range[i].start + block_range[i].length);
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		/*
		 * Skip the IOs we have already submitted
		 */
		if (i < start_idx + raid_io->base_bdev_io_submitted) {
			continue;
		}
		base_info = &raid_bdev->base_bdev_info[i];
		base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, i);
		switch (raid_io->type) {
		case SPDK_BDEV_IO_TYPE_UNMAP:
			ret = raid_bdev_unmap_blocks(base_info, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		case SPDK_BDEV_IO_TYPE_FLUSH:
			ret = raid_bdev_flush_blocks(base_info, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		default:
			SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", raid_io->type);
			assert(false);
			ret = -EIO;
		}
		if (ret == 0) {
			raid_io->base_bdev_io_submitted++;
		} else if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
						base_ch, _concat_submit_null_payload_request);
			return;
		} else {
			SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
			assert(false);
			raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
	}
}

static int
concat_start(struct raid_bdev *raid_bdev)
{
	uint64_t total_blockcnt = 0;
	struct raid_base_bdev_info *base_info;
	struct concat_block_range *block_range;

	block_range = calloc(raid_bdev->num_base_bdevs, sizeof(struct concat_block_range));
	if (!block_range) {
		SPDK_ERRLOG("Cannot allocate block_range, num_base_bdevs: %u\n",
			    raid_bdev->num_base_bdevs);
		return -ENOMEM;
	}

	int idx = 0;
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		uint64_t strip_cnt = base_info->data_size >> raid_bdev->strip_size_shift;
		uint64_t pd_block_cnt = strip_cnt << raid_bdev->strip_size_shift;

		base_info->data_size = pd_block_cnt;

		block_range[idx].start = total_blockcnt;
		block_range[idx].length = pd_block_cnt;
		total_blockcnt += pd_block_cnt;
		idx++;
	}

	raid_bdev->module_private = block_range;

	SPDK_DEBUGLOG(bdev_concat, "total blockcount %" PRIu64 ", numbasedev %u, strip size shift %u\n",
		      total_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
	raid_bdev->bdev.blockcnt = total_blockcnt;

	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev->bdev.split_on_optimal_io_boundary = true;

	return 0;
}

static bool
concat_stop(struct raid_bdev *raid_bdev)
{
	struct concat_block_range *block_range = raid_bdev->module_private;

	free(block_range);
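	/* The block_range table built in concat_start() is the only state this module allocates. */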

	return true;
}

static struct raid_bdev_module g_concat_module = {
	.level = CONCAT,
	.base_bdevs_min = 1,
	.memory_domains_supported = true,
	.start = concat_start,
	.stop = concat_stop,
	.submit_rw_request = concat_submit_rw_request,
	.submit_null_payload_request = concat_submit_null_payload_request,
};
RAID_MODULE_REGISTER(&g_concat_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_concat)
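
/*
 * Worked example of the block_range mapping built in concat_start() (illustration
 * only; the numbers are hypothetical): with a strip size of 64 blocks and two base
 * bdevs whose data_size is 100 and 200 blocks, the usable sizes round down to 64
 * and 192 blocks, block_range becomes { {0, 64}, {64, 192} } and bdev.blockcnt is
 * 256. A read at offset_blocks 100 then selects pd_idx 1 and is submitted to the
 * second base bdev at pd_lba 36.
 */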