/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) Peng Yu yupeng0921@gmail.com.
 * All rights reserved.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk/log.h"

struct concat_block_range {
	uint64_t start;
	uint64_t length;
};

/*
 * brief:
 * concat_bdev_io_completion function is called by the lower layers to notify
 * the raid module that a particular bdev_io is completed.
 * params:
 * bdev_io - pointer to bdev io submitted to lower layers, like child io
 * success - bdev_io status
 * cb_arg - function callback context (parent raid_bdev_io)
 * returns:
 * none
 */
static void
concat_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	if (success) {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	} else {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void
concat_submit_rw_request(struct raid_bdev_io *raid_io);

static void
_concat_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_rw_request(raid_io);
}

/*
 * brief:
 * concat_submit_rw_request function is used to submit I/O to the correct
 * member disk for concat bdevs.
 * params:
 * raid_io - pointer to raid_bdev_io
 * returns:
 * none
 */
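/*
 * Illustrative mapping sketch (hypothetical numbers, not from a real
 * configuration): two base bdevs contributing 1024 and 2048 usable blocks
 * give block_range = { { .start = 0, .length = 1024 },
 *                      { .start = 1024, .length = 2048 } }.
 * A read/write at offset_blocks 1536 then selects pd_idx 1 and is sent to
 * that member at pd_lba 1536 - 1024 = 512. Because I/O to the concat bdev
 * is split on the strip size boundary (optimal_io_boundary, see
 * concat_start) and every member boundary is strip-aligned, a single
 * read/write is not expected to span two members here.
 */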
static void
concat_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct concat_block_range *block_range = raid_bdev->module_private;
	uint64_t pd_lba;
	uint64_t pd_blocks;
	int pd_idx;
	int ret = 0;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	int i;

	pd_idx = -1;
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		if (block_range[i].start > bdev_io->u.bdev.offset_blocks) {
			break;
		}
		pd_idx = i;
	}
	assert(pd_idx >= 0);
	assert(bdev_io->u.bdev.offset_blocks >= block_range[pd_idx].start);
	pd_lba = bdev_io->u.bdev.offset_blocks - block_range[pd_idx].start;
	pd_blocks = bdev_io->u.bdev.num_blocks;
	base_info = &raid_bdev->base_bdev_info[pd_idx];
	if (base_info->desc == NULL) {
		SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
		assert(0);
	}

	/*
	 * Submit the child io to the bdev layer using the base bdev descriptor,
	 * base bdev lba, child io length in blocks, buffer, completion function
	 * and callback context.
	 */
	assert(raid_ch != NULL);
	assert(raid_ch->base_channel);
	base_ch = raid_ch->base_channel[pd_idx];
	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
		ret = spdk_bdev_readv_blocks(base_info->desc, base_ch,
					     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					     pd_lba, pd_blocks, concat_bdev_io_completion,
					     raid_io);
	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		ret = spdk_bdev_writev_blocks(base_info->desc, base_ch,
					      bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					      pd_lba, pd_blocks, concat_bdev_io_completion,
					      raid_io);
	} else {
		SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type);
		assert(0);
	}

	if (ret == -ENOMEM) {
		raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
					_concat_submit_rw_request);
	} else if (ret != 0) {
		SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
		assert(false);
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void
concat_submit_null_payload_request(struct raid_bdev_io *raid_io);

static void
_concat_submit_null_payload_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_null_payload_request(raid_io);
}

static void
concat_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	raid_bdev_io_complete_part(raid_io, 1, success ?
				   SPDK_BDEV_IO_STATUS_SUCCESS :
				   SPDK_BDEV_IO_STATUS_FAILED);

	spdk_bdev_free_io(bdev_io);
}

/*
 * brief:
 * concat_submit_null_payload_request function submits the next batch of
 * io requests with range but without payload, like FLUSH and UNMAP, to member disks;
 * it will submit as many as possible unless a base io request fails with -ENOMEM,
 * in which case it will queue itself for later submission.
 * params:
 * raid_io - pointer to raid_bdev_io
 * returns:
 * none
 */
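/*
 * Illustrative sketch (hypothetical numbers): with block_range
 * { { 0, 1024 }, { 1024, 2048 } }, an UNMAP of 1024 blocks at
 * offset_blocks 512 spans start_idx 0 and stop_idx 1 and is split into
 * two child requests: pd_lba 512, pd_blocks 512 on member 0 and
 * pd_lba 0, pd_blocks 512 on member 1. base_bdev_io_remaining is set to
 * the number of children only once, and base_bdev_io_submitted counts the
 * children already sent, so a -ENOMEM requeue resumes with the first
 * not-yet-submitted member rather than resubmitting everything.
 */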
static void
concat_submit_null_payload_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io *bdev_io;
	struct raid_bdev *raid_bdev;
	int ret;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	uint64_t pd_lba;
	uint64_t pd_blocks;
	uint64_t offset_blocks;
	uint64_t num_blocks;
	struct concat_block_range *block_range;
	int i, start_idx, stop_idx;

	bdev_io = spdk_bdev_io_from_ctx(raid_io);
	raid_bdev = raid_io->raid_bdev;
	block_range = raid_bdev->module_private;

	offset_blocks = bdev_io->u.bdev.offset_blocks;
	num_blocks = bdev_io->u.bdev.num_blocks;
	start_idx = -1;
	stop_idx = -1;
	/*
	 * Go through all base bdevs to find the first and the last bdev
	 * covered by this request.
	 */
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		/* skip the bdevs before the offset_blocks */
		if (offset_blocks >= block_range[i].start + block_range[i].length) {
			continue;
		}
		if (start_idx == -1) {
			start_idx = i;
		} else {
			/*
			 * The offset_blocks might be in the middle of the first bdev.
			 * For every bdev after the first one, offset_blocks should
			 * always be at the start of the bdev.
			 */
			assert(offset_blocks == block_range[i].start);
		}
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		if (num_blocks == 0) {
			stop_idx = i;
			break;
		}
	}
	assert(start_idx >= 0);
	assert(stop_idx >= 0);

	if (raid_io->base_bdev_io_remaining == 0) {
		raid_io->base_bdev_io_remaining = stop_idx - start_idx + 1;
	}
	offset_blocks = bdev_io->u.bdev.offset_blocks;
	num_blocks = bdev_io->u.bdev.num_blocks;
	for (i = start_idx; i <= stop_idx; i++) {
		assert(offset_blocks >= block_range[i].start);
		assert(offset_blocks < block_range[i].start + block_range[i].length);
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		/*
		 * Skip the IOs we have already submitted
		 */
		if (i < start_idx + raid_io->base_bdev_io_submitted) {
			continue;
		}
		base_info = &raid_bdev->base_bdev_info[i];
		base_ch = raid_io->raid_ch->base_channel[i];
		switch (bdev_io->type) {
		case SPDK_BDEV_IO_TYPE_UNMAP:
			ret = spdk_bdev_unmap_blocks(base_info->desc, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		case SPDK_BDEV_IO_TYPE_FLUSH:
			ret = spdk_bdev_flush_blocks(base_info->desc, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		default:
			SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", bdev_io->type);
			assert(false);
			ret = -EIO;
		}
		if (ret == 0) {
			raid_io->base_bdev_io_submitted++;
		} else if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
						_concat_submit_null_payload_request);
			return;
		} else {
			SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
			assert(false);
			raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
	}
}

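/*
 * Sizing sketch (hypothetical numbers): with strip_size_shift 7, i.e.
 * 128-block strips, a base bdev of 1000 blocks contributes
 * strip_cnt = 1000 >> 7 = 7 whole strips, so pd_block_cnt = 7 << 7 = 896
 * usable blocks; the trailing partial strip is left unused. The usable
 * sizes are accumulated into block_range[] below and their sum becomes
 * the blockcnt of the concat bdev.
 */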
static int
concat_start(struct raid_bdev *raid_bdev)
{
	uint64_t total_blockcnt = 0;
	struct raid_base_bdev_info *base_info;
	struct concat_block_range *block_range;

	block_range = calloc(raid_bdev->num_base_bdevs, sizeof(struct concat_block_range));
	if (!block_range) {
		SPDK_ERRLOG("Can not allocate block_range, num_base_bdevs: %u",
			    raid_bdev->num_base_bdevs);
		return -ENOMEM;
	}

	int idx = 0;
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		uint64_t strip_cnt = base_info->bdev->blockcnt >> raid_bdev->strip_size_shift;
		uint64_t pd_block_cnt = strip_cnt << raid_bdev->strip_size_shift;

		block_range[idx].start = total_blockcnt;
		block_range[idx].length = pd_block_cnt;
		total_blockcnt += pd_block_cnt;
		idx++;
	}

	raid_bdev->module_private = block_range;

	SPDK_DEBUGLOG(bdev_concat, "total blockcount %" PRIu64 ", numbasedev %u, strip size shift %u\n",
		      total_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
	raid_bdev->bdev.blockcnt = total_blockcnt;

	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev->bdev.split_on_optimal_io_boundary = true;

	return 0;
}

static void
concat_stop(struct raid_bdev *raid_bdev)
{
	struct concat_block_range *block_range = raid_bdev->module_private;

	free(block_range);
}

static struct raid_bdev_module g_concat_module = {
	.level = CONCAT,
	.base_bdevs_min = 1,
	.start = concat_start,
	.stop = concat_stop,
	.submit_rw_request = concat_submit_rw_request,
	.submit_null_payload_request = concat_submit_null_payload_request,
};
RAID_MODULE_REGISTER(&g_concat_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_concat)