/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) Peng Yu yupeng0921@gmail.com.
 * All rights reserved.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk/log.h"

struct concat_block_range {
        uint64_t start;
        uint64_t length;
};

/*
 * brief:
 * concat_bdev_io_completion function is called by lower layers to notify the
 * raid module that a particular bdev_io has completed.
 * params:
 * bdev_io - pointer to bdev io submitted to lower layers, like child io
 * success - bdev_io status
 * cb_arg - function callback context (parent raid_bdev_io)
 * returns:
 * none
 */
static void
concat_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
        struct raid_bdev_io *raid_io = cb_arg;

        spdk_bdev_free_io(bdev_io);

        if (success) {
                raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
        } else {
                raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
        }
}

static void concat_submit_rw_request(struct raid_bdev_io *raid_io);

static void
_concat_submit_rw_request(void *_raid_io)
{
        struct raid_bdev_io *raid_io = _raid_io;

        concat_submit_rw_request(raid_io);
}

/*
 * brief:
 * concat_submit_rw_request function is used to submit I/O to the correct
 * member disk for concat bdevs.
 * params:
 * raid_io - pointer to the parent raid_bdev_io
 * returns:
 * none
 */
static void
concat_submit_rw_request(struct raid_bdev_io *raid_io)
{
        struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
        struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
        struct raid_bdev *raid_bdev = raid_io->raid_bdev;
        struct concat_block_range *block_range = raid_bdev->module_private;
        uint64_t pd_lba;
        uint64_t pd_blocks;
        int pd_idx;
        int ret = 0;
        struct raid_base_bdev_info *base_info;
        struct spdk_io_channel *base_ch;
        int i;

        /* Find the member disk whose block range contains offset_blocks. */
        pd_idx = -1;
        for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
                if (block_range[i].start > bdev_io->u.bdev.offset_blocks) {
                        break;
                }
                pd_idx = i;
        }
        assert(pd_idx >= 0);
        assert(bdev_io->u.bdev.offset_blocks >= block_range[pd_idx].start);
        pd_lba = bdev_io->u.bdev.offset_blocks - block_range[pd_idx].start;
        pd_blocks = bdev_io->u.bdev.num_blocks;
        base_info = &raid_bdev->base_bdev_info[pd_idx];
        if (base_info->desc == NULL) {
                SPDK_ERRLOG("base bdev desc null for pd_idx %d\n", pd_idx);
                assert(0);
        }

        /*
         * Submit the child io to the bdev layer using the base bdev descriptor,
         * base bdev lba, child io length in blocks, buffer, completion function
         * and callback context.
         */
        assert(raid_ch != NULL);
        assert(raid_ch->base_channel);
        base_ch = raid_ch->base_channel[pd_idx];
        if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
                ret = spdk_bdev_readv_blocks(base_info->desc, base_ch,
                                             bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
                                             pd_lba, pd_blocks, concat_bdev_io_completion,
                                             raid_io);
        } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
                ret = spdk_bdev_writev_blocks(base_info->desc, base_ch,
                                              bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
                                              pd_lba, pd_blocks, concat_bdev_io_completion,
                                              raid_io);
        } else {
                SPDK_ERRLOG("Received unsupported io type %u\n", bdev_io->type);
                assert(0);
        }

        if (ret == -ENOMEM) {
                raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
                                        _concat_submit_rw_request);
        } else if (ret != 0) {
                SPDK_ERRLOG("bdev io submit error not due to ENOMEM; this should not happen\n");
                assert(false);
                raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
        }
}
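
/*
 * Worked example (illustration only, hypothetical sizes): assume two base
 * bdevs whose usable sizes after strip-size truncation are 1000 and 2000
 * blocks. concat_start() then builds
 *   block_range[0] = { .start = 0,    .length = 1000 }
 *   block_range[1] = { .start = 1000, .length = 2000 }
 * and a read at offset_blocks == 1500 is routed by concat_submit_rw_request()
 * to pd_idx 1 with pd_lba == 1500 - 1000 == 500. Because the raid bdev splits
 * I/O on the strip-size boundary and every member length is a multiple of the
 * strip size, a single read/write never spans two members, so one child I/O
 * is always enough.
 */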

static void concat_submit_null_payload_request(struct raid_bdev_io *raid_io);

static void
_concat_submit_null_payload_request(void *_raid_io)
{
        struct raid_bdev_io *raid_io = _raid_io;

        concat_submit_null_payload_request(raid_io);
}

/*
 * brief:
 * concat_base_io_complete is the completion callback for the child requests
 * submitted by concat_submit_null_payload_request; it completes the parent
 * raid_bdev_io once all child requests have finished.
 * params:
 * bdev_io - pointer to the child bdev_io
 * success - child bdev_io status
 * cb_arg - function callback context (parent raid_bdev_io)
 * returns:
 * none
 */
static void
concat_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
        struct raid_bdev_io *raid_io = cb_arg;

        raid_bdev_io_complete_part(raid_io, 1, success ?
                                   SPDK_BDEV_IO_STATUS_SUCCESS :
                                   SPDK_BDEV_IO_STATUS_FAILED);

        spdk_bdev_free_io(bdev_io);
}

/*
 * brief:
 * concat_submit_null_payload_request function submits the next batch of
 * io requests with a range but without payload, like FLUSH and UNMAP, to the
 * member disks; it submits as many as possible unless a base io request fails
 * with -ENOMEM, in which case it queues itself for later submission.
 * params:
 * raid_io - pointer to the parent raid_bdev_io
 * returns:
 * none
 */
static void
concat_submit_null_payload_request(struct raid_bdev_io *raid_io)
{
        struct spdk_bdev_io *bdev_io;
        struct raid_bdev *raid_bdev;
        int ret;
        struct raid_base_bdev_info *base_info;
        struct spdk_io_channel *base_ch;
        uint64_t pd_lba;
        uint64_t pd_blocks;
        uint64_t offset_blocks;
        uint64_t num_blocks;
        struct concat_block_range *block_range;
        int i, start_idx, stop_idx;

        bdev_io = spdk_bdev_io_from_ctx(raid_io);
        raid_bdev = raid_io->raid_bdev;
        block_range = raid_bdev->module_private;

        offset_blocks = bdev_io->u.bdev.offset_blocks;
        num_blocks = bdev_io->u.bdev.num_blocks;
        start_idx = -1;
        stop_idx = -1;
        /*
         * Go through all base bdevs to find the first and the last bdev
         * covered by this request.
         */
        for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
                /* skip the bdevs before the offset_blocks */
                if (offset_blocks >= block_range[i].start + block_range[i].length) {
                        continue;
                }
                if (start_idx == -1) {
                        start_idx = i;
                } else {
                        /*
                         * The offset_blocks might fall in the middle of the first
                         * bdev. For every bdev after the first one, offset_blocks
                         * should always be at the start of the bdev.
                         */
                        assert(offset_blocks == block_range[i].start);
                }
                pd_lba = offset_blocks - block_range[i].start;
                pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
                offset_blocks += pd_blocks;
                num_blocks -= pd_blocks;
                if (num_blocks == 0) {
                        stop_idx = i;
                        break;
                }
        }
        assert(start_idx >= 0);
        assert(stop_idx >= 0);

        if (raid_io->base_bdev_io_remaining == 0) {
                raid_io->base_bdev_io_remaining = stop_idx - start_idx + 1;
        }
        offset_blocks = bdev_io->u.bdev.offset_blocks;
        num_blocks = bdev_io->u.bdev.num_blocks;
        for (i = start_idx; i <= stop_idx; i++) {
                assert(offset_blocks >= block_range[i].start);
                assert(offset_blocks < block_range[i].start + block_range[i].length);
                pd_lba = offset_blocks - block_range[i].start;
                pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
                offset_blocks += pd_blocks;
                num_blocks -= pd_blocks;
                /*
                 * Skip the IOs we have already submitted.
                 */
                if (i < start_idx + raid_io->base_bdev_io_submitted) {
                        continue;
                }
                base_info = &raid_bdev->base_bdev_info[i];
                base_ch = raid_io->raid_ch->base_channel[i];
                switch (bdev_io->type) {
                case SPDK_BDEV_IO_TYPE_UNMAP:
                        ret = spdk_bdev_unmap_blocks(base_info->desc, base_ch,
                                                     pd_lba, pd_blocks,
                                                     concat_base_io_complete, raid_io);
                        break;
                case SPDK_BDEV_IO_TYPE_FLUSH:
                        ret = spdk_bdev_flush_blocks(base_info->desc, base_ch,
                                                     pd_lba, pd_blocks,
                                                     concat_base_io_complete, raid_io);
                        break;
                default:
                        SPDK_ERRLOG("submit request: invalid io type with null payload %u\n",
                                    bdev_io->type);
                        assert(false);
                        ret = -EIO;
                }
                if (ret == 0) {
                        raid_io->base_bdev_io_submitted++;
                } else if (ret == -ENOMEM) {
                        raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
                                                _concat_submit_null_payload_request);
                        return;
                } else {
                        SPDK_ERRLOG("bdev io submit error not due to ENOMEM; this should not happen\n");
                        assert(false);
                        raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
                        return;
                }
        }
}
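
/*
 * Worked example (illustration only, hypothetical sizes): with the same
 * layout as above (members of 1000 and 2000 blocks), an UNMAP of 600 blocks
 * starting at offset_blocks == 800 covers both members. The first pass finds
 * start_idx == 0 and stop_idx == 1, and the second pass issues two child
 * requests: pd_lba 800 / pd_blocks 200 to member 0 and pd_lba 0 /
 * pd_blocks 400 to member 1. base_bdev_io_submitted lets a resubmission after
 * -ENOMEM continue from the first child request that has not been sent yet.
 */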

/*
 * brief:
 * concat_start function is called by the raid framework when the concat bdev
 * is started; it builds the per-member block range table and sets the size of
 * the concat bdev.
 * params:
 * raid_bdev - pointer to raid_bdev
 * returns:
 * 0 on success, non-zero on failure
 */
static int
concat_start(struct raid_bdev *raid_bdev)
{
        uint64_t total_blockcnt = 0;
        struct raid_base_bdev_info *base_info;
        struct concat_block_range *block_range;

        block_range = calloc(raid_bdev->num_base_bdevs, sizeof(struct concat_block_range));
        if (!block_range) {
                SPDK_ERRLOG("Cannot allocate block_range, num_base_bdevs: %u\n",
                            raid_bdev->num_base_bdevs);
                return -ENOMEM;
        }

        int idx = 0;
        RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
                uint64_t strip_cnt = base_info->bdev->blockcnt >> raid_bdev->strip_size_shift;
                uint64_t pd_block_cnt = strip_cnt << raid_bdev->strip_size_shift;

                block_range[idx].start = total_blockcnt;
                block_range[idx].length = pd_block_cnt;
                total_blockcnt += pd_block_cnt;
                idx++;
        }

        raid_bdev->module_private = block_range;

        SPDK_DEBUGLOG(bdev_concat, "total blockcount %" PRIu64 ", numbasedev %u, strip size shift %u\n",
                      total_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
        raid_bdev->bdev.blockcnt = total_blockcnt;

        raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
        raid_bdev->bdev.split_on_optimal_io_boundary = true;

        return 0;
}
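
/*
 * Note on the truncation in concat_start() (hypothetical numbers): with
 * strip_size_shift == 7 (128-block strips), a base bdev of 1000000 blocks
 * contributes (1000000 >> 7) << 7 == 999936 blocks. The partial strip at the
 * end of each member is dropped so that member boundaries always fall on
 * strip boundaries, which is what allows the split-on-strip-boundary setting
 * above to keep every read/write within a single member.
 */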

/*
 * brief:
 * concat_stop function is called by the raid framework when the concat bdev
 * is stopped; it frees the block range table allocated in concat_start.
 * params:
 * raid_bdev - pointer to raid_bdev
 * returns:
 * none
 */
static void
concat_stop(struct raid_bdev *raid_bdev)
{
        struct concat_block_range *block_range = raid_bdev->module_private;

        free(block_range);
}

static struct raid_bdev_module g_concat_module = {
        .level = CONCAT,
        .base_bdevs_min = 1,
        .start = concat_start,
        .stop = concat_stop,
        .submit_rw_request = concat_submit_rw_request,
        .submit_null_payload_request = concat_submit_null_payload_request,
};
RAID_MODULE_REGISTER(&g_concat_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_concat)
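
/*
 * Usage sketch (assumes an SPDK build with concat support; the exact RPC
 * flags may differ between versions):
 *
 *   ./scripts/rpc.py bdev_raid_create -n Concat0 -z 64 -r concat \
 *       -b "Nvme0n1 Nvme1n1"
 *
 * creates a concat bdev named Concat0 with a 64 KiB strip size on top of the
 * two base bdevs, and bdev_raid_delete removes it again.
 */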