/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Peng Yu yupeng0921@gmail.com.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk/log.h"

struct concat_block_range {
	uint64_t start;
	uint64_t length;
};

/*
 * brief:
 * concat_bdev_io_completion function is called by lower layers to notify raid
 * module that particular bdev_io is completed.
 * params:
 * bdev_io - pointer to bdev io submitted to lower layers, like child io
 * success - bdev_io status
 * cb_arg - function callback context (parent raid_bdev_io)
 * returns:
 * none
 */
static void
concat_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	if (success) {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	} else {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void
concat_submit_rw_request(struct raid_bdev_io *raid_io);

static void
_concat_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_rw_request(raid_io);
}
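
/*
 * Worked example (illustrative numbers, not taken from the code): with two
 * base bdevs contributing 100 and 200 usable blocks, concat_start() below
 * builds block_range = { {0, 100}, {100, 200} }.  A read or write at
 * offset_blocks 150 then belongs to base bdev 1 and starts at
 * pd_lba = 150 - 100 = 50, which is exactly the lookup that
 * concat_submit_rw_request() performs.
 */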

/*
 * brief:
 * concat_submit_rw_request function is used to submit I/O to the correct
 * member disk for concat bdevs.
 * params:
 * raid_io - pointer to parent raid_bdev_io
 * returns:
 * none
 */
static void
concat_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct concat_block_range *block_range = raid_bdev->module_private;
	uint64_t pd_lba;
	uint64_t pd_blocks;
	int pd_idx;
	int ret = 0;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	int i;

	pd_idx = -1;
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		if (block_range[i].start > bdev_io->u.bdev.offset_blocks) {
			break;
		}
		pd_idx = i;
	}
	assert(pd_idx >= 0);
	assert(bdev_io->u.bdev.offset_blocks >= block_range[pd_idx].start);
	pd_lba = bdev_io->u.bdev.offset_blocks - block_range[pd_idx].start;
	pd_blocks = bdev_io->u.bdev.num_blocks;
	base_info = &raid_bdev->base_bdev_info[pd_idx];
	if (base_info->desc == NULL) {
		SPDK_ERRLOG("base bdev desc null for pd_idx %d\n", pd_idx);
		assert(0);
	}

	/*
	 * Submit child io to the bdev layer using the base bdev descriptor, base
	 * bdev lba, child io length in blocks, buffer, completion function and
	 * function callback context
	 */
	assert(raid_ch != NULL);
	assert(raid_ch->base_channel);
	base_ch = raid_ch->base_channel[pd_idx];
	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
		ret = spdk_bdev_readv_blocks(base_info->desc, base_ch,
					     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					     pd_lba, pd_blocks, concat_bdev_io_completion,
					     raid_io);
	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		ret = spdk_bdev_writev_blocks(base_info->desc, base_ch,
					      bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					      pd_lba, pd_blocks, concat_bdev_io_completion,
					      raid_io);
	} else {
		SPDK_ERRLOG("Received unsupported io type %u\n", bdev_io->type);
		assert(0);
	}

	if (ret == -ENOMEM) {
		raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
					_concat_submit_rw_request);
	} else if (ret != 0) {
		SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
		assert(false);
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void
concat_submit_null_payload_request(struct raid_bdev_io *raid_io);

static void
_concat_submit_null_payload_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	concat_submit_null_payload_request(raid_io);
}

static void
concat_base_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	raid_bdev_io_complete_part(raid_io, 1, success ?
				   SPDK_BDEV_IO_STATUS_SUCCESS :
				   SPDK_BDEV_IO_STATUS_FAILED);

	spdk_bdev_free_io(bdev_io);
}
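
/*
 * Completion accounting sketch (how the raid_bdev_io_complete_part() call
 * above is expected to behave): concat_submit_null_payload_request() sets
 * raid_io->base_bdev_io_remaining to the number of child I/Os before
 * submitting them, each concat_base_io_complete() callback retires one part,
 * and the parent raid_bdev_io completes (as failed if any child failed) once
 * the count drops to zero.  For instance, an UNMAP split across three
 * members completes only after the third callback.
 */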

/*
 * brief:
 * concat_submit_null_payload_request function submits the next batch of
 * io requests with range but without payload, like FLUSH and UNMAP, to member disks;
 * it will submit as many as possible unless one base io request fails with -ENOMEM,
 * in which case it will queue itself for later submission.
 * params:
 * raid_io - pointer to parent raid_bdev_io
 * returns:
 * none
 */
static void
concat_submit_null_payload_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io *bdev_io;
	struct raid_bdev *raid_bdev;
	int ret;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	uint64_t pd_lba;
	uint64_t pd_blocks;
	uint64_t offset_blocks;
	uint64_t num_blocks;
	struct concat_block_range *block_range;
	int i, start_idx, stop_idx;

	bdev_io = spdk_bdev_io_from_ctx(raid_io);
	raid_bdev = raid_io->raid_bdev;
	block_range = raid_bdev->module_private;

	offset_blocks = bdev_io->u.bdev.offset_blocks;
	num_blocks = bdev_io->u.bdev.num_blocks;
	start_idx = -1;
	stop_idx = -1;
	/*
	 * Go through all base bdevs, find the first bdev and the last bdev
	 */
	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
		/* skip the bdevs before the offset_blocks */
		if (offset_blocks >= block_range[i].start + block_range[i].length) {
			continue;
		}
		if (start_idx == -1) {
			start_idx = i;
		} else {
			/*
			 * The offset_blocks might be in the middle of the first bdev.
			 * Beyond the first bdev, the offset_blocks should always be
			 * at the start of the bdev.
			 */
			assert(offset_blocks == block_range[i].start);
		}
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		if (num_blocks == 0) {
			stop_idx = i;
			break;
		}
	}
	assert(start_idx >= 0);
	assert(stop_idx >= 0);

	if (raid_io->base_bdev_io_remaining == 0) {
		raid_io->base_bdev_io_remaining = stop_idx - start_idx + 1;
	}
	offset_blocks = bdev_io->u.bdev.offset_blocks;
	num_blocks = bdev_io->u.bdev.num_blocks;
	for (i = start_idx; i <= stop_idx; i++) {
		assert(offset_blocks >= block_range[i].start);
		assert(offset_blocks < block_range[i].start + block_range[i].length);
		pd_lba = offset_blocks - block_range[i].start;
		pd_blocks = spdk_min(num_blocks, block_range[i].length - pd_lba);
		offset_blocks += pd_blocks;
		num_blocks -= pd_blocks;
		/*
		 * Skip the IOs we have already submitted
		 */
		if (i < start_idx + raid_io->base_bdev_io_submitted) {
			continue;
		}
		base_info = &raid_bdev->base_bdev_info[i];
		base_ch = raid_io->raid_ch->base_channel[i];
		switch (bdev_io->type) {
		case SPDK_BDEV_IO_TYPE_UNMAP:
			ret = spdk_bdev_unmap_blocks(base_info->desc, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		case SPDK_BDEV_IO_TYPE_FLUSH:
			ret = spdk_bdev_flush_blocks(base_info->desc, base_ch,
						     pd_lba, pd_blocks,
						     concat_base_io_complete, raid_io);
			break;
		default:
			SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", bdev_io->type);
			assert(false);
			ret = -EIO;
		}
		if (ret == 0) {
			raid_io->base_bdev_io_submitted++;
		} else if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
						_concat_submit_null_payload_request);
			return;
		} else {
			SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
			assert(false);
			raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
	}
}
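
/*
 * brief:
 * concat_start builds the cumulative block_range map over the member disks
 * when the concat bdev is started.  Each member contributes only a whole
 * number of strips.  Worked example (illustrative numbers, not taken from
 * the code): with strip_size_shift = 7 (128-block strips), a member of 1000
 * blocks contributes strip_cnt = 1000 >> 7 = 7 strips, i.e. pd_block_cnt =
 * 7 << 7 = 896 blocks; its trailing 104 blocks are left unused.
 * params:
 * raid_bdev - pointer to raid bdev
 * returns:
 * 0 on success, -ENOMEM if the block_range array cannot be allocated
 */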
static int
concat_start(struct raid_bdev *raid_bdev)
{
	uint64_t total_blockcnt = 0;
	struct raid_base_bdev_info *base_info;
	struct concat_block_range *block_range;

	block_range = calloc(raid_bdev->num_base_bdevs, sizeof(struct concat_block_range));
	if (!block_range) {
		SPDK_ERRLOG("Cannot allocate block_range, num_base_bdevs: %u\n",
			    raid_bdev->num_base_bdevs);
		return -ENOMEM;
	}

	int idx = 0;
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		uint64_t strip_cnt = base_info->bdev->blockcnt >> raid_bdev->strip_size_shift;
		uint64_t pd_block_cnt = strip_cnt << raid_bdev->strip_size_shift;

		block_range[idx].start = total_blockcnt;
		block_range[idx].length = pd_block_cnt;
		total_blockcnt += pd_block_cnt;
		idx++;
	}

	raid_bdev->module_private = block_range;

	SPDK_DEBUGLOG(bdev_concat, "total blockcount %" PRIu64 ", numbasedev %u, strip size shift %u\n",
		      total_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
	raid_bdev->bdev.blockcnt = total_blockcnt;

	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev->bdev.split_on_optimal_io_boundary = true;

	return 0;
}

static void
concat_stop(struct raid_bdev *raid_bdev)
{
	struct concat_block_range *block_range = raid_bdev->module_private;

	free(block_range);
}

static struct raid_bdev_module g_concat_module = {
	.level = CONCAT,
	.base_bdevs_min = 1,
	.start = concat_start,
	.stop = concat_stop,
	.submit_rw_request = concat_submit_rw_request,
	.submit_null_payload_request = concat_submit_null_payload_request,
};
RAID_MODULE_REGISTER(&g_concat_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_concat)
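
/*
 * Usage sketch (assumption: an SPDK build where the concat level is wired
 * into the bdev_raid_create RPC; verify the flag spellings against the
 * scripts/rpc.py of your SPDK version):
 *
 *   scripts/rpc.py bdev_raid_create -n Concat0 -z 64 -r concat \
 *           -b "Nvme0n1 Nvme1n1"
 *
 * This would create a concat bdev named Concat0 with a 64 KiB strip size on
 * top of the two listed base bdevs, with member capacities appended in the
 * order given.
 */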