1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "bdev_raid.h" 35 36 #include "spdk/env.h" 37 #include "spdk/io_channel.h" 38 #include "spdk/string.h" 39 #include "spdk/util.h" 40 41 #include "spdk_internal/log.h" 42 43 /* 44 * brief: 45 * raid0_bdev_io_completion function is called by lower layers to notify raid 46 * module that particular bdev_io is completed. 47 * params: 48 * bdev_io - pointer to bdev io submitted to lower layers, like child io 49 * success - bdev_io status 50 * cb_arg - function callback context, like parent io pointer 51 * returns: 52 * none 53 */ 54 static void 55 raid0_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 56 { 57 struct spdk_bdev_io *parent_io = cb_arg; 58 59 spdk_bdev_free_io(bdev_io); 60 61 if (success) { 62 spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_SUCCESS); 63 } else { 64 spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_FAILED); 65 } 66 } 67 68 static void 69 raid0_waitq_io_process(void *ctx); 70 71 /* 72 * brief: 73 * raid0_submit_rw_request function is used to submit I/O to the correct 74 * member disk for raid0 bdevs. 75 * params: 76 * bdev_io - parent bdev io 77 * start_strip - start strip number of this io 78 * returns: 79 * none 80 */ 81 static void 82 raid0_submit_rw_request(struct spdk_bdev_io *bdev_io, uint64_t start_strip) 83 { 84 struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 85 struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 86 struct raid_bdev *raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 87 uint64_t pd_strip; 88 uint32_t offset_in_strip; 89 uint64_t pd_lba; 90 uint64_t pd_blocks; 91 uint8_t pd_idx; 92 int ret = 0; 93 94 pd_strip = start_strip / raid_bdev->num_base_bdevs; 95 pd_idx = start_strip % raid_bdev->num_base_bdevs; 96 offset_in_strip = bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1); 97 pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip; 98 pd_blocks = bdev_io->u.bdev.num_blocks; 99 if (raid_bdev->base_bdev_info[pd_idx].desc == NULL) { 100 SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx); 101 assert(0); 102 } 103 104 /* 105 * Submit child io to bdev layer with using base bdev descriptors, base 106 * bdev lba, base bdev child io length in blocks, buffer, completion 107 * function and function callback context 108 */ 109 assert(raid_ch != NULL); 110 assert(raid_ch->base_channel); 111 if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { 112 ret = spdk_bdev_readv_blocks(raid_bdev->base_bdev_info[pd_idx].desc, 113 raid_ch->base_channel[pd_idx], 114 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 115 pd_lba, pd_blocks, raid0_bdev_io_completion, 116 bdev_io); 117 } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { 118 ret = spdk_bdev_writev_blocks(raid_bdev->base_bdev_info[pd_idx].desc, 119 raid_ch->base_channel[pd_idx], 120 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, 121 pd_lba, pd_blocks, raid0_bdev_io_completion, 122 bdev_io); 123 } else { 124 SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type); 125 assert(0); 126 } 127 128 if (ret) { 129 raid_bdev_queue_io_wait(bdev_io, pd_idx, raid0_waitq_io_process, ret); 130 } 131 } 132 133 /* 134 * brief: 135 * raid0_waitq_io_process function is the callback function 136 * registered by raid bdev module to bdev when bdev_io was unavailable 137 * for raid0 bdevs. 138 * params: 139 * ctx - pointer to raid_bdev_io 140 * returns: 141 * none 142 */ 143 static void 144 raid0_waitq_io_process(void *ctx) 145 { 146 struct spdk_bdev_io *bdev_io = ctx; 147 struct raid_bdev *raid_bdev; 148 uint64_t start_strip; 149 150 /* 151 * Try to submit childs of parent bdev io. If failed due to resource 152 * crunch then break the loop and don't try to process other queued IOs. 153 */ 154 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 155 start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; 156 raid0_submit_rw_request(bdev_io, start_strip); 157 } 158 159 /* 160 * brief: 161 * raid0_start_rw_request function is the submit_request function for 162 * read/write requests for raid0 bdevs. 163 * params: 164 * ch - pointer to raid bdev io channel 165 * bdev_io - pointer to parent bdev_io on raid bdev device 166 * returns: 167 * none 168 */ 169 void 170 raid0_start_rw_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 171 { 172 struct raid_bdev_io *raid_io; 173 struct raid_bdev *raid_bdev; 174 uint64_t start_strip = 0; 175 uint64_t end_strip = 0; 176 177 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 178 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 179 raid_io->ch = ch; 180 start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift; 181 end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >> 182 raid_bdev->strip_size_shift; 183 if (start_strip != end_strip && raid_bdev->num_base_bdevs > 1) { 184 assert(false); 185 SPDK_ERRLOG("I/O spans strip boundary!\n"); 186 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 187 return; 188 } 189 raid0_submit_rw_request(bdev_io, start_strip); 190 } 191 192 /* raid0 IO range */ 193 struct raid_bdev_io_range { 194 uint64_t strip_size; 195 uint64_t start_strip_in_disk; 196 uint64_t end_strip_in_disk; 197 uint64_t start_offset_in_strip; 198 uint64_t end_offset_in_strip; 199 uint8_t start_disk; 200 uint8_t end_disk; 201 uint8_t n_disks_involved; 202 }; 203 204 static inline void 205 _raid0_get_io_range(struct raid_bdev_io_range *io_range, 206 uint8_t num_base_bdevs, uint64_t strip_size, uint64_t strip_size_shift, 207 uint64_t offset_blocks, uint64_t num_blocks) 208 { 209 uint64_t start_strip; 210 uint64_t end_strip; 211 212 io_range->strip_size = strip_size; 213 214 /* The start and end strip index in raid0 bdev scope */ 215 start_strip = offset_blocks >> strip_size_shift; 216 end_strip = (offset_blocks + num_blocks - 1) >> strip_size_shift; 217 io_range->start_strip_in_disk = start_strip / num_base_bdevs; 218 io_range->end_strip_in_disk = end_strip / num_base_bdevs; 219 220 /* The first strip may have unaligned start LBA offset. 221 * The end strip may have unaligned end LBA offset. 222 * Strips between them certainly have aligned offset and length to boundaries. 223 */ 224 io_range->start_offset_in_strip = offset_blocks % strip_size; 225 io_range->end_offset_in_strip = (offset_blocks + num_blocks - 1) % strip_size; 226 227 /* The base bdev indexes in which start and end strips are located */ 228 io_range->start_disk = start_strip % num_base_bdevs; 229 io_range->end_disk = end_strip % num_base_bdevs; 230 231 /* Calculate how many base_bdevs are involved in io operation. 232 * Number of base bdevs involved is between 1 and num_base_bdevs. 233 * It will be 1 if the first strip and last strip are the same one. 234 */ 235 io_range->n_disks_involved = spdk_min((end_strip - start_strip + 1), num_base_bdevs); 236 } 237 238 static inline void 239 _raid0_split_io_range(struct raid_bdev_io_range *io_range, uint8_t disk_idx, 240 uint64_t *_offset_in_disk, uint64_t *_nblocks_in_disk) 241 { 242 uint64_t n_strips_in_disk; 243 uint64_t start_offset_in_disk; 244 uint64_t end_offset_in_disk; 245 uint64_t offset_in_disk; 246 uint64_t nblocks_in_disk; 247 uint64_t start_strip_in_disk; 248 uint64_t end_strip_in_disk; 249 250 start_strip_in_disk = io_range->start_strip_in_disk; 251 if (disk_idx < io_range->start_disk) { 252 start_strip_in_disk += 1; 253 } 254 255 end_strip_in_disk = io_range->end_strip_in_disk; 256 if (disk_idx > io_range->end_disk) { 257 end_strip_in_disk -= 1; 258 } 259 260 assert(end_strip_in_disk >= start_strip_in_disk); 261 n_strips_in_disk = end_strip_in_disk - start_strip_in_disk + 1; 262 263 if (disk_idx == io_range->start_disk) { 264 start_offset_in_disk = io_range->start_offset_in_strip; 265 } else { 266 start_offset_in_disk = 0; 267 } 268 269 if (disk_idx == io_range->end_disk) { 270 end_offset_in_disk = io_range->end_offset_in_strip; 271 } else { 272 end_offset_in_disk = io_range->strip_size - 1; 273 } 274 275 offset_in_disk = start_offset_in_disk + start_strip_in_disk * io_range->strip_size; 276 nblocks_in_disk = (n_strips_in_disk - 1) * io_range->strip_size 277 + end_offset_in_disk - start_offset_in_disk + 1; 278 279 SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID0, 280 "raid_bdev (strip_size 0x%lx) splits IO to base_bdev (%u) at (0x%lx, 0x%lx).\n", 281 io_range->strip_size, disk_idx, offset_in_disk, nblocks_in_disk); 282 283 *_offset_in_disk = offset_in_disk; 284 *_nblocks_in_disk = nblocks_in_disk; 285 } 286 287 /* 288 * brief: 289 * raid0_submit_null_payload_request function submits the next batch of 290 * io requests with range but without payload, like FLUSH and UNMAP, to member disks; 291 * it will submit as many as possible unless one base io request fails with -ENOMEM, 292 * in which case it will queue itself for later submission. 293 * params: 294 * bdev_io - pointer to parent bdev_io on raid bdev device 295 * returns: 296 * none 297 */ 298 void 299 raid0_submit_null_payload_request(void *_bdev_io) 300 { 301 struct spdk_bdev_io *bdev_io = _bdev_io; 302 struct raid_bdev_io *raid_io; 303 struct raid_bdev *raid_bdev; 304 struct raid_bdev_io_channel *raid_ch; 305 struct raid_bdev_io_range io_range; 306 int ret; 307 308 raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt; 309 raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx; 310 raid_ch = spdk_io_channel_get_ctx(raid_io->ch); 311 312 _raid0_get_io_range(&io_range, raid_bdev->num_base_bdevs, 313 raid_bdev->strip_size, raid_bdev->strip_size_shift, 314 bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks); 315 316 raid_io->base_bdev_io_expected = io_range.n_disks_involved; 317 318 while (raid_io->base_bdev_io_submitted < raid_io->base_bdev_io_expected) { 319 uint8_t disk_idx; 320 uint64_t offset_in_disk; 321 uint64_t nblocks_in_disk; 322 323 /* base_bdev is started from start_disk to end_disk. 324 * It is possible that index of start_disk is larger than end_disk's. 325 */ 326 disk_idx = (io_range.start_disk + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs; 327 328 _raid0_split_io_range(&io_range, disk_idx, &offset_in_disk, &nblocks_in_disk); 329 330 switch (bdev_io->type) { 331 case SPDK_BDEV_IO_TYPE_UNMAP: 332 ret = spdk_bdev_unmap_blocks(raid_bdev->base_bdev_info[disk_idx].desc, 333 raid_ch->base_channel[disk_idx], 334 offset_in_disk, nblocks_in_disk, 335 raid_bdev_base_io_completion, bdev_io); 336 break; 337 338 case SPDK_BDEV_IO_TYPE_FLUSH: 339 ret = spdk_bdev_flush_blocks(raid_bdev->base_bdev_info[disk_idx].desc, 340 raid_ch->base_channel[disk_idx], 341 offset_in_disk, nblocks_in_disk, 342 raid_bdev_base_io_completion, bdev_io); 343 break; 344 345 default: 346 SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", bdev_io->type); 347 assert(false); 348 ret = -EIO; 349 } 350 351 if (ret == 0) { 352 raid_io->base_bdev_io_submitted++; 353 } else { 354 raid_bdev_queue_io_wait(bdev_io, disk_idx, 355 raid0_submit_null_payload_request, ret); 356 return; 357 } 358 } 359 } 360 361 SPDK_LOG_REGISTER_COMPONENT("bdev_raid0", SPDK_LOG_BDEV_RAID0) 362