/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/io_channel.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk_internal/log.h"

/*
 * brief:
 * raid0_bdev_io_completion function is called by the lower layers to notify
 * the raid module that a particular bdev_io is completed.
 * params:
 * bdev_io - pointer to the bdev io submitted to the lower layers (child io)
 * success - bdev_io completion status
 * cb_arg - callback context (parent io pointer)
 * returns:
 * none
 */
static void
raid0_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *parent_io = cb_arg;

	/* The child io is no longer needed; release it before completing the parent. */
	spdk_bdev_free_io(bdev_io);

	if (success) {
		spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	} else {
		spdk_bdev_io_complete(parent_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void
raid0_waitq_io_process(void *ctx);
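/*
 * Layout note (illustrative, not part of the original source): raid0
 * distributes strips round-robin across the member disks. With N member
 * disks, virtual strip k lives on disk (k % N) at strip row (k / N)
 * within that disk, e.g. for N = 3:
 *
 *   virtual strip:  0  1  2  3  4  5 ...
 *   disk index:     0  1  2  0  1  2 ...
 *   strip in disk:  0  0  0  1  1  1 ...
 *
 * The helpers below all derive from this mapping.
 */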
/*
 * brief:
 * raid0_submit_rw_request function submits the read/write request to the
 * correct member disk for raid0 bdevs.
 * params:
 * raid_io - pointer to raid_bdev_io
 * returns:
 * none
 */
void
raid0_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
	struct raid_bdev *raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt;
	uint64_t pd_strip;
	uint32_t offset_in_strip;
	uint64_t pd_lba;
	uint64_t pd_blocks;
	uint8_t pd_idx;
	int ret = 0;
	uint64_t start_strip;
	uint64_t end_strip;

	start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift;
	end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >>
		    raid_bdev->strip_size_shift;
	if (start_strip != end_strip && raid_bdev->num_base_bdevs > 1) {
		/* A read/write request must fit within one strip; splitting is
		 * handled above this layer.
		 */
		assert(false);
		SPDK_ERRLOG("I/O spans strip boundary!\n");
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	pd_strip = start_strip / raid_bdev->num_base_bdevs;
	pd_idx = start_strip % raid_bdev->num_base_bdevs;
	offset_in_strip = bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1);
	pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip;
	pd_blocks = bdev_io->u.bdev.num_blocks;
	if (raid_bdev->base_bdev_info[pd_idx].desc == NULL) {
		SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
		assert(0);
	}

	/*
	 * Submit the child io to the bdev layer using the base bdev descriptor,
	 * base bdev lba, child io length in blocks, buffer, completion function
	 * and callback context.
	 */
	assert(raid_ch != NULL);
	assert(raid_ch->base_channel);
	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
		ret = spdk_bdev_readv_blocks(raid_bdev->base_bdev_info[pd_idx].desc,
					     raid_ch->base_channel[pd_idx],
					     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					     pd_lba, pd_blocks, raid0_bdev_io_completion,
					     bdev_io);
	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		ret = spdk_bdev_writev_blocks(raid_bdev->base_bdev_info[pd_idx].desc,
					      raid_ch->base_channel[pd_idx],
					      bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					      pd_lba, pd_blocks, raid0_bdev_io_completion,
					      bdev_io);
	} else {
		SPDK_ERRLOG("Received unsupported io type %u\n", bdev_io->type);
		assert(0);
		ret = -EIO;
	}

	if (ret) {
		/* Typically -ENOMEM from the base bdev: queue the io and retry
		 * from raid0_waitq_io_process once resources are available.
		 */
		raid_bdev_queue_io_wait(bdev_io, pd_idx, raid0_waitq_io_process, ret);
	}
}
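/*
 * Worked example (illustrative values, not taken from the module): with
 * num_base_bdevs = 4 and strip_size = 128 blocks (strip_size_shift = 7),
 * an 8-block read at offset_blocks = 1040 maps as
 *   start_strip = 1040 >> 7 = 8, pd_strip = 8 / 4 = 2, pd_idx = 8 % 4 = 0,
 *   offset_in_strip = 1040 & 127 = 16, pd_lba = (2 << 7) + 16 = 272,
 * so the whole io is serviced by base bdev 0 starting at lba 272.
 */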
/*
 * brief:
 * raid0_waitq_io_process function is the callback registered with the bdev
 * layer when a raid0 io could not be submitted because resources were
 * unavailable; it retries the submission.
 * params:
 * ctx - pointer to the parent spdk_bdev_io
 * returns:
 * none
 */
static void
raid0_waitq_io_process(void *ctx)
{
	struct spdk_bdev_io *bdev_io = ctx;
	struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;

	raid0_submit_rw_request(raid_io);
}

/* raid0 IO range */
struct raid_bdev_io_range {
	uint64_t strip_size;
	uint64_t start_strip_in_disk;
	uint64_t end_strip_in_disk;
	uint64_t start_offset_in_strip;
	uint64_t end_offset_in_strip;
	uint8_t start_disk;
	uint8_t end_disk;
	uint8_t n_disks_involved;
};

static inline void
_raid0_get_io_range(struct raid_bdev_io_range *io_range,
		    uint8_t num_base_bdevs, uint64_t strip_size, uint64_t strip_size_shift,
		    uint64_t offset_blocks, uint64_t num_blocks)
{
	uint64_t start_strip;
	uint64_t end_strip;

	io_range->strip_size = strip_size;

	/* The start and end strip index in raid0 bdev scope */
	start_strip = offset_blocks >> strip_size_shift;
	end_strip = (offset_blocks + num_blocks - 1) >> strip_size_shift;
	io_range->start_strip_in_disk = start_strip / num_base_bdevs;
	io_range->end_strip_in_disk = end_strip / num_base_bdevs;

	/* The first strip may have an unaligned start LBA offset.
	 * The end strip may have an unaligned end LBA offset.
	 * Strips between them are aligned to strip boundaries in both offset
	 * and length.
	 */
	io_range->start_offset_in_strip = offset_blocks % strip_size;
	io_range->end_offset_in_strip = (offset_blocks + num_blocks - 1) % strip_size;

	/* The base bdev indexes in which the start and end strips are located */
	io_range->start_disk = start_strip % num_base_bdevs;
	io_range->end_disk = end_strip % num_base_bdevs;

	/* Calculate how many base bdevs are involved in the io operation.
	 * The number of base bdevs involved is between 1 and num_base_bdevs.
	 * It will be 1 if the first strip and the last strip are the same one.
	 */
	io_range->n_disks_involved = spdk_min((end_strip - start_strip + 1), num_base_bdevs);
}
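/*
 * Worked example (illustrative values): with num_base_bdevs = 3 and
 * strip_size = 8 blocks (strip_size_shift = 3), a request with
 * offset_blocks = 10 and num_blocks = 20 covers blocks 10..29, i.e.
 * strips 1..3, so _raid0_get_io_range() yields
 *   start_strip_in_disk = 0, end_strip_in_disk = 1,
 *   start_offset_in_strip = 2, end_offset_in_strip = 5,
 *   start_disk = 1, end_disk = 0, n_disks_involved = 3.
 */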
static inline void
_raid0_split_io_range(struct raid_bdev_io_range *io_range, uint8_t disk_idx,
		      uint64_t *_offset_in_disk, uint64_t *_nblocks_in_disk)
{
	uint64_t n_strips_in_disk;
	uint64_t start_offset_in_disk;
	uint64_t end_offset_in_disk;
	uint64_t offset_in_disk;
	uint64_t nblocks_in_disk;
	uint64_t start_strip_in_disk;
	uint64_t end_strip_in_disk;

	/* Disks before start_disk only join the range from the next strip row;
	 * disks after end_disk drop out one strip row earlier.
	 */
	start_strip_in_disk = io_range->start_strip_in_disk;
	if (disk_idx < io_range->start_disk) {
		start_strip_in_disk += 1;
	}

	end_strip_in_disk = io_range->end_strip_in_disk;
	if (disk_idx > io_range->end_disk) {
		end_strip_in_disk -= 1;
	}

	assert(end_strip_in_disk >= start_strip_in_disk);
	n_strips_in_disk = end_strip_in_disk - start_strip_in_disk + 1;

	if (disk_idx == io_range->start_disk) {
		start_offset_in_disk = io_range->start_offset_in_strip;
	} else {
		start_offset_in_disk = 0;
	}

	if (disk_idx == io_range->end_disk) {
		end_offset_in_disk = io_range->end_offset_in_strip;
	} else {
		end_offset_in_disk = io_range->strip_size - 1;
	}

	offset_in_disk = start_offset_in_disk + start_strip_in_disk * io_range->strip_size;
	nblocks_in_disk = (n_strips_in_disk - 1) * io_range->strip_size
			  + end_offset_in_disk - start_offset_in_disk + 1;

	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID0,
		      "raid_bdev (strip_size 0x%" PRIx64 ") splits IO to base_bdev (%u) at (0x%" PRIx64 ", 0x%" PRIx64 ").\n",
		      io_range->strip_size, disk_idx, offset_in_disk, nblocks_in_disk);

	*_offset_in_disk = offset_in_disk;
	*_nblocks_in_disk = nblocks_in_disk;
}
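/*
 * Continuing the worked example above (illustrative values),
 * _raid0_split_io_range() produces one sub-range per involved disk:
 *   disk 1 (start_disk): offset_in_disk = 2, nblocks_in_disk = 6 (blocks 10..15)
 *   disk 2:              offset_in_disk = 0, nblocks_in_disk = 8 (blocks 16..23)
 *   disk 0 (end_disk):   offset_in_disk = 8, nblocks_in_disk = 6 (blocks 24..29)
 * which together cover exactly the original 20 blocks.
 */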
static void
_raid0_submit_null_payload_request(void *_bdev_io)
{
	struct spdk_bdev_io *bdev_io = _bdev_io;
	struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;

	raid0_submit_null_payload_request(raid_io);
}

/*
 * brief:
 * raid0_submit_null_payload_request function submits the next batch of
 * io requests that carry a range but no payload, such as FLUSH and UNMAP,
 * to the member disks; it submits as many as possible unless a base io
 * request fails with -ENOMEM, in which case it queues itself for later
 * submission.
 * params:
 * raid_io - pointer to raid_bdev_io
 * returns:
 * none
 */
void
raid0_submit_null_payload_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io *bdev_io;
	struct raid_bdev *raid_bdev;
	struct raid_bdev_io_range io_range;
	int ret;

	bdev_io = spdk_bdev_io_from_ctx(raid_io);
	raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt;

	_raid0_get_io_range(&io_range, raid_bdev->num_base_bdevs,
			    raid_bdev->strip_size, raid_bdev->strip_size_shift,
			    bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks);

	raid_io->base_bdev_io_expected = io_range.n_disks_involved;

	while (raid_io->base_bdev_io_submitted < raid_io->base_bdev_io_expected) {
		uint8_t disk_idx;
		uint64_t offset_in_disk;
		uint64_t nblocks_in_disk;

		/* Base bdevs are walked from start_disk to end_disk, wrapping
		 * around the array: the index of start_disk may be larger than
		 * end_disk's.
		 */
		disk_idx = (io_range.start_disk + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs;

		_raid0_split_io_range(&io_range, disk_idx, &offset_in_disk, &nblocks_in_disk);

		switch (bdev_io->type) {
		case SPDK_BDEV_IO_TYPE_UNMAP:
			ret = spdk_bdev_unmap_blocks(raid_bdev->base_bdev_info[disk_idx].desc,
						     raid_io->raid_ch->base_channel[disk_idx],
						     offset_in_disk, nblocks_in_disk,
						     raid_bdev_base_io_completion, bdev_io);
			break;

		case SPDK_BDEV_IO_TYPE_FLUSH:
			ret = spdk_bdev_flush_blocks(raid_bdev->base_bdev_info[disk_idx].desc,
						     raid_io->raid_ch->base_channel[disk_idx],
						     offset_in_disk, nblocks_in_disk,
						     raid_bdev_base_io_completion, bdev_io);
			break;

		default:
			SPDK_ERRLOG("submit request: invalid io type %u with null payload\n", bdev_io->type);
			assert(false);
			ret = -EIO;
			break;
		}

		if (ret == 0) {
			raid_io->base_bdev_io_submitted++;
		} else {
			raid_bdev_queue_io_wait(bdev_io, disk_idx,
						_raid0_submit_null_payload_request, ret);
			return;
		}
	}
}

static struct raid_bdev_module g_raid0_module = {
	.level = RAID0,
};
RAID_MODULE_REGISTER(&g_raid0_module)

SPDK_LOG_REGISTER_COMPONENT("bdev_raid0", SPDK_LOG_BDEV_RAID0)