1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include <rte_config.h> 37 #include <rte_mempool.h> 38 39 #include "spdk/nvme.h" 40 #include "spdk/queue.h" 41 42 struct dev_ctx { 43 TAILQ_ENTRY(dev_ctx) tailq; 44 bool is_new; 45 bool is_removed; 46 bool is_draining; 47 struct spdk_nvme_ctrlr *ctrlr; 48 struct spdk_nvme_ns *ns; 49 struct spdk_nvme_qpair *qpair; 50 uint32_t io_size_blocks; 51 uint64_t size_in_ios; 52 uint64_t io_completed; 53 uint64_t prev_io_completed; 54 uint64_t current_queue_depth; 55 uint64_t offset_in_ios; 56 char name[1024]; 57 }; 58 59 struct perf_task { 60 struct dev_ctx *dev; 61 void *buf; 62 }; 63 64 static struct rte_mempool *task_pool; 65 66 static TAILQ_HEAD(, dev_ctx) g_devs = TAILQ_HEAD_INITIALIZER(g_devs); 67 68 static uint64_t g_tsc_rate; 69 70 static uint32_t g_io_size_bytes = 4096; 71 static int g_queue_depth = 4; 72 static int g_time_in_sec; 73 static int g_expected_insert_times = -1; 74 static int g_expected_removal_times = -1; 75 static int g_insert_times; 76 static int g_removal_times; 77 static int g_shm_id = -1; 78 79 static void 80 task_complete(struct perf_task *task); 81 82 static void 83 register_dev(struct spdk_nvme_ctrlr *ctrlr) 84 { 85 struct dev_ctx *dev; 86 const struct spdk_nvme_ctrlr_data *cdata = spdk_nvme_ctrlr_get_data(ctrlr); 87 88 dev = calloc(1, sizeof(*dev)); 89 if (dev == NULL) { 90 perror("dev_ctx malloc"); 91 exit(1); 92 } 93 94 snprintf(dev->name, sizeof(dev->name), "%-20.20s (%-20.20s)", cdata->mn, cdata->sn); 95 96 dev->ctrlr = ctrlr; 97 dev->is_new = true; 98 dev->is_removed = false; 99 dev->is_draining = false; 100 101 dev->ns = spdk_nvme_ctrlr_get_ns(ctrlr, 1); 102 103 if (!dev->ns || !spdk_nvme_ns_is_active(dev->ns)) { 104 fprintf(stderr, "Controller %s: No active namespace; skipping\n", dev->name); 105 goto skip; 106 } 107 108 if (spdk_nvme_ns_get_size(dev->ns) < g_io_size_bytes || 109 spdk_nvme_ns_get_sector_size(dev->ns) > g_io_size_bytes) { 110 fprintf(stderr, "Controller %s: Invalid " 111 "ns size %" PRIu64 " / block size %u for I/O size %u\n", 112 dev->name, 113 spdk_nvme_ns_get_size(dev->ns), 114 spdk_nvme_ns_get_sector_size(dev->ns), 115 g_io_size_bytes); 116 goto skip; 117 } 118 119 dev->size_in_ios = spdk_nvme_ns_get_size(dev->ns) / g_io_size_bytes; 120 dev->io_size_blocks = g_io_size_bytes / spdk_nvme_ns_get_sector_size(dev->ns); 121 122 dev->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0); 123 if (!dev->qpair) { 124 fprintf(stderr, "ERROR: spdk_nvme_ctrlr_alloc_io_qpair() failed\n"); 125 goto skip; 126 } 127 g_insert_times++; 128 TAILQ_INSERT_TAIL(&g_devs, dev, tailq); 129 return; 130 131 skip: 132 free(dev); 133 } 134 135 static void 136 unregister_dev(struct dev_ctx *dev) 137 { 138 fprintf(stderr, "unregister_dev: %s\n", dev->name); 139 140 spdk_nvme_ctrlr_free_io_qpair(dev->qpair); 141 spdk_nvme_detach(dev->ctrlr); 142 143 TAILQ_REMOVE(&g_devs, dev, tailq); 144 free(dev); 145 } 146 147 static void task_ctor(struct rte_mempool *mp, void *arg, void *__task, unsigned id) 148 { 149 struct perf_task *task = __task; 150 task->buf = spdk_dma_zmalloc(g_io_size_bytes, 0x200, NULL); 151 if (task->buf == NULL) { 152 fprintf(stderr, "task->buf rte_malloc failed\n"); 153 exit(1); 154 } 155 memset(task->buf, id % 8, g_io_size_bytes); 156 } 157 158 static void io_complete(void *ctx, const struct spdk_nvme_cpl *completion); 159 160 static void 161 submit_single_io(struct dev_ctx *dev) 162 { 163 struct perf_task *task = NULL; 164 uint64_t offset_in_ios; 165 int rc; 166 167 if (rte_mempool_get(task_pool, (void **)&task) != 0) { 168 fprintf(stderr, "task_pool rte_mempool_get failed\n"); 169 exit(1); 170 } 171 172 task->dev = dev; 173 174 offset_in_ios = dev->offset_in_ios++; 175 if (dev->offset_in_ios == dev->size_in_ios) { 176 dev->offset_in_ios = 0; 177 } 178 179 rc = spdk_nvme_ns_cmd_read(dev->ns, dev->qpair, task->buf, 180 offset_in_ios * dev->io_size_blocks, 181 dev->io_size_blocks, io_complete, task, 0); 182 183 if (rc != 0) { 184 fprintf(stderr, "starting I/O failed\n"); 185 rte_mempool_put(task_pool, task); 186 } else { 187 dev->current_queue_depth++; 188 } 189 } 190 191 static void 192 task_complete(struct perf_task *task) 193 { 194 struct dev_ctx *dev; 195 196 dev = task->dev; 197 dev->current_queue_depth--; 198 dev->io_completed++; 199 200 rte_mempool_put(task_pool, task); 201 202 /* 203 * is_draining indicates when time has expired for the test run 204 * and we are just waiting for the previously submitted I/O 205 * to complete. In this case, do not submit a new I/O to replace 206 * the one just completed. 207 */ 208 if (!dev->is_draining && !dev->is_removed) { 209 submit_single_io(dev); 210 } 211 } 212 213 static void 214 io_complete(void *ctx, const struct spdk_nvme_cpl *completion) 215 { 216 task_complete((struct perf_task *)ctx); 217 } 218 219 static void 220 check_io(struct dev_ctx *dev) 221 { 222 spdk_nvme_qpair_process_completions(dev->qpair, 0); 223 } 224 225 static void 226 submit_io(struct dev_ctx *dev, int queue_depth) 227 { 228 while (queue_depth-- > 0) { 229 submit_single_io(dev); 230 } 231 } 232 233 static void 234 drain_io(struct dev_ctx *dev) 235 { 236 dev->is_draining = true; 237 while (dev->current_queue_depth > 0) { 238 check_io(dev); 239 } 240 } 241 242 static void 243 print_stats(void) 244 { 245 struct dev_ctx *dev; 246 247 TAILQ_FOREACH(dev, &g_devs, tailq) { 248 fprintf(stderr, "%-43.43s: %10" PRIu64 " I/Os completed (+%" PRIu64 ")\n", 249 dev->name, 250 dev->io_completed, 251 dev->io_completed - dev->prev_io_completed); 252 dev->prev_io_completed = dev->io_completed; 253 } 254 255 fprintf(stderr, "\n"); 256 } 257 258 static bool 259 probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 260 struct spdk_nvme_ctrlr_opts *opts) 261 { 262 fprintf(stderr, "Attaching to %s\n", trid->traddr); 263 264 return true; 265 } 266 267 static void 268 attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, 269 struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) 270 { 271 fprintf(stderr, "Attached to %s\n", trid->traddr); 272 273 register_dev(ctrlr); 274 } 275 276 static void 277 remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr) 278 { 279 struct dev_ctx *dev; 280 281 TAILQ_FOREACH(dev, &g_devs, tailq) { 282 if (dev->ctrlr == ctrlr) { 283 /* 284 * Mark the device as removed, but don't detach yet. 285 * 286 * The I/O handling code will detach once it sees that 287 * is_removed is true and all outstanding I/O have been completed. 288 */ 289 dev->is_removed = true; 290 fprintf(stderr, "Controller removed: %s\n", dev->name); 291 return; 292 } 293 } 294 295 /* 296 * If we get here, this remove_cb is for a controller that we are not tracking 297 * in g_devs (for example, because we skipped it during register_dev), 298 * so immediately detach it. 299 */ 300 spdk_nvme_detach(ctrlr); 301 } 302 303 static void 304 io_loop(void) 305 { 306 struct dev_ctx *dev, *dev_tmp; 307 uint64_t tsc_end; 308 uint64_t next_stats_tsc; 309 310 tsc_end = spdk_get_ticks() + g_time_in_sec * g_tsc_rate; 311 next_stats_tsc = spdk_get_ticks(); 312 313 while (1) { 314 uint64_t now; 315 316 /* 317 * Check for completed I/O for each controller. A new 318 * I/O will be submitted in the io_complete callback 319 * to replace each I/O that is completed. 320 */ 321 TAILQ_FOREACH(dev, &g_devs, tailq) { 322 if (dev->is_new) { 323 /* Submit initial I/O for this controller. */ 324 submit_io(dev, g_queue_depth); 325 dev->is_new = false; 326 } 327 328 check_io(dev); 329 } 330 331 /* 332 * Check for hotplug events. 333 */ 334 if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) { 335 fprintf(stderr, "spdk_nvme_probe() failed\n"); 336 break; 337 } 338 339 /* 340 * Check for devices which were hot-removed and have finished 341 * processing outstanding I/Os. 342 * 343 * unregister_dev() may remove devs from the list, so use the 344 * removal-safe iterator. 345 */ 346 TAILQ_FOREACH_SAFE(dev, &g_devs, tailq, dev_tmp) { 347 if (dev->is_removed && dev->current_queue_depth == 0) { 348 g_removal_times++; 349 unregister_dev(dev); 350 } 351 } 352 353 now = spdk_get_ticks(); 354 if (now > tsc_end) { 355 break; 356 } 357 if (now > next_stats_tsc) { 358 print_stats(); 359 next_stats_tsc += g_tsc_rate; 360 } 361 362 if (g_insert_times == g_expected_insert_times && g_removal_times == g_expected_removal_times) { 363 break; 364 } 365 } 366 367 TAILQ_FOREACH_SAFE(dev, &g_devs, tailq, dev_tmp) { 368 drain_io(dev); 369 unregister_dev(dev); 370 } 371 } 372 373 static void usage(char *program_name) 374 { 375 printf("%s options", program_name); 376 printf("\n"); 377 printf("\t[-i shm id (optional)]\n"); 378 printf("\t[-n expected hot insert times]\n"); 379 printf("\t[-r expected hot removal times]\n"); 380 printf("\t[-t time in seconds]\n"); 381 } 382 383 static int 384 parse_args(int argc, char **argv) 385 { 386 int op; 387 388 /* default value */ 389 g_time_in_sec = 0; 390 391 while ((op = getopt(argc, argv, "i:n:r:t:")) != -1) { 392 switch (op) { 393 case 'i': 394 g_shm_id = atoi(optarg); 395 break; 396 case 'n': 397 g_expected_insert_times = atoi(optarg); 398 break; 399 case 'r': 400 g_expected_removal_times = atoi(optarg); 401 break; 402 case 't': 403 g_time_in_sec = atoi(optarg); 404 break; 405 default: 406 usage(argv[0]); 407 return 1; 408 } 409 } 410 411 if (!g_time_in_sec) { 412 usage(argv[0]); 413 return 1; 414 } 415 416 return 0; 417 } 418 419 420 static int 421 register_controllers(void) 422 { 423 fprintf(stderr, "Initializing NVMe Controllers\n"); 424 425 if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, remove_cb) != 0) { 426 fprintf(stderr, "spdk_nvme_probe() failed\n"); 427 return 1; 428 } 429 /* Reset g_insert_times to 0 so that we do not count controllers attached at start as hotplug events. */ 430 g_insert_times = 0; 431 return 0; 432 } 433 434 int main(int argc, char **argv) 435 { 436 int rc; 437 struct spdk_env_opts opts; 438 439 rc = parse_args(argc, argv); 440 if (rc != 0) { 441 return rc; 442 } 443 444 spdk_env_opts_init(&opts); 445 opts.name = "hotplug"; 446 opts.core_mask = "0x1"; 447 if (g_shm_id > -1) { 448 opts.shm_id = g_shm_id; 449 } 450 spdk_env_init(&opts); 451 452 task_pool = rte_mempool_create("task_pool", 8192, 453 sizeof(struct perf_task), 454 64, 0, NULL, NULL, task_ctor, NULL, 455 SOCKET_ID_ANY, 0); 456 457 g_tsc_rate = spdk_get_ticks_hz(); 458 459 /* Detect the controllers that are plugged in at startup. */ 460 if (register_controllers() != 0) { 461 return 1; 462 } 463 464 fprintf(stderr, "Initialization complete. Starting I/O...\n"); 465 io_loop(); 466 467 if (g_expected_insert_times != -1 && g_insert_times != g_expected_insert_times) { 468 fprintf(stderr, "Expected inserts %d != actual inserts %d\n", 469 g_expected_insert_times, g_insert_times); 470 return 1; 471 } 472 473 if (g_expected_removal_times != -1 && g_removal_times != g_expected_removal_times) { 474 fprintf(stderr, "Expected removals %d != actual removals %d\n", 475 g_expected_removal_times, g_removal_times); 476 return 1; 477 } 478 479 return 0; 480 } 481