/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/ioat.h"
#include "spdk/env.h"
#include "spdk/queue.h"
#include "spdk/string.h"
#include "spdk/util.h"

#define SRC_BUFFER_SIZE (512*1024)

enum ioat_task_type {
	IOAT_COPY_TYPE,
	IOAT_FILL_TYPE,
};

struct user_config {
	int queue_depth;
	int time_in_sec;
	char *core_mask;
};

struct ioat_device {
	struct spdk_ioat_chan *ioat;
	TAILQ_ENTRY(ioat_device) tailq;
};

static TAILQ_HEAD(, ioat_device) g_devices = TAILQ_HEAD_INITIALIZER(g_devices);
static struct ioat_device *g_next_device;

static struct user_config g_user_config;

struct thread_entry {
	struct spdk_ioat_chan *chan;
	uint64_t xfer_completed;
	uint64_t xfer_failed;
	uint64_t fill_completed;
	uint64_t fill_failed;
	uint64_t current_queue_depth;
	unsigned lcore_id;
	bool is_draining;
	bool init_failed;
	struct spdk_mempool *data_pool;
	struct spdk_mempool *task_pool;
};

struct ioat_task {
	enum ioat_task_type type;
	struct thread_entry *thread_entry;
	void *buffer;
	int len;
	uint64_t fill_pattern;
	void *src;
	void *dst;
};

static __thread unsigned int seed = 0;

static unsigned char *g_src;

static void submit_single_xfer(struct ioat_task *ioat_task);

static void
construct_user_config(struct user_config *self)
{
	self->queue_depth = 32;
	self->time_in_sec = 10;
	self->core_mask = "0x1";
}

static void
dump_user_config(struct user_config *self)
{
	printf("User configuration:\n");
	printf("Run time: %u seconds\n", self->time_in_sec);
	printf("Core mask: %s\n", self->core_mask);
	printf("Queue depth: %u\n", self->queue_depth);
}

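/*
 * Release everything recorded by attach_cb(): detach each IOAT channel
 * and free its bookkeeping entry.
 */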
static void
ioat_exit(void)
{
	struct ioat_device *dev;

	while (!TAILQ_EMPTY(&g_devices)) {
		dev = TAILQ_FIRST(&g_devices);
		TAILQ_REMOVE(&g_devices, dev, tailq);
		if (dev->ioat) {
			spdk_ioat_detach(dev->ioat);
		}
		free(dev);
	}
}

static void
prepare_ioat_task(struct thread_entry *thread_entry, struct ioat_task *ioat_task)
{
	int len;
	uintptr_t src_offset;
	uintptr_t dst_offset;
	uint64_t fill_pattern;

	if (ioat_task->type == IOAT_FILL_TYPE) {
		fill_pattern = rand_r(&seed);
		fill_pattern = fill_pattern << 32 | rand_r(&seed);

		/* Ensure that the length of the memset block is 8-byte aligned.
		 * In case the buffer crosses a hugepage boundary and must be split,
		 * we also need to ensure 8-byte address alignment. We do it
		 * unconditionally to keep things simple.
		 */
		len = 8 + ((rand_r(&seed) % (SRC_BUFFER_SIZE - 16)) & ~0x7);
		dst_offset = 8 + rand_r(&seed) % (SRC_BUFFER_SIZE - 8 - len);
		ioat_task->fill_pattern = fill_pattern;
		ioat_task->dst = (void *)(((uintptr_t)ioat_task->buffer + dst_offset) & ~0x7);
	} else {
		src_offset = rand_r(&seed) % SRC_BUFFER_SIZE;
		len = rand_r(&seed) % (SRC_BUFFER_SIZE - src_offset);
		dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len);

		memset(ioat_task->buffer, 0, SRC_BUFFER_SIZE);
		ioat_task->src = (void *)((uintptr_t)g_src + src_offset);
		ioat_task->dst = (void *)((uintptr_t)ioat_task->buffer + dst_offset);
	}
	ioat_task->len = len;
	ioat_task->thread_entry = thread_entry;
}

static void
ioat_done(void *cb_arg)
{
	char *value;
	int i, failed = 0;
	struct ioat_task *ioat_task = (struct ioat_task *)cb_arg;
	struct thread_entry *thread_entry = ioat_task->thread_entry;

	if (ioat_task->type == IOAT_FILL_TYPE) {
		value = ioat_task->dst;
		for (i = 0; i < ioat_task->len / 8; i++) {
			if (memcmp(value, &ioat_task->fill_pattern, 8) != 0) {
				thread_entry->fill_failed++;
				failed = 1;
				break;
			}
			value += 8;
		}
		if (!failed) {
			thread_entry->fill_completed++;
		}
	} else {
		if (memcmp(ioat_task->src, ioat_task->dst, ioat_task->len)) {
			thread_entry->xfer_failed++;
		} else {
			thread_entry->xfer_completed++;
		}
	}

	thread_entry->current_queue_depth--;
	if (thread_entry->is_draining) {
		spdk_mempool_put(thread_entry->data_pool, ioat_task->buffer);
		spdk_mempool_put(thread_entry->task_pool, ioat_task);
	} else {
		prepare_ioat_task(thread_entry, ioat_task);
		submit_single_xfer(ioat_task);
	}
}

static bool
probe_cb(void *cb_ctx, struct spdk_pci_device *pci_dev)
{
	printf(" Found matching device at %04x:%02x:%02x.%x "
	       "vendor:0x%04x device:0x%04x\n",
	       spdk_pci_device_get_domain(pci_dev),
	       spdk_pci_device_get_bus(pci_dev), spdk_pci_device_get_dev(pci_dev),
	       spdk_pci_device_get_func(pci_dev),
	       spdk_pci_device_get_vendor_id(pci_dev), spdk_pci_device_get_device_id(pci_dev));

	return true;
}

static void
attach_cb(void *cb_ctx, struct spdk_pci_device *pci_dev, struct spdk_ioat_chan *ioat)
{
	struct ioat_device *dev;

	dev = malloc(sizeof(*dev));
	if (dev == NULL) {
		printf("Failed to allocate device struct\n");
		return;
	}
	memset(dev, 0, sizeof(*dev));

	dev->ioat = ioat;
	TAILQ_INSERT_TAIL(&g_devices, dev, tailq);
}

static int
ioat_init(void)
{
	if (spdk_ioat_probe(NULL, probe_cb, attach_cb) != 0) {
		fprintf(stderr, "ioat_probe() failed\n");
		return 1;
	}

	return 0;
}

static void
usage(char *program_name)
{
	printf("%s options\n", program_name);
	printf("\t[-h help message]\n");
	printf("\t[-c core mask for distributing I/O submission/completion work]\n");
	printf("\t[-t time in seconds]\n");
	printf("\t[-q queue depth]\n");
}

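/*
 * Parse -c/-t/-q and reject a non-positive run time or queue depth, or
 * a missing core mask.
 */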
static int
parse_args(int argc, char **argv)
{
	int op;

	construct_user_config(&g_user_config);
	while ((op = getopt(argc, argv, "c:ht:q:")) != -1) {
		switch (op) {
		case 't':
			g_user_config.time_in_sec = spdk_strtol(optarg, 10);
			break;
		case 'c':
			g_user_config.core_mask = optarg;
			break;
		case 'q':
			g_user_config.queue_depth = spdk_strtol(optarg, 10);
			break;
		case 'h':
			usage(argv[0]);
			exit(0);
		default:
			usage(argv[0]);
			return 1;
		}
	}
	if (g_user_config.time_in_sec <= 0 || !g_user_config.core_mask ||
	    g_user_config.queue_depth <= 0) {
		usage(argv[0]);
		return 1;
	}

	return 0;
}

static void
drain_xfers(struct thread_entry *thread_entry)
{
	while (thread_entry->current_queue_depth > 0) {
		spdk_ioat_process_events(thread_entry->chan);
	}
}

static void
submit_single_xfer(struct ioat_task *ioat_task)
{
	if (ioat_task->type == IOAT_FILL_TYPE) {
		spdk_ioat_submit_fill(ioat_task->thread_entry->chan, ioat_task, ioat_done,
				      ioat_task->dst, ioat_task->fill_pattern, ioat_task->len);
	} else {
		spdk_ioat_submit_copy(ioat_task->thread_entry->chan, ioat_task, ioat_done,
				      ioat_task->dst, ioat_task->src, ioat_task->len);
	}
	ioat_task->thread_entry->current_queue_depth++;
}

static void
submit_xfers(struct thread_entry *thread_entry, uint64_t queue_depth)
{
	while (queue_depth-- > 0) {
		struct ioat_task *ioat_task = NULL;
		ioat_task = spdk_mempool_get(thread_entry->task_pool);
		assert(ioat_task != NULL);
		ioat_task->buffer = spdk_mempool_get(thread_entry->data_pool);
		assert(ioat_task->buffer != NULL);

		ioat_task->type = IOAT_COPY_TYPE;
		if (spdk_ioat_get_dma_capabilities(thread_entry->chan) & SPDK_IOAT_ENGINE_FILL_SUPPORTED) {
			if (queue_depth % 2) {
				ioat_task->type = IOAT_FILL_TYPE;
			}
		}
		prepare_ioat_task(thread_entry, ioat_task);
		submit_single_xfer(ioat_task);
	}
}

static int
work_fn(void *arg)
{
	uint64_t tsc_end;
	char buf_pool_name[20], task_pool_name[20];
	struct thread_entry *t = (struct thread_entry *)arg;

	if (!t->chan) {
		return 1;
	}

	t->lcore_id = spdk_env_get_current_core();

	snprintf(buf_pool_name, sizeof(buf_pool_name), "buf_pool_%u", t->lcore_id);
	snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%u", t->lcore_id);
	t->data_pool = spdk_mempool_create(buf_pool_name, g_user_config.queue_depth, SRC_BUFFER_SIZE,
					   SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
					   SPDK_ENV_SOCKET_ID_ANY);
	t->task_pool = spdk_mempool_create(task_pool_name, g_user_config.queue_depth,
					   sizeof(struct ioat_task),
					   SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
					   SPDK_ENV_SOCKET_ID_ANY);
	if (!t->data_pool || !t->task_pool) {
		fprintf(stderr, "Could not allocate buffer pool.\n");
		t->init_failed = true;
		return 1;
	}

	tsc_end = spdk_get_ticks() + g_user_config.time_in_sec * spdk_get_ticks_hz();

	submit_xfers(t, g_user_config.queue_depth);
	while (spdk_get_ticks() < tsc_end) {
		spdk_ioat_process_events(t->chan);
	}

	t->is_draining = true;
	drain_xfers(t);

	return 0;
}

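/*
 * Allocate the shared source buffer and fill it with a deterministic
 * pattern so that completed copies can be checked with memcmp() in
 * ioat_done().
 */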
static int
init_src_buffer(void)
{
	int i;

	g_src = spdk_dma_zmalloc(SRC_BUFFER_SIZE, 512, NULL);
	if (g_src == NULL) {
		fprintf(stderr, "Allocate src buffer failed\n");
		return 1;
	}

	for (i = 0; i < SRC_BUFFER_SIZE / 4; i++) {
		memset((g_src + (4 * i)), i, 4);
	}

	return 0;
}

static int
init(void)
{
	struct spdk_env_opts opts;

	spdk_env_opts_init(&opts);
	opts.name = "verify";
	opts.core_mask = g_user_config.core_mask;
	if (spdk_env_init(&opts) < 0) {
		fprintf(stderr, "Unable to initialize SPDK env\n");
		return 1;
	}

	if (init_src_buffer() != 0) {
		fprintf(stderr, "Could not init src buffer\n");
		return 1;
	}
	if (ioat_init() != 0) {
		fprintf(stderr, "Could not init ioat\n");
		return 1;
	}

	return 0;
}

static int
dump_result(struct thread_entry *threads, uint32_t num_threads)
{
	uint32_t i;
	uint64_t total_completed = 0;
	uint64_t total_failed = 0;

	for (i = 0; i < num_threads; i++) {
		struct thread_entry *t = &threads[i];

		if (!t->chan) {
			continue;
		}

		if (t->init_failed) {
			total_failed++;
			continue;
		}

		total_completed += t->xfer_completed;
		total_completed += t->fill_completed;
		total_failed += t->xfer_failed;
		total_failed += t->fill_failed;
		if (total_completed || total_failed) {
			printf("lcore = %d, copy success = %" PRIu64 ", copy failed = %" PRIu64
			       ", fill success = %" PRIu64 ", fill failed = %" PRIu64 "\n",
			       t->lcore_id, t->xfer_completed, t->xfer_failed, t->fill_completed, t->fill_failed);
		}
	}
	return total_failed ? 1 : 0;
}

static struct spdk_ioat_chan *
get_next_chan(void)
{
	struct spdk_ioat_chan *chan;

	if (g_next_device == NULL) {
		fprintf(stderr, "Not enough ioat channels found. Check that ioat channels are bound\n");
		fprintf(stderr, "to uio_pci_generic or vfio-pci. scripts/setup.sh can help with this.\n");
		return NULL;
	}

	chan = g_next_device->ioat;

	g_next_device = TAILQ_NEXT(g_next_device, tailq);

	return chan;
}

static uint32_t
get_max_core(void)
{
	uint32_t i;
	uint32_t max_core = 0;

	SPDK_ENV_FOREACH_CORE(i) {
		if (i > max_core) {
			max_core = i;
		}
	}

	return max_core;
}

int
main(int argc, char **argv)
{
	uint32_t i, current_core;
	struct thread_entry *threads;
	uint32_t num_threads;
	int rc;

	if (parse_args(argc, argv) != 0) {
		return 1;
	}

	if (init() != 0) {
		return 1;
	}

	dump_user_config(&g_user_config);

	g_next_device = TAILQ_FIRST(&g_devices);

	num_threads = get_max_core() + 1;
	threads = calloc(num_threads, sizeof(*threads));
	if (!threads) {
		fprintf(stderr, "Thread memory allocation failed\n");
		rc = 1;
		goto cleanup;
	}

	current_core = spdk_env_get_current_core();
	SPDK_ENV_FOREACH_CORE(i) {
		if (i != current_core) {
			threads[i].chan = get_next_chan();
			spdk_env_thread_launch_pinned(i, work_fn, &threads[i]);
		}
	}

	threads[current_core].chan = get_next_chan();
	if (work_fn(&threads[current_core]) != 0) {
		rc = 1;
		goto cleanup;
	}

	spdk_env_thread_wait_all();
	rc = dump_result(threads, num_threads);

cleanup:
	spdk_dma_free(g_src);
	ioat_exit();
	free(threads);

	return rc;
}