/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/ioat.h"
#include "spdk/env.h"
#include "spdk/queue.h"
#include "spdk/string.h"
#include "spdk/util.h"

#define SRC_BUFFER_SIZE (512*1024)

enum ioat_task_type {
	IOAT_COPY_TYPE,
	IOAT_FILL_TYPE,
};

struct user_config {
	int queue_depth;
	int time_in_sec;
	char *core_mask;
};

struct ioat_device {
	struct spdk_ioat_chan *ioat;
	TAILQ_ENTRY(ioat_device) tailq;
};

static TAILQ_HEAD(, ioat_device) g_devices;
static struct ioat_device *g_next_device;

static struct user_config g_user_config;

struct thread_entry {
	struct spdk_ioat_chan *chan;
	uint64_t xfer_completed;
	uint64_t xfer_failed;
	uint64_t fill_completed;
	uint64_t fill_failed;
	uint64_t current_queue_depth;
	unsigned lcore_id;
	bool is_draining;
	bool init_failed;
	struct spdk_mempool *data_pool;
	struct spdk_mempool *task_pool;
};

struct ioat_task {
	enum ioat_task_type type;
	struct thread_entry *thread_entry;
	void *buffer;
	int len;
	uint64_t fill_pattern;
	void *src;
	void *dst;
};

static __thread unsigned int seed = 0;

static unsigned char *g_src;

static void submit_single_xfer(struct ioat_task *ioat_task);

static void
construct_user_config(struct user_config *self)
{
	self->queue_depth = 32;
	self->time_in_sec = 10;
	self->core_mask = "0x1";
}

static void
dump_user_config(struct user_config *self)
{
	printf("User configuration:\n");
	printf("Run time:    %u seconds\n", self->time_in_sec);
	printf("Core mask:   %s\n", self->core_mask);
	printf("Queue depth: %u\n", self->queue_depth);
}

static void
ioat_exit(void)
{
	struct ioat_device *dev;

	while (!TAILQ_EMPTY(&g_devices)) {
		dev = TAILQ_FIRST(&g_devices);
		TAILQ_REMOVE(&g_devices, dev, tailq);
		if (dev->ioat) {
			spdk_ioat_detach(dev->ioat);
		}
		free(dev);
	}
}

static void prepare_ioat_task(struct thread_entry *thread_entry, struct ioat_task *ioat_task)
{
	int len;
	uintptr_t src_offset;
	uintptr_t dst_offset;
	uint64_t fill_pattern;

	if (ioat_task->type == IOAT_FILL_TYPE) {
		fill_pattern = rand_r(&seed);
		fill_pattern = fill_pattern << 32 | rand_r(&seed);

		/* Ensure that the length of the memset block is 8-byte aligned.
		 * In case the buffer crosses a hugepage boundary and must be split,
		 * we also need to ensure 8-byte address alignment. We do it
		 * unconditionally to keep things simple.
		 */
		len = 8 + ((rand_r(&seed) % (SRC_BUFFER_SIZE - 16)) & ~0x7);
		dst_offset = 8 + rand_r(&seed) % (SRC_BUFFER_SIZE - 8 - len);
		ioat_task->fill_pattern = fill_pattern;
		ioat_task->dst = (void *)(((uintptr_t)ioat_task->buffer + dst_offset) & ~0x7);
	} else {
		src_offset = rand_r(&seed) % SRC_BUFFER_SIZE;
		len = rand_r(&seed) % (SRC_BUFFER_SIZE - src_offset);
		dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len);

		memset(ioat_task->buffer, 0, SRC_BUFFER_SIZE);
		ioat_task->src = (void *)((uintptr_t)g_src + src_offset);
		ioat_task->dst = (void *)((uintptr_t)ioat_task->buffer + dst_offset);
	}
	ioat_task->len = len;
	ioat_task->thread_entry = thread_entry;
}

static void
ioat_done(void *cb_arg)
{
	char *value;
	int i, failed = 0;
	struct ioat_task *ioat_task = (struct ioat_task *)cb_arg;
	struct thread_entry *thread_entry = ioat_task->thread_entry;

	if (ioat_task->type == IOAT_FILL_TYPE) {
		value = ioat_task->dst;
		for (i = 0; i < ioat_task->len / 8; i++) {
			if (memcmp(value, &ioat_task->fill_pattern, 8) != 0) {
				thread_entry->fill_failed++;
				failed = 1;
				break;
			}
			value += 8;
		}
		if (!failed) {
			thread_entry->fill_completed++;
		}
	} else {
		if (memcmp(ioat_task->src, ioat_task->dst, ioat_task->len)) {
			thread_entry->xfer_failed++;
		} else {
			thread_entry->xfer_completed++;
		}
	}

	thread_entry->current_queue_depth--;
	if (thread_entry->is_draining) {
		spdk_mempool_put(thread_entry->data_pool, ioat_task->buffer);
		spdk_mempool_put(thread_entry->task_pool, ioat_task);
	} else {
		prepare_ioat_task(thread_entry, ioat_task);
		submit_single_xfer(ioat_task);
	}
}

static bool
probe_cb(void *cb_ctx, struct spdk_pci_device *pci_dev)
{
	printf(" Found matching device at %04x:%02x:%02x.%x "
	       "vendor:0x%04x device:0x%04x\n",
	       spdk_pci_device_get_domain(pci_dev),
	       spdk_pci_device_get_bus(pci_dev), spdk_pci_device_get_dev(pci_dev),
	       spdk_pci_device_get_func(pci_dev),
	       spdk_pci_device_get_vendor_id(pci_dev), spdk_pci_device_get_device_id(pci_dev));

	return true;
}

static void
attach_cb(void *cb_ctx, struct spdk_pci_device *pci_dev, struct spdk_ioat_chan *ioat)
{
	struct ioat_device *dev;

	dev = malloc(sizeof(*dev));
	if (dev == NULL) {
		printf("Failed to allocate device struct\n");
		return;
	}
	memset(dev, 0, sizeof(*dev));

	dev->ioat = ioat;
	TAILQ_INSERT_TAIL(&g_devices, dev, tailq);
}

static int
ioat_init(void)
{
	TAILQ_INIT(&g_devices);

	if (spdk_ioat_probe(NULL, probe_cb, attach_cb) != 0) {
		fprintf(stderr, "ioat_probe() failed\n");
		return 1;
	}

	return 0;
}

static void
usage(char *program_name)
{
	printf("%s options\n", program_name);
	printf("\t[-h help message]\n");
	printf("\t[-c core mask for distributing I/O submission/completion work]\n");
	printf("\t[-t time in seconds]\n");
	printf("\t[-q queue depth]\n");
}

static int
parse_args(int argc, char **argv)
{
	int op;

	construct_user_config(&g_user_config);
	while ((op = getopt(argc, argv, "c:ht:q:")) != -1) {
		switch (op) {
		case 't':
			g_user_config.time_in_sec = spdk_strtol(optarg, 10);
			break;
		case 'c':
			g_user_config.core_mask = optarg;
			break;
		case 'q':
			g_user_config.queue_depth = spdk_strtol(optarg, 10);
			break;
		case 'h':
			usage(argv[0]);
			exit(0);
		default:
			usage(argv[0]);
			return 1;
		}
	}
	if (g_user_config.time_in_sec <= 0 || !g_user_config.core_mask ||
	    g_user_config.queue_depth <= 0) {
		usage(argv[0]);
		return 1;
	}

	return 0;
}

static void
drain_xfers(struct thread_entry *thread_entry)
{
	while (thread_entry->current_queue_depth > 0) {
		spdk_ioat_process_events(thread_entry->chan);
	}
}

static void
submit_single_xfer(struct ioat_task *ioat_task)
{
	if (ioat_task->type == IOAT_FILL_TYPE)
		spdk_ioat_submit_fill(ioat_task->thread_entry->chan, ioat_task, ioat_done,
				      ioat_task->dst, ioat_task->fill_pattern, ioat_task->len);
	else
		spdk_ioat_submit_copy(ioat_task->thread_entry->chan, ioat_task, ioat_done,
				      ioat_task->dst, ioat_task->src, ioat_task->len);
	ioat_task->thread_entry->current_queue_depth++;
}

static void
submit_xfers(struct thread_entry *thread_entry, uint64_t queue_depth)
{
	while (queue_depth-- > 0) {
		struct ioat_task *ioat_task = NULL;
		ioat_task = spdk_mempool_get(thread_entry->task_pool);
		assert(ioat_task != NULL);
		ioat_task->buffer = spdk_mempool_get(thread_entry->data_pool);
		assert(ioat_task->buffer != NULL);

		ioat_task->type = IOAT_COPY_TYPE;
		if (spdk_ioat_get_dma_capabilities(thread_entry->chan) & SPDK_IOAT_ENGINE_FILL_SUPPORTED) {
			if (queue_depth % 2) {
				ioat_task->type = IOAT_FILL_TYPE;
			}
		}
		prepare_ioat_task(thread_entry, ioat_task);
		submit_single_xfer(ioat_task);
	}
}

static int
work_fn(void *arg)
{
	uint64_t tsc_end;
	char buf_pool_name[20], task_pool_name[20];
	struct thread_entry *t = (struct thread_entry *)arg;

	if (!t->chan) {
		return 1;
	}

	t->lcore_id = spdk_env_get_current_core();

	snprintf(buf_pool_name, sizeof(buf_pool_name), "buf_pool_%u", t->lcore_id);
	snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%u", t->lcore_id);
	t->data_pool = spdk_mempool_create(buf_pool_name, g_user_config.queue_depth, SRC_BUFFER_SIZE,
					   SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
					   SPDK_ENV_SOCKET_ID_ANY);
	t->task_pool = spdk_mempool_create(task_pool_name, g_user_config.queue_depth,
					   sizeof(struct ioat_task),
					   SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
					   SPDK_ENV_SOCKET_ID_ANY);
	if (!t->data_pool || !t->task_pool) {
		fprintf(stderr, "Could not allocate buffer pool.\n");
		t->init_failed = true;
		return 1;
	}

	tsc_end = spdk_get_ticks() + g_user_config.time_in_sec * spdk_get_ticks_hz();

	submit_xfers(t, g_user_config.queue_depth);
	while (spdk_get_ticks() < tsc_end) {
		spdk_ioat_process_events(t->chan);
	}

	t->is_draining = true;
	drain_xfers(t);

	return 0;
}

static int
init_src_buffer(void)
{
	int i;

	g_src = spdk_dma_zmalloc(SRC_BUFFER_SIZE, 512, NULL);
	if (g_src == NULL) {
		fprintf(stderr, "Allocate src buffer failed\n");
		return 1;
	}

	for (i = 0; i < SRC_BUFFER_SIZE / 4; i++) {
		memset((g_src + (4 * i)), i, 4);
	}

	return 0;
}

static int
init(void)
{
	struct spdk_env_opts opts;

	spdk_env_opts_init(&opts);
	opts.name = "verify";
	opts.core_mask = g_user_config.core_mask;
	if (spdk_env_init(&opts) < 0) {
		fprintf(stderr, "Unable to initialize SPDK env\n");
		return 1;
	}

	if (init_src_buffer() != 0) {
		fprintf(stderr, "Could not init src buffer\n");
		return 1;
	}
	if (ioat_init() != 0) {
		fprintf(stderr, "Could not init ioat\n");
		return 1;
	}

	return 0;
}

static int
dump_result(struct thread_entry *threads, uint32_t num_threads)
{
	uint32_t i;
	uint64_t total_completed = 0;
	uint64_t total_failed = 0;

	for (i = 0; i < num_threads; i++) {
		struct thread_entry *t = &threads[i];

		if (!t->chan) {
			continue;
		}

		if (t->init_failed) {
			total_failed++;
			continue;
		}

		total_completed += t->xfer_completed;
		total_completed += t->fill_completed;
		total_failed += t->xfer_failed;
		total_failed += t->fill_failed;
		if (total_completed || total_failed)
			printf("lcore = %d, copy success = %ld, copy failed = %ld, fill success = %ld, fill failed = %ld\n",
			       t->lcore_id, t->xfer_completed, t->xfer_failed, t->fill_completed, t->fill_failed);
	}
	return total_failed ? 1 : 0;
}

static struct spdk_ioat_chan *
get_next_chan(void)
{
	struct spdk_ioat_chan *chan;

	if (g_next_device == NULL) {
		fprintf(stderr, "Not enough ioat channels found. Check that ioat channels are bound\n");
		fprintf(stderr, "to uio_pci_generic or vfio-pci. scripts/setup.sh can help with this.\n");
		return NULL;
	}

	chan = g_next_device->ioat;

	g_next_device = TAILQ_NEXT(g_next_device, tailq);

	return chan;
}

static uint32_t
get_max_core(void)
{
	uint32_t i;
	uint32_t max_core = 0;

	SPDK_ENV_FOREACH_CORE(i) {
		if (i > max_core) {
			max_core = i;
		}
	}

	return max_core;
}

int
main(int argc, char **argv)
{
	uint32_t i, current_core;
	struct thread_entry *threads;
	uint32_t num_threads;
	int rc;

	if (parse_args(argc, argv) != 0) {
		return 1;
	}

	if (init() != 0) {
		return 1;
	}

	dump_user_config(&g_user_config);

	g_next_device = TAILQ_FIRST(&g_devices);

	num_threads = get_max_core() + 1;
	threads = calloc(num_threads, sizeof(*threads));
	if (!threads) {
		fprintf(stderr, "Thread memory allocation failed\n");
		rc = 1;
		goto cleanup;
	}

	current_core = spdk_env_get_current_core();
	SPDK_ENV_FOREACH_CORE(i) {
		if (i != current_core) {
			threads[i].chan = get_next_chan();
			spdk_env_thread_launch_pinned(i, work_fn, &threads[i]);
		}
	}

	threads[current_core].chan = get_next_chan();
	if (work_fn(&threads[current_core]) != 0) {
		rc = 1;
		goto cleanup;
	}

	spdk_env_thread_wait_all();
	rc = dump_result(threads, num_threads);

cleanup:
	spdk_dma_free(g_src);
	ioat_exit();
	free(threads);

	return rc;
}