/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/ioat.h"
#include "spdk/env.h"
#include "spdk/queue.h"
#include "spdk/string.h"
#include "spdk/util.h"

#define SRC_BUFFER_SIZE (512*1024)

enum ioat_task_type {
	IOAT_COPY_TYPE,
	IOAT_FILL_TYPE,
};

struct user_config {
	int queue_depth;
	int time_in_sec;
	char *core_mask;
};

struct ioat_device {
	struct spdk_ioat_chan *ioat;
	TAILQ_ENTRY(ioat_device) tailq;
};

static TAILQ_HEAD(, ioat_device) g_devices;
static struct ioat_device *g_next_device;

static struct user_config g_user_config;

struct thread_entry {
	struct spdk_ioat_chan *chan;
	uint64_t xfer_completed;
	uint64_t xfer_failed;
	uint64_t fill_completed;
	uint64_t fill_failed;
	uint64_t current_queue_depth;
	unsigned lcore_id;
	bool is_draining;
	bool init_failed;
	struct spdk_mempool *data_pool;
	struct spdk_mempool *task_pool;
};

struct ioat_task {
	enum ioat_task_type type;
	struct thread_entry *thread_entry;
	void *buffer;
	int len;
	uint64_t fill_pattern;
	void *src;
	void *dst;
};

static __thread unsigned int seed = 0;

static unsigned char *g_src;

static void submit_single_xfer(struct ioat_task *ioat_task);

static void
construct_user_config(struct user_config *self)
{
	self->queue_depth = 32;
	self->time_in_sec = 10;
	self->core_mask = "0x1";
}

static void
dump_user_config(struct user_config *self)
{
	printf("User configuration:\n");
	printf("Run time:       %u seconds\n", self->time_in_sec);
	printf("Core mask:      %s\n", self->core_mask);
	printf("Queue depth:    %u\n", self->queue_depth);
}

static void
ioat_exit(void)
{
	struct ioat_device *dev;

	while (!TAILQ_EMPTY(&g_devices)) {
		dev = TAILQ_FIRST(&g_devices);
		TAILQ_REMOVE(&g_devices, dev, tailq);
		if (dev->ioat) {
			spdk_ioat_detach(dev->ioat);
		}
		free(dev);
	}
}

/* Pick a random length, offsets and (for fills) pattern for the next operation. */
static void
prepare_ioat_task(struct thread_entry *thread_entry, struct ioat_task *ioat_task)
{
	int len;
	uintptr_t src_offset;
	uintptr_t dst_offset;
	uint64_t fill_pattern;

	if (ioat_task->type == IOAT_FILL_TYPE) {
		fill_pattern = rand_r(&seed);
		fill_pattern = fill_pattern << 32 | rand_r(&seed);

		/* Ensure that the length of the memset block is 8-byte aligned.
		 * In case the buffer crosses a hugepage boundary and must be split,
		 * we also need to ensure 8-byte address alignment. We do it
		 * unconditionally to keep things simple.
		 */
		len = 8 + ((rand_r(&seed) % (SRC_BUFFER_SIZE - 16)) & ~0x7);
		dst_offset = 8 + rand_r(&seed) % (SRC_BUFFER_SIZE - 8 - len);
		ioat_task->fill_pattern = fill_pattern;
		ioat_task->dst = (void *)(((uintptr_t)ioat_task->buffer + dst_offset) & ~0x7);
	} else {
		src_offset = rand_r(&seed) % SRC_BUFFER_SIZE;
		len = rand_r(&seed) % (SRC_BUFFER_SIZE - src_offset);
		dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len);

		memset(ioat_task->buffer, 0, SRC_BUFFER_SIZE);
		ioat_task->src = (void *)((uintptr_t)g_src + src_offset);
		ioat_task->dst = (void *)((uintptr_t)ioat_task->buffer + dst_offset);
	}
	ioat_task->len = len;
	ioat_task->thread_entry = thread_entry;
}

/* Completion callback: verify the copied or filled data, then either
 * resubmit the task or return its resources to the pools while draining.
 */
static void
ioat_done(void *cb_arg)
{
	char *value;
	int i, failed = 0;
	struct ioat_task *ioat_task = (struct ioat_task *)cb_arg;
	struct thread_entry *thread_entry = ioat_task->thread_entry;

	if (ioat_task->type == IOAT_FILL_TYPE) {
		value = ioat_task->dst;
		for (i = 0; i < ioat_task->len / 8; i++) {
			if (memcmp(value, &ioat_task->fill_pattern, 8) != 0) {
				thread_entry->fill_failed++;
				failed = 1;
				break;
			}
			value += 8;
		}
		if (!failed) {
			thread_entry->fill_completed++;
		}
	} else {
		if (memcmp(ioat_task->src, ioat_task->dst, ioat_task->len)) {
			thread_entry->xfer_failed++;
		} else {
			thread_entry->xfer_completed++;
		}
	}

	thread_entry->current_queue_depth--;
	if (thread_entry->is_draining) {
		spdk_mempool_put(thread_entry->data_pool, ioat_task->buffer);
		spdk_mempool_put(thread_entry->task_pool, ioat_task);
	} else {
		prepare_ioat_task(thread_entry, ioat_task);
		submit_single_xfer(ioat_task);
	}
}

static bool
probe_cb(void *cb_ctx, struct spdk_pci_device *pci_dev)
{
	printf(" Found matching device at %04x:%02x:%02x.%x "
	       "vendor:0x%04x device:0x%04x\n",
	       spdk_pci_device_get_domain(pci_dev),
	       spdk_pci_device_get_bus(pci_dev), spdk_pci_device_get_dev(pci_dev),
	       spdk_pci_device_get_func(pci_dev),
	       spdk_pci_device_get_vendor_id(pci_dev), spdk_pci_device_get_device_id(pci_dev));

	return true;
}

static void
attach_cb(void *cb_ctx, struct spdk_pci_device *pci_dev, struct spdk_ioat_chan *ioat)
{
	struct ioat_device *dev;

	dev = malloc(sizeof(*dev));
	if (dev == NULL) {
		printf("Failed to allocate device struct\n");
		return;
	}
	memset(dev, 0, sizeof(*dev));

	dev->ioat = ioat;
	TAILQ_INSERT_TAIL(&g_devices, dev, tailq);
}

static int
ioat_init(void)
{
	TAILQ_INIT(&g_devices);

	if (spdk_ioat_probe(NULL, probe_cb, attach_cb) != 0) {
		fprintf(stderr, "ioat_probe() failed\n");
		return 1;
	}

	return 0;
}

static void
usage(char *program_name)
{
	printf("%s options\n", program_name);
	printf("\t[-h help message]\n");
	printf("\t[-c core mask for distributing I/O submission/completion work]\n");
	printf("\t[-t time in seconds]\n");
	printf("\t[-q queue depth]\n");
}

static int
parse_args(int argc, char **argv)
{
	int op;

	construct_user_config(&g_user_config);
	while ((op = getopt(argc, argv, "c:ht:q:")) != -1) {
		switch (op) {
		case 't':
			g_user_config.time_in_sec = atoi(optarg);
			break;
		case 'c':
			g_user_config.core_mask = optarg;
			break;
		case 'q':
			g_user_config.queue_depth = atoi(optarg);
			break;
		case 'h':
			usage(argv[0]);
			exit(0);
		default:
			usage(argv[0]);
			return 1;
		}
	}
	if (!g_user_config.time_in_sec || !g_user_config.core_mask || !g_user_config.queue_depth) {
		usage(argv[0]);
		return 1;
	}

	return 0;
}

/* Keep processing completions until every outstanding operation has finished. */
static void
drain_xfers(struct thread_entry *thread_entry)
{
	while (thread_entry->current_queue_depth > 0) {
		spdk_ioat_process_events(thread_entry->chan);
	}
}

static void
submit_single_xfer(struct ioat_task *ioat_task)
{
	if (ioat_task->type == IOAT_FILL_TYPE) {
		spdk_ioat_submit_fill(ioat_task->thread_entry->chan, ioat_task, ioat_done,
				      ioat_task->dst, ioat_task->fill_pattern, ioat_task->len);
	} else {
		spdk_ioat_submit_copy(ioat_task->thread_entry->chan, ioat_task, ioat_done,
				      ioat_task->dst, ioat_task->src, ioat_task->len);
	}
	ioat_task->thread_entry->current_queue_depth++;
}

/* Prime the channel with queue_depth operations; alternate between copy and
 * fill when the engine advertises fill support.
 */
static void
submit_xfers(struct thread_entry *thread_entry, uint64_t queue_depth)
{
	while (queue_depth-- > 0) {
		struct ioat_task *ioat_task = NULL;

		ioat_task = spdk_mempool_get(thread_entry->task_pool);
		ioat_task->buffer = spdk_mempool_get(thread_entry->data_pool);

		ioat_task->type = IOAT_COPY_TYPE;
		if (spdk_ioat_get_dma_capabilities(thread_entry->chan) & SPDK_IOAT_ENGINE_FILL_SUPPORTED) {
			if (queue_depth % 2) {
				ioat_task->type = IOAT_FILL_TYPE;
			}
		}
		prepare_ioat_task(thread_entry, ioat_task);
		submit_single_xfer(ioat_task);
	}
}

/* Per-core worker: create per-lcore pools, keep the configured queue depth of
 * operations in flight for time_in_sec seconds, then drain and exit.
 */
static int
work_fn(void *arg)
{
	uint64_t tsc_end;
	char buf_pool_name[20], task_pool_name[20];
	struct thread_entry *t = (struct thread_entry *)arg;

	if (!t->chan) {
		return 0;
	}

	t->lcore_id = spdk_env_get_current_core();

	snprintf(buf_pool_name, sizeof(buf_pool_name), "buf_pool_%u", t->lcore_id);
	snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%u", t->lcore_id);
	t->data_pool = spdk_mempool_create(buf_pool_name, g_user_config.queue_depth, SRC_BUFFER_SIZE,
					   SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
					   SPDK_ENV_SOCKET_ID_ANY);
	t->task_pool = spdk_mempool_create(task_pool_name, g_user_config.queue_depth,
					   sizeof(struct ioat_task),
					   SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
					   SPDK_ENV_SOCKET_ID_ANY);
	if (!t->data_pool || !t->task_pool) {
		fprintf(stderr, "Could not allocate buffer pool.\n");
		t->init_failed = true;
		return 1;
	}

	tsc_end = spdk_get_ticks() + g_user_config.time_in_sec * spdk_get_ticks_hz();

	submit_xfers(t, g_user_config.queue_depth);
	while (spdk_get_ticks() < tsc_end) {
		spdk_ioat_process_events(t->chan);
	}

	t->is_draining = true;
	drain_xfers(t);

	return 0;
}

/* Fill the shared source buffer with a known pattern that copies are verified against. */
static int
init_src_buffer(void)
{
	int i;

	g_src = spdk_dma_zmalloc(SRC_BUFFER_SIZE, 512, NULL);
	if (g_src == NULL) {
		fprintf(stderr, "Allocate src buffer failed\n");
		return -1;
	}

	for (i = 0; i < SRC_BUFFER_SIZE / 4; i++) {
		memset((g_src + (4 * i)), i, 4);
	}

	return 0;
}

static int
init(void)
{
	struct spdk_env_opts opts;

	spdk_env_opts_init(&opts);
	opts.name = "verify";
	opts.core_mask = g_user_config.core_mask;
	if (spdk_env_init(&opts) < 0) {
		fprintf(stderr, "Unable to initialize SPDK env\n");
		return 1;
	}

	if (init_src_buffer() != 0) {
		fprintf(stderr, "Could not init src buffer\n");
		return 1;
	}
	if (ioat_init() != 0) {
		fprintf(stderr, "Could not init ioat\n");
		return 1;
	}

	return 0;
}

static int
dump_result(struct thread_entry *threads, uint32_t num_threads)
{
	uint32_t i;
	uint64_t total_completed = 0;
	uint64_t total_failed = 0;

	for (i = 0; i < num_threads; i++) {
		struct thread_entry *t = &threads[i];

		if (!t->chan) {
			continue;
		}

		if (t->init_failed) {
			total_failed++;
			continue;
		}

		total_completed += t->xfer_completed;
		total_completed += t->fill_completed;
		total_failed += t->xfer_failed;
		total_failed += t->fill_failed;
		if (total_completed || total_failed) {
			printf("lcore = %u, copy success = %" PRIu64 ", copy failed = %" PRIu64
			       ", fill success = %" PRIu64 ", fill failed = %" PRIu64 "\n",
			       t->lcore_id, t->xfer_completed, t->xfer_failed,
			       t->fill_completed, t->fill_failed);
		}
	}
	return total_failed ? 1 : 0;
}

static struct spdk_ioat_chan *
get_next_chan(void)
{
	struct spdk_ioat_chan *chan;

	if (g_next_device == NULL) {
		fprintf(stderr, "Not enough ioat channels found. Check that ioatdma driver is unloaded.\n");
		return NULL;
	}

	chan = g_next_device->ioat;

	g_next_device = TAILQ_NEXT(g_next_device, tailq);

	return chan;
}

static uint32_t
get_max_core(void)
{
	uint32_t i;
	uint32_t max_core = 0;

	SPDK_ENV_FOREACH_CORE(i) {
		if (i > max_core) {
			max_core = i;
		}
	}

	return max_core;
}

int
main(int argc, char **argv)
{
	uint32_t i, current_core;
	struct thread_entry *threads;
	uint32_t num_threads;
	int rc;

	if (parse_args(argc, argv) != 0) {
		return 1;
	}

	if (init() != 0) {
		return 1;
	}

	dump_user_config(&g_user_config);

	g_next_device = TAILQ_FIRST(&g_devices);

	num_threads = get_max_core() + 1;
	threads = calloc(num_threads, sizeof(*threads));
	if (!threads) {
		fprintf(stderr, "Thread memory allocation failed\n");
		rc = 1;
		goto cleanup;
	}

	/* Assign one ioat channel per core and launch a pinned worker on every
	 * core except the current one, which runs work_fn() inline.
	 */
	current_core = spdk_env_get_current_core();
	SPDK_ENV_FOREACH_CORE(i) {
		if (i != current_core) {
			threads[i].chan = get_next_chan();
			spdk_env_thread_launch_pinned(i, work_fn, &threads[i]);
		}
	}

	threads[current_core].chan = get_next_chan();
	work_fn(&threads[current_core]);

	spdk_env_thread_wait_all();
	rc = dump_result(threads, num_threads);

cleanup:
	spdk_dma_free(g_src);
	ioat_exit();
	free(threads);

	return rc;
}