1 /* 2 * Copyright (c) 2012 The DragonFly Project. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in 12 * the documentation and/or other materials provided with the 13 * distribution. 14 * 3. Neither the name of The DragonFly Project nor the names of its 15 * contributors may be used to endorse or promote products derived 16 * from this software without specific, prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 22 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 26 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 28 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 */ 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/kernel.h> 36 #include <sys/sysctl.h> 37 #include <sys/sbuf.h> 38 #include <sys/cpu_topology.h> 39 40 #include <machine/smp.h> 41 42 #ifndef NAPICID 43 #define NAPICID 256 44 #endif 45 46 #define INDENT_BUF_SIZE LEVEL_NO*3 47 #define INVALID_ID -1 48 49 /* Per-cpu sysctl nodes and info */ 50 struct per_cpu_sysctl_info { 51 struct sysctl_ctx_list sysctl_ctx; 52 struct sysctl_oid *sysctl_tree; 53 char cpu_name[32]; 54 int physical_id; 55 int core_id; 56 char physical_siblings[8*MAXCPU]; 57 char core_siblings[8*MAXCPU]; 58 }; 59 typedef struct per_cpu_sysctl_info per_cpu_sysctl_info_t; 60 61 static cpu_node_t cpu_topology_nodes[MAXCPU]; /* Memory for topology */ 62 static cpu_node_t *cpu_root_node; /* Root node pointer */ 63 64 static struct sysctl_ctx_list cpu_topology_sysctl_ctx; 65 static struct sysctl_oid *cpu_topology_sysctl_tree; 66 static char cpu_topology_members[8*MAXCPU]; 67 static per_cpu_sysctl_info_t pcpu_sysctl[MAXCPU]; 68 69 int cpu_topology_levels_number = 1; 70 cpu_node_t *root_cpu_node; 71 72 /* Get the next valid apicid starting 73 * from current apicid (curr_apicid 74 */ 75 static int 76 get_next_valid_apicid(int curr_apicid) 77 { 78 int next_apicid = curr_apicid; 79 do { 80 next_apicid++; 81 } 82 while(get_cpuid_from_apicid(next_apicid) == -1 && 83 next_apicid < NAPICID); 84 if (next_apicid == NAPICID) { 85 kprintf("Warning: No next valid APICID found. Returning -1\n"); 86 return -1; 87 } 88 return next_apicid; 89 } 90 91 /* Generic topology tree. The parameters have the following meaning: 92 * - children_no_per_level : the number of children on each level 93 * - level_types : the type of the level (THREAD, CORE, CHIP, etc) 94 * - cur_level : the current level of the tree 95 * - node : the current node 96 * - last_free_node : the last free node in the global array. 97 * - cpuid : basicly this are the ids of the leafs 98 */ 99 static void 100 build_topology_tree(int *children_no_per_level, 101 uint8_t *level_types, 102 int cur_level, 103 cpu_node_t *node, 104 cpu_node_t **last_free_node, 105 int *apicid) 106 { 107 int i; 108 109 node->child_no = children_no_per_level[cur_level]; 110 node->type = level_types[cur_level]; 111 CPUMASK_ASSZERO(node->members); 112 node->compute_unit_id = -1; 113 114 if (node->child_no == 0) { 115 *apicid = get_next_valid_apicid(*apicid); 116 CPUMASK_ASSBIT(node->members, get_cpuid_from_apicid(*apicid)); 117 return; 118 } 119 120 if (node->parent_node == NULL) 121 root_cpu_node = node; 122 123 for (i = 0; i < node->child_no; i++) { 124 node->child_node[i] = *last_free_node; 125 (*last_free_node)++; 126 127 node->child_node[i]->parent_node = node; 128 129 build_topology_tree(children_no_per_level, 130 level_types, 131 cur_level + 1, 132 node->child_node[i], 133 last_free_node, 134 apicid); 135 136 CPUMASK_ORMASK(node->members, node->child_node[i]->members); 137 } 138 } 139 140 #if defined(__x86_64__) && !defined(_KERNEL_VIRTUAL) 141 static void 142 migrate_elements(cpu_node_t **a, int n, int pos) 143 { 144 int i; 145 146 for (i = pos; i < n - 1 ; i++) { 147 a[i] = a[i+1]; 148 } 149 a[i] = NULL; 150 } 151 #endif 152 153 /* Build CPU topology. The detection is made by comparing the 154 * chip, core and logical IDs of each CPU with the IDs of the 155 * BSP. When we found a match, at that level the CPUs are siblings. 156 */ 157 static void 158 build_cpu_topology(void) 159 { 160 detect_cpu_topology(); 161 int i; 162 int BSPID = 0; 163 int threads_per_core = 0; 164 int cores_per_chip = 0; 165 int chips_per_package = 0; 166 int children_no_per_level[LEVEL_NO]; 167 uint8_t level_types[LEVEL_NO]; 168 int apicid = -1; 169 170 cpu_node_t *root = &cpu_topology_nodes[0]; 171 cpu_node_t *last_free_node = root + 1; 172 173 /* Assume that the topology is uniform. 174 * Find the number of siblings within chip 175 * and witin core to build up the topology 176 */ 177 for (i = 0; i < ncpus; i++) { 178 cpumask_t mask; 179 180 CPUMASK_ASSBIT(mask, i); 181 182 if (CPUMASK_TESTMASK(mask, smp_active_mask) == 0) 183 continue; 184 185 if (get_chip_ID(BSPID) == get_chip_ID(i)) 186 cores_per_chip++; 187 else 188 continue; 189 190 if (get_core_number_within_chip(BSPID) == 191 get_core_number_within_chip(i)) 192 threads_per_core++; 193 } 194 195 cores_per_chip /= threads_per_core; 196 chips_per_package = ncpus / (cores_per_chip * threads_per_core); 197 198 if (bootverbose) 199 kprintf("CPU Topology: cores_per_chip: %d; threads_per_core: %d; chips_per_package: %d;\n", 200 cores_per_chip, threads_per_core, chips_per_package); 201 202 if (threads_per_core > 1) { /* HT available - 4 levels */ 203 204 children_no_per_level[0] = chips_per_package; 205 children_no_per_level[1] = cores_per_chip; 206 children_no_per_level[2] = threads_per_core; 207 children_no_per_level[3] = 0; 208 209 level_types[0] = PACKAGE_LEVEL; 210 level_types[1] = CHIP_LEVEL; 211 level_types[2] = CORE_LEVEL; 212 level_types[3] = THREAD_LEVEL; 213 214 build_topology_tree(children_no_per_level, 215 level_types, 216 0, 217 root, 218 &last_free_node, 219 &apicid); 220 221 cpu_topology_levels_number = 4; 222 223 } else if (cores_per_chip > 1) { /* No HT available - 3 levels */ 224 225 children_no_per_level[0] = chips_per_package; 226 children_no_per_level[1] = cores_per_chip; 227 children_no_per_level[2] = 0; 228 229 level_types[0] = PACKAGE_LEVEL; 230 level_types[1] = CHIP_LEVEL; 231 level_types[2] = CORE_LEVEL; 232 233 build_topology_tree(children_no_per_level, 234 level_types, 235 0, 236 root, 237 &last_free_node, 238 &apicid); 239 240 cpu_topology_levels_number = 3; 241 242 } else { /* No HT and no Multi-Core - 2 levels */ 243 244 children_no_per_level[0] = chips_per_package; 245 children_no_per_level[1] = 0; 246 247 level_types[0] = PACKAGE_LEVEL; 248 level_types[1] = CHIP_LEVEL; 249 250 build_topology_tree(children_no_per_level, 251 level_types, 252 0, 253 root, 254 &last_free_node, 255 &apicid); 256 257 cpu_topology_levels_number = 2; 258 259 } 260 261 cpu_root_node = root; 262 263 264 #if defined(__x86_64__) && !defined(_KERNEL_VIRTUAL) 265 if (fix_amd_topology() == 0) { 266 int visited[MAXCPU], i, j, pos, cpuid; 267 cpu_node_t *leaf, *parent; 268 269 bzero(visited, MAXCPU * sizeof(int)); 270 271 for (i = 0; i < ncpus; i++) { 272 if (visited[i] == 0) { 273 pos = 0; 274 visited[i] = 1; 275 leaf = get_cpu_node_by_cpuid(i); 276 277 if (leaf->type == CORE_LEVEL) { 278 parent = leaf->parent_node; 279 280 last_free_node->child_node[0] = leaf; 281 last_free_node->child_no = 1; 282 last_free_node->members = leaf->members; 283 last_free_node->compute_unit_id = leaf->compute_unit_id; 284 last_free_node->parent_node = parent; 285 last_free_node->type = CORE_LEVEL; 286 287 288 for (j = 0; j < parent->child_no; j++) { 289 if (parent->child_node[j] != leaf) { 290 291 cpuid = BSFCPUMASK(parent->child_node[j]->members); 292 if (visited[cpuid] == 0 && 293 parent->child_node[j]->compute_unit_id == leaf->compute_unit_id) { 294 295 last_free_node->child_node[last_free_node->child_no] = parent->child_node[j]; 296 last_free_node->child_no++; 297 CPUMASK_ORMASK(last_free_node->members, parent->child_node[j]->members); 298 299 parent->child_node[j]->type = THREAD_LEVEL; 300 parent->child_node[j]->parent_node = last_free_node; 301 visited[cpuid] = 1; 302 303 migrate_elements(parent->child_node, parent->child_no, j); 304 parent->child_no--; 305 j--; 306 } 307 } else { 308 pos = j; 309 } 310 } 311 if (last_free_node->child_no > 1) { 312 parent->child_node[pos] = last_free_node; 313 leaf->type = THREAD_LEVEL; 314 leaf->parent_node = last_free_node; 315 last_free_node++; 316 } 317 } 318 } 319 } 320 } 321 #endif 322 } 323 324 /* Recursive function helper to print the CPU topology tree */ 325 static void 326 print_cpu_topology_tree_sysctl_helper(cpu_node_t *node, 327 struct sbuf *sb, 328 char * buf, 329 int buf_len, 330 int last) 331 { 332 int i; 333 int bsr_member; 334 335 sbuf_bcat(sb, buf, buf_len); 336 if (last) { 337 sbuf_printf(sb, "\\-"); 338 buf[buf_len] = ' ';buf_len++; 339 buf[buf_len] = ' ';buf_len++; 340 } else { 341 sbuf_printf(sb, "|-"); 342 buf[buf_len] = '|';buf_len++; 343 buf[buf_len] = ' ';buf_len++; 344 } 345 346 bsr_member = BSRCPUMASK(node->members); 347 348 if (node->type == PACKAGE_LEVEL) { 349 sbuf_printf(sb,"PACKAGE MEMBERS: "); 350 } else if (node->type == CHIP_LEVEL) { 351 sbuf_printf(sb,"CHIP ID %d: ", 352 get_chip_ID(bsr_member)); 353 } else if (node->type == CORE_LEVEL) { 354 if (node->compute_unit_id != (uint8_t)-1) { 355 sbuf_printf(sb,"Compute Unit ID %d: ", 356 node->compute_unit_id); 357 } else { 358 sbuf_printf(sb,"CORE ID %d: ", 359 get_core_number_within_chip(bsr_member)); 360 } 361 } else if (node->type == THREAD_LEVEL) { 362 if (node->compute_unit_id != (uint8_t)-1) { 363 sbuf_printf(sb,"CORE ID %d: ", 364 get_core_number_within_chip(bsr_member)); 365 } else { 366 sbuf_printf(sb,"THREAD ID %d: ", 367 get_logical_CPU_number_within_core(bsr_member)); 368 } 369 } else { 370 sbuf_printf(sb,"UNKNOWN: "); 371 } 372 CPUSET_FOREACH(i, node->members) { 373 sbuf_printf(sb,"cpu%d ", i); 374 } 375 376 sbuf_printf(sb,"\n"); 377 378 for (i = 0; i < node->child_no; i++) { 379 print_cpu_topology_tree_sysctl_helper(node->child_node[i], 380 sb, buf, buf_len, i == (node->child_no -1)); 381 } 382 } 383 384 /* SYSCTL PROCEDURE for printing the CPU Topology tree */ 385 static int 386 print_cpu_topology_tree_sysctl(SYSCTL_HANDLER_ARGS) 387 { 388 struct sbuf *sb; 389 int ret; 390 char buf[INDENT_BUF_SIZE]; 391 392 KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized")); 393 394 sb = sbuf_new(NULL, NULL, 500, SBUF_AUTOEXTEND); 395 if (sb == NULL) { 396 return (ENOMEM); 397 } 398 sbuf_printf(sb,"\n"); 399 print_cpu_topology_tree_sysctl_helper(cpu_root_node, sb, buf, 0, 1); 400 401 sbuf_finish(sb); 402 403 ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb)); 404 405 sbuf_delete(sb); 406 407 return ret; 408 } 409 410 /* SYSCTL PROCEDURE for printing the CPU Topology level description */ 411 static int 412 print_cpu_topology_level_description_sysctl(SYSCTL_HANDLER_ARGS) 413 { 414 struct sbuf *sb; 415 int ret; 416 417 sb = sbuf_new(NULL, NULL, 500, SBUF_AUTOEXTEND); 418 if (sb == NULL) 419 return (ENOMEM); 420 421 if (cpu_topology_levels_number == 4) /* HT available */ 422 sbuf_printf(sb, "0 - thread; 1 - core; 2 - socket; 3 - anything"); 423 else if (cpu_topology_levels_number == 3) /* No HT available */ 424 sbuf_printf(sb, "0 - core; 1 - socket; 2 - anything"); 425 else if (cpu_topology_levels_number == 2) /* No HT and no Multi-Core */ 426 sbuf_printf(sb, "0 - socket; 1 - anything"); 427 else 428 sbuf_printf(sb, "Unknown"); 429 430 sbuf_finish(sb); 431 432 ret = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb)); 433 434 sbuf_delete(sb); 435 436 return ret; 437 } 438 439 /* Find a cpu_node_t by a mask */ 440 static cpu_node_t * 441 get_cpu_node_by_cpumask(cpu_node_t * node, 442 cpumask_t mask) { 443 444 cpu_node_t * found = NULL; 445 int i; 446 447 if (CPUMASK_CMPMASKEQ(node->members, mask)) 448 return node; 449 450 for (i = 0; i < node->child_no; i++) { 451 found = get_cpu_node_by_cpumask(node->child_node[i], mask); 452 if (found != NULL) { 453 return found; 454 } 455 } 456 return NULL; 457 } 458 459 cpu_node_t * 460 get_cpu_node_by_cpuid(int cpuid) { 461 cpumask_t mask; 462 463 CPUMASK_ASSBIT(mask, cpuid); 464 465 KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized")); 466 467 return get_cpu_node_by_cpumask(cpu_root_node, mask); 468 } 469 470 /* Get the mask of siblings for level_type of a cpuid */ 471 cpumask_t 472 get_cpumask_from_level(int cpuid, 473 uint8_t level_type) 474 { 475 cpu_node_t * node; 476 cpumask_t mask; 477 478 CPUMASK_ASSBIT(mask, cpuid); 479 480 KASSERT(cpu_root_node != NULL, ("cpu_root_node isn't initialized")); 481 482 node = get_cpu_node_by_cpumask(cpu_root_node, mask); 483 484 if (node == NULL) { 485 CPUMASK_ASSZERO(mask); 486 return mask; 487 } 488 489 while (node != NULL) { 490 if (node->type == level_type) { 491 return node->members; 492 } 493 node = node->parent_node; 494 } 495 CPUMASK_ASSZERO(mask); 496 497 return mask; 498 } 499 500 /* init pcpu_sysctl structure info */ 501 static void 502 init_pcpu_topology_sysctl(void) 503 { 504 int cpu; 505 int i; 506 cpumask_t mask; 507 struct sbuf sb; 508 509 for (i = 0; i < ncpus; i++) { 510 511 sbuf_new(&sb, pcpu_sysctl[i].cpu_name, 512 sizeof(pcpu_sysctl[i].cpu_name), SBUF_FIXEDLEN); 513 sbuf_printf(&sb,"cpu%d", i); 514 sbuf_finish(&sb); 515 516 517 /* Get physical siblings */ 518 mask = get_cpumask_from_level(i, CHIP_LEVEL); 519 if (CPUMASK_TESTZERO(mask)) { 520 pcpu_sysctl[i].physical_id = INVALID_ID; 521 continue; 522 } 523 524 sbuf_new(&sb, pcpu_sysctl[i].physical_siblings, 525 sizeof(pcpu_sysctl[i].physical_siblings), SBUF_FIXEDLEN); 526 CPUSET_FOREACH(cpu, mask) { 527 sbuf_printf(&sb,"cpu%d ", cpu); 528 } 529 sbuf_trim(&sb); 530 sbuf_finish(&sb); 531 532 pcpu_sysctl[i].physical_id = get_chip_ID(i); 533 534 /* Get core siblings */ 535 mask = get_cpumask_from_level(i, CORE_LEVEL); 536 if (CPUMASK_TESTZERO(mask)) { 537 pcpu_sysctl[i].core_id = INVALID_ID; 538 continue; 539 } 540 541 sbuf_new(&sb, pcpu_sysctl[i].core_siblings, 542 sizeof(pcpu_sysctl[i].core_siblings), SBUF_FIXEDLEN); 543 CPUSET_FOREACH(cpu, mask) { 544 sbuf_printf(&sb,"cpu%d ", cpu); 545 } 546 sbuf_trim(&sb); 547 sbuf_finish(&sb); 548 549 pcpu_sysctl[i].core_id = get_core_number_within_chip(i); 550 551 } 552 } 553 554 /* Build SYSCTL structure for revealing 555 * the CPU Topology to user-space. 556 */ 557 static void 558 build_sysctl_cpu_topology(void) 559 { 560 int i; 561 struct sbuf sb; 562 563 /* SYSCTL new leaf for "cpu_topology" */ 564 sysctl_ctx_init(&cpu_topology_sysctl_ctx); 565 cpu_topology_sysctl_tree = SYSCTL_ADD_NODE(&cpu_topology_sysctl_ctx, 566 SYSCTL_STATIC_CHILDREN(_hw), 567 OID_AUTO, 568 "cpu_topology", 569 CTLFLAG_RD, 0, ""); 570 571 /* SYSCTL cpu_topology "tree" entry */ 572 SYSCTL_ADD_PROC(&cpu_topology_sysctl_ctx, 573 SYSCTL_CHILDREN(cpu_topology_sysctl_tree), 574 OID_AUTO, "tree", CTLTYPE_STRING | CTLFLAG_RD, 575 NULL, 0, print_cpu_topology_tree_sysctl, "A", 576 "Tree print of CPU topology"); 577 578 /* SYSCTL cpu_topology "level_description" entry */ 579 SYSCTL_ADD_PROC(&cpu_topology_sysctl_ctx, 580 SYSCTL_CHILDREN(cpu_topology_sysctl_tree), 581 OID_AUTO, "level_description", CTLTYPE_STRING | CTLFLAG_RD, 582 NULL, 0, print_cpu_topology_level_description_sysctl, "A", 583 "Level description of CPU topology"); 584 585 /* SYSCTL cpu_topology "members" entry */ 586 sbuf_new(&sb, cpu_topology_members, 587 sizeof(cpu_topology_members), SBUF_FIXEDLEN); 588 CPUSET_FOREACH(i, cpu_root_node->members) { 589 sbuf_printf(&sb,"cpu%d ", i); 590 } 591 sbuf_trim(&sb); 592 sbuf_finish(&sb); 593 SYSCTL_ADD_STRING(&cpu_topology_sysctl_ctx, 594 SYSCTL_CHILDREN(cpu_topology_sysctl_tree), 595 OID_AUTO, "members", CTLFLAG_RD, 596 cpu_topology_members, 0, 597 "Members of the CPU Topology"); 598 599 /* SYSCTL per_cpu info */ 600 for (i = 0; i < ncpus; i++) { 601 /* New leaf : hw.cpu_topology.cpux */ 602 sysctl_ctx_init(&pcpu_sysctl[i].sysctl_ctx); 603 pcpu_sysctl[i].sysctl_tree = SYSCTL_ADD_NODE(&pcpu_sysctl[i].sysctl_ctx, 604 SYSCTL_CHILDREN(cpu_topology_sysctl_tree), 605 OID_AUTO, 606 pcpu_sysctl[i].cpu_name, 607 CTLFLAG_RD, 0, ""); 608 609 /* Check if the physical_id found is valid */ 610 if (pcpu_sysctl[i].physical_id == INVALID_ID) { 611 continue; 612 } 613 614 /* Add physical id info */ 615 SYSCTL_ADD_INT(&pcpu_sysctl[i].sysctl_ctx, 616 SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree), 617 OID_AUTO, "physical_id", CTLFLAG_RD, 618 &pcpu_sysctl[i].physical_id, 0, 619 "Physical ID"); 620 621 /* Add physical siblings */ 622 SYSCTL_ADD_STRING(&pcpu_sysctl[i].sysctl_ctx, 623 SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree), 624 OID_AUTO, "physical_siblings", CTLFLAG_RD, 625 pcpu_sysctl[i].physical_siblings, 0, 626 "Physical siblings"); 627 628 /* Check if the core_id found is valid */ 629 if (pcpu_sysctl[i].core_id == INVALID_ID) { 630 continue; 631 } 632 633 /* Add core id info */ 634 SYSCTL_ADD_INT(&pcpu_sysctl[i].sysctl_ctx, 635 SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree), 636 OID_AUTO, "core_id", CTLFLAG_RD, 637 &pcpu_sysctl[i].core_id, 0, 638 "Core ID"); 639 640 /*Add core siblings */ 641 SYSCTL_ADD_STRING(&pcpu_sysctl[i].sysctl_ctx, 642 SYSCTL_CHILDREN(pcpu_sysctl[i].sysctl_tree), 643 OID_AUTO, "core_siblings", CTLFLAG_RD, 644 pcpu_sysctl[i].core_siblings, 0, 645 "Core siblings"); 646 } 647 } 648 649 /* Build the CPU Topology and SYSCTL Topology tree */ 650 static void 651 init_cpu_topology(void) 652 { 653 build_cpu_topology(); 654 655 init_pcpu_topology_sysctl(); 656 build_sysctl_cpu_topology(); 657 } 658 SYSINIT(cpu_topology, SI_BOOT2_CPU_TOPOLOGY, SI_ORDER_FIRST, 659 init_cpu_topology, NULL) 660