1 /* Subroutines for the gcc driver. 2 Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3, or (at your option) 9 any later version. 10 11 GCC is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING3. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20 #include "config.h" 21 #include "system.h" 22 #include "coretypes.h" 23 #include "tm.h" 24 #include <stdlib.h> 25 26 const char *host_detect_local_cpu (int argc, const char **argv); 27 28 #ifdef __GNUC__ 29 #include "cpuid.h" 30 31 struct cache_desc 32 { 33 unsigned sizekb; 34 unsigned assoc; 35 unsigned line; 36 }; 37 38 /* Returns command line parameters that describe size and 39 cache line size of the processor caches. */ 40 41 static char * 42 describe_cache (struct cache_desc level1, struct cache_desc level2) 43 { 44 char size[100], line[100], size2[100]; 45 46 /* At the moment, gcc does not use the information 47 about the associativity of the cache. */ 48 49 snprintf (size, sizeof (size), 50 "--param l1-cache-size=%u ", level1.sizekb); 51 snprintf (line, sizeof (line), 52 "--param l1-cache-line-size=%u ", level1.line); 53 54 snprintf (size2, sizeof (size2), 55 "--param l2-cache-size=%u ", level2.sizekb); 56 57 return concat (size, line, size2, NULL); 58 } 59 60 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */ 61 62 static void 63 detect_l2_cache (struct cache_desc *level2) 64 { 65 unsigned eax, ebx, ecx, edx; 66 unsigned assoc; 67 68 __cpuid (0x80000006, eax, ebx, ecx, edx); 69 70 level2->sizekb = (ecx >> 16) & 0xffff; 71 level2->line = ecx & 0xff; 72 73 assoc = (ecx >> 12) & 0xf; 74 if (assoc == 6) 75 assoc = 8; 76 else if (assoc == 8) 77 assoc = 16; 78 else if (assoc >= 0xa && assoc <= 0xc) 79 assoc = 32 + (assoc - 0xa) * 16; 80 else if (assoc >= 0xd && assoc <= 0xe) 81 assoc = 96 + (assoc - 0xd) * 32; 82 83 level2->assoc = assoc; 84 } 85 86 /* Returns the description of caches for an AMD processor. */ 87 88 static const char * 89 detect_caches_amd (unsigned max_ext_level) 90 { 91 unsigned eax, ebx, ecx, edx; 92 93 struct cache_desc level1, level2 = {0, 0, 0}; 94 95 if (max_ext_level < 0x80000005) 96 return ""; 97 98 __cpuid (0x80000005, eax, ebx, ecx, edx); 99 100 level1.sizekb = (ecx >> 24) & 0xff; 101 level1.assoc = (ecx >> 16) & 0xff; 102 level1.line = ecx & 0xff; 103 104 if (max_ext_level >= 0x80000006) 105 detect_l2_cache (&level2); 106 107 return describe_cache (level1, level2); 108 } 109 110 /* Decodes the size, the associativity and the cache line size of 111 L1/L2 caches of an Intel processor. Values are based on 112 "Intel Processor Identification and the CPUID Instruction" 113 [Application Note 485], revision -032, December 2007. */ 114 115 static void 116 decode_caches_intel (unsigned reg, bool xeon_mp, 117 struct cache_desc *level1, struct cache_desc *level2) 118 { 119 int i; 120 121 for (i = 24; i >= 0; i -= 8) 122 switch ((reg >> i) & 0xff) 123 { 124 case 0x0a: 125 level1->sizekb = 8; level1->assoc = 2; level1->line = 32; 126 break; 127 case 0x0c: 128 level1->sizekb = 16; level1->assoc = 4; level1->line = 32; 129 break; 130 case 0x2c: 131 level1->sizekb = 32; level1->assoc = 8; level1->line = 64; 132 break; 133 case 0x39: 134 level2->sizekb = 128; level2->assoc = 4; level2->line = 64; 135 break; 136 case 0x3a: 137 level2->sizekb = 192; level2->assoc = 6; level2->line = 64; 138 break; 139 case 0x3b: 140 level2->sizekb = 128; level2->assoc = 2; level2->line = 64; 141 break; 142 case 0x3c: 143 level2->sizekb = 256; level2->assoc = 4; level2->line = 64; 144 break; 145 case 0x3d: 146 level2->sizekb = 384; level2->assoc = 6; level2->line = 64; 147 break; 148 case 0x3e: 149 level2->sizekb = 512; level2->assoc = 4; level2->line = 64; 150 break; 151 case 0x41: 152 level2->sizekb = 128; level2->assoc = 4; level2->line = 32; 153 break; 154 case 0x42: 155 level2->sizekb = 256; level2->assoc = 4; level2->line = 32; 156 break; 157 case 0x43: 158 level2->sizekb = 512; level2->assoc = 4; level2->line = 32; 159 break; 160 case 0x44: 161 level2->sizekb = 1024; level2->assoc = 4; level2->line = 32; 162 break; 163 case 0x45: 164 level2->sizekb = 2048; level2->assoc = 4; level2->line = 32; 165 break; 166 case 0x49: 167 if (xeon_mp) 168 break; 169 level2->sizekb = 4096; level2->assoc = 16; level2->line = 64; 170 break; 171 case 0x4e: 172 level2->sizekb = 6144; level2->assoc = 24; level2->line = 64; 173 break; 174 case 0x60: 175 level1->sizekb = 16; level1->assoc = 8; level1->line = 64; 176 break; 177 case 0x66: 178 level1->sizekb = 8; level1->assoc = 4; level1->line = 64; 179 break; 180 case 0x67: 181 level1->sizekb = 16; level1->assoc = 4; level1->line = 64; 182 break; 183 case 0x68: 184 level1->sizekb = 32; level1->assoc = 4; level1->line = 64; 185 break; 186 case 0x78: 187 level2->sizekb = 1024; level2->assoc = 4; level2->line = 64; 188 break; 189 case 0x79: 190 level2->sizekb = 128; level2->assoc = 8; level2->line = 64; 191 break; 192 case 0x7a: 193 level2->sizekb = 256; level2->assoc = 8; level2->line = 64; 194 break; 195 case 0x7b: 196 level2->sizekb = 512; level2->assoc = 8; level2->line = 64; 197 break; 198 case 0x7c: 199 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64; 200 break; 201 case 0x7d: 202 level2->sizekb = 2048; level2->assoc = 8; level2->line = 64; 203 break; 204 case 0x7f: 205 level2->sizekb = 512; level2->assoc = 2; level2->line = 64; 206 break; 207 case 0x82: 208 level2->sizekb = 256; level2->assoc = 8; level2->line = 32; 209 break; 210 case 0x83: 211 level2->sizekb = 512; level2->assoc = 8; level2->line = 32; 212 break; 213 case 0x84: 214 level2->sizekb = 1024; level2->assoc = 8; level2->line = 32; 215 break; 216 case 0x85: 217 level2->sizekb = 2048; level2->assoc = 8; level2->line = 32; 218 break; 219 case 0x86: 220 level2->sizekb = 512; level2->assoc = 4; level2->line = 64; 221 break; 222 case 0x87: 223 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64; 224 225 default: 226 break; 227 } 228 } 229 230 /* Detect cache parameters using CPUID function 2. */ 231 232 static void 233 detect_caches_cpuid2 (bool xeon_mp, 234 struct cache_desc *level1, struct cache_desc *level2) 235 { 236 unsigned regs[4]; 237 int nreps, i; 238 239 __cpuid (2, regs[0], regs[1], regs[2], regs[3]); 240 241 nreps = regs[0] & 0x0f; 242 regs[0] &= ~0x0f; 243 244 while (--nreps >= 0) 245 { 246 for (i = 0; i < 4; i++) 247 if (regs[i] && !((regs[i] >> 31) & 1)) 248 decode_caches_intel (regs[i], xeon_mp, level1, level2); 249 250 if (nreps) 251 __cpuid (2, regs[0], regs[1], regs[2], regs[3]); 252 } 253 } 254 255 /* Detect cache parameters using CPUID function 4. This 256 method doesn't require hardcoded tables. */ 257 258 enum cache_type 259 { 260 CACHE_END = 0, 261 CACHE_DATA = 1, 262 CACHE_INST = 2, 263 CACHE_UNIFIED = 3 264 }; 265 266 static void 267 detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2, 268 struct cache_desc *level3) 269 { 270 struct cache_desc *cache; 271 272 unsigned eax, ebx, ecx, edx; 273 int count; 274 275 for (count = 0;; count++) 276 { 277 __cpuid_count(4, count, eax, ebx, ecx, edx); 278 switch (eax & 0x1f) 279 { 280 case CACHE_END: 281 return; 282 case CACHE_DATA: 283 case CACHE_UNIFIED: 284 { 285 switch ((eax >> 5) & 0x07) 286 { 287 case 1: 288 cache = level1; 289 break; 290 case 2: 291 cache = level2; 292 break; 293 case 3: 294 cache = level3; 295 break; 296 default: 297 cache = NULL; 298 } 299 300 if (cache) 301 { 302 unsigned sets = ecx + 1; 303 unsigned part = ((ebx >> 12) & 0x03ff) + 1; 304 305 cache->assoc = ((ebx >> 22) & 0x03ff) + 1; 306 cache->line = (ebx & 0x0fff) + 1; 307 308 cache->sizekb = (cache->assoc * part 309 * cache->line * sets) / 1024; 310 } 311 } 312 default: 313 break; 314 } 315 } 316 } 317 318 /* Returns the description of caches for an Intel processor. */ 319 320 static const char * 321 detect_caches_intel (bool xeon_mp, unsigned max_level, 322 unsigned max_ext_level, unsigned *l2sizekb) 323 { 324 struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0}; 325 326 if (max_level >= 4) 327 detect_caches_cpuid4 (&level1, &level2, &level3); 328 else if (max_level >= 2) 329 detect_caches_cpuid2 (xeon_mp, &level1, &level2); 330 else 331 return ""; 332 333 if (level1.sizekb == 0) 334 return ""; 335 336 /* Let the L3 replace the L2. This assumes inclusive caches 337 and single threaded program for now. */ 338 if (level3.sizekb) 339 level2 = level3; 340 341 /* Intel CPUs are equipped with AMD style L2 cache info. Try this 342 method if other methods fail to provide L2 cache parameters. */ 343 if (level2.sizekb == 0 && max_ext_level >= 0x80000006) 344 detect_l2_cache (&level2); 345 346 *l2sizekb = level2.sizekb; 347 348 return describe_cache (level1, level2); 349 } 350 351 enum vendor_signatures 352 { 353 SIG_INTEL = 0x756e6547 /* Genu */, 354 SIG_AMD = 0x68747541 /* Auth */ 355 }; 356 357 enum processor_signatures 358 { 359 SIG_GEODE = 0x646f6547 /* Geod */ 360 }; 361 362 /* This will be called by the spec parser in gcc.c when it sees 363 a %:local_cpu_detect(args) construct. Currently it will be called 364 with either "arch" or "tune" as argument depending on if -march=native 365 or -mtune=native is to be substituted. 366 367 It returns a string containing new command line parameters to be 368 put at the place of the above two options, depending on what CPU 369 this is executed. E.g. "-march=k8" on an AMD64 machine 370 for -march=native. 371 372 ARGC and ARGV are set depending on the actual arguments given 373 in the spec. */ 374 375 const char *host_detect_local_cpu (int argc, const char **argv) 376 { 377 enum processor_type processor = PROCESSOR_I386; 378 const char *cpu = "i386"; 379 380 const char *cache = ""; 381 const char *options = ""; 382 383 unsigned int eax, ebx, ecx, edx; 384 385 unsigned int max_level, ext_level; 386 387 unsigned int vendor; 388 unsigned int model, family; 389 390 unsigned int has_sse3, has_ssse3, has_cmpxchg16b; 391 unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2; 392 393 /* Extended features */ 394 unsigned int has_lahf_lm = 0, has_sse4a = 0; 395 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0; 396 unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0; 397 unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0; 398 unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0; 399 400 bool arch; 401 402 unsigned int l2sizekb = 0; 403 404 if (argc < 1) 405 return NULL; 406 407 arch = !strcmp (argv[0], "arch"); 408 409 if (!arch && strcmp (argv[0], "tune")) 410 return NULL; 411 412 max_level = __get_cpuid_max (0, &vendor); 413 if (max_level < 1) 414 goto done; 415 416 __cpuid (1, eax, ebx, ecx, edx); 417 418 model = (eax >> 4) & 0x0f; 419 family = (eax >> 8) & 0x0f; 420 if (vendor == SIG_INTEL) 421 { 422 unsigned int extended_model, extended_family; 423 424 extended_model = (eax >> 12) & 0xf0; 425 extended_family = (eax >> 20) & 0xff; 426 if (family == 0x0f) 427 { 428 family += extended_family; 429 model += extended_model; 430 } 431 else if (family == 0x06) 432 model += extended_model; 433 } 434 435 has_sse3 = ecx & bit_SSE3; 436 has_ssse3 = ecx & bit_SSSE3; 437 has_sse4_1 = ecx & bit_SSE4_1; 438 has_sse4_2 = ecx & bit_SSE4_2; 439 /* Don't check XCR0[2] - I think that can be 'lazy enabled' by the OS */ 440 has_avx = (ecx & bit_AVX) && (ecx & bit_OSXSAVE); 441 has_cmpxchg16b = ecx & bit_CMPXCHG16B; 442 has_movbe = ecx & bit_MOVBE; 443 has_popcnt = ecx & bit_POPCNT; 444 has_aes = ecx & bit_AES; 445 has_pclmul = ecx & bit_PCLMUL; 446 447 has_cmpxchg8b = edx & bit_CMPXCHG8B; 448 has_cmov = edx & bit_CMOV; 449 has_mmx = edx & bit_MMX; 450 has_sse = edx & bit_SSE; 451 has_sse2 = edx & bit_SSE2; 452 453 /* Check cpuid level of extended features. */ 454 __cpuid (0x80000000, ext_level, ebx, ecx, edx); 455 456 if (ext_level > 0x80000000) 457 { 458 __cpuid (0x80000001, eax, ebx, ecx, edx); 459 460 has_lahf_lm = ecx & bit_LAHF_LM; 461 has_sse4a = ecx & bit_SSE4a; 462 has_abm = ecx & bit_ABM; 463 has_lwp = ecx & bit_LWP; 464 465 has_longmode = edx & bit_LM; 466 has_3dnowp = edx & bit_3DNOWP; 467 has_3dnow = edx & bit_3DNOW; 468 } 469 470 if (!arch) 471 { 472 if (vendor == SIG_AMD) 473 cache = detect_caches_amd (ext_level); 474 else if (vendor == SIG_INTEL) 475 { 476 bool xeon_mp = (family == 15 && model == 6); 477 cache = detect_caches_intel (xeon_mp, max_level, 478 ext_level, &l2sizekb); 479 } 480 } 481 482 if (vendor == SIG_AMD) 483 { 484 unsigned int name; 485 486 /* Detect geode processor by its processor signature. */ 487 if (ext_level > 0x80000001) 488 __cpuid (0x80000002, name, ebx, ecx, edx); 489 else 490 name = 0; 491 492 if (name == SIG_GEODE) 493 processor = PROCESSOR_GEODE; 494 else if (has_sse4a) 495 processor = PROCESSOR_AMDFAM10; 496 else if (has_sse2 || has_longmode) 497 processor = PROCESSOR_K8; 498 else if (has_3dnowp && family == 6) 499 processor = PROCESSOR_ATHLON; 500 else if (has_mmx) 501 processor = PROCESSOR_K6; 502 else 503 processor = PROCESSOR_PENTIUM; 504 } 505 else 506 { 507 switch (family) 508 { 509 case 4: 510 processor = PROCESSOR_I486; 511 break; 512 case 5: 513 processor = PROCESSOR_PENTIUM; 514 break; 515 case 6: 516 processor = PROCESSOR_PENTIUMPRO; 517 break; 518 case 15: 519 processor = PROCESSOR_PENTIUM4; 520 break; 521 default: 522 /* We have no idea. */ 523 processor = PROCESSOR_GENERIC32; 524 } 525 } 526 527 switch (processor) 528 { 529 case PROCESSOR_I386: 530 /* Default. */ 531 break; 532 case PROCESSOR_I486: 533 cpu = "i486"; 534 break; 535 case PROCESSOR_PENTIUM: 536 if (arch && has_mmx) 537 cpu = "pentium-mmx"; 538 else 539 cpu = "pentium"; 540 break; 541 case PROCESSOR_PENTIUMPRO: 542 switch (model) 543 { 544 case 0x1c: 545 case 0x26: 546 /* Atom. */ 547 cpu = "atom"; 548 break; 549 case 0x1a: 550 case 0x1e: 551 case 0x1f: 552 case 0x2e: 553 /* FIXME: Optimize for Nehalem. */ 554 cpu = "core2"; 555 break; 556 case 0x25: 557 case 0x2f: 558 /* FIXME: Optimize for Westmere. */ 559 cpu = "core2"; 560 break; 561 case 0x17: 562 case 0x1d: 563 /* Penryn. FIXME: -mtune=core2 is slower than -mtune=generic */ 564 cpu = "core2"; 565 break; 566 case 0x0f: 567 /* Merom. FIXME: -mtune=core2 is slower than -mtune=generic */ 568 cpu = "core2"; 569 break; 570 default: 571 if (arch) 572 { 573 if (has_ssse3) 574 /* If it is an unknown CPU with SSSE3, assume Core 2. */ 575 cpu = "core2"; 576 else if (has_sse3) 577 /* It is Core Duo. */ 578 cpu = "pentium-m"; 579 else if (has_sse2) 580 /* It is Pentium M. */ 581 cpu = "pentium-m"; 582 else if (has_sse) 583 /* It is Pentium III. */ 584 cpu = "pentium3"; 585 else if (has_mmx) 586 /* It is Pentium II. */ 587 cpu = "pentium2"; 588 else 589 /* Default to Pentium Pro. */ 590 cpu = "pentiumpro"; 591 } 592 else 593 /* For -mtune, we default to -mtune=generic. */ 594 cpu = "generic"; 595 break; 596 } 597 break; 598 case PROCESSOR_PENTIUM4: 599 if (has_sse3) 600 { 601 if (has_longmode) 602 cpu = "nocona"; 603 else 604 cpu = "prescott"; 605 } 606 else 607 cpu = "pentium4"; 608 break; 609 case PROCESSOR_GEODE: 610 cpu = "geode"; 611 break; 612 case PROCESSOR_K6: 613 if (arch && has_3dnow) 614 cpu = "k6-3"; 615 else 616 cpu = "k6"; 617 break; 618 case PROCESSOR_ATHLON: 619 if (arch && has_sse) 620 cpu = "athlon-4"; 621 else 622 cpu = "athlon"; 623 break; 624 case PROCESSOR_K8: 625 if (arch && has_sse3) 626 cpu = "k8-sse3"; 627 else 628 cpu = "k8"; 629 break; 630 case PROCESSOR_AMDFAM10: 631 cpu = "amdfam10"; 632 break; 633 634 default: 635 /* Use something reasonable. */ 636 if (arch) 637 { 638 if (has_ssse3) 639 cpu = "core2"; 640 else if (has_sse3) 641 { 642 if (has_longmode) 643 cpu = "nocona"; 644 else 645 cpu = "prescott"; 646 } 647 else if (has_sse2) 648 cpu = "pentium4"; 649 else if (has_cmov) 650 cpu = "pentiumpro"; 651 else if (has_mmx) 652 cpu = "pentium-mmx"; 653 else if (has_cmpxchg8b) 654 cpu = "pentium"; 655 } 656 else 657 cpu = "generic"; 658 } 659 660 if (arch) 661 { 662 if (has_cmpxchg16b) 663 options = concat (options, " -mcx16", NULL); 664 if (has_lahf_lm) 665 options = concat (options, " -msahf", NULL); 666 if (has_movbe) 667 options = concat (options, " -mmovbe", NULL); 668 if (has_aes) 669 options = concat (options, " -maes", NULL); 670 if (has_pclmul) 671 options = concat (options, " -mpclmul", NULL); 672 if (has_popcnt) 673 options = concat (options, " -mpopcnt", NULL); 674 if (has_abm) 675 options = concat (options, " -mabm", NULL); 676 if (has_lwp) 677 options = concat (options, " -mlwp", NULL); 678 679 if (has_avx) 680 options = concat (options, " -mavx", NULL); 681 else if (has_sse4_2) 682 options = concat (options, " -msse4.2", NULL); 683 else if (has_sse4_1) 684 options = concat (options, " -msse4.1", NULL); 685 } 686 687 done: 688 return concat (cache, "-m", argv[0], "=", cpu, options, NULL); 689 } 690 #else 691 692 /* If we aren't compiling with GCC then the driver will just ignore 693 -march and -mtune "native" target and will leave to the newly 694 built compiler to generate code for its default target. */ 695 696 const char *host_detect_local_cpu (int argc ATTRIBUTE_UNUSED, 697 const char **argv ATTRIBUTE_UNUSED) 698 { 699 return NULL; 700 } 701 #endif /* __GNUC__ */ 702