1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2018 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <fcntl.h> 8 #include <string.h> 9 #include <unistd.h> 10 #include <limits.h> 11 #include <errno.h> 12 #include <inttypes.h> 13 14 #include <rte_memcpy.h> 15 #include <rte_stdatomic.h> 16 17 #include "rte_power_pmd_mgmt.h" 18 #include "intel_pstate_cpufreq.h" 19 #include "power_common.h" 20 21 /* macros used for rounding frequency to nearest 100000 */ 22 #define FREQ_ROUNDING_DELTA 50000 23 #define ROUND_FREQ_TO_N_100000 100000 24 25 #define BUS_FREQ 100000 26 27 #define POWER_GOVERNOR_PERF "performance" 28 #define POWER_SYSFILE_MAX_FREQ \ 29 "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_max_freq" 30 #define POWER_SYSFILE_MIN_FREQ \ 31 "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_min_freq" 32 #define POWER_SYSFILE_CUR_FREQ \ 33 "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq" 34 #define POWER_SYSFILE_BASE_MAX_FREQ \ 35 "/sys/devices/system/cpu/cpu%u/cpufreq/cpuinfo_max_freq" 36 #define POWER_SYSFILE_BASE_MIN_FREQ \ 37 "/sys/devices/system/cpu/cpu%u/cpufreq/cpuinfo_min_freq" 38 #define POWER_SYSFILE_BASE_FREQ \ 39 "/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency" 40 #define POWER_SYSFILE_TURBO_PCT \ 41 "/sys/devices/system/cpu/intel_pstate/turbo_pct" 42 #define POWER_PSTATE_DRIVER "intel_pstate" 43 44 45 enum power_state { 46 POWER_IDLE = 0, 47 POWER_ONGOING, 48 POWER_USED, 49 POWER_UNKNOWN 50 }; 51 52 struct __rte_cache_aligned pstate_power_info { 53 unsigned int lcore_id; /**< Logical core id */ 54 uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequency array */ 55 uint32_t nb_freqs; /**< number of available freqs */ 56 FILE *f_cur_min; /**< FD of scaling_min */ 57 FILE *f_cur_max; /**< FD of scaling_max */ 58 char governor_ori[32]; /**< Original governor name */ 59 uint32_t curr_idx; /**< Freq index in freqs array */ 60 uint32_t non_turbo_max_ratio; /**< Non Turbo Max ratio */ 61 uint32_t sys_max_freq; /**< system wide max freq */ 62 uint32_t core_base_freq; /**< core base freq */ 63 RTE_ATOMIC(uint32_t) state; /**< Power in use state */ 64 uint16_t turbo_available; /**< Turbo Boost available */ 65 uint16_t turbo_enable; /**< Turbo Boost enable/disable */ 66 uint16_t priority_core; /**< High Performance core */ 67 }; 68 69 70 static struct pstate_power_info lcore_power_info[RTE_MAX_LCORE]; 71 72 /** 73 * It is to read the turbo mode percentage from sysfs 74 */ 75 static int32_t 76 power_read_turbo_pct(uint64_t *outVal) 77 { 78 int fd, ret; 79 char val[4] = {0}; 80 char *endptr; 81 82 fd = open(POWER_SYSFILE_TURBO_PCT, O_RDONLY); 83 84 if (fd < 0) { 85 POWER_LOG(ERR, "Error opening '%s': %s", POWER_SYSFILE_TURBO_PCT, 86 strerror(errno)); 87 return fd; 88 } 89 90 ret = read(fd, val, sizeof(val)); 91 92 if (ret < 0) { 93 POWER_LOG(ERR, "Error reading '%s': %s", POWER_SYSFILE_TURBO_PCT, 94 strerror(errno)); 95 goto out; 96 } 97 98 errno = 0; 99 *outVal = (uint64_t) strtol(val, &endptr, 10); 100 if (errno != 0 || (*endptr != 0 && *endptr != '\n')) { 101 POWER_LOG(ERR, "Error converting str to digits, read from %s: %s", 102 POWER_SYSFILE_TURBO_PCT, strerror(errno)); 103 ret = -1; 104 goto out; 105 } 106 107 POWER_DEBUG_LOG("power turbo pct: %"PRIu64, *outVal); 108 109 out: close(fd); 110 return ret; 111 } 112 113 /** 114 * It is to fopen the sys file for the future setting the lcore frequency. 115 */ 116 static int 117 power_init_for_setting_freq(struct pstate_power_info *pi) 118 { 119 FILE *f_base = NULL, *f_base_min = NULL, *f_base_max = NULL, 120 *f_min = NULL, *f_max = NULL; 121 uint32_t base_ratio, base_min_ratio, base_max_ratio; 122 uint64_t max_non_turbo; 123 int ret; 124 125 /* open all files we expect to have open */ 126 open_core_sysfs_file(&f_base_max, "r", POWER_SYSFILE_BASE_MAX_FREQ, 127 pi->lcore_id); 128 if (f_base_max == NULL) { 129 POWER_LOG(ERR, "failed to open %s", 130 POWER_SYSFILE_BASE_MAX_FREQ); 131 goto err; 132 } 133 134 open_core_sysfs_file(&f_base_min, "r", POWER_SYSFILE_BASE_MIN_FREQ, 135 pi->lcore_id); 136 if (f_base_min == NULL) { 137 POWER_LOG(ERR, "failed to open %s", 138 POWER_SYSFILE_BASE_MIN_FREQ); 139 goto err; 140 } 141 142 open_core_sysfs_file(&f_min, "rw+", POWER_SYSFILE_MIN_FREQ, 143 pi->lcore_id); 144 if (f_min == NULL) { 145 POWER_LOG(ERR, "failed to open %s", 146 POWER_SYSFILE_MIN_FREQ); 147 goto err; 148 } 149 150 open_core_sysfs_file(&f_max, "rw+", POWER_SYSFILE_MAX_FREQ, 151 pi->lcore_id); 152 if (f_max == NULL) { 153 POWER_LOG(ERR, "failed to open %s", 154 POWER_SYSFILE_MAX_FREQ); 155 goto err; 156 } 157 158 open_core_sysfs_file(&f_base, "r", POWER_SYSFILE_BASE_FREQ, 159 pi->lcore_id); 160 /* base ratio file may not exist in some kernels, so no error check */ 161 162 /* read base max ratio */ 163 ret = read_core_sysfs_u32(f_base_max, &base_max_ratio); 164 if (ret < 0) { 165 POWER_LOG(ERR, "Failed to read %s", 166 POWER_SYSFILE_BASE_MAX_FREQ); 167 goto err; 168 } 169 170 /* read base min ratio */ 171 ret = read_core_sysfs_u32(f_base_min, &base_min_ratio); 172 if (ret < 0) { 173 POWER_LOG(ERR, "Failed to read %s", 174 POWER_SYSFILE_BASE_MIN_FREQ); 175 goto err; 176 } 177 178 /* base ratio may not exist */ 179 if (f_base != NULL) { 180 ret = read_core_sysfs_u32(f_base, &base_ratio); 181 if (ret < 0) { 182 POWER_LOG(ERR, "Failed to read %s", 183 POWER_SYSFILE_BASE_FREQ); 184 goto err; 185 } 186 } else { 187 base_ratio = 0; 188 } 189 190 /* convert ratios to bins */ 191 base_max_ratio /= BUS_FREQ; 192 base_min_ratio /= BUS_FREQ; 193 base_ratio /= BUS_FREQ; 194 195 /* assign file handles */ 196 pi->f_cur_min = f_min; 197 pi->f_cur_max = f_max; 198 199 /* try to get turbo from global sysfs entry for less privileges than from MSR */ 200 if (power_read_turbo_pct(&max_non_turbo) < 0) 201 goto err; 202 /* no errors after this point */ 203 204 max_non_turbo = base_min_ratio 205 + (100 - max_non_turbo) * (base_max_ratio - base_min_ratio) / 100; 206 207 POWER_DEBUG_LOG("no turbo perf %"PRIu64, max_non_turbo); 208 209 pi->non_turbo_max_ratio = (uint32_t)max_non_turbo; 210 211 /* 212 * If base_frequency is reported as greater than the maximum 213 * turbo frequency, that's a known issue with some kernels. 214 * Set base_frequency to max_non_turbo as a workaround. 215 */ 216 if (base_ratio > base_max_ratio) { 217 /* base_ratio is greater than max turbo. Kernel bug. */ 218 pi->priority_core = 0; 219 goto out; 220 } 221 222 /* 223 * If base_frequency is reported as greater than the maximum 224 * non-turbo frequency, then mark it as a high priority core. 225 */ 226 if (base_ratio > max_non_turbo) 227 pi->priority_core = 1; 228 else 229 pi->priority_core = 0; 230 pi->core_base_freq = base_ratio * BUS_FREQ; 231 232 out: 233 if (f_base != NULL) 234 fclose(f_base); 235 fclose(f_base_max); 236 fclose(f_base_min); 237 /* f_min and f_max are stored, no need to close */ 238 return 0; 239 240 err: 241 if (f_base != NULL) 242 fclose(f_base); 243 if (f_base_min != NULL) 244 fclose(f_base_min); 245 if (f_base_max != NULL) 246 fclose(f_base_max); 247 if (f_min != NULL) 248 fclose(f_min); 249 if (f_max != NULL) 250 fclose(f_max); 251 return -1; 252 } 253 254 static int 255 set_freq_internal(struct pstate_power_info *pi, uint32_t idx) 256 { 257 uint32_t target_freq = 0; 258 259 if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) { 260 POWER_LOG(ERR, "Invalid frequency index %u, which " 261 "should be less than %u", idx, pi->nb_freqs); 262 return -1; 263 } 264 265 /* Check if it is the same as current */ 266 if (idx == pi->curr_idx) 267 return 0; 268 269 /* Because Intel Pstate Driver only allow user change min/max hint 270 * User need change the min/max as same value. 271 */ 272 if (fseek(pi->f_cur_min, 0, SEEK_SET) < 0) { 273 POWER_LOG(ERR, "Fail to set file position indicator to 0 " 274 "for setting frequency for lcore %u", 275 pi->lcore_id); 276 return -1; 277 } 278 279 if (fseek(pi->f_cur_max, 0, SEEK_SET) < 0) { 280 POWER_LOG(ERR, "Fail to set file position indicator to 0 " 281 "for setting frequency for lcore %u", 282 pi->lcore_id); 283 return -1; 284 } 285 286 /* Turbo is available and enabled, first freq bucket is sys max freq */ 287 if (pi->turbo_available && idx == 0) { 288 if (pi->turbo_enable) 289 target_freq = pi->sys_max_freq; 290 else { 291 POWER_LOG(ERR, "Turbo is off, frequency can't be scaled up more %u", 292 pi->lcore_id); 293 return -1; 294 } 295 } else 296 target_freq = pi->freqs[idx]; 297 298 /* Decrease freq, the min freq should be updated first */ 299 if (idx > pi->curr_idx) { 300 301 if (fprintf(pi->f_cur_min, "%u", target_freq) < 0) { 302 POWER_LOG(ERR, "Fail to write new frequency for " 303 "lcore %u", pi->lcore_id); 304 return -1; 305 } 306 307 if (fprintf(pi->f_cur_max, "%u", target_freq) < 0) { 308 POWER_LOG(ERR, "Fail to write new frequency for " 309 "lcore %u", pi->lcore_id); 310 return -1; 311 } 312 313 POWER_DEBUG_LOG("Frequency '%u' to be set for lcore %u", 314 target_freq, pi->lcore_id); 315 316 fflush(pi->f_cur_min); 317 fflush(pi->f_cur_max); 318 319 } 320 321 /* Increase freq, the max freq should be updated first */ 322 if (idx < pi->curr_idx) { 323 324 if (fprintf(pi->f_cur_max, "%u", target_freq) < 0) { 325 POWER_LOG(ERR, "Fail to write new frequency for " 326 "lcore %u", pi->lcore_id); 327 return -1; 328 } 329 330 if (fprintf(pi->f_cur_min, "%u", target_freq) < 0) { 331 POWER_LOG(ERR, "Fail to write new frequency for " 332 "lcore %u", pi->lcore_id); 333 return -1; 334 } 335 336 POWER_DEBUG_LOG("Frequency '%u' to be set for lcore %u", 337 target_freq, pi->lcore_id); 338 339 fflush(pi->f_cur_max); 340 fflush(pi->f_cur_min); 341 } 342 343 pi->curr_idx = idx; 344 345 return 1; 346 } 347 348 /** 349 * It is to check the current scaling governor by reading sys file, and then 350 * set it into 'performance' if it is not by writing the sys file. The original 351 * governor will be saved for rolling back. 352 */ 353 static int 354 power_set_governor_performance(struct pstate_power_info *pi) 355 { 356 return power_set_governor(pi->lcore_id, POWER_GOVERNOR_PERF, 357 pi->governor_ori, sizeof(pi->governor_ori)); 358 } 359 360 /** 361 * It is to check the governor and then set the original governor back if 362 * needed by writing the sys file. 363 */ 364 static int 365 power_set_governor_original(struct pstate_power_info *pi) 366 { 367 return power_set_governor(pi->lcore_id, pi->governor_ori, NULL, 0); 368 } 369 370 /** 371 * It is to get the available frequencies of the specific lcore by reading the 372 * sys file. 373 */ 374 static int 375 power_get_available_freqs(struct pstate_power_info *pi) 376 { 377 FILE *f_min = NULL, *f_max = NULL; 378 int ret = -1; 379 uint32_t sys_min_freq = 0, sys_max_freq = 0, base_max_freq = 0; 380 int config_min_freq, config_max_freq; 381 uint32_t i, num_freqs = 0; 382 383 /* open all files */ 384 open_core_sysfs_file(&f_max, "r", POWER_SYSFILE_BASE_MAX_FREQ, 385 pi->lcore_id); 386 if (f_max == NULL) { 387 POWER_LOG(ERR, "failed to open %s", 388 POWER_SYSFILE_BASE_MAX_FREQ); 389 goto out; 390 } 391 392 open_core_sysfs_file(&f_min, "r", POWER_SYSFILE_BASE_MIN_FREQ, 393 pi->lcore_id); 394 if (f_min == NULL) { 395 POWER_LOG(ERR, "failed to open %s", 396 POWER_SYSFILE_BASE_MIN_FREQ); 397 goto out; 398 } 399 400 /* read base ratios */ 401 ret = read_core_sysfs_u32(f_max, &sys_max_freq); 402 if (ret < 0) { 403 POWER_LOG(ERR, "Failed to read %s", 404 POWER_SYSFILE_BASE_MAX_FREQ); 405 goto out; 406 } 407 408 ret = read_core_sysfs_u32(f_min, &sys_min_freq); 409 if (ret < 0) { 410 POWER_LOG(ERR, "Failed to read %s", 411 POWER_SYSFILE_BASE_MIN_FREQ); 412 goto out; 413 } 414 415 /* check for config set by user or application to limit frequency range */ 416 config_min_freq = rte_power_pmd_mgmt_get_scaling_freq_min(pi->lcore_id); 417 if (config_min_freq < 0) 418 goto out; 419 config_max_freq = rte_power_pmd_mgmt_get_scaling_freq_max(pi->lcore_id); 420 if (config_max_freq < 0) 421 goto out; 422 423 sys_min_freq = RTE_MAX(sys_min_freq, (uint32_t)config_min_freq); 424 if (config_max_freq > 0) /* Only use config_max_freq if a value has been set */ 425 sys_max_freq = RTE_MIN(sys_max_freq, (uint32_t)config_max_freq); 426 427 if (sys_max_freq < sys_min_freq) 428 goto out; 429 430 pi->sys_max_freq = sys_max_freq; 431 432 if (pi->priority_core == 1) 433 base_max_freq = pi->core_base_freq; 434 else 435 base_max_freq = pi->non_turbo_max_ratio * BUS_FREQ; 436 437 POWER_DEBUG_LOG("sys min %u, sys max %u, base_max %u", 438 sys_min_freq, 439 sys_max_freq, 440 base_max_freq); 441 442 if (base_max_freq < sys_max_freq) 443 pi->turbo_available = 1; 444 else 445 pi->turbo_available = 0; 446 447 /* If turbo is available then there is one extra freq bucket 448 * to store the sys max freq which value is base_max +1 449 */ 450 num_freqs = (RTE_MIN(base_max_freq, sys_max_freq) - sys_min_freq) / BUS_FREQ 451 + 1 + pi->turbo_available; 452 if (num_freqs >= RTE_MAX_LCORE_FREQS) { 453 POWER_LOG(ERR, "Too many available frequencies: %d", 454 num_freqs); 455 goto out; 456 } 457 458 /* Generate the freq bucket array. 459 * If turbo is available the freq bucket[0] value is base_max +1 460 * the bucket[1] is base_max, bucket[2] is base_max - BUS_FREQ 461 * and so on. 462 * If turbo is not available bucket[0] is base_max and so on 463 */ 464 for (i = 0, pi->nb_freqs = 0; i < num_freqs; i++) { 465 if ((i == 0) && pi->turbo_available) 466 pi->freqs[pi->nb_freqs++] = RTE_MIN(base_max_freq, sys_max_freq) + 1; 467 else 468 pi->freqs[pi->nb_freqs++] = RTE_MIN(base_max_freq, sys_max_freq) - 469 (i - pi->turbo_available) * BUS_FREQ; 470 } 471 472 ret = 0; 473 474 POWER_DEBUG_LOG("%d frequency(s) of lcore %u are available", 475 num_freqs, pi->lcore_id); 476 477 out: 478 if (f_min != NULL) 479 fclose(f_min); 480 if (f_max != NULL) 481 fclose(f_max); 482 483 return ret; 484 } 485 486 static int 487 power_get_cur_idx(struct pstate_power_info *pi) 488 { 489 FILE *f_cur; 490 int ret = -1; 491 uint32_t sys_cur_freq = 0; 492 unsigned int i; 493 494 open_core_sysfs_file(&f_cur, "r", POWER_SYSFILE_CUR_FREQ, 495 pi->lcore_id); 496 if (f_cur == NULL) { 497 POWER_LOG(ERR, "failed to open %s", 498 POWER_SYSFILE_CUR_FREQ); 499 goto fail; 500 } 501 502 ret = read_core_sysfs_u32(f_cur, &sys_cur_freq); 503 if (ret < 0) { 504 POWER_LOG(ERR, "Failed to read %s", 505 POWER_SYSFILE_CUR_FREQ); 506 goto fail; 507 } 508 509 /* convert the frequency to nearest 100000 value 510 * Ex: if sys_cur_freq=1396789 then freq_conv=1400000 511 * Ex: if sys_cur_freq=800030 then freq_conv=800000 512 * Ex: if sys_cur_freq=800030 then freq_conv=800000 513 */ 514 unsigned int freq_conv = 0; 515 freq_conv = (sys_cur_freq + FREQ_ROUNDING_DELTA) 516 / ROUND_FREQ_TO_N_100000; 517 freq_conv = freq_conv * ROUND_FREQ_TO_N_100000; 518 519 for (i = 0; i < pi->nb_freqs; i++) { 520 if (freq_conv == pi->freqs[i]) { 521 pi->curr_idx = i; 522 break; 523 } 524 } 525 526 ret = 0; 527 fail: 528 if (f_cur != NULL) 529 fclose(f_cur); 530 return ret; 531 } 532 533 int 534 power_pstate_cpufreq_check_supported(void) 535 { 536 return cpufreq_check_scaling_driver(POWER_PSTATE_DRIVER); 537 } 538 539 int 540 power_pstate_cpufreq_init(unsigned int lcore_id) 541 { 542 struct pstate_power_info *pi; 543 uint32_t exp_state; 544 545 if (!power_pstate_cpufreq_check_supported()) { 546 POWER_LOG(ERR, "%s driver is not supported", 547 POWER_PSTATE_DRIVER); 548 return -1; 549 } 550 551 if (lcore_id >= RTE_MAX_LCORE) { 552 POWER_LOG(ERR, "Lcore id %u can not exceed %u", 553 lcore_id, RTE_MAX_LCORE - 1U); 554 return -1; 555 } 556 557 pi = &lcore_power_info[lcore_id]; 558 exp_state = POWER_IDLE; 559 /* The power in use state works as a guard variable between 560 * the CPU frequency control initialization and exit process. 561 * The ACQUIRE memory ordering here pairs with the RELEASE 562 * ordering below as lock to make sure the frequency operations 563 * in the critical section are done under the correct state. 564 */ 565 if (!rte_atomic_compare_exchange_strong_explicit(&(pi->state), &exp_state, 566 POWER_ONGOING, 567 rte_memory_order_acquire, rte_memory_order_relaxed)) { 568 POWER_LOG(INFO, "Power management of lcore %u is " 569 "in use", lcore_id); 570 return -1; 571 } 572 573 if (power_get_lcore_mapped_cpu_id(lcore_id, &pi->lcore_id) < 0) { 574 POWER_LOG(ERR, "Cannot get CPU ID mapped for lcore %u", lcore_id); 575 return -1; 576 } 577 578 /* Check and set the governor */ 579 if (power_set_governor_performance(pi) < 0) { 580 POWER_LOG(ERR, "Cannot set governor of lcore %u to " 581 "performance", lcore_id); 582 goto fail; 583 } 584 /* Init for setting lcore frequency */ 585 if (power_init_for_setting_freq(pi) < 0) { 586 POWER_LOG(ERR, "Cannot init for setting frequency for " 587 "lcore %u", lcore_id); 588 goto fail; 589 } 590 591 /* Get the available frequencies */ 592 if (power_get_available_freqs(pi) < 0) { 593 POWER_LOG(ERR, "Cannot get available frequencies of " 594 "lcore %u", lcore_id); 595 goto fail; 596 } 597 598 if (power_get_cur_idx(pi) < 0) { 599 POWER_LOG(ERR, "Cannot get current frequency " 600 "index of lcore %u", lcore_id); 601 goto fail; 602 } 603 604 /* Set freq to max by default */ 605 if (power_pstate_cpufreq_freq_max(lcore_id) < 0) { 606 POWER_LOG(ERR, "Cannot set frequency of lcore %u " 607 "to max", lcore_id); 608 goto fail; 609 } 610 611 POWER_LOG(INFO, "Initialized successfully for lcore %u " 612 "power management", lcore_id); 613 exp_state = POWER_ONGOING; 614 rte_atomic_compare_exchange_strong_explicit(&(pi->state), &exp_state, POWER_USED, 615 rte_memory_order_release, rte_memory_order_relaxed); 616 617 return 0; 618 619 fail: 620 exp_state = POWER_ONGOING; 621 rte_atomic_compare_exchange_strong_explicit(&(pi->state), &exp_state, POWER_UNKNOWN, 622 rte_memory_order_release, rte_memory_order_relaxed); 623 624 return -1; 625 } 626 627 int 628 power_pstate_cpufreq_exit(unsigned int lcore_id) 629 { 630 struct pstate_power_info *pi; 631 uint32_t exp_state; 632 633 if (lcore_id >= RTE_MAX_LCORE) { 634 POWER_LOG(ERR, "Lcore id %u can not exceeds %u", 635 lcore_id, RTE_MAX_LCORE - 1U); 636 return -1; 637 } 638 pi = &lcore_power_info[lcore_id]; 639 640 exp_state = POWER_USED; 641 /* The power in use state works as a guard variable between 642 * the CPU frequency control initialization and exit process. 643 * The ACQUIRE memory ordering here pairs with the RELEASE 644 * ordering below as lock to make sure the frequency operations 645 * in the critical section are under done the correct state. 646 */ 647 if (!rte_atomic_compare_exchange_strong_explicit(&(pi->state), &exp_state, 648 POWER_ONGOING, 649 rte_memory_order_acquire, rte_memory_order_relaxed)) { 650 POWER_LOG(INFO, "Power management of lcore %u is " 651 "not used", lcore_id); 652 return -1; 653 } 654 655 /* Close FD of setting freq */ 656 fclose(pi->f_cur_min); 657 fclose(pi->f_cur_max); 658 pi->f_cur_min = NULL; 659 pi->f_cur_max = NULL; 660 661 /* Set the governor back to the original */ 662 if (power_set_governor_original(pi) < 0) { 663 POWER_LOG(ERR, "Cannot set the governor of %u back " 664 "to the original", lcore_id); 665 goto fail; 666 } 667 668 POWER_LOG(INFO, "Power management of lcore %u has exited from " 669 "'performance' mode and been set back to the " 670 "original", lcore_id); 671 exp_state = POWER_ONGOING; 672 rte_atomic_compare_exchange_strong_explicit(&(pi->state), &exp_state, POWER_IDLE, 673 rte_memory_order_release, rte_memory_order_relaxed); 674 675 return 0; 676 677 fail: 678 exp_state = POWER_ONGOING; 679 rte_atomic_compare_exchange_strong_explicit(&(pi->state), &exp_state, POWER_UNKNOWN, 680 rte_memory_order_release, rte_memory_order_relaxed); 681 682 return -1; 683 } 684 685 686 uint32_t 687 power_pstate_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t num) 688 { 689 struct pstate_power_info *pi; 690 691 if (lcore_id >= RTE_MAX_LCORE) { 692 POWER_LOG(ERR, "Invalid lcore ID"); 693 return 0; 694 } 695 696 if (freqs == NULL) { 697 POWER_LOG(ERR, "NULL buffer supplied"); 698 return 0; 699 } 700 701 pi = &lcore_power_info[lcore_id]; 702 if (num < pi->nb_freqs) { 703 POWER_LOG(ERR, "Buffer size is not enough"); 704 return 0; 705 } 706 rte_memcpy(freqs, pi->freqs, pi->nb_freqs * sizeof(uint32_t)); 707 708 return pi->nb_freqs; 709 } 710 711 uint32_t 712 power_pstate_cpufreq_get_freq(unsigned int lcore_id) 713 { 714 if (lcore_id >= RTE_MAX_LCORE) { 715 POWER_LOG(ERR, "Invalid lcore ID"); 716 return RTE_POWER_INVALID_FREQ_INDEX; 717 } 718 719 return lcore_power_info[lcore_id].curr_idx; 720 } 721 722 723 int 724 power_pstate_cpufreq_set_freq(unsigned int lcore_id, uint32_t index) 725 { 726 if (lcore_id >= RTE_MAX_LCORE) { 727 POWER_LOG(ERR, "Invalid lcore ID"); 728 return -1; 729 } 730 731 return set_freq_internal(&(lcore_power_info[lcore_id]), index); 732 } 733 734 int 735 power_pstate_cpufreq_freq_up(unsigned int lcore_id) 736 { 737 struct pstate_power_info *pi; 738 739 if (lcore_id >= RTE_MAX_LCORE) { 740 POWER_LOG(ERR, "Invalid lcore ID"); 741 return -1; 742 } 743 744 pi = &lcore_power_info[lcore_id]; 745 if (pi->curr_idx == 0 || 746 (pi->curr_idx == 1 && pi->turbo_available && !pi->turbo_enable)) 747 return 0; 748 749 /* Frequencies in the array are from high to low. */ 750 return set_freq_internal(pi, pi->curr_idx - 1); 751 } 752 753 int 754 power_pstate_cpufreq_freq_down(unsigned int lcore_id) 755 { 756 struct pstate_power_info *pi; 757 758 if (lcore_id >= RTE_MAX_LCORE) { 759 POWER_LOG(ERR, "Invalid lcore ID"); 760 return -1; 761 } 762 763 pi = &lcore_power_info[lcore_id]; 764 if (pi->curr_idx + 1 == pi->nb_freqs) 765 return 0; 766 767 /* Frequencies in the array are from high to low. */ 768 return set_freq_internal(pi, pi->curr_idx + 1); 769 } 770 771 int 772 power_pstate_cpufreq_freq_max(unsigned int lcore_id) 773 { 774 if (lcore_id >= RTE_MAX_LCORE) { 775 POWER_LOG(ERR, "Invalid lcore ID"); 776 return -1; 777 } 778 779 /* Frequencies in the array are from high to low. */ 780 if (lcore_power_info[lcore_id].turbo_available) { 781 if (lcore_power_info[lcore_id].turbo_enable) 782 /* Set to Turbo */ 783 return set_freq_internal( 784 &lcore_power_info[lcore_id], 0); 785 else 786 /* Set to max non-turbo */ 787 return set_freq_internal( 788 &lcore_power_info[lcore_id], 1); 789 } else 790 return set_freq_internal(&lcore_power_info[lcore_id], 0); 791 } 792 793 794 int 795 power_pstate_cpufreq_freq_min(unsigned int lcore_id) 796 { 797 struct pstate_power_info *pi; 798 799 if (lcore_id >= RTE_MAX_LCORE) { 800 POWER_LOG(ERR, "Invalid lcore ID"); 801 return -1; 802 } 803 804 pi = &lcore_power_info[lcore_id]; 805 806 /* Frequencies in the array are from high to low. */ 807 return set_freq_internal(pi, pi->nb_freqs - 1); 808 } 809 810 811 int 812 power_pstate_turbo_status(unsigned int lcore_id) 813 { 814 struct pstate_power_info *pi; 815 816 if (lcore_id >= RTE_MAX_LCORE) { 817 POWER_LOG(ERR, "Invalid lcore ID"); 818 return -1; 819 } 820 821 pi = &lcore_power_info[lcore_id]; 822 823 return pi->turbo_enable; 824 } 825 826 int 827 power_pstate_enable_turbo(unsigned int lcore_id) 828 { 829 struct pstate_power_info *pi; 830 831 if (lcore_id >= RTE_MAX_LCORE) { 832 POWER_LOG(ERR, "Invalid lcore ID"); 833 return -1; 834 } 835 836 pi = &lcore_power_info[lcore_id]; 837 838 if (pi->turbo_available) 839 pi->turbo_enable = 1; 840 else { 841 pi->turbo_enable = 0; 842 POWER_LOG(ERR, 843 "Failed to enable turbo on lcore %u", 844 lcore_id); 845 return -1; 846 } 847 848 return 0; 849 } 850 851 852 int 853 power_pstate_disable_turbo(unsigned int lcore_id) 854 { 855 struct pstate_power_info *pi; 856 857 if (lcore_id >= RTE_MAX_LCORE) { 858 POWER_LOG(ERR, "Invalid lcore ID"); 859 return -1; 860 } 861 862 pi = &lcore_power_info[lcore_id]; 863 864 pi->turbo_enable = 0; 865 866 if (pi->turbo_available && pi->curr_idx <= 1) { 867 /* Try to set freq to max by default coming out of turbo */ 868 if (power_pstate_cpufreq_freq_max(lcore_id) < 0) { 869 POWER_LOG(ERR, 870 "Failed to set frequency of lcore %u to max", 871 lcore_id); 872 return -1; 873 } 874 } 875 876 return 0; 877 } 878 879 880 int power_pstate_get_capabilities(unsigned int lcore_id, 881 struct rte_power_core_capabilities *caps) 882 { 883 struct pstate_power_info *pi; 884 885 if (lcore_id >= RTE_MAX_LCORE) { 886 POWER_LOG(ERR, "Invalid lcore ID"); 887 return -1; 888 } 889 if (caps == NULL) { 890 POWER_LOG(ERR, "Invalid argument"); 891 return -1; 892 } 893 894 pi = &lcore_power_info[lcore_id]; 895 caps->capabilities = 0; 896 caps->turbo = !!(pi->turbo_available); 897 caps->priority = pi->priority_core; 898 899 return 0; 900 } 901 902 static struct rte_power_cpufreq_ops pstate_ops = { 903 .name = "intel-pstate", 904 .init = power_pstate_cpufreq_init, 905 .exit = power_pstate_cpufreq_exit, 906 .check_env_support = power_pstate_cpufreq_check_supported, 907 .get_avail_freqs = power_pstate_cpufreq_freqs, 908 .get_freq = power_pstate_cpufreq_get_freq, 909 .set_freq = power_pstate_cpufreq_set_freq, 910 .freq_down = power_pstate_cpufreq_freq_down, 911 .freq_up = power_pstate_cpufreq_freq_up, 912 .freq_max = power_pstate_cpufreq_freq_max, 913 .freq_min = power_pstate_cpufreq_freq_min, 914 .turbo_status = power_pstate_turbo_status, 915 .enable_turbo = power_pstate_enable_turbo, 916 .disable_turbo = power_pstate_disable_turbo, 917 .get_caps = power_pstate_get_capabilities 918 }; 919 920 RTE_POWER_REGISTER_CPUFREQ_OPS(pstate_ops); 921