/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>

#include <rte_memcpy.h>
#include <rte_stdatomic.h>
#include <rte_string_fns.h>

#include "acpi_cpufreq.h"
#include "power_common.h"

/* NOTE(review): STR_SIZE is not referenced in the visible part of this file. */
#define STR_SIZE 1024
/* Numeric base passed to strtoul() when parsing frequencies read from sysfs. */
#define POWER_CONVERT_TO_DECIMAL 10

/* Governor name requested while an lcore is managed by this driver. */
#define POWER_GOVERNOR_USERSPACE "userspace"
/*
 * printf-style sysfs path templates; the %u conversion is filled with the
 * CPU id stored in acpi_power_info::lcore_id.
 */
#define POWER_SYSFILE_AVAIL_FREQ \
		"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_available_frequencies"
#define POWER_SYSFILE_SETSPEED \
		"/sys/devices/system/cpu/cpu%u/cpufreq/scaling_setspeed"
/* Scaling driver name handed to cpufreq_check_scaling_driver(). */
#define POWER_ACPI_DRIVER "acpi-cpufreq"

/*
 * MSR related
 *
 * NOTE(review): none of these four macros is referenced in the visible part
 * of this file.
 */
#define PLATFORM_INFO 0x0CE
#define TURBO_RATIO_LIMIT 0x1AD
#define IA32_PERF_CTL 0x199
#define CORE_TURBO_DISABLE_BIT ((uint64_t)1<<32)

/*
 * Life-cycle state of one lcore entry, stored in acpi_power_info::state and
 * only changed through compare-and-exchange in the init/exit functions.
 */
enum power_state {
	/* Value 0, i.e. the state of the zero-initialized static table;
	 * required by init, restored by a successful exit.
	 */
	POWER_IDLE = 0,
	/* Held while init or exit is running for the lcore. */
	POWER_ONGOING,
	/* Set by a successful init; required by exit. */
	POWER_USED,
	/* Set when init or exit fails. */
	POWER_UNKNOWN
};

/**
 * Power info per lcore.
 */
struct __rte_cache_aligned acpi_power_info {
	unsigned int lcore_id; /**< CPU id written by power_get_lcore_mapped_cpu_id() during init; used in sysfs paths */
	uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequencies parsed from scaling_available_frequencies */
	uint32_t nb_freqs; /**< Number of valid entries in freqs */
	FILE *f; /**< Open stream on scaling_setspeed, NULL after exit */
	char governor_ori[32]; /**< Buffer handed to power_set_governor() for the original governor name */
	uint32_t curr_idx; /**< Index in freqs of the current frequency */
	RTE_ATOMIC(uint32_t) state; /**< One of enum power_state */
	uint16_t turbo_available; /**< Non-zero when the frequency list looks turbo capable */
	uint16_t turbo_enable; /**< Non-zero when index 0 (turbo) may be selected */
};

/* One entry per lcore, indexed by the lcore id passed to the public API. */
static struct acpi_power_info lcore_power_info[RTE_MAX_LCORE];

/**
 * It is to set specific freq for specific logical core, according to the index
 * of supported frequencies.
61 */ 62 static int 63 set_freq_internal(struct acpi_power_info *pi, uint32_t idx) 64 { 65 if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) { 66 POWER_LOG(ERR, "Invalid frequency index %u, which " 67 "should be less than %u", idx, pi->nb_freqs); 68 return -1; 69 } 70 71 /* Check if it is the same as current */ 72 if (idx == pi->curr_idx) 73 return 0; 74 75 POWER_DEBUG_LOG("Frequency[%u] %u to be set for lcore %u", 76 idx, pi->freqs[idx], pi->lcore_id); 77 if (fseek(pi->f, 0, SEEK_SET) < 0) { 78 POWER_LOG(ERR, "Fail to set file position indicator to 0 " 79 "for setting frequency for lcore %u", pi->lcore_id); 80 return -1; 81 } 82 if (fprintf(pi->f, "%u", pi->freqs[idx]) < 0) { 83 POWER_LOG(ERR, "Fail to write new frequency for " 84 "lcore %u", pi->lcore_id); 85 return -1; 86 } 87 fflush(pi->f); 88 pi->curr_idx = idx; 89 90 return 1; 91 } 92 93 /** 94 * It is to check the current scaling governor by reading sys file, and then 95 * set it into 'userspace' if it is not by writing the sys file. The original 96 * governor will be saved for rolling back. 97 */ 98 static int 99 power_set_governor_userspace(struct acpi_power_info *pi) 100 { 101 return power_set_governor(pi->lcore_id, POWER_GOVERNOR_USERSPACE, 102 pi->governor_ori, sizeof(pi->governor_ori)); 103 } 104 105 /** 106 * It is to check the governor and then set the original governor back if 107 * needed by writing the sys file. 108 */ 109 static int 110 power_set_governor_original(struct acpi_power_info *pi) 111 { 112 return power_set_governor(pi->lcore_id, pi->governor_ori, NULL, 0); 113 } 114 115 /** 116 * It is to get the available frequencies of the specific lcore by reading the 117 * sys file. 
118 */ 119 static int 120 power_get_available_freqs(struct acpi_power_info *pi) 121 { 122 FILE *f; 123 int ret = -1, i, count; 124 char *p; 125 char buf[BUFSIZ]; 126 char *freqs[RTE_MAX_LCORE_FREQS]; 127 128 open_core_sysfs_file(&f, "r", POWER_SYSFILE_AVAIL_FREQ, pi->lcore_id); 129 if (f == NULL) { 130 POWER_LOG(ERR, "failed to open %s", 131 POWER_SYSFILE_AVAIL_FREQ); 132 goto out; 133 } 134 135 ret = read_core_sysfs_s(f, buf, sizeof(buf)); 136 if ((ret) < 0) { 137 POWER_LOG(ERR, "Failed to read %s", 138 POWER_SYSFILE_AVAIL_FREQ); 139 goto out; 140 } 141 142 /* Split string into at most RTE_MAX_LCORE_FREQS frequencies */ 143 count = rte_strsplit(buf, sizeof(buf), freqs, 144 RTE_MAX_LCORE_FREQS, ' '); 145 if (count <= 0) { 146 POWER_LOG(ERR, "No available frequency in " 147 POWER_SYSFILE_AVAIL_FREQ, pi->lcore_id); 148 goto out; 149 } 150 if (count >= RTE_MAX_LCORE_FREQS) { 151 POWER_LOG(ERR, "Too many available frequencies : %d", 152 count); 153 goto out; 154 } 155 156 /* Store the available frequencies into power context */ 157 for (i = 0, pi->nb_freqs = 0; i < count; i++) { 158 POWER_DEBUG_LOG("Lcore %u frequency[%d]: %s", pi->lcore_id, 159 i, freqs[i]); 160 pi->freqs[pi->nb_freqs++] = strtoul(freqs[i], &p, 161 POWER_CONVERT_TO_DECIMAL); 162 } 163 164 if ((pi->freqs[0]-1000) == pi->freqs[1]) { 165 pi->turbo_available = 1; 166 pi->turbo_enable = 1; 167 POWER_DEBUG_LOG("Lcore %u Can do Turbo Boost", 168 pi->lcore_id); 169 } else { 170 pi->turbo_available = 0; 171 pi->turbo_enable = 0; 172 POWER_DEBUG_LOG("Turbo Boost not available on Lcore %u", 173 pi->lcore_id); 174 } 175 176 ret = 0; 177 POWER_DEBUG_LOG("%d frequency(s) of lcore %u are available", 178 count, pi->lcore_id); 179 out: 180 if (f != NULL) 181 fclose(f); 182 183 return ret; 184 } 185 186 /** 187 * It is to fopen the sys file for the future setting the lcore frequency. 
188 */ 189 static int 190 power_init_for_setting_freq(struct acpi_power_info *pi) 191 { 192 FILE *f; 193 char buf[BUFSIZ]; 194 uint32_t i, freq; 195 int ret; 196 197 open_core_sysfs_file(&f, "rw+", POWER_SYSFILE_SETSPEED, pi->lcore_id); 198 if (f == NULL) { 199 POWER_LOG(ERR, "Failed to open %s", 200 POWER_SYSFILE_SETSPEED); 201 goto err; 202 } 203 204 ret = read_core_sysfs_s(f, buf, sizeof(buf)); 205 if ((ret) < 0) { 206 POWER_LOG(ERR, "Failed to read %s", 207 POWER_SYSFILE_SETSPEED); 208 goto err; 209 } 210 211 freq = strtoul(buf, NULL, POWER_CONVERT_TO_DECIMAL); 212 for (i = 0; i < pi->nb_freqs; i++) { 213 if (freq == pi->freqs[i]) { 214 pi->curr_idx = i; 215 pi->f = f; 216 return 0; 217 } 218 } 219 220 err: 221 if (f != NULL) 222 fclose(f); 223 224 return -1; 225 } 226 227 int 228 power_acpi_cpufreq_check_supported(void) 229 { 230 return cpufreq_check_scaling_driver(POWER_ACPI_DRIVER); 231 } 232 233 int 234 power_acpi_cpufreq_init(unsigned int lcore_id) 235 { 236 struct acpi_power_info *pi; 237 uint32_t exp_state; 238 239 if (!power_acpi_cpufreq_check_supported()) { 240 POWER_LOG(ERR, "%s driver is not supported", 241 POWER_ACPI_DRIVER); 242 return -1; 243 } 244 245 if (lcore_id >= RTE_MAX_LCORE) { 246 POWER_LOG(ERR, "Lcore id %u can not exceeds %u", 247 lcore_id, RTE_MAX_LCORE - 1U); 248 return -1; 249 } 250 251 pi = &lcore_power_info[lcore_id]; 252 exp_state = POWER_IDLE; 253 /* The power in use state works as a guard variable between 254 * the CPU frequency control initialization and exit process. 255 * The ACQUIRE memory ordering here pairs with the RELEASE 256 * ordering below as lock to make sure the frequency operations 257 * in the critical section are done under the correct state. 
258 */ 259 if (!rte_atomic_compare_exchange_strong_explicit(&(pi->state), &exp_state, 260 POWER_ONGOING, 261 rte_memory_order_acquire, rte_memory_order_relaxed)) { 262 POWER_LOG(INFO, "Power management of lcore %u is " 263 "in use", lcore_id); 264 return -1; 265 } 266 267 if (power_get_lcore_mapped_cpu_id(lcore_id, &pi->lcore_id) < 0) { 268 POWER_LOG(ERR, "Cannot get CPU ID mapped for lcore %u", lcore_id); 269 return -1; 270 } 271 272 /* Check and set the governor */ 273 if (power_set_governor_userspace(pi) < 0) { 274 POWER_LOG(ERR, "Cannot set governor of lcore %u to " 275 "userspace", lcore_id); 276 goto fail; 277 } 278 279 /* Get the available frequencies */ 280 if (power_get_available_freqs(pi) < 0) { 281 POWER_LOG(ERR, "Cannot get available frequencies of " 282 "lcore %u", lcore_id); 283 goto fail; 284 } 285 286 /* Init for setting lcore frequency */ 287 if (power_init_for_setting_freq(pi) < 0) { 288 POWER_LOG(ERR, "Cannot init for setting frequency for " 289 "lcore %u", lcore_id); 290 goto fail; 291 } 292 293 /* Set freq to max by default */ 294 if (power_acpi_cpufreq_freq_max(lcore_id) < 0) { 295 POWER_LOG(ERR, "Cannot set frequency of lcore %u " 296 "to max", lcore_id); 297 goto fail; 298 } 299 300 POWER_LOG(INFO, "Initialized successfully for lcore %u " 301 "power management", lcore_id); 302 exp_state = POWER_ONGOING; 303 rte_atomic_compare_exchange_strong_explicit(&(pi->state), &exp_state, POWER_USED, 304 rte_memory_order_release, rte_memory_order_relaxed); 305 306 return 0; 307 308 fail: 309 exp_state = POWER_ONGOING; 310 rte_atomic_compare_exchange_strong_explicit(&(pi->state), &exp_state, POWER_UNKNOWN, 311 rte_memory_order_release, rte_memory_order_relaxed); 312 313 return -1; 314 } 315 316 int 317 power_acpi_cpufreq_exit(unsigned int lcore_id) 318 { 319 struct acpi_power_info *pi; 320 uint32_t exp_state; 321 322 if (lcore_id >= RTE_MAX_LCORE) { 323 POWER_LOG(ERR, "Lcore id %u can not exceeds %u", 324 lcore_id, RTE_MAX_LCORE - 1U); 325 return -1; 326 
} 327 pi = &lcore_power_info[lcore_id]; 328 exp_state = POWER_USED; 329 /* The power in use state works as a guard variable between 330 * the CPU frequency control initialization and exit process. 331 * The ACQUIRE memory ordering here pairs with the RELEASE 332 * ordering below as lock to make sure the frequency operations 333 * in the critical section are done under the correct state. 334 */ 335 if (!rte_atomic_compare_exchange_strong_explicit(&(pi->state), &exp_state, 336 POWER_ONGOING, 337 rte_memory_order_acquire, rte_memory_order_relaxed)) { 338 POWER_LOG(INFO, "Power management of lcore %u is " 339 "not used", lcore_id); 340 return -1; 341 } 342 343 /* Close FD of setting freq */ 344 fclose(pi->f); 345 pi->f = NULL; 346 347 /* Set the governor back to the original */ 348 if (power_set_governor_original(pi) < 0) { 349 POWER_LOG(ERR, "Cannot set the governor of %u back " 350 "to the original", lcore_id); 351 goto fail; 352 } 353 354 POWER_LOG(INFO, "Power management of lcore %u has exited from " 355 "'userspace' mode and been set back to the " 356 "original", lcore_id); 357 exp_state = POWER_ONGOING; 358 rte_atomic_compare_exchange_strong_explicit(&(pi->state), &exp_state, POWER_IDLE, 359 rte_memory_order_release, rte_memory_order_relaxed); 360 361 return 0; 362 363 fail: 364 exp_state = POWER_ONGOING; 365 rte_atomic_compare_exchange_strong_explicit(&(pi->state), &exp_state, POWER_UNKNOWN, 366 rte_memory_order_release, rte_memory_order_relaxed); 367 368 return -1; 369 } 370 371 uint32_t 372 power_acpi_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t num) 373 { 374 struct acpi_power_info *pi; 375 376 if (lcore_id >= RTE_MAX_LCORE) { 377 POWER_LOG(ERR, "Invalid lcore ID"); 378 return 0; 379 } 380 381 if (freqs == NULL) { 382 POWER_LOG(ERR, "NULL buffer supplied"); 383 return 0; 384 } 385 386 pi = &lcore_power_info[lcore_id]; 387 if (num < pi->nb_freqs) { 388 POWER_LOG(ERR, "Buffer size is not enough"); 389 return 0; 390 } 391 rte_memcpy(freqs, 
pi->freqs, pi->nb_freqs * sizeof(uint32_t)); 392 393 return pi->nb_freqs; 394 } 395 396 uint32_t 397 power_acpi_cpufreq_get_freq(unsigned int lcore_id) 398 { 399 if (lcore_id >= RTE_MAX_LCORE) { 400 POWER_LOG(ERR, "Invalid lcore ID"); 401 return RTE_POWER_INVALID_FREQ_INDEX; 402 } 403 404 return lcore_power_info[lcore_id].curr_idx; 405 } 406 407 int 408 power_acpi_cpufreq_set_freq(unsigned int lcore_id, uint32_t index) 409 { 410 if (lcore_id >= RTE_MAX_LCORE) { 411 POWER_LOG(ERR, "Invalid lcore ID"); 412 return -1; 413 } 414 415 return set_freq_internal(&(lcore_power_info[lcore_id]), index); 416 } 417 418 int 419 power_acpi_cpufreq_freq_down(unsigned int lcore_id) 420 { 421 struct acpi_power_info *pi; 422 423 if (lcore_id >= RTE_MAX_LCORE) { 424 POWER_LOG(ERR, "Invalid lcore ID"); 425 return -1; 426 } 427 428 pi = &lcore_power_info[lcore_id]; 429 if (pi->curr_idx + 1 == pi->nb_freqs) 430 return 0; 431 432 /* Frequencies in the array are from high to low. */ 433 return set_freq_internal(pi, pi->curr_idx + 1); 434 } 435 436 int 437 power_acpi_cpufreq_freq_up(unsigned int lcore_id) 438 { 439 struct acpi_power_info *pi; 440 441 if (lcore_id >= RTE_MAX_LCORE) { 442 POWER_LOG(ERR, "Invalid lcore ID"); 443 return -1; 444 } 445 446 pi = &lcore_power_info[lcore_id]; 447 if (pi->curr_idx == 0 || 448 (pi->curr_idx == 1 && pi->turbo_available && !pi->turbo_enable)) 449 return 0; 450 451 /* Frequencies in the array are from high to low. */ 452 return set_freq_internal(pi, pi->curr_idx - 1); 453 } 454 455 int 456 power_acpi_cpufreq_freq_max(unsigned int lcore_id) 457 { 458 if (lcore_id >= RTE_MAX_LCORE) { 459 POWER_LOG(ERR, "Invalid lcore ID"); 460 return -1; 461 } 462 463 /* Frequencies in the array are from high to low. 
*/ 464 if (lcore_power_info[lcore_id].turbo_available) { 465 if (lcore_power_info[lcore_id].turbo_enable) 466 /* Set to Turbo */ 467 return set_freq_internal( 468 &lcore_power_info[lcore_id], 0); 469 else 470 /* Set to max non-turbo */ 471 return set_freq_internal( 472 &lcore_power_info[lcore_id], 1); 473 } else 474 return set_freq_internal(&lcore_power_info[lcore_id], 0); 475 } 476 477 int 478 power_acpi_cpufreq_freq_min(unsigned int lcore_id) 479 { 480 struct acpi_power_info *pi; 481 482 if (lcore_id >= RTE_MAX_LCORE) { 483 POWER_LOG(ERR, "Invalid lcore ID"); 484 return -1; 485 } 486 487 pi = &lcore_power_info[lcore_id]; 488 489 /* Frequencies in the array are from high to low. */ 490 return set_freq_internal(pi, pi->nb_freqs - 1); 491 } 492 493 494 int 495 power_acpi_turbo_status(unsigned int lcore_id) 496 { 497 struct acpi_power_info *pi; 498 499 if (lcore_id >= RTE_MAX_LCORE) { 500 POWER_LOG(ERR, "Invalid lcore ID"); 501 return -1; 502 } 503 504 pi = &lcore_power_info[lcore_id]; 505 506 return pi->turbo_enable; 507 } 508 509 510 int 511 power_acpi_enable_turbo(unsigned int lcore_id) 512 { 513 struct acpi_power_info *pi; 514 515 if (lcore_id >= RTE_MAX_LCORE) { 516 POWER_LOG(ERR, "Invalid lcore ID"); 517 return -1; 518 } 519 520 pi = &lcore_power_info[lcore_id]; 521 522 if (pi->turbo_available) 523 pi->turbo_enable = 1; 524 else { 525 pi->turbo_enable = 0; 526 POWER_LOG(ERR, 527 "Failed to enable turbo on lcore %u", 528 lcore_id); 529 return -1; 530 } 531 532 /* Max may have changed, so call to max function */ 533 if (power_acpi_cpufreq_freq_max(lcore_id) < 0) { 534 POWER_LOG(ERR, 535 "Failed to set frequency of lcore %u to max", 536 lcore_id); 537 return -1; 538 } 539 540 return 0; 541 } 542 543 int 544 power_acpi_disable_turbo(unsigned int lcore_id) 545 { 546 struct acpi_power_info *pi; 547 548 if (lcore_id >= RTE_MAX_LCORE) { 549 POWER_LOG(ERR, "Invalid lcore ID"); 550 return -1; 551 } 552 553 pi = &lcore_power_info[lcore_id]; 554 555 pi->turbo_enable = 0; 
556 557 if ((pi->turbo_available) && (pi->curr_idx <= 1)) { 558 /* Try to set freq to max by default coming out of turbo */ 559 if (power_acpi_cpufreq_freq_max(lcore_id) < 0) { 560 POWER_LOG(ERR, 561 "Failed to set frequency of lcore %u to max", 562 lcore_id); 563 return -1; 564 } 565 } 566 567 return 0; 568 } 569 570 int power_acpi_get_capabilities(unsigned int lcore_id, 571 struct rte_power_core_capabilities *caps) 572 { 573 struct acpi_power_info *pi; 574 575 if (lcore_id >= RTE_MAX_LCORE) { 576 POWER_LOG(ERR, "Invalid lcore ID"); 577 return -1; 578 } 579 if (caps == NULL) { 580 POWER_LOG(ERR, "Invalid argument"); 581 return -1; 582 } 583 584 pi = &lcore_power_info[lcore_id]; 585 caps->capabilities = 0; 586 caps->turbo = !!(pi->turbo_available); 587 588 return 0; 589 } 590 591 static struct rte_power_cpufreq_ops acpi_ops = { 592 .name = "acpi", 593 .init = power_acpi_cpufreq_init, 594 .exit = power_acpi_cpufreq_exit, 595 .check_env_support = power_acpi_cpufreq_check_supported, 596 .get_avail_freqs = power_acpi_cpufreq_freqs, 597 .get_freq = power_acpi_cpufreq_get_freq, 598 .set_freq = power_acpi_cpufreq_set_freq, 599 .freq_down = power_acpi_cpufreq_freq_down, 600 .freq_up = power_acpi_cpufreq_freq_up, 601 .freq_max = power_acpi_cpufreq_freq_max, 602 .freq_min = power_acpi_cpufreq_freq_min, 603 .turbo_status = power_acpi_turbo_status, 604 .enable_turbo = power_acpi_enable_turbo, 605 .disable_turbo = power_acpi_disable_turbo, 606 .get_caps = power_acpi_get_capabilities 607 }; 608 609 RTE_POWER_REGISTER_CPUFREQ_OPS(acpi_ops); 610