/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2020 Intel Corporation
 */

#include <rte_lcore.h>
#include <rte_cycles.h>
#include <rte_cpuflags.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_power_intrinsics.h>

#include "rte_power_pmd_mgmt.h"

#define EMPTYPOLL_MAX 512

/* store some internal state */
static struct pmd_conf_data {
	/** what do we support? */
	struct rte_cpu_intrinsics intrinsics_support;
	/** pre-calculated tsc diff for 1us */
	uint64_t tsc_per_us;
	/** how many rte_pause can we fit in a microsecond? */
	uint64_t pause_per_us;
} global_data;

/**
 * Possible power management states of an ethdev port.
 */
enum pmd_mgmt_state {
	/** Device power management is disabled. */
	PMD_MGMT_DISABLED = 0,
	/** Device power management is enabled. */
	PMD_MGMT_ENABLED
};

struct pmd_queue_cfg {
	volatile enum pmd_mgmt_state pwr_mgmt_state;
	/**< State of power management for this queue */
	enum rte_power_pmd_mgmt_type cb_mode;
	/**< Callback mode for this queue */
	const struct rte_eth_rxtx_callback *cur_cb;
	/**< Callback instance */
	volatile bool umwait_in_progress;
	/**< are we currently sleeping? */
	uint64_t empty_poll_stats;
	/**< Number of empty polls */
} __rte_cache_aligned;

static struct pmd_queue_cfg port_cfg[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];

static void
calc_tsc(void)
{
	const uint64_t hz = rte_get_timer_hz();
	const uint64_t tsc_per_us = hz / US_PER_S; /* 1us */

	global_data.tsc_per_us = tsc_per_us;

	/* only do this if we don't have tpause */
	if (!global_data.intrinsics_support.power_pause) {
		const uint64_t start = rte_rdtsc_precise();
		const uint32_t n_pauses = 10000;
		double us, us_per_pause;
		uint64_t end;
		unsigned int i;

		/* estimate number of rte_pause() calls per us */
		for (i = 0; i < n_pauses; i++)
			rte_pause();

		end = rte_rdtsc_precise();
		us = (end - start) / (double)tsc_per_us;
		us_per_pause = us / n_pauses;

		global_data.pause_per_us = (uint64_t)(1.0 / us_per_pause);
	}
}

static uint16_t
clb_umwait(uint16_t port_id, uint16_t qidx, struct rte_mbuf **pkts __rte_unused,
		uint16_t nb_rx, uint16_t max_pkts __rte_unused,
		void *addr __rte_unused)
{
	struct pmd_queue_cfg *q_conf;

	q_conf = &port_cfg[port_id][qidx];

	if (unlikely(nb_rx == 0)) {
		q_conf->empty_poll_stats++;
		if (unlikely(q_conf->empty_poll_stats > EMPTYPOLL_MAX)) {
			struct rte_power_monitor_cond pmc;
			uint16_t ret;

			/*
			 * we might get a cancellation request while being
			 * inside the callback, in which case the wakeup
			 * wouldn't work because it would've arrived too early.
			 *
			 * to get around this, we notify the other thread that
			 * we're sleeping, so that it can spin until we're done.
			 * unsolicited wakeups are perfectly safe.
			 */
			q_conf->umwait_in_progress = true;

			rte_atomic_thread_fence(__ATOMIC_SEQ_CST);

			/* check if we need to cancel sleep */
			if (q_conf->pwr_mgmt_state == PMD_MGMT_ENABLED) {
				/* use monitoring condition to sleep */
				ret = rte_eth_get_monitor_addr(port_id, qidx,
						&pmc);
				if (ret == 0)
					rte_power_monitor(&pmc, UINT64_MAX);
			}
			q_conf->umwait_in_progress = false;

			rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
		}
	} else
		q_conf->empty_poll_stats = 0;

	return nb_rx;
}

static uint16_t
clb_pause(uint16_t port_id, uint16_t qidx, struct rte_mbuf **pkts __rte_unused,
		uint16_t nb_rx, uint16_t max_pkts __rte_unused,
		void *addr __rte_unused)
{
	struct pmd_queue_cfg *q_conf;

	q_conf = &port_cfg[port_id][qidx];

	if (unlikely(nb_rx == 0)) {
		q_conf->empty_poll_stats++;
		/* sleep for 1 microsecond */
		if (unlikely(q_conf->empty_poll_stats > EMPTYPOLL_MAX)) {
			/* use tpause if we have it */
			if (global_data.intrinsics_support.power_pause) {
				const uint64_t cur = rte_rdtsc();
				const uint64_t wait_tsc =
						cur + global_data.tsc_per_us;
				rte_power_pause(wait_tsc);
			} else {
				uint64_t i;
				for (i = 0; i < global_data.pause_per_us; i++)
					rte_pause();
			}
		}
	} else
		q_conf->empty_poll_stats = 0;

	return nb_rx;
}

static uint16_t
clb_scale_freq(uint16_t port_id, uint16_t qidx,
		struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
		uint16_t max_pkts __rte_unused, void *_ __rte_unused)
{
	struct pmd_queue_cfg *q_conf;

	q_conf = &port_cfg[port_id][qidx];

	if (unlikely(nb_rx == 0)) {
		q_conf->empty_poll_stats++;
		if (unlikely(q_conf->empty_poll_stats > EMPTYPOLL_MAX))
			/* scale down freq */
			rte_power_freq_min(rte_lcore_id());
	} else {
		q_conf->empty_poll_stats = 0;
		/* scale up freq */
		rte_power_freq_max(rte_lcore_id());
	}

	return nb_rx;
}

int
rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t port_id,
		uint16_t queue_id, enum rte_power_pmd_mgmt_type mode)
{
	struct pmd_queue_cfg *queue_cfg;
	struct rte_eth_dev_info info;
	int ret;

	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

	if (queue_id >= RTE_MAX_QUEUES_PER_PORT || lcore_id >= RTE_MAX_LCORE) {
		ret = -EINVAL;
		goto end;
	}

	if (rte_eth_dev_info_get(port_id, &info) < 0) {
		ret = -EINVAL;
		goto end;
	}

	/* check if queue id is valid */
	if (queue_id >= info.nb_rx_queues) {
		ret = -EINVAL;
		goto end;
	}

	queue_cfg = &port_cfg[port_id][queue_id];

	if (queue_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED) {
		ret = -EINVAL;
		goto end;
	}

	/* we need this in various places */
	rte_cpu_get_intrinsics_support(&global_data.intrinsics_support);

	switch (mode) {
	case RTE_POWER_MGMT_TYPE_MONITOR:
	{
		struct rte_power_monitor_cond dummy;

		/* check if rte_power_monitor is supported */
		if (!global_data.intrinsics_support.power_monitor) {
			RTE_LOG(DEBUG, POWER, "Monitoring intrinsics are not supported\n");
			ret = -ENOTSUP;
			goto end;
		}

		/* check if the device supports the necessary PMD API */
		if (rte_eth_get_monitor_addr(port_id, queue_id,
				&dummy) == -ENOTSUP) {
			RTE_LOG(DEBUG, POWER, "The device does not support rte_eth_get_monitor_addr\n");
			ret = -ENOTSUP;
			goto end;
		}
		/* initialize data before enabling the callback */
		queue_cfg->empty_poll_stats = 0;
		queue_cfg->cb_mode = mode;
		queue_cfg->umwait_in_progress = false;
		queue_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;

		/* ensure we update our state before callback starts */
		rte_atomic_thread_fence(__ATOMIC_SEQ_CST);

		queue_cfg->cur_cb = rte_eth_add_rx_callback(port_id, queue_id,
				clb_umwait, NULL);
		break;
	}
	case RTE_POWER_MGMT_TYPE_SCALE:
	{
		enum power_management_env env;
		/* only PSTATE and ACPI modes are supported */
		if (!rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ) &&
				!rte_power_check_env_supported(
					PM_ENV_PSTATE_CPUFREQ)) {
			RTE_LOG(DEBUG, POWER, "Neither ACPI nor PSTATE modes are supported\n");
			ret = -ENOTSUP;
			goto end;
		}
		/* ensure we could initialize the power library */
		if (rte_power_init(lcore_id)) {
			ret = -EINVAL;
			goto end;
		}
		/* ensure we initialized the correct env */
		env = rte_power_get_env();
		if (env != PM_ENV_ACPI_CPUFREQ &&
				env != PM_ENV_PSTATE_CPUFREQ) {
			RTE_LOG(DEBUG, POWER, "Neither ACPI nor PSTATE modes were initialized\n");
			ret = -ENOTSUP;
			goto end;
		}
		/* initialize data before enabling the callback */
		queue_cfg->empty_poll_stats = 0;
		queue_cfg->cb_mode = mode;
		queue_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;

		/* this is not necessary here, but do it anyway */
		rte_atomic_thread_fence(__ATOMIC_SEQ_CST);

		queue_cfg->cur_cb = rte_eth_add_rx_callback(port_id,
				queue_id, clb_scale_freq, NULL);
		break;
	}
	case RTE_POWER_MGMT_TYPE_PAUSE:
		/* figure out various time-to-tsc conversions */
		if (global_data.tsc_per_us == 0)
			calc_tsc();

		/* initialize data before enabling the callback */
		queue_cfg->empty_poll_stats = 0;
		queue_cfg->cb_mode = mode;
		queue_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;

		/* this is not necessary here, but do it anyway */
		rte_atomic_thread_fence(__ATOMIC_SEQ_CST);

		queue_cfg->cur_cb = rte_eth_add_rx_callback(port_id, queue_id,
				clb_pause, NULL);
		break;
	}
	ret = 0;
end:
	return ret;
}

int
rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id,
		uint16_t port_id, uint16_t queue_id)
{
	struct pmd_queue_cfg *queue_cfg;

	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

	if (lcore_id >= RTE_MAX_LCORE || queue_id >= RTE_MAX_QUEUES_PER_PORT)
		return -EINVAL;

	/* no need to check queue id as wrong queue id would not be enabled */
	queue_cfg = &port_cfg[port_id][queue_id];

	if (queue_cfg->pwr_mgmt_state != PMD_MGMT_ENABLED)
		return -EINVAL;

	/* stop any callbacks from progressing */
	queue_cfg->pwr_mgmt_state = PMD_MGMT_DISABLED;

	/* ensure we update our state before continuing */
	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);

	switch (queue_cfg->cb_mode) {
	case RTE_POWER_MGMT_TYPE_MONITOR:
	{
		bool exit = false;
		do {
			/*
			 * we may request cancellation while the other thread
			 * has just entered the callback but hasn't started
			 * sleeping yet, so keep waking it up until we know it's
			 * done sleeping.
			 */
			if (queue_cfg->umwait_in_progress)
				rte_power_monitor_wakeup(lcore_id);
			else
				exit = true;
		} while (!exit);
	}
	/* fall-through */
	case RTE_POWER_MGMT_TYPE_PAUSE:
		rte_eth_remove_rx_callback(port_id, queue_id,
				queue_cfg->cur_cb);
		break;
	case RTE_POWER_MGMT_TYPE_SCALE:
		rte_power_freq_max(lcore_id);
		rte_eth_remove_rx_callback(port_id, queue_id,
				queue_cfg->cur_cb);
		rte_power_exit(lcore_id);
		break;
	}
	/*
	 * we don't free the RX callback here because it is unsafe to do so
	 * unless we know for a fact that all data plane threads have stopped.
	 */
	queue_cfg->cur_cb = NULL;

	return 0;
}
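
/*
 * Illustrative usage sketch, not part of the library itself: shows how an
 * application might wrap the enable/disable calls above for a single Rx
 * queue. The helper names and the choice of MONITOR mode are assumptions
 * made for the example only; the lcore id passed in should be the lcore
 * that actually polls the queue, and disable should be called before the
 * port is stopped.
 *
 * @code{.c}
 * #include <rte_lcore.h>
 * #include <rte_power_pmd_mgmt.h>
 *
 * // hypothetical helper: turn on per-queue power management
 * static int
 * example_queue_pmgmt_on(uint16_t port_id, uint16_t queue_id)
 * {
 *	return rte_power_ethdev_pmgmt_queue_enable(rte_lcore_id(), port_id,
 *			queue_id, RTE_POWER_MGMT_TYPE_MONITOR);
 * }
 *
 * // hypothetical helper: turn it off again before stopping the port
 * static int
 * example_queue_pmgmt_off(uint16_t port_id, uint16_t queue_id)
 * {
 *	return rte_power_ethdev_pmgmt_queue_disable(rte_lcore_id(), port_id,
 *			queue_id);
 * }
 * @endcode
 */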