xref: /dpdk/lib/power/rte_power_pmd_mgmt.c (revision 13064331957930f6b6c49ad02a638d7d5516c88f)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2020 Intel Corporation
 */

#include <stdlib.h>

#include <rte_lcore.h>
#include <rte_lcore_var.h>
#include <rte_cycles.h>
#include <rte_cpuflags.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_power_intrinsics.h>

#include "rte_power_pmd_mgmt.h"
#include "power_common.h"

unsigned int emptypoll_max;
unsigned int pause_duration;
unsigned int scale_freq_min[RTE_MAX_LCORE];
unsigned int scale_freq_max[RTE_MAX_LCORE];

/* store some internal state */
static struct pmd_conf_data {
	/** what do we support? */
	struct rte_cpu_intrinsics intrinsics_support;
	/** pre-calculated tsc diff for 1us */
	uint64_t tsc_per_us;
	/** how many rte_pause can we fit in a microsecond? */
	uint64_t pause_per_us;
} global_data;

/**
 * Possible power management states of an ethdev port.
 */
enum pmd_mgmt_state {
	/** Device power management is disabled. */
	PMD_MGMT_DISABLED = 0,
	/** Device power management is enabled. */
	PMD_MGMT_ENABLED
};

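/*
 * A (port, queue) pair packed into a single 32-bit value, so that queue
 * identity can be stored and compared as one integer.
 */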
union queue {
	uint32_t val;
	struct {
		uint16_t portid;
		uint16_t qid;
	};
};

struct queue_list_entry {
	TAILQ_ENTRY(queue_list_entry) next;
	union queue queue;
	uint64_t n_empty_polls;
	uint64_t n_sleeps;
	const struct rte_eth_rxtx_callback *cb;
};

struct __rte_cache_aligned pmd_core_cfg {
	TAILQ_HEAD(queue_list_head, queue_list_entry) head;
	/**< List of queues associated with this lcore */
	size_t n_queues;
	/**< How many queues are in the list? */
	volatile enum pmd_mgmt_state pwr_mgmt_state;
	/**< State of power management for this lcore */
	enum rte_power_pmd_mgmt_type cb_mode;
	/**< Callback mode for this lcore */
	uint64_t n_queues_ready_to_sleep;
	/**< Number of queues ready to enter power optimized state */
	uint64_t sleep_target;
	/**< Prevent a queue from triggering sleep multiple times */
};
static RTE_LCORE_VAR_HANDLE(struct pmd_core_cfg, lcore_cfgs);

static inline bool
queue_equal(const union queue *l, const union queue *r)
{
	return l->val == r->val;
}

static inline void
queue_copy(union queue *dst, const union queue *src)
{
	dst->val = src->val;
}

static struct queue_list_entry *
queue_list_find(const struct pmd_core_cfg *cfg, const union queue *q)
{
	struct queue_list_entry *cur;

	TAILQ_FOREACH(cur, &cfg->head, next) {
		if (queue_equal(&cur->queue, q))
			return cur;
	}
	return NULL;
}

static int
queue_list_add(struct pmd_core_cfg *cfg, const union queue *q)
{
	struct queue_list_entry *qle;

	/* is it already in the list? */
	if (queue_list_find(cfg, q) != NULL)
		return -EEXIST;

	qle = malloc(sizeof(*qle));
	if (qle == NULL)
		return -ENOMEM;
	memset(qle, 0, sizeof(*qle));

	queue_copy(&qle->queue, q);
	TAILQ_INSERT_TAIL(&cfg->head, qle, next);
	cfg->n_queues++;

	return 0;
}

static struct queue_list_entry *
queue_list_take(struct pmd_core_cfg *cfg, const union queue *q)
{
	struct queue_list_entry *found;

	found = queue_list_find(cfg, q);
	if (found == NULL)
		return NULL;

	TAILQ_REMOVE(&cfg->head, found, next);
	cfg->n_queues--;

	/* freeing is responsibility of the caller */
	return found;
}

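/*
 * Fill the caller-provided array with one monitor condition per queue
 * managed on this lcore; used by the multi-queue monitor callback.
 */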
static inline int
get_monitor_addresses(struct pmd_core_cfg *cfg,
		struct rte_power_monitor_cond *pmc, size_t len)
{
	const struct queue_list_entry *qle;
	size_t i = 0;
	int ret;

	TAILQ_FOREACH(qle, &cfg->head, next) {
		const union queue *q = &qle->queue;
		struct rte_power_monitor_cond *cur;

		/* attempted out of bounds access */
		if (i >= len) {
			POWER_LOG(ERR, "Too many queues being monitored");
			return -1;
		}

		cur = &pmc[i++];
		ret = rte_eth_get_monitor_addr(q->portid, q->qid, cur);
		if (ret < 0)
			return ret;
	}
	return 0;
}

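/*
 * Pre-compute time conversions: TSC ticks per microsecond and, if the
 * TPAUSE instruction is not available, an empirical estimate of how many
 * rte_pause() calls fit in one microsecond.
 */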
static void
calc_tsc(void)
{
	const uint64_t hz = rte_get_timer_hz();
	const uint64_t tsc_per_us = hz / US_PER_S; /* 1us */

	global_data.tsc_per_us = tsc_per_us;

	/* only do this if we don't have tpause */
	if (!global_data.intrinsics_support.power_pause) {
		const uint64_t start = rte_rdtsc_precise();
		const uint32_t n_pauses = 10000;
		double us, us_per_pause;
		uint64_t end;
		unsigned int i;

		/* estimate number of rte_pause() calls per us */
		for (i = 0; i < n_pauses; i++)
			rte_pause();

		end = rte_rdtsc_precise();
		us = (end - start) / (double)tsc_per_us;
		us_per_pause = us / n_pauses;

		global_data.pause_per_us = (uint64_t)(1.0 / us_per_pause);
	}
}

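/*
 * Sleep gating works per lcore: each queue counts its empty polls, and once
 * past emptypoll_max it marks itself ready by recording the lcore's current
 * sleep_target in n_sleeps. The lcore only sleeps once every queue is ready;
 * sleep_target is then bumped, so each queue must mark itself again before
 * the next sleep. Empty-poll counters are not reset on sleep, so an idle
 * lcore keeps sleeping on every poll until traffic resets its queues.
 */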
static inline void
queue_reset(struct pmd_core_cfg *cfg, struct queue_list_entry *qcfg)
{
	const bool is_ready_to_sleep = qcfg->n_sleeps == cfg->sleep_target;

	/* reset empty poll counter for this queue */
	qcfg->n_empty_polls = 0;
	/* reset the queue sleep counter as well */
	qcfg->n_sleeps = 0;
	/* remove the queue from list of queues ready to sleep */
	if (is_ready_to_sleep)
		cfg->n_queues_ready_to_sleep--;
	/*
	 * no need to change the lcore sleep target counter because this lcore
	 * will reach the n_sleeps anyway, and the other cores are already
	 * counted, so there's no need to do anything else.
	 */
}

static inline bool
queue_can_sleep(struct pmd_core_cfg *cfg, struct queue_list_entry *qcfg)
{
	/* this function is called - that means we have an empty poll */
	qcfg->n_empty_polls++;

	/* if we haven't reached threshold for empty polls, we can't sleep */
	if (qcfg->n_empty_polls <= emptypoll_max)
		return false;

	/*
	 * we've reached a point where we are able to sleep, but we still need
	 * to check if this queue has already been marked for sleeping.
	 */
	if (qcfg->n_sleeps == cfg->sleep_target)
		return true;

	/* mark this queue as ready for sleep */
	qcfg->n_sleeps = cfg->sleep_target;
	cfg->n_queues_ready_to_sleep++;

	return true;
}

static inline bool
lcore_can_sleep(struct pmd_core_cfg *cfg)
{
	/* are all queues ready to sleep? */
	if (cfg->n_queues_ready_to_sleep != cfg->n_queues)
		return false;

	/* we've reached an iteration where we can sleep, reset sleep counter */
	cfg->n_queues_ready_to_sleep = 0;
	cfg->sleep_target++;
	/*
	 * we do not reset any individual queue empty poll counters, because
	 * we want to keep sleeping on every poll until we actually get traffic.
	 */

	return true;
}

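/*
 * RX callback used when multi-queue monitoring is supported: once every
 * queue on the lcore is idle, block on all monitor addresses at once via
 * rte_power_monitor_multi().
 */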
static uint16_t
clb_multiwait(uint16_t port_id __rte_unused, uint16_t qidx __rte_unused,
		struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
		uint16_t max_pkts __rte_unused, void *arg)
{
	struct queue_list_entry *queue_conf = arg;
	struct pmd_core_cfg *lcore_conf;
	const bool empty = nb_rx == 0;

	lcore_conf = RTE_LCORE_VAR(lcore_cfgs);

	if (likely(!empty))
		/* early exit */
		queue_reset(lcore_conf, queue_conf);
	else {
		struct rte_power_monitor_cond pmc[lcore_conf->n_queues];
		int ret;

		/* can this queue sleep? */
		if (!queue_can_sleep(lcore_conf, queue_conf))
			return nb_rx;

		/* can this lcore sleep? */
		if (!lcore_can_sleep(lcore_conf))
			return nb_rx;

		/* gather all monitoring conditions */
		ret = get_monitor_addresses(lcore_conf, pmc,
				lcore_conf->n_queues);
		if (ret < 0)
			return nb_rx;

		rte_power_monitor_multi(pmc, lcore_conf->n_queues, UINT64_MAX);
	}

	return nb_rx;
}

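/*
 * RX callback used when only single-queue monitoring is available: after
 * enough empty polls, block on this queue's monitor address via
 * rte_power_monitor().
 */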
static uint16_t
clb_umwait(uint16_t port_id, uint16_t qidx, struct rte_mbuf **pkts __rte_unused,
		uint16_t nb_rx, uint16_t max_pkts __rte_unused, void *arg)
{
	struct queue_list_entry *queue_conf = arg;

	/* this callback can't do more than one queue, omit multiqueue logic */
	if (unlikely(nb_rx == 0)) {
		queue_conf->n_empty_polls++;
		if (unlikely(queue_conf->n_empty_polls > emptypoll_max)) {
			struct rte_power_monitor_cond pmc;
			int ret;

			/* use monitoring condition to sleep */
			ret = rte_eth_get_monitor_addr(port_id, qidx,
					&pmc);
			if (ret == 0)
				rte_power_monitor(&pmc, UINT64_MAX);
		}
	} else
		queue_conf->n_empty_polls = 0;

	return nb_rx;
}

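/*
 * RX callback for the PAUSE mode: once the lcore is idle, wait for the
 * configured pause duration, preferring the TPAUSE instruction when the
 * CPU supports it.
 */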
static uint16_t
clb_pause(uint16_t port_id __rte_unused, uint16_t qidx __rte_unused,
		struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
		uint16_t max_pkts __rte_unused, void *arg)
{
	struct queue_list_entry *queue_conf = arg;
	struct pmd_core_cfg *lcore_conf;
	const bool empty = nb_rx == 0;
	uint32_t pause_duration = rte_power_pmd_mgmt_get_pause_duration();

	lcore_conf = RTE_LCORE_VAR(lcore_cfgs);

	if (likely(!empty))
		/* early exit */
		queue_reset(lcore_conf, queue_conf);
	else {
		/* can this queue sleep? */
		if (!queue_can_sleep(lcore_conf, queue_conf))
			return nb_rx;

		/* can this lcore sleep? */
		if (!lcore_can_sleep(lcore_conf))
			return nb_rx;

		/* sleep for the configured number of microseconds, use tpause if we have it */
		if (global_data.intrinsics_support.power_pause) {
			const uint64_t cur = rte_rdtsc();
			const uint64_t wait_tsc =
					cur + global_data.tsc_per_us * pause_duration;
			rte_power_pause(wait_tsc);
		} else {
			uint64_t i;
			for (i = 0; i < global_data.pause_per_us * pause_duration; i++)
				rte_pause();
		}
	}

	return nb_rx;
}

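/*
 * RX callback for the SCALE mode: drop the lcore to its minimum frequency
 * while idle and jump back to the maximum as soon as traffic arrives.
 */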
static uint16_t
clb_scale_freq(uint16_t port_id __rte_unused, uint16_t qidx __rte_unused,
		struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
		uint16_t max_pkts __rte_unused, void *arg)
{
	const bool empty = nb_rx == 0;
	struct pmd_core_cfg *lcore_conf = RTE_LCORE_VAR(lcore_cfgs);
	struct queue_list_entry *queue_conf = arg;

	if (likely(!empty)) {
		/* early exit */
		queue_reset(lcore_conf, queue_conf);

		/* scale up freq immediately */
		rte_power_freq_max(rte_lcore_id());
	} else {
		/* can this queue sleep? */
		if (!queue_can_sleep(lcore_conf, queue_conf))
			return nb_rx;

		/* can this lcore sleep? */
		if (!lcore_can_sleep(lcore_conf))
			return nb_rx;

		rte_power_freq_min(rte_lcore_id());
	}

	return nb_rx;
}

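/*
 * Power management may only be enabled or disabled while the affected RX
 * queues are stopped; these helpers check the queue state (drivers that
 * cannot report queue info are treated as stopped).
 */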
static int
queue_stopped(const uint16_t port_id, const uint16_t queue_id)
{
	struct rte_eth_rxq_info qinfo;

	int ret = rte_eth_rx_queue_info_get(port_id, queue_id, &qinfo);
	if (ret < 0) {
		if (ret == -ENOTSUP)
			return 1;
		else
			return -1;
	}

	return qinfo.queue_state == RTE_ETH_QUEUE_STATE_STOPPED;
}

static int
cfg_queues_stopped(struct pmd_core_cfg *queue_cfg)
{
	const struct queue_list_entry *entry;

	TAILQ_FOREACH(entry, &queue_cfg->head, next) {
		const union queue *q = &entry->queue;
		int ret = queue_stopped(q->portid, q->qid);
		if (ret != 1)
			return ret;
	}
	return 1;
}

static int
check_scale(unsigned int lcore)
{
	enum power_management_env env;

	/* only PSTATE, AMD-PSTATE, ACPI and CPPC modes are supported */
	if (!rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ) &&
			!rte_power_check_env_supported(PM_ENV_PSTATE_CPUFREQ) &&
			!rte_power_check_env_supported(PM_ENV_AMD_PSTATE_CPUFREQ) &&
			!rte_power_check_env_supported(PM_ENV_CPPC_CPUFREQ)) {
		POWER_LOG(DEBUG, "Only ACPI, PSTATE, AMD-PSTATE, or CPPC modes are supported");
		return -ENOTSUP;
	}
	/* ensure we could initialize the power library */
	if (rte_power_init(lcore))
		return -EINVAL;

	/* ensure we initialized the correct env */
	env = rte_power_get_env();
	if (env != PM_ENV_ACPI_CPUFREQ && env != PM_ENV_PSTATE_CPUFREQ &&
			env != PM_ENV_AMD_PSTATE_CPUFREQ && env != PM_ENV_CPPC_CPUFREQ) {
		POWER_LOG(DEBUG, "Unable to initialize ACPI, PSTATE, AMD-PSTATE, or CPPC modes");
		return -ENOTSUP;
	}

	/* we're done */
	return 0;
}

static int
check_monitor(struct pmd_core_cfg *cfg, const union queue *qdata)
{
	struct rte_power_monitor_cond dummy;
	bool multimonitor_supported;

	/* check if rte_power_monitor is supported */
	if (!global_data.intrinsics_support.power_monitor) {
		POWER_LOG(DEBUG, "Monitoring intrinsics are not supported");
		return -ENOTSUP;
	}
	/* check if multi-monitor is supported */
	multimonitor_supported =
			global_data.intrinsics_support.power_monitor_multi;

	/* if we're adding a new queue, do we support multiple queues? */
	if (cfg->n_queues > 0 && !multimonitor_supported) {
		POWER_LOG(DEBUG, "Monitoring multiple queues is not supported");
		return -ENOTSUP;
	}

	/* check if the device supports the necessary PMD API */
	if (rte_eth_get_monitor_addr(qdata->portid, qdata->qid,
			&dummy) == -ENOTSUP) {
		POWER_LOG(DEBUG, "The device does not support rte_eth_get_monitor_addr");
		return -ENOTSUP;
	}

	/* we're done */
	return 0;
}

static inline rte_rx_callback_fn
get_monitor_callback(void)
{
	return global_data.intrinsics_support.power_monitor_multi ?
		clb_multiwait : clb_umwait;
}

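/*
 * Typical call sequence from the application side (illustrative sketch
 * only, error handling omitted; the queue must be configured but stopped):
 *
 *	rte_power_pmd_mgmt_set_emptypoll_max(512);
 *	rte_power_ethdev_pmgmt_queue_enable(lcore_id, port_id, queue_id,
 *			RTE_POWER_MGMT_TYPE_MONITOR);
 *	rte_eth_dev_start(port_id);
 *	...
 *	rte_eth_dev_stop(port_id);
 *	rte_power_ethdev_pmgmt_queue_disable(lcore_id, port_id, queue_id);
 */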
int
rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t port_id,
		uint16_t queue_id, enum rte_power_pmd_mgmt_type mode)
{
	const union queue qdata = {.portid = port_id, .qid = queue_id};
	struct pmd_core_cfg *lcore_cfg;
	struct queue_list_entry *queue_cfg;
	struct rte_eth_dev_info info;
	rte_rx_callback_fn clb;
	int ret;

	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

	if (queue_id >= RTE_MAX_QUEUES_PER_PORT || lcore_id >= RTE_MAX_LCORE) {
		ret = -EINVAL;
		goto end;
	}

	if (rte_eth_dev_info_get(port_id, &info) < 0) {
		ret = -EINVAL;
		goto end;
	}

	/* check if queue id is valid */
	if (queue_id >= info.nb_rx_queues) {
		ret = -EINVAL;
		goto end;
	}

	/* check if the queue is stopped */
	ret = queue_stopped(port_id, queue_id);
	if (ret != 1) {
		/* error means invalid queue, 0 means queue wasn't stopped */
		ret = ret < 0 ? -EINVAL : -EBUSY;
		goto end;
	}

	lcore_cfg = RTE_LCORE_VAR_LCORE(lcore_id, lcore_cfgs);

	/* check if other queues are stopped as well */
	ret = cfg_queues_stopped(lcore_cfg);
	if (ret != 1) {
		/* error means invalid queue, 0 means queue wasn't stopped */
		ret = ret < 0 ? -EINVAL : -EBUSY;
		goto end;
	}

	/* if callback was already enabled, check current callback type */
	if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED &&
			lcore_cfg->cb_mode != mode) {
		ret = -EINVAL;
		goto end;
	}

	/* we need this in various places */
	rte_cpu_get_intrinsics_support(&global_data.intrinsics_support);

	switch (mode) {
	case RTE_POWER_MGMT_TYPE_MONITOR:
		/* check if we can add a new queue */
		ret = check_monitor(lcore_cfg, &qdata);
		if (ret < 0)
			goto end;

		clb = get_monitor_callback();
		break;
	case RTE_POWER_MGMT_TYPE_SCALE:
		clb = clb_scale_freq;

		/* we only have to check this when enabling first queue */
		if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED)
			break;
		/* check if we can add a new queue */
		ret = check_scale(lcore_id);
		if (ret < 0)
			goto end;
		break;
	case RTE_POWER_MGMT_TYPE_PAUSE:
		/* figure out various time-to-tsc conversions */
		if (global_data.tsc_per_us == 0)
			calc_tsc();

		clb = clb_pause;
		break;
	default:
		POWER_LOG(DEBUG, "Invalid power management type");
		ret = -EINVAL;
		goto end;
	}
	/* add this queue to the list */
	ret = queue_list_add(lcore_cfg, &qdata);
	if (ret < 0) {
		POWER_LOG(DEBUG, "Failed to add queue to list: %s",
				strerror(-ret));
		goto end;
	}
	/* new queue is always added last */
	queue_cfg = TAILQ_LAST(&lcore_cfg->head, queue_list_head);

	/* when enabling first queue, ensure sleep target is not 0 */
	if (lcore_cfg->n_queues == 1 && lcore_cfg->sleep_target == 0)
		lcore_cfg->sleep_target = 1;

	/* initialize data before enabling the callback */
	if (lcore_cfg->n_queues == 1) {
		lcore_cfg->cb_mode = mode;
		lcore_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;
	}
	queue_cfg->cb = rte_eth_add_rx_callback(port_id, queue_id,
			clb, queue_cfg);

	ret = 0;
end:
	return ret;
}

int
rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id,
		uint16_t port_id, uint16_t queue_id)
{
	const union queue qdata = {.portid = port_id, .qid = queue_id};
	struct pmd_core_cfg *lcore_cfg;
	struct queue_list_entry *queue_cfg;
	int ret;

	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

	if (lcore_id >= RTE_MAX_LCORE || queue_id >= RTE_MAX_QUEUES_PER_PORT)
		return -EINVAL;

	/* check if the queue is stopped */
	ret = queue_stopped(port_id, queue_id);
	if (ret != 1) {
		/* error means invalid queue, 0 means queue wasn't stopped */
		return ret < 0 ? -EINVAL : -EBUSY;
	}

	/* no need to check queue id as wrong queue id would not be enabled */
	lcore_cfg = RTE_LCORE_VAR_LCORE(lcore_id, lcore_cfgs);

	/* check if other queues are stopped as well */
	ret = cfg_queues_stopped(lcore_cfg);
	if (ret != 1) {
		/* error means invalid queue, 0 means queue wasn't stopped */
		return ret < 0 ? -EINVAL : -EBUSY;
	}

	if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_ENABLED)
		return -EINVAL;

	/*
	 * There is no good/easy way to do this without race conditions, so we
	 * are just going to throw our hands in the air and hope that the user
	 * has read the documentation and has ensured that ports are stopped at
	 * the time we enter the API functions.
	 */
	queue_cfg = queue_list_take(lcore_cfg, &qdata);
	if (queue_cfg == NULL)
		return -ENOENT;

	/* if we've removed all queues from the lists, set state to disabled */
	if (lcore_cfg->n_queues == 0)
		lcore_cfg->pwr_mgmt_state = PMD_MGMT_DISABLED;

	switch (lcore_cfg->cb_mode) {
	case RTE_POWER_MGMT_TYPE_MONITOR: /* fall-through */
	case RTE_POWER_MGMT_TYPE_PAUSE:
		rte_eth_remove_rx_callback(port_id, queue_id, queue_cfg->cb);
		break;
	case RTE_POWER_MGMT_TYPE_SCALE:
		rte_eth_remove_rx_callback(port_id, queue_id, queue_cfg->cb);
		/* disable power library on this lcore if this was last queue */
		if (lcore_cfg->pwr_mgmt_state == PMD_MGMT_DISABLED) {
			rte_power_freq_max(lcore_id);
			rte_power_exit(lcore_id);
		}
		break;
	}
	/*
	 * the API doc mandates that the user stops all processing on affected
	 * ports before calling any of these APIs, so we can assume that the
	 * callbacks can be freed. we're intentionally casting away const-ness.
	 */
	rte_free((void *)(uintptr_t)queue_cfg->cb);
	free(queue_cfg);

	return 0;
}

void
rte_power_pmd_mgmt_set_emptypoll_max(unsigned int max)
{
	emptypoll_max = max;
}

unsigned int
rte_power_pmd_mgmt_get_emptypoll_max(void)
{
	return emptypoll_max;
}

int
rte_power_pmd_mgmt_set_pause_duration(unsigned int duration)
{
	if (duration == 0) {
		POWER_LOG(ERR, "Pause duration must be greater than 0, value unchanged");
		return -EINVAL;
	}
	pause_duration = duration;

	return 0;
}

unsigned int
rte_power_pmd_mgmt_get_pause_duration(void)
{
	return pause_duration;
}

int
rte_power_pmd_mgmt_set_scaling_freq_min(unsigned int lcore, unsigned int min)
{
	if (lcore >= RTE_MAX_LCORE) {
		POWER_LOG(ERR, "Invalid lcore ID: %u", lcore);
		return -EINVAL;
	}

	if (min > scale_freq_max[lcore]) {
		POWER_LOG(ERR, "Invalid min frequency: Cannot be greater than max frequency");
		return -EINVAL;
	}
	scale_freq_min[lcore] = min;

	return 0;
}

int
rte_power_pmd_mgmt_set_scaling_freq_max(unsigned int lcore, unsigned int max)
{
	if (lcore >= RTE_MAX_LCORE) {
		POWER_LOG(ERR, "Invalid lcore ID: %u", lcore);
		return -EINVAL;
	}

	/* Zero means 'not set'. Use UINT32_MAX to enable RTE_MIN/MAX macro use when scaling. */
	if (max == 0)
		max = UINT32_MAX;
	if (max < scale_freq_min[lcore]) {
		POWER_LOG(ERR, "Invalid max frequency: Cannot be less than min frequency");
		return -EINVAL;
	}

	scale_freq_max[lcore] = max;

	return 0;
}

int
rte_power_pmd_mgmt_get_scaling_freq_min(unsigned int lcore)
{
	if (lcore >= RTE_MAX_LCORE) {
		POWER_LOG(ERR, "Invalid lcore ID: %u", lcore);
		return -EINVAL;
	}

	if (scale_freq_min[lcore] == 0)
		POWER_LOG(DEBUG, "Scaling freq min config not set. Using sysfs min freq.");

	return scale_freq_min[lcore];
}

int
rte_power_pmd_mgmt_get_scaling_freq_max(unsigned int lcore)
{
	if (lcore >= RTE_MAX_LCORE) {
		POWER_LOG(ERR, "Invalid lcore ID: %u", lcore);
		return -EINVAL;
	}

	if (scale_freq_max[lcore] == UINT32_MAX) {
		POWER_LOG(DEBUG, "Scaling freq max config not set. Using sysfs max freq.");
		return 0;
	}

	return scale_freq_max[lcore];
}

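/*
 * Constructor: allocate the per-lcore configuration and set library defaults
 * (512 empty polls, 1 us pause duration, scaling limits deferred to sysfs
 * unless explicitly set).
 */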
RTE_INIT(rte_power_ethdev_pmgmt_init) {
	unsigned int lcore_id;
	struct pmd_core_cfg *lcore_cfg;
	int i;

	RTE_LCORE_VAR_ALLOC(lcore_cfgs);

	/* initialize all tailqs */
	RTE_LCORE_VAR_FOREACH(lcore_id, lcore_cfg, lcore_cfgs)
		TAILQ_INIT(&lcore_cfg->head);

	/* initialize config defaults */
	emptypoll_max = 512;
	pause_duration = 1;
	/* scaling defaults out of range to ensure not used unless set by user or app */
	for (i = 0; i < RTE_MAX_LCORE; i++) {
		scale_freq_min[i] = 0;
		scale_freq_max[i] = UINT32_MAX;
	}
}