xref: /dpdk/lib/power/rte_power_pmd_mgmt.c (revision 25a2a0dc3de31ca0a6fbc9371cf3dd85dfd74b07)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2020 Intel Corporation
 */

#include <stdlib.h>
#include <string.h>

#include <rte_lcore.h>
#include <rte_cycles.h>
#include <rte_cpuflags.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_power_intrinsics.h>

#include "rte_power_pmd_mgmt.h"
#include "power_common.h"

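/*
 * Tunables set through the rte_power_pmd_mgmt_set_*() API below.
 * emptypoll_max and pause_duration are read by the Rx callbacks in this file;
 * the per-lcore scale_freq_min/scale_freq_max limits are read by the
 * frequency-scaling backends of the power library.
 */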
unsigned int emptypoll_max;
unsigned int pause_duration;
unsigned int scale_freq_min[RTE_MAX_LCORE];
unsigned int scale_freq_max[RTE_MAX_LCORE];

/* store some internal state */
static struct pmd_conf_data {
	/** what do we support? */
	struct rte_cpu_intrinsics intrinsics_support;
	/** pre-calculated tsc diff for 1us */
	uint64_t tsc_per_us;
	/** how many rte_pause can we fit in a microsecond? */
	uint64_t pause_per_us;
} global_data;

/**
 * Possible power management states of an ethdev port.
 */
enum pmd_mgmt_state {
	/** Device power management is disabled. */
	PMD_MGMT_DISABLED = 0,
	/** Device power management is enabled. */
	PMD_MGMT_ENABLED
};

union queue {
	uint32_t val;
	struct {
		uint16_t portid;
		uint16_t qid;
	};
};

struct queue_list_entry {
	TAILQ_ENTRY(queue_list_entry) next;
	union queue queue;
	uint64_t n_empty_polls;
	uint64_t n_sleeps;
	const struct rte_eth_rxtx_callback *cb;
};

struct __rte_cache_aligned pmd_core_cfg {
	TAILQ_HEAD(queue_list_head, queue_list_entry) head;
	/**< List of queues associated with this lcore */
	size_t n_queues;
	/**< How many queues are in the list? */
	volatile enum pmd_mgmt_state pwr_mgmt_state;
	/**< State of power management for this lcore */
	enum rte_power_pmd_mgmt_type cb_mode;
	/**< Callback mode for this lcore */
	uint64_t n_queues_ready_to_sleep;
	/**< Number of queues ready to enter power optimized state */
	uint64_t sleep_target;
	/**< Prevent a queue from triggering sleep multiple times */
};
static struct pmd_core_cfg lcore_cfgs[RTE_MAX_LCORE];

static inline bool
queue_equal(const union queue *l, const union queue *r)
{
	return l->val == r->val;
}

static inline void
queue_copy(union queue *dst, const union queue *src)
{
	dst->val = src->val;
}

static struct queue_list_entry *
queue_list_find(const struct pmd_core_cfg *cfg, const union queue *q)
{
	struct queue_list_entry *cur;

	TAILQ_FOREACH(cur, &cfg->head, next) {
		if (queue_equal(&cur->queue, q))
			return cur;
	}
	return NULL;
}

static int
queue_list_add(struct pmd_core_cfg *cfg, const union queue *q)
{
	struct queue_list_entry *qle;

	/* is it already in the list? */
	if (queue_list_find(cfg, q) != NULL)
		return -EEXIST;

	qle = malloc(sizeof(*qle));
	if (qle == NULL)
		return -ENOMEM;
	memset(qle, 0, sizeof(*qle));

	queue_copy(&qle->queue, q);
	TAILQ_INSERT_TAIL(&cfg->head, qle, next);
	cfg->n_queues++;

	return 0;
}

static struct queue_list_entry *
queue_list_take(struct pmd_core_cfg *cfg, const union queue *q)
{
	struct queue_list_entry *found;

	found = queue_list_find(cfg, q);
	if (found == NULL)
		return NULL;

	TAILQ_REMOVE(&cfg->head, found, next);
	cfg->n_queues--;

	/* freeing is responsibility of the caller */
	return found;
}

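/*
 * Gather one monitoring condition per queue registered on this lcore, using
 * rte_eth_get_monitor_addr() to query each queue's driver.
 */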
static inline int
get_monitor_addresses(struct pmd_core_cfg *cfg,
		struct rte_power_monitor_cond *pmc, size_t len)
{
	const struct queue_list_entry *qle;
	size_t i = 0;
	int ret;

	TAILQ_FOREACH(qle, &cfg->head, next) {
		const union queue *q = &qle->queue;
		struct rte_power_monitor_cond *cur;

		/* attempted out of bounds access */
		if (i >= len) {
			POWER_LOG(ERR, "Too many queues being monitored");
			return -1;
		}

		cur = &pmc[i++];
		ret = rte_eth_get_monitor_addr(q->portid, q->qid, cur);
		if (ret < 0)
			return ret;
	}
	return 0;
}

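/*
 * Calibrate the time base: record how many TSC cycles make up one microsecond
 * and, when TPAUSE is not available, estimate how many rte_pause() calls fit
 * in a microsecond so the PAUSE callback can busy-wait for the right duration.
 */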
static void
calc_tsc(void)
{
	const uint64_t hz = rte_get_timer_hz();
	const uint64_t tsc_per_us = hz / US_PER_S; /* 1us */

	global_data.tsc_per_us = tsc_per_us;

	/* only do this if we don't have tpause */
	if (!global_data.intrinsics_support.power_pause) {
		const uint64_t start = rte_rdtsc_precise();
		const uint32_t n_pauses = 10000;
		double us, us_per_pause;
		uint64_t end;
		unsigned int i;

		/* estimate number of rte_pause() calls per us */
		for (i = 0; i < n_pauses; i++)
			rte_pause();

		end = rte_rdtsc_precise();
		us = (end - start) / (double)tsc_per_us;
		us_per_pause = us / n_pauses;

		global_data.pause_per_us = (uint64_t)(1.0 / us_per_pause);
	}
}

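/*
 * Called when a queue received traffic: reset its empty-poll and sleep
 * counters, and drop it from the count of queues that are ready to sleep.
 */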
static inline void
queue_reset(struct pmd_core_cfg *cfg, struct queue_list_entry *qcfg)
{
	const bool is_ready_to_sleep = qcfg->n_sleeps == cfg->sleep_target;

	/* reset empty poll counter for this queue */
	qcfg->n_empty_polls = 0;
	/* reset the queue sleep counter as well */
	qcfg->n_sleeps = 0;
	/* remove the queue from list of queues ready to sleep */
	if (is_ready_to_sleep)
		cfg->n_queues_ready_to_sleep--;
	/*
	 * no need to change the lcore sleep target counter because this lcore
	 * will reach the n_sleeps anyway, and the other cores are already
	 * counted, so there's no need to do anything else.
	 */
}

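/*
 * Account one empty poll for this queue and decide whether it has crossed the
 * empty-poll threshold, i.e. whether it is allowed to enter a sleep state.
 */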
static inline bool
queue_can_sleep(struct pmd_core_cfg *cfg, struct queue_list_entry *qcfg)
{
	/* this function is called - that means we have an empty poll */
	qcfg->n_empty_polls++;

	/* if we haven't reached threshold for empty polls, we can't sleep */
	if (qcfg->n_empty_polls <= emptypoll_max)
		return false;

	/*
	 * we've reached a point where we are able to sleep, but we still need
	 * to check if this queue has already been marked for sleeping.
	 */
	if (qcfg->n_sleeps == cfg->sleep_target)
		return true;

	/* mark this queue as ready for sleep */
	qcfg->n_sleeps = cfg->sleep_target;
	cfg->n_queues_ready_to_sleep++;

	return true;
}

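/*
 * The lcore may only sleep once every queue it polls is ready to sleep.
 * When that happens, bump the sleep target so each queue has to requalify
 * before the next sleep.
 */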
static inline bool
lcore_can_sleep(struct pmd_core_cfg *cfg)
{
	/* are all queues ready to sleep? */
	if (cfg->n_queues_ready_to_sleep != cfg->n_queues)
		return false;

	/* we've reached an iteration where we can sleep, reset sleep counter */
	cfg->n_queues_ready_to_sleep = 0;
	cfg->sleep_target++;
	/*
	 * we do not reset any individual queue empty poll counters, because
	 * we want to keep sleeping on every poll until we actually get traffic.
	 */

	return true;
}

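/*
 * Rx callback for MONITOR mode when multiple queues share an lcore: once all
 * queues on the lcore are ready to sleep, gather their monitoring conditions
 * and wait on all of them at once with rte_power_monitor_multi().
 */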
static uint16_t
clb_multiwait(uint16_t port_id __rte_unused, uint16_t qidx __rte_unused,
		struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
		uint16_t max_pkts __rte_unused, void *arg)
{
	const unsigned int lcore = rte_lcore_id();
	struct queue_list_entry *queue_conf = arg;
	struct pmd_core_cfg *lcore_conf;
	const bool empty = nb_rx == 0;

	lcore_conf = &lcore_cfgs[lcore];

	if (likely(!empty))
		/* early exit */
		queue_reset(lcore_conf, queue_conf);
	else {
		struct rte_power_monitor_cond pmc[lcore_conf->n_queues];
		int ret;

		/* can this queue sleep? */
		if (!queue_can_sleep(lcore_conf, queue_conf))
			return nb_rx;

		/* can this lcore sleep? */
		if (!lcore_can_sleep(lcore_conf))
			return nb_rx;

		/* gather all monitoring conditions */
		ret = get_monitor_addresses(lcore_conf, pmc,
				lcore_conf->n_queues);
		if (ret < 0)
			return nb_rx;

		rte_power_monitor_multi(pmc, lcore_conf->n_queues, UINT64_MAX);
	}

	return nb_rx;
}

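/*
 * Rx callback for MONITOR mode when the lcore polls a single queue: after
 * enough empty polls, fetch the queue's monitoring condition and sleep on it
 * with rte_power_monitor().
 */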
static uint16_t
clb_umwait(uint16_t port_id, uint16_t qidx, struct rte_mbuf **pkts __rte_unused,
		uint16_t nb_rx, uint16_t max_pkts __rte_unused, void *arg)
{
	struct queue_list_entry *queue_conf = arg;

	/* this callback can't do more than one queue, omit multiqueue logic */
	if (unlikely(nb_rx == 0)) {
		queue_conf->n_empty_polls++;
		if (unlikely(queue_conf->n_empty_polls > emptypoll_max)) {
			struct rte_power_monitor_cond pmc;
			int ret;

			/* use monitoring condition to sleep */
			ret = rte_eth_get_monitor_addr(port_id, qidx,
					&pmc);
			if (ret == 0)
				rte_power_monitor(&pmc, UINT64_MAX);
		}
	} else
		queue_conf->n_empty_polls = 0;

	return nb_rx;
}

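/*
 * Rx callback for PAUSE mode: once every queue on the lcore has seen enough
 * empty polls, stall for pause_duration microseconds, either with the TPAUSE
 * instruction (rte_power_pause()) or with a calibrated rte_pause() loop.
 */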
static uint16_t
clb_pause(uint16_t port_id __rte_unused, uint16_t qidx __rte_unused,
		struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
		uint16_t max_pkts __rte_unused, void *arg)
{
	const unsigned int lcore = rte_lcore_id();
	struct queue_list_entry *queue_conf = arg;
	struct pmd_core_cfg *lcore_conf;
	const bool empty = nb_rx == 0;
	uint32_t pause_duration = rte_power_pmd_mgmt_get_pause_duration();

	lcore_conf = &lcore_cfgs[lcore];

	if (likely(!empty))
		/* early exit */
		queue_reset(lcore_conf, queue_conf);
	else {
		/* can this queue sleep? */
		if (!queue_can_sleep(lcore_conf, queue_conf))
			return nb_rx;

		/* can this lcore sleep? */
		if (!lcore_can_sleep(lcore_conf))
			return nb_rx;

		/* sleep for pause_duration microseconds, use tpause if we have it */
		if (global_data.intrinsics_support.power_pause) {
			const uint64_t cur = rte_rdtsc();
			const uint64_t wait_tsc =
					cur + global_data.tsc_per_us * pause_duration;
			rte_power_pause(wait_tsc);
		} else {
			uint64_t i;
			for (i = 0; i < global_data.pause_per_us * pause_duration; i++)
				rte_pause();
		}
	}

	return nb_rx;
}

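/*
 * Rx callback for SCALE mode: scale the lcore frequency back up as soon as
 * traffic arrives, and drop it to the minimum once every queue on the lcore
 * has been idle for long enough.
 */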
static uint16_t
clb_scale_freq(uint16_t port_id __rte_unused, uint16_t qidx __rte_unused,
		struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
		uint16_t max_pkts __rte_unused, void *arg)
{
	const unsigned int lcore = rte_lcore_id();
	const bool empty = nb_rx == 0;
	struct pmd_core_cfg *lcore_conf = &lcore_cfgs[lcore];
	struct queue_list_entry *queue_conf = arg;

	if (likely(!empty)) {
		/* early exit */
		queue_reset(lcore_conf, queue_conf);

		/* scale up freq immediately */
		rte_power_freq_max(rte_lcore_id());
	} else {
		/* can this queue sleep? */
		if (!queue_can_sleep(lcore_conf, queue_conf))
			return nb_rx;

		/* can this lcore sleep? */
		if (!lcore_can_sleep(lcore_conf))
			return nb_rx;

		rte_power_freq_min(rte_lcore_id());
	}

	return nb_rx;
}

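/*
 * Return 1 if the Rx queue is stopped (or if the driver cannot report queue
 * state), 0 if it is still running, and -1 on invalid port/queue.
 */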
static int
queue_stopped(const uint16_t port_id, const uint16_t queue_id)
{
	struct rte_eth_rxq_info qinfo;

	int ret = rte_eth_rx_queue_info_get(port_id, queue_id, &qinfo);
	if (ret < 0) {
		if (ret == -ENOTSUP)
			return 1;
		else
			return -1;
	}

	return qinfo.queue_state == RTE_ETH_QUEUE_STATE_STOPPED;
}

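/*
 * Check that every queue already registered on this lcore is stopped;
 * returns the same values as queue_stopped().
 */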
static int
cfg_queues_stopped(struct pmd_core_cfg *queue_cfg)
{
	const struct queue_list_entry *entry;

	TAILQ_FOREACH(entry, &queue_cfg->head, next) {
		const union queue *q = &entry->queue;
		int ret = queue_stopped(q->portid, q->qid);
		if (ret != 1)
			return ret;
	}
	return 1;
}

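/*
 * Verify that frequency scaling can be used on this lcore: a supported
 * cpufreq environment must be available and the power library must
 * initialize successfully for the lcore.
 */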
static int
check_scale(unsigned int lcore)
{
	enum power_management_env env;

	/* only PSTATE, AMD-PSTATE, ACPI and CPPC modes are supported */
	if (!rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ) &&
			!rte_power_check_env_supported(PM_ENV_PSTATE_CPUFREQ) &&
			!rte_power_check_env_supported(PM_ENV_AMD_PSTATE_CPUFREQ) &&
			!rte_power_check_env_supported(PM_ENV_CPPC_CPUFREQ)) {
		POWER_LOG(DEBUG, "Only ACPI, PSTATE, AMD-PSTATE, or CPPC modes are supported");
		return -ENOTSUP;
	}
	/* ensure we could initialize the power library */
	if (rte_power_init(lcore))
		return -EINVAL;

	/* ensure we initialized the correct env */
	env = rte_power_get_env();
	if (env != PM_ENV_ACPI_CPUFREQ && env != PM_ENV_PSTATE_CPUFREQ &&
			env != PM_ENV_AMD_PSTATE_CPUFREQ && env != PM_ENV_CPPC_CPUFREQ) {
		POWER_LOG(DEBUG, "Unable to initialize ACPI, PSTATE, AMD-PSTATE, or CPPC modes");
		return -ENOTSUP;
	}

	/* we're done */
	return 0;
}

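/*
 * Verify that MONITOR mode can be used for this queue: the CPU must provide
 * the monitoring intrinsics (multi-monitor if more than one queue shares the
 * lcore) and the driver must implement rte_eth_get_monitor_addr().
 */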
static int
check_monitor(struct pmd_core_cfg *cfg, const union queue *qdata)
{
	struct rte_power_monitor_cond dummy;
	bool multimonitor_supported;

	/* check if rte_power_monitor is supported */
	if (!global_data.intrinsics_support.power_monitor) {
		POWER_LOG(DEBUG, "Monitoring intrinsics are not supported");
		return -ENOTSUP;
	}
	/* check if multi-monitor is supported */
	multimonitor_supported =
			global_data.intrinsics_support.power_monitor_multi;

	/* if we're adding a new queue, do we support multiple queues? */
	if (cfg->n_queues > 0 && !multimonitor_supported) {
		POWER_LOG(DEBUG, "Monitoring multiple queues is not supported");
		return -ENOTSUP;
	}

	/* check if the device supports the necessary PMD API */
	if (rte_eth_get_monitor_addr(qdata->portid, qdata->qid,
			&dummy) == -ENOTSUP) {
		POWER_LOG(DEBUG, "The device does not support rte_eth_get_monitor_addr");
		return -ENOTSUP;
	}

	/* we're done */
	return 0;
}

static inline rte_rx_callback_fn
get_monitor_callback(void)
{
	return global_data.intrinsics_support.power_monitor_multi ?
		clb_multiwait : clb_umwait;
}

int
rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t port_id,
		uint16_t queue_id, enum rte_power_pmd_mgmt_type mode)
{
	const union queue qdata = {.portid = port_id, .qid = queue_id};
	struct pmd_core_cfg *lcore_cfg;
	struct queue_list_entry *queue_cfg;
	struct rte_eth_dev_info info;
	rte_rx_callback_fn clb;
	int ret;

	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

	if (queue_id >= RTE_MAX_QUEUES_PER_PORT || lcore_id >= RTE_MAX_LCORE) {
		ret = -EINVAL;
		goto end;
	}

	if (rte_eth_dev_info_get(port_id, &info) < 0) {
		ret = -EINVAL;
		goto end;
	}

	/* check if queue id is valid */
	if (queue_id >= info.nb_rx_queues) {
		ret = -EINVAL;
		goto end;
	}

	/* check if the queue is stopped */
	ret = queue_stopped(port_id, queue_id);
	if (ret != 1) {
		/* error means invalid queue, 0 means queue wasn't stopped */
		ret = ret < 0 ? -EINVAL : -EBUSY;
		goto end;
	}

	lcore_cfg = &lcore_cfgs[lcore_id];

	/* check if other queues are stopped as well */
	ret = cfg_queues_stopped(lcore_cfg);
	if (ret != 1) {
		/* error means invalid queue, 0 means queue wasn't stopped */
		ret = ret < 0 ? -EINVAL : -EBUSY;
		goto end;
	}

	/* if callback was already enabled, check current callback type */
	if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED &&
			lcore_cfg->cb_mode != mode) {
		ret = -EINVAL;
		goto end;
	}

	/* we need this in various places */
	rte_cpu_get_intrinsics_support(&global_data.intrinsics_support);

	switch (mode) {
	case RTE_POWER_MGMT_TYPE_MONITOR:
		/* check if we can add a new queue */
		ret = check_monitor(lcore_cfg, &qdata);
		if (ret < 0)
			goto end;

		clb = get_monitor_callback();
		break;
	case RTE_POWER_MGMT_TYPE_SCALE:
		clb = clb_scale_freq;

		/* we only have to check this when enabling first queue */
		if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED)
			break;
		/* check if we can add a new queue */
		ret = check_scale(lcore_id);
		if (ret < 0)
			goto end;
		break;
	case RTE_POWER_MGMT_TYPE_PAUSE:
		/* figure out various time-to-tsc conversions */
		if (global_data.tsc_per_us == 0)
			calc_tsc();

		clb = clb_pause;
		break;
	default:
		POWER_LOG(DEBUG, "Invalid power management type");
		ret = -EINVAL;
		goto end;
	}
	/* add this queue to the list */
	ret = queue_list_add(lcore_cfg, &qdata);
	if (ret < 0) {
		POWER_LOG(DEBUG, "Failed to add queue to list: %s",
				strerror(-ret));
		goto end;
	}
	/* new queue is always added last */
	queue_cfg = TAILQ_LAST(&lcore_cfg->head, queue_list_head);

	/* when enabling first queue, ensure sleep target is not 0 */
	if (lcore_cfg->n_queues == 1 && lcore_cfg->sleep_target == 0)
		lcore_cfg->sleep_target = 1;

	/* initialize data before enabling the callback */
	if (lcore_cfg->n_queues == 1) {
		lcore_cfg->cb_mode = mode;
		lcore_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;
	}
	queue_cfg->cb = rte_eth_add_rx_callback(port_id, queue_id,
			clb, queue_cfg);

	ret = 0;
end:
	return ret;
}

int
rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id,
		uint16_t port_id, uint16_t queue_id)
{
	const union queue qdata = {.portid = port_id, .qid = queue_id};
	struct pmd_core_cfg *lcore_cfg;
	struct queue_list_entry *queue_cfg;
	int ret;

	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

	if (lcore_id >= RTE_MAX_LCORE || queue_id >= RTE_MAX_QUEUES_PER_PORT)
		return -EINVAL;

	/* check if the queue is stopped */
	ret = queue_stopped(port_id, queue_id);
	if (ret != 1) {
		/* error means invalid queue, 0 means queue wasn't stopped */
		return ret < 0 ? -EINVAL : -EBUSY;
	}

	/* no need to check queue id as wrong queue id would not be enabled */
	lcore_cfg = &lcore_cfgs[lcore_id];

	/* check if other queues are stopped as well */
	ret = cfg_queues_stopped(lcore_cfg);
	if (ret != 1) {
		/* error means invalid queue, 0 means queue wasn't stopped */
		return ret < 0 ? -EINVAL : -EBUSY;
	}

	if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_ENABLED)
		return -EINVAL;

	/*
	 * There is no good/easy way to do this without race conditions, so we
	 * are just going to throw our hands in the air and hope that the user
	 * has read the documentation and has ensured that ports are stopped at
	 * the time we enter the API functions.
	 */
	queue_cfg = queue_list_take(lcore_cfg, &qdata);
	if (queue_cfg == NULL)
		return -ENOENT;

	/* if we've removed all queues from the lists, set state to disabled */
	if (lcore_cfg->n_queues == 0)
		lcore_cfg->pwr_mgmt_state = PMD_MGMT_DISABLED;

	switch (lcore_cfg->cb_mode) {
	case RTE_POWER_MGMT_TYPE_MONITOR: /* fall-through */
	case RTE_POWER_MGMT_TYPE_PAUSE:
		rte_eth_remove_rx_callback(port_id, queue_id, queue_cfg->cb);
		break;
	case RTE_POWER_MGMT_TYPE_SCALE:
		rte_eth_remove_rx_callback(port_id, queue_id, queue_cfg->cb);
		/* disable power library on this lcore if this was the last queue */
		if (lcore_cfg->pwr_mgmt_state == PMD_MGMT_DISABLED) {
			rte_power_freq_max(lcore_id);
			rte_power_exit(lcore_id);
		}
		break;
	}
	/*
	 * the API doc mandates that the user stops all processing on affected
	 * ports before calling any of these APIs, so we can assume that the
	 * callbacks can be freed. we're intentionally casting away const-ness.
	 */
	rte_free((void *)queue_cfg->cb);
	free(queue_cfg);

	return 0;
}

void
rte_power_pmd_mgmt_set_emptypoll_max(unsigned int max)
{
	emptypoll_max = max;
}

unsigned int
rte_power_pmd_mgmt_get_emptypoll_max(void)
{
	return emptypoll_max;
}

int
rte_power_pmd_mgmt_set_pause_duration(unsigned int duration)
{
	if (duration == 0) {
		POWER_LOG(ERR, "Pause duration must be greater than 0, value unchanged");
		return -EINVAL;
	}
	pause_duration = duration;

	return 0;
}

unsigned int
rte_power_pmd_mgmt_get_pause_duration(void)
{
	return pause_duration;
}

int
rte_power_pmd_mgmt_set_scaling_freq_min(unsigned int lcore, unsigned int min)
{
	if (lcore >= RTE_MAX_LCORE) {
		POWER_LOG(ERR, "Invalid lcore ID: %u", lcore);
		return -EINVAL;
	}

	if (min > scale_freq_max[lcore]) {
		POWER_LOG(ERR, "Invalid min frequency: Cannot be greater than max frequency");
		return -EINVAL;
	}
	scale_freq_min[lcore] = min;

	return 0;
}

int
rte_power_pmd_mgmt_set_scaling_freq_max(unsigned int lcore, unsigned int max)
{
	if (lcore >= RTE_MAX_LCORE) {
		POWER_LOG(ERR, "Invalid lcore ID: %u", lcore);
		return -EINVAL;
	}

	/* Zero means 'not set'. Use UINT32_MAX to enable RTE_MIN/MAX macro use when scaling. */
	if (max == 0)
		max = UINT32_MAX;
	if (max < scale_freq_min[lcore]) {
		POWER_LOG(ERR, "Invalid max frequency: Cannot be less than min frequency");
		return -EINVAL;
	}

	scale_freq_max[lcore] = max;

	return 0;
}

int
rte_power_pmd_mgmt_get_scaling_freq_min(unsigned int lcore)
{
	if (lcore >= RTE_MAX_LCORE) {
		POWER_LOG(ERR, "Invalid lcore ID: %u", lcore);
		return -EINVAL;
	}

	if (scale_freq_min[lcore] == 0)
		POWER_LOG(DEBUG, "Scaling freq min config not set. Using sysfs min freq.");

	return scale_freq_min[lcore];
}

int
rte_power_pmd_mgmt_get_scaling_freq_max(unsigned int lcore)
{
	if (lcore >= RTE_MAX_LCORE) {
		POWER_LOG(ERR, "Invalid lcore ID: %u", lcore);
		return -EINVAL;
	}

	if (scale_freq_max[lcore] == UINT32_MAX) {
		POWER_LOG(DEBUG, "Scaling freq max config not set. Using sysfs max freq.");
		return 0;
	}

	return scale_freq_max[lcore];
}

RTE_INIT(rte_power_ethdev_pmgmt_init) {
	size_t i;
	int j;

	/* initialize all tailqs */
	for (i = 0; i < RTE_DIM(lcore_cfgs); i++) {
		struct pmd_core_cfg *cfg = &lcore_cfgs[i];
		TAILQ_INIT(&cfg->head);
	}

	/* initialize config defaults */
	emptypoll_max = 512;
	pause_duration = 1;
	/* scaling defaults out of range to ensure not used unless set by user or app */
	for (j = 0; j < RTE_MAX_LCORE; j++) {
		scale_freq_min[j] = 0;
		scale_freq_max[j] = UINT32_MAX;
	}
}