xref: /dpdk/lib/power/rte_power_pmd_mgmt.c (revision 448e01f1b5848b20cb0300d339100dd82f4459e9)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2020 Intel Corporation
 */

#include <stdlib.h>
#include <string.h>

#include <rte_lcore.h>
#include <rte_cycles.h>
#include <rte_cpuflags.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_power_intrinsics.h>

#include "rte_power_pmd_mgmt.h"
#include "power_common.h"

unsigned int emptypoll_max;
unsigned int pause_duration;
unsigned int scale_freq_min[RTE_MAX_LCORE];
unsigned int scale_freq_max[RTE_MAX_LCORE];

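/*
 * Note: defaults for the knobs above are assigned in the RTE_INIT constructor
 * at the bottom of this file (emptypoll_max = 512, pause_duration = 1 us,
 * scale_freq_min = 0 and scale_freq_max = UINT32_MAX, i.e. "not set").
 */
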
/* store some internal state */
static struct pmd_conf_data {
	/** what do we support? */
	struct rte_cpu_intrinsics intrinsics_support;
	/** pre-calculated tsc diff for 1us */
	uint64_t tsc_per_us;
	/** how many rte_pause can we fit in a microsecond? */
	uint64_t pause_per_us;
} global_data;

/**
 * Possible power management states of an ethdev port.
 */
enum pmd_mgmt_state {
	/** Device power management is disabled. */
	PMD_MGMT_DISABLED = 0,
	/** Device power management is enabled. */
	PMD_MGMT_ENABLED
};

union queue {
	uint32_t val;
	struct {
		uint16_t portid;
		uint16_t qid;
	};
};

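/*
 * Illustration (assuming a little-endian layout): port 1, queue 3 packs into
 * val == 0x00030001. The exact packing is endian-dependent, but that does not
 * matter here because 'val' is only used for equality checks and copies.
 */
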
struct queue_list_entry {
	TAILQ_ENTRY(queue_list_entry) next;
	union queue queue;
	uint64_t n_empty_polls;
	uint64_t n_sleeps;
	const struct rte_eth_rxtx_callback *cb;
};

struct pmd_core_cfg {
	TAILQ_HEAD(queue_list_head, queue_list_entry) head;
	/**< List of queues associated with this lcore */
	size_t n_queues;
	/**< How many queues are in the list? */
	volatile enum pmd_mgmt_state pwr_mgmt_state;
	/**< State of power management for this lcore */
	enum rte_power_pmd_mgmt_type cb_mode;
	/**< Callback mode for this lcore */
	uint64_t n_queues_ready_to_sleep;
	/**< Number of queues ready to enter power optimized state */
	uint64_t sleep_target;
	/**< Prevent a queue from triggering sleep multiple times */
} __rte_cache_aligned;
static struct pmd_core_cfg lcore_cfgs[RTE_MAX_LCORE];

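/*
 * Each polling lcore only touches its own lcore_cfgs[rte_lcore_id()] entry
 * from the Rx callbacks, so the fast path runs without locks; the struct is
 * cache-aligned to keep neighbouring entries off the same cache line.
 */
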
static inline bool
queue_equal(const union queue *l, const union queue *r)
{
	return l->val == r->val;
}

static inline void
queue_copy(union queue *dst, const union queue *src)
{
	dst->val = src->val;
}

static struct queue_list_entry *
queue_list_find(const struct pmd_core_cfg *cfg, const union queue *q)
{
	struct queue_list_entry *cur;

	TAILQ_FOREACH(cur, &cfg->head, next) {
		if (queue_equal(&cur->queue, q))
			return cur;
	}
	return NULL;
}

static int
queue_list_add(struct pmd_core_cfg *cfg, const union queue *q)
{
	struct queue_list_entry *qle;

	/* is it already in the list? */
	if (queue_list_find(cfg, q) != NULL)
		return -EEXIST;

	qle = malloc(sizeof(*qle));
	if (qle == NULL)
		return -ENOMEM;
	memset(qle, 0, sizeof(*qle));

	queue_copy(&qle->queue, q);
	TAILQ_INSERT_TAIL(&cfg->head, qle, next);
	cfg->n_queues++;

	return 0;
}

static struct queue_list_entry *
queue_list_take(struct pmd_core_cfg *cfg, const union queue *q)
{
	struct queue_list_entry *found;

	found = queue_list_find(cfg, q);
	if (found == NULL)
		return NULL;

	TAILQ_REMOVE(&cfg->head, found, next);
	cfg->n_queues--;

	/* freeing is responsibility of the caller */
	return found;
}

static inline int
get_monitor_addresses(struct pmd_core_cfg *cfg,
		struct rte_power_monitor_cond *pmc, size_t len)
{
	const struct queue_list_entry *qle;
	size_t i = 0;
	int ret;

	TAILQ_FOREACH(qle, &cfg->head, next) {
		const union queue *q = &qle->queue;
		struct rte_power_monitor_cond *cur;

		/* attempted out of bounds access */
		if (i >= len) {
			RTE_LOG(ERR, POWER, "Too many queues being monitored\n");
			return -1;
		}

		cur = &pmc[i++];
		ret = rte_eth_get_monitor_addr(q->portid, q->qid, cur);
		if (ret < 0)
			return ret;
	}
	return 0;
}

static void
calc_tsc(void)
{
	const uint64_t hz = rte_get_timer_hz();
	const uint64_t tsc_per_us = hz / US_PER_S; /* 1us */

	global_data.tsc_per_us = tsc_per_us;

	/* only do this if we don't have tpause */
	if (!global_data.intrinsics_support.power_pause) {
		const uint64_t start = rte_rdtsc_precise();
		const uint32_t n_pauses = 10000;
		double us, us_per_pause;
		uint64_t end;
		unsigned int i;

		/* estimate number of rte_pause() calls per us */
		for (i = 0; i < n_pauses; i++)
			rte_pause();

		end = rte_rdtsc_precise();
		us = (end - start) / (double)tsc_per_us;
		us_per_pause = us / n_pauses;

		global_data.pause_per_us = (uint64_t)(1.0 / us_per_pause);
	}
}

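/*
 * Worked example of the calibration above (illustrative numbers): with a
 * 2.5 GHz TSC, tsc_per_us = 2500000000 / 1000000 = 2500 cycles. Without
 * tpause, if the 10000 rte_pause() calls take 40 us, then
 * us_per_pause = 0.004 and pause_per_us = 1 / 0.004 = 250, i.e. clb_pause()
 * issues roughly 250 rte_pause() calls per requested microsecond.
 */
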
static inline void
queue_reset(struct pmd_core_cfg *cfg, struct queue_list_entry *qcfg)
{
	const bool is_ready_to_sleep = qcfg->n_sleeps == cfg->sleep_target;

	/* reset empty poll counter for this queue */
	qcfg->n_empty_polls = 0;
	/* reset the queue sleep counter as well */
	qcfg->n_sleeps = 0;
	/* remove the queue from list of queues ready to sleep */
	if (is_ready_to_sleep)
		cfg->n_queues_ready_to_sleep--;
	/*
	 * no need to change the lcore sleep target counter because this queue
	 * will catch up to the current target anyway, and the other queues are
	 * already counted, so there's nothing else to do.
	 */
}

static inline bool
queue_can_sleep(struct pmd_core_cfg *cfg, struct queue_list_entry *qcfg)
{
	/* this function is called - that means we have an empty poll */
	qcfg->n_empty_polls++;

	/* if we haven't reached threshold for empty polls, we can't sleep */
	if (qcfg->n_empty_polls <= emptypoll_max)
		return false;

	/*
	 * we've reached a point where we are able to sleep, but we still need
	 * to check if this queue has already been marked for sleeping.
	 */
	if (qcfg->n_sleeps == cfg->sleep_target)
		return true;

	/* mark this queue as ready for sleep */
	qcfg->n_sleeps = cfg->sleep_target;
	cfg->n_queues_ready_to_sleep++;

	return true;
}

static inline bool
lcore_can_sleep(struct pmd_core_cfg *cfg)
{
	/* are all queues ready to sleep? */
	if (cfg->n_queues_ready_to_sleep != cfg->n_queues)
		return false;

	/* we've reached an iteration where we can sleep, reset sleep counter */
	cfg->n_queues_ready_to_sleep = 0;
	cfg->sleep_target++;
	/*
	 * we do not reset any individual queue empty poll counters, because
	 * we want to keep sleeping on every poll until we actually get traffic.
	 */

	return true;
}

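/*
 * Sketch of how the two checks above interact (illustrative: two queues on
 * one lcore, emptypoll_max = 512, sleep_target starts at 1):
 *
 *   - queue A hits 513 empty polls -> queue_can_sleep() marks it ready
 *     (n_sleeps = 1, n_queues_ready_to_sleep = 1), but lcore_can_sleep()
 *     still returns false because queue B is not ready yet;
 *   - queue B also exceeds the threshold -> both queues are ready, so
 *     lcore_can_sleep() returns true, bumps sleep_target to 2 and resets
 *     n_queues_ready_to_sleep, meaning each queue must catch up to the new
 *     target before the next sleep;
 *   - traffic on either queue calls queue_reset(), which zeroes that queue's
 *     counters and keeps the lcore awake until it becomes ready again.
 */
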
static uint16_t
clb_multiwait(uint16_t port_id __rte_unused, uint16_t qidx __rte_unused,
		struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
		uint16_t max_pkts __rte_unused, void *arg)
{
	const unsigned int lcore = rte_lcore_id();
	struct queue_list_entry *queue_conf = arg;
	struct pmd_core_cfg *lcore_conf;
	const bool empty = nb_rx == 0;

	lcore_conf = &lcore_cfgs[lcore];

	if (likely(!empty))
		/* early exit */
		queue_reset(lcore_conf, queue_conf);
	else {
		struct rte_power_monitor_cond pmc[lcore_conf->n_queues];
		int ret;

		/* can this queue sleep? */
		if (!queue_can_sleep(lcore_conf, queue_conf))
			return nb_rx;

		/* can this lcore sleep? */
		if (!lcore_can_sleep(lcore_conf))
			return nb_rx;

		/* gather all monitoring conditions */
		ret = get_monitor_addresses(lcore_conf, pmc,
				lcore_conf->n_queues);
		if (ret < 0)
			return nb_rx;

		rte_power_monitor_multi(pmc, lcore_conf->n_queues, UINT64_MAX);
	}

	return nb_rx;
}

static uint16_t
clb_umwait(uint16_t port_id, uint16_t qidx, struct rte_mbuf **pkts __rte_unused,
		uint16_t nb_rx, uint16_t max_pkts __rte_unused, void *arg)
{
	struct queue_list_entry *queue_conf = arg;

	/* this callback can't do more than one queue, omit multiqueue logic */
	if (unlikely(nb_rx == 0)) {
		queue_conf->n_empty_polls++;
		if (unlikely(queue_conf->n_empty_polls > emptypoll_max)) {
			struct rte_power_monitor_cond pmc;
			int ret;

			/* use monitoring condition to sleep */
			ret = rte_eth_get_monitor_addr(port_id, qidx,
					&pmc);
			if (ret == 0)
				rte_power_monitor(&pmc, UINT64_MAX);
		}
	} else
		queue_conf->n_empty_polls = 0;

	return nb_rx;
}

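/*
 * Example of the sleep-length math used in clb_pause() below (illustrative
 * numbers): with a 2 GHz TSC, global_data.tsc_per_us == 2000, so a
 * pause_duration of 5 us gives wait_tsc = rte_rdtsc() + 2000 * 5 cycles on
 * the tpause path; without tpause, the fallback loop issues roughly
 * pause_per_us * 5 rte_pause() calls instead.
 */
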
static uint16_t
clb_pause(uint16_t port_id __rte_unused, uint16_t qidx __rte_unused,
		struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
		uint16_t max_pkts __rte_unused, void *arg)
{
	const unsigned int lcore = rte_lcore_id();
	struct queue_list_entry *queue_conf = arg;
	struct pmd_core_cfg *lcore_conf;
	const bool empty = nb_rx == 0;
	uint32_t pause_duration = rte_power_pmd_mgmt_get_pause_duration();

	lcore_conf = &lcore_cfgs[lcore];

	if (likely(!empty))
		/* early exit */
		queue_reset(lcore_conf, queue_conf);
	else {
		/* can this queue sleep? */
		if (!queue_can_sleep(lcore_conf, queue_conf))
			return nb_rx;

		/* can this lcore sleep? */
		if (!lcore_can_sleep(lcore_conf))
			return nb_rx;

		/* sleep for pause_duration microseconds, use tpause if we have it */
		if (global_data.intrinsics_support.power_pause) {
			const uint64_t cur = rte_rdtsc();
			const uint64_t wait_tsc =
					cur + global_data.tsc_per_us * pause_duration;
			rte_power_pause(wait_tsc);
		} else {
			uint64_t i;
			for (i = 0; i < global_data.pause_per_us * pause_duration; i++)
				rte_pause();
		}
	}

	return nb_rx;
}

static uint16_t
clb_scale_freq(uint16_t port_id __rte_unused, uint16_t qidx __rte_unused,
		struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx,
		uint16_t max_pkts __rte_unused, void *arg)
{
	const unsigned int lcore = rte_lcore_id();
	const bool empty = nb_rx == 0;
	struct pmd_core_cfg *lcore_conf = &lcore_cfgs[lcore];
	struct queue_list_entry *queue_conf = arg;

	if (likely(!empty)) {
		/* early exit */
		queue_reset(lcore_conf, queue_conf);

		/* scale up freq immediately */
		rte_power_freq_max(lcore);
	} else {
		/* can this queue sleep? */
		if (!queue_can_sleep(lcore_conf, queue_conf))
			return nb_rx;

		/* can this lcore sleep? */
		if (!lcore_can_sleep(lcore_conf))
			return nb_rx;

		rte_power_freq_min(lcore);
	}

	return nb_rx;
}

static int
queue_stopped(const uint16_t port_id, const uint16_t queue_id)
{
	struct rte_eth_rxq_info qinfo;

	int ret = rte_eth_rx_queue_info_get(port_id, queue_id, &qinfo);
	if (ret < 0) {
		if (ret == -ENOTSUP)
			return 1;
		else
			return -1;
	}

	return qinfo.queue_state == RTE_ETH_QUEUE_STATE_STOPPED;
}

static int
cfg_queues_stopped(struct pmd_core_cfg *queue_cfg)
{
	const struct queue_list_entry *entry;

	TAILQ_FOREACH(entry, &queue_cfg->head, next) {
		const union queue *q = &entry->queue;
		int ret = queue_stopped(q->portid, q->qid);
		if (ret != 1)
			return ret;
	}
	return 1;
}

static int
check_scale(unsigned int lcore)
{
	enum power_management_env env;

	/* only PSTATE and ACPI modes are supported */
	if (!rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ) &&
	    !rte_power_check_env_supported(PM_ENV_PSTATE_CPUFREQ)) {
		RTE_LOG(DEBUG, POWER, "Neither ACPI nor PSTATE modes are supported\n");
		return -ENOTSUP;
	}
	/* ensure we could initialize the power library */
	if (rte_power_init(lcore))
		return -EINVAL;

	/* ensure we initialized the correct env */
	env = rte_power_get_env();
	if (env != PM_ENV_ACPI_CPUFREQ && env != PM_ENV_PSTATE_CPUFREQ) {
		RTE_LOG(DEBUG, POWER, "Neither ACPI nor PSTATE modes were initialized\n");
		return -ENOTSUP;
	}

	/* we're done */
	return 0;
}

static int
check_monitor(struct pmd_core_cfg *cfg, const union queue *qdata)
{
	struct rte_power_monitor_cond dummy;
	bool multimonitor_supported;

	/* check if rte_power_monitor is supported */
	if (!global_data.intrinsics_support.power_monitor) {
		RTE_LOG(DEBUG, POWER, "Monitoring intrinsics are not supported\n");
		return -ENOTSUP;
	}
	/* check if multi-monitor is supported */
	multimonitor_supported =
			global_data.intrinsics_support.power_monitor_multi;

	/* if we're adding a new queue, do we support multiple queues? */
	if (cfg->n_queues > 0 && !multimonitor_supported) {
		RTE_LOG(DEBUG, POWER, "Monitoring multiple queues is not supported\n");
		return -ENOTSUP;
	}

	/* check if the device supports the necessary PMD API */
	if (rte_eth_get_monitor_addr(qdata->portid, qdata->qid,
			&dummy) == -ENOTSUP) {
		RTE_LOG(DEBUG, POWER, "The device does not support rte_eth_get_monitor_addr\n");
		return -ENOTSUP;
	}

	/* we're done */
	return 0;
}

static inline rte_rx_callback_fn
get_monitor_callback(void)
{
	return global_data.intrinsics_support.power_monitor_multi ?
		clb_multiwait : clb_umwait;
}

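/*
 * Illustrative application-side usage of the API below (a minimal sketch, not
 * part of this file): after rte_eth_dev_configure() and
 * rte_eth_rx_queue_setup(), but before rte_eth_dev_start(), each polling
 * lcore registers its Rx queues, e.g.:
 *
 *	ret = rte_power_ethdev_pmgmt_queue_enable(lcore_id, port_id, queue_id,
 *			RTE_POWER_MGMT_TYPE_MONITOR);
 *	if (ret < 0)
 *		rte_exit(EXIT_FAILURE, "Cannot enable power management\n");
 *
 * Teardown mirrors this: stop the port/queue, then call
 * rte_power_ethdev_pmgmt_queue_disable() with the same arguments.
 */
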
int
rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t port_id,
		uint16_t queue_id, enum rte_power_pmd_mgmt_type mode)
{
	const union queue qdata = {.portid = port_id, .qid = queue_id};
	struct pmd_core_cfg *lcore_cfg;
	struct queue_list_entry *queue_cfg;
	struct rte_eth_dev_info info;
	rte_rx_callback_fn clb;
	int ret;

	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

	if (queue_id >= RTE_MAX_QUEUES_PER_PORT || lcore_id >= RTE_MAX_LCORE) {
		ret = -EINVAL;
		goto end;
	}

	if (rte_eth_dev_info_get(port_id, &info) < 0) {
		ret = -EINVAL;
		goto end;
	}

	/* check if queue id is valid */
	if (queue_id >= info.nb_rx_queues) {
		ret = -EINVAL;
		goto end;
	}

	/* check if the queue is stopped */
	ret = queue_stopped(port_id, queue_id);
	if (ret != 1) {
		/* error means invalid queue, 0 means queue wasn't stopped */
		ret = ret < 0 ? -EINVAL : -EBUSY;
		goto end;
	}

	lcore_cfg = &lcore_cfgs[lcore_id];

	/* check if other queues are stopped as well */
	ret = cfg_queues_stopped(lcore_cfg);
	if (ret != 1) {
		/* error means invalid queue, 0 means queue wasn't stopped */
		ret = ret < 0 ? -EINVAL : -EBUSY;
		goto end;
	}

	/* if callback was already enabled, check current callback type */
	if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED &&
			lcore_cfg->cb_mode != mode) {
		ret = -EINVAL;
		goto end;
	}

	/* we need this in various places */
	rte_cpu_get_intrinsics_support(&global_data.intrinsics_support);

	switch (mode) {
	case RTE_POWER_MGMT_TYPE_MONITOR:
		/* check if we can add a new queue */
		ret = check_monitor(lcore_cfg, &qdata);
		if (ret < 0)
			goto end;

		clb = get_monitor_callback();
		break;
	case RTE_POWER_MGMT_TYPE_SCALE:
		clb = clb_scale_freq;

		/* we only have to check this when enabling first queue */
		if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED)
			break;
		/* check if we can add a new queue */
		ret = check_scale(lcore_id);
		if (ret < 0)
			goto end;
		break;
	case RTE_POWER_MGMT_TYPE_PAUSE:
		/* figure out various time-to-tsc conversions */
		if (global_data.tsc_per_us == 0)
			calc_tsc();

		clb = clb_pause;
		break;
	default:
		RTE_LOG(DEBUG, POWER, "Invalid power management type\n");
		ret = -EINVAL;
		goto end;
	}
	/* add this queue to the list */
	ret = queue_list_add(lcore_cfg, &qdata);
	if (ret < 0) {
		RTE_LOG(DEBUG, POWER, "Failed to add queue to list: %s\n",
				strerror(-ret));
		goto end;
	}
	/* new queue is always added last */
	queue_cfg = TAILQ_LAST(&lcore_cfg->head, queue_list_head);

	/* when enabling first queue, ensure sleep target is not 0 */
	if (lcore_cfg->n_queues == 1 && lcore_cfg->sleep_target == 0)
		lcore_cfg->sleep_target = 1;

	/* initialize data before enabling the callback */
	if (lcore_cfg->n_queues == 1) {
		lcore_cfg->cb_mode = mode;
		lcore_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;
	}
	queue_cfg->cb = rte_eth_add_rx_callback(port_id, queue_id,
			clb, queue_cfg);

	ret = 0;
end:
	return ret;
}

int
rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id,
		uint16_t port_id, uint16_t queue_id)
{
	const union queue qdata = {.portid = port_id, .qid = queue_id};
	struct pmd_core_cfg *lcore_cfg;
	struct queue_list_entry *queue_cfg;
	int ret;

	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);

	if (lcore_id >= RTE_MAX_LCORE || queue_id >= RTE_MAX_QUEUES_PER_PORT)
		return -EINVAL;

	/* check if the queue is stopped */
	ret = queue_stopped(port_id, queue_id);
	if (ret != 1) {
		/* error means invalid queue, 0 means queue wasn't stopped */
		return ret < 0 ? -EINVAL : -EBUSY;
	}

	/* no need to check queue id as wrong queue id would not be enabled */
	lcore_cfg = &lcore_cfgs[lcore_id];

	/* check if other queues are stopped as well */
	ret = cfg_queues_stopped(lcore_cfg);
	if (ret != 1) {
		/* error means invalid queue, 0 means queue wasn't stopped */
		return ret < 0 ? -EINVAL : -EBUSY;
	}

	if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_ENABLED)
		return -EINVAL;

	/*
	 * There is no good/easy way to do this without race conditions, so we
	 * are just going to throw our hands in the air and hope that the user
	 * has read the documentation and has ensured that ports are stopped at
	 * the time we enter the API functions.
	 */
	queue_cfg = queue_list_take(lcore_cfg, &qdata);
	if (queue_cfg == NULL)
		return -ENOENT;

	/* if we've removed all queues from the lists, set state to disabled */
	if (lcore_cfg->n_queues == 0)
		lcore_cfg->pwr_mgmt_state = PMD_MGMT_DISABLED;

	switch (lcore_cfg->cb_mode) {
	case RTE_POWER_MGMT_TYPE_MONITOR: /* fall-through */
	case RTE_POWER_MGMT_TYPE_PAUSE:
		rte_eth_remove_rx_callback(port_id, queue_id, queue_cfg->cb);
		break;
	case RTE_POWER_MGMT_TYPE_SCALE:
		rte_eth_remove_rx_callback(port_id, queue_id, queue_cfg->cb);
		/* disable power library on this lcore if this was last queue */
		if (lcore_cfg->pwr_mgmt_state == PMD_MGMT_DISABLED) {
			rte_power_freq_max(lcore_id);
			rte_power_exit(lcore_id);
		}
		break;
	}
	/*
	 * the API doc mandates that the user stops all processing on affected
	 * ports before calling any of these API's, so we can assume that the
	 * callbacks can be freed. we're intentionally casting away const-ness.
	 */
	rte_free((void *)queue_cfg->cb);
	free(queue_cfg);

	return 0;
}

void
rte_power_pmd_mgmt_set_emptypoll_max(unsigned int max)
{
	emptypoll_max = max;
}

unsigned int
rte_power_pmd_mgmt_get_emptypoll_max(void)
{
	return emptypoll_max;
}

int
rte_power_pmd_mgmt_set_pause_duration(unsigned int duration)
{
	if (duration == 0) {
		RTE_LOG(ERR, POWER, "Pause duration must be greater than 0, value unchanged\n");
		return -EINVAL;
	}
	pause_duration = duration;

	return 0;
}

unsigned int
rte_power_pmd_mgmt_get_pause_duration(void)
{
	return pause_duration;
}

int
rte_power_pmd_mgmt_set_scaling_freq_min(unsigned int lcore, unsigned int min)
{
	if (lcore >= RTE_MAX_LCORE) {
		RTE_LOG(ERR, POWER, "Invalid lcore ID: %u\n", lcore);
		return -EINVAL;
	}

	if (min > scale_freq_max[lcore]) {
		RTE_LOG(ERR, POWER, "Invalid min frequency: Cannot be greater than max frequency\n");
		return -EINVAL;
	}
	scale_freq_min[lcore] = min;

	return 0;
}

int
rte_power_pmd_mgmt_set_scaling_freq_max(unsigned int lcore, unsigned int max)
{
	if (lcore >= RTE_MAX_LCORE) {
		RTE_LOG(ERR, POWER, "Invalid lcore ID: %u\n", lcore);
		return -EINVAL;
	}

	/* Zero means 'not set'. Use UINT32_MAX to enable RTE_MIN/MAX macro use when scaling. */
	if (max == 0)
		max = UINT32_MAX;
	if (max < scale_freq_min[lcore]) {
		RTE_LOG(ERR, POWER, "Invalid max frequency: Cannot be less than min frequency\n");
		return -EINVAL;
	}

	scale_freq_max[lcore] = max;

	return 0;
}

int
rte_power_pmd_mgmt_get_scaling_freq_min(unsigned int lcore)
{
	if (lcore >= RTE_MAX_LCORE) {
		RTE_LOG(ERR, POWER, "Invalid lcore ID: %u\n", lcore);
		return -EINVAL;
	}

	if (scale_freq_min[lcore] == 0)
		RTE_LOG(DEBUG, POWER, "Scaling freq min config not set. Using sysfs min freq.\n");

	return scale_freq_min[lcore];
}

int
rte_power_pmd_mgmt_get_scaling_freq_max(unsigned int lcore)
{
	if (lcore >= RTE_MAX_LCORE) {
		RTE_LOG(ERR, POWER, "Invalid lcore ID: %u\n", lcore);
		return -EINVAL;
	}

	if (scale_freq_max[lcore] == UINT32_MAX) {
		RTE_LOG(DEBUG, POWER, "Scaling freq max config not set. Using sysfs max freq.\n");
		return 0;
	}

	return scale_freq_max[lcore];
}

RTE_INIT(rte_power_ethdev_pmgmt_init) {
	size_t i;
	int j;

	/* initialize all tailqs */
	for (i = 0; i < RTE_DIM(lcore_cfgs); i++) {
		struct pmd_core_cfg *cfg = &lcore_cfgs[i];
		TAILQ_INIT(&cfg->head);
	}

	/* initialize config defaults */
	emptypoll_max = 512;
	pause_duration = 1;
	/* scaling defaults are out of range so they are not used unless set by user or app */
	for (j = 0; j < RTE_MAX_LCORE; j++) {
		scale_freq_min[j] = 0;
		scale_freq_max[j] = UINT32_MAX;
	}
}