xref: /dpdk/examples/vm_power_manager/channel_monitor.c (revision ade02f0f3e7fbb5acd98b2fdf5fce0945ad9554a)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <unistd.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <stdint.h>
38 #include <signal.h>
39 #include <errno.h>
40 #include <string.h>
41 #include <sys/types.h>
42 #include <sys/epoll.h>
43 #include <sys/queue.h>
44 #include <sys/time.h>
45 
46 #include <rte_log.h>
47 #include <rte_memory.h>
48 #include <rte_malloc.h>
49 #include <rte_atomic.h>
50 #include <rte_cycles.h>
51 #include <rte_ethdev.h>
52 #include <rte_pmd_i40e.h>
53 
54 #include <libvirt/libvirt.h>
55 #include "channel_monitor.h"
56 #include "channel_commands.h"
57 #include "channel_manager.h"
58 #include "power_manager.h"
59 
60 #define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1
61 
62 #define MAX_EVENTS 256
63 
64 uint64_t vsi_pkt_count_prev[384];
65 uint64_t rdtsc_prev[384];
66 
67 double time_period_s = 1;
68 static volatile unsigned run_loop = 1;
69 static int global_event_fd;
70 static unsigned int policy_is_set;
71 static struct epoll_event *global_events_list;
72 static struct policy policies[MAX_VMS];
73 
74 void channel_monitor_exit(void)
75 {
76 	run_loop = 0;
77 	rte_free(global_events_list);
78 }
79 
80 static void
81 core_share(int pNo, int z, int x, int t)
82 {
83 	if (policies[pNo].core_share[z].pcpu == lvm_info[x].pcpus[t]) {
84 		if (strcmp(policies[pNo].pkt.vm_name,
85 				lvm_info[x].vm_name) != 0) {
86 			policies[pNo].core_share[z].status = 1;
87 			power_manager_scale_core_max(
88 					policies[pNo].core_share[z].pcpu);
89 		}
90 	}
91 }
92 
93 static void
94 core_share_status(int pNo)
95 {
96 
97 	int noVms, noVcpus, z, x, t;
98 
99 	get_all_vm(&noVms, &noVcpus);
100 
101 	/* Reset Core Share Status. */
102 	for (z = 0; z < noVcpus; z++)
103 		policies[pNo].core_share[z].status = 0;
104 
105 	/* Foreach vcpu in a policy. */
106 	for (z = 0; z < policies[pNo].pkt.num_vcpu; z++) {
107 		/* Foreach VM on the platform. */
108 		for (x = 0; x < noVms; x++) {
109 			/* Foreach vcpu of VMs on platform. */
110 			for (t = 0; t < lvm_info[x].num_cpus; t++)
111 				core_share(pNo, z, x, t);
112 		}
113 	}
114 }
115 
116 static void
117 get_pcpu_to_control(struct policy *pol)
118 {
119 
120 	/* Convert vcpu to pcpu. */
121 	struct vm_info info;
122 	int pcpu, count;
123 	uint64_t mask_u64b;
124 
125 	RTE_LOG(INFO, CHANNEL_MONITOR, "Looking for pcpu for %s\n",
126 			pol->pkt.vm_name);
127 	get_info_vm(pol->pkt.vm_name, &info);
128 
129 	for (count = 0; count < pol->pkt.num_vcpu; count++) {
130 		mask_u64b = info.pcpu_mask[pol->pkt.vcpu_to_control[count]];
131 		for (pcpu = 0; mask_u64b; mask_u64b &= ~(1ULL << pcpu++)) {
132 			if ((mask_u64b >> pcpu) & 1)
133 				pol->core_share[count].pcpu = pcpu;
134 		}
135 	}
136 }
137 
138 static int
139 get_pfid(struct policy *pol)
140 {
141 
142 	int i, x, ret = 0, nb_ports;
143 
144 	nb_ports = rte_eth_dev_count();
145 	for (i = 0; i < pol->pkt.nb_mac_to_monitor; i++) {
146 
147 		for (x = 0; x < nb_ports; x++) {
148 			ret = rte_pmd_i40e_query_vfid_by_mac(x,
149 				(struct ether_addr *)&(pol->pkt.vfid[i]));
150 			if (ret != -EINVAL) {
151 				pol->port[i] = x;
152 				break;
153 			}
154 		}
155 		if (ret == -EINVAL || ret == -ENOTSUP || ret == ENODEV) {
156 			RTE_LOG(INFO, CHANNEL_MONITOR,
157 				"Error with Policy. MAC not found on "
158 				"attached ports ");
159 			pol->enabled = 0;
160 			return ret;
161 		}
162 		pol->pfid[i] = ret;
163 	}
164 	return 1;
165 }
166 
167 static int
168 update_policy(struct channel_packet *pkt)
169 {
170 
171 	unsigned int updated = 0;
172 
173 	for (int i = 0; i < MAX_VMS; i++) {
174 		if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
175 			policies[i].pkt = *pkt;
176 			get_pcpu_to_control(&policies[i]);
177 			if (get_pfid(&policies[i]) == -1) {
178 				updated = 1;
179 				break;
180 			}
181 			core_share_status(i);
182 			policies[i].enabled = 1;
183 			updated = 1;
184 		}
185 	}
186 	if (!updated) {
187 		for (int i = 0; i < MAX_VMS; i++) {
188 			if (policies[i].enabled == 0) {
189 				policies[i].pkt = *pkt;
190 				get_pcpu_to_control(&policies[i]);
191 				if (get_pfid(&policies[i]) == -1)
192 					break;
193 				core_share_status(i);
194 				policies[i].enabled = 1;
195 				break;
196 			}
197 		}
198 	}
199 	return 0;
200 }
201 
202 static uint64_t
203 get_pkt_diff(struct policy *pol)
204 {
205 
206 	uint64_t vsi_pkt_count,
207 		vsi_pkt_total = 0,
208 		vsi_pkt_count_prev_total = 0;
209 	double rdtsc_curr, rdtsc_diff, diff;
210 	int x;
211 	struct rte_eth_stats vf_stats;
212 
213 	for (x = 0; x < pol->pkt.nb_mac_to_monitor; x++) {
214 
215 		/*Read vsi stats*/
216 		if (rte_pmd_i40e_get_vf_stats(x, pol->pfid[x], &vf_stats) == 0)
217 			vsi_pkt_count = vf_stats.ipackets;
218 		else
219 			vsi_pkt_count = -1;
220 
221 		vsi_pkt_total += vsi_pkt_count;
222 
223 		vsi_pkt_count_prev_total += vsi_pkt_count_prev[pol->pfid[x]];
224 		vsi_pkt_count_prev[pol->pfid[x]] = vsi_pkt_count;
225 	}
226 
227 	rdtsc_curr = rte_rdtsc_precise();
228 	rdtsc_diff = rdtsc_curr - rdtsc_prev[pol->pfid[x-1]];
229 	rdtsc_prev[pol->pfid[x-1]] = rdtsc_curr;
230 
231 	diff = (vsi_pkt_total - vsi_pkt_count_prev_total) *
232 			((double)rte_get_tsc_hz() / rdtsc_diff);
233 
234 	return diff;
235 }
236 
237 static void
238 apply_traffic_profile(struct policy *pol)
239 {
240 
241 	int count;
242 	uint64_t diff = 0;
243 
244 	diff = get_pkt_diff(pol);
245 
246 	RTE_LOG(INFO, CHANNEL_MONITOR, "Applying traffic profile\n");
247 
248 	if (diff >= (pol->pkt.traffic_policy.max_max_packet_thresh)) {
249 		for (count = 0; count < pol->pkt.num_vcpu; count++) {
250 			if (pol->core_share[count].status != 1)
251 				power_manager_scale_core_max(
252 						pol->core_share[count].pcpu);
253 		}
254 	} else if (diff >= (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
255 		for (count = 0; count < pol->pkt.num_vcpu; count++) {
256 			if (pol->core_share[count].status != 1)
257 				power_manager_scale_core_med(
258 						pol->core_share[count].pcpu);
259 		}
260 	} else if (diff < (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
261 		for (count = 0; count < pol->pkt.num_vcpu; count++) {
262 			if (pol->core_share[count].status != 1)
263 				power_manager_scale_core_min(
264 						pol->core_share[count].pcpu);
265 		}
266 	}
267 }
268 
269 static void
270 apply_time_profile(struct policy *pol)
271 {
272 
273 	int count, x;
274 	struct timeval tv;
275 	struct tm *ptm;
276 	char time_string[40];
277 
278 	/* Obtain the time of day, and convert it to a tm struct. */
279 	gettimeofday(&tv, NULL);
280 	ptm = localtime(&tv.tv_sec);
281 	/* Format the date and time, down to a single second. */
282 	strftime(time_string, sizeof(time_string), "%Y-%m-%d %H:%M:%S", ptm);
283 
284 	for (x = 0; x < HOURS; x++) {
285 
286 		if (ptm->tm_hour == pol->pkt.timer_policy.busy_hours[x]) {
287 			for (count = 0; count < pol->pkt.num_vcpu; count++) {
288 				if (pol->core_share[count].status != 1) {
289 					power_manager_scale_core_max(
290 						pol->core_share[count].pcpu);
291 				RTE_LOG(INFO, CHANNEL_MONITOR,
292 					"Scaling up core %d to max\n",
293 					pol->core_share[count].pcpu);
294 				}
295 			}
296 			break;
297 		} else if (ptm->tm_hour ==
298 				pol->pkt.timer_policy.quiet_hours[x]) {
299 			for (count = 0; count < pol->pkt.num_vcpu; count++) {
300 				if (pol->core_share[count].status != 1) {
301 					power_manager_scale_core_min(
302 						pol->core_share[count].pcpu);
303 				RTE_LOG(INFO, CHANNEL_MONITOR,
304 					"Scaling down core %d to min\n",
305 					pol->core_share[count].pcpu);
306 			}
307 		}
308 			break;
309 		} else if (ptm->tm_hour ==
310 			pol->pkt.timer_policy.hours_to_use_traffic_profile[x]) {
311 			apply_traffic_profile(pol);
312 			break;
313 		}
314 	}
315 }
316 
317 static void
318 apply_workload_profile(struct policy *pol)
319 {
320 
321 	int count;
322 
323 	if (pol->pkt.workload == HIGH) {
324 		for (count = 0; count < pol->pkt.num_vcpu; count++) {
325 			if (pol->core_share[count].status != 1)
326 				power_manager_scale_core_max(
327 						pol->core_share[count].pcpu);
328 		}
329 	} else if (pol->pkt.workload == MEDIUM) {
330 		for (count = 0; count < pol->pkt.num_vcpu; count++) {
331 			if (pol->core_share[count].status != 1)
332 				power_manager_scale_core_med(
333 						pol->core_share[count].pcpu);
334 		}
335 	} else if (pol->pkt.workload == LOW) {
336 		for (count = 0; count < pol->pkt.num_vcpu; count++) {
337 			if (pol->core_share[count].status != 1)
338 				power_manager_scale_core_min(
339 						pol->core_share[count].pcpu);
340 		}
341 	}
342 }
343 
344 static void
345 apply_policy(struct policy *pol)
346 {
347 
348 	struct channel_packet *pkt = &pol->pkt;
349 
350 	/*Check policy to use*/
351 	if (pkt->policy_to_use == TRAFFIC)
352 		apply_traffic_profile(pol);
353 	else if (pkt->policy_to_use == TIME)
354 		apply_time_profile(pol);
355 	else if (pkt->policy_to_use == WORKLOAD)
356 		apply_workload_profile(pol);
357 }
358 
359 
360 static int
361 process_request(struct channel_packet *pkt, struct channel_info *chan_info)
362 {
363 	uint64_t core_mask;
364 
365 	if (chan_info == NULL)
366 		return -1;
367 
368 	if (rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_CONNECTED,
369 			CHANNEL_MGR_CHANNEL_PROCESSING) == 0)
370 		return -1;
371 
372 	if (pkt->command == CPU_POWER) {
373 		core_mask = get_pcpus_mask(chan_info, pkt->resource_id);
374 		if (core_mask == 0) {
375 			RTE_LOG(ERR, CHANNEL_MONITOR, "Error get physical CPU mask for "
376 				"channel '%s' using vCPU(%u)\n", chan_info->channel_path,
377 				(unsigned)pkt->unit);
378 			return -1;
379 		}
380 		if (__builtin_popcountll(core_mask) == 1) {
381 
382 			unsigned core_num = __builtin_ffsll(core_mask) - 1;
383 
384 			switch (pkt->unit) {
385 			case(CPU_POWER_SCALE_MIN):
386 					power_manager_scale_core_min(core_num);
387 			break;
388 			case(CPU_POWER_SCALE_MAX):
389 					power_manager_scale_core_max(core_num);
390 			break;
391 			case(CPU_POWER_SCALE_DOWN):
392 					power_manager_scale_core_down(core_num);
393 			break;
394 			case(CPU_POWER_SCALE_UP):
395 					power_manager_scale_core_up(core_num);
396 			break;
397 			case(CPU_POWER_ENABLE_TURBO):
398 				power_manager_enable_turbo_core(core_num);
399 			break;
400 			case(CPU_POWER_DISABLE_TURBO):
401 				power_manager_disable_turbo_core(core_num);
402 			break;
403 			default:
404 				break;
405 			}
406 		} else {
407 			switch (pkt->unit) {
408 			case(CPU_POWER_SCALE_MIN):
409 					power_manager_scale_mask_min(core_mask);
410 			break;
411 			case(CPU_POWER_SCALE_MAX):
412 					power_manager_scale_mask_max(core_mask);
413 			break;
414 			case(CPU_POWER_SCALE_DOWN):
415 					power_manager_scale_mask_down(core_mask);
416 			break;
417 			case(CPU_POWER_SCALE_UP):
418 					power_manager_scale_mask_up(core_mask);
419 			break;
420 			case(CPU_POWER_ENABLE_TURBO):
421 				power_manager_enable_turbo_mask(core_mask);
422 			break;
423 			case(CPU_POWER_DISABLE_TURBO):
424 				power_manager_disable_turbo_mask(core_mask);
425 			break;
426 			default:
427 				break;
428 			}
429 
430 		}
431 	}
432 
433 	if (pkt->command == PKT_POLICY) {
434 		RTE_LOG(INFO, CHANNEL_MONITOR, "\nProcessing Policy request from Guest\n");
435 		update_policy(pkt);
436 		policy_is_set = 1;
437 	}
438 
439 	/* Return is not checked as channel status may have been set to DISABLED
440 	 * from management thread
441 	 */
442 	rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_PROCESSING,
443 			CHANNEL_MGR_CHANNEL_CONNECTED);
444 	return 0;
445 
446 }
447 
448 int
449 add_channel_to_monitor(struct channel_info **chan_info)
450 {
451 	struct channel_info *info = *chan_info;
452 	struct epoll_event event;
453 
454 	event.events = EPOLLIN;
455 	event.data.ptr = info;
456 	if (epoll_ctl(global_event_fd, EPOLL_CTL_ADD, info->fd, &event) < 0) {
457 		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to add channel '%s' "
458 				"to epoll\n", info->channel_path);
459 		return -1;
460 	}
461 	return 0;
462 }
463 
464 int
465 remove_channel_from_monitor(struct channel_info *chan_info)
466 {
467 	if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL, chan_info->fd, NULL) < 0) {
468 		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to remove channel '%s' "
469 				"from epoll\n", chan_info->channel_path);
470 		return -1;
471 	}
472 	return 0;
473 }
474 
475 int
476 channel_monitor_init(void)
477 {
478 	global_event_fd = epoll_create1(0);
479 	if (global_event_fd == 0) {
480 		RTE_LOG(ERR, CHANNEL_MONITOR, "Error creating epoll context with "
481 				"error %s\n", strerror(errno));
482 		return -1;
483 	}
484 	global_events_list = rte_malloc("epoll_events", sizeof(*global_events_list)
485 			* MAX_EVENTS, RTE_CACHE_LINE_SIZE);
486 	if (global_events_list == NULL) {
487 		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to rte_malloc for "
488 				"epoll events\n");
489 		return -1;
490 	}
491 	return 0;
492 }
493 
494 void
495 run_channel_monitor(void)
496 {
497 	while (run_loop) {
498 		int n_events, i;
499 
500 		n_events = epoll_wait(global_event_fd, global_events_list,
501 				MAX_EVENTS, 1);
502 		if (!run_loop)
503 			break;
504 		for (i = 0; i < n_events; i++) {
505 			struct channel_info *chan_info = (struct channel_info *)
506 					global_events_list[i].data.ptr;
507 			if ((global_events_list[i].events & EPOLLERR) ||
508 				(global_events_list[i].events & EPOLLHUP)) {
509 				RTE_LOG(DEBUG, CHANNEL_MONITOR, "Remote closed connection for "
510 						"channel '%s'\n",
511 						chan_info->channel_path);
512 				remove_channel(&chan_info);
513 				continue;
514 			}
515 			if (global_events_list[i].events & EPOLLIN) {
516 
517 				int n_bytes, err = 0;
518 				struct channel_packet pkt;
519 				void *buffer = &pkt;
520 				int buffer_len = sizeof(pkt);
521 
522 				while (buffer_len > 0) {
523 					n_bytes = read(chan_info->fd,
524 							buffer, buffer_len);
525 					if (n_bytes == buffer_len)
526 						break;
527 					if (n_bytes == -1) {
528 						err = errno;
529 						RTE_LOG(DEBUG, CHANNEL_MONITOR,
530 							"Received error on "
531 							"channel '%s' read: %s\n",
532 							chan_info->channel_path,
533 							strerror(err));
534 						remove_channel(&chan_info);
535 						break;
536 					}
537 					buffer = (char *)buffer + n_bytes;
538 					buffer_len -= n_bytes;
539 				}
540 				if (!err)
541 					process_request(&pkt, chan_info);
542 			}
543 		}
544 		rte_delay_us(time_period_s*1000000);
545 		if (policy_is_set) {
546 			for (int j = 0; j < MAX_VMS; j++) {
547 				if (policies[j].enabled == 1)
548 					apply_policy(&policies[j]);
549 			}
550 		}
551 	}
552 }
553