xref: /dpdk/examples/vm_power_manager/channel_monitor.c (revision cdea34452b09f5ce3c2f3ada6182afa0071fce47)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include <unistd.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <stdint.h>
38 #include <signal.h>
39 #include <errno.h>
40 #include <string.h>
41 #include <sys/types.h>
42 #include <sys/epoll.h>
43 #include <sys/queue.h>
44 #include <sys/time.h>
45 
46 #include <rte_log.h>
47 #include <rte_memory.h>
48 #include <rte_malloc.h>
49 #include <rte_atomic.h>
50 #include <rte_cycles.h>
51 #include <rte_ethdev.h>
52 #include <rte_pmd_i40e.h>
53 
54 #include <libvirt/libvirt.h>
55 #include "channel_monitor.h"
56 #include "channel_commands.h"
57 #include "channel_manager.h"
58 #include "power_manager.h"
59 
#define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1

/* Maximum number of epoll events handled per epoll_wait() call. */
#define MAX_EVENTS 256

/* Per-VF history used by get_pkt_diff(), indexed by pfid.
 * NOTE(review): 384 appears to be a platform VF-count ceiling — confirm
 * against the i40e limits; there is no bounds check on pfid here. */
uint64_t vsi_pkt_count_prev[384];
uint64_t rdtsc_prev[384];

/* Delay between monitor-loop iterations, in milliseconds. */
double time_period_ms = 1;
/* Cleared by channel_monitor_exit() to stop run_channel_monitor(). */
static volatile unsigned run_loop = 1;
/* epoll instance all channel fds are registered with. */
static int global_event_fd;
/* Set once any guest policy has been installed via update_policy(). */
static unsigned int policy_is_set;
/* Event buffer for epoll_wait(); allocated in channel_monitor_init(). */
static struct epoll_event *global_events_list;
/* One policy slot per possible VM. */
static struct policy policies[MAX_VMS];
73 
/*
 * Request the monitor loop to stop and release the epoll event buffer.
 * NOTE(review): global_events_list is freed here while run_channel_monitor()
 * may still be inside epoll_wait() on another thread — looks like a
 * potential use-after-free; confirm the caller joins/stops the monitor
 * thread before (or instead of relying on) this free.
 */
void channel_monitor_exit(void)
{
	run_loop = 0;
	rte_free(global_events_list);
}
79 
80 static void
81 core_share(int pNo, int z, int x, int t)
82 {
83 	if (policies[pNo].core_share[z].pcpu == lvm_info[x].pcpus[t]) {
84 		if (strcmp(policies[pNo].pkt.vm_name,
85 				lvm_info[x].vm_name) != 0) {
86 			policies[pNo].core_share[z].status = 1;
87 			power_manager_scale_core_max(
88 					policies[pNo].core_share[z].pcpu);
89 		}
90 	}
91 }
92 
93 static void
94 core_share_status(int pNo)
95 {
96 
97 	int noVms, noVcpus, z, x, t;
98 
99 	get_all_vm(&noVms, &noVcpus);
100 
101 	/* Reset Core Share Status. */
102 	for (z = 0; z < noVcpus; z++)
103 		policies[pNo].core_share[z].status = 0;
104 
105 	/* Foreach vcpu in a policy. */
106 	for (z = 0; z < policies[pNo].pkt.num_vcpu; z++) {
107 		/* Foreach VM on the platform. */
108 		for (x = 0; x < noVms; x++) {
109 			/* Foreach vcpu of VMs on platform. */
110 			for (t = 0; t < lvm_info[x].num_cpus; t++)
111 				core_share(pNo, z, x, t);
112 		}
113 	}
114 }
115 
116 static void
117 get_pcpu_to_control(struct policy *pol)
118 {
119 
120 	/* Convert vcpu to pcpu. */
121 	struct vm_info info;
122 	int pcpu, count;
123 	uint64_t mask_u64b;
124 
125 	RTE_LOG(INFO, CHANNEL_MONITOR, "Looking for pcpu for %s\n",
126 			pol->pkt.vm_name);
127 	get_info_vm(pol->pkt.vm_name, &info);
128 
129 	for (count = 0; count < pol->pkt.num_vcpu; count++) {
130 		mask_u64b = info.pcpu_mask[pol->pkt.vcpu_to_control[count]];
131 		for (pcpu = 0; mask_u64b; mask_u64b &= ~(1ULL << pcpu++)) {
132 			if ((mask_u64b >> pcpu) & 1)
133 				pol->core_share[count].pcpu = pcpu;
134 		}
135 	}
136 }
137 
138 static int
139 get_pfid(struct policy *pol)
140 {
141 
142 	int i, x, ret = 0, nb_ports;
143 
144 	nb_ports = rte_eth_dev_count();
145 	for (i = 0; i < pol->pkt.nb_mac_to_monitor; i++) {
146 
147 		for (x = 0; x < nb_ports; x++) {
148 			ret = rte_pmd_i40e_query_vfid_by_mac(x,
149 				(struct ether_addr *)&(pol->pkt.vfid[i]));
150 			if (ret != -EINVAL) {
151 				pol->port[i] = x;
152 				break;
153 			}
154 		}
155 		if (ret == -EINVAL || ret == -ENOTSUP || ret == ENODEV) {
156 			RTE_LOG(INFO, CHANNEL_MONITOR,
157 				"Error with Policy. MAC not found on "
158 				"attached ports ");
159 			pol->enabled = 0;
160 			return ret;
161 		}
162 		pol->pfid[i] = ret;
163 	}
164 	return 1;
165 }
166 
167 static int
168 update_policy(struct channel_packet *pkt)
169 {
170 
171 	unsigned int updated = 0;
172 	int i;
173 
174 	for (i = 0; i < MAX_VMS; i++) {
175 		if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
176 			policies[i].pkt = *pkt;
177 			get_pcpu_to_control(&policies[i]);
178 			if (get_pfid(&policies[i]) == -1) {
179 				updated = 1;
180 				break;
181 			}
182 			core_share_status(i);
183 			policies[i].enabled = 1;
184 			updated = 1;
185 		}
186 	}
187 	if (!updated) {
188 		for (i = 0; i < MAX_VMS; i++) {
189 			if (policies[i].enabled == 0) {
190 				policies[i].pkt = *pkt;
191 				get_pcpu_to_control(&policies[i]);
192 				if (get_pfid(&policies[i]) == -1)
193 					break;
194 				core_share_status(i);
195 				policies[i].enabled = 1;
196 				break;
197 			}
198 		}
199 	}
200 	return 0;
201 }
202 
203 static uint64_t
204 get_pkt_diff(struct policy *pol)
205 {
206 
207 	uint64_t vsi_pkt_count,
208 		vsi_pkt_total = 0,
209 		vsi_pkt_count_prev_total = 0;
210 	double rdtsc_curr, rdtsc_diff, diff;
211 	int x;
212 	struct rte_eth_stats vf_stats;
213 
214 	for (x = 0; x < pol->pkt.nb_mac_to_monitor; x++) {
215 
216 		/*Read vsi stats*/
217 		if (rte_pmd_i40e_get_vf_stats(x, pol->pfid[x], &vf_stats) == 0)
218 			vsi_pkt_count = vf_stats.ipackets;
219 		else
220 			vsi_pkt_count = -1;
221 
222 		vsi_pkt_total += vsi_pkt_count;
223 
224 		vsi_pkt_count_prev_total += vsi_pkt_count_prev[pol->pfid[x]];
225 		vsi_pkt_count_prev[pol->pfid[x]] = vsi_pkt_count;
226 	}
227 
228 	rdtsc_curr = rte_rdtsc_precise();
229 	rdtsc_diff = rdtsc_curr - rdtsc_prev[pol->pfid[x-1]];
230 	rdtsc_prev[pol->pfid[x-1]] = rdtsc_curr;
231 
232 	diff = (vsi_pkt_total - vsi_pkt_count_prev_total) *
233 			((double)rte_get_tsc_hz() / rdtsc_diff);
234 
235 	return diff;
236 }
237 
238 static void
239 apply_traffic_profile(struct policy *pol)
240 {
241 
242 	int count;
243 	uint64_t diff = 0;
244 
245 	diff = get_pkt_diff(pol);
246 
247 	RTE_LOG(INFO, CHANNEL_MONITOR, "Applying traffic profile\n");
248 
249 	if (diff >= (pol->pkt.traffic_policy.max_max_packet_thresh)) {
250 		for (count = 0; count < pol->pkt.num_vcpu; count++) {
251 			if (pol->core_share[count].status != 1)
252 				power_manager_scale_core_max(
253 						pol->core_share[count].pcpu);
254 		}
255 	} else if (diff >= (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
256 		for (count = 0; count < pol->pkt.num_vcpu; count++) {
257 			if (pol->core_share[count].status != 1)
258 				power_manager_scale_core_med(
259 						pol->core_share[count].pcpu);
260 		}
261 	} else if (diff < (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
262 		for (count = 0; count < pol->pkt.num_vcpu; count++) {
263 			if (pol->core_share[count].status != 1)
264 				power_manager_scale_core_min(
265 						pol->core_share[count].pcpu);
266 		}
267 	}
268 }
269 
270 static void
271 apply_time_profile(struct policy *pol)
272 {
273 
274 	int count, x;
275 	struct timeval tv;
276 	struct tm *ptm;
277 	char time_string[40];
278 
279 	/* Obtain the time of day, and convert it to a tm struct. */
280 	gettimeofday(&tv, NULL);
281 	ptm = localtime(&tv.tv_sec);
282 	/* Format the date and time, down to a single second. */
283 	strftime(time_string, sizeof(time_string), "%Y-%m-%d %H:%M:%S", ptm);
284 
285 	for (x = 0; x < HOURS; x++) {
286 
287 		if (ptm->tm_hour == pol->pkt.timer_policy.busy_hours[x]) {
288 			for (count = 0; count < pol->pkt.num_vcpu; count++) {
289 				if (pol->core_share[count].status != 1) {
290 					power_manager_scale_core_max(
291 						pol->core_share[count].pcpu);
292 				RTE_LOG(INFO, CHANNEL_MONITOR,
293 					"Scaling up core %d to max\n",
294 					pol->core_share[count].pcpu);
295 				}
296 			}
297 			break;
298 		} else if (ptm->tm_hour ==
299 				pol->pkt.timer_policy.quiet_hours[x]) {
300 			for (count = 0; count < pol->pkt.num_vcpu; count++) {
301 				if (pol->core_share[count].status != 1) {
302 					power_manager_scale_core_min(
303 						pol->core_share[count].pcpu);
304 				RTE_LOG(INFO, CHANNEL_MONITOR,
305 					"Scaling down core %d to min\n",
306 					pol->core_share[count].pcpu);
307 			}
308 		}
309 			break;
310 		} else if (ptm->tm_hour ==
311 			pol->pkt.timer_policy.hours_to_use_traffic_profile[x]) {
312 			apply_traffic_profile(pol);
313 			break;
314 		}
315 	}
316 }
317 
318 static void
319 apply_workload_profile(struct policy *pol)
320 {
321 
322 	int count;
323 
324 	if (pol->pkt.workload == HIGH) {
325 		for (count = 0; count < pol->pkt.num_vcpu; count++) {
326 			if (pol->core_share[count].status != 1)
327 				power_manager_scale_core_max(
328 						pol->core_share[count].pcpu);
329 		}
330 	} else if (pol->pkt.workload == MEDIUM) {
331 		for (count = 0; count < pol->pkt.num_vcpu; count++) {
332 			if (pol->core_share[count].status != 1)
333 				power_manager_scale_core_med(
334 						pol->core_share[count].pcpu);
335 		}
336 	} else if (pol->pkt.workload == LOW) {
337 		for (count = 0; count < pol->pkt.num_vcpu; count++) {
338 			if (pol->core_share[count].status != 1)
339 				power_manager_scale_core_min(
340 						pol->core_share[count].pcpu);
341 		}
342 	}
343 }
344 
345 static void
346 apply_policy(struct policy *pol)
347 {
348 
349 	struct channel_packet *pkt = &pol->pkt;
350 
351 	/*Check policy to use*/
352 	if (pkt->policy_to_use == TRAFFIC)
353 		apply_traffic_profile(pol);
354 	else if (pkt->policy_to_use == TIME)
355 		apply_time_profile(pol);
356 	else if (pkt->policy_to_use == WORKLOAD)
357 		apply_workload_profile(pol);
358 }
359 
360 
361 static int
362 process_request(struct channel_packet *pkt, struct channel_info *chan_info)
363 {
364 	uint64_t core_mask;
365 
366 	if (chan_info == NULL)
367 		return -1;
368 
369 	if (rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_CONNECTED,
370 			CHANNEL_MGR_CHANNEL_PROCESSING) == 0)
371 		return -1;
372 
373 	if (pkt->command == CPU_POWER) {
374 		core_mask = get_pcpus_mask(chan_info, pkt->resource_id);
375 		if (core_mask == 0) {
376 			RTE_LOG(ERR, CHANNEL_MONITOR, "Error get physical CPU mask for "
377 				"channel '%s' using vCPU(%u)\n", chan_info->channel_path,
378 				(unsigned)pkt->unit);
379 			return -1;
380 		}
381 		if (__builtin_popcountll(core_mask) == 1) {
382 
383 			unsigned core_num = __builtin_ffsll(core_mask) - 1;
384 
385 			switch (pkt->unit) {
386 			case(CPU_POWER_SCALE_MIN):
387 					power_manager_scale_core_min(core_num);
388 			break;
389 			case(CPU_POWER_SCALE_MAX):
390 					power_manager_scale_core_max(core_num);
391 			break;
392 			case(CPU_POWER_SCALE_DOWN):
393 					power_manager_scale_core_down(core_num);
394 			break;
395 			case(CPU_POWER_SCALE_UP):
396 					power_manager_scale_core_up(core_num);
397 			break;
398 			case(CPU_POWER_ENABLE_TURBO):
399 				power_manager_enable_turbo_core(core_num);
400 			break;
401 			case(CPU_POWER_DISABLE_TURBO):
402 				power_manager_disable_turbo_core(core_num);
403 			break;
404 			default:
405 				break;
406 			}
407 		} else {
408 			switch (pkt->unit) {
409 			case(CPU_POWER_SCALE_MIN):
410 					power_manager_scale_mask_min(core_mask);
411 			break;
412 			case(CPU_POWER_SCALE_MAX):
413 					power_manager_scale_mask_max(core_mask);
414 			break;
415 			case(CPU_POWER_SCALE_DOWN):
416 					power_manager_scale_mask_down(core_mask);
417 			break;
418 			case(CPU_POWER_SCALE_UP):
419 					power_manager_scale_mask_up(core_mask);
420 			break;
421 			case(CPU_POWER_ENABLE_TURBO):
422 				power_manager_enable_turbo_mask(core_mask);
423 			break;
424 			case(CPU_POWER_DISABLE_TURBO):
425 				power_manager_disable_turbo_mask(core_mask);
426 			break;
427 			default:
428 				break;
429 			}
430 
431 		}
432 	}
433 
434 	if (pkt->command == PKT_POLICY) {
435 		RTE_LOG(INFO, CHANNEL_MONITOR, "\nProcessing Policy request from Guest\n");
436 		update_policy(pkt);
437 		policy_is_set = 1;
438 	}
439 
440 	/* Return is not checked as channel status may have been set to DISABLED
441 	 * from management thread
442 	 */
443 	rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_PROCESSING,
444 			CHANNEL_MGR_CHANNEL_CONNECTED);
445 	return 0;
446 
447 }
448 
449 int
450 add_channel_to_monitor(struct channel_info **chan_info)
451 {
452 	struct channel_info *info = *chan_info;
453 	struct epoll_event event;
454 
455 	event.events = EPOLLIN;
456 	event.data.ptr = info;
457 	if (epoll_ctl(global_event_fd, EPOLL_CTL_ADD, info->fd, &event) < 0) {
458 		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to add channel '%s' "
459 				"to epoll\n", info->channel_path);
460 		return -1;
461 	}
462 	return 0;
463 }
464 
465 int
466 remove_channel_from_monitor(struct channel_info *chan_info)
467 {
468 	if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL, chan_info->fd, NULL) < 0) {
469 		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to remove channel '%s' "
470 				"from epoll\n", chan_info->channel_path);
471 		return -1;
472 	}
473 	return 0;
474 }
475 
476 int
477 channel_monitor_init(void)
478 {
479 	global_event_fd = epoll_create1(0);
480 	if (global_event_fd == 0) {
481 		RTE_LOG(ERR, CHANNEL_MONITOR, "Error creating epoll context with "
482 				"error %s\n", strerror(errno));
483 		return -1;
484 	}
485 	global_events_list = rte_malloc("epoll_events", sizeof(*global_events_list)
486 			* MAX_EVENTS, RTE_CACHE_LINE_SIZE);
487 	if (global_events_list == NULL) {
488 		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to rte_malloc for "
489 				"epoll events\n");
490 		return -1;
491 	}
492 	return 0;
493 }
494 
495 void
496 run_channel_monitor(void)
497 {
498 	while (run_loop) {
499 		int n_events, i;
500 
501 		n_events = epoll_wait(global_event_fd, global_events_list,
502 				MAX_EVENTS, 1);
503 		if (!run_loop)
504 			break;
505 		for (i = 0; i < n_events; i++) {
506 			struct channel_info *chan_info = (struct channel_info *)
507 					global_events_list[i].data.ptr;
508 			if ((global_events_list[i].events & EPOLLERR) ||
509 				(global_events_list[i].events & EPOLLHUP)) {
510 				RTE_LOG(DEBUG, CHANNEL_MONITOR, "Remote closed connection for "
511 						"channel '%s'\n",
512 						chan_info->channel_path);
513 				remove_channel(&chan_info);
514 				continue;
515 			}
516 			if (global_events_list[i].events & EPOLLIN) {
517 
518 				int n_bytes, err = 0;
519 				struct channel_packet pkt;
520 				void *buffer = &pkt;
521 				int buffer_len = sizeof(pkt);
522 
523 				while (buffer_len > 0) {
524 					n_bytes = read(chan_info->fd,
525 							buffer, buffer_len);
526 					if (n_bytes == buffer_len)
527 						break;
528 					if (n_bytes == -1) {
529 						err = errno;
530 						RTE_LOG(DEBUG, CHANNEL_MONITOR,
531 							"Received error on "
532 							"channel '%s' read: %s\n",
533 							chan_info->channel_path,
534 							strerror(err));
535 						remove_channel(&chan_info);
536 						break;
537 					}
538 					buffer = (char *)buffer + n_bytes;
539 					buffer_len -= n_bytes;
540 				}
541 				if (!err)
542 					process_request(&pkt, chan_info);
543 			}
544 		}
545 		rte_delay_us(time_period_ms*1000);
546 		if (policy_is_set) {
547 			int j;
548 
549 			for (j = 0; j < MAX_VMS; j++) {
550 				if (policies[j].enabled == 1)
551 					apply_policy(&policies[j]);
552 			}
553 		}
554 	}
555 }
556