/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <signal.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/epoll.h>
#include <sys/queue.h>
#include <sys/time.h>

#include <rte_log.h>
#include <rte_memory.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_pmd_i40e.h>

#include <libvirt/libvirt.h>
#include "channel_monitor.h"
#include "channel_commands.h"
#include "channel_manager.h"
#include "power_manager.h"

#define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1

#define MAX_EVENTS 256

uint64_t vsi_pkt_count_prev[384];
uint64_t rdtsc_prev[384];

double time_period_ms = 1;
static volatile unsigned run_loop = 1;
static int global_event_fd;
static unsigned int policy_is_set;
static struct epoll_event *global_events_list;
static struct policy policies[MAX_VMS];

void channel_monitor_exit(void)
{
	run_loop = 0;
	rte_free(global_events_list);
}

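/*
 * If the given policy vcpu's physical core is also used by a vcpu of a
 * different VM, mark the core as shared and pin it to max frequency.
 */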
static void
core_share(int pNo, int z, int x, int t)
{
	if (policies[pNo].core_share[z].pcpu == lvm_info[x].pcpus[t]) {
		if (strcmp(policies[pNo].pkt.vm_name,
				lvm_info[x].vm_name) != 0) {
			policies[pNo].core_share[z].status = 1;
			power_manager_scale_core_max(
					policies[pNo].core_share[z].pcpu);
		}
	}
}

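/*
 * Recompute the shared status of every physical core used by a policy,
 * checking each policy vcpu against every vcpu of every VM on the host.
 */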
static void
core_share_status(int pNo)
{
	int noVms, noVcpus, z, x, t;

	get_all_vm(&noVms, &noVcpus);

	/* Reset Core Share Status. */
	for (z = 0; z < noVcpus; z++)
		policies[pNo].core_share[z].status = 0;

	/* Foreach vcpu in a policy. */
	for (z = 0; z < policies[pNo].pkt.num_vcpu; z++) {
		/* Foreach VM on the platform. */
		for (x = 0; x < noVms; x++) {
			/* Foreach vcpu of VMs on platform. */
			for (t = 0; t < lvm_info[x].num_cpus; t++)
				core_share(pNo, z, x, t);
		}
	}
}

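/*
 * Resolve each vcpu named in the policy to the physical CPU backing it,
 * using the affinity mask reported by the channel manager.
 */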
static void
get_pcpu_to_control(struct policy *pol)
{
	/* Convert vcpu to pcpu. */
	struct vm_info info;
	int pcpu, count;
	uint64_t mask_u64b;

	RTE_LOG(INFO, CHANNEL_MONITOR, "Looking for pcpu for %s\n",
			pol->pkt.vm_name);
	get_info_vm(pol->pkt.vm_name, &info);

	for (count = 0; count < pol->pkt.num_vcpu; count++) {
		mask_u64b = info.pcpu_mask[pol->pkt.vcpu_to_control[count]];
		/* Walk the affinity mask bit by bit, clearing each set bit
		 * as it is visited; the pcpu left recorded is the highest
		 * set bit in the mask.
		 */
		for (pcpu = 0; mask_u64b; mask_u64b &= ~(1ULL << pcpu++)) {
			if ((mask_u64b >> pcpu) & 1)
				pol->core_share[count].pcpu = pcpu;
		}
	}
}

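/*
 * Look up the VF id for each MAC address the policy monitors by querying
 * every attached port. Returns 1 on success or a negative errno if a MAC
 * is not found on any port.
 */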
static int
get_pfid(struct policy *pol)
{
	int i, x, ret = 0, nb_ports;

	nb_ports = rte_eth_dev_count();
	for (i = 0; i < pol->pkt.nb_mac_to_monitor; i++) {

		for (x = 0; x < nb_ports; x++) {
			ret = rte_pmd_i40e_query_vfid_by_mac(x,
				(struct ether_addr *)&(pol->pkt.vfid[i]));
			if (ret != -EINVAL) {
				pol->port[i] = x;
				break;
			}
		}
		if (ret == -EINVAL || ret == -ENOTSUP || ret == -ENODEV) {
			RTE_LOG(INFO, CHANNEL_MONITOR,
				"Error with policy: MAC not found on "
				"attached ports\n");
			pol->enabled = 0;
			return ret;
		}
		pol->pfid[i] = ret;
	}
	return 1;
}

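/*
 * Store an incoming policy packet: overwrite the policy slot matching the
 * VM name if one exists, otherwise take the first disabled slot.
 */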
static int
update_policy(struct channel_packet *pkt)
{
	unsigned int updated = 0;
	int i;

	for (i = 0; i < MAX_VMS; i++) {
		if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
			policies[i].pkt = *pkt;
			get_pcpu_to_control(&policies[i]);
			/* get_pfid() returns a negative errno on failure. */
			if (get_pfid(&policies[i]) < 0) {
				updated = 1;
				break;
			}
			core_share_status(i);
			policies[i].enabled = 1;
			updated = 1;
		}
	}
	if (!updated) {
		for (i = 0; i < MAX_VMS; i++) {
			if (policies[i].enabled == 0) {
				policies[i].pkt = *pkt;
				get_pcpu_to_control(&policies[i]);
				if (get_pfid(&policies[i]) < 0)
					break;
				core_share_status(i);
				policies[i].enabled = 1;
				break;
			}
		}
	}
	return 0;
}

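/*
 * Estimate the current packet rate for a policy: sum the VF rx counters
 * for each monitored MAC, diff them against the previous sample and scale
 * by the TSC delta (timestamps are keyed by the last monitored VF id).
 */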
static uint64_t
get_pkt_diff(struct policy *pol)
{
	uint64_t vsi_pkt_count,
		vsi_pkt_total = 0,
		vsi_pkt_count_prev_total = 0;
	double rdtsc_curr, rdtsc_diff, diff;
	int x;
	struct rte_eth_stats vf_stats;

	for (x = 0; x < pol->pkt.nb_mac_to_monitor; x++) {

		/* Read VSI stats. */
		if (rte_pmd_i40e_get_vf_stats(x, pol->pfid[x], &vf_stats) == 0)
			vsi_pkt_count = vf_stats.ipackets;
		else
			vsi_pkt_count = -1;

		vsi_pkt_total += vsi_pkt_count;

		vsi_pkt_count_prev_total += vsi_pkt_count_prev[pol->pfid[x]];
		vsi_pkt_count_prev[pol->pfid[x]] = vsi_pkt_count;
	}

	rdtsc_curr = rte_rdtsc_precise();
	rdtsc_diff = rdtsc_curr - rdtsc_prev[pol->pfid[x-1]];
	rdtsc_prev[pol->pfid[x-1]] = rdtsc_curr;

	diff = (vsi_pkt_total - vsi_pkt_count_prev_total) *
			((double)rte_get_tsc_hz() / rdtsc_diff);

	return diff;
}

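/*
 * Scale the policy's cores to max, medium or min frequency depending on
 * how the measured packet rate compares with the policy thresholds.
 */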
static void
apply_traffic_profile(struct policy *pol)
{
	int count;
	uint64_t diff = 0;

	diff = get_pkt_diff(pol);

	RTE_LOG(INFO, CHANNEL_MONITOR, "Applying traffic profile\n");

	if (diff >= (pol->pkt.traffic_policy.max_max_packet_thresh)) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_max(
						pol->core_share[count].pcpu);
		}
	} else if (diff >= (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_med(
						pol->core_share[count].pcpu);
		}
	} else if (diff < (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_min(
						pol->core_share[count].pcpu);
		}
	}
}

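/*
 * Scale the policy's cores up or down according to the busy/quiet hours
 * configured in the policy, falling back to the traffic profile for hours
 * marked as traffic-driven.
 */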
static void
apply_time_profile(struct policy *pol)
{
	int count, x;
	struct timeval tv;
	struct tm *ptm;
	char time_string[40];

	/* Obtain the time of day, and convert it to a tm struct. */
	gettimeofday(&tv, NULL);
	ptm = localtime(&tv.tv_sec);
	/* Format the date and time, down to a single second. */
	strftime(time_string, sizeof(time_string), "%Y-%m-%d %H:%M:%S", ptm);

	for (x = 0; x < HOURS; x++) {

		if (ptm->tm_hour == pol->pkt.timer_policy.busy_hours[x]) {
			for (count = 0; count < pol->pkt.num_vcpu; count++) {
				if (pol->core_share[count].status != 1) {
					power_manager_scale_core_max(
						pol->core_share[count].pcpu);
					RTE_LOG(INFO, CHANNEL_MONITOR,
						"Scaling up core %d to max\n",
						pol->core_share[count].pcpu);
				}
			}
			break;
		} else if (ptm->tm_hour ==
				pol->pkt.timer_policy.quiet_hours[x]) {
			for (count = 0; count < pol->pkt.num_vcpu; count++) {
				if (pol->core_share[count].status != 1) {
					power_manager_scale_core_min(
						pol->core_share[count].pcpu);
					RTE_LOG(INFO, CHANNEL_MONITOR,
						"Scaling down core %d to min\n",
						pol->core_share[count].pcpu);
				}
			}
			break;
		} else if (ptm->tm_hour ==
			pol->pkt.timer_policy.hours_to_use_traffic_profile[x]) {
			apply_traffic_profile(pol);
			break;
		}
	}
}

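/*
 * Scale the policy's cores to the frequency matching the workload level
 * (HIGH/MEDIUM/LOW) declared by the guest.
 */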
static void
apply_workload_profile(struct policy *pol)
{
	int count;

	if (pol->pkt.workload == HIGH) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_max(
						pol->core_share[count].pcpu);
		}
	} else if (pol->pkt.workload == MEDIUM) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_med(
						pol->core_share[count].pcpu);
		}
	} else if (pol->pkt.workload == LOW) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_min(
						pol->core_share[count].pcpu);
		}
	}
}

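/* Dispatch a policy to its traffic, time or workload handler. */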
static void
apply_policy(struct policy *pol)
{
	struct channel_packet *pkt = &pol->pkt;

	/* Check which policy profile to use. */
	if (pkt->policy_to_use == TRAFFIC)
		apply_traffic_profile(pol);
	else if (pkt->policy_to_use == TIME)
		apply_time_profile(pol);
	else if (pkt->policy_to_use == WORKLOAD)
		apply_workload_profile(pol);
}

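/*
 * Handle a packet received on a channel: either an immediate frequency
 * command for one core or a core mask, or a policy to store and apply on
 * subsequent iterations of the monitor loop.
 */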
static int
process_request(struct channel_packet *pkt, struct channel_info *chan_info)
{
	uint64_t core_mask;

	if (chan_info == NULL)
		return -1;

	if (rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_CONNECTED,
			CHANNEL_MGR_CHANNEL_PROCESSING) == 0)
		return -1;

	if (pkt->command == CPU_POWER) {
		core_mask = get_pcpus_mask(chan_info, pkt->resource_id);
		if (core_mask == 0) {
			RTE_LOG(ERR, CHANNEL_MONITOR, "Error getting physical CPU mask "
				"for channel '%s' using vCPU(%u)\n",
				chan_info->channel_path,
				(unsigned)pkt->resource_id);
			return -1;
		}
		if (__builtin_popcountll(core_mask) == 1) {

			unsigned core_num = __builtin_ffsll(core_mask) - 1;

			switch (pkt->unit) {
			case(CPU_POWER_SCALE_MIN):
				power_manager_scale_core_min(core_num);
				break;
			case(CPU_POWER_SCALE_MAX):
				power_manager_scale_core_max(core_num);
				break;
			case(CPU_POWER_SCALE_DOWN):
				power_manager_scale_core_down(core_num);
				break;
			case(CPU_POWER_SCALE_UP):
				power_manager_scale_core_up(core_num);
				break;
			case(CPU_POWER_ENABLE_TURBO):
				power_manager_enable_turbo_core(core_num);
				break;
			case(CPU_POWER_DISABLE_TURBO):
				power_manager_disable_turbo_core(core_num);
				break;
			default:
				break;
			}
		} else {
			switch (pkt->unit) {
			case(CPU_POWER_SCALE_MIN):
				power_manager_scale_mask_min(core_mask);
				break;
			case(CPU_POWER_SCALE_MAX):
				power_manager_scale_mask_max(core_mask);
				break;
			case(CPU_POWER_SCALE_DOWN):
				power_manager_scale_mask_down(core_mask);
				break;
			case(CPU_POWER_SCALE_UP):
				power_manager_scale_mask_up(core_mask);
				break;
			case(CPU_POWER_ENABLE_TURBO):
				power_manager_enable_turbo_mask(core_mask);
				break;
			case(CPU_POWER_DISABLE_TURBO):
				power_manager_disable_turbo_mask(core_mask);
				break;
			default:
				break;
			}
		}
	}

	if (pkt->command == PKT_POLICY) {
		RTE_LOG(INFO, CHANNEL_MONITOR, "\nProcessing Policy request from Guest\n");
		update_policy(pkt);
		policy_is_set = 1;
	}

	/* Return is not checked as channel status may have been set to DISABLED
	 * from management thread.
	 */
	rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_PROCESSING,
			CHANNEL_MGR_CHANNEL_CONNECTED);
	return 0;
}

int
add_channel_to_monitor(struct channel_info **chan_info)
{
	struct channel_info *info = *chan_info;
	struct epoll_event event;

	event.events = EPOLLIN;
	event.data.ptr = info;
	if (epoll_ctl(global_event_fd, EPOLL_CTL_ADD, info->fd, &event) < 0) {
		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to add channel '%s' "
				"to epoll\n", info->channel_path);
		return -1;
	}
	return 0;
}

int
remove_channel_from_monitor(struct channel_info *chan_info)
{
	if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL, chan_info->fd, NULL) < 0) {
		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to remove channel '%s' "
				"from epoll\n", chan_info->channel_path);
		return -1;
	}
	return 0;
}

int
channel_monitor_init(void)
{
	global_event_fd = epoll_create1(0);
	/* epoll_create1() returns -1 on failure, not 0. */
	if (global_event_fd < 0) {
		RTE_LOG(ERR, CHANNEL_MONITOR, "Error creating epoll context with "
				"error %s\n", strerror(errno));
		return -1;
	}
	global_events_list = rte_malloc("epoll_events", sizeof(*global_events_list)
			* MAX_EVENTS, RTE_CACHE_LINE_SIZE);
	if (global_events_list == NULL) {
		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to rte_malloc for "
				"epoll events\n");
		return -1;
	}
	return 0;
}

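/*
 * Main monitor loop: wait on epoll for channel traffic, read and process
 * complete packets, then apply any enabled policies once per period.
 */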
void
run_channel_monitor(void)
{
	while (run_loop) {
		int n_events, i;

		n_events = epoll_wait(global_event_fd, global_events_list,
				MAX_EVENTS, 1);
		if (!run_loop)
			break;
		for (i = 0; i < n_events; i++) {
			struct channel_info *chan_info = (struct channel_info *)
					global_events_list[i].data.ptr;
			if ((global_events_list[i].events & EPOLLERR) ||
				(global_events_list[i].events & EPOLLHUP)) {
				RTE_LOG(DEBUG, CHANNEL_MONITOR, "Remote closed connection for "
						"channel '%s'\n",
						chan_info->channel_path);
				remove_channel(&chan_info);
				continue;
			}
			if (global_events_list[i].events & EPOLLIN) {

				int n_bytes, err = 0;
				struct channel_packet pkt;
				void *buffer = &pkt;
				int buffer_len = sizeof(pkt);

				while (buffer_len > 0) {
					n_bytes = read(chan_info->fd,
							buffer, buffer_len);
					if (n_bytes == buffer_len)
						break;
					if (n_bytes == -1) {
						err = errno;
						RTE_LOG(DEBUG, CHANNEL_MONITOR,
							"Received error on "
							"channel '%s' read: %s\n",
							chan_info->channel_path,
							strerror(err));
						remove_channel(&chan_info);
						break;
					}
					if (n_bytes == 0) {
						/* EOF: remote closed the channel
						 * mid-packet; stop reading rather
						 * than spin on an empty fd.
						 */
						err = 1;
						remove_channel(&chan_info);
						break;
					}
					buffer = (char *)buffer + n_bytes;
					buffer_len -= n_bytes;
				}
				if (!err)
					process_request(&pkt, chan_info);
			}
		}
		rte_delay_us(time_period_ms*1000);
		if (policy_is_set) {
			int j;

			for (j = 0; j < MAX_VMS; j++) {
				if (policies[j].enabled == 1)
					apply_policy(&policies[j]);
			}
		}
	}
}