/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <signal.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/epoll.h>
#include <sys/queue.h>
#include <sys/time.h>

#include <rte_log.h>
#include <rte_memory.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_pmd_i40e.h>

#include <libvirt/libvirt.h>
#include "channel_monitor.h"
#include "channel_commands.h"
#include "channel_manager.h"
#include "power_manager.h"

#define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1

#define MAX_EVENTS 256

uint64_t vsi_pkt_count_prev[384];
uint64_t rdtsc_prev[384];

double time_period_s = 1;
static volatile unsigned run_loop = 1;
static int global_event_fd;
static unsigned int policy_is_set;
static struct epoll_event *global_events_list;
static struct policy policies[MAX_VMS];

void channel_monitor_exit(void)
{
	run_loop = 0;
	rte_free(global_events_list);
}

static void
core_share(int pNo, int z, int x, int t)
{
	if (policies[pNo].core_share[z].pcpu == lvm_info[x].pcpus[t]) {
		if (strcmp(policies[pNo].pkt.vm_name,
				lvm_info[x].vm_name) != 0) {
			policies[pNo].core_share[z].status = 1;
			power_manager_scale_core_max(
					policies[pNo].core_share[z].pcpu);
		}
	}
}
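/*
 * Mark the pcpus that a policy's vcpus share with other VMs. For each
 * vcpu in the policy, walk every VM on the platform and each of its
 * pcpus; core_share() pins any core shared with a different VM to max
 * frequency and sets status = 1, so the per-policy scaling loops below
 * leave those cores alone.
 */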
static void
core_share_status(int pNo)
{
	int noVms, noVcpus, z, x, t;

	get_all_vm(&noVms, &noVcpus);

	/* Reset core share status. */
	for (z = 0; z < noVcpus; z++)
		policies[pNo].core_share[z].status = 0;

	/* Foreach vcpu in a policy. */
	for (z = 0; z < policies[pNo].pkt.num_vcpu; z++) {
		/* Foreach VM on the platform. */
		for (x = 0; x < noVms; x++) {
			/* Foreach vcpu of VMs on platform. */
			for (t = 0; t < lvm_info[x].num_cpus; t++)
				core_share(pNo, z, x, t);
		}
	}
}

static void
get_pcpu_to_control(struct policy *pol)
{
	/* Convert vcpu to pcpu. */
	struct vm_info info;
	int pcpu, count;
	uint64_t mask_u64b;

	RTE_LOG(INFO, CHANNEL_MONITOR, "Looking for pcpu for %s\n",
			pol->pkt.vm_name);
	get_info_vm(pol->pkt.vm_name, &info);

	for (count = 0; count < pol->pkt.num_vcpu; count++) {
		mask_u64b = info.pcpu_mask[pol->pkt.vcpu_to_control[count]];
		/* Each bit is cleared as it is examined, so if more than
		 * one bit is set the highest set bit is the one used.
		 */
		for (pcpu = 0; mask_u64b; mask_u64b &= ~(1ULL << pcpu++)) {
			if ((mask_u64b >> pcpu) & 1)
				pol->core_share[count].pcpu = pcpu;
		}
	}
}

static int
get_pfid(struct policy *pol)
{
	int i, x, ret = 0, nb_ports;

	nb_ports = rte_eth_dev_count();
	for (i = 0; i < pol->pkt.nb_mac_to_monitor; i++) {
		for (x = 0; x < nb_ports; x++) {
			ret = rte_pmd_i40e_query_vfid_by_mac(x,
					(struct ether_addr *)&(pol->pkt.vfid[i]));
			if (ret != -EINVAL) {
				pol->port[i] = x;
				break;
			}
		}
		if (ret == -EINVAL || ret == -ENOTSUP || ret == -ENODEV) {
			RTE_LOG(INFO, CHANNEL_MONITOR,
				"Error with policy: MAC not found on "
				"attached ports\n");
			pol->enabled = 0;
			return ret;
		}
		pol->pfid[i] = ret;
	}
	return 1;
}

static int
update_policy(struct channel_packet *pkt)
{
	unsigned int updated = 0;

	for (int i = 0; i < MAX_VMS; i++) {
		if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
			policies[i].pkt = *pkt;
			get_pcpu_to_control(&policies[i]);
			if (get_pfid(&policies[i]) < 0) {
				updated = 1;
				break;
			}
			core_share_status(i);
			policies[i].enabled = 1;
			updated = 1;
		}
	}
	if (!updated) {
		for (int i = 0; i < MAX_VMS; i++) {
			if (policies[i].enabled == 0) {
				policies[i].pkt = *pkt;
				get_pcpu_to_control(&policies[i]);
				if (get_pfid(&policies[i]) < 0)
					break;
				core_share_status(i);
				policies[i].enabled = 1;
				break;
			}
		}
	}
	return 0;
}

static uint64_t
get_pkt_diff(struct policy *pol)
{
	uint64_t vsi_pkt_count,
		vsi_pkt_total = 0,
		vsi_pkt_count_prev_total = 0;
	double rdtsc_curr, rdtsc_diff, diff;
	int x;
	struct rte_eth_stats vf_stats;

	for (x = 0; x < pol->pkt.nb_mac_to_monitor; x++) {

		/* Read VSI stats. */
		if (rte_pmd_i40e_get_vf_stats(x, pol->pfid[x], &vf_stats) == 0)
			vsi_pkt_count = vf_stats.ipackets;
		else
			vsi_pkt_count = -1;

		vsi_pkt_total += vsi_pkt_count;

		vsi_pkt_count_prev_total += vsi_pkt_count_prev[pol->pfid[x]];
		vsi_pkt_count_prev[pol->pfid[x]] = vsi_pkt_count;
	}

	rdtsc_curr = rte_rdtsc_precise();
	rdtsc_diff = rdtsc_curr - rdtsc_prev[pol->pfid[x-1]];
	rdtsc_prev[pol->pfid[x-1]] = rdtsc_curr;

	/* Scale the packet delta to a per-second rate using the TSC. */
	diff = (vsi_pkt_total - vsi_pkt_count_prev_total) *
			((double)rte_get_tsc_hz() / rdtsc_diff);

	return diff;
}
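/*
 * Scale the cores of a policy according to the VF traffic rate.
 * get_pkt_diff() above returns an estimated packets-per-second figure:
 *
 *     rate = (pkts_now - pkts_prev) * tsc_hz / (tsc_now - tsc_prev)
 *
 * e.g. 500,000 new packets counted over 1.15e9 TSC ticks at 2.3 GHz
 * gives 500000 * 2.3e9 / 1.15e9 = 1,000,000 pkts/s. The thresholds in
 * the packet's traffic_policy are compared against this rate.
 */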
static void
apply_traffic_profile(struct policy *pol)
{
	int count;
	uint64_t diff = 0;

	diff = get_pkt_diff(pol);

	RTE_LOG(INFO, CHANNEL_MONITOR, "Applying traffic profile\n");

	if (diff >= (pol->pkt.traffic_policy.max_max_packet_thresh)) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_max(
						pol->core_share[count].pcpu);
		}
	} else if (diff >= (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_med(
						pol->core_share[count].pcpu);
		}
	} else {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_min(
						pol->core_share[count].pcpu);
		}
	}
}

static void
apply_time_profile(struct policy *pol)
{
	int count, x;
	struct timeval tv;
	struct tm *ptm;
	char time_string[40];

	/* Obtain the time of day, and convert it to a tm struct. */
	gettimeofday(&tv, NULL);
	ptm = localtime(&tv.tv_sec);
	/* Format the date and time, down to a single second. */
	strftime(time_string, sizeof(time_string), "%Y-%m-%d %H:%M:%S", ptm);

	for (x = 0; x < HOURS; x++) {

		if (ptm->tm_hour == pol->pkt.timer_policy.busy_hours[x]) {
			for (count = 0; count < pol->pkt.num_vcpu; count++) {
				if (pol->core_share[count].status != 1) {
					power_manager_scale_core_max(
						pol->core_share[count].pcpu);
					RTE_LOG(INFO, CHANNEL_MONITOR,
						"Scaling up core %d to max\n",
						pol->core_share[count].pcpu);
				}
			}
			break;
		} else if (ptm->tm_hour ==
				pol->pkt.timer_policy.quiet_hours[x]) {
			for (count = 0; count < pol->pkt.num_vcpu; count++) {
				if (pol->core_share[count].status != 1) {
					power_manager_scale_core_min(
						pol->core_share[count].pcpu);
					RTE_LOG(INFO, CHANNEL_MONITOR,
						"Scaling down core %d to min\n",
						pol->core_share[count].pcpu);
				}
			}
			break;
		} else if (ptm->tm_hour ==
			pol->pkt.timer_policy.hours_to_use_traffic_profile[x]) {
			apply_traffic_profile(pol);
			break;
		}
	}
}

static void
apply_workload_profile(struct policy *pol)
{
	int count;

	if (pol->pkt.workload == HIGH) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_max(
						pol->core_share[count].pcpu);
		}
	} else if (pol->pkt.workload == MEDIUM) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_med(
						pol->core_share[count].pcpu);
		}
	} else if (pol->pkt.workload == LOW) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_min(
						pol->core_share[count].pcpu);
		}
	}
}

static void
apply_policy(struct policy *pol)
{
	struct channel_packet *pkt = &pol->pkt;

	/* Check which policy type to use. */
	if (pkt->policy_to_use == TRAFFIC)
		apply_traffic_profile(pol);
	else if (pkt->policy_to_use == TIME)
		apply_time_profile(pol);
	else if (pkt->policy_to_use == WORKLOAD)
		apply_workload_profile(pol);
}
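/*
 * Handle a single packet arriving on a guest channel. A CPU_POWER
 * command scales one core directly when the requested vcpu maps to a
 * single pcpu, or a whole core mask otherwise. A PKT_POLICY command
 * installs or updates a policy, which the monitor loop then re-applies
 * once per period. The channel status is moved from CONNECTED to
 * PROCESSING with a compare-and-set so a request cannot be processed
 * twice concurrently, and is restored afterwards.
 */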
static int
process_request(struct channel_packet *pkt, struct channel_info *chan_info)
{
	uint64_t core_mask;

	if (chan_info == NULL)
		return -1;

	if (rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_CONNECTED,
			CHANNEL_MGR_CHANNEL_PROCESSING) == 0)
		return -1;

	if (pkt->command == CPU_POWER) {
		core_mask = get_pcpus_mask(chan_info, pkt->resource_id);
		if (core_mask == 0) {
			RTE_LOG(ERR, CHANNEL_MONITOR, "Error getting physical CPU "
				"mask for channel '%s' using vCPU(%u)\n",
				chan_info->channel_path,
				(unsigned)pkt->resource_id);
			return -1;
		}
		if (__builtin_popcountll(core_mask) == 1) {

			unsigned core_num = __builtin_ffsll(core_mask) - 1;

			switch (pkt->unit) {
			case(CPU_POWER_SCALE_MIN):
				power_manager_scale_core_min(core_num);
				break;
			case(CPU_POWER_SCALE_MAX):
				power_manager_scale_core_max(core_num);
				break;
			case(CPU_POWER_SCALE_DOWN):
				power_manager_scale_core_down(core_num);
				break;
			case(CPU_POWER_SCALE_UP):
				power_manager_scale_core_up(core_num);
				break;
			case(CPU_POWER_ENABLE_TURBO):
				power_manager_enable_turbo_core(core_num);
				break;
			case(CPU_POWER_DISABLE_TURBO):
				power_manager_disable_turbo_core(core_num);
				break;
			default:
				break;
			}
		} else {
			switch (pkt->unit) {
			case(CPU_POWER_SCALE_MIN):
				power_manager_scale_mask_min(core_mask);
				break;
			case(CPU_POWER_SCALE_MAX):
				power_manager_scale_mask_max(core_mask);
				break;
			case(CPU_POWER_SCALE_DOWN):
				power_manager_scale_mask_down(core_mask);
				break;
			case(CPU_POWER_SCALE_UP):
				power_manager_scale_mask_up(core_mask);
				break;
			case(CPU_POWER_ENABLE_TURBO):
				power_manager_enable_turbo_mask(core_mask);
				break;
			case(CPU_POWER_DISABLE_TURBO):
				power_manager_disable_turbo_mask(core_mask);
				break;
			default:
				break;
			}
		}
	}

	if (pkt->command == PKT_POLICY) {
		RTE_LOG(INFO, CHANNEL_MONITOR, "\nProcessing policy request from guest\n");
		update_policy(pkt);
		policy_is_set = 1;
	}

	/* Return value is not checked, as the channel status may have been
	 * set to DISABLED from the management thread.
	 */
	rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_PROCESSING,
			CHANNEL_MGR_CHANNEL_CONNECTED);
	return 0;
}

int
add_channel_to_monitor(struct channel_info **chan_info)
{
	struct channel_info *info = *chan_info;
	struct epoll_event event;

	event.events = EPOLLIN;
	event.data.ptr = info;
	if (epoll_ctl(global_event_fd, EPOLL_CTL_ADD, info->fd, &event) < 0) {
		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to add channel '%s' "
				"to epoll\n", info->channel_path);
		return -1;
	}
	return 0;
}

int
remove_channel_from_monitor(struct channel_info *chan_info)
{
	if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL, chan_info->fd, NULL) < 0) {
		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to remove channel '%s' "
				"from epoll\n", chan_info->channel_path);
		return -1;
	}
	return 0;
}

int
channel_monitor_init(void)
{
	global_event_fd = epoll_create1(0);
	if (global_event_fd < 0) {
		RTE_LOG(ERR, CHANNEL_MONITOR, "Error creating epoll context with "
				"error %s\n", strerror(errno));
		return -1;
	}
	global_events_list = rte_malloc("epoll_events", sizeof(*global_events_list)
			* MAX_EVENTS, RTE_CACHE_LINE_SIZE);
	if (global_events_list == NULL) {
		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to rte_malloc for "
				"epoll events\n");
		return -1;
	}
	return 0;
}
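/*
 * Monitor loop: wait up to 1 ms for events on all registered channels,
 * drop channels that report EPOLLERR or EPOLLHUP, read one complete
 * struct channel_packet per EPOLLIN event and hand it to
 * process_request(), then sleep for time_period_s seconds and re-apply
 * every enabled policy.
 */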
void
run_channel_monitor(void)
{
	while (run_loop) {
		int n_events, i;

		n_events = epoll_wait(global_event_fd, global_events_list,
				MAX_EVENTS, 1);
		if (!run_loop)
			break;
		for (i = 0; i < n_events; i++) {
			struct channel_info *chan_info = (struct channel_info *)
					global_events_list[i].data.ptr;
			if ((global_events_list[i].events & EPOLLERR) ||
					(global_events_list[i].events & EPOLLHUP)) {
				RTE_LOG(DEBUG, CHANNEL_MONITOR, "Remote closed connection for "
						"channel '%s'\n",
						chan_info->channel_path);
				remove_channel(&chan_info);
				continue;
			}
			if (global_events_list[i].events & EPOLLIN) {

				int n_bytes, err = 0;
				struct channel_packet pkt;
				void *buffer = &pkt;
				int buffer_len = sizeof(pkt);

				while (buffer_len > 0) {
					n_bytes = read(chan_info->fd,
							buffer, buffer_len);
					if (n_bytes == buffer_len)
						break;
					if (n_bytes == -1) {
						err = errno;
						RTE_LOG(DEBUG, CHANNEL_MONITOR,
							"Received error on "
							"channel '%s' read: %s\n",
							chan_info->channel_path,
							strerror(err));
						remove_channel(&chan_info);
						break;
					}
					if (n_bytes == 0) {
						/* EOF before a full packet
						 * arrived: remote closed the
						 * channel, so drop it rather
						 * than spin on read().
						 */
						err = EPIPE;
						remove_channel(&chan_info);
						break;
					}
					buffer = (char *)buffer + n_bytes;
					buffer_len -= n_bytes;
				}
				if (!err)
					process_request(&pkt, chan_info);
			}
		}
		rte_delay_us(time_period_s * 1000000);
		if (policy_is_set) {
			for (int j = 0; j < MAX_VMS; j++) {
				if (policies[j].enabled == 1)
					apply_policy(&policies[j]);
			}
		}
	}
}