/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <signal.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/epoll.h>
#include <sys/queue.h>
#include <sys/time.h>

#include <rte_log.h>
#include <rte_memory.h>
#include <rte_malloc.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_pmd_i40e.h>

#include <libvirt/libvirt.h>
#include "channel_monitor.h"
#include "channel_commands.h"
#include "channel_manager.h"
#include "power_manager.h"

#define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1

#define MAX_EVENTS 256

uint64_t vsi_pkt_count_prev[384];
uint64_t rdtsc_prev[384];

double time_period_ms = 1;
static volatile unsigned run_loop = 1;
static int global_event_fd;
static unsigned int policy_is_set;
static struct epoll_event *global_events_list;
static struct policy policies[MAX_VMS];

void channel_monitor_exit(void)
{
	run_loop = 0;
	rte_free(global_events_list);
}

static void
core_share(int pNo, int z, int x, int t)
{
	if (policies[pNo].core_share[z].pcpu == lvm_info[x].pcpus[t]) {
		if (strcmp(policies[pNo].pkt.vm_name,
				lvm_info[x].vm_name) != 0) {
			policies[pNo].core_share[z].status = 1;
			power_manager_scale_core_max(
					policies[pNo].core_share[z].pcpu);
		}
	}
}

static void
core_share_status(int pNo)
{
	int noVms, noVcpus, z, x, t;

	get_all_vm(&noVms, &noVcpus);

	/* Reset Core Share Status. */
	for (z = 0; z < noVcpus; z++)
		policies[pNo].core_share[z].status = 0;

	/* Foreach vcpu in a policy. */
	for (z = 0; z < policies[pNo].pkt.num_vcpu; z++) {
		/* Foreach VM on the platform. */
		for (x = 0; x < noVms; x++) {
			/* Foreach vcpu of VMs on platform. */
			for (t = 0; t < lvm_info[x].num_cpus; t++)
				core_share(pNo, z, x, t);
		}
	}
}

static void
get_pcpu_to_control(struct policy *pol)
{
	/* Convert vcpu to pcpu. */
	struct vm_info info;
	int pcpu, count;
	uint64_t mask_u64b;

	RTE_LOG(INFO, CHANNEL_MONITOR, "Looking for pcpu for %s\n",
			pol->pkt.vm_name);
	get_info_vm(pol->pkt.vm_name, &info);

	for (count = 0; count < pol->pkt.num_vcpu; count++) {
		mask_u64b = info.pcpu_mask[pol->pkt.vcpu_to_control[count]];
		for (pcpu = 0; mask_u64b; mask_u64b &= ~(1ULL << pcpu++)) {
			if ((mask_u64b >> pcpu) & 1)
				pol->core_share[count].pcpu = pcpu;
		}
	}
}
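/*
 * Note on the mask walk in get_pcpu_to_control() above: each iteration
 * clears bit 'pcpu' of mask_u64b and records pcpu whenever that bit was
 * set, so a multi-bit affinity mask ends up storing only its highest set
 * bit in core_share[count].pcpu. Worked example (illustrative values):
 * for an affinity mask of 0x18 (pcpus 3 and 4), pcpu 3 is stored first
 * and then overwritten, leaving core_share[count].pcpu == 4.
 */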
static int
get_pfid(struct policy *pol)
{
	int i, x, ret = 0, nb_ports;

	nb_ports = rte_eth_dev_count();
	for (i = 0; i < pol->pkt.nb_mac_to_monitor; i++) {

		for (x = 0; x < nb_ports; x++) {
			ret = rte_pmd_i40e_query_vfid_by_mac(x,
				(struct ether_addr *)&(pol->pkt.vfid[i]));
			if (ret != -EINVAL) {
				pol->port[i] = x;
				break;
			}
		}
		/* Errno constants are positive, so compare against the
		 * negated values returned by the query above.
		 */
		if (ret == -EINVAL || ret == -ENOTSUP || ret == -ENODEV) {
			RTE_LOG(INFO, CHANNEL_MONITOR,
				"Error with policy: MAC not found on "
				"attached ports\n");
			pol->enabled = 0;
			return ret;
		}
		pol->pfid[i] = ret;
	}
	return 1;
}

static int
update_policy(struct channel_packet *pkt)
{
	unsigned int updated = 0;
	int i;

	for (i = 0; i < MAX_VMS; i++) {
		if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
			policies[i].pkt = *pkt;
			get_pcpu_to_control(&policies[i]);
			/* get_pfid() returns a negative errno on failure. */
			if (get_pfid(&policies[i]) < 0) {
				updated = 1;
				break;
			}
			core_share_status(i);
			policies[i].enabled = 1;
			updated = 1;
		}
	}
	if (!updated) {
		for (i = 0; i < MAX_VMS; i++) {
			if (policies[i].enabled == 0) {
				policies[i].pkt = *pkt;
				get_pcpu_to_control(&policies[i]);
				if (get_pfid(&policies[i]) < 0)
					break;
				core_share_status(i);
				policies[i].enabled = 1;
				break;
			}
		}
	}
	return 0;
}

static uint64_t
get_pkt_diff(struct policy *pol)
{
	uint64_t vsi_pkt_count,
		vsi_pkt_total = 0,
		vsi_pkt_count_prev_total = 0;
	double rdtsc_curr, rdtsc_diff, diff;
	int x;
	struct rte_eth_stats vf_stats;

	for (x = 0; x < pol->pkt.nb_mac_to_monitor; x++) {

		/* Read VSI stats for this VF. */
		if (rte_pmd_i40e_get_vf_stats(x, pol->pfid[x], &vf_stats) == 0)
			vsi_pkt_count = vf_stats.ipackets;
		else
			vsi_pkt_count = -1;

		vsi_pkt_total += vsi_pkt_count;

		vsi_pkt_count_prev_total += vsi_pkt_count_prev[pol->pfid[x]];
		vsi_pkt_count_prev[pol->pfid[x]] = vsi_pkt_count;
	}

	rdtsc_curr = rte_rdtsc_precise();
	rdtsc_diff = rdtsc_curr - rdtsc_prev[pol->pfid[x-1]];
	rdtsc_prev[pol->pfid[x-1]] = rdtsc_curr;

	diff = (vsi_pkt_total - vsi_pkt_count_prev_total) *
			((double)rte_get_tsc_hz() / rdtsc_diff);

	return diff;
}
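/*
 * The value computed by get_pkt_diff() above is an estimated packet rate
 * in packets per second: the received-packet delta over the sample
 * window is scaled by the TSC frequency over the elapsed TSC ticks,
 *
 *	rate = (pkts_now - pkts_prev) * rte_get_tsc_hz() / (tsc_now - tsc_prev)
 *
 * For example (illustrative numbers only), a delta of 500,000 packets
 * over half a second's worth of TSC ticks gives 1,000,000 pkts/s, which
 * is then compared against the traffic-policy thresholds below.
 */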
static void
apply_traffic_profile(struct policy *pol)
{
	int count;
	uint64_t diff = 0;

	diff = get_pkt_diff(pol);

	RTE_LOG(INFO, CHANNEL_MONITOR, "Applying traffic profile\n");

	if (diff >= (pol->pkt.traffic_policy.max_max_packet_thresh)) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_max(
						pol->core_share[count].pcpu);
		}
	} else if (diff >= (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_med(
						pol->core_share[count].pcpu);
		}
	} else if (diff < (pol->pkt.traffic_policy.avg_max_packet_thresh)) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_min(
						pol->core_share[count].pcpu);
		}
	}
}

static void
apply_time_profile(struct policy *pol)
{
	int count, x;
	struct timeval tv;
	struct tm *ptm;
	char time_string[40];

	/* Obtain the time of day, and convert it to a tm struct. */
	gettimeofday(&tv, NULL);
	ptm = localtime(&tv.tv_sec);
	/* Format the date and time, down to a single second. */
	strftime(time_string, sizeof(time_string), "%Y-%m-%d %H:%M:%S", ptm);

	for (x = 0; x < HOURS; x++) {

		if (ptm->tm_hour == pol->pkt.timer_policy.busy_hours[x]) {
			for (count = 0; count < pol->pkt.num_vcpu; count++) {
				if (pol->core_share[count].status != 1) {
					power_manager_scale_core_max(
						pol->core_share[count].pcpu);
					RTE_LOG(INFO, CHANNEL_MONITOR,
						"Scaling up core %d to max\n",
						pol->core_share[count].pcpu);
				}
			}
			break;
		} else if (ptm->tm_hour ==
				pol->pkt.timer_policy.quiet_hours[x]) {
			for (count = 0; count < pol->pkt.num_vcpu; count++) {
				if (pol->core_share[count].status != 1) {
					power_manager_scale_core_min(
						pol->core_share[count].pcpu);
					RTE_LOG(INFO, CHANNEL_MONITOR,
						"Scaling down core %d to min\n",
						pol->core_share[count].pcpu);
				}
			}
			break;
		} else if (ptm->tm_hour ==
			pol->pkt.timer_policy.hours_to_use_traffic_profile[x]) {
			apply_traffic_profile(pol);
			break;
		}
	}
}

static void
apply_workload_profile(struct policy *pol)
{
	int count;

	if (pol->pkt.workload == HIGH) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_max(
						pol->core_share[count].pcpu);
		}
	} else if (pol->pkt.workload == MEDIUM) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_med(
						pol->core_share[count].pcpu);
		}
	} else if (pol->pkt.workload == LOW) {
		for (count = 0; count < pol->pkt.num_vcpu; count++) {
			if (pol->core_share[count].status != 1)
				power_manager_scale_core_min(
						pol->core_share[count].pcpu);
		}
	}
}

static void
apply_policy(struct policy *pol)
{
	struct channel_packet *pkt = &pol->pkt;

	/* Check which policy type to use. */
	if (pkt->policy_to_use == TRAFFIC)
		apply_traffic_profile(pol);
	else if (pkt->policy_to_use == TIME)
		apply_time_profile(pol);
	else if (pkt->policy_to_use == WORKLOAD)
		apply_workload_profile(pol);
}
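/*
 * Guest-side usage sketch (illustrative only; "ubuntu_vm" and the hour
 * values are made-up): a guest that wants the time-based profile above
 * applied to two of its vcpus would populate a channel_packet roughly
 * as follows before sending it over its channel:
 *
 *	struct channel_packet pkt = { 0 };
 *
 *	pkt.command = PKT_POLICY;
 *	pkt.policy_to_use = TIME;
 *	strcpy(pkt.vm_name, "ubuntu_vm");
 *	pkt.num_vcpu = 2;
 *	pkt.vcpu_to_control[0] = 0;
 *	pkt.vcpu_to_control[1] = 1;
 *	pkt.timer_policy.busy_hours[0] = 9;	scale to max from 09:00
 *	pkt.timer_policy.quiet_hours[0] = 3;	scale to min from 03:00
 */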
static int
process_request(struct channel_packet *pkt, struct channel_info *chan_info)
{
	uint64_t core_mask;

	if (chan_info == NULL)
		return -1;

	if (rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_CONNECTED,
			CHANNEL_MGR_CHANNEL_PROCESSING) == 0)
		return -1;

	if (pkt->command == CPU_POWER) {
		core_mask = get_pcpus_mask(chan_info, pkt->resource_id);
		if (core_mask == 0) {
			RTE_LOG(ERR, CHANNEL_MONITOR, "Error getting physical CPU mask "
				"for channel '%s' using vCPU(%u)\n",
				chan_info->channel_path,
				(unsigned)pkt->unit);
			return -1;
		}
		if (__builtin_popcountll(core_mask) == 1) {

			unsigned core_num = __builtin_ffsll(core_mask) - 1;

			switch (pkt->unit) {
			case(CPU_POWER_SCALE_MIN):
				power_manager_scale_core_min(core_num);
				break;
			case(CPU_POWER_SCALE_MAX):
				power_manager_scale_core_max(core_num);
				break;
			case(CPU_POWER_SCALE_DOWN):
				power_manager_scale_core_down(core_num);
				break;
			case(CPU_POWER_SCALE_UP):
				power_manager_scale_core_up(core_num);
				break;
			case(CPU_POWER_ENABLE_TURBO):
				power_manager_enable_turbo_core(core_num);
				break;
			case(CPU_POWER_DISABLE_TURBO):
				power_manager_disable_turbo_core(core_num);
				break;
			default:
				break;
			}
		} else {
			switch (pkt->unit) {
			case(CPU_POWER_SCALE_MIN):
				power_manager_scale_mask_min(core_mask);
				break;
			case(CPU_POWER_SCALE_MAX):
				power_manager_scale_mask_max(core_mask);
				break;
			case(CPU_POWER_SCALE_DOWN):
				power_manager_scale_mask_down(core_mask);
				break;
			case(CPU_POWER_SCALE_UP):
				power_manager_scale_mask_up(core_mask);
				break;
			case(CPU_POWER_ENABLE_TURBO):
				power_manager_enable_turbo_mask(core_mask);
				break;
			case(CPU_POWER_DISABLE_TURBO):
				power_manager_disable_turbo_mask(core_mask);
				break;
			default:
				break;
			}

		}
	}

	if (pkt->command == PKT_POLICY) {
		RTE_LOG(INFO, CHANNEL_MONITOR, "\nProcessing policy request from guest\n");
		update_policy(pkt);
		policy_is_set = 1;
	}

	/* Return is not checked as channel status may have been set to DISABLED
	 * from management thread.
	 */
	rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_PROCESSING,
			CHANNEL_MGR_CHANNEL_CONNECTED);
	return 0;
}

int
add_channel_to_monitor(struct channel_info **chan_info)
{
	struct channel_info *info = *chan_info;
	struct epoll_event event;

	event.events = EPOLLIN;
	event.data.ptr = info;
	if (epoll_ctl(global_event_fd, EPOLL_CTL_ADD, info->fd, &event) < 0) {
		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to add channel '%s' "
			"to epoll\n", info->channel_path);
		return -1;
	}
	return 0;
}

int
remove_channel_from_monitor(struct channel_info *chan_info)
{
	if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL, chan_info->fd, NULL) < 0) {
		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to remove channel '%s' "
			"from epoll\n", chan_info->channel_path);
		return -1;
	}
	return 0;
}

int
channel_monitor_init(void)
{
	global_event_fd = epoll_create1(0);
	/* epoll_create1() returns -1 on failure, not 0. */
	if (global_event_fd < 0) {
		RTE_LOG(ERR, CHANNEL_MONITOR, "Error creating epoll context with "
			"error %s\n", strerror(errno));
		return -1;
	}
	global_events_list = rte_malloc("epoll_events", sizeof(*global_events_list)
			* MAX_EVENTS, RTE_CACHE_LINE_SIZE);
	if (global_events_list == NULL) {
		RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to rte_malloc for "
			"epoll events\n");
		return -1;
	}
	return 0;
}
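/*
 * Lifecycle sketch (illustrative): a management thread is expected to
 * call channel_monitor_init() once, register channels with
 * add_channel_to_monitor() as guests connect, and dedicate a thread to
 * run_channel_monitor() below; channel_monitor_exit() clears run_loop
 * to stop the monitor and frees the events list.
 *
 *	if (channel_monitor_init() < 0)
 *		rte_exit(EXIT_FAILURE, "Unable to init channel monitor\n");
 *	...
 *	run_channel_monitor();
 */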
void
run_channel_monitor(void)
{
	while (run_loop) {
		int n_events, i;

		n_events = epoll_wait(global_event_fd, global_events_list,
				MAX_EVENTS, 1);
		if (!run_loop)
			break;
		for (i = 0; i < n_events; i++) {
			struct channel_info *chan_info = (struct channel_info *)
					global_events_list[i].data.ptr;
			if ((global_events_list[i].events & EPOLLERR) ||
					(global_events_list[i].events & EPOLLHUP)) {
				RTE_LOG(DEBUG, CHANNEL_MONITOR, "Remote closed connection for "
						"channel '%s'\n",
						chan_info->channel_path);
				remove_channel(&chan_info);
				continue;
			}
			if (global_events_list[i].events & EPOLLIN) {

				int n_bytes, err = 0;
				struct channel_packet pkt;
				void *buffer = &pkt;
				int buffer_len = sizeof(pkt);

				/* A packet may arrive in several reads; keep
				 * reading until a full channel_packet has
				 * been assembled.
				 */
				while (buffer_len > 0) {
					n_bytes = read(chan_info->fd,
							buffer, buffer_len);
					if (n_bytes == buffer_len)
						break;
					if (n_bytes == 0) {
						/* read() returns 0 on EOF: the
						 * remote end closed the channel
						 * mid-packet, so stop reading
						 * rather than spinning forever.
						 */
						err = 1;
						remove_channel(&chan_info);
						break;
					}
					if (n_bytes == -1) {
						err = errno;
						RTE_LOG(DEBUG, CHANNEL_MONITOR,
							"Received error on "
							"channel '%s' read: %s\n",
							chan_info->channel_path,
							strerror(err));
						remove_channel(&chan_info);
						break;
					}
					buffer = (char *)buffer + n_bytes;
					buffer_len -= n_bytes;
				}
				if (!err)
					process_request(&pkt, chan_info);
			}
		}
		rte_delay_us(time_period_ms * 1000);
		if (policy_is_set) {
			int j;

			for (j = 0; j < MAX_VMS; j++) {
				if (policies[j].enabled == 1)
					apply_policy(&policies[j]);
			}
		}
	}
}
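/*
 * Cadence note (assuming the default time_period_ms of 1): each pass of
 * the loop above blocks in epoll_wait() for at most 1 ms and then
 * busy-waits another time_period_ms via rte_delay_us(), so once
 * policy_is_set is non-zero every enabled policy is re-applied roughly
 * every couple of milliseconds.
 */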