1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2018 Intel Corporation 3 */ 4 5 #include <unistd.h> 6 #include <fcntl.h> 7 #include <rte_log.h> 8 9 #include "oob_monitor.h" 10 #include "power_manager.h" 11 #include "channel_manager.h" 12 13 static volatile unsigned run_loop = 1; 14 static uint64_t g_branches, g_branch_misses; 15 static int g_active; 16 17 void branch_monitor_exit(void) 18 { 19 run_loop = 0; 20 } 21 22 /* Number of microseconds between each poll */ 23 #define INTERVAL 100 24 #define PRINT_LOOP_COUNT (1000000/INTERVAL) 25 #define IA32_PERFEVTSEL0 0x186 26 #define IA32_PERFEVTSEL1 0x187 27 #define IA32_PERFCTR0 0xc1 28 #define IA32_PERFCTR1 0xc2 29 #define IA32_PERFEVT_BRANCH_HITS 0x05300c4 30 #define IA32_PERFEVT_BRANCH_MISS 0x05300c5 31 32 static float 33 apply_policy(int core) 34 { 35 struct core_info *ci; 36 uint64_t counter = 0; 37 uint64_t branches, branch_misses; 38 uint64_t last_branches, last_branch_misses; 39 int64_t hits_diff, miss_diff; 40 float ratio; 41 int ret; 42 int freq_window_idx, up_count = 0, i; 43 44 g_active = 0; 45 ci = get_core_info(); 46 47 last_branches = ci->cd[core].last_branches; 48 last_branch_misses = ci->cd[core].last_branch_misses; 49 50 ret = pread(ci->cd[core].msr_fd, &counter, 51 sizeof(counter), IA32_PERFCTR0); 52 if (ret < 0) 53 RTE_LOG(ERR, POWER_MANAGER, 54 "unable to read counter for core %u\n", 55 core); 56 branches = counter; 57 58 counter = 0; 59 ret = pread(ci->cd[core].msr_fd, &counter, 60 sizeof(counter), IA32_PERFCTR1); 61 if (ret < 0) 62 RTE_LOG(ERR, POWER_MANAGER, 63 "unable to read counter for core %u\n", 64 core); 65 branch_misses = counter; 66 67 68 ci->cd[core].last_branches = branches; 69 ci->cd[core].last_branch_misses = branch_misses; 70 71 /* 72 * Intentional right shift to make MSB 0 to avoid 73 * possible signed overflow or truncation. 74 */ 75 branches >>= 1; 76 last_branches >>= 1; 77 hits_diff = (int64_t)branches - (int64_t)last_branches; 78 if (hits_diff <= 0) { 79 /* Likely a counter overflow condition, skip this round */ 80 return -1.0; 81 } 82 83 /* 84 * Intentional right shift to make MSB 0 to avoid 85 * possible signed overflow or truncation. 86 */ 87 branch_misses >>= 1; 88 last_branch_misses >>= 1; 89 miss_diff = (int64_t)branch_misses - (int64_t)last_branch_misses; 90 if (miss_diff <= 0) { 91 /* Likely a counter overflow condition, skip this round */ 92 return -1.0; 93 } 94 95 g_branches = hits_diff; 96 g_branch_misses = miss_diff; 97 98 if (hits_diff < (INTERVAL*100)) { 99 /* Likely no workload running on this core. Skip. */ 100 return -1.0; 101 } 102 103 ratio = (float)miss_diff * (float)100 / (float)hits_diff; 104 105 /* 106 * Store the last few directions that the ratio indicates 107 * we should take. If there's on 'up', then we scale up 108 * quickly. If all indicate 'down', only then do we scale 109 * down. Each core_details struct has it's own array. 110 */ 111 freq_window_idx = ci->cd[core].freq_window_idx; 112 if (ratio > ci->branch_ratio_threshold) 113 ci->cd[core].freq_directions[freq_window_idx] = 1; 114 else 115 ci->cd[core].freq_directions[freq_window_idx] = 0; 116 117 freq_window_idx++; 118 freq_window_idx = freq_window_idx & (FREQ_WINDOW_SIZE-1); 119 ci->cd[core].freq_window_idx = freq_window_idx; 120 121 up_count = 0; 122 for (i = 0; i < FREQ_WINDOW_SIZE; i++) 123 up_count += ci->cd[core].freq_directions[i]; 124 125 if (up_count == 0) { 126 if (ci->cd[core].freq_state != FREQ_MIN) { 127 power_manager_scale_core_min(core); 128 ci->cd[core].freq_state = FREQ_MIN; 129 } 130 } else { 131 if (ci->cd[core].freq_state != FREQ_MAX) { 132 power_manager_scale_core_max(core); 133 ci->cd[core].freq_state = FREQ_MAX; 134 } 135 } 136 137 g_active = 1; 138 return ratio; 139 } 140 141 int 142 add_core_to_monitor(int core) 143 { 144 struct core_info *ci; 145 char proc_file[UNIX_PATH_MAX]; 146 int ret; 147 148 ci = get_core_info(); 149 150 if (core < ci->core_count) { 151 long setup; 152 153 snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", core); 154 ci->cd[core].msr_fd = open(proc_file, O_RDWR | O_SYNC); 155 if (ci->cd[core].msr_fd < 0) { 156 RTE_LOG(ERR, POWER_MANAGER, 157 "Error opening MSR file for core %d " 158 "(is msr kernel module loaded?)\n", 159 core); 160 return -1; 161 } 162 /* 163 * Set up branch counters 164 */ 165 setup = IA32_PERFEVT_BRANCH_HITS; 166 ret = pwrite(ci->cd[core].msr_fd, &setup, 167 sizeof(setup), IA32_PERFEVTSEL0); 168 if (ret < 0) { 169 RTE_LOG(ERR, POWER_MANAGER, 170 "unable to set counter for core %u\n", 171 core); 172 return ret; 173 } 174 setup = IA32_PERFEVT_BRANCH_MISS; 175 ret = pwrite(ci->cd[core].msr_fd, &setup, 176 sizeof(setup), IA32_PERFEVTSEL1); 177 if (ret < 0) { 178 RTE_LOG(ERR, POWER_MANAGER, 179 "unable to set counter for core %u\n", 180 core); 181 return ret; 182 } 183 /* 184 * Close the file and re-open as read only so 185 * as not to hog the resource 186 */ 187 close(ci->cd[core].msr_fd); 188 ci->cd[core].msr_fd = open(proc_file, O_RDONLY); 189 if (ci->cd[core].msr_fd < 0) { 190 RTE_LOG(ERR, POWER_MANAGER, 191 "Error opening MSR file for core %d " 192 "(is msr kernel module loaded?)\n", 193 core); 194 return -1; 195 } 196 ci->cd[core].oob_enabled = 1; 197 } 198 return 0; 199 } 200 201 int 202 remove_core_from_monitor(int core) 203 { 204 struct core_info *ci; 205 char proc_file[UNIX_PATH_MAX]; 206 int ret; 207 208 ci = get_core_info(); 209 210 if (ci->cd[core].oob_enabled) { 211 long setup; 212 213 /* 214 * close the msr file, then reopen rw so we can 215 * disable the counters 216 */ 217 if (ci->cd[core].msr_fd != 0) 218 close(ci->cd[core].msr_fd); 219 snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", core); 220 ci->cd[core].msr_fd = open(proc_file, O_RDWR | O_SYNC); 221 if (ci->cd[core].msr_fd < 0) { 222 RTE_LOG(ERR, POWER_MANAGER, 223 "Error opening MSR file for core %d " 224 "(is msr kernel module loaded?)\n", 225 core); 226 return -1; 227 } 228 setup = 0x0; /* clear event */ 229 ret = pwrite(ci->cd[core].msr_fd, &setup, 230 sizeof(setup), IA32_PERFEVTSEL0); 231 if (ret < 0) { 232 RTE_LOG(ERR, POWER_MANAGER, 233 "unable to set counter for core %u\n", 234 core); 235 return ret; 236 } 237 setup = 0x0; /* clear event */ 238 ret = pwrite(ci->cd[core].msr_fd, &setup, 239 sizeof(setup), IA32_PERFEVTSEL1); 240 if (ret < 0) { 241 RTE_LOG(ERR, POWER_MANAGER, 242 "unable to set counter for core %u\n", 243 core); 244 return ret; 245 } 246 247 close(ci->cd[core].msr_fd); 248 ci->cd[core].msr_fd = 0; 249 ci->cd[core].oob_enabled = 0; 250 } 251 return 0; 252 } 253 254 int 255 branch_monitor_init(void) 256 { 257 return 0; 258 } 259 260 void 261 run_branch_monitor(void) 262 { 263 struct core_info *ci; 264 int print = 0; 265 float ratio; 266 int printed; 267 int reads = 0; 268 269 ci = get_core_info(); 270 271 while (run_loop) { 272 273 if (!run_loop) 274 break; 275 usleep(INTERVAL); 276 int j; 277 print++; 278 printed = 0; 279 for (j = 0; j < ci->core_count; j++) { 280 if (ci->cd[j].oob_enabled) { 281 ratio = apply_policy(j); 282 if ((print > PRINT_LOOP_COUNT) && (g_active)) { 283 printf(" %d: %.4f {%lu} {%d}", j, 284 ratio, g_branches, 285 reads); 286 printed = 1; 287 reads = 0; 288 } else { 289 reads++; 290 } 291 } 292 } 293 if (print > PRINT_LOOP_COUNT) { 294 if (printed) 295 printf("\n"); 296 print = 0; 297 } 298 } 299 } 300