xref: /dpdk/examples/vm_power_manager/oob_monitor_x86.c (revision f43d3dbbd90c9e195d26d18ac7da9ca2854c3f1e)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2018 Intel Corporation
3  */
4 
5 #include <unistd.h>
6 #include <fcntl.h>
7 #include <rte_log.h>
8 
9 #include "oob_monitor.h"
10 #include "power_manager.h"
11 #include "channel_manager.h"
12 
/* Polling-loop run flag; cleared by branch_monitor_exit() to stop the loop. */
static volatile unsigned int run_loop = 1;

/*
 * Deltas computed by the most recent apply_policy() call that got past its
 * counter sanity checks (branch count and branch-miss count respectively).
 */
static uint64_t g_branches;
static uint64_t g_branch_misses;

/* 1 when the last apply_policy() call reached a scaling decision, else 0. */
static int g_active;
16 
17 void branch_monitor_exit(void)
18 {
19 	run_loop = 0;
20 }
21 
/* Polling period: number of microseconds between each poll. */
#define INTERVAL 100

/* Poll iterations per status print: one second's worth of intervals. */
#define PRINT_LOOP_COUNT (1000000 / INTERVAL)

/* MSR addresses, used as file offsets into /dev/cpu/<n>/msr. */
#define IA32_PERFEVTSEL0 0x186
#define IA32_PERFEVTSEL1 0x187
#define IA32_PERFCTR0    0xc1
#define IA32_PERFCTR1    0xc2

/*
 * Event-select values programmed into IA32_PERFEVTSEL0/1 when a core is
 * added to the monitor.
 * NOTE(review): presumably event 0xc4/0xc5 (branch instructions retired /
 * branch misses retired) with the enable and USR/OS bits set - confirm
 * against the Intel SDM.
 */
#define IA32_PERFEVT_BRANCH_HITS 0x5300c4
#define IA32_PERFEVT_BRANCH_MISS 0x5300c5
31 
32 static float
33 apply_policy(int core)
34 {
35 	struct core_info *ci;
36 	uint64_t counter = 0;
37 	uint64_t branches, branch_misses;
38 	uint64_t last_branches, last_branch_misses;
39 	int64_t hits_diff, miss_diff;
40 	float ratio;
41 	int ret;
42 	int freq_window_idx, up_count = 0, i;
43 
44 	g_active = 0;
45 	ci = get_core_info();
46 
47 	last_branches = ci->cd[core].last_branches;
48 	last_branch_misses = ci->cd[core].last_branch_misses;
49 
50 	ret = pread(ci->cd[core].msr_fd, &counter,
51 			sizeof(counter), IA32_PERFCTR0);
52 	if (ret < 0)
53 		RTE_LOG(ERR, POWER_MANAGER,
54 				"unable to read counter for core %u\n",
55 				core);
56 	branches = counter;
57 
58 	counter = 0;
59 	ret = pread(ci->cd[core].msr_fd, &counter,
60 			sizeof(counter), IA32_PERFCTR1);
61 	if (ret < 0)
62 		RTE_LOG(ERR, POWER_MANAGER,
63 				"unable to read counter for core %u\n",
64 				core);
65 	branch_misses = counter;
66 
67 
68 	ci->cd[core].last_branches = branches;
69 	ci->cd[core].last_branch_misses = branch_misses;
70 
71 	/*
72 	 * Intentional right shift to make MSB 0 to avoid
73 	 * possible signed overflow or truncation.
74 	 */
75 	branches >>= 1;
76 	last_branches >>= 1;
77 	hits_diff = (int64_t)branches - (int64_t)last_branches;
78 	if (hits_diff <= 0) {
79 		/* Likely a counter overflow condition, skip this round */
80 		return -1.0;
81 	}
82 
83 	/*
84 	 * Intentional right shift to make MSB 0 to avoid
85 	 * possible signed overflow or truncation.
86 	 */
87 	branch_misses >>= 1;
88 	last_branch_misses >>= 1;
89 	miss_diff = (int64_t)branch_misses - (int64_t)last_branch_misses;
90 	if (miss_diff <= 0) {
91 		/* Likely a counter overflow condition, skip this round */
92 		return -1.0;
93 	}
94 
95 	g_branches = hits_diff;
96 	g_branch_misses = miss_diff;
97 
98 	if (hits_diff < (INTERVAL*100)) {
99 		/* Likely no workload running on this core. Skip. */
100 		return -1.0;
101 	}
102 
103 	ratio = (float)miss_diff * (float)100 / (float)hits_diff;
104 
105 	/*
106 	 * Store the last few directions that the ratio indicates
107 	 * we should take. If there's on 'up', then we scale up
108 	 * quickly. If all indicate 'down', only then do we scale
109 	 * down. Each core_details struct has it's own array.
110 	 */
111 	freq_window_idx = ci->cd[core].freq_window_idx;
112 	if (ratio > ci->branch_ratio_threshold)
113 		ci->cd[core].freq_directions[freq_window_idx] = 1;
114 	else
115 		ci->cd[core].freq_directions[freq_window_idx] = 0;
116 
117 	freq_window_idx++;
118 	freq_window_idx = freq_window_idx & (FREQ_WINDOW_SIZE-1);
119 	ci->cd[core].freq_window_idx = freq_window_idx;
120 
121 	up_count = 0;
122 	for (i = 0; i < FREQ_WINDOW_SIZE; i++)
123 		up_count +=  ci->cd[core].freq_directions[i];
124 
125 	if (up_count == 0) {
126 		if (ci->cd[core].freq_state != FREQ_MIN) {
127 			power_manager_scale_core_min(core);
128 			ci->cd[core].freq_state = FREQ_MIN;
129 		}
130 	} else {
131 		if (ci->cd[core].freq_state != FREQ_MAX) {
132 			power_manager_scale_core_max(core);
133 			ci->cd[core].freq_state = FREQ_MAX;
134 		}
135 	}
136 
137 	g_active = 1;
138 	return ratio;
139 }
140 
141 int
142 add_core_to_monitor(int core)
143 {
144 	struct core_info *ci;
145 	char proc_file[UNIX_PATH_MAX];
146 	int ret;
147 
148 	ci = get_core_info();
149 
150 	if (core < ci->core_count) {
151 		long setup;
152 
153 		snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", core);
154 		ci->cd[core].msr_fd = open(proc_file, O_RDWR | O_SYNC);
155 		if (ci->cd[core].msr_fd < 0) {
156 			RTE_LOG(ERR, POWER_MANAGER,
157 					"Error opening MSR file for core %d "
158 					"(is msr kernel module loaded?)\n",
159 					core);
160 			return -1;
161 		}
162 		/*
163 		 * Set up branch counters
164 		 */
165 		setup = IA32_PERFEVT_BRANCH_HITS;
166 		ret = pwrite(ci->cd[core].msr_fd, &setup,
167 				sizeof(setup), IA32_PERFEVTSEL0);
168 		if (ret < 0) {
169 			RTE_LOG(ERR, POWER_MANAGER,
170 					"unable to set counter for core %u\n",
171 					core);
172 			return ret;
173 		}
174 		setup = IA32_PERFEVT_BRANCH_MISS;
175 		ret = pwrite(ci->cd[core].msr_fd, &setup,
176 				sizeof(setup), IA32_PERFEVTSEL1);
177 		if (ret < 0) {
178 			RTE_LOG(ERR, POWER_MANAGER,
179 					"unable to set counter for core %u\n",
180 					core);
181 			return ret;
182 		}
183 		/*
184 		 * Close the file and re-open as read only so
185 		 * as not to hog the resource
186 		 */
187 		close(ci->cd[core].msr_fd);
188 		ci->cd[core].msr_fd = open(proc_file, O_RDONLY);
189 		if (ci->cd[core].msr_fd < 0) {
190 			RTE_LOG(ERR, POWER_MANAGER,
191 					"Error opening MSR file for core %d "
192 					"(is msr kernel module loaded?)\n",
193 					core);
194 			return -1;
195 		}
196 		ci->cd[core].oob_enabled = 1;
197 	}
198 	return 0;
199 }
200 
201 int
202 remove_core_from_monitor(int core)
203 {
204 	struct core_info *ci;
205 	char proc_file[UNIX_PATH_MAX];
206 	int ret;
207 
208 	ci = get_core_info();
209 
210 	if (ci->cd[core].oob_enabled) {
211 		long setup;
212 
213 		/*
214 		 * close the msr file, then reopen rw so we can
215 		 * disable the counters
216 		 */
217 		if (ci->cd[core].msr_fd != 0)
218 			close(ci->cd[core].msr_fd);
219 		snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", core);
220 		ci->cd[core].msr_fd = open(proc_file, O_RDWR | O_SYNC);
221 		if (ci->cd[core].msr_fd < 0) {
222 			RTE_LOG(ERR, POWER_MANAGER,
223 					"Error opening MSR file for core %d "
224 					"(is msr kernel module loaded?)\n",
225 					core);
226 			return -1;
227 		}
228 		setup = 0x0; /* clear event */
229 		ret = pwrite(ci->cd[core].msr_fd, &setup,
230 				sizeof(setup), IA32_PERFEVTSEL0);
231 		if (ret < 0) {
232 			RTE_LOG(ERR, POWER_MANAGER,
233 					"unable to set counter for core %u\n",
234 					core);
235 			return ret;
236 		}
237 		setup = 0x0; /* clear event */
238 		ret = pwrite(ci->cd[core].msr_fd, &setup,
239 				sizeof(setup), IA32_PERFEVTSEL1);
240 		if (ret < 0) {
241 			RTE_LOG(ERR, POWER_MANAGER,
242 					"unable to set counter for core %u\n",
243 					core);
244 			return ret;
245 		}
246 
247 		close(ci->cd[core].msr_fd);
248 		ci->cd[core].msr_fd = 0;
249 		ci->cd[core].oob_enabled = 0;
250 	}
251 	return 0;
252 }
253 
/*
 * Initialise the branch monitor. This implementation needs no set-up,
 * so it always reports success (0).
 */
int
branch_monitor_init(void)
{
	return 0;
}
259 
260 void
261 run_branch_monitor(void)
262 {
263 	struct core_info *ci;
264 	int print = 0;
265 	float ratio;
266 	int printed;
267 	int reads = 0;
268 
269 	ci = get_core_info();
270 
271 	while (run_loop) {
272 
273 		if (!run_loop)
274 			break;
275 		usleep(INTERVAL);
276 		int j;
277 		print++;
278 		printed = 0;
279 		for (j = 0; j < ci->core_count; j++) {
280 			if (ci->cd[j].oob_enabled) {
281 				ratio = apply_policy(j);
282 				if ((print > PRINT_LOOP_COUNT) && (g_active)) {
283 					printf("  %d: %.4f {%lu} {%d}", j,
284 							ratio, g_branches,
285 							reads);
286 					printed = 1;
287 					reads = 0;
288 				} else {
289 					reads++;
290 				}
291 			}
292 		}
293 		if (print > PRINT_LOOP_COUNT) {
294 			if (printed)
295 				printf("\n");
296 			print = 0;
297 		}
298 	}
299 }
300