#! /usr/bin/env python3
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2020 Microsoft Corporation

"""Script to query and setup huge pages for DPDK applications."""

import argparse
import os
import re
import subprocess
import sys
import typing as T
from math import log2

# Standard binary prefix
BINARY_PREFIX = "KMG"

# systemd mount point for huge pages
HUGE_MOUNT = "/dev/hugepages"
# default directory for non-NUMA huge pages
NO_NUMA_HUGE_DIR = "/sys/kernel/mm/hugepages"
# default base directory for NUMA nodes
NUMA_NODE_BASE_DIR = "/sys/devices/system/node"
# procfs paths
MEMINFO_PATH = "/proc/meminfo"
MOUNTS_PATH = "/proc/mounts"


class HugepageMount:
    """Mount operations for huge page filesystem.

    Tracks one mount point together with a flag saying whether a huge page
    filesystem is currently mounted there, so that mount/unmount requests
    that are already satisfied become no-ops.
    """

    def __init__(self, path: str, mounted: bool):
        self.path = path
        # current mount status
        self.mounted = mounted

    def mount(
        self, pagesize_kb: int, user: T.Optional[str], group: T.Optional[str]
    ) -> None:
        """Mount the huge TLB file system (if not already mounted).

        pagesize_kb is converted to bytes for the ``pagesize=`` option;
        user and group, when given, are passed as ``uid=``/``gid=`` options.
        Raises subprocess.CalledProcessError if the mount command fails.
        """
        if self.mounted:
            return
        cmd = ["mount", "-t", "hugetlbfs"]
        cmd += ["-o", f"pagesize={pagesize_kb * 1024}"]
        if user is not None:
            cmd += ["-o", f"uid={user}"]
        if group is not None:
            cmd += ["-o", f"gid={group}"]
        cmd += ["nodev", self.path]

        subprocess.run(cmd, check=True)
        self.mounted = True

    def unmount(self) -> None:
        """Unmount the huge TLB file system (if mounted).

        Raises subprocess.CalledProcessError if the umount command fails.
        """
        if self.mounted:
            subprocess.run(["umount", self.path], check=True)
            self.mounted = False


class HugepageRes:
    """Huge page reserve operations. Can be NUMA-node-specific.

    Wraps one huge page directory; indexing the object by a page size in kB
    reads or writes the ``nr_hugepages`` file for that size.
    """

    def __init__(self, path: str, node: T.Optional[int] = None):
        self.path = path
        # if this is a per-NUMA node huge page dir, store the node number
        self.node = node
        self.valid_page_sizes = self._get_valid_page_sizes()

    def _get_valid_page_sizes(self) -> T.List[int]:
        """Extract valid huge page sizes (in kB) from the directory entries"""
        # entries are named "hugepages-<size>kB"; the part after the dash is
        # parsed as a memory size
        return [get_memsize(d.split("-")[1]) for d in os.listdir(self.path)]

    def _nr_pages_path(self, sz: int) -> str:
        """Return the nr_hugepages path for a page size, or raise ValueError"""
        if sz not in self.valid_page_sizes:
            raise ValueError(
                f"Invalid page size {sz}. " f"Valid sizes: {self.valid_page_sizes}"
            )
        return os.path.join(self.path, f"hugepages-{sz}kB", "nr_hugepages")

    def __getitem__(self, sz: int) -> int:
        """Get current number of reserved pages of specified size"""
        with open(self._nr_pages_path(sz), encoding="utf-8") as f:
            return int(f.read())

    def __setitem__(self, sz: int, nr_pages: int) -> None:
        """Set number of reserved pages of specified size"""
        with open(self._nr_pages_path(sz), "w", encoding="utf-8") as f:
            f.write(f"{nr_pages}\n")


def fmt_memsize(kb: int) -> str:
    """Format memory size in kB into conventional format.

    The value is scaled down to the largest prefix in BINARY_PREFIX that
    keeps it at least 1, and truncated to a whole number. Sizes beyond the
    largest prefix stay expressed in that prefix (e.g. 1 TiB -> "1024Gb"),
    and non-positive sizes are reported in the smallest prefix.
    """
    if kb > 0:
        # clamp so that sizes of 1 TiB and above do not index past the table
        logk = min(int(log2(kb) / 10), len(BINARY_PREFIX) - 1)
    else:
        # log2() is undefined for zero and negative values
        logk = 0
    suffix = BINARY_PREFIX[logk]
    unit = 2 ** (logk * 10)
    return f"{kb // unit}{suffix}b"


def get_memsize(arg: str) -> int:
    """Convert memory size with suffix to kB.

    Accepts a decimal number optionally followed by K, M or G (any case) and
    an optional trailing 'b'. A number without a prefix is taken as bytes
    and rounded down to whole kB. Raises ValueError for anything else,
    including an empty string.
    """
    # arg may have a 'b' at the end; slicing keeps the empty string safe
    size = arg[:-1] if arg[-1:].lower() == "b" else arg
    match = re.match(rf"(\d+)([{BINARY_PREFIX}]?)$", size.upper())
    if match is None:
        raise ValueError(f"{arg} is not a valid size")
    # integer maths throughout: no float rounding on large sizes
    num = int(match.group(1))
    suffix = match.group(2)
    if not suffix:
        return num // 1024
    idx = BINARY_PREFIX.find(suffix)
    return num * (2 ** (idx * 10))


def is_numa() -> bool:
    """Check if NUMA is supported (the NUMA node base directory exists)"""
    return os.path.exists(NUMA_NODE_BASE_DIR)


def default_pagesize() -> int:
    """Get default huge page size from /proc/meminfo.

    Returns the numeric value of the "Hugepagesize" line; raises KeyError
    if no such line is present.
    """
    key = "Hugepagesize"
    with open(MEMINFO_PATH, encoding="utf-8") as meminfo:
        for line in meminfo:
            if line.startswith(f"{key}:"):
                return int(line.split()[1])
    raise KeyError(f'"{key}" not found in {MEMINFO_PATH}')


def get_hugetlbfs_mountpoints() -> T.List[str]:
    """Get list of where huge page filesystem is mounted"""
    mounted: T.List[str] = []
    with open(MOUNTS_PATH, encoding="utf-8") as mounts:
        for line in mounts:
            fields = line.split()
            # skip blank or truncated lines instead of failing on them
            if len(fields) < 3 or fields[2] != "hugetlbfs":
                continue
            mounted.append(fields[1])
    return mounted


def print_row(cells: T.Tuple[str, ...], widths: T.List[int]) -> None:
    """Print a row of a table with the given column widths.

    The first column is left-justified, the remaining ones right-justified;
    every cell is followed by a two-space separator.
    """
    first, *rest = cells
    w_first, *w_rest = widths
    first_end = " " * 2
    rest_end = " " * 2

    print(first.ljust(w_first), end=first_end)
    for cell, width in zip(rest, w_rest):
        print(cell.rjust(width), end=rest_end)
    print()


def print_hp_status(hp_res: T.List[HugepageRes]) -> None:
    """Display status of huge page reservations.

    Prints one table row per page size that has pages reserved, with an
    extra leading "Node" column when NUMA is supported.
    """
    numa = is_numa()

    headers: T.Tuple[str, ...]
    if numa:
        headers = "Node", "Pages", "Size", "Total"
    else:
        headers = "Pages", "Size", "Total"

    # if NUMA is disabled, we know there's only one huge page dir
    rows: T.List[T.Tuple[str, ...]] = []
    for hp in hp_res if numa else hp_res[:1]:
        for sz in hp.valid_page_sizes:
            # the count is needed several times, so read it only once
            nr_pages = hp[sz]
            # include this row only if there are pages reserved
            if not nr_pages:
                continue
            row: T.Tuple[str, ...] = (
                str(nr_pages),
                fmt_memsize(sz),
                fmt_memsize(sz * nr_pages),
            )
            rows.append((str(hp.node),) + row if numa else row)

    if not rows:
        print("No huge pages reserved")
        return

    # find max widths for each column, including header and rows
    col_widths = [
        max(len(tup[col_idx]) for tup in rows + [headers])
        for col_idx in range(len(headers))
    ]

    # print everything
    print_row(headers, col_widths)
    for r in rows:
        print_row(r, col_widths)


def print_mount_status() -> None:
    """Display status of huge page filesystem mounts"""
    mounted = get_hugetlbfs_mountpoints()
    if not mounted:
        print("No huge page filesystems mounted")
        return
    print("Huge page filesystems mounted at:", *mounted, sep=" ")


def scan_huge_dirs(node: T.Optional[int]) -> T.List[HugepageRes]:
    """Return a HugepageRes object for each huge page directory.

    With NUMA support, one object per "node<N>" directory is returned in
    ascending node order, restricted to `node` when it is not None. Without
    NUMA support the single system-wide directory is used.

    Raises ValueError if a node is requested but NUMA is not supported, or
    if the requested node does not exist.
    """
    # without NUMA, use the non-NUMA huge page directory
    if not is_numa():
        if node is not None:
            raise ValueError("NUMA node requested but not supported")
        return [HugepageRes(NO_NUMA_HUGE_DIR)]

    # we want a sorted list of NUMA nodes; entries that are not "node<N>"
    # directories do not match and are skipped
    node_re = re.compile(r"node(\d+)")
    nodes = sorted(
        int(m.group(1))
        for m in map(node_re.match, os.listdir(NUMA_NODE_BASE_DIR))
        if m
    )

    # if user requested a specific node, only include that one; an unknown
    # node is reported instead of silently returning nothing to operate on
    if node is not None:
        if node not in nodes:
            raise ValueError(f"NUMA node {node} not found")
        nodes = [node]

    return [
        HugepageRes(os.path.join(NUMA_NODE_BASE_DIR, f"node{n}", "hugepages"), n)
        for n in nodes
    ]


def try_reserve_huge_pages(
    hp_res: T.List[HugepageRes], mem_sz: str, pagesize_kb: int
) -> None:
    """Reserve huge pages if possible.

    mem_sz is a size string understood by get_memsize(). The resulting page
    count is written to every entry of hp_res and then read back.

    Raises ValueError if either size is invalid or mem_sz is not a multiple
    of the page size, and OSError if the count read back after the write
    differs from the request.
    """
    reserve_kb = get_memsize(mem_sz)

    # a page size below 1 kB parses to zero and would divide by zero below
    if pagesize_kb <= 0:
        raise ValueError(f"Invalid huge page size {pagesize_kb} kB")

    # is this a valid request?
    if reserve_kb % pagesize_kb != 0:
        fmt_res = fmt_memsize(reserve_kb)
        fmt_sz = fmt_memsize(pagesize_kb)
        raise ValueError(
            f"Huge reservation {fmt_res} is " f"not a multiple of page size {fmt_sz}"
        )

    # request is valid, reserve pages
    req = reserve_kb // pagesize_kb
    for hp in hp_res:
        hp[pagesize_kb] = req
        got = hp[pagesize_kb]
        # did we fulfill our request?
        if got != req:
            raise OSError(
                f"Failed to reserve {req} pages of size "
                f"{fmt_memsize(pagesize_kb)}, "
                f"got {got} pages instead"
            )


def main() -> None:
    """Process the command line arguments and setup huge pages.

    Requested actions always run in the same order: clear, unmount, reserve,
    mount, show.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Setup huge pages",
        epilog="""
Examples:

To display current huge page settings:
    %(prog)s -s

To a complete setup of with 2 Gigabyte of 1G huge pages:
    %(prog)s -p 1G --setup 2G
""",
    )
    parser.add_argument(
        "--show",
        "-s",
        action="store_true",
        help="Print current huge page configuration",
    )
    parser.add_argument(
        "--clear", "-c", action="store_true", help="Clear existing huge pages"
    )
    parser.add_argument(
        "--mount",
        "-m",
        action="store_true",
        help="Mount the huge page filesystem",
    )
    parser.add_argument(
        "--unmount",
        "-u",
        action="store_true",
        help="Unmount the system huge page directory",
    )
    parser.add_argument(
        "--directory",
        "-d",
        metavar="DIR",
        default=HUGE_MOUNT,
        help="Mount point for huge pages",
    )
    parser.add_argument(
        "--user",
        "-U",
        metavar="UID",
        help="Set the mounted directory owner user",
    )
    parser.add_argument(
        "--group",
        "-G",
        metavar="GID",
        help="Set the mounted directory owner group",
    )
    parser.add_argument(
        "--node", "-n", type=int, help="Select numa node to reserve pages on"
    )
    parser.add_argument(
        "--pagesize", "-p", metavar="SIZE", help="Choose huge page size to use"
    )
    parser.add_argument(
        "--reserve",
        "-r",
        metavar="SIZE",
        help="Reserve huge pages. Size is in bytes with K, M, or G suffix",
    )
    parser.add_argument(
        "--setup",
        metavar="SIZE",
        help="Setup huge pages by doing clear, unmount, reserve and mount",
    )
    args = parser.parse_args()

    # setup is clear, then unmount, then reserve, then mount
    if args.setup:
        args.clear = True
        args.unmount = True
        args.reserve = args.setup
        args.mount = True

    if not (args.show or args.mount or args.unmount or args.clear or args.reserve):
        parser.error("no action specified")

    # read huge page data from sysfs
    hp_res = scan_huge_dirs(args.node)

    # read huge page mountpoint data
    hp_mountpoint = args.directory
    hp_mounted = hp_mountpoint in get_hugetlbfs_mountpoints()
    hp_mount = HugepageMount(hp_mountpoint, hp_mounted)

    # get requested page size we will be working with
    if args.pagesize:
        pagesize_kb = get_memsize(args.pagesize)
    else:
        pagesize_kb = default_pagesize()

    # were we asked to clear?
    if args.clear:
        for hp in hp_res:
            for sz in hp.valid_page_sizes:
                hp[sz] = 0

    # were we asked to unmount?
    if args.unmount:
        hp_mount.unmount()

    # were we asked to reserve pages?
    if args.reserve:
        try_reserve_huge_pages(hp_res, args.reserve, pagesize_kb)

    # were we asked to mount?
    if args.mount:
        hp_mount.mount(pagesize_kb, args.user, args.group)

    # were we asked to display status?
    if args.show:
        print_hp_status(hp_res)
        print()
        print_mount_status()


if __name__ == "__main__":
    try:
        main()
    # PermissionError is a subclass of OSError, so it must be handled first
    except PermissionError:
        sys.exit("Permission denied: need to be root!")
    except subprocess.CalledProcessError as e:
        sys.exit(f"Command failed: {e}")
    except (KeyError, ValueError, OSError) as e:
        sys.exit(f"Error: {e}")