#! /usr/bin/env python3
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2020 Microsoft Corporation

"""Script to query and set up huge pages for DPDK applications."""

import argparse
import os
import re
import subprocess
import sys
import typing as T
from math import log2

# Standard binary prefix
BINARY_PREFIX = "KMG"

# systemd mount point for huge pages
HUGE_MOUNT = "/dev/hugepages"
# default directory for non-NUMA huge pages
NO_NUMA_HUGE_DIR = "/sys/kernel/mm/hugepages"
# default base directory for NUMA nodes
NUMA_NODE_BASE_DIR = "/sys/devices/system/node"
# procfs paths
MEMINFO_PATH = "/proc/meminfo"
MOUNTS_PATH = "/proc/mounts"


class HugepageMount:
    """Mount operations for huge page filesystem."""

    def __init__(self, path: str, mounted: bool):
        self.path = path
        # current mount status
        self.mounted = mounted

    def mount(
        self, pagesize_kb: int, user: T.Optional[str], group: T.Optional[str]
    ) -> None:
        """Mount the huge TLB file system"""
        if self.mounted:
            return
        cmd = ["mount", "-t", "hugetlbfs"]
        cmd += ["-o", f"pagesize={pagesize_kb * 1024}"]
        if user is not None:
            cmd += ["-o", f"uid={user}"]
        if group is not None:
            cmd += ["-o", f"gid={group}"]
        cmd += ["nodev", self.path]

        subprocess.run(cmd, check=True)
        self.mounted = True

    def unmount(self) -> None:
        """Unmount the huge TLB file system (if mounted)"""
        if self.mounted:
            subprocess.run(["umount", self.path], check=True)
            self.mounted = False


class HugepageRes:
    """Huge page reserve operations. Can be NUMA-node-specific."""

    def __init__(self, path: str, node: T.Optional[int] = None):
        self.path = path
        # if this is a per-NUMA node huge page dir, store the node number
        self.node = node
        self.valid_page_sizes = self._get_valid_page_sizes()

    def _get_valid_page_sizes(self) -> T.List[int]:
        """Extract valid huge page sizes"""
        return [get_memsize(d.split("-")[1]) for d in os.listdir(self.path)]

    def _nr_pages_path(self, sz: int) -> str:
        if sz not in self.valid_page_sizes:
            raise ValueError(
                f"Invalid page size {sz}. Valid sizes: {self.valid_page_sizes}"
            )
        return os.path.join(self.path, f"hugepages-{sz}kB", "nr_hugepages")

    def __getitem__(self, sz: int) -> int:
        """Get current number of reserved pages of specified size"""
        with open(self._nr_pages_path(sz), encoding="utf-8") as f:
            return int(f.read())

    def __setitem__(self, sz: int, nr_pages: int) -> None:
        """Set number of reserved pages of specified size"""
        with open(self._nr_pages_path(sz), "w", encoding="utf-8") as f:
            f.write(f"{nr_pages}\n")


def fmt_memsize(kb: int) -> str:
    """Format memory size in kB into conventional format"""
    logk = int(log2(kb) / 10)
    suffix = BINARY_PREFIX[logk]
    unit = 2 ** (logk * 10)
    return f"{int(kb / unit)}{suffix}b"


def get_memsize(arg: str) -> int:
    """Convert memory size with suffix to kB"""
    # arg may have a 'b' at the end
    if arg[-1].lower() == "b":
        arg = arg[:-1]
    match = re.match(rf"(\d+)([{BINARY_PREFIX}]?)$", arg.upper())
    if match is None:
        raise ValueError(f"{arg} is not a valid size")
    num = float(match.group(1))
    suffix = match.group(2)
    if not suffix:
        return int(num / 1024)
    idx = BINARY_PREFIX.find(suffix)
    return int(num * (2 ** (idx * 10)))


def is_numa() -> bool:
    """Check if NUMA is supported"""
    return os.path.exists(NUMA_NODE_BASE_DIR)


def default_pagesize() -> int:
    """Get default huge page size from /proc/meminfo"""
123    key = "Hugepagesize"
124    with open(MEMINFO_PATH, encoding="utf-8") as meminfo:
125        for line in meminfo:
126            if line.startswith(f"{key}:"):
127                return int(line.split()[1])
128    raise KeyError(f'"{key}" not found in {MEMINFO_PATH}')
129
130
131def get_hugetlbfs_mountpoints() -> T.List[str]:
132    """Get list of where huge page filesystem is mounted"""
    mounted: T.List[str] = []
    with open(MOUNTS_PATH, encoding="utf-8") as mounts:
        for line in mounts:
            fields = line.split()
            if fields[2] != "hugetlbfs":
                continue
            mounted.append(fields[1])
    return mounted


def print_row(cells: T.Tuple[str, ...], widths: T.List[int]) -> None:
    """Print a row of a table with the given column widths"""
    first, *rest = cells
    w_first, *w_rest = widths
    first_end = " " * 2
    rest_end = " " * 2

    print(first.ljust(w_first), end=first_end)
    for cell, width in zip(rest, w_rest):
        print(cell.rjust(width), end=rest_end)
    print()


def print_hp_status(hp_res: T.List[HugepageRes]) -> None:
    """Display status of huge page reservations"""
    numa = is_numa()

    # print out huge page information in a table
    rows: T.List[T.Tuple[str, ...]]
    headers: T.Tuple[str, ...]
    if numa:
        headers = "Node", "Pages", "Size", "Total"
        rows = [
            (
                str(hp.node),
                str(nr_pages),
                fmt_memsize(sz),
                fmt_memsize(sz * nr_pages),
            )
            # iterate over each huge page sysfs node...
            for hp in hp_res
            # ...and each page size within that node...
            for sz in hp.valid_page_sizes
            # ...we need number of pages multiple times, so we read it here...
            for nr_pages in [hp[sz]]
            # ...include this row only if there are pages reserved
            if nr_pages
        ]
    else:
        headers = "Pages", "Size", "Total"
        # if NUMA is disabled, we know there's only one huge page dir
        hp = hp_res[0]
        rows = [
            (str(nr_pages), fmt_memsize(sz), fmt_memsize(sz * nr_pages))
            # iterate over each page size within the huge page dir
            for sz in hp.valid_page_sizes
            # read number of pages for this size
            for nr_pages in [hp[sz]]
            # skip if no pages
            if nr_pages
        ]
    if not rows:
        print("No huge pages reserved")
        return

    # find max widths for each column, including header and rows
    col_widths = [
        max(len(tup[col_idx]) for tup in rows + [headers])
        for col_idx in range(len(headers))
    ]

    # print everything
    print_row(headers, col_widths)
    for r in rows:
        print_row(r, col_widths)


def print_mount_status() -> None:
    """Display status of huge page filesystem mounts"""
    mounted = get_hugetlbfs_mountpoints()
    if not mounted:
        print("No huge page filesystems mounted")
        return
    print("Huge page filesystems mounted at:", *mounted, sep=" ")


def scan_huge_dirs(node: T.Optional[int]) -> T.List[HugepageRes]:
    """Return a HugepageRes object for each huge page directory"""
    # if NUMA is enabled, scan per-NUMA node huge pages
    if is_numa():
        # helper function to extract node number from directory name
        def _get_node(path: str) -> T.Optional[int]:
            m = re.match(r"node(\d+)", os.path.basename(path))
            return int(m.group(1)) if m else None

        # we want a sorted list of NUMA nodes
        nodes = sorted(
            n
            # iterate over all directories in the base directory
            for d in os.listdir(NUMA_NODE_BASE_DIR)
            # extract the node number from the directory name
            for n in [_get_node(d)]
            # filter out None values (non-NUMA node directories)
            if n is not None
        )
        return [
            HugepageRes(os.path.join(NUMA_NODE_BASE_DIR, f"node{n}", "hugepages"), n)
            for n in nodes
            # if user requested a specific node, only include that one
            if node is None or n == node
        ]
    # otherwise, use non-NUMA huge page directory
    if node is not None:
        raise ValueError("NUMA node requested but not supported")
    return [HugepageRes(NO_NUMA_HUGE_DIR)]


def try_reserve_huge_pages(
    hp_res: T.List[HugepageRes], mem_sz: str, pagesize_kb: int
) -> None:
    """Reserve huge pages if possible"""
    reserve_kb = get_memsize(mem_sz)

    # is this a valid request?
    if reserve_kb % pagesize_kb != 0:
        fmt_res = fmt_memsize(reserve_kb)
        fmt_sz = fmt_memsize(pagesize_kb)
        raise ValueError(
            f"Huge page reservation {fmt_res} is not a multiple of page size {fmt_sz}"
        )

    # request is valid, reserve pages
    for hp in hp_res:
        req = reserve_kb // pagesize_kb
        hp[pagesize_kb] = req
        got = hp[pagesize_kb]
        # did we fulfill our request?
        if got != req:
            raise OSError(
                f"Failed to reserve {req} pages of size "
                f"{fmt_memsize(pagesize_kb)}, "
                f"got {got} pages instead"
            )


def main():
    """Process the command line arguments and set up huge pages"""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Set up huge pages",
283        epilog="""
284Examples:
285
286To display current huge page settings:
287    %(prog)s -s
288
289To a complete setup of with 2 Gigabyte of 1G huge pages:
290    %(prog)s -p 1G --setup 2G
291""",
    )
    parser.add_argument(
        "--show",
        "-s",
        action="store_true",
        help="Print current huge page configuration",
    )
    parser.add_argument(
        "--clear", "-c", action="store_true", help="Clear existing huge pages"
    )
    parser.add_argument(
        "--mount",
        "-m",
        action="store_true",
        help="Mount the huge page filesystem",
    )
    parser.add_argument(
        "--unmount",
        "-u",
        action="store_true",
        help="Unmount the system huge page directory",
    )
    parser.add_argument(
        "--directory",
        "-d",
        metavar="DIR",
        default=HUGE_MOUNT,
        help="Mount point for huge pages",
    )
    parser.add_argument(
        "--user",
        "-U",
        metavar="UID",
        help="Set the mounted directory owner user",
    )
    parser.add_argument(
        "--group",
        "-G",
        metavar="GID",
        help="Set the mounted directory owner group",
    )
    parser.add_argument(
        "--node", "-n", type=int, help="Select NUMA node to reserve pages on"
    )
    parser.add_argument(
        "--pagesize", "-p", metavar="SIZE", help="Choose huge page size to use"
    )
    parser.add_argument(
        "--reserve",
        "-r",
        metavar="SIZE",
        help="Reserve huge pages. Size is in bytes with K, M, or G suffix",
    )
    parser.add_argument(
        "--setup",
        metavar="SIZE",
        help="Set up huge pages by doing clear, unmount, reserve and mount",
    )
    args = parser.parse_args()

    # setup is clear, then unmount, then reserve, then mount
    if args.setup:
        args.clear = True
        args.unmount = True
        args.reserve = args.setup
        args.mount = True

    if not (args.show or args.mount or args.unmount or args.clear or args.reserve):
        parser.error("no action specified")

    # read huge page data from sysfs
    hp_res = scan_huge_dirs(args.node)

    # read huge page mountpoint data
    hp_mountpoint = args.directory
    hp_mounted = hp_mountpoint in get_hugetlbfs_mountpoints()
    hp_mount = HugepageMount(hp_mountpoint, hp_mounted)

    # get requested page size we will be working with
    if args.pagesize:
        pagesize_kb = get_memsize(args.pagesize)
    else:
        pagesize_kb = default_pagesize()

    # were we asked to clear?
    if args.clear:
        for hp in hp_res:
            for sz in hp.valid_page_sizes:
                hp[sz] = 0

    # were we asked to unmount?
    if args.unmount:
        hp_mount.unmount()

    # were we asked to reserve pages?
    if args.reserve:
        try_reserve_huge_pages(hp_res, args.reserve, pagesize_kb)

    # were we asked to mount?
    if args.mount:
        hp_mount.mount(pagesize_kb, args.user, args.group)

    # were we asked to display status?
    if args.show:
        print_hp_status(hp_res)
        print()
        print_mount_status()


if __name__ == "__main__":
    try:
        main()
    except PermissionError:
        sys.exit("Permission denied: need to be root!")
    except subprocess.CalledProcessError as e:
        sys.exit(f"Command failed: {e}")
    except (KeyError, ValueError, OSError) as e:
        sys.exit(f"Error: {e}")