#!/usr/bin/env bash
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (C) 2016 Intel Corporation
# All rights reserved.
#
set -e
shopt -s nullglob extglob

os=$(uname -s)

# Only Linux and FreeBSD are handled by this script.
if [[ $os != Linux && $os != FreeBSD ]]; then
	echo "Not supported platform ($os), aborting"
	exit 1
fi

# Quote both expansions so a checkout path containing whitespace still resolves.
rootdir=$(readlink -f "$(dirname "$0")")/..
source "$rootdir/scripts/common.sh"

# Print the help text and exit with status 0.
# Globals:
#   os (read), options (written)
# Arguments:
#   $1 - script path; only its basename is shown in the "Usage:" line
#   $2 - optional message printed (followed by a blank line) before the help
function usage() {
	# "status" and "cleanup" are advertised on Linux only.
	if [[ $os == Linux ]]; then
		options="[config|reset|status|cleanup|interactive|help]"
	else
		options="[config|reset|interactive|help]"
	fi

	if [[ -n $2 ]]; then
		echo "$2"
		echo ""
	fi
	echo "Helper script for allocating hugepages and binding NVMe, I/OAT, VMD and Virtio devices"
	echo "to a generic VFIO kernel driver. If VFIO is not available on the system, this script"
	echo "will fall back to UIO. NVMe and Virtio devices with active mountpoints will be ignored."
	echo "All hugepage operations use default hugepage size on the system (hugepagesz)."
	echo "Usage: $(basename "$1") $options"
	echo
	echo "$options - as following:"
	echo "config Default mode. Allocate hugepages and bind PCI devices."
	if [[ $os == Linux ]]; then
		echo "cleanup Remove any orphaned files that can be left in the system after SPDK application exit"
	fi
	echo "reset Rebind PCI devices back to their original drivers."
	echo " Also cleanup any leftover spdk files/resources."
	echo " Hugepage memory size will remain unchanged."
	if [[ $os == Linux ]]; then
		echo "status Print status of all SPDK-compatible devices on the system."
	fi
	echo "interactive Executes script in interactive mode."
	echo "help Print this help message."
	echo
	echo "The following environment variables can be specified."
	echo "HUGEMEM Size of hugepage memory to allocate (in MB). 2048 by default."
	echo " For NUMA systems, the hugepages will be distributed on node0 by"
	echo " default."
	echo "NRHUGE Number of hugepages to allocate. This variable overwrites HUGEMEM."
	echo "HUGENODE Specific NUMA node to allocate hugepages on. Multiple nodes can be"
	echo " separated with commas. By default, NRHUGE will be applied on each node."
	echo " Hugepages can be defined per node with e.g.:"
	echo " HUGENODE='nodes_hp[0]=2048,nodes_hp[1]=512,2' - this will allocate"
	echo " 2048 pages for node0, 512 for node1 and default NRHUGE for node2."
	echo "HUGEPGSZ Size of the hugepages to use in kB. If not set, kernel's default"
	echo " setting is used."
	echo "SHRINK_HUGE If set to 'yes', hugepages allocation won't be skipped in case"
	echo " number of requested hugepages is lower from what's already"
	echo " allocated."
	echo "CLEAR_HUGE If set to 'yes', the attempt to remove hugepages from all nodes will"
	echo " be made prior to allocation."
	echo "PCI_ALLOWED"
	echo "PCI_BLOCKED Whitespace separated list of PCI devices (NVMe, I/OAT, VMD, Virtio)."
	echo " Each device must be specified as a full PCI address."
	echo " E.g. PCI_ALLOWED=\"0000:01:00.0 0000:02:00.0\""
	echo " To block all PCI devices: PCI_ALLOWED=\"none\""
	echo " To allow all PCI devices except 0000:01:00.0: PCI_BLOCKED=\"0000:01:00.0\""
	echo " To allow only PCI device 0000:01:00.0: PCI_ALLOWED=\"0000:01:00.0\""
	echo " If PCI_ALLOWED and PCI_BLOCKED are empty or unset, all PCI devices"
	echo " will be bound."
	echo " Each device in PCI_BLOCKED will be ignored (driver won't be changed)."
	echo " PCI_BLOCKED has precedence over PCI_ALLOWED."
	echo "TARGET_USER User that will own hugepage mountpoint directory and vfio groups."
	echo " By default the current user will be used."
	echo "DRIVER_OVERRIDE Disable automatic vfio-pci/uio_pci_generic selection and forcefully"
	echo " bind devices to the given driver."
	echo " E.g. DRIVER_OVERRIDE=uio_pci_generic or DRIVER_OVERRIDE=/home/public/dpdk/build/kmod/igb_uio.ko"
	echo "PCI_BLOCK_SYNC_ON_RESET"
	echo " If set in the environment, the attempt to wait for block devices associated"
	echo " with given PCI device will be made upon reset"
	echo "UNBIND_ENTIRE_IOMMU_GROUP"
	echo " If set, all devices from nvme's iommu group will be unbound from their drivers."
	echo " Use with caution."
	echo "DEV_TYPE"
	echo " Perform action only against selected type of devices. Supported:"
	echo " IOAT|DSA|IAA|VIRTIO|VMD|NVME."
	echo " Default is to select all types."
	echo "FORCE_NIC_UIO_REBIND"
	echo " When set to 'yes', an attempt to reload nic_uio will be made regardless"
	echo " of the kernel environment. Applicable only under FreeBSD."
	exit 0
}

# In monolithic kernels the lsmod won't work. So
# back that with a /sys/modules. We also check
# /sys/bus/pci/drivers/ as neither lsmod nor /sys/modules might
# contain needed info (like in Fedora-like OS).
103844c8ec3SMichal Bergerfunction check_for_driver() { 104eb8655b2SMichal Berger if [[ -z $1 ]]; then 105eb8655b2SMichal Berger return 0 106eb8655b2SMichal Berger fi 107eb8655b2SMichal Berger 1082f5767d7SPawel Wodkowski if lsmod | grep -q ${1//-/_}; then 109e1817b60SStephen Bates return 1 110e1817b60SStephen Bates fi 1112f5767d7SPawel Wodkowski 112dfb2950fSMichal Berger if [[ -d /sys/module/${1} || 113dfb2950fSMichal Berger -d /sys/module/${1//-/_} || 114dfb2950fSMichal Berger -d /sys/bus/pci/drivers/${1} || 115dfb2950fSMichal Berger -d /sys/bus/pci/drivers/${1//-/_} ]]; then 1162f5767d7SPawel Wodkowski return 2 117e1817b60SStephen Bates fi 118e1817b60SStephen Bates return 0 119e1817b60SStephen Bates} 120e1817b60SStephen Bates 12194067e8bSMichal Bergerfunction check_for_driver_freebsd() { 12294067e8bSMichal Berger # Check if dpdk drivers (nic_uio, contigmem) are in the kernel's module path. 12394067e8bSMichal Berger local search_paths path driver 12494067e8bSMichal Berger IFS=";" read -ra search_paths < <(kldconfig -rU) 12594067e8bSMichal Berger 12694067e8bSMichal Berger for driver in contigmem.ko nic_uio.ko; do 12794067e8bSMichal Berger for path in "${search_paths[@]}"; do 12894067e8bSMichal Berger [[ -f $path/$driver ]] && continue 2 12994067e8bSMichal Berger done 13094067e8bSMichal Berger return 1 13194067e8bSMichal Berger done 13294067e8bSMichal Berger return 0 13394067e8bSMichal Berger} 13494067e8bSMichal Berger 135768cc8eeSPawel Wodkowskifunction pci_dev_echo() { 136768cc8eeSPawel Wodkowski local bdf="$1" 137768cc8eeSPawel Wodkowski shift 1385ea54946SMichal Berger echo "$bdf (${pci_ids_vendor["$bdf"]#0x} ${pci_ids_device["$bdf"]#0x}): $*" 139768cc8eeSPawel Wodkowski} 140768cc8eeSPawel Wodkowski 141db0d8682SMichal Bergerfunction probe_driver() { 142db0d8682SMichal Berger local bdf=$1 143db0d8682SMichal Berger local driver_name=$2 1444f8177b5SMichal Berger old_driver_name=${pci_bus_driver["$bdf"]:-no driver} 14521173cd0SDaniel Verkamp 1460897e4dbSMichal Berger 
if [[ $driver_name == "$old_driver_name" ]]; then 147768cc8eeSPawel Wodkowski pci_dev_echo "$bdf" "Already using the $old_driver_name driver" 14821173cd0SDaniel Verkamp return 0 14921173cd0SDaniel Verkamp fi 15021173cd0SDaniel Verkamp 1516ca498c1Swanghailiangx if [[ $old_driver_name != "no driver" ]]; then 15221173cd0SDaniel Verkamp echo "$bdf" > "/sys/bus/pci/devices/$bdf/driver/unbind" 1536ca498c1Swanghailiangx fi 15421173cd0SDaniel Verkamp 155768cc8eeSPawel Wodkowski pci_dev_echo "$bdf" "$old_driver_name -> $driver_name" 15621173cd0SDaniel Verkamp 157b0aba3fcSSamir Raval if [[ $driver_name == "none" ]]; then 158b0aba3fcSSamir Raval return 0 159b0aba3fcSSamir Raval fi 160b0aba3fcSSamir Raval 161eab0c664SMichal Berger local probe_attempts=0 1621f59abaeSMichal Berger echo "$driver_name" > "/sys/bus/pci/devices/$bdf/driver_override" 163eab0c664SMichal Berger while ! echo "$bdf" > "/sys/bus/pci/drivers_probe" && ((probe_attempts++ < 10)); do 164eab0c664SMichal Berger pci_dev_echo "$bdf" "failed to bind to $driver_name, retrying ($probe_attempts)" 165eab0c664SMichal Berger sleep 0.5 166eab0c664SMichal Berger done 2> /dev/null 167eab0c664SMichal Berger 1681f59abaeSMichal Berger echo "" > "/sys/bus/pci/devices/$bdf/driver_override" 1697b25f04cSCunyin Chang 170eab0c664SMichal Berger if [[ ! 
-e /sys/bus/pci/drivers/$driver_name/$bdf ]]; then 171eab0c664SMichal Berger pci_dev_echo "$bdf" "failed to bind to $driver_name, aborting" 172eab0c664SMichal Berger return 1 173e5a08642SMichal Berger fi 174db0d8682SMichal Berger} 175db0d8682SMichal Berger 176db0d8682SMichal Bergerfunction linux_bind_driver() { 177db0d8682SMichal Berger local bdf="$1" 178db0d8682SMichal Berger local driver_name="$2" 179db0d8682SMichal Berger 180db0d8682SMichal Berger probe_driver "$bdf" "$driver_name" 181e5a08642SMichal Berger 182b44bbd24SMichal Berger local iommu_group=${pci_iommu_groups["$bdf"]} 1837b25f04cSCunyin Chang if [ -e "/dev/vfio/$iommu_group" ]; then 1844b428979SDariusz Stojaczyk if [ -n "$TARGET_USER" ]; then 185da7e3bb8SDariusz Stojaczyk chown "$TARGET_USER" "/dev/vfio/$iommu_group" 1867b25f04cSCunyin Chang fi 18756306a46SDaniel Verkamp fi 188db0d8682SMichal Berger 189b44bbd24SMichal Berger local iommug=("${!iommu_groups[iommu_group]}") 190db0d8682SMichal Berger local _bdf _driver 191b44bbd24SMichal Berger if ((${#iommug[@]} > 1)) && [[ $driver_name == vfio* ]]; then 192db0d8682SMichal Berger pci_dev_echo "$bdf" "WARNING: detected multiple devices (${#iommug[@]}) under the same IOMMU group!" 
193db0d8682SMichal Berger for _bdf in "${iommug[@]}"; do 194c75664c8SMichal Berger [[ $_bdf == "$bdf" ]] && continue 195c75664c8SMichal Berger _driver=$(readlink -f "/sys/bus/pci/devices/$_bdf/driver") && _driver=${_driver##*/} 196c75664c8SMichal Berger if [[ $_driver == "$driver_name" ]]; then 197db0d8682SMichal Berger continue 198db0d8682SMichal Berger fi 199db0d8682SMichal Berger # See what DPDK considers to be a "viable" iommu group: dpdk/lib/eal/linux/eal_vfio.c -> rte_vfio_setup_device() 200aec2e33bSMichal Berger pci_dev_echo "$bdf" "WARNING: ${_bdf##*/} not bound to $driver_name (${_driver:-no driver})" 201db0d8682SMichal Berger pci_dev_echo "$bdf" "WARNING All devices in the IOMMU group must be bound to the same driver or unbound" 202db0d8682SMichal Berger if [[ $UNBIND_ENTIRE_IOMMU_GROUP == yes ]]; then 203db0d8682SMichal Berger pci_dev_echo "$bdf" "WARNING: Attempting to unbind ${_bdf##*/}" 204aec2e33bSMichal Berger pci_bus_driver["${_bdf##*/}"]=$_driver 205db0d8682SMichal Berger probe_driver "${_bdf##*/}" none 206db0d8682SMichal Berger fi 207db0d8682SMichal Berger done 208db0d8682SMichal Berger fi 209db0d8682SMichal Berger 21021173cd0SDaniel Verkamp} 21121173cd0SDaniel Verkamp 21210283728SJim Harrisfunction linux_unbind_driver() { 213fdcd8b70SPawel Wodkowski local bdf="$1" 2144f8177b5SMichal Berger local old_driver_name=${pci_bus_driver["$bdf"]:-no driver} 21510283728SJim Harris 216eb8655b2SMichal Berger if [[ $old_driver_name == "no driver" ]]; then 217eb8655b2SMichal Berger pci_dev_echo "$bdf" "Not bound to any driver" 218eb8655b2SMichal Berger return 0 219eb8655b2SMichal Berger fi 220eb8655b2SMichal Berger 2210897e4dbSMichal Berger if [[ -e /sys/bus/pci/drivers/$old_driver_name ]]; then 2220897e4dbSMichal Berger echo "$bdf" > "/sys/bus/pci/drivers/$old_driver_name/unbind" 2231f59abaeSMichal Berger echo "" > "/sys/bus/pci/devices/$bdf/driver_override" 224fdcd8b70SPawel Wodkowski fi 225fdcd8b70SPawel Wodkowski 226fdcd8b70SPawel Wodkowski pci_dev_echo 
"$bdf" "$old_driver_name -> no driver" 22710283728SJim Harris} 22810283728SJim Harris 229f8c1c71cSDariusz Stojaczykfunction linux_hugetlbfs_mounts() { 230fc58aceaSDaniel Verkamp mount | grep ' type hugetlbfs ' | awk '{ print $3 }' 2319603193fSDaniel Verkamp} 2329603193fSDaniel Verkamp 2339af7c30eSMichal Bergerfunction get_used_bdf_block_devs() { 23469ae10f7SMichal Berger local bdf=$1 235bb4657c7SMichal Berger local blocks block blockp dev mount holder 2369af7c30eSMichal Berger local used 2379c44fad7SDariusz Stojaczyk 238274a9ffbSJim Harris hash lsblk &> /dev/null || return 1 23969ae10f7SMichal Berger blocks=($(get_block_dev_from_bdf "$bdf")) 24069ae10f7SMichal Berger 24169ae10f7SMichal Berger for block in "${blocks[@]}"; do 242bb4657c7SMichal Berger # Check if the device is hold by some other, regardless if it's mounted 243bb4657c7SMichal Berger # or not. 244bb4657c7SMichal Berger for holder in "/sys/class/block/$block"*/holders/*; do 245bb4657c7SMichal Berger [[ -e $holder ]] || continue 246bb4657c7SMichal Berger blockp=${holder%/holders*} blockp=${blockp##*/} 247bb4657c7SMichal Berger if [[ -e $holder/slaves/$blockp ]]; then 2489af7c30eSMichal Berger used+=("holder@$blockp:${holder##*/}") 249bb4657c7SMichal Berger fi 250bb4657c7SMichal Berger done 251602b134fSMichal Berger while read -r dev mount; do 252602b134fSMichal Berger if [[ -e $mount ]]; then 2539af7c30eSMichal Berger used+=("mount@$block:$dev") 254f869082aSDariusz Stojaczyk fi 255602b134fSMichal Berger done < <(lsblk -l -n -o NAME,MOUNTPOINT "/dev/$block") 2569af7c30eSMichal Berger if ((${#used[@]} == 0)); then 2579af7c30eSMichal Berger # Make sure we check if there's any valid data present on the target device 2589af7c30eSMichal Berger # regardless if it's being actively used or not. This is mainly done to make 2599af7c30eSMichal Berger # sure we don't miss more complex setups like ZFS pools, etc. 
2609af7c30eSMichal Berger if block_in_use "$block" > /dev/null; then 2619af7c30eSMichal Berger used+=("data@$block") 2629af7c30eSMichal Berger fi 2639af7c30eSMichal Berger fi 264f869082aSDariusz Stojaczyk done 2659af7c30eSMichal Berger 2669af7c30eSMichal Berger if ((${#used[@]} > 0)); then 2679af7c30eSMichal Berger printf '%s\n' "${used[@]}" 2689af7c30eSMichal Berger fi 269f869082aSDariusz Stojaczyk} 270f869082aSDariusz Stojaczyk 27151b5fa85SMichal Bergerfunction collect_devices() { 2724f8177b5SMichal Berger local mode=$1 in_use 27351b5fa85SMichal Berger 2747014f640SMichal Berger map_supported_devices "$DEV_TYPE" 27551b5fa85SMichal Berger 2764f8177b5SMichal Berger for bdf in "${!all_devices_d[@]}"; do 2775ea54946SMichal Berger in_use=0 2784f8177b5SMichal Berger if [[ $mode != status ]]; then 279b9ba32aaSMichal Berger if ! pci_can_use "$bdf"; then 280b9ba32aaSMichal Berger pci_dev_echo "$bdf" "Skipping denied controller at $bdf" 2815ea54946SMichal Berger in_use=1 2825ea54946SMichal Berger fi 2834f8177b5SMichal Berger fi 2844f8177b5SMichal Berger if [[ -n ${nvme_d["$bdf"]} || -n ${virtio_d["$bdf"]} ]]; then 2859af7c30eSMichal Berger if ! 
verify_bdf_block_devs "$bdf"; then 2865ea54946SMichal Berger in_use=1 2875ea54946SMichal Berger fi 2885ea54946SMichal Berger fi 2894f8177b5SMichal Berger if [[ -n ${vmd_d["$bdf"]} ]]; then 290a1280c98SJim Harris if [[ $PCI_ALLOWED != *"$bdf"* ]]; then 291b9ba32aaSMichal Berger pci_dev_echo "$bdf" "Skipping not allowed VMD controller at $bdf" 292b9ba32aaSMichal Berger in_use=1 2934f8177b5SMichal Berger elif ((vmd_nvme_count["$bdf"] > 0)) && [[ $DRIVER_OVERRLDE != none && $mode == config ]]; then 294b0aba3fcSSamir Raval cat <<- MESSAGE 2954f8177b5SMichal Berger Binding new driver to VMD device with NVMe SSDs attached to the kernel: 2964f8177b5SMichal Berger ${!vmd_nvme_d["$bdf"]} 2974f8177b5SMichal Berger The binding process may go faster if you first run this script with 2984f8177b5SMichal Berger DRIVER_OVERRIDE="none" to unbind only the NVMe SSDs, and then run 2994f8177b5SMichal Berger again to unbind the VMD devices. 300b0aba3fcSSamir Raval MESSAGE 301b0aba3fcSSamir Raval fi 302b0aba3fcSSamir Raval fi 303e70594d4SSlawomir Ptak if [[ -n ${dsa_d["$bdf"]} ]] && [[ $PCI_ALLOWED != *"$bdf"* ]]; then 304e70594d4SSlawomir Ptak pci_dev_echo "$bdf" "Skipping not allowed DSA controller at $bdf" 305e70594d4SSlawomir Ptak in_use=1 306e70594d4SSlawomir Ptak fi 307e70594d4SSlawomir Ptak if [[ -n ${iaa_d["$bdf"]} ]] && [[ $PCI_ALLOWED != *"$bdf"* ]]; then 308e70594d4SSlawomir Ptak pci_dev_echo "$bdf" "Skipping not allowed IAA controller at $bdf" 309e70594d4SSlawomir Ptak in_use=1 310e70594d4SSlawomir Ptak fi 3114f8177b5SMichal Berger # Update in-use for each bdf. 
Default from the map_supported_devices() is 0 == "not used" 3124f8177b5SMichal Berger local -n type_ref=${all_devices_type_d["$bdf"]}_d 3134f8177b5SMichal Berger type_ref["$bdf"]=$in_use 3145ea54946SMichal Berger all_devices_d["$bdf"]=$in_use 3154c01eb58SMichal Berger done 3164c01eb58SMichal Berger 3174c01eb58SMichal Berger # Check if we got any nvmes attached to VMDs sharing the same iommu_group - if there are 3184c01eb58SMichal Berger # any skip them since they won't be usable by SPDK without moving the entire VMD ctrl 3194c01eb58SMichal Berger # away from the kernel first. That said, allow to touch the nvmes in case user requested 3204c01eb58SMichal Berger # all devices to be unbound from any driver or if dedicated override flag was set. 3214c01eb58SMichal Berger [[ -z $ALLOW_NVME_BEHIND_VMD && $DRIVER_OVERRIDE != none ]] || return 0 3224c01eb58SMichal Berger 3234c01eb58SMichal Berger for bdf in "${!nvme_d[@]}"; do 3244c01eb58SMichal Berger is_nvme_iommu_shared_with_vmd "$bdf" || continue 3254c01eb58SMichal Berger nvme_d["$bdf"]=1 all_devices_d["$bdf"]=1 3264c01eb58SMichal Berger pci_dev_echo "$bdf" "Skipping nvme behind VMD (${nvme_vmd_d["$bdf"]})" 3274c01eb58SMichal Berger done 3284c01eb58SMichal Berger 3292635e73dSMichal Berger get_unsupported_nic_uio_hw 3302635e73dSMichal Berger 3314c01eb58SMichal Berger return 0 33251b5fa85SMichal Berger} 33351b5fa85SMichal Berger 33485501619SMichal Bergerfunction collect_driver() { 33585501619SMichal Berger local bdf=$1 33685501619SMichal Berger local drivers driver 33785501619SMichal Berger 338eb8655b2SMichal Berger if [[ -e /sys/bus/pci/devices/$bdf/modalias ]] \ 339eb8655b2SMichal Berger && drivers=($(modprobe -R "$(< "/sys/bus/pci/devices/$bdf/modalias")")); then 34085501619SMichal Berger # Pick first entry in case multiple aliases are bound to a driver. 
34185501619SMichal Berger driver=$(readlink -f "/sys/module/${drivers[0]}/drivers/pci:"*) 34285501619SMichal Berger driver=${driver##*/} 34385501619SMichal Berger else 344203fd7ffSMichal Berger [[ -n ${nvme_d["$bdf"]} ]] && driver=nvme 345203fd7ffSMichal Berger [[ -n ${ioat_d["$bdf"]} ]] && driver=ioatdma 346b711a565SMichal Berger [[ -n ${dsa_d["$bdf"]} ]] && driver=idxd 347b711a565SMichal Berger [[ -n ${iaa_d["$bdf"]} ]] && driver=idxd 348203fd7ffSMichal Berger [[ -n ${virtio_d["$bdf"]} ]] && driver=virtio-pci 349203fd7ffSMichal Berger [[ -n ${vmd_d["$bdf"]} ]] && driver=vmd 35085501619SMichal Berger fi 2> /dev/null 35185501619SMichal Berger echo "$driver" 35285501619SMichal Berger} 35385501619SMichal Berger 3549af7c30eSMichal Bergerfunction verify_bdf_block_devs() { 3555ea54946SMichal Berger local bdf=$1 35606058e9bSMichal Berger local blknames 3579af7c30eSMichal Berger blknames=($(get_used_bdf_block_devs "$bdf")) || return 1 3585ea54946SMichal Berger 3595ea54946SMichal Berger if ((${#blknames[@]} > 0)); then 360602b134fSMichal Berger local IFS="," 3619af7c30eSMichal Berger pci_dev_echo "$bdf" "Active devices: ${blknames[*]}, so not binding PCI dev" 3625ea54946SMichal Berger return 1 3635ea54946SMichal Berger fi 3645ea54946SMichal Berger} 3655ea54946SMichal Berger 366844c8ec3SMichal Bergerfunction configure_linux_pci() { 367c8bcedf4SSeth Howell local driver_path="" 368c8bcedf4SSeth Howell driver_name="" 369e5a08642SMichal Berger igb_uio_fallback="" 370e5a08642SMichal Berger 371e5a08642SMichal Berger if [[ -r "$rootdir/dpdk/build-tmp/kernel/linux/igb_uio/igb_uio.ko" ]]; then 372fcc35c86SMichal Berger # igb_uio is a common driver to override with and it depends on uio. 373fcc35c86SMichal Berger modprobe uio || true 374fcc35c86SMichal Berger if ! 
check_for_driver igb_uio || insmod "$rootdir/dpdk/build-tmp/kernel/linux/igb_uio/igb_uio.ko"; then 375fcc35c86SMichal Berger igb_uio_fallback="$rootdir/dpdk/build-tmp/kernel/linux/igb_uio/igb_uio.ko" 376fcc35c86SMichal Berger fi 377e5a08642SMichal Berger fi 378e5a08642SMichal Berger 379b0aba3fcSSamir Raval if [[ "${DRIVER_OVERRIDE}" == "none" ]]; then 380b0aba3fcSSamir Raval driver_name=none 381b0aba3fcSSamir Raval elif [[ -n "${DRIVER_OVERRIDE}" ]]; then 382615b6849SSeth Howell driver_path="$DRIVER_OVERRIDE" 383c8bcedf4SSeth Howell driver_name="${DRIVER_OVERRIDE##*/}" 384615b6849SSeth Howell # modprobe and the sysfs don't use the .ko suffix. 385615b6849SSeth Howell driver_name=${driver_name%.ko} 386c8bcedf4SSeth Howell # path = name -> there is no path 387c8bcedf4SSeth Howell if [[ "$driver_path" = "$driver_name" ]]; then 388c8bcedf4SSeth Howell driver_path="" 389c8bcedf4SSeth Howell fi 3903ac0a6edSMichal Berger elif is_iommu_enabled; then 39155dc5f21SBen Walker driver_name=vfio-pci 392cac9322dSMichal Berger # Just in case, attempt to load VFIO_IOMMU_TYPE1 module into the kernel - this 393cac9322dSMichal Berger # should be done automatically by modprobe since this particular module should 394cac9322dSMichal Berger # be a part of vfio-pci dependencies, however, on some distros, it seems that 395cac9322dSMichal Berger # it's not the case. See #1689. 396cac9322dSMichal Berger if modinfo vfio_iommu_type1 > /dev/null; then 397cac9322dSMichal Berger modprobe vfio_iommu_type1 398cac9322dSMichal Berger fi 3999801533fSMichal Berger elif ! 
check_for_driver uio_pci_generic || modinfo uio_pci_generic > /dev/null 2>&1; then 40055dc5f21SBen Walker driver_name=uio_pci_generic 401e5a08642SMichal Berger elif [[ -e $igb_uio_fallback ]]; then 402fcc35c86SMichal Berger driver_path="$igb_uio_fallback" 403c8bcedf4SSeth Howell driver_name="igb_uio" 404c8bcedf4SSeth Howell echo "WARNING: uio_pci_generic not detected - using $driver_name" 405e93d56b1Stone.zhang else 406aa22321aSTomasz Zawadzki echo "No valid drivers found [vfio-pci, uio_pci_generic, igb_uio]. Please enable one of the kernel modules." 407c8bcedf4SSeth Howell return 1 408c8bcedf4SSeth Howell fi 409c8bcedf4SSeth Howell 410c8bcedf4SSeth Howell # modprobe assumes the directory of the module. If the user passes in a path, we should use insmod 411b0aba3fcSSamir Raval if [[ $driver_name != "none" ]]; then 412c8bcedf4SSeth Howell if [[ -n "$driver_path" ]]; then 413c8bcedf4SSeth Howell insmod $driver_path || true 414c8bcedf4SSeth Howell else 415c8bcedf4SSeth Howell modprobe $driver_name 416e93d56b1Stone.zhang fi 417b0aba3fcSSamir Raval fi 41855dc5f21SBen Walker 4195ea54946SMichal Berger for bdf in "${!all_devices_d[@]}"; do 4205ea54946SMichal Berger if ((all_devices_d["$bdf"] == 0)); then 421904ac49fSMichal Berger if [[ -n ${nvme_d["$bdf"]} ]]; then 422904ac49fSMichal Berger # Some nvme controllers may take significant amount of time while being 423904ac49fSMichal Berger # unbound from the driver. Put that task into background to speed up the 424904ac49fSMichal Berger # whole process. Currently this is done only for the devices bound to the 425904ac49fSMichal Berger # nvme driver as other, i.e., ioatdma's, trigger a kernel BUG when being 426904ac49fSMichal Berger # unbound in parallel. See https://bugzilla.kernel.org/show_bug.cgi?id=209041. 
427904ac49fSMichal Berger linux_bind_driver "$bdf" "$driver_name" & 428904ac49fSMichal Berger else 42921173cd0SDaniel Verkamp linux_bind_driver "$bdf" "$driver_name" 4301a15ce9bSJim Harris fi 431904ac49fSMichal Berger fi 43255dc5f21SBen Walker done 433904ac49fSMichal Berger wait 43455dc5f21SBen Walker 43555dc5f21SBen Walker echo "1" > "/sys/bus/pci/rescan" 43655ac2263SGangCao} 43755ac2263SGangCao 438844c8ec3SMichal Bergerfunction cleanup_linux() { 439af5b654dSMichal Berger local dirs_to_clean=() files_to_clean=() opened_files=() file_locks=() 440af5b654dSMichal Berger local match_spdk="spdk_tgt|iscsi|vhost|nvmf|rocksdb|bdevio|bdevperf|vhost_fuzz|nvme_fuzz|accel_perf|bdev_svc" 441af5b654dSMichal Berger 442af5b654dSMichal Berger dirs_to_clean=({/var/run,/tmp}/dpdk/spdk{,_pid}+([0-9])) 443af5b654dSMichal Berger if [[ -d $XDG_RUNTIME_DIR ]]; then 444af5b654dSMichal Berger dirs_to_clean+=("$XDG_RUNTIME_DIR/dpdk/spdk"{,_pid}+([0-9])) 4451469679fSDariusz Stojaczyk fi 4461469679fSDariusz Stojaczyk 447af5b654dSMichal Berger for dir in "${dirs_to_clean[@]}"; do 448af5b654dSMichal Berger files_to_clean+=("$dir/"*) 4491469679fSDariusz Stojaczyk done 450637d9e60SMichal Berger file_locks+=(/var/tmp/spdk_pci_lock*) 4510af934b3SKrzysztof Karas file_locks+=(/var/tmp/spdk_cpu_lock*) 4521469679fSDariusz Stojaczyk 45371605a52SMichal Berger files_to_clean+=(/dev/shm/@(@($match_spdk)_trace|spdk_iscsi_conns)*) 454af5b654dSMichal Berger files_to_clean+=("${file_locks[@]}") 455e47f972dSPawel Wodkowski 456cb8174dcSMichal Berger # This may fail in case path that readlink attempts to resolve suddenly 457cb8174dcSMichal Berger # disappears (as it may happen with terminating processes). 458cb8174dcSMichal Berger opened_files+=($(readlink -f /proc/+([0-9])/fd/+([0-9]))) || true 459e47f972dSPawel Wodkowski 460af5b654dSMichal Berger if ((${#opened_files[@]} == 0)); then 461e47f972dSPawel Wodkowski echo "Can't get list of opened files!" 
462e47f972dSPawel Wodkowski exit 1 463e47f972dSPawel Wodkowski fi 464e47f972dSPawel Wodkowski 465e47f972dSPawel Wodkowski echo 'Cleaning' 466af5b654dSMichal Berger for f in "${files_to_clean[@]}"; do 467af5b654dSMichal Berger [[ -e $f ]] || continue 468af5b654dSMichal Berger if [[ ${opened_files[*]} != *"$f"* ]]; then 469e47f972dSPawel Wodkowski echo "Removing: $f" 470e47f972dSPawel Wodkowski rm $f 471e47f972dSPawel Wodkowski else 472e47f972dSPawel Wodkowski echo "Still open: $f" 473e47f972dSPawel Wodkowski fi 474e47f972dSPawel Wodkowski done 4751469679fSDariusz Stojaczyk 476af5b654dSMichal Berger for dir in "${dirs_to_clean[@]}"; do 477af5b654dSMichal Berger [[ -d $dir ]] || continue 478af5b654dSMichal Berger if [[ ${opened_files[*]} != *"$dir"* ]]; then 4791469679fSDariusz Stojaczyk echo "Removing: $dir" 4801469679fSDariusz Stojaczyk rmdir $dir 4811469679fSDariusz Stojaczyk else 4821469679fSDariusz Stojaczyk echo "Still open: $dir" 4831469679fSDariusz Stojaczyk fi 4841469679fSDariusz Stojaczyk done 485e47f972dSPawel Wodkowski echo "Clean" 486e47f972dSPawel Wodkowski} 487e47f972dSPawel Wodkowski 4882b80955cSMichal Bergercheck_hugepages_alloc() { 4892b80955cSMichal Berger local hp_int=$1 4902b80955cSMichal Berger local allocated_hugepages 4912b80955cSMichal Berger 49228bfb876SMichal Berger allocated_hugepages=$(< "$hp_int") 49328bfb876SMichal Berger 49428bfb876SMichal Berger if ((NRHUGE <= allocated_hugepages)) && [[ $SHRINK_HUGE != yes ]]; then 49528bfb876SMichal Berger echo "INFO: Requested $NRHUGE hugepages but $allocated_hugepages already allocated ${2:+on node$2}" 49628bfb876SMichal Berger return 0 49728bfb876SMichal Berger fi 49828bfb876SMichal Berger 4992b80955cSMichal Berger echo $((NRHUGE < 0 ? 
0 : NRHUGE)) > "$hp_int"

	allocated_hugepages=$(< "$hp_int")
	if ((allocated_hugepages < NRHUGE)); then
		cat <<- ERROR

			## ERROR: requested $NRHUGE hugepages but $allocated_hugepages could be allocated ${2:+on node$2}.
			## Memory might be heavily fragmented. Please try flushing the system cache, or reboot the machine.
		ERROR
		return 1
	fi
}

# Drop the system-wide hugepage pool by writing 0 to nr_hugepages.
clear_hugepages() { echo 0 > /proc/sys/vm/nr_hugepages; }

# Allocate hugepages either through the system-wide interface or per NUMA node.
#
# Globals read:
#   CLEAR_HUGE - when "yes", clear_hugepages is called first
#   HUGENODE   - comma-separated entries; each one is either a node id (that
#                node gets NRHUGE pages) or nodes_hp[<id>]=<pages>
#   HUGEPGSZ   - hugepage size in kB, used to build the per-node sysfs path
#   NRHUGE     - default page count handed to check_hugepages_alloc
#
# When HUGENODE is empty, or no /sys/devices/system/node/node* entry exists,
# the request goes to /proc/sys/vm/nr_hugepages instead.
configure_linux_hugepages() {
	local node entry
	# nodes[id]    -> sysfs nr_hugepages file of that node
	# nodes_hp[id] -> number of pages requested for that node
	local -a nodes=() nodes_to_use=() nodes_hp=()

	if [[ $CLEAR_HUGE == yes ]]; then
		clear_hugepages
	fi

	if [[ -z $HUGENODE ]]; then
		check_hugepages_alloc /proc/sys/vm/nr_hugepages
		return 0
	fi

	for node in /sys/devices/system/node/node*; do
		[[ -e $node ]] || continue
		nodes[${node##*node}]=$node/hugepages/hugepages-${HUGEPGSZ}kB/nr_hugepages
	done

	if ((${#nodes[@]} == 0)); then
		# No NUMA support? Fallback to common interface
		check_hugepages_alloc /proc/sys/vm/nr_hugepages
		return 0
	fi

	IFS="," read -ra nodes_to_use <<< "$HUGENODE"
	if ((${#nodes_to_use[@]} == 0)); then
		nodes_to_use[0]=0
	fi

	# Align indexes with node ids. Entries are parsed with a fully anchored
	# regex and assigned directly, so nothing from HUGENODE is ever executed.
	for entry in "${nodes_to_use[@]}"; do
		if [[ $entry =~ ^nodes_hp\[([0-9]+)\]=([0-9]+)$ ]]; then
			nodes_hp[${BASH_REMATCH[1]}]=${BASH_REMATCH[2]}
		elif [[ $entry =~ ^[0-9]+$ ]]; then
			nodes_hp[$entry]=$NRHUGE
		elif [[ -n $entry ]]; then
			echo "Invalid HUGENODE entry '$entry', ignoring" >&2
		fi
	done

	for node in "${!nodes_hp[@]}"; do
		if [[ -z ${nodes[node]} ]]; then
			echo "Node $node doesn't exist, ignoring" >&2
			continue
		fi
		# NRHUGE is overridden only for the duration of this call
		NRHUGE=${nodes_hp[node]:-$NRHUGE} check_hugepages_alloc "${nodes[node]}" "$node"
	done
}

# Linux "config" mode: configure PCI devices, make sure a hugetlbfs mount
# exists, allocate hugepages and, for vfio-pci with a TARGET_USER, hand the
# hugetlbfs mounts over to that user and report the user's memlock limit.
function configure_linux() {
	configure_linux_pci
	hugetlbfs_mounts=$(linux_hugetlbfs_mounts)

	if [ -z "$hugetlbfs_mounts" ]; then
		hugetlbfs_mounts=/mnt/huge
		echo "Mounting hugetlbfs at $hugetlbfs_mounts"
		mkdir -p "$hugetlbfs_mounts"
		mount -t hugetlbfs nodev "$hugetlbfs_mounts"
	fi

	configure_linux_hugepages

	if [ "$driver_name" = "vfio-pci" ]; then
		if [ -n "$TARGET_USER" ]; then
			# hugetlbfs_mounts is a whitespace-separated list, split on purpose
			for mount in $hugetlbfs_mounts; do
				chown "$TARGET_USER" "$mount"
				chmod g+w "$mount"
			done

			MEMLOCK_AMNT=$(su "$TARGET_USER" -c "ulimit -l")
			if [[ $MEMLOCK_AMNT != "unlimited" ]]; then
				MEMLOCK_MB=$((MEMLOCK_AMNT / 1024))
				cat <<- MEMLOCK
					"$TARGET_USER" user memlock limit: $MEMLOCK_MB MB

					This is the maximum amount of memory you will be
					able to use with DPDK and VFIO if run as user "$TARGET_USER".
					To change this, please adjust limits.conf memlock limit for user "$TARGET_USER".
				MEMLOCK
				if ((MEMLOCK_AMNT < 65536)); then
					echo ""
					echo "## WARNING: memlock limit is less than 64MB"
					echo -n "## DPDK with VFIO may not be able to initialize "
					echo "if run as user \"$TARGET_USER\"."
				fi
			fi
		fi
	fi

	# `uname -m` is used instead of `uname -i`: the latter is non-portable and
	# prints "unknown" on some distributions, which silently skipped this step.
	if [[ $(uname -m) == x86_64 && ! -e /dev/cpu/0/msr ]]; then
		# Some distros build msr as a module. Make sure it's loaded to ensure
		# DPDK can easily figure out the TSC rate rather than relying on 100ms
		# sleeps.
		modprobe msr &> /dev/null || true
	fi
}

# Hand every device whose all_devices_d entry is 0 back to the driver reported
# by collect_driver, or just unbind it when no such driver is reported (or
# check_for_driver succeeds for it). Ends with a PCI bus rescan.
function reset_linux_pci() {
	# virtio
	# TODO: check if virtio-pci is loaded first and just unbind if it is not loaded
	# Requires some more investigation - for example, some kernels do not seem to have
	# virtio-pci but just virtio_scsi instead. Also need to make sure we get the
	# underscore vs. dash right in the virtio_scsi name.
	modprobe virtio-pci || true
	for bdf in "${!all_devices_d[@]}"; do
		((all_devices_d["$bdf"] == 0)) || continue

		driver=$(collect_driver "$bdf")
		if [[ -n $driver ]] && ! check_for_driver "$driver"; then
			linux_bind_driver "$bdf" "$driver"
		else
			linux_unbind_driver "$bdf"
		fi
	done

	echo "1" > "/sys/bus/pci/rescan"
}

# Linux "reset" mode: rebind PCI devices, then remove spdk*map_* files from
# every hugetlbfs mount (only those a non-blocking flock can take) and the
# /run/.spdk* leftovers.
function reset_linux() {
	reset_linux_pci
	for mount in $(linux_hugetlbfs_mounts); do
		for hp in "$mount"/spdk*map_*; do
			flock -n "$hp" true && rm -f "$hp"
		done
	done
	rm -f /run/.spdk*
}

# Linux "status" mode: print per-node hugepage usage followed by a table of
# the devices tracked in all_devices_d. Table headers go to stderr, rows to
# stdout.
function status_linux() {
	echo "Hugepages" >&2
	printf "%-6s %10s %8s / %6s\n" "node" "hugesize" "free" "total" >&2

	# Counts node/size directories seen; 0 means no per-node hugepage info
	numa_nodes=0
	for path in /sys/devices/system/node/node*/hugepages/hugepages-*/; do
		numa_nodes=$((numa_nodes + 1))
		free_pages=$(< "$path/free_hugepages")
		all_pages=$(< "$path/nr_hugepages")

		[[ $path =~ (node[0-9]+)/hugepages/hugepages-([0-9]+kB) ]]

		node=${BASH_REMATCH[1]}
		huge_size=${BASH_REMATCH[2]}

		printf "%-6s %10s %8s / %6s\n" "$node" "$huge_size" "$free_pages" "$all_pages"
	done

	# fall back to system-wide hugepages
	if [ "$numa_nodes" = "0" ]; then
		free_pages=$(awk '/HugePages_Free/ { print $2 }' /proc/meminfo)
		all_pages=$(awk '/HugePages_Total/ { print $2 }' /proc/meminfo)
		node="-"
		huge_size="$HUGEPGSZ"

		printf "%-6s %10s %8s / %6s\n" "$node" "$huge_size" "$free_pages" "$all_pages"
	fi

	printf '\n%-25s %-15s %-6s %-6s %-7s %-16s %-10s %s\n' \
		"Type" "BDF" "Vendor" "Device" "NUMA" "Driver" "Device" "Block devices" >&2

	# Word splitting is intended here: it drops the empty line printf emits
	# when all_devices_d has no keys.
	sorted_bdfs=($(printf '%s\n' "${!all_devices_d[@]}" | sort))

	for bdf in "${sorted_bdfs[@]}"; do
		driver=${pci_bus_driver["$bdf"]}
		if [ "$numa_nodes" = "0" ]; then
			node="-"
		else
			node=$(< "/sys/bus/pci/devices/$bdf/numa_node")
			if ((node == -1)); then
				node=unknown
			fi
		fi
		if [ "$driver" = "nvme" ] && [ -d "/sys/bus/pci/devices/$bdf/nvme" ]; then
			name=$(ls "/sys/bus/pci/devices/$bdf/nvme")
		else
			name="-"
		fi

		if [[ -n ${nvme_d["$bdf"]} || -n ${virtio_d["$bdf"]} ]]; then
			blknames=($(get_block_dev_from_bdf "$bdf"))
		else
			blknames=("-")
		fi

		# The first device class that claims this BDF determines the label
		desc=""
		desc=${desc:-${nvme_d["$bdf"]:+NVMe${nvme_vmd_d["$bdf"]:+@${nvme_vmd_d["$bdf"]}(VMD)}}}
		desc=${desc:-${ioat_d["$bdf"]:+I/OAT}}
		desc=${desc:-${dsa_d["$bdf"]:+DSA}}
		desc=${desc:-${iaa_d["$bdf"]:+IAA}}
		desc=${desc:-${virtio_d["$bdf"]:+virtio}}
		desc=${desc:-${vmd_d["$bdf"]:+VMD}}

		printf '%-25s %-15s %-6s %-6s %-7s %-16s %-10s %s\n' \
			"$desc" "$bdf" "${pci_ids_vendor["$bdf"]#0x}" "${pci_ids_device["$bdf"]#0x}" \
			"$node" "${driver:--}" "${name:-}" "${blknames[*]:--}"
	done
}

# FreeBSD "status" mode: print the contigmem state and a per-class device
# table. The table header goes to stderr, rows to stdout.
function status_freebsd() {
	local pci

	# status_print TYPE BDF... - print one row per BDF, sorted by BDF.
	# Defined with a subshell body, so nothing it sets leaks out.
	status_print() (
		local type=$1

		shift

		for pci; do
			printf '%-8s %-15s %-6s %-6s %-16s\n' \
				"$type" \
				"$pci" \
				"${pci_ids_vendor["$pci"]}" \
				"${pci_ids_device["$pci"]}" \
				"${pci_bus_driver["$pci"]}"
		done | sort -k2,2
	)

	local contigmem=present
	local contigmem_buffer_size
	local contigmem_num_buffers

	if ! kldstat -q -m contigmem; then
		contigmem="not present"
	fi
	if ! contigmem_buffer_size=$(kenv hw.contigmem.buffer_size 2> /dev/null); then
		contigmem_buffer_size="not set"
	fi
	if ! contigmem_num_buffers=$(kenv hw.contigmem.num_buffers 2> /dev/null); then
		contigmem_num_buffers="not set"
	fi

	cat <<- BSD_INFO
		Contigmem ($contigmem)
		Buffer Size: $contigmem_buffer_size
		Num Buffers: $contigmem_num_buffers

	BSD_INFO

	printf '\n%-8s %-15s %-6s %-6s %-16s\n' \
		"Type" "BDF" "Vendor" "Device" "Driver" >&2

	status_print "NVMe" "${!nvme_d[@]}"
	status_print "I/OAT" "${!ioat_d[@]}"
	status_print "DSA" "${!dsa_d[@]}"
	status_print "IAA" "${!iaa_d[@]}"
	status_print "VMD" "${!vmd_d[@]}"
}

# (Re)load nic_uio with the BDFs given as arguments plus any unsupported ones
# already present in the kernel environment. Returns 1 without touching
# anything when unsupported devices exist and FORCE_NIC_UIO_REBIND is not "yes".
function configure_freebsd_pci() {
	local BDFS

	BDFS+=("$@")

	if ((${#unsupported_nic_uio_hw[@]} > 0)) && [[ $FORCE_NIC_UIO_REBIND != yes ]]; then
		warn_unsupported_nic_uio_hw
		return 1
	fi

	BDFS+=("${unsupported_nic_uio_hw[@]}")

	if kldstat -n nic_uio &> /dev/null; then
		kldunload nic_uio.ko
	fi

	# Join the list with commas for hw.nic_uio.bdfs
	local IFS=","
	kenv hw.nic_uio.bdfs="${BDFS[*]}"
	kldload nic_uio.ko
}

# Fill the global unsupported_nic_uio_hw array with every BDF from
# hw.nic_uio.bdfs that grep does not find among the all_devices_d keys.
# Always returns 0.
function get_unsupported_nic_uio_hw() {
	local bdfs bdf
	local -g unsupported_nic_uio_hw

	IFS="," read -ra bdfs < <(kenv hw.nic_uio.bdfs 2> /dev/null) || return 0

	for bdf in "${bdfs[@]}"; do
		grep -q "$bdf" <(printf '%s\n' "${!all_devices_d[@]}") || unsupported_nic_uio_hw+=("$bdf")
	done

	return 0
}

# Print the list of unsupported nic_uio devices and how to override the check.
function warn_unsupported_nic_uio_hw() {
	cat <<- NIC_UIO

		WARNING: Unsupported devices detected in the nic_uio setup:

		$(printf ' %s\n' "${unsupported_nic_uio_hw[@]}")

		Remove them first or pass FORCE_NIC_UIO_REBIND=yes through the environment.

	NIC_UIO
}

# FreeBSD "config" mode: configure all NVMe, I/OAT, DSA, IAA and VMD devices.
function configure_freebsd() {
	_configure_freebsd "${!nvme_d[@]}" "${!ioat_d[@]}" "${!dsa_d[@]}" "${!iaa_d[@]}" "${!vmd_d[@]}"
}

# Bind the given BDFs through nic_uio and (re)load contigmem sized from
# HUGEMEM, using 256 MB buffers. Returns 1 when the DPDK drivers are missing.
function _configure_freebsd() {
	if ! check_for_driver_freebsd; then
		echo "DPDK drivers (contigmem and/or nic_uio) are missing, aborting" >&2
		return 1
	fi
	configure_freebsd_pci "$@"
	# If contigmem is already loaded but the HUGEMEM specified doesn't match the
	# previous value, unload contigmem so that we can reload with the new value.
	if kldstat -q -m contigmem; then
		# contigmem may be loaded, but the kernel environment doesn't have to
		# be necessarily set at this point. If it isn't, kenv will fail to
		# pick up the hw. options. Handle it.
		if ! contigmem_num_buffers=$(kenv hw.contigmem.num_buffers); then
			contigmem_num_buffers=-1
		fi 2> /dev/null
		if ((contigmem_num_buffers != HUGEMEM / 256)); then
			kldunload contigmem.ko
		fi
	fi
	if ! kldstat -q -m contigmem; then
		kenv hw.contigmem.num_buffers=$((HUGEMEM / 256))
		kenv hw.contigmem.buffer_size=$((256 * 1024 * 1024))
		kldload contigmem.ko
	fi
}

# FreeBSD "reset" mode: unload contigmem and nic_uio. Returns 1 without
# touching anything when unsupported devices exist and FORCE_NIC_UIO_REBIND is
# not "yes".
function reset_freebsd() {
	# Don't reap the entire nic_uio setup in case there are unsupported devices in the kernel env
	if ((${#unsupported_nic_uio_hw[@]} > 0)) && [[ $FORCE_NIC_UIO_REBIND != yes ]]; then
		warn_unsupported_nic_uio_hw
		return 1
	fi

	kldunload contigmem.ko || true
	kldunload nic_uio.ko || true

	if ((${#unsupported_nic_uio_hw[@]} > 0)); then
		# HACK: try to be nice and recreate the setup but only with the unsupported devices
		_unsupported_nic_uio_hw=("${unsupported_nic_uio_hw[@]}") unsupported_nic_uio_hw=()
		_configure_freebsd "${_unsupported_nic_uio_hw[@]}"
	fi
}

# Resolve the hugepage size and the default number of hugepages.
#
# Globals written:
#   HUGEPGSZ    - hugepage size in kB; a user-supplied size the kernel does
#                 not list is dropped in favour of Hugepagesize from
#                 /proc/meminfo
#   HUGEPGSZ_MB - HUGEPGSZ in MB (0 for sizes below 1 MB)
#   NRHUGE      - kept if already set, otherwise HUGEMEM (MB) divided by the
#                 page size, rounded up
function set_hp() {
	if [[ -n $HUGEPGSZ && ! -e /sys/kernel/mm/hugepages/hugepages-${HUGEPGSZ}kB ]]; then
		echo "${HUGEPGSZ}kB is not supported by the running kernel, ignoring" >&2
		unset -v HUGEPGSZ
	fi

	HUGEPGSZ=${HUGEPGSZ:-$(grep Hugepagesize /proc/meminfo | cut -d : -f 2 | tr -dc '0-9')}
	HUGEPGSZ_MB=$((HUGEPGSZ / 1024))
	# Computed in kB: dividing by HUGEPGSZ_MB would be a division by zero for
	# hugepage sizes below 1 MB. The result is identical for MB-multiple sizes.
	NRHUGE=${NRHUGE:-$(((HUGEMEM * 1024 + HUGEPGSZ - 1) / HUGEPGSZ))}
}

kmsg "spdk: $0 $* (start)"

CMD=reset cache_pci_bus

mode=$1

if [ -z "$mode" ]; then
	mode="config"
fi

: "${HUGEMEM:=2048}"
: "${PCI_ALLOWED:=}"
: "${PCI_BLOCKED:=}"

if [ -n "$NVME_ALLOWED" ]; then
	PCI_ALLOWED="$PCI_ALLOWED $NVME_ALLOWED"
fi

if [ -n "$SKIP_PCI" ]; then
	PCI_ALLOWED="none"
fi

if [ -z "$TARGET_USER" ]; then
	TARGET_USER="$SUDO_USER"
	if [ -z "$TARGET_USER" ]; then
		TARGET_USER=$(logname 2> /dev/null) || true
	fi
fi

collect_devices "$mode"

if [[ $os == Linux ]]; then
	set_hp
fi

if [[ $mode == interactive ]]; then
	source "$rootdir/scripts/common/setup/interactive.sh"
	main_menu "$2" || exit 0
fi

if [[ $mode == reset && $PCI_BLOCK_SYNC_ON_RESET == yes ]]; then
	# Note that this will wait only for the first block device attached to
	# a given storage controller. For nvme this may miss some of the devs
	# in case multiple namespaces are in place.
	# FIXME: Wait for nvme controller(s) to be in live state and determine
	# number of configured namespaces, build list of potential block devs
	# and pass them to sync_dev_uevents. Is it worth the effort?
	bdfs_to_wait_for=()
	for bdf in "${!all_devices_d[@]}"; do
		((all_devices_d["$bdf"] == 0)) || continue
		if [[ -n ${nvme_d["$bdf"]} || -n ${virtio_d["$bdf"]} ]]; then
			# Skip devices that are already bound to the driver reset would pick
			[[ $(collect_driver "$bdf") != "${pci_bus_driver["$bdf"]}" ]] || continue
			bdfs_to_wait_for+=("$bdf")
		fi
	done
	if ((${#bdfs_to_wait_for[@]} > 0)); then
		echo "Waiting for block devices as requested"
		export UEVENT_TIMEOUT=5 DEVPATH_LOOKUP=yes DEVPATH_SUBSYSTEM=pci
		"$rootdir/scripts/sync_dev_uevents.sh" \
			block/disk \
			"${bdfs_to_wait_for[@]}" &
		sync_pid=$!
	fi
fi

if [[ $os == Linux ]]; then
	case "$mode" in
		config)
			configure_linux
			;;
		cleanup)
			cleanup_linux
			clear_hugepages
			;;
		reset)
			reset_linux
			;;
		status)
			status_linux
			;;
		help)
			usage "$0"
			;;
		*)
			usage "$0" "Invalid argument '$mode'"
			;;
	esac
else
	case "$mode" in
		config)
			configure_freebsd
			;;
		reset)
			reset_freebsd
			;;
		cleanup)
			echo "setup.sh cleanup function not yet supported on $os"
			;;
		status)
			status_freebsd
			;;
		help)
			usage "$0"
			;;
		*)
			usage "$0" "Invalid argument '$mode'"
			;;
	esac
fi

# Only wait when the uevent sync helper was actually started and still runs
if [[ -n $sync_pid && -e /proc/$sync_pid/status ]]; then
	wait "$sync_pid"
fi

kmsg "spdk: $0 $* (done)"