#!/usr/bin/env bash

set -e

os=$(uname -s)

if [[ $os != Linux && $os != FreeBSD ]]; then
	echo "Unsupported platform ($os), aborting"
	exit 1
fi

rootdir=$(readlink -f $(dirname $0))/..
source "$rootdir/scripts/common.sh"

function usage() {
	if [[ $os == Linux ]]; then
		options="[config|reset|status|cleanup|help]"
	else
		options="[config|reset|help]"
	fi

	[[ -n $2 ]] && (
		echo "$2"
		echo ""
	)
	echo "Helper script for allocating hugepages and binding NVMe, I/OAT, VMD and Virtio devices"
	echo "to a generic VFIO kernel driver. If VFIO is not available on the system, this script"
	echo "will fall back to UIO. NVMe and Virtio devices with active mountpoints will be ignored."
	echo "All hugepage operations use the default hugepage size on the system (hugepagesz)."
	echo "Usage: $(basename $1) $options"
	echo
	echo "$options - as follows:"
	echo "config            Default mode. Allocate hugepages and bind PCI devices."
	if [[ $os == Linux ]]; then
		echo "cleanup           Remove any orphaned files that can be left in the system after SPDK application exit"
	fi
	echo "reset             Rebind PCI devices back to their original drivers."
	echo "                  Also clean up any leftover spdk files/resources."
	echo "                  Hugepage memory size will remain unchanged."
	if [[ $os == Linux ]]; then
		echo "status            Print status of all SPDK-compatible devices on the system."
	fi
	echo "help              Print this help message."
	echo
	echo "The following environment variables can be specified:"
	echo "HUGEMEM           Size of hugepage memory to allocate (in MB). 2048 by default."
	echo "                  For NUMA systems, the hugepages will be evenly distributed"
	echo "                  between CPU nodes."
	echo "NRHUGE            Number of hugepages to allocate. This variable overrides HUGEMEM."
	echo "HUGENODE          Specific NUMA node to allocate hugepages on. To allocate"
	echo "                  hugepages on multiple nodes run this script multiple times -"
	echo "                  once for each node."
	echo "PCI_WHITELIST"
	echo "PCI_BLACKLIST     Whitespace-separated list of PCI devices (NVMe, I/OAT, VMD, Virtio)."
	echo "                  Each device must be specified as a full PCI address."
	echo "                  E.g. PCI_WHITELIST=\"0000:01:00.0 0000:02:00.0\""
	echo "                  To blacklist all PCI devices, use an invalid address."
	echo "                  E.g. PCI_WHITELIST=\"none\""
	echo "                  If PCI_WHITELIST and PCI_BLACKLIST are empty or unset, all PCI devices"
	echo "                  will be bound."
	echo "                  Each device in PCI_BLACKLIST will be ignored (driver won't be changed)."
	echo "                  PCI_BLACKLIST has precedence over PCI_WHITELIST."
	echo "TARGET_USER       User that will own the hugepage mountpoint directory and vfio groups."
	echo "                  By default the current user will be used."
	echo "DRIVER_OVERRIDE   Disable automatic vfio-pci/uio_pci_generic selection and forcefully"
	echo "                  bind devices to the given driver."
	echo "                  E.g. DRIVER_OVERRIDE=uio_pci_generic or DRIVER_OVERRIDE=/home/public/dpdk/build/kmod/igb_uio.ko"
	echo "PCI_BLOCK_SYNC_ON_RESET"
	echo "                  If set in the environment, an attempt to wait for block devices associated"
	echo "                  with a given PCI device will be made upon reset."
	exit 0
}

# In monolithic kernels lsmod won't work, so back it up with a check of
# /sys/module. We also check /sys/bus/pci/drivers/, as neither lsmod nor
# /sys/module may contain the needed info (e.g. on Fedora-like distributions).
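# Return values use an inverted convention (callers test "! check_for_driver"):
#   0 - driver not found on the system
#   1 - driver module is currently loaded (according to lsmod)
#   2 - driver is present under /sys/module or /sys/bus/pci/drivers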
function check_for_driver() {
	if lsmod | grep -q ${1//-/_}; then
		return 1
	fi

	if [[ -d /sys/module/${1} || -d \
		/sys/module/${1//-/_} || -d \
		/sys/bus/pci/drivers/${1} || -d \
		/sys/bus/pci/drivers/${1//-/_} ]]; then
		return 2
	fi
	return 0
}

function pci_dev_echo() {
	local bdf="$1"
	shift
	echo "$bdf (${pci_ids_vendor["$bdf"]#0x} ${pci_ids_device["$bdf"]#0x}): $*"
}

function linux_bind_driver() {
	bdf="$1"
	driver_name="$2"
	old_driver_name=${drivers_d["$bdf"]:-no driver}
	ven_dev_id="${pci_ids_vendor["$bdf"]#0x} ${pci_ids_device["$bdf"]#0x}"

	if [[ $driver_name == "$old_driver_name" ]]; then
		pci_dev_echo "$bdf" "Already using the $old_driver_name driver"
		return 0
	fi

	if [[ $old_driver_name != "no driver" ]]; then
		echo "$ven_dev_id" > "/sys/bus/pci/devices/$bdf/driver/remove_id" 2> /dev/null || true
		echo "$bdf" > "/sys/bus/pci/devices/$bdf/driver/unbind"
	fi

	pci_dev_echo "$bdf" "$old_driver_name -> $driver_name"

	echo "$ven_dev_id" > "/sys/bus/pci/drivers/$driver_name/new_id" 2> /dev/null || true
	echo "$bdf" > "/sys/bus/pci/drivers/$driver_name/bind" 2> /dev/null || true

	if [[ $driver_name == uio_pci_generic && -e /sys/module/igb_uio ]]; then
		# Check if the uio_pci_generic driver is broken as it might be in
		# some 4.18.x kernels (see centos8 for instance) - if our device
		# didn't get a proper uio entry, fall back to igb_uio
		if [[ ! -e /sys/bus/pci/devices/$bdf/uio ]]; then
			pci_dev_echo "$bdf" "uio_pci_generic potentially broken, moving to igb_uio"
			drivers_d["$bdf"]="no driver"
			# This call will override $driver_name for the remaining devices as well
			linux_bind_driver "$bdf" igb_uio
		fi
	fi

	iommu_group=$(basename $(readlink -f /sys/bus/pci/devices/$bdf/iommu_group))
	if [ -e "/dev/vfio/$iommu_group" ]; then
		if [ -n "$TARGET_USER" ]; then
			chown "$TARGET_USER" "/dev/vfio/$iommu_group"
		fi
	fi
}

function linux_unbind_driver() {
	local bdf="$1"
	local ven_dev_id
	ven_dev_id="${pci_ids_vendor["$bdf"]#0x} ${pci_ids_device["$bdf"]#0x}"
	local old_driver_name=${drivers_d["$bdf"]:-no driver}

	if [[ -e /sys/bus/pci/drivers/$old_driver_name ]]; then
		echo "$ven_dev_id" > "/sys/bus/pci/drivers/$old_driver_name/remove_id" 2> /dev/null || true
		echo "$bdf" > "/sys/bus/pci/drivers/$old_driver_name/unbind"
	fi

	pci_dev_echo "$bdf" "$old_driver_name -> no driver"
}

function linux_hugetlbfs_mounts() {
	mount | grep ' type hugetlbfs ' | awk '{ print $3 }'
}

function get_block_dev_from_bdf() {
	local bdf=$1
	local block

	for block in /sys/block/*; do
		if [[ $(readlink -f "$block/device") == *"/$bdf/"* ]]; then
			echo "${block##*/}"
			return 0
		fi
	done
}

function get_mounted_part_dev_from_bdf_block() {
	local bdf=$1
	local blocks block part

	blocks=($(get_block_dev_from_bdf "$bdf"))

	for block in "${blocks[@]}"; do
		for part in "/sys/block/$block/$block"*; do
			[[ -b /dev/${part##*/} ]] || continue
			if [[ $(< /proc/self/mountinfo) == *" $(< "$part/dev") "* ]]; then
				echo "${part##*/}"
			fi
		done
	done
}

function collect_devices() {
	# NVMe, IOAT, IDXD, VIRTIO, VMD

	local ids dev_type dev_id bdf bdfs in_use driver

	ids+="PCI_DEVICE_ID_INTEL_IOAT"
	ids+="|PCI_DEVICE_ID_INTEL_IDXD"
	ids+="|PCI_DEVICE_ID_VIRTIO"
	ids+="|PCI_DEVICE_ID_INTEL_VMD"
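	# NVMe controllers are matched by PCI class code rather than by a
	# vendor:device ID, hence the separate entry below.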
ids+="|SPDK_PCI_CLASS_NVME" 195 196 local -gA nvme_d ioat_d idxd_d virtio_d vmd_d all_devices_d drivers_d 197 198 while read -r _ dev_type dev_id; do 199 bdfs=(${pci_bus_cache["0x8086:$dev_id"]}) 200 [[ $dev_type == *NVME* ]] && bdfs=(${pci_bus_cache["$dev_id"]}) 201 [[ $dev_type == *VIRT* ]] && bdfs=(${pci_bus_cache["0x1af4:$dev_id"]}) 202 [[ $dev_type =~ (NVME|IOAT|IDXD|VIRTIO|VMD) ]] && dev_type=${BASH_REMATCH[1],,} 203 for bdf in "${bdfs[@]}"; do 204 in_use=0 205 if [[ $1 != status ]]; then 206 if ! pci_can_use "$bdf"; then 207 pci_dev_echo "$bdf" "Skipping denied controller at $bdf" 208 in_use=1 209 fi 210 if [[ $dev_type == nvme || $dev_type == virtio ]]; then 211 if ! verify_bdf_mounts "$bdf"; then 212 in_use=1 213 fi 214 fi 215 if [[ $dev_type == vmd ]]; then 216 if [[ $PCI_WHITELIST != *"$bdf"* ]]; then 217 pci_dev_echo "$bdf" "Skipping not allowed VMD controller at $bdf" 218 in_use=1 219 fi 220 fi 221 fi 222 eval "${dev_type}_d[$bdf]=$in_use" 223 all_devices_d["$bdf"]=$in_use 224 if [[ -e /sys/bus/pci/devices/$bdf/driver ]]; then 225 driver=$(readlink -f "/sys/bus/pci/devices/$bdf/driver") 226 drivers_d["$bdf"]=${driver##*/} 227 fi 228 done 229 done < <(grep -E "$ids" "$rootdir/include/spdk/pci_ids.h") 230} 231 232function collect_driver() { 233 local bdf=$1 234 local override_driver=$2 235 local drivers driver 236 237 [[ -e /sys/bus/pci/devices/$bdf/modalias ]] || return 1 238 if drivers=($(modprobe -R "$(< "/sys/bus/pci/devices/$bdf/modalias")")); then 239 # Pick first entry in case multiple aliases are bound to a driver. 240 driver=$(readlink -f "/sys/module/${drivers[0]}/drivers/pci:"*) 241 driver=${driver##*/} 242 else 243 driver=$override_driver 244 fi 2> /dev/null 245 echo "$driver" 246} 247 248function verify_bdf_mounts() { 249 local bdf=$1 250 local blknames=($(get_mounted_part_dev_from_bdf_block "$bdf")) 251 252 if ((${#blknames[@]} > 0)); then 253 for name in "${blknames[@]}"; do 254 pci_dev_echo "$bdf" "Active mountpoints on /dev/$name, so not binding PCI dev" 255 done 256 return 1 257 fi 258} 259 260function configure_linux_pci() { 261 local driver_path="" 262 driver_name="" 263 igb_uio_fallback="" 264 265 # igb_uio is a common driver to override with and it depends on uio. 266 modprobe uio 267 if [[ -r "$rootdir/dpdk/build-tmp/kernel/linux/igb_uio/igb_uio.ko" ]]; then 268 igb_uio_fallback=$rootdir/dpdk/build-tmp/kernel/linux/igb_uio/igb_uio.ko 269 insmod "$igb_uio_fallback" || true 270 fi 271 272 if [[ -n "${DRIVER_OVERRIDE}" ]]; then 273 driver_path="$DRIVER_OVERRIDE" 274 driver_name="${DRIVER_OVERRIDE##*/}" 275 # modprobe and the sysfs don't use the .ko suffix. 276 driver_name=${driver_name%.ko} 277 # path = name -> there is no path 278 if [[ "$driver_path" = "$driver_name" ]]; then 279 driver_path="" 280 fi 281 elif [[ -n "$(ls /sys/kernel/iommu_groups)" || (-e \ 282 /sys/module/vfio/parameters/enable_unsafe_noiommu_mode && \ 283 "$(cat /sys/module/vfio/parameters/enable_unsafe_noiommu_mode)" == "Y") ]]; then 284 driver_name=vfio-pci 285 elif modinfo uio_pci_generic > /dev/null 2>&1; then 286 driver_name=uio_pci_generic 287 elif [[ -e $igb_uio_fallback ]]; then 288 driver_name="igb_uio" 289 echo "WARNING: uio_pci_generic not detected - using $driver_name" 290 else 291 echo "No valid drivers found [vfio-pci, uio_pci_generic, igb_uio]. Please either enable the vfio-pci or uio_pci_generic" 292 echo "kernel modules, or have SPDK build the igb_uio driver by running ./configure --with-igb-uio-driver and recompiling." 
		return 1
	fi

	# modprobe only searches the standard module directories. If the user passed
	# in a path, use insmod instead.
	if [[ -n "$driver_path" ]]; then
		insmod $driver_path || true
	else
		modprobe $driver_name
	fi

	for bdf in "${!all_devices_d[@]}"; do
		if ((all_devices_d["$bdf"] == 0)); then
			if [[ -n ${nvme_d["$bdf"]} ]]; then
				# Some nvme controllers may take a significant amount of time while being
				# unbound from the driver. Put that task into the background to speed up
				# the whole process. Currently this is done only for devices bound to the
				# nvme driver as others, e.g. ioatdma's, trigger a kernel BUG when being
				# unbound in parallel. See https://bugzilla.kernel.org/show_bug.cgi?id=209041.
				linux_bind_driver "$bdf" "$driver_name" &
			else
				linux_bind_driver "$bdf" "$driver_name"
			fi
		fi
	done
	wait

	echo "1" > "/sys/bus/pci/rescan"
}

function cleanup_linux() {
	shopt -s extglob nullglob
	dirs_to_clean=""
	dirs_to_clean="$(echo {/var/run,/tmp}/dpdk/spdk{,_pid}+([0-9])) "
	if [[ -d $XDG_RUNTIME_DIR && $XDG_RUNTIME_DIR != *" "* ]]; then
		# "assert_not_empty" is a dummy path that never resolves - it guarantees
		# readlink always gets at least one argument and forces a non-zero exit
		# status (hence the "|| true") when the glob matches nothing.
		dirs_to_clean+="$(readlink -e assert_not_empty $XDG_RUNTIME_DIR/dpdk/spdk{,_pid}+([0-9]) || true) "
	fi

	files_to_clean=""
	for dir in $dirs_to_clean; do
		files_to_clean+="$(echo $dir/*) "
	done
	shopt -u extglob nullglob

	files_to_clean+="$(ls -1 /dev/shm/* \
		| grep -E '(spdk_tgt|iscsi|vhost|nvmf|rocksdb|bdevio|bdevperf|vhost_fuzz|nvme_fuzz)_trace|spdk_iscsi_conns' || true) "
	files_to_clean="$(readlink -e assert_not_empty $files_to_clean || true)"
	if [[ -z "$files_to_clean" ]]; then
		echo "Clean"
		return 0
	fi

	shopt -s extglob
	for fd_dir in $(echo /proc/+([0-9])); do
		# Separate each process' list with a newline so entries from adjacent
		# processes can't concatenate into a single, unmatchable line.
		fds=$(readlink -e assert_not_empty $fd_dir/fd/* || true)
		[[ -n $fds ]] && opened_files+="$fds"$'\n'
	done
	shopt -u extglob

	if [[ -z "$opened_files" ]]; then
		echo "Can't get list of opened files!"
		exit 1
	fi

	echo 'Cleaning'
	for f in $files_to_clean; do
		if ! echo "$opened_files" | grep -E -q "^$f\$"; then
			echo "Removing: $f"
			rm $f
		else
			echo "Still open: $f"
		fi
	done

	for dir in $dirs_to_clean; do
		if ! echo "$opened_files" | grep -E -q "^$dir\$"; then
			echo "Removing: $dir"
			rmdir $dir
		else
			echo "Still open: $dir"
		fi
	done
	echo "Clean"

	unset dirs_to_clean files_to_clean opened_files
}

function configure_linux() {
	configure_linux_pci
	hugetlbfs_mounts=$(linux_hugetlbfs_mounts)

	if [ -z "$hugetlbfs_mounts" ]; then
		hugetlbfs_mounts=/mnt/huge
		echo "Mounting hugetlbfs at $hugetlbfs_mounts"
		mkdir -p "$hugetlbfs_mounts"
		mount -t hugetlbfs nodev "$hugetlbfs_mounts"
	fi

	if [ -z "$HUGENODE" ]; then
		hugepages_target="/proc/sys/vm/nr_hugepages"
	else
		hugepages_target="/sys/devices/system/node/node${HUGENODE}/hugepages/hugepages-${HUGEPGSZ}kB/nr_hugepages"
	fi

	echo "$NRHUGE" > "$hugepages_target"
	allocated_hugepages=$(cat $hugepages_target)
	if [ "$allocated_hugepages" -lt "$NRHUGE" ]; then
		echo ""
		echo "## ERROR: requested $NRHUGE hugepages but only $allocated_hugepages could be allocated."
		echo "## Memory might be heavily fragmented. Please try flushing the system cache, or reboot the machine."
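		# Treat a partial hugepage allocation as a hard failure.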
		exit 1
	fi

	if [ "$driver_name" = "vfio-pci" ]; then
		if [ -n "$TARGET_USER" ]; then
			for mount in $hugetlbfs_mounts; do
				chown "$TARGET_USER" "$mount"
				chmod g+w "$mount"
			done

			MEMLOCK_AMNT=$(su "$TARGET_USER" -c "ulimit -l")
			if [[ $MEMLOCK_AMNT != "unlimited" ]]; then
				MEMLOCK_MB=$((MEMLOCK_AMNT / 1024))
				cat <<- MEMLOCK
					"$TARGET_USER" user memlock limit: $MEMLOCK_MB MB

					This is the maximum amount of memory you will be
					able to use with DPDK and VFIO if run as user "$TARGET_USER".
					To change this, please adjust limits.conf memlock limit for user "$TARGET_USER".
				MEMLOCK
				if ((MEMLOCK_AMNT < 65536)); then
					echo ""
					echo "## WARNING: memlock limit is less than 64MB"
					echo -n "## DPDK with VFIO may not be able to initialize "
					echo "if run as user \"$TARGET_USER\"."
				fi
			fi
		fi
	fi

	# /dev/cpu/0/msr is a character device, so test with -e rather than -f.
	if [ ! -e /dev/cpu/0/msr ]; then
		# Some distros build msr as a module. Make sure it's loaded to ensure
		# DPDK can easily figure out the TSC rate rather than relying on 100ms
		# sleeps.
		modprobe msr || true
	fi
}

function reset_linux_pci() {
	# virtio
	# TODO: check if virtio-pci is loaded first and just unbind if it is not loaded.
	# Requires some more investigation - for example, some kernels do not seem to
	# have virtio-pci but just virtio_scsi instead. Also need to make sure we get
	# the underscore vs. dash right in the virtio_scsi name.
	modprobe virtio-pci || true
	for bdf in "${!all_devices_d[@]}"; do
		((all_devices_d["$bdf"] == 0)) || continue

		[[ -n ${nvme_d["$bdf"]} ]] && fallback_driver=nvme
		[[ -n ${ioat_d["$bdf"]} ]] && fallback_driver=ioatdma
		[[ -n ${idxd_d["$bdf"]} ]] && fallback_driver=idxd
		[[ -n ${virtio_d["$bdf"]} ]] && fallback_driver=virtio-pci
		[[ -n ${vmd_d["$bdf"]} ]] && fallback_driver=vmd
		driver=$(collect_driver "$bdf" "$fallback_driver")

		if ! check_for_driver "$driver"; then
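			# The target driver's kernel module is present on the system,
			# so rebind the device to it.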
check_for_driver "$driver"; then 457 linux_bind_driver "$bdf" "$driver" 458 else 459 linux_unbind_driver "$bdf" 460 fi 461 done 462 463 echo "1" > "/sys/bus/pci/rescan" 464} 465 466function reset_linux() { 467 reset_linux_pci 468 for mount in $(linux_hugetlbfs_mounts); do 469 rm -f "$mount"/spdk*map_* 470 done 471 rm -f /run/.spdk* 472} 473 474function status_linux() { 475 echo "Hugepages" 476 printf "%-6s %10s %8s / %6s\n" "node" "hugesize" "free" "total" 477 478 numa_nodes=0 479 shopt -s nullglob 480 for path in /sys/devices/system/node/node*/hugepages/hugepages-*/; do 481 numa_nodes=$((numa_nodes + 1)) 482 free_pages=$(cat $path/free_hugepages) 483 all_pages=$(cat $path/nr_hugepages) 484 485 [[ $path =~ (node[0-9]+)/hugepages/hugepages-([0-9]+kB) ]] 486 487 node=${BASH_REMATCH[1]} 488 huge_size=${BASH_REMATCH[2]} 489 490 printf "%-6s %10s %8s / %6s\n" $node $huge_size $free_pages $all_pages 491 done 492 shopt -u nullglob 493 494 # fall back to system-wide hugepages 495 if [ "$numa_nodes" = "0" ]; then 496 free_pages=$(grep HugePages_Free /proc/meminfo | awk '{ print $2 }') 497 all_pages=$(grep HugePages_Total /proc/meminfo | awk '{ print $2 }') 498 node="-" 499 huge_size="$HUGEPGSZ" 500 501 printf "%-6s %10s %8s / %6s\n" $node $huge_size $free_pages $all_pages 502 fi 503 504 echo -e "\nBDF\t\tVendor\tDevice\tNUMA\tDriver\t\tDevice name\n" 505 echo "NVMe devices" 506 507 for bdf in "${!nvme_d[@]}"; do 508 driver=${drivers_d["$bdf"]} 509 if [ "$numa_nodes" = "0" ]; then 510 node="-" 511 else 512 node=$(cat /sys/bus/pci/devices/$bdf/numa_node) 513 if ((node == -1)); then 514 node=unknown 515 fi 516 fi 517 if [ "$driver" = "nvme" ] && [ -d /sys/bus/pci/devices/$bdf/nvme ]; then 518 name="\t"$(ls /sys/bus/pci/devices/$bdf/nvme) 519 else 520 name="-" 521 fi 522 echo -e "$bdf\t${pci_ids_vendor["$bdf"]#0x}\t${pci_ids_device["$bdf"]#0x}\t$node\t${driver:--}\t\t$name" 523 done 524 525 echo "" 526 echo "I/OAT Engine" 527 528 for bdf in "${!ioat_d[@]}"; do 529 driver=${drivers_d["$bdf"]} 530 if [ "$numa_nodes" = "0" ]; then 531 node="-" 532 else 533 node=$(cat /sys/bus/pci/devices/$bdf/numa_node) 534 if ((node == -1)); then 535 node=unknown 536 fi 537 fi 538 echo -e "$bdf\t${pci_ids_vendor["$bdf"]#0x}\t${pci_ids_device["$bdf"]#0x}\t$node\t${driver:--}" 539 done 540 541 echo "" 542 echo "IDXD Engine" 543 544 for bdf in "${!idxd_d[@]}"; do 545 driver=${drivers_d["$bdf"]} 546 if [ "$numa_nodes" = "0" ]; then 547 node="-" 548 else 549 node=$(cat /sys/bus/pci/devices/$bdf/numa_node) 550 fi 551 echo -e "$bdf\t${pci_ids_vendor["$bdf"]#0x}\t${pci_ids_device["$bdf"]#0x}\t$node\t${driver:--}" 552 done 553 554 echo "" 555 echo "virtio" 556 557 for bdf in "${!virtio_d[@]}"; do 558 driver=${drivers_d["$bdf"]} 559 if [ "$numa_nodes" = "0" ]; then 560 node="-" 561 else 562 node=$(cat /sys/bus/pci/devices/$bdf/numa_node) 563 if ((node == -1)); then 564 node=unknown 565 fi 566 fi 567 blknames=($(get_mounted_part_dev_from_bdf_block "$bdf")) 568 echo -e "$bdf\t${pci_ids_vendor["$bdf"]#0x}\t${pci_ids_device["$bdf"]#0x}\t$node\t\t${driver:--}\t\t" "${blknames[@]}" 569 done 570 571 echo "" 572 echo "VMD" 573 574 for bdf in "${!vmd_d[@]}"; do 575 driver=${drivers_d["$bdf"]} 576 node=$(cat /sys/bus/pci/devices/$bdf/numa_node) 577 if ((node == -1)); then 578 node=unknown 579 fi 580 echo -e "$bdf\t$node\t\t$driver" 581 done 582} 583 584function status_freebsd() { 585 local pci 586 587 status_print() ( 588 local dev driver 589 590 echo -e "BDF\t\tVendor\tDevice\tDriver" 591 592 for pci; do 593 driver=$(pciconf -l "pci$pci") 
			driver=${driver%@*}
			printf '%s\t%s\t%s\t%s\n' \
				"$pci" \
				"${pci_ids_vendor["$pci"]}" \
				"${pci_ids_device["$pci"]}" \
				"$driver"
		done
	)

	local contigmem=present
	if ! kldstat -q -m contigmem; then
		contigmem="not present"
	fi

	cat <<- BSD_INFO
		Contigmem ($contigmem)
		Buffer Size: $(kenv hw.contigmem.buffer_size)
		Num Buffers: $(kenv hw.contigmem.num_buffers)

		NVMe devices
		$(status_print "${!nvme_d[@]}")

		I/OAT DMA
		$(status_print "${!ioat_d[@]}")

		IDXD DMA
		$(status_print "${!idxd_d[@]}")

		VMD
		$(status_print "${!vmd_d[@]}")
	BSD_INFO
}

function configure_freebsd_pci() {
	local BDFS

	BDFS+=("${!nvme_d[@]}")
	BDFS+=("${!ioat_d[@]}")
	BDFS+=("${!idxd_d[@]}")
	BDFS+=("${!vmd_d[@]}")

	# Drop the domain part from all the addresses.
	BDFS=("${BDFS[@]#*:}")

	local IFS=","
	kldunload nic_uio.ko || true
	kenv hw.nic_uio.bdfs="${BDFS[*]}"
	kldload nic_uio.ko
}

function configure_freebsd() {
	configure_freebsd_pci
	# If contigmem is already loaded but the HUGEMEM specified doesn't match the
	# previous value, unload contigmem so that we can reload with the new value.
	if kldstat -q -m contigmem; then
		if [ $(kenv hw.contigmem.num_buffers) -ne "$((HUGEMEM / 256))" ]; then
			kldunload contigmem.ko
		fi
	fi
	if ! kldstat -q -m contigmem; then
		kenv hw.contigmem.num_buffers=$((HUGEMEM / 256))
		kenv hw.contigmem.buffer_size=$((256 * 1024 * 1024))
		kldload contigmem.ko
	fi
}

function reset_freebsd() {
	kldunload contigmem.ko || true
	kldunload nic_uio.ko || true
}

# cache_pci_bus() comes from scripts/common.sh and populates pci_bus_cache.
CMD=reset cache_pci_bus

mode=$1

if [ -z "$mode" ]; then
	mode="config"
fi

: ${HUGEMEM:=2048}
: ${PCI_WHITELIST:=""}
: ${PCI_BLACKLIST:=""}

if [ -n "$NVME_WHITELIST" ]; then
	PCI_WHITELIST="$PCI_WHITELIST $NVME_WHITELIST"
fi

if [ -n "$SKIP_PCI" ]; then
	PCI_WHITELIST="none"
fi

if [ -z "$TARGET_USER" ]; then
	TARGET_USER="$SUDO_USER"
	if [ -z "$TARGET_USER" ]; then
		TARGET_USER=$(logname 2> /dev/null) || true
	fi
fi

collect_devices "$mode"

if [[ $mode == reset && $PCI_BLOCK_SYNC_ON_RESET == yes ]]; then
	# Note that this will wait only for the first block device attached to
	# a given storage controller. For nvme this may miss some of the devs
	# in case multiple namespaces are in place.
	# FIXME: Wait for nvme controller(s) to be in live state, determine the
	# number of configured namespaces, build a list of potential block devs
	# and pass them to sync_dev_uevents. Is it worth the effort?
	bdfs_to_wait_for=()
	for bdf in "${!all_devices_d[@]}"; do
		((all_devices_d["$bdf"] == 0)) || continue
		if [[ -n ${nvme_d["$bdf"]} || -n ${virtio_d["$bdf"]} ]]; then
			[[ $(collect_driver "$bdf") != "${drivers_d["$bdf"]}" ]] || continue
			bdfs_to_wait_for+=("$bdf")
		fi
	done
	if ((${#bdfs_to_wait_for[@]} > 0)); then
		echo "Waiting for block devices as requested"
		export UEVENT_TIMEOUT=5 DEVPATH_LOOKUP=yes DEVPATH_SUBSYSTEM=pci
		"$rootdir/scripts/sync_dev_uevents.sh" \
			block/disk \
			"${bdfs_to_wait_for[@]}" &
		sync_pid=$!
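		# Remember the watcher's PID so the script can wait for it at the very
		# end, once the devices have been rebound.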
	fi
fi

if [[ $os == Linux ]]; then
	HUGEPGSZ=$(($(grep Hugepagesize /proc/meminfo | cut -d : -f 2 | tr -dc '0-9')))
	HUGEPGSZ_MB=$((HUGEPGSZ / 1024))
	: ${NRHUGE=$(((HUGEMEM + HUGEPGSZ_MB - 1) / HUGEPGSZ_MB))}

	if [ "$mode" == "config" ]; then
		configure_linux
	elif [ "$mode" == "cleanup" ]; then
		cleanup_linux
	elif [ "$mode" == "reset" ]; then
		reset_linux
	elif [ "$mode" == "status" ]; then
		status_linux
	elif [ "$mode" == "help" ]; then
		usage $0
	else
		usage $0 "Invalid argument '$mode'"
	fi
else
	if [ "$mode" == "config" ]; then
		configure_freebsd
	elif [ "$mode" == "reset" ]; then
		reset_freebsd
	elif [ "$mode" == "cleanup" ]; then
		echo "setup.sh cleanup function not yet supported on $os"
	elif [ "$mode" == "status" ]; then
		status_freebsd
	elif [ "$mode" == "help" ]; then
		usage $0
	else
		usage $0 "Invalid argument '$mode'"
	fi
fi

if [[ -e /proc/$sync_pid/status ]]; then
	wait "$sync_pid"
fi