#!/usr/bin/env bash
#
# Allocate hugepages and bind/unbind NVMe, I/OAT, IDXD, VMD and Virtio PCI
# devices to/from userspace-capable kernel drivers.
# Run with "help" for the list of modes and supported environment variables.

set -e

os=$(uname -s)

if [[ $os != Linux && $os != FreeBSD ]]; then
	echo "Not supported platform ($os), aborting"
	exit 1
fi

# Quoted so that a checkout path containing whitespace still resolves.
rootdir=$(readlink -f "$(dirname "$0")")/..
source "$rootdir/scripts/common.sh"

# Print the help text and exit with status 0.
#   $1 - path of this script (only its basename is shown)
#   $2 - optional message printed, followed by a blank line, before the help
function usage() {
	local options

	# "status" is dispatched on both platforms at the bottom of this script;
	# "cleanup" is only implemented on Linux.
	if [[ $os == Linux ]]; then
		options="[config|reset|status|cleanup|help]"
	else
		options="[config|reset|status|help]"
	fi

	if [[ -n $2 ]]; then
		echo "$2"
		echo ""
	fi
	echo "Helper script for allocating hugepages and binding NVMe, I/OAT, VMD and Virtio devices"
	echo "to a generic VFIO kernel driver. If VFIO is not available on the system, this script"
	echo "will fall back to UIO. NVMe and Virtio devices with active mountpoints will be ignored."
	echo "All hugepage operations use default hugepage size on the system (hugepagesz)."
	echo "Usage: $(basename "$1") $options"
	echo
	echo "$options - as following:"
	echo "config Default mode. Allocate hugepages and bind PCI devices."
	if [[ $os == Linux ]]; then
		echo "cleanup Remove any orphaned files that can be left in the system after SPDK application exit"
	fi
	echo "reset Rebind PCI devices back to their original drivers."
	echo " Also cleanup any leftover spdk files/resources."
	echo " Hugepage memory size will remain unchanged."
	echo "status Print status of all SPDK-compatible devices on the system."
	echo "help Print this help message."
	echo
	echo "The following environment variables can be specified."
	echo "HUGEMEM Size of hugepage memory to allocate (in MB). 2048 by default."
	echo " For NUMA systems, the hugepages will be evenly distributed"
	echo " between CPU nodes"
	echo "NRHUGE Number of hugepages to allocate. This variable overwrites HUGEMEM."
	echo "HUGENODE Specific NUMA node to allocate hugepages on. To allocate"
	echo " hugepages on multiple nodes run this script multiple times -"
	echo " once for each node."
	echo "PCI_WHITELIST"
	echo "PCI_BLACKLIST Whitespace separated list of PCI devices (NVMe, I/OAT, VMD, Virtio)."
	echo " Each device must be specified as a full PCI address."
	echo " E.g. PCI_WHITELIST=\"0000:01:00.0 0000:02:00.0\""
	echo " To blacklist all PCI devices use a non-valid address."
	echo " E.g. PCI_WHITELIST=\"none\""
	echo " If PCI_WHITELIST and PCI_BLACKLIST are empty or unset, all PCI devices"
	echo " will be bound."
	echo " Each device in PCI_BLACKLIST will be ignored (driver won't be changed)."
	echo " PCI_BLACKLIST has precedence over PCI_WHITELIST."
	echo "TARGET_USER User that will own hugepage mountpoint directory and vfio groups."
	echo " By default the current user will be used."
	echo "DRIVER_OVERRIDE Disable automatic vfio-pci/uio_pci_generic selection and forcefully"
	echo " bind devices to the given driver."
	echo " E.g. DRIVER_OVERRIDE=uio_pci_generic or DRIVER_OVERRIDE=/home/public/dpdk/build/kmod/igb_uio.ko"
	exit 0
}

# In monolithic kernels lsmod won't work, so it is backed up
# by a look at /sys/module. We also check
# /sys/bus/pci/drivers/ as neither lsmod nor /sys/module might
# contain the needed info (like in Fedora-like OS).
# Print the PCI device ids found in include/spdk/pci_ids.h, one per line.
#   $1 - grep pattern selecting the definitions, e.g. PCI_DEVICE_ID_INTEL_IOAT
# Every matching line is split on "x" and its second field is printed; for a
# "... 0x1234" definition that is presumably the hex id without the "0x" prefix.
# The pipeline status is awk's, so "no match" is not an error under set -e.
function _pci_ids_from_header() {
	grep "$1" "$rootdir/include/spdk/pci_ids.h" | awk -F"x" '{print $2}'
}

# Report whether a kernel driver is available.
#   $1 - driver name; dashes are also tried as underscores
# Returns (note: non-zero means "found", callers test with -ne 0):
#   1 - lsmod lists a module with exactly that name
#   2 - a matching directory exists in /sys/module or /sys/bus/pci/drivers
#   0 - the driver was not found
function check_for_driver() {
	local module="${1//-/_}"

	# Compare the module column exactly; a substring match would let e.g.
	# "nvme_core" be taken for "nvme".
	if lsmod | awk -v m="$module" '$1 == m { found = 1 } END { exit !found }'; then
		return 1
	fi

	if [[ -d /sys/module/$1 || -d /sys/module/$module ||
		-d /sys/bus/pci/drivers/$1 || -d /sys/bus/pci/drivers/$module ]]; then
		return 2
	fi
	return 0
}

# Print a message prefixed with the device address and its vendor/device ids.
#   $1 - PCI address (BDF); remaining arguments - message text
function pci_dev_echo() {
	local bdf="$1"
	local vendor
	local device
	vendor="$(cat "/sys/bus/pci/devices/$bdf/vendor")"
	device="$(cat "/sys/bus/pci/devices/$bdf/device")"
	shift
	echo "$bdf (${vendor#0x} ${device#0x}): $*"
}

# Bind a PCI device to the given driver, unbinding it from its current one.
#   $1 - PCI address (BDF)
#   $2 - name of the driver to bind to
# If /dev/vfio/<iommu group> exists afterwards and TARGET_USER is set, the
# group node is chown'ed to that user.
function linux_bind_driver() {
	# All state is local so the caller's driver_name/bdf are never clobbered.
	local bdf="$1"
	local driver_name="$2"
	local old_driver_name="no driver"
	local ven_dev_id iommu_group

	ven_dev_id=$(lspci -n -s "$bdf" | cut -d' ' -f3 | sed 's/:/ /')

	if [ -e "/sys/bus/pci/devices/$bdf/driver" ]; then
		old_driver_name=$(basename "$(readlink "/sys/bus/pci/devices/$bdf/driver")")

		if [ "$driver_name" = "$old_driver_name" ]; then
			pci_dev_echo "$bdf" "Already using the $old_driver_name driver"
			return 0
		fi

		# remove_id is best effort; the unbind itself must succeed.
		echo "$ven_dev_id" > "/sys/bus/pci/devices/$bdf/driver/remove_id" 2> /dev/null || true
		echo "$bdf" > "/sys/bus/pci/devices/$bdf/driver/unbind"
	fi

	pci_dev_echo "$bdf" "$old_driver_name -> $driver_name"

	# Both writes are best effort, as in the unbind path above.
	echo "$ven_dev_id" > "/sys/bus/pci/drivers/$driver_name/new_id" 2> /dev/null || true
	echo "$bdf" > "/sys/bus/pci/drivers/$driver_name/bind" 2> /dev/null || true

	iommu_group=$(basename "$(readlink -f "/sys/bus/pci/devices/$bdf/iommu_group")")
	if [ -e "/dev/vfio/$iommu_group" ]; then
		if [ -n "$TARGET_USER" ]; then
			chown "$TARGET_USER" "/dev/vfio/$iommu_group"
		fi
	fi
}

# Detach a PCI device from whatever driver currently owns it.
#   $1 - PCI address (BDF)
function linux_unbind_driver() {
	local bdf="$1"
	local ven_dev_id
	local old_driver_name="no driver"

	ven_dev_id=$(lspci -n -s "$bdf" | cut -d' ' -f3 | sed 's/:/ /')

	if [ -e "/sys/bus/pci/devices/$bdf/driver" ]; then
		old_driver_name=$(basename "$(readlink "/sys/bus/pci/devices/$bdf/driver")")
		echo "$ven_dev_id" > "/sys/bus/pci/devices/$bdf/driver/remove_id" 2> /dev/null || true
		echo "$bdf" > "/sys/bus/pci/devices/$bdf/driver/unbind"
	fi

	pci_dev_echo "$bdf" "$old_driver_name -> no driver"
}

# Print the mountpoint of every mounted hugetlbfs filesystem, one per line.
function linux_hugetlbfs_mounts() {
	mount | grep ' type hugetlbfs ' | awk '{ print $3 }'
}

# Print the names of the nvme block devices that sit on a PCI device.
#   $1 - PCI address (BDF)
# Output is one name per line (a single empty line when nothing matches).
function get_nvme_name_from_bdf() {
	local blknames=()
	local nvme_devs dev link_name link_bdf

	# grep exits non-zero when there are no nvme block devices at all.
	nvme_devs=$(lsblk -d --output NAME | grep "^nvme") || true
	for dev in $nvme_devs; do
		link_name=$(readlink "/sys/block/$dev/device/device") || true
		if [ -z "$link_name" ]; then
			link_name=$(readlink "/sys/block/$dev/device")
		fi
		link_bdf=$(basename "$link_name")
		if [ "$link_bdf" = "$1" ]; then
			blknames+=("$dev")
		fi
	done

	printf '%s\n' "${blknames[@]}"
}

# Collect the block devices whose /sys/block link target contains a PCI address.
#   $1 - PCI address (BDF)
#   $2 - name of the caller's array variable that receives the device names
function get_virtio_names_from_bdf() {
	local blk_devs dev
	local virtio_names=()

	blk_devs=$(lsblk --nodeps --output NAME)

	for dev in $blk_devs; do
		# -F: the address contains dots and must not be treated as a regex.
		if readlink "/sys/block/$dev" | grep -qF -- "$1"; then
			virtio_names+=("$dev")
		fi
	done

	# Only the variable name is expanded before eval; the element values are
	# copied by reference and therefore never re-parsed by the shell.
	eval "$2=(\"\${virtio_names[@]}\")"
}

# Choose (and load) the userspace driver, then bind all usable NVMe, I/OAT,
# IDXD, Virtio and VMD devices to it and trigger a PCI rescan.
# Sets the global driver_name. Returns 1 when no suitable driver is available.
function configure_linux_pci() {
	local driver_path=""
	local bdf dev_id blkname name mount mountpoints
	local -a blknames
	# Deliberately global: configure_linux reads it after this function returns.
	driver_name=""
	if [[ -n "${DRIVER_OVERRIDE}" ]]; then
		driver_path="$DRIVER_OVERRIDE"
		driver_name="${DRIVER_OVERRIDE##*/}"
		# modprobe and the sysfs don't use the .ko suffix.
		driver_name=${driver_name%.ko}
		# path = name -> there is no path
		if [[ "$driver_path" = "$driver_name" ]]; then
			driver_path=""
		fi
		# igb_uio is a common driver to override with and it depends on uio.
		if [[ "$driver_name" = "igb_uio" ]]; then
			modprobe uio
		fi
	elif [[ -n "$(ls /sys/kernel/iommu_groups)" || (-e \
		/sys/module/vfio/parameters/enable_unsafe_noiommu_mode && \
		"$(cat /sys/module/vfio/parameters/enable_unsafe_noiommu_mode)" == "Y") ]]; then
		driver_name=vfio-pci
	elif modinfo uio_pci_generic > /dev/null 2>&1; then
		driver_name=uio_pci_generic
	elif [[ -r "$rootdir/dpdk/build/kmod/igb_uio.ko" ]]; then
		driver_path="$rootdir/dpdk/build/kmod/igb_uio.ko"
		driver_name="igb_uio"
		modprobe uio
		echo "WARNING: uio_pci_generic not detected - using $driver_name"
	else
		echo "No valid drivers found [vfio-pci, uio_pci_generic, igb_uio]. Please either enable the vfio-pci or uio_pci_generic"
		echo "kernel modules, or have SPDK build the igb_uio driver by running ./configure --with-igb-uio-driver and recompiling."
		return 1
	fi

	# modprobe assumes the directory of the module. If the user passes in a path, we should use insmod
	if [[ -n "$driver_path" ]]; then
		insmod "$driver_path" || true
	else
		modprobe "$driver_name"
	fi

	# NVMe
	# pci_bus_cache entries are expanded unquoted on purpose: each entry is a
	# whitespace separated list of addresses.
	for bdf in ${pci_bus_cache["0x010802"]}; do
		blknames=()
		if ! pci_can_use "$bdf"; then
			pci_dev_echo "$bdf" "Skipping un-whitelisted NVMe controller at $bdf"
			continue
		fi

		mount=false
		for blkname in $(get_nvme_name_from_bdf "$bdf"); do
			mountpoints=$(lsblk "/dev/$blkname" --output MOUNTPOINT -n | wc -w)
			if [ "$mountpoints" != "0" ]; then
				mount=true
				blknames+=("$blkname")
			fi
		done

		if ! $mount; then
			linux_bind_driver "$bdf" "$driver_name"
		else
			for name in "${blknames[@]}"; do
				pci_dev_echo "$bdf" "Active mountpoints on /dev/$name, so not binding PCI dev"
			done
		fi
	done

	# IOAT
	# The id lists are streamed through process substitution, so no temporary
	# file is left behind when a bind fails and set -e aborts the script.
	while IFS= read -r dev_id; do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			if ! pci_can_use "$bdf"; then
				pci_dev_echo "$bdf" "Skipping un-whitelisted I/OAT device"
				continue
			fi

			linux_bind_driver "$bdf" "$driver_name"
		done
	done < <(_pci_ids_from_header "PCI_DEVICE_ID_INTEL_IOAT")

	# IDXD
	while IFS= read -r dev_id; do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			if ! pci_can_use "$bdf"; then
				pci_dev_echo "$bdf" "Skipping un-whitelisted IDXD device"
				continue
			fi

			linux_bind_driver "$bdf" "$driver_name"
		done
	done < <(_pci_ids_from_header "PCI_DEVICE_ID_INTEL_IDXD")

	# virtio
	while IFS= read -r dev_id; do
		for bdf in ${pci_bus_cache["0x1af4:0x$dev_id"]}; do
			if ! pci_can_use "$bdf"; then
				pci_dev_echo "$bdf" "Skipping un-whitelisted Virtio device at $bdf"
				continue
			fi
			blknames=()
			get_virtio_names_from_bdf "$bdf" blknames
			for blkname in "${blknames[@]}"; do
				if [ "$(lsblk "/dev/$blkname" --output MOUNTPOINT -n | wc -w)" != "0" ]; then
					pci_dev_echo "$bdf" "Active mountpoints on /dev/$blkname, so not binding"
					# Leave the device alone and move on to the next address.
					continue 2
				fi
			done

			linux_bind_driver "$bdf" "$driver_name"
		done
	done < <(_pci_ids_from_header "PCI_DEVICE_ID_VIRTIO")

	# VMD
	while IFS= read -r dev_id; do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			# VMD devices are only touched when a whitelist is set.
			if [[ -z "$PCI_WHITELIST" ]] || ! pci_can_use "$bdf"; then
				echo "Skipping un-whitelisted VMD device at $bdf"
				continue
			fi

			linux_bind_driver "$bdf" "$driver_name"
			echo " VMD generic kdrv: " "$bdf" "$driver_name"
		done
	done < <(_pci_ids_from_header "PCI_DEVICE_ID_INTEL_VMD")

	echo "1" > "/sys/bus/pci/rescan"
}

# Remove leftover SPDK/DPDK runtime files and directories (under /var/run,
# /tmp, $XDG_RUNTIME_DIR and /dev/shm) that no process still has open.
# Exits with status 1 when the list of open files cannot be obtained.
function cleanup_linux() {
	local dirs_to_clean="" files_to_clean="" opened_files=""
	local dir fd_dir f fds

	# NOTE: every +([0-9]) pattern is kept inside a command substitution, as
	# extglob is only switched on here at run time.
	# "assert_not_empty" is a dummy operand that keeps readlink from being
	# called without arguments when a glob matches nothing.
	shopt -s extglob nullglob
	dirs_to_clean="$(echo {/var/run,/tmp}/dpdk/spdk{,_pid}+([0-9])) "
	if [[ -d $XDG_RUNTIME_DIR && $XDG_RUNTIME_DIR != *" "* ]]; then
		dirs_to_clean+="$(readlink -e assert_not_empty $XDG_RUNTIME_DIR/dpdk/spdk{,_pid}+([0-9]) || true) "
	fi

	for dir in $dirs_to_clean; do
		files_to_clean+="$(echo $dir/*) "
	done
	shopt -u extglob nullglob

	files_to_clean+="$(ls -1 /dev/shm/* \
		| grep -E '(spdk_tgt|iscsi|vhost|nvmf|rocksdb|bdevio|bdevperf|vhost_fuzz|nvme_fuzz)_trace|spdk_iscsi_conns' || true) "
	files_to_clean="$(readlink -e assert_not_empty $files_to_clean || true)"
	if [[ -z "$files_to_clean" ]]; then
		echo "Clean"
		return 0
	fi

	shopt -s extglob
	for fd_dir in $(echo /proc/+([0-9])); do
		fds="$(readlink -e assert_not_empty $fd_dir/fd/* || true)"
		if [[ -n "$fds" ]]; then
			# Keep one path per line; without the separator the last path of
			# one process would fuse with the first path of the next one.
			opened_files+="$fds"$'\n'
		fi
	done
	shopt -u extglob

	if [[ -z "$opened_files" ]]; then
		echo "Can't get list of opened files!"
		exit 1
	fi

	echo 'Cleaning'
	for f in $files_to_clean; do
		# -F -x: whole-line, literal comparison (paths are not regexes).
		if ! grep -Fxq -- "$f" <<< "$opened_files"; then
			echo "Removing: $f"
			rm -- "$f"
		else
			echo "Still open: $f"
		fi
	done

	for dir in $dirs_to_clean; do
		if ! grep -Fxq -- "$dir" <<< "$opened_files"; then
			echo "Removing: $dir"
			rmdir -- "$dir"
		else
			echo "Still open: $dir"
		fi
	done
	echo "Clean"
}

# "config" mode on Linux: bind the PCI devices, make sure hugetlbfs is mounted,
# allocate NRHUGE hugepages (system wide, or on HUGENODE when set) and, for
# vfio-pci, hand the hugetlbfs mounts to TARGET_USER and report its memlock limit.
# Exits with status 1 when fewer hugepages than requested could be allocated.
function configure_linux() {
	local hugetlbfs_mounts hugepages_target allocated_hugepages mount
	local MEMLOCK_AMNT MEMLOCK_MB

	configure_linux_pci
	hugetlbfs_mounts=$(linux_hugetlbfs_mounts)

	if [ -z "$hugetlbfs_mounts" ]; then
		hugetlbfs_mounts=/mnt/huge
		echo "Mounting hugetlbfs at $hugetlbfs_mounts"
		mkdir -p "$hugetlbfs_mounts"
		mount -t hugetlbfs nodev "$hugetlbfs_mounts"
	fi

	if [ -z "$HUGENODE" ]; then
		hugepages_target="/proc/sys/vm/nr_hugepages"
	else
		hugepages_target="/sys/devices/system/node/node${HUGENODE}/hugepages/hugepages-${HUGEPGSZ}kB/nr_hugepages"
	fi

	echo "$NRHUGE" > "$hugepages_target"
	# Read the value back to see how many pages were actually allocated.
	allocated_hugepages=$(cat "$hugepages_target")
	if [ "$allocated_hugepages" -lt "$NRHUGE" ]; then
		echo ""
		echo "## ERROR: requested $NRHUGE hugepages but only $allocated_hugepages could be allocated."
		echo "## Memory might be heavily fragmented. Please try flushing the system cache, or reboot the machine."
		exit 1
	fi

	if [ "$driver_name" = "vfio-pci" ]; then
		if [ -n "$TARGET_USER" ]; then
			for mount in $hugetlbfs_mounts; do
				chown "$TARGET_USER" "$mount"
				chmod g+w "$mount"
			done

			MEMLOCK_AMNT=$(su "$TARGET_USER" -c "ulimit -l")
			if [[ $MEMLOCK_AMNT != "unlimited" ]]; then
				MEMLOCK_MB=$((MEMLOCK_AMNT / 1024))
				echo "\"$TARGET_USER\" user memlock limit: $MEMLOCK_MB MB"
				echo ""
				echo "This is the maximum amount of memory you will be"
				echo "able to use with DPDK and VFIO if run as user \"$TARGET_USER\"."
				echo "To change this, please adjust limits.conf memlock limit for user \"$TARGET_USER\"."
				if ((MEMLOCK_AMNT < 65536)); then
					echo ""
					echo "## WARNING: memlock limit is less than 64MB"
					echo -n "## DPDK with VFIO may not be able to initialize "
					echo "if run as user \"$TARGET_USER\"."
				fi
			fi
		fi
	fi

	# -e rather than -f: the node is expected to be a device, not a regular
	# file, so -f could never detect it.
	if [ ! -e /dev/cpu/0/msr ]; then
		# Some distros build msr as a module. Make sure it's loaded to ensure
		# DPDK can easily figure out the TSC rate rather than relying on 100ms
		# sleeps.
		modprobe msr || true
	fi
}

# Give all usable devices back to their kernel drivers (nvme, ioatdma, idxd,
# virtio-pci, vmd). When a driver is not available the device is only unbound.
function reset_linux_pci() {
	local bdf dev_id driver_loaded

	# NVMe
	# check_for_driver returns non-zero when the driver IS available.
	driver_loaded=0
	check_for_driver nvme || driver_loaded=$?
	for bdf in ${pci_bus_cache["0x010802"]}; do
		if ! pci_can_use "$bdf"; then
			pci_dev_echo "$bdf" "Skipping un-whitelisted NVMe controller at $bdf"
			continue
		fi
		if [ "$driver_loaded" -ne 0 ]; then
			linux_bind_driver "$bdf" nvme
		else
			linux_unbind_driver "$bdf"
		fi
	done

	# IOAT
	driver_loaded=0
	check_for_driver ioatdma || driver_loaded=$?
	while IFS= read -r dev_id; do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			if ! pci_can_use "$bdf"; then
				pci_dev_echo "$bdf" "Skipping un-whitelisted I/OAT device"
				continue
			fi
			if [ "$driver_loaded" -ne 0 ]; then
				linux_bind_driver "$bdf" ioatdma
			else
				linux_unbind_driver "$bdf"
			fi
		done
	done < <(_pci_ids_from_header "PCI_DEVICE_ID_INTEL_IOAT")

	# IDXD
	driver_loaded=0
	check_for_driver idxd || driver_loaded=$?
	while IFS= read -r dev_id; do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			if ! pci_can_use "$bdf"; then
				pci_dev_echo "$bdf" "Skipping un-whitelisted IDXD device"
				continue
			fi
			if [ "$driver_loaded" -ne 0 ]; then
				linux_bind_driver "$bdf" idxd
			else
				linux_unbind_driver "$bdf"
			fi
		done
	done < <(_pci_ids_from_header "PCI_DEVICE_ID_INTEL_IDXD")

	# virtio
	# TODO: check if virtio-pci is loaded first and just unbind if it is not loaded
	# Requires some more investigation - for example, some kernels do not seem to have
	# virtio-pci but just virtio_scsi instead. Also need to make sure we get the
	# underscore vs. dash right in the virtio_scsi name.
	modprobe virtio-pci || true
	while IFS= read -r dev_id; do
		for bdf in ${pci_bus_cache["0x1af4:0x$dev_id"]}; do
			if ! pci_can_use "$bdf"; then
				pci_dev_echo "$bdf" "Skipping un-whitelisted Virtio device at $bdf"
				continue
			fi
			linux_bind_driver "$bdf" virtio-pci
		done
	done < <(_pci_ids_from_header "PCI_DEVICE_ID_VIRTIO")

	# VMD
	driver_loaded=0
	check_for_driver vmd || driver_loaded=$?
	while IFS= read -r dev_id; do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			if ! pci_can_use "$bdf"; then
				echo "Skipping un-whitelisted VMD device at $bdf"
				continue
			fi
			if [ "$driver_loaded" -ne 0 ]; then
				linux_bind_driver "$bdf" vmd
			else
				linux_unbind_driver "$bdf"
			fi
		done
	done < <(_pci_ids_from_header "PCI_DEVICE_ID_INTEL_VMD")

	echo "1" > "/sys/bus/pci/rescan"
}

# "reset" mode on Linux: rebind the devices and delete leftover spdk map files
# from the hugetlbfs mounts as well as /run/.spdk* files.
function reset_linux() {
	local mount

	reset_linux_pci
	for mount in $(linux_hugetlbfs_mounts); do
		rm -f "$mount"/spdk*map_*
	done
	rm -f /run/.spdk*
}

# "status" mode on Linux: print hugepage usage followed by one table per device
# class (NVMe, I/OAT, IDXD, virtio, VMD) with address, ids, NUMA node and driver.
function status_linux() {
	local numa_nodes=0
	local path free_pages all_pages node huge_size
	local bdf dev_id driver device vendor name
	local -a blknames

	echo "Hugepages"
	printf "%-6s %10s %8s / %6s\n" "node" "hugesize" "free" "total"

	shopt -s nullglob
	for path in /sys/devices/system/node/node*/hugepages/hugepages-*/; do
		numa_nodes=$((numa_nodes + 1))
		free_pages=$(cat "$path/free_hugepages")
		all_pages=$(cat "$path/nr_hugepages")

		[[ $path =~ (node[0-9]+)/hugepages/hugepages-([0-9]+kB) ]]

		node=${BASH_REMATCH[1]}
		huge_size=${BASH_REMATCH[2]}

		printf "%-6s %10s %8s / %6s\n" "$node" "$huge_size" "$free_pages" "$all_pages"
	done
	shopt -u nullglob

	# fall back to system-wide hugepages
	if [ "$numa_nodes" = "0" ]; then
		free_pages=$(grep HugePages_Free /proc/meminfo | awk '{ print $2 }')
		all_pages=$(grep HugePages_Total /proc/meminfo | awk '{ print $2 }')
		node="-"
		huge_size="$HUGEPGSZ"

		printf "%-6s %10s %8s / %6s\n" "$node" "$huge_size" "$free_pages" "$all_pages"
	fi

	echo ""
	echo "NVMe devices"

	echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver\t\tDevice name"
	for bdf in ${pci_bus_cache["0x010802"]}; do
		driver=$(grep DRIVER "/sys/bus/pci/devices/$bdf/uevent" | awk -F"=" '{print $2}')
		if [ "$numa_nodes" = "0" ]; then
			node="-"
		else
			node=$(cat "/sys/bus/pci/devices/$bdf/numa_node")
			if ((node == -1)); then
				node=unknown
			fi
		fi
		device=$(cat "/sys/bus/pci/devices/$bdf/device")
		vendor=$(cat "/sys/bus/pci/devices/$bdf/vendor")
		if [ "$driver" = "nvme" ] && [ -d "/sys/bus/pci/devices/$bdf/nvme" ]; then
			name="\t"$(ls "/sys/bus/pci/devices/$bdf/nvme")
		else
			name="-"
		fi
		echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t${driver:--}\t\t$name"
	done

	echo ""
	echo "I/OAT Engine"

	echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver"
	for dev_id in $(_pci_ids_from_header "PCI_DEVICE_ID_INTEL_IOAT"); do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			driver=$(grep DRIVER "/sys/bus/pci/devices/$bdf/uevent" | awk -F"=" '{print $2}')
			if [ "$numa_nodes" = "0" ]; then
				node="-"
			else
				node=$(cat "/sys/bus/pci/devices/$bdf/numa_node")
				if ((node == -1)); then
					node=unknown
				fi
			fi
			device=$(cat "/sys/bus/pci/devices/$bdf/device")
			vendor=$(cat "/sys/bus/pci/devices/$bdf/vendor")
			echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t${driver:--}"
		done
	done

	echo ""
	echo "IDXD Engine"

	echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver"
	for dev_id in $(_pci_ids_from_header "PCI_DEVICE_ID_INTEL_IDXD"); do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			driver=$(grep DRIVER "/sys/bus/pci/devices/$bdf/uevent" | awk -F"=" '{print $2}')
			if [ "$numa_nodes" = "0" ]; then
				node="-"
			else
				node=$(cat "/sys/bus/pci/devices/$bdf/numa_node")
				# Same -1 handling as the other tables.
				if ((node == -1)); then
					node=unknown
				fi
			fi
			device=$(cat "/sys/bus/pci/devices/$bdf/device")
			vendor=$(cat "/sys/bus/pci/devices/$bdf/vendor")
			echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t${driver:--}"
		done
	done

	echo ""
	echo "virtio"

	echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver\t\tDevice name"
	for dev_id in $(_pci_ids_from_header "PCI_DEVICE_ID_VIRTIO"); do
		for bdf in ${pci_bus_cache["0x1af4:0x$dev_id"]}; do
			driver=$(grep DRIVER "/sys/bus/pci/devices/$bdf/uevent" | awk -F"=" '{print $2}')
			if [ "$numa_nodes" = "0" ]; then
				node="-"
			else
				node=$(cat "/sys/bus/pci/devices/$bdf/numa_node")
				if ((node == -1)); then
					node=unknown
				fi
			fi
			device=$(cat "/sys/bus/pci/devices/$bdf/device")
			vendor=$(cat "/sys/bus/pci/devices/$bdf/vendor")
			blknames=()
			get_virtio_names_from_bdf "$bdf" blknames
			echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t\t${driver:--}\t\t" "${blknames[@]}"
		done
	done

	echo ""
	echo "VMD"

	echo -e "BDF\t\tNuma Node\tDriver Name"
	for dev_id in $(_pci_ids_from_header "PCI_DEVICE_ID_INTEL_VMD"); do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			driver=$(grep DRIVER "/sys/bus/pci/devices/$bdf/uevent" | awk -F"=" '{print $2}')
			node=$(cat "/sys/bus/pci/devices/$bdf/numa_node")
			if ((node == -1)); then
				node=unknown
			fi
			echo -e "$bdf\t$node\t\t$driver"
		done
	done
}

# "status" mode on FreeBSD: print the contigmem settings and one table per
# device class (NVMe, I/OAT, IDXD, VMD).
function status_freebsd() {
	local id pci
	local -a ioat=() idxd=() vmd=()
	local devs

	# Print a table for the devices stored under the given pci_bus_cache keys.
	# The body runs in a subshell, so nothing it sets reaches the caller.
	status_print() (
		local dev driver

		echo -e "BDF\t\tVendor\tDevice\tDriver"

		for id; do
			for pci in ${pci_bus_cache["$id"]}; do
				driver=$(pciconf -l "pci$pci")
				# Keep only the text in front of the last "@".
				driver=${driver%@*}
				printf '%s\t%s\t%s\t%s\n' \
					"$pci" \
					"${pci_ids_vendor["$pci"]}" \
					"${pci_ids_device["$pci"]}" \
					"$driver"
			done
		done
	)

	devs=PCI_DEVICE_ID_INTEL_IOAT
	devs+="|PCI_DEVICE_ID_INTEL_IDXD"
	devs+="|PCI_DEVICE_ID_INTEL_VMD"

	local dev_type dev_id
	# Each matching header line is read as three fields; the second is used as
	# the device type name and the third as the device id.
	while read -r _ dev_type dev_id; do
		case "$dev_type" in
			*IOAT*) ioat+=("0x8086:$dev_id") ;;
			*IDXD*) idxd+=("0x8086:$dev_id") ;;
			*VMD*) vmd+=("0x8086:$dev_id") ;;
		esac
	done < <(grep -E "$devs" "$rootdir/include/spdk/pci_ids.h")

	local contigmem=present
	if ! kldstat -q -m contigmem; then
		contigmem="not present"
	fi

	# The here-document is kept at column 0 so that its output does not depend
	# on how this function is indented.
	cat << BSD_INFO
Contigmem ($contigmem)
Buffer Size: $(kenv hw.contigmem.buffer_size)
Num Buffers: $(kenv hw.contigmem.num_buffers)

NVMe devices
$(status_print 0x010802)

I/IOAT DMA
$(status_print "${ioat[@]}")

IDXD DMA
$(status_print "${idxd[@]}")

VMD
$(status_print "${vmd[@]}")
BSD_INFO
}

# Hand all NVMe, I/OAT, IDXD and VMD devices to nic_uio: the module is reloaded
# with hw.nic_uio.bdfs set to the comma separated list of their addresses.
function configure_freebsd_pci() {
	local devs id
	local -a ids=() BDFS=()

	devs=PCI_DEVICE_ID_INTEL_IOAT
	devs+="|PCI_DEVICE_ID_INTEL_IDXD"
	devs+="|PCI_DEVICE_ID_INTEL_VMD"

	ids=($(grep -E "$devs" "$rootdir/include/spdk/pci_ids.h" | awk '{print $3}'))

	if [[ -n ${pci_bus_cache["0x010802"]} ]]; then
		BDFS+=(${pci_bus_cache["0x010802"]})
	fi

	for id in "${ids[@]}"; do
		[[ -n ${pci_bus_cache["0x8086:$id"]} ]] || continue
		BDFS+=(${pci_bus_cache["0x8086:$id"]})
	done

	# Drop the domain part from all the addresses
	BDFS=("${BDFS[@]#*:}")

	# With IFS set to a comma, "${BDFS[*]}" below joins the list with commas.
	local IFS=","
	kldunload nic_uio.ko || true
	kenv hw.nic_uio.bdfs="${BDFS[*]}"
	kldload nic_uio.ko
}

# "config" mode on FreeBSD: set up nic_uio and load contigmem with HUGEMEM / 256
# buffers of 256 MB each.
function configure_freebsd() {
	configure_freebsd_pci
	# If contigmem is already loaded but the HUGEMEM specified doesn't match the
	# previous value, unload contigmem so that we can reload with the new value.
	if kldstat -q -m contigmem; then
		if [ "$(kenv hw.contigmem.num_buffers)" -ne "$((HUGEMEM / 256))" ]; then
			kldunload contigmem.ko
		fi
	fi
	if ! kldstat -q -m contigmem; then
		kenv hw.contigmem.num_buffers=$((HUGEMEM / 256))
		kenv hw.contigmem.buffer_size=$((256 * 1024 * 1024))
		kldload contigmem.ko
	fi
}

# "reset" mode on FreeBSD: unload contigmem and nic_uio (best effort).
function reset_freebsd() {
	kldunload contigmem.ko || true
	kldunload nic_uio.ko || true
}

# ---- entry point -----------------------------------------------------------

CMD=reset cache_pci_bus

# The mode is the first argument; "config" when none is given.
mode=${1:-config}

: "${HUGEMEM:=2048}"
: "${PCI_WHITELIST:=""}"
: "${PCI_BLACKLIST:=""}"

if [ -n "$NVME_WHITELIST" ]; then
	PCI_WHITELIST="$PCI_WHITELIST $NVME_WHITELIST"
fi

if [ -n "$SKIP_PCI" ]; then
	PCI_WHITELIST="none"
fi

# Default TARGET_USER to the sudo caller, falling back to the login name.
if [ -z "$TARGET_USER" ]; then
	TARGET_USER="$SUDO_USER"
	if [ -z "$TARGET_USER" ]; then
		TARGET_USER=$(logname 2> /dev/null) || true
	fi
fi

if [[ $os == Linux ]]; then
	# Default hugepage size in kB, taken from /proc/meminfo.
	HUGEPGSZ=$(($(grep Hugepagesize /proc/meminfo | cut -d : -f 2 | tr -dc '0-9')))
	HUGEPGSZ_MB=$((HUGEPGSZ / 1024))
	# Unless NRHUGE is already set, derive it from HUGEMEM (rounded up).
	: "${NRHUGE=$(((HUGEMEM + HUGEPGSZ_MB - 1) / HUGEPGSZ_MB))}"

	case "$mode" in
		config) configure_linux ;;
		cleanup) cleanup_linux ;;
		reset) reset_linux ;;
		status) status_linux ;;
		help) usage "$0" ;;
		*) usage "$0" "Invalid argument '$mode'" ;;
	esac
else
	case "$mode" in
		config) configure_freebsd ;;
		reset) reset_freebsd ;;
		cleanup) echo "setup.sh cleanup function not yet supported on $os" ;;
		status) status_freebsd ;;
		help) usage "$0" ;;
		*) usage "$0" "Invalid argument '$mode'" ;;
	esac
fi