#!/usr/bin/env bash

set -e

# Root of the source tree: the parent of the directory that holds this script.
# The expansions are quoted so that a checkout path containing whitespace works.
rootdir=$(readlink -f "$(dirname "$0")")/..
source "$rootdir/scripts/common.sh"

# Print the help text and terminate the script with exit status 0.
# Arguments:
#   $1 - path of this script; only its basename appears in the "Usage:" line
#   $2 - optional message printed, followed by an empty line, before the help
# Outputs:
#   the help text on stdout; the option list depends on the output of uname
function usage() {
  local options

  if [ "$(uname)" = Linux ]; then
    options="[config|reset|status|cleanup|help]"
  else
    options="[config|reset|help]"
  fi

  if [[ -n $2 ]]; then
    echo "$2"
    echo ""
  fi
  echo "Helper script for allocating hugepages and binding NVMe, I/OAT, VMD and Virtio devices"
  echo "to a generic VFIO kernel driver. If VFIO is not available on the system, this script"
  echo "will fall back to UIO. NVMe and Virtio devices with active mountpoints will be ignored."
  echo "All hugepage operations use default hugepage size on the system (hugepagesz)."
  echo "Usage: $(basename "$1") $options"
  echo
  echo "$options - as following:"
  echo "config Default mode. Allocate hugepages and bind PCI devices."
  if [ "$(uname)" = Linux ]; then
    echo "cleanup Remove any orphaned files that can be left in the system after SPDK application exit"
  fi
  echo "reset Rebind PCI devices back to their original drivers."
  echo " Also cleanup any leftover spdk files/resources."
  echo " Hugepage memory size will remain unchanged."
  if [ "$(uname)" = Linux ]; then
    echo "status Print status of all SPDK-compatible devices on the system."
  fi
  echo "help Print this help message."
  echo
  echo "The following environment variables can be specified."
  echo "HUGEMEM Size of hugepage memory to allocate (in MB). 2048 by default."
  echo " For NUMA systems, the hugepages will be evenly distributed"
  echo " between CPU nodes"
  echo "NRHUGE Number of hugepages to allocate. This variable overwrites HUGEMEM."
  echo "HUGENODE Specific NUMA node to allocate hugepages on. To allocate"
  echo " hugepages on multiple nodes run this script multiple times -"
  echo " once for each node."
  echo "PCI_WHITELIST"
  echo "PCI_BLACKLIST Whitespace separated list of PCI devices (NVMe, I/OAT, VMD, Virtio)."
  echo " Each device must be specified as a full PCI address."
  echo " E.g. PCI_WHITELIST=\"0000:01:00.0 0000:02:00.0\""
  echo " To blacklist all PCI devices use a non-valid address."
  echo " E.g. PCI_WHITELIST=\"none\""
  echo " If PCI_WHITELIST and PCI_BLACKLIST are empty or unset, all PCI devices"
  echo " will be bound."
  echo " Each device in PCI_BLACKLIST will be ignored (driver won't be changed)."
  echo " PCI_BLACKLIST has precedence over PCI_WHITELIST."
  echo "TARGET_USER User that will own hugepage mountpoint directory and vfio groups."
  echo " By default the current user will be used."
  echo "DRIVER_OVERRIDE Disable automatic vfio-pci/uio_pci_generic selection and forcefully"
  echo " bind devices to the given driver."
  echo " E.g. DRIVER_OVERRIDE=uio_pci_generic or DRIVER_OVERRIDE=/home/public/dpdk/build/kmod/igb_uio.ko"
  exit 0
}

# With monolithic kernels lsmod does not list built-in drivers, so the lsmod
# lookup is backed up by /sys/module. /sys/bus/pci/drivers/ is checked as well,
# because neither lsmod nor /sys/module may contain the needed information
# (as on Fedora-like systems).
# Print the PCI device IDs declared in include/spdk/pci_ids.h for one family.
# Arguments:
#   $1 - text to look for in pci_ids.h (e.g. PCI_DEVICE_ID_INTEL_IOAT)
# Outputs:
#   for every matching line, the field that follows the first "x" on that
#   line (the hex digits of a "0x...." value), one per line
function get_pci_dev_ids() {
  grep "$1" "$rootdir/include/spdk/pci_ids.h" \
    | awk -F"x" '{print $2}'
}

# Report whether a kernel driver is present.
# Arguments:
#   $1 - driver name; the dash-to-underscore spelling is tried as well
# Returns (NOTE: non-zero means "found", zero means "not found"):
#   1 - the driver is listed by lsmod
#   2 - the driver has a directory in /sys/module or /sys/bus/pci/drivers
#   0 - the driver was not found
function check_for_driver() {
  local name="$1"
  local mod_name="${name//-/_}"

  # Match the module-name column only, so that "nvme" is not reported as
  # loaded just because "nvme_core" is.
  if lsmod | grep -q "^${mod_name}[[:space:]]"; then
    return 1
  fi

  if [[ -d /sys/module/${name} ||
    -d /sys/module/${mod_name} ||
    -d /sys/bus/pci/drivers/${name} ||
    -d /sys/bus/pci/drivers/${mod_name} ]]; then
    return 2
  fi
  return 0
}

# Print a message prefixed with a device's address and its vendor/device IDs.
# Arguments:
#   $1   - PCI address (BDF) of the device
#   $2.. - message text
function pci_dev_echo() {
  local bdf="$1"
  local vendor
  local device
  vendor="$(cat "/sys/bus/pci/devices/$bdf/vendor")"
  device="$(cat "/sys/bus/pci/devices/$bdf/device")"
  shift
  echo "$bdf (${vendor#0x} ${device#0x}): $*"
}

# Bind a PCI device to a driver, unbinding it from its current driver first.
# Arguments:
#   $1 - PCI address (BDF) of the device
#   $2 - name of the driver to bind to
# Globals:
#   TARGET_USER (read) - if set, becomes owner of the device's /dev/vfio group
function linux_bind_driver() {
  # All variables are local: in particular driver_name must not overwrite the
  # global of the same name that configure_linux reads after binding.
  local bdf="$1"
  local driver_name="$2"
  local old_driver_name="no driver"
  local ven_dev_id
  local iommu_group

  ven_dev_id=$(lspci -n -s "$bdf" | cut -d' ' -f3 | sed 's/:/ /')

  if [ -e "/sys/bus/pci/devices/$bdf/driver" ]; then
    old_driver_name=$(basename "$(readlink "/sys/bus/pci/devices/$bdf/driver")")

    if [ "$driver_name" = "$old_driver_name" ]; then
      pci_dev_echo "$bdf" "Already using the $old_driver_name driver"
      return 0
    fi

    # remove_id is best-effort; a failing unbind aborts the script (set -e).
    echo "$ven_dev_id" > "/sys/bus/pci/devices/$bdf/driver/remove_id" 2> /dev/null || true
    echo "$bdf" > "/sys/bus/pci/devices/$bdf/driver/unbind"
  fi

  pci_dev_echo "$bdf" "$old_driver_name -> $driver_name"

  # Both writes are best-effort: errors are deliberately ignored.
  echo "$ven_dev_id" > "/sys/bus/pci/drivers/$driver_name/new_id" 2> /dev/null || true
  echo "$bdf" > "/sys/bus/pci/drivers/$driver_name/bind" 2> /dev/null || true

  iommu_group=$(basename "$(readlink -f "/sys/bus/pci/devices/$bdf/iommu_group")")
  if [ -e "/dev/vfio/$iommu_group" ]; then
    if [ -n "$TARGET_USER" ]; then
      chown "$TARGET_USER" "/dev/vfio/$iommu_group"
    fi
  fi
}

# Unbind a PCI device from whatever driver currently owns it.
# Arguments:
#   $1 - PCI address (BDF) of the device
function linux_unbind_driver() {
  local bdf="$1"
  local ven_dev_id
  local old_driver_name="no driver"

  ven_dev_id=$(lspci -n -s "$bdf" | cut -d' ' -f3 | sed 's/:/ /')

  if [ -e "/sys/bus/pci/devices/$bdf/driver" ]; then
    old_driver_name=$(basename "$(readlink "/sys/bus/pci/devices/$bdf/driver")")
    echo "$ven_dev_id" > "/sys/bus/pci/devices/$bdf/driver/remove_id" 2> /dev/null || true
    echo "$bdf" > "/sys/bus/pci/devices/$bdf/driver/unbind"
  fi

  pci_dev_echo "$bdf" "$old_driver_name -> no driver"
}

# Print the mount point of every mounted hugetlbfs file system, one per line.
function linux_hugetlbfs_mounts() {
  mount | grep ' type hugetlbfs ' | awk '{ print $3 }'
}

# Print the nvme block device names that belong to a PCI device.
# Arguments:
#   $1 - PCI address (BDF) of the NVMe controller
# Outputs:
#   one block device name per line (a single empty line if there is none)
function get_nvme_name_from_bdf() {
  local blknames=()
  local nvme_devs dev link_name link_bdf

  # grep exits non-zero when there is no nvme block device; that is not an error.
  nvme_devs=$(lsblk -d --output NAME | grep "^nvme") || true
  for dev in $nvme_devs; do
    link_name=$(readlink "/sys/block/$dev/device/device") || true
    if [ -z "$link_name" ]; then
      link_name=$(readlink "/sys/block/$dev/device")
    fi
    link_bdf=$(basename "$link_name")
    if [ "$link_bdf" = "$1" ]; then
      blknames+=("$dev")
    fi
  done

  printf '%s\n' "${blknames[@]}"
}

# Collect the block devices whose /sys/block link contains a PCI address.
# Arguments:
#   $1 - PCI address (BDF) of the virtio device
#   $2 - name of the caller's array variable that receives the device names;
#        it must not be called blk_devs, virtio_names or dev (locals here)
function get_virtio_names_from_bdf() {
  local blk_devs
  local virtio_names=()
  local dev

  blk_devs=$(lsblk --nodeps --output NAME)

  for dev in $blk_devs; do
    # -F: the address is a literal string, its "." must not act as a wildcard.
    if readlink "/sys/block/$dev" | grep -qF -- "$1"; then
      virtio_names+=("$dev")
    fi
  done

  # eval is used as the out-parameter mechanism; the names come from lsblk.
  eval "$2=( " "${virtio_names[@]}" " )"
}

# Select a userspace I/O driver, load it and bind every usable NVMe, I/OAT,
# IDXD, Virtio and VMD device to it.
# Globals:
#   DRIVER_OVERRIDE (read)   - driver name or path to a .ko file to force
#   PCI_WHITELIST (read)     - VMD devices are only bound when this is set
#   driver_name (written)    - selected driver; read later by configure_linux
# Returns:
#   1 if no suitable driver is available
function configure_linux_pci() {
  local driver_path=""
  local bdf blkname name dev_id
  local blknames=()

  # Intentionally global, see the header comment.
  driver_name=""
  if [[ -n "${DRIVER_OVERRIDE}" ]]; then
    driver_path="$DRIVER_OVERRIDE"
    driver_name="${DRIVER_OVERRIDE##*/}"
    # modprobe and the sysfs don't use the .ko suffix.
    driver_name=${driver_name%.ko}
    # path = name -> there is no path
    if [[ "$driver_path" = "$driver_name" ]]; then
      driver_path=""
    fi
    # igb_uio is a common driver to override with and it depends on uio.
    if [[ "$driver_name" = "igb_uio" ]]; then
      modprobe uio
    fi
  elif [[ -n "$(ls /sys/kernel/iommu_groups)" || (-e \
    /sys/module/vfio/parameters/enable_unsafe_noiommu_mode && \
    "$(cat /sys/module/vfio/parameters/enable_unsafe_noiommu_mode)" == "Y") ]]; then
    driver_name=vfio-pci
  elif modinfo uio_pci_generic > /dev/null 2>&1; then
    driver_name=uio_pci_generic
  elif [[ -r "$rootdir/dpdk/build/kmod/igb_uio.ko" ]]; then
    driver_path="$rootdir/dpdk/build/kmod/igb_uio.ko"
    driver_name="igb_uio"
    modprobe uio
    echo "WARNING: uio_pci_generic not detected - using $driver_name"
  else
    echo "No valid drivers found [vfio-pci, uio_pci_generic, igb_uio]. Please either enable the vfio-pci or uio_pci_generic"
    echo "kernel modules, or have SPDK build the igb_uio driver by running ./configure --with-igb-uio-driver and recompiling."
    return 1
  fi

  # modprobe assumes the directory of the module. If the user passes in a path, we should use insmod
  if [[ -n "$driver_path" ]]; then
    insmod "$driver_path" || true
  else
    modprobe "$driver_name"
  fi

  # NVMe
  for bdf in $(iter_all_pci_class_code 01 08 02); do
    blknames=()
    if ! pci_can_use "$bdf"; then
      pci_dev_echo "$bdf" "Skipping un-whitelisted NVMe controller at $bdf"
      continue
    fi

    # Remember every namespace of this controller that is mounted somewhere.
    for blkname in $(get_nvme_name_from_bdf "$bdf"); do
      if [ "$(lsblk "/dev/$blkname" --output MOUNTPOINT -n | wc -w)" != "0" ]; then
        blknames+=("$blkname")
      fi
    done

    if ((${#blknames[@]} == 0)); then
      linux_bind_driver "$bdf" "$driver_name"
    else
      for name in "${blknames[@]}"; do
        pci_dev_echo "$bdf" "Active mountpoints on /dev/$name, so not binding PCI dev"
      done
    fi
  done

  # IOAT
  for dev_id in $(get_pci_dev_ids "PCI_DEVICE_ID_INTEL_IOAT"); do
    for bdf in $(iter_all_pci_dev_id 8086 "$dev_id"); do
      if ! pci_can_use "$bdf"; then
        pci_dev_echo "$bdf" "Skipping un-whitelisted I/OAT device"
        continue
      fi

      linux_bind_driver "$bdf" "$driver_name"
    done
  done

  # IDXD
  for dev_id in $(get_pci_dev_ids "PCI_DEVICE_ID_INTEL_IDXD"); do
    for bdf in $(iter_all_pci_dev_id 8086 "$dev_id"); do
      if ! pci_can_use "$bdf"; then
        pci_dev_echo "$bdf" "Skipping un-whitelisted IDXD device"
        continue
      fi

      linux_bind_driver "$bdf" "$driver_name"
    done
  done

  # virtio
  for dev_id in $(get_pci_dev_ids "PCI_DEVICE_ID_VIRTIO"); do
    for bdf in $(iter_all_pci_dev_id 1af4 "$dev_id"); do
      if ! pci_can_use "$bdf"; then
        pci_dev_echo "$bdf" "Skipping un-whitelisted Virtio device at $bdf"
        continue
      fi
      blknames=()
      get_virtio_names_from_bdf "$bdf" blknames
      for blkname in "${blknames[@]}"; do
        if [ "$(lsblk "/dev/$blkname" --output MOUNTPOINT -n | wc -w)" != "0" ]; then
          pci_dev_echo "$bdf" "Active mountpoints on /dev/$blkname, so not binding"
          # Leave the blkname loop and move on to the next device (bdf).
          continue 2
        fi
      done

      linux_bind_driver "$bdf" "$driver_name"
    done
  done

  # VMD: only bound when a whitelist has been given explicitly.
  for dev_id in $(get_pci_dev_ids "PCI_DEVICE_ID_INTEL_VMD"); do
    for bdf in $(iter_pci_dev_id 8086 "$dev_id"); do
      if [[ -z "$PCI_WHITELIST" ]] || ! pci_can_use "$bdf"; then
        echo "Skipping un-whitelisted VMD device at $bdf"
        continue
      fi

      linux_bind_driver "$bdf" "$driver_name"
      echo " VMD generic kdrv: " "$bdf" "$driver_name"
    done
  done

  echo "1" > "/sys/bus/pci/rescan"
}

# Remove SPDK/DPDK runtime files and directories that no process has open.
# Globals:
#   XDG_RUNTIME_DIR (read) - also searched when it is a directory whose path
#                            contains no space
# Exits with status 1 when no open file of any process can be determined.
function cleanup_linux() {
  local dirs_to_clean files_to_clean opened_files=""
  local dir f fd_dir fd_links

  # NOTE(review): the +([0-9]) patterns are kept inside $(...) as in the
  # original, presumably so they are only parsed once extglob is enabled.
  shopt -s extglob nullglob
  dirs_to_clean="$(echo {/var/run,/tmp}/dpdk/spdk{,_pid}+([0-9])) "
  if [[ -d $XDG_RUNTIME_DIR && $XDG_RUNTIME_DIR != *" "* ]]; then
    # "assert_not_empty" is a placeholder operand: it keeps readlink from
    # running without operands when the glob expands to nothing.
    dirs_to_clean+="$(readlink -e assert_not_empty $XDG_RUNTIME_DIR/dpdk/spdk{,_pid}+([0-9]) || true) "
  fi

  files_to_clean=""
  for dir in $dirs_to_clean; do
    files_to_clean+="$(echo $dir/*) "
  done
  shopt -u extglob nullglob

  files_to_clean+="$(ls -1 /dev/shm/* \
    | grep -E '(spdk_tgt|iscsi|vhost|nvmf|rocksdb|bdevio|bdevperf|vhost_fuzz|nvme_fuzz)_trace|spdk_iscsi_conns' || true) "
  # Canonicalize the candidates and drop those that do not exist.
  files_to_clean="$(readlink -e assert_not_empty $files_to_clean || true)"
  if [[ -z "$files_to_clean" ]]; then
    echo "Clean"
    return 0
  fi

  shopt -s extglob
  for fd_dir in $(echo /proc/+([0-9])); do
    fd_links="$(readlink -e assert_not_empty $fd_dir/fd/* || true)"
    if [[ -n "$fd_links" ]]; then
      # Command substitution strips the trailing newline. Without an explicit
      # separator the last path of one process and the first path of the next
      # would fuse into one line and neither would be seen as open.
      opened_files+="$fd_links"$'\n'
    fi
  done
  shopt -u extglob

  if [[ -z "$opened_files" ]]; then
    echo "Can't get list of opened files!"
    exit 1
  fi

  echo 'Cleaning'
  for f in $files_to_clean; do
    # -F -x: compare whole lines literally; a path is not a regular expression.
    if ! grep -Fxq -- "$f" <<< "$opened_files"; then
      echo "Removing: $f"
      rm "$f"
    else
      echo "Still open: $f"
    fi
  done

  for dir in $dirs_to_clean; do
    if ! grep -Fxq -- "$dir" <<< "$opened_files"; then
      echo "Removing: $dir"
      rmdir "$dir"
    else
      echo "Still open: $dir"
    fi
  done
  echo "Clean"
}

# Bind the PCI devices, make sure hugetlbfs is mounted and allocate hugepages.
# Globals:
#   NRHUGE (read)      - number of hugepages to request
#   HUGENODE (read)    - if set, allocate on this NUMA node only
#   HUGEPGSZ (read)    - hugepage size in kB, used for the per-node sysfs path
#   TARGET_USER (read) - if set and vfio-pci is used, owner of the mount points
#   driver_name (read) - set by configure_linux_pci
# Exits with status 1 if fewer than NRHUGE hugepages could be allocated.
function configure_linux() {
  local hugetlbfs_mounts hugepages_target allocated_hugepages mount_dir
  local memlock_amnt memlock_mb

  configure_linux_pci
  hugetlbfs_mounts=$(linux_hugetlbfs_mounts)

  if [ -z "$hugetlbfs_mounts" ]; then
    hugetlbfs_mounts=/mnt/huge
    echo "Mounting hugetlbfs at $hugetlbfs_mounts"
    mkdir -p "$hugetlbfs_mounts"
    mount -t hugetlbfs nodev "$hugetlbfs_mounts"
  fi

  if [ -z "$HUGENODE" ]; then
    hugepages_target="/proc/sys/vm/nr_hugepages"
  else
    hugepages_target="/sys/devices/system/node/node${HUGENODE}/hugepages/hugepages-${HUGEPGSZ}kB/nr_hugepages"
  fi

  # Request the pages, then read the value back to see how many were granted.
  echo "$NRHUGE" > "$hugepages_target"
  allocated_hugepages=$(cat "$hugepages_target")
  if [ "$allocated_hugepages" -lt "$NRHUGE" ]; then
    echo ""
    echo "## ERROR: requested $NRHUGE hugepages but only $allocated_hugepages could be allocated."
    echo "## Memory might be heavily fragmented. Please try flushing the system cache, or reboot the machine."
    exit 1
  fi

  if [ "$driver_name" = "vfio-pci" ]; then
    if [ -n "$TARGET_USER" ]; then
      for mount_dir in $hugetlbfs_mounts; do
        chown "$TARGET_USER" "$mount_dir"
        chmod g+w "$mount_dir"
      done
    fi

    memlock_amnt=$(ulimit -l)
    if [ "$memlock_amnt" != "unlimited" ]; then
      memlock_mb=$((memlock_amnt / 1024))
      echo ""
      echo "Current user memlock limit: ${memlock_mb} MB"
      echo ""
      echo "This is the maximum amount of memory you will be"
      echo "able to use with DPDK and VFIO if run as current user."
      echo -n "To change this, please adjust limits.conf memlock "
      echo "limit for current user."

      if [ "$memlock_amnt" -lt 65536 ]; then
        echo ""
        echo "## WARNING: memlock limit is less than 64MB"
        echo -n "## DPDK with VFIO may not be able to initialize "
        echo "if run as current user."
      fi
    fi
  fi

  # -e rather than -f: the msr node is a device, not a regular file, so a -f
  # test can never succeed.
  if [ ! -e /dev/cpu/0/msr ]; then
    # Some distros build msr as a module. Make sure it's loaded to ensure
    # DPDK can easily figure out the TSC rate rather than relying on 100ms
    # sleeps.
    modprobe msr || true
  fi
}

# Give every usable NVMe, I/OAT, IDXD, Virtio and VMD device back to its
# kernel driver; if that driver is not available the device is only unbound.
function reset_linux_pci() {
  local driver_loaded bdf dev_id

  # check_for_driver returns non-zero when the driver IS available; calling it
  # on the left of "||" captures that code without tripping "set -e".

  # NVMe
  driver_loaded=0
  check_for_driver nvme || driver_loaded=$?
  for bdf in $(iter_all_pci_class_code 01 08 02); do
    if ! pci_can_use "$bdf"; then
      pci_dev_echo "$bdf" "Skipping un-whitelisted NVMe controller"
      continue
    fi
    if [ "$driver_loaded" -ne 0 ]; then
      linux_bind_driver "$bdf" nvme
    else
      linux_unbind_driver "$bdf"
    fi
  done

  # IOAT
  driver_loaded=0
  check_for_driver ioatdma || driver_loaded=$?
  for dev_id in $(get_pci_dev_ids "PCI_DEVICE_ID_INTEL_IOAT"); do
    for bdf in $(iter_all_pci_dev_id 8086 "$dev_id"); do
      if ! pci_can_use "$bdf"; then
        pci_dev_echo "$bdf" "Skipping un-whitelisted I/OAT device"
        continue
      fi
      if [ "$driver_loaded" -ne 0 ]; then
        linux_bind_driver "$bdf" ioatdma
      else
        linux_unbind_driver "$bdf"
      fi
    done
  done

  # IDXD
  driver_loaded=0
  check_for_driver idxd || driver_loaded=$?
  for dev_id in $(get_pci_dev_ids "PCI_DEVICE_ID_INTEL_IDXD"); do
    for bdf in $(iter_all_pci_dev_id 8086 "$dev_id"); do
      if ! pci_can_use "$bdf"; then
        pci_dev_echo "$bdf" "Skipping un-whitelisted IDXD device"
        continue
      fi
      if [ "$driver_loaded" -ne 0 ]; then
        linux_bind_driver "$bdf" idxd
      else
        linux_unbind_driver "$bdf"
      fi
    done
  done

  # virtio
  # TODO: check if virtio-pci is loaded first and just unbind if it is not loaded
  # Requires some more investigation - for example, some kernels do not seem to have
  # virtio-pci but just virtio_scsi instead. Also need to make sure we get the
  # underscore vs. dash right in the virtio_scsi name.
  modprobe virtio-pci || true
  for dev_id in $(get_pci_dev_ids "PCI_DEVICE_ID_VIRTIO"); do
    for bdf in $(iter_all_pci_dev_id 1af4 "$dev_id"); do
      if ! pci_can_use "$bdf"; then
        pci_dev_echo "$bdf" "Skipping un-whitelisted Virtio device at"
        continue
      fi
      linux_bind_driver "$bdf" virtio-pci
    done
  done

  # VMD
  driver_loaded=0
  check_for_driver vmd || driver_loaded=$?
  for dev_id in $(get_pci_dev_ids "PCI_DEVICE_ID_INTEL_VMD"); do
    for bdf in $(iter_pci_dev_id 8086 "$dev_id"); do
      if ! pci_can_use "$bdf"; then
        echo "Skipping un-whitelisted VMD device at $bdf"
        continue
      fi
      if [ "$driver_loaded" -ne 0 ]; then
        linux_bind_driver "$bdf" vmd
      else
        linux_unbind_driver "$bdf"
      fi
    done
  done

  echo "1" > "/sys/bus/pci/rescan"
}

# Rebind the PCI devices and delete leftover SPDK files from the hugetlbfs
# mount points and from /run.
function reset_linux() {
  local mount_dir

  reset_linux_pci
  for mount_dir in $(linux_hugetlbfs_mounts); do
    rm -f "$mount_dir"/spdk*map_*
  done
  rm -f /run/.spdk*
}

# Print hugepage usage and a table of the NVMe, I/OAT, IDXD, virtio and VMD
# devices together with the driver each of them is bound to.
# Globals:
#   HUGEPGSZ (read) - printed as the hugepage size when no per-node data exists
function status_linux() {
  local numa_nodes=0
  local path free_pages all_pages node huge_size
  local bdf dev_id driver device vendor name
  local blknames=()

  echo "Hugepages"
  printf "%-6s %10s %8s / %6s\n" "node" "hugesize" "free" "total"

  shopt -s nullglob
  # node[0-9]* rather than node?: node numbers can have more than one digit.
  for path in /sys/devices/system/node/node[0-9]*/hugepages/hugepages-*/; do
    # Skip anything the glob let through that is not node<N>/.../hugepages-<S>kB.
    [[ $path =~ (node[0-9]+)/hugepages/hugepages-([0-9]+kB) ]] || continue
    node=${BASH_REMATCH[1]}
    huge_size=${BASH_REMATCH[2]}

    numa_nodes=$((numa_nodes + 1))
    free_pages=$(cat "$path/free_hugepages")
    all_pages=$(cat "$path/nr_hugepages")

    printf "%-6s %10s %8s / %6s\n" "$node" "$huge_size" "$free_pages" "$all_pages"
  done
  shopt -u nullglob

  # fall back to system-wide hugepages
  if [ "$numa_nodes" = "0" ]; then
    free_pages=$(grep HugePages_Free /proc/meminfo | awk '{ print $2 }')
    all_pages=$(grep HugePages_Total /proc/meminfo | awk '{ print $2 }')
    node="-"
    huge_size="$HUGEPGSZ"

    printf "%-6s %10s %8s / %6s\n" "$node" "$huge_size" "$free_pages" "$all_pages"
  fi

  echo ""
  echo "NVMe devices"

  echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver\t\tDevice name"
  for bdf in $(iter_all_pci_class_code 01 08 02); do
    driver=$(grep DRIVER "/sys/bus/pci/devices/$bdf/uevent" | awk -F"=" '{print $2}')
    if [ "$numa_nodes" = "0" ]; then
      node="-"
    else
      node=$(cat "/sys/bus/pci/devices/$bdf/numa_node")
    fi
    device=$(cat "/sys/bus/pci/devices/$bdf/device")
    vendor=$(cat "/sys/bus/pci/devices/$bdf/vendor")
    if [ "$driver" = "nvme" ] && [ -d "/sys/bus/pci/devices/$bdf/nvme" ]; then
      name="\t"$(ls "/sys/bus/pci/devices/$bdf/nvme")
    else
      name="-"
    fi
    echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t${driver:--}\t\t$name"
  done

  echo ""
  echo "I/OAT DMA"

  echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver"
  for dev_id in $(get_pci_dev_ids "PCI_DEVICE_ID_INTEL_IOAT"); do
    for bdf in $(iter_all_pci_dev_id 8086 "$dev_id"); do
      driver=$(grep DRIVER "/sys/bus/pci/devices/$bdf/uevent" | awk -F"=" '{print $2}')
      if [ "$numa_nodes" = "0" ]; then
        node="-"
      else
        node=$(cat "/sys/bus/pci/devices/$bdf/numa_node")
      fi
      device=$(cat "/sys/bus/pci/devices/$bdf/device")
      vendor=$(cat "/sys/bus/pci/devices/$bdf/vendor")
      echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t${driver:--}"
    done
  done

  echo ""
  echo "IDXD DMA"

  echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver"
  for dev_id in $(get_pci_dev_ids "PCI_DEVICE_ID_INTEL_IDXD"); do
    for bdf in $(iter_all_pci_dev_id 8086 "$dev_id"); do
      driver=$(grep DRIVER "/sys/bus/pci/devices/$bdf/uevent" | awk -F"=" '{print $2}')
      if [ "$numa_nodes" = "0" ]; then
        node="-"
      else
        node=$(cat "/sys/bus/pci/devices/$bdf/numa_node")
      fi
      device=$(cat "/sys/bus/pci/devices/$bdf/device")
      vendor=$(cat "/sys/bus/pci/devices/$bdf/vendor")
      echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t${driver:--}"
    done
  done

  echo ""
  echo "virtio"

  echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver\t\tDevice name"
  for dev_id in $(get_pci_dev_ids "PCI_DEVICE_ID_VIRTIO"); do
    for bdf in $(iter_all_pci_dev_id 1af4 "$dev_id"); do
      driver=$(grep DRIVER "/sys/bus/pci/devices/$bdf/uevent" | awk -F"=" '{print $2}')
      if [ "$numa_nodes" = "0" ]; then
        node="-"
      else
        node=$(cat "/sys/bus/pci/devices/$bdf/numa_node")
      fi
      device=$(cat "/sys/bus/pci/devices/$bdf/device")
      vendor=$(cat "/sys/bus/pci/devices/$bdf/vendor")
      blknames=()
      get_virtio_names_from_bdf "$bdf" blknames
      echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t\t${driver:--}\t\t" "${blknames[@]}"
    done
  done

  echo "VMD"

  echo -e "BDF\t\tNuma Node\tDriver Name"
  for dev_id in $(get_pci_dev_ids "PCI_DEVICE_ID_INTEL_VMD"); do
    for bdf in $(iter_pci_dev_id 8086 "$dev_id"); do
      driver=$(grep DRIVER "/sys/bus/pci/devices/$bdf/uevent" | awk -F"=" '{print $2}')
      node=$(cat "/sys/bus/pci/devices/$bdf/numa_node")
      echo -e "$bdf\t$node\t\t$driver"
    done
  done
}

# Reload nic_uio with hw.nic_uio.bdfs set to the comma separated list of all
# NVMe, I/OAT, IDXD and VMD devices reported by pciconf.
function configure_freebsd_pci() {
  local grep_str bdfs macro dev_id

  # NVMe
  grep_str="class=0x010802"

  # IOAT, IDXD and VMD: one "chip=0x<device id>8086" alternative per ID.
  for macro in PCI_DEVICE_ID_INTEL_IOAT PCI_DEVICE_ID_INTEL_IDXD PCI_DEVICE_ID_INTEL_VMD; do
    for dev_id in $(get_pci_dev_ids "$macro"); do
      grep_str="${grep_str}\|chip=0x${dev_id}8086"
    done
  done

  # Join fields 2-4 of every matching pciconf line with ":" and the resulting
  # entries with ",".
  bdfs=$(pciconf -l | grep "${grep_str}" \
    | awk -F: '{if (count > 0) printf ","; printf "%s:%s:%s",$2,$3,$4; count++}')

  kldunload nic_uio.ko || true
  kenv hw.nic_uio.bdfs="$bdfs"
  kldload nic_uio.ko
}

# Bind the PCI devices and load contigmem sized according to HUGEMEM.
# Globals:
#   HUGEMEM (read) - memory size in MB; one 256 MB buffer per HUGEMEM / 256
function configure_freebsd() {
  configure_freebsd_pci
  # If contigmem is already loaded but the HUGEMEM specified doesn't match the
  # previous value, unload contigmem so that we can reload with the new value.
  if kldstat -q -m contigmem; then
    if [ "$(kenv hw.contigmem.num_buffers)" -ne "$((HUGEMEM / 256))" ]; then
      kldunload contigmem.ko
    fi
  fi
  if ! kldstat -q -m contigmem; then
    kenv hw.contigmem.num_buffers=$((HUGEMEM / 256))
    kenv hw.contigmem.buffer_size=$((256 * 1024 * 1024))
    kldload contigmem.ko
  fi
}

# Unload the contigmem and nic_uio modules; failures are ignored.
function reset_freebsd() {
  kldunload contigmem.ko || true
  kldunload nic_uio.ko || true
}

# ---------------------------------------------------------------------------
# Entry point: the first argument selects the mode, "config" is the default.
# ---------------------------------------------------------------------------
mode=${1:-config}

: ${HUGEMEM:=2048}
: ${PCI_WHITELIST:=""}
: ${PCI_BLACKLIST:=""}

if [ -n "$NVME_WHITELIST" ]; then
  PCI_WHITELIST="$PCI_WHITELIST $NVME_WHITELIST"
fi

if [ -n "$SKIP_PCI" ]; then
  PCI_WHITELIST="none"
fi

# Default TARGET_USER to the invoking sudo user, then to the login name.
if [ -z "$TARGET_USER" ]; then
  TARGET_USER="$SUDO_USER"
  if [ -z "$TARGET_USER" ]; then
    TARGET_USER=$(logname 2> /dev/null) || true
  fi
fi

if [ "$(uname)" = Linux ]; then
  # Hugepagesize from /proc/meminfo, digits only (the value is in kB).
  HUGEPGSZ=$(($(grep Hugepagesize /proc/meminfo | cut -d : -f 2 | tr -dc '0-9')))
  HUGEPGSZ_MB=$((HUGEPGSZ / 1024))
  # Unless NRHUGE is already set: HUGEMEM / hugepage size, rounded up.
  : ${NRHUGE=$(((HUGEMEM + HUGEPGSZ_MB - 1) / HUGEPGSZ_MB))}

  case "$mode" in
    config) configure_linux ;;
    cleanup) cleanup_linux ;;
    reset) reset_linux ;;
    status) status_linux ;;
    help) usage "$0" ;;
    *) usage "$0" "Invalid argument '$mode'" ;;
  esac
else
  case "$mode" in
    config) configure_freebsd ;;
    reset) reset_freebsd ;;
    cleanup) echo "setup.sh cleanup function not yet supported on $(uname)" ;;
    status) echo "setup.sh status function not yet supported on $(uname)" ;;
    help) usage "$0" ;;
    *) usage "$0" "Invalid argument '$mode'" ;;
  esac
fi