xref: /spdk/scripts/setup.sh (revision cb7af50cfdf0f6fa3c3b3755969b8d386cbaca34)
#!/usr/bin/env bash
#  SPDX-License-Identifier: BSD-3-Clause
#  Copyright (C) 2016 Intel Corporation
#  All rights reserved.
#
# Abort on the first unhandled failure. nullglob/extglob are required by the
# glob patterns used further down (e.g. +([0-9]) in cleanup_linux).
set -e
shopt -s nullglob extglob

os=$(uname -s)

# Only Linux and FreeBSD are handled by this script.
if [[ $os != Linux && $os != FreeBSD ]]; then
	echo "Not supported platform ($os), aborting"
	exit 1
fi

# Resolve the repository root relative to this script. Everything is quoted so
# that a checkout path containing whitespace does not get word-split.
rootdir=$(readlink -f "$(dirname "$0")")/..
source "$rootdir/scripts/common.sh"
1855dc5f21SBen Walker
# Print the help text and terminate the shell with exit status 0.
# Arguments:
#   $1 - path or name of the script; only its basename is shown after "Usage:"
#   $2 - optional message printed (followed by an empty line) before the help
# Globals:
#   os (read) - selects which modes are advertised (Linux has status/cleanup).
function usage() {
	local options

	if [[ $os == Linux ]]; then
		options="[config|reset|status|cleanup|interactive|help]"
	else
		options="[config|reset|interactive|help]"
	fi

	if [[ -n $2 ]]; then
		echo "$2"
		echo ""
	fi
	echo "Helper script for allocating hugepages and binding NVMe, I/OAT, VMD and Virtio devices"
	echo "to a generic VFIO kernel driver. If VFIO is not available on the system, this script"
	echo "will fall back to UIO. NVMe and Virtio devices with active mountpoints will be ignored."
	echo "All hugepage operations use default hugepage size on the system (hugepagesz)."
	# $1 is quoted so that a script path containing whitespace is handed to
	# basename as a single operand.
	echo "Usage: $(basename -- "$1") $options"
	echo
	echo "$options - as following:"
	echo "config            Default mode. Allocate hugepages and bind PCI devices."
	if [[ $os == Linux ]]; then
		echo "cleanup           Remove any orphaned files that can be left in the system after SPDK application exit"
	fi
	echo "reset             Rebind PCI devices back to their original drivers."
	echo "                  Also cleanup any leftover spdk files/resources."
	echo "                  Hugepage memory size will remain unchanged."
	if [[ $os == Linux ]]; then
		echo "status            Print status of all SPDK-compatible devices on the system."
	fi
	echo "interactive       Executes script in interactive mode."
	echo "help              Print this help message."
	echo
	echo "The following environment variables can be specified."
	echo "HUGEMEM           Size of hugepage memory to allocate (in MB). 2048 by default."
	echo "                  For NUMA systems, the hugepages will be distributed on node0 by"
	echo "                  default."
	echo "NRHUGE            Number of hugepages to allocate. This variable overwrites HUGEMEM."
	echo "HUGENODE          Specific NUMA node to allocate hugepages on. Multiple nodes can be"
	echo "                  separated with comas. By default, NRHUGE will be applied on each node."
	echo "                  Hugepages can be defined per node with e.g.:"
	echo "                  HUGENODE='nodes_hp[0]=2048,nodes_hp[1]=512,2' - this will allocate"
	echo "                  2048 pages for node0, 512 for node1 and default NRHUGE for node2."
	echo "HUGEPGSZ          Size of the hugepages to use in kB. If not set, kernel's default"
	echo "                  setting is used."
	echo "SHRINK_HUGE       If set to 'yes', hugepages allocation won't be skipped in case"
	echo "                  number of requested hugepages is lower from what's already"
	echo "                  allocated."
	echo "CLEAR_HUGE        If set to 'yes', the attempt to remove hugepages from all nodes will"
	echo "                  be made prior to allocation".
	echo "PCI_ALLOWED"
	echo "PCI_BLOCKED       Whitespace separated list of PCI devices (NVMe, I/OAT, VMD, Virtio)."
	echo "                  Each device must be specified as a full PCI address."
	echo "                  E.g. PCI_ALLOWED=\"0000:01:00.0 0000:02:00.0\""
	echo "                  To block all PCI devices: PCI_ALLOWED=\"none\""
	echo "                  To allow all PCI devices except 0000:01:00.0: PCI_BLOCKED=\"0000:01:00.0\""
	echo "                  To allow only PCI device 0000:01:00.0: PCI_ALLOWED=\"0000:01:00.0\""
	echo "                  If PCI_ALLOWED and PCI_BLOCKED are empty or unset, all PCI devices"
	echo "                  will be bound."
	echo "                  Each device in PCI_BLOCKED will be ignored (driver won't be changed)."
	echo "                  PCI_BLOCKED has precedence over PCI_ALLOWED."
	echo "TARGET_USER       User that will own hugepage mountpoint directory and vfio groups."
	echo "                  By default the current user will be used."
	echo "DRIVER_OVERRIDE   Disable automatic vfio-pci/uio_pci_generic selection and forcefully"
	echo "                  bind devices to the given driver."
	echo "                  E.g. DRIVER_OVERRIDE=uio_pci_generic or DRIVER_OVERRIDE=/home/public/dpdk/build/kmod/igb_uio.ko"
	echo "PCI_BLOCK_SYNC_ON_RESET"
	echo "                  If set in the environment, the attempt to wait for block devices associated"
	echo "                  with given PCI device will be made upon reset"
	echo "UNBIND_ENTIRE_IOMMU_GROUP"
	echo "                  If set, all devices from nvme's iommu group will be unbound from their drivers."
	echo "                  Use with caution."
	echo "DEV_TYPE"
	echo "                  Perform action only against selected type of devices. Supported:"
	echo "                    IOAT|DSA|IAA|VIRTIO|VMD|NVME."
	echo "                  Default is to select all types."
	echo "FORCE_NIC_UIO_REBIND"
	echo "                  When set to 'yes', an attempt to reload nic_uio will be made regardless"
	echo "                  of the kernel environment. Applicable only under FreeBSD."
	exit 0
}
985f247660SDariusz Stojaczyk
# In monolithic kernels the lsmod won't work. So
# back that with a /sys/module. We also check
# /sys/bus/pci/drivers/ as neither lsmod nor /sys/module might
# contain needed info (like in Fedora-like OS).
#
# Arguments:
#   $1 - driver name; '-' and '_' are treated as interchangeable
# Returns (note the inverted convention - non-zero means "found"):
#   0 - no name given, or the driver was not found anywhere
#   1 - the driver is listed by lsmod
#   2 - the driver is visible under /sys/module or /sys/bus/pci/drivers
function check_for_driver() {
	local driver=$1 module

	if [[ -z $driver ]]; then
		return 0
	fi

	module=${driver//-/_}

	# Compare against the module column only. A plain grep over the whole line
	# would also match longer module names and the "Used by" column.
	if lsmod | awk -v mod="$module" '$1 == mod { found = 1 } END { exit !found }'; then
		return 1
	fi

	if [[ -d /sys/module/$driver ||
		-d /sys/module/$module ||
		-d /sys/bus/pci/drivers/$driver ||
		-d /sys/bus/pci/drivers/$module ]]; then
		return 2
	fi
	return 0
}
120e1817b60SStephen Bates
# Verify that both dpdk kernel modules (contigmem.ko, nic_uio.ko) can be found
# in at least one of the directories printed by `kldconfig -rU`.
# Returns 0 when both are present, 1 as soon as one of them is missing.
function check_for_driver_freebsd() {
	local -a module_dirs
	local dir ko found

	# kldconfig prints the module search path as a ';' separated list.
	IFS=";" read -ra module_dirs < <(kldconfig -rU)

	for ko in contigmem.ko nic_uio.ko; do
		found=no
		for dir in "${module_dirs[@]}"; do
			if [[ -f $dir/$ko ]]; then
				found=yes
				break
			fi
		done
		if [[ $found != yes ]]; then
			return 1
		fi
	done
	return 0
}
13494067e8bSMichal Berger
# Print a message prefixed with the device address and its vendor/device ids.
# Arguments:
#   $1   - PCI address used as the key into pci_ids_vendor / pci_ids_device
#   $2.. - message words
# The ids are printed with a leading "0x" stripped.
function pci_dev_echo() {
	local bdf=$1 vendor device
	shift

	vendor=${pci_ids_vendor["$bdf"]#0x}
	device=${pci_ids_device["$bdf"]#0x}

	echo "$bdf ($vendor $device): $*"
}
140768cc8eeSPawel Wodkowski
# Move a single PCI device from its current driver to the requested one.
# Arguments:
#   $1 - PCI address of the device
#   $2 - target driver name; "none" means "only unbind"
# Globals:
#   pci_bus_driver (read) - current driver per device, empty if unbound
# Returns:
#   0 when the device already uses the driver, was only unbound, or ended up
#     listed under /sys/bus/pci/drivers/<driver>/
#   1 when the device is not listed there after the probe attempts
function probe_driver() {
	local bdf=$1
	local driver_name=$2
	# Kept local so that the name does not leak into the caller's scope.
	local old_driver_name=${pci_bus_driver["$bdf"]:-no driver}
	local probe_attempts=0

	if [[ $driver_name == "$old_driver_name" ]]; then
		pci_dev_echo "$bdf" "Already using the $old_driver_name driver"
		return 0
	fi

	if [[ $old_driver_name != "no driver" ]]; then
		echo "$bdf" > "/sys/bus/pci/devices/$bdf/driver/unbind"
	fi

	pci_dev_echo "$bdf" "$old_driver_name -> $driver_name"

	if [[ $driver_name == "none" ]]; then
		return 0
	fi

	# Request the driver through driver_override, then ask the bus to probe the
	# device. A failed write to drivers_probe is retried with a 0.5s pause; the
	# loop gives up once the attempt counter reaches its limit.
	echo "$driver_name" > "/sys/bus/pci/devices/$bdf/driver_override"
	while ! echo "$bdf" > "/sys/bus/pci/drivers_probe" && ((probe_attempts++ < 10)); do
		pci_dev_echo "$bdf" "failed to bind to $driver_name, retrying ($probe_attempts)"
		sleep 0.5
	done 2> /dev/null

	# Clear the override again regardless of the outcome.
	echo "" > "/sys/bus/pci/devices/$bdf/driver_override"

	if [[ ! -e /sys/bus/pci/drivers/$driver_name/$bdf ]]; then
		pci_dev_echo "$bdf" "failed to bind to $driver_name, aborting"
		return 1
	fi
}
175db0d8682SMichal Berger
# Bind a device to the given driver and look after its IOMMU group.
# Arguments:
#   $1 - PCI address of the device
#   $2 - driver to bind to (passed through to probe_driver)
# Globals:
#   pci_iommu_groups, iommu_groups (read), pci_bus_driver (written for siblings
#   that are about to be unbound), TARGET_USER, UNBIND_ENTIRE_IOMMU_GROUP (read)
function linux_bind_driver() {
	local bdf="$1"
	local driver_name="$2"

	probe_driver "$bdf" "$driver_name"

	# Hand the vfio group node over to TARGET_USER when one is requested.
	local iommu_group=${pci_iommu_groups["$bdf"]}
	if [ -e "/dev/vfio/$iommu_group" ]; then
		if [ -n "$TARGET_USER" ]; then
			chown "$TARGET_USER" "/dev/vfio/$iommu_group"
		fi
	fi

	# iommu_groups[] holds a reference that is expanded indirectly into the
	# list of devices sharing the group with $bdf.
	local iommug=("${!iommu_groups[iommu_group]}")
	local _bdf _driver
	if ((${#iommug[@]} > 1)) && [[ $driver_name == vfio* ]]; then
		pci_dev_echo "$bdf" "WARNING: detected multiple devices (${#iommug[@]}) under the same IOMMU group!"
		for _bdf in "${iommug[@]}"; do
			[[ $_bdf == "$bdf" ]] && continue
			# readlink -f succeeds even when the final path component is
			# missing, so an unbound device would otherwise be reported as
			# using a driver literally called "driver". Resolve the link only
			# when it exists and start every iteration from an empty value.
			_driver=""
			if [[ -e /sys/bus/pci/devices/$_bdf/driver ]]; then
				_driver=$(readlink -f "/sys/bus/pci/devices/$_bdf/driver") && _driver=${_driver##*/}
			fi
			if [[ $_driver == "$driver_name" ]]; then
				continue
			fi
			# See what DPDK considers to be a "viable" iommu group: dpdk/lib/eal/linux/eal_vfio.c -> rte_vfio_setup_device()
			pci_dev_echo "$bdf" "WARNING: ${_bdf##*/} not bound to $driver_name (${_driver:-no driver})"
			pci_dev_echo "$bdf" "WARNING All devices in the IOMMU group must be bound to the same driver or unbound"
			if [[ $UNBIND_ENTIRE_IOMMU_GROUP == yes ]]; then
				pci_dev_echo "$bdf" "WARNING: Attempting to unbind ${_bdf##*/}"
				# probe_driver reads the current driver from pci_bus_driver.
				pci_bus_driver["${_bdf##*/}"]=$_driver
				probe_driver "${_bdf##*/}" none
			fi
		done
	fi

}
21121173cd0SDaniel Verkamp
# Detach a device from whatever driver currently owns it.
# Arguments:
#   $1 - PCI address of the device
# Globals:
#   pci_bus_driver (read) - current driver per device, empty if unbound
function linux_unbind_driver() {
	local bdf="$1"
	local current=${pci_bus_driver["$bdf"]:-no driver}
	local driver_dir=/sys/bus/pci/drivers/$current

	if [[ $current != "no driver" ]]; then
		# Unbind and drop any driver_override, but only if the driver is
		# actually registered on the PCI bus.
		if [[ -e $driver_dir ]]; then
			echo "$bdf" > "$driver_dir/unbind"
			echo "" > "/sys/bus/pci/devices/$bdf/driver_override"
		fi
		pci_dev_echo "$bdf" "$current -> no driver"
	else
		pci_dev_echo "$bdf" "Not bound to any driver"
		return 0
	fi
}
22810283728SJim Harris
# List the mount directory (third field of the `mount` output) of every
# line that contains " type hugetlbfs ", one per line.
function linux_hugetlbfs_mounts() {
	mount | awk '/ type hugetlbfs / { print $3 }'
}
2329603193fSDaniel Verkamp
# Report every reason the block devices behind a PCI device look "in use".
# Arguments:
#   $1 - PCI address of the device
# Output (one entry per line, nothing when the device looks free):
#   holder@<blockdev>:<holder>  - another block device holds it
#   mount@<blockdev>:<name>     - lsblk lists an existing mountpoint for it
#   data@<blockdev>             - block_in_use reported the device as used
# Returns 1 when lsblk is not available.
function get_used_bdf_block_devs() {
	local bdf=$1
	local -a blocks
	local block parent holder name mnt
	local used

	hash lsblk &> /dev/null || return 1
	blocks=($(get_block_dev_from_bdf "$bdf"))

	for block in "${blocks[@]}"; do
		# A holder counts regardless of whether anything is mounted. It is
		# recorded only when it lists the held device among its slaves.
		for holder in "/sys/class/block/$block"*/holders/*; do
			[[ -e $holder ]] || continue
			parent=${holder%/holders*}
			parent=${parent##*/}
			[[ -e $holder/slaves/$parent ]] || continue
			used+=("holder@$parent:${holder##*/}")
		done

		# Second lsblk column is the mountpoint; keep rows where it exists.
		while read -r name mnt; do
			if [[ -e $mnt ]]; then
				used+=("mount@$block:$name")
			fi
		done < <(lsblk -l -n -o NAME,MOUNTPOINT "/dev/$block")

		# Nothing recorded so far: also look for valid data on the device so
		# that more complex setups (ZFS pools, etc.) are not missed.
		if ((${#used[@]} == 0)) && block_in_use "$block" > /dev/null; then
			used+=("data@$block")
		fi
	done

	((${#used[@]} == 0)) || printf '%s\n' "${used[@]}"
}
270f869082aSDariusz Stojaczyk
# Build the device maps and mark every device that must be left alone.
# Arguments:
#   $1 - mode the script runs in; "status" skips the allow/block filtering and
#        "config" enables the VMD hint below
# Globals:
#   DEV_TYPE, PCI_ALLOWED, DRIVER_OVERRIDE, ALLOW_NVME_BEHIND_VMD (read)
#   all_devices_d and the per-type *_d maps (written): 0 == free, 1 == skip
# Always returns 0.
function collect_devices() {
	local mode=$1 in_use

	map_supported_devices "$DEV_TYPE"

	for bdf in "${!all_devices_d[@]}"; do
		in_use=0
		if [[ $mode != status ]]; then
			if ! pci_can_use "$bdf"; then
				pci_dev_echo "$bdf" "Skipping denied controller at $bdf"
				in_use=1
			fi
		fi
		# NVMe and virtio devices are skipped when verify_bdf_block_devs fails.
		if [[ -n ${nvme_d["$bdf"]} || -n ${virtio_d["$bdf"]} ]]; then
			if ! verify_bdf_block_devs "$bdf"; then
				in_use=1
			fi
		fi
		# VMD, DSA and IAA controllers are touched only when explicitly listed
		# in PCI_ALLOWED.
		if [[ -n ${vmd_d["$bdf"]} ]]; then
			if [[ $PCI_ALLOWED != *"$bdf"* ]]; then
				pci_dev_echo "$bdf" "Skipping not allowed VMD controller at $bdf"
				in_use=1
			elif ((vmd_nvme_count["$bdf"] > 0)) && [[ $DRIVER_OVERRIDE != none && $mode == config ]]; then
				cat <<- MESSAGE
					Binding new driver to VMD device with NVMe SSDs attached to the kernel:
					  ${!vmd_nvme_d["$bdf"]}
					The binding process may go faster if you first run this script with
					DRIVER_OVERRIDE="none" to unbind only the NVMe SSDs, and then run
					again to unbind the VMD devices.
				MESSAGE
			fi
		fi
		if [[ -n ${dsa_d["$bdf"]} ]] && [[ $PCI_ALLOWED != *"$bdf"* ]]; then
			pci_dev_echo "$bdf" "Skipping not allowed DSA controller at $bdf"
			in_use=1
		fi
		if [[ -n ${iaa_d["$bdf"]} ]] && [[ $PCI_ALLOWED != *"$bdf"* ]]; then
			pci_dev_echo "$bdf" "Skipping not allowed IAA controller at $bdf"
			in_use=1
		fi
		# Update in-use for each bdf. Default from the map_supported_devices() is 0 == "not used"
		local -n type_ref=${all_devices_type_d["$bdf"]}_d
		type_ref["$bdf"]=$in_use
		all_devices_d["$bdf"]=$in_use
	done

	# Check if we got any nvmes attached to VMDs sharing the same iommu_group - if there are
	# any skip them since they won't be usable by SPDK without moving the entire VMD ctrl
	# away from the kernel first. That said, allow to touch the nvmes in case user requested
	# all devices to be unbound from any driver or if dedicated override flag was set.
	[[ -z $ALLOW_NVME_BEHIND_VMD && $DRIVER_OVERRIDE != none ]] || return 0

	for bdf in "${!nvme_d[@]}"; do
		is_nvme_iommu_shared_with_vmd "$bdf" || continue
		nvme_d["$bdf"]=1 all_devices_d["$bdf"]=1
		pci_dev_echo "$bdf" "Skipping nvme behind VMD (${nvme_vmd_d["$bdf"]})"
	done

	get_unsupported_nic_uio_hw

	return 0
}
33351b5fa85SMichal Berger
# Print the name of the kernel driver that would normally claim a device.
# Arguments:
#   $1 - PCI address of the device
# The driver is resolved from the device's modalias via `modprobe -R` when
# possible; otherwise a default is chosen from the per-type device maps.
# Prints an empty line when nothing matches. Diagnostics are discarded.
function collect_driver() {
	local bdf=$1
	local drivers driver
	local modalias=/sys/bus/pci/devices/$bdf/modalias

	{
		if [[ -e $modalias ]] && drivers=($(modprobe -R "$(< "$modalias")")); then
			# Pick first entry in case multiple aliases are bound to a driver.
			driver=$(readlink -f "/sys/module/${drivers[0]}/drivers/pci:"*)
			driver=${driver##*/}
		else
			# Later checks override earlier ones, so the order matters.
			if [[ -n ${nvme_d["$bdf"]} ]]; then driver=nvme; fi
			if [[ -n ${ioat_d["$bdf"]} ]]; then driver=ioatdma; fi
			if [[ -n ${dsa_d["$bdf"]} ]]; then driver=idxd; fi
			if [[ -n ${iaa_d["$bdf"]} ]]; then driver=idxd; fi
			if [[ -n ${virtio_d["$bdf"]} ]]; then driver=virtio-pci; fi
			if [[ -n ${vmd_d["$bdf"]} ]]; then driver=vmd; fi
		fi
	} 2> /dev/null

	echo "$driver"
}
35385501619SMichal Berger
# Decide whether the block devices behind a PCI device allow rebinding it.
# Arguments:
#   $1 - PCI address of the device
# Returns:
#   0 - get_used_bdf_block_devs reported nothing
#   1 - the lookup failed, or at least one entry was reported (the entries
#       are then printed, comma separated, through pci_dev_echo)
function verify_bdf_block_devs() {
	local bdf=$1
	local -a in_use

	in_use=($(get_used_bdf_block_devs "$bdf")) || return 1

	((${#in_use[@]} > 0)) || return 0

	# "${in_use[*]}" joins on the first IFS character, hence the local override.
	local IFS=","
	pci_dev_echo "$bdf" "Active devices: ${in_use[*]}, so not binding PCI dev"
	return 1
}
3655ea54946SMichal Berger
# Select the userspace I/O driver, load it and bind all free devices to it.
# Selection order: DRIVER_OVERRIDE (a name, a path to a .ko, or "none"),
# vfio-pci when is_iommu_enabled succeeds, uio_pci_generic, and finally the
# igb_uio module from the dpdk build tree.
# Globals:
#   rootdir, DRIVER_OVERRIDE, all_devices_d, nvme_d (read)
#   driver_name (written, not local - configure_linux reads it afterwards)
#   igb_uio_fallback, bdf (written, not local)
# Returns 1 when no usable driver could be found.
function configure_linux_pci() {
	local driver_path=""
	driver_name=""
	igb_uio_fallback=""

	if [[ -r "$rootdir/dpdk/build-tmp/kernel/linux/igb_uio/igb_uio.ko" ]]; then
		# igb_uio is a common driver to override with and it depends on uio.
		modprobe uio || true
		# check_for_driver returns non-zero when the driver is already present.
		if ! check_for_driver igb_uio || insmod "$rootdir/dpdk/build-tmp/kernel/linux/igb_uio/igb_uio.ko"; then
			igb_uio_fallback="$rootdir/dpdk/build-tmp/kernel/linux/igb_uio/igb_uio.ko"
		fi
	fi

	if [[ "${DRIVER_OVERRIDE}" == "none" ]]; then
		driver_name=none
	elif [[ -n "${DRIVER_OVERRIDE}" ]]; then
		driver_path="$DRIVER_OVERRIDE"
		driver_name="${DRIVER_OVERRIDE##*/}"
		# modprobe and the sysfs don't use the .ko suffix.
		driver_name=${driver_name%.ko}
		# path = name -> there is no path
		if [[ "$driver_path" = "$driver_name" ]]; then
			driver_path=""
		fi
	elif is_iommu_enabled; then
		driver_name=vfio-pci
		# Just in case, attempt to load VFIO_IOMMU_TYPE1 module into the kernel - this
		# should be done automatically by modprobe since this particular module should
		# be a part of vfio-pci dependencies, however, on some distros, it seems that
		# it's not the case. See #1689.
		if modinfo vfio_iommu_type1 > /dev/null; then
			modprobe vfio_iommu_type1
		fi
	elif ! check_for_driver uio_pci_generic || modinfo uio_pci_generic > /dev/null 2>&1; then
		driver_name=uio_pci_generic
	elif [[ -e $igb_uio_fallback ]]; then
		driver_path="$igb_uio_fallback"
		driver_name="igb_uio"
		echo "WARNING: uio_pci_generic not detected - using $driver_name"
	else
		echo "No valid drivers found [vfio-pci, uio_pci_generic, igb_uio]. Please enable one of the kernel modules."
		return 1
	fi

	# modprobe assumes the directory of the module. If the user passes in a path, we should use insmod
	if [[ $driver_name != "none" ]]; then
		if [[ -n "$driver_path" ]]; then
			# Quoted: the path comes from the user and may contain whitespace.
			insmod "$driver_path" || true
		else
			modprobe "$driver_name"
		fi
	fi

	for bdf in "${!all_devices_d[@]}"; do
		if ((all_devices_d["$bdf"] == 0)); then
			if [[ -n ${nvme_d["$bdf"]} ]]; then
				# Some nvme controllers may take significant amount of time while being
				# unbound from the driver. Put that task into background to speed up the
				# whole process. Currently this is done only for the devices bound to the
				# nvme driver as other, i.e., ioatdma's, trigger a kernel BUG when being
				# unbound in parallel. See https://bugzilla.kernel.org/show_bug.cgi?id=209041.
				linux_bind_driver "$bdf" "$driver_name" &
			else
				linux_bind_driver "$bdf" "$driver_name"
			fi
		fi
	done
	# Barrier for the background binds started above.
	wait

	echo "1" > "/sys/bus/pci/rescan"
}
43755ac2263SGangCao
438844c8ec3SMichal Bergerfunction cleanup_linux() {
439af5b654dSMichal Berger	local dirs_to_clean=() files_to_clean=() opened_files=() file_locks=()
440af5b654dSMichal Berger	local match_spdk="spdk_tgt|iscsi|vhost|nvmf|rocksdb|bdevio|bdevperf|vhost_fuzz|nvme_fuzz|accel_perf|bdev_svc"
441af5b654dSMichal Berger
442af5b654dSMichal Berger	dirs_to_clean=({/var/run,/tmp}/dpdk/spdk{,_pid}+([0-9]))
443af5b654dSMichal Berger	if [[ -d $XDG_RUNTIME_DIR ]]; then
444af5b654dSMichal Berger		dirs_to_clean+=("$XDG_RUNTIME_DIR/dpdk/spdk"{,_pid}+([0-9]))
4451469679fSDariusz Stojaczyk	fi
4461469679fSDariusz Stojaczyk
447af5b654dSMichal Berger	for dir in "${dirs_to_clean[@]}"; do
448af5b654dSMichal Berger		files_to_clean+=("$dir/"*)
4491469679fSDariusz Stojaczyk	done
450637d9e60SMichal Berger	file_locks+=(/var/tmp/spdk_pci_lock*)
4510af934b3SKrzysztof Karas	file_locks+=(/var/tmp/spdk_cpu_lock*)
4521469679fSDariusz Stojaczyk
45371605a52SMichal Berger	files_to_clean+=(/dev/shm/@(@($match_spdk)_trace|spdk_iscsi_conns)*)
454af5b654dSMichal Berger	files_to_clean+=("${file_locks[@]}")
455e47f972dSPawel Wodkowski
456cb8174dcSMichal Berger	# This may fail in case path that readlink attempts to resolve suddenly
457cb8174dcSMichal Berger	# disappears (as it may happen with terminating processes).
458cb8174dcSMichal Berger	opened_files+=($(readlink -f /proc/+([0-9])/fd/+([0-9]))) || true
459e47f972dSPawel Wodkowski
460af5b654dSMichal Berger	if ((${#opened_files[@]} == 0)); then
461e47f972dSPawel Wodkowski		echo "Can't get list of opened files!"
462e47f972dSPawel Wodkowski		exit 1
463e47f972dSPawel Wodkowski	fi
464e47f972dSPawel Wodkowski
465e47f972dSPawel Wodkowski	echo 'Cleaning'
466af5b654dSMichal Berger	for f in "${files_to_clean[@]}"; do
467af5b654dSMichal Berger		[[ -e $f ]] || continue
468af5b654dSMichal Berger		if [[ ${opened_files[*]} != *"$f"* ]]; then
469e47f972dSPawel Wodkowski			echo "Removing:    $f"
470e47f972dSPawel Wodkowski			rm $f
471e47f972dSPawel Wodkowski		else
472e47f972dSPawel Wodkowski			echo "Still open: $f"
473e47f972dSPawel Wodkowski		fi
474e47f972dSPawel Wodkowski	done
4751469679fSDariusz Stojaczyk
476af5b654dSMichal Berger	for dir in "${dirs_to_clean[@]}"; do
477af5b654dSMichal Berger		[[ -d $dir ]] || continue
478af5b654dSMichal Berger		if [[ ${opened_files[*]} != *"$dir"* ]]; then
4791469679fSDariusz Stojaczyk			echo "Removing:    $dir"
4801469679fSDariusz Stojaczyk			rmdir $dir
4811469679fSDariusz Stojaczyk		else
4821469679fSDariusz Stojaczyk			echo "Still open: $dir"
4831469679fSDariusz Stojaczyk		fi
4841469679fSDariusz Stojaczyk	done
485e47f972dSPawel Wodkowski	echo "Clean"
486e47f972dSPawel Wodkowski}
487e47f972dSPawel Wodkowski
# Request NRHUGE hugepages through the given nr_hugepages file.
# Arguments:
#   $1 - nr_hugepages file to read the current count from and write to
#   $2 - optional node id, used only in the messages
# Globals:
#   NRHUGE (read)      - requested number of pages; negative values write 0
#   SHRINK_HUGE (read) - unless "yes", the write is skipped when the current
#                        count already covers the request
# Returns 1 when the count read back after the write is below the request.
check_hugepages_alloc() {
	local hp_int=$1
	local where=${2:+on node$2}
	local current

	current=$(< "$hp_int")

	if ((NRHUGE <= current)) && [[ $SHRINK_HUGE != yes ]]; then
		echo "INFO: Requested $NRHUGE hugepages but $current already allocated $where"
		return 0
	fi

	echo $((NRHUGE < 0 ? 0 : NRHUGE)) > "$hp_int"

	# Read the value back to see how many pages are reported after the write.
	current=$(< "$hp_int")
	((current < NRHUGE)) || return 0

	printf '\n%s\n%s\n' \
		"## ERROR: requested $NRHUGE hugepages but $current could be allocated $where." \
		"## Memory might be heavily fragmented. Please try flushing the system cache, or reboot the machine."
	return 1
}
5112b80955cSMichal Berger
# Request zero hugepages by writing 0 to /proc/sys/vm/nr_hugepages.
clear_hugepages() {
	echo 0 > /proc/sys/vm/nr_hugepages
}
5132b80955cSMichal Berger
# Allocate hugepages either system-wide or on the NUMA nodes named in HUGENODE.
# Globals (read):
#   CLEAR_HUGE - "yes" calls clear_hugepages first
#   HUGENODE   - comma separated list; each entry is a node id (gets NRHUGE
#                pages) or nodes_hp[<id>]=<pages>. Empty means system-wide.
#   HUGEPGSZ   - hugepage size in kB, selects the per-node sysfs directory
#   NRHUGE     - default number of pages
# Entries naming a node that does not exist are reported on stderr and skipped.
configure_linux_hugepages() {
	local node entry
	local -a nodes=() nodes_to_use=() nodes_hp=()

	if [[ $CLEAR_HUGE == yes ]]; then
		clear_hugepages
	fi

	if [[ -z $HUGENODE ]]; then
		check_hugepages_alloc /proc/sys/vm/nr_hugepages
		return 0
	fi

	# Map node id -> that node's nr_hugepages file for the selected page size.
	for node in /sys/devices/system/node/node*; do
		[[ -e $node ]] || continue
		nodes[${node##*node}]=$node/hugepages/hugepages-${HUGEPGSZ}kB/nr_hugepages
	done

	if ((${#nodes[@]} == 0)); then
		# No NUMA support? Fallback to common interface
		check_hugepages_alloc /proc/sys/vm/nr_hugepages
		return 0
	fi

	IFS="," read -ra nodes_to_use <<< "$HUGENODE"
	if ((${#nodes_to_use[@]} == 0)); then
		nodes_to_use[0]=0
	fi

	# Align indexes with node ids. Entries are parsed with a fully anchored
	# pattern and assigned directly - HUGENODE comes from the environment and
	# must never be evaluated as shell code. The 10# prefix keeps ids with
	# leading zeros from being read as octal.
	for entry in "${nodes_to_use[@]}"; do
		if [[ $entry =~ ^nodes_hp\[([0-9]+)\]=([0-9]*)$ ]]; then
			nodes_hp[10#${BASH_REMATCH[1]}]=${BASH_REMATCH[2]}
		elif [[ $entry =~ ^[0-9]+$ ]]; then
			nodes_hp[10#$entry]=$NRHUGE
		elif [[ -n $entry ]]; then
			echo "Unrecognized HUGENODE entry '$entry', ignoring" >&2
		fi
	done

	for node in "${!nodes_hp[@]}"; do
		if [[ -z ${nodes[node]} ]]; then
			echo "Node $node doesn't exist, ignoring" >&2
			continue
		fi
		# An empty per-node value falls back to the global NRHUGE.
		NRHUGE=${nodes_hp[node]:-$NRHUGE} check_hugepages_alloc "${nodes[node]}" "$node"
	done
}
5602b80955cSMichal Berger
# Linux "config" mode: bind PCI devices, make sure a hugetlbfs mount exists,
# allocate hugepages and, when the vfio-pci driver is in use, make the
# hugetlbfs mounts writable for $TARGET_USER.
#
# Globals read:    driver_name, TARGET_USER
# Globals written: hugetlbfs_mounts, mount, MEMLOCK_AMNT, MEMLOCK_MB
function configure_linux() {
	configure_linux_pci
	hugetlbfs_mounts=$(linux_hugetlbfs_mounts)

	if [[ -z $hugetlbfs_mounts ]]; then
		hugetlbfs_mounts=/mnt/huge
		echo "Mounting hugetlbfs at $hugetlbfs_mounts"
		mkdir -p "$hugetlbfs_mounts"
		mount -t hugetlbfs nodev "$hugetlbfs_mounts"
	fi

	configure_linux_hugepages

	if [[ $driver_name == "vfio-pci" && -n $TARGET_USER ]]; then
		# The list is whitespace-separated, hence the deliberate word splitting.
		for mount in $hugetlbfs_mounts; do
			chown "$TARGET_USER" "$mount"
			chmod g+w "$mount"
		done

		MEMLOCK_AMNT=$(su "$TARGET_USER" -c "ulimit -l")
		if [[ $MEMLOCK_AMNT != "unlimited" ]]; then
			MEMLOCK_MB=$((MEMLOCK_AMNT / 1024))
			printf '%s\n' \
				"\"$TARGET_USER\" user memlock limit: $MEMLOCK_MB MB" \
				"" \
				"This is the maximum amount of memory you will be" \
				"able to use with DPDK and VFIO if run as user \"$TARGET_USER\"." \
				"To change this, please adjust limits.conf memlock limit for user \"$TARGET_USER\"."
			if ((MEMLOCK_AMNT < 65536)); then
				echo ""
				echo "## WARNING: memlock limit is less than 64MB"
				echo -n "## DPDK with VFIO may not be able to initialize "
				echo "if run as user \"$TARGET_USER\"."
			fi
		fi
	fi

	# "uname -i" is non-portable and reports "unknown" on many distributions,
	# which silently skipped the msr load; the machine name (-m) is reliable.
	if [[ $(uname -m) == "x86_64" && ! -e /dev/cpu/0/msr ]]; then
		# Some distros build msr as a module.  Make sure it's loaded to ensure
		#  DPDK can easily figure out the TSC rate rather than relying on 100ms
		#  sleeps.
		modprobe msr &> /dev/null || true
	fi
}
60855dc5f21SBen Walker
# Give every selected PCI device back to the driver reported by
# collect_driver (or just unbind it when that is not possible), then ask the
# kernel to rescan the PCI bus.
function reset_linux_pci() {
	# virtio
	# TODO: find out whether virtio-pci is loaded at all and only unbind when it
	# is not. This needs more investigation - some kernels appear to ship
	#  virtio_scsi without virtio-pci, and the underscore vs. dash spelling of
	#  the virtio_scsi name has to be handled correctly as well.
	modprobe virtio-pci || true

	for bdf in "${!all_devices_d[@]}"; do
		# Only entries whose value is 0 are handled.
		if ((all_devices_d["$bdf"] != 0)); then
			continue
		fi

		driver=$(collect_driver "$bdf")
		if [[ -z $driver ]] || check_for_driver "$driver"; then
			linux_unbind_driver "$bdf"
		else
			linux_bind_driver "$bdf" "$driver"
		fi
	done

	echo "1" > "/sys/bus/pci/rescan"
}
62955ac2263SGangCao
# Linux "reset" mode: reset the PCI bindings, then remove the spdk*map_* files
# from every hugetlbfs mount (only those a non-blocking flock can lock) and
# the /run/.spdk* files.
function reset_linux() {
	reset_linux_pci

	for mount in $(linux_hugetlbfs_mounts); do
		for hp in "$mount"/spdk*map_*; do
			# Leave files alone when the lock cannot be taken right away.
			if flock -n "$hp" true; then
				rm -f "$hp"
			fi
		done
	done

	rm -f /run/.spdk*
}
63955dc5f21SBen Walker
# Linux "status" mode: print a hugepage table followed by a table of the
# collected PCI devices.
#
# Table headers go to stderr, table rows go to stdout.
# Globals read:    all_devices_d, pci_bus_driver, pci_ids_vendor,
#                  pci_ids_device, nvme_d, nvme_vmd_d, ioat_d, dsa_d, iaa_d,
#                  virtio_d, vmd_d, HUGEPGSZ
# Globals written: numa_nodes, path, free_pages, all_pages, node, huge_size,
#                  sorted_bdfs, bdf, driver, name, blknames, desc
function status_linux() {
	local ctrlrs

	echo "Hugepages" >&2
	printf "%-6s %10s %8s / %6s\n" "node" "hugesize" "free" "total" >&2

	numa_nodes=0
	for path in /sys/devices/system/node/node*/hugepages/hugepages-*/; do
		# Skip the literal pattern when nothing matched.
		[[ -e $path ]] || continue
		numa_nodes=$((numa_nodes + 1))
		free_pages=$(< "$path/free_hugepages")
		all_pages=$(< "$path/nr_hugepages")

		[[ $path =~ (node[0-9]+)/hugepages/hugepages-([0-9]+kB) ]]

		node=${BASH_REMATCH[1]}
		huge_size=${BASH_REMATCH[2]}

		# Quoted so that an empty value cannot shift the remaining columns.
		printf "%-6s %10s %8s / %6s\n" "$node" "$huge_size" "$free_pages" "$all_pages"
	done

	# fall back to system-wide hugepages
	if [[ $numa_nodes == 0 ]]; then
		free_pages=$(awk '/HugePages_Free/ { print $2 }' /proc/meminfo)
		all_pages=$(awk '/HugePages_Total/ { print $2 }' /proc/meminfo)
		node="-"
		huge_size="$HUGEPGSZ"

		printf "%-6s %10s %8s / %6s\n" "$node" "$huge_size" "$free_pages" "$all_pages"
	fi

	printf '\n%-25s %-15s %-6s %-6s %-7s %-16s %-10s %s\n' \
		"Type" "BDF" "Vendor" "Device" "NUMA" "Driver" "Device" "Block devices" >&2

	sorted_bdfs=()
	if ((${#all_devices_d[@]} > 0)); then
		mapfile -t sorted_bdfs < <(printf '%s\n' "${!all_devices_d[@]}" | sort)
	fi

	for bdf in "${sorted_bdfs[@]}"; do
		driver=${pci_bus_driver["$bdf"]}
		if [[ $numa_nodes == 0 ]]; then
			node="-"
		else
			node=$(< "/sys/bus/pci/devices/$bdf/numa_node")
			if ((node == -1)); then
				node=unknown
			fi
		fi
		if [[ $driver == "nvme" && -d /sys/bus/pci/devices/$bdf/nvme ]]; then
			# Glob instead of parsing ls; several entries are joined with a
			# space so the row stays on a single line.
			ctrlrs=("/sys/bus/pci/devices/$bdf/nvme"/*)
			[[ -e ${ctrlrs[0]} ]] || ctrlrs=()
			name="${ctrlrs[*]##*/}"
		else
			name="-"
		fi

		if [[ -n ${nvme_d["$bdf"]} || -n ${virtio_d["$bdf"]} ]]; then
			blknames=($(get_block_dev_from_bdf "$bdf"))
		else
			blknames=("-")
		fi

		# The first device class that knows this BDF provides the label.
		desc=""
		desc=${desc:-${nvme_d["$bdf"]:+NVMe${nvme_vmd_d["$bdf"]:+@${nvme_vmd_d["$bdf"]}(VMD)}}}
		desc=${desc:-${ioat_d["$bdf"]:+I/OAT}}
		desc=${desc:-${dsa_d["$bdf"]:+DSA}}
		desc=${desc:-${iaa_d["$bdf"]:+IAA}}
		desc=${desc:-${virtio_d["$bdf"]:+virtio}}
		desc=${desc:-${vmd_d["$bdf"]:+VMD}}

		printf '%-25s %-15s %-6s %-6s %-7s %-16s %-10s %s\n' \
			"$desc" "$bdf" "${pci_ids_vendor["$bdf"]#0x}" "${pci_ids_device["$bdf"]#0x}" \
			"$node" "${driver:--}" "${name:-}" "${blknames[*]:--}"
	done
}
7085c42f218SPiotr Pelplinski
# FreeBSD "status" mode: report the contigmem state (module presence and the
# two hw.contigmem.* kernel environment values) and list the collected
# devices grouped by type.
#
# The device table header goes to stderr, everything else to stdout.
function status_freebsd() {
	local pci

	# status_print LABEL [BDF...]
	# Emits one row per BDF, ordered by the BDF column. The body is a
	# subshell, so nothing assigned here reaches the caller.
	status_print() (
		local label=$1

		shift

		for pci in "$@"; do
			printf '%-8s %-15s %-6s %-6s %-16s\n' \
				"$label" \
				"$pci" \
				"${pci_ids_vendor["$pci"]}" \
				"${pci_ids_device["$pci"]}" \
				"${pci_bus_driver["$pci"]}"
		done | sort -k2,2
	)

	local contigmem="not present"
	local contigmem_buffer_size
	local contigmem_num_buffers

	if kldstat -q -m contigmem; then
		contigmem=present
	fi
	# kenv fails when the variable is missing from the kernel environment.
	contigmem_buffer_size=$(kenv hw.contigmem.buffer_size 2> /dev/null) \
		|| contigmem_buffer_size="not set"
	contigmem_num_buffers=$(kenv hw.contigmem.num_buffers 2> /dev/null) \
		|| contigmem_num_buffers="not set"

	printf 'Contigmem (%s)\nBuffer Size: %s\nNum Buffers: %s\n\n' \
		"$contigmem" "$contigmem_buffer_size" "$contigmem_num_buffers"

	printf '\n%-8s %-15s %-6s %-6s %-16s\n' \
		"Type" "BDF" "Vendor" "Device" "Driver" >&2

	status_print "NVMe" "${!nvme_d[@]}"
	status_print "I/OAT" "${!ioat_d[@]}"
	status_print "DSA" "${!dsa_d[@]}"
	status_print "IAA" "${!iaa_d[@]}"
	status_print "VMD" "${!vmd_d[@]}"
}
758d51345c0SMichal Berger
# Hand the given BDFs (positional arguments) to the nic_uio module: reload the
# module with hw.nic_uio.bdfs set to the comma-joined list.
#
# Devices recorded in unsupported_nic_uio_hw are appended to the list, but only
# when FORCE_NIC_UIO_REBIND=yes; otherwise a warning is printed and 1 returned.
function configure_freebsd_pci() {
	local -a bdf_list=("$@")

	if [[ $FORCE_NIC_UIO_REBIND != yes ]] && ((${#unsupported_nic_uio_hw[@]} > 0)); then
		warn_unsupported_nic_uio_hw
		return 1
	fi

	bdf_list+=("${unsupported_nic_uio_hw[@]}")

	# The module has to be unloaded first if it is currently loaded.
	if kldstat -n nic_uio &> /dev/null; then
		kldunload nic_uio.ko
	fi

	# "${array[*]}" joins on the first character of IFS.
	local IFS=","
	kenv hw.nic_uio.bdfs="${bdf_list[*]}"
	kldload nic_uio.ko
}
77955ac2263SGangCao
# Populate the global array unsupported_nic_uio_hw with every BDF listed in
# the hw.nic_uio.bdfs kernel environment variable that is not a key of
# all_devices_d.
#
# The array is reset on every call, so repeated calls do not pile up
# duplicates. Always returns 0, also when the variable cannot be read.
function get_unsupported_nic_uio_hw() {
	local bdfs bdf known
	local -g unsupported_nic_uio_hw
	unsupported_nic_uio_hw=()

	IFS="," read -ra bdfs < <(kenv hw.nic_uio.bdfs 2> /dev/null) || return 0

	for bdf in "${bdfs[@]}"; do
		# Exact comparison: a regex/substring match would treat e.g. 0:1:0
		# as known whenever 10:1:0 is.
		for known in "${!all_devices_d[@]}"; do
			[[ $known == "$bdf" ]] && continue 2
		done
		unsupported_nic_uio_hw+=("$bdf")
	done

	return 0
}
7922635e73dSMichal Berger
# Print (to stdout) the list of devices held in unsupported_nic_uio_hw together
# with a hint on how to proceed.
function warn_unsupported_nic_uio_hw() {
	local listing

	# One indented BDF per line; the trailing newline is dropped by $(...).
	listing=$(printf '  %s\n' "${unsupported_nic_uio_hw[@]}")

	printf '%s\n' \
		"" \
		"WARNING: Unsupported devices detected in the nic_uio setup:" \
		"" \
		"$listing" \
		"" \
		"Remove them first or pass FORCE_NIC_UIO_REBIND=yes through the environment." \
		""
}
8042635e73dSMichal Berger
# FreeBSD "config" mode: configure all collected NVMe, I/OAT, DSA, IAA and VMD
# devices (in that order) through _configure_freebsd.
function configure_freebsd() {
	local -a targets=()

	targets+=("${!nvme_d[@]}")
	targets+=("${!ioat_d[@]}")
	targets+=("${!dsa_d[@]}")
	targets+=("${!iaa_d[@]}")
	targets+=("${!vmd_d[@]}")

	_configure_freebsd "${targets[@]}"
}
8082635e73dSMichal Berger
# Bind the given BDFs via nic_uio and make sure contigmem is loaded with
# HUGEMEM / 256 buffers of 256 MiB each.
#
# Returns 1 when check_for_driver_freebsd fails.
# Globals read:    HUGEMEM
# Globals written: contigmem_num_buffers
function _configure_freebsd() {
	check_for_driver_freebsd || {
		echo "DPDK drivers (contigmem and/or nic_uio) are missing, aborting" >&2
		return 1
	}

	configure_freebsd_pci "$@"

	local wanted_buffers=$((HUGEMEM / 256))

	# A loaded contigmem whose buffer count differs from what HUGEMEM asks for
	#  has to be unloaded so it can be loaded again with the new value.
	if kldstat -q -m contigmem; then
		# The module being loaded does not guarantee that the kernel
		# environment holds the hw. option; kenv fails in that case, so fall
		# back to a value that never matches.
		contigmem_num_buffers=$(kenv hw.contigmem.num_buffers 2> /dev/null) \
			|| contigmem_num_buffers=-1
		if ((contigmem_num_buffers != wanted_buffers)); then
			kldunload contigmem.ko
		fi
	fi

	if ! kldstat -q -m contigmem; then
		kenv "hw.contigmem.num_buffers=$wanted_buffers"
		kenv "hw.contigmem.buffer_size=$((256 * 1024 * 1024))"
		kldload contigmem.ko
	fi
}
83455dc5f21SBen Walker
# FreeBSD "reset" mode: unload contigmem and nic_uio. When unsupported devices
# are part of the nic_uio setup, this is refused (warning + return 1) unless
# FORCE_NIC_UIO_REBIND=yes, in which case the setup is recreated afterwards
# with just those devices.
#
# Globals written: _unsupported_nic_uio_hw, unsupported_nic_uio_hw (emptied
#                  on the forced path)
function reset_freebsd() {
	local pending=${#unsupported_nic_uio_hw[@]}

	# Don't reap the entire nic_uio setup in case there are unsupported devices in the kernel env
	if ((pending > 0)) && [[ $FORCE_NIC_UIO_REBIND != yes ]]; then
		warn_unsupported_nic_uio_hw
		return 1
	fi

	# Best effort: either module may not be loaded.
	kldunload contigmem.ko || true
	kldunload nic_uio.ko || true

	if ((pending == 0)); then
		return 0
	fi

	# HACK: try to be nice and recreate the setup but only with the unsupported devices
	_unsupported_nic_uio_hw=("${unsupported_nic_uio_hw[@]}")
	unsupported_nic_uio_hw=()
	_configure_freebsd "${_unsupported_nic_uio_hw[@]}"
}
85155dc5f21SBen Walker
# Settle the hugepage parameters used by the Linux code paths.
#
# Globals read:    HUGEMEM (amount of memory in MB), HUGEPGSZ, NRHUGE
# Globals written:
#   HUGEPGSZ    - hugepage size in kB; a caller-provided value is dropped when
#                 /sys/kernel/mm/hugepages has no matching entry, and the
#                 Hugepagesize line of /proc/meminfo is used instead
#   HUGEPGSZ_MB - HUGEPGSZ / 1024 (integer division, 0 for sizes below 1 MB)
#   NRHUGE      - kept when already set, otherwise the number of pages needed
#                 to cover HUGEMEM, rounded up
# Returns 1 when NRHUGE has to be computed but no hugepage size is known.
function set_hp() {
	if [[ -n $HUGEPGSZ && ! -e /sys/kernel/mm/hugepages/hugepages-${HUGEPGSZ}kB ]]; then
		echo "${HUGEPGSZ}kB is not supported by the running kernel, ignoring" >&2
		unset -v HUGEPGSZ
	fi

	HUGEPGSZ=${HUGEPGSZ:-$(grep Hugepagesize /proc/meminfo | cut -d : -f 2 | tr -dc '0-9')}
	HUGEPGSZ_MB=$((HUGEPGSZ / 1024))

	if [[ -n $NRHUGE ]]; then
		return 0
	fi

	if [[ ! $HUGEPGSZ =~ ^[0-9]+$ ]] || ((HUGEPGSZ == 0)); then
		echo "Unable to determine the hugepage size, cannot compute NRHUGE" >&2
		return 1
	fi

	# Computed in kB rather than via HUGEPGSZ_MB, which is 0 for hugepage sizes
	# below 1024kB and would cause a division by zero. For sizes that are a
	# multiple of 1024kB the result is the same as the MB-based formula.
	NRHUGE=$(((HUGEMEM * 1024 + HUGEPGSZ - 1) / HUGEPGSZ))
}
8627a1bd398SMichal Berger
kmsg "spdk: $0 $* (start)"

CMD=reset cache_pci_bus

# The first positional argument selects the operating mode; "config" is the
# default.
mode=${1:-config}

# Defaults for settings that may be passed through the environment.
: "${HUGEMEM:=2048}"
: "${PCI_ALLOWED:=}"
: "${PCI_BLOCKED:=}"

if [[ -n $NVME_ALLOWED ]]; then
	PCI_ALLOWED="$PCI_ALLOWED $NVME_ALLOWED"
fi

if [[ -n $SKIP_PCI ]]; then
	PCI_ALLOWED="none"
fi

# Pick the target user: explicit TARGET_USER, then SUDO_USER, then logname.
if [[ -z $TARGET_USER ]]; then
	TARGET_USER="$SUDO_USER"
	if [[ -z $TARGET_USER ]]; then
		TARGET_USER=$(logname 2> /dev/null) || true
	fi
fi

collect_devices "$mode"

if [[ $os == Linux ]]; then
	set_hp
fi

if [[ $mode == interactive ]]; then
	source "$rootdir/scripts/common/setup/interactive.sh"
	main_menu "$2" || exit 0
fi

if [[ $mode == reset && $PCI_BLOCK_SYNC_ON_RESET == yes ]]; then
	# Note that this will wait only for the first block device attached to
	# a given storage controller. For nvme this may miss some of the devs
	# in case multiple namespaces are being in place.
	# FIXME: Wait for nvme controller(s) to be in live state and determine
	# number of configured namespaces, build list of potential block devs
	# and pass them to sync_dev_uevents. Is it worth the effort?
	bdfs_to_wait_for=()
	for bdf in "${!all_devices_d[@]}"; do
		((all_devices_d["$bdf"] == 0)) || continue
		if [[ -n ${nvme_d["$bdf"]} || -n ${virtio_d["$bdf"]} ]]; then
			# Nothing to wait for when the driver is not going to change.
			[[ $(collect_driver "$bdf") != "${pci_bus_driver["$bdf"]}" ]] || continue
			bdfs_to_wait_for+=("$bdf")
		fi
	done
	if ((${#bdfs_to_wait_for[@]} > 0)); then
		echo "Waiting for block devices as requested"
		export UEVENT_TIMEOUT=5 DEVPATH_LOOKUP=yes DEVPATH_SUBSYSTEM=pci
		"$rootdir/scripts/sync_dev_uevents.sh" \
			block/disk \
			"${bdfs_to_wait_for[@]}" &
		sync_pid=$!
	fi
fi

# Dispatch on the requested mode. "$0" is quoted so that a script path
# containing whitespace is not split into usage's message argument.
if [[ $os == Linux ]]; then
	case "$mode" in
		config)
			configure_linux
			;;
		cleanup)
			cleanup_linux
			clear_hugepages
			;;
		reset)
			reset_linux
			;;
		status)
			status_linux
			;;
		help)
			usage "$0"
			;;
		*)
			usage "$0" "Invalid argument '$mode'"
			;;
	esac
else
	case "$mode" in
		config)
			configure_freebsd
			;;
		reset)
			reset_freebsd
			;;
		cleanup)
			echo "setup.sh cleanup function not yet supported on $os"
			;;
		status)
			status_freebsd
			;;
		help)
			usage "$0"
			;;
		*)
			usage "$0" "Invalid argument '$mode'"
			;;
	esac
fi

# Wait for the background uevent sync only if it was started and is still
# running.
if [[ -n $sync_pid && -e /proc/$sync_pid/status ]]; then
	wait "$sync_pid"
fi

kmsg "spdk: $0 $* (done)"
964