xref: /spdk/scripts/setup.sh (revision ba23cec1820104cc710ad776f0127e1cf82033aa)
#!/usr/bin/env bash
#
# Helper script for allocating hugepages and binding PCI devices for use with
# SPDK. Run it with the "help" argument for the list of modes and the
# environment variables it honours.

set -e

os=$(uname -s)

# Only the Linux and FreeBSD code paths are implemented below.
if [[ $os != Linux && $os != FreeBSD ]]; then
	echo "Not supported platform ($os), aborting"
	exit 1
fi

# Repository root, derived from the location of this script. The expansions
# are quoted so that a checkout path containing whitespace still resolves.
rootdir=$(readlink -f "$(dirname "$0")")/..
source "$rootdir/scripts/common.sh"
14
# Print the help text and terminate the script with exit status 0.
#   $1 - path the script was invoked as; only its basename is printed
#   $2 - optional message printed (followed by an empty line) before the help
# Reads the global $os to decide which modes are advertised.
function usage() {
	local options

	if [[ $os == Linux ]]; then
		options="[config|reset|status|cleanup|help]"
	else
		options="[config|reset|help]"
	fi

	if [[ -n $2 ]]; then
		echo "$2"
		echo ""
	fi
	echo "Helper script for allocating hugepages and binding NVMe, I/OAT, VMD and Virtio devices"
	echo "to a generic VFIO kernel driver. If VFIO is not available on the system, this script"
	echo "will fall back to UIO. NVMe and Virtio devices with active mountpoints will be ignored."
	echo "All hugepage operations use default hugepage size on the system (hugepagesz)."
	# Quoted so that an invocation path containing spaces is not split into
	# "name suffix" arguments for basename.
	echo "Usage: $(basename "$1") $options"
	echo
	echo "$options - as following:"
	echo "config            Default mode. Allocate hugepages and bind PCI devices."
	if [[ $os == Linux ]]; then
		echo "cleanup           Remove any orphaned files that can be left in the system after SPDK application exit"
	fi
	echo "reset             Rebind PCI devices back to their original drivers."
	echo "                  Also cleanup any leftover spdk files/resources."
	echo "                  Hugepage memory size will remain unchanged."
	if [[ $os == Linux ]]; then
		echo "status            Print status of all SPDK-compatible devices on the system."
	fi
	echo "help              Print this help message."
	echo
	echo "The following environment variables can be specified."
	echo "HUGEMEM           Size of hugepage memory to allocate (in MB). 2048 by default."
	echo "                  For NUMA systems, the hugepages will be evenly distributed"
	echo "                  between CPU nodes"
	echo "NRHUGE            Number of hugepages to allocate. This variable overwrites HUGEMEM."
	echo "HUGENODE          Specific NUMA node to allocate hugepages on. To allocate"
	echo "                  hugepages on multiple nodes run this script multiple times -"
	echo "                  once for each node."
	echo "PCI_WHITELIST"
	echo "PCI_BLACKLIST     Whitespace separated list of PCI devices (NVMe, I/OAT, VMD, Virtio)."
	echo "                  Each device must be specified as a full PCI address."
	echo "                  E.g. PCI_WHITELIST=\"0000:01:00.0 0000:02:00.0\""
	echo "                  To blacklist all PCI devices use a non-valid address."
	echo "                  E.g. PCI_WHITELIST=\"none\""
	echo "                  If PCI_WHITELIST and PCI_BLACKLIST are empty or unset, all PCI devices"
	echo "                  will be bound."
	echo "                  Each device in PCI_BLACKLIST will be ignored (driver won't be changed)."
	echo "                  PCI_BLACKLIST has precedence over PCI_WHITELIST."
	echo "TARGET_USER       User that will own hugepage mountpoint directory and vfio groups."
	echo "                  By default the current user will be used."
	echo "DRIVER_OVERRIDE   Disable automatic vfio-pci/uio_pci_generic selection and forcefully"
	echo "                  bind devices to the given driver."
	echo "                  E.g. DRIVER_OVERRIDE=uio_pci_generic or DRIVER_OVERRIDE=/home/public/dpdk/build/kmod/igb_uio.ko"
	exit 0
}
70
# Report whether kernel driver $1 is available on this system.
#
# In monolithic kernels lsmod won't list the driver, so the lsmod check is
# backed by /sys/module. /sys/bus/pci/drivers/ is checked as well, as neither
# lsmod nor /sys/module might contain the needed info (like in Fedora-like OS).
#
# Returns (note that non-zero means "found"):
#   1 - lsmod lists a module with this name
#   2 - a matching directory exists under /sys/module or /sys/bus/pci/drivers
#   0 - the driver was not found
function check_for_driver() {
	local name=$1
	local modname=${name//-/_}

	# Compare against the module-name column only. A plain substring grep
	# would also match e.g. "nvme_core", or a "Used by" entry, when asked
	# about "nvme".
	if lsmod | awk -v mod="$modname" '$1 == mod { found = 1 } END { exit !found }'; then
		return 1
	fi

	if [[ -d /sys/module/$name || -d /sys/module/$modname || -d \
		/sys/bus/pci/drivers/$name || -d /sys/bus/pci/drivers/$modname ]]; then
		return 2
	fi
	return 0
}
88
# Print a message prefixed with a PCI device's address and IDs.
#   $1   - PCI address (BDF) of the device
#   $2.. - message words; they are joined with single spaces
# Output format: "<bdf> (<vendor> <device>): <message>", where the vendor and
# device IDs are read from sysfs and shown without their leading "0x".
function pci_dev_echo() {
	local bdf=$1
	local sysfs_dir="/sys/bus/pci/devices/$bdf"
	local vendor device

	vendor=$(cat "$sysfs_dir/vendor")
	device=$(cat "$sysfs_dir/device")
	shift

	printf '%s (%s %s): %s\n' "$bdf" "${vendor#0x}" "${device#0x}" "$*"
}
98
# Bind a PCI device to the given kernel driver through sysfs.
#   $1 - PCI address (BDF) of the device
#   $2 - driver name as it appears under /sys/bus/pci/drivers
#
# If the device is already bound to $2 nothing is changed. Otherwise it is
# unbound from its current driver (if any), its vendor/device ID is offered to
# the new driver via new_id, and an explicit bind is attempted. The new_id and
# bind writes are best-effort: their failures are ignored.
# When a /dev/vfio/<iommu group> node exists for the device and TARGET_USER is
# set, ownership of that node is handed to TARGET_USER.
#
# All working variables are local so that the caller's own "bdf" and
# "driver_name" are never overwritten.
function linux_bind_driver() {
	local bdf="$1"
	local driver_name="$2"
	local old_driver_name="no driver"
	local dev_sysfs="/sys/bus/pci/devices/$bdf"
	local ven_dev_id iommu_group

	# "vvvv:dddd" as printed by lspci -n, turned into "vvvv dddd".
	ven_dev_id=$(lspci -n -s "$bdf" | cut -d' ' -f3 | sed 's/:/ /')

	if [ -e "$dev_sysfs/driver" ]; then
		old_driver_name=$(basename "$(readlink "$dev_sysfs/driver")")

		if [ "$driver_name" = "$old_driver_name" ]; then
			pci_dev_echo "$bdf" "Already using the $old_driver_name driver"
			return 0
		fi

		echo "$ven_dev_id" > "$dev_sysfs/driver/remove_id" 2> /dev/null || true
		echo "$bdf" > "$dev_sysfs/driver/unbind"
	fi

	pci_dev_echo "$bdf" "$old_driver_name -> $driver_name"

	echo "$ven_dev_id" > "/sys/bus/pci/drivers/$driver_name/new_id" 2> /dev/null || true
	echo "$bdf" > "/sys/bus/pci/drivers/$driver_name/bind" 2> /dev/null || true

	iommu_group=$(basename "$(readlink -f "$dev_sysfs/iommu_group")")
	# The non-empty check keeps chown away from the /dev/vfio directory itself
	# when the group could not be resolved.
	if [ -n "$iommu_group" ] && [ -e "/dev/vfio/$iommu_group" ] && [ -n "$TARGET_USER" ]; then
		chown "$TARGET_USER" "/dev/vfio/$iommu_group"
	fi
}
129
# Detach a PCI device from whatever kernel driver currently owns it.
#   $1 - PCI address (BDF) of the device
# The device's vendor/device ID is first withdrawn from the driver via
# remove_id (best-effort, failures ignored), then the device is unbound.
# A "<old driver> -> no driver" line is reported through pci_dev_echo.
function linux_unbind_driver() {
	local bdf="$1"
	local driver_link="/sys/bus/pci/devices/$bdf/driver"
	local current_driver="no driver"
	local id_pair

	# "vvvv:dddd" as printed by lspci -n, turned into "vvvv dddd".
	id_pair=$(lspci -n -s $bdf | cut -d' ' -f3 | sed 's/:/ /')

	if [ -e "$driver_link" ]; then
		current_driver=$(basename $(readlink $driver_link))
		echo "$id_pair" > "$driver_link/remove_id" 2> /dev/null || true
		echo "$bdf" > "$driver_link/unbind"
	fi

	pci_dev_echo "$bdf" "$current_driver -> no driver"
}
144
# Print the mountpoint (third field of the mount(8) listing) of every mounted
# hugetlbfs filesystem, one per line.
function linux_hugetlbfs_mounts() {
	mount | awk '/ type hugetlbfs / { print $3 }'
}
148
# Print the names of the NVMe block devices that belong to a PCI device.
#   $1 - PCI address (BDF) of the NVMe controller
# Output: one block device name per line; nothing at all when none match.
#
# Candidates are the "nvme*" names listed by lsblk. For each one the sysfs
# link /sys/block/<name>/device/device is resolved, falling back to
# /sys/block/<name>/device, and the last path component of the link target is
# compared with $1.
function get_nvme_name_from_bdf() {
	local bdf=$1
	local blknames=()
	local nvme_devs dev link_name

	# grep exits non-zero when no NVMe block device exists, which is not an
	# error here. "|| true" is used instead of "set +e ... set -e" so that the
	# caller's errexit setting is left exactly as it was.
	nvme_devs=$(lsblk -d --output NAME | grep "^nvme") || true

	for dev in $nvme_devs; do
		link_name=$(readlink "/sys/block/$dev/device/device") || true
		if [ -z "$link_name" ]; then
			# Skip devices whose sysfs link cannot be resolved rather than
			# abandoning the whole lookup.
			link_name=$(readlink "/sys/block/$dev/device") || continue
		fi
		if [ "$(basename "$link_name")" = "$bdf" ]; then
			blknames+=("$dev")
		fi
	done

	if ((${#blknames[@]} > 0)); then
		printf '%s\n' "${blknames[@]}"
	fi
}
168
# Collect the block devices that sit behind a (virtio) PCI device.
#   $1 - PCI address (BDF) of the device
#   $2 - name of the caller's array variable that receives the result
# A block device is selected when the target of its /sys/block/<name> link
# contains $1. The array named by $2 is overwritten (it becomes empty when
# nothing matches).
function get_virtio_names_from_bdf() {
	# Underscore-prefixed locals keep clear of the caller's output variable.
	local _bdf=$1 _out_var=$2
	local _blk_devs _dev
	local _names=()

	_blk_devs=$(lsblk --nodeps --output NAME)

	for _dev in $_blk_devs; do
		# -F: the address is a literal string; as a regex its "." would match
		# any character.
		if readlink "/sys/block/$_dev" | grep -qF -- "$_bdf"; then
			_names+=("$_dev")
		fi
	done

	# Only the variable name is expanded before eval runs; the elements are
	# expanded by eval itself, so they are not word-split or re-parsed.
	eval "$_out_var=(\"\${_names[@]}\")"
}
181
# Print the device IDs (the text following the first "x" of "0x....") of every
# line in pci_ids.h that matches $1.
function _configure_linux_pci_ids() {
	grep "$1" "$rootdir/include/spdk/pci_ids.h" | awk -F"x" '{print $2}'
}

# Bind every usable device of one device class to the global $driver_name.
#   $1 - vendor ID as used in pci_bus_cache keys (e.g. 0x8086)
#   $2 - pci_ids.h macro prefix selecting the device IDs of the class
#   $3 - class label used in the "Skipping un-whitelisted ..." message
function _configure_linux_pci_class() {
	local vendor=$1 id_macro=$2 label=$3
	local dev_id bdf

	while IFS= read -r dev_id; do
		for bdf in ${pci_bus_cache["$vendor:0x$dev_id"]}; do
			if ! pci_can_use "$bdf"; then
				pci_dev_echo "$bdf" "Skipping un-whitelisted $label device"
				continue
			fi

			linux_bind_driver "$bdf" "$driver_name"
		done
	done < <(_configure_linux_pci_ids "$id_macro")
}

# Select a userspace-capable kernel driver and bind all eligible NVMe, I/OAT,
# IDXD, Virtio and VMD devices to it.
#
# Driver selection, in order:
#   1. DRIVER_OVERRIDE, given either as a module name or as a path to a .ko
#   2. vfio-pci, when /sys/kernel/iommu_groups is populated or vfio's
#      enable_unsafe_noiommu_mode parameter reads "Y"
#   3. uio_pci_generic, when modinfo knows it
#   4. igb_uio from $rootdir/dpdk/build/kmod, when that file is readable
# Returns 1 when none of these is available.
#
# The chosen name is left in the global $driver_name, which configure_linux
# reads afterwards. Devices are taken from pci_bus_cache and filtered with
# pci_can_use; NVMe and Virtio devices that back mounted block devices are
# left alone. The device-ID lists are read straight from a pipeline, so no
# temporary files are created (and none can be left behind when a bind fails
# under "set -e").
function configure_linux_pci() {
	local driver_path=""
	local noiommu=/sys/module/vfio/parameters/enable_unsafe_noiommu_mode
	local bdf dev_id blkname
	local blknames=()

	driver_name=""
	if [[ -n "${DRIVER_OVERRIDE}" ]]; then
		driver_path="$DRIVER_OVERRIDE"
		driver_name="${DRIVER_OVERRIDE##*/}"
		# modprobe and the sysfs don't use the .ko suffix.
		driver_name=${driver_name%.ko}
		# path = name -> there is no path
		if [[ "$driver_path" = "$driver_name" ]]; then
			driver_path=""
		fi
		# igb_uio is a common driver to override with and it depends on uio.
		if [[ "$driver_name" = "igb_uio" ]]; then
			modprobe uio
		fi
	elif [[ -n "$(ls /sys/kernel/iommu_groups)" || (-e $noiommu && "$(cat "$noiommu")" == "Y") ]]; then
		driver_name=vfio-pci
	elif modinfo uio_pci_generic > /dev/null 2>&1; then
		driver_name=uio_pci_generic
	elif [[ -r "$rootdir/dpdk/build/kmod/igb_uio.ko" ]]; then
		driver_path="$rootdir/dpdk/build/kmod/igb_uio.ko"
		driver_name="igb_uio"
		modprobe uio
		echo "WARNING: uio_pci_generic not detected - using $driver_name"
	else
		echo "No valid drivers found [vfio-pci, uio_pci_generic, igb_uio]. Please either enable the vfio-pci or uio_pci_generic"
		echo "kernel modules, or have SPDK build the igb_uio driver by running ./configure --with-igb-uio-driver and recompiling."
		return 1
	fi

	# modprobe assumes the directory of the module. If the user passes in a path, we should use insmod
	if [[ -n "$driver_path" ]]; then
		insmod "$driver_path" || true
	else
		modprobe "$driver_name"
	fi

	# NVMe
	for bdf in ${pci_bus_cache["0x010802"]}; do
		if ! pci_can_use "$bdf"; then
			pci_dev_echo "$bdf" "Skipping un-whitelisted NVMe controller at $bdf"
			continue
		fi

		# Collect the controller's block devices that have mountpoints.
		blknames=()
		for blkname in $(get_nvme_name_from_bdf "$bdf"); do
			if [ "$(lsblk "/dev/$blkname" --output MOUNTPOINT -n | wc -w)" != "0" ]; then
				blknames+=("$blkname")
			fi
		done

		if ((${#blknames[@]} == 0)); then
			linux_bind_driver "$bdf" "$driver_name"
			continue
		fi
		for blkname in "${blknames[@]}"; do
			pci_dev_echo "$bdf" "Active mountpoints on /dev/$blkname, so not binding PCI dev"
		done
	done

	# IOAT
	_configure_linux_pci_class 0x8086 "PCI_DEVICE_ID_INTEL_IOAT" "I/OAT"

	# IDXD
	_configure_linux_pci_class 0x8086 "PCI_DEVICE_ID_INTEL_IDXD" "IDXD"

	# virtio
	while IFS= read -r dev_id; do
		for bdf in ${pci_bus_cache["0x1af4:0x$dev_id"]}; do
			if ! pci_can_use "$bdf"; then
				pci_dev_echo "$bdf" "Skipping un-whitelisted Virtio device at $bdf"
				continue
			fi
			blknames=()
			get_virtio_names_from_bdf "$bdf" blknames
			for blkname in "${blknames[@]}"; do
				if [ "$(lsblk "/dev/$blkname" --output MOUNTPOINT -n | wc -w)" != "0" ]; then
					pci_dev_echo "$bdf" "Active mountpoints on /dev/$blkname, so not binding"
					# Move on to the next PCI device, not just the next name.
					continue 2
				fi
			done

			linux_bind_driver "$bdf" "$driver_name"
		done
	done < <(_configure_linux_pci_ids "PCI_DEVICE_ID_VIRTIO")

	# VMD: only bound when PCI_WHITELIST is non-empty and allows the device.
	while IFS= read -r dev_id; do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			if [[ -z "$PCI_WHITELIST" ]] || ! pci_can_use "$bdf"; then
				echo "Skipping un-whitelisted VMD device at $bdf"
				continue
			fi

			linux_bind_driver "$bdf" "$driver_name"
			echo " VMD generic kdrv: " "$bdf" "$driver_name"
		done
	done < <(_configure_linux_pci_ids "PCI_DEVICE_ID_INTEL_VMD")

	echo "1" > "/sys/bus/pci/rescan"
}
331
# Remove files and directories left behind by SPDK applications, unless some
# process still has them open.
#
# Candidates:
#   - the contents of {/var/run,/tmp}/dpdk/spdk<N> and spdk_pid<N>, plus the
#     same directories under $XDG_RUNTIME_DIR when that is a directory whose
#     path contains no space
#   - trace and iSCSI connection files in /dev/shm
# The set of open files is gathered by resolving every /proc/<pid>/fd/* link.
# Prints "Clean" and returns 0 when there is nothing to remove; exits 1 when
# the list of open files cannot be obtained.
function cleanup_linux() {
	local dirs_to_clean files_to_clean opened_files=""
	local dir fd_dir f

	# NOTE(review): the +([0-9]) patterns are kept inside $(...) exactly as
	# before - presumably so that they are only parsed once extglob has been
	# switched on at run time. Confirm before moving them out.
	shopt -s extglob nullglob
	dirs_to_clean="$(echo {/var/run,/tmp}/dpdk/spdk{,_pid}+([0-9])) "
	if [[ -d $XDG_RUNTIME_DIR && $XDG_RUNTIME_DIR != *" "* ]]; then
		# "assert_not_empty" is a dummy operand so that readlink still gets an
		# argument when the glob matches nothing.
		dirs_to_clean+="$(readlink -e assert_not_empty $XDG_RUNTIME_DIR/dpdk/spdk{,_pid}+([0-9]) || true) "
	fi

	files_to_clean=""
	for dir in $dirs_to_clean; do
		files_to_clean+="$(echo $dir/*) "
	done
	shopt -u extglob nullglob

	files_to_clean+="$(ls -1 /dev/shm/* \
		| grep -E '(spdk_tgt|iscsi|vhost|nvmf|rocksdb|bdevio|bdevperf|vhost_fuzz|nvme_fuzz)_trace|spdk_iscsi_conns' || true) "
	files_to_clean="$(readlink -e assert_not_empty $files_to_clean || true)"
	if [[ -z "$files_to_clean" ]]; then
		echo "Clean"
		return 0
	fi

	shopt -s extglob
	for fd_dir in $(echo /proc/+([0-9])); do
		# Terminate every per-process list with a newline. Without it the last
		# path of one process and the first path of the next end up on the
		# same line and neither can be matched below.
		opened_files+="$(readlink -e assert_not_empty $fd_dir/fd/* || true)"$'\n'
	done
	shopt -u extglob

	# Only separators were collected -> nothing could be resolved.
	if [[ -z "${opened_files//$'\n'/}" ]]; then
		echo "Can't get list of opened files!"
		exit 1
	fi

	echo 'Cleaning'
	for f in $files_to_clean; do
		# -F -x: compare whole lines literally; a path is not a regex.
		if ! grep -Fxq -- "$f" <<< "$opened_files"; then
			echo "Removing:    $f"
			rm -- "$f"
		else
			echo "Still open: $f"
		fi
	done

	for dir in $dirs_to_clean; do
		if ! grep -Fxq -- "$dir" <<< "$opened_files"; then
			echo "Removing:    $dir"
			rmdir -- "$dir"
		else
			echo "Still open: $dir"
		fi
	done
	echo "Clean"
}
387
# Linux "config" mode: bind the PCI devices, make sure hugetlbfs is mounted
# and allocate $NRHUGE hugepages.
#
# Reads: NRHUGE, HUGENODE, HUGEPGSZ, TARGET_USER, and $driver_name as set by
# configure_linux_pci.
# - Mounts hugetlbfs at /mnt/huge when no hugetlbfs mount exists yet.
# - Writes NRHUGE to the system-wide nr_hugepages file, or to the per-node one
#   when HUGENODE is set, and exits 1 if fewer pages ended up allocated.
# - With vfio-pci and a TARGET_USER: hands the hugetlbfs mountpoints to that
#   user and reports the user's memlock limit.
# - Loads the msr module when /dev/cpu/0/msr is absent.
function configure_linux() {
	local hugetlbfs_mounts hugepages_target allocated_hugepages
	local mount_dir memlock_amnt memlock_mb

	configure_linux_pci
	hugetlbfs_mounts=$(linux_hugetlbfs_mounts)

	if [ -z "$hugetlbfs_mounts" ]; then
		hugetlbfs_mounts=/mnt/huge
		echo "Mounting hugetlbfs at $hugetlbfs_mounts"
		mkdir -p "$hugetlbfs_mounts"
		mount -t hugetlbfs nodev "$hugetlbfs_mounts"
	fi

	if [ -z "$HUGENODE" ]; then
		hugepages_target="/proc/sys/vm/nr_hugepages"
	else
		hugepages_target="/sys/devices/system/node/node${HUGENODE}/hugepages/hugepages-${HUGEPGSZ}kB/nr_hugepages"
	fi

	echo "$NRHUGE" > "$hugepages_target"
	# Read the value back to see how many pages were actually allocated.
	allocated_hugepages=$(cat "$hugepages_target")
	if [ "$allocated_hugepages" -lt "$NRHUGE" ]; then
		echo ""
		echo "## ERROR: requested $NRHUGE hugepages but only $allocated_hugepages could be allocated."
		echo "## Memory might be heavily fragmented. Please try flushing the system cache, or reboot the machine."
		exit 1
	fi

	if [ "$driver_name" = "vfio-pci" ] && [ -n "$TARGET_USER" ]; then
		for mount_dir in $hugetlbfs_mounts; do
			chown "$TARGET_USER" "$mount_dir"
			chmod g+w "$mount_dir"
		done

		memlock_amnt=$(su "$TARGET_USER" -c "ulimit -l")
		if [[ $memlock_amnt != "unlimited" ]]; then
			memlock_mb=$((memlock_amnt / 1024))
			printf '%s\n' \
				"\"$TARGET_USER\" user memlock limit: $memlock_mb MB" \
				"" \
				"This is the maximum amount of memory you will be" \
				"able to use with DPDK and VFIO if run as user \"$TARGET_USER\"." \
				"To change this, please adjust limits.conf memlock limit for user \"$TARGET_USER\"."
			if ((memlock_amnt < 65536)); then
				echo ""
				echo "## WARNING: memlock limit is less than 64MB"
				echo -n "## DPDK with VFIO may not be able to initialize "
				echo "if run as user \"$TARGET_USER\"."
			fi
		fi
	fi

	# /dev/cpu/0/msr is a character device, so test for existence (-e); a
	# regular-file test (-f) could never succeed for it.
	if [ ! -e /dev/cpu/0/msr ]; then
		# Some distros build msr as a module.  Make sure it's loaded to ensure
		#  DPDK can easily figure out the TSC rate rather than relying on 100ms
		#  sleeps.
		modprobe msr || true
	fi
}
448
# Print the device IDs (the text following the first "x" of "0x....") of every
# line in pci_ids.h that matches $1.
function _reset_linux_pci_ids() {
	grep "$1" "$rootdir/include/spdk/pci_ids.h" | awk -F"x" '{print $2}'
}

# Give one device back to its original kernel driver.
#   $1 - PCI address (BDF)
#   $2 - name of the original driver
#   $3 - check_for_driver result for $2; non-zero means the driver is present
# When the driver is not present the device is only unbound.
function _reset_linux_pci_rebind() {
	local bdf=$1 driver=$2 driver_loaded=$3

	if [ "$driver_loaded" -ne 0 ]; then
		linux_bind_driver "$bdf" "$driver"
	else
		linux_unbind_driver "$bdf"
	fi
}

# Rebind all usable NVMe, I/OAT, IDXD, Virtio and VMD devices to their
# original kernel drivers (nvme, ioatdma, idxd, virtio-pci, vmd) and trigger a
# PCI rescan. Devices are taken from pci_bus_cache and filtered with
# pci_can_use. For every class except Virtio the device is merely unbound when
# check_for_driver does not find the original driver.
function reset_linux_pci() {
	local bdf dev_id driver_loaded

	# NVMe
	# check_for_driver reports "found" through a non-zero status; capturing it
	# with "||" keeps errexit from firing without toggling "set -e".
	driver_loaded=0
	check_for_driver nvme || driver_loaded=$?
	for bdf in ${pci_bus_cache["0x010802"]}; do
		if ! pci_can_use "$bdf"; then
			pci_dev_echo "$bdf" "Skipping un-whitelisted NVMe controller at $bdf"
			continue
		fi
		_reset_linux_pci_rebind "$bdf" nvme "$driver_loaded"
	done

	# IOAT
	driver_loaded=0
	check_for_driver ioatdma || driver_loaded=$?
	while IFS= read -r dev_id; do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			if ! pci_can_use "$bdf"; then
				pci_dev_echo "$bdf" "Skipping un-whitelisted I/OAT device"
				continue
			fi
			_reset_linux_pci_rebind "$bdf" ioatdma "$driver_loaded"
		done
	done < <(_reset_linux_pci_ids "PCI_DEVICE_ID_INTEL_IOAT")

	# IDXD
	driver_loaded=0
	check_for_driver idxd || driver_loaded=$?
	while IFS= read -r dev_id; do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			if ! pci_can_use "$bdf"; then
				pci_dev_echo "$bdf" "Skipping un-whitelisted IDXD device"
				continue
			fi
			_reset_linux_pci_rebind "$bdf" idxd "$driver_loaded"
		done
	done < <(_reset_linux_pci_ids "PCI_DEVICE_ID_INTEL_IDXD")

	# virtio
	# TODO: check if virtio-pci is loaded first and just unbind if it is not loaded
	# Requires some more investigation - for example, some kernels do not seem to have
	#  virtio-pci but just virtio_scsi instead.  Also need to make sure we get the
	#  underscore vs. dash right in the virtio_scsi name.
	modprobe virtio-pci || true
	while IFS= read -r dev_id; do
		for bdf in ${pci_bus_cache["0x1af4:0x$dev_id"]}; do
			if ! pci_can_use "$bdf"; then
				pci_dev_echo "$bdf" "Skipping un-whitelisted Virtio device at $bdf"
				continue
			fi
			linux_bind_driver "$bdf" virtio-pci
		done
	done < <(_reset_linux_pci_ids "PCI_DEVICE_ID_VIRTIO")

	# VMD
	driver_loaded=0
	check_for_driver vmd || driver_loaded=$?
	while IFS= read -r dev_id; do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			if ! pci_can_use "$bdf"; then
				echo "Skipping un-whitelisted VMD device at $bdf"
				continue
			fi
			_reset_linux_pci_rebind "$bdf" vmd "$driver_loaded"
		done
	done < <(_reset_linux_pci_ids "PCI_DEVICE_ID_INTEL_VMD")

	echo "1" > "/sys/bus/pci/rescan"
}
565
# Linux "reset" mode: rebind the PCI devices, then delete the spdk*map_* files
# found in every hugetlbfs mount and the /run/.spdk* files.
function reset_linux() {
	local hugetlbfs_dir

	reset_linux_pci

	for hugetlbfs_dir in $(linux_hugetlbfs_mounts); do
		rm -f "$hugetlbfs_dir"/spdk*map_*
	done

	rm -f /run/.spdk*
}
573
# Print the kernel driver bound to PCI device $1 (empty when there is none),
# taken from the DRIVER line of the device's sysfs uevent file.
function _status_linux_driver() {
	grep DRIVER "/sys/bus/pci/devices/$1/uevent" | awk -F"=" '{print $2}'
}

# Print the NUMA column for PCI device $1.
#   $2 - optional count of per-node hugepage directories found; when it is 0,
#        "-" is printed without consulting sysfs
# A sysfs value of -1 is reported as "unknown".
function _status_linux_numa_node() {
	local node

	if [ "${2:-1}" = "0" ]; then
		echo "-"
		return 0
	fi

	node=$(cat "/sys/bus/pci/devices/$1/numa_node") || return 1
	if ((node == -1)); then
		node=unknown
	fi
	echo "$node"
}

# Linux "status" mode: print the hugepage counters followed by one table per
# device class (NVMe, I/OAT, IDXD, virtio, VMD) listing the devices found in
# pci_bus_cache together with their IDs, NUMA node and bound driver.
# Reads: rootdir, pci_bus_cache and, for the system-wide fallback, HUGEPGSZ.
function status_linux() {
	local pci_ids_h="$rootdir/include/spdk/pci_ids.h"
	local numa_nodes=0
	local path node huge_size free_pages all_pages
	local section bdf dev_id dev_ids driver device vendor name
	local blknames=()

	echo "Hugepages"
	printf "%-6s %10s %8s / %6s\n" "node" "hugesize" "free" "total"

	shopt -s nullglob
	for path in /sys/devices/system/node/node*/hugepages/hugepages-*/; do
		numa_nodes=$((numa_nodes + 1))
		free_pages=$(cat $path/free_hugepages)
		all_pages=$(cat $path/nr_hugepages)

		[[ $path =~ (node[0-9]+)/hugepages/hugepages-([0-9]+kB) ]]

		node=${BASH_REMATCH[1]}
		huge_size=${BASH_REMATCH[2]}

		printf "%-6s %10s %8s / %6s\n" "$node" "$huge_size" "$free_pages" "$all_pages"
	done
	shopt -u nullglob

	# fall back to system-wide hugepages
	if [ "$numa_nodes" = "0" ]; then
		free_pages=$(grep HugePages_Free /proc/meminfo | awk '{ print $2 }')
		all_pages=$(grep HugePages_Total /proc/meminfo | awk '{ print $2 }')
		node="-"
		huge_size="$HUGEPGSZ"

		printf "%-6s %10s %8s / %6s\n" "$node" "$huge_size" "$free_pages" "$all_pages"
	fi

	echo ""
	echo "NVMe devices"

	echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver\t\tDevice name"
	for bdf in ${pci_bus_cache["0x010802"]}; do
		driver=$(_status_linux_driver "$bdf")
		node=$(_status_linux_numa_node "$bdf" "$numa_nodes")
		device=$(cat "/sys/bus/pci/devices/$bdf/device")
		vendor=$(cat "/sys/bus/pci/devices/$bdf/vendor")
		if [ "$driver" = "nvme" ] && [ -d "/sys/bus/pci/devices/$bdf/nvme" ]; then
			name="\t"$(ls "/sys/bus/pci/devices/$bdf/nvme")
		else
			name="-"
		fi
		echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t${driver:--}\t\t$name"
	done

	# The I/OAT and IDXD tables share one layout; each entry is
	# "<title>:<pci_ids.h macro prefix>". Going through the same code also
	# means IDXD devices get the "unknown" NUMA label for -1 like every other
	# class.
	for section in "I/OAT Engine:PCI_DEVICE_ID_INTEL_IOAT" "IDXD Engine:PCI_DEVICE_ID_INTEL_IDXD"; do
		echo ""
		echo "${section%%:*}"

		#collect all the device_id info of the devices in this class.
		dev_ids=$(grep "${section#*:}" "$pci_ids_h" | awk -F"x" '{print $2}')
		echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver"
		for dev_id in $dev_ids; do
			for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
				driver=$(_status_linux_driver "$bdf")
				node=$(_status_linux_numa_node "$bdf" "$numa_nodes")
				device=$(cat "/sys/bus/pci/devices/$bdf/device")
				vendor=$(cat "/sys/bus/pci/devices/$bdf/vendor")
				echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t${driver:--}"
			done
		done
	done

	echo ""
	echo "virtio"

	#collect all the device_id info of virtio devices.
	dev_ids=$(grep "PCI_DEVICE_ID_VIRTIO" "$pci_ids_h" | awk -F"x" '{print $2}')
	echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver\t\tDevice name"
	for dev_id in $dev_ids; do
		for bdf in ${pci_bus_cache["0x1af4:0x$dev_id"]}; do
			driver=$(_status_linux_driver "$bdf")
			node=$(_status_linux_numa_node "$bdf" "$numa_nodes")
			device=$(cat "/sys/bus/pci/devices/$bdf/device")
			vendor=$(cat "/sys/bus/pci/devices/$bdf/vendor")
			blknames=()
			get_virtio_names_from_bdf "$bdf" blknames
			echo -e "$bdf\t${vendor#0x}\t${device#0x}\t$node\t\t${driver:--}\t\t" "${blknames[@]}"
		done
	done

	echo ""
	echo "VMD"

	#collect all the device_id info of vmd devices.
	dev_ids=$(grep "PCI_DEVICE_ID_INTEL_VMD" "$pci_ids_h" | awk -F"x" '{print $2}')
	echo -e "BDF\t\tNuma Node\tDriver Name"
	for dev_id in $dev_ids; do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			driver=$(_status_linux_driver "$bdf")
			# The VMD table always reads numa_node from sysfs.
			node=$(_status_linux_numa_node "$bdf")
			echo -e "$bdf\t$node\t\t$driver"
		done
	done
}
717
# FreeBSD "status" mode: report whether the contigmem module is loaded, its
# buffer settings as returned by kenv, and one table per device class (NVMe,
# I/OAT, IDXD, VMD) with the PCI address, vendor/device ID and the driver
# attached to each device.
# Reads: rootdir, pci_bus_cache, pci_ids_vendor, pci_ids_device.
function status_freebsd() {
	local id pci
	local devs dev_type dev_id
	local -a ioat=() idxd=() vmd=()
	local contigmem=present

	# Print the table for every pci_bus_cache key given as an argument. The
	# body runs in a subshell; "id" and "pci" are the locals declared above.
	status_print() (
		local driver

		echo -e "BDF\t\tVendor\tDevice\tDriver"

		for id; do
			for pci in ${pci_bus_cache["$id"]}; do
				driver=$(pciconf -l "pci$pci")
				# Keep only the text before the last "@" of pciconf's output.
				driver=${driver%@*}
				printf '%s\t%s\t%s\t%s\n' \
					"$pci" \
					"${pci_ids_vendor["$pci"]}" \
					"${pci_ids_device["$pci"]}" \
					"$driver"
			done
		done
	)

	devs=PCI_DEVICE_ID_INTEL_IOAT
	devs+="|PCI_DEVICE_ID_INTEL_IDXD"
	devs+="|PCI_DEVICE_ID_INTEL_VMD"

	# Each matching pci_ids.h line is split into three words; the second one
	# (the macro name) selects the class and the third one is the device ID.
	while read -r _ dev_type dev_id; do
		case "$dev_type" in
			*IOAT*) ioat+=("0x8086:$dev_id") ;;
			*IDXD*) idxd+=("0x8086:$dev_id") ;;
			*VMD*) vmd+=("0x8086:$dev_id") ;;
		esac
	done < <(grep -E "$devs" "$rootdir/include/spdk/pci_ids.h")

	if ! kldstat -q -m contigmem; then
		contigmem="not present"
	fi

	printf '%s\n' \
		"Contigmem ($contigmem)" \
		"Buffer Size: $(kenv hw.contigmem.buffer_size)" \
		"Num Buffers: $(kenv hw.contigmem.num_buffers)" \
		"" \
		"NVMe devices" \
		"$(status_print 0x010802)" \
		"" \
		"I/OAT DMA" \
		"$(status_print "${ioat[@]}")" \
		"" \
		"IDXD DMA" \
		"$(status_print "${idxd[@]}")" \
		"" \
		"VMD" \
		"$(status_print "${vmd[@]}")"
}
776
# Hand the NVMe, I/OAT, IDXD and VMD devices found in pci_bus_cache to the
# nic_uio module: nic_uio is unloaded (best-effort), the kenv variable
# hw.nic_uio.bdfs is set to the comma separated device addresses, and nic_uio
# is loaded again.
function configure_freebsd_pci() {
	local id_macros="PCI_DEVICE_ID_INTEL_IOAT|PCI_DEVICE_ID_INTEL_IDXD|PCI_DEVICE_ID_INTEL_VMD"
	local dev_ids key bdf_list
	local addrs=()

	# Third word of every matching pci_ids.h line, i.e. the device ID.
	dev_ids=($(grep -E "$id_macros" "$rootdir/include/spdk/pci_ids.h" | awk '{print $3}'))

	# NVMe devices are looked up by class code, everything else by
	# "<Intel vendor ID>:<device ID>".
	for key in "0x010802" "${dev_ids[@]/#/0x8086:}"; do
		[[ -n ${pci_bus_cache["$key"]} ]] || continue
		addrs+=(${pci_bus_cache["$key"]})
	done

	# Drop the domain part from all the addresses
	addrs=("${addrs[@]#*:}")

	# Join with commas; IFS is only changed inside the subshell.
	bdf_list=$(
		IFS=","
		echo "${addrs[*]}"
	)

	kldunload nic_uio.ko || true
	kenv hw.nic_uio.bdfs="$bdf_list"
	kldload nic_uio.ko
}
804
# FreeBSD "config" mode: set up nic_uio for the PCI devices, then make sure
# contigmem is loaded with HUGEMEM / 256 buffers of 256 MiB each.
function configure_freebsd() {
	local wanted_buffers

	configure_freebsd_pci
	wanted_buffers=$((HUGEMEM / 256))

	# If contigmem is already loaded but the HUGEMEM specified doesn't match the
	#  previous value, unload contigmem so that we can reload with the new value.
	if kldstat -q -m contigmem \
		&& [ $(kenv hw.contigmem.num_buffers) -ne "$wanted_buffers" ]; then
		kldunload contigmem.ko
	fi

	# Covers both "never loaded" and "just unloaded above".
	if ! kldstat -q -m contigmem; then
		kenv hw.contigmem.num_buffers=$wanted_buffers
		kenv hw.contigmem.buffer_size=$((256 * 1024 * 1024))
		kldload contigmem.ko
	fi
}
820
# FreeBSD "reset" mode: unload the contigmem and nic_uio modules. A module
# that fails to unload is ignored.
function reset_freebsd() {
	local module

	for module in contigmem.ko nic_uio.ko; do
		kldunload "$module" || true
	done
}
825
# Fill the PCI device cache used by the functions above.
# NOTE(review): cache_pci_bus comes from common.sh; what CMD=reset changes in
# it is not visible from this file.
CMD=reset cache_pci_bus

# The first argument selects the mode; "config" is the default.
mode=${1:-config}

: "${HUGEMEM:=2048}"
: "${PCI_WHITELIST:=""}"
: "${PCI_BLACKLIST:=""}"

# NVME_WHITELIST is appended to PCI_WHITELIST.
if [ -n "$NVME_WHITELIST" ]; then
	PCI_WHITELIST="$PCI_WHITELIST $NVME_WHITELIST"
fi

# "none" is not a valid PCI address, so no device passes the whitelist.
if [ -n "$SKIP_PCI" ]; then
	PCI_WHITELIST="none"
fi

# Default TARGET_USER to the user who invoked sudo, then to the login name.
if [ -z "$TARGET_USER" ]; then
	TARGET_USER="$SUDO_USER"
	if [ -z "$TARGET_USER" ]; then
		TARGET_USER=$(logname 2> /dev/null) || true
	fi
fi

if [[ $os == Linux ]]; then
	# Default hugepage size in kB, as reported by /proc/meminfo.
	HUGEPGSZ=$(($(grep Hugepagesize /proc/meminfo | cut -d : -f 2 | tr -dc '0-9')))
	HUGEPGSZ_MB=$((HUGEPGSZ / 1024))
	# Number of pages needed to cover HUGEMEM (MB), rounded up. The division
	# is done in kB: HUGEPGSZ_MB is 0 for hugepage sizes below 1 MB and cannot
	# be used as a divisor. For sizes that are whole multiples of 1 MB the
	# result is the same as dividing by HUGEPGSZ_MB.
	: "${NRHUGE=$(((HUGEMEM * 1024 + HUGEPGSZ - 1) / HUGEPGSZ))}"

	case "$mode" in
		config) configure_linux ;;
		cleanup) cleanup_linux ;;
		reset) reset_linux ;;
		status) status_linux ;;
		help) usage "$0" ;;
		*) usage "$0" "Invalid argument '$mode'" ;;
	esac
else
	case "$mode" in
		config) configure_freebsd ;;
		reset) reset_freebsd ;;
		cleanup) echo "setup.sh cleanup function not yet supported on $os" ;;
		status) status_freebsd ;;
		help) usage "$0" ;;
		*) usage "$0" "Invalid argument '$mode'" ;;
	esac
fi
886