# Default ports, addressing and identity used by the nvmf test suites.
NVMF_PORT=4420
NVMF_SECOND_PORT=4421
NVMF_THIRD_PORT=4422
NVMF_IP_PREFIX="192.168.100"
NVMF_IP_LEAST_ADDR=8
NVMF_TCP_IP_ADDRESS="127.0.0.1"
NVMF_TRANSPORT_OPTS=""
NVMF_SERIAL=SPDK00000000000001

# Assemble the NVMF_APP command array: optional sudo-user re-exec, shared
# memory id, core mask, and (optionally) a hugepage directory. Returns 1 in
# non-root mode when SPDK_HUGE_DIR is not provided.
build_nvmf_app_args() {
	if [ $SPDK_RUN_NON_ROOT -eq 1 ]; then
		# The test script itself is expected to run under sudo; drop back to
		# the invoking user while keeping the library path intact.
		NVMF_APP=(sudo -E -u $SUDO_USER "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" "${NVMF_APP[@]}")
	fi
	NVMF_APP+=(-i "$NVMF_APP_SHM_ID" -e 0xFFFF)

	if [ -n "$SPDK_HUGE_DIR" ]; then
		NVMF_APP+=(--huge-dir "$SPDK_HUGE_DIR")
	elif [ $SPDK_RUN_NON_ROOT -eq 1 ]; then
		# Non-root mode cannot use the default hugepage mount - tell the user
		# how to prepare one.
		printf '%s\n' \
			"In non-root test mode you have to set SPDK_HUGE_DIR variable." \
			"For example:" \
			"sudo mkdir /mnt/spdk_hugetlbfs" \
			"sudo chown ${SUDO_USER}: /mnt/spdk_hugetlbfs" \
			"export SPDK_HUGE_DIR=/mnt/spdk_hugetlbfs" >&2
		return 1
	fi
}

# Default the app shared-memory id only when the variable is unset.
: ${NVMF_APP_SHM_ID="0"}
export NVMF_APP_SHM_ID
build_nvmf_app_args

have_pci_nics=0

# Thin wrapper around the soft-RoCE configuration helper script.
rxe_cfg() {
	"$rootdir/scripts/rxe_cfg_small.sh" "$@"
}

# Load the kernel InfiniBand/RDMA module stack (Linux only).
load_ib_rdma_modules() {
	if [ $(uname) != Linux ]; then
		return 0
	fi

	local mod
	for mod in ib_cm ib_core ib_ucm ib_umad ib_uverbs iw_cm rdma_cm rdma_ucm; do
		if [[ $mod == ib_ucm ]]; then
			# Newer kernels do not have the ib_ucm module.
			modprobe $mod || true
		else
			modprobe $mod
		fi
	done
}

# (Re)start soft-RoCE so the tests always begin from a clean slate.
detect_soft_roce_nics() {
	rxe_cfg stop
	rxe_cfg start
}

# args 1 and 2 represent the grep filters for finding our NICs.
# subsequent args are all drivers that should be loaded if we find these NICs.
# Those drivers should be supplied in the correct order.
function detect_nics_and_probe_drivers() {
	NIC_VENDOR="$1"
	NIC_CLASS="$2"

	# PCI BDFs of Ethernet devices matching both grep filters, with the PCI
	# domain ("0000:") prepended.
	nvmf_nic_bdfs=$(lspci | grep Ethernet | grep "$NIC_VENDOR" | grep "$NIC_CLASS" | awk -F ' ' '{print "0000:"$1}')

	if [ -z "$nvmf_nic_bdfs" ]; then
		return 0
	fi

	have_pci_nics=1
	if [ $# -ge 2 ]; then
		# shift out the first two positional arguments.
		shift 2
		# Iterate through the remaining arguments (driver modules, in load order).
		for i; do
			if [[ $i == irdma ]]; then
				# Our tests don't play well with iWARP protocol. Make sure we use RoCEv2 instead.
				if [[ -e /sys/module/irdma/parameters/roce_ena ]]; then
					# reload the module to re-init the rdma devices
					(($(< /sys/module/irdma/parameters/roce_ena) != 1)) && modprobe -r irdma
				fi
				modprobe "$i" roce_ena=1
			else
				modprobe "$i"
			fi
		done
	fi
}

# Probe PCI RDMA NICs. Each driver_args entry is a single string that is
# deliberately expanded UNQUOTED below so it word-splits into separate
# arguments (vendor filter, class filter, driver modules) for
# detect_nics_and_probe_drivers.
function pci_rdma_switch() {
	local driver=$1

	local -a driver_args=()
	driver_args+=("Mellanox ConnectX-4 mlx5_core mlx5_ib")
	driver_args+=("Mellanox ConnectX-5 mlx5_core mlx5_ib")
	driver_args+=("Intel E810 ice irdma")
	driver_args+=("Intel X722 i40e i40iw")
	# NOTE(review): after word splitting, the escaped quotes here remain
	# literal characters in the class filter ('"Unified' / 'Wire"') - confirm
	# this actually matches lspci output for Chelsio adapters.
	driver_args+=("Chelsio \"Unified Wire\" cxgb4 iw_cxgb4")

	case $driver in
		mlx5_ib)
			detect_nics_and_probe_drivers ${driver_args[0]}
			detect_nics_and_probe_drivers ${driver_args[1]}
			;;
		irdma)
			detect_nics_and_probe_drivers ${driver_args[2]}
			;;
		i40iw)
			detect_nics_and_probe_drivers ${driver_args[3]}
			;;
		iw_cxgb4)
			detect_nics_and_probe_drivers ${driver_args[4]}
			;;
		*)
			# Unknown/default: try every known RDMA NIC family.
			for d in "${driver_args[@]}"; do
				detect_nics_and_probe_drivers $d
			done
			;;
	esac
}

# Probe PCI TCP NICs; same unquoted-expansion convention as pci_rdma_switch.
function pci_tcp_switch() {
	local driver=$1

	local -a driver_args=()
	driver_args+=("Intel E810 ice")

	case $driver in
		ice)
			detect_nics_and_probe_drivers ${driver_args[0]}
			;;
		*)
			for d in "${driver_args[@]}"; do
				detect_nics_and_probe_drivers $d
			done
			;;
	esac
}

# Detect physical NICs usable for $TEST_TRANSPORT (rdma or tcp) and load
# their drivers. Sets have_pci_nics=1 via the probe helpers on success.
function detect_pci_nics() {

	if ! hash lspci; then
		return 0
	fi

	local nic_drivers
	local found_drivers

	if [[ -z "$TEST_TRANSPORT" ]]; then
		TEST_TRANSPORT=$SPDK_TEST_NVMF_TRANSPORT
	fi

	if [[ "$TEST_TRANSPORT" == "rdma" ]]; then
		nic_drivers="mlx5_ib|irdma|i40iw|iw_cxgb4"

		# Try to find RDMA drivers which are already loaded and try to
		# use only their associated NICs, without probing all drivers.
		found_drivers=$(lsmod | grep -Eo $nic_drivers | sort -u)
		for d in $found_drivers; do
			pci_rdma_switch $d
		done

		# In case lsmod reported a driver, but lspci does not report
		# physical NICs - fall back to the old approach and try to
		# probe all compatible NICs.
		((have_pci_nics == 0)) && pci_rdma_switch "default"

	elif [[ "$TEST_TRANSPORT" == "tcp" ]]; then
		nic_drivers="ice"
		found_drivers=$(lsmod | grep -Eo $nic_drivers | sort -u)
		for d in $found_drivers; do
			pci_tcp_switch $d
		done
		((have_pci_nics == 0)) && pci_tcp_switch "default"
	fi

	# Use softroce if everything else failed.
	((have_pci_nics == 0)) && return 0

	# Provide time for drivers to properly load.
	sleep 5
}

# Prefer physical NICs; fall back to soft-RoCE when none were found.
function detect_transport_nics() {
	detect_pci_nics
	if [ "$have_pci_nics" -eq "0" ]; then
		detect_soft_roce_nics
	fi
}

# Assign $NVMF_IP_PREFIX.N/24 addresses (starting at NVMF_IP_LEAST_ADDR) to
# any RDMA interface that does not yet have an IPv4 address, and bring it up.
function allocate_nic_ips() {
	((count = NVMF_IP_LEAST_ADDR))
	for nic_name in $(get_rdma_if_list); do
		ip="$(get_ip_address $nic_name)"
		if [[ -z $ip ]]; then
			ip addr add $NVMF_IP_PREFIX.$count/24 dev $nic_name
			ip link set $nic_name up
			((count = count + 1))
		fi
		# dump configuration for debug log
		ip addr show $nic_name
	done
}

# Print the IPv4 address of every RDMA interface, one per line.
function get_available_rdma_ips() {
	for nic_name in $(get_rdma_if_list); do
		get_ip_address $nic_name
	done
}

# List RDMA-capable network interfaces via the rxe helper.
function get_rdma_if_list() {
	rxe_cfg rxe-net
}

# Print the net interface names bound to the given PCI driver (default: ice).
function get_tcp_if_list_by_driver() {
	local driver
	driver=${1:-ice}

	# nullglob so an unmatched pattern yields an empty list, not the literal glob.
	shopt -s nullglob
	tcp_if_list=(/sys/bus/pci/drivers/$driver/0000*/net/*)
	shopt -u nullglob
	printf '%s\n' "${tcp_if_list[@]##*/}"
}

# Print the first IPv4 address configured on the given interface.
function get_ip_address() {
	interface=$1
	ip -o -4 addr show $interface | awk '{print $4}' | cut -d"/" -f1
}

# Unload the kernel nvme transport and fabrics modules, retrying for up to
# 20 seconds while in-flight I/O drains.
function nvmfcleanup() {
	sync

	if [ "$TEST_TRANSPORT" == "tcp" ] || [ "$TEST_TRANSPORT" == "rdma" ]; then
		# modprobe -r is expected to fail while devices are busy; disable
		# errexit for the retry loop.
		set +e
		for i in {1..20}; do
			modprobe -v -r nvme-$TEST_TRANSPORT
			if modprobe -v -r nvme-fabrics; then
				set -e
				return 0
			fi
			sleep 1
		done
		set -e

		# So far unable to remove the kernel modules. Try
		# one more time and let it fail.
		# Allow the transport module to fail for now. See Jim's comment
		# about the nvme-tcp module below.
		modprobe -v -r nvme-$TEST_TRANSPORT || true
		modprobe -v -r nvme-fabrics
	fi
}

# Build a veth/bridge topology with the target interfaces inside a dedicated
# network namespace, and prefix NVMF_APP so the target runs in that namespace.
function nvmf_veth_init() {
	NVMF_INITIATOR_IP=10.0.0.1
	NVMF_FIRST_TARGET_IP=10.0.0.2
	NVMF_SECOND_TARGET_IP=10.0.0.3
	NVMF_BRIDGE="nvmf_br"
	NVMF_INITIATOR_INTERFACE="nvmf_init_if"
	NVMF_INITIATOR_BRIDGE="nvmf_init_br"
	NVMF_TARGET_NAMESPACE="nvmf_tgt_ns_spdk"
	NVMF_TARGET_NS_CMD=(ip netns exec "$NVMF_TARGET_NAMESPACE")
	NVMF_TARGET_INTERFACE="nvmf_tgt_if"
	NVMF_TARGET_INTERFACE2="nvmf_tgt_if2"
	NVMF_TARGET_BRIDGE="nvmf_tgt_br"
	NVMF_TARGET_BRIDGE2="nvmf_tgt_br2"

	# Best-effort teardown of anything left over from a previous run.
	ip link set $NVMF_INITIATOR_BRIDGE nomaster || true
	ip link set $NVMF_TARGET_BRIDGE nomaster || true
	ip link set $NVMF_TARGET_BRIDGE2 nomaster || true
	ip link set $NVMF_INITIATOR_BRIDGE down || true
	ip link set $NVMF_TARGET_BRIDGE down || true
	ip link set $NVMF_TARGET_BRIDGE2 down || true
	ip link delete $NVMF_BRIDGE type bridge || true
	ip link delete $NVMF_INITIATOR_INTERFACE || true
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE || true
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE2 || true

	# Create network namespace
	ip netns add $NVMF_TARGET_NAMESPACE

	# Create veth (Virtual ethernet) interface pairs
	ip link add $NVMF_INITIATOR_INTERFACE type veth peer name $NVMF_INITIATOR_BRIDGE
	ip link add $NVMF_TARGET_INTERFACE type veth peer name $NVMF_TARGET_BRIDGE
	ip link add $NVMF_TARGET_INTERFACE2 type veth peer name $NVMF_TARGET_BRIDGE2

	# Associate veth interface pairs with network namespace
	ip link set $NVMF_TARGET_INTERFACE netns $NVMF_TARGET_NAMESPACE
	ip link set $NVMF_TARGET_INTERFACE2 netns $NVMF_TARGET_NAMESPACE

	# Allocate IP addresses
	ip addr add $NVMF_INITIATOR_IP/24 dev $NVMF_INITIATOR_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip addr add $NVMF_FIRST_TARGET_IP/24 dev $NVMF_TARGET_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip addr add $NVMF_SECOND_TARGET_IP/24 dev $NVMF_TARGET_INTERFACE2

	# Link up veth interfaces
	ip link set $NVMF_INITIATOR_INTERFACE up
	ip link set $NVMF_INITIATOR_BRIDGE up
	ip link set $NVMF_TARGET_BRIDGE up
	ip link set $NVMF_TARGET_BRIDGE2 up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set $NVMF_TARGET_INTERFACE up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set $NVMF_TARGET_INTERFACE2 up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set lo up

	# Create a bridge
	ip link add $NVMF_BRIDGE type bridge
	ip link set $NVMF_BRIDGE up

	# Add veth interfaces to the bridge
	ip link set $NVMF_INITIATOR_BRIDGE master $NVMF_BRIDGE
	ip link set $NVMF_TARGET_BRIDGE master $NVMF_BRIDGE
	ip link set $NVMF_TARGET_BRIDGE2 master $NVMF_BRIDGE

	# Accept connections from veth interface
	iptables -I INPUT 1 -i $NVMF_INITIATOR_INTERFACE -p tcp --dport $NVMF_PORT -j ACCEPT

	# Verify connectivity
	ping -c 1 $NVMF_FIRST_TARGET_IP
	ping -c 1 $NVMF_SECOND_TARGET_IP
	"${NVMF_TARGET_NS_CMD[@]}" ping -c 1 $NVMF_INITIATOR_IP

	NVMF_APP=("${NVMF_TARGET_NS_CMD[@]}" "${NVMF_APP[@]}")
}

function nvmf_veth_fini() {
	# Cleanup bridge, veth interfaces, and network namespace
	# Note: removing one veth removes the pair
	ip link set $NVMF_INITIATOR_BRIDGE nomaster
	ip link set $NVMF_TARGET_BRIDGE nomaster
	ip link set $NVMF_TARGET_BRIDGE2 nomaster
	ip link set $NVMF_INITIATOR_BRIDGE down
	ip link set $NVMF_TARGET_BRIDGE down
	ip link set $NVMF_TARGET_BRIDGE2 down
	ip link delete $NVMF_BRIDGE type bridge
	ip link delete $NVMF_INITIATOR_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip link delete $NVMF_TARGET_INTERFACE2
	remove_spdk_ns
}

# Set up TCP transport networking on physical NICs; falls back to the veth
# topology when fewer than one suitable interface exists or in iso mode.
function nvmf_tcp_init() {
	NVMF_INITIATOR_IP=10.0.0.1
	NVMF_FIRST_TARGET_IP=10.0.0.2
	TCP_INTERFACE_LIST=($(get_tcp_if_list_by_driver))
	if ((${#TCP_INTERFACE_LIST[@]} == 0)) || [ "$TEST_MODE" == "iso" ]; then
		nvmf_veth_init
		return 0
	fi

	# We need two net devs at minimum
	((${#TCP_INTERFACE_LIST[@]} > 1))

	NVMF_TARGET_INTERFACE=${TCP_INTERFACE_LIST[0]}
	NVMF_INITIATOR_INTERFACE=${TCP_INTERFACE_LIST[1]}

	# Skip case nvmf_multipath in nvmf_tcp_init(), it will be covered by nvmf_veth_init().
	NVMF_SECOND_TARGET_IP=""

	NVMF_TARGET_NAMESPACE="${NVMF_TARGET_INTERFACE}_ns_spdk"
	NVMF_TARGET_NS_CMD=(ip netns exec "$NVMF_TARGET_NAMESPACE")
	ip -4 addr flush $NVMF_TARGET_INTERFACE || true
	ip -4 addr flush $NVMF_INITIATOR_INTERFACE || true

	# Create network namespace
	ip netns add $NVMF_TARGET_NAMESPACE

	# Associate phy interface pairs with network namespace
	ip link set $NVMF_TARGET_INTERFACE netns $NVMF_TARGET_NAMESPACE

	# Allocate IP addresses
	ip addr add $NVMF_INITIATOR_IP/24 dev $NVMF_INITIATOR_INTERFACE
	"${NVMF_TARGET_NS_CMD[@]}" ip addr add $NVMF_FIRST_TARGET_IP/24 dev $NVMF_TARGET_INTERFACE

	# Link up phy interfaces
	ip link set $NVMF_INITIATOR_INTERFACE up

	"${NVMF_TARGET_NS_CMD[@]}" ip link set $NVMF_TARGET_INTERFACE up
	"${NVMF_TARGET_NS_CMD[@]}" ip link set lo up

	# Accept connections from phy interface
	iptables -I INPUT 1 -i $NVMF_INITIATOR_INTERFACE -p tcp --dport $NVMF_PORT -j ACCEPT

	# Verify connectivity
	ping -c 1 $NVMF_FIRST_TARGET_IP
	"${NVMF_TARGET_NS_CMD[@]}" ping -c 1 $NVMF_INITIATOR_IP

	NVMF_APP=("${NVMF_TARGET_NS_CMD[@]}" "${NVMF_APP[@]}")
}

function nvmf_tcp_fini() {
	# NOTE(review): nvmf_veth_init() sets NVMF_TARGET_NAMESPACE to
	# "nvmf_tgt_ns_spdk", so this comparison never matches and the full veth
	# teardown is skipped (remove_spdk_ns below still deletes the namespace,
	# but bridges/veths linger) - confirm whether "nvmf_tgt_ns" is stale.
	if [[ "$NVMF_TARGET_NAMESPACE" == "nvmf_tgt_ns" ]]; then
		nvmf_veth_fini
		return 0
	fi
	remove_spdk_ns
	ip -4 addr flush $NVMF_INITIATOR_INTERFACE || :
}

# Prepare the host for an nvmf test run: device/driver init (iso mode),
# transport networking, NVMF_TRANSPORT_OPTS and kernel initiator modules.
# Installs a trap that dumps shm state and tears everything down on exit.
function nvmftestinit() {
	if [ -z $TEST_TRANSPORT ]; then
		echo "transport not specified - use --transport= to specify"
		return 1
	fi

	trap 'process_shm --id $NVMF_APP_SHM_ID || :; nvmftestfini' SIGINT SIGTERM EXIT

	if [ "$TEST_MODE" == "iso" ]; then
		$rootdir/scripts/setup.sh
		if [[ "$TEST_TRANSPORT" == "rdma" ]]; then
			rdma_device_init
		fi
		if [[ "$TEST_TRANSPORT" == "tcp" ]]; then
			tcp_device_init
		fi
	fi

	NVMF_TRANSPORT_OPTS="-t $TEST_TRANSPORT"
	if [[ "$TEST_TRANSPORT" == "rdma" ]]; then
		RDMA_IP_LIST=$(get_available_rdma_ips)
		NVMF_FIRST_TARGET_IP=$(echo "$RDMA_IP_LIST" | head -n 1)
		NVMF_SECOND_TARGET_IP=$(echo "$RDMA_IP_LIST" | tail -n +2 | head -n 1)
		if [ -z $NVMF_FIRST_TARGET_IP ]; then
			echo "no RDMA NIC for nvmf test"
			exit 0
		fi
	elif [[ "$TEST_TRANSPORT" == "tcp" ]]; then
		remove_spdk_ns
		nvmf_tcp_init
		NVMF_TRANSPORT_OPTS="$NVMF_TRANSPORT_OPTS -o"
	fi

	if [ "$TEST_TRANSPORT" == "tcp" ] || [ "$TEST_TRANSPORT" == "rdma" ]; then
		# currently we run the host/perf test for TCP even on systems without kernel nvme-tcp
		# support; that's fine since the host/perf test uses the SPDK initiator
		# maybe later we will enforce modprobe to succeed once we have systems in the test pool
		# with nvme-tcp kernel support - but until then let this pass so we can still run the
		# host/perf test with the tcp transport
		modprobe nvme-$TEST_TRANSPORT || true
	fi
}

# Launch the nvmf target app in the background and wait for its RPC listener
# (waitforlisten/timing_* come from the common test framework).
function nvmfappstart() {
	timing_enter start_nvmf_tgt
	"${NVMF_APP[@]}" "$@" &
	nvmfpid=$!
	waitforlisten $nvmfpid
	timing_exit start_nvmf_tgt
}

# Tear down the test: unload kernel modules, kill the target, reset devices
# (iso mode) and release transport networking.
function nvmftestfini() {
	nvmfcleanup || :
	if [ -n "$nvmfpid" ]; then
		killprocess $nvmfpid
	fi
	if [ "$TEST_MODE" == "iso" ]; then
		$rootdir/scripts/setup.sh reset
		if [[ "$TEST_TRANSPORT" == "rdma" ]]; then
			rdma_device_init
		fi
	fi
	if [[ "$TEST_TRANSPORT" == "tcp" ]]; then
		nvmf_tcp_fini
	fi
}

# Full RDMA bring-up: modules, NIC detection, IP allocation.
function rdma_device_init() {
	load_ib_rdma_modules
	detect_transport_nics
	allocate_nic_ips
}

function tcp_device_init() {
	detect_transport_nics
}

function revert_soft_roce() {
	rxe_cfg stop
}

# For rdma transport only: succeed when $1 is an address on a soft-RoCE device.
function check_ip_is_soft_roce() {
	if [ "$TEST_TRANSPORT" != "rdma" ]; then
		return 0
	fi
	rxe_cfg status rxe | grep -wq "$1"
}

# nvme connect wrapper that waits (up to 10s) for the new device to appear in
# 'nvme list'. Returns 1 if it never shows up.
function nvme_connect() {
	local init_count
	init_count=$(nvme list | wc -l)

	# NOTE(review): inside the 'then' branch, $? is the (negated) status of the
	# condition itself - i.e. always 0 - so a failed connect returns success
	# here; likely intended to capture/propagate the real failure code.
	if ! nvme connect "$@"; then return $?; fi

	for i in $(seq 1 10); do
		if [ $(nvme list | wc -l) -gt $init_count ]; then
			return 0
		else
			sleep 1s
		fi
	done
	return 1
}

# Print /dev/nvme* device nodes reported by 'nvme list'.
function get_nvme_devs() {
	local dev _

	while read -r dev _; do
		if [[ $dev == /dev/nvme* ]]; then
			echo "$dev"
		fi
	done < <(nvme list)
}

# Emit (via jq) a target JSON config attaching one bdev_nvme controller per
# subsystem id passed as arguments (defaults to a single subsystem "1").
# hdgst/ddgst may be preset by the caller; they default to false.
function gen_nvmf_target_json() {
	local subsystem config=()

	for subsystem in "${@:-1}"; do
		config+=(
			"$(
				cat <<- EOF
					{
						"params": {
							"name": "Nvme$subsystem",
							"trtype": "$TEST_TRANSPORT",
							"traddr": "$NVMF_FIRST_TARGET_IP",
							"adrfam": "ipv4",
							"trsvcid": "$NVMF_PORT",
							"subnqn": "nqn.2016-06.io.spdk:cnode$subsystem",
							"hostnqn": "nqn.2016-06.io.spdk:host$subsystem",
							"hdgst": ${hdgst:-false},
							"ddgst": ${ddgst:-false}
						},
						"method": "bdev_nvme_attach_controller"
					}
EOF
			)"
		)
	done
	jq . <<- JSON
		{
			"subsystems": [
				{
					"subsystem": "bdev",
					"config": [
						{
							"method": "bdev_nvme_set_options",
							"params": {
								"action_on_timeout": "none",
								"timeout_us": 0,
								"retry_count": 4,
								"arbitration_burst": 0,
								"low_priority_weight": 0,
								"medium_priority_weight": 0,
								"high_priority_weight": 0,
								"nvme_adminq_poll_period_us": 10000,
								"keep_alive_timeout_ms" : 10000,
								"nvme_ioq_poll_period_us": 0,
								"io_queue_requests": 0,
								"delay_cmd_submit": true
							}
						},
						$(
							IFS=","
							printf '%s\n' "${config[*]}"
						),
						{
							"method": "bdev_wait_for_examine"
						}
					]
				}
			]
		}
JSON
}

# Delete every network namespace whose name ends in "_spdk".
function remove_spdk_ns() {
	local ns
	while read -r ns _; do
		[[ $ns == *_spdk ]] || continue
		ip netns delete "$ns"
	done < <(ip netns list)
	# Let it settle
	sleep 1
}