xref: /spdk/test/bdev/bdev_raid.sh (revision c6c1234de9e0015e670dd0b51bf6ce39ee0e07bd)
1#!/usr/bin/env bash
2#  SPDX-License-Identifier: BSD-3-Clause
3#  Copyright (C) 2019 Intel Corporation
4#  All rights reserved.
5#
# Resolve the directory holding this script and the repository root two levels
# above it. All expansions are quoted so a checkout path containing whitespace
# does not get word-split.
testdir=$(readlink -f "$(dirname "$0")")
rootdir=$(readlink -f "$testdir/../..")
# Scratch directory/file used by the data verification helpers below.
tmp_dir=$SPDK_TEST_STORAGE/raidtest
tmp_file=$tmp_dir/raidrandtest

source "$rootdir/test/common/autotest_common.sh"
source "$testdir/nbd_common.sh"

# Command used by every function in this file to issue RPCs.
rpc_py=rpc_cmd
15
# Write random data to an nbd device, discard a few block ranges and check after
# each discard that the device content matches a reference file in which the
# same ranges were overwritten with zeroes.
# Arguments: $1 - nbd device node to test
# Globals:   tmp_file (read) - path of the reference file
# Returns:   0; the whole check is skipped when blkdiscard is not available.
function raid_unmap_data_verify() {
	if ! command -v blkdiscard > /dev/null; then
		return 0
	fi

	local nbd=$1
	local blksize
	local rw_blk_num=4096
	local rw_len
	local unmap_blk_offs=(0 1028 321)
	local unmap_blk_nums=(128 2035 456)
	local unmap_off
	local unmap_len
	local i

	# Ask for the logical sector size directly. Cutting a fixed field out of the
	# padded lsblk column only works for 3-digit sizes and yields an empty value
	# for e.g. 4096.
	blksize=$(blockdev --getss "$nbd")
	rw_len=$((blksize * rw_blk_num))

	# data write
	dd if=/dev/urandom of="$tmp_file" bs="$blksize" count="$rw_blk_num"
	dd if="$tmp_file" of="$nbd" bs="$blksize" count="$rw_blk_num" oflag=direct
	blockdev --flushbufs "$nbd"

	# confirm random data is written correctly in raid0 device
	cmp -b -n "$rw_len" "$tmp_file" "$nbd"

	for ((i = 0; i < ${#unmap_blk_offs[@]}; i++)); do
		unmap_off=$((blksize * ${unmap_blk_offs[$i]}))
		unmap_len=$((blksize * ${unmap_blk_nums[$i]}))

		# data unmap on tmp_file
		dd if=/dev/zero of="$tmp_file" bs="$blksize" seek="${unmap_blk_offs[$i]}" count="${unmap_blk_nums[$i]}" conv=notrunc

		# data unmap on raid bdev
		blkdiscard -o "$unmap_off" -l "$unmap_len" "$nbd"
		blockdev --flushbufs "$nbd"

		# data verify after unmap
		cmp -b -n "$rw_len" "$tmp_file" "$nbd"
	done

	return 0
}
54
# Kill the test application if one was started and is still running, then
# remove the scratch directory.
# Globals: raid_pid (read), tmp_dir (read)
function cleanup() {
	if [[ -n $raid_pid ]]; then
		# A pid was recorded; only kill it when the process still exists.
		if ps -p "$raid_pid" > /dev/null; then
			killprocess "$raid_pid"
		fi
	fi

	rm -rf "$tmp_dir"
}
62
# Create two malloc base bdevs (Base_1, Base_2) and a raid bdev named "raid" on
# top of them by sending a batch of RPCs to rpc.py on stdin.
# Arguments: $1 - raid level passed to bdev_raid_create -r
# Globals:   rootdir, base_blocklen, base_malloc_params (read)
# Returns:   exit status of rpc.py
function configure_raid_bdev() {
	local raid_level=$1

	# The batch is fed straight from the here-document, so no rpcs.txt scratch
	# file is created in $testdir that could be left behind if rpc.py fails.
	"$rootdir/scripts/rpc.py" <<- EOL
		bdev_malloc_create 32 $base_blocklen $base_malloc_params -b Base_1
		bdev_malloc_create 32 $base_blocklen $base_malloc_params -b Base_2
		bdev_raid_create -z 64 -r $raid_level -b "Base_1 Base_2" -n raid
	EOL
}
76
# Start bdev_svc, configure a raid bdev of the given level, export it through
# /dev/nbd0 and run the unmap data verification against it.
# Arguments: $1 - raid level handed to configure_raid_bdev
# Globals:   raid_pid (written) - pid of the started bdev_svc
# Returns:   0 on success, 1 when no online raid bdev is reported or the nbd
#            disk count is not the expected one.
function raid_function_test() {
	local raid_level=$1
	local nbd=/dev/nbd0
	local raid_bdev
	local count

	$rootdir/test/app/bdev_svc/bdev_svc -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid

	configure_raid_bdev $raid_level
	raid_bdev=$($rpc_py bdev_raid_get_bdevs online | jq -r '.[0]["name"] | select(.)')
	# Quote and use -z: an unquoted empty value turns the test into a syntax
	# error, which would make this guard silently pass.
	if [ -z "$raid_bdev" ]; then
		echo "No raid0 device in SPDK app"
		return 1
	fi

	nbd_start_disks $DEFAULT_RPC_ADDR $raid_bdev $nbd
	count=$(nbd_get_count $DEFAULT_RPC_ADDR)
	if [ "$count" -ne 1 ]; then
		return 1
	fi

	raid_unmap_data_verify $nbd

	nbd_stop_disks $DEFAULT_RPC_ADDR $nbd
	count=$(nbd_get_count $DEFAULT_RPC_ADDR)
	if [ "$count" -ne 0 ]; then
		return 1
	fi

	killprocess $raid_pid

	return 0
}
112
# Check that a raid bdev exists, is listed under the expected state category and
# reports the expected state, level, strip size and base bdev counters.
# Arguments: $1 - raid bdev name
#            $2 - expected state (also used as bdev_raid_get_bdevs category)
#            $3 - expected raid level
#            $4 - expected strip size in KiB
#            $5 - expected number of operational base bdevs
# Outputs:   a one-line description of the first mismatch on stdout
# Returns:   0 when everything matches, 1 otherwise
function verify_raid_bdev_state() {
	local raid_bdev_name=$1
	local expected_state=$2
	local raid_level=$3
	local strip_size=$4
	local num_base_bdevs_operational=$5
	local raid_bdev_info
	local num_base_bdevs
	local num_base_bdevs_discovered
	local state level strip
	local reported_total reported_discovered reported_operational
	local err=""

	raid_bdev_info=$($rpc_py bdev_raid_get_bdevs all | jq -r ".[] | select(.name == \"$raid_bdev_name\")")

	xtrace_disable
	if [[ -z $raid_bdev_info ]]; then
		err="No raid device \"$raid_bdev_name\" in SPDK app"
	else
		raid_bdev_info=$($rpc_py bdev_raid_get_bdevs "$expected_state" | jq -r ".[] | select(.name == \"$raid_bdev_name\")")
		if [[ -z $raid_bdev_info ]]; then
			err="$raid_bdev_name is not in $expected_state state"
		fi
	fi

	if [[ -z $err ]]; then
		state=$(jq -r '.state' <<< "$raid_bdev_info")
		level=$(jq -r '.raid_level' <<< "$raid_bdev_info")
		strip=$(jq -r '.strip_size_kb' <<< "$raid_bdev_info")
		# The counters reported by the raid bdev are cross-checked against the
		# base_bdevs_list entries of the same JSON object.
		num_base_bdevs=$(jq -r '[.base_bdevs_list[]] | length' <<< "$raid_bdev_info")
		reported_total=$(jq -r '.num_base_bdevs' <<< "$raid_bdev_info")
		num_base_bdevs_discovered=$(jq -r '[.base_bdevs_list[] | select(.is_configured)] | length' <<< "$raid_bdev_info")
		reported_discovered=$(jq -r '.num_base_bdevs_discovered' <<< "$raid_bdev_info")
		reported_operational=$(jq -r '.num_base_bdevs_operational' <<< "$raid_bdev_info")

		if [[ $state != "$expected_state" ]]; then
			err="incorrect state: $state, expected: $expected_state"
		elif [[ $level != "$raid_level" ]]; then
			err="incorrect level: $level, expected: $raid_level"
		elif [[ $strip != "$strip_size" ]]; then
			err="incorrect strip size: $strip, expected: $strip_size"
		elif [[ $num_base_bdevs != "$reported_total" ]]; then
			err="incorrect num_base_bdevs: $reported_total, expected: $num_base_bdevs"
		elif [[ $num_base_bdevs_discovered != "$reported_discovered" ]]; then
			err="incorrect num_base_bdevs_discovered: $reported_discovered, expected: $num_base_bdevs_discovered"
		elif [[ $num_base_bdevs_operational != "$reported_operational" ]]; then
			err="incorrect num_base_bdevs_operational $reported_operational, expected: $num_base_bdevs_operational"
		fi
	fi

	if [[ -n $err ]]; then
		echo "$err"
	fi

	# Single exit point: xtrace_restore always pairs with the xtrace_disable
	# above, on failure paths as well.
	xtrace_restore

	if [[ -n $err ]]; then
		return 1
	fi
	return 0
}
178
# Check the background process reported for a raid bdev.
# Arguments: $1 - raid bdev name
#            $2 - expected .process.type ("none" when no process is reported)
#            $3 - expected .process.target ("none" when no process is reported)
# Returns:   0 when both values match, 1 otherwise
function verify_raid_bdev_process() {
	local raid_bdev_name=$1
	local process_type=$2
	local target=$3
	local raid_bdev_info

	raid_bdev_info=$($rpc_py bdev_raid_get_bdevs all | jq -r ".[] | select(.name == \"$raid_bdev_name\")")

	# Return explicitly on each mismatch. With two bare [[ ]] statements the
	# result of the first one is lost whenever this function is called from a
	# condition (if/while/!), where errexit does not apply.
	if [[ $(jq -r '.process.type // "none"' <<< "$raid_bdev_info") != "$process_type" ]]; then
		return 1
	fi
	if [[ $(jq -r '.process.target // "none"' <<< "$raid_bdev_info") != "$target" ]]; then
		return 1
	fi

	return 0
}
190
# Check that block_size, md_size, md_interleave and dif_type of a raid bdev are
# equal to those of each of its configured base bdevs.
# Arguments: $1 - raid bdev name
# Outputs:   a message on stdout naming the first base bdev that differs
# Returns:   0 when all configured base bdevs match, 1 otherwise
function verify_raid_bdev_properties() {
	local raid_bdev_name=$1
	local raid_bdev_info
	local base_bdev_info
	local base_bdev_names
	local name
	# The compared properties are rendered as one compact JSON array so a single
	# string comparison covers all of them.
	local props_filter='[.block_size, .md_size, .md_interleave, .dif_type]'
	local raid_props
	local base_props

	raid_bdev_info=$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq '.[]')
	base_bdev_names=$(jq -r '.driver_specific.raid.base_bdevs_list[] | select(.is_configured == true).name' <<< "$raid_bdev_info")
	# The raid side does not change between iterations; compute it once.
	raid_props=$(jq -c "$props_filter" <<< "$raid_bdev_info")

	for name in $base_bdev_names; do
		base_bdev_info=$($rpc_py bdev_get_bdevs -b $name | jq '.[]')
		base_props=$(jq -c "$props_filter" <<< "$base_bdev_info")
		# Fail explicitly so the result does not depend on errexit being active.
		if [[ $base_props != "$raid_props" ]]; then
			echo "properties of $name ($base_props) differ from $raid_bdev_name ($raid_props)"
			return 1
		fi
	done

	return 0
}
209
# Tell whether a raid level is treated as redundant by the tests in this file.
# Arguments: $1 - raid level name
# Returns:   0 for "raid1" and "raid5f", 1 for anything else
function has_redundancy() {
	local level=$1

	if [[ $level == "raid1" || $level == "raid5f" ]]; then
		return 0
	fi

	return 1
}
216
# Walk a raid bdev named "Existed_Raid" through its states while base bdevs are
# created, deleted, removed from and re-added to it, checking the reported state
# after each step.
# Arguments: $1 - raid level
#            $2 - number of base bdevs
#            $3 - "true" to pass -s (superblock) to bdev_raid_create
# Globals:   raid_pid (written) - pid of the started bdev_svc
# Returns:   0 on success, 1 when the raid bdev disappears too early or is not
#            removed after its last base bdev was deleted.
function raid_state_function_test() {
	local raid_level=$1
	local num_base_bdevs=$2
	local superblock=$3
	local raid_bdev
	# Base bdev names BaseBdev1..BaseBdevN
	local base_bdevs=($(for ((i = 1; i <= num_base_bdevs; i++)); do echo BaseBdev$i; done))
	local raid_bdev_name="Existed_Raid"
	local strip_size
	local strip_size_create_arg
	local superblock_create_arg

	# raid1 is created without -z and is expected to report strip size 0
	if [ $raid_level != "raid1" ]; then
		strip_size=64
		strip_size_create_arg="-z $strip_size"
	else
		strip_size=0
	fi

	if [ $superblock = true ]; then
		superblock_create_arg="-s"
	else
		superblock_create_arg=""
	fi

	$rootdir/test/app/bdev_svc/bdev_svc -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid

	# Step1: create a RAID bdev with no base bdevs
	# Expect state: CONFIGURING
	$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "'${base_bdevs[*]}'" -n $raid_bdev_name
	verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
	$rpc_py bdev_raid_delete $raid_bdev_name

	# Step2: create one base bdev and add to the RAID bdev
	# Expect state: CONFIGURING
	$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "'${base_bdevs[*]}'" -n $raid_bdev_name
	$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${base_bdevs[0]}
	waitforbdev ${base_bdevs[0]}
	verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
	$rpc_py bdev_raid_delete $raid_bdev_name

	# Step3: create remaining base bdevs and add to the RAID bdev
	# Expect state: ONLINE
	$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "'${base_bdevs[*]}'" -n $raid_bdev_name
	# The first base bdev already exists from Step2, so start at index 1
	for ((i = 1; i < num_base_bdevs; i++)); do
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${base_bdevs[$i]}
		waitforbdev ${base_bdevs[$i]}
	done
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
	verify_raid_bdev_properties $raid_bdev_name

	# Step4: delete one base bdev from the RAID bdev
	$rpc_py bdev_malloc_delete ${base_bdevs[0]}
	local expected_state
	# Levels without redundancy are expected to go offline, the others stay online
	if ! has_redundancy $raid_level; then
		expected_state="offline"
	else
		expected_state="online"
	fi
	verify_raid_bdev_state $raid_bdev_name $expected_state $raid_level $strip_size $((num_base_bdevs - 1))

	# Step5: delete remaining base bdevs from the RAID bdev
	# Expect state: removed from system
	for ((i = 1; i < num_base_bdevs; i++)); do
		# The raid bdev must still be listed before each further deletion
		raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[0]["name"]')
		if [ "$raid_bdev" != $raid_bdev_name ]; then
			echo "$raid_bdev_name removed before all base bdevs were deleted"
			return 1
		fi
		$rpc_py bdev_malloc_delete ${base_bdevs[$i]}
	done
	raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[0]["name"] | select(.)')
	if [ -n "$raid_bdev" ]; then
		echo "$raid_bdev_name is not removed"
		return 1
	fi

	if [ $num_base_bdevs -gt 2 ]; then
		# Test removing and re-adding base bdevs when in CONFIGURING state
		# Only base bdevs 2..N are created here; the first one is still missing,
		# which keeps the raid bdev in CONFIGURING state.
		for ((i = 1; i < num_base_bdevs; i++)); do
			$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${base_bdevs[$i]}
			waitforbdev ${base_bdevs[$i]}
		done
		$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "'${base_bdevs[*]}'" -n $raid_bdev_name
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs

		# Each step below changes one slot and checks its is_configured flag
		$rpc_py bdev_raid_remove_base_bdev ${base_bdevs[1]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[1].is_configured') == "false" ]]

		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${base_bdevs[0]}
		waitforbdev ${base_bdevs[0]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[0].is_configured') == "true" ]]

		$rpc_py bdev_raid_remove_base_bdev ${base_bdevs[2]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[2].is_configured') == "false" ]]

		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name ${base_bdevs[2]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[2].is_configured') == "true" ]]

		$rpc_py bdev_malloc_delete ${base_bdevs[0]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[0].is_configured') == "false" ]]

		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name ${base_bdevs[1]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[1].is_configured') == "true" ]]

		# Create a bdev with a different name but with the uuid reported for
		# slot 0; the raid bdev is then expected to become online.
		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b NewBaseBdev -u "$($rpc_py bdev_raid_get_bdevs all | jq -r '.[0].base_bdevs_list[0].uuid')"
		waitforbdev NewBaseBdev
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
		verify_raid_bdev_properties $raid_bdev_name

		$rpc_py bdev_raid_delete $raid_bdev_name
	fi

	killprocess $raid_pid

	return 0
}
343
# Build a raid bdev from two 32 MiB null bdevs, grow the base bdevs one after
# the other and check the raid bdev size after each step: it must stay unchanged
# after the first resize and reflect the new size after the second.
# Arguments: $1 - numeric raid level; for 0 the raid size is twice the base
#                 bdev size, otherwise it equals the base bdev size
# Globals:   base_blocklen (read), raid_pid (written)
# Returns:   0 on success, 1 when a size check fails
function raid_resize_test() {
	local raid_level=$1
	local blksize=$base_blocklen
	local bdev_size_mb=32
	local new_bdev_size_mb=$((bdev_size_mb * 2))
	local blkcnt
	local raid_size_mb
	local expected_size
	local size_factor=1
	local per_base_mb
	local base
	local create_args=(-r $raid_level -b "'Base_1 Base_2'" -n Raid)

	$rootdir/test/app/bdev_svc/bdev_svc -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid

	for base in Base_1 Base_2; do
		$rpc_py bdev_null_create $base $bdev_size_mb $blksize
	done

	# Level 0 takes a strip size and exposes the capacity of both members.
	if [ $raid_level -eq 0 ]; then
		size_factor=2
		create_args=(-z 64 "${create_args[@]}")
	fi
	$rpc_py bdev_raid_create "${create_args[@]}"

	for base in Base_1 Base_2; do
		$rpc_py bdev_null_resize $base $new_bdev_size_mb

		# After only Base_1 was resized the raid size should not be changed;
		# once Base_2 was resized too it should be updated.
		case $base in
			Base_1) per_base_mb=$bdev_size_mb ;;
			Base_2) per_base_mb=$new_bdev_size_mb ;;
		esac
		expected_size=$((per_base_mb * size_factor))

		blkcnt=$($rpc_py bdev_get_bdevs -b Raid | jq '.[].num_blocks')
		raid_size_mb=$((blkcnt * blksize / 1048576))
		if [ $raid_size_mb != $expected_size ]; then
			echo "resize failed"
			return 1
		fi
	done

	killprocess $raid_pid

	return 0
}
404
# Create a raid bdev with a superblock (-s) on passthru bdevs stacked on malloc
# bdevs, then delete and re-create the passthru bdevs in various orders and
# check that the raid bdev "raid_bdev1" shows up again in the expected state and
# with the same uuid.
# Arguments: $1 - raid level
#            $2 - number of base bdevs
# Globals:   raid_pid (written) - pid of the started bdev_svc
# Returns:   0 on success, 1 when one of the explicit checks fails
function raid_superblock_test() {
	local raid_level=$1
	local num_base_bdevs=$2
	local base_bdevs_malloc=()
	local base_bdevs_pt=()
	local base_bdevs_pt_uuid=()
	local raid_bdev_name="raid_bdev1"
	local strip_size
	local strip_size_create_arg
	local raid_bdev_uuid
	local raid_bdev

	# raid1 is created without -z and is expected to report strip size 0
	if [ $raid_level != "raid1" ]; then
		strip_size=64
		strip_size_create_arg="-z $strip_size"
	else
		strip_size=0
	fi

	"$rootdir/test/app/bdev_svc/bdev_svc" -L bdev_raid &
	raid_pid=$!
	waitforlisten $raid_pid

	# Create base bdevs
	for ((i = 1; i <= num_base_bdevs; i++)); do
		local bdev_malloc="malloc$i"
		local bdev_pt="pt$i"
		# Fixed uuid per passthru bdev, reused whenever it is re-created below
		local bdev_pt_uuid="00000000-0000-0000-0000-00000000000$i"

		base_bdevs_malloc+=($bdev_malloc)
		base_bdevs_pt+=($bdev_pt)
		base_bdevs_pt_uuid+=($bdev_pt_uuid)

		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b $bdev_malloc
		$rpc_py bdev_passthru_create -b $bdev_malloc -p $bdev_pt -u $bdev_pt_uuid
	done

	# Create RAID bdev with superblock
	$rpc_py bdev_raid_create $strip_size_create_arg -r $raid_level -b "'${base_bdevs_pt[*]}'" -n $raid_bdev_name -s
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
	verify_raid_bdev_properties $raid_bdev_name

	# Get RAID bdev's UUID
	raid_bdev_uuid=$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')
	if [ -z "$raid_bdev_uuid" ]; then
		return 1
	fi

	# Stop the RAID bdev
	$rpc_py bdev_raid_delete $raid_bdev_name
	raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
	if [ -n "$raid_bdev" ]; then
		return 1
	fi

	# Delete the passthru bdevs
	for i in "${base_bdevs_pt[@]}"; do
		$rpc_py bdev_passthru_delete $i
	done
	# No bdev with product_name "passthru" may be left
	if [ "$($rpc_py bdev_get_bdevs | jq -r '[.[] | select(.product_name == "passthru")] | any')" == "true" ]; then
		return 1
	fi

	# Try to create new RAID bdev from malloc bdevs
	# Should fail due to superblock still present on base bdevs
	NOT $rpc_py bdev_raid_create $strip_size_create_arg -r $raid_level -b "'${base_bdevs_malloc[*]}'" -n $raid_bdev_name

	raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
	if [ -n "$raid_bdev" ]; then
		return 1
	fi

	# Re-add first base bdev
	$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[0]} -p ${base_bdevs_pt[0]} -u ${base_bdevs_pt_uuid[0]}

	# Check if the RAID bdev was assembled from superblock
	verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs

	if [ $num_base_bdevs -gt 2 ]; then
		# Re-add the second base bdev and remove it again
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[1]} -p ${base_bdevs_pt[1]} -u ${base_bdevs_pt_uuid[1]}
		$rpc_py bdev_passthru_delete ${base_bdevs_pt[1]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
	fi

	# Re-add remaining base bdevs
	for ((i = 1; i < num_base_bdevs; i++)); do
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}
	done

	# Check if the RAID bdev is in online state
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
	verify_raid_bdev_properties $raid_bdev_name

	# Check if the RAID bdev has the same UUID as when first created
	if [ "$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')" != "$raid_bdev_uuid" ]; then
		return 1
	fi

	# The degraded-array scenarios below only apply to levels with redundancy
	if has_redundancy $raid_level; then
		# Delete one base bdev
		$rpc_py bdev_passthru_delete ${base_bdevs_pt[0]}

		# Check if the RAID bdev is in online state (degraded)
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

		# Stop the RAID bdev
		$rpc_py bdev_raid_delete $raid_bdev_name
		raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
		if [ -n "$raid_bdev" ]; then
			return 1
		fi

		# Delete remaining base bdevs
		for ((i = 1; i < num_base_bdevs; i++)); do
			$rpc_py bdev_passthru_delete ${base_bdevs_pt[$i]}
		done

		# Re-add base bdevs from the second up to (not including) the last one
		for ((i = 1; i < num_base_bdevs - 1; i++)); do
			$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}

			# Check if the RAID bdev is in configuring state
			verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $((num_base_bdevs - 1))
		done

		# Re-add the last base bdev
		i=$((num_base_bdevs - 1))
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}

		# Check if the RAID bdev is in online state (degraded)
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

		# Stop the RAID bdev
		$rpc_py bdev_raid_delete $raid_bdev_name
		raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
		if [ -n "$raid_bdev" ]; then
			return 1
		fi

		if [ $num_base_bdevs -gt 2 ]; then
			# Delete the last base bdev
			i=$((num_base_bdevs - 1))
			$rpc_py bdev_passthru_delete ${base_bdevs_pt[$i]}
		fi

		# Re-add first base bdev
		# This is the "failed" device and contains the "old" version of the superblock
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[0]} -p ${base_bdevs_pt[0]} -u ${base_bdevs_pt_uuid[0]}

		if [ $num_base_bdevs -gt 2 ]; then
			# Check if the RAID bdev is in configuring state
			# This should use the newer superblock version and have n-1 online base bdevs
			verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $((num_base_bdevs - 1))
			[[ $($rpc_py bdev_raid_get_bdevs configuring | jq -r '.[].base_bdevs_list[0].is_configured') == "false" ]]

			# Re-add the last base bdev
			# ($i still holds the index of the last base bdev set above)
			$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}
		fi

		# Check if the RAID bdev is in online state (degraded)
		# This should use the newer superblock version and have n-1 online base bdevs
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))
		[[ $($rpc_py bdev_raid_get_bdevs online | jq -r '.[].base_bdevs_list[0].is_configured') == "false" ]]

		# Check if the RAID bdev has the same UUID as when first created
		if [ "$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')" != "$raid_bdev_uuid" ]; then
			return 1
		fi
	fi

	killprocess $raid_pid

	return 0
}
580
# Exercise rebuild of a raid bdev onto a spare base bdev: remove a base bdev,
# add the spare, check that a rebuild process starts, can be stopped and
# restarted, wait for it to finish and optionally compare the rebuilt data.
# With a superblock, re-assembly of the raid bdev from its base bdevs is
# checked as well.
# Arguments: $1 - raid level
#            $2 - number of base bdevs
#            $3 - "true" to create the raid bdev with a superblock (-s)
#            $4 - "true" to run bdevperf I/O in the background during rebuild
#            $5 - "true" to compare data on the base bdevs after the rebuild
# Globals:   raid_pid (written) - pid of the started bdevperf
# Returns:   0 on success, 1 when background I/O is requested for a level
#            other than raid1
function raid_rebuild_test() {
	local raid_level=$1
	local num_base_bdevs=$2
	local superblock=$3
	local background_io=$4
	local verify=$5
	local base_bdevs=($(for ((i = 1; i <= num_base_bdevs; i++)); do echo BaseBdev$i; done))
	local raid_bdev_name="raid_bdev1"
	local strip_size
	local create_arg
	local raid_bdev_size
	local data_offset

	if [ $raid_level != "raid1" ]; then
		if [ $background_io = true ]; then
			echo "skipping rebuild test with io for level $raid_level"
			return 1
		fi
		strip_size=64
		create_arg+=" -z $strip_size"
	else
		strip_size=0
	fi

	if [ $superblock = true ]; then
		create_arg+=" -s"
	fi

	"$rootdir/build/examples/bdevperf" -T $raid_bdev_name -t 60 -w randrw -M 50 -o 3M -q 2 -U -z -L bdev_raid &
	raid_pid=$!
	waitforlisten $raid_pid

	# Create base bdevs
	for bdev in "${base_bdevs[@]}"; do
		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${bdev}_malloc
		$rpc_py bdev_passthru_create -b ${bdev}_malloc -p $bdev
	done

	# Create spare bdev
	# NOTE(review): the delay bdev presumably slows writes to the spare so the
	# rebuild stays observable for a while - confirm against bdev_delay_create.
	$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b "spare_malloc"
	$rpc_py bdev_delay_create -b "spare_malloc" -d "spare_delay" -r 0 -t 0 -w 100000 -n 100000
	$rpc_py bdev_passthru_create -b "spare_delay" -p "spare"

	# Create RAID bdev
	$rpc_py bdev_raid_create $create_arg -r $raid_level -b "'${base_bdevs[*]}'" -n $raid_bdev_name
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs

	# Get RAID bdev's size
	raid_bdev_size=$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[].num_blocks')

	# Get base bdev's data offset
	data_offset=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[].base_bdevs_list[0].data_offset')

	if [ $background_io = true ]; then
		# Start user I/O
		"$rootdir/examples/bdev/bdevperf/bdevperf.py" perform_tests &
	elif [ $verify = true ]; then
		local write_unit_size

		# Write random data to the RAID bdev
		nbd_start_disks $DEFAULT_RPC_ADDR $raid_bdev_name /dev/nbd0
		if [ $raid_level = "raid5f" ]; then
			write_unit_size=$((strip_size * 2 * (num_base_bdevs - 1)))
			echo $((base_blocklen * write_unit_size / 1024)) > /sys/block/nbd0/queue/max_sectors_kb
		else
			write_unit_size=1
		fi
		dd if=/dev/urandom of=/dev/nbd0 bs=$((base_blocklen * write_unit_size)) count=$((raid_bdev_size / write_unit_size)) oflag=direct
		nbd_stop_disks $DEFAULT_RPC_ADDR /dev/nbd0
	fi

	# Remove one base bdev
	$rpc_py bdev_raid_remove_base_bdev ${base_bdevs[0]}

	# Check if the RAID bdev is in online state (degraded)
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

	# Add bdev for rebuild
	$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
	sleep 1

	# Check if rebuild started
	verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

	# Remove the rebuild target bdev
	$rpc_py bdev_raid_remove_base_bdev "spare"

	# Check if the RAID bdev is in online state (degraded)
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

	# Check if rebuild was stopped
	verify_raid_bdev_process $raid_bdev_name "none" "none"

	# Again, start the rebuild
	$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
	sleep 1
	verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

	# This must test the background_io argument. Testing an unset variable
	# makes [ fail with a syntax error, so the branch would never run.
	if [ $superblock = true ] && [ "$background_io" = false ]; then
		# Stop the RAID bdev
		$rpc_py bdev_raid_delete $raid_bdev_name
		[[ $($rpc_py bdev_raid_get_bdevs all | jq 'length') == 0 ]]

		# Remove the passthru base bdevs, then re-add them to assemble the raid bdev again
		for ((i = 0; i < num_base_bdevs; i++)); do
			$rpc_py bdev_passthru_delete ${base_bdevs[$i]}
		done
		for ((i = 0; i < num_base_bdevs; i++)); do
			$rpc_py bdev_passthru_create -b ${base_bdevs[$i]}_malloc -p ${base_bdevs[$i]}
		done

		# Check if the RAID bdev is in online state (degraded)
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

		# Check if rebuild is not started
		verify_raid_bdev_process $raid_bdev_name "none" "none"

		# Again, start the rebuild
		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
		sleep 1
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"
	fi

	local num_base_bdevs_operational=$num_base_bdevs

	if [ $raid_level = "raid1" ] && [ $num_base_bdevs -gt 2 ]; then
		# Remove one more base bdev (not rebuild target)
		$rpc_py bdev_raid_remove_base_bdev ${base_bdevs[1]}

		# Ignore this bdev later when comparing data
		base_bdevs[1]=""
		((num_base_bdevs_operational--))

		# Check if rebuild is still running
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"
	fi

	# Wait for rebuild to finish
	# Poll once per second, for at most 30 seconds, until the rebuild process
	# is no longer reported.
	local timeout=$((SECONDS + 30))
	while ((SECONDS < timeout)); do
		if ! verify_raid_bdev_process $raid_bdev_name "rebuild" "spare" > /dev/null; then
			break
		fi
		sleep 1
	done

	# Check if rebuild is not running and the RAID bdev has the correct number of operational devices
	verify_raid_bdev_process $raid_bdev_name "none" "none"
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs_operational

	# Stop the RAID bdev
	$rpc_py bdev_raid_delete $raid_bdev_name
	[[ $($rpc_py bdev_raid_get_bdevs all | jq 'length') == 0 ]]

	if [ $verify = true ]; then
		if [ $background_io = true ]; then
			# Compare data on the rebuilt and other base bdevs
			nbd_start_disks $DEFAULT_RPC_ADDR "spare" "/dev/nbd0"
			for bdev in "${base_bdevs[@]:1}"; do
				if [ -z "$bdev" ]; then
					continue
				fi
				nbd_start_disks $DEFAULT_RPC_ADDR $bdev "/dev/nbd1"
				# Skip the area in front of the data offset on both devices
				cmp -i $((data_offset * base_blocklen)) /dev/nbd0 /dev/nbd1
				nbd_stop_disks $DEFAULT_RPC_ADDR "/dev/nbd1"
			done
			nbd_stop_disks $DEFAULT_RPC_ADDR "/dev/nbd0"
		else
			# Compare data on the removed and rebuilt base bdevs
			nbd_start_disks $DEFAULT_RPC_ADDR "${base_bdevs[0]} spare" "/dev/nbd0 /dev/nbd1"
			cmp -i $((data_offset * base_blocklen)) /dev/nbd0 /dev/nbd1
			nbd_stop_disks $DEFAULT_RPC_ADDR "/dev/nbd0 /dev/nbd1"
		fi
	fi

	if [ $superblock = true ]; then
		# Remove then re-add a base bdev to assemble the raid bdev again
		$rpc_py bdev_passthru_delete "spare"
		$rpc_py bdev_passthru_create -b "spare_delay" -p "spare"
		$rpc_py bdev_wait_for_examine

		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs_operational
		verify_raid_bdev_process $raid_bdev_name "none" "none"
		[[ $($rpc_py bdev_raid_get_bdevs all | jq -r '.[].base_bdevs_list[0].name') == "spare" ]]

		# Remove and re-add a base bdev - rebuild should start automatically
		$rpc_py bdev_raid_remove_base_bdev "spare"
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
		sleep 1
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

		# Same as above but re-add through examine
		$rpc_py bdev_passthru_delete "spare"
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		$rpc_py bdev_passthru_create -b "spare_delay" -p "spare"
		sleep 1
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

		# Stop the rebuild
		$rpc_py bdev_passthru_delete "spare"
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		verify_raid_bdev_process $raid_bdev_name "none" "none"

		# Re-adding a base bdev that was replaced (no longer is a member of the array) should not be allowed
		$rpc_py bdev_passthru_delete ${base_bdevs[0]}
		$rpc_py bdev_passthru_create -b ${base_bdevs[0]}_malloc -p ${base_bdevs[0]}
		sleep 1
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		verify_raid_bdev_process $raid_bdev_name "none" "none"
		NOT $rpc_py bdev_raid_add_base_bdev $raid_bdev_name ${base_bdevs[0]}
		sleep 1
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		verify_raid_bdev_process $raid_bdev_name "none" "none"
	fi

	killprocess $raid_pid

	return 0
}
801
# Run bdevperf I/O against a raid bdev built on error-injection bdevs, inject a
# failure for one I/O type on the first base bdev and check both the resulting
# raid bdev state and the failure rate reported by bdevperf.
# Arguments: $1 - raid level
#            $2 - number of base bdevs
#            $3 - I/O type handed to bdev_error_inject_error
# Globals:   tmp_dir (read) - directory for the bdevperf log
#            raid_pid (written) - pid of the started bdevperf
# Returns:   0 when the reported fail/s value is "0.00" for levels with
#            redundancy, or different from "0.00" for levels without
function raid_io_error_test() {
	local raid_level=$1
	local num_base_bdevs=$2
	local error_io_type=$3
	local raid_bdev_name="raid_bdev1"
	local base_bdevs=()
	local strip_size=0
	local create_arg=""
	local bdevperf_log
	local fail_per_s
	local expected_num_base_bdevs=$num_base_bdevs
	local idx
	local member

	for ((idx = 1; idx <= num_base_bdevs; idx++)); do
		base_bdevs+=("BaseBdev$idx")
	done

	# Every level except raid1 is created with a 64 KiB strip size.
	if [[ $raid_level != "raid1" ]]; then
		strip_size=64
		create_arg=" -z $strip_size"
	fi

	bdevperf_log=$(mktemp -p "$tmp_dir")

	"$rootdir/build/examples/bdevperf" -T $raid_bdev_name -t 60 -w randrw -M 50 -o 128k -q 1 -z -f -L bdev_raid > $bdevperf_log &
	raid_pid=$!
	waitforlisten $raid_pid

	# Stack per base bdev: malloc -> error bdev (EE_ prefix) -> passthru
	for member in "${base_bdevs[@]}"; do
		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${member}_malloc
		$rpc_py bdev_error_create ${member}_malloc
		$rpc_py bdev_passthru_create -b EE_${member}_malloc -p $member
	done

	# Create the raid bdev (with superblock) and make sure it came up
	$rpc_py bdev_raid_create $create_arg -r $raid_level -b "'${base_bdevs[*]}'" -n $raid_bdev_name -s
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs

	# Kick off the bdevperf job and give it a second to start issuing I/O
	"$rootdir/examples/bdev/bdevperf/bdevperf.py" perform_tests &
	sleep 1

	# Make the first base bdev fail the requested I/O type
	$rpc_py bdev_error_inject_error EE_${base_bdevs[0]}_malloc $error_io_type failure

	# Only a write error on raid1 is expected to reduce the operational count.
	if [[ $raid_level = "raid1" && $error_io_type = "write" ]]; then
		expected_num_base_bdevs=$((num_base_bdevs - 1))
	fi
	verify_raid_bdev_state $raid_bdev_name online $raid_level $strip_size $expected_num_base_bdevs

	$rpc_py bdev_raid_delete $raid_bdev_name

	killprocess $raid_pid

	# Pull the 6th column of the raid bdev's result line out of the bdevperf log.
	# Levels with redundancy must report no failures; the others must report some.
	fail_per_s=$(grep -v Job $bdevperf_log | grep $raid_bdev_name | awk '{print $6}')
	if ! has_redundancy $raid_level; then
		[[ "$fail_per_s" != "0.00" ]]
		return
	fi
	[[ "$fail_per_s" = "0.00" ]]
}
865
# Print the num_blocks value that bdev_get_bdevs reports for the bdev named $1.
function _raid_resize_sb_num_blocks() {
	$rpc_py bdev_get_bdevs -b "$1" | jq '.[].num_blocks'
}

# Fail (with a message on stderr) unless both lvol base bdevs are $1 MiB large.
# The conversion from num_blocks assumes 512-byte blocks.
function _raid_resize_sb_expect_lvol_mib() {
	local expected_mib=$1
	local lvol num_blocks

	for lvol in lvs0/lvol0 lvs0/lvol1; do
		num_blocks=$(_raid_resize_sb_num_blocks "$lvol") || return 1
		if ! ((num_blocks * 512 / 1048576 == expected_mib)); then
			echo "$lvol: expected $expected_mib MiB, got '$num_blocks' blocks" >&2
			return 1
		fi
	done
}

# Fail (with a message on stderr) unless the Raid bdev has exactly $1 blocks.
function _raid_resize_sb_expect_raid_blocks() {
	local expected_blocks=$1
	local num_blocks

	num_blocks=$(_raid_resize_sb_num_blocks Raid) || return 1
	if ! ((num_blocks == expected_blocks)); then
		echo "Raid: expected $expected_blocks blocks, got '$num_blocks'" >&2
		return 1
	fi
}

# Resize test for a RAID bdev created with the -s option on top of two lvols.
#
# Arguments: $1 - RAID level passed to bdev_raid_create -r; only 0 and 1 are
#                 supported.
# Globals:   rootdir, rpc_py, base_blocklen (read); raid_pid (written, not local).
# Returns:   0 when every size check passes; 1 on an unsupported level or on
#            the first failed size check (the reason is printed to stderr).
function raid_resize_superblock_test() {
	local raid_level=$1
	local raid_create_args=(-n Raid -r "$raid_level")
	local blocks_before blocks_after

	# Resolve the per-level settings up front so that an unsupported level is
	# rejected before the app is started, instead of silently skipping the RAID
	# creation and every RAID size check.
	case "$raid_level" in
		0)
			raid_create_args+=(-z 64)
			blocks_before=245760
			blocks_after=393216
			;;
		1)
			blocks_before=122880
			blocks_after=196608
			;;
		*)
			echo "raid_resize_superblock_test: unsupported raid level '$raid_level'" >&2
			return 1
			;;
	esac

	"$rootdir/test/app/bdev_svc/bdev_svc" -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid

	$rpc_py bdev_malloc_create -b malloc0 512 $base_blocklen

	# The lvstore sits on a passthru bdev so that the whole stack can be torn
	# down and brought back later by deleting and re-creating pt0.
	$rpc_py bdev_passthru_create -b malloc0 -p pt0
	$rpc_py bdev_lvol_create_lvstore pt0 lvs0

	$rpc_py bdev_lvol_create -l lvs0 lvol0 64
	$rpc_py bdev_lvol_create -l lvs0 lvol1 64

	$rpc_py bdev_raid_create "${raid_create_args[@]}" -b "'lvs0/lvol0 lvs0/lvol1'" -s

	# Check size of base bdevs first, then of the Raid bdev before the resize
	_raid_resize_sb_expect_lvol_mib 64 || return 1
	_raid_resize_sb_expect_raid_blocks "$blocks_before" || return 1

	# Resize bdevs
	$rpc_py bdev_lvol_resize lvs0/lvol0 100
	$rpc_py bdev_lvol_resize lvs0/lvol1 100

	# Both the base bdevs and the Raid bdev should report the new size
	_raid_resize_sb_expect_lvol_mib 100 || return 1
	_raid_resize_sb_expect_raid_blocks "$blocks_after" || return 1

	$rpc_py bdev_passthru_delete pt0
	$rpc_py bdev_passthru_create -b malloc0 -p pt0

	# After the passthru bdev is re-created, the RAID bdev should start from
	# superblock and its size should be the same as after it was resized.
	_raid_resize_sb_expect_raid_blocks "$blocks_after" || return 1

	killprocess $raid_pid

	return 0
}
925
# Checks that the data_offset reported for the third base bdev of a raid1
# changes when the null bdev in that slot is swapped for a malloc bdev with a
# different optimal_io_boundary.
# Takes no arguments; reads rootdir, rpc_py and base_blocklen, sets raid_pid.
function raid_resize_data_offset_test() {
	local malloc_name
	local reported_offset

	$rootdir/test/app/bdev_svc/bdev_svc -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid

	# Two malloc bdevs plus one null bdev, the latter gets replaced further down
	for malloc_name in malloc0 malloc1; do
		$rpc_py bdev_malloc_create -b $malloc_name 64 $base_blocklen -o 16
	done
	$rpc_py bdev_null_create null0 64 $base_blocklen

	$rpc_py bdev_raid_create -n Raid -r 1 -b "'malloc0 malloc1 null0'" -s

	# Initial data_offset of the third slot
	reported_offset=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[].base_bdevs_list[2].data_offset')
	((reported_offset == 2048))

	$rpc_py bdev_null_delete null0

	# A malloc bdev takes the place of the null bdev; its optimal_io_boundary
	# differs from the other base bdevs to force the offset to be recalculated
	$rpc_py bdev_malloc_create -b malloc2 512 $base_blocklen -o 30
	$rpc_py bdev_raid_add_base_bdev Raid malloc2

	# The third slot must now report the updated data_offset
	reported_offset=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[].base_bdevs_list[2].data_offset')
	((reported_offset == 2070))

	killprocess $raid_pid

	return 0
}
957
# Scratch directory used by the tests; cleanup() removes it again.
mkdir -p "$tmp_dir"
# Until the trap is cleared at the very bottom, any exit is reported as failure.
trap 'cleanup; exit 1' EXIT

# Block size handed to the bdev creation RPCs by the test functions.
base_blocklen=512

# --- resize tests on superblock-enabled RAID bdevs ---
run_test "raid1_resize_data_offset_test" raid_resize_data_offset_test

run_test "raid0_resize_superblock_test" raid_resize_superblock_test 0
run_test "raid1_resize_superblock_test" raid_resize_superblock_test 1

# --- tests that need the nbd kernel module (Linux only) ---
if [[ $(uname -s) == Linux ]] && modprobe -n nbd; then
	# Remembered so that the nbd-dependent tests further down can be gated too.
	has_nbd=true
	modprobe nbd
	run_test "raid_function_test_raid0" raid_function_test raid0
	run_test "raid_function_test_concat" raid_function_test concat
fi

run_test "raid0_resize_test" raid_resize_test 0
run_test "raid1_resize_test" raid_resize_test 1

# --- state, superblock and I/O error tests: every level with 2, 3 and 4 base bdevs ---
for n in 2 3 4; do
	for level in raid0 concat raid1; do
		run_test "raid_state_function_test" raid_state_function_test "$level" "$n" false
		run_test "raid_state_function_test_sb" raid_state_function_test "$level" "$n" true
		run_test "raid_superblock_test" raid_superblock_test "$level" "$n"
		run_test "raid_read_error_test" raid_io_error_test "$level" "$n" read
		run_test "raid_write_error_test" raid_io_error_test "$level" "$n" write
	done
done

# --- raid1 rebuild tests, only when nbd is available ---
if [[ $has_nbd == true ]]; then
	for n in 2 4; do
		run_test "raid_rebuild_test" raid_rebuild_test raid1 "$n" false false true
		run_test "raid_rebuild_test_sb" raid_rebuild_test raid1 "$n" true false true
		run_test "raid_rebuild_test_io" raid_rebuild_test raid1 "$n" false true true
		run_test "raid_rebuild_test_sb_io" raid_rebuild_test raid1 "$n" true true true
	done
fi

# --- raid5f with 3 and 4 base bdevs; the rebuild variants again need nbd ---
for n in 3 4; do
	run_test "raid5f_state_function_test" raid_state_function_test raid5f "$n" false
	run_test "raid5f_state_function_test_sb" raid_state_function_test raid5f "$n" true
	run_test "raid5f_superblock_test" raid_superblock_test raid5f "$n"
	if [[ $has_nbd == true ]]; then
		run_test "raid5f_rebuild_test" raid_rebuild_test raid5f "$n" false false true
		run_test "raid5f_rebuild_test_sb" raid_rebuild_test raid5f "$n" true false true
	fi
done

# --- same raid1 tests again with a 4096-byte block size ---
base_blocklen=4096

run_test "raid_state_function_test_sb_4k" raid_state_function_test raid1 2 true
run_test "raid_superblock_test_4k" raid_superblock_test raid1 2
if [[ $has_nbd == true ]]; then
	run_test "raid_rebuild_test_sb_4k" raid_rebuild_test raid1 2 true false true
fi

# --- extra bdev_malloc_create arguments; going by the test names this selects
# separate metadata ---
base_malloc_params="-m 32"
run_test "raid_state_function_test_sb_md_separate" raid_state_function_test raid1 2 true
run_test "raid_superblock_test_md_separate" raid_superblock_test raid1 2
if [[ $has_nbd == true ]]; then
	run_test "raid_rebuild_test_sb_md_separate" raid_rebuild_test raid1 2 true false true
fi

# --- going by the test names, interleaved metadata ---
# NOTE(review): unlike the other rebuild tests, the one below is not gated on
# has_nbd and passes false as its last argument; presumably that argument
# turns off the nbd-based part of the test. Confirm against raid_rebuild_test.
base_malloc_params="-m 32 -i"
run_test "raid_state_function_test_sb_md_interleaved" raid_state_function_test raid1 2 true
run_test "raid_superblock_test_md_interleaved" raid_superblock_test raid1 2
run_test "raid_rebuild_test_sb_md_interleaved" raid_rebuild_test raid1 2 true false false

# All tests passed: drop the failure trap and clean up normally.
trap - EXIT
cleanup
1029