xref: /spdk/test/bdev/bdev_raid.sh (revision 8afdeef3becfe9409cc9e7372bd0bc10e8b7d46d)
1#!/usr/bin/env bash
2#  SPDX-License-Identifier: BSD-3-Clause
3#  Copyright (C) 2019 Intel Corporation
4#  All rights reserved.
5#
# Resolve the directory holding this script and the repository root. The
# expansions are quoted so that paths containing whitespace resolve correctly.
testdir=$(readlink -f "$(dirname "$0")")
rootdir=$(readlink -f "$testdir/../..")
# UNIX domain socket used to talk to the SPDK application started by each test.
rpc_server=/var/tmp/spdk-raid.sock

source "$rootdir/test/common/autotest_common.sh"
source "$testdir/nbd_common.sh"

# The scratch locations are derived only after the common helpers have been
# sourced, so SPDK_TEST_STORAGE is honored whether it is inherited from the
# caller's environment or set up while sourcing (presumably by
# autotest_common.sh - TODO confirm). Deriving them earlier could yield
# "/raidtest" when the variable is not exported yet, and cleanup() removes
# $tmp_dir recursively.
tmp_dir=$SPDK_TEST_STORAGE/raidtest
tmp_file=$tmp_dir/raidrandtest

# Kept as a plain string on purpose: call sites expand it unquoted so that it
# splits into the rpc.py command and its "-s <socket>" arguments.
rpc_py="$rootdir/scripts/rpc.py -s $rpc_server"
16
# Write random data to an NBD device, discard several block ranges and verify
# after every discard that the device content still matches a reference file
# in which the same ranges were overwritten with zeroes.
# Arguments: $1 - NBD device node to exercise
#            $2 - RPC server socket (accepted for compatibility, not used)
# Globals:   tmp_file (read) - path of the reference file
# Returns:   0 always. The whole check is skipped when blkdiscard is not
#            available. A failing command aborts the test only if the caller
#            runs with errexit enabled.
function raid_unmap_data_verify() {
	if hash blkdiscard; then
		local nbd=$1
		local blksize
		local rw_blk_num=4096
		local rw_len
		local unmap_blk_offs=(0 1028 321)
		local unmap_blk_nums=(128 2035 456)
		local unmap_off
		local unmap_len
		local i

		# Ask the kernel for the logical sector size directly. Cutting a
		# fixed field out of the padded lsblk column only works for
		# 3-digit values and yields an empty string for e.g. 4096.
		blksize=$(blockdev --getss "$nbd")
		rw_len=$((blksize * rw_blk_num))

		# data write
		dd if=/dev/urandom of="$tmp_file" bs="$blksize" count="$rw_blk_num"
		dd if="$tmp_file" of="$nbd" bs="$blksize" count="$rw_blk_num" oflag=direct
		blockdev --flushbufs "$nbd"

		# confirm random data is written correctly in raid0 device
		cmp -b -n "$rw_len" "$tmp_file" "$nbd"

		for ((i = 0; i < ${#unmap_blk_offs[@]}; i++)); do
			unmap_off=$((blksize * ${unmap_blk_offs[$i]}))
			unmap_len=$((blksize * ${unmap_blk_nums[$i]}))

			# data unmap on tmp_file (notrunc keeps the rest of the reference data)
			dd if=/dev/zero of="$tmp_file" bs="$blksize" seek="${unmap_blk_offs[$i]}" count="${unmap_blk_nums[$i]}" conv=notrunc

			# data unmap on raid bdev
			blkdiscard -o "$unmap_off" -l "$unmap_len" "$nbd"
			blockdev --flushbufs "$nbd"

			# data verify after unmap
			cmp -b -n "$rw_len" "$tmp_file" "$nbd"
		done
	fi

	return 0
}
56
# Tear down after a test: stop the SPDK application if one was started and is
# still alive, then remove the scratch directory.
# Globals: raid_pid (read), tmp_dir (read)
function cleanup() {
	if [[ -n $raid_pid ]]; then
		# Only kill the process when the recorded pid still exists.
		if ps -p $raid_pid > /dev/null; then
			killprocess $raid_pid
		fi
	fi

	rm -rf "$tmp_dir"
}
64
# Create two malloc base bdevs and a RAID bdev named "raid" on top of them by
# feeding a batch of RPC commands to rpc.py through a temporary file.
# Arguments: $1 - RAID level passed to bdev_raid_create
# Globals:   testdir, rpc_py, base_blocklen, base_malloc_params (read)
function configure_raid_bdev() {
	local raid_level=$1
	local rpc_batch=$testdir/rpcs.txt

	# Make sure no stale batch file is left from an earlier run.
	rm -rf $rpc_batch

	printf '%s\n' \
		"bdev_malloc_create 32 $base_blocklen $base_malloc_params -b Base_1" \
		"bdev_malloc_create 32 $base_blocklen $base_malloc_params -b Base_2" \
		"bdev_raid_create -z 64 -r $raid_level -b \"Base_1 Base_2\" -n raid" \
		>> $rpc_batch

	# rpc.py receives the commands on its standard input.
	$rpc_py < $rpc_batch

	rm -rf $rpc_batch
}
78
# Basic functional test: start the bdev_svc application, build a two-disk RAID
# bdev, export it over NBD, run the unmap/data verification and tear it down.
# Arguments: $1 - RAID level passed to configure_raid_bdev
# Globals:   rootdir, rpc_server, rpc_py (read); raid_pid (written)
# Returns:   0 on success, 1 when no online RAID bdev is found or the number
#            of NBD devices is not the expected one.
function raid_function_test() {
	local raid_level=$1
	local nbd=/dev/nbd0
	local raid_bdev
	local count

	"$rootdir/test/app/bdev_svc/bdev_svc" -r $rpc_server -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid $rpc_server

	configure_raid_bdev $raid_level
	# select(.) drops null, so the result is empty when no RAID bdev is online.
	raid_bdev=$($rpc_py bdev_raid_get_bdevs online | jq -r '.[0]["name"] | select(.)')
	# An unquoted comparison against "" turns into a malformed test when the
	# variable is empty, which silently skips this branch; use -z instead.
	if [ -z "$raid_bdev" ]; then
		echo "No raid0 device in SPDK app"
		return 1
	fi

	nbd_start_disks $rpc_server "$raid_bdev" $nbd
	count=$(nbd_get_count $rpc_server)
	if [ "$count" -ne 1 ]; then
		return 1
	fi

	raid_unmap_data_verify $nbd $rpc_server

	nbd_stop_disks $rpc_server $nbd
	count=$(nbd_get_count $rpc_server)
	if [ "$count" -ne 0 ]; then
		return 1
	fi

	killprocess $raid_pid

	return 0
}
114
# Check that a RAID bdev exists, is in the expected state and reports the
# expected level, strip size and base bdev counters.
# Arguments: $1 - RAID bdev name
#            $2 - expected state (also used as the bdev_raid_get_bdevs category)
#            $3 - expected RAID level
#            $4 - expected strip size in KiB
#            $5 - expected number of operational base bdevs
# Globals:   rpc_py (read)
# Outputs:   a one-line description of the first mismatch on stdout
# Returns:   0 when everything matches, 1 on the first mismatch. Shell tracing
#            is restored on every path, including failures.
function verify_raid_bdev_state() {
	local raid_bdev_name=$1
	local expected_state=$2
	local raid_level=$3
	local strip_size=$4
	local num_base_bdevs_operational=$5
	local raid_bdev_info
	local num_base_bdevs
	local num_base_bdevs_discovered
	local tmp
	local err=""
	local rc=0

	raid_bdev_info=$($rpc_py bdev_raid_get_bdevs all | jq -r ".[] | select(.name == \"$raid_bdev_name\")")

	# The checks below are noisy when traced; tracing is switched back on
	# before returning, no matter which check fails.
	xtrace_disable
	if [[ -z $raid_bdev_info ]]; then
		err="No raid device \"$raid_bdev_name\" in SPDK app"
	else
		# Query again, this time restricted to the expected state category.
		raid_bdev_info=$($rpc_py bdev_raid_get_bdevs "$expected_state" | jq -r ".[] | select(.name == \"$raid_bdev_name\")")
		if [[ -z $raid_bdev_info ]]; then
			err="$raid_bdev_name is not in $expected_state state"
		fi
	fi

	if [[ -z $err ]]; then
		tmp=$(jq -r '.state' <<< "$raid_bdev_info")
		[[ $tmp == "$expected_state" ]] || err="incorrect state: $tmp, expected: $expected_state"
	fi

	if [[ -z $err ]]; then
		tmp=$(jq -r '.raid_level' <<< "$raid_bdev_info")
		[[ $tmp == "$raid_level" ]] || err="incorrect level: $tmp, expected: $raid_level"
	fi

	if [[ -z $err ]]; then
		tmp=$(jq -r '.strip_size_kb' <<< "$raid_bdev_info")
		[[ $tmp == "$strip_size" ]] || err="incorrect strip size: $tmp, expected: $strip_size"
	fi

	if [[ -z $err ]]; then
		# The reported counter must agree with the length of the base bdev list.
		num_base_bdevs=$(jq -r '[.base_bdevs_list[]] | length' <<< "$raid_bdev_info")
		tmp=$(jq -r '.num_base_bdevs' <<< "$raid_bdev_info")
		[[ $num_base_bdevs == "$tmp" ]] || err="incorrect num_base_bdevs: $tmp, expected: $num_base_bdevs"
	fi

	if [[ -z $err ]]; then
		# Same for the number of base bdevs flagged as configured.
		num_base_bdevs_discovered=$(jq -r '[.base_bdevs_list[] | select(.is_configured)] | length' <<< "$raid_bdev_info")
		tmp=$(jq -r '.num_base_bdevs_discovered' <<< "$raid_bdev_info")
		[[ $num_base_bdevs_discovered == "$tmp" ]] || err="incorrect num_base_bdevs_discovered: $tmp, expected: $num_base_bdevs_discovered"
	fi

	if [[ -z $err ]]; then
		tmp=$(jq -r '.num_base_bdevs_operational' <<< "$raid_bdev_info")
		[[ $num_base_bdevs_operational == "$tmp" ]] || err="incorrect num_base_bdevs_operational $tmp, expected: $num_base_bdevs_operational"
	fi

	if [[ -n $err ]]; then
		echo "$err"
		rc=1
	fi

	xtrace_restore
	return $rc
}
180
# Check the background process reported for a RAID bdev.
# Arguments: $1 - RAID bdev name
#            $2 - expected process type, "none" when no process is expected
#            $3 - expected process target, "none" when no process is expected
# Globals:   rpc_py (read)
# Returns:   0 only when BOTH type and target match. The two comparisons are
#            combined into one command so the result is also correct when the
#            function is called from a condition, where errexit is inactive
#            and only the status of the last command would otherwise count.
function verify_raid_bdev_process() {
	local raid_bdev_name=$1
	local process_type=$2
	local target=$3
	local raid_bdev_info
	local actual_type
	local actual_target

	raid_bdev_info=$($rpc_py bdev_raid_get_bdevs all | jq -r ".[] | select(.name == \"$raid_bdev_name\")")

	# A missing "process" object is reported as "none" for both fields.
	actual_type=$(jq -r '.process.type // "none"' <<< "$raid_bdev_info")
	actual_target=$(jq -r '.process.target // "none"' <<< "$raid_bdev_info")

	[[ $actual_type == "$process_type" && $actual_target == "$target" ]]
}
192
# Compare block size and metadata related properties of a RAID bdev with those
# of every configured base bdev.
# Arguments: $1 - RAID bdev name
# Globals:   rpc_py (read)
# Returns:   status of the last comparison performed; with errexit enabled the
#            first mismatch aborts the test.
function verify_raid_bdev_properties() {
	local raid_bdev_name=$1
	local raid_bdev_info
	local base_bdev_info
	local base_bdev_names
	local name
	local prop

	raid_bdev_info=$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq '.[]')
	# Only base bdevs flagged as configured take part in the comparison.
	base_bdev_names=$(jq -r '.driver_specific.raid.base_bdevs_list[] | select(.is_configured == true).name' <<< "$raid_bdev_info")

	for name in $base_bdev_names; do
		base_bdev_info=$($rpc_py bdev_get_bdevs -b $name | jq '.[]')
		for prop in block_size md_size md_interleave dif_type; do
			[[ $(jq ".$prop" <<< "$raid_bdev_info") == $(jq ".$prop" <<< "$base_bdev_info") ]]
		done
	done
}
211
# Tell whether a RAID level is treated as redundant by these tests.
# Arguments: $1 - RAID level name
# Returns:   0 for "raid1" and "raid5f", 1 for anything else
function has_redundancy() {
	if [[ $1 == "raid1" || $1 == "raid5f" ]]; then
		return 0
	fi
	return 1
}
218
# Walk a RAID bdev through its state machine: create it before its base bdevs
# exist, add base bdevs one by one until it goes online, remove them again and
# (for more than two base bdevs) remove/re-add members while still configuring.
# Arguments: $1 - RAID level name
#            $2 - number of base bdevs
#            $3 - "true" to create the RAID bdev with a superblock (-s)
# Globals:   rootdir, rpc_server, rpc_py, base_blocklen, base_malloc_params
#            (read); raid_pid (written)
# Returns:   0 on success, 1 when the RAID bdev disappears too early or is not
#            removed after its last base bdev is deleted.
function raid_state_function_test() {
	local raid_level=$1
	local num_base_bdevs=$2
	local superblock=$3
	local raid_bdev
	# Loop counter kept local so the caller's variables are not clobbered.
	local i
	local base_bdevs=($(for ((i = 1; i <= num_base_bdevs; i++)); do echo BaseBdev$i; done))
	local raid_bdev_name="Existed_Raid"
	local strip_size
	local strip_size_create_arg
	local superblock_create_arg
	local expected_state

	# raid1 is created without a strip size argument and is expected to
	# report a strip size of 0.
	if [ "$raid_level" != "raid1" ]; then
		strip_size=64
		strip_size_create_arg="-z $strip_size"
	else
		strip_size=0
	fi

	if [ "$superblock" = true ]; then
		superblock_create_arg="-s"
	else
		superblock_create_arg=""
	fi

	"$rootdir/test/app/bdev_svc/bdev_svc" -r $rpc_server -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid $rpc_server

	# Step1: create a RAID bdev before any of its base bdevs exists
	# Expect state: CONFIGURING
	$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name
	verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
	$rpc_py bdev_raid_delete $raid_bdev_name

	# Step2: create one base bdev and add to the RAID bdev
	# Expect state: CONFIGURING
	$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name
	$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${base_bdevs[0]}
	waitforbdev ${base_bdevs[0]}
	verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
	$rpc_py bdev_raid_delete $raid_bdev_name

	# Step3: create remaining base bdevs and add to the RAID bdev
	# Expect state: ONLINE
	$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name
	for ((i = 1; i < num_base_bdevs; i++)); do
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${base_bdevs[$i]}
		waitforbdev ${base_bdevs[$i]}
	done
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
	verify_raid_bdev_properties $raid_bdev_name

	# Step4: delete one base bdev from the RAID bdev
	# Expect state: ONLINE (degraded) for redundant levels, OFFLINE otherwise
	$rpc_py bdev_malloc_delete ${base_bdevs[0]}
	if ! has_redundancy $raid_level; then
		expected_state="offline"
	else
		expected_state="online"
	fi
	verify_raid_bdev_state $raid_bdev_name $expected_state $raid_level $strip_size $((num_base_bdevs - 1))

	# Step5: delete remaining base bdevs from the RAID bdev
	# Expect state: removed from system
	for ((i = 1; i < num_base_bdevs; i++)); do
		raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[0]["name"]')
		if [ "$raid_bdev" != "$raid_bdev_name" ]; then
			echo "$raid_bdev_name removed before all base bdevs were deleted"
			return 1
		fi
		$rpc_py bdev_malloc_delete ${base_bdevs[$i]}
	done
	raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[0]["name"] | select(.)')
	if [ -n "$raid_bdev" ]; then
		echo "$raid_bdev_name is not removed"
		return 1
	fi

	if [ "$num_base_bdevs" -gt 2 ]; then
		# Test removing and re-adding base bdevs when in CONFIGURING state
		# All base bdevs except the first one are created up front, so the
		# RAID bdev stays in CONFIGURING during the steps below.
		for ((i = 1; i < num_base_bdevs; i++)); do
			$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${base_bdevs[$i]}
			waitforbdev ${base_bdevs[$i]}
		done
		$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs

		$rpc_py bdev_raid_remove_base_bdev ${base_bdevs[1]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[1].is_configured') == "false" ]]

		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${base_bdevs[0]}
		waitforbdev ${base_bdevs[0]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[0].is_configured') == "true" ]]

		$rpc_py bdev_raid_remove_base_bdev ${base_bdevs[2]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[2].is_configured') == "false" ]]

		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name ${base_bdevs[2]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[2].is_configured') == "true" ]]

		$rpc_py bdev_malloc_delete ${base_bdevs[0]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[0].is_configured') == "false" ]]

		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name ${base_bdevs[1]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[1].is_configured') == "true" ]]

		# A bdev with a new name but the UUID recorded for slot 0 fills that slot.
		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b NewBaseBdev -u "$($rpc_py bdev_raid_get_bdevs all | jq -r '.[0].base_bdevs_list[0].uuid')"
		waitforbdev NewBaseBdev
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
		verify_raid_bdev_properties $raid_bdev_name

		$rpc_py bdev_raid_delete $raid_bdev_name
	fi

	killprocess $raid_pid

	return 0
}
345
# Check how a RAID bdev built on two null bdevs reacts to growing its base
# bdevs: the RAID size must stay unchanged after only the first base bdev was
# resized and must grow once the second one was resized as well.
# Arguments: $1 - numeric RAID level; 0 gets a strip size and is expected to
#                 be twice as large as a base bdev, any other level is
#                 expected to be as large as one base bdev
# Globals:   rootdir, rpc_server, rpc_py, base_blocklen (read);
#            raid_pid (written)
# Returns:   0 on success, 1 when a size check fails
function raid_resize_test() {
	local raid_level=$1
	local blksize=$base_blocklen
	local bdev_size_mb=32
	local new_bdev_size_mb=$((bdev_size_mb * 2))
	local blkcnt
	local raid_size_mb
	local new_raid_size_mb
	local expected_size
	local base_name
	local step
	local per_base_mb

	$rootdir/test/app/bdev_svc/bdev_svc -r $rpc_server -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid $rpc_server

	for base_name in Base_1 Base_2; do
		$rpc_py bdev_null_create $base_name $bdev_size_mb $blksize
	done

	if [ $raid_level -eq 0 ]; then
		$rpc_py bdev_raid_create -z 64 -r $raid_level -b "Base_1 Base_2" -n Raid
	else
		$rpc_py bdev_raid_create -r $raid_level -b "Base_1 Base_2" -n Raid
	fi

	# Each step names the base bdev to grow and the per-base size the RAID
	# bdev is expected to be built from afterwards: still the old size after
	# Base_1 alone was resized, the new size once Base_2 followed.
	for step in "Base_1:$bdev_size_mb" "Base_2:$new_bdev_size_mb"; do
		base_name=${step%%:*}
		per_base_mb=${step##*:}

		$rpc_py bdev_null_resize $base_name $new_bdev_size_mb

		blkcnt=$($rpc_py bdev_get_bdevs -b Raid | jq '.[].num_blocks')
		raid_size_mb=$((blkcnt * blksize / 1048576))
		if [ $raid_level -eq 0 ]; then
			expected_size=$((per_base_mb * 2))
		else
			expected_size=$per_base_mb
		fi
		if [[ $raid_size_mb != "$expected_size" ]]; then
			echo "resize failed"
			return 1
		fi
	done

	killprocess $raid_pid

	return 0
}
406
# Exercise RAID assembly from on-disk superblocks: create a RAID bdev with a
# superblock on passthru bdevs, tear the passthru layer down and re-create it
# in various orders, and check the state and UUID of the re-assembled RAID.
# Arguments: $1 - RAID level name
#            $2 - number of base bdevs
# Globals:   rootdir, rpc_server, rpc_py, base_blocklen, base_malloc_params
#            (read); raid_pid (written)
# Returns:   0 on success, 1 when an explicit check fails
function raid_superblock_test() {
	local raid_level=$1
	local num_base_bdevs=$2
	local base_bdevs_malloc=()
	local base_bdevs_pt=()
	local base_bdevs_pt_uuid=()
	local raid_bdev_name="raid_bdev1"
	local strip_size
	local strip_size_create_arg
	local raid_bdev_uuid
	local raid_bdev
	local bdev_malloc
	local bdev_pt
	local bdev_pt_uuid
	# Index and iterator variables kept local so nothing leaks to the caller.
	local i
	local pt

	# raid1 is created without a strip size argument and is expected to
	# report a strip size of 0.
	if [ "$raid_level" != "raid1" ]; then
		strip_size=64
		strip_size_create_arg="-z $strip_size"
	else
		strip_size=0
	fi

	"$rootdir/test/app/bdev_svc/bdev_svc" -r $rpc_server -L bdev_raid &
	raid_pid=$!
	waitforlisten $raid_pid $rpc_server

	# Create base bdevs
	for ((i = 1; i <= num_base_bdevs; i++)); do
		bdev_malloc="malloc$i"
		bdev_pt="pt$i"
		# Zero-pad the index into the 12-digit last UUID group so the
		# UUID stays well-formed for ten or more base bdevs as well.
		printf -v bdev_pt_uuid '00000000-0000-0000-0000-%012d' "$i"

		base_bdevs_malloc+=($bdev_malloc)
		base_bdevs_pt+=($bdev_pt)
		base_bdevs_pt_uuid+=($bdev_pt_uuid)

		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b $bdev_malloc
		$rpc_py bdev_passthru_create -b $bdev_malloc -p $bdev_pt -u $bdev_pt_uuid
	done

	# Create RAID bdev with superblock
	$rpc_py bdev_raid_create $strip_size_create_arg -r $raid_level -b "${base_bdevs_pt[*]}" -n $raid_bdev_name -s
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
	verify_raid_bdev_properties $raid_bdev_name

	# Get RAID bdev's UUID
	raid_bdev_uuid=$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')
	if [ -z "$raid_bdev_uuid" ]; then
		return 1
	fi

	# Stop the RAID bdev
	$rpc_py bdev_raid_delete $raid_bdev_name
	raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
	if [ -n "$raid_bdev" ]; then
		return 1
	fi

	# Delete the passthru bdevs
	for pt in "${base_bdevs_pt[@]}"; do
		$rpc_py bdev_passthru_delete $pt
	done
	if [ "$($rpc_py bdev_get_bdevs | jq -r '[.[] | select(.product_name == "passthru")] | any')" == "true" ]; then
		return 1
	fi

	# Try to create new RAID bdev from malloc bdevs
	# Should fail due to superblock still present on base bdevs
	NOT $rpc_py bdev_raid_create $strip_size_create_arg -r $raid_level -b "${base_bdevs_malloc[*]}" -n $raid_bdev_name

	raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
	if [ -n "$raid_bdev" ]; then
		return 1
	fi

	# Re-add first base bdev
	$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[0]} -p ${base_bdevs_pt[0]} -u ${base_bdevs_pt_uuid[0]}

	# Check if the RAID bdev was assembled from superblock
	verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs

	if [ "$num_base_bdevs" -gt 2 ]; then
		# Re-add the second base bdev and remove it again
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[1]} -p ${base_bdevs_pt[1]} -u ${base_bdevs_pt_uuid[1]}
		$rpc_py bdev_passthru_delete ${base_bdevs_pt[1]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
	fi

	# Re-add remaining base bdevs
	for ((i = 1; i < num_base_bdevs; i++)); do
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}
	done

	# Check if the RAID bdev is in online state
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
	verify_raid_bdev_properties $raid_bdev_name

	# Check if the RAID bdev has the same UUID as when first created
	if [ "$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')" != "$raid_bdev_uuid" ]; then
		return 1
	fi

	if has_redundancy $raid_level; then
		# Delete one base bdev
		$rpc_py bdev_passthru_delete ${base_bdevs_pt[0]}

		# Check if the RAID bdev is in online state (degraded)
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

		# Stop the RAID bdev
		$rpc_py bdev_raid_delete $raid_bdev_name
		raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
		if [ -n "$raid_bdev" ]; then
			return 1
		fi

		# Delete remaining base bdevs
		for ((i = 1; i < num_base_bdevs; i++)); do
			$rpc_py bdev_passthru_delete ${base_bdevs_pt[$i]}
		done

		# Re-add base bdevs from the second up to (not including) the last one
		for ((i = 1; i < num_base_bdevs - 1; i++)); do
			$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}

			# Check if the RAID bdev is in configuring state
			verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $((num_base_bdevs - 1))
		done

		# Re-add the last base bdev
		i=$((num_base_bdevs - 1))
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}

		# Check if the RAID bdev is in online state (degraded)
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

		# Stop the RAID bdev
		$rpc_py bdev_raid_delete $raid_bdev_name
		raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
		if [ -n "$raid_bdev" ]; then
			return 1
		fi

		# From here on $i is the index of the last base bdev.
		i=$((num_base_bdevs - 1))
		if [ "$num_base_bdevs" -gt 2 ]; then
			# Delete the last base bdev
			$rpc_py bdev_passthru_delete ${base_bdevs_pt[$i]}
		fi

		# Re-add first base bdev
		# This is the "failed" device and contains the "old" version of the superblock
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[0]} -p ${base_bdevs_pt[0]} -u ${base_bdevs_pt_uuid[0]}

		if [ "$num_base_bdevs" -gt 2 ]; then
			# Check if the RAID bdev is in configuring state
			# This should use the newer superblock version and have n-1 online base bdevs
			verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $((num_base_bdevs - 1))
			[[ $($rpc_py bdev_raid_get_bdevs configuring | jq -r '.[].base_bdevs_list[0].is_configured') == "false" ]]

			# Re-add the last base bdev
			$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}
		fi

		# Check if the RAID bdev is in online state (degraded)
		# This should use the newer superblock version and have n-1 online base bdevs
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))
		[[ $($rpc_py bdev_raid_get_bdevs online | jq -r '.[].base_bdevs_list[0].is_configured') == "false" ]]

		# Check if the RAID bdev has the same UUID as when first created
		if [ "$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')" != "$raid_bdev_uuid" ]; then
			return 1
		fi
	fi

	killprocess $raid_pid

	return 0
}
582
# Rebuild test: create a RAID bdev inside bdevperf, remove a base bdev, add a
# spare and follow the rebuild process through interruption, restart,
# (optionally) re-assembly from superblock, completion and data comparison.
# Arguments: $1 - RAID level name
#            $2 - number of base bdevs
#            $3 - "true" to create the RAID bdev with a superblock
#            $4 - "true" to run bdevperf I/O in the background during rebuild
#                 (only handled for raid1; other levels return 1 right away)
#            $5 - "true" to compare base bdev data after the rebuild
# Globals:   rootdir, rpc_server, rpc_py, base_blocklen, base_malloc_params
#            (read); raid_pid (written)
# Returns:   0 on success, 1 when the level/background I/O combination is not
#            handled
function raid_rebuild_test() {
	local raid_level=$1
	local num_base_bdevs=$2
	local superblock=$3
	local background_io=$4
	local verify=$5
	# Loop variables kept local so nothing leaks to the caller.
	local i
	local bdev
	local base_bdevs=($(for ((i = 1; i <= num_base_bdevs; i++)); do echo BaseBdev$i; done))
	local raid_bdev_name="raid_bdev1"
	local strip_size
	local create_arg
	local raid_bdev_size
	local data_offset

	if [ "$raid_level" != "raid1" ]; then
		if [ "$background_io" = true ]; then
			echo "skipping rebuild test with io for level $raid_level"
			return 1
		fi
		strip_size=64
		create_arg+=" -z $strip_size"
	else
		strip_size=0
	fi

	if [ "$superblock" = true ]; then
		create_arg+=" -s"
	fi

	"$rootdir/build/examples/bdevperf" -r $rpc_server -T $raid_bdev_name -t 60 -w randrw -M 50 -o 3M -q 2 -U -z -L bdev_raid &
	raid_pid=$!
	waitforlisten $raid_pid $rpc_server

	# Create base bdevs
	for bdev in "${base_bdevs[@]}"; do
		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${bdev}_malloc
		$rpc_py bdev_passthru_create -b ${bdev}_malloc -p $bdev
	done

	# Create spare bdev
	# A delay bdev sits underneath the spare; presumably the write latency is
	# there to keep the rebuild running long enough to be observed.
	$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b "spare_malloc"
	$rpc_py bdev_delay_create -b "spare_malloc" -d "spare_delay" -r 0 -t 0 -w 100000 -n 100000
	$rpc_py bdev_passthru_create -b "spare_delay" -p "spare"

	# Create RAID bdev
	$rpc_py bdev_raid_create $create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs

	# Get RAID bdev's size
	raid_bdev_size=$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[].num_blocks')

	# Get base bdev's data offset
	data_offset=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[].base_bdevs_list[0].data_offset')

	if [ "$background_io" = true ]; then
		# Start user I/O
		"$rootdir/examples/bdev/bdevperf/bdevperf.py" -s $rpc_server perform_tests &
	elif [ "$verify" = true ]; then
		local write_unit_size

		# Write random data to the RAID bdev
		nbd_start_disks $rpc_server $raid_bdev_name /dev/nbd0
		if [ "$raid_level" = "raid5f" ]; then
			# Write whole units of (strip_size * 2 * data disks) blocks and
			# raise the NBD request size limit accordingly.
			write_unit_size=$((strip_size * 2 * (num_base_bdevs - 1)))
			echo $((base_blocklen * write_unit_size / 1024)) > /sys/block/nbd0/queue/max_sectors_kb
		else
			write_unit_size=1
		fi
		dd if=/dev/urandom of=/dev/nbd0 bs=$((base_blocklen * write_unit_size)) count=$((raid_bdev_size / write_unit_size)) oflag=direct
		nbd_stop_disks $rpc_server /dev/nbd0
	fi

	# Remove one base bdev
	$rpc_py bdev_raid_remove_base_bdev ${base_bdevs[0]}

	# Check if the RAID bdev is in online state (degraded)
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

	# Add bdev for rebuild
	$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
	sleep 1

	# Check if rebuild started
	verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

	# Remove the rebuild target bdev
	$rpc_py bdev_raid_remove_base_bdev "spare"

	# Check if the RAID bdev is in online state (degraded)
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

	# Check if rebuild was stopped
	verify_raid_bdev_process $raid_bdev_name "none" "none"

	# Again, start the rebuild
	$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
	sleep 1
	verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

	# The second operand has to test the background_io parameter: testing an
	# undefined variable made the condition always false, so this section
	# never ran.
	if [ "$superblock" = true ] && [ "$background_io" = false ]; then
		# Stop the RAID bdev
		$rpc_py bdev_raid_delete $raid_bdev_name
		[[ $($rpc_py bdev_raid_get_bdevs all | jq 'length') == 0 ]]

		# Remove the passthru base bdevs, then re-add them to assemble the raid bdev again
		for ((i = 0; i < num_base_bdevs; i++)); do
			$rpc_py bdev_passthru_delete ${base_bdevs[$i]}
		done
		for ((i = 0; i < num_base_bdevs; i++)); do
			$rpc_py bdev_passthru_create -b ${base_bdevs[$i]}_malloc -p ${base_bdevs[$i]}
		done

		# Check if the RAID bdev is in online state (degraded)
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

		# Check if rebuild is not started
		verify_raid_bdev_process $raid_bdev_name "none" "none"

		# Again, start the rebuild
		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
		sleep 1
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"
	fi

	local num_base_bdevs_operational=$num_base_bdevs

	if [ "$raid_level" = "raid1" ] && [ "$num_base_bdevs" -gt 2 ]; then
		# Remove one more base bdev (not rebuild target)
		$rpc_py bdev_raid_remove_base_bdev ${base_bdevs[1]}

		# Ignore this bdev later when comparing data
		base_bdevs[1]=""
		((num_base_bdevs_operational--))

		# Check if rebuild is still running
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"
	fi

	# Wait for rebuild to finish
	local timeout=$((SECONDS + 30))
	while ((SECONDS < timeout)); do
		if ! verify_raid_bdev_process $raid_bdev_name "rebuild" "spare" > /dev/null; then
			break
		fi
		sleep 1
	done

	# Check if rebuild is not running and the RAID bdev has the correct number of operational devices
	verify_raid_bdev_process $raid_bdev_name "none" "none"
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs_operational

	# Stop the RAID bdev
	$rpc_py bdev_raid_delete $raid_bdev_name
	[[ $($rpc_py bdev_raid_get_bdevs all | jq 'length') == 0 ]]

	if [ "$verify" = true ]; then
		if [ "$background_io" = true ]; then
			# Compare data on the rebuilt and other base bdevs
			nbd_start_disks $rpc_server "spare" "/dev/nbd0"
			for bdev in "${base_bdevs[@]:1}"; do
				if [ -z "$bdev" ]; then
					continue
				fi
				nbd_start_disks $rpc_server $bdev "/dev/nbd1"
				# Skip the region in front of the data offset on both devices.
				cmp -i $((data_offset * base_blocklen)) /dev/nbd0 /dev/nbd1
				nbd_stop_disks $rpc_server "/dev/nbd1"
			done
			nbd_stop_disks $rpc_server "/dev/nbd0"
		else
			# Compare data on the removed and rebuilt base bdevs
			nbd_start_disks $rpc_server "${base_bdevs[0]} spare" "/dev/nbd0 /dev/nbd1"
			cmp -i $((data_offset * base_blocklen)) /dev/nbd0 /dev/nbd1
			nbd_stop_disks $rpc_server "/dev/nbd0 /dev/nbd1"
		fi
	fi

	if [ "$superblock" = true ]; then
		# Remove then re-add a base bdev to assemble the raid bdev again
		$rpc_py bdev_passthru_delete "spare"
		$rpc_py bdev_passthru_create -b "spare_delay" -p "spare"

		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs_operational
		verify_raid_bdev_process $raid_bdev_name "none" "none"
		[[ $($rpc_py bdev_raid_get_bdevs all | jq -r '.[].base_bdevs_list[0].name') == "spare" ]]

		# Remove and re-add a base bdev - rebuild should start automatically
		$rpc_py bdev_raid_remove_base_bdev "spare"
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
		sleep 1
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

		# Same as above but re-add through examine
		$rpc_py bdev_passthru_delete "spare"
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		$rpc_py bdev_passthru_create -b "spare_delay" -p "spare"
		sleep 1
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

		# Stop the rebuild
		$rpc_py bdev_passthru_delete "spare"
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		verify_raid_bdev_process $raid_bdev_name "none" "none"

		# Re-adding a base bdev that was replaced (no longer is a member of the array) should not be allowed
		$rpc_py bdev_passthru_delete ${base_bdevs[0]}
		$rpc_py bdev_passthru_create -b ${base_bdevs[0]}_malloc -p ${base_bdevs[0]}
		sleep 1
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		verify_raid_bdev_process $raid_bdev_name "none" "none"
		NOT $rpc_py bdev_raid_add_base_bdev $raid_bdev_name ${base_bdevs[0]}
		sleep 1
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		verify_raid_bdev_process $raid_bdev_name "none" "none"
	fi

	killprocess $raid_pid

	return 0
}
802
# I/O error test: run bdevperf against a RAID bdev whose base bdevs sit on top
# of error-injection bdevs, inject a failure into the first base bdev and check
# the resulting RAID state and the failure rate reported by bdevperf.
# Arguments: $1 - RAID level name
#            $2 - number of base bdevs
#            $3 - I/O type to fail, handed to bdev_error_inject_error
# Globals:   rootdir, rpc_server, rpc_py, tmp_dir, base_blocklen,
#            base_malloc_params (read); raid_pid (written)
# Returns:   status of the final failure rate check: levels with redundancy
#            must report "0.00", all other levels anything else
function raid_io_error_test() {
	local raid_level=$1
	local num_base_bdevs=$2
	local error_io_type=$3
	local base_bdevs=($(for ((i = 1; i <= num_base_bdevs; i++)); do echo BaseBdev$i; done))
	local raid_bdev_name="raid_bdev1"
	local strip_size
	local create_arg
	local bdevperf_log
	local fail_per_s
	local expected_num_base_bdevs

	# Only levels other than raid1 are created with a strip size.
	if [ $raid_level != "raid1" ]; then
		strip_size=64
		create_arg="-z $strip_size"
	else
		strip_size=0
	fi

	# bdevperf output is captured so the failure rate can be inspected later.
	bdevperf_log=$(mktemp -p "$tmp_dir")

	"$rootdir/build/examples/bdevperf" -r $rpc_server -T $raid_bdev_name -t 60 -w randrw -M 50 -o 128k -q 1 -z -f -L bdev_raid > $bdevperf_log &
	raid_pid=$!
	waitforlisten $raid_pid $rpc_server

	# Stack for every base bdev: malloc -> error injection (EE_ prefix) -> passthru
	for bdev in "${base_bdevs[@]}"; do
		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${bdev}_malloc
		$rpc_py bdev_error_create ${bdev}_malloc
		$rpc_py bdev_passthru_create -b EE_${bdev}_malloc -p $bdev
	done

	# Build the RAID bdev (with superblock) and make sure it came up
	$rpc_py bdev_raid_create $create_arg -r $raid_level -b "${base_bdevs[*]}" -n $raid_bdev_name -s
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs

	# Kick off the bdevperf workload
	"$rootdir/examples/bdev/bdevperf/bdevperf.py" -s $rpc_server perform_tests &
	sleep 1

	# Make the first base bdev fail the requested I/O type
	$rpc_py bdev_error_inject_error EE_${base_bdevs[0]}_malloc $error_io_type failure

	# Only a failed write on raid1 is expected to cost the array a base bdev.
	expected_num_base_bdevs=$num_base_bdevs
	if [[ $raid_level = "raid1" && $error_io_type = "write" ]]; then
		expected_num_base_bdevs=$((num_base_bdevs - 1))
	fi
	verify_raid_bdev_state $raid_bdev_name online $raid_level $strip_size $expected_num_base_bdevs

	$rpc_py bdev_raid_delete $raid_bdev_name

	killprocess $raid_pid

	# Pick the sixth column of the RAID bdev's result line, ignoring "Job" lines.
	# Redundant levels are expected to absorb the injected errors.
	fail_per_s=$(grep -v Job $bdevperf_log | grep $raid_bdev_name | awk '{print $6}')
	if has_redundancy $raid_level; then
		[[ "$fail_per_s" = "0.00" ]]
	else
		[[ "$fail_per_s" != "0.00" ]]
	fi
}
866
# Print the num_blocks value that bdev_get_bdevs reports for the bdev named $1.
function _raid_resize_bdev_num_blocks() {
	$rpc_py bdev_get_bdevs -b "$1" | jq '.[].num_blocks'
}

# Check that a RAID bdev created with a superblock (-s) on top of two lvol
# bdevs grows when both lvols are resized, and that it reports the grown size
# again after the passthru bdev underneath the lvstore is deleted and
# re-created.
#
# Arguments: $1 - RAID level passed to "bdev_raid_create -r". Only 0 and 1 are
#                 supported; any other value fails before anything is started.
# Globals:   rootdir, rpc_server, rpc_py, base_blocklen (read)
#            raid_pid (written: pid of the bdev_svc app started here)
# Returns:   0 on success, 1 for an unsupported RAID level. RPC calls and size
#            checks are plain commands, so a failing one only aborts the test
#            when the caller runs with errexit enabled.
function raid_resize_superblock_test() {
	local raid_level=$1
	local raid_create_args=()
	local raid_blocks_before raid_blocks_after
	local lvol

	# Per-level creation arguments and expected RAID bdev sizes (num_blocks)
	# before and after the base bdevs are resized. Rejecting unknown levels here
	# keeps the test from passing without ever creating or checking a RAID bdev.
	case "$raid_level" in
		0)
			raid_create_args=(-z 64)
			raid_blocks_before=245760
			raid_blocks_after=393216
			;;
		1)
			raid_blocks_before=122880
			raid_blocks_after=196608
			;;
		*)
			echo "raid_resize_superblock_test: unsupported raid level '$raid_level'" >&2
			return 1
			;;
	esac

	"$rootdir/test/app/bdev_svc/bdev_svc" -r "$rpc_server" -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten "$raid_pid" "$rpc_server"

	# malloc0 -> pt0 (passthru) -> lvs0 (lvstore) -> lvol0, lvol1
	$rpc_py bdev_malloc_create -b malloc0 512 "$base_blocklen"

	$rpc_py bdev_passthru_create -b malloc0 -p pt0
	$rpc_py bdev_lvol_create_lvstore pt0 lvs0

	for lvol in lvol0 lvol1; do
		$rpc_py bdev_lvol_create -l lvs0 "$lvol" 64
	done

	$rpc_py bdev_raid_create -n Raid -r "$raid_level" "${raid_create_args[@]}" -b "lvs0/lvol0 lvs0/lvol1" -s

	# NOTE(review): the size checks below hard-code 512-byte blocks although
	# malloc0 is created with $base_blocklen - confirm before running this test
	# with any other block size.

	# Check size of base bdevs first
	for lvol in lvs0/lvol0 lvs0/lvol1; do
		(($(_raid_resize_bdev_num_blocks "$lvol") * 512 / 1048576 == 64))
	done

	# Check size of Raid bdev before resize
	(($(_raid_resize_bdev_num_blocks Raid) == raid_blocks_before))

	# Resize bdevs
	for lvol in lvs0/lvol0 lvs0/lvol1; do
		$rpc_py bdev_lvol_resize "$lvol" 100
	done

	# Bdevs should be resized
	for lvol in lvs0/lvol0 lvs0/lvol1; do
		(($(_raid_resize_bdev_num_blocks "$lvol") * 512 / 1048576 == 100))
	done

	# Same with Raid bdevs
	(($(_raid_resize_bdev_num_blocks Raid) == raid_blocks_after))

	$rpc_py bdev_passthru_delete pt0
	$rpc_py bdev_passthru_create -b malloc0 -p pt0

	# After the passthru bdev is re-created, the RAID bdev should start from
	# superblock and its size should be the same as after it was resized.
	(($(_raid_resize_bdev_num_blocks Raid) == raid_blocks_after))

	killprocess "$raid_pid"

	return 0
}
926
# Test driver: everything from here on runs when the script is executed.

# Scratch directory for the tests. Until the trap is cleared at the very end,
# any exit of the script runs cleanup and reports failure.
mkdir -p "$tmp_dir"
trap 'cleanup; exit 1' EXIT

# Block size handed to bdev_malloc_create by the tests.
base_blocklen=512

run_test raid0_resize_superblock_test raid_resize_superblock_test 0
run_test raid1_resize_superblock_test raid_resize_superblock_test 1

# Tests gated on has_nbd only run on Linux when the nbd module can be loaded.
if [[ $(uname -s) == Linux ]] && modprobe -n nbd; then
	modprobe nbd
	has_nbd=true
	run_test raid_function_test_raid0 raid_function_test raid0
	run_test raid_function_test_concat raid_function_test concat
fi

run_test raid0_resize_test raid_resize_test 0
run_test raid1_resize_test raid_resize_test 1

# State, superblock and I/O error tests for every level with 2, 3 and 4 base bdevs.
for n in 2 3 4; do
	for level in raid0 concat raid1; do
		run_test raid_state_function_test raid_state_function_test "$level" "$n" false
		run_test raid_state_function_test_sb raid_state_function_test "$level" "$n" true
		run_test raid_superblock_test raid_superblock_test "$level" "$n"
		run_test raid_read_error_test raid_io_error_test "$level" "$n" read
		run_test raid_write_error_test raid_io_error_test "$level" "$n" write
	done
done

# raid1 rebuild tests, run with 2 and 4 base bdevs.
if [[ $has_nbd == true ]]; then
	for n in 2 4; do
		run_test raid_rebuild_test raid_rebuild_test raid1 "$n" false false true
		run_test raid_rebuild_test_sb raid_rebuild_test raid1 "$n" true false true
		run_test raid_rebuild_test_io raid_rebuild_test raid1 "$n" false true true
		run_test raid_rebuild_test_sb_io raid_rebuild_test raid1 "$n" true true true
	done
fi

# raid5f tests, run with 3 and 4 base bdevs.
for n in 3 4; do
	run_test raid5f_state_function_test raid_state_function_test raid5f "$n" false
	run_test raid5f_state_function_test_sb raid_state_function_test raid5f "$n" true
	run_test raid5f_superblock_test raid_superblock_test raid5f "$n"
	if [[ $has_nbd == true ]]; then
		run_test raid5f_rebuild_test raid_rebuild_test raid5f "$n" false false true
		run_test raid5f_rebuild_test_sb raid_rebuild_test raid5f "$n" true false true
	fi
done

# Repeat a subset of the raid1 tests with 4096-byte blocks.
base_blocklen=4096

run_test raid_state_function_test_sb_4k raid_state_function_test raid1 2 true
run_test raid_superblock_test_4k raid_superblock_test raid1 2
if [[ $has_nbd == true ]]; then
	run_test raid_rebuild_test_sb_4k raid_rebuild_test raid1 2 true false true
fi

# base_malloc_params holds extra arguments for bdev_malloc_create; judging by
# the test names, "-m 32" selects separate metadata - TODO confirm.
base_malloc_params="-m 32"
run_test raid_state_function_test_sb_md_separate raid_state_function_test raid1 2 true
run_test raid_superblock_test_md_separate raid_superblock_test raid1 2
if [[ $has_nbd == true ]]; then
	run_test raid_rebuild_test_sb_md_separate raid_rebuild_test raid1 2 true false true
fi

# Judging by the test names, adding "-i" selects interleaved metadata - TODO
# confirm. The rebuild test here is not gated on has_nbd and gets false as
# its last argument.
base_malloc_params="-m 32 -i"
run_test raid_state_function_test_sb_md_interleaved raid_state_function_test raid1 2 true
run_test raid_superblock_test_md_interleaved raid_superblock_test raid1 2
run_test raid_rebuild_test_sb_md_interleaved raid_rebuild_test raid1 2 true false false

# All tests passed: drop the failure trap and clean up normally.
trap - EXIT
cleanup
996