xref: /spdk/test/bdev/bdev_raid.sh (revision 1e148debea22f8054d51cbda549a778bc8da1d78)
1#!/usr/bin/env bash
2#  SPDX-License-Identifier: BSD-3-Clause
3#  Copyright (C) 2019 Intel Corporation
4#  All rights reserved.
5#
# Resolve the directory holding this script and the repository root two levels
# above it. The expansions are quoted so checkouts in paths containing
# whitespace keep working.
testdir=$(readlink -f "$(dirname "$0")")
rootdir=$(readlink -f "$testdir/../..")
# Scratch directory and the reference data file kept inside it.
tmp_dir=$SPDK_TEST_STORAGE/raidtest
tmp_file=$tmp_dir/raidrandtest

source "$rootdir/test/common/autotest_common.sh"
source "$testdir/nbd_common.sh"

# All RPCs in this file are issued through $rpc_py.
rpc_py=rpc_cmd
15
# Check that discarding ranges of a raid bdev exported over NBD leaves the
# device with the same content as a reference file in which the same ranges
# were overwritten with zeroes.
#
# Arguments: $1 - path of the NBD block device
# Globals:   tmp_file (read; the file itself is overwritten with reference data)
# Returns:   0 when every comparison matches, or when blkdiscard is not
#            available (the check is skipped); 1 when device and reference
#            data differ
function raid_unmap_data_verify() {
	local nbd=$1
	local blksize
	local rw_blk_num=4096
	local rw_len
	local unmap_blk_offs=(0 1028 321)
	local unmap_blk_nums=(128 2035 456)
	local unmap_off
	local unmap_len
	local i

	# Nothing to verify without the blkdiscard tool.
	if ! hash blkdiscard; then
		return 0
	fi

	# Query the logical sector size directly. Cutting a fixed field out of the
	# padded lsblk column only works while the value has exactly three digits.
	blksize=$(blockdev --getss "$nbd")
	rw_len=$((blksize * rw_blk_num))

	# data write
	dd if=/dev/urandom of="$tmp_file" bs="$blksize" count="$rw_blk_num"
	dd if="$tmp_file" of="$nbd" bs="$blksize" count="$rw_blk_num" oflag=direct
	blockdev --flushbufs "$nbd"

	# confirm random data is written correctly in the raid device
	cmp -b -n "$rw_len" "$tmp_file" "$nbd" || return 1

	for ((i = 0; i < ${#unmap_blk_offs[@]}; i++)); do
		unmap_off=$((blksize * unmap_blk_offs[i]))
		unmap_len=$((blksize * unmap_blk_nums[i]))

		# data unmap on tmp_file: zero the same range in the reference copy
		dd if=/dev/zero of="$tmp_file" bs="$blksize" seek="${unmap_blk_offs[i]}" count="${unmap_blk_nums[i]}" conv=notrunc

		# data unmap on raid bdev
		blkdiscard -o "$unmap_off" -l "$unmap_len" "$nbd"
		blockdev --flushbufs "$nbd"

		# data verify after unmap
		cmp -b -n "$rw_len" "$tmp_file" "$nbd" || return 1
	done

	return 0
}
54
# Tear down whatever a test left behind: stop the application recorded in
# raid_pid if it is still running and delete the scratch directory.
function cleanup() {
	if [[ -n $raid_pid ]]; then
		# Only call killprocess for a pid that still exists.
		if ps -p $raid_pid > /dev/null; then
			killprocess $raid_pid
		fi
	fi

	rm -rf "$tmp_dir"
}
62
# Start bdev_svc, build a two-member raid bdev on malloc base bdevs, export it
# over NBD, run raid_unmap_data_verify against it and tear everything down.
#
# Arguments: $1 - raid level handed to bdev_raid_create -r
# Globals:   rootdir, rpc_py, base_blocklen, base_malloc_params,
#            DEFAULT_RPC_ADDR (read); raid_pid (written)
# Returns:   0 on success, 1 when no raid bdev is online or the number of NBD
#            disks is not the expected one
function raid_function_test() {
	local raid_level=$1
	local nbd=/dev/nbd0
	local raid_bdev
	local count

	$rootdir/test/app/bdev_svc/bdev_svc -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid

	# $base_malloc_params is left unquoted on purpose so that it can expand to
	# zero or more extra arguments.
	$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b Base_1
	$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b Base_2
	$rpc_py bdev_raid_create -z 64 -r $raid_level -b "'Base_1 Base_2'" -n raid

	# jq's select(.) drops a null name, leaving raid_bdev empty when there is
	# no online raid bdev. The operand must be quoted: an unquoted empty value
	# turns the test into a syntax error and the failure goes unnoticed.
	raid_bdev=$($rpc_py bdev_raid_get_bdevs online | jq -r '.[0]["name"] | select(.)')
	if [ -z "$raid_bdev" ]; then
		echo "No $raid_level device in SPDK app"
		return 1
	fi

	nbd_start_disks $DEFAULT_RPC_ADDR $raid_bdev $nbd
	count=$(nbd_get_count $DEFAULT_RPC_ADDR)
	if [ "$count" -ne 1 ]; then
		return 1
	fi

	raid_unmap_data_verify $nbd

	nbd_stop_disks $DEFAULT_RPC_ADDR $nbd
	count=$(nbd_get_count $DEFAULT_RPC_ADDR)
	if [ "$count" -ne 0 ]; then
		return 1
	fi

	killprocess $raid_pid

	return 0
}
101
# Helper for verify_raid_bdev_state: runs the individual comparisons and prints
# a message describing the first mismatch.
#
# Arguments: $1..$5 - same as verify_raid_bdev_state
#            $6     - JSON of the raid bdev as found by "bdev_raid_get_bdevs all"
#                     (empty when no bdev of that name exists)
# Returns:   0 when everything matches, 1 otherwise
function _verify_raid_bdev_state_checks() {
	local raid_bdev_name=$1
	local expected_state=$2
	local raid_level=$3
	local strip_size=$4
	local num_base_bdevs_operational=$5
	local raid_bdev_info=$6
	local num_base_bdevs
	local num_base_bdevs_discovered
	local tmp

	if [ -z "$raid_bdev_info" ]; then
		echo "No raid device \"$raid_bdev_name\" in SPDK app"
		return 1
	fi

	# Query again, this time filtered by the expected state.
	raid_bdev_info=$($rpc_py bdev_raid_get_bdevs $expected_state | jq -r ".[] | select(.name == \"$raid_bdev_name\")")
	if [ -z "$raid_bdev_info" ]; then
		echo "$raid_bdev_name is not in $expected_state state"
		return 1
	fi

	tmp=$(jq -r '.state' <<< "$raid_bdev_info")
	if [ "$tmp" != "$expected_state" ]; then
		echo "incorrect state: $tmp, expected: $expected_state"
		return 1
	fi

	tmp=$(jq -r '.raid_level' <<< "$raid_bdev_info")
	if [ "$tmp" != "$raid_level" ]; then
		echo "incorrect level: $tmp, expected: $raid_level"
		return 1
	fi

	tmp=$(jq -r '.strip_size_kb' <<< "$raid_bdev_info")
	if [ "$tmp" != "$strip_size" ]; then
		echo "incorrect strip size: $tmp, expected: $strip_size"
		return 1
	fi

	# The reported counters must agree with the contents of base_bdevs_list.
	num_base_bdevs=$(jq -r '[.base_bdevs_list[]] | length' <<< "$raid_bdev_info")
	tmp=$(jq -r '.num_base_bdevs' <<< "$raid_bdev_info")
	if [ "$num_base_bdevs" != "$tmp" ]; then
		echo "incorrect num_base_bdevs: $tmp, expected: $num_base_bdevs"
		return 1
	fi

	num_base_bdevs_discovered=$(jq -r '[.base_bdevs_list[] | select(.is_configured)] | length' <<< "$raid_bdev_info")
	tmp=$(jq -r '.num_base_bdevs_discovered' <<< "$raid_bdev_info")
	if [ "$num_base_bdevs_discovered" != "$tmp" ]; then
		echo "incorrect num_base_bdevs_discovered: $tmp, expected: $num_base_bdevs_discovered"
		return 1
	fi

	tmp=$(jq -r '.num_base_bdevs_operational' <<< "$raid_bdev_info")
	if [ "$num_base_bdevs_operational" != "$tmp" ]; then
		echo "incorrect num_base_bdevs_operational $tmp, expected: $num_base_bdevs_operational"
		return 1
	fi

	return 0
}

# Check that a raid bdev exists and reports the expected state, level, strip
# size and base bdev counters.
#
# Arguments: $1 - raid bdev name
#            $2 - expected state (also used as the bdev_raid_get_bdevs category)
#            $3 - expected raid level
#            $4 - expected strip size in KiB (compared with .strip_size_kb)
#            $5 - expected number of operational base bdevs
# Globals:   rpc_py (read)
# Outputs:   a description of the first mismatch on stdout
# Returns:   0 when everything matches, 1 otherwise
function verify_raid_bdev_state() {
	local raid_bdev_name=$1
	local raid_bdev_info
	local rc=0

	raid_bdev_info=$($rpc_py bdev_raid_get_bdevs all | jq -r ".[] | select(.name == \"$raid_bdev_name\")")

	# Tracing is switched off for the comparisons and switched back on
	# afterwards on every path, including the failing ones.
	xtrace_disable
	_verify_raid_bdev_state_checks "$1" "$2" "$3" "$4" "$5" "$raid_bdev_info" || rc=$?
	xtrace_restore

	return $rc
}
167
# Check the background process reported for a raid bdev.
#
# Arguments: $1 - raid bdev name
#            $2 - expected .process.type, or "none" when no process is expected
#            $3 - expected .process.target, or "none"
# Globals:   rpc_py (read)
# Returns:   0 only when both type and target match, non-zero otherwise
function verify_raid_bdev_process() {
	local raid_bdev_name=$1
	local process_type=$2
	local target=$3
	local raid_bdev_info

	raid_bdev_info=$($rpc_py bdev_raid_get_bdevs all | jq -r ".[] | select(.name == \"$raid_bdev_name\")")

	# Both checks are chained so that the type comparison also counts when the
	# function is used as a condition (if/while/!), where errexit is suspended
	# and only the final status is seen by the caller.
	[[ $(jq -r '.process.type // "none"' <<< "$raid_bdev_info") == "$process_type" ]] \
		&& [[ $(jq -r '.process.target // "none"' <<< "$raid_bdev_info") == "$target" ]]
}
179
# Check that block_size, md_size, md_interleave and dif_type of a raid bdev are
# equal to those of each of its configured base bdevs.
#
# Arguments: $1 - raid bdev name
# Globals:   rpc_py (read)
# Outputs:   a message naming the first base bdev that differs
# Returns:   0 when all configured base bdevs match, 1 on the first mismatch
function verify_raid_bdev_properties() {
	local raid_bdev_name=$1
	local raid_bdev_info
	local base_bdev_names
	local name
	local cmp_raid_bdev cmp_base_bdev

	raid_bdev_info=$($rpc_py bdev_get_bdevs -b "$raid_bdev_name" | jq '.[]')
	base_bdev_names=$(jq -r '.driver_specific.raid.base_bdevs_list[] | select(.is_configured == true).name' <<< "$raid_bdev_info")
	cmp_raid_bdev=$(jq -r '[.block_size, .md_size, .md_interleave, .dif_type] | join(" ")' <<< "$raid_bdev_info")

	# Word splitting of the name list is intended: jq prints one name per line.
	for name in $base_bdev_names; do
		cmp_base_bdev=$($rpc_py bdev_get_bdevs -b "$name" | jq -r '.[] | [.block_size, .md_size, .md_interleave, .dif_type] | join(" ")')
		# Fail explicitly instead of relying on errexit, which is suspended
		# when the function is called as a condition.
		if [[ "$cmp_raid_bdev" != "$cmp_base_bdev" ]]; then
			echo "properties of $name ($cmp_base_bdev) differ from $raid_bdev_name ($cmp_raid_bdev)"
			return 1
		fi
	done

	return 0
}
196
# Tell whether the given raid level is one of the redundant levels known to
# this script (raid1, raid5f).
#
# Arguments: $1 - raid level name
# Returns:   0 for raid1 and raid5f, 1 for anything else
function has_redundancy() {
	if [[ $1 == "raid1" || $1 == "raid5f" ]]; then
		return 0
	fi

	return 1
}
203
# Walk a raid bdev through its states by creating and deleting base bdevs and
# check the reported state after every step.
#
# Arguments: $1 - raid level
#            $2 - number of base bdevs
#            $3 - "true" to create the raid bdev with a superblock (-s)
# Globals:   rootdir, rpc_py, base_blocklen, base_malloc_params (read);
#            raid_pid (written)
# Returns:   0 on success, 1 when the raid bdev disappears too early or is
#            still present after all base bdevs were deleted
function raid_state_function_test() {
	local raid_level=$1
	local num_base_bdevs=$2
	local superblock=$3
	local raid_bdev
	# Keep the loop counter out of the caller's scope.
	local i
	local base_bdevs=($(for ((i = 1; i <= num_base_bdevs; i++)); do echo BaseBdev$i; done))
	local raid_bdev_name="Existed_Raid"
	local strip_size
	local strip_size_create_arg
	local superblock_create_arg
	local expected_state

	# raid1 is created without a strip size and is expected to report 0.
	if [ "$raid_level" != "raid1" ]; then
		strip_size=64
		strip_size_create_arg="-z $strip_size"
	else
		strip_size=0
	fi

	if [ "$superblock" = true ]; then
		superblock_create_arg="-s"
	else
		superblock_create_arg=""
	fi

	$rootdir/test/app/bdev_svc/bdev_svc -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid

	# Step1: create a RAID bdev with no base bdevs
	# Expect state: CONFIGURING
	$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "'${base_bdevs[*]}'" -n $raid_bdev_name
	verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
	$rpc_py bdev_raid_delete $raid_bdev_name

	# Step2: create one base bdev and add to the RAID bdev
	# Expect state: CONFIGURING
	$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "'${base_bdevs[*]}'" -n $raid_bdev_name
	$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${base_bdevs[0]}
	waitforbdev ${base_bdevs[0]}
	verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
	$rpc_py bdev_raid_delete $raid_bdev_name

	# Step3: create remaining base bdevs and add to the RAID bdev
	# Expect state: ONLINE
	$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "'${base_bdevs[*]}'" -n $raid_bdev_name
	for ((i = 1; i < num_base_bdevs; i++)); do
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${base_bdevs[$i]}
		waitforbdev ${base_bdevs[$i]}
	done
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
	verify_raid_bdev_properties $raid_bdev_name

	# Step4: delete one base bdev from the RAID bdev
	# Expect state: ONLINE for levels with redundancy, OFFLINE otherwise
	$rpc_py bdev_malloc_delete ${base_bdevs[0]}
	if ! has_redundancy $raid_level; then
		expected_state="offline"
	else
		expected_state="online"
	fi
	verify_raid_bdev_state $raid_bdev_name $expected_state $raid_level $strip_size $((num_base_bdevs - 1))

	# Step5: delete remaining base bdevs from the RAID bdev
	# Expect state: removed from system
	for ((i = 1; i < num_base_bdevs; i++)); do
		raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[0]["name"]')
		if [ "$raid_bdev" != "$raid_bdev_name" ]; then
			echo "$raid_bdev_name removed before all base bdevs were deleted"
			return 1
		fi
		$rpc_py bdev_malloc_delete ${base_bdevs[$i]}
	done
	raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[0]["name"] | select(.)')
	if [ -n "$raid_bdev" ]; then
		echo "$raid_bdev_name is not removed"
		return 1
	fi

	if [ "$num_base_bdevs" -gt 2 ]; then
		# Test removing and re-adding base bdevs when in CONFIGURING state
		# All base bdevs except the first one are created before the raid bdev.
		for ((i = 1; i < num_base_bdevs; i++)); do
			$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${base_bdevs[$i]}
			waitforbdev ${base_bdevs[$i]}
		done
		$rpc_py bdev_raid_create $strip_size_create_arg $superblock_create_arg -r $raid_level -b "'${base_bdevs[*]}'" -n $raid_bdev_name
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs

		$rpc_py bdev_raid_remove_base_bdev ${base_bdevs[1]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[1].is_configured') == "false" ]]

		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${base_bdevs[0]}
		waitforbdev ${base_bdevs[0]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[0].is_configured') == "true" ]]

		$rpc_py bdev_raid_remove_base_bdev ${base_bdevs[2]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[2].is_configured') == "false" ]]

		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name ${base_bdevs[2]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[2].is_configured') == "true" ]]

		$rpc_py bdev_malloc_delete ${base_bdevs[0]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[0].is_configured') == "false" ]]

		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name ${base_bdevs[1]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
		[[ $($rpc_py bdev_raid_get_bdevs all | jq '.[0].base_bdevs_list[1].is_configured') == "true" ]]

		# Fill the remaining slot with a differently named bdev that carries
		# the UUID recorded for slot 0.
		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b NewBaseBdev -u "$($rpc_py bdev_raid_get_bdevs all | jq -r '.[0].base_bdevs_list[0].uuid')"
		waitforbdev NewBaseBdev
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
		verify_raid_bdev_properties $raid_bdev_name

		$rpc_py bdev_raid_delete $raid_bdev_name
	fi

	killprocess $raid_pid

	return 0
}
330
# Resize the two null base bdevs of a raid bdev one after the other and check
# the raid bdev size after each step: it must stay unchanged after the first
# resize and grow after the second.
#
# Arguments: $1 - numeric raid level; 0 creates the raid bdev with -z 64 and
#                 expects twice the base bdev size, any other value expects
#                 the size of a single base bdev
# Globals:   rootdir, rpc_py, base_blocklen (read); raid_pid (written)
# Returns:   0 on success, 1 when a size check fails
function raid_resize_test() {
	local raid_level=$1
	local blksize=$base_blocklen
	local bdev_size_mb=32
	local new_bdev_size_mb=$((bdev_size_mb * 2))
	local blkcnt
	local raid_size_mb
	local new_raid_size_mb
	local expected_size
	# How many base bdevs contribute capacity to the raid bdev.
	local capacity_members

	$rootdir/test/app/bdev_svc/bdev_svc -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid

	$rpc_py bdev_null_create Base_1 $bdev_size_mb $blksize
	$rpc_py bdev_null_create Base_2 $bdev_size_mb $blksize

	if [ $raid_level -eq 0 ]; then
		capacity_members=2
		$rpc_py bdev_raid_create -z 64 -r $raid_level -b "'Base_1 Base_2'" -n Raid
	else
		capacity_members=1
		$rpc_py bdev_raid_create -r $raid_level -b "'Base_1 Base_2'" -n Raid
	fi

	# Growing only Base_1 must leave the size of Raid untouched.
	$rpc_py bdev_null_resize Base_1 $new_bdev_size_mb

	blkcnt=$($rpc_py bdev_get_bdevs -b Raid | jq '.[].num_blocks')
	raid_size_mb=$((blkcnt * blksize / 1048576))
	expected_size=$((bdev_size_mb * capacity_members))
	if [ $raid_size_mb != $expected_size ]; then
		echo "resize failed"
		return 1
	fi

	# Once Base_2 has grown as well, Raid has to report the new size.
	$rpc_py bdev_null_resize Base_2 $new_bdev_size_mb

	blkcnt=$($rpc_py bdev_get_bdevs -b Raid | jq '.[].num_blocks')
	raid_size_mb=$((blkcnt * blksize / 1048576))
	expected_size=$((new_bdev_size_mb * capacity_members))
	if [ $raid_size_mb != $expected_size ]; then
		echo "resize failed"
		return 1
	fi

	killprocess $raid_pid

	return 0
}
391
# Create a raid bdev with a superblock on passthru bdevs, then remove and
# re-create the passthru bdevs in various orders and check that the raid bdev
# is re-assembled from the superblock with the expected state and UUID.
#
# Arguments: $1 - raid level
#            $2 - number of base bdevs
# Globals:   rootdir, rpc_py, base_blocklen, base_malloc_params (read);
#            raid_pid (written)
# Returns:   0 on success, 1 when one of the explicit checks fails
function raid_superblock_test() {
	local raid_level=$1
	local num_base_bdevs=$2
	local base_bdevs_malloc=()
	local base_bdevs_pt=()
	local base_bdevs_pt_uuid=()
	local raid_bdev_name="raid_bdev1"
	local strip_size
	local strip_size_create_arg
	local raid_bdev_uuid
	local raid_bdev
	local bdev_malloc bdev_pt bdev_pt_uuid
	# Keep the loop counter out of the caller's scope.
	local i

	# raid1 is created without a strip size and is expected to report 0.
	if [ "$raid_level" != "raid1" ]; then
		strip_size=64
		strip_size_create_arg="-z $strip_size"
	else
		strip_size=0
	fi

	"$rootdir/test/app/bdev_svc/bdev_svc" -L bdev_raid &
	raid_pid=$!
	waitforlisten $raid_pid

	# Create base bdevs
	for ((i = 1; i <= num_base_bdevs; i++)); do
		bdev_malloc="malloc$i"
		bdev_pt="pt$i"
		# Zero-pad the index so the last UUID group keeps its 12 digits even
		# for ten or more base bdevs.
		printf -v bdev_pt_uuid '00000000-0000-0000-0000-%012d' "$i"

		base_bdevs_malloc+=($bdev_malloc)
		base_bdevs_pt+=($bdev_pt)
		base_bdevs_pt_uuid+=($bdev_pt_uuid)

		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b $bdev_malloc
		$rpc_py bdev_passthru_create -b $bdev_malloc -p $bdev_pt -u $bdev_pt_uuid
	done

	# Create RAID bdev with superblock
	$rpc_py bdev_raid_create $strip_size_create_arg -r $raid_level -b "'${base_bdevs_pt[*]}'" -n $raid_bdev_name -s
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
	verify_raid_bdev_properties $raid_bdev_name

	# Get RAID bdev's UUID
	raid_bdev_uuid=$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')
	if [ -z "$raid_bdev_uuid" ]; then
		return 1
	fi

	# Stop the RAID bdev
	$rpc_py bdev_raid_delete $raid_bdev_name
	raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
	if [ -n "$raid_bdev" ]; then
		return 1
	fi

	# Delete the passthru bdevs
	for bdev_pt in "${base_bdevs_pt[@]}"; do
		$rpc_py bdev_passthru_delete $bdev_pt
	done
	if [ "$($rpc_py bdev_get_bdevs | jq -r '[.[] | select(.product_name == "passthru")] | any')" == "true" ]; then
		return 1
	fi

	# Try to create new RAID bdev from malloc bdevs
	# Should fail due to superblock still present on base bdevs
	NOT $rpc_py bdev_raid_create $strip_size_create_arg -r $raid_level -b "'${base_bdevs_malloc[*]}'" -n $raid_bdev_name

	raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
	if [ -n "$raid_bdev" ]; then
		return 1
	fi

	# Re-add first base bdev
	$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[0]} -p ${base_bdevs_pt[0]} -u ${base_bdevs_pt_uuid[0]}

	# Check if the RAID bdev was assembled from superblock
	verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs

	if [ "$num_base_bdevs" -gt 2 ]; then
		# Re-add the second base bdev and remove it again
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[1]} -p ${base_bdevs_pt[1]} -u ${base_bdevs_pt_uuid[1]}
		$rpc_py bdev_passthru_delete ${base_bdevs_pt[1]}
		verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs
	fi

	# Re-add remaining base bdevs
	for ((i = 1; i < num_base_bdevs; i++)); do
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}
	done

	# Check if the RAID bdev is in online state
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
	verify_raid_bdev_properties $raid_bdev_name

	# Check if the RAID bdev has the same UUID as when first created
	if [ "$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')" != "$raid_bdev_uuid" ]; then
		return 1
	fi

	if has_redundancy $raid_level; then
		# Delete one base bdev
		$rpc_py bdev_passthru_delete ${base_bdevs_pt[0]}

		# Check if the RAID bdev is in online state (degraded)
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

		# Stop the RAID bdev
		$rpc_py bdev_raid_delete $raid_bdev_name
		raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
		if [ -n "$raid_bdev" ]; then
			return 1
		fi

		# Delete remaining base bdevs
		for ((i = 1; i < num_base_bdevs; i++)); do
			$rpc_py bdev_passthru_delete ${base_bdevs_pt[$i]}
		done

		# Re-add base bdevs from the second up to (not including) the last one
		for ((i = 1; i < num_base_bdevs - 1; i++)); do
			$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}

			# Check if the RAID bdev is in configuring state
			verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $((num_base_bdevs - 1))
		done

		# Re-add the last base bdev
		i=$((num_base_bdevs - 1))
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}

		# Check if the RAID bdev is in online state (degraded)
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

		# Stop the RAID bdev
		$rpc_py bdev_raid_delete $raid_bdev_name
		raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
		if [ -n "$raid_bdev" ]; then
			return 1
		fi

		if [ "$num_base_bdevs" -gt 2 ]; then
			# Delete the last base bdev
			i=$((num_base_bdevs - 1))
			$rpc_py bdev_passthru_delete ${base_bdevs_pt[$i]}
		fi

		# Re-add first base bdev
		# This is the "failed" device and contains the "old" version of the superblock
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[0]} -p ${base_bdevs_pt[0]} -u ${base_bdevs_pt_uuid[0]}

		if [ "$num_base_bdevs" -gt 2 ]; then
			# Check if the RAID bdev is in configuring state
			# This should use the newer superblock version and have n-1 online base bdevs
			verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $((num_base_bdevs - 1))
			[[ $($rpc_py bdev_raid_get_bdevs configuring | jq -r '.[].base_bdevs_list[0].is_configured') == "false" ]]

			# Re-add the last base bdev ($i still holds its index from above)
			$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}
		fi

		# Check if the RAID bdev is in online state (degraded)
		# This should use the newer superblock version and have n-1 online base bdevs
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))
		[[ $($rpc_py bdev_raid_get_bdevs online | jq -r '.[].base_bdevs_list[0].is_configured') == "false" ]]

		# Check if the RAID bdev has the same UUID as when first created
		if [ "$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')" != "$raid_bdev_uuid" ]; then
			return 1
		fi
	fi

	killprocess $raid_pid

	return 0
}
567
# Exercise rebuild of a raid bdev onto a spare bdev: start, interrupt and
# restart the rebuild, wait for it to finish, optionally compare the data and,
# with a superblock, check re-assembly and automatic rebuild restart.
#
# Arguments: $1 - raid level
#            $2 - number of base bdevs
#            $3 - "true" to create the raid bdev with a superblock
#            $4 - "true" to run bdevperf I/O in the background during rebuild
#            $5 - "true" to compare base bdev data after the rebuild
# Globals:   rootdir, rpc_py, base_blocklen, base_malloc_params,
#            DEFAULT_RPC_ADDR (read); raid_pid (written)
# Returns:   0 on success, 1 when background I/O is requested for a level
#            other than raid1
function raid_rebuild_test() {
	local raid_level=$1
	local num_base_bdevs=$2
	local superblock=$3
	local background_io=$4
	local verify=$5
	# Keep the loop variables out of the caller's scope.
	local i bdev
	local base_bdevs=($(for ((i = 1; i <= num_base_bdevs; i++)); do echo BaseBdev$i; done))
	local raid_bdev_name="raid_bdev1"
	local strip_size
	local create_arg
	local raid_bdev_size
	local data_offset

	if [ "$raid_level" != "raid1" ]; then
		if [ "$background_io" = true ]; then
			echo "skipping rebuild test with io for level $raid_level"
			return 1
		fi
		strip_size=64
		create_arg+=" -z $strip_size"
	else
		strip_size=0
	fi

	if [ "$superblock" = true ]; then
		create_arg+=" -s"
	fi

	"$rootdir/build/examples/bdevperf" -T $raid_bdev_name -t 60 -w randrw -M 50 -o 3M -q 2 -U -z -L bdev_raid &
	raid_pid=$!
	waitforlisten $raid_pid

	# Create base bdevs
	for bdev in "${base_bdevs[@]}"; do
		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${bdev}_malloc
		$rpc_py bdev_passthru_create -b ${bdev}_malloc -p $bdev
	done

	# Create spare bdev
	# It sits on top of a delay bdev; presumably the write latency keeps the
	# rebuild running long enough to be observed - TODO confirm.
	$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b "spare_malloc"
	$rpc_py bdev_delay_create -b "spare_malloc" -d "spare_delay" -r 0 -t 0 -w 100000 -n 100000
	$rpc_py bdev_passthru_create -b "spare_delay" -p "spare"

	# Create RAID bdev
	$rpc_py bdev_raid_create $create_arg -r $raid_level -b "'${base_bdevs[*]}'" -n $raid_bdev_name
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs

	# Get RAID bdev's size
	raid_bdev_size=$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[].num_blocks')

	# Get base bdev's data offset
	data_offset=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[].base_bdevs_list[0].data_offset')

	if [ "$background_io" = true ]; then
		# Start user I/O
		"$rootdir/examples/bdev/bdevperf/bdevperf.py" perform_tests &
	elif [ "$verify" = true ]; then
		local write_unit_size

		# Write random data to the RAID bdev
		nbd_start_disks $DEFAULT_RPC_ADDR $raid_bdev_name /dev/nbd0
		if [ "$raid_level" = "raid5f" ]; then
			write_unit_size=$((strip_size * 2 * (num_base_bdevs - 1)))
			echo $((base_blocklen * write_unit_size / 1024)) > /sys/block/nbd0/queue/max_sectors_kb
		else
			write_unit_size=1
		fi
		dd if=/dev/urandom of=/dev/nbd0 bs=$((base_blocklen * write_unit_size)) count=$((raid_bdev_size / write_unit_size)) oflag=direct
		nbd_stop_disks $DEFAULT_RPC_ADDR /dev/nbd0
	fi

	# Remove one base bdev
	$rpc_py bdev_raid_remove_base_bdev ${base_bdevs[0]}

	# Check if the RAID bdev is in online state (degraded)
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

	# Add bdev for rebuild
	$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
	sleep 1

	# Check if rebuild started
	verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

	# Remove the rebuild target bdev
	$rpc_py bdev_raid_remove_base_bdev "spare"

	# Check if the RAID bdev is in online state (degraded)
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

	# Check if rebuild was stopped
	verify_raid_bdev_process $raid_bdev_name "none" "none"

	# Again, start the rebuild
	$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
	sleep 1
	verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

	# The second operand used to be $with_io, which is not defined in this
	# function; the unquoted empty expansion made the test error out, so this
	# branch never ran. background_io is the parameter carrying that meaning.
	# NOTE(review): this enables a previously dead branch - confirm it passes.
	if [ "$superblock" = true ] && [ "$background_io" = false ]; then
		# Stop the RAID bdev
		$rpc_py bdev_raid_delete $raid_bdev_name
		[[ $($rpc_py bdev_raid_get_bdevs all | jq 'length') == 0 ]]

		# Remove the passthru base bdevs, then re-add them to assemble the raid bdev again
		for ((i = 0; i < num_base_bdevs; i++)); do
			$rpc_py bdev_passthru_delete ${base_bdevs[$i]}
		done
		for ((i = 0; i < num_base_bdevs; i++)); do
			$rpc_py bdev_passthru_create -b ${base_bdevs[$i]}_malloc -p ${base_bdevs[$i]}
		done

		# Check if the RAID bdev is in online state (degraded)
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

		# Check if rebuild is not started
		verify_raid_bdev_process $raid_bdev_name "none" "none"

		# Again, start the rebuild
		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
		sleep 1
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"
	fi

	local num_base_bdevs_operational=$num_base_bdevs

	if [ "$raid_level" = "raid1" ] && [ "$num_base_bdevs" -gt 2 ]; then
		# Remove one more base bdev (not rebuild target)
		$rpc_py bdev_raid_remove_base_bdev ${base_bdevs[1]}

		# Ignore this bdev later when comparing data
		base_bdevs[1]=""
		((num_base_bdevs_operational--))

		# Check if rebuild is still running
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"
	fi

	# Wait for rebuild to finish
	# Poll once per second, for at most 30 seconds, until the raid bdev stops
	# reporting a rebuild onto "spare".
	local timeout=$((SECONDS + 30))
	while ((SECONDS < timeout)); do
		if ! verify_raid_bdev_process $raid_bdev_name "rebuild" "spare" > /dev/null; then
			break
		fi
		sleep 1
	done

	# Check if rebuild is not running and the RAID bdev has the correct number of operational devices
	verify_raid_bdev_process $raid_bdev_name "none" "none"
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs_operational

	# Stop the RAID bdev
	$rpc_py bdev_raid_delete $raid_bdev_name
	[[ $($rpc_py bdev_raid_get_bdevs all | jq 'length') == 0 ]]

	if [ "$verify" = true ]; then
		if [ "$background_io" = true ]; then
			# Compare data on the rebuilt and other base bdevs
			nbd_start_disks $DEFAULT_RPC_ADDR "spare" "/dev/nbd0"
			for bdev in "${base_bdevs[@]:1}"; do
				if [ -z "$bdev" ]; then
					continue
				fi
				nbd_start_disks $DEFAULT_RPC_ADDR $bdev "/dev/nbd1"
				cmp -i $((data_offset * base_blocklen)) /dev/nbd0 /dev/nbd1
				nbd_stop_disks $DEFAULT_RPC_ADDR "/dev/nbd1"
			done
			nbd_stop_disks $DEFAULT_RPC_ADDR "/dev/nbd0"
		else
			# Compare data on the removed and rebuilt base bdevs
			nbd_start_disks $DEFAULT_RPC_ADDR "${base_bdevs[0]} spare" "/dev/nbd0 /dev/nbd1"
			cmp -i $((data_offset * base_blocklen)) /dev/nbd0 /dev/nbd1
			nbd_stop_disks $DEFAULT_RPC_ADDR "/dev/nbd0 /dev/nbd1"
		fi
	fi

	if [ "$superblock" = true ]; then
		# Remove then re-add a base bdev to assemble the raid bdev again
		$rpc_py bdev_passthru_delete "spare"
		$rpc_py bdev_passthru_create -b "spare_delay" -p "spare"
		$rpc_py bdev_wait_for_examine

		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs_operational
		verify_raid_bdev_process $raid_bdev_name "none" "none"
		[[ $($rpc_py bdev_raid_get_bdevs all | jq -r '.[].base_bdevs_list[0].name') == "spare" ]]

		# Remove and re-add a base bdev - rebuild should start automatically
		$rpc_py bdev_raid_remove_base_bdev "spare"
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
		sleep 1
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

		# Same as above but re-add through examine
		$rpc_py bdev_passthru_delete "spare"
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		$rpc_py bdev_passthru_create -b "spare_delay" -p "spare"
		sleep 1
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

		# Stop the rebuild
		$rpc_py bdev_passthru_delete "spare"
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		verify_raid_bdev_process $raid_bdev_name "none" "none"

		# Re-adding a base bdev that was replaced (no longer is a member of the array) should not be allowed
		$rpc_py bdev_passthru_delete ${base_bdevs[0]}
		$rpc_py bdev_passthru_create -b ${base_bdevs[0]}_malloc -p ${base_bdevs[0]}
		sleep 1
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		verify_raid_bdev_process $raid_bdev_name "none" "none"
		NOT $rpc_py bdev_raid_add_base_bdev $raid_bdev_name ${base_bdevs[0]}
		sleep 1
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		verify_raid_bdev_process $raid_bdev_name "none" "none"
	fi

	killprocess $raid_pid

	return 0
}
788
# Run bdevperf I/O against a raid bdev whose base bdevs sit on error bdevs,
# inject a failure into the first one and check both the resulting raid bdev
# state and the failure rate reported by bdevperf.
#
# Arguments: $1 - raid level
#            $2 - number of base bdevs
#            $3 - I/O type handed to bdev_error_inject_error
# Globals:   rootdir, rpc_py, tmp_dir, base_blocklen, base_malloc_params (read);
#            raid_pid (written)
# Returns:   status of the final failure-rate comparison
function raid_io_error_test() {
	local raid_level=$1
	local num_base_bdevs=$2
	local error_io_type=$3
	# Keep the loop variables out of the caller's scope.
	local i bdev
	local base_bdevs=($(for ((i = 1; i <= num_base_bdevs; i++)); do echo BaseBdev$i; done))
	local raid_bdev_name="raid_bdev1"
	local strip_size
	local create_arg
	local bdevperf_log
	local fail_per_s
	local expected_num_base_bdevs

	if [ "$raid_level" != "raid1" ]; then
		strip_size=64
		create_arg+=" -z $strip_size"
	else
		strip_size=0
	fi

	bdevperf_log=$(mktemp -p "$tmp_dir")

	"$rootdir/build/examples/bdevperf" -T $raid_bdev_name -t 60 -w randrw -M 50 -o 128k -q 1 -z -f -L bdev_raid > "$bdevperf_log" &
	raid_pid=$!
	waitforlisten $raid_pid

	# Create base bdevs
	# Each one is a passthru bdev on the EE_-prefixed bdev that follows
	# bdev_error_create on the malloc bdev.
	for bdev in "${base_bdevs[@]}"; do
		$rpc_py bdev_malloc_create 32 $base_blocklen $base_malloc_params -b ${bdev}_malloc
		$rpc_py bdev_error_create ${bdev}_malloc
		$rpc_py bdev_passthru_create -b EE_${bdev}_malloc -p $bdev
	done

	# Create RAID bdev
	$rpc_py bdev_raid_create $create_arg -r $raid_level -b "'${base_bdevs[*]}'" -n $raid_bdev_name -s
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs

	# Start user I/O
	"$rootdir/examples/bdev/bdevperf/bdevperf.py" perform_tests &
	sleep 1

	# Inject an error
	$rpc_py bdev_error_inject_error EE_${base_bdevs[0]}_malloc $error_io_type failure

	# Only a write error on raid1 is expected to reduce the number of
	# operational base bdevs.
	if [[ $raid_level = "raid1" && $error_io_type = "write" ]]; then
		expected_num_base_bdevs=$((num_base_bdevs - 1))
	else
		expected_num_base_bdevs=$num_base_bdevs
	fi
	verify_raid_bdev_state $raid_bdev_name online $raid_level $strip_size $expected_num_base_bdevs

	$rpc_py bdev_raid_delete $raid_bdev_name

	killprocess $raid_pid

	# Check I/O failures reported by bdevperf
	# RAID levels with redundancy should handle the errors and not show any failures
	# The value is the sixth whitespace-separated field of the non-"Job" log
	# line that mentions the raid bdev.
	fail_per_s=$(grep -v Job "$bdevperf_log" | grep "$raid_bdev_name" | awk '{print $6}')
	if has_redundancy $raid_level; then
		[[ "$fail_per_s" = "0.00" ]]
	else
		[[ "$fail_per_s" != "0.00" ]]
	fi
}
852
# Create a raid bdev with a superblock on two lvol bdevs, resize the lvols and
# check that the raid bdev grows and keeps its new size after being
# re-assembled from the superblock.
#
# Arguments: $1 - numeric raid level, 0 or 1
# Globals:   rootdir, rpc_py, base_blocklen (read); raid_pid (written)
# Returns:   0 on success, 1 for an unsupported raid level
function raid_resize_superblock_test() {
	local raid_level=$1
	local strip_size_create_arg
	local raid_blocks_initial
	local raid_blocks_resized

	# Expected raid bdev sizes in blocks, before and after the resize. Reject
	# unknown levels up front: without a default arm the raid bdev would never
	# be created and every raid size check would be skipped silently.
	case $raid_level in
		0)
			strip_size_create_arg="-z 64"
			raid_blocks_initial=245760
			raid_blocks_resized=393216
			;;
		1)
			strip_size_create_arg=""
			raid_blocks_initial=122880
			raid_blocks_resized=196608
			;;
		*)
			echo "unsupported raid level: $raid_level"
			return 1
			;;
	esac

	$rootdir/test/app/bdev_svc/bdev_svc -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid

	$rpc_py bdev_malloc_create -b malloc0 512 $base_blocklen

	$rpc_py bdev_passthru_create -b malloc0 -p pt0
	$rpc_py bdev_lvol_create_lvstore pt0 lvs0

	$rpc_py bdev_lvol_create -l lvs0 lvol0 64
	$rpc_py bdev_lvol_create -l lvs0 lvol1 64

	$rpc_py bdev_raid_create -n Raid -r $raid_level $strip_size_create_arg -b "'lvs0/lvol0 lvs0/lvol1'" -s

	# Check size of base bdevs first
	# NOTE(review): the factor 512 and the block counts above look like they
	# assume base_blocklen is 512 - confirm against the callers.
	(($(($($rpc_py bdev_get_bdevs -b lvs0/lvol0 | jq '.[].num_blocks') * 512 / 1048576)) == 64))
	(($(($($rpc_py bdev_get_bdevs -b lvs0/lvol1 | jq '.[].num_blocks') * 512 / 1048576)) == 64))

	# Check size of Raid bdev before resize
	(($($rpc_py bdev_get_bdevs -b Raid | jq '.[].num_blocks') == raid_blocks_initial))

	# Resize bdevs
	$rpc_py bdev_lvol_resize lvs0/lvol0 100
	$rpc_py bdev_lvol_resize lvs0/lvol1 100

	# Bdevs should be resized
	(($(($($rpc_py bdev_get_bdevs -b lvs0/lvol0 | jq '.[].num_blocks') * 512 / 1048576)) == 100))
	(($(($($rpc_py bdev_get_bdevs -b lvs0/lvol1 | jq '.[].num_blocks') * 512 / 1048576)) == 100))

	# Same with Raid bdevs
	(($($rpc_py bdev_get_bdevs -b Raid | jq '.[].num_blocks') == raid_blocks_resized))

	$rpc_py bdev_passthru_delete pt0
	$rpc_py bdev_passthru_create -b malloc0 -p pt0
	$rpc_py bdev_wait_for_examine

	# After the passthru bdev is re-created, the RAID bdev should start from
	# superblock and its size should be the same as after it was resized.
	(($($rpc_py bdev_get_bdevs -b Raid | jq '.[].num_blocks') == raid_blocks_resized))

	killprocess $raid_pid

	return 0
}
913
# Check that the data_offset reported for the third base bdev slot of a
# raid1 bdev (created with a superblock) changes from 2048 to 2070 once the
# original null bdev in that slot is replaced by a malloc bdev that was
# created with a different -o value.
#
# Takes no arguments. Reads rootdir, rpc_py and base_blocklen; writes the
# global raid_pid.
function raid_resize_data_offset_test() {
	local malloc_bdev slot_offset
	local slot_offset_filter='.[].base_bdevs_list[2].data_offset'

	$rootdir/test/app/bdev_svc/bdev_svc -i 0 -L bdev_raid &
	raid_pid=$!
	echo "Process raid pid: $raid_pid"
	waitforlisten $raid_pid

	# Two malloc base bdevs plus a null bdev acting as a placeholder for the
	# third slot; the null bdev gets swapped out further down.
	for malloc_bdev in malloc0 malloc1; do
		$rpc_py bdev_malloc_create -b $malloc_bdev 64 $base_blocklen -o 16
	done
	$rpc_py bdev_null_create null0 64 $base_blocklen

	$rpc_py bdev_raid_create -n Raid -r 1 -b "'malloc0 malloc1 null0'" -s

	# Initial data_offset of the third slot
	slot_offset=$($rpc_py bdev_raid_get_bdevs all | jq -r "$slot_offset_filter")
	((slot_offset == 2048))

	$rpc_py bdev_null_delete null0

	# The replacement malloc bdev uses a different optimal_io_boundary (-o 30
	# instead of 16) so that the data offset has to be recalculated when it
	# is added to the RAID bdev.
	$rpc_py bdev_malloc_create -b malloc2 512 $base_blocklen -o 30
	$rpc_py bdev_raid_add_base_bdev Raid malloc2

	# The third slot must now report the recalculated data_offset
	slot_offset=$($rpc_py bdev_raid_get_bdevs all | jq -r "$slot_offset_filter")
	((slot_offset == 2070))

	killprocess $raid_pid

	return 0
}
945
# ---------------------------------------------------------------------------
# Test plan
#
# The EXIT trap stays armed for the whole run, so any exit before the end of
# the script goes through cleanup and reports status 1. It is disarmed at the
# very end, where cleanup is called once explicitly.
# ---------------------------------------------------------------------------
mkdir -p "$tmp_dir"
trap 'cleanup; exit 1' EXIT

# --- 512-byte blocks -------------------------------------------------------
base_blocklen=512

run_test "raid1_resize_data_offset_test" raid_resize_data_offset_test

run_test "raid0_resize_superblock_test" raid_resize_superblock_test 0
run_test "raid1_resize_superblock_test" raid_resize_superblock_test 1

# The nbd-backed tests only run on Linux when the nbd module can be loaded;
# has_nbd records that decision for the guarded tests further down.
if [[ $(uname -s) == Linux ]] && modprobe -n nbd; then
	has_nbd=true
	modprobe nbd
	run_test "raid_function_test_raid0" raid_function_test raid0
	run_test "raid_function_test_concat" raid_function_test concat
fi

run_test "raid0_resize_test" raid_resize_test 0
run_test "raid1_resize_test" raid_resize_test 1

# State, superblock and I/O error tests for every level with 2 to 4 base bdevs
for n in 2 3 4; do
	for level in raid0 concat raid1; do
		run_test "raid_state_function_test" raid_state_function_test $level $n false
		run_test "raid_state_function_test_sb" raid_state_function_test $level $n true
		run_test "raid_superblock_test" raid_superblock_test $level $n
		run_test "raid_read_error_test" raid_io_error_test $level $n read
		run_test "raid_write_error_test" raid_io_error_test $level $n write
	done
done

# raid1 rebuild tests (need nbd), with 2 and 4 base bdevs
if [[ $has_nbd == true ]]; then
	for n in 2 4; do
		run_test "raid_rebuild_test" raid_rebuild_test raid1 $n false false true
		run_test "raid_rebuild_test_sb" raid_rebuild_test raid1 $n true false true
		run_test "raid_rebuild_test_io" raid_rebuild_test raid1 $n false true true
		run_test "raid_rebuild_test_sb_io" raid_rebuild_test raid1 $n true true true
	done
fi

# raid5f with 3 and 4 base bdevs; its rebuild tests also need nbd
for n in 3 4; do
	run_test "raid5f_state_function_test" raid_state_function_test raid5f $n false
	run_test "raid5f_state_function_test_sb" raid_state_function_test raid5f $n true
	run_test "raid5f_superblock_test" raid_superblock_test raid5f $n
	if [[ $has_nbd == true ]]; then
		run_test "raid5f_rebuild_test" raid_rebuild_test raid5f $n false false true
		run_test "raid5f_rebuild_test_sb" raid_rebuild_test raid5f $n true false true
	fi
done

# --- 4096-byte blocks ------------------------------------------------------
base_blocklen=4096

run_test "raid_state_function_test_sb_4k" raid_state_function_test raid1 2 true
run_test "raid_superblock_test_4k" raid_superblock_test raid1 2
if [[ $has_nbd == true ]]; then
	run_test "raid_rebuild_test_sb_4k" raid_rebuild_test raid1 2 true false true
fi

# --- extra malloc parameters "-m 32" (the "md_separate" test variants) -----
base_malloc_params="-m 32"
run_test "raid_state_function_test_sb_md_separate" raid_state_function_test raid1 2 true
run_test "raid_superblock_test_md_separate" raid_superblock_test raid1 2
if [[ $has_nbd == true ]]; then
	run_test "raid_rebuild_test_sb_md_separate" raid_rebuild_test raid1 2 true false true
fi

# --- extra malloc parameters "-m 32 -i" (the "md_interleaved" variants) ----
# The rebuild test here is not guarded by has_nbd and passes false as its
# last argument, unlike the guarded rebuild runs above.
base_malloc_params="-m 32 -i"
run_test "raid_state_function_test_sb_md_interleaved" raid_state_function_test raid1 2 true
run_test "raid_superblock_test_md_interleaved" raid_superblock_test raid1 2
run_test "raid_rebuild_test_sb_md_interleaved" raid_rebuild_test raid1 2 true false false

# All tests ran: disarm the failure trap and clean up normally.
trap - EXIT
cleanup
1017