/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2022 Intel Corporation.
 *   All rights reserved.
 */

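/*
 * RAID1 (mirroring) module: writes are duplicated to every operational base
 * bdev, while reads are load-balanced across the base bdevs by tracking the
 * number of outstanding read blocks per base bdev on each I/O channel.
 */
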
#include "bdev_raid.h"

#include "spdk/likely.h"
#include "spdk/log.h"

struct raid1_info {
	/* The parent raid bdev */
	struct raid_bdev *raid_bdev;
};

struct raid1_io_channel {
	/* Array of per-base_bdev counters of outstanding read blocks on this channel */
	uint64_t read_blocks_outstanding[0];
};

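/*
 * Account for num_blocks of reads submitted to the base bdev at index idx on
 * this channel. The counters feed the read load-balancing decision in
 * raid1_channel_next_read_base_bdev().
 */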
static void
raid1_channel_inc_read_counters(struct raid_bdev_io_channel *raid_ch, uint8_t idx,
				uint64_t num_blocks)
{
	struct raid1_io_channel *raid1_ch = spdk_io_channel_get_ctx(raid_ch->module_channel);

	assert(raid1_ch->read_blocks_outstanding[idx] <= UINT64_MAX - num_blocks);
	raid1_ch->read_blocks_outstanding[idx] += num_blocks;
}

static void
raid1_channel_dec_read_counters(struct raid_bdev_io_channel *raid_ch, uint8_t idx,
				uint64_t num_blocks)
{
	struct raid1_io_channel *raid1_ch = spdk_io_channel_get_ctx(raid_ch->module_channel);

	assert(raid1_ch->read_blocks_outstanding[idx] >= num_blocks);
	raid1_ch->read_blocks_outstanding[idx] -= num_blocks;
}

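/*
 * Common completion path for base bdev I/O: free the base bdev I/O and
 * complete one part of the parent raid I/O, failing the parent if any
 * part failed.
 */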
static inline void
raid1_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, struct raid_bdev_io *raid_io)
{
	spdk_bdev_free_io(bdev_io);

	raid_bdev_io_complete_part(raid_io, 1, success ?
				   SPDK_BDEV_IO_STATUS_SUCCESS :
				   SPDK_BDEV_IO_STATUS_FAILED);
}

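/* Completion callback for writes submitted to a single base bdev. */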
static void
raid1_write_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	raid1_bdev_io_completion(bdev_io, success, raid_io);
}

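/*
 * Completion callback for reads: drop the outstanding-read-blocks counter of
 * the base bdev the read was submitted to (its index was stashed in
 * base_bdev_io_submitted), then complete the parent raid I/O.
 */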
static void
raid1_read_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	raid1_channel_dec_read_counters(raid_io->raid_ch, raid_io->base_bdev_io_submitted,
					spdk_bdev_io_from_ctx(raid_io)->u.bdev.num_blocks);

	raid1_bdev_io_completion(bdev_io, success, raid_io);
}

static void raid1_submit_rw_request(struct raid_bdev_io *raid_io);

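/* Retry callback used when a base bdev submission fails with -ENOMEM. */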
static void
_raid1_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	raid1_submit_rw_request(raid_io);
}

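/*
 * Propagate the parent I/O's extended options (memory domain and metadata
 * buffer) to the base bdev I/O.
 */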
static void
raid1_init_ext_io_opts(struct spdk_bdev_io *bdev_io, struct spdk_bdev_ext_io_opts *opts)
{
	memset(opts, 0, sizeof(*opts));
	opts->size = sizeof(*opts);
	opts->memory_domain = bdev_io->u.bdev.memory_domain;
	opts->memory_domain_ctx = bdev_io->u.bdev.memory_domain_ctx;
	opts->metadata = bdev_io->u.bdev.md_buf;
}

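/*
 * Pick the base bdev to read from: the one with the fewest outstanding read
 * blocks on this channel. Returns UINT8_MAX if no base bdev is available.
 */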
static uint8_t
raid1_channel_next_read_base_bdev(struct raid_bdev_io_channel *raid_ch)
{
	struct raid1_io_channel *raid1_ch = spdk_io_channel_get_ctx(raid_ch->module_channel);
	uint64_t read_blocks_min = UINT64_MAX;
	uint8_t idx = UINT8_MAX;
	uint8_t i;

	for (i = 0; i < raid_ch->num_channels; i++) {
		if (raid_ch->base_channel[i] != NULL &&
		    raid1_ch->read_blocks_outstanding[i] < read_blocks_min) {
			read_blocks_min = raid1_ch->read_blocks_outstanding[i];
			idx = i;
		}
	}

	return idx;
}

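/*
 * Submit a read to a single base bdev chosen by the load balancer. On
 * -ENOMEM the request is queued and retried; other submission errors are
 * returned to the caller.
 */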
static int
raid1_submit_read_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
	struct spdk_bdev_ext_io_opts io_opts;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	uint64_t pd_lba, pd_blocks;
	uint8_t idx;
	int ret;

	pd_lba = bdev_io->u.bdev.offset_blocks;
	pd_blocks = bdev_io->u.bdev.num_blocks;

	idx = raid1_channel_next_read_base_bdev(raid_ch);
	if (spdk_unlikely(idx == UINT8_MAX)) {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
		return 0;
	}

	base_info = &raid_bdev->base_bdev_info[idx];
	base_ch = raid_ch->base_channel[idx];

	raid_io->base_bdev_io_remaining = 1;

	raid1_init_ext_io_opts(bdev_io, &io_opts);
	ret = raid_bdev_readv_blocks_ext(base_info, base_ch,
					 bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					 pd_lba, pd_blocks, raid1_read_bdev_io_completion,
					 raid_io, &io_opts);

	if (spdk_likely(ret == 0)) {
		raid1_channel_inc_read_counters(raid_ch, idx, pd_blocks);
		raid_io->base_bdev_io_submitted = idx;
	} else if (spdk_unlikely(ret == -ENOMEM)) {
		raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
					base_ch, _raid1_submit_rw_request);
		return 0;
	}

	return ret;
}

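/*
 * Mirror a write to every operational base bdev. base_bdev_io_submitted is
 * the index of the next base bdev to submit to, so a request queued on
 * -ENOMEM resumes where it left off. Slots of missing base bdevs are counted
 * as successfully completed parts.
 */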
static int
raid1_submit_write_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct spdk_bdev_ext_io_opts io_opts;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	uint64_t pd_lba, pd_blocks;
	uint8_t idx;
	uint64_t base_bdev_io_not_submitted;
	int ret = 0;

	pd_lba = bdev_io->u.bdev.offset_blocks;
	pd_blocks = bdev_io->u.bdev.num_blocks;

	if (raid_io->base_bdev_io_submitted == 0) {
		raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
	}

	raid1_init_ext_io_opts(bdev_io, &io_opts);
	for (idx = raid_io->base_bdev_io_submitted; idx < raid_bdev->num_base_bdevs; idx++) {
		base_info = &raid_bdev->base_bdev_info[idx];
		base_ch = raid_io->raid_ch->base_channel[idx];

		if (base_ch == NULL) {
			/* skip a missing base bdev's slot */
			raid_io->base_bdev_io_submitted++;
			raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS);
			continue;
		}

		ret = raid_bdev_writev_blocks_ext(base_info, base_ch,
						  bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
						  pd_lba, pd_blocks, raid1_write_bdev_io_completion,
						  raid_io, &io_opts);
		if (spdk_unlikely(ret != 0)) {
			if (spdk_unlikely(ret == -ENOMEM)) {
				raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
							base_ch, _raid1_submit_rw_request);
				return 0;
			}

			base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
						     raid_io->base_bdev_io_submitted;
			raid_bdev_io_complete_part(raid_io, base_bdev_io_not_submitted,
						   SPDK_BDEV_IO_STATUS_FAILED);
			return 0;
		}

		raid_io->base_bdev_io_submitted++;
	}

	if (raid_io->base_bdev_io_submitted == 0) {
		ret = -ENODEV;
	}

	return ret;
}

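/* Entry point for read/write requests from the generic raid bdev layer. */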
static void
raid1_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	int ret;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		ret = raid1_submit_read_request(raid_io);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		ret = raid1_submit_write_request(raid_io);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	if (spdk_unlikely(ret != 0)) {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

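/*
 * The per-channel read counters are zero-initialized when the channel is
 * allocated, so there is nothing to do on channel create or destroy.
 */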
static void
raid1_ioch_destroy(void *io_device, void *ctx_buf)
{
}

static int
raid1_ioch_create(void *io_device, void *ctx_buf)
{
	return 0;
}

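/* Called once the io_device is unregistered; finishes the module stop. */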
static void
raid1_io_device_unregister_done(void *io_device)
{
	struct raid1_info *r1info = io_device;

	raid_bdev_module_stop_done(r1info->raid_bdev);

	free(r1info);
}

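/*
 * Start the RAID1 bdev: clamp every base bdev's usable size to the smallest
 * one (all mirrors must hold the same data) and register the io_device that
 * provides the per-channel read counters.
 */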
static int
raid1_start(struct raid_bdev *raid_bdev)
{
	uint64_t min_blockcnt = UINT64_MAX;
	struct raid_base_bdev_info *base_info;
	struct raid1_info *r1info;
	char name[256];

	r1info = calloc(1, sizeof(*r1info));
	if (!r1info) {
		SPDK_ERRLOG("Failed to allocate RAID1 info device structure\n");
		return -ENOMEM;
	}
	r1info->raid_bdev = raid_bdev;

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		min_blockcnt = spdk_min(min_blockcnt, base_info->data_size);
	}

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		base_info->data_size = min_blockcnt;
	}

	raid_bdev->bdev.blockcnt = min_blockcnt;
	raid_bdev->module_private = r1info;

	snprintf(name, sizeof(name), "raid1_%s", raid_bdev->bdev.name);
	spdk_io_device_register(r1info, raid1_ioch_create, raid1_ioch_destroy,
				sizeof(struct raid1_io_channel) + raid_bdev->num_base_bdevs * sizeof(uint64_t),
				name);

	return 0;
}

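/*
 * Stop is asynchronous: return false and signal completion via
 * raid_bdev_module_stop_done() from the unregister callback.
 */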
static bool
raid1_stop(struct raid_bdev *raid_bdev)
{
	struct raid1_info *r1info = raid_bdev->module_private;

	spdk_io_device_unregister(r1info, raid1_io_device_unregister_done);

	return false;
}

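/* Get an I/O channel for the RAID1 io_device registered in raid1_start(). */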
static struct spdk_io_channel *
raid1_get_io_channel(struct raid_bdev *raid_bdev)
{
	struct raid1_info *r1info = raid_bdev->module_private;

	return spdk_get_io_channel(r1info);
}

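/*
 * RAID1 needs at least two base bdevs and stays operational as long as at
 * least one of them is operational.
 */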
static struct raid_bdev_module g_raid1_module = {
	.level = RAID1,
	.base_bdevs_min = 2,
	.base_bdevs_constraint = {CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL, 1},
	.memory_domains_supported = true,
	.start = raid1_start,
	.stop = raid1_stop,
	.submit_rw_request = raid1_submit_rw_request,
	.get_io_channel = raid1_get_io_channel,
};
RAID_MODULE_REGISTER(&g_raid1_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_raid1)