xref: /spdk/module/bdev/raid/raid1.c (revision d6aa653d2b70dee5e9f9d47c9dc6fee5a3da39fb)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2022 Intel Corporation.
3  *   All rights reserved.
4  */
5 
6 #include "bdev_raid.h"
7 
8 #include "spdk/likely.h"
9 #include "spdk/log.h"
10 
/* Module-private context for a raid1 bdev; registered as the io_device. */
struct raid1_info {
	/* The parent raid bdev */
	struct raid_bdev *raid_bdev;
};
15 
/*
 * Per-thread channel context for a raid1 bdev.  It is allocated with a
 * variable-length tail: raid1_start() registers the io_device with a ctx
 * size of sizeof(struct raid1_io_channel) + num_base_bdevs * sizeof(uint64_t).
 */
struct raid1_io_channel {
	/* Array of per-base_bdev counters of outstanding read blocks on this channel.
	 * NOTE: [0] is a GNU zero-length array used as a flexible array member;
	 * the real length is raid_bdev->num_base_bdevs (set at registration). */
	uint64_t read_blocks_outstanding[0];
};
20 
21 static void
22 raid1_channel_inc_read_counters(struct raid_bdev_io_channel *raid_ch, uint8_t idx,
23 				uint64_t num_blocks)
24 {
25 	struct raid1_io_channel *raid1_ch = raid_bdev_channel_get_module_ctx(raid_ch);
26 
27 	assert(raid1_ch->read_blocks_outstanding[idx] <= UINT64_MAX - num_blocks);
28 	raid1_ch->read_blocks_outstanding[idx] += num_blocks;
29 }
30 
31 static void
32 raid1_channel_dec_read_counters(struct raid_bdev_io_channel *raid_ch, uint8_t idx,
33 				uint64_t num_blocks)
34 {
35 	struct raid1_io_channel *raid1_ch = raid_bdev_channel_get_module_ctx(raid_ch);
36 
37 	assert(raid1_ch->read_blocks_outstanding[idx] >= num_blocks);
38 	raid1_ch->read_blocks_outstanding[idx] -= num_blocks;
39 }
40 
41 static void
42 raid1_write_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
43 {
44 	struct raid_bdev_io *raid_io = cb_arg;
45 
46 	if (!success) {
47 		struct raid_base_bdev_info *base_info;
48 
49 		base_info = raid_bdev_channel_get_base_info(raid_io->raid_ch, bdev_io->bdev);
50 		if (base_info) {
51 			raid_bdev_fail_base_bdev(base_info);
52 		}
53 	}
54 
55 	spdk_bdev_free_io(bdev_io);
56 
57 	raid_bdev_io_complete_part(raid_io, 1, success ?
58 				   SPDK_BDEV_IO_STATUS_SUCCESS :
59 				   SPDK_BDEV_IO_STATUS_FAILED);
60 }
61 
62 static void
63 raid1_read_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
64 {
65 	struct raid_bdev_io *raid_io = cb_arg;
66 
67 	spdk_bdev_free_io(bdev_io);
68 
69 	raid1_channel_dec_read_counters(raid_io->raid_ch, raid_io->base_bdev_io_submitted,
70 					raid_io->num_blocks);
71 
72 	raid_bdev_io_complete(raid_io, success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
73 }
74 
75 static void raid1_submit_rw_request(struct raid_bdev_io *raid_io);
76 
/* spdk_bdev_io_wait_cb adapter: re-attempt a previously queued read/write. */
static void
_raid1_submit_rw_request(void *_raid_io)
{
	raid1_submit_rw_request((struct raid_bdev_io *)_raid_io);
}
84 
85 static void
86 raid1_init_ext_io_opts(struct spdk_bdev_ext_io_opts *opts, struct raid_bdev_io *raid_io)
87 {
88 	memset(opts, 0, sizeof(*opts));
89 	opts->size = sizeof(*opts);
90 	opts->memory_domain = raid_io->memory_domain;
91 	opts->memory_domain_ctx = raid_io->memory_domain_ctx;
92 	opts->metadata = raid_io->md_buf;
93 }
94 
95 static uint8_t
96 raid1_channel_next_read_base_bdev(struct raid_bdev *raid_bdev, struct raid_bdev_io_channel *raid_ch)
97 {
98 	struct raid1_io_channel *raid1_ch = raid_bdev_channel_get_module_ctx(raid_ch);
99 	uint64_t read_blocks_min = UINT64_MAX;
100 	uint8_t idx = UINT8_MAX;
101 	uint8_t i;
102 
103 	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
104 		if (raid_bdev_channel_get_base_channel(raid_ch, i) != NULL &&
105 		    raid1_ch->read_blocks_outstanding[i] < read_blocks_min) {
106 			read_blocks_min = raid1_ch->read_blocks_outstanding[i];
107 			idx = i;
108 		}
109 	}
110 
111 	return idx;
112 }
113 
/*
 * Submit a read to exactly one base bdev, chosen by
 * raid1_channel_next_read_base_bdev() (fewest outstanding read blocks).
 *
 * Returns 0 when the I/O was submitted, queued for retry, or completed
 * internally (e.g. failed because no base bdev is accessible); otherwise a
 * negative errno from the submission attempt.
 */
static int
raid1_submit_read_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
	struct spdk_bdev_ext_io_opts io_opts;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	uint8_t idx;
	int ret;

	idx = raid1_channel_next_read_base_bdev(raid_bdev, raid_ch);
	if (spdk_unlikely(idx == UINT8_MAX)) {
		/* No base bdev is accessible on this channel - fail the I/O. */
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
		return 0;
	}

	base_info = &raid_bdev->base_bdev_info[idx];
	base_ch = raid_bdev_channel_get_base_channel(raid_ch, idx);

	raid1_init_ext_io_opts(&io_opts, raid_io);
	ret = raid_bdev_readv_blocks_ext(base_info, base_ch, raid_io->iovs, raid_io->iovcnt,
					 raid_io->offset_blocks, raid_io->num_blocks,
					 raid1_read_bdev_io_completion, raid_io, &io_opts);

	if (spdk_likely(ret == 0)) {
		/* Count the read only after a successful submit, and record which
		 * base bdev served it so the completion decrements the right counter. */
		raid1_channel_inc_read_counters(raid_ch, idx, raid_io->num_blocks);
		raid_io->base_bdev_io_submitted = idx;
	} else if (spdk_unlikely(ret == -ENOMEM)) {
		/* Out of bdev_io objects - wait on the base bdev and retry the
		 * whole submission later. */
		raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
					base_ch, _raid1_submit_rw_request);
		return 0;
	}

	return ret;
}
150 
/*
 * Mirror a write to every base bdev.  The function is re-entrant via the
 * ENOMEM wait queue: base_bdev_io_submitted records how many base bdevs
 * have already been handled, so a retry resumes where it left off.
 *
 * Returns 0 when all remaining submissions were issued, queued, or accounted
 * as failed; -ENODEV if not a single base bdev could be written.
 */
static int
raid1_submit_write_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct spdk_bdev_ext_io_opts io_opts;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	uint8_t idx;
	uint64_t base_bdev_io_not_submitted;
	int ret = 0;

	if (raid_io->base_bdev_io_submitted == 0) {
		/* First pass (not an ENOMEM retry): expect one completion per base
		 * bdev, and default to FAILED so the I/O only succeeds if at least
		 * one base write succeeds. */
		raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
		raid_bdev_io_set_default_status(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	}

	raid1_init_ext_io_opts(&io_opts, raid_io);
	for (idx = raid_io->base_bdev_io_submitted; idx < raid_bdev->num_base_bdevs; idx++) {
		base_info = &raid_bdev->base_bdev_info[idx];
		base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, idx);

		if (base_ch == NULL) {
			/* skip a missing base bdev's slot */
			raid_io->base_bdev_io_submitted++;
			raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_FAILED);
			continue;
		}

		ret = raid_bdev_writev_blocks_ext(base_info, base_ch, raid_io->iovs, raid_io->iovcnt,
						  raid_io->offset_blocks, raid_io->num_blocks,
						  raid1_write_bdev_io_completion, raid_io, &io_opts);
		if (spdk_unlikely(ret != 0)) {
			if (spdk_unlikely(ret == -ENOMEM)) {
				/* Out of bdev_io objects - queue a retry that resumes at
				 * the current base_bdev_io_submitted index. */
				raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
							base_ch, _raid1_submit_rw_request);
				return 0;
			}

			/* Hard submission error: count all not-yet-submitted base bdevs
			 * (including this one) as failed completions so the part
			 * accounting still reaches base_bdev_io_remaining. */
			base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
						     raid_io->base_bdev_io_submitted;
			raid_bdev_io_complete_part(raid_io, base_bdev_io_not_submitted,
						   SPDK_BDEV_IO_STATUS_FAILED);
			return 0;
		}

		raid_io->base_bdev_io_submitted++;
	}

	if (raid_io->base_bdev_io_submitted == 0) {
		/* Every base bdev slot was missing - nothing was written. */
		ret = -ENODEV;
	}

	return ret;
}
205 
206 static void
207 raid1_submit_rw_request(struct raid_bdev_io *raid_io)
208 {
209 	int ret;
210 
211 	switch (raid_io->type) {
212 	case SPDK_BDEV_IO_TYPE_READ:
213 		ret = raid1_submit_read_request(raid_io);
214 		break;
215 	case SPDK_BDEV_IO_TYPE_WRITE:
216 		ret = raid1_submit_write_request(raid_io);
217 		break;
218 	default:
219 		ret = -EINVAL;
220 		break;
221 	}
222 
223 	if (spdk_unlikely(ret != 0)) {
224 		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
225 	}
226 }
227 
/* io_device channel destroy callback - the counters live inside the ctx
 * buffer itself, so there is nothing to tear down. */
static void
raid1_ioch_destroy(void *io_device, void *ctx_buf)
{
}
232 
/* io_device channel create callback.  No explicit setup is done here;
 * presumably the framework hands us a zeroed ctx buffer so the per-base
 * read counters start at 0 - TODO confirm against spdk_get_io_channel(). */
static int
raid1_ioch_create(void *io_device, void *ctx_buf)
{
	return 0;
}
238 
239 static void
240 raid1_io_device_unregister_done(void *io_device)
241 {
242 	struct raid1_info *r1info = io_device;
243 
244 	raid_bdev_module_stop_done(r1info->raid_bdev);
245 
246 	free(r1info);
247 }
248 
249 static int
250 raid1_start(struct raid_bdev *raid_bdev)
251 {
252 	uint64_t min_blockcnt = UINT64_MAX;
253 	struct raid_base_bdev_info *base_info;
254 	struct raid1_info *r1info;
255 	char name[256];
256 
257 	r1info = calloc(1, sizeof(*r1info));
258 	if (!r1info) {
259 		SPDK_ERRLOG("Failed to allocate RAID1 info device structure\n");
260 		return -ENOMEM;
261 	}
262 	r1info->raid_bdev = raid_bdev;
263 
264 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
265 		min_blockcnt = spdk_min(min_blockcnt, base_info->data_size);
266 	}
267 
268 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
269 		base_info->data_size = min_blockcnt;
270 	}
271 
272 	raid_bdev->bdev.blockcnt = min_blockcnt;
273 	raid_bdev->module_private = r1info;
274 
275 	snprintf(name, sizeof(name), "raid1_%s", raid_bdev->bdev.name);
276 	spdk_io_device_register(r1info, raid1_ioch_create, raid1_ioch_destroy,
277 				sizeof(struct raid1_io_channel) + raid_bdev->num_base_bdevs * sizeof(uint64_t),
278 				name);
279 
280 	return 0;
281 }
282 
283 static bool
284 raid1_stop(struct raid_bdev *raid_bdev)
285 {
286 	struct raid1_info *r1info = raid_bdev->module_private;
287 
288 	spdk_io_device_unregister(r1info, raid1_io_device_unregister_done);
289 
290 	return false;
291 }
292 
293 static struct spdk_io_channel *
294 raid1_get_io_channel(struct raid_bdev *raid_bdev)
295 {
296 	struct raid1_info *r1info = raid_bdev->module_private;
297 
298 	return spdk_get_io_channel(r1info);
299 }
300 
/* Completion of the rebuild write to the target base bdev - finish the
 * process request with 0 on success, -EIO on failure. */
static void
raid1_process_write_completed(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_process_request *process_req = cb_arg;
	int rc = success ? 0 : -EIO;

	spdk_bdev_free_io(bdev_io);

	raid_bdev_process_request_complete(process_req, rc);
}
310 
311 static void raid1_process_submit_write(struct raid_bdev_process_request *process_req);
312 
/* spdk_bdev_io_wait_cb adapter: retry a queued rebuild write. */
static void
_raid1_process_submit_write(void *ctx)
{
	raid1_process_submit_write((struct raid_bdev_process_request *)ctx);
}
320 
/*
 * Write the data just read by the rebuild process to the target base bdev.
 * On -ENOMEM the write is queued for retry via the bdev wait queue; any
 * other submission error completes the process request with that errno.
 */
static void
raid1_process_submit_write(struct raid_bdev_process_request *process_req)
{
	struct raid_bdev_io *raid_io = &process_req->raid_io;
	struct spdk_bdev_ext_io_opts io_opts;
	int ret;

	raid1_init_ext_io_opts(&io_opts, raid_io);
	ret = raid_bdev_writev_blocks_ext(process_req->target, process_req->target_ch,
					  raid_io->iovs, raid_io->iovcnt,
					  raid_io->offset_blocks, raid_io->num_blocks,
					  raid1_process_write_completed, process_req, &io_opts);
	if (spdk_unlikely(ret != 0)) {
		if (ret == -ENOMEM) {
			/* Out of bdev_io objects - retry this write when one frees up. */
			raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(process_req->target->desc),
						process_req->target_ch, _raid1_process_submit_write);
		} else {
			raid_bdev_process_request_complete(process_req, ret);
		}
	}
}
342 
343 static void
344 raid1_process_read_completed(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
345 {
346 	struct raid_bdev_process_request *process_req = SPDK_CONTAINEROF(raid_io,
347 			struct raid_bdev_process_request, raid_io);
348 
349 	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
350 		raid_bdev_process_request_complete(process_req, -EIO);
351 		return;
352 	}
353 
354 	raid1_process_submit_write(process_req);
355 }
356 
357 static int
358 raid1_submit_process_request(struct raid_bdev_process_request *process_req,
359 			     struct raid_bdev_io_channel *raid_ch)
360 {
361 	struct raid_bdev_io *raid_io = &process_req->raid_io;
362 	int ret;
363 
364 	raid_bdev_io_init(raid_io, raid_ch, SPDK_BDEV_IO_TYPE_READ,
365 			  process_req->offset_blocks, process_req->num_blocks,
366 			  &process_req->iov, 1, process_req->md_buf, NULL, NULL);
367 	raid_io->completion_cb = raid1_process_read_completed;
368 
369 	ret = raid1_submit_read_request(raid_io);
370 	if (spdk_likely(ret == 0)) {
371 		return process_req->num_blocks;
372 	} else if (ret < 0) {
373 		return ret;
374 	} else {
375 		return -EINVAL;
376 	}
377 }
378 
/* raid1 (mirroring) module descriptor registered with the raid bdev framework. */
static struct raid_bdev_module g_raid1_module = {
	.level = RAID1,
	/* Mirroring requires at least two base bdevs. */
	.base_bdevs_min = 2,
	/* The array stays operational as long as at least one base bdev is. */
	.base_bdevs_constraint = {CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL, 1},
	.memory_domains_supported = true,
	.start = raid1_start,
	.stop = raid1_stop,
	.submit_rw_request = raid1_submit_rw_request,
	.get_io_channel = raid1_get_io_channel,
	.submit_process_request = raid1_submit_process_request,
};
RAID_MODULE_REGISTER(&g_raid1_module)
391 
392 SPDK_LOG_REGISTER_COMPONENT(bdev_raid1)
393