xref: /spdk/module/bdev/raid/raid1.c (revision 45a053c5777494f4e8ce4bc1191c9de3920377f7)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2022 Intel Corporation.
3  *   All rights reserved.
4  */
5 
6 #include "bdev_raid.h"
7 
8 #include "spdk/likely.h"
9 #include "spdk/log.h"
10 
/* Module-private context of a RAID1 bdev; registered as an io_device in raid1_start(). */
struct raid1_info {
	/* The parent raid bdev */
	struct raid_bdev *raid_bdev;
};
15 
16 struct raid1_io_channel {
17 	/* Array of per-base_bdev counters of outstanding read blocks on this channel */
18 	uint64_t read_blocks_outstanding[0];
19 };
20 
21 static void
22 raid1_channel_inc_read_counters(struct raid_bdev_io_channel *raid_ch, uint8_t idx,
23 				uint64_t num_blocks)
24 {
25 	struct raid1_io_channel *raid1_ch = raid_bdev_channel_get_module_ctx(raid_ch);
26 
27 	assert(raid1_ch->read_blocks_outstanding[idx] <= UINT64_MAX - num_blocks);
28 	raid1_ch->read_blocks_outstanding[idx] += num_blocks;
29 }
30 
31 static void
32 raid1_channel_dec_read_counters(struct raid_bdev_io_channel *raid_ch, uint8_t idx,
33 				uint64_t num_blocks)
34 {
35 	struct raid1_io_channel *raid1_ch = raid_bdev_channel_get_module_ctx(raid_ch);
36 
37 	assert(raid1_ch->read_blocks_outstanding[idx] >= num_blocks);
38 	raid1_ch->read_blocks_outstanding[idx] -= num_blocks;
39 }
40 
41 static inline void
42 raid1_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, struct raid_bdev_io *raid_io)
43 {
44 	spdk_bdev_free_io(bdev_io);
45 
46 	raid_bdev_io_complete_part(raid_io, 1, success ?
47 				   SPDK_BDEV_IO_STATUS_SUCCESS :
48 				   SPDK_BDEV_IO_STATUS_FAILED);
49 }
50 
/* spdk_bdev_io completion callback for a write to one base bdev. */
static void
raid1_write_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	raid1_bdev_io_completion(bdev_io, success, (struct raid_bdev_io *)cb_arg);
}
58 
59 static void
60 raid1_read_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
61 {
62 	struct raid_bdev_io *raid_io = cb_arg;
63 
64 	raid1_channel_dec_read_counters(raid_io->raid_ch, raid_io->base_bdev_io_submitted,
65 					raid_io->num_blocks);
66 
67 	raid1_bdev_io_completion(bdev_io, success, raid_io);
68 }
69 
70 static void raid1_submit_rw_request(struct raid_bdev_io *raid_io);
71 
/* void(void *) adapter so raid1_submit_rw_request() can be queued as a retry callback. */
static void
_raid1_submit_rw_request(void *_raid_io)
{
	raid1_submit_rw_request((struct raid_bdev_io *)_raid_io);
}
79 
80 static void
81 raid1_init_ext_io_opts(struct spdk_bdev_ext_io_opts *opts, struct raid_bdev_io *raid_io)
82 {
83 	memset(opts, 0, sizeof(*opts));
84 	opts->size = sizeof(*opts);
85 	opts->memory_domain = raid_io->memory_domain;
86 	opts->memory_domain_ctx = raid_io->memory_domain_ctx;
87 	opts->metadata = raid_io->md_buf;
88 }
89 
90 static uint8_t
91 raid1_channel_next_read_base_bdev(struct raid_bdev *raid_bdev, struct raid_bdev_io_channel *raid_ch)
92 {
93 	struct raid1_io_channel *raid1_ch = raid_bdev_channel_get_module_ctx(raid_ch);
94 	uint64_t read_blocks_min = UINT64_MAX;
95 	uint8_t idx = UINT8_MAX;
96 	uint8_t i;
97 
98 	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
99 		if (raid_bdev_channel_get_base_channel(raid_ch, i) != NULL &&
100 		    raid1_ch->read_blocks_outstanding[i] < read_blocks_min) {
101 			read_blocks_min = raid1_ch->read_blocks_outstanding[i];
102 			idx = i;
103 		}
104 	}
105 
106 	return idx;
107 }
108 
/*
 * Submit a read to a single base bdev chosen by the per-channel load
 * balancer. Returns 0 when the request was submitted, completed as failed,
 * or queued for retry; returns a negative errno only when submission failed
 * with an error other than -ENOMEM (the caller then fails the raid_io).
 */
static int
raid1_submit_read_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
	struct spdk_bdev_ext_io_opts io_opts;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	uint8_t idx;
	int ret;

	/* UINT8_MAX means no base bdev currently has an open channel */
	idx = raid1_channel_next_read_base_bdev(raid_bdev, raid_ch);
	if (spdk_unlikely(idx == UINT8_MAX)) {
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
		return 0;
	}

	base_info = &raid_bdev->base_bdev_info[idx];
	base_ch = raid_bdev_channel_get_base_channel(raid_ch, idx);

	/* A read is mirrored from exactly one base bdev */
	raid_io->base_bdev_io_remaining = 1;

	raid1_init_ext_io_opts(&io_opts, raid_io);
	ret = raid_bdev_readv_blocks_ext(base_info, base_ch, raid_io->iovs, raid_io->iovcnt,
					 raid_io->offset_blocks, raid_io->num_blocks,
					 raid1_read_bdev_io_completion, raid_io, &io_opts);

	if (spdk_likely(ret == 0)) {
		/* Only account the read once it is actually in flight; remember
		 * the target index so the completion can decrement the counter. */
		raid1_channel_inc_read_counters(raid_ch, idx, raid_io->num_blocks);
		raid_io->base_bdev_io_submitted = idx;
	} else if (spdk_unlikely(ret == -ENOMEM)) {
		/* Out of bdev_io objects - retry the whole request later */
		raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
					base_ch, _raid1_submit_rw_request);
		return 0;
	}

	return ret;
}
147 
/*
 * Submit the write to every base bdev (mirroring). base_bdev_io_submitted
 * doubles as the resume point: on -ENOMEM the request is queued and this
 * function is re-entered later, continuing from the first unsubmitted base
 * bdev. Returns 0 when fully handled (submitted, queued, or failed in
 * place), or a negative errno for the caller to fail the raid_io (-ENODEV
 * when not a single base bdev could be written).
 */
static int
raid1_submit_write_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct spdk_bdev_ext_io_opts io_opts;
	struct raid_base_bdev_info *base_info;
	struct spdk_io_channel *base_ch;
	uint8_t idx;
	uint64_t base_bdev_io_not_submitted;
	int ret = 0;

	/* First pass (not a resume): expect one completion per base bdev slot */
	if (raid_io->base_bdev_io_submitted == 0) {
		raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
	}

	raid1_init_ext_io_opts(&io_opts, raid_io);
	for (idx = raid_io->base_bdev_io_submitted; idx < raid_bdev->num_base_bdevs; idx++) {
		base_info = &raid_bdev->base_bdev_info[idx];
		base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, idx);

		if (base_ch == NULL) {
			/* skip a missing base bdev's slot */
			raid_io->base_bdev_io_submitted++;
			raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS);
			continue;
		}

		ret = raid_bdev_writev_blocks_ext(base_info, base_ch, raid_io->iovs, raid_io->iovcnt,
						  raid_io->offset_blocks, raid_io->num_blocks,
						  raid1_write_bdev_io_completion, raid_io, &io_opts);
		if (spdk_unlikely(ret != 0)) {
			if (spdk_unlikely(ret == -ENOMEM)) {
				/* Out of bdev_io objects - resume from this index later */
				raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
							base_ch, _raid1_submit_rw_request);
				return 0;
			}

			/* Hard submission error: complete all still-unsubmitted
			 * parts as failed; already-submitted writes complete on
			 * their own via the completion callback. */
			base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
						     raid_io->base_bdev_io_submitted;
			raid_bdev_io_complete_part(raid_io, base_bdev_io_not_submitted,
						   SPDK_BDEV_IO_STATUS_FAILED);
			return 0;
		}

		raid_io->base_bdev_io_submitted++;
	}

	/* No base bdev had an open channel - nothing was written */
	if (raid_io->base_bdev_io_submitted == 0) {
		ret = -ENODEV;
	}

	return ret;
}
201 
202 static void
203 raid1_submit_rw_request(struct raid_bdev_io *raid_io)
204 {
205 	int ret;
206 
207 	switch (raid_io->type) {
208 	case SPDK_BDEV_IO_TYPE_READ:
209 		ret = raid1_submit_read_request(raid_io);
210 		break;
211 	case SPDK_BDEV_IO_TYPE_WRITE:
212 		ret = raid1_submit_write_request(raid_io);
213 		break;
214 	default:
215 		ret = -EINVAL;
216 		break;
217 	}
218 
219 	if (spdk_unlikely(ret != 0)) {
220 		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
221 	}
222 }
223 
/* Channel destroy callback: the counters live inside the ctx buffer itself,
 * so there is nothing to tear down here. */
static void
raid1_ioch_destroy(void *io_device, void *ctx_buf)
{
}
228 
/* Channel create callback: no explicit initialization is performed.
 * NOTE(review): the read counters are assumed to start at zero - presumably
 * the io_channel ctx buffer is zeroed by the framework; confirm. */
static int
raid1_ioch_create(void *io_device, void *ctx_buf)
{
	return 0;
}
234 
235 static void
236 raid1_io_device_unregister_done(void *io_device)
237 {
238 	struct raid1_info *r1info = io_device;
239 
240 	raid_bdev_module_stop_done(r1info->raid_bdev);
241 
242 	free(r1info);
243 }
244 
245 static int
246 raid1_start(struct raid_bdev *raid_bdev)
247 {
248 	uint64_t min_blockcnt = UINT64_MAX;
249 	struct raid_base_bdev_info *base_info;
250 	struct raid1_info *r1info;
251 	char name[256];
252 
253 	r1info = calloc(1, sizeof(*r1info));
254 	if (!r1info) {
255 		SPDK_ERRLOG("Failed to allocate RAID1 info device structure\n");
256 		return -ENOMEM;
257 	}
258 	r1info->raid_bdev = raid_bdev;
259 
260 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
261 		min_blockcnt = spdk_min(min_blockcnt, base_info->data_size);
262 	}
263 
264 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
265 		base_info->data_size = min_blockcnt;
266 	}
267 
268 	raid_bdev->bdev.blockcnt = min_blockcnt;
269 	raid_bdev->module_private = r1info;
270 
271 	snprintf(name, sizeof(name), "raid1_%s", raid_bdev->bdev.name);
272 	spdk_io_device_register(r1info, raid1_ioch_create, raid1_ioch_destroy,
273 				sizeof(struct raid1_io_channel) + raid_bdev->num_base_bdevs * sizeof(uint64_t),
274 				name);
275 
276 	return 0;
277 }
278 
279 static bool
280 raid1_stop(struct raid_bdev *raid_bdev)
281 {
282 	struct raid1_info *r1info = raid_bdev->module_private;
283 
284 	spdk_io_device_unregister(r1info, raid1_io_device_unregister_done);
285 
286 	return false;
287 }
288 
289 static struct spdk_io_channel *
290 raid1_get_io_channel(struct raid_bdev *raid_bdev)
291 {
292 	struct raid1_info *r1info = raid_bdev->module_private;
293 
294 	return spdk_get_io_channel(r1info);
295 }
296 
/* Completion of the rebuild write: free the bdev_io and finish the process request. */
static void
raid1_process_write_completed(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_process_request *process_req = cb_arg;
	int status = success ? 0 : -EIO;

	spdk_bdev_free_io(bdev_io);
	raid_bdev_process_request_complete(process_req, status);
}
306 
307 static void raid1_process_submit_write(struct raid_bdev_process_request *process_req);
308 
/* void(void *) adapter for queued retries of raid1_process_submit_write(). */
static void
_raid1_process_submit_write(void *ctx)
{
	raid1_process_submit_write((struct raid_bdev_process_request *)ctx);
}
316 
/*
 * Second half of a rebuild step: write the data previously read (see
 * raid1_process_read_completed()) to the rebuild target. On -ENOMEM the
 * write is queued for retry; any other submission error completes the
 * process request with that error.
 */
static void
raid1_process_submit_write(struct raid_bdev_process_request *process_req)
{
	struct raid_bdev_io *raid_io = &process_req->raid_io;
	struct spdk_bdev_ext_io_opts io_opts;
	int ret;

	raid1_init_ext_io_opts(&io_opts, raid_io);
	ret = raid_bdev_writev_blocks_ext(process_req->target, process_req->target_ch,
					  raid_io->iovs, raid_io->iovcnt,
					  raid_io->offset_blocks, raid_io->num_blocks,
					  raid1_process_write_completed, process_req, &io_opts);
	if (spdk_unlikely(ret != 0)) {
		if (ret == -ENOMEM) {
			/* Out of bdev_io objects - retry later */
			raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(process_req->target->desc),
						process_req->target_ch, _raid1_process_submit_write);
		} else {
			raid_bdev_process_request_complete(process_req, ret);
		}
	}
}
338 
339 static void
340 raid1_process_read_completed(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
341 {
342 	struct raid_bdev_process_request *process_req = SPDK_CONTAINEROF(raid_io,
343 			struct raid_bdev_process_request, raid_io);
344 
345 	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
346 		raid_bdev_process_request_complete(process_req, -EIO);
347 		return;
348 	}
349 
350 	raid1_process_submit_write(process_req);
351 }
352 
353 static int
354 raid1_submit_process_request(struct raid_bdev_process_request *process_req,
355 			     struct raid_bdev_io_channel *raid_ch)
356 {
357 	struct raid_bdev_io *raid_io = &process_req->raid_io;
358 	int ret;
359 
360 	raid_bdev_io_init(raid_io, raid_ch, SPDK_BDEV_IO_TYPE_READ,
361 			  process_req->offset_blocks, process_req->num_blocks,
362 			  &process_req->iov, 1, process_req->md_buf, NULL, NULL);
363 	raid_io->completion_cb = raid1_process_read_completed;
364 
365 	ret = raid1_submit_read_request(raid_io);
366 	if (spdk_likely(ret == 0)) {
367 		return process_req->num_blocks;
368 	} else if (ret < 0) {
369 		return ret;
370 	} else {
371 		return -EINVAL;
372 	}
373 }
374 
375 static struct raid_bdev_module g_raid1_module = {
376 	.level = RAID1,
377 	.base_bdevs_min = 2,
378 	.base_bdevs_constraint = {CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL, 1},
379 	.memory_domains_supported = true,
380 	.start = raid1_start,
381 	.stop = raid1_stop,
382 	.submit_rw_request = raid1_submit_rw_request,
383 	.get_io_channel = raid1_get_io_channel,
384 	.submit_process_request = raid1_submit_process_request,
385 };
386 RAID_MODULE_REGISTER(&g_raid1_module)
387 
388 SPDK_LOG_REGISTER_COMPONENT(bdev_raid1)
389