/* xref: /spdk/module/bdev/nvme/bdev_nvme.h (revision fecffda6ecf8853b82edccde429b68252f0a62c5) */
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2016 Intel Corporation. All rights reserved.
3  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
4  *   Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5  */
6 
7 #ifndef SPDK_BDEV_NVME_H
8 #define SPDK_BDEV_NVME_H
9 
10 #include "spdk/stdinc.h"
11 
12 #include "spdk/queue.h"
13 #include "spdk/nvme.h"
14 #include "spdk/bdev_module.h"
15 
/* Global list of all multipath controller aggregates (struct nvme_bdev_ctrlr).
 * Presumably guarded by g_bdev_nvme_mutex — confirm against bdev_nvme.c. */
TAILQ_HEAD(nvme_bdev_ctrlrs, nvme_bdev_ctrlr);
extern struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs;
extern pthread_mutex_t g_bdev_nvme_mutex;
/* Set once the module finish sequence has started. NOTE(review): setter lives in bdev_nvme.c. */
extern bool g_bdev_nvme_module_finish;

/* Upper bound on the number of NVMe controllers attached at once. */
#define NVME_MAX_CONTROLLERS 1024
22 
/* I/O distribution policy for an NVMe bdev with multiple I/O paths. */
enum bdev_nvme_multipath_policy {
	/* All I/O is routed to a single (preferred) path; others are standby. */
	BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE,
	/* I/O is distributed across all usable paths. */
	BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE,
};
27 
/* Completion callback for bdev_nvme_create(): bdev_count is the number of bdevs
 * created; rc is 0 on success or a negated errno. */
typedef void (*spdk_bdev_create_nvme_fn)(void *ctx, size_t bdev_count, int rc);
/* Completion callback for bdev_nvme_start_discovery(). */
typedef void (*spdk_bdev_nvme_start_discovery_fn)(void *ctx, int status);
/* Completion callback for bdev_nvme_stop_discovery(). */
typedef void (*spdk_bdev_nvme_stop_discovery_fn)(void *ctx);
31 
/* Bdev-layer options applied to an attached NVMe controller
 * (see bdev_nvme_get_default_ctrlr_opts() for defaults). */
struct nvme_ctrlr_opts {
	/* Protection information check flags (SPDK_NVME_IO_FLAGS_PRCHK_*). */
	uint32_t prchk_flags;
	/* Seconds to keep retrying reconnect before giving the controller up.
	 * NOTE(review): semantics of negative/zero values defined in bdev_nvme.c — confirm. */
	int32_t ctrlr_loss_timeout_sec;
	/* Delay in seconds between reconnect attempts. */
	uint32_t reconnect_delay_sec;
	/* Seconds after which queued I/O fails fast while the controller is unreachable. */
	uint32_t fast_io_fail_timeout_sec;
	/* True if this controller was attached via the discovery service. */
	bool from_discovery_service;
};
39 
/* Context tracking one asynchronous controller attach, from probe start
 * through namespace population to the user's completion callback. */
struct nvme_async_probe_ctx {
	struct spdk_nvme_probe_ctx *probe_ctx;
	/* Base name used to derive the created bdev names. */
	const char *base_name;
	/* Output array filled with created bdev names; capacity is 'count'. */
	const char **names;
	uint32_t count;
	/* Poller driving the async probe to completion — presumably
	 * spdk_nvme_probe_poll_async(); confirm in bdev_nvme.c. */
	struct spdk_poller *poller;
	/* Transport ID of the controller being attached. */
	struct spdk_nvme_transport_id trid;
	/* Bdev-layer controller options for the new controller. */
	struct nvme_ctrlr_opts bdev_opts;
	/* Driver-level controller options passed to the probe. */
	struct spdk_nvme_ctrlr_opts drv_opts;
	/* User callback invoked when attach + namespace population finish. */
	spdk_bdev_create_nvme_fn cb_fn;
	void *cb_ctx;
	/* Number of namespace populations still outstanding. */
	uint32_t populates_in_progress;
	bool ctrlr_attached;
	bool probe_done;
	bool namespaces_populated;
};
56 
/* Per-namespace state: ties an active spdk_nvme_ns to the bdev exposed on
 * top of it and tracks its ANA (Asymmetric Namespace Access) state. */
struct nvme_ns {
	/* Namespace ID (NSID). */
	uint32_t			id;
	struct spdk_nvme_ns		*ns;
	/* Controller (path) this namespace instance belongs to. */
	struct nvme_ctrlr		*ctrlr;
	/* Bdev exposed for this namespace. */
	struct nvme_bdev		*bdev;
	uint32_t			ana_group_id;
	enum spdk_nvme_ana_state	ana_state;
	/* Set while an ANA state update for this namespace is in progress. */
	bool				ana_state_updating;
	/* Set when anatt_timer fired before the ANA transition completed. */
	bool				ana_transition_timedout;
	/* Timer bounding how long an ANA transition may take (ANATT). */
	struct spdk_poller		*anatt_timer;
	/* Probe context to complete once this namespace is populated, if any. */
	struct nvme_async_probe_ctx	*probe_ctx;
	TAILQ_ENTRY(nvme_ns)		tailq;
	/* Entry in nvme_ctrlr::namespaces — presumably keyed by id; confirm comparator. */
	RB_ENTRY(nvme_ns)		node;
};
71 
/* Forward declarations; full definitions below or in bdev_nvme.c. */
struct nvme_bdev_io;
struct nvme_bdev_ctrlr;
struct nvme_bdev;
struct nvme_io_path;
76 
/* One transport path (target + host addressing) of a controller.
 * A controller keeps its alternative paths in nvme_ctrlr::trids for failover. */
struct nvme_path_id {
	struct spdk_nvme_transport_id		trid;
	struct spdk_nvme_host_id		hostid;
	TAILQ_ENTRY(nvme_path_id)		link;
	/* Set when connecting through this path failed — presumably used to
	 * deprioritize it during failover; confirm in bdev_nvme.c. */
	bool					is_failed;
};
83 
/* Completion callback for a controller reset; success reports the outcome. */
typedef void (*bdev_nvme_reset_cb)(void *cb_arg, bool success);
/* Invoked when the controller transitions to the disconnected state. */
typedef void (*nvme_ctrlr_disconnected_cb)(struct nvme_ctrlr *nvme_ctrlr);
86 
/* Per-path controller context: one nvme_ctrlr per attached spdk_nvme_ctrlr.
 * Multipath groups several of these under a single nvme_bdev_ctrlr. */
struct nvme_ctrlr {
	/**
	 * Underlying SPDK NVMe controller handle.
	 * NOTE(review): the previous comment here described a pinned 4KB
	 * IDENTIFY buffer, which does not match this field — it looked
	 * stale; confirm against driver history before restoring it.
	 */
	struct spdk_nvme_ctrlr			*ctrlr;
	/* Path currently used for this controller; points into 'trids'. */
	struct nvme_path_id			*active_path_id;
	/* Reference count; guarded by 'mutex' — confirm in bdev_nvme.c. */
	int					ref;

	/* State flags packed into one word. */
	uint32_t				resetting : 1;
	uint32_t				reconnect_is_delayed : 1;
	uint32_t				fast_io_fail_timedout : 1;
	/* Destruction has been requested. */
	uint32_t				destruct : 1;
	/* An ANA log page read is in flight. */
	uint32_t				ana_log_page_updating : 1;
	uint32_t				io_path_cache_clearing : 1;

	/* Bdev-layer options this controller was attached with. */
	struct nvme_ctrlr_opts			opts;

	/* Active namespaces, indexed for lookup by nvme_ctrlr_get_ns(). */
	RB_HEAD(nvme_ns_tree, nvme_ns)		namespaces;

	/* Opal (TCG security) device handle, if the controller supports it. */
	struct spdk_opal_dev			*opal_dev;

	/* Poller processing admin queue completions. */
	struct spdk_poller			*adminq_timer_poller;
	/* Thread this controller's state is managed on. */
	struct spdk_thread			*thread;

	/* Callback/arg for an in-progress reset requested via bdev_nvme_reset_rpc(). */
	bdev_nvme_reset_cb			reset_cb_fn;
	void					*reset_cb_arg;
	/* Poller used to check for reset/detach completion */
	struct spdk_poller			*reset_detach_poller;
	struct spdk_nvme_detach_ctx		*detach_ctx;

	/* TSC timestamp of when the current reset/reconnect sequence started. */
	uint64_t				reset_start_tsc;
	/* Timer implementing opts.reconnect_delay_sec between reconnect attempts. */
	struct spdk_poller			*reconnect_delay_timer;

	nvme_ctrlr_disconnected_cb		disconnected_cb;

	/** linked list pointer for device list */
	TAILQ_ENTRY(nvme_ctrlr)			tailq;
	/* Multipath aggregate this controller belongs to. */
	struct nvme_bdev_ctrlr			*nbdev_ctrlr;

	/* All known paths; head is presumably the active path — confirm. */
	TAILQ_HEAD(nvme_paths, nvme_path_id)	trids;

	/* Buffer sizing and storage for the ANA log page, plus a scratch copy
	 * of one group descriptor used while parsing it. */
	uint32_t				max_ana_log_page_size;
	struct spdk_nvme_ana_page		*ana_log_page;
	struct spdk_nvme_ana_group_descriptor	*copied_ana_desc;

	/* Attach context while this controller is still being probed, else NULL. */
	struct nvme_async_probe_ctx		*probe_ctx;

	/* Protects mutable state shared across threads (e.g. ref, flags). */
	pthread_mutex_t				mutex;
};
138 
/* Multipath aggregate: groups the nvme_ctrlr objects (one per path) that
 * share the same name, together with the bdevs exposed on top of them. */
struct nvme_bdev_ctrlr {
	/* User-visible controller name (lookup key); heap-allocated. */
	char				*name;
	TAILQ_HEAD(, nvme_ctrlr)	ctrlrs;
	TAILQ_HEAD(, nvme_bdev)		bdevs;
	/* Entry in the global g_nvme_bdev_ctrlrs list. */
	TAILQ_ENTRY(nvme_bdev_ctrlr)	tailq;
};
145 
/* An NVMe namespace exposed as an SPDK bdev.  With multipath, the same bdev
 * aggregates the namespace as seen through every path (nvme_ns_list). */
struct nvme_bdev {
	/* Embedded generic bdev; must be first-class for spdk_bdev registration. */
	struct spdk_bdev		disk;
	/* Namespace ID this bdev represents. */
	uint32_t			nsid;
	struct nvme_bdev_ctrlr		*nbdev_ctrlr;
	/* Protects ref/nvme_ns_list — confirm exact coverage in bdev_nvme.c. */
	pthread_mutex_t			mutex;
	int				ref;
	enum bdev_nvme_multipath_policy	mp_policy;
	/* One nvme_ns per path through which this namespace is reachable. */
	TAILQ_HEAD(, nvme_ns)		nvme_ns_list;
	/* True if the underlying device is Opal (TCG) capable/managed. */
	bool				opal;
	/* Entry in nvme_bdev_ctrlr::bdevs. */
	TAILQ_ENTRY(nvme_bdev)		tailq;
};
157 
/* Per-channel I/O queue pair for one controller, owned by a poll group. */
struct nvme_qpair {
	struct nvme_ctrlr		*ctrlr;
	struct spdk_nvme_qpair		*qpair;
	struct nvme_poll_group		*group;
	struct nvme_ctrlr_channel	*ctrlr_ch;

	/* The following is used to update io_path cache of nvme_bdev_channels. */
	TAILQ_HEAD(, nvme_io_path)	io_path_list;

	/* Entry in nvme_poll_group::qpair_list. */
	TAILQ_ENTRY(nvme_qpair)		tailq;
};
169 
/* Per-thread I/O channel of an nvme_ctrlr. */
struct nvme_ctrlr_channel {
	struct nvme_qpair		*qpair;
	/* Bdev I/Os parked here while the controller is resetting. */
	TAILQ_HEAD(, spdk_bdev_io)	pending_resets;

	/* Channel iterator of an in-progress reset sweep, if any. */
	struct spdk_io_channel_iter	*reset_iter;
};
176 
/* One usable (namespace, qpair) combination of an nvme_bdev_channel —
 * the unit of path selection for multipath I/O. */
struct nvme_io_path {
	struct nvme_ns			*nvme_ns;
	struct nvme_qpair		*qpair;
	/* Number of I/Os currently outstanding on this path. */
	uint64_t			io_outstanding;
	/* Entry in nvme_bdev_channel::io_path_list. */
	STAILQ_ENTRY(nvme_io_path)	stailq;

	/* The following are used to update io_path cache of the nvme_bdev_channel. */
	struct nvme_bdev_channel	*nbdev_ch;
	/* Entry in nvme_qpair::io_path_list. */
	TAILQ_ENTRY(nvme_io_path)	tailq;
};
187 
/* Per-thread I/O channel of an nvme_bdev. */
struct nvme_bdev_channel {
	/* Cached preferred path; NULL forces re-selection on next I/O. */
	struct nvme_io_path			*current_io_path;
	enum bdev_nvme_multipath_policy		mp_policy;
	STAILQ_HEAD(, nvme_io_path)		io_path_list;
	/* I/Os queued for retry, drained by retry_io_poller. */
	TAILQ_HEAD(retry_io_head, spdk_bdev_io)	retry_io_list;
	struct spdk_poller			*retry_io_poller;
};
195 
/* Per-thread poll group polling all nvme_qpair objects on that thread. */
struct nvme_poll_group {
	struct spdk_nvme_poll_group		*group;
	/* Accel framework channel, e.g. for offloaded data operations. */
	struct spdk_io_channel			*accel_channel;
	struct spdk_poller			*poller;
	/* Spin-time accounting (tick counters below are only meaningful
	 * when collect_spin_stat is true). */
	bool					collect_spin_stat;
	uint64_t				spin_ticks;
	uint64_t				start_ticks;
	uint64_t				end_ticks;
	TAILQ_HEAD(, nvme_qpair)		qpair_list;
};
206 
/* Write JSON describing the given I/O path. */
void nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_path);

/* Look up a controller (path) by name — presumably NULL when not found; confirm. */
struct nvme_ctrlr *nvme_ctrlr_get_by_name(const char *name);

/* Look up a multipath controller aggregate by name. */
struct nvme_bdev_ctrlr *nvme_bdev_ctrlr_get_by_name(const char *name);

typedef void (*nvme_bdev_ctrlr_for_each_fn)(struct nvme_bdev_ctrlr *nbdev_ctrlr, void *ctx);

/* Invoke fn for every registered nvme_bdev_ctrlr. */
void nvme_bdev_ctrlr_for_each(nvme_bdev_ctrlr_for_each_fn fn, void *ctx);

/* Write JSON describing a transport ID. */
void nvme_bdev_dump_trid_json(const struct spdk_nvme_transport_id *trid,
			      struct spdk_json_write_ctx *w);

/* Write JSON describing a controller. */
void nvme_ctrlr_info_json(struct spdk_json_write_ctx *w, struct nvme_ctrlr *nvme_ctrlr);

/* Namespace lookup and iteration over a controller's active namespaces. */
struct nvme_ns *nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid);
struct nvme_ns *nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr);
struct nvme_ns *nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns);
225 
/* Action taken when a command exceeds its timeout (see spdk_bdev_nvme_opts). */
enum spdk_bdev_timeout_action {
	/* Ignore the timeout. */
	SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE = 0,
	/* Reset the controller. */
	SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET,
	/* Abort the timed-out command. */
	SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT,
};
231 
/* Module-wide NVMe bdev options (get/set via bdev_nvme_get_opts()/bdev_nvme_set_opts()). */
struct spdk_bdev_nvme_opts {
	/* What to do when a command times out. */
	enum spdk_bdev_timeout_action action_on_timeout;
	/* I/O command timeout in microseconds (0 presumably disables — confirm). */
	uint64_t timeout_us;
	/* Admin command timeout in microseconds. */
	uint64_t timeout_admin_us;
	uint32_t keep_alive_timeout_ms;
	/* The number of attempts per I/O in the transport layer before an I/O fails. */
	uint32_t transport_retry_count;
	/* Weighted-round-robin arbitration settings. */
	uint32_t arbitration_burst;
	uint32_t low_priority_weight;
	uint32_t medium_priority_weight;
	uint32_t high_priority_weight;
	/* Polling periods for admin and I/O queues, in microseconds. */
	uint64_t nvme_adminq_poll_period_us;
	uint64_t nvme_ioq_poll_period_us;
	uint32_t io_queue_requests;
	/* Batch command submissions instead of ringing the doorbell per command. */
	bool delay_cmd_submit;
	/* The number of attempts per I/O in the bdev layer before an I/O fails. */
	int32_t bdev_retry_count;
	uint8_t transport_ack_timeout;
	/* Default reconnect/failure-timeout behavior applied to new controllers
	 * (same semantics as struct nvme_ctrlr_opts). */
	int32_t ctrlr_loss_timeout_sec;
	uint32_t reconnect_delay_sec;
	uint32_t fast_io_fail_timeout_sec;
	/* Do not automatically fail back to the preferred path. */
	bool disable_auto_failback;
	/* Generate UUIDs for bdevs whose namespaces report none. */
	bool generate_uuids;
};
256 
/* Return the I/O qpair backing the given controller I/O channel. */
struct spdk_nvme_qpair *bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch);
/* Copy the current module-wide options into *opts. */
void bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts);
/* Replace the module-wide options; returns 0 or a negated errno. */
int bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts);
/* Enable/disable hotplug monitoring with the given poll period. */
int bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx);

/* Fill *opts with default bdev-layer controller options. */
void bdev_nvme_get_default_ctrlr_opts(struct nvme_ctrlr_opts *opts);

/*
 * Attach the controller at trid and create bdevs for its namespaces.
 * names/count receive the created bdev names; cb_fn is invoked on completion.
 * multipath selects whether an additional path may join an existing
 * controller of the same name. Returns 0 or a negated errno.
 */
int bdev_nvme_create(struct spdk_nvme_transport_id *trid,
		     const char *base_name,
		     const char **names,
		     uint32_t count,
		     spdk_bdev_create_nvme_fn cb_fn,
		     void *cb_ctx,
		     struct spdk_nvme_ctrlr_opts *drv_opts,
		     struct nvme_ctrlr_opts *bdev_opts,
		     bool multipath);

/* Start a discovery service against the discovery controller at trid;
 * timeout bounds the initial attach — units presumably match other
 * timeouts here; confirm in bdev_nvme.c. */
int bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid, const char *base_name,
			      struct spdk_nvme_ctrlr_opts *drv_opts, struct nvme_ctrlr_opts *bdev_opts,
			      uint64_t timeout, spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx);
/* Stop the named discovery service; cb_fn runs once teardown completes. */
int bdev_nvme_stop_discovery(const char *name, spdk_bdev_nvme_stop_discovery_fn cb_fn,
			     void *cb_ctx);
/* Write JSON describing all active discovery services. */
void bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w);
280 
/* Return the spdk_nvme_ctrlr backing the given bdev.
 * NOTE(review): for a multipath bdev, which path's controller is returned
 * is not visible from this header — confirm in bdev_nvme.c. */
struct spdk_nvme_ctrlr *bdev_nvme_get_ctrlr(struct spdk_bdev *bdev);

/**
 * Delete NVMe controller with all bdevs on top of it, or delete the specified path
 * if there is any alternative path. Requires to pass name of NVMe controller.
 *
 * \param name NVMe controller name
 * \param path_id The specified path to remove (optional)
 * \return zero on success, -EINVAL on wrong parameters or -ENODEV if controller is not found
 */
int bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id);

/**
 * Reset NVMe controller.
 *
 * \param nvme_ctrlr The specified NVMe controller to reset
 * \param cb_fn Function to be called back after reset completes
 * \param cb_arg Argument for callback function
 * \return zero on success. Negated errno on the following error conditions:
 * -ENXIO: controller is being destroyed.
 * -EBUSY: controller is already being reset.
 */
int bdev_nvme_reset_rpc(struct nvme_ctrlr *nvme_ctrlr, bdev_nvme_reset_cb cb_fn, void *cb_arg);

/* Completion callback for bdev_nvme_set_preferred_path(); rc is 0 or a negated errno. */
typedef void (*bdev_nvme_set_preferred_path_cb)(void *cb_arg, int rc);

/**
 * Set the preferred I/O path for an NVMe bdev in multipath mode.
 *
 * NOTE: This function does not support NVMe bdevs in failover mode.
 *
 * \param name NVMe bdev name
 * \param cntlid NVMe-oF controller ID
 * \param cb_fn Function to be called back after completion.
 * \param cb_arg Argument for callback function.
 */
void bdev_nvme_set_preferred_path(const char *name, uint16_t cntlid,
				  bdev_nvme_set_preferred_path_cb cb_fn, void *cb_arg);

/* Completion callback for bdev_nvme_set_multipath_policy(); rc is 0 or a negated errno. */
typedef void (*bdev_nvme_set_multipath_policy_cb)(void *cb_arg, int rc);

/**
 * Set multipath policy of the NVMe bdev.
 *
 * \param name NVMe bdev name
 * \param policy Multipath policy (active-passive or active-active)
 * \param cb_fn Function to be called back after completion.
 * \param cb_arg Argument for callback function.
 */
void bdev_nvme_set_multipath_policy(const char *name,
				    enum bdev_nvme_multipath_policy policy,
				    bdev_nvme_set_multipath_policy_cb cb_fn,
				    void *cb_arg);
333 
334 #endif /* SPDK_BDEV_NVME_H */
335