xref: /spdk/lib/blob/blobstore.h (revision 2bdec64fbfb636f60c0fc6deb704f43a24c3a1eb)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef SPDK_BLOBSTORE_H
35 #define SPDK_BLOBSTORE_H
36 
37 #include "spdk/assert.h"
38 #include "spdk/blob.h"
39 #include "spdk/queue.h"
40 #include "spdk/util.h"
41 
42 /* In Memory Data Structures
43  *
44  * The following data structures exist only in memory.
45  */
46 
/* Blobstore option values.
 *
 * NOTE(review): the names suggest these are the defaults applied to the
 * blobstore options declared in spdk/blob.h -- confirm against the code
 * that initializes those options.
 */

/* Cluster size: 1024 * 1024. NOTE(review): presumably bytes (1 MiB) -- confirm. */
#define SPDK_BLOB_OPTS_CLUSTER_SZ (1024 * 1024)
/* Metadata page count. NOTE(review): UINT32_MAX looks like a "not specified"
 * sentinel rather than a real count -- confirm.
 */
#define SPDK_BLOB_OPTS_NUM_MD_PAGES UINT32_MAX
/* NOTE(review): presumably the value for md_target.max_md_ops -- confirm. */
#define SPDK_BLOB_OPTS_MAX_MD_OPS 32
/* NOTE(review): presumably the value for io_target.max_channel_ops -- confirm. */
#define SPDK_BLOB_OPTS_MAX_CHANNEL_OPS 512
51 
/* In-memory representation of one extended attribute. Entries are chained
 * through 'link'; struct spdk_blob holds the list head ('xattrs').
 *
 * Member order is chosen for packing: the two small integers share the slot
 * in front of the pointers, so no padding hole is needed between 'index' and
 * 'name' or between 'value_len' and 'link' on platforms with 8-byte pointers.
 */
struct spdk_xattr {
	uint32_t	index;
	/* Length of the buffer that 'value' points to.
	 * NOTE(review): presumably in bytes -- confirm.
	 */
	uint16_t	value_len;
	/* NOTE(review): presumably a NUL-terminated attribute name -- confirm. */
	char		*name;
	/* Attribute value; 'value_len' describes its length. */
	void		*value;
	TAILQ_ENTRY(spdk_xattr)	link;
};
60 
/* The mutable part of the blob data that is sync'd to
 * disk. The data in here is both mutable and persistent.
 *
 * struct spdk_blob embeds two instances of this structure: 'clean' (the
 * last known on-disk contents) and 'active' (the current contents).
 */
struct spdk_blob_mut_data {
	/* Number of data clusters in the blob */
	uint64_t	num_clusters;

	/* Array of LBAs that are the beginning of a cluster, in
	 * the order they appear in the blob. _spdk_bs_blob_page_to_lba()
	 * indexes this array by (page / pages_per_cluster).
	 */
	uint64_t	*clusters;

	/* The size of the clusters array. This is greater than or
	 * equal to 'num_clusters'.
	 */
	size_t		cluster_array_size;

	/* Number of metadata pages */
	uint32_t	num_pages;

	/* Array of page offsets into the metadata region, in
	 * the order of the metadata page sequence.
	 */
	uint32_t	*pages;
};
86 
/* Relationship between the in-memory copy of a blob and its on-disk copy.
 * The enumerators carry their implicit values (starting at 0), so their
 * order must not change.
 */
enum spdk_blob_state {
	/* The in-memory version of the blob does not match the
	 * on-disk version.
	 */
	SPDK_BLOB_STATE_DIRTY,

	/* The in-memory version of the blob matches the on-disk
	 * version.
	 */
	SPDK_BLOB_STATE_CLEAN,

	/* The in-memory state is being synchronized with the
	 * on-disk blob state.
	 */
	SPDK_BLOB_STATE_LOADING,

	/* The disk state is being synchronized with the current
	 * blob state.
	 */
	SPDK_BLOB_STATE_SYNCING,
};
107 
/* In-memory representation of a blob. */
struct spdk_blob {
	/* Blobstore this blob lives in; the blob helpers below read
	 * pages_per_cluster through this pointer.
	 */
	struct spdk_blob_store *bs;

	/* NOTE(review): presumably the number of outstanding opens of this
	 * blob -- confirm against the open/close paths.
	 */
	uint32_t	open_ref;

	/* Blob identifier. _spdk_bs_blobid_to_page() returns its low 32 bits. */
	spdk_blob_id	id;

	/* Whether the in-memory copy matches the disk; see enum spdk_blob_state. */
	enum spdk_blob_state		state;

	/* Two copies of the mutable data. One is a version
	 * that matches the last known data on disk (clean).
	 * The other (active) is the current data. Syncing
	 * a blob makes the clean match the active.
	 */
	struct spdk_blob_mut_data	clean;
	struct spdk_blob_mut_data	active;

	/* TODO: The xattrs are mutable, but we don't want to be
	 * copying them unnecessarily. Figure this out.
	 */
	TAILQ_HEAD(, spdk_xattr) xattrs;

	/* List linkage. NOTE(review): presumably for the 'blobs' list in
	 * struct spdk_blob_store -- confirm.
	 */
	TAILQ_ENTRY(spdk_blob) link;
};
132 
/* In-memory state of a blobstore. */
struct spdk_blob_store {
	uint64_t			md_start; /* Offset from beginning of disk, in pages */
	uint32_t			md_len; /* Count, in pages */

	/* Metadata target. NOTE(review): presumably 'max_md_ops' bounds the
	 * outstanding metadata operations on 'md_channel' -- confirm.
	 */
	struct {
		uint32_t		max_md_ops;
		struct spdk_io_channel	*md_channel;
	} md_target;

	/* I/O target. NOTE(review): presumably 'max_channel_ops' bounds the
	 * outstanding operations per I/O channel -- confirm.
	 */
	struct {
		uint32_t		max_channel_ops;
	} io_target;


	/* Backing device; the unit conversion helpers read dev->blocklen. */
	struct spdk_bs_dev		*dev;

	/* NOTE(review): presumably one bit per metadata page / per cluster,
	 * set when the page or cluster is in use -- confirm.
	 */
	struct spdk_bit_array		*used_md_pages;
	struct spdk_bit_array		*used_clusters;

	/* Cluster size. The conversion helpers divide this by dev->blocklen to
	 * obtain LBAs per cluster, so it is in the same unit as blocklen
	 * (NOTE(review): presumably bytes -- confirm).
	 */
	uint32_t			cluster_sz;
	uint64_t			total_clusters;
	uint64_t			num_free_clusters;
	/* Number of pages in one cluster; used by the page <-> cluster helpers. */
	uint32_t			pages_per_cluster;

	spdk_blob_id			super_blob;

	/* List of struct spdk_blob entries. */
	TAILQ_HEAD(, spdk_blob) 	blobs;
};
161 
/* Per-channel blobstore state.
 *
 * NOTE(review): 'req_mem' looks like the backing allocation for the request
 * sets and 'reqs' like the list of sets currently available for use --
 * confirm against the channel create/destroy code.
 */
struct spdk_bs_channel {
	struct spdk_bs_request_set	*req_mem;
	TAILQ_HEAD(, spdk_bs_request_set) reqs;

	/* Blobstore this channel belongs to. */
	struct spdk_blob_store		*bs;

	/* Backing device and a channel on it.
	 * NOTE(review): presumably 'dev' mirrors bs->dev -- confirm.
	 */
	struct spdk_bs_dev		*dev;
	struct spdk_io_channel		*dev_channel;
};
171 
172 /* On-Disk Data Structures
173  *
174  * The following data structures exist on disk.
175  */
/* NOTE(review): presumably the value stored in the 'version' field of
 * struct spdk_bs_super_block -- confirm.
 */
#define SPDK_BS_VERSION 1

/* The on-disk structures that follow are byte-packed (no padding between
 * members). The matching "#pragma pack(pop)" comes after the super block
 * definition.
 */
#pragma pack(push, 1)
179 
/* Values for the 'type' field of struct spdk_bs_md_mask. */
#define SPDK_MD_MASK_TYPE_USED_PAGES 0
#define SPDK_MD_MASK_TYPE_USED_CLUSTERS 1

/* On-disk bit mask: a fixed header followed by the mask bytes.
 *
 * 'mask' is a C99 flexible array member rather than a GNU zero-length array,
 * so sizeof(struct spdk_bs_md_mask) covers only the header and the payload
 * must be allocated in addition to it.
 */
struct spdk_bs_md_mask {
	uint8_t		type;	/* One of SPDK_MD_MASK_TYPE_* */
	uint32_t	length; /* In bits */
	uint8_t		mask[];	/* Mask payload; 'length' gives its size in bits */
};
188 
/* Values for the 'type' field that starts every metadata descriptor
 * (struct spdk_blob_md_descriptor and its xattr/extent variants).
 */
#define SPDK_MD_DESCRIPTOR_TYPE_PADDING 0
#define SPDK_MD_DESCRIPTOR_TYPE_EXTENT 1
#define SPDK_MD_DESCRIPTOR_TYPE_XATTR 2
192 
/* On-disk descriptor that carries one extended attribute.
 *
 * It begins with the same 'type'/'length' pair as struct
 * spdk_blob_md_descriptor. The variable-length tail is declared as a C99
 * flexible array member rather than a GNU zero-length array, so
 * sizeof(struct spdk_blob_md_descriptor_xattr) covers only the fixed header.
 */
struct spdk_blob_md_descriptor_xattr {
	uint8_t		type;
	uint32_t	length;

	uint16_t	name_length;
	uint16_t	value_length;

	/* String name immediately followed by string value:
	 * 'name_length' bytes of name, then 'value_length' bytes of value.
	 */
	char		name[];
};
203 
/* On-disk descriptor that lists runs of clusters.
 *
 * It begins with the same 'type'/'length' pair as struct
 * spdk_blob_md_descriptor and is followed by a variable number of extent
 * entries. The entries are declared as a C99 flexible array member rather
 * than a GNU zero-length array, so sizeof(struct
 * spdk_blob_md_descriptor_extent) covers only the fixed header.
 */
struct spdk_blob_md_descriptor_extent {
	uint8_t		type;
	uint32_t	length;

	struct {
		uint32_t        cluster_idx;
		uint32_t        length; /* In units of clusters */
	} extents[];
};
213 
/* Common header of every on-disk metadata descriptor. The xattr and extent
 * descriptor structures start with these same two members.
 */
struct spdk_blob_md_descriptor {
	/* One of SPDK_MD_DESCRIPTOR_TYPE_* */
	uint8_t		type;
	/* NOTE(review): presumably the number of payload bytes that follow this
	 * header -- confirm against the descriptor parser.
	 */
	uint32_t	length;
};
218 
/* NOTE(review): presumably the sentinel stored in spdk_blob_md_page.next when
 * there is no following page -- confirm.
 */
#define SPDK_INVALID_MD_PAGE UINT32_MAX

/* One on-disk metadata page. The static assert below pins the structure to
 * exactly 4096 (0x1000) bytes; the conversion helpers use
 * sizeof(struct spdk_blob_md_page) as the page size.
 */
struct spdk_blob_md_page {
	spdk_blob_id     id;

	uint32_t        sequence_num;
	uint32_t	reserved0;

	/* Descriptors here. 4072 bytes is what remains of the 4096-byte page
	 * after 'id' and the four 32-bit members.
	 */
	uint8_t		descriptors[4072];

	/* NOTE(review): presumably the metadata page index of the next page in
	 * this blob's chain -- confirm.
	 */
	uint32_t	next;
	uint32_t	crc;
};
SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_md_page) == 0x1000, "Invalid md page size");
234 
/* Signature string: 8 characters, the same size as the 'signature' array in
 * struct spdk_bs_super_block, which therefore has no room for a terminating
 * NUL.
 */
#define SPDK_BS_SUPER_BLOCK_SIG "SPDKBLOB"

/* On-disk super block. The static assert below pins the structure to exactly
 * 4096 (0x1000) bytes.
 */
struct spdk_bs_super_block {
	uint8_t		signature[8];
	uint32_t        version;
	uint32_t        length;
	uint32_t	clean; /* If there was a clean shutdown, this is 1. */
	spdk_blob_id	super_blob;

	uint32_t	cluster_size; /* In bytes */

	uint32_t	used_page_mask_start; /* Offset from beginning of disk, in pages */
	uint32_t	used_page_mask_len; /* Count, in pages */

	uint32_t	used_cluster_mask_start; /* Offset from beginning of disk, in pages */
	uint32_t	used_cluster_mask_len; /* Count, in pages */

	uint32_t	md_start; /* Offset from beginning of disk, in pages */
	uint32_t	md_len; /* Count, in pages */

	/* Pads the structure out to the 4096 bytes required by the assert. */
	uint8_t		reserved[4040];
};
SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_super_block) == 0x1000, "Invalid super block size");

/* Ends the byte-packed region opened by "#pragma pack(push, 1)". */
#pragma pack(pop)
260 
261 /* Unit Conversions
262  *
263  * The blobstore works with several different units:
264  * - Byte: Self explanatory
265  * - LBA: The logical blocks on the backing storage device.
266  * - Page: The read/write units of blobs and metadata. This is
267  *         an offset into a blob in units of 4KiB.
268  * - Cluster Index: The disk is broken into a sequential list of
269  *		    clusters. This is the offset from the beginning.
270  *
271  * NOTE: These conversions all act on simple magnitudes, not with any sort
272  *        of knowledge about the blobs themselves. For instance, converting
273  *        a page to an lba with the conversion function below simply converts
274  *        a number of pages to an equivalent number of lbas, but that
275  *        lba certainly isn't the right lba that corresponds to a page offset
276  *        for a particular blob.
277  */
278 static inline uint64_t
279 _spdk_bs_byte_to_lba(struct spdk_blob_store *bs, uint64_t length)
280 {
281 	assert(length % bs->dev->blocklen == 0);
282 
283 	return length / bs->dev->blocklen;
284 }
285 
286 static inline uint64_t
287 _spdk_bs_lba_to_byte(struct spdk_blob_store *bs, uint64_t lba)
288 {
289 	return lba * bs->dev->blocklen;
290 }
291 
292 static inline uint64_t
293 _spdk_bs_page_to_lba(struct spdk_blob_store *bs, uint64_t page)
294 {
295 	return page * sizeof(struct spdk_blob_md_page) / bs->dev->blocklen;
296 }
297 
298 static inline uint32_t
299 _spdk_bs_lba_to_page(struct spdk_blob_store *bs, uint64_t lba)
300 {
301 	uint64_t	lbas_per_page;
302 
303 	lbas_per_page = sizeof(struct spdk_blob_md_page) / bs->dev->blocklen;
304 
305 	assert(lba % lbas_per_page == 0);
306 
307 	return lba / lbas_per_page;
308 }
309 
310 static inline uint64_t
311 _spdk_bs_cluster_to_page(struct spdk_blob_store *bs, uint32_t cluster)
312 {
313 	return cluster * bs->pages_per_cluster;
314 }
315 
316 static inline uint32_t
317 _spdk_bs_page_to_cluster(struct spdk_blob_store *bs, uint64_t page)
318 {
319 	assert(page % bs->pages_per_cluster == 0);
320 
321 	return page / bs->pages_per_cluster;
322 }
323 
324 static inline uint64_t
325 _spdk_bs_cluster_to_lba(struct spdk_blob_store *bs, uint32_t cluster)
326 {
327 	return cluster * (bs->cluster_sz / bs->dev->blocklen);
328 }
329 
330 static inline uint32_t
331 _spdk_bs_lba_to_cluster(struct spdk_blob_store *bs, uint64_t lba)
332 {
333 	assert(lba % (bs->cluster_sz / bs->dev->blocklen) == 0);
334 
335 	return lba / (bs->cluster_sz / bs->dev->blocklen);
336 }
337 
338 /* End basic conversions */
339 
340 static inline uint32_t
341 _spdk_bs_blobid_to_page(spdk_blob_id id)
342 {
343 	return id & 0xFFFFFFFF;
344 }
345 
346 /* Given a page offset into a blob, look up the LBA for the
347  * start of that page.
348  */
349 static inline uint64_t
350 _spdk_bs_blob_page_to_lba(struct spdk_blob *blob, uint32_t page)
351 {
352 	uint64_t	lba;
353 	uint32_t	pages_per_cluster;
354 
355 	pages_per_cluster = blob->bs->pages_per_cluster;
356 
357 	assert(page < blob->active.num_clusters * pages_per_cluster);
358 
359 	lba = blob->active.clusters[page / pages_per_cluster];
360 	lba += _spdk_bs_page_to_lba(blob->bs, page % pages_per_cluster);
361 
362 	return lba;
363 }
364 
365 /* Given a page offset into a blob, look up the number of pages until the
366  * next cluster boundary.
367  */
368 static inline uint32_t
369 _spdk_bs_num_pages_to_cluster_boundary(struct spdk_blob *blob, uint32_t page)
370 {
371 	uint32_t	pages_per_cluster;
372 
373 	pages_per_cluster = blob->bs->pages_per_cluster;
374 
375 	return pages_per_cluster - (page % pages_per_cluster);
376 }
377 
378 #endif
379