xref: /spdk/lib/blob/blobstore.h (revision 4eda4fd245ba83ae63891e977a5a0ef33f7a2823)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef SPDK_BLOBSTORE_H
35 #define SPDK_BLOBSTORE_H
36 
37 #include "spdk/assert.h"
38 #include "spdk/blob.h"
39 #include "spdk/queue.h"
40 #include "spdk/util.h"
41 
42 /* In Memory Data Structures
43  *
44  * The following data structures exist only in memory.
45  */
46 
/* Blobstore option values.
 * NOTE(review): these look like the defaults applied when blobstore
 * options are initialized; confirm against the code that consumes them.
 */
#define SPDK_BLOB_OPTS_CLUSTER_SZ	(1024 * 1024)	/* 1 MiB */
#define SPDK_BLOB_OPTS_NUM_MD_PAGES	UINT32_MAX
#define SPDK_BLOB_OPTS_MAX_MD_OPS	32
#define SPDK_BLOB_OPTS_MAX_CHANNEL_OPS	512
51 
/* In-memory record for one extended attribute; 'link' lets it be chained
 * on a TAILQ of spdk_xattr entries.
 */
struct spdk_xattr {
	/* TODO: reorder for best packing */
	uint32_t		index;

	/* Attribute name and its value. */
	char			*name;
	void			*value;

	/* Length of 'value' (presumably in bytes - confirm with the users). */
	uint16_t		value_len;

	TAILQ_ENTRY(spdk_xattr)	link;
};
60 
/* Blob state that is both mutable and persistent: it can change in
 * memory and is what gets sync'd to disk.
 */
struct spdk_blob_mut_data {
	/* How many data clusters the blob has. */
	uint64_t	num_clusters;

	/* Array of LBAs, one per cluster, each marking where that cluster
	 * begins. Entries are in the order the clusters appear in the blob.
	 */
	uint64_t	*clusters;

	/* Capacity of the 'clusters' array; always >= 'num_clusters'. */
	size_t		cluster_array_size;

	/* How many metadata pages the blob has. */
	uint32_t	num_pages;

	/* Array of page offsets into the metadata region, ordered by the
	 * metadata page sequence.
	 */
	uint32_t	*pages;
};
86 
/* Relationship between a blob's in-memory copy and its on-disk copy. */
enum spdk_blob_state {
	/* In-memory contents differ from what is on disk. */
	SPDK_BLOB_STATE_DIRTY,

	/* In-memory contents are identical to what is on disk. */
	SPDK_BLOB_STATE_CLEAN,

	/* The in-memory state is being synchronized with the on-disk
	 * blob state.
	 */
	SPDK_BLOB_STATE_LOADING,

	/* The on-disk state is being synchronized with the current
	 * in-memory blob state.
	 */
	SPDK_BLOB_STATE_SYNCING,
};
107 
108 struct spdk_blob {
109 	struct spdk_blob_store *bs;
110 
111 	uint32_t	open_ref;
112 
113 	spdk_blob_id	id;
114 
115 	enum spdk_blob_state		state;
116 
117 	/* Two copies of the mutable data. One is a version
118 	 * that matches the last known data on disk (clean).
119 	 * The other (active) is the current data. Syncing
120 	 * a blob makes the clean match the active.
121 	 */
122 	struct spdk_blob_mut_data	clean;
123 	struct spdk_blob_mut_data	active;
124 
125 	/* TODO: The xattrs are mutable, but we don't want to be
126 	 * copying them unecessarily. Figure this out.
127 	 */
128 	TAILQ_HEAD(, spdk_xattr) xattrs;
129 
130 	TAILQ_ENTRY(spdk_blob) link;
131 };
132 
133 struct spdk_blob_store {
134 	uint64_t			md_start; /* Offset from beginning of disk, in pages */
135 	uint32_t			md_len; /* Count, in pages */
136 	struct spdk_io_channel		*md_channel;
137 
138 	struct spdk_bs_dev		*dev;
139 
140 	struct spdk_bit_array		*used_md_pages;
141 	struct spdk_bit_array		*used_clusters;
142 
143 	uint32_t			cluster_sz;
144 	uint64_t			total_clusters;
145 	uint64_t			num_free_clusters;
146 	uint32_t			pages_per_cluster;
147 
148 	uint32_t			max_md_ops;
149 	uint32_t			max_channel_ops;
150 
151 	spdk_blob_id			super_blob;
152 
153 	TAILQ_HEAD(, spdk_blob) 	blobs;
154 };
155 
/* Channel state for a blobstore: a set of request objects plus the
 * blobstore and backing device the channel refers to.
 */
struct spdk_bs_channel {
	/* NOTE(review): req_mem looks like the backing allocation for the
	 * entries queued on 'reqs' - confirm in the channel create path.
	 */
	struct spdk_bs_request_set		*req_mem;
	TAILQ_HEAD(, spdk_bs_request_set)	reqs;

	struct spdk_blob_store			*bs;

	struct spdk_bs_dev			*dev;
	struct spdk_io_channel			*dev_channel;
};
165 
/* On-Disk Data Structures
 *
 * Everything declared from here on describes data as it is laid out
 * on disk.
 */

/* NOTE(review): presumably the value written to the super block's
 * 'version' field - confirm at the point where the super block is built.
 */
#define SPDK_BS_VERSION	1
171 
172 #pragma pack(push, 1)
173 
/* Values for spdk_bs_md_mask::type. */
#define SPDK_MD_MASK_TYPE_USED_PAGES	0
#define SPDK_MD_MASK_TYPE_USED_CLUSTERS	1

/* On-disk bit mask: a small header followed directly by the mask bytes. */
struct spdk_bs_md_mask {
	/* One of the SPDK_MD_MASK_TYPE_* values. */
	uint8_t		type;

	/* Number of valid bits in 'mask' (bits, not bytes). */
	uint32_t	length;

	/* Variable-length bitmap data starts here. */
	uint8_t		mask[0];
};
182 
/* Values for the 'type' byte that begins every metadata descriptor. */
#define SPDK_MD_DESCRIPTOR_TYPE_PADDING	0
#define SPDK_MD_DESCRIPTOR_TYPE_EXTENT	1
#define SPDK_MD_DESCRIPTOR_TYPE_XATTR	2
186 
/* On-disk descriptor carrying one extended attribute. It starts with the
 * same type/length pair as struct spdk_blob_md_descriptor.
 */
struct spdk_blob_md_descriptor_xattr {
	uint8_t		type;
	uint32_t	length;

	/* Sizes of the two strings stored at 'name'. */
	uint16_t	name_length;
	uint16_t	value_length;

	/* The name string, with the value string immediately after it. */
	char		name[0];
};
197 
/* On-disk descriptor holding a variable-length list of extents. It starts
 * with the same type/length pair as struct spdk_blob_md_descriptor.
 */
struct spdk_blob_md_descriptor_extent {
	uint8_t		type;
	uint32_t	length;

	/* Each entry is a cluster index plus a run length. */
	struct {
		uint32_t	cluster_idx;
		uint32_t	length;	/* Counted in clusters */
	} extents[0];
};
207 
/* Header shared by the on-disk metadata descriptors: the xattr and extent
 * descriptor structs both begin with this same type/length pair.
 */
struct spdk_blob_md_descriptor {
	/* One of the SPDK_MD_DESCRIPTOR_TYPE_* values. */
	uint8_t		type;
	uint32_t	length;
};
212 
213 #define SPDK_INVALID_MD_PAGE UINT32_MAX
214 
215 struct spdk_blob_md_page {
216 	spdk_blob_id     id;
217 
218 	uint32_t        sequence_num;
219 	uint32_t	reserved0;
220 
221 	/* Descriptors here */
222 	uint8_t		descriptors[4072];
223 
224 	uint32_t	next;
225 	uint32_t	crc;
226 };
227 SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_md_page) == 0x1000, "Invalid md page size");
228 
229 #define SPDK_BS_SUPER_BLOCK_SIG "SPDKBLOB"
230 
231 struct spdk_bs_super_block {
232 	uint8_t		signature[8];
233 	uint32_t        version;
234 	uint32_t        length;
235 	uint32_t	clean; /* If there was a clean shutdown, this is 1. */
236 	spdk_blob_id	super_blob;
237 
238 	uint32_t	cluster_size; /* In bytes */
239 
240 	uint32_t	used_page_mask_start; /* Offset from beginning of disk, in pages */
241 	uint32_t	used_page_mask_len; /* Count, in pages */
242 
243 	uint32_t	used_cluster_mask_start; /* Offset from beginning of disk, in pages */
244 	uint32_t	used_cluster_mask_len; /* Count, in pages */
245 
246 	uint32_t	md_start; /* Offset from beginning of disk, in pages */
247 	uint32_t	md_len; /* Count, in pages */
248 
249 	uint8_t		reserved[4040];
250 };
251 SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_super_block) == 0x1000, "Invalid super block size");
252 
253 #pragma pack(pop)
254 
255 /* Unit Conversions
256  *
257  * The blobstore works with several different units:
258  * - Byte: Self explanatory
259  * - LBA: The logical blocks on the backing storage device.
260  * - Page: The read/write units of blobs and metadata. This is
261  *         an offset into a blob in units of 4KiB.
262  * - Cluster Index: The disk is broken into a sequential list of
263  *		    clusters. This is the offset from the beginning.
264  *
265  * NOTE: These conversions all act on simple magnitudes, not with any sort
266  *        of knowledge about the blobs themselves. For instance, converting
267  *        a page to an lba with the conversion function below simply converts
268  *        a number of pages to an equivalent number of lbas, but that
269  *        lba certainly isn't the right lba that corresponds to a page offset
270  *        for a particular blob.
271  */
272 static inline uint64_t
273 _spdk_bs_byte_to_lba(struct spdk_blob_store *bs, uint64_t length)
274 {
275 	assert(length % bs->dev->blocklen == 0);
276 
277 	return length / bs->dev->blocklen;
278 }
279 
280 static inline uint64_t
281 _spdk_bs_lba_to_byte(struct spdk_blob_store *bs, uint64_t lba)
282 {
283 	return lba * bs->dev->blocklen;
284 }
285 
286 static inline uint64_t
287 _spdk_bs_page_to_lba(struct spdk_blob_store *bs, uint64_t page)
288 {
289 	return page * sizeof(struct spdk_blob_md_page) / bs->dev->blocklen;
290 }
291 
292 static inline uint32_t
293 _spdk_bs_lba_to_page(struct spdk_blob_store *bs, uint64_t lba)
294 {
295 	uint64_t	lbas_per_page;
296 
297 	lbas_per_page = sizeof(struct spdk_blob_md_page) / bs->dev->blocklen;
298 
299 	assert(lba % lbas_per_page == 0);
300 
301 	return lba / lbas_per_page;
302 }
303 
304 static inline uint64_t
305 _spdk_bs_cluster_to_page(struct spdk_blob_store *bs, uint32_t cluster)
306 {
307 	return cluster * bs->pages_per_cluster;
308 }
309 
310 static inline uint32_t
311 _spdk_bs_page_to_cluster(struct spdk_blob_store *bs, uint64_t page)
312 {
313 	assert(page % bs->pages_per_cluster == 0);
314 
315 	return page / bs->pages_per_cluster;
316 }
317 
318 static inline uint64_t
319 _spdk_bs_cluster_to_lba(struct spdk_blob_store *bs, uint32_t cluster)
320 {
321 	return cluster * (bs->cluster_sz / bs->dev->blocklen);
322 }
323 
324 static inline uint32_t
325 _spdk_bs_lba_to_cluster(struct spdk_blob_store *bs, uint64_t lba)
326 {
327 	assert(lba % (bs->cluster_sz / bs->dev->blocklen) == 0);
328 
329 	return lba / (bs->cluster_sz / bs->dev->blocklen);
330 }
331 
332 /* End basic conversions */
333 
334 static inline uint32_t
335 _spdk_bs_blobid_to_page(spdk_blob_id id)
336 {
337 	return id & 0xFFFFFFFF;
338 }
339 
340 /* Given a page offset into a blob, look up the LBA for the
341  * start of that page.
342  */
343 static inline uint64_t
344 _spdk_bs_blob_page_to_lba(struct spdk_blob *blob, uint32_t page)
345 {
346 	uint64_t	lba;
347 	uint32_t	pages_per_cluster;
348 
349 	pages_per_cluster = blob->bs->pages_per_cluster;
350 
351 	assert(page < blob->active.num_clusters * pages_per_cluster);
352 
353 	lba = blob->active.clusters[page / pages_per_cluster];
354 	lba += _spdk_bs_page_to_lba(blob->bs, page % pages_per_cluster);
355 
356 	return lba;
357 }
358 
359 /* Given a page offset into a blob, look up the number of pages until the
360  * next cluster boundary.
361  */
362 static inline uint32_t
363 _spdk_bs_num_pages_to_cluster_boundary(struct spdk_blob *blob, uint32_t page)
364 {
365 	uint32_t	pages_per_cluster;
366 
367 	pages_per_cluster = blob->bs->pages_per_cluster;
368 
369 	return pages_per_cluster - (page % pages_per_cluster);
370 }
371 
372 #endif
373