xref: /spdk/lib/blob/blobstore.h (revision 674c709733259a0b3763bc859bae4765169b3364)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef SPDK_BLOBSTORE_H
35 #define SPDK_BLOBSTORE_H
36 
37 #include "spdk/assert.h"
38 #include "spdk/blob.h"
39 #include "spdk/queue.h"
40 #include "spdk/util.h"
41 
42 /* In Memory Data Structures
43  *
44  * The following data structures exist only in memory.
45  */
46 
/* Blobstore option constants.
 *
 * NOTE(review): presumably the default values applied to the blobstore
 * options (cluster size, number of metadata pages, and the two operation
 * limits) - confirm against the code that initializes the options.
 */
#define SPDK_BLOB_OPTS_CLUSTER_SZ (1024 * 1024)
#define SPDK_BLOB_OPTS_NUM_MD_PAGES UINT32_MAX
#define SPDK_BLOB_OPTS_MAX_MD_OPS 32
#define SPDK_BLOB_OPTS_MAX_CHANNEL_OPS 512
51 
/* In-memory representation of a single extended attribute (xattr).
 * Instances are chained together through 'link'; struct spdk_blob keeps
 * the head of such a list in its 'xattrs' member.
 */
struct spdk_xattr {
	/* TODO: reorder for best packing */
	/* NOTE(review): meaning of 'index' is not visible in this header -
	 * confirm against the code that populates it.
	 */
	uint32_t	index;
	/* Attribute name. */
	char		*name;
	/* Attribute value; 'value_len' holds its length (presumably in bytes). */
	void		*value;
	uint16_t	value_len;
	/* List linkage. */
	TAILQ_ENTRY(spdk_xattr)	link;
};
60 
/* The mutable part of the blob data that is sync'd to
 * disk. The data in here is both mutable and persistent.
 *
 * struct spdk_blob holds two instances of this structure: 'clean' and
 * 'active'.
 */
struct spdk_blob_mut_data {
	/* Number of data clusters in the blob */
	uint64_t	num_clusters;

	/* Array of LBAs, each the beginning of a cluster, in
	 * the order they appear in the blob.
	 */
	uint64_t	*clusters;

	/* The size of the clusters array. This is greater than or
	 * equal to 'num_clusters'.
	 */
	size_t		cluster_array_size;

	/* Number of metadata pages */
	uint32_t	num_pages;

	/* Array of page offsets into the metadata region, in
	 * the order of the metadata page sequence.
	 */
	uint32_t	*pages;
};
86 
/* Relationship between the in-memory copy of a blob and its on-disk copy.
 * Stored in the 'state' member of struct spdk_blob.
 */
enum spdk_blob_state {
	/* The in-memory version of the blob does not match the on-disk
	 * version.
	 */
	SPDK_BLOB_STATE_DIRTY,

	/* The in-memory version of the blob matches the on-disk
	 * version.
	 */
	SPDK_BLOB_STATE_CLEAN,

	/* The in-memory state is being synchronized with the on-disk
	 * blob state.
	 */
	SPDK_BLOB_STATE_LOADING,

	/* The disk state is being synchronized with the current
	 * blob state.
	 */
	SPDK_BLOB_STATE_SYNCING,
};
107 
/* In-memory representation of a single blob. */
struct spdk_blob {
	/* Blobstore associated with this blob; the page/LBA helpers in this
	 * header read the cluster geometry through it.
	 */
	struct spdk_blob_store *bs;

	/* NOTE(review): presumably the number of outstanding opens of this
	 * blob - confirm against the open/close paths.
	 */
	uint32_t	open_ref;

	/* Identifier of this blob. */
	spdk_blob_id	id;

	/* See enum spdk_blob_state. */
	enum spdk_blob_state		state;

	/* Two copies of the mutable data. One is a version
	 * that matches the last known data on disk (clean).
	 * The other (active) is the current data. Syncing
	 * a blob makes the clean match the active.
	 */
	struct spdk_blob_mut_data	clean;
	struct spdk_blob_mut_data	active;

	/* TODO: The xattrs are mutable, but we don't want to be
	 * copying them unnecessarily. Figure this out.
	 */
	TAILQ_HEAD(, spdk_xattr) xattrs;

	/* List linkage; struct spdk_blob_store keeps a list of blobs in its
	 * 'blobs' member.
	 */
	TAILQ_ENTRY(spdk_blob) link;
};
132 
/* In-memory representation of a blobstore. */
struct spdk_blob_store {
	uint64_t			md_start; /* Offset from beginning of disk, in pages */
	uint32_t			md_len; /* Count, in pages */

	/* NOTE(review): presumably the channel used for metadata I/O and the
	 * limit on concurrently outstanding metadata operations - confirm.
	 */
	struct {
		uint32_t		max_md_ops;
		struct spdk_io_channel	*md_channel;
	} md_target;

	/* NOTE(review): presumably the per-channel limit on outstanding I/O
	 * operations - confirm.
	 */
	struct {
		uint32_t		max_channel_ops;
	} io_target;


	/* Backing device. The unit-conversion helpers in this header read
	 * dev->blocklen from it.
	 */
	struct spdk_bs_dev		*dev;

	/* NOTE(review): presumably one bit per metadata page / per cluster,
	 * set when the page or cluster is in use - confirm.
	 */
	struct spdk_bit_array		*used_md_pages;
	struct spdk_bit_array		*used_clusters;

	/* Cluster size. The helpers divide it by dev->blocklen to obtain the
	 * number of LBAs per cluster, so it is in the same unit as blocklen.
	 */
	uint32_t			cluster_sz;
	uint64_t			total_clusters;
	uint64_t			num_free_clusters;
	/* Number of pages in one cluster; used by the page/cluster helpers. */
	uint32_t			pages_per_cluster;

	spdk_blob_id			super_blob;

	/* List of struct spdk_blob, chained through spdk_blob::link. */
	TAILQ_HEAD(, spdk_blob) 	blobs;
};
161 
/* Channel state for a blobstore.
 *
 * NOTE(review): the roles below are inferred from member names only -
 * confirm against the channel create/destroy code.
 */
struct spdk_bs_channel {
	/* Presumably the backing allocation for the request sets that are
	 * linked on 'reqs'.
	 */
	struct spdk_bs_request_set	*req_mem;
	TAILQ_HEAD(, spdk_bs_request_set) reqs;

	/* Blobstore associated with this channel. */
	struct spdk_blob_store		*bs;

	/* Backing device and an I/O channel for it. */
	struct spdk_bs_dev		*dev;
	struct spdk_io_channel		*dev_channel;
};
171 
172 /* On-Disk Data Structures
173  *
174  * The following data structures exist on disk.
175  */
/* NOTE(review): presumably the value stored in the 'version' field of
 * struct spdk_bs_super_block - confirm.
 */
#define SPDK_BS_VERSION 1

/* Pack the on-disk structures below to 1-byte alignment so that no padding
 * is inserted between members. Undone by the matching pack(pop).
 */
#pragma pack(push, 1)

/* Values for the 'type' field of struct spdk_bs_md_mask. */
#define SPDK_MD_MASK_TYPE_USED_PAGES 0
#define SPDK_MD_MASK_TYPE_USED_CLUSTERS 1
182 
/* Header of an on-disk bit mask. The mask bytes follow the header
 * directly and are reached through the trailing flexible array member,
 * which contributes nothing to sizeof(struct spdk_bs_md_mask).
 */
struct spdk_bs_md_mask {
	uint8_t		type; /* Presumably one of SPDK_MD_MASK_TYPE_* */
	uint32_t	length; /* In bits */
	uint8_t		mask[]; /* C99 flexible array member (was the GNU-only mask[0]) */
};
188 
/* Metadata descriptor type codes.
 * NOTE(review): presumably the values stored in the 'type' field that
 * begins every spdk_blob_md_descriptor* structure - confirm.
 */
#define SPDK_MD_DESCRIPTOR_TYPE_PADDING 0
#define SPDK_MD_DESCRIPTOR_TYPE_EXTENT 1
#define SPDK_MD_DESCRIPTOR_TYPE_XATTR 2
192 
/* On-disk descriptor carrying one extended attribute.
 *
 * It begins with the same 'type' and 'length' members as
 * struct spdk_blob_md_descriptor. The variable-length payload follows the
 * fixed header and is reached through the trailing flexible array member,
 * which contributes nothing to sizeof() of this structure.
 */
struct spdk_blob_md_descriptor_xattr {
	uint8_t		type;
	uint32_t	length;

	uint16_t	name_length;
	uint16_t	value_length;

	/* C99 flexible array member (was the GNU-only name[0]). */
	char		name[];
	/* String name immediately followed by string value. */
};
203 
/* On-disk descriptor carrying a list of extents.
 *
 * It begins with the same 'type' and 'length' members as
 * struct spdk_blob_md_descriptor. The extent entries follow the fixed
 * header and are reached through the trailing flexible array member, which
 * contributes nothing to sizeof() of this structure.
 */
struct spdk_blob_md_descriptor_extent {
	uint8_t		type;
	uint32_t	length;

	/* C99 flexible array member (was the GNU-only extents[0]). */
	struct {
		uint32_t	cluster_idx;
		uint32_t	length; /* In units of clusters */
	} extents[];
};
213 
/* Generic on-disk descriptor header. The xattr and extent descriptor
 * structures begin with these same two members.
 *
 * NOTE(review): 'length' presumably counts the bytes that follow this
 * header - confirm against the descriptor parsing code.
 */
struct spdk_blob_md_descriptor {
	uint8_t		type;
	uint32_t	length;
};
218 
/* NOTE(review): presumably stored in spdk_blob_md_page::next to mark the
 * last page of a chain - confirm.
 */
#define SPDK_INVALID_MD_PAGE UINT32_MAX

/* One on-disk metadata page. The static assertion below requires the
 * packed structure to be exactly SPDK_BS_PAGE_SIZE bytes.
 */
struct spdk_blob_md_page {
	spdk_blob_id     id;

	uint32_t        sequence_num;
	uint32_t	reserved0;

	/* Descriptors here */
	uint8_t		descriptors[4072];

	/* NOTE(review): presumably the metadata page index of the next page
	 * in this blob's chain - confirm.
	 */
	uint32_t	next;
	uint32_t	crc;
};
/* Size of a page, in bytes (4 KiB). */
#define SPDK_BS_PAGE_SIZE 0x1000
SPDK_STATIC_ASSERT(SPDK_BS_PAGE_SIZE == sizeof(struct spdk_blob_md_page), "Invalid md page size");
235 
/* NOTE(review): presumably the expected contents of
 * spdk_bs_super_block::signature (8 characters, no terminator stored) -
 * confirm.
 */
#define SPDK_BS_SUPER_BLOCK_SIG "SPDKBLOB"

/* On-disk super block. The static assertion below requires the packed
 * structure to be exactly 0x1000 bytes.
 */
struct spdk_bs_super_block {
	uint8_t		signature[8];
	uint32_t        version;
	uint32_t        length;
	uint32_t	clean; /* If there was a clean shutdown, this is 1. */
	spdk_blob_id	super_blob;

	uint32_t	cluster_size; /* In bytes */

	uint32_t	used_page_mask_start; /* Offset from beginning of disk, in pages */
	uint32_t	used_page_mask_len; /* Count, in pages */

	uint32_t	used_cluster_mask_start; /* Offset from beginning of disk, in pages */
	uint32_t	used_cluster_mask_len; /* Count, in pages */

	uint32_t	md_start; /* Offset from beginning of disk, in pages */
	uint32_t	md_len; /* Count, in pages */

	/* Pads the structure so that 'crc' occupies its last four bytes. */
	uint8_t		reserved[4036];
	uint32_t	crc;
};
SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_super_block) == 0x1000, "Invalid super block size");

/* End of the 1-byte packed on-disk structures. */
#pragma pack(pop)
262 
263 /* Unit Conversions
264  *
265  * The blobstore works with several different units:
266  * - Byte: Self explanatory
267  * - LBA: The logical blocks on the backing storage device.
268  * - Page: The read/write units of blobs and metadata. This is
269  *         an offset into a blob in units of 4KiB.
270  * - Cluster Index: The disk is broken into a sequential list of
271  *		    clusters. This is the offset from the beginning.
272  *
273  * NOTE: These conversions all act on simple magnitudes, not with any sort
274  *        of knowledge about the blobs themselves. For instance, converting
275  *        a page to an lba with the conversion function below simply converts
276  *        a number of pages to an equivalent number of lbas, but that
277  *        lba certainly isn't the right lba that corresponds to a page offset
278  *        for a particular blob.
279  */
280 static inline uint64_t
281 _spdk_bs_byte_to_lba(struct spdk_blob_store *bs, uint64_t length)
282 {
283 	assert(length % bs->dev->blocklen == 0);
284 
285 	return length / bs->dev->blocklen;
286 }
287 
288 static inline uint64_t
289 _spdk_bs_lba_to_byte(struct spdk_blob_store *bs, uint64_t lba)
290 {
291 	return lba * bs->dev->blocklen;
292 }
293 
294 static inline uint64_t
295 _spdk_bs_page_to_lba(struct spdk_blob_store *bs, uint64_t page)
296 {
297 	return page * SPDK_BS_PAGE_SIZE / bs->dev->blocklen;
298 }
299 
300 static inline uint32_t
301 _spdk_bs_lba_to_page(struct spdk_blob_store *bs, uint64_t lba)
302 {
303 	uint64_t	lbas_per_page;
304 
305 	lbas_per_page = SPDK_BS_PAGE_SIZE / bs->dev->blocklen;
306 
307 	assert(lba % lbas_per_page == 0);
308 
309 	return lba / lbas_per_page;
310 }
311 
312 static inline uint64_t
313 _spdk_bs_cluster_to_page(struct spdk_blob_store *bs, uint32_t cluster)
314 {
315 	return cluster * bs->pages_per_cluster;
316 }
317 
318 static inline uint32_t
319 _spdk_bs_page_to_cluster(struct spdk_blob_store *bs, uint64_t page)
320 {
321 	assert(page % bs->pages_per_cluster == 0);
322 
323 	return page / bs->pages_per_cluster;
324 }
325 
326 static inline uint64_t
327 _spdk_bs_cluster_to_lba(struct spdk_blob_store *bs, uint32_t cluster)
328 {
329 	return cluster * (bs->cluster_sz / bs->dev->blocklen);
330 }
331 
332 static inline uint32_t
333 _spdk_bs_lba_to_cluster(struct spdk_blob_store *bs, uint64_t lba)
334 {
335 	assert(lba % (bs->cluster_sz / bs->dev->blocklen) == 0);
336 
337 	return lba / (bs->cluster_sz / bs->dev->blocklen);
338 }
339 
340 /* End basic conversions */
341 
342 static inline uint32_t
343 _spdk_bs_blobid_to_page(spdk_blob_id id)
344 {
345 	return id & 0xFFFFFFFF;
346 }
347 
348 /* Given a page offset into a blob, look up the LBA for the
349  * start of that page.
350  */
351 static inline uint64_t
352 _spdk_bs_blob_page_to_lba(struct spdk_blob *blob, uint32_t page)
353 {
354 	uint64_t	lba;
355 	uint32_t	pages_per_cluster;
356 
357 	pages_per_cluster = blob->bs->pages_per_cluster;
358 
359 	assert(page < blob->active.num_clusters * pages_per_cluster);
360 
361 	lba = blob->active.clusters[page / pages_per_cluster];
362 	lba += _spdk_bs_page_to_lba(blob->bs, page % pages_per_cluster);
363 
364 	return lba;
365 }
366 
367 /* Given a page offset into a blob, look up the number of pages until the
368  * next cluster boundary.
369  */
370 static inline uint32_t
371 _spdk_bs_num_pages_to_cluster_boundary(struct spdk_blob *blob, uint32_t page)
372 {
373 	uint32_t	pages_per_cluster;
374 
375 	pages_per_cluster = blob->bs->pages_per_cluster;
376 
377 	return pages_per_cluster - (page % pages_per_cluster);
378 }
379 
380 #endif
381