xref: /illumos-gate/usr/src/uts/common/fs/zfs/blkptr.c (revision 4923c69fddc0887da5604a262585af3efd82ee20)
/*
 * CDDL HEADER START
 *
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * CDDL HEADER END
 */
155d7b4d43SMatthew Ahrens 
/*
 * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
 */
195d7b4d43SMatthew Ahrens 
205d7b4d43SMatthew Ahrens #include <sys/zfs_context.h>
215d7b4d43SMatthew Ahrens #include <sys/zio.h>
225d7b4d43SMatthew Ahrens #include <sys/zio_compress.h>
235d7b4d43SMatthew Ahrens 
/*
 * Embedded-data Block Pointers
 *
 * Normally, block pointers point (via their DVAs) to a block which holds data.
 * If the data that we need to store is very small, this is an inefficient
 * use of space, because a block must be at minimum 1 sector (typically 512
 * bytes or 4KB).  Additionally, reading these small blocks tends to generate
 * more random reads.
 *
 * Embedded-data Block Pointers allow small pieces of data (the "payload",
 * up to 112 bytes) to be stored in the block pointer itself, instead of
 * being pointed to.  The "Pointer" part of this name is a bit of a
 * misnomer, as nothing is pointed to.
 *
 * BP_EMBEDDED_TYPE_DATA block pointers allow highly-compressible data to
 * be embedded in the block pointer.  The logic for this is handled in
 * the SPA, by the zio pipeline.  Therefore most code outside the zio
 * pipeline doesn't need special-cases to handle these block pointers.
 *
 * See spa.h for details on the exact layout of embedded block pointers.
 */
455d7b4d43SMatthew Ahrens 
465d7b4d43SMatthew Ahrens void
encode_embedded_bp_compressed(blkptr_t * bp,void * data,enum zio_compress comp,int uncompressed_size,int compressed_size)475d7b4d43SMatthew Ahrens encode_embedded_bp_compressed(blkptr_t *bp, void *data,
485d7b4d43SMatthew Ahrens     enum zio_compress comp, int uncompressed_size, int compressed_size)
495d7b4d43SMatthew Ahrens {
505d7b4d43SMatthew Ahrens 	uint64_t *bp64 = (uint64_t *)bp;
515d7b4d43SMatthew Ahrens 	uint64_t w = 0;
525d7b4d43SMatthew Ahrens 	uint8_t *data8 = data;
535d7b4d43SMatthew Ahrens 
545d7b4d43SMatthew Ahrens 	ASSERT3U(compressed_size, <=, BPE_PAYLOAD_SIZE);
555d7b4d43SMatthew Ahrens 	ASSERT(uncompressed_size == compressed_size ||
565d7b4d43SMatthew Ahrens 	    comp != ZIO_COMPRESS_OFF);
575d7b4d43SMatthew Ahrens 	ASSERT3U(comp, >=, ZIO_COMPRESS_OFF);
585d7b4d43SMatthew Ahrens 	ASSERT3U(comp, <, ZIO_COMPRESS_FUNCTIONS);
595d7b4d43SMatthew Ahrens 
605d7b4d43SMatthew Ahrens 	bzero(bp, sizeof (*bp));
615d7b4d43SMatthew Ahrens 	BP_SET_EMBEDDED(bp, B_TRUE);
625d7b4d43SMatthew Ahrens 	BP_SET_COMPRESS(bp, comp);
635d7b4d43SMatthew Ahrens 	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
645d7b4d43SMatthew Ahrens 	BPE_SET_LSIZE(bp, uncompressed_size);
655d7b4d43SMatthew Ahrens 	BPE_SET_PSIZE(bp, compressed_size);
665d7b4d43SMatthew Ahrens 
675d7b4d43SMatthew Ahrens 	/*
685d7b4d43SMatthew Ahrens 	 * Encode the byte array into the words of the block pointer.
695d7b4d43SMatthew Ahrens 	 * First byte goes into low bits of first word (little endian).
705d7b4d43SMatthew Ahrens 	 */
715d7b4d43SMatthew Ahrens 	for (int i = 0; i < compressed_size; i++) {
725d7b4d43SMatthew Ahrens 		BF64_SET(w, (i % sizeof (w)) * NBBY, NBBY, data8[i]);
735d7b4d43SMatthew Ahrens 		if (i % sizeof (w) == sizeof (w) - 1) {
745d7b4d43SMatthew Ahrens 			/* we've reached the end of a word */
755d7b4d43SMatthew Ahrens 			ASSERT3P(bp64, <, bp + 1);
765d7b4d43SMatthew Ahrens 			*bp64 = w;
775d7b4d43SMatthew Ahrens 			bp64++;
785d7b4d43SMatthew Ahrens 			if (!BPE_IS_PAYLOADWORD(bp, bp64))
795d7b4d43SMatthew Ahrens 				bp64++;
805d7b4d43SMatthew Ahrens 			w = 0;
815d7b4d43SMatthew Ahrens 		}
825d7b4d43SMatthew Ahrens 	}
835d7b4d43SMatthew Ahrens 	/* write last partial word */
845d7b4d43SMatthew Ahrens 	if (bp64 < (uint64_t *)(bp + 1))
855d7b4d43SMatthew Ahrens 		*bp64 = w;
865d7b4d43SMatthew Ahrens }
875d7b4d43SMatthew Ahrens 
885d7b4d43SMatthew Ahrens /*
895d7b4d43SMatthew Ahrens  * buf must be at least BPE_GET_PSIZE(bp) bytes long (which will never be
905d7b4d43SMatthew Ahrens  * more than BPE_PAYLOAD_SIZE bytes).
915d7b4d43SMatthew Ahrens  */
925d7b4d43SMatthew Ahrens void
decode_embedded_bp_compressed(const blkptr_t * bp,void * buf)935d7b4d43SMatthew Ahrens decode_embedded_bp_compressed(const blkptr_t *bp, void *buf)
945d7b4d43SMatthew Ahrens {
955d7b4d43SMatthew Ahrens 	int psize;
965d7b4d43SMatthew Ahrens 	uint8_t *buf8 = buf;
975d7b4d43SMatthew Ahrens 	uint64_t w = 0;
985d7b4d43SMatthew Ahrens 	const uint64_t *bp64 = (const uint64_t *)bp;
995d7b4d43SMatthew Ahrens 
1005d7b4d43SMatthew Ahrens 	ASSERT(BP_IS_EMBEDDED(bp));
1015d7b4d43SMatthew Ahrens 
1025d7b4d43SMatthew Ahrens 	psize = BPE_GET_PSIZE(bp);
1035d7b4d43SMatthew Ahrens 
1045d7b4d43SMatthew Ahrens 	/*
1055d7b4d43SMatthew Ahrens 	 * Decode the words of the block pointer into the byte array.
1065d7b4d43SMatthew Ahrens 	 * Low bits of first word are the first byte (little endian).
1075d7b4d43SMatthew Ahrens 	 */
1085d7b4d43SMatthew Ahrens 	for (int i = 0; i < psize; i++) {
1095d7b4d43SMatthew Ahrens 		if (i % sizeof (w) == 0) {
1105d7b4d43SMatthew Ahrens 			/* beginning of a word */
1115d7b4d43SMatthew Ahrens 			ASSERT3P(bp64, <, bp + 1);
1125d7b4d43SMatthew Ahrens 			w = *bp64;
1135d7b4d43SMatthew Ahrens 			bp64++;
1145d7b4d43SMatthew Ahrens 			if (!BPE_IS_PAYLOADWORD(bp, bp64))
1155d7b4d43SMatthew Ahrens 				bp64++;
1165d7b4d43SMatthew Ahrens 		}
1175d7b4d43SMatthew Ahrens 		buf8[i] = BF64_GET(w, (i % sizeof (w)) * NBBY, NBBY);
1185d7b4d43SMatthew Ahrens 	}
1195d7b4d43SMatthew Ahrens }
120*4923c69fSMatthew Ahrens 
121*4923c69fSMatthew Ahrens /*
122*4923c69fSMatthew Ahrens  * Fill in the buffer with the (decompressed) payload of the embedded
123*4923c69fSMatthew Ahrens  * blkptr_t.  Takes into account compression and byteorder (the payload is
124*4923c69fSMatthew Ahrens  * treated as a stream of bytes).
125*4923c69fSMatthew Ahrens  * Return 0 on success, or ENOSPC if it won't fit in the buffer.
126*4923c69fSMatthew Ahrens  */
127*4923c69fSMatthew Ahrens int
decode_embedded_bp(const blkptr_t * bp,void * buf,int buflen)128*4923c69fSMatthew Ahrens decode_embedded_bp(const blkptr_t *bp, void *buf, int buflen)
129*4923c69fSMatthew Ahrens {
130*4923c69fSMatthew Ahrens 	int lsize, psize;
131*4923c69fSMatthew Ahrens 
132*4923c69fSMatthew Ahrens 	ASSERT(BP_IS_EMBEDDED(bp));
133*4923c69fSMatthew Ahrens 
134*4923c69fSMatthew Ahrens 	lsize = BPE_GET_LSIZE(bp);
135*4923c69fSMatthew Ahrens 	psize = BPE_GET_PSIZE(bp);
136*4923c69fSMatthew Ahrens 
137*4923c69fSMatthew Ahrens 	if (lsize > buflen)
138*4923c69fSMatthew Ahrens 		return (ENOSPC);
139*4923c69fSMatthew Ahrens 	ASSERT3U(lsize, ==, buflen);
140*4923c69fSMatthew Ahrens 
141*4923c69fSMatthew Ahrens 	if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) {
142*4923c69fSMatthew Ahrens 		uint8_t dstbuf[BPE_PAYLOAD_SIZE];
143*4923c69fSMatthew Ahrens 		decode_embedded_bp_compressed(bp, dstbuf);
144*4923c69fSMatthew Ahrens 		VERIFY0(zio_decompress_data_buf(BP_GET_COMPRESS(bp),
145*4923c69fSMatthew Ahrens 		    dstbuf, buf, psize, buflen));
146*4923c69fSMatthew Ahrens 	} else {
147*4923c69fSMatthew Ahrens 		ASSERT3U(lsize, ==, psize);
148*4923c69fSMatthew Ahrens 		decode_embedded_bp_compressed(bp, buf);
149*4923c69fSMatthew Ahrens 	}
150*4923c69fSMatthew Ahrens 
151*4923c69fSMatthew Ahrens 	return (0);
152*4923c69fSMatthew Ahrens }
153