1 /*
2 * Copyright (c) 2014-2019 Pavel Kalvoda <me@pavelkalvoda.com>
3 *
4 * libcbor is free software; you can redistribute it and/or modify
5 * it under the terms of the MIT license. See LICENSE for details.
6 */
7
8 #include "unicode.h"
9
/* Automaton state: no code point is pending (start state / sequence done) */
#define UTF8_ACCEPT 0
/* Automaton state: the input seen so far cannot be valid UTF-8 */
#define UTF8_REJECT 1

/*
 * Lookup table driving the UTF-8 decoding automaton.
 *
 * Entries 0..255 assign a character class to every possible byte value.
 * Entries from 256 onwards hold the state transitions: the successor of
 * state `s` on a byte of class `c` is stored at index 256 + s * 16 + c.
 *
 * Every row below holds exactly 16 entries, i.e. one `x0..xf` byte range in
 * the first part and one automaton state in the second part.
 */
/* clang-format off */
static const uint8_t utf8d[] = {
    /* Byte value -> character class */
    0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00..0f */
    0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10..1f */
    0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20..2f */
    0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 30..3f */
    0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40..4f */
    0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 50..5f */
    0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60..6f */
    0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 70..7f */
    1,  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 80..8f */
    9,  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, /* 90..9f */
    7,  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* a0..af */
    7,  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* b0..bf */
    8,  8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* c0..cf */
    2,  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* d0..df */
    10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, /* e0..ef */
    11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, /* f0..ff */

    /* (state, character class) -> next state */
    0,  1, 2, 3, 5, 8, 7, 1, 1, 1, 4, 6, 1, 1, 1, 1, /* s0 */
    1,  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s1 */
    1,  0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, /* s2 */
    1,  2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, /* s3 */
    1,  1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, /* s4 */
    1,  2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, /* s5 */
    1,  1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s6 */
    1,  3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s7 */
    1,  3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s8 */
};
/* clang-format on */
54
55 /* Copyright of this function: (c) 2008-2009 Bjoern Hoehrmann
56 * <bjoern@hoehrmann.de> */
57 /* See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */
_cbor_unicode_decode(uint32_t * state,uint32_t * codep,uint32_t byte)58 uint32_t _cbor_unicode_decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
59 uint32_t type = utf8d[byte];
60
61 *codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6)
62 : (0xff >> type) & (byte);
63
64 *state = utf8d[256 + *state * 16 + type];
65 return *state;
66 }
67
/*
 * Validates `source` as UTF-8 and counts the code points it contains.
 *
 * source        - bytes to inspect
 * source_length - number of bytes available in `source`
 * status        - out: always written. On success it holds
 *                 _CBOR_UNICODE_OK with location 0. On failure it holds
 *                 _CBOR_UNICODE_BADCP and the offset of the rejected byte, or
 *                 `source_length` when the input ends inside a multibyte
 *                 sequence.
 *
 * Returns the number of code points on success and (size_t)-1 on failure.
 */
size_t _cbor_unicode_codepoint_count(cbor_data source, size_t source_length,
                                     struct _cbor_unicode_status* status) {
  uint32_t dfa_state = UTF8_ACCEPT;
  uint32_t decoded = 0; /* required by the decoder, otherwise unused */
  size_t codepoints = 0;
  size_t offset = 0;

  *status =
      (struct _cbor_unicode_status){.location = 0, .status = _CBOR_UNICODE_OK};

  while (offset < source_length) {
    const uint32_t outcome =
        _cbor_unicode_decode(&dfa_state, &decoded, source[offset]);

    if (outcome == UTF8_REJECT) {
      /* Stop right here so that `offset` keeps pointing at the bad byte */
      break;
    }
    if (outcome == UTF8_ACCEPT) {
      codepoints++;
    }
    offset++;
  }

  /* Only a run that ends in the accepting state is well-formed */
  if (dfa_state == UTF8_ACCEPT) {
    return codepoints;
  }

  /* Either a byte was rejected or the last multibyte codepoint is unfinished */
  *status = (struct _cbor_unicode_status){.location = offset,
                                          .status = _CBOR_UNICODE_BADCP};
  return (size_t)-1;
}
95