xref: /netbsd-src/external/mpl/mozilla-certdata/share/certdata.awk (revision 2c5ae21ccfb166911c49dc727989ff8e898a097c)
1#!/usr/bin/awk -f
2#
3# Copyright (c) 2023 The NetBSD Foundation, Inc.
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions
8# are met:
9# 1. Redistributions of source code must retain the above copyright
10#    notice, this list of conditions and the following disclaimer.
11# 2. Redistributions in binary form must reproduce the above copyright
12#    notice, this list of conditions and the following disclaimer in the
13#    documentation and/or other materials provided with the distribution.
14#
15# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
16# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
17# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
19# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25# POSSIBILITY OF SUCH DAMAGE.
26#
27
# quotify(x)
#
#	Return x wrapped in single quotes for use as one word in a
#	sh(1) command line.  Each embedded single quote is rewritten
#	as '\'' (close quote, escaped quote, reopen quote).
function quotify(x) {
	gsub(/'/, "'\\''", x)
	return sprintf("'%s'", x)
}
32
# err(s)
#
#	Report the error message s on stderr, prefixed with the
#	program name and current input line number, followed by the
#	current input record as a `# ...' line, and count it in the
#	global `errors'.
function err(s) {
	errors += 1
	printf "%s: line %s: error: %s\n# %s\n", ARGV[0], NR, s, $0 \
	    >"/dev/stderr"
}
38
# symlink(target, link)
#
#	Run `ln -sfn target link' through the shell, with both
#	arguments shell-quoted.  A nonzero exit status from ln is
#	reported through err().
function symlink(target, link,  cmd, status) {
	cmd = "ln -sfn " quotify(target) " " quotify(link)
	status = system(cmd)
	if (status == 0)
		return
	err(sprintf("ln failed with status %d", status))
}
45
# reset()
#
#	Clear all per-object state (class, label, lower-cased label,
#	output paths) and leave skip mode.  Called at the start of
#	every object block.
function reset() {
	skipping = 0
	cka_class = label = lolab = ""
	certpem = certworkdir = ""
}
54
# skip()
#
#	Enter skip mode by setting the global `skipping'; it stays
#	set until reset() clears it.  With VERBOSE set, also log the
#	line at which skipping started.
function skip() {
	skipping = 1
	if (!VERBOSE)
		return
	printf "line %d: skip from: %s\n", NR, $0
}
60
# parseoctal(s, i, n)
#
#	Interpret the n characters of s starting at (1-based) index i
#	as an octal number and return its value.  The digits are not
#	validated here.
function parseoctal(s, i, n,  x) {
	x = 0
	while (n-- > 0)
		x = 8 * x + int(substr(s, i++, 1))
	return x
}
70
# writeoctaldata(f, desc)
#
#	Decode the MULTILINE_OCTAL value that follows the current
#	attribute line and write the raw bytes to the file f.  desc
#	names the attribute in error messages.
#
#	Consumes input lines up to and including the terminating
#	`END' line.  Each data line must consist solely of \ooo octal
#	escapes; the first malformed line is reported once, and the
#	rest of the value is still consumed so parsing can resume at
#	the next attribute.
#
#	Returns -1 if the attribute is not MULTILINE_OCTAL (also
#	entering skip mode), if any data was malformed, or if input
#	ended before `END'; otherwise returns the status of close(f).
function writeoctaldata(f, desc,  warned, status, i, len) {
	if ($2 != "MULTILINE_OCTAL") {
		err(sprintf("%s: Invalid %s type: %s", label, desc, $2))
		skip()
		return -1
	}
	warned = 0
	# Test getline against 0 explicitly: it returns -1 on a read
	# error, which is truthy and would otherwise loop forever.
	while ((getline) > 0) {
		if ($0 == "END")
			break
		len = length($0)
		# Each escape is four characters: a backslash and three
		# octal digits.
		for (i = 0; i < len; i += 4) {
			if (substr($0, i + 1, 4) !~ /\\[0-7][0-7][0-7]/) {
				if (!warned)
					err(sprintf("%s: Invalid %s data",
					    label, desc))
				warned = 1
				break
			}
			printf "%c", parseoctal($0, i + 2, 3) >f
		}
	}
	status = close(f)
	# If the loop ended without seeing END, the input was truncated
	# (or unreadable).
	if ($0 != "END") {
		err(sprintf("%s: Invalid octal data", label))
		warned = 1
	}
	if (warned)
		return -1
	return status
}
101
# checkoctaldata(f, desc)
#
#	Decode the MULTILINE_OCTAL value following the current line
#	into the scratch file f".tmp" and compare it byte-for-byte
#	with the existing file f using cmp(1).  On a match the scratch
#	file is removed; on a mismatch (or if cmp/rm fails) an error
#	is reported and skip mode is entered.  If decoding itself
#	fails, writeoctaldata() has already reported it and nothing
#	further is done.
function checkoctaldata(f, desc,  fcheck, cmd) {
	fcheck = f".tmp"
	if (writeoctaldata(fcheck, desc) != 0)
		return
	cmd = sprintf("cmp -- %s %s >/dev/null && rm -- %s",
	    quotify(f), quotify(fcheck), quotify(fcheck))
	if (system(cmd) != 0) {
		err(sprintf("%s: %s mismatch", label, desc))
		skip()
	}
}
113
# writecheckoctaldata(f, desc, dowrite)
#
#	If dowrite is true, decode the octal value following the
#	current line into f; otherwise decode it and verify that it
#	matches what is already stored in f.
function writecheckoctaldata(f, desc, dowrite) {
	if (!dowrite) {
		checkoctaldata(f, desc)
		return
	}
	writeoctaldata(f, desc)
}
120
# skipoctaldata(desc)
#
#	Consume the MULTILINE_OCTAL value following the current line
#	without storing it, up to and including the `END' line.  desc
#	names the attribute in error messages.
#
#	Lines that are not made up entirely of \ooo octal escapes are
#	reported (once per value).
#
#	Returns 0 once `END' is seen, -1 if the attribute is not
#	MULTILINE_OCTAL, and 1 if input ends before `END'; the two
#	failure cases also enter skip mode.
function skipoctaldata(desc,  warned) {
	if ($2 != "MULTILINE_OCTAL") {
		err(sprintf("%s: Invalid %s type: %s", label, desc, $2))
		skip()
		return -1
	}
	warned = 0
	# Test getline against 0 explicitly: it returns -1 on a read
	# error, which is truthy and would otherwise loop forever.
	while ((getline) > 0) {
		if ($0 == "END")
			return 0
		# The pattern must be anchored: unanchored, the starred
		# group matches the empty string and so matches every line.
		if ($0 !~ /^(\\[0-7][0-7][0-7])*$/ && !warned) {
			err(sprintf("%s: Invalid %s data", label, desc))
			warned = 1
		}
	}
	err(sprintf("%s: Invalid octal data", label))
	skip()
	return 1
}
140
# distrust_after(desc)
#
#	Handle a CKA_NSS_*_DISTRUST_AFTER attribute on the current
#	line; desc names the purpose (e.g. "server") for messages.
#
#	`CK_BBOOL CK_FALSE' means no distrust deadline and is ignored.
#	Anything else marks the current certificate (keyed by lolab)
#	as distrusted: a MULTILINE_OCTAL deadline is consumed without
#	being interpreted, and any other type is reported as an error.
function distrust_after(desc) {
	if ($2 == "CK_BBOOL" && $3 == "CK_FALSE")
		return
	if ($2 == "MULTILINE_OCTAL") {
		skipoctaldata(sprintf("%s distrust deadline", desc))
	} else {
		err(sprintf("%s: Unknown %s distrust type %s, value: %s",
		    label, desc, $2, $3))
	}
	if (VERBOSE)
		printf "line %d: distrust for %s: %s\n", NR, desc, label
	distrusted[lolab] = 1
}
154
# addtrust(trustfile, desc)
#
#	Process a CKA_TRUST_* line for the current certificate.  desc
#	names the trust purpose for messages and, together with lolab,
#	keys the duplicate check: a second trust line for the same
#	certificate and purpose is an error and enters skip mode.
#
#	Only CKT_NSS_TRUSTED_DELEGATOR on a certificate that has not
#	been marked distrusted appends the label to trustfile.  The
#	other known designations are accepted and only logged under
#	VERBOSE; unknown ones are reported as errors.
function addtrust(trustfile, desc) {
	if ((desc ":" lolab) in trust_lineno) {
		err(sprintf("%s: Multiple trust lines for %s, first at %d",
		    label, desc, trust_lineno[desc ":" lolab]))
		skip()
		return
	}
	trust_lineno[desc ":" lolab] = NR

	# Known designations that do not make this a trusted CA.
	if ($3 ~ /^CKT_NSS_(TRUSTED|MUST_VERIFY_TRUST|UNTRUSTED|NOT_TRUSTED)$/) {
		if (VERBOSE)
			printf "line %d: untrusted as CA for %s\n", NR, desc
		return
	}

	if ($3 != "CKT_NSS_TRUSTED_DELEGATOR") {
		err(sprintf("%s: Unknown trust designation for %s: %s",
		    label, desc, $3))
		return
	}

	# Trusted delegator, unless a distrust mark overrides it.
	if (distrusted[lolab]) {
		if (VERBOSE)
			printf "line %d: distrusted for %s\n", NR, desc
		return
	}
	if (VERBOSE)
		printf "line %d: trusted for %s\n", NR, desc
	printf "%s\n", label >trustfile
}
189
190
# Startup: validate the required -v variables, default and truncate
# the trust list outputs, initialise state, and register the
# certificates that are distrusted by special case.
BEGIN {
	if (ARGV[0] == "awk")
		ARGV[0] = "certdata"
	# CERTDIR, WORKDIR, and OPENSSL are mandatory.  Terminate each
	# usage message with a newline so it does not run into the
	# shell prompt or subsequent output.
	if (!CERTDIR) {
		printf "%s: specify -v CERTDIR=...\n", ARGV[0] >"/dev/stderr"
		exit 1
	}
	if (!WORKDIR) {
		printf "%s: specify -v WORKDIR=...\n", ARGV[0] >"/dev/stderr"
		exit 1
	}
	if (!OPENSSL) {
		printf "%s: specify -v OPENSSL=...\n", ARGV[0] >"/dev/stderr"
		exit 1
	}
	# Trust lists that were not requested are sent to /dev/null.
	if (!SERVERTRUST)
		SERVERTRUST = "/dev/null"
	if (!EMAILTRUST)
		EMAILTRUST = "/dev/null"
	if (!CODETRUST)
		CODETRUST = "/dev/null"
	# Opening with `>' truncates any existing trust list.
	printf "" >SERVERTRUST
	printf "" >EMAILTRUST
	printf "" >CODETRUST
	errors = 0
	reset()

	# Special cases.  See
	# https://wiki.mozilla.org/CA/Additional_Trust_Changes for
	# details.

	# `The Turkish Government CA is name-constrained to *.tr.'
	#
	# Implemented in Firefox by
	# https://phabricator.services.mozilla.com/D177242, but OpenSSL
	# has no mechanism to implement this constraint, so we just
	# exclude it altogether.
	special_distrust["TUBITAK_Kamu_SM_SSL_Kok_Sertifikasi_-_Surum_1"] = 1
}
230
# Shutdown: verify every special-case distrust was matched, flush and
# close the trust lists, and exit nonzero if any error was counted.
END {
	# Make sure the special cases have been applied.  Entries are
	# deleted as they are matched, so anything left was never seen.
	# err() takes a single preformatted string, so format here.
	for (label in special_distrust)
		err(sprintf("special distrust not found: %s.pem", label))

	fflush()		# flush all
	close(SERVERTRUST)
	close(EMAILTRUST)
	close(CODETRUST)
	if (errors) {
		printf "%s: exiting with failure on %d error%s\n", ARGV[0], \
		    errors, (errors == 1 ? "" : "s") \
		    >"/dev/stderr"
		exit 1
	}
}
247
# Lines that carry no object data: comments, blank lines, and the
# BEGINDATA marker.
/^ *#/ || /^ *$/ || $1 == "BEGINDATA" {
	next
}
259
# A CKA_CLASS line starts a new object, so per-object state (and skip
# mode) is cleared first.  While skip mode is active every other line
# is discarded.
{
	if ($1 == "CKA_CLASS")
		reset()
	if (skipping) {
		if (VERBOSE)
			printf("line %d: skipping: %s\n", NR, $0)
		next
	}
}
269
# CKA_CLASS: validate the attribute type, ignore the built-in root
# list object, and otherwise record the class of the object being
# parsed.
$1 == "CKA_CLASS" {
	if ($2 != "CK_OBJECT_CLASS") {
		err(sprintf("Invalid class: %s", $2))
		skip()
	} else if ($3 == "CKO_NSS_BUILTIN_ROOT_LIST") {
		skip()
	} else {
		cka_class = $3
	}
	next
}
285
# Boolean attributes that are expected to be CK_TRUE on every object.
$1 == "CKA_TOKEN" || $1 == "CKA_NSS_MOZILLA_CA_POLICY" {
	if ($2 != "CK_BBOOL")
		err(sprintf("Invalid %s type: %s; value: %s", $1, $2, $3))
	else if ($3 != "CK_TRUE")
		err(sprintf("%s is false", $1))
	next
}
297
# Boolean attributes that are expected to be CK_FALSE on every object.
$1 == "CKA_MODIFIABLE" || $1 == "CKA_PRIVATE" ||
$1 == "CKA_TRUST_STEP_UP_APPROVED" {
	if ($2 != "CK_BBOOL")
		err(sprintf("Invalid %s type: %s; value: %s", $1, $2, $3))
	else if ($3 != "CK_FALSE")
		err(sprintf("%s is true", $1))
	next
}
310
# CKA_LABEL: derive a pathname-safe label for the current object,
# resolve duplicates, apply special-case distrust, and set up (for a
# certificate) or verify (for a trust object) the certificate's
# output file and work directory.
$1 == "CKA_LABEL" {
	if ($2 != "UTF8") {
		err(sprintf("Non-UTF8 label type: %s; value: %s", $2, $3))
		skip()
		next
	}

	# Clear the `CKA_LABEL UTF8' fields.  (`shift 2', in sh, except
	# that doesn't work here in awk.)
	sub(/CKA_LABEL +UTF8 +/, "", $0)

	# Forbid embedded ", \, and /, as well as bunch of others.
	#
	# - We forbid embedded " because it's not clear what the escape
	#   sequence is.
	#
	# - We forbid \ in case there are escape sequences we don't
	#   know.
	#
	# - We forbid / so that we can always form a directory
	#   component
	#
	# We immediately forbid a bunch of others that might be
	# metacharacters or otherwise problematic, so that the next
	# person to update certdata will be forced to consciously think
	# about how to handle them.
	if ($0 !~ /^"[^[:cntrl:]!"#$%&\*\/:;?\[\\\]\^`\|~]*"$/) {
		# $0 now holds just the quoted label; the fields were
		# re-split by the sub() above, so $3 would only be a
		# fragment of it.
		err(sprintf("Invalid characters in label: %s", $0))
		skip()
		next
	}

	# Nix the "quotes".
	label = substr($0, 2, length($0) - 2)

	# XXX The `renaming to' messages are inconsistent about whether
	# they apply pre-substitution or post-substitution, so some
	# have spaces and some have underscores.  Oh well.

	# Special cases: Avoid parentheses in two CA names, and
	# non-US-ASCII in one CA name.  It is regrettable to limit
	# ourselves to an anglocentric worldview like this, but this
	# will avoid potential problems with file system pathname
	# encoding and canonicalization downstream.
	if (label ~ /^NetLock Arany \(Class Gold\) F.*$/) {
		label = "NetLock Arany Class Gold"
		if (cka_class == "CKO_CERTIFICATE") {
			printf "line %s: special characters," \
			    " renaming to \"%s\"\n",	      \
			    NR, label
		}
	}
	if (label == "LAWtrust Root CA2 (4096)") {
		label = "LAWtrust Root CA2 4096"
		if (cka_class == "CKO_CERTIFICATE") {
			printf "line %s: special characters," \
			    " renaming to \"%s\"\n",	      \
			    NR, label
		}
	}

	# Avoid spaces in filenames, because Unix.  Not that filenames
	# can't have spaces in Unix, but a lot of downstream tools may
	# get confused by them.
	gsub(/ /, "_", label)

	# Make sure it uses only pathname-safe characters.
	if (label ~ /[^[:alnum:]._-]/ || label ~ /^\./) {
		err(sprintf("Special CA label: %s", label))
		skip()
		next
	}

	# Make sure it's not empty.
	if (length(label) == 0) {
		err("Empty label")
		skip()
		next
	}

	# Make sure it fits within a reasonable limit as a filename.
	if (length(label) > 100) {
		err(sprintf("Label too long, %d bytes > max %d: %s",
		    length(label), 100, label))
		skip()
		next
	}

	# If this defines the certificate, check for duplicates; if a
	# duplicate is found, assign a counter suffix.
	#
	# XXX This collision numbering might not be stable across
	# updates.  What to do?  Use the serial number?
	#
	# XXX This doesn't use Unicode case-folding.  Let's hope we
	# don't have anything that is a collision under casefold but
	# not under US-ASCII-limited tolower.
	lolab = tolower(label)
	if (cka_class == "CKO_CERTIFICATE") {
		if (VERBOSE)
			printf "line %d: CA \"%s\"\n", NR, label
		if (lolab in label_lineno) {
			label = sprintf("%s.%d", label, ++label_counter[lolab])
			lolab = tolower(label)
			printf "line %s: duplicate, renaming to \"%s\"\n", \
			    NR, label
		}
		label_lineno[lolab] = NR
	} else {
		if (VERBOSE)
			printf "line %d: trust \"%s\"\n", NR, label
		# Hack: Take the highest-numbered counter for this label.
		if (lolab in label_counter) {
			label = sprintf("%s.%d", label, label_counter[lolab])
			lolab = tolower(label)
			printf "line %s: assuming duplicate is \"%s\"\n", \
			    NR, label
		}
		if (!(lolab in label_lineno)) {
			err(sprintf("Missing label: %s", label))
			skip()
			next
		}
	}

	# Apply special cases.
	if (cka_class == "CKO_CERTIFICATE") {
		if (label in special_distrust) {
			printf "line %s: specially distrusting \"%s\"\n", \
			    NR, label
			distrusted[lolab] = 1
			delete special_distrust[label]
		}
	}

	# Compute where this certificate will live and a workspace.
	certpem = CERTDIR"/"label".pem"
	certworkdir = WORKDIR"/"label

	# If this defines the certificate, create the directory.
	# Otherwise, make sure the directory is there already.
	if (cka_class == "CKO_CERTIFICATE") {
		# mkdir prints its own diagnostic, so only count the
		# failure here.
		if (system(sprintf("mkdir -- %s", quotify(certworkdir))) \
		    != 0) {
			errors++
			skip()
			next
		}
	} else {
		# err() takes a single preformatted string, so format
		# the message with sprintf first.
		if (system(sprintf("test -f %s", quotify(certpem))) != 0) {
			err(sprintf("%s: Missing certificate for %s", label,
			    cka_class))
		}
		if (system(sprintf("test -d %s", quotify(certworkdir))) != 0) {
			err(sprintf("%s: Missing directory for %s", label,
			    cka_class))
		}
	}

	next
}
470
471# Remaining rules assume we are in the middle of an object block and we
472# have a label.
473
# Any attribute reaching this point needs a label; without one the
# rest of the object is skipped.
!label {
	err($1 ": missing label")
	skip()
	next
}
479
# CKA_CERTIFICATE_TYPE: only `CK_CERTIFICATE_TYPE CKC_X_509' is
# accepted; anything else is an error and the object is skipped.
$1 == "CKA_CERTIFICATE_TYPE" {
	if ($2 != "CK_CERTIFICATE_TYPE") {
		err(sprintf("%s: Invalid certificate type type: %s",
		    label, $2))
		skip()
		next
	}
	if ($3 != "CKC_X_509") {
		# Report the offending value ($3), not the attribute
		# type ($2), which is known to be valid at this point.
		err(sprintf("%s: Unknown certificate type: %s", label, $3))
		skip()
		next
	}
	next
}
494
# CKA_SUBJECT: store the decoded subject in the work directory.
$1 == "CKA_SUBJECT" {
	writeoctaldata(certworkdir "/subject", "subject")
	next
}

# CKA_ID: the only accepted form is exactly `CKA_ID UTF8 "0"'.
$1 == "CKA_ID" {
	if ($0 == "CKA_ID UTF8 \"0\"")
		next
	err(sprintf("%s: Invalid id: %s", label, $0))
	skip()
	next
}

# CKA_ISSUER and CKA_SERIAL_NUMBER appear on both the certificate and
# its trust object: the certificate writes them out, and the trust
# object is checked against what the certificate wrote.
$1 == "CKA_ISSUER" || $1 == "CKA_SERIAL_NUMBER" {
	if ($1 == "CKA_ISSUER") {
		writecheckoctaldata(certworkdir "/issuer", "issuer",
		    cka_class == "CKO_CERTIFICATE")
	} else {
		writecheckoctaldata(certworkdir "/serial", "serial number",
		    cka_class == "CKO_CERTIFICATE")
	}
	next
}
520
# CKA_VALUE: store the DER certificate in the work directory, then
# convert it to PEM at certpem with `openssl x509'.  The conversion is
# not attempted if decoding the octal data failed.
$1 == "CKA_VALUE" {
	if (writeoctaldata(certworkdir "/cert.der", "certificate data") != 0)
		next
	if (system(quotify(OPENSSL) " x509 -inform DER -outform PEM <" \
	    quotify(certworkdir "/cert.der") " >" quotify(certpem)) != 0)
		err(sprintf("%s: openssl x509 failed", label))
	next
}
531
# Certificate hashes: store the decoded SHA-1 or MD5 hash in the work
# directory.
$1 == "CKA_CERT_SHA1_HASH" || $1 == "CKA_CERT_MD5_HASH" {
	if ($1 == "CKA_CERT_SHA1_HASH")
		writeoctaldata(certworkdir "/hash.sha1", "SHA-1 hash")
	else
		writeoctaldata(certworkdir "/hash.md5", "MD5 hash")
	next
}

# Distrust-after attributes for server and email use.
$1 == "CKA_NSS_SERVER_DISTRUST_AFTER" ||
$1 == "CKA_NSS_EMAIL_DISTRUST_AFTER" {
	distrust_after($1 == "CKA_NSS_SERVER_DISTRUST_AFTER" ? \
	    "server" : "email")
	next
}
551
# Every attribute handled above has already been consumed, so what is
# left must be a `CKA_TRUST_* CK_TRUST ...' line.  Anything else is
# reported and the rest of the object is skipped.
$1 !~ /^CKA_TRUST_/ || $2 != "CK_TRUST" {
	if ($1 !~ /^CKA_TRUST_/)
		err(sprintf("%s: Unknown line: %s", label, $0))
	else
		err(sprintf("%s: Invalid trust line: %s", label, $0))
	skip()
	next
}
563
564# Remaining rules assume we are on a valid CKA_TRUST_* attribute.
565
# Dispatch each recognised trust domain to its trust list; any other
# CKA_TRUST_* attribute is reported as an error.
{
	if ($1 == "CKA_TRUST_SERVER_AUTH") {
		addtrust(SERVERTRUST, "server authentication")
		next
	} else if ($1 == "CKA_TRUST_EMAIL_PROTECTION") {
		addtrust(EMAILTRUST, "email protection")
		next
	} else if ($1 == "CKA_TRUST_CODE_SIGNING") {
		addtrust(CODETRUST, "code signing")
		next
	}
	err(sprintf("%s: Unknown trust domain: %s", label, $1))
}
584