xref: /netbsd-src/external/mpl/bind/dist/bin/tests/system/idna/tests.sh (revision 5dd36a3bc8bf2a9dec29ceb6349550414570c447)
1# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
2#
3# This Source Code Form is subject to the terms of the Mozilla Public
4# License, v. 2.0. If a copy of the MPL was not distributed with this
5# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6#
7# See the COPYRIGHT file distributed with this work for additional
8# information regarding copyright ownership.
9
# Locate the top of the system-test tree and load its shared shell
# configuration.  NOTE(review): conf.sh presumably supplies the names this
# script uses but never defines (DIG, PORT, FEATURETEST, echo_i) - confirm.
SYSTEMTESTTOP=..
. "${SYSTEMTESTTOP}/conf.sh"
12
# Set known locale for the tests
#
# The first UTF-8 locale reported by "locale -a" wins, trying "C" before
# "en_US".  Both the "UTF-8" and the "utf8" spellings are recognised.  When
# neither locale is available LC_ALL keeps whatever value it already had; it
# is exported in every case.

for candidate in C en_US; do
    if locale -a | grep -qE "^${candidate}\\.(UTF-8|utf8)"; then
        LC_ALL="${candidate}.UTF-8"
        break
    fi
done
export LC_ALL
21
# This set of tests checks the behavior of the IDNA options in "dig".
23#
24# "dig" supports two IDNA-related options:
25#
26# +[no]idnin -  Translates a domain name into punycode format before sending
27#               the query to the server.
28#
29#               Should the input name be a punycode name, "dig +idnin" will also
30#               validate the punycode, rejecting it if it is invalid.
31#
32# +[no]idnout - Translates the received punycode domain names into appropriate
33#               unicode characters before displaying.
34#
35# The tests run "dig" against an authoritative server configured with a minimal
36# root zone and nothing else.  As a result, all queries will result in an
37# NXDOMAIN.  The server will return the qname sent, which "dig" will display
38# according to the options selected.  This returned string is compared with
39# the qname originally sent.
40#
41# In the comments below, the following nomenclature (taken from RFC 5890) is
42# used:
43#
44# A-label: Label comprising ASCII characters that starts xn-- and whose
45#          characters after the xn-- are a valid output of the Punycode
46#          algorithm.
47#
48# Fake A-label: An A-label whose characters after the xn-- are not valid
49#          Punycode output.
50#
51# U-label: Unicode (native character) form of a label.
52#
53# For the purpose of this test script, U-labels do not include labels that
54# comprise purely ASCII characters, which are referred to as "ASCII-labels"
55# here. Valid ASCII-labels comprise letters, digits and hyphens and do not
56# start with a hyphen.
57#
58# References:
59# 1. http://www.unicode.org/reports/tr46/#Deviations
60# 2. http://www.unicode.org/reports/tr46/#IDNAComparison
61
# Initialize test count and status return.  "n" numbers the tests (and the
# dig.out.N files they write); "status" accumulates the number of failures
# and becomes the exit status of the script.
n=0
status=0

# Using dig insecure mode as we are not testing DNSSEC here.  The variable
# holds several space-separated words and is meant to be expanded unquoted.
DIGCMD="${DIG} -i -p ${PORT} @10.53.0.1"
68
69
# Function for extracting the qname from the response
#
#   $1 - File holding the output of "dig" (standard input is read when the
#        argument is omitted)
#
# The qname is the first field in the line after the line containing
# ";; QUESTION SECTION:", with its leading ";" removed.
#
# The string printed includes the trailing period.  Nothing is printed when
# the input has no question section.

qname() {
    # ${1+"$1"} hands the file name to awk as a single word, so a path
    # containing whitespace or glob characters is not split or expanded,
    # while still expanding to nothing at all when no argument was given.
    awk '
        /;; QUESTION SECTION:/ { in_question = 1; next }
        in_question            { sub(";", "", $1); print $1; exit 0 }
    ' ${1+"$1"}
}
83
# Function for performing a test where "dig" is expected to succeed.
#
#   $1 - Description of the test
#   $2 - Dig command additional options (may be empty)
#   $3 - Name being queried
#   $4 - The name that is expected to be displayed by "dig".  Note that names
#        displayed by "dig" will always have a trailing period, so this
#        parameter should have that period as well.
#
# The test number "n" is incremented, the output of "dig" (stdout and stderr)
# is saved in dig.out.$n, and "status" is incremented when either "dig" exits
# with a non-zero status or the qname it displays differs from $4.  The
# variables "description", "ret" and "actual" are overwritten.

idna_test() {
    n=$((n + 1))
    description=$1
    if [ -n "$2" ]; then
        description="${description}: $2"
    fi
    echo_i "$description ($n)"

    ret=0
    # $DIGCMD and $2 are intentionally unquoted: each holds several
    # space-separated words that must become separate arguments.  The query
    # name is quoted so that it reaches "dig" verbatim instead of being
    # word-split or expanded as a file name pattern.
    if $DIGCMD $2 "$3" > "dig.out.$n" 2>&1; then
        actual=$(qname "dig.out.$n")
        if [ "$4" != "$actual" ]; then
            echo_i "failed: expected answer $4, actual result $actual"
            ret=1
        fi
    else
        echo_i "failed: dig command returned non-zero status"
        ret=1
    fi
    status=$((status + ret))
}
115
# Function for performing a test where "dig" is expected to fail
#
#   $1 - Description of the test
#   $2 - Dig command additional options (may be empty)
#   $3 - Name being queried
#
# The test number "n" is incremented, the output of "dig" (stdout and stderr)
# is saved in dig.out.$n, and "status" is incremented when "dig" exits with a
# zero status.  The variables "description" and "ret" are overwritten.

idna_fail() {
    n=$((n + 1))
    description=$1
    if [ -n "$2" ]; then
        description="${description}: $2"
    fi
    echo_i "$description ($n)"

    ret=0
    # $DIGCMD and $2 are intentionally unquoted (several words each); the
    # query name is quoted so that it is passed to "dig" verbatim instead of
    # being word-split or expanded as a file name pattern.
    if $DIGCMD $2 "$3" > "dig.out.$n" 2>&1; then
        echo_i "failed: dig command unexpectedly succeeded"
        ret=1
    fi
    status=$((status + ret))
}
138
# Function to check that case is preserved for an all-ASCII label.
#
# Without IDNA support, case-preservation is the expected behavior.
#
# With IDNA support... not really.  IDNA maps uppercase ASCII characters to
# their lower-case equivalent.  When IDNA support in "dig" was updated to
# non-transitional IDNA 2008, the switch "+idnin" was added and made the default
# behaviour. This meant that the command "dig LocalhosT" (no command switches)
# sends the qname "localhost", a change in behavior from earlier versions.
#
# This was felt to be confusing to the significant number of users who are
# not interested in IDNA. For this reason, after "dig" passes the input qname
# through the IDNA conversion, it does a case-insensitive comparison with the
# result.  If the two are the same, "dig" can conclude that the qname is
# entirely ASCII and it uses the entered string instead of the converted string
# as the qname.
#
# The mixed-case name is queried once with the default options and once with
# each combination of +[no]idnin and +[no]idnout; in every case the name must
# be displayed exactly as it was entered.

ascii_case_preservation_test() {
    text="Checking valid ASCII label"
    for idn_opts in \
        "" \
        "+noidnin +noidnout" \
        "+noidnin +idnout" \
        "+idnin   +noidnout" \
        "+idnin   +idnout"
    do
        idna_test "$text" "$idn_opts" LocalhosT LocalhosT.
    done
}
164
# Function to perform the tests if IDNA is enabled.
#
# Runs the ASCII case-preservation checks followed by the full set of
# U-label / A-label checks.  Results are recorded by idna_test and idna_fail
# (test counter "n", failure count "status").  The variables "text" and
# "label" are overwritten.  LC_ALL is switched to "C" for one group of tests
# and put back to its previous state afterwards.

idna_enabled_test() {
    echo_i "IDNA is enabled, all IDNA tests will be performed"

    # Check that case is preserved on an ASCII label.

    ascii_case_preservation_test


    # Test of a valid U-label
    #
    # +noidnin +noidnout: The label is sent as a unicode octet stream and dig
    #                     will display the string in the \nnn format.
    # +noidnin +idnout:   As for the previous case.
    # +idnin   +noidnout: The label is converted to the xn-- format.  "dig"
    #                     displays the returned xn-- text.
    # +idnin   +idnout:   The label is converted to the xn-- format.  "dig"
    #                     converts the returned xn-- string back to the original
    #                     unicode text.
    #
    # Note that ASCII characters are converted to lower-case.

    text="Checking valid non-ASCII label"
    idna_test "$text" ""                   "München" "M\195\188nchen."
    idna_test "$text" "+noidnin +noidnout" "München" "M\195\188nchen."
    idna_test "$text" "+noidnin +idnout"   "München" "M\195\188nchen."
    idna_test "$text" "+idnin   +noidnout" "München" "xn--mnchen-3ya."
    idna_test "$text" "+idnin   +idnout"   "München" "münchen."


    # Tests of transitional processing of a valid U-label
    #
    # IDNA2003 introduced national character sets but, unfortunately, didn't
    # support several characters properly.  One of those was the German
    # character "ß" (the "Eszett" or "sharp s"), which was interpreted as "ss".
    # So the “faß.de” domain (for example) was processed as “fass.de”.
    #
    # This was corrected in IDNA2008, although some vendors that adopted this
    # standard chose to keep the existing IDNA2003 translation for this
    # character to prevent problems (e.g. people visiting www.faß.example would,
    # under IDNA2003, go to www.fass.example but under IDNA2008 would end up at
    # www.fa\195\159.example - a different web site).
    #
    # BIND has adopted a hard transition, so this test checks that this
    # transitional mapping is not used.  The tests are essentially the same as
    # for the valid U-label.

    text="Checking that non-transitional IDNA processing is used"
    idna_test "$text" ""                   "faß.de" "fa\195\159.de."
    idna_test "$text" "+noidnin +noidnout" "faß.de" "fa\195\159.de."
    idna_test "$text" "+noidnin +idnout"   "faß.de" "fa\195\159.de."
    idna_test "$text" "+idnin   +noidnout" "faß.de" "xn--fa-hia.de."
    idna_test "$text" "+idnin   +idnout"   "faß.de" "faß.de."

    # Another problem character.  The final character in the first label mapped
    # onto the Greek sigma character ("σ") in IDNA2003.

    text="Second check that non-transitional IDNA processing is used"
    idna_test "$text" ""                   "βόλος.com" "\206\178\207\140\206\187\206\191\207\130.com."
    idna_test "$text" "+noidnin +noidnout" "βόλος.com" "\206\178\207\140\206\187\206\191\207\130.com."
    idna_test "$text" "+noidnin +idnout"   "βόλος.com" "\206\178\207\140\206\187\206\191\207\130.com."
    idna_test "$text" "+idnin   +noidnout" "βόλος.com" "xn--nxasmm1c.com."
    idna_test "$text" "+idnin   +idnout"   "βόλος.com" "βόλος.com."



    # Tests of a valid A-label (i.e. starting xn--)
    #
    # +noidnout: The string is sent as-is to the server and the returned qname
    #            is displayed in the same form.
    # +idnout:   The string is sent as-is to the server and the returned qname
    #            is displayed as the corresponding U-label.
    #
    # The "+[no]idnin" flag has no effect in these cases.

    text="Checking valid A-label"
    idna_test "$text" ""                   "xn--nxasmq6b.com" "xn--nxasmq6b.com."
    idna_test "$text" "+noidnin +noidnout" "xn--nxasmq6b.com" "xn--nxasmq6b.com."
    idna_test "$text" "+noidnin +idnout"   "xn--nxasmq6b.com" "βόλοσ.com."
    idna_test "$text" "+idnin +noidnout"   "xn--nxasmq6b.com" "xn--nxasmq6b.com."
    idna_test "$text" "+idnin +idnout"     "xn--nxasmq6b.com" "βόλοσ.com."

    # Test of valid A-label in locale that cannot display it
    #
    # +noidnout: The string is sent as-is to the server and the returned qname
    #            is displayed in the same form.
    # +idnout:   The string is sent as-is to the server and the returned qname
    #            is displayed as the corresponding A-label.
    #
    # The "+[no]idnout" flag has no effect in these cases.
    #
    # These tests only run when the "idn2" utility is available and is itself
    # unable to decode the label in the C locale.
    text="Checking valid A-label in C locale"
    label="xn--nxasmq6b.com"
    if command -v idn2 >/dev/null && ! LC_ALL=C idn2 -d "$label" >/dev/null 2>/dev/null; then
        # The locale is switched with an explicit save/restore instead of a
        # "LC_ALL=C idna_test ..." prefix assignment.  How a shell treats an
        # assignment placed in front of a *function* call is not uniform:
        # some shells keep the assignment in effect after the function
        # returns, which would silently run every later test in the C locale.
        saved_lc_all_set=${LC_ALL+yes}
        saved_lc_all=${LC_ALL-}
        LC_ALL=C
        export LC_ALL
        idna_test "$text" ""                   "$label" "$label."
        idna_test "$text" "+noidnin +noidnout" "$label" "$label."
        idna_test "$text" "+noidnin +idnout"   "$label" "$label."
        idna_test "$text" "+idnin +noidnout"   "$label" "$label."
        idna_test "$text" "+idnin +idnout"     "$label" "$label."
        if [ -n "$saved_lc_all_set" ]; then
            LC_ALL=$saved_lc_all
        else
            unset LC_ALL
        fi
    fi



    # Tests of invalid A-labels
    #
    # +noidnin: The label is sent as-is to the server and dig will display the
    #           returned fake A-label in the same form.
    # +idnin:   "dig" should report that the label is not correct.
    #
    # +[no]idnout: If the label makes it to the server (via +noidnin), "dig"
    #           should report an error if +idnout is specified.

    # The minimum length of a punycode A-label is 7 characters.  Check that
    # a shorter label is detected and rejected.

    text="Checking punycode label shorter than minimum valid length"
    idna_test "$text" ""                   "xn--xx" "xn--xx."
    idna_test "$text" "+noidnin +noidnout" "xn--xx" "xn--xx."
    idna_fail "$text" "+noidnin   +idnout" "xn--xx"
    idna_fail "$text" "+idnin   +noidnout" "xn--xx"
    idna_fail "$text" "+idnin     +idnout" "xn--xx"

    # Fake A-label - the string does not translate to anything.

    text="Checking fake A-label"
    idna_test "$text" ""                   "xn--ahahah" "xn--ahahah."
    idna_test "$text" "+noidnin +noidnout" "xn--ahahah" "xn--ahahah."
    idna_fail "$text" "+noidnin   +idnout" "xn--ahahah"
    idna_fail "$text" "+idnin   +noidnout" "xn--ahahah"
    idna_fail "$text" "+idnin     +idnout" "xn--ahahah"

    # Too long a label. The punycode string is too long (at 64 characters).
    # "dig" is expected to reject such a label whichever IDNA options are
    # given: every combination below must fail.

    label="xn--xflod18hstflod18hstflod18hstflod18hstflod18hstflod18-1iejjjj"
    text="Checking punycode label longer than maximum valid length"
    idna_fail "$text" ""                   "$label"
    idna_fail "$text" "+noidnin +noidnout" "$label"
    idna_fail "$text" "+noidnin   +idnout" "$label"
    idna_fail "$text" "+idnin   +noidnout" "$label"
    idna_fail "$text" "+idnin     +idnout" "$label"




    # Tests of a valid unicode string but an invalid U-label (input)
    #
    # Symbols are not valid IDNA2008 names.  Check whether dig rejects them
    # when they are supplied on the command line to ensure no IDNA2003
    # fallbacks are in place.
    #
    # +noidnin: "dig" should send unicode octets to the server and display the
    #           returned qname in the same form.
    # +idnin:   "dig" should generate an error.
    #
    # The +[no]idnout options should not have any effect on the test.

    text="Checking invalid input U-label"
    idna_test "$text" ""                   "√.com" "\226\136\154.com."
    idna_test "$text" "+noidnin +noidnout" "√.com" "\226\136\154.com."
    idna_test "$text" "+noidnin +idnout"   "√.com" "\226\136\154.com."
    idna_fail "$text" "+idnin   +noidnout" "√.com"
    idna_fail "$text" "+idnin   +idnout"   "√.com"

    # Tests of a valid unicode string but an invalid U-label (output)
    #
    # Symbols are not valid IDNA2008 names.  Check whether dig rejects them
    # when they are received in DNS responses to ensure no IDNA2003 fallbacks
    # are in place.
    #
    # Note that "+idnin +noidnout" is not tested because libidn2 2.2.0+ parses
    # Punycode more strictly than older versions and thus dig fails with that
    # combination of options with libidn2 2.2.0+ but succeeds with older
    # versions.
    #
    # +noidnout: "dig" should send the ACE string to the server and display the
    #            returned qname.
    # +idnout:   "dig" should generate an error.
    #
    # The +[no]idnin options should not have any effect on the test.

    text="Checking invalid output U-label"
    idna_test "$text" ""                   "xn--19g" "xn--19g."
    idna_test "$text" "+noidnin +noidnout" "xn--19g" "xn--19g."
    idna_fail "$text" "+noidnin +idnout"   "xn--19g"
    idna_fail "$text" "+idnin   +idnout"   "xn--19g"
}
352
353
# Function to perform tests if IDNA is not enabled.
#
# Without IDNA support there is nothing to translate, so the only check made
# is that an all-ASCII name keeps the case in which it was entered.

idna_disabled_test() {
    echo_i "IDNA is disabled, only case mapping tests will be performed"

    ascii_case_preservation_test
}
360
361
# Main test begins here
#
# The feature-test helper is run with "--with-idn"; a zero exit status
# selects the full IDNA test set, anything else the reduced set.  The
# accumulated failure count is used as the exit status of the script.

if $FEATURETEST --with-idn; then
    idna_enabled_test
else
    idna_disabled_test
fi

exit $status
371exit $status
372