#!/usr/local/bin/python
# -*- coding: iso-8859-1 -*-

# Id

# Copyright (c) 2004, 2008 Kungliga Tekniska Högskolan
# (Royal Institute of Technology, Stockholm, Sweden).
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Institute nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

import re
import string


def read():
    """Return a dict of tables from rfc4518.

    The returned dict has two keys, each holding a list of strings:

    "rfc4518-map"    entries for section 2.2 (Map).  Each entry reads
                     "<code point or range>; <replacement>; <note>";
                     an empty replacement field means "map to nothing".
    "rfc4518-error"  entries for section 2.4 (Prohibit), one
                     "<code point>;" string per prohibited code point.

    A new dict and new lists are built on every call, so callers may
    modify the result freely.
    """
    # NOTE(review): the spacing inside the entry strings is reproduced
    # from the available text of this file, in which runs of blanks look
    # collapsed -- confirm against whatever parses these tables.

    # 2.2.  Map
    #
    # SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
    # points are mapped to nothing.  COMBINING GRAPHEME JOINER (U+034F) and
    # VARIATION SELECTORs (U+180B-180D, FF00-FE0F) code points are also
    # mapped to nothing.  The OBJECT REPLACEMENT CHARACTER (U+FFFC) is
    # mapped to nothing.
    #
    # NOTE(review): the quoted range starts at "FF00", but the entries
    # generated below cover FE00-FE0F -- presumably a typo in the quoted
    # text rather than in the table.
    mapping = [
        " 00AD; ; Map to nothing",
        " 1806; ; Map to nothing",
        " 034F; ; Map to nothing",
        " 180B; ; Map to nothing",
        " 180C; ; Map to nothing",
        " 180D; ; Map to nothing",
    ]
    # One entry per code point FE00..FE0F (upper-case, four hex digits).
    mapping.extend(" %04X; ; Map to nothing" % cp
                   for cp in range(0xFE00, 0xFE10))
    mapping.append(" FFFC; ; Map to nothing")

    # CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE
    # TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR)
    # (U+000D), and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020).
    mapping.extend([
        " 0009; 0020 ; Map to SPACE",
        " 000A; 0020 ; Map to SPACE",
        " 000B; 0020 ; Map to SPACE",
        " 000C; 0020 ; Map to SPACE",
        " 000D; 0020 ; Map to SPACE",
        " 0085; 0020 ; Map to SPACE",
    ])

    # All other control code (e.g., Cc) points or code points with a
    # control function (e.g., Cf) are mapped to nothing.  The following is
    # a complete list of these code points: U+0000-0008, 000E-001F, 007F-
    # 0084, 0086-009F, 06DD, 070F, 180E, 200C-200F, 202A-202E, 2060-2063,
    # 206A-206F, FEFF, FFF9-FFFB, 1D173-1D17A, E0001, E0020-E007F.
    mapping.extend([
        " 0000-0008; ; Map to nothing",
        " 000E-001F; ; Map to nothing",
        " 007F-0084; ; Map to nothing",
        " 0086-009F; ; Map to nothing",
        " 06DD; ; Map to nothing",
        " 070F; ; Map to nothing",
        " 180E; ; Map to nothing",
        " 200C-200F; ; Map to nothing",
        " 202A-202E; ; Map to nothing",
        " 2060-2063; ; Map to nothing",
        " 206A-206F; ; Map to nothing",
        " FEFF; ; Map to nothing",
        " FFF9-FFFB; ; Map to nothing",
        " 1D173-1D17A; ; Map to nothing",
        " E0001; ; Map to nothing",
        " E0020-E007F; ; Map to nothing",
    ])

    # ZERO WIDTH SPACE (U+200B) is mapped to nothing.  All other code
    # points with Separator (space, line, or paragraph) property (e.g., Zs,
    # Zl, or Zp) are mapped to SPACE (U+0020).  The following is a complete
    # list of these code points: U+0020, 00A0, 1680, 2000-200A, 2028-2029,
    # 202F, 205F, 3000.
    mapping.extend([
        " 200B; ; Map to nothing",
        " 0020; 0020; Map to SPACE",
        " 00A0; 0020; Map to SPACE",
        " 1680; 0020; Map to SPACE",
        " 2000-200A; 0020; Map to SPACE",
        " 2028-2029; 0020; Map to SPACE",
        " 202F; 0020; Map to SPACE",
        " 205F; 0020; Map to SPACE",
        " 3000; 0020; Map to SPACE",
    ])

    # For case ignore, numeric, and stored prefix string matching rules,
    # characters are case folded per B.2 of [RFC3454].  (No table is
    # produced here for that step.)

    # 2.4.  Prohibit
    #
    # The REPLACEMENT CHARACTER (U+FFFD) code point is prohibited.
    prohibited = [" FFFD;"]

    return {
        "rfc4518-map": mapping,
        "rfc4518-error": prohibited,
    }