#!/usr/local/bin/python
# -*- coding: iso-8859-1 -*-

# Id

# Copyright (c) 2004, 2008 Kungliga Tekniska Högskolan
# (Royal Institute of Technology, Stockholm, Sweden).
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Institute nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

import re
import string

def read():
    """Return a dict of tables from rfc4518.

    The result has two keys, each holding a freshly built list of strings:

    "rfc4518-map"
        One entry per code point or inclusive code point range (hex),
        written as "<code point(s)>; <replacement>; <description>".
        An empty replacement field means "map to nothing".
    "rfc4518-error"
        One entry per prohibited code point, written as "<code point>;".
    """

    map_table = [
        # 2.2.  Map
        #
        # SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
        # points are mapped to nothing.  COMBINING GRAPHEME JOINER (U+034F)
        # and VARIATION SELECTORs (U+180B-180D, FE00-FE0F) code points are
        # also mapped to nothing.  The OBJECT REPLACEMENT CHARACTER (U+FFFC)
        # is mapped to nothing.
        #
        # NOTE: the RFC text quoted here originally read "FF00-FE0F"; the
        # entries below cover FE00-FE0F.
        " 00AD; ; Map to nothing",
        " 1806; ; Map to nothing",
        " 034F; ; Map to nothing",

        " 180B; ; Map to nothing",
        " 180C; ; Map to nothing",
        " 180D; ; Map to nothing",

        " FE00; ; Map to nothing",
        " FE01; ; Map to nothing",
        " FE02; ; Map to nothing",
        " FE03; ; Map to nothing",
        " FE04; ; Map to nothing",
        " FE05; ; Map to nothing",
        " FE06; ; Map to nothing",
        " FE07; ; Map to nothing",
        " FE08; ; Map to nothing",
        " FE09; ; Map to nothing",
        " FE0A; ; Map to nothing",
        " FE0B; ; Map to nothing",
        " FE0C; ; Map to nothing",
        " FE0D; ; Map to nothing",
        " FE0E; ; Map to nothing",
        " FE0F; ; Map to nothing",

        " FFFC; ; Map to nothing",

        # CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE
        # TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR)
        # (U+000D), and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020).
        " 0009; 0020 ; Map to SPACE",
        " 000A; 0020 ; Map to SPACE",
        " 000B; 0020 ; Map to SPACE",
        " 000C; 0020 ; Map to SPACE",
        " 000D; 0020 ; Map to SPACE",
        " 0085; 0020 ; Map to SPACE",

        # All other control code (e.g., Cc) points or code points with a
        # control function (e.g., Cf) are mapped to nothing.  The following
        # is a complete list of these code points: U+0000-0008, 000E-001F,
        # 007F-0084, 0086-009F, 06DD, 070F, 180E, 200C-200F, 202A-202E,
        # 2060-2063, 206A-206F, FEFF, FFF9-FFFB, 1D173-1D17A, E0001,
        # E0020-E007F.
        " 0000-0008; ; Map to nothing",
        " 000E-001F; ; Map to nothing",
        " 007F-0084; ; Map to nothing",
        " 0086-009F; ; Map to nothing",
        " 06DD; ; Map to nothing",
        " 070F; ; Map to nothing",
        " 180E; ; Map to nothing",
        " 200C-200F; ; Map to nothing",
        " 202A-202E; ; Map to nothing",
        " 2060-2063; ; Map to nothing",
        " 206A-206F; ; Map to nothing",
        " FEFF; ; Map to nothing",
        " FFF9-FFFB; ; Map to nothing",
        " 1D173-1D17A; ; Map to nothing",
        " E0001; ; Map to nothing",
        " E0020-E007F; ; Map to nothing",

        # ZERO WIDTH SPACE (U+200B) is mapped to nothing.  All other code
        # points with Separator (space, line, or paragraph) property (e.g.,
        # Zs, Zl, or Zp) are mapped to SPACE (U+0020).  The following is a
        # complete list of these code points: U+0020, 00A0, 1680, 2000-200A,
        # 2028-2029, 202F, 205F, 3000.
        " 200B; ; Map to nothing",
        " 0020; 0020; Map to SPACE",
        " 00A0; 0020; Map to SPACE",
        " 1680; 0020; Map to SPACE",
        " 2000-200A; 0020; Map to SPACE",
        " 2028-2029; 0020; Map to SPACE",
        " 202F; 0020; Map to SPACE",
        " 205F; 0020; Map to SPACE",
        " 3000; 0020; Map to SPACE",
    ]

    # For case ignore, numeric, and stored prefix string matching rules,
    # characters are case folded per B.2 of [RFC3454].
    # (No table is produced here for that step.)

    error_table = [
        # 2.4.  Prohibit
        #
        # The REPLACEMENT CHARACTER (U+FFFD) code point is prohibited.
        " FFFD;",
    ]

    return {
        "rfc4518-map": map_table,
        "rfc4518-error": error_table,
    }