#! /usr/bin/env python3
#                          __  __            _
#                       ___\ \/ /_ __   __ _| |_
#                      / _ \\  /| '_ \ / _` | __|
#                     |  __//  \| |_) | (_| | |_
#                      \___/_/\_\ .__/ \__,_|\__|
#                               |_| XML parser
#
# Copyright (c) 2019-2023 Sebastian Pipping <sebastian@pipping.org>
# Copyright (c) 2021      Tim Bray <tbray@textuality.com>
# Licensed under the MIT license:
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the
# following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
# NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
# USE OR OTHER DEALINGS IN THE SOFTWARE.
import argparse

# This script models the command-line interface of xmlwf with argparse for the
# sole purpose of rendering its help text; running it prints that help text.
# No argument is ever acted upon here.

# Free-form text appended after the option listing.  RawTextHelpFormatter
# (see below) keeps the line breaks and column alignment exactly as written.
epilog = """
exit status:
  0             the input files are well-formed and the output (if requested) was written successfully
  1             could not allocate data structures, signals a serious problem with execution environment
  2             one or more input files were not well-formed
  3             could not create an output file
  4             command-line argument error

xmlwf of libexpat is software libre, licensed under the MIT license.
Please report bugs at https://github.com/libexpat/libexpat/issues -- thank you!
"""

# Hand-written synopsis replacing argparse's auto-generated usage line;
# argparse substitutes %(prog)s with the program name.
usage = """
  %(prog)s [OPTIONS] [FILE ...]
  %(prog)s -h|--help
  %(prog)s -v|--version
"""

# add_help=False: -h/--help is declared manually further down so that it can
# be listed in its own "info arguments" group next to -v/--version.
parser = argparse.ArgumentParser(
    prog='xmlwf',
    add_help=False,
    usage=usage,
    description='xmlwf - Determines if an XML document is well-formed',
    formatter_class=argparse.RawTextHelpFormatter,
    epilog=epilog,
)

# --- Options controlling how input is read and parsed ---
input_related = parser.add_argument_group('input control arguments')
input_related.add_argument('-s', action='store_true', help='print an error if the document is not [s]tandalone')
input_related.add_argument('-n', action='store_true', help='enable [n]amespace processing')
input_related.add_argument('-p', action='store_true', help='enable processing of external DTDs and [p]arameter entities')
input_related.add_argument('-x', action='store_true', help='enable processing of e[x]ternal entities')
input_related.add_argument('-e', action='store', metavar='ENCODING', help='override any in-document [e]ncoding declaration')
input_related.add_argument('-w', action='store_true', help='enable support for [W]indows code pages')
input_related.add_argument('-r', action='store_true', help='disable memory-mapping and use [r]ead calls instead')
input_related.add_argument('-g', metavar='BYTES', help='buffer size to request per call pair to XML_[G]etBuffer and read (default: 8 KiB)')
input_related.add_argument('-k', action='store_true', help='when processing multiple files, [k]eep processing after first file with error')

# --- Options controlling what is written ---
output_related = parser.add_argument_group('output control arguments')
output_related.add_argument('-d', action='store', metavar='DIRECTORY', help='output [d]estination directory')
# -c, -m and -t select alternative output modes, hence mutually exclusive.
output_mode = output_related.add_mutually_exclusive_group()
output_mode.add_argument('-c', action='store_true', help='write a [c]opy of input XML, not canonical XML')
output_mode.add_argument('-m', action='store_true', help='write [m]eta XML, not canonical XML')
output_mode.add_argument('-t', action='store_true', help='write no XML output for [t]iming of plain parsing')
output_related.add_argument('-N', action='store_true', help='enable adding doctype and [n]otation declarations')

# --- Limits guarding against entity-expansion amplification ---
billion_laughs = parser.add_argument_group('billion laughs attack protection',
                                           description='NOTE: '
                                           'If you ever need to increase these values '
                                           'for non-attack payload, please file a bug report.')
billion_laughs.add_argument('-a', metavar='FACTOR',
                            help='set maximum tolerated [a]mplification factor (default: 100.0)')
billion_laughs.add_argument('-b', metavar='BYTES', help='set number of output [b]ytes needed to activate (default: 8 MiB)')

# --- Reparse deferral ---
reparse_deferral = parser.add_argument_group('reparse deferral')
# -q is a plain on/off switch: its help text describes disabling a feature,
# not supplying a value.  It was previously declared with metavar='FACTOR'
# (apparently copied from -a), which made argparse render it as "-q FACTOR"
# and demand an argument.
reparse_deferral.add_argument('-q', action='store_true',
                              help='disable reparse deferral, and allow [q]uadratic parse runtime with large tokens')

parser.add_argument('files', metavar='FILE', nargs='*', help='file to process (default: STDIN)')

# --- Informational options (declared by hand because add_help=False) ---
info = parser.add_argument_group('info arguments')
# Rebinding is intentional: only the exclusive group is needed from here on.
info = info.add_mutually_exclusive_group()
info.add_argument('-h', '--help', action='store_true', help='show this [h]elp message and exit')
info.add_argument('-v', '--version', action='store_true', help='show program\'s [v]ersion number and exit')


if __name__ == '__main__':
    parser.print_help()