# xref: /minix3/share/misc/nanpa.sed (revision 5737b690dc0057f1cf4d1bfc3a46686ab250b0be, annotated author: Ben Gras)
# $NetBSD: nanpa.sed,v 1.2 2006/12/25 18:39:48 wiz Exp $
#
# Parse HTML tables output by
#   http://docs.nanpa.com/cgi-bin/npa_reports/nanpa
# Specifically, for each HTML table row (TR),
# print the <TD> elements separated by colons.
#
# This could break on HTML comments.
#
:top
#				Strip ^Ms.  The pattern between the slashes is one
#				literal carriage-return byte (0x0D); keep it literal,
#				a sed without a CR escape would read "\r" as plain "r".
s///g
#				Join all lines with unterminated HTML tags
#				(a "<" with no ">" before the end of the line).
/<[^>]*$/{
	N
	b top
}
#				Replace all </TR> with EOL tag ("$")
s;</[Tt][Rr]>;$;g
#				Join lines that end in a bare <TR> with the next line.
/<[Tt][Rr][^>]*>$/{
	N
	s/\n//g
	b top
}
#				Also, keep joining while a row opened by <TR> has no
#				EOL tag after it ("$" inside brackets is a literal).
#				The tag is spelled [Tt][Rr]: a single [TtRr] class
#				would also match <TABLE>, <TITLE>, <TD>, ... and glue
#				text from outside the row onto it.
/<[Tt][Rr][^>]*>[^$]*$/{
	N
	s/\n//g
	b top
}
#				Remove EOL markers
s/\$$//
#				Remove lines that do not contain a <TR>
/<[Tt][Rr][^>]*>/!d
#				Replace all <TD> with colon (either case, as for <TR>)
s/[ 	]*<[Tt][Dd][^>]*> */:/g
#				Strip all HTML tags
s/<[^>]*>//g
#				Handle HTML characters
s/&nbsp;/ /g
#				Compress spaces/tabs
s/[ 	][ 	]*/ /g
#				Strip leading colons
s/^://
#				Strip leading/trailing whitespace
s/^ //
s/ $//