#!/usr/bin/ksh93

#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.
#

#
# rssread - a simple RSS2.0 reader with RSS to XHTML to
# plaintext conversion.
#

# Solaris needs /usr/xpg6/bin:/usr/xpg4/bin because the tools in /usr/bin are not POSIX-conformant
export PATH=/usr/xpg6/bin:/usr/xpg4/bin:/bin:/usr/bin

# printmsg - write all arguments (joined into one string) to stderr
function printmsg
{
	print -u2 "$*"
}

# debugmsg - debug output hook; currently disabled (no-op)
function debugmsg
{
#	printmsg "$*"
true
}

# fatal_error - write "<progname>: <message>" to stderr and exit with status 1
function fatal_error
{
	print -u2 "${progname}: $*"
	exit 1
}

# parse HTTP return code, cookies etc.
#
# Reads the HTTP status line and the header lines from stdin and stores
# the result in the compound variable whose name is passed as $1
# (fields: statuscode, statusmsg, content_type, content_length,
# transfer_encoding). Reading stops after the empty line which separates
# the headers from the body.
# Returns 0 on success and 1 if the status line is malformed.
function parse_http_response
{
	nameref response="$1"
	typeset h statuscode statusmsg i

	# we use '\r' as additional IFS to filter the final '\r'
	IFS=$' \t\r' read -r h statuscode statusmsg  # read HTTP/1.[01] <code>
	[[ "$h" != ~(Eil)HTTP/.* ]]         && { print -u2 -f $"%s: HTTP/ header missing\n" "$0" ; return 1 ; }
	# "+" (not "*") so that an empty status code is rejected, too
	[[ "$statuscode" != ~(Elr)[0-9]+ ]] && { print -u2 -f $"%s: invalid status code\n" "$0" ; return 1 ; }
	response.statuscode="$statuscode"
	response.statusmsg="$statusmsg"

	# skip remaining headers
	while IFS='' read -r i ; do
		# the header section ends at the first empty line (accept both
		# "\r\n" and a bare "\n" as line terminator)
		[[ "$i" == $'\r' || "$i" == "" ]] && break

		# strip '\r' at the end
		i="${i/~(Er)$'\r'/}"

		case "$i" in
			~(Eli)Content-Type:.*)
				response.content_type="${i/~(El).*:[[:blank:]]*/}"
				;;
			~(Eli)Content-Length:[[:blank:]]*[0-9]*)
				integer response.content_length="${i/~(El).*:[[:blank:]]*/}"
				;;
			~(Eli)Transfer-Encoding:.*)
				response.transfer_encoding="${i/~(El).*:[[:blank:]]*/}"
				;;
		esac
	done

	return 0
}

# cat_http_body - copy a HTTP message body from stdin to stdout
#
# $1 is the transfer encoding; for "chunked" the chunk framing is removed,
# for everything else stdin is copied unmodified.
function cat_http_body
{
	typeset emode="$1"
	typeset hexchunksize="0"
	integer chunksize=0

	if [[ "${emode}" == "chunked" ]] ; then
		while IFS=$'\n' read -r hexchunksize ; do
			hexchunksize="${hexchunksize//$'\r'/}"

			# The data of each chunk is followed by CRLF which shows up
			# here as an empty line - skip it instead of treating it as
			# the end of the body
			[[ "${hexchunksize}" != "" ]] || continue

			# strip optional chunk extensions ("<size>;name=value")
			hexchunksize="${hexchunksize%%;*}"

			[[ "${hexchunksize}" == ~(Elri)[0-9abcdef]+ ]] || break
			(( chunksize=16#${hexchunksize} ))
			# a chunk size of 0 marks the last chunk
			(( chunksize > 0 )) || break

			dd bs=1 count="${chunksize}" 2>/dev/null
		done
	else
		cat
	fi

	return 0
}

# cat_http - fetch the URL given as $1 via HTTP/1.1 and write the body to stdout
#
# Diagnostics go to stderr. Returns 1 if the URL is incomplete, the TCP
# connection cannot be opened or the response header is invalid, else 0.
function cat_http
{
	typeset protocol="${1%://*}"
	typeset path1="${1#*://}" # "http://foo.bat.net/x/y.html" ----> "foo.bat.net/x/y.html"

	typeset host="${path1%%/*}"
	typeset path=""
	typeset port="${host##*:}"
	typeset request # local - must not leak into the global scope

	integer netfd
	typeset -C httpresponse # http response

	# only URLs with a "/" after the host part have a path (otherwise
	# the host name itself would be used as path)
	[[ "${path1}" == */* ]] && path="${path1#*/}"

	# If URL did not contain a port number in the host part then look at the
	# protocol to get the port number
	if [[ "${port}" == "${host}" ]] ; then
		case "${protocol}" in
			"http") port=80 ;;
			*)      port="$(getent services "${protocol}" | sed 's/[^0-9]*//;s/\/.*//')" ;;
		esac
	else
		host="${host%:*}"
	fi

	printmsg "protocol=${protocol} port=${port} host=${host} path=${path}"

	# prechecks
	[[ "${protocol}" == "" ]] && { print -u2 -f "%s: protocol not set.\n" "$0" ; return 1 ; }
	[[ "${port}"     == "" ]] && { print -u2 -f "%s: port not set.\n"     "$0" ; return 1 ; }
	[[ "${host}"     == "" ]] && { print -u2 -f "%s: host not set.\n"     "$0" ; return 1 ; }
	[[ "${path}"     == "" ]] && { print -u2 -f "%s: path not set.\n"     "$0" ; return 1 ; }

	# open TCP channel
	redirect {netfd}<>"/dev/tcp/${host}/${port}"
	(( $? != 0 )) && { print -u2 -f "%s: Couldn't open %s\n" "$0" "${1}" ; return 1 ; }

	# send HTTP request
	request="GET /${path} HTTP/1.1\r\n"
	request+="Host: ${host}\r\n"
	request+="User-Agent: rssread/ksh93 (2008-10-14; $(uname -s -r -p))\r\n"
	request+="Connection: close\r\n"
	print -n -- "${request}\r\n" >&${netfd}

	# collect response and send it to stdout; do not try to interpret
	# the body if the response header could not be parsed
	if ! parse_http_response httpresponse <&${netfd} ; then
		redirect {netfd}<&-
		return 1
	fi
	cat_http_body "${httpresponse.transfer_encoding}" <&${netfd}

	# close connection
	redirect {netfd}<&-

	return 0
}

# html_entity_to_ascii - filter which replaces "&name;", "&#NNN;" and
# "&#xHHH;" entities read from stdin with the matching characters and
# writes the result to stdout. Unknown entities are passed through as
# "ENT=|name|".
function html_entity_to_ascii
{
	typeset entity
	typeset c
	typeset value

	# Todo: Add more HTML/MathML entities here
	# Note we use a static variable (typeset -S) here to make sure we
	# don't lose the cache data between calls
	typeset -S -A entity_cache=(
		# entity to ascii (fixme: add UTF-8 transliterations)
		["nbsp"]=' '
		["lt"]='<'
		["le"]='<='
		["gt"]='>'
		["ge"]='>='
		["amp"]='&'
		["quot"]='"'
		["apos"]="'"
	)

	while IFS='' read -r -N 1 c ; do
		if [[ "$c" != "&" ]] ; then
			print -n -r -- "${c}"
			continue
		fi

		# collect the entity name up to the terminating ';'
		entity=""
		while IFS='' read -r -N 1 c ; do
			case "$c" in
				";")
					break
					;;
				~(Eilr)[a-z0-9#])
					entity+="$c"
					continue
					;;
				*)
#					debugmsg "error &${entity}${c}#"

					# not an entity - pass the input through
					# unmodified (including the leading '&')
					print -n -r -- "&${entity}${c}"
					entity=""
					continue 2
					;;
			esac
		done

		value=""
		if [[ "${entity_cache["${entity}"]}" != "" ]] ; then
#			debugmsg "match #${entity}# = #${entity_cache["${entity}"]}#"
			value="${entity_cache["${entity}"]}"
		else
			if [[ "${entity:0:2}" == '#'[xX] && "${entity:2}" == ~(Elr)[0-9a-fA-F]+ ]] ; then
				# hexadecimal character reference ("&#x20ac;")
				value="${ printf "\u[${entity:2:8}]" ; }"
			elif [[ "${entity}" == '#'+([0-9]) ]] ; then
				# decimal character reference ("&#8364;"); the value is
				# forced to base 10 so that leading zeros (e.g. "&#039;")
				# cannot be taken as an octal prefix
				value="${ printf "\u[${ printf "%x" "$(( 10#${entity:1:8} ))" ; }]" ; }"
			elif [[ "${entity:0:7}" == ~(Eilr)[0-9a-f]* ]] ; then
				# hexadecimal literal
				value="${ printf "\u[${entity:0:7}]" ; }"
			else
				# unknown literal - pass-through
				value="ENT=|${entity}|"
			fi

			entity_cache["${entity}"]="${value}"

#			debugmsg "lookup #${entity}# = #${entity_cache["${entity}"]}#"
		fi

		printf "%s" "${value}"
	done

	return 0
}

# dumb xhtml handler - no CSS, tables, images, iframes or nested
# structures are supported (and we assume that the input is correct
# xhtml). The code was written in a trial&&error manner and should be
# rewritten to parse xhtml correctly.
#
# xml_tok callback: $1 = name of the callback array, $2 = event type,
# $3 = tag name or text
function handle_html
{
	# we can't use global variables here when multiple callbacks use the same
	# callback function - but we can use the callback associative array for
	# variable storage instead
	nameref callbacks=${1}
	typeset tag_type="$2"
	typeset tag_value="$3"

	case "${tag_type}" in
		tag_begin)
			case "${tag_value}" in
				br)  printf "\n" ;;
				hr)  printf "\n-------------------------------------\n" ;;
				pre) callbacks["html_pre"]='true' ;;
				p)   printf "\n" ;;
			esac
			;;

		tag_end)
			case "${tag_value}" in
				pre) callbacks["html_pre"]='false' ;;
			esac
			;;

		tag_text)
			# callbacks["html_pre"] holds the command name "true" or "false"
			if ${callbacks["html_pre"]} ; then
				printf "%s" "${tag_value}"
			else
				# compress spaces/newlines/tabs/etc.
				printf "%s" "${tag_value//+([\n\r\t\v[:space:][:blank:]])/ }"
			fi
			;;

		document_start)
			callbacks["html_pre"]='false'
			;;
		document_end) ;;
	esac

	return 0
}

# handle_rss - xml_tok callback which collects the fields of each RSS
# <item> in the global array "item" and prints the item as plain text
# when the closing </item> tag is seen.
# $1 = name of the callback array, $2 = event type, $3 = tag name or text
function handle_rss
{
	# we can't use global variables here when multiple callbacks use the same
	# callback function - but we can use the callback associative array for
	# variable storage instead
	nameref callbacks=${1}
	typeset tag_type="$2"
	typeset tag_value="$3"

	case "${tag_type}" in
		tag_begin)
			case "${tag_value}" in
				item)
					# reset all fields printed at </item> to make
					# sure no value of the previous item is reused
					item["title"]=""
					item["link"]=""
					item["tag"]=""
					item["description"]=""
					item["author"]=""
					item["pubDate"]=""
					;;
			esac
			callbacks["textbuf"]=""
			;;
		tag_end)
			case "${tag_value}" in
				item)
					# note that each RSS item needs to be converted separately from RSS to HTML to plain text
					# to make sure that the state of one RSS item doesn't affect others
					(
						printf $"<br />#### RSS item: title: %s ####" "${item["title"]}"
						printf $"<br />## author: %s" "${item["author"]}"
						printf $"<br />## link:   %s" "${item["link"]}"
						printf $"<br />## date:   %s" "${item["pubDate"]}"
						printf $"<br />## begin description:"
						printf $"<br />%s<br />" "${item["description"]}"
						printf $"<br />## end description<br />"
						print # extra newline to make sure the sed pipeline gets flushed
					) |
						html_entity_to_ascii |	# convert XML entities (e.g. decode RSS content to HTML code)
						xml_tok "xhtmltok_cb" |	# convert HTML to plain text
						html_entity_to_ascii	# convert HTML entities
					;;
				title)                item["title"]="${callbacks["textbuf"]}"        ; callbacks["textbuf"]="" ;;
				link)                 item["link"]="${callbacks["textbuf"]}"         ; callbacks["textbuf"]="" ;;
				dc:creator | author)  item["author"]="${callbacks["textbuf"]}"       ; callbacks["textbuf"]="" ;;
				dc:date | pubDate)    item["pubDate"]="${callbacks["textbuf"]}"      ; callbacks["textbuf"]="" ;;
				description)          item["description"]="${callbacks["textbuf"]}"  ; callbacks["textbuf"]="" ;;
			esac
			callbacks["textbuf"]=""
			;;
		tag_text)
			callbacks["textbuf"]+="${tag_value}"
			;;
		document_start) ;;
		document_end) ;;
	esac
	return 0
}

# xml_tok - simple XML tokenizer
#
# Reads XML from stdin and calls the functions stored in the associative
# array whose name is passed as $1 (keys: document_start, tag_begin,
# tag_end, tag_text, tag_comment, document_end). Each callback gets the
# array name, the event type and the tag name/text as arguments
# (tag_begin additionally gets the attribute string).
function xml_tok
{
	typeset buf=""
	typeset namebuf=""
	typeset attrbuf=""
	typeset c=""
	typeset isendtag # bool: true/false
	typeset issingletag # bool: true/false (used for tags like "<br />")
	nameref callbacks=${1}

	[[ ! -z "${callbacks["document_start"]}" ]] && ${callbacks["document_start"]} "${1}" "document_start"

	while IFS='' read -r -N 1 c ; do
		isendtag=false

		if [[ "$c" == "<" ]] ; then
			# flush any text content
			if [[ "$buf" != "" ]] ; then
				[[ ! -z "${callbacks["tag_text"]}" ]] && ${callbacks["tag_text"]} "${1}" "tag_text" "$buf"
				buf=""
			fi

			# stop if the input ends right after the '<'
			IFS='' read -r -N 1 c || break
			if [[ "$c" == "/" ]] ; then
				isendtag=true
			else
				buf="$c"
			fi
			IFS='' read -r -d '>' c
			buf+="$c"

			# handle comments
			if [[ "$buf" == ~(El)!-- ]] ; then
				# did we read the comment completely ?
				if [[ "$buf" != ~(Elr)!--.*-- ]] ; then
					# the '>' consumed above was part of the comment text
					buf+=">"
					while [[ "$buf" != ~(Elr)!--.*-- ]] ; do
						IFS='' read -r -N 1 c || break
						buf+="$c"
					done
				fi

				[[ ! -z "${callbacks["tag_comment"]}" ]] && ${callbacks["tag_comment"]} "${1}" "tag_comment" "${buf:3:${#buf}-5}"
				buf=""
				continue
			fi

			# check if the tag starts and ends at the same time (like "<br />")
			if [[ "${buf}" == ~(Er).*/ ]] ; then
				issingletag=true
				buf="${buf%*/}"
			else
				issingletag=false
			fi

			# check if the tag has attributes (e.g. space after name)
			if [[ "$buf" == ~(E)[[:space:][:blank:]] ]] ; then
				namebuf="${buf%%~(E)[[:space:][:blank:]].*}"
				attrbuf="${buf#~(E).*[[:space:][:blank:]]}"
			else
				namebuf="$buf"
				attrbuf=""
			fi

			if ${isendtag} ; then
				[[ ! -z "${callbacks["tag_end"]}" ]] && ${callbacks["tag_end"]} "${1}" "tag_end" "$namebuf"
			else
				[[ ! -z "${callbacks["tag_begin"]}" ]] && ${callbacks["tag_begin"]} "${1}" "tag_begin" "$namebuf" "$attrbuf"

				# handle tags like <br/> (which are start- and end-tag in one piece)
				if ${issingletag} ; then
					[[ ! -z "${callbacks["tag_end"]}" ]] && ${callbacks["tag_end"]} "${1}" "tag_end" "$namebuf"
				fi
			fi
			buf=""
		else
			buf+="$c"
		fi
	done

	[[ ! -z "${callbacks["document_end"]}" ]] && ${callbacks["document_end"]} "${1}" "document_end" "exit_success"

	print # final newline to make filters like "sed" happy
}

# return the value of LC_MESSAGES needed for subprocesses which
# want to run in a different locale/encoding
# (first non-empty value of LC_ALL, LC_MESSAGES, LANG; else "C")
function get_lc_messages
{
	[[ "${LC_ALL}"       != "" ]] && { print "${LC_ALL}"      ; return 0 ; }
	[[ "${LC_MESSAGES}"  != "" ]] && { print "${LC_MESSAGES}" ; return 0 ; }
	[[ "${LANG}"         != "" ]] && { print "${LANG}"        ; return 0 ; }
	print "C" ; return 0
}

# do_rssread - fetch the RSS feed at URL $1 and print it as plain text
# (UTF-8 encoded) to stdout
function do_rssread
{
	# set unicode locale since RSS is encoded in UTF-8
	# (and make sure $LC_MESSAGES is set to the parent
	# process's locale that all error messages are using
	# the callers locale/encoding)
	export \
		LC_MESSAGES="${ get_lc_messages ; }" \
		LC_MONETARY="en_US.UTF-8" \
		LC_NUMERIC="en_US.UTF-8" \
		LC_COLLATE="en_US.UTF-8" \
		LC_CTYPE="en_US.UTF-8" \
		LC_TIME="en_US.UTF-8" \
		LANG="en_US.UTF-8"

	# need extra newline after cat_http to terminate line with $'\n'
	# to make "xml_tok" happy
	{ cat_http "$1" ; print ; } |
		xml_tok "rsstok_cb"
	return 0
}

# usage - print the usage message generated by getopts and exit with status 2
function usage
{
	OPTIND=0
	getopts -a "${progname}" "${rssread_usage}" OPT '-?'
	exit 2
}

# make sure we use the ksh93 builtin versions
builtin basename
builtin cat

typeset -A rsstok_cb # callbacks for xml_tok
rsstok_cb["tag_begin"]="handle_rss"
rsstok_cb["tag_end"]="handle_rss"
rsstok_cb["tag_text"]="handle_rss"
rsstok_cb["textbuf"]=""

typeset -A xhtmltok_cb # callbacks for xml_tok
xhtmltok_cb["tag_begin"]="handle_html"
xhtmltok_cb["tag_end"]="handle_html"
xhtmltok_cb["tag_text"]="handle_html"
xhtmltok_cb["textbuf"]=""
xhtmltok_cb["html_pre"]='false'

482*8462SApril.Chin@Sun.COMtypeset -A item 483*8462SApril.Chin@Sun.COM 484*8462SApril.Chin@Sun.COMtypeset -A bookmark_urls 485*8462SApril.Chin@Sun.COM 486*8462SApril.Chin@Sun.COM# "ramdom" urls for testing 487*8462SApril.Chin@Sun.COMbookmark_urls=( 488*8462SApril.Chin@Sun.COM ["google_blogs_ksh"]="http://blogsearch.google.com/blogsearch_feeds?hl=en&scoring=d&q=(%22ksh93%22%7C%22ksh+93%22+%7C+%22korn93%22+%7C+%22korn+93%22)&ie=utf-8&num=100&output=rss" 489*8462SApril.Chin@Sun.COM # OpenSolaris.org sites 490*8462SApril.Chin@Sun.COM ["ksh93_integration"]="http://www.opensolaris.org/rss/os/project/ksh93-integration/announcements/rss2.xml" 491*8462SApril.Chin@Sun.COM ["shell"]="http://www.opensolaris.org/rss/os/project/shell/announcements/rss2.xml" 492*8462SApril.Chin@Sun.COM ["systemz"]="http://www.opensolaris.org/rss/os/project/systemz/announcements/rss2.xml" 493*8462SApril.Chin@Sun.COM # some Sun staff/sites 494*8462SApril.Chin@Sun.COM ["blogs_sun_com"]="http://blogs.sun.com/main/feed/entries/rss" 495*8462SApril.Chin@Sun.COM ["bigadmin"]="http://www.sun.com/bigadmin/content/rss/motd.xml" 496*8462SApril.Chin@Sun.COM ["jmcp"]="http://www.jmcp.homeunix.com/roller/jmcp/feed/entries/rss" 497*8462SApril.Chin@Sun.COM ["katakai"]="http://blogs.sun.com/katakai/feed/entries/rss" 498*8462SApril.Chin@Sun.COM ["alanc"]="http://blogs.sun.com/alanc/feed/entries/rss" 499*8462SApril.Chin@Sun.COM ["planetsun"]="http://www.planetsun.org/rss20.xml" 500*8462SApril.Chin@Sun.COM ["planetsolaris"]="http://www.planetsolaris.org/rss20.xml" 501*8462SApril.Chin@Sun.COM ["planetopensolaris"]="http://planet.opensolaris.org/rss20.xml" 502*8462SApril.Chin@Sun.COM ["theregister_uk"]="http://www.theregister.co.uk/headlines.rss" 503*8462SApril.Chin@Sun.COM ["heise"]="http://www.heise.de/newsticker/heise.rdf" 504*8462SApril.Chin@Sun.COM ["slashdot"]="http://rss.slashdot.org/Slashdot/slashdot" 505*8462SApril.Chin@Sun.COM) 506*8462SApril.Chin@Sun.COM 507*8462SApril.Chin@Sun.COMtypeset progname="${ 
basename "${0}" ; }" 508*8462SApril.Chin@Sun.COM 509*8462SApril.Chin@Sun.COMtypeset -r rssread_usage=$'+ 510*8462SApril.Chin@Sun.COM[-?\n@(#)\$Id: rssread (Roland Mainz) 2008-11-10 \$\n] 511*8462SApril.Chin@Sun.COM[-author?Roland Mainz <roland.mainz@sun.com>] 512*8462SApril.Chin@Sun.COM[-author?Roland Mainz <roland.mainz@nrubsig.org>] 513*8462SApril.Chin@Sun.COM[+NAME?rssread - fetch RSS messages and convert them to plain text] 514*8462SApril.Chin@Sun.COM[+DESCRIPTION?\brssread\b RSS to plain text converter 515*8462SApril.Chin@Sun.COM which fetches RSS streams via HTTP and converts them from 516*8462SApril.Chin@Sun.COM RSS to HTML to plain text in the current locale/encoding.] 517*8462SApril.Chin@Sun.COM[I:noiconv?Do not convert data from UTF-8 to current locale/encoding.] 518*8462SApril.Chin@Sun.COM 519*8462SApril.Chin@Sun.COM[ url ] 520*8462SApril.Chin@Sun.COM 521*8462SApril.Chin@Sun.COM[+SEE ALSO?\bksh93\b(1), \bshnote\b(1)] 522*8462SApril.Chin@Sun.COM' 523*8462SApril.Chin@Sun.COM 524*8462SApril.Chin@Sun.COMtypeset noiconv=false 525*8462SApril.Chin@Sun.COM 526*8462SApril.Chin@Sun.COMwhile getopts -a "${progname}" "${rssread_usage}" OPT ; do 527*8462SApril.Chin@Sun.COM# printmsg "## OPT=|${OPT}|, OPTARG=|${OPTARG}|" 528*8462SApril.Chin@Sun.COM case ${OPT} in 529*8462SApril.Chin@Sun.COM I) noiconv=true ;; 530*8462SApril.Chin@Sun.COM +I) noiconv=false ;; 531*8462SApril.Chin@Sun.COM *) usage ;; 532*8462SApril.Chin@Sun.COM esac 533*8462SApril.Chin@Sun.COMdone 534*8462SApril.Chin@Sun.COMshift $((OPTIND-1)) 535*8462SApril.Chin@Sun.COM 536*8462SApril.Chin@Sun.COMtypeset url="$1" 537*8462SApril.Chin@Sun.COM 538*8462SApril.Chin@Sun.COMif [[ "${url}" == "" ]] ; then 539*8462SApril.Chin@Sun.COM fatal_error $"No url given." 
540*8462SApril.Chin@Sun.COMfi 541*8462SApril.Chin@Sun.COM 542*8462SApril.Chin@Sun.COMif [[ "${bookmark_urls[${url}]}" != "" ]] ; then 543*8462SApril.Chin@Sun.COM printmsg $"Using bookmark ${url} = ${bookmark_urls[${url}]}" 544*8462SApril.Chin@Sun.COM url="${bookmark_urls[${url}]}" 545*8462SApril.Chin@Sun.COMfi 546*8462SApril.Chin@Sun.COM 547*8462SApril.Chin@Sun.COMif ${noiconv} ; then 548*8462SApril.Chin@Sun.COM do_rssread "${url}" 549*8462SApril.Chin@Sun.COMelse 550*8462SApril.Chin@Sun.COM do_rssread "${url}" | iconv -f "UTF-8" - - 551*8462SApril.Chin@Sun.COMfi 552*8462SApril.Chin@Sun.COM 553*8462SApril.Chin@Sun.COMexit 0 554*8462SApril.Chin@Sun.COM#EOF. 555