#	$NetBSD: join.awk,v 1.7 2019/10/24 16:52:11 rhialto Exp $
#
# Copyright (c) 2002 The NetBSD Foundation, Inc.
# All rights reserved.
#
# This code is derived from software contributed to The NetBSD Foundation
# by Luke Mewburn of Wasabi Systems.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# join.awk F1 F2
#	Similar to join(1), this reads a list of words from F1
#	and selects the lines in F2 with a first word that is in F1.
#	For purposes of matching the first word, both instances are
#	canonicalised via unvis(word).  For each selected line the
#	output is the line from F1 followed by the remaining fields
#	(everything after the first word) of the line from F2.
#	Neither file needs to be sorted.
# unvis(s)
#	Return s with the vis(3)-style escapes understood here decoded:
#	    \\	 -> a single backslash
#	    \ooo -> the character whose code is the octal number ooo
#		    (exactly three octal digits)
#	Any other backslash sequence is reported on stderr and its
#	backslash is kept literally; the character after it is then
#	handled as ordinary text.  A lone backslash at the very end of s
#	is copied through without a warning.
#	XXX: We don't handle the complete range of vis encodings
#
#	out and pos are local scratch variables (the usual awk idiom of
#	extra parameters); callers pass s only.
#
function unvis(s,    out, pos) \
{
	out = ""
	# each iteration consumes the text up to and including one escape
	while ((pos = match(s, /\\./)) > 0) {
		# copy the part before the next backslash
		out = out substr(s, 1, pos - 1)
		s = substr(s, pos)
		# s now starts with a backslash; decode what follows it
		if (substr(s, 1, 2) == "\\\\") {
			# double backslash -> single backslash
			out = out "\\"
			s = substr(s, 3)
		} else if (s ~ /^\\[0-7][0-7][0-7]/) {
			# \ooo with three octal digits.
			# XXX: could use strtonum() (a gawk extension)
			# if that is available
			out = out sprintf("%c", \
				substr(s, 2, 1) * 64 + \
				substr(s, 3, 1) * 8 + \
				substr(s, 4, 1) + 0)
			s = substr(s, 5)
		} else {
			# unrecognised escape: keep the literal backslash
			printf "%s: %s:%s: unrecognised escape %s\n", \
				ARGV[0], (FILENAME ? FILENAME : "stdin"), FNR, \
				substr(s, 1, 2) \
				>"/dev/stderr"
			out = out substr(s, 1, 1)
			s = substr(s, 2)
		}
	}
	# whatever is left holds no further backslash-plus-character pair
	return out s
}

# Load F1 (ARGV[1]) into words[], indexed by the canonicalised first word
# of each line and holding the complete F1 line.  ARGV[1] is then removed
# so that only F2 is read as regular input by the rules below.
BEGIN \
{
	if (ARGC != 3) {
		printf("Usage: join file1 file2\n") >"/dev/stderr"
		exit 1
	}
	while ((rc = (getline < ARGV[1])) > 0) {
		words[unvis($1)] = $0
	}
	# getline returns -1 when the file cannot be opened or read;
	# fail loudly instead of continuing with an empty word list.
	if (rc < 0) {
		printf("%s: cannot read %s\n", ARGV[0], ARGV[1]) >"/dev/stderr"
		exit 1
	}
	close(ARGV[1])
	delete ARGV[1]
}

# Canonicalise the first word of every F2 record for the lookup below.
{ f1 = unvis($1) }

# F2 record whose first word is known from F1: print the stored F1 line
# followed by the rest of the F2 record.  Assigning to $1 makes awk
# rebuild $0 from its fields joined by OFS, so the remainder starts with
# OFS and runs of blanks between the remaining fields are collapsed.
f1 in words \
{
	$1 = ""
	print words[f1] $0
}