1 #ifndef lint 2 static char sccsid[] = "@(#)locate.code.c 4.1 (Berkeley) 07/21/83"; 3 #endif not lint 4 5 /* 6 * PURPOSE: sorted list compressor (works with a modified 'find' 7 * to encode/decode a filename database) 8 * 9 * USAGE: find.bigram < list > bigrams 10 * process bigrams (see find.squeeze) > common_bigrams 11 * find.code common_bigrams < list > squozen_list 12 * 13 * METHOD: Uses 'front compression' (see ";login:", March 1983, p. 8 ). 14 * Output format is, per line, an offset differential count byte 15 * followed by a partially bigram-encoded ascii residue. 16 * 17 * The codes are: 18 * 19 * 0-28 likeliest differential counts + offset to make nonnegative 20 * 30 escape code for out-of-range count to follow in next word 21 * 128-255 bigram codes, (128 most common, as determined by 'find.squeeze') 22 * 32-127 single character (printable) ascii residue 23 * 24 * SEE ALSO: squeeze, bigram.c, find.c 25 * 26 * AUTHOR: James A. Woods, Informatics General Corp., 27 * NASA Ames Research Center, 10/82 28 */ 29 30 #include <stdio.h> 31 32 #define MAXPATH 1024 /* maximum pathname length */ 33 #define RESET 30 /* switch code */ 34 35 char path[MAXPATH]; 36 char oldpath[MAXPATH] = " "; 37 char bigrams[257] = { 0 }; 38 39 main ( argc, argv ) 40 int argc; char *argv[]; 41 { 42 int count, oldcount, diffcount; 43 int j, code; 44 char bigram[3]; 45 FILE *fp; 46 47 oldcount = 0; 48 bigram[2] = NULL; 49 50 if ( (fp = fopen ( argv[1], "r" )) == NULL ) 51 printf ( "Usage: find.code common_bigrams < list > coded_list\n" ), exit ( 1 ); 52 fgets ( bigrams, 257, fp ); 53 fwrite ( bigrams, 1, 256, stdout ); 54 55 while ( gets ( path ) != NULL ) { 56 /* 57 squelch unprintable chars so as not to botch decoding 58 */ 59 for ( j = 0; path[j] != NULL; j++ ) { 60 path[j] &= 0177; 61 if ( path[j] < 040 || path[j] == 0177 ) 62 path[j] = '?'; 63 } 64 count = prefix_length ( oldpath, path ); 65 diffcount = count - oldcount; 66 if ( (diffcount < -14) || (diffcount > 14) ) { 67 putc ( RESET, stdout ); 68 putw ( diffcount + 14, stdout ); 69 } 70 else 71 putc ( diffcount + 14, stdout ); 72 73 for ( j = count; path[j] != NULL; j += 2 ) { 74 if ( path[j + 1] == NULL ) { 75 putchar ( path[j] ); 76 break; 77 } 78 bigram[0] = path[j]; 79 bigram[1] = path[j + 1]; 80 /* 81 linear search for specific bigram in string table 82 */ 83 if ( (code = strindex ( bigrams, bigram )) % 2 == 0 ) 84 putchar ( (code / 2) | 0200 ); 85 else 86 fputs ( bigram, stdout ); 87 } 88 strcpy ( oldpath, path ); 89 oldcount = count; 90 } 91 } 92 93 strindex ( string, pattern ) /* return location of pattern in string or -1 */ 94 char *string, *pattern; 95 { 96 register char *s, *p, *q; 97 98 for ( s = string; *s != NULL; s++ ) 99 if ( *s == *pattern ) { /* fast first char check */ 100 for ( p = pattern + 1, q = s + 1; *p != NULL; p++, q++ ) 101 if ( *q != *p ) 102 break; 103 if ( *p == NULL ) 104 return ( q - strlen ( pattern ) - string ); 105 } 106 return ( -1 ); 107 } 108 109 prefix_length ( s1, s2 ) /* return length of longest common prefix */ 110 char *s1, *s2; /* ... of strings s1 and s2 */ 111 { 112 register char *start; 113 114 for ( start = s1; *s1 == *s2; s1++, s2++ ) 115 if ( *s1 == NULL ) 116 break; 117 return ( s1 - start ); 118 } 119