1 #ifndef lint 2 static char sccsid[] = "@(#)locate.code.c 4.2 (Berkeley) 07/21/83"; 3 #endif not lint 4 5 /* 6 * PURPOSE: sorted list compressor (works with a modified 'find' 7 * to encode/decode a filename database) 8 * 9 * USAGE: bigram < list > bigrams 10 * process bigrams (see updatedb) > common_bigrams 11 * code common_bigrams < list > squozen_list 12 * 13 * METHOD: Uses 'front compression' (see ";login:", March 1983, p. 8 ). 14 * Output format is, per line, an offset differential count byte 15 * followed by a partially bigram-encoded ascii residue. 16 * 17 * The codes are: 18 * 19 * 0-28 likeliest differential counts + offset to make nonnegative 20 * 30 escape code for out-of-range count to follow in next word 21 * 128-255 bigram codes, (128 most common, as determined by 'updatedb') 22 * 32-127 single character (printable) ascii residue 23 * 24 * SEE ALSO: updatedb.csh, bigram.c, find.c 25 * 26 * AUTHOR: James A. Woods, Informatics General Corp., 27 * NASA Ames Research Center, 10/82 28 */ 29 30 #include <stdio.h> 31 32 #define MAXPATH 1024 /* maximum pathname length */ 33 #define RESET 30 /* switch code */ 34 35 char path[MAXPATH]; 36 char oldpath[MAXPATH] = " "; 37 char bigrams[257] = { 0 }; 38 39 main ( argc, argv ) 40 int argc; char *argv[]; 41 { 42 int count, oldcount, diffcount; 43 int j, code; 44 char bigram[3]; 45 FILE *fp; 46 47 oldcount = 0; 48 bigram[2] = NULL; 49 50 if ((fp = fopen(argv[1], "r")) == NULL) { 51 printf("Usage: code common_bigrams < list > coded_list\n"); 52 exit(1); 53 } 54 fgets ( bigrams, 257, fp ); 55 fwrite ( bigrams, 1, 256, stdout ); 56 57 while ( gets ( path ) != NULL ) { 58 /* 59 squelch unprintable chars so as not to botch decoding 60 */ 61 for ( j = 0; path[j] != NULL; j++ ) { 62 path[j] &= 0177; 63 if ( path[j] < 040 || path[j] == 0177 ) 64 path[j] = '?'; 65 } 66 count = prefix_length ( oldpath, path ); 67 diffcount = count - oldcount; 68 if ( (diffcount < -14) || (diffcount > 14) ) { 69 putc ( RESET, stdout ); 70 putw ( diffcount + 14, stdout ); 71 } 72 else 73 putc ( diffcount + 14, stdout ); 74 75 for ( j = count; path[j] != NULL; j += 2 ) { 76 if ( path[j + 1] == NULL ) { 77 putchar ( path[j] ); 78 break; 79 } 80 bigram[0] = path[j]; 81 bigram[1] = path[j + 1]; 82 /* 83 linear search for specific bigram in string table 84 */ 85 if ( (code = strindex ( bigrams, bigram )) % 2 == 0 ) 86 putchar ( (code / 2) | 0200 ); 87 else 88 fputs ( bigram, stdout ); 89 } 90 strcpy ( oldpath, path ); 91 oldcount = count; 92 } 93 } 94 95 strindex ( string, pattern ) /* return location of pattern in string or -1 */ 96 char *string, *pattern; 97 { 98 register char *s, *p, *q; 99 100 for ( s = string; *s != NULL; s++ ) 101 if ( *s == *pattern ) { /* fast first char check */ 102 for ( p = pattern + 1, q = s + 1; *p != NULL; p++, q++ ) 103 if ( *q != *p ) 104 break; 105 if ( *p == NULL ) 106 return ( q - strlen ( pattern ) - string ); 107 } 108 return ( -1 ); 109 } 110 111 prefix_length ( s1, s2 ) /* return length of longest common prefix */ 112 char *s1, *s2; /* ... of strings s1 and s2 */ 113 { 114 register char *start; 115 116 for ( start = s1; *s1 == *s2; s1++, s2++ ) 117 if ( *s1 == NULL ) 118 break; 119 return ( s1 - start ); 120 } 121