1*12292Stut #ifndef lint 2*12292Stut static char *sccsid = "@(#)inv1.c 4.1 (Berkeley) 05/06/83"; 3*12292Stut #endif 4*12292Stut 5*12292Stut #include <stdio.h> 6*12292Stut #include <assert.h> 7*12292Stut 8*12292Stut main(argc, argv) 9*12292Stut char *argv[]; 10*12292Stut { 11*12292Stut /* Make inverted file indexes. Reads a stream from mkey which 12*12292Stut * gives record pointer items and keys. Generates set of files 13*12292Stut * a. NHASH pointers to file b. 14*12292Stut * b. lists of record numbers. 15*12292Stut * c. record pointer items. 16*12292Stut * 17*12292Stut * these files are named xxx.ia, xxx.ib, xxx.ic; 18*12292Stut * where xxx is taken from arg1. 19*12292Stut * If the files exist they are updated. 20*12292Stut */ 21*12292Stut 22*12292Stut FILE *fa, *fb, *fc, *fta, *ftb, *ftc, *fd; 23*12292Stut int nhash = 256; 24*12292Stut int appflg = 1; 25*12292Stut int keepkey = 0, pipein = 0; 26*12292Stut char nma[100], nmb[100], nmc[100], com[100], nmd[100]; 27*12292Stut char tmpa[20], tmpb[20], tmpc[20]; 28*12292Stut char *remove = NULL; 29*12292Stut int chatty = 0, docs, hashes, fp[2], fr, fw, pfork, pwait, status; 30*12292Stut int i,j,k; 31*12292Stut long keys; 32*12292Stut int iflong =0; 33*12292Stut char *sortdir; 34*12292Stut 35*12292Stut sortdir = (access("/crp/tmp", 06)==0) ? "/crp/tmp" : "/usr/tmp"; 36*12292Stut while (argv[1][0] == '-') 37*12292Stut { 38*12292Stut switch(argv[1][1]) 39*12292Stut { 40*12292Stut case 'h': /* size of hash table */ 41*12292Stut nhash = atoi (argv[1]+2); 42*12292Stut break; 43*12292Stut case 'n': /* new, don't append */ 44*12292Stut appflg=0; 45*12292Stut break; 46*12292Stut case 'a': /* append to old file */ 47*12292Stut appflg=1; 48*12292Stut break; 49*12292Stut case 'v': /* verbose output */ 50*12292Stut chatty=1; 51*12292Stut break; 52*12292Stut case 'd': /* keep keys on file .id for check on searching */ 53*12292Stut keepkey=1; 54*12292Stut break; 55*12292Stut case 'p': /* pipe into sort (saves space, costs time)*/ 56*12292Stut pipein = 1; 57*12292Stut break; 58*12292Stut case 'i': /* input is on file, not stdin */ 59*12292Stut close(0); 60*12292Stut if (open(argv[2], 0) != 0) 61*12292Stut err("Can't read input %s", argv[2]); 62*12292Stut if (argv[1][2]=='u') /* unlink */ 63*12292Stut remove = argv[2]; 64*12292Stut argc--; 65*12292Stut argv++; 66*12292Stut break; 67*12292Stut } 68*12292Stut argc--; 69*12292Stut argv++; 70*12292Stut } 71*12292Stut strcpy (nma, argc >= 2 ? argv[1] : "Index"); 72*12292Stut strcpy (nmb, nma); 73*12292Stut strcpy (nmc, nma); 74*12292Stut strcpy (nmd, nma); 75*12292Stut strcat (nma, ".ia"); 76*12292Stut strcat (nmb, ".ib"); 77*12292Stut strcat (nmc, ".ic"); 78*12292Stut strcat (nmd, ".id"); 79*12292Stut 80*12292Stut sprintf(tmpa, "junk%di", getpid()); 81*12292Stut if (pipein) 82*12292Stut { 83*12292Stut pipe(fp); 84*12292Stut fr=fp[0]; 85*12292Stut fw=fp[1]; 86*12292Stut if ( (pfork=fork()) == 0) 87*12292Stut { 88*12292Stut close(fw); 89*12292Stut close(0); 90*12292Stut _assert(dup(fr)==0); 91*12292Stut close(fr); 92*12292Stut execl("/bin/sort", "sort", "-T", sortdir, "-o", tmpa, 0); 93*12292Stut execl("/usr/bin/sort", "sort", "-T", sortdir, "-o", tmpa, 0); 94*12292Stut _assert(0); 95*12292Stut } 96*12292Stut _assert(pfork!= -1); 97*12292Stut close(fr); 98*12292Stut fta = fopen("/dev/null", "w"); 99*12292Stut close(fta->_file); 100*12292Stut fta->_file = fw; 101*12292Stut } 102*12292Stut else /* use tmp file */ 103*12292Stut { 104*12292Stut fta = fopen(tmpa, "w"); 105*12292Stut _assert (fta != NULL); 106*12292Stut } 107*12292Stut fb = 0; 108*12292Stut if (appflg ) 109*12292Stut { 110*12292Stut if (fb = fopen(nmb, "r")) 111*12292Stut { 112*12292Stut sprintf(tmpb, "junk%dj", getpid()); 113*12292Stut ftb = fopen(tmpb, "w"); 114*12292Stut if (ftb==NULL) 115*12292Stut err("Can't get scratch file %s",tmpb); 116*12292Stut nhash = recopy(ftb, fb, fopen(nma, "r")); 117*12292Stut fclose(ftb); 118*12292Stut } 119*12292Stut else 120*12292Stut appflg=0; 121*12292Stut } 122*12292Stut fc = fopen(nmc, appflg ? "a" : "w"); 123*12292Stut if (keepkey) 124*12292Stut fd = keepkey ? fopen(nmd, "w") : 0; 125*12292Stut docs = newkeys(fta, stdin, fc, nhash, fd, &iflong); 126*12292Stut fclose(stdin); 127*12292Stut if (remove != NULL) 128*12292Stut unlink(remove); 129*12292Stut fclose(fta); 130*12292Stut if (pipein) 131*12292Stut { 132*12292Stut pwait = wait(&status); 133*12292Stut printf("pfork %o pwait %o status %d\n",pfork,pwait,status); 134*12292Stut _assert(pwait==pfork); 135*12292Stut _assert(status==0); 136*12292Stut } 137*12292Stut else 138*12292Stut { 139*12292Stut sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa); 140*12292Stut system(com); 141*12292Stut } 142*12292Stut if (appflg) 143*12292Stut { 144*12292Stut sprintf(tmpc, "junk%dk", getpid()); 145*12292Stut sprintf(com, "mv %s %s", tmpa, tmpc); 146*12292Stut system(com); 147*12292Stut sprintf(com, "sort -T %s -m %s %s -o %s", sortdir, 148*12292Stut tmpb, tmpc, tmpa); 149*12292Stut system(com); 150*12292Stut } 151*12292Stut fta = fopen(tmpa, "r"); 152*12292Stut fa = fopen(nma, "w"); 153*12292Stut fb = fopen(nmb, "w"); 154*12292Stut whash(fta, fa, fb, nhash, iflong, &keys, &hashes); 155*12292Stut fclose(fta); 156*12292Stut # ifndef D1 157*12292Stut unlink(tmpa); 158*12292Stut # endif 159*12292Stut if (appflg) 160*12292Stut { 161*12292Stut unlink(tmpb); 162*12292Stut unlink(tmpc); 163*12292Stut } 164*12292Stut if (chatty) 165*12292Stut 166*12292Stut printf ("%ld key occurrences, %d hashes, %d docs\n", 167*12292Stut keys, hashes, docs); 168*12292Stut } 169