112292Stut #ifndef lint 2*37895Sbostic static char *sccsid = "@(#)inv1.c 4.3 (Berkeley) 05/11/89"; 312292Stut #endif 412292Stut 512292Stut #include <stdio.h> 612292Stut #include <assert.h> 7*37895Sbostic #include "pathnames.h" 812292Stut 912292Stut main(argc, argv) 1012292Stut char *argv[]; 1112292Stut { 1212292Stut /* Make inverted file indexes. Reads a stream from mkey which 1312292Stut * gives record pointer items and keys. Generates set of files 1412292Stut * a. NHASH pointers to file b. 1512292Stut * b. lists of record numbers. 1612292Stut * c. record pointer items. 1712292Stut * 1812292Stut * these files are named xxx.ia, xxx.ib, xxx.ic; 1912292Stut * where xxx is taken from arg1. 2012292Stut * If the files exist they are updated. 2112292Stut */ 2212292Stut 2312292Stut FILE *fa, *fb, *fc, *fta, *ftb, *ftc, *fd; 2412292Stut int nhash = 256; 2512292Stut int appflg = 1; 2612292Stut int keepkey = 0, pipein = 0; 2712292Stut char nma[100], nmb[100], nmc[100], com[100], nmd[100]; 2812292Stut char tmpa[20], tmpb[20], tmpc[20]; 2912292Stut char *remove = NULL; 3012292Stut int chatty = 0, docs, hashes, fp[2], fr, fw, pfork, pwait, status; 3112292Stut int i,j,k; 3212292Stut long keys; 3312292Stut int iflong =0; 3412292Stut char *sortdir; 3512292Stut 36*37895Sbostic sortdir = _PATH_USRTMP; 3712292Stut while (argv[1][0] == '-') 3812292Stut { 3912292Stut switch(argv[1][1]) 4012292Stut { 4112292Stut case 'h': /* size of hash table */ 4212292Stut nhash = atoi (argv[1]+2); 4312292Stut break; 4412292Stut case 'n': /* new, don't append */ 4512292Stut appflg=0; 4612292Stut break; 4712292Stut case 'a': /* append to old file */ 4812292Stut appflg=1; 4912292Stut break; 5012292Stut case 'v': /* verbose output */ 5112292Stut chatty=1; 5212292Stut break; 5312292Stut case 'd': /* keep keys on file .id for check on searching */ 5412292Stut keepkey=1; 5512292Stut break; 5612292Stut case 'p': /* pipe into sort (saves space, costs time)*/ 5712292Stut pipein = 1; 5812292Stut break; 5912292Stut case 'i': /* input is on file, not stdin */ 6012292Stut close(0); 6112292Stut if (open(argv[2], 0) != 0) 6212292Stut err("Can't read input %s", argv[2]); 6312292Stut if (argv[1][2]=='u') /* unlink */ 6412292Stut remove = argv[2]; 6512292Stut argc--; 6612292Stut argv++; 6712292Stut break; 6812292Stut } 6912292Stut argc--; 7012292Stut argv++; 7112292Stut } 7212292Stut strcpy (nma, argc >= 2 ? argv[1] : "Index"); 7312292Stut strcpy (nmb, nma); 7412292Stut strcpy (nmc, nma); 7512292Stut strcpy (nmd, nma); 7612292Stut strcat (nma, ".ia"); 7712292Stut strcat (nmb, ".ib"); 7812292Stut strcat (nmc, ".ic"); 7912292Stut strcat (nmd, ".id"); 8012292Stut 8112292Stut sprintf(tmpa, "junk%di", getpid()); 8212292Stut if (pipein) 8312292Stut { 8412292Stut pipe(fp); 8512292Stut fr=fp[0]; 8612292Stut fw=fp[1]; 8712292Stut if ( (pfork=fork()) == 0) 8812292Stut { 8912292Stut close(fw); 9012292Stut close(0); 9112292Stut _assert(dup(fr)==0); 9212292Stut close(fr); 93*37895Sbostic execl(_PATH_SORT, "sort", "-T", sortdir, "-o", tmpa, 0); 9412292Stut _assert(0); 9512292Stut } 9612292Stut _assert(pfork!= -1); 9712292Stut close(fr); 98*37895Sbostic fta = fopen(_PATH_DEVNULL, "w"); 9912292Stut close(fta->_file); 10012292Stut fta->_file = fw; 10112292Stut } 10212292Stut else /* use tmp file */ 10312292Stut { 10412292Stut fta = fopen(tmpa, "w"); 10512292Stut _assert (fta != NULL); 10612292Stut } 10712292Stut fb = 0; 10812292Stut if (appflg ) 10912292Stut { 11012292Stut if (fb = fopen(nmb, "r")) 11112292Stut { 11212292Stut sprintf(tmpb, "junk%dj", getpid()); 11312292Stut ftb = fopen(tmpb, "w"); 11412292Stut if (ftb==NULL) 11512292Stut err("Can't get scratch file %s",tmpb); 11612292Stut nhash = recopy(ftb, fb, fopen(nma, "r")); 11712292Stut fclose(ftb); 11812292Stut } 11912292Stut else 12012292Stut appflg=0; 12112292Stut } 12212292Stut fc = fopen(nmc, appflg ? "a" : "w"); 12326452Smckusick fd = keepkey ? fopen(nmd, "w") : 0; 12412292Stut docs = newkeys(fta, stdin, fc, nhash, fd, &iflong); 12512292Stut fclose(stdin); 12612292Stut if (remove != NULL) 12712292Stut unlink(remove); 12812292Stut fclose(fta); 12912292Stut if (pipein) 13012292Stut { 13112292Stut pwait = wait(&status); 13212292Stut printf("pfork %o pwait %o status %d\n",pfork,pwait,status); 13312292Stut _assert(pwait==pfork); 13412292Stut _assert(status==0); 13512292Stut } 13612292Stut else 13712292Stut { 13812292Stut sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa); 13912292Stut system(com); 14012292Stut } 14112292Stut if (appflg) 14212292Stut { 14312292Stut sprintf(tmpc, "junk%dk", getpid()); 14412292Stut sprintf(com, "mv %s %s", tmpa, tmpc); 14512292Stut system(com); 14612292Stut sprintf(com, "sort -T %s -m %s %s -o %s", sortdir, 14712292Stut tmpb, tmpc, tmpa); 14812292Stut system(com); 14912292Stut } 15012292Stut fta = fopen(tmpa, "r"); 15112292Stut fa = fopen(nma, "w"); 15212292Stut fb = fopen(nmb, "w"); 15312292Stut whash(fta, fa, fb, nhash, iflong, &keys, &hashes); 15412292Stut fclose(fta); 15512292Stut # ifndef D1 15612292Stut unlink(tmpa); 15712292Stut # endif 15812292Stut if (appflg) 15912292Stut { 16012292Stut unlink(tmpb); 16112292Stut unlink(tmpc); 16212292Stut } 16312292Stut if (chatty) 16412292Stut 16512292Stut printf ("%ld key occurrences, %d hashes, %d docs\n", 16612292Stut keys, hashes, docs); 16712292Stut } 168