1*48297Sbostic /*-
2*48297Sbostic * %sccs.include.proprietary.c%
3*48297Sbostic */
4*48297Sbostic
512292Stut #ifndef lint
6*48297Sbostic static char sccsid[] = "@(#)inv1.c 4.4 (Berkeley) 04/18/91";
7*48297Sbostic #endif /* not lint */
812292Stut
#include <sys/types.h>
#include <sys/wait.h>

#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "pathnames.h"
1212292Stut
main(argc,argv)1312292Stut main(argc, argv)
1412292Stut char *argv[];
1512292Stut {
1612292Stut /* Make inverted file indexes. Reads a stream from mkey which
1712292Stut * gives record pointer items and keys. Generates set of files
1812292Stut * a. NHASH pointers to file b.
1912292Stut * b. lists of record numbers.
2012292Stut * c. record pointer items.
2112292Stut *
2212292Stut * these files are named xxx.ia, xxx.ib, xxx.ic;
2312292Stut * where xxx is taken from arg1.
2412292Stut * If the files exist they are updated.
2512292Stut */
2612292Stut
2712292Stut FILE *fa, *fb, *fc, *fta, *ftb, *ftc, *fd;
2812292Stut int nhash = 256;
2912292Stut int appflg = 1;
3012292Stut int keepkey = 0, pipein = 0;
3112292Stut char nma[100], nmb[100], nmc[100], com[100], nmd[100];
3212292Stut char tmpa[20], tmpb[20], tmpc[20];
3312292Stut char *remove = NULL;
3412292Stut int chatty = 0, docs, hashes, fp[2], fr, fw, pfork, pwait, status;
3512292Stut int i,j,k;
3612292Stut long keys;
3712292Stut int iflong =0;
3812292Stut char *sortdir;
3912292Stut
4037895Sbostic sortdir = _PATH_USRTMP;
4112292Stut while (argv[1][0] == '-')
4212292Stut {
4312292Stut switch(argv[1][1])
4412292Stut {
4512292Stut case 'h': /* size of hash table */
4612292Stut nhash = atoi (argv[1]+2);
4712292Stut break;
4812292Stut case 'n': /* new, don't append */
4912292Stut appflg=0;
5012292Stut break;
5112292Stut case 'a': /* append to old file */
5212292Stut appflg=1;
5312292Stut break;
5412292Stut case 'v': /* verbose output */
5512292Stut chatty=1;
5612292Stut break;
5712292Stut case 'd': /* keep keys on file .id for check on searching */
5812292Stut keepkey=1;
5912292Stut break;
6012292Stut case 'p': /* pipe into sort (saves space, costs time)*/
6112292Stut pipein = 1;
6212292Stut break;
6312292Stut case 'i': /* input is on file, not stdin */
6412292Stut close(0);
6512292Stut if (open(argv[2], 0) != 0)
6612292Stut err("Can't read input %s", argv[2]);
6712292Stut if (argv[1][2]=='u') /* unlink */
6812292Stut remove = argv[2];
6912292Stut argc--;
7012292Stut argv++;
7112292Stut break;
7212292Stut }
7312292Stut argc--;
7412292Stut argv++;
7512292Stut }
7612292Stut strcpy (nma, argc >= 2 ? argv[1] : "Index");
7712292Stut strcpy (nmb, nma);
7812292Stut strcpy (nmc, nma);
7912292Stut strcpy (nmd, nma);
8012292Stut strcat (nma, ".ia");
8112292Stut strcat (nmb, ".ib");
8212292Stut strcat (nmc, ".ic");
8312292Stut strcat (nmd, ".id");
8412292Stut
8512292Stut sprintf(tmpa, "junk%di", getpid());
8612292Stut if (pipein)
8712292Stut {
8812292Stut pipe(fp);
8912292Stut fr=fp[0];
9012292Stut fw=fp[1];
9112292Stut if ( (pfork=fork()) == 0)
9212292Stut {
9312292Stut close(fw);
9412292Stut close(0);
9512292Stut _assert(dup(fr)==0);
9612292Stut close(fr);
9737895Sbostic execl(_PATH_SORT, "sort", "-T", sortdir, "-o", tmpa, 0);
9812292Stut _assert(0);
9912292Stut }
10012292Stut _assert(pfork!= -1);
10112292Stut close(fr);
10237895Sbostic fta = fopen(_PATH_DEVNULL, "w");
10312292Stut close(fta->_file);
10412292Stut fta->_file = fw;
10512292Stut }
10612292Stut else /* use tmp file */
10712292Stut {
10812292Stut fta = fopen(tmpa, "w");
10912292Stut _assert (fta != NULL);
11012292Stut }
11112292Stut fb = 0;
11212292Stut if (appflg )
11312292Stut {
11412292Stut if (fb = fopen(nmb, "r"))
11512292Stut {
11612292Stut sprintf(tmpb, "junk%dj", getpid());
11712292Stut ftb = fopen(tmpb, "w");
11812292Stut if (ftb==NULL)
11912292Stut err("Can't get scratch file %s",tmpb);
12012292Stut nhash = recopy(ftb, fb, fopen(nma, "r"));
12112292Stut fclose(ftb);
12212292Stut }
12312292Stut else
12412292Stut appflg=0;
12512292Stut }
12612292Stut fc = fopen(nmc, appflg ? "a" : "w");
12726452Smckusick fd = keepkey ? fopen(nmd, "w") : 0;
12812292Stut docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
12912292Stut fclose(stdin);
13012292Stut if (remove != NULL)
13112292Stut unlink(remove);
13212292Stut fclose(fta);
13312292Stut if (pipein)
13412292Stut {
13512292Stut pwait = wait(&status);
13612292Stut printf("pfork %o pwait %o status %d\n",pfork,pwait,status);
13712292Stut _assert(pwait==pfork);
13812292Stut _assert(status==0);
13912292Stut }
14012292Stut else
14112292Stut {
14212292Stut sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa);
14312292Stut system(com);
14412292Stut }
14512292Stut if (appflg)
14612292Stut {
14712292Stut sprintf(tmpc, "junk%dk", getpid());
14812292Stut sprintf(com, "mv %s %s", tmpa, tmpc);
14912292Stut system(com);
15012292Stut sprintf(com, "sort -T %s -m %s %s -o %s", sortdir,
15112292Stut tmpb, tmpc, tmpa);
15212292Stut system(com);
15312292Stut }
15412292Stut fta = fopen(tmpa, "r");
15512292Stut fa = fopen(nma, "w");
15612292Stut fb = fopen(nmb, "w");
15712292Stut whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
15812292Stut fclose(fta);
15912292Stut # ifndef D1
16012292Stut unlink(tmpa);
16112292Stut # endif
16212292Stut if (appflg)
16312292Stut {
16412292Stut unlink(tmpb);
16512292Stut unlink(tmpc);
16612292Stut }
16712292Stut if (chatty)
16812292Stut
16912292Stut printf ("%ld key occurrences, %d hashes, %d docs\n",
17012292Stut keys, hashes, docs);
17112292Stut }
172