# include # include # include # include # include # include # include "globs.h" # include # include SCCSID(@(#)reformat.c 8.3 1/16/85) /* ** REFORMAT.C -- "reformat" module of DECOMPOSITION ** ** reformat() examines each relation which will be ** involved in a one variable subquery to see if it ** is cost effective to modify the relation to a more ** usefull structure. Included in this file are all the ** routines associated with reformat: ** ** reformat -- reformats each relation if cost effective ** ** findlinks -- determines what one variable clauses (if any) ** are associated with the current variable ** and the substitution variable. ** ** rangrf -- decides whether to actually reformat and does it. ** ** primrf -- performs a projection on a user relation ** ** ckpkey -- determines if relation is already structured usefully. ** ** findwid -- determines width of target list. ** ** rel_tup -- returns how many tuples fit on one page */ /* ** Reformat -- Examine each variable to see if it should be reformated. ** ** The algorithm is: ** (1) Find all variables for which, after tuple substitution, ** will have one variable equality clauses on them. ** (2) Estimate how much it will cost to execute them. ** (3) Ignore those relations which are already keyed usefully. ** (4) If it is a user relation, determine if it will be less costly ** to first project (and possibly) modify the relation. ** (5) If it is a _SYS relation, determine if it will be less costly ** to modify the relation to hash on the linking domains. */ reformat(var, sqlist, locrang, buf, tree) int var; QTREE *sqlist[]; int locrang[]; char buf[]; QTREE *tree; { register QTREE *sq; register char *lmap; register int mainvar; int i, j; char linkmap[MAXDOM]; # ifdef xDTR1 if (tTf(39, -1)) printf("REFORMAT: subvar=%d\n", var); # endif /* if the main tree is single var then put it in sq list */ mainvar = -1; if (tree->sym.value.sym_root.tvarc == 1) { mainvar = bitpos(tree->sym.value.sym_root.lvarm | tree->sym.value.sym_root.rvarm); if (sqlist[mainvar] != 0) syserr("help reformat"); sqlist[mainvar] = tree; # ifdef xDTR1 if (tTf(39, 0)) printf("including var %d\n", mainvar); # endif } for(i = 0; i < MAXRANGE; i++) if (sq = sqlist[i]) { # ifdef xDTR1 if (tTf(39, 0)) printf("Sqlist[%d]:\n", i); # endif lmap = linkmap; for (j = 0; j < MAXDOM; j++) *lmap++ = 0; if (findlinks(sq->right, var, i, linkmap)) { # ifdef xDTR1 if (tTf(39, 1)) prlinks("Findlinks found", linkmap); # endif rangrf(i, var, sq, buf, linkmap, tree, locrang); } } if (mainvar >= 0) sqlist[mainvar] = 0; } /* ** Findlinks -- Determine whether there are any equalify clauses ** between the "linkv" and the variable selected for tuple ** substitution "selv". ** ** Return: ** TRUE if there is a linking variable ** FALSE if not ** ** Side Effects: ** The linkmap is set to 1 for each linking domains. ** ie. if domains 3 is a linking domains then ** linkmap[3] = 1. */ findlinks(node, selv, linkv, linkmap) QTREE *node; int selv, linkv; char linkmap[]; { register QTREE *b, *a; register int s; QTREE *temp; extern QTREE *ckvar(); a = node; # ifdef xDTR1 if (tTf(39, 2)) { printf("FINDLINKS:"); nodepr(a); } # endif if (a->sym.type == QLEND) return (0); s = findlinks(a->right, selv, linkv, linkmap); /* ** This mess is looking for a clause of the form: ** EQ ** Var Var ** linkv selv ** Where the VARS can be in either order */ if ((b = a->left)->sym.type != BOP || b->sym.value.sym_op.opno!= opEQ || b->left->sym.type!=VAR || b->right->sym.type!=VAR) return (s); a = ckvar(b->left); b = ckvar(b->right); if (b->sym.value.sym_var.varno == linkv) { temp = a; a = b; b = temp; } if (a->sym.value.sym_var.varno != linkv || (selv >= 0 && b->sym.value.sym_var.varno != selv)) return (s); linkmap[a->sym.value.sym_var.attno] = 1; # ifdef xDTR1 if (tTf(39, 3)) printf("found:attno=%d\n", a->sym.value.sym_var.attno); # endif return (1); } /* ** Rangrf -- reformat the variable "var" if it is cost effective. ** ** Rangrf does the actual work of reformat. If the relation is ** already keyed usefully then rangrf does nothing. ** Otherwise rangrf is split into two decisions: ** A user relation must first be projected and then modified; ** a _SYS relation can be modified directly. ** ** It may be cost effective to just project a user relation. */ /*ARGSUSED*/ rangrf(var, substvar, sq, buf, linkmap, tree, locrang) int var, substvar; QTREE *sq; char buf[]; char linkmap[]; QTREE *tree; int locrang[]; { register struct rang_tab *r, *rs; register int j; char nums[2]; int i, newwid; QTREE *pq; long npages, newpages; long origcost, modcost, projcost; extern char *rangename(); extern long rel_pages(), hashcost(); extern QTREE *makroot(); extern DESC *openr1(); extern QTREE **mksqlist(); r = &De.de_rangev[var]; rs = &De.de_rangev[substvar]; npages = rel_pages(r->rtcnt, r->rtwid); /* calculate original cost of substitution */ origcost = rs->rtcnt * npages; # ifdef xDTR1 if (tTf(39, 5)) printf("eval of mod for var %d. orig cost=%ld\n", var, origcost); # endif /* if relation is already keyed usefully, just return */ if (i = ckpkey(linkmap, var)) { # ifdef xDTR1 if (tTf(39, 4)) printf("already keyed ok %d\n", i); # endif return; } /* if this is a primary relation, a projection must be done ** before any modify can be performed */ if (!rnum_temp(r->relnum)) { /* evaluation for primary, user relation */ /* to save some time, don't evaluate if origcost is very small */ if (origcost < OVHDP + 1 + npages) return; j = markbuf(buf); /* build a projection tree but don't actually create the projection */ pq = makroot(buf); dfind(sq, buf, mksqlist(pq, var)); newwid = findwid(pq); newpages = rel_pages(r->rtcnt, newwid); /* ** Calculate cost of projection + new cost of substitution ** projcost = readoldpages + writenewpages + runquery + overhead */ projcost = npages + newpages + rs->rtcnt * newpages + OVHDP; /* CALCULATE COST OF MODIFY = COST OF PROJECTION + COST OF MODIFY * AND NEW COST OF SUBSTITUTION AFTER MODIFY */ modcost = (npages + newpages) + hashcost(newpages) + rs->rtcnt + OVHDP + OVHDM; # ifdef xDTR1 if (tTf(39, 5)) { printf("primary rel: proj cost=%ld\t", projcost); printf("mod cost=%ld\n", modcost); } # endif if (origcost <= modcost) if (origcost <= projcost) { freebuf(buf, j); return; } # ifdef xDTR1 if (tTf(39, 5)) printf("doing projection\n"); # endif /* i will be TRUE if the proj was aborted */ i = primrf(var, pq, locrang); freebuf(buf, j); if ((projcost <= modcost) || i) return; } /* IF THIS IS A TEMPORARY RELATION, A MODIFY CAN BE DONE DIRECTLY */ else { /* CALCULATE MODIFY COST (which does not include a projection) * AND NEW COST OF SUBSTITUTION */ modcost = hashcost(npages) + rs->rtcnt + OVHDM; # ifdef xDTR1 if (tTf(39, 5)) printf("temp rel: mod cost=%ld\n", modcost); # endif if (origcost <= modcost) return; } # ifdef xDTR1 if (tTf(39, 5)) printf("doing modify\n"); # endif initp(); setp(PV_STR, rangename(var)); setp(PV_STR, "hash"); /* modify to hash */ setp(PV_STR, "num"); /* passing domain numbers - not names */ nums[1] = '\0'; for (j = 0; j < MAXDOM; j++) if (linkmap[j]) { *nums = j; setp(PV_STR, nums); } /* fill relation completely */ setp(PV_STR, ""); setp(PV_STR, "fillfactor"); setp(PV_STR, "100"); setp(PV_STR, "minpages"); setp(PV_STR, "1"); closer1(var); call_dbu(mdMODIFY, FALSE); /* re-open modified range to get accurate descriptor */ openr1(var); } /* ** Primrf -- Replace a user relation with a projection of the needed domains. ** ** Primrf retrieves into a temporary relation, the domains ** specified by the "pq" tree. The range table is updated to ** reflect the new range. ** ** In fact a projection is not an accurate way to describe what ** happens. In order to avoid changing any attribute numbers in ** the query, the needed domains are projected and the domains ** inbetween are created as type "c0". This way they occupy ** no space and the attribute numbering is unchanged. Of course, ** any attributes beyond the last one needed are simply discarded. ** ** In previous versions attempts were made to project only the needed ** domains and to renumber the query tree. This proved to be very ** expensive and difficult and was not always as optimal as this ** method. ** ** The routines newquery() and endquery() will undo the effects ** of primrf and restore the range table back to its original state. */ primrf(var, pq, locrang) int var; QTREE *pq; int locrang[]; { register QTREE *q, **np; register int i; int maxresno, rnum; QTREE *node1[MAXDOM+1], *node2[MAXDOM+1]; static char czero[QT_HDR_SIZ + sizeof (struct resdomnode)]; /* a dummy resdom */ extern DESC *openr1(); extern char *rnum_convert(); # ifdef xDTR1 if (tTf(39, 6)) printf("PRIMRF:\n"); # endif /* renumber RESDOMs to match their VARs */ for (q = pq->left; q->sym.type != TREE; q = q->left) q->sym.value.sym_resdom.resno = q->right->sym.value.sym_var.attno; /* form list of RESDOMs from outermost inward */ node1[lnode(pq->left, node1, 0)] = 0; /* form a dummy RESDOM with type CHAR and length 0 */ q = (QTREE *) czero; q->sym.value.sym_resdom.resfrmt = CHAR; q->sym.value.sym_resdom.resfrml = 0; /* zero node2 */ for (np = node2, i = 0; i < MAXDOM + 1; i++) *np++ = 0; /* sort RESDOMs into node2 */ maxresno = 0; for (np = node1; q = *np++; ) { if ((i = q->sym.value.sym_resdom.resno) == 0) return (1); /* abort. Tid is referenced */ if (i > maxresno) maxresno = i; node2[i-1] = q; } /* fill missing RESDOMs with czero */ for (np = node2, i = 0; i < maxresno; i++, np++) if (*np == 0) *np = (QTREE *) czero; /* set up params for the create */ initp(); setp(PV_STR, "0"); /* initial relstat field */ rnum = rnum_alloc(); setp(PV_STR, rnum_convert(rnum)); domnam(node2, "f"); call_dbu(mdCREATE, FALSE); /* now run projection */ i = var; execsq1(pq, i, rnum); new_range(i, rnum); /* save the name of the new relation */ savrang(locrang, i); openr1(i); return (0); } /* ** Ckpkey -- determine if a relation is already keyed usefully. ** ** Ckpkey gets the key structure from paramd() and compares ** it to the linkmap. If the relation is ISAM and the first keyed ** domain is in linkmap, or if it is HASH and all keyed domains ** are in the linkmap, then ckpkey() returns >0, else ** ckpkey looks for secondary indexes which are usable. ** if none, then ckpkey() returns 0. ** ** The value returned is an estimate of the number of ** pages which must be read to satisfy a query given ** equality clauses on the linkmap domains and the ** structure of the relation. The (crude) estimates are: ** hash 1 page ** isam 2 pages ** hash sec index 2 pages ** isam sec index 3 pages */ ckpkey(linkmap, var) char linkmap[]; int var; { register DESC *d; register int i; struct index itup; struct accessparam ap; TID lotid, hitid; extern DESC *readopen(), *openr1(), Inddes; # ifdef xDTR1 if (tTf(39, 11)) printf("CKPKEY:%s\n", rangename(var)); # endif /* if relation is an unindexed heap, then it cannot be keyed usefully */ d = openr1(var); if (d->reldum.relspec != M_HEAP || d->reldum.relindxd > 0) { d = readopen(var); /* open relation if it hasn't been already */ paramd(d, &ap); if (ckpkey1(linkmap, &ap) == 0) return (ap.mode == EXACTKEY ? 1 : 2); /* success */ if (d->reldum.relindxd > 0) { opencatalog("indexes", OR_READ); setkey(&Inddes, (char *)&itup, d->reldum.relid, IRELIDP); setkey(&Inddes, (char *)&itup, d->reldum.relowner, IOWNERP); if (i = find(&Inddes, EXACTKEY, &lotid, &hitid, (char *)&itup)) syserr("ckpkey:find %d", i); while ((i = get(&Inddes, &lotid, &hitid, (char *)&itup, TRUE)) == 0) { if (!bequal(itup.irelidp, d->reldum.relid, MAXNAME + 2)) continue; parami(&itup, &ap); if (ckpkey1(linkmap, &ap) == 0) return (ap.mode == EXACTKEY ? 2 : 3); /* success */ } } } return (0); /* failure. no useful structure */ } ckpkey1(linkmap, ap) char linkmap[]; struct accessparam *ap; { register int i, k, anykey; if (ap->mode == NOKEY) return (1); anykey = 0; for (i = 0; k = ap->keydno[i]; i++) { if (linkmap[k] == 0) { if (ap->mode == EXACTKEY) return (1); else break; } anykey++; } return (!anykey); } /* ** Findwid -- scan the target list of the projection tree ** to determine the resultant tuple size. ** ** Return: ** Size of projected tuple. */ findwid(tree) QTREE *tree; { register QTREE *nod, *t; register int wid; wid = 0; t = tree; for (nod = t->left; nod && nod->sym.type != TREE; nod = nod->left) { wid += nod->sym.value.sym_var.varfrml & I1MASK; } return (wid); } /* ** HASHCOST -- determine cost to modify to hash ** ** Estimates the cost to modify the relation to hash. ** The estimate is crude since it assumes that there ** are no duplicates and that a "unix" page will be ** the same size as an "ingres" page. ** ** The cost is based on the parameters which signify ** the size of the in-core sort buffer and the n-way ** merge sort plus the cost to read the sorted file ** and write the new relation twice (that's the was it works!). ** ** Parameters: ** npages - the number of pages (estimate) which the ** relation currently occupies. ** ** Returns: ** the cost to hash the relation. ** ** Side Effects: ** none ** ** Called By: ** rangrf */ # define COREBUFSIZE 32767 / PGSIZE # define MERGESIZE 7 long hashcost(npages) long npages; { long sortpages, total; register int nfiles; nfiles = npages / COREBUFSIZE; sortpages = 2 * npages; while (nfiles > 1) { nfiles = (nfiles + MERGESIZE - 1) / MERGESIZE; sortpages += 2 * npages; } total = sortpages + npages + npages + npages; # ifdef xDTR1 if (tTf(39, 5)) printf("hashcost is %ld\n", total); # endif return (total); }