1: # include   "../ingres.h"
   2: # include   "../catalog.h"
   3: # include   "../tree.h"
   4: # include   "../symbol.h"
   5: # include   "decomp.h"
   6: # include   "../access.h"
   7: # include   "../aux.h"
   8: 
   9: /*
  10: **	REFORMAT.C -- "reformat" module of DECOMPOSITION
  11: **
  12: **	reformat() examines each relation which will be
  13: **	involved in a one variable subquery to see if it
  14: **	is cost effective to modify the relation to a more
  15: **	usefull structure. Included in this file are all the
  16: **	routines associated with reformat:
  17: **
  18: **		reformat -- reformats each relation if cost effective
  19: **
  20: **		findlinks -- determines what one variable clauses (if any)
  21: **				are associated with the current variable
  22: **				and the substitution variable.
  23: **
  24: **		rangrf    -- decides whether to actually reformat and does it.
  25: **
  26: **		primrf    -- performs a projection on a user relation
  27: **
  28: **		ckpkey    -- determines if relation is already structured usefully.
  29: **
  30: **		findwid   -- determines width of target list.
  31: **
  32: **		rel_tup   -- returns how many tuples fit on one page
  33: */
  34: 
  35: 
  36: 
  37: reformat(var, sqlist, locrang, buf, tree)
  38: int         var;
  39: struct querytree    *sqlist[];
  40: int         locrang[];
  41: char            buf[];
  42: struct querytree    *tree;
  43: 
  44: /*
  45: ** Reformat -- Examine each variable to see if it should be reformated.
  46: **
  47: **	The algorithm is:
  48: **	(1) Find all variables for which, after tuple substitution,
  49: **	    will have one variable equality clauses on them.
  50: **	(2) Estimate how much it will cost to execute them.
  51: **	(3) Ignore those relations which are already keyed usefully.
  52: **	(4) If it is a user relation, determine if it will be less costly
  53: **	    to first project (and possibly) modify the relation.
  54: **	(5) If it is a _SYS relation, determine if it will be less costly
  55: **	    to modify the relation to hash on the linking domains.
  56: */
  57: 
  58: {
  59:     register struct querytree   *sq;
  60:     register char           *lmap;
  61:     register int            mainvar;
  62:     int             i, j;
  63:     char                linkmap[MAXDOM];
  64: 
  65: #	ifdef xDTR1
  66:     if (tTf(13, -1))
  67:         printf("REFORMAT: subvar=%d\n", var);
  68: #	endif
  69: 
  70:     /* if the main tree is single var then put it in sq list */
  71:     mainvar = -1;
  72:     if (((struct qt_root *)tree)->tvarc == 1)
  73:     {
  74:         mainvar = bitpos(((struct qt_root *)tree)->lvarm | ((struct qt_root *)tree)->rvarm);
  75:         if (sqlist[mainvar] != 0)
  76:             syserr("help reformat");
  77:         sqlist[mainvar] = tree;
  78: #		ifdef xDTR1
  79:         if (tTf(13, 0))
  80:             printf("including var %d\n", mainvar);
  81: #		endif
  82:     }
  83:     for(i = 0; i < MAXRANGE; i++)
  84:         if (sq = sqlist[i])
  85:         {
  86: #			ifdef xDTR1
  87:             if (tTf(13, 0))
  88:                 printf("Sqlist[%d]:\n", i);
  89: #			endif
  90:             lmap = linkmap;
  91:             for (j = 0; j < MAXDOM; j++)
  92:                 *lmap++ = 0;
  93:             if (findlinks(sq->right, var, i, linkmap))
  94:             {
  95: #				ifdef xDTR1
  96:                 if (tTf(13, 1))
  97:                     prlinks("Findlinks found", linkmap);
  98: #				endif
  99:                 rangrf(i, var, sq, buf, linkmap, tree, locrang);
 100:             }
 101:         }
 102:     if (mainvar >= 0)
 103:         sqlist[mainvar] = 0;
 104: }
 105: 
 106: 
 107: 
 108: findlinks(node, selv, linkv, linkmap)
 109: struct querytree    *node;
 110: int         selv, linkv;
 111: char            linkmap[];
 112: 
 113: /*
 114: ** Findlinks -- Determine whether there are any equalify clauses
 115: **	between the "linkv" and the variable selected for tuple
 116: **	substitution "selv".
 117: **
 118: **	Return:
 119: **		TRUE if there is a linking variable
 120: **		FALSE if not
 121: **
 122: **	Side Effects:
 123: **		The linkmap is set to 1 for each linking domains.
 124: **		ie. if domains 3 is a linking domains then
 125: **		linkmap[3] = 1.
 126: */
 127: 
 128: {
 129:     register struct querytree   *b, *a;
 130:     register int            s;
 131:     struct querytree        *temp;
 132:     extern struct querytree     *ckvar();
 133: 
 134:     a = node;
 135: #	ifdef xDTR1
 136:     if (tTf(13, 2))
 137:     {
 138:         printf("FINDLINKS:");
 139:         writenod(a);
 140:     }
 141: #	endif
 142:     if (a->sym.type == QLEND)
 143:         return (0);
 144:     s = findlinks(a->right, selv, linkv, linkmap);
 145: 
 146:     /*
 147: 	** This mess is looking for a clause of the form:
 148: 	**		EQ
 149: 	**	Var		Var
 150: 	**	linkv		selv
 151: 	** Where the VARS can be in either order
 152: 	*/
 153:     if ((b = a->left)->sym.type != BOP || *b->sym.value!= opEQ ||
 154:         b->left->sym.type!=VAR || b->right->sym.type!=VAR)
 155:             return (s);
 156: 
 157:     a = ckvar(b->left);
 158:     b = ckvar(b->right);
 159:     if (((struct qt_var *)b)->varno == linkv)
 160:     {
 161:         temp = a;
 162:         a = b;
 163:         b = temp;
 164:     }
 165:     if (((struct qt_var *)a)->varno != linkv || (selv >= 0 && ((struct qt_var *)b)->varno != selv))
 166:         return (s);
 167: 
 168:     linkmap[((struct qt_var *)a)->attno] = 1;
 169: #	ifdef xDTR1
 170:     if (tTf(13, 3))
 171:         printf("found:attno=%d\n", ((struct qt_var *)a)->attno);
 172: #	endif
 173:     return (1);
 174: }
 175: 
 176: 
 177: 
 178: rangrf(var, substvar, sq, buf, linkmap, tree, locrang)
 179: int         var, substvar;
 180: struct querytree    *sq;
 181: char            buf[];
 182: char            linkmap[];
 183: struct querytree    *tree;
 184: int         locrang[];
 185: 
 186: /*
 187: ** Rangrf -- reformat the variable "var" if it is cost effective.
 188: **
 189: **	Rangrf does the actual work of reformat. If the relation is
 190: **	already keyed usefully then rangrf does nothing.
 191: **	Otherwise rangrf is split into two decisions:
 192: **	A user relation must first be projected and then modified;
 193: **	a _SYS relation can be modified directly.
 194: **
 195: **	It may be cost effective to just project a user relation.
 196: */
 197: 
 198: {
 199:     register struct rang_tab    *r, *rs;
 200:     register int            j;
 201:     char                nums[2];
 202:     int             i, newwid;
 203:     struct querytree        *pq;
 204:     long                npages, projpages, newpages;
 205:     long                origcost, modcost, projcost;
 206:     char                *rangename();
 207:     long                rel_pages(), hashcost();
 208:     extern struct querytree     *makroot();
 209: 
 210: 
 211:     r = &Rangev[var];
 212:     rs = &Rangev[substvar];
 213:     npages = rel_pages(r->rtcnt, r->rtwid);
 214: 
 215:     /* calculate original cost of substitution */
 216: 
 217:     origcost = rs->rtcnt * npages;
 218: 
 219: #	ifdef xDTR1
 220:     if (tTf(13, 5))
 221:         printf("eval of mod for var %d. orig cost=%s\n", var, locv(origcost));
 222: #	endif
 223: 
 224:     /* if relation is already keyed usefully, just return */
 225:     if (i = ckpkey(linkmap, var))
 226:     {
 227: #		ifdef xDTR1
 228:         if (tTf(13, 4))
 229:             printf("already keyed ok %d\n", i);
 230: #		endif
 231:         return;
 232:     }
 233: 
 234:     /* if this is a primary relation, a projection must be done
 235: 	** before any modify can be performed */
 236: 
 237:     if (!rnum_temp(r->relnum))
 238:     {
 239:         /* evaluation for primary, user relation */
 240: 
 241:         /* to save some time, don't evaluate if origcost is very small */
 242:         if (origcost < OVHDP + 1 + npages)
 243:             return;
 244: 
 245:         j = markbuf(buf);
 246: 
 247:         /* build a projection tree but don't actually create the projection */
 248:         pq = makroot(buf);
 249:         dfind(sq, buf, mksqlist(pq, var));
 250: 
 251:         newwid = findwid(pq);
 252:         newpages = rel_pages(r->rtcnt, newwid);
 253: 
 254:         /*
 255: 		** Calculate cost of projection + new cost of substitution
 256: 		** projcost = readoldpages + writenewpages + runquery + overhead
 257: 		*/
 258: 
 259:         projcost = npages + newpages + rs->rtcnt * newpages + OVHDP;
 260: 
 261: 
 262:         /*  CALCULATE COST OF MODIFY = COST OF PROJECTION + COST OF MODIFY
 263: 		 *  	AND NEW COST OF SUBSTITUTION AFTER MODIFY    */
 264: 
 265:         modcost = (npages + newpages) +
 266:                 hashcost(newpages) +
 267:                 rs->rtcnt +
 268:                 OVHDP + OVHDM;
 269: 
 270: #		ifdef xDTR1
 271:         if (tTf(13, 5))
 272:         {
 273:             printf("primary rel: proj cost=%s\t", locv(projcost));
 274:             printf("mod cost=%s\n", locv(modcost));
 275:         }
 276: #		endif
 277: 
 278:         if (origcost <= modcost)
 279:             if (origcost <= projcost)
 280:             {
 281:                 freebuf(buf, j);
 282:                 return;
 283:             }
 284: 
 285: #		ifdef xDTR1
 286:         if (tTf(13, 5))
 287:             printf("doing projection\n");
 288: #		endif
 289: 
 290:         /* i will be TRUE if the proj was aborted */
 291:         i = primrf(var, pq, locrang);
 292:         freebuf(buf, j);
 293:         if ((projcost <= modcost) || i)
 294:             return;
 295:     }
 296: 
 297:     /*  IF THIS IS A TEMPORARY RELATION, A MODIFY CAN BE DONE DIRECTLY  */
 298: 
 299:     else
 300:     {
 301: 
 302:         /*  CALCULATE MODIFY COST (which does not include a projection)
 303: 		 *  AND NEW COST OF SUBSTITUTION				  */
 304: 
 305:         modcost = hashcost(npages)
 306:                   + rs->rtcnt + OVHDM;
 307: 
 308: #		ifdef xDTR1
 309:         if (tTf(13, 5))
 310:             printf("temp rel: mod cost=%s\n", locv(modcost));
 311: #		endif
 312: 
 313:         if (origcost <= modcost)
 314:             return;
 315:     }
 316: 
 317: #	ifdef xDTR1
 318:     if (tTf(13, 5))
 319:         printf("doing modify\n");
 320: #	endif
 321: 
 322:     initp();
 323:     setp(rangename(var));
 324:     setp("hash");   /* modify to hash */
 325:     setp("num");    /* passing domain numbers - not names */
 326: 
 327:     nums[1] = '\0';
 328:     for (j = 0; j < MAXDOM; j++)
 329:         if (linkmap[j])
 330:         {
 331:             *nums = j;
 332:             setp(nums);
 333:         }
 334: 
 335:     /* fill relation completely */
 336:     setp("");
 337:     setp("fillfactor");
 338:     setp("100");
 339:     setp("minpages");
 340:     setp("1");
 341:     closer1(var);
 342:     call_dbu(mdMODIFY, FALSE);
 343: 
 344:     /* re-open modified range to get accurate descriptor */
 345:     openr1(var);
 346: }
 347: 
 348: 
 349: 
 350: 
 351: 
 352: primrf(var, pq, locrang)
 353: int         var;
 354: struct querytree    *pq;
 355: int         locrang[];
 356: 
 357: /*
 358: ** Primrf -- Replace a user relation with a projection of the needed domains.
 359: **
 360: **	Primrf retrieves into a temporary relation, the domains
 361: **	specified by the "pq" tree. The range table is updated to
 362: **	reflect the new range.
 363: **
 364: **	In fact a projection is not an accurate way to describe what
 365: **	happens. In order to avoid changing any attribute numbers in
 366: **	the query, the needed domains are projected and the domains
 367: **	inbetween are created as type "c0". This way they occupy
 368: **	no space and the attribute numbering is unchanged. Of course,
 369: **	any attributes beyond the last one needed are simply discarded.
 370: **
 371: **	In previous versions attempts were made to project only the needed
 372: **	domains and to renumber the query tree. This proved to be very
 373: **	expensive and difficult and was not always as optimal as this
 374: **	method.
 375: **
 376: **	The routines newquery() and endquery() will undo the effects
 377: **	of primrf and restore the range table back to its original state.
 378: */
 379: 
 380: 
 381: {
 382:     register struct querytree   *q, **np;
 383:     register int            i;
 384:     int             maxresno, rnum;
 385:     struct querytree        *node1[MAXDOM+1], *node2[MAXDOM+1];
 386:     char                czero[10];  /* a dummy resdom */
 387: 
 388: #	ifdef xDTR1
 389:     if (tTf(13, 6))
 390:         printf("PRIMRF:\n");
 391: #	endif
 392: 
 393:     /* renumber RESDOMs to match their VARs */
 394:     for (q = pq->left; q->sym.type != TREE; q = q->left)
 395:         ((struct qt_res *)q)->resno = ((struct qt_var *)q->right)->attno;
 396: 
 397:     /* form list of RESDOMs from outermost inward */
 398:     node1[lnode(pq->left, node1, 0)] = 0;
 399: 
 400:     /* form a dummy RESDOM with type CHAR and length 0 */
 401:     q = (struct querytree *) czero;
 402:     ((struct qt_var *)q)->frmt = CHAR;
 403:     ((struct qt_var *)q)->frml = 0;
 404: 
 405:     /* zero node2 */
 406:     for (np = node2, i = 0; i < MAXDOM + 1; i++)
 407:         *np++ = 0;
 408: 
 409:     /* sort RESDOMs into node2 */
 410:     maxresno = 0;
 411:     for (np = node1; q = *np++; )
 412:     {
 413:         if ((i = ((struct qt_res *)q)->resno) == 0)
 414:             return (1); /* abort. Tid is referenced */
 415:         if (i > maxresno)
 416:             maxresno = i;
 417:         node2[i-1] = q;
 418:     }
 419: 
 420:     /* fill missing RESDOMs with czero */
 421:     for (np = node2, i = 0; i < maxresno; i++, np++)
 422:         if (*np == 0)
 423:             *np = (struct querytree *) czero;
 424: 
 425: 
 426:     /* set up params for the create */
 427:     initp();
 428:     setp("0");  /* initial relstat field */
 429:     rnum = rnum_alloc();
 430:     setp(rnum_convert(rnum));
 431:     domnam(node2, "f");
 432:     call_dbu(mdCREATE, FALSE);
 433: 
 434:     /* now run projection */
 435:     i = var;
 436:     execsq1(pq, i, rnum);
 437:     new_range(i, rnum);
 438:     /* save the name of the new relation */
 439:     savrang(locrang, i);
 440:     openr1(i);
 441:     return (0);
 442: }
 443: 
 444: 
 445: 
 446: ckpkey(linkmap, var)
 447: char    linkmap[];
 448: int var;
 449: 
 450: /*
 451: ** Ckpkey -- determine if a relation is already keyed usefully.
 452: **
 453: **	Ckpkey gets the key structure from paramd() and compares
 454: **	it to the linkmap. If the relation is ISAM and the first keyed
 455: **	domain is in linkmap, or if it is HASH and all keyed domains
 456: **	are in the linkmap, then ckpkey() returns >0, else
 457: **	ckpkey looks for secondary indexes which are usable.
 458: **	if none, then ckpkey() returns 0.
 459: **
 460: **	The value returned is an estimate of the number of
 461: **	pages which must be read to satisfy a query given
 462: **	equality clauses on the linkmap domains and the
 463: **	structure of the relation. The (crude) estimates are:
 464: **		hash	1 page
 465: **		isam	2 pages
 466: **		hash sec index	2 pages
 467: **		isam sec index	3 pages
 468: */
 469: {
 470:     register struct descriptor  *d;
 471:     register int            i;
 472:     struct index            itup;
 473:     struct accessparam      ap;
 474:     struct tup_id           lotid, hitid;
 475:     extern struct descriptor    *readopen(), *openr1(), Inddes;
 476: 
 477: 
 478: #	ifdef  xDTR1
 479:     if (tTf(13, 11))
 480:         printf("CKPKEY:%s\n", rangename(var));
 481: #	endif
 482: 
 483:     /* if relation is an unindexed heap, then it cannot be keyed usefully */
 484:     d = openr1(var);
 485:     if (d->relspec != M_HEAP || d->relindxd > 0)
 486:     {
 487:         d = readopen(var);  /* open relation if it hasn't been already */
 488:         paramd(d, &ap);
 489:         if (ckpkey1(linkmap, &ap) == 0)
 490:             return (ap.mode == EXACTKEY ? 1 : 2);   /* success */
 491: 
 492:         if (d->relindxd > 0)
 493:         {
 494:             opencatalog("indexes", 0);
 495:             setkey(&Inddes, &itup, d->relid, IRELIDP);
 496:             setkey(&Inddes, &itup, d->relowner, IOWNERP);
 497:             if (i = find(&Inddes, EXACTKEY, &lotid, &hitid, &itup))
 498:                 syserr("ckpkey:find %d", i);
 499: 
 500:             while ((i = get(&Inddes, &lotid, &hitid, &itup, TRUE)) == 0)
 501:             {
 502:                 if (!bequal(&itup, d->relid, MAXNAME + 2))
 503:                     continue;
 504: 
 505:                 parami(&itup, &ap);
 506:                 if (ckpkey1(linkmap, &ap) == 0)
 507:                     return (ap.mode == EXACTKEY ? 2 : 3);   /* success */
 508:             }
 509:         }
 510:     }
 511:     return (0); /* failure. no usefull structure */
 512: }
 513: 
 514: 
 515: 
 516: ckpkey1(linkmap, ap)
 517: char    linkmap[];
 518: struct accessparam  *ap;
 519: {
 520:     register int    i, k, anykey;
 521: 
 522:     if (ap->mode == NOKEY)
 523:         return (1);
 524:     anykey = 0;
 525:     for (i = 0; k = ap->keydno[i]; i++)
 526:     {
 527:         if (linkmap[k] == 0)
 528:         {
 529:             if (ap->mode == EXACTKEY)
 530:                 return (1);
 531:             else
 532:                 break;
 533:         }
 534:         anykey++;
 535:     }
 536:     return (!anykey);
 537: }
 538: 
 539: 
 540: 
 541: 
 542: 
 543: findwid(tree)
 544: struct querytree    *tree;
 545: 
 546: /*
 547: ** Findwid -- scan the target list of the projection tree
 548: **	to determine the resultant tuple size.
 549: **
 550: **	Return:
 551: **		Size of projected tuple.
 552: */
 553: 
 554: {
 555:     register struct querytree   *nod, *t;
 556:     register int            wid;
 557: 
 558: 
 559:     wid = 0;
 560:     t = tree;
 561: 
 562:     for (nod = t->left; nod && nod->sym.type != TREE; nod = nod->left)
 563:     {
 564:         wid += ((struct qt_var *)nod)->frml & 0377;
 565:     }
 566: 
 567:     return (wid);
 568: }
 569: 
 570: 
 571: /*
 572: **  HASHCOST -- determine cost to modify to hash
 573: **
 574: **	Estimates the cost to modify the relation to hash.
 575: **	The estimate is crude since it assumes that there
 576: **	are no duplicates and that a "unix" page will be
 577: **	the same size as an "ingres" page.
 578: **
 579: **	The cost is based on the parameters which signify
 580: **	the size of the in-core sort buffer and the n-way
 581: **	merge sort plus the cost to read the sorted file
 582: **	and write the new relation twice (that's the was it works!).
 583: **
 584: **	Parameters:
 585: **		npages - the number of pages (estimate) which the
 586: **				relation currently occupies.
 587: **
 588: **	Returns:
 589: **		the cost to hash the relation.
 590: **
 591: **	Side Effects:
 592: **		none
 593: **
 594: **	Called By:
 595: **		rangrf
 596: */
 597: 
 598: # define    COREBUFSIZE 32767 / PGSIZE
 599: # define    MERGESIZE   7
 600: 
 601: long hashcost(npages)
 602: long    npages;
 603: {
 604:     long        sortpages, total;
 605:     register int    nfiles;
 606: 
 607:     nfiles = npages / COREBUFSIZE;
 608:     sortpages = 2 * npages;
 609: 
 610:     while (nfiles > 1)
 611:     {
 612:         nfiles = (nfiles + MERGESIZE - 1) / MERGESIZE;
 613:         sortpages += 2 * npages;
 614:     }
 615: 
 616:     total = sortpages + npages + npages + npages;
 617: #	ifdef xDTR1
 618:     if (tTf(13, 5))
 619:         printf("hashcost is %s\n", locv(total));
 620: #	endif
 621: 
 622:     return (total);
 623: }

Defined functions

ckpkey defined in line 446; used 2 times
ckpkey1 defined in line 516; used 2 times
findlinks defined in line 108; used 3 times
hashcost defined in line 601; used 3 times
primrf defined in line 352; used 1 times
rangrf defined in line 178; used 1 times
  • in line 99
reformat defined in line 37; used 1 times

Defined macros

COREBUFSIZE defined in line 598; used 1 times
MERGESIZE defined in line 599; used 2 times
  • in line 612(2)
Last modified: 1995-04-14
Generated: 2016-12-26
Generated by src2html V0.67
page hit count: 4708
Valid CSS Valid XHTML 1.0 Strict