/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)ufs_bio.c	7.1 (Berkeley) 6/5/86
 */

#include "../machine/pte.h"

#include "param.h"
#include "systm.h"
#include "dir.h"
#include "user.h"
#include "buf.h"
#include "conf.h"
#include "proc.h"
#include "seg.h"
#include "vm.h"
#include "trace.h"

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
	bp = getblk(dev, blkno, size);
	if (bp->b_flags&B_DONE) {
		trace(TR_BREADHIT, pack(dev, size), blkno);
		return (bp);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	(*bdevsw[major(dev)].d_strategy)(bp);
	trace(TR_BREADMISS, pack(dev, size), blkno);
	u.u_ru.ru_inblock++;		/* pay for read */
	biowait(bp);
	return (bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, size, rablkno, rabsize)
	dev_t dev;
	daddr_t blkno; int size;
	daddr_t rablkno; int rabsize;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	/*
	 * If the block isn't in core, then allocate
	 * a buffer and initiate i/o (getblk checks
	 * for a cache hit).
	 */
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno, size);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breada");
			(*bdevsw[major(dev)].d_strategy)(bp);
			trace(TR_BREADMISS, pack(dev, size), blkno);
			u.u_ru.ru_inblock++;	/* pay for read */
		} else
			trace(TR_BREADHIT, pack(dev, size), blkno);
	}

	/*
	 * If there's a read-ahead block, start i/o
	 * on it also (as above).
	 */
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno, rabsize);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(dev, rabsize), rablkno);
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			(*bdevsw[major(dev)].d_strategy)(rabp);
			trace(TR_BREADMISSRA, pack(dev, rabsize), rablkno);
			u.u_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was in core, let bread get it.
	 * If block wasn't in core, then the read was started
	 * above, and just wait for it.
	 */
	if (bp == NULL)
		return (bread(dev, blkno, size));
	biowait(bp);
	return (bp);
}
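
/*
 * Illustrative sketch only (not part of the original source): a
 * hypothetical file-system read path might use breada to overlap the
 * transfer of the next logical block with use of the current one,
 * falling back to bread when no read-ahead candidate is known.  The
 * inode pointer ip, logical block lbn, and the helpers fs_blkno() and
 * fs_blksize() are assumed for the example.
 *
 *	struct buf *bp;
 *	daddr_t bn = fs_blkno(ip, lbn);
 *	daddr_t rabn = fs_blkno(ip, lbn + 1);
 *	int size = fs_blksize(ip, lbn);
 *
 *	if (rabn != 0)
 *		bp = breada(ip->i_dev, bn, size, rabn, fs_blksize(ip, lbn + 1));
 *	else
 *		bp = bread(ip->i_dev, bn, size);
 *	if (u.u_error == 0)
 *		...copy data out of the buffer...;
 *	brelse(bp);
 */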

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
	register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if ((flag&B_DELWRI) == 0)
		u.u_ru.ru_oublock++;		/* no one paid yet */
	trace(TR_BWRITE, pack(bp->b_dev, bp->b_bcount), bp->b_blkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	/*
	 * If the write was synchronous, then await i/o completion.
	 * If the write was "delayed", then we put the buffer on
	 * the queue of blocks awaiting i/o completion status.
	 */
	if ((flag&B_ASYNC) == 0) {
		biowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	register int flags;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_ru.ru_oublock++;		/* no one paid yet */
	flags = bdevsw[major(bp->b_dev)].d_flags;
	if (flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	register s;

	trace(TR_BRELSE, pack(bp->b_dev, bp->b_bufsize), bp->b_blkno);
	/*
	 * If someone is waiting for the buffer, or
	 * is waiting for a buffer, wake 'em up.
	 */
	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	if (bp->b_flags&B_ERROR)
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			bp->b_dev = NODEV;		/* no assoc */

	/*
	 * Stick the buffer back on a free list.
	 */
	s = splbio();
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bfreelist[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(dev, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno == blkno && bp->b_dev == dev &&
		    (bp->b_flags & B_INVAL) == 0)
			return (1);
	return (0);
}

/*
 * Return the block's buffer if it is resident in the cache;
 * otherwise return 0 without starting any I/O.
 */
struct buf *
baddr(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno, size));
	return (0);
}
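
/*
 * Illustrative sketch only (not part of the original source): a
 * hypothetical file-system write path choosing among the three write
 * interfaces above.  A full block that will not be touched again soon
 * can go out asynchronously with bawrite; a partially filled block that
 * is likely to be rewritten is better left dirty in the cache with
 * bdwrite; a caller that must know the outcome uses the synchronous
 * bwrite and checks u.u_error afterwards.  The predicate names are
 * assumed for the example.
 *
 *	if (whole_block_written && !likely_to_be_rewritten)
 *		bawrite(bp);
 *	else if (partial_block_written)
 *		bdwrite(bp);
 *	else {
 *		bwrite(bp);
 *		if (u.u_error)
 *			...handle the write error...;
 *	}
 */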

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, *dp;
	int s;

	if (size > MAXBSIZE)
		panic("getblk: size too big");
	/*
	 * To prevent overflow of 32-bit ints when converting block
	 * numbers to byte offsets, blknos > 2^32 / DEV_BSIZE are set
	 * to the maximum number that can be converted to a byte offset
	 * without overflow.  This is historic code; what bug it fixed,
	 * or whether it is still a reasonable thing to do is open to
	 * dispute.  mkm 9/85
	 */
	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-DEV_BSHIFT))
		blkno = 1 << ((sizeof(int)*NBBY-DEV_BSHIFT) + 1);
	/*
	 * Search the cache for the block.  If we hit, but
	 * the buffer is in use for i/o, then we wait until
	 * the i/o has completed.
	 */
	dp = BUFHASH(dev, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_dev != dev ||
		    bp->b_flags&B_INVAL)
			continue;
		s = splbio();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
		notavail(bp);
		if (bp->b_bcount != size && brealloc(bp, size) == 0)
			goto loop;
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	bp = getnewbuf();
	bfree(bp);
	bremhash(bp);
	binshash(bp, dp);
	bp->b_dev = dev;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	if (brealloc(bp, size) == 0)
		goto loop;
	return (bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *flist;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
loop:
	bp = getnewbuf();
	bp->b_flags |= B_INVAL;
	bfree(bp);
	bremhash(bp);
	flist = &bfreelist[BQ_AGE];
	binshash(bp, flist);
	bp->b_dev = (dev_t)NODEV;
	bp->b_error = 0;
	if (brealloc(bp, size) == 0)
		goto loop;
	return (bp);
}
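
/*
 * Illustrative sketch only (not part of the original source): getblk
 * is useful on its own when the caller intends to overwrite the whole
 * block and therefore has no need to read the old contents first,
 * while geteblk provides scratch space tied to no device at all.
 * MY_BSIZE and the fill steps are assumed for the example.
 *
 *	struct buf *bp, *ebp;
 *
 *	bp = getblk(dev, blkno, MY_BSIZE);	old contents don't matter
 *	...fill the buffer's data completely...;
 *	bdwrite(bp);				dirty block stays in the cache
 *
 *	ebp = geteblk(MY_BSIZE);		anonymous scratch buffer
 *	...use the scratch space...;
 *	brelse(ebp);				goes back on a free list
 */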

/*
 * Allocate space associated with a buffer.
 * If we can't get space, the buffer is released.
 */
brealloc(bp, size)
	register struct buf *bp;
	int size;
{
	daddr_t start, last;
	register struct buf *ep;
	struct buf *dp;
	int s;

	/*
	 * First we need to make sure that any overlapping previous I/O
	 * is disposed of.
	 */
	if (size == bp->b_bcount)
		return (1);
	if (size < bp->b_bcount) {
		if (bp->b_flags & B_DELWRI) {
			bwrite(bp);
			return (0);
		}
		if (bp->b_flags & B_LOCKED)
			panic("brealloc");
		return (allocbuf(bp, size));
	}
	bp->b_flags &= ~B_DONE;
	if (bp->b_dev == NODEV)
		return (allocbuf(bp, size));

	trace(TR_BREALLOC, pack(bp->b_dev, size), bp->b_blkno);
	/*
	 * Search the cache for any buffers that overlap the one that we
	 * are trying to allocate.  Overlapping buffers must be marked
	 * invalid, after being written out if they are dirty (indicated
	 * by B_DELWRI).  A disk block must be mapped by at most one buffer
	 * at any point in time.  Care must be taken to avoid deadlocking
	 * when two buffers are trying to get the same set of disk blocks.
	 */
	start = bp->b_blkno;
	last = start + btodb(size) - 1;
	dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep == bp || ep->b_dev != bp->b_dev || (ep->b_flags&B_INVAL))
			continue;
		/* look for overlap */
		if (ep->b_bcount == 0 || ep->b_blkno > last ||
		    ep->b_blkno + btodb(ep->b_bcount) <= start)
			continue;
		s = splbio();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
		notavail(ep);
		if (ep->b_flags & B_DELWRI) {
			bwrite(ep);
			goto loop;
		}
		ep->b_flags |= B_INVAL;
		brelse(ep);
	}
	return (allocbuf(bp, size));
}

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf()
{
	register struct buf *bp, *dp;
	int s;

loop:
	s = splbio();
	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		splx(s);
		goto loop;
	}
	splx(s);
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_dev, bp->b_bufsize), bp->b_blkno);
	bp->b_flags = B_BUSY;
	return (bp);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = splbio();
	while ((bp->b_flags&B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	if (u.u_error == 0)			/* XXX */
		u.u_error = geterror(bp);
}

/*
 * Mark I/O complete on a buffer.
 * If someone should be called, e.g. the pageout
 * daemon, do so.  Otherwise, wake up anyone
 * waiting for it.
 */
biodone(bp)
	register struct buf *bp;
{

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
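
/*
 * Illustrative sketch only (not part of the original source): the
 * B_CALL path in biodone lets a subsystem be notified directly when a
 * transfer finishes instead of sleeping in biowait.  A hypothetical
 * consumer sets b_iodone before queueing the buffer; the driver's
 * completion processing then calls back through biodone.  The routine
 * name my_done is assumed for the example.
 *
 *	bp->b_flags |= B_CALL | B_ASYNC;
 *	bp->b_iodone = my_done;
 *	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
 *
 *	my_done(bp)
 *		register struct buf *bp;
 *	{
 *		...note completion, check geterror(bp), release bp...;
 *	}
 */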

/*
 * Ensure that no part of a specified block is in an incore buffer.
 */
blkflush(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	long size;
{
	register struct buf *ep;
	struct buf *dp;
	daddr_t start, last;
	int s;

	start = blkno;
	last = start + btodb(size) - 1;
	dp = BUFHASH(dev, blkno);
loop:
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep->b_dev != dev || (ep->b_flags&B_INVAL))
			continue;
		/* look for overlap */
		if (ep->b_bcount == 0 || ep->b_blkno > last ||
		    ep->b_blkno + btodb(ep->b_bcount) <= start)
			continue;
		s = splbio();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			splx(s);
			goto loop;
		}
		if (ep->b_flags & B_DELWRI) {
			splx(s);
			notavail(ep);
			bwrite(ep);
			goto loop;
		}
		splx(s);
	}
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 */
bflush(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct buf *flist;
	int s;

loop:
	s = splbio();
	for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++)
		for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
			if ((bp->b_flags & B_DELWRI) == 0)
				continue;
			if (dev == NODEV || dev == bp->b_dev) {
				bp->b_flags |= B_ASYNC;
				notavail(bp);
				bwrite(bp);
				splx(s);
				goto loop;
			}
		}
	splx(s);
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized code.
 */
geterror(bp)
	register struct buf *bp;
{
	int error = 0;

	if (bp->b_flags&B_ERROR)
		if ((error = bp->b_error) == 0)
			return (EIO);
	return (error);
}

/*
 * Invalidate in-core blocks belonging to a closed or unmounted filesystem.
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues.  Until that happy day, this suffices for
 * correctness.				... kre
 */
binval(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct bufhd *hp;
#define	dp ((struct buf *)hp)

	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
			if (bp->b_dev == dev)
				bp->b_flags |= B_INVAL;
}
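
/*
 * Illustrative sketch only (not part of the original source): the
 * comments above suggest the intended ordering at unmount time - push
 * out any delayed writes for the device first, then invalidate whatever
 * cached blocks remain so that stale data cannot be found later.  The
 * surrounding unmount logic is assumed for the example.
 *
 *	bflush(dev);		start async writes for dirty blocks on dev
 *	...wait for or otherwise settle outstanding i/o...;
 *	binval(dev);		mark remaining cached blocks invalid
 */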