1: /* 2: * Copyright (c) 1982, 1986 Regents of the University of California. 3: * All rights reserved. The Berkeley software License Agreement 4: * specifies the terms and conditions for redistribution. 5: * 6: * @(#)ufs_inode.c 7.1 (Berkeley) 6/5/86 7: */ 8: 9: #include "param.h" 10: #include "systm.h" 11: #include "mount.h" 12: #include "dir.h" 13: #include "user.h" 14: #include "inode.h" 15: #include "fs.h" 16: #include "buf.h" 17: #include "cmap.h" 18: #ifdef QUOTA 19: #include "quota.h" 20: #endif 21: #include "kernel.h" 22: 23: #define INOHSZ 512 24: #if ((INOHSZ&(INOHSZ-1)) == 0) 25: #define INOHASH(dev,ino) (((dev)+(ino))&(INOHSZ-1)) 26: #else 27: #define INOHASH(dev,ino) (((unsigned)((dev)+(ino)))%INOHSZ) 28: #endif 29: 30: union ihead { /* inode LRU cache, Chris Maltby */ 31: union ihead *ih_head[2]; 32: struct inode *ih_chain[2]; 33: } ihead[INOHSZ]; 34: 35: struct inode *ifreeh, **ifreet; 36: 37: /* 38: * Initialize hash links for inodes 39: * and build inode free list. 40: */ 41: ihinit() 42: { 43: register int i; 44: register struct inode *ip = inode; 45: register union ihead *ih = ihead; 46: 47: for (i = INOHSZ; --i >= 0; ih++) { 48: ih->ih_head[0] = ih; 49: ih->ih_head[1] = ih; 50: } 51: ifreeh = ip; 52: ifreet = &ip->i_freef; 53: ip->i_freeb = &ifreeh; 54: ip->i_forw = ip; 55: ip->i_back = ip; 56: for (i = ninode; --i > 0; ) { 57: ++ip; 58: ip->i_forw = ip; 59: ip->i_back = ip; 60: *ifreet = ip; 61: ip->i_freeb = ifreet; 62: ifreet = &ip->i_freef; 63: } 64: ip->i_freef = NULL; 65: } 66: 67: #ifdef notdef 68: /* 69: * Find an inode if it is incore. 70: * This is the equivalent, for inodes, 71: * of ``incore'' in bio.c or ``pfind'' in subr.c. 72: */ 73: struct inode * 74: ifind(dev, ino) 75: dev_t dev; 76: ino_t ino; 77: { 78: register struct inode *ip; 79: register union ihead *ih; 80: 81: ih = &ihead[INOHASH(dev, ino)]; 82: for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) 83: if (ino==ip->i_number && dev==ip->i_dev) 84: return (ip); 85: return ((struct inode *)0); 86: } 87: #endif notdef 88: 89: /* 90: * Look up an inode by device,inumber. 91: * If it is in core (in the inode structure), 92: * honor the locking protocol. 93: * If it is not in core, read it in from the 94: * specified device. 95: * If the inode is mounted on, perform 96: * the indicated indirection. 97: * In all cases, a pointer to a locked 98: * inode structure is returned. 99: * 100: * panic: no imt -- if the mounted file 101: * system is not in the mount table. 102: * "cannot happen" 103: */ 104: struct inode * 105: iget(dev, fs, ino) 106: dev_t dev; 107: register struct fs *fs; 108: ino_t ino; 109: { 110: register struct inode *ip; 111: register union ihead *ih; 112: register struct mount *mp; 113: register struct buf *bp; 114: register struct dinode *dp; 115: register struct inode *iq; 116: 117: loop: 118: ih = &ihead[INOHASH(dev, ino)]; 119: for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) 120: if (ino == ip->i_number && dev == ip->i_dev) { 121: /* 122: * Following is essentially an inline expanded 123: * copy of igrab(), expanded inline for speed, 124: * and so that the test for a mounted on inode 125: * can be deferred until after we are sure that 126: * the inode isn't busy. 127: */ 128: if ((ip->i_flag&ILOCKED) != 0) { 129: ip->i_flag |= IWANT; 130: sleep((caddr_t)ip, PINOD); 131: goto loop; 132: } 133: if ((ip->i_flag&IMOUNT) != 0) { 134: for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++) 135: if(mp->m_inodp == ip) { 136: dev = mp->m_dev; 137: fs = mp->m_bufp->b_un.b_fs; 138: ino = ROOTINO; 139: goto loop; 140: } 141: panic("no imt"); 142: } 143: if (ip->i_count == 0) { /* ino on free list */ 144: if (iq = ip->i_freef) 145: iq->i_freeb = ip->i_freeb; 146: else 147: ifreet = ip->i_freeb; 148: *ip->i_freeb = iq; 149: ip->i_freef = NULL; 150: ip->i_freeb = NULL; 151: } 152: ip->i_count++; 153: ip->i_flag |= ILOCKED; 154: return(ip); 155: } 156: 157: if ((ip = ifreeh) == NULL) { 158: tablefull("inode"); 159: u.u_error = ENFILE; 160: return(NULL); 161: } 162: if (ip->i_count) 163: panic("free inode isn't"); 164: if (iq = ip->i_freef) 165: iq->i_freeb = &ifreeh; 166: ifreeh = iq; 167: ip->i_freef = NULL; 168: ip->i_freeb = NULL; 169: /* 170: * Now to take inode off the hash chain it was on 171: * (initially, or after an iflush, it is on a "hash chain" 172: * consisting entirely of itself, and pointed to by no-one, 173: * but that doesn't matter), and put it on the chain for 174: * its new (ino, dev) pair 175: */ 176: remque(ip); 177: insque(ip, ih); 178: ip->i_dev = dev; 179: ip->i_fs = fs; 180: ip->i_number = ino; 181: cacheinval(ip); 182: ip->i_flag = ILOCKED; 183: ip->i_count++; 184: ip->i_lastr = 0; 185: #ifdef QUOTA 186: dqrele(ip->i_dquot); 187: #endif 188: bp = bread(dev, fsbtodb(fs, itod(fs, ino)), (int)fs->fs_bsize); 189: /* 190: * Check I/O errors 191: */ 192: if ((bp->b_flags&B_ERROR) != 0) { 193: brelse(bp); 194: /* 195: * the inode doesn't contain anything useful, so it would 196: * be misleading to leave it on its hash chain. 197: * 'iput' will take care of putting it back on the free list. 198: */ 199: remque(ip); 200: ip->i_forw = ip; 201: ip->i_back = ip; 202: /* 203: * we also loose its inumber, just in case (as iput 204: * doesn't do that any more) - but as it isn't on its 205: * hash chain, I doubt if this is really necessary .. kre 206: * (probably the two methods are interchangable) 207: */ 208: ip->i_number = 0; 209: #ifdef QUOTA 210: ip->i_dquot = NODQUOT; 211: #endif 212: iput(ip); 213: return(NULL); 214: } 215: dp = bp->b_un.b_dino; 216: dp += itoo(fs, ino); 217: ip->i_ic = dp->di_ic; 218: brelse(bp); 219: #ifdef QUOTA 220: if (ip->i_mode == 0) 221: ip->i_dquot = NODQUOT; 222: else 223: ip->i_dquot = inoquota(ip); 224: #endif 225: return (ip); 226: } 227: 228: /* 229: * Convert a pointer to an inode into a reference to an inode. 230: * 231: * This is basically the internal piece of iget (after the 232: * inode pointer is located) but without the test for mounted 233: * filesystems. It is caller's responsibility to check that 234: * the inode pointer is valid. 235: */ 236: igrab(ip) 237: register struct inode *ip; 238: { 239: while ((ip->i_flag&ILOCKED) != 0) { 240: ip->i_flag |= IWANT; 241: sleep((caddr_t)ip, PINOD); 242: } 243: if (ip->i_count == 0) { /* ino on free list */ 244: register struct inode *iq; 245: 246: if (iq = ip->i_freef) 247: iq->i_freeb = ip->i_freeb; 248: else 249: ifreet = ip->i_freeb; 250: *ip->i_freeb = iq; 251: ip->i_freef = NULL; 252: ip->i_freeb = NULL; 253: } 254: ip->i_count++; 255: ip->i_flag |= ILOCKED; 256: } 257: 258: /* 259: * Decrement reference count of 260: * an inode structure. 261: * On the last reference, 262: * write the inode out and if necessary, 263: * truncate and deallocate the file. 264: */ 265: iput(ip) 266: register struct inode *ip; 267: { 268: 269: if ((ip->i_flag & ILOCKED) == 0) 270: panic("iput"); 271: IUNLOCK(ip); 272: irele(ip); 273: } 274: 275: irele(ip) 276: register struct inode *ip; 277: { 278: int mode; 279: 280: if (ip->i_count == 1) { 281: ip->i_flag |= ILOCKED; 282: if (ip->i_nlink <= 0 && ip->i_fs->fs_ronly == 0) { 283: itrunc(ip, (u_long)0); 284: mode = ip->i_mode; 285: ip->i_mode = 0; 286: ip->i_rdev = 0; 287: ip->i_flag |= IUPD|ICHG; 288: ifree(ip, ip->i_number, mode); 289: #ifdef QUOTA 290: (void) chkiq(ip->i_dev, ip, ip->i_uid, 0); 291: dqrele(ip->i_dquot); 292: ip->i_dquot = NODQUOT; 293: #endif 294: } 295: IUPDAT(ip, &time, &time, 0); 296: IUNLOCK(ip); 297: ip->i_flag = 0; 298: /* 299: * Put the inode on the end of the free list. 300: * Possibly in some cases it would be better to 301: * put the inode at the head of the free list, 302: * (eg: where i_mode == 0 || i_number == 0) 303: * but I will think about that later .. kre 304: * (i_number is rarely 0 - only after an i/o error in iget, 305: * where i_mode == 0, the inode will probably be wanted 306: * again soon for an ialloc, so possibly we should keep it) 307: */ 308: if (ifreeh) { 309: *ifreet = ip; 310: ip->i_freeb = ifreet; 311: } else { 312: ifreeh = ip; 313: ip->i_freeb = &ifreeh; 314: } 315: ip->i_freef = NULL; 316: ifreet = &ip->i_freef; 317: } else if (!(ip->i_flag & ILOCKED)) 318: ITIMES(ip, &time, &time); 319: ip->i_count--; 320: } 321: 322: /* 323: * Check accessed and update flags on 324: * an inode structure. 325: * If any is on, update the inode 326: * with the current time. 327: * If waitfor is given, then must insure 328: * i/o order so wait for write to complete. 329: */ 330: iupdat(ip, ta, tm, waitfor) 331: register struct inode *ip; 332: struct timeval *ta, *tm; 333: int waitfor; 334: { 335: register struct buf *bp; 336: struct dinode *dp; 337: register struct fs *fp; 338: 339: fp = ip->i_fs; 340: if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) != 0) { 341: if (fp->fs_ronly) 342: return; 343: bp = bread(ip->i_dev, fsbtodb(fp, itod(fp, ip->i_number)), 344: (int)fp->fs_bsize); 345: if (bp->b_flags & B_ERROR) { 346: brelse(bp); 347: return; 348: } 349: if (ip->i_flag&IACC) 350: ip->i_atime = ta->tv_sec; 351: if (ip->i_flag&IUPD) 352: ip->i_mtime = tm->tv_sec; 353: if (ip->i_flag&ICHG) 354: ip->i_ctime = time.tv_sec; 355: ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD); 356: dp = bp->b_un.b_dino + itoo(fp, ip->i_number); 357: dp->di_ic = ip->i_ic; 358: if (waitfor) 359: bwrite(bp); 360: else 361: bdwrite(bp); 362: } 363: } 364: 365: #define SINGLE 0 /* index of single indirect block */ 366: #define DOUBLE 1 /* index of double indirect block */ 367: #define TRIPLE 2 /* index of triple indirect block */ 368: /* 369: * Truncate the inode ip to at most 370: * length size. Free affected disk 371: * blocks -- the blocks of the file 372: * are removed in reverse order. 373: * 374: * NB: triple indirect blocks are untested. 375: */ 376: itrunc(oip, length) 377: register struct inode *oip; 378: u_long length; 379: { 380: register daddr_t lastblock; 381: daddr_t bn, lbn, lastiblock[NIADDR]; 382: register struct fs *fs; 383: register struct inode *ip; 384: struct buf *bp; 385: int offset, osize, size, count, level, s; 386: long nblocks, blocksreleased = 0; 387: register int i; 388: dev_t dev; 389: struct inode tip; 390: extern long indirtrunc(); 391: extern struct cmap *mfind(); 392: 393: if (oip->i_size <= length) { 394: oip->i_flag |= ICHG|IUPD; 395: iupdat(oip, &time, &time, 1); 396: return; 397: } 398: /* 399: * Calculate index into inode's block list of 400: * last direct and indirect blocks (if any) 401: * which we want to keep. Lastblock is -1 when 402: * the file is truncated to 0. 403: */ 404: fs = oip->i_fs; 405: lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; 406: lastiblock[SINGLE] = lastblock - NDADDR; 407: lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); 408: lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); 409: nblocks = btodb(fs->fs_bsize); 410: /* 411: * Update the size of the file. If the file is not being 412: * truncated to a block boundry, the contents of the 413: * partial block following the end of the file must be 414: * zero'ed in case it ever become accessable again because 415: * of subsequent file growth. 416: */ 417: osize = oip->i_size; 418: offset = blkoff(fs, length); 419: if (offset == 0) { 420: oip->i_size = length; 421: } else { 422: lbn = lblkno(fs, length); 423: bn = fsbtodb(fs, bmap(oip, lbn, B_WRITE, offset)); 424: if (u.u_error || (long)bn < 0) 425: return; 426: oip->i_size = length; 427: size = blksize(fs, oip, lbn); 428: count = howmany(size, DEV_BSIZE); 429: dev = oip->i_dev; 430: s = splimp(); 431: for (i = 0; i < count; i += CLSIZE) 432: if (mfind(dev, bn + i)) 433: munhash(dev, bn + i); 434: splx(s); 435: bp = bread(dev, bn, size); 436: if (bp->b_flags & B_ERROR) { 437: u.u_error = EIO; 438: oip->i_size = osize; 439: brelse(bp); 440: return; 441: } 442: bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset)); 443: bdwrite(bp); 444: } 445: /* 446: * Update file and block pointers 447: * on disk before we start freeing blocks. 448: * If we crash before free'ing blocks below, 449: * the blocks will be returned to the free list. 450: * lastiblock values are also normalized to -1 451: * for calls to indirtrunc below. 452: */ 453: tip = *oip; 454: tip.i_size = osize; 455: for (level = TRIPLE; level >= SINGLE; level--) 456: if (lastiblock[level] < 0) { 457: oip->i_ib[level] = 0; 458: lastiblock[level] = -1; 459: } 460: for (i = NDADDR - 1; i > lastblock; i--) 461: oip->i_db[i] = 0; 462: oip->i_flag |= ICHG|IUPD; 463: syncip(oip); 464: 465: /* 466: * Indirect blocks first. 467: */ 468: ip = &tip; 469: for (level = TRIPLE; level >= SINGLE; level--) { 470: bn = ip->i_ib[level]; 471: if (bn != 0) { 472: blocksreleased += 473: indirtrunc(ip, bn, lastiblock[level], level); 474: if (lastiblock[level] < 0) { 475: ip->i_ib[level] = 0; 476: free(ip, bn, (off_t)fs->fs_bsize); 477: blocksreleased += nblocks; 478: } 479: } 480: if (lastiblock[level] >= 0) 481: goto done; 482: } 483: 484: /* 485: * All whole direct blocks or frags. 486: */ 487: for (i = NDADDR - 1; i > lastblock; i--) { 488: register off_t bsize; 489: 490: bn = ip->i_db[i]; 491: if (bn == 0) 492: continue; 493: ip->i_db[i] = 0; 494: bsize = (off_t)blksize(fs, ip, i); 495: free(ip, bn, bsize); 496: blocksreleased += btodb(bsize); 497: } 498: if (lastblock < 0) 499: goto done; 500: 501: /* 502: * Finally, look for a change in size of the 503: * last direct block; release any frags. 504: */ 505: bn = ip->i_db[lastblock]; 506: if (bn != 0) { 507: off_t oldspace, newspace; 508: 509: /* 510: * Calculate amount of space we're giving 511: * back as old block size minus new block size. 512: */ 513: oldspace = blksize(fs, ip, lastblock); 514: ip->i_size = length; 515: newspace = blksize(fs, ip, lastblock); 516: if (newspace == 0) 517: panic("itrunc: newspace"); 518: if (oldspace - newspace > 0) { 519: /* 520: * Block number of space to be free'd is 521: * the old block # plus the number of frags 522: * required for the storage we're keeping. 523: */ 524: bn += numfrags(fs, newspace); 525: free(ip, bn, oldspace - newspace); 526: blocksreleased += btodb(oldspace - newspace); 527: } 528: } 529: done: 530: /* BEGIN PARANOIA */ 531: for (level = SINGLE; level <= TRIPLE; level++) 532: if (ip->i_ib[level] != oip->i_ib[level]) 533: panic("itrunc1"); 534: for (i = 0; i < NDADDR; i++) 535: if (ip->i_db[i] != oip->i_db[i]) 536: panic("itrunc2"); 537: /* END PARANOIA */ 538: oip->i_blocks -= blocksreleased; 539: if (oip->i_blocks < 0) /* sanity */ 540: oip->i_blocks = 0; 541: oip->i_flag |= ICHG; 542: #ifdef QUOTA 543: (void) chkdq(oip, -blocksreleased, 0); 544: #endif 545: } 546: 547: /* 548: * Release blocks associated with the inode ip and 549: * stored in the indirect block bn. Blocks are free'd 550: * in LIFO order up to (but not including) lastbn. If 551: * level is greater than SINGLE, the block is an indirect 552: * block and recursive calls to indirtrunc must be used to 553: * cleanse other indirect blocks. 554: * 555: * NB: triple indirect blocks are untested. 556: */ 557: long 558: indirtrunc(ip, bn, lastbn, level) 559: register struct inode *ip; 560: daddr_t bn, lastbn; 561: int level; 562: { 563: register int i; 564: struct buf *bp, *copy; 565: register daddr_t *bap; 566: register struct fs *fs = ip->i_fs; 567: daddr_t nb, last; 568: long factor; 569: int blocksreleased = 0, nblocks; 570: 571: /* 572: * Calculate index in current block of last 573: * block to be kept. -1 indicates the entire 574: * block so we need not calculate the index. 575: */ 576: factor = 1; 577: for (i = SINGLE; i < level; i++) 578: factor *= NINDIR(fs); 579: last = lastbn; 580: if (lastbn > 0) 581: last /= factor; 582: nblocks = btodb(fs->fs_bsize); 583: /* 584: * Get buffer of block pointers, zero those 585: * entries corresponding to blocks to be free'd, 586: * and update on disk copy first. 587: */ 588: copy = geteblk((int)fs->fs_bsize); 589: bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize); 590: if (bp->b_flags&B_ERROR) { 591: brelse(copy); 592: brelse(bp); 593: return (0); 594: } 595: bap = bp->b_un.b_daddr; 596: bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, (u_int)fs->fs_bsize); 597: bzero((caddr_t)&bap[last + 1], 598: (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t)); 599: bwrite(bp); 600: bp = copy, bap = bp->b_un.b_daddr; 601: 602: /* 603: * Recursively free totally unused blocks. 604: */ 605: for (i = NINDIR(fs) - 1; i > last; i--) { 606: nb = bap[i]; 607: if (nb == 0) 608: continue; 609: if (level > SINGLE) 610: blocksreleased += 611: indirtrunc(ip, nb, (daddr_t)-1, level - 1); 612: free(ip, nb, (off_t)fs->fs_bsize); 613: blocksreleased += nblocks; 614: } 615: 616: /* 617: * Recursively free last partial block. 618: */ 619: if (level > SINGLE && lastbn >= 0) { 620: last = lastbn % factor; 621: nb = bap[i]; 622: if (nb != 0) 623: blocksreleased += indirtrunc(ip, nb, last, level - 1); 624: } 625: brelse(bp); 626: return (blocksreleased); 627: } 628: 629: /* 630: * remove any inodes in the inode cache belonging to dev 631: * 632: * There should not be any active ones, return error if any are found 633: * (nb: this is a user error, not a system err) 634: * 635: * Also, count the references to dev by block devices - this really 636: * has nothing to do with the object of the procedure, but as we have 637: * to scan the inode table here anyway, we might as well get the 638: * extra benefit. 639: * 640: * this is called from sumount()/sys3.c when dev is being unmounted 641: */ 642: #ifdef QUOTA 643: iflush(dev, iq) 644: dev_t dev; 645: struct inode *iq; 646: #else 647: iflush(dev) 648: dev_t dev; 649: #endif 650: { 651: register struct inode *ip; 652: register open = 0; 653: 654: for (ip = inode; ip < inodeNINODE; ip++) { 655: #ifdef QUOTA 656: if (ip != iq && ip->i_dev == dev) 657: #else 658: if (ip->i_dev == dev) 659: #endif 660: if (ip->i_count) 661: return(-1); 662: else { 663: remque(ip); 664: ip->i_forw = ip; 665: ip->i_back = ip; 666: /* 667: * as i_count == 0, the inode was on the free 668: * list already, just leave it there, it will 669: * fall off the bottom eventually. We could 670: * perhaps move it to the head of the free 671: * list, but as umounts are done so 672: * infrequently, we would gain very little, 673: * while making the code bigger. 674: */ 675: #ifdef QUOTA 676: dqrele(ip->i_dquot); 677: ip->i_dquot = NODQUOT; 678: #endif 679: } 680: else if (ip->i_count && (ip->i_mode&IFMT)==IFBLK && 681: ip->i_rdev == dev) 682: open++; 683: } 684: return (open); 685: } 686: 687: /* 688: * Lock an inode. If its already locked, set the WANT bit and sleep. 689: */ 690: ilock(ip) 691: register struct inode *ip; 692: { 693: 694: ILOCK(ip); 695: } 696: 697: /* 698: * Unlock an inode. If WANT bit is on, wakeup. 699: */ 700: iunlock(ip) 701: register struct inode *ip; 702: { 703: 704: IUNLOCK(ip); 705: }