/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that this notice is preserved and that due credit is given
 * to the University of California at Berkeley.  The name of the University
 * may not be used to endorse or promote products derived from this
 * software without specific prior written permission.  This software
 * is provided ``as is'' without express or implied warranty.
 *
 *	@(#)uipc_socket2.c	7.3 (Berkeley) 1/28/88
 */

#include "param.h"
#include "systm.h"
#include "user.h"
#include "proc.h"
#include "file.h"
#include "inode.h"
#include "buf.h"
#include "mbuf.h"
#include "protosw.h"
#include "socket.h"
#include "socketvar.h"

/*
 * Primitive routines for operating on sockets and socket buffers
 */

/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_q0 for connections in progress
 * and so_q for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_q0 by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_q, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_q0 or so_q, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

soisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
	WAKEUP((caddr_t)&so->so_timeo);
}

soisconnected(so)
	register struct socket *so;
{
	register struct socket *head = so->so_head;

	if (head) {
		if (soqremque(so, 0) == 0)
			panic("soisconnected");
		soqinsque(head, so, 1);
		sorwakeup(head);
		WAKEUP((caddr_t)&head->so_timeo);
	}
	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTED;
	WAKEUP((caddr_t)&so->so_timeo);
	sorwakeup(so);
	sowwakeup(so);
}

soisdisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	WAKEUP((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

soisdisconnected(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
	WAKEUP((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 */
struct socket *
sonewconn(head)
	register struct socket *head;
{
	register struct socket *so;
	register struct mbuf *m;

	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		goto bad;
	m = m_getclr(M_DONTWAIT, MT_SOCKET);
	if (m == NULL)
		goto bad;
	so = mtod(m, struct socket *);
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgrp = head->so_pgrp;
	soqinsque(head, so, 0);
	if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) {
		(void) soqremque(so, 0);
		(void) m_free(m);
		goto bad;
	}
	return (so);
bad:
	return ((struct socket *)0);
}

soqinsque(head, so, q)
	register struct socket *head, *so;
	int q;
{

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_q0 = head->so_q0;
		head->so_q0 = so;
	} else {
		head->so_qlen++;
		so->so_q = head->so_q;
		head->so_q = so;
	}
}

soqremque(so, q)
	register struct socket *so;
	int q;
{
	register struct socket *head, *prev, *next;

	head = so->so_head;
	prev = head;
	for (;;) {
		next = q ? prev->so_q : prev->so_q0;
		if (next == so)
			break;
		if (next == head)
			return (0);
		prev = next;
	}
	if (q == 0) {
		prev->so_q0 = next->so_q0;
		head->so_q0len--;
	} else {
		prev->so_q = next->so_q;
		head->so_qlen--;
	}
	next->so_q0 = next->so_q = 0;
	next->so_head = 0;
	return (1);
}

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it is normally applied to a socket by the protocol code
 * (in case PRU_SHUTDOWN) when the user informs the system that no
 * more data is to be sent.  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

socantsendmore(so)
	struct socket *so;
{

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

socantrcvmore(so)
	struct socket *so;
{

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Socket select/wakeup routines.
 */

/*
 * Queue a process for a select on a socket buffer.
 */
sbselqueue(sb)
	register struct sockbuf *sb;
{
	register struct proc *p;
	extern int selwait;

	if ((p = sb->sb_sel) && (caddr_t)mfkd(&p->p_wchan) == (caddr_t)&selwait)
		sb->sb_flags |= SB_COLL;
	else
		sb->sb_sel = u.u_procp;
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 */
sbwait(sb)
	register struct sockbuf *sb;
{

	sb->sb_flags |= SB_WAIT;
	SLEEP((caddr_t)&sb->sb_cc, PZERO+1);
}

/*
 * Wakeup processes waiting on a socket buffer.
 */
sbwakeup(sb)
	register struct sockbuf *sb;
{

	if (sb->sb_sel) {
		SELWAKEUP(sb->sb_sel, (long)(sb->sb_flags & SB_COLL));
		sb->sb_sel = 0;
		sb->sb_flags &= ~SB_COLL;
	}
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		WAKEUP((caddr_t)&sb->sb_cc);
	}
}

/*
 * Wakeup socket readers and writers.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 */
sowakeup(so, sb)
	register struct socket *so;
	struct sockbuf *sb;
{
	register struct proc *p;

	sbwakeup(sb);
	if (so->so_state & SS_ASYNC) {
		if (so->so_pgrp < 0)
			GSIGNAL(-so->so_pgrp, SIGIO);
		else if (so->so_pgrp > 0 &&
		    (p = (struct proc *)NETPFIND(so->so_pgrp)) != 0)
			NETPSIGNAL(p, SIGIO);
	}
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_act field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space for the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

soreserve(so, sndcc, rcvcc)
	register struct socket *so;
	int sndcc, rcvcc;
{

	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	return (0);
bad2:
	sbrelease(&so->so_snd);
bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale cc so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
sbreserve(sb, cc)
	struct sockbuf *sb;
	int cc;
{

#ifdef FIX_43
	if ((unsigned) cc > (unsigned)SB_MAX * CLBYTES / (2 * MSIZE + CLBYTES))
		return (0);
#else
	if ((unsigned) cc > (unsigned)SB_MAX)
		return (0);
#endif
	sb->sb_hiwat = cc;
	sb->sb_mbmax = MIN(cc * 2, SB_MAX);
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
sbrelease(sb)
	struct sockbuf *sb;
{

	sbflush(sb);
	sb->sb_hiwat = sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer, and
 * is then removed from the socket buffer with sbdrop() or
 * sbdroprecord() when the data is acknowledged by the peer.
 */

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.  The additional space associated with
The additional space associated 398: * the mbuf chain is recorded in sb. Empty mbufs are 399: * discarded and mbufs are compacted where possible. 400: */ 401: sbappend(sb, m) 402: struct sockbuf *sb; 403: struct mbuf *m; 404: { 405: register struct mbuf *n; 406: 407: if (m == 0) 408: return; 409: if (n = sb->sb_mb) { 410: while (n->m_act) 411: n = n->m_act; 412: while (n->m_next) 413: n = n->m_next; 414: } 415: sbcompress(sb, m, n); 416: } 417: 418: /* 419: * As above, except the mbuf chain 420: * begins a new record. 421: */ 422: sbappendrecord(sb, m0) 423: register struct sockbuf *sb; 424: register struct mbuf *m0; 425: { 426: register struct mbuf *m; 427: 428: if (m0 == 0) 429: return; 430: if (m = sb->sb_mb) 431: while (m->m_act) 432: m = m->m_act; 433: /* 434: * Put the first mbuf on the queue. 435: * Note this permits zero length records. 436: */ 437: sballoc(sb, m0); 438: if (m) 439: m->m_act = m0; 440: else 441: sb->sb_mb = m0; 442: m = m0->m_next; 443: m0->m_next = 0; 444: sbcompress(sb, m, m0); 445: } 446: 447: /* 448: * Append address and data, and optionally, rights 449: * to the receive queue of a socket. Return 0 if 450: * no space in sockbuf or insufficient mbufs. 451: */ 452: sbappendaddr(sb, asa, m0, rights0) 453: register struct sockbuf *sb; 454: struct sockaddr *asa; 455: struct mbuf *rights0, *m0; 456: { 457: register struct mbuf *m, *n; 458: int space = sizeof (*asa); 459: 460: for (m = m0; m; m = m->m_next) 461: space += m->m_len; 462: if (rights0) 463: space += rights0->m_len; 464: if (space > sbspace(sb)) 465: return (0); 466: MGET(m, M_DONTWAIT, MT_SONAME); 467: if (m == 0) 468: return (0); 469: *mtod(m, struct sockaddr *) = *asa; 470: m->m_len = sizeof (*asa); 471: if (rights0 && rights0->m_len) { 472: m->m_next = m_copy(rights0, 0, rights0->m_len); 473: if (m->m_next == 0) { 474: m_freem(m); 475: return (0); 476: } 477: sballoc(sb, m->m_next); 478: } 479: sballoc(sb, m); 480: if (n = sb->sb_mb) { 481: while (n->m_act) 482: n = n->m_act; 483: n->m_act = m; 484: } else 485: sb->sb_mb = m; 486: if (m->m_next) 487: m = m->m_next; 488: if (m0) 489: sbcompress(sb, m0, m); 490: return (1); 491: } 492: 493: sbappendrights(sb, m0, rights) 494: struct sockbuf *sb; 495: struct mbuf *rights, *m0; 496: { 497: register struct mbuf *m, *n; 498: int space = 0; 499: 500: if (rights == 0) 501: panic("sbappendrights"); 502: for (m = m0; m; m = m->m_next) 503: space += m->m_len; 504: space += rights->m_len; 505: if (space > sbspace(sb)) 506: return (0); 507: m = m_copy(rights, 0, rights->m_len); 508: if (m == 0) 509: return (0); 510: sballoc(sb, m); 511: if (n = sb->sb_mb) { 512: while (n->m_act) 513: n = n->m_act; 514: n->m_act = m; 515: } else 516: sb->sb_mb = m; 517: if (m0) 518: sbcompress(sb, m0, m); 519: return (1); 520: } 521: 522: /* 523: * Compress mbuf chain m into the socket 524: * buffer sb following mbuf n. If n 525: * is null, the buffer is presumed empty. 
 */
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{

	while (m) {
		if (m->m_len == 0) {
			m = m_free(m);
			continue;
		}
		if (n && n->m_off <= MMAXOFF && m->m_off <= MMAXOFF &&
		    (n->m_off + n->m_len + m->m_len) <= MMAXOFF &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		sballoc(sb, m);
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		n = m;
		m = m->m_next;
		n->m_next = 0;
	}
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
sbflush(sb)
	register struct sockbuf *sb;
{

	if (sb->sb_flags & SB_LOCK)
		panic("sbflush");
	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);
	if (sb->sb_cc || sb->sb_mbcnt || sb->sb_mb)
		panic("sbflush 2");
}

/*
 * Drop data from (the front of) a sockbuf.
 */
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *mn;
	struct mbuf *next;

	next = (m = sb->sb_mb) ? m->m_act : 0;
	while (len > 0) {
		if (m == 0) {
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_act;
			continue;
		}
		if (m->m_len > len) {
			m->m_len -= len;
			m->m_off += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_act = next;
	} else
		sb->sb_mb = next;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
sbdroprecord(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_act;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while (m = mn);
	}
}
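
/*
 * Illustrative sketch (not compiled): how a connection-oriented
 * protocol might drive the state-transition routines and the
 * so_q0/so_q queues described at the top of this file.  The entry
 * points named xp_* are hypothetical and exist only for illustration.
 */
#ifdef notdef
/*
 * Active side: from the protocol's PRU_CONNECT case, mark the
 * connection as in progress and send the connection request; the
 * protocol's input routine later calls soisconnected() when the
 * handshake completes.
 */
xp_connect(so)
	struct socket *so;
{

	soisconnecting(so);
	/* ... send connection request to the peer ... */
	return (0);
}

/*
 * Passive side: when a connection request arrives on a listening
 * socket, sonewconn() allocates a socket queued on so_q0.
 */
xp_input_request(head)
	struct socket *head;
{
	register struct socket *so;

	so = sonewconn(head);
	if (so == 0)
		return;			/* queue full or out of mbufs */
	/* ... record protocol state, answer the request ... */
}

/*
 * Once the handshake completes, soisconnected() moves the socket
 * from so_q0 to so_q, where accept() will find it, and wakes any
 * process sleeping in accept().
 */
xp_input_established(so)
	struct socket *so;
{

	soisconnected(so);
}
#endif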
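
/*
 * Illustrative sketch (not compiled): receive- and send-buffer usage
 * following the record conventions and sbappend/sbdrop description
 * above.  The routines xp_deliver and xp_acked are hypothetical.
 */
#ifdef notdef
/*
 * Datagram delivery: append the sender's address and the data as a
 * new record, then wake any reader.  sbappendaddr() itself checks
 * that enough space and mbufs are available and returns 0 on failure,
 * in which case the caller still owns (and here drops) the data.
 */
xp_deliver(so, from, m)
	struct socket *so;
	struct sockaddr *from;
	struct mbuf *m;
{

	if (sbappendaddr(&so->so_rcv, from, m, (struct mbuf *)0) == 0) {
		m_freem(m);		/* no room; drop the datagram */
		return;
	}
	sorwakeup(so);
}

/*
 * Reliable protocol: data awaiting acknowledgement stays in so_snd;
 * m_copy() is used for (re)transmission, and sbdrop() removes data
 * once the peer acknowledges it.
 */
xp_acked(so, len)
	struct socket *so;
	int len;
{

	sbdrop(&so->so_snd, len);
	sowwakeup(so);			/* writer may now have room */
}
#endif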