1: /*
2: * header.c - header functions plus some other goodies
3: *
4: * TAKEN FROM BNEWS 2.10 6/24/83
5: *
6: */
7:
8: #ifdef RCSIDENT
9: static char *SccsId = "@(#)header.c 2.20 6/24/83";
10: static char *RCSid = "$Header: /usr/local/src/usenet/notes/src/RCS/bnewshead.c,v 2.20 88/10/03 15:34:46 paul Exp Locker: paul $";
11: #endif RCSIDENT
12:
13: #include <stdio.h>
14: #include <sys/types.h>
15: #include "parms.h" /* from notes */
16: #ifdef SYSLOG
17: # include <syslog.h>
18: #endif SYSLOG
19: #include "structs.h" /* ditto */
20: /* above maybe unused */
21: #include "newsgate.h"
22:
23:
char *hfgets ();		/* defined at the end of this file; declared here because it is used first */

static int seenrelay;		/* set to 1 by frmread () when a Relay-Version: line is accepted; reset to 0 at the start of each header read */
static char bfr[PATHLEN]; /* header buffer: holds the header line currently being examined (shared by the read routine, frmread () and getfield ()) */
28:
29: /*
30: * Read header from file fp into *hp. If wholething is FALSE,
31: * it's an incremental read, otherwise start from scratch.
32: * Return (FILE *) if header okay, else NULL.
33: */
34:
35: (hp, fp, wholething)
36: register struct hbuf *hp;
37: FILE * fp;
38: int wholething;
39: {
40: register int len;
41:
42: if (wholething) /* from scratch */
43: bclear ((char *) hp, sizeof (*hp));
44:
45: seenrelay = 0;
46:
47: /*
48: * Check that it's a B news style header.
49: */
50: if (((hfgets (bfr, PATHLEN, fp) != NULL &&
51: *bfr >= 'A' && *bfr <= 'Z') && index (bfr, ':')))
52: if (frmread (fp, hp))
53: goto strip;
54:
55: /*
56: * It's not. Try A news (begins with PROTO).
57: */
58: if (*bfr != PROTO)
59: return (0);
60:
61: /*
62: * Read in an A news format article.
63: */
64: strncpy (hp -> oident, &(bfr[1]), NAMELEN); /* file name */
65: if (!nstrip (hp -> oident))
66: return (0);
67: hfgets (hp -> nbuf, BUFLEN, fp); /* newsgroup list */
68: if (!nstrip (hp -> nbuf))
69: return (0);
70: ngcat (hp -> nbuf); /* trailing delim */
71: hfgets (hp -> path, PATHLEN, fp); /* source path */
72: if (!nstrip (hp -> path))
73: return (0);
74: hfgets (hp -> subdate, DATELEN, fp); /* date */
75: if (!nstrip (hp -> subdate))
76: return (0);
77: hfgets (hp -> title, BUFLEN, fp); /* title */
78: if (!nstrip (hp -> title))
79: return (0);
80:
81: /*
82: * strip off sys! from front of path.
83: */
84: :
85: strcpy (bfr, System);
86: if (strncmp (bfr, hp -> path, (len = strlen (bfr))) == 0
87: && index (NETCHRS, hp -> path[len]))
88: strcpy (hp -> path, &(hp -> path[len + 1]));
89:
90: if (wholething && hp -> from[0] == '\0') /* intuit the from: */
91: intuitfrom (hp); /* if wasn't there */
92:
93: if (wholething) /* Get message ID's. */
94: fixid (hp);
95: return (1);
96: }
97:
98:
99: /*
100: * Get header info from mail-format file.
101: * Return non-zero on success.
102: */
103:
#include <ctype.h>		/* isspace () / isalpha (), used by getfield () and type () */

/*
 * Header line types: the values type () returns and frmread () switches on.
 * All of them are non-zero; frmread () stops reading header lines when
 * type () returns a value that is not greater than zero.
 */
#define FROM 1			/* "From: " containing '@' and no '!' */
#define NEWSGROUP 2		/* "Newsgroups: " */
#define TITLE 3			/* "Subject: " or "Title: " */
#define SUBMIT 4		/* "Posted: " or "Date: " */
#define RECEIVE 5		/* "Date-Received: " or "Received: " */
#define EXPIRE 6		/* "Expires: " */
#define ARTICLEID 7		/* "Article-I.D.: " */
#define MESSAGEID 8		/* "Message-ID: " */
#define REPLYTO 9		/* "Reply-To: " */
#define FOLLOWID 10		/* "References: " */
#define CONTROL 11		/* "Control: " */
#define SENDER 12		/* "Sender: " */
#define FOLLOWTO 13		/* "Followup-To: " */
#define PATH 14			/* "Path: ", or any other "From: " line */
#define POSTVERSION 15		/* "Posting-Version: " */
#define RELAYVERSION 16		/* "Relay-Version: " */
#define DISTRIBUTION 17		/* "Distribution: " */
#define ORGANIZATION 18		/* "Organization: " */
#define NUMLINES 19		/* "Lines: " */
#define KEYWORDS 20		/* "Keywords: " */
#define APPROVED 21		/* "Approved: " */

#define NLINE1 22		/* "Nf-ID: " (notes-specific) */
#define NLINE2 23		/* "Nf-From: " (notes-specific) */

#define OTHER 99		/* any other well-formed header line */


char *malloc ();		/* pre-ANSI style declaration; frmread () allocates with it */
134:
135: frmread (fp, hp)
136: register FILE * fp;
137: register struct hbuf *hp;
138: {
139: int unreccnt = 0;
140: register int i;
141: long curpos;
142: int hdrlineno = 0;
143: int iu;
144:
145: for (iu = 0; iu < NUNREC; iu++)
146: hp -> unrec[iu] = NULL;
147:
148: i = type (bfr);
149: do
150: {
151: curpos = ftell (fp);
152: hdrlineno++;
153: switch (i)
154: {
155: case PATH:
156: getfield (hp -> path);
157: break;
158: case FROM:
159: getfield (hp -> from);
160: break;
161: case NEWSGROUP:
162: getfield (hp -> nbuf);
163: break;
164: case TITLE:
165: getfield (hp -> title);
166: break;
167: case SUBMIT:
168: getfield (hp -> subdate);
169: break;
170: case RECEIVE:
171: getfield (hp -> recdate);
172: break;
173: case EXPIRE:
174: getfield (hp -> expdate);
175: break;
176: case ARTICLEID:
177: getfield (hp -> oident);
178: break;
179: case MESSAGEID:
180: getfield (hp -> ident);
181: break;
182: case REPLYTO:
183: getfield (hp -> replyto);
184: break;
185: case FOLLOWID:
186: getfield (hp -> followid);
187: break;
188: case SENDER:
189: getfield (hp -> sender);
190: break;
191: case FOLLOWTO:
192: getfield (hp -> followto);
193: break;
194: case CONTROL:
195: getfield (hp -> ctlmsg);
196: break;
197: case POSTVERSION:
198: getfield (hp -> postversion);
199: break;
200: case DISTRIBUTION:
201: getfield (hp -> distribution);
202: break;
203: case ORGANIZATION:
204: getfield (hp -> organization);
205: break;
206: case NUMLINES:
207: getfield (hp -> numlines);
208: hp -> intnumlines = atoi (hp -> numlines);
209: break;
210: case KEYWORDS:
211: getfield (hp -> keywords);
212: break;
213: case APPROVED:
214: getfield (hp -> approved);
215: break;
216: case NLINE1: /* notes-specific */
217: getfield (hp -> nline1);
218: break;
219: case NLINE2: /* notes-specific */
220: getfield (hp -> nline2);
221: break;
222: case RELAYVERSION:
223: /*
224: * Only believe a relay version if it's the first
225: * line, otherwise it probably got passed through
226: * by some old neighbor.
227: */
228: if (hdrlineno == 1)
229: {
230: getfield (hp -> relayversion);
231: seenrelay = 1;
232: }
233: break;
234: case OTHER:
235: if (unreccnt < NUNREC)
236: {
237: hp -> unrec[unreccnt] = malloc (strlen (bfr) + 1);
238: strcpy (hp -> unrec[unreccnt], bfr);
239: unreccnt++;
240: }
241: break;
242: }
243: } while ((i = type (hfgets (bfr, LBUFLEN, fp))) > 0);
244:
245: if (*bfr != '\n')
246: {
247: #ifdef SYSLOG
248: syslog (LOG_INFO, "Bizzaro header line: %s\n", bfr);
249: #else
250: printf ("Bizzaro header line: %s\n", bfr);
251: #endif SYSLOG
252: return (0);
253: }
254:
255: /*
256: * Check to see if the REQUIRED headers are present. If so, return
257: * that we found a message. Otherwise barf.
258: */
259: if ((hp -> from[0] || hp -> path[0]) &&
260: hp -> subdate[0] &&
261: (hp -> ident[0] || hp -> oident[0]))
262: {
263: return TRUE;
264: }
265: return FALSE;
266: }
267:
268: /*
269: * There was no From: line in the message (because it was generated by
270: * an old news program). Guess what it should have been and create it.
271: */
272:
273: intuitfrom (hp)
274: register struct hbuf *hp;
275: {
276: char *tp;
277: char *user,
278: *host,
279: *fullname;
280: char *tailpath ();
281: char *at,
282: *dot;
283:
284: tp = tailpath (hp);
285: user = rindex (tp, '!');
286: if (user == NULL)
287: user = tp;
288: else
289: *user++ = '\0';
290:
291: /* Check for an existing Internet address on the end. */
292: at = index (user, '@');
293: if (at)
294: {
295: dot = index (at, '.');
296: if (dot)
297: {
298: strcpy (hp -> from, user);
299: return;
300: }
301: /* @ signs are illegal except for the biggie, so */
302: *at = '%';
303: }
304:
305: if (tp[0] == '.')
306: host = index (tp, '!') + 1;
307: else
308: if (user == tp)
309: host = System;
310: else
311: host = tp;
312:
313: tp = index (host, '@');
314: if (tp != NULL)
315: *tp = 0;
316: sprintf (hp -> from, "%s@%s.%s", user, host, DFLTDOMAIN);
317:
318: fullname = index (hp -> path, '(');
319: if (fullname != NULL)
320: {
321: fullname--;
322: strcat (hp -> from, fullname);
323: *fullname = 0;
324: }
325: }
326:
327: /*
328: * If the message has only one of ident/oident, guess what
329: * the other one should be and fill them both in.
330: */
331:
332: fixid (hp)
333: register struct hbuf *hp;
334: {
335: char lbuf[100];
336: char *p;
337: #ifdef OLD
338: char *q;
339: #endif OLD
340:
341: if (hp -> ident[0] == '\0' && hp -> oident[0] != '\0')
342: {
343: strcpy (lbuf, hp -> oident);
344: p = index (lbuf, '.');
345: if (p == 0)
346: {
347: strcpy (hp -> ident, hp -> oident);
348: return;
349: }
350: *p++ = '\0';
351: /*
352: * It may seem strange that we hardwire ".UUCP" in
353: * here instead of DFLTDOMAIN. However, we are trying
354: * to guess what the domain was on the posting system,
355: * not the local system. Since we don't really know
356: * what the posting system does, we just go with the
357: * majority - almost everyone will be a .UUCP if they
358: * didn't fill in their Message-ID.
359: */
360: sprintf (hp -> ident, "<%s@%s%s>", p, lbuf, ".UUCP");
361: }
362:
363: #ifdef OLD
364: if (hp -> oident[0] == '\0' && hp -> ident[0] != '\0')
365: {
366: strcpy (lbuf, hp -> ident);
367: p = index (lbuf, '@');
368: if (p == 0)
369: {
370: strcpy (hp -> oident, hp -> ident);
371: return;
372: }
373: *p++ = '\0';
374: q = index (p, '.');
375: if (!q)
376: q = index (p, '>');
377: if (q)
378: *q++ = '\0';
379: p[SNLN] = '\0';
380: sprintf (hp -> oident, "%s.%s", p, lbuf + 1);
381: }
382: #endif
383: }
384:
385: /*
386: * Get the given field of a header (char * parm) from bfr, but only
387: * if there's something actually there (after the colon). Don't
388: * bother if we already have an entry for this field.
389: */
390:
391: getfield (hpfield)
392: char *hpfield;
393: {
394: char *ptr;
395:
396: if (hpfield[0])
397: return;
398: for (ptr = index (bfr, ':'); isspace (*++ptr);)
399: ;
400: if (*ptr != '\0')
401: {
402: strcpy (hpfield, ptr);
403: nstrip (hpfield);
404: }
405: return;
406: }
407:
408:
409: /*
410: * Determine the type of the header
411: */
412:
413: #define its(type) (!strncmp(ptr,type,strlen(type)))
414:
415: type (ptr)
416: char *ptr;
417: {
418: char *colon,
419: *space;
420:
421: if (!isalpha (*ptr) && strncmp (ptr, "From ", 5))
422: return FALSE;
423: colon = index (ptr, ':');
424: space = index (ptr, ' ');
425: if (!colon || colon + 1 != space)
426: return FALSE;
427: if (its ("From: "))
428: if (index (ptr, '@') && !index (ptr, '!'))
429: return FROM;
430: else
431: return PATH;
432: if (its ("Path: "))
433: return PATH;
434: if (its ("Newsgroups: "))
435: return NEWSGROUP;
436: if (its ("Subject: ") || its ("Title: "))
437: return TITLE;
438: if (its ("Posted: ") || its ("Date: "))
439: return SUBMIT;
440: if (its ("Date-Received: ") || its ("Received: "))
441: return RECEIVE;
442: if (its ("Expires: "))
443: return EXPIRE;
444: if (its ("Article-I.D.: "))
445: return ARTICLEID;
446: if (its ("Message-ID: "))
447: return MESSAGEID;
448: if (its ("Reply-To: "))
449: return REPLYTO;
450: if (its ("References: "))
451: return FOLLOWID;
452: if (its ("Control: "))
453: return CONTROL;
454: if (its ("Sender: "))
455: return SENDER;
456: if (its ("Followup-To: "))
457: return FOLLOWTO;
458: if (its ("Posting-Version: "))
459: return POSTVERSION;
460: if (its ("Relay-Version: "))
461: return RELAYVERSION;
462: if (its ("Distribution: "))
463: return DISTRIBUTION;
464: if (its ("Organization: "))
465: return ORGANIZATION;
466: if (its ("Lines: "))
467: return NUMLINES;
468: if (its ("Keywords: "))
469: return KEYWORDS;
470: if (its ("Approved: "))
471: return APPROVED;
472: if (its ("Nf-ID: "))
473: return NLINE1;
474: if (its ("Nf-From: "))
475: return NLINE2;
476: return OTHER;
477: }
478:
/*
 * Set nc bytes, starting at cp, to zero.
 *
 *	cp	first byte to clear
 *	nc	number of bytes; zero or a negative count clears nothing
 *
 * Always returns 0; callers ignore the value.
 */

int
bclear (cp, nc)
register char *cp;
register int nc;
{
    /* "> 0" so that a negative count cannot run away through memory */
    while (nc-- > 0)
        *cp++ = 0;
    return (0);
}
490:
491: /*
492: * Strip trailing newlines, blanks, and tabs from 's'.
493: * Return TRUE if newline was found, else FALSE.
494: */
495:
496: nstrip (s)
497: register char *s;
498: {
499: register char *p;
500: register int rc;
501:
502: rc = FALSE;
503: p = s;
504: while (*p)
505: if (*p++ == '\n')
506: rc = TRUE;
507: while (--p >= s && (*p == '\n' || *p == ' ' || *p == '\t'));
508: *++p = '\0';
509: return (rc);
510: }
511:
512: /*
513: * Append NGDELIM to string.
514: */
515:
516: ngcat (s)
517: register char *s;
518: {
519: if (*s)
520: {
521: while (*s++);
522: s -= 2;
523: if (*s++ == NGDELIM)
524: return;
525: }
526: *s++ = NGDELIM;
527: *s = '\0';
528: }
529:
530: /*
531: * Return a compact representation of the person who posted the given
532: * message. A sender or internet name will be used, otherwise
533: * the last part of the path is used preceeded by an optional ".."
534: */
535: char *
536: tailpath (hp)
537: struct hbuf *hp;
538: {
539: char *p,
540: *r;
541: static char resultbuf[BUFLEN];
542: char pathbuf[PATHLEN];
543: char *malloc ();
544:
545: /*
546: * This only happens for articles posted by old news software
547: * in non-internet format.
548: */
549: resultbuf[0] = '\0';
550: strcpy (pathbuf, hp -> path);
551: p = index (pathbuf, ' ');
552: if (p)
553: *p = '\0'; /* Chop off trailing " (name)" */
554: r = rindex (pathbuf, '!');
555: if (r == 0)
556: {
557: r = pathbuf;
558: }
559: else
560: {
561: while (r > pathbuf && *--r != '!')
562: ;
563: if (r > pathbuf)
564: {
565: r++;
566: strcpy (resultbuf, "..!");
567: }
568: }
569: strcat (resultbuf, r);
570: return resultbuf;
571: }
572:
573:
574:
/*
 * hfgets is like fgets, but deals with continuation lines.
 * It also ensures that even if a line that is too long is
 * received, the remainder of the line is thrown away
 * instead of treated like a second line.
 *
 *	buf	where to put the line
 *	len	size of buf in bytes (at least 2)
 *	fp	stream to read from
 *
 * A line that starts with a blank or tab continues the previous one; it
 * is appended after a single blank, with its leading white space removed.
 * A continuation line holding only white space adds nothing.  The result
 * always ends in exactly one newline and never occupies more than len
 * bytes including the terminating NUL; text that does not fit is dropped.
 *
 * Returns buf, or NULL at end of file (or when buf/len are unusable).
 */

char *
hfgets (buf, len, fp)
char *buf;
int len;
FILE * fp;
{
    register int c;
    register char *tp;
    char *limit;		/* text may not extend past here */

    if (buf == NULL || len < 2)
        return NULL;
    if (fgets (buf, len, fp) == NULL)
        return NULL;

    limit = buf + len - 2;	/* keep two bytes free for "\n" and NUL */
    tp = buf + strlen (buf);
    if (tp > buf && tp[-1] == '\n')
    {
        --tp;			/* clobber newline */
    }
    else
    {
        /*
         * Line too long - the part read did not reach the newline.
         * Throw the rest of the line away.
         */
        while ((c = getc (fp)) != '\n' && c != EOF)
            ;
        if (tp > limit)
            tp = limit;
    }

    while ((c = getc (fp)) == ' ' || c == '\t')		/* continuation */
    {
        while ((c = getc (fp)) == ' ' || c == '\t')	/* skip white space */
            ;
        /*
         * Nothing but white space on this line: don't store the newline
         * and, above all, don't swallow the line that follows.
         */
        if (c == '\n' || c == EOF)
            continue;

        if (tp < limit)
            *tp++ = ' ';
        if (tp < limit)
            *tp++ = c;
        while ((c = getc (fp)) != '\n' && c != EOF)
        {
            if (tp < limit)
                *tp++ = c;
        }
    }
    *tp++ = '\n';
    *tp = '\0';
    if (c != EOF)
        ungetc (c, fp);		/* push back char */
    return buf;
}