/*  $Revision$
**
**  Rebuild overview databases of INN 2.1/2.2.
**
**  This tool acts as follows:
**    - it reads the history file line by line and/or scans the news spool
**      through SAPI;
**    - gets the SAPI token;
**    - fetches the article;
**    - parses out overview data from the article Xref: header field;
**    - puts overview info into the unified overview db;
**    - puts Xref: information into the overview index file (to be later fed
**      to the expireindex tool to re-create overview indexes);
**    - re-builds the SAPI token;
**    - puts the SAPI token into the new history file;
**  and so on many times.
**
**  Written by Ilya Etingof , 2000
**
**  THIS TOOL IS NOT WELL TESTED!  USE IT AT YOUR OWN RISK!
**  It should not act in a harmful way, though.  It never modifies any live
**  INN files.
**
*/
/*
**  NOTE(review): several #include directives below have no header name.
**  The <...> operands appear to have been lost before this text reached
**  review; they must be restored from the original file before this
**  compiles.
*/
#include
#include
#include "configdata.h"
#include "clibrary.h"
#include
#include
#if defined(DO_NEED_TIME)
#include
#endif /* defined(DO_NEED_TIME) */
#include
#include
#include "paths.h"
#include "libinn.h"
#include "inndcomm.h"
#include "dbz.h"
#include "storage.h"
#include "qio.h"
#include "macros.h"
#include "mydir.h"
#include
#include "../innd/art.h"

/*
**  The header table.  Not necessarily sorted, but the first character
**  must be uppercase.
**
**  Each _name macro is the index of the entry it follows, so the numbered
**  entries must keep their positions.  Entries after "Posted" have no
**  index macro and are only reached through the header tree or by scanning
**  the table.
*/
ARTHEADER ARTheaders[] = {
    /* Name                     Type ... */
    { "Approved",               HTstd },
#define _approved 0
    { "Control",                HTstd },
#define _control 1
    { "Date",                   HTreq },
#define _date 2
    { "Distribution",           HTstd },
#define _distribution 3
    { "Expires",                HTstd },
#define _expires 4
    { "From",                   HTreq },
#define _from 5
    { "Lines",                  HTstd },
#define _lines 6
    { "Message-ID",             HTreq },
#define _message_id 7
    { "Newsgroups",             HTreq },
#define _newsgroups 8
    { "Path",                   HTreq },
#define _path 9
    { "Reply-To",               HTstd },
#define _reply_to 10
    { "Sender",                 HTstd },
#define _sender 11
    { "Subject",                HTreq },
#define _subject 12
    { "Supersedes",             HTstd },
#define _supersedes 13
    { "Bytes",                  HTstd },
#define _bytes 14
    { "Also-Control",           HTstd },
#define _alsocontrol 15
    { "References",             HTstd },
#define _references 16
    { "Xref",                   HTsav },
#define _xref 17
    { "Keywords",               HTstd },
#define _keywords 18
    { "X-Trace",                HTstd },
#define _xtrace 19
    { "Date-Received",          HTobs },
#define _date_received 20
    { "Posted",                 HTobs },
#define _posted 21
    { "Posting-Version",        HTobs },
    { "Received",               HTobs },
    { "Relay-Version",          HTobs },
    { "NNTP-Posting-Host",      HTstd },
    { "Followup-To",            HTstd },
    { "Organization",           HTstd },
    { "Content-Type",           HTstd },
    { "Content-Base",           HTstd },
    { "Content-Disposition",    HTstd },
    { "X-Newsreader",           HTstd },
    { "X-Mailer",               HTstd },
    { "X-Newsposter",           HTstd },
    { "X-Cancelled-By",         HTstd },
    { "X-Canceled-By",          HTstd },
    { "Cancel-Key",             HTstd },
};

/* One past the last entry of ARTheaders. */
ARTHEADER *ARTheadersENDOF = ENDOF(ARTheaders);

/*
**  Growable byte buffer.  BUFFset() and BUFFappend() in this file treat
**  Data[Used .. Used + Left) as the valid contents and Size as the number
**  of bytes allocated for Data.
*/
typedef struct _BUFFER {
    long        Size;
    long        Used;
    long        Left;
    char        *Data;
} BUFFER;

/*
**  For speed we build a binary tree of the headers, sorted by their
**  name.  We also store the header's Name fields in the tree to avoid
**  doing an extra indirection.
*/
typedef struct _TREE {
    STRING      Name;
    ARTHEADER   *Header;
    struct _TREE *Before;
    struct _TREE *After;
} TREE;

/* Root of the header tree; built by ARTsetup(). */
STATIC TREE *ARTheadertree;

/*
**  For doing the overview database, we keep a list of the headers and
**  a flag saying if they're written in brief or full format.
*/ typedef struct _ARTOVERFIELD { ARTHEADER *Header; BOOL NeedHeader; } ARTOVERFIELD; STATIC ARTOVERFIELD *ARTfields; STATIC char *ACTIVE = NULL; STATIC char *HISTORYDIR; STATIC char *HISTORY = NULL; STATIC char *TextFile, *TempTextFile; STATIC TIMEINFO Now; STATIC char *SCHEMA = NULL; STATIC char *IndexFile; STATIC BOOL OVERmmap; STATIC char *OverPath; STATIC int Verbose = FALSE; STATIC int Target; #define CRLFSIZE 2 #define TARGET_HISTORY 0x01 #define TARGET_SPOOL 0x02 /* ** Add data to buffer */ void BUFFset(BUFFER *bp, const char *p, const int length) { if ((bp->Left = length) != 0) { /* Need more space? */ if (bp->Size < length) { bp->Size = length; RENEW(bp->Data, char, bp->Size); } /* Try to test for non-overlapping copies. */ memmove((POINTER)bp->Data, (POINTER)p, (SIZE_T)length); } bp->Used = 0; } /* ** Append data to buffer */ void BUFFappend(BUFFER *bp, const char *p, const int len) { int i; if (len == 0) return; /* Note end of buffer, grow it if we need more room */ i = bp->Used + bp->Left; if (i + len > bp->Size) { /* Round size up to next 1K */ bp-> Size += (len + 0x3FF) & ~0x3FF; RENEW(bp->Data, char, bp->Size); } bp->Left += len; memcpy((POINTER)&bp->Data[i], (POINTER)p, len); } /* ** Check and parse an date header line. Return the new value or ** zero on error. */ static long GetaDate(char *p) { time_t t; while (ISWHITE(*p)) p++; if ((t = parsedate(p, &Now)) == -1) return 0L; return (long)t; } /* ** */ STATIC BOOL HISopen() { /* Open the history file, do the lookup. */ if (!dbzinit(TextFile)) { (void)fprintf(stderr, "Can't open history database, %s\n", strerror(errno)); return FALSE; } return TRUE; } /* ** */ STATIC BOOL HISlookup(const HASH key) { idxrec ionevalue; idxrecext iextvalue; /* Not found. 
*/ #ifdef DO_TAGGED_HASH if (dbzfetch(key) < 0) { return FALSE; } #else if (innconf->extendeddbz) { if (!dbzfetch(key, &iextvalue)) { return FALSE; } } else { if (!dbzfetch(key, &ionevalue)) { return FALSE; } } return TRUE; #endif } /* ** */ STATIC BOOL HISclose() { /* Close the history file */ if (!dbzclose()) { (void)fprintf(stderr, "Can't close history database, %s\n", strerror(errno)); return FALSE; } return TRUE; } /* ** Turn any \r or \n in text into spaces. Used to splice back multi-line ** headers into a single line. */ STATIC char * Join(text) register char *text; { register char *p; for (p = text; *p; p++) if (*p == '\n' || *p == '\r') *p = ' '; return text; } /* ** Return a short name that won't overrun our bufer or syslog's buffer. ** q should either be p, or point into p where the "interesting" part is. */ char * MaxLength(p, q) char *p; char *q; { static char buff[80]; register int i; /* Already short enough? */ i = strlen(p); if (i < sizeof buff - 1) return Join(p); /* Simple case of just want the begining? */ if (q - p < sizeof buff - 4) { (void)strncpy(buff, p, sizeof buff - 4); (void)strcpy(&buff[sizeof buff - 4], "..."); } /* Is getting last 10 characters good enough? */ else if ((p + i) - q < 10) { (void)strncpy(buff, p, sizeof buff - 14); (void)strcpy(&buff[sizeof buff - 14], "..."); (void)strcpy(&buff[sizeof buff - 11], &p[i - 10]); } else { /* Not in last 10 bytes, so use double elipses. */ (void)strncpy(buff, p, sizeof buff - 17); (void)strcpy(&buff[sizeof buff - 17], "..."); (void)strncpy(&buff[sizeof buff - 14], &q[-5], 10); (void)strcpy(&buff[sizeof buff - 4], "..."); } return Join(buff); } /* ** Build a balanced tree for the headers in subscript range [lo..hi). ** This only gets called once, and the tree only has about 20 entries, ** so we don't bother to unroll the recursion. 
*/ static TREE * ARTbuildtree(Table, lo, hi) ARTHEADER **Table; int lo; int hi; { int mid; TREE *tp; mid = lo + (hi - lo) / 2; tp = NEW(TREE, 1); tp->Header = Table[mid]; tp->Name = tp->Header->Name; if (mid == lo) tp->Before = NULL; else tp->Before = ARTbuildtree(Table, lo, mid); if (mid == hi - 1) tp->After = NULL; else tp->After = ARTbuildtree(Table, mid + 1, hi); return tp; } /* ** Sorting predicate for qsort call in ARTsetup. */ STATIC int ARTcompare(p1, p2) CPOINTER p1; CPOINTER p2; { ARTHEADER **h1; ARTHEADER **h2; h1 = CAST(ARTHEADER**, p1); h2 = CAST(ARTHEADER**, p2); return strcasecmp(h1[0]->Name, h2[0]->Name); } /* ** Load overview scheme */ BOOL ARTreadschema() { static char *SCHEMA = NULL; FILE *F; int i; char *p; ARTOVERFIELD *fp; ARTHEADER *hp; BOOL ok; char buff[SMBUF]; /* Dispose previous article headers */ if (ARTfields != NULL) { DISPOSE(ARTfields); ARTfields = NULL; } /* Open file, count lines. */ if (SCHEMA == NULL) SCHEMA = COPY(cpcatpath(innconf->pathetc, _PATH_SCHEMA)); /* Open overview scheme */ if ((F = Fopen(SCHEMA, "r", TEMPORARYOPEN)) == NULL) return FALSE; /* Count number of fields */ for (i = 0; fgets(buff, sizeof buff, F) != NULL; i++) continue; (void)fseek(F, (OFFSET_T)0, SEEK_SET); /* Allocate storage for overview fields */ ARTfields = NEW(ARTOVERFIELD, i + 1); /* Parse each field. */ for (ok = TRUE, fp = ARTfields; fgets(buff, sizeof buff, F) != NULL; ) { /* Ignore blank and comment lines. 
*/ if ((p = strchr(buff, '\n')) != NULL) *p = '\0'; if ((p = strchr(buff, COMMENT_CHAR)) != NULL) *p = '\0'; if (buff[0] == '\0') continue; if ((p = strchr(buff, ':')) != NULL) { *p++ = '\0'; fp->NeedHeader = EQ(p, "full"); } else fp->NeedHeader = FALSE; for (hp = ARTheaders; hp < ENDOF(ARTheaders); hp++) if (EQ(buff, hp->Name)) { fp->Header = hp; break; } if (hp == ENDOF(ARTheaders)) { syslog(L_ERROR, "bad_schema unknown header \"%s\"", buff); ok = FALSE; continue; } fp++; } fp->Header = NULL; (void)Fclose(F); return ok; } /* ** Setup the article processing. */ void ARTsetup(void) { ARTHEADER *hp; ARTHEADER **table; int i; /* Allocate space in the header table. */ for (hp = ARTheaders; hp < ENDOF(ARTheaders); hp++) { hp->Size = strlen(hp->Name); hp->Allocated = hp->Value == NULL; /* && hp->Type != HTobs; */ if (hp->Allocated) hp->Value = NEW(char, MAXHEADERSIZE*2); } /* Build the header tree. */ table = NEW(ARTHEADER*, SIZEOF(ARTheaders)); for (i = 0; i < SIZEOF(ARTheaders); i++) table[i] = &ARTheaders[i]; qsort((POINTER)table, SIZEOF(ARTheaders), sizeof *table, ARTcompare); ARTheadertree = ARTbuildtree(table, 0, SIZEOF(ARTheaders)); DISPOSE(table); /* Read overview scheme */ ARTreadschema(); } /* */ STATIC void ARTfreetree(tp) TREE *tp; { TREE *next; for ( ; tp != NULL; tp = next) { if (tp->Before) ARTfreetree(tp->Before); next = tp->After; DISPOSE(tp); } } void ARTclose(void) { ARTHEADER *hp; /* Free space in the header table. */ for (hp = ARTheaders; hp < ENDOF(ARTheaders); hp++) if (hp->Allocated) DISPOSE(hp->Value); if (ARTfields != NULL) { DISPOSE(ARTfields); ARTfields = NULL; } ARTfreetree(ARTheadertree); } /* ** Parse a header that starts at in, copying it to out. Return pointer to ** the start of the next header and fill in *deltap with what should ** get added to the output pointer. (This nicely lets us clobber obsolete ** headers by setting it to zero.) 
*/
/*
**  On a malformed header the function returns NULL and normally points
**  *errorp at a message (a literal or the static buff).  A header found in
**  the header tree has its value copied into the matching ARTheaders entry
**  (Value, Length) and its Found counter incremented, unless the header is
**  obsolete (HTobs) or its body is blank.
*/
STATIC char *ARTparseheader(char *in, char *out, int *deltap, STRING *errorp)
{
    static char         buff[SMBUF];
    static char         COLONSPACE[] = "No colon-space in \"%s\" header";
    char                *start;
    TREE                *tp;
    ARTHEADER           *hp;
    char                c;
    char                *p;
    int                 i;
    char                *colon;

    /* Find a non-continuation line, copying it to out and remembering
     * where the first colon landed in the copy. */
    for (colon = NULL, start = out; ; ) {
        switch (*in) {
        case '\0':
            *errorp = "EOF in headers";
            return NULL;
        case ':':
            if (colon == NULL) {
                colon = out;
                if (start == colon) {
                    *errorp = "Field without name in header";
                    return NULL;
                }
            }
            break;
        }
        /* Stop after a newline that is not followed by white space. */
        if ((*out++ = *in++) == '\n' && !ISWHITE(*in))
            break;
    }
    *deltap = out - start;
    if (colon == NULL || !ISWHITE(colon[1])) {
        if ((p = strchr(start, '\n')) != NULL)
            *p = '\0';
        (void)sprintf(buff, COLONSPACE, MaxLength(start, start));
        *errorp = buff;
        return NULL;
    }

    /* See if this is a system header.  A fairly tightly-coded
     * binary search.  The colon is temporarily replaced by a NUL so that
     * start is just the header name while searching. */
    c = CTYPE(islower, *start) ? toupper(*start) : *start;
    for (*colon = '\0', tp = ARTheadertree; tp; ) {
        if ((i = c - tp->Name[0]) == 0
         && (i = strcasecmp(start, tp->Name)) == 0)
            break;
        if (i < 0)
            tp = tp->Before;
        else
            tp = tp->After;
    }
    *colon = ':';

    if (tp == NULL) {
        /* Not a system header, make sure there is no white space between
         * the start of the name and the colon. */
        for (p = colon; --p > start; )
            if (ISWHITE(*p)) {
                (void)sprintf(buff, "Space before colon in \"%s\" header",
                        MaxLength(start, start));
                *errorp = buff;
                return NULL;
            }
        /* NOTE(review): this return does not set *errorp. */
        if (p < start)
            return NULL;
        return in;
    }

    /* Found a known header; is it obsolete? */
    hp = tp->Header;
    if (hp->Type == HTobs) {
        *deltap = 0;
        return in;
    }

    /* HTsav headers are dropped from the output copy too, but unlike the
     * obsolete ones their value is still recorded below. */
    if (hp->Type == HTsav) {
        *deltap = 0;
    }

    /* If body of header is all blanks, drop the header. */
    for (p = colon + 1; ISWHITE(*p); p++)
        continue;
    if (*p == '\0' || *p == '\n' || (p[0] == '\r' && p[1] == '\n')) {
        *deltap = 0;
        return in;
    }

    hp->Found++;

    /* Zap in the canonical form of the header, undoing the \0 that
     * strcpy put out (strncpy() spec isn't trustable, unfortunately). */
    (void)strcpy(start, hp->Name);
    start[hp->Size] = ':';

    /* Copy the header if not too big.  The length excludes the trailing
     * newline, and one more byte when innconf->wireformat is TRUE. */
    i = (out - 1 - (innconf->wireformat == TRUE)) - p;
    if (i >= MAXHEADERSIZE) {
        (void)sprintf(buff, "\"%s\" header too long", hp->Name);
        *errorp = buff;
        return NULL;
    }
    hp->Length = i;
    (void)memcpy((POINTER)hp->Value, (POINTER)p, (SIZE_T)i);
    hp->Value[i] = '\0';

    return in;
}


/*
**  Parse article, fill ARTheader's.
**  Return NULL if the article is okay, or a string describing the error.
**
**  The header scan stops at the first NUL byte ("No body") or at the
**  empty line that separates headers from the body.
**
**  NOTE(review): text is missing from this region.  The line-counting loop
**  below breaks off in the middle of its for statement and what follows is
**  the tail of a different function that builds and returns the overview
**  record (&Overview).  The end of ARTclean and the head of that function
**  must be restored from the original file; the code is reproduced here
**  exactly as found.
*/
STATIC STRING ARTclean(char *Article, long ArticleLen)
{
    static char         buff[SMBUF];
    ARTHEADER           *hp;
    char                *in;
    char                *out;
    char                *p;
    STRING              error;
    unsigned long       lines = 0L;
    int                 delta;

    /* Forget the values found in the previous article. */
    for (hp = ARTheaders; hp < ENDOF(ARTheaders); hp++) {
        if (hp->Value && hp->Type != HTobs)
            *hp->Value = '\0';
        hp->Found = 0;
    }

    /* Read through the headers one at a time. */
    for (error = NULL, in = out = Article; ; out += delta, in = p) {
        if (*in == '\0') {
            error = "No body";
            break;
        }

        /* Count this line */
        lines++;

        if (((*in == '\n' || (in[0] == '\r' && in[1] == '\n'))
         && out > Article && out[-1] == '\n'))
            /* Found the header separator; break out. */
            break;

        /* Check the validity of this header. */
        if ((p = ARTparseheader(in, out, &delta, &error)) == NULL)
            break;

        /* Check against overflow.  Note that this runs only after the
         * header has already been parsed and copied. */
        if (out+delta-Article > ArticleLen) {
            /* Reached the end of article, break out */
            error = "Short article";
            break;
        }
    }
    in++;

    /* Try to set this now, so we can report it in errors. */
    if (error)
        return error;

    /* Make sure all the headers we need are there, and no duplicates. */
    for (hp = ARTheaders; hp < ENDOF(ARTheaders); hp++) {
        if (hp->Type == HTreq) {
            if (*hp->Value == '\0') {
                (void)sprintf(buff, "Missing \"%s\" header", hp->Name);
                return buff;
            }
            if (hp->Found > 1) {
                (void)sprintf(buff, "Duplicate \"%s\" header", hp->Name);
                return buff;
            }
        }
    }

    /* See if there's Lines: field */
    if (!ARTheaders[_lines].Found) {
        /* Count article lines */
        /* NOTE(review): source text is missing inside the next line. */
        for(p=out; pHeader; fp++) {
        /* Fields are separated by SEP; none is written before the first. */
        if (fp != ARTfields)
            BUFFappend(&Overview, SEP, STRLEN(SEP));
        hp = fp->Header;

#if defined(DO_KEYWORDS)
        /* !!! THIS IS NOT SUPPORTED AT THE TIME !!! */
        if (innconf->keywords) {
            /* Ensure that there are Keywords: to shovel. */
            if (hp == &ARTheaders[_keywords]) {
                key_old_value  = hp->Value;
                key_old_length = hp->Length;
                /* ARTmakekeys(hp, Data->Body, key_old_value, key_old_length);*/
                hp->Found++;    /* now faked, whether present before or not. */
            }
        }
#endif /* defined(DO_KEYWORDS) */

        /* A header missing from the article yields an empty field. */
        if (!hp->Found)
            continue;

        if (fp->NeedHeader) {
            BUFFappend(&Overview, hp->Name, hp->Size);
            BUFFappend(&Overview, COLONSPACE, STRLEN(COLONSPACE));
        }

        /* Remember where the value starts so it can be cleaned below. */
        i = Overview.Left;

#if defined(DO_KEYWORDS)
        if (innconf->keywords) {
            if (key_old_value) {
                if (hp->Value)
                    free(hp->Value);            /* malloc'd within */
                hp->Value  = key_old_value;
                hp->Length = key_old_length;
                hp->Found--;
                key_old_value = NULL;
            }
        }
#endif /* defined(DO_KEYWORDS) */

        BUFFappend(&Overview, hp->Value, hp->Length);

        /* Tabs and line breaks inside the value become spaces. */
        for (p = &Overview.Data[i]; i < Overview.Left; p++, i++)
            if (*p == '\t' || *p == '\n' || *p == '\r')
                *p = ' ';
    }

    return &Overview;
}


/*
**  Write overview index lines for the article whose headers are currently
**  held in ARTheaders: one "[hash] entry" line per entry of the Xref:
**  header after its first word.
**
**  Returns TRUE when no index file is given (index is NULL) and after
**  writing; returns FALSE when the article has no Xref: or no Message-ID.
**  Exits if a write to the index file fails.  The token argument is not
**  used.
*/
BOOL BuildIndex(FILE *index, TOKEN *token)
{
    static char         NUL[] = "\0";
    static BUFFER       Buff;
    char                *p, *q, *hash;
    int                 i;

    /* Nothing to do unless an index file was specified. */
    if (index == (FILE *)NULL) {
        return TRUE;
    }

    /* See if it has a Xref field */
    if (!ARTheaders[_xref].Found) {
        (void)fprintf(stderr, "No Xref: field in article.\n");
        return FALSE;
    }

    /* See if it has a Message-Id field */
    if (!ARTheaders[_message_id].Found) {
        (void)fprintf(stderr, "No Message-Id: field in article.\n");
        return FALSE;
    }
    else {
        hash = HashToText(HashMessageID(HDR(_message_id)));
    }

    /* Copy Xref: field into buffer and NUL-terminate the copy. */
    BUFFset(&Buff, HDR(_xref), ARTheaders[_xref].Length);
    BUFFappend(&Buff, NUL, STRLEN(NUL));

    /* Replace tabs etc. with spaces */
    for (i = 0, p = Buff.Data; i < Buff.Left; p++, i++)
        if (*p == '\t' || *p == '\n' || *p == '\r')
            *p = ' ';

    /* The first word of the value is skipped; the entries follow it. */
    if ((p = strchr(Buff.Data, ' ')) == NULL)
        (void)fprintf(stderr, "Can't find Xref content, %s\n", Buff.Data);
    else {
        /* Skip the run of spaces after the first word. */
        for (p++; *p == ' '; p++);

        /* Walk over the Xref content, one space-separated entry a time. */
        q = p;
        while ((p = strchr(p, ' ')) != NULL) {
            *p = '\0';

            i = fprintf(index, "[%s] %s\n", hash, q);
            if (i == EOF || ferror(index)) {
                (void)fprintf(stderr, "Can't write index line, %s\n",
                        strerror(errno));
                exit(1);
            }

            for (p++; *p == ' '; p++);
            q = p;
        }

        /* The last token.  When it is empty, i still holds the result of
         * an earlier statement when it is tested below. */
        if(*q)
            i = fprintf(index, "[%s] %s\n", hash, q);
        if (i == EOF || ferror(index)) {
            (void)fprintf(stderr, "Can't write index line, %s\n",
                    strerror(errno));
            exit(1);
        }
    }

    return TRUE;
}


/*
**  Change to a directory or exit out.
*/
STATIC void xchdir(char *where)
{
    if (chdir(where) < 0) {
        (void)fprintf(stderr, "Can't change to \"%s\", %s\n",
                where, strerror(errno));
        exit(1);
    }
}


/*
**  Remove the DBZ files for the specified base text file.  A file that
**  does not exist is not an error; any other unlink failure is reported
**  on stderr but does not stop the program.
**
**  NOTE(review): the file names are built with sprintf() into a buffer of
**  SMBUF bytes with no length check on p.
*/
STATIC void RemoveDBZFiles(char *p)
{
    static char         NOCANDO[] = "Can't remove \"%s\", %s\n";
    char                buff[SMBUF];

    (void)sprintf(buff, "%s.dir", p);
    if (unlink(buff) && errno != ENOENT)
        (void)fprintf(stderr, NOCANDO, buff, strerror(errno));
#ifdef  DO_TAGGED_HASH
    (void)sprintf(buff, "%s.pag", p);
    if (unlink(buff) && errno != ENOENT)
        (void)fprintf(stderr, NOCANDO, buff, strerror(errno));
#else
    (void)sprintf(buff, "%s.index", p);
    if (unlink(buff) && errno != ENOENT)
        (void)fprintf(stderr, NOCANDO, buff, strerror(errno));
    (void)sprintf(buff, "%s.hash", p);
    if (unlink(buff) && errno != ENOENT)
        (void)fprintf(stderr, NOCANDO, buff, strerror(errno));
#endif
}


/*
**  Rebuild the DBZ file from the text file.
*/ STATIC void Rebuild() { QIOSTATE *qp; char *p, *q; char *save; OFFSET_T count; OFFSET_T where; HASH key; dbzoptions opt; #ifndef DO_TAGGED_HASH TOKEN token; void *ivalue; idxrec ionevalue; idxrecext iextvalue; #endif if (Verbose) (void)fprintf(stderr, "Re-building DBZ files..."); xchdir(HISTORYDIR); /* Open the text file. */ qp = QIOopen(TempTextFile); if (qp == NULL) { (void)fprintf(stderr, "Can't open \"%s\", %s\n", TextFile, strerror(errno)); exit(1); } /* Remove previous build */ RemoveDBZFiles(TempTextFile); p = TempTextFile; /* Open the new database, using the old file if desired and possible. */ dbzgetoptions(&opt); #ifdef DO_TAGGED_HASH opt.pag_incore = INCORE_MEM; #else opt.idx_incore = INCORE_MEM; opt.exists_incore = INCORE_MEM; #endif dbzsetoptions(opt); if (!dbzagain(p, HISTORY)) { (void)fprintf(stderr, "Can't do dbzagain, %s\n", strerror(errno)); exit(1); } /* Loop through all lines in the text file. */ count = 0; for (where = QIOtell(qp); (p = QIOread(qp)) != NULL; where = QIOtell(qp)) { count++; if ((save = strchr(p, HIS_FIELDSEP)) == NULL) { (void)fprintf(stderr, "Bad line #%ld \"%.30s...\"\n", count, p); exit(1); } *save = '\0'; switch (*p) { case '[': if (strlen(p) != ((sizeof(HASH) * 2) + 2)) { fprintf(stderr, "Invalid length for hash %s, skipping\n", p); continue; } key = TextToHash(p+1); #ifndef DO_TAGGED_HASH if (((save = strchr(save + 1, '@')) != NULL) && ((q = strchr(save + 1, '@')) != NULL)) { *(++q) = '\0'; if (!IsToken(save)) { /* assumes traditional spool */ ionevalue.offset = where; ivalue = (void *)&ionevalue; break; } if (innconf->extendeddbz) { iextvalue.offset[HISTOFFSET] = where; token = TextToToken(save); OVERsetoffset(&token, &iextvalue.offset[OVEROFFSET], &iextvalue.overindex, &iextvalue.overlen); ivalue = (void *)&iextvalue; } else { ionevalue.offset = where; ivalue = (void *)&ionevalue; } } else { if (innconf->extendeddbz) { iextvalue.offset[HISTOFFSET] = where; iextvalue.offset[OVEROFFSET] = 0; iextvalue.overindex = 
OVER_NONE; iextvalue.overlen = 0; ivalue = (void *)&iextvalue; } else { ionevalue.offset = where; ivalue = (void *)&ionevalue; } } #endif break; case '<': key = HashMessageID(p); #ifndef DO_TAGGED_HASH ionevalue.offset = where; ivalue = (void *)&ionevalue; #endif break; default: fprintf(stderr, "Invalid message-id \"%s\" in history text\n", p); continue; } #ifdef DO_TAGGED_HASH switch (dbzstore(key, (OFFSET_T)where)) { #else switch (dbzstore(key, ivalue)) { #endif case DBZSTORE_EXISTS: fprintf(stderr, "Duplicate message-id \"%s\" in history text\n", p); break; case DBZSTORE_ERROR: fprintf(stderr, "Can't store \"%s\", %s\n", p, strerror(errno)); exit(1); default: break; } } if (QIOerror(qp)) { (void)fprintf(stderr, "Can't read \"%s\" near line %ld, %s\n", TempTextFile, count, strerror(errno)); exit(1); } if (QIOtoolong(qp)) { (void)fprintf(stderr, "Line %ld is too long\n", count); exit(1); } /* Close files. */ QIOclose(qp); if (!dbzclose()) { (void)fprintf(stderr, "Can't close history, %s\n", strerror(errno)); exit(1); } if (Verbose) (void)fprintf(stderr, "done\n"); } STATIC int split(char *p, char sep, char **argv, int count) { int i; if (!p || !*p) return 0; for (i = 1, *argv++ = p; *p; ) if (*p++ == sep) { if (++i == count) /* Overflow. 
*/ return -1; p[-1] = '\0'; for (*argv++ = p; *p == sep; p++) continue; } return i; } /* ** Read history, fetch article and build overview databases */ STATIC BOOL BuildFromHistory(FILE *out, char *OldHistory, FILE *index) { QIOSTATE *qp; unsigned long line; char *p; char *fields[4]; int i; TOKEN token; ARTHANDLE *art; long len; char *error; static BUFFER Article; BUFFER *Overview; if (Verbose) (void)fprintf(stderr, "Building from history"); /* Open history text file */ if ((qp = QIOopen(OldHistory)) == NULL) { (void)fprintf(stderr, "Can't open old history file, %s\n", strerror(errno)); return FALSE; } /* Read it line by line */ for (line = 1L; ; line++) { if ((p = QIOread(qp)) != NULL) { /* Split up fields */ i = split(p, HIS_FIELDSEP, fields, SIZEOF(fields)); /* Ignore bad line */ if (i != 2 && i != 3) { (void)fprintf(stderr, "Strange number of fields at: line %ld\n", line); continue; } /* Expired entry */ if (i == 2) { if (Verbose) (void)fprintf(stderr, "-"); switch (fields[0][0]) { case '[': /* Put as is */ if (out != NULL) { i = fprintf(out, "%s%c%s\n", fields[0], HIS_FIELDSEP, fields[1]); if (i == EOF || ferror(out)) { (void)fprintf(stderr, "Can't write history line, %s\n", strerror(errno)); exit(1); } } break; /* Tokenize and put it to history */ case '<': if (out != NULL) { i = fprintf(out, "[%s]%c%s\n", HashToText(HashMessageID(fields[0])), HIS_FIELDSEP, fields[1]); if (i == EOF || ferror(out)) { (void)fprintf(stderr, "Can't write history line, %s\n", strerror(errno)); exit(1); } } break; default: fprintf(stderr, "Invalid message-id \"%s\" in history text\n", fields[0]); break; } continue; } /* Proceed with history file entry for existing article */ if (fields[0][0] == '[') { if (strlen(fields[0]) != ((sizeof(HASH) * 2) + 2)) { fprintf(stderr, "Invalid length for hash %s, skipping\n", fields[0]); continue; } } else { (void)fprintf(stderr, "Malformed history hash: %s\n", fields[0]); continue; } /* If it's a token */ if (!IsToken(fields[2])) { 
(void)fprintf(stderr, "Malformed history entry: %s\n", fields[2]); continue; } if (Verbose) (void)fprintf(stderr, "+"); /* Read up the article */ token = TextToToken(fields[2]); if ((art = SMretrieve(token, RETR_ALL)) == (ARTHANDLE *)NULL) { /* Self-expired entry */ i = fprintf(out, "%s%c%s\n", fields[0], HIS_FIELDSEP, fields[1]); if (i == EOF || ferror(out)) { (void)fprintf(stderr, "Can't write history line, %s\n", strerror(errno)); exit(1); } continue; } /* Setup article buffer */ if (Article.Data == NULL) Article.Data = NEW(char, 1); /* Make a copy of article */ BUFFset(&Article, "", 0); BUFFappend(&Article, art->data, art->len); /* Store art len */ len = art->len; /* Free article */ SMfreearticle(art); /* Parse article headers */ if (len) { /* Handle new article */ if ((error = ARTclean(Article.Data, Article.Left))) { fprintf(stderr, "Cannot parse article: %s\n", error); /* Corrupted article, remember it */ i = fprintf(out, "%s%c%s\n", fields[0], HIS_FIELDSEP, fields[1]); if (i == EOF || ferror(out)) { (void)fprintf(stderr, "Can't write history line, %s\n", strerror(errno)); exit(1); } continue; } } /* Build overview record */ if ((Overview = ARTmakeoverview()) == NULL) { fprintf(stderr, "Cannot build overview record\n"); /* Probably incomplete article, remember it */ i = fprintf(out, "%s%c%s%c%s\n", fields[0], HIS_FIELDSEP, fields[1], HIS_FIELDSEP, fields[2]); if (i == EOF || ferror(out)) { (void)fprintf(stderr, "Can't write history line, %s\n", strerror(errno)); exit(1); } continue; } /* Store overview entry */ if (!OVERstore(&token, Overview->Data, Overview->Left)) { fprintf(stderr, "Cannot store overview for %s\n", TokenToText(token)); /* Probably incomplete article, remember it */ i = fprintf(out, "%s%c%s%c%s\n", fields[0], HIS_FIELDSEP, fields[1], HIS_FIELDSEP, fields[2]); if (i == EOF || ferror(out)) { (void)fprintf(stderr, "Can't write history line, %s\n", strerror(errno)); exit(1); } continue; } /* Build overview index */ if (!BuildIndex(index, 
&token)) { fprintf(stderr, "Cannot build overview index\n"); /* fall through... */ } /* Add modified entry to new history file */ if (out != NULL) { i = fprintf(out, "%s%c%s%c%s\n", fields[0], HIS_FIELDSEP, fields[1], HIS_FIELDSEP, TokenToText(token)); if (i == EOF || ferror(out)) { (void)fprintf(stderr, "Can't write history line, %s\n", strerror(errno)); exit(1); } } } else { /* finished */ break; } } if (Verbose) (void)fprintf(stderr, " done\n"); return TRUE; } /* ** Fetch article from spool and build overview databases */ STATIC BOOL BuildFromSpool(FILE *out, FILE *index) { static char NUL[] = "\0"; char *p; int i; TOKEN token; static ARTHANDLE *art = (ARTHANDLE *)NULL; char *error; static BUFFER Article, Buff; BUFFER *Overview; time_t Arrived; time_t Expires; time_t Posted; HASH key; if (Verbose) (void)fprintf(stderr, "Building from spool"); /* Read article from spool */ while ((art = SMnext(art, RETR_HEAD)) != (ARTHANDLE *)NULL) { /* Setup article buffer */ if (Article.Data == NULL) Article.Data = NEW(char, 1); /* Make a copy of article */ BUFFset(&Article, "", 0); BUFFappend(&Article, art->data, art->len); /* Skip empty articles */ if (!art->len) { continue; } /* Copy token */ memcpy(&token, art->token, sizeof token); /* Set up arrival time */ Arrived = art->arrived; Expires = 0; Posted = 0; /* Handle new article */ if ((error = ARTclean(Article.Data, Article.Left))) { fprintf(stderr, "Cannot parse article: %s\n", error); continue; } /* Set up Posted: */ if (ARTheaders[_posted].Found) { BUFFset(&Buff, ARTheaders[_posted].Value, ARTheaders[_posted].Length); BUFFappend(&Buff, NUL, STRLEN(NUL)); for (i = 0, p = Buff.Data; i < Buff.Left; p++, i++) if (*p == '\t' || *p == '\n' || *p == '\r') *p = ' '; if ((Posted = GetaDate(Buff.Data)) == 0) Posted = Arrived; } else { Posted = Arrived; } /* Set up Expires: */ if (ARTheaders[_expires].Found) { BUFFset(&Buff, ARTheaders[_expires].Value, ARTheaders[_expires].Length); BUFFappend(&Buff, NUL, STRLEN(NUL)); for (i = 0, p 
= Buff.Data; i < Buff.Left; p++, i++) if (*p == '\t' || *p == '\n' || *p == '\r') *p = ' '; Expires = GetaDate(Buff.Data); } /* Create MsgId hash */ key = HashMessageID(ARTheaders[_message_id].Value); /* Lookup this article at history */ if (HISlookup(key)) { if (Verbose) (void)fprintf(stderr, "."); /* already in history */ continue; } if (Verbose) (void)fprintf(stderr, "+"); /* Build overview record */ if ((Overview = ARTmakeoverview()) == NULL) { fprintf(stderr, "Cannot build overview record\n"); continue; } /* Store overview entry */ if (!OVERstore(&token, Overview->Data, Overview->Left)) { fprintf(stderr, "Cannot store overview for %s\n", TokenToText(token)); /* Probably incomplete article, remember it */ i = fprintf(out, "[%s]%c%lu%c%lu%c%lu%c%s\n", HashToText(key), HIS_FIELDSEP, (unsigned long)Arrived, HIS_SUBFIELDSEP, (unsigned long)Expires, HIS_SUBFIELDSEP, (unsigned long)Posted, HIS_FIELDSEP, TokenToText(*art->token)); continue; } /* Build overview index */ if (!BuildIndex(index, &token)) { fprintf(stderr, "Cannot build overview index\n"); /* fall through... */ } /* Add modified entry to new history file */ i = fprintf(out, "[%s]%c%lu%c%lu%c%lu%c%s\n", HashToText(key), HIS_FIELDSEP, (unsigned long)Arrived, HIS_SUBFIELDSEP, (unsigned long)Expires, HIS_SUBFIELDSEP, (unsigned long)Posted, HIS_FIELDSEP, TokenToText(token)); } if (Verbose) (void)fprintf(stderr, " done\n"); return TRUE; } /* ** Print a usage message and exit. */ STATIC NORETURN Usage(void) { (void)fprintf(stderr, "Usage: makeoverview [ -a file ][ -d overdir ][ -f file ][ -I indexfile ][ -n ][ -T dir ][ -v ]\n"); exit(1); /* NOTREACHED */ } int main(int ac, char *av[]) { FILE *out; char *p; int i; BOOL DoRebuild; BOOL val; char temp[SMBUF]; STRING tmpdir; char *mode; FILE *index = (FILE *)NULL; /* First thing, set up logging and our identity. */ openlog("makeoverview", L_OPENLOG_FLAGS | LOG_PID, LOG_INN_PROG); /* Set defaults. 
*/ if (ReadInnConf() < 0) exit(1); /* Source files */ HISTORY = COPY(cpcatpath(innconf->pathdb, _PATH_HISTORY)); ACTIVE = COPY(cpcatpath(innconf->pathdb, _PATH_ACTIVE)); SCHEMA = COPY(cpcatpath(innconf->pathetc, _PATH_SCHEMA)); TextFile = HISTORY; /* Destination files */ sprintf(temp, "%s.n", innconf->pathuniover); OverPath = COPY(temp); IndexFile = COPY(cpcatpath(innconf->pathtmp, "overview.n")); /* Flags and friends */ DoRebuild = TRUE; Verbose = FALSE; mode = "w"; Target = TARGET_HISTORY; tmpdir = innconf->pathtmp; (void)umask(NEWSUMASK); /* Parse JCL. */ while ((i = getopt(ac, av, "a:d:f:h:I:n:vHS")) != EOF) switch (i) { default: Usage(); /* NOTREACHED */ /* path to active file */ case 'a': ACTIVE = optarg; break; /* path to new uniover directory */ case 'd': OverPath = optarg; break; /* path to history file */ case 'f': TextFile = optarg; break; /* path to index file (to be fed to expireindex -a) */ case 'I': IndexFile = optarg; break; /* path to tmp directory */ case 'T': tmpdir = optarg; break; /* do not build DBZ files */ case 'n': DoRebuild = FALSE; break; case 'H': Target |= TARGET_HISTORY; break; case 'S': Target |= TARGET_SPOOL; break; case 'v': Verbose = TRUE; break; } ac -= optind; if (ac || !TextFile[0] || !OverPath[0] || !IndexFile[0]) Usage(); if ((p = strrchr(TextFile, '/')) == NULL) { /* Find the default history file directory */ HISTORYDIR = COPY(HISTORY); p = strrchr(HISTORYDIR, '/'); if (p != NULL) { *p = '\0'; } } else { *p = '\0'; HISTORYDIR = COPY(TextFile); *p = '/'; } /* Get the time. Only get it once, which is good enough. */ if (GetTimeInfo(&Now) < 0) { (void)fprintf(stderr, "Can't get the time, %s\n", strerror(errno)); exit(1); } /* Open history file. 
*/ xchdir(HISTORYDIR); /* Build temp history file */ (void)sprintf(temp, "%s.n", TextFile); TempTextFile = COPY(temp); /* Create new history text file */ if ((out = fopen(TempTextFile, mode)) == NULL) { (void)fprintf(stderr, "Can't write to history file, %s\n", strerror(errno)); exit(1); } /* Setup for article processing */ ARTsetup(); /* Adjust to overview access method */ OVERmmap = innconf->overviewmmap; if (OVERmmap) val = TRUE; else val = FALSE; /* Setup new overview database */ if (!OVERsetup(OVER_MMAP, (void *)&val)) { (void)fprintf(stderr, "Can't setup unified overview mmap\n"); } val = TRUE; if (!OVERsetup(OVER_BUFFERED, (void *)&val)) { fprintf(stderr, "Can't setup unified overview buffered\n"); exit(1); } if (!OVERsetup(OVER_PREOPEN, (void *)&val)) { fprintf(stderr, "Can't setup unified overview preopen\n"); exit(1); } if (!OVERsetup(OVER_MODE, (void *)mode)) { fprintf(stderr, "Can't setup unified overview mode\n"); exit(1); } if (!OVERsetup(OVER_DIR, (void *)OverPath)) { fprintf(stderr, "Can't setup unified overview path\n"); exit(1); } /* Initialize new unified overview DB */ if (!OVERinit()) { (void)fprintf(stderr, "Can't initialize unified overview\n"); } /* Create index file */ if ((index = fopen(IndexFile, "w")) == (FILE *)NULL) { (void)fprintf(stderr, "Can't open index file, %s\n", strerror(errno)); exit(1); } /* Initialize storage manager */ val = TRUE; if (!SMsetup(SM_RDWR, (void *)&val) || !SMsetup(SM_PREOPEN, (void *)&val)) { fprintf(stderr, "Can't setup storage manager\n"); exit(1); } /* Fail is SM can't be initialized */ if (innconf->storageapi && !SMinit()) { fprintf(stderr, "Can't initialize storage manager: %s\n", SMerrorstr); exit(1); } /* Build overview databases from history file */ if (Target & TARGET_HISTORY && !BuildFromHistory(out, TextFile, index)) { (void)fprintf(stderr, "Cannot build overview database from history\n"); exit(1); } /* Open history database */ if (!HISopen()) { (void)fprintf(stderr, "Cannot open history 
database\n"); exit(1); } /* Scan news spool for lost articles */ if (Target & TARGET_SPOOL && !BuildFromSpool(out, index)) { (void)fprintf(stderr, "Cannot build overview database from spool\n"); exit(1); } /* Close history database */ if (!HISclose()) { (void)fprintf(stderr, "Cannot close history database\n"); exit(1); } /* Flush history test file */ if (fflush(out) == EOF || ferror(out) || fclose(out) == EOF) { (void)fprintf(stderr, "Can't close new history file, %s\n", strerror(errno)); exit(1); } /* Flush index file */ if (index != (FILE *)NULL && (fflush(index) == EOF || ferror(index) || fclose(index) == EOF)) { (void)fprintf(stderr, "Can't close index file, %s\n", strerror(errno)); exit(1); } /* Move. */ xchdir(HISTORYDIR); /* Build DBZ files */ if (DoRebuild) Rebuild(); exit(0); /* NOTREACHED */ }