: /bin/sh
#
#  Shell script for cnews log reports
#
#  This script scans the cnews data areas and looks for things that
#  are out of whack.  It reads some of the log files looking for
#  interesting information.  If there are no errors or oddities, it
#  tells you so.  This should be run immediately after the cnews daily
#  'newsdaily' script.  It expects the logs to be named <thing>.o
#  but you can change that by resetting the *LOG variables as shown
#  below.
#
#  One point of "philosophy": this whole script is built on the idea
#  that silence is *not* golden.  If things are OK, it explicitly
#  tells you so.
#
#  The log file analyzer is a big awk script.  Make sure you understand
#  awk associative arrays before attempting to modify it. 
#
#  $RCSfile: cnews.logrep,v $	$Revision: 0.16 $
#
#  $Author: news $	$Date: 91/04/22 11:15:10 $
#
#  $State: Exp $	$Locker: news $
#
#  $Log:	cnews.logrep,v $
#  Revision 0.16  91/04/22  11:15:10  news
#  Cleaned up extra backslashes.  Added sendme processing with the ihaves.
#  Made names of all log files explicit.
#  
#  Revision 0.15  91/04/22  10:03:59  news
#  Added cancel and failed cancel reporting.
#  
#  Revision 0.14  91/04/22  09:12:14  news
#  Got oldies number right.
#  
#  Revision 0.13  91/04/12  11:40:12  news
#  Added reports on future dates and too-old dates, added notice on
#  postings which lack message ids.
#  
#  Revision 0.12  91/04/11  16:46:08  news
#  Added outgoing batch checking.
#  
#  Revision 0.11  91/04/11  16:33:33  news
#  Many changes as per suggestions from Brendan Kehoe and Owen Medd.
#  Many many changes.  Many many many changes.
#  
#  Initialize the dirs, names of the logs, and variables
#
. /u/lib/news/bin/config
PATH="$NEWSPATH:$NEWSBIN"
export PATH
SCRIPT=`basename "$0"`
#
# Shorthand names for standard C news locations
#
LOGDIR=$NEWSCTL
SYSFILE=${LOGDIR}/sys
INCOMING=${NEWSARTS}/in.coming
# This used to read ${NEWSARGS}, a misspelling of NEWSARTS that left
# OUTGOING pointing at /out.going.
OUTGOING=${NEWSARTS}/out.going
BADBATCHES=${INCOMING}/bad
#
# If you are running RELAYNEWS as a daemon, uncomment this next line
#
#RELAYNEWS=1
#
# When there are completely unrecognizable entries in the log, the
# normal procedure is to print them out in toto so you can extend
# this script to say something intelligent about them.  But if
# there are hundreds, it can be overwhelming.  These variables allow
# you to restrict the number you will print.
#
MAX_DEFECTS=5
MAX_UNKNOWN=5
MAX_MYSTERY_DASH=5
MAX_MYSTERY=5
MAX_NOPATH=5
#
# If this is not where you put the old log files, adjust
# these locations and names accordingly
#
ERRLOG=${LOGDIR}/errlog.o
NEWSLOG=${LOGDIR}/log.o
BATCHLOG=${NEWSCTL}/batchlog.o
BATCHPARMS=${NEWSCTL}/batchparms
#
NEWSMGR=news
HOSTNAME=`newshostname`
#
# The report is accumulated in a scratch file.  Prefer mktemp, which
# creates the file safely under an unpredictable name; fall back to the
# traditional name built from the process id where mktemp is missing.
#
TEMPFILE=`mktemp "/tmp/${SCRIPT}.XXXXXX" 2>/dev/null` || TEMPFILE=/tmp/${SCRIPT}.$$
#
# Split the output of date into the positional parameters.  The --
# keeps set from treating the output as options, or from dumping every
# variable if date prints nothing.  DATE assumes the traditional
# "Mon Apr 22 11:15:10 EDT 1991" layout.
#
set -- `date`
DATE="${1} ${2} ${3}, ${6}"
#
# Remove the scratch file however the script ends, then send everything
# written to standard output from here on into the scratch file.
#
trap 'rm -f "$TEMPFILE" ; exit' 0 1 2 3 15
exec > "$TEMPFILE"
#
#  Print preface.
#
cat << EOF
This is the news system status report for $DATE as
generated by $0.

EOF
#
#  Warn if the news host name could not be determined.  The log
#  analysis compares the host name against the log to count local
#  postings, so without it the report is incomplete.
#
if [ -z "$HOSTNAME" ] ; then
	cat << EOF
The news host name was not found.  Please check the file
$NEWSCTL/whoami, and make sure the host name is set
appropriately for your system (e.g., look at 'uuname -l' and
'uname -n').  The report will be somewhat incomplete.

EOF
fi
#
#  Errorcheck to be sure the error log exists.
#
if [ ! -f "$ERRLOG" ] ; then
	cat << EOF
The news error log ($ERRLOG) was not found.
Please check the news system and this script.

EOF
else
	#
	#  If the error log is empty, report that.  Otherwise copy it to
	#  the news manager.  In either case, give a nice prefatory remark.
	#
	SIZE=`wc -l < "$ERRLOG"`
	#  Some wc implementations pad the count with blanks; the unquoted
	#  echo strips them.
	SIZE=`echo $SIZE`
	if [ "$SIZE" = 0 ] ; then
		cat << EOF
The news error log ($ERRLOG) shows no errors.

EOF
	else
		#
		#  Give a nice prefatory remark, then copy the log to the news
		#  manager.  Put beginning and end markers.
		#
		cat << EOF
The news error log ($ERRLOG) has $SIZE complaints.  A
copy of the error log is included below:

$ERRLOG:
EOF
		cat < "${ERRLOG}"
		cat << EOF
End of $ERRLOG

EOF
	fi
fi
#
#  Report on bad batches left lying around
#
#  The directory name is quoted: an unquoted empty value would turn the
#  test into "[ -d ]", which is true, and the cd would then land in the
#  home directory.
#
if [ -d "${BADBATCHES}" ] ; then
	#
	#  Work in a subshell so the cd does not leak into the rest of
	#  the script.
	#
	(
	cd "$BADBATCHES" || {
		echo "Could not enter bad batches directory ($BADBATCHES)!"
		exit 1
	}
	COUNT=`ls | wc -l`
	#  Strip any blank padding wc put around the number.
	COUNT=`echo $COUNT`
	if [ "$COUNT" = 0 ] ; then
		echo "There are no bad batches being held in $BADBATCHES."
	else
		echo "There are $COUNT bad batches being held in $BADBATCHES:"
		#  Long listing for a handful, columns for a crowd.
		if [ "$COUNT" -gt 10 ] ; then
			ls -C
		else
			ls -l
		fi
	fi
	)
else
	echo "Could not find bad batches directory ($BADBATCHES)!"
fi
echo ""
#
#  Report on old nrun files.
#
#  The directory name is quoted so that an empty value is reported as a
#  missing directory instead of slipping through the test.
#
if [ ! -d "$INCOMING" ] ; then
	echo "Could not find incoming directory ($INCOMING)!"
else
	#
	#  Work in a subshell so the cd does not leak into the rest of
	#  the script.
	#
	(
	cd "$INCOMING" || {
		echo "Could not enter incoming directory ($INCOMING)!"
		exit 1
	}
	#
	#  Collect the fragments more than a day old.  The list is used
	#  unquoted below so that it splits into one word per file; this
	#  assumes the file names contain no white space.
	#
	LIST=`find . -name 'n*' -type f -mtime +1 -print`
	COUNT=`echo $LIST | wc -w`
	COUNT=`echo $COUNT`
	if [ "$COUNT" != 0 ] ; then
		echo "There seem to be old run fragments left in ${INCOMING}:"
		echo ""
		#  List exactly the old fragments found above, not every
		#  file whose name starts with n.
		ls -ls $LIST
	else
		echo "There are no old run fragments left in ${INCOMING}."
	fi
	echo ""
	#
	#  Report on old batch files.
	#
	LIST=`find . -name "[1-9]*" -type f -mtime +1 -print`
	COUNT=`echo $LIST | wc -w`
	COUNT=`echo $COUNT`
	if [ "$COUNT" != 0 ] ; then
		cat << EOF
There seem to be $COUNT old incoming batches left in ${INCOMING}.
You should check to see if unbatching is being done properly.  The
old batches are:

EOF
		if [ "$COUNT" -gt 10 ] ; then
			ls -C $LIST
		else
			ls -ld $LIST
		fi
	else
		echo "There are no old batches left in ${INCOMING}."
	fi
	)
fi
echo ""
#
#  Report on the status of batching.  If the batchparms files
#  does not exist, simply state that we think no batching is
#  going on.  If it's there, try to report on batching.  If your
#  site doesn't do batching and you'd like to shut up this section
#  of the report, rename $BATCHPARMS to $BATCHPARMS.sample.
#
#  This needs to be expanded to report more fully on batch results....
#
if [ ! -f "$BATCHPARMS" ] ; then
	echo "You do not appear to be doing any uucp batching (no batch"
	echo "parameters file $BATCHPARMS)."
else
	if [ ! -f "$BATCHLOG" ] ; then
		echo "Could not process batch log ${BATCHLOG}."
	else
		#
		#  Report on stalled batching for outside sites.  grep -c
		#  prints the number of matching lines; if grep cannot read
		#  the log and prints nothing, treat that as zero.
		#
		COUNT=`grep -c "no recent movement" "${BATCHLOG}"`
		if [ "${COUNT:-0}" != 0 ] ; then
			echo "The news batch queue is full for the following sites:"
			#  The first field of each matching line is printed as
			#  the site name, once per site.
			grep "no recent movement" "${BATCHLOG}" | awk '{ print $1 }' | sort -u
		else
			echo "There are no full outgoing batch queues shown in $BATCHLOG."
		fi
	fi
fi
echo ""
#
#  Errorcheck to be sure the regular log exists, then analyze it.
#
#  analyze_newslog reads a C news log on standard input and writes the
#  traffic, activity, and oddity report on standard output.  It takes
#  no arguments; it reads these shell variables:
#
#	HOSTNAME	name of this site, used to spot local postings
#	RELAYNEWS	set to 1 if relaynews runs as a daemon, which moves
#			the list of receiving systems one field to the
#			right; unset or empty counts as 0
#	MAX_UNKNOWN, MAX_NOPATH, MAX_MYSTERY_DASH, MAX_MYSTERY, MAX_DEFECTS
#			how many sample entries of each kind to print;
#			each defaults to 5 when unset or empty
#
#  The values are handed to awk with -v instead of being spliced into
#  the program text, so an empty value cannot break the awk syntax.
#  The awk program is one single-quoted shell word and therefore must
#  never contain a single quote, not even in a comment.
#
#  I am not a big awk fan, but gotta admit you can do a lot with
#  associative arrays.
#
analyze_newslog () {
	awk -v hostname="$HOSTNAME" \
	    -v relay="${RELAYNEWS:-0}" \
	    -v max_unknown="${MAX_UNKNOWN:-5}" \
	    -v max_nopath="${MAX_NOPATH:-5}" \
	    -v max_mystery_dash="${MAX_MYSTERY_DASH:-5}" \
	    -v mystery_max="${MAX_MYSTERY:-5}" \
	    -v defect_max="${MAX_DEFECTS:-5}" \
	    '
BEGIN {
	accept_count = 0
	entry_count = 0
	ship_count = 0
	x_count = 0
	duplicate_count = 0
	ihave_count = 0
	sendme_count = 0
	unapproved_count = 0
	unsub_count = 0
	junk_count = 0
	local_count = 0
	bad_header_count = 0
	empty_header_count = 0
	non_header_count = 0
	bad_date_count = 0
	no_msgid_count = 0
	unknown_no_count = 0
	future_count = 0
	oldies_count = 0
	cancel_count = 0
	precancel_count = 0
	nopath_count = 0
	mystery_dash_count = 0
	mystery_count = 0
	defect_count = 0
}
#
#  Log entries are handled by field position: $4 is the site the
#  article came from, $5 is the one-character tag, and the fields from
#  $7 on carry either the receiving systems or the rejection reason.
#
{
	#
	#  Count all entries, note and save defective ones
	#
	entry_count++
	if ( NF < 6 )
	{
		defect_count++
		defective_entry[ $0 ]++
	}
	else if ( $5 == "+" )
	{
		#
		#  Track the accepted articles
		#
		accept_count++
		accept_host[ $4 ]++
		if ( NF > 6 )
		{
			ship_count++
			shipfield = 7 + relay
			#
			#  Record which systems got articles
			#
			while ( shipfield <= NF )
			{
				ship_list[ $shipfield ]++
				shipfield++
			}
		}
		#
		#  Track the number of local postings
		#
		if ( $4 == hostname )
			local_count++
	}
	#
	#  Track all junked articles by reason junked.
	#
	else if ( $5 == "j" )
	{
		junk_count++
		reason = 7
		junkfor = ""
		while ( reason <= NF )
		{
			junkfor = junkfor $reason " "
			reason++
		}
		junk_reason[ junkfor ]++
	}
	#
	#  Track ihave/sendme records
	#
	else if ( $5 == "i" )
	{
		ihave_count++
		ihave_source[ $4 ]++
	}
	else if ( $5 == "s" )
	{
		sendme_count++
		sendme_source[ $4 ]++
	}
	#
	#  Track the exceptions on a class-by-class basis
	#
	else if ( $5 == "-" )
	{
		#
		#  Track the number of duplicates we get from various sites.
		#
		if ( $7 == "duplicate" )
		{
			duplicate_count++
			duplicate_host[ $4 ]++
		}
		#
		#  Track the number of unapproved articles and their sources.
		#
		else if ( $7 == "unapproved" )
		{
			unapproved_count++
			unapproved_source[ $4 ]++
			unapproved_target[ $12 ]++
		}
		#
		#  Lots of errors are no this, no that, etc.  We process
		#  them all at once.
		#
		else if ( $7 == "no" )
		{
			if ( $8 == "subscribed" )
			{
				#
				#  Track the number of unsubscribed articles we
				#  get from various sites and the target group.
				#
				unsub_count++
				unsub_source[ $4 ]++
				unsub_target[ $11 ]++
			}
			#
			#  Some articles come in sans Path: headers.  This
			#  identifies them.  The test has to live inside the
			#  "no" branch: every entry whose seventh field is
			#  "no" ends up here, so a test placed after this
			#  branch could never match.  The full entry is kept
			#  so that samples can be printed in the report.
			#
			else if ( ( $8 == "Path:" ) && ( $9 == "header" ) )
			{
				nopath_count++
				nopath_source[ $4 ]++
				nopath_entry[ $0 ]++
			}
			#
			#  These are needed for new pedantic Cnews.  Over
			#  time these should decline.  We track where the
			#  articles came from but not the message ids.  If
			#  the admin is really interested, they can track
			#  them down by grepping the logfile for "no FOO"
			#  and the name of the offending host.
			#
			else if ( ( $8 == "Date:" ) || ( $8 == "@" ) || ( $8 == "From:" ) || ( $8 == "Subject:" ) )
			{
				bad_header_count++
				bad_header_source[ $4 ]++
			}
			#
			#  These are minus for unknown reason.  An obvious
			#  candidate for mods to this script.
			#
			else
			{
				unknown_no_count++
				unknown_no_field[ $8 ]++
				unknown_no_entry[ $0 ]++
			}
		}
		#
		#  This next is a *severe* error!  With no message id in
		#  the entry the reason starts one field earlier.
		#
		else if ( ( $6 == "no" ) && ( $7 == "Message-ID:" ) )
		{
			no_msgid_count++
			no_msgid_source[ $4 ]++
		}
		#
		#  Other less serious errors
		#
		else if ( $7 == "empty" )
		{
			empty_header_count++
			empty_header_field[ $8 ]++
			empty_header_source[ $4 ]++
		}
		else if ( ( $9 == "contains" ) && ( $10 == "non-header" ) )
		{
			non_header_count++
			non_header_source[ $4 ]++
		}
		else if ( ( $7 == "unparsable" ) && ( $8 == "Date:" ) )
		{
			bad_date_count++
			bad_date_source[ $4 ]++
		}
		#
		#  Track the number of articles which we explicitly reject via
		#  x records in the active file.  We do not track the specific
		#  groups for two reasons -- presumably since you xed it, you
		#  know what you reject; and it is hard to parse.
		#
		else if ( ( $7 == "all" ) && ( $8 == "groups" ) )
		{
			x_count++
		}
		#
		#  Track the articles which are too far in the future
		#
		else if ( ( $7 == "Date:" ) && ( $8 == "too" ) && ( $9 == "far" ) )
		{
			future_count++
			future_source[ $4 ]++
		}
		#
		#  Track the articles which are too old to be worth keeping
		#  I love this cnews feature -- it will probably keep things
		#  freer of loops than anything else.
		#
		else if ( ( $7 == "older" ) && ( $8 == "than" ) )
		{
			oldies_count++
			oldies_source[ $4 ]++
		}
		#
		#  Any unrecognised "-" tag is kept here.  As we find these
		#  they should be added to the things handled above.
		#
		else
		{
			mystery_dash_count++
			mystery_dash_entry[ $0 ]++
		}
	}
	#
	#  This counts cancel messages.  They are not generated by
	#  standard C news, but by Dave Aldens relaynews daemon.
	#
	else if ( $5 == "c" )
	{
		cancel_count++
	}
	#
	#  This counts failed cancel messages.  They are not generated
	#  by standard C news, but by Dave Aldens relaynews daemon.
	#
	else if ( $5 == "f" )
	{
		precancel_count++
	}
	#
	#  Any unrecognised tags get noted here.  As we get these
	#  they should be added to the things handled above.
	#
	else
	{
		mystery_count++
		mystery_tags[ $5 ]++
		mystery_entry[ $0 ]++
	}
}
END {
	printf "\nThere were %d entries in the standard log.  Breakdown:\n", entry_count
	if ( entry_count != 0 )
	{
		#
		#  Report the data by categories.
		#
		#  Local postings.  It would be nice to report the newsgroups
		#  posted to, but that data is not in the log.
		#
		printf "\n%6d articles were posted from this site (%s)\n", local_count, hostname
		#
		#  Next, where outside articles came from
		#
		printf "\n%6d incoming articles accepted for processing\n", accept_count
		if ( accept_count > 0 )
			for ( host in accept_host )
				printf "    %6d from %s\n", accept_host[ host ], host
		printf "\n%6d of those were rejected as duplicates\n", duplicate_count
		#
		#  Report number of duplicates and who gave them to us.
		#
		if ( duplicate_count > 0 )
			for ( host in duplicate_host )
				printf "    %6d from %s\n", duplicate_host[ host ], host
		#
		#  Ihave activity.  The breakdown is shown as soon as there
		#  is at least one message of the kind.
		#
		printf "\n%6d/%d ihave/sendme messages were processed\n", ihave_count, sendme_count
		if ( ihave_count > 0 )
		{
			for ( source in ihave_source )
				printf "    %6d from %s\n", ihave_source[ source ], source
		}
		if ( sendme_count > 0 )
		{
			for ( source in sendme_source )
				printf "    %6d from %s\n", sendme_source[ source ], source
		}
		#
		#  Cancel and failed cancel reporting.  These messages only appear
		#  in the log file if you are running Dave Aldens relaynews
		#  daemon.  Since no cancel messages almost certainly means
		#  vanilla C news, we do not report on zero counts.
		#
		if ( cancel_count > 0 )
		{
			printf "    \n%6d articles were cancelled.\n", cancel_count
		}
		if ( precancel_count > 0 )
		{
			printf "    \n%6d articles were cancelled before receipt.\n", precancel_count
		}
		#
		#  Junkage report.  Give total junkage, then break it down by
		#  reason.
		#
		printf "\n%6d articles were junked\n", junk_count
		if ( junk_count > 0 )
			for ( group in junk_reason )
				printf "    %6d %s\n", junk_reason[ group ], group
		#
		#  Outgoing traffic report.  Total ships, then break down by
		#  system.
		#
		printf "\n%6d articles were shipped to other systems\n", ship_count
		if ( ship_count > 0 )
		{
			for ( to in ship_list )
				printf "    %6d for %s\n", ship_list[ to ], to
			printf "      (Totals may differ due to same article shipped to multiple systems)\n"
		}
		#
		#  List how many articles we explicitly rejected.  Note we
		#  do not track the groups explicitly.
		#
		if ( x_count > 0 )
		{
			printf "\n%6d articles were accepted but not posted/Xed by the active file\n", x_count
		}
		#
		#  Now we report on the questionable stuff
		#
		#
		#  This error comes first, as it indicates a severe problem
		#  with either you or your immediate neighbors
		#
		if ( no_msgid_count > 0 )
		{
			printf "\n* * * Begin Serious Error! * * *\n"
			printf "* * * There were attempts to insert articles which had *no* messages ids.\n"
			printf "* * * This is a sign of significant errors in the posting or transfer\n"
			printf "* * * software and should be checked out IMMEDIATELY!\n"
			for ( source in no_msgid_source )
				printf "* * * %6d attempts were made from %s\n", no_msgid_source[ source ], source
			printf "* * * End Serious Error! * * *\n"
		}
		#
		#  Other errors are less serious.
		#
		#  The attempts to post to moderated groups.
		#
		printf "\n%6d articles were rejected as unapproved for moderated groups\n", unapproved_count
		if ( unapproved_count > 0 )
		{
			for ( target in unapproved_target )
				printf "    %6d posted to %s\n", unapproved_target[ target ], target
			for ( source in unapproved_source )
				printf "    %6d came from %s\n", unapproved_source[ source ], source
		}
		#
		#  Now the transfer of unsubscribed stuff
		#
		printf "\n%6d articles were rejected as for unsubscribed groups\n", unsub_count
		if ( unsub_count > 0 )
		{
			for ( target in unsub_target )
				printf "    %6d posted to %s\n", unsub_target[ target ], target
			for ( source in unsub_source )
				printf "    %6d came from %s\n", unsub_source[ source ], source
		}
		#
		#  List the articles rejected for having non-headers in the
		#  header section.  Since the further spread of pedantic
		#  C news will eventually eliminate these, we do not report
		#  in the case where everything is OK.
		#
		if ( non_header_count > 0 )
		{
			printf "\n%6d articles were rejected as having nonheaders in the header section\n", non_header_count
			for ( source in non_header_source )
				printf "    %6d came from %s\n", non_header_source[ source ], source
		}
		#
		#  List the number of articles rejected due to incorrect headers
		#  and where they came from.  Since the further spread of pedantic
		#  C news will eventually eliminate these, we keep silent if
		#  everything is OK.
		#
		if ( bad_header_count > 0 )
		{
			printf "\n%6d articles were rejected as having bad headers (no Date:, etc)\n", bad_header_count
			for ( source in bad_header_source )
				printf "    %6d came from %s\n", bad_header_source[ source ], source
		}
		#
		#  List those header fields and origins which were noted as
		#  being empty.  Since the further spread of pedantic C news will
		#  eventually eliminate these, we keep silent if everything is OK.
		#
		if ( empty_header_count > 0 )
		{
			printf "\n%6d articles were rejected as having empty headers\n", empty_header_count
			for ( field in empty_header_field )
				printf "    %6d entries had empty \"%s\" fields\n", empty_header_field[ field ], field
			for ( source in empty_header_source )
				printf "    %6d entries came from %s\n", empty_header_source[ source ], source
		}
		#
		#  Date rejections.  Since the further spread of pedantic C news will
		#  eventually eliminate these, we keep silent if everything is OK.
		#  The total printed is the bad date count, not the bad header
		#  count.
		#
		if ( bad_date_count > 0 )
		{
			printf "\n%6d articles were rejected as having unparseable dates:\n", bad_date_count
			for ( source in bad_date_source )
				printf "    %6d came from %s\n", bad_date_source[ source ], source
		}
		#
		#  These are article which are dated too far in the future.
		#
		if ( future_count > 0 )
		{
			printf "\n%6d articles were rejected as dated too far in the future:\n", future_count
			for ( source in future_source )
				printf "    %6d came from %s\n", future_source[ source ], source
		}
		#
		#  Time rejects -- articles which are just too damned old.  Probably
		#  signs of a news loop.
		#
		if ( oldies_count > 0 )
		{
			printf "\n%6d articles were rejected as dated too far in the past:\n", oldies_count
			for ( source in oldies_source )
				printf "    %6d came from %s\n", oldies_source[ source ], source
		}
		#
		#  The fun stuff.  This is where we report unrecognizable things.
		#  Data reported here is either meat for future mods to this script
		#  or indication of bugs in the news software.
		#
		#  In each sample loop below the log entry itself is the array
		#  index, so it is the loop variable that gets printed; the
		#  array element only holds how often the entry occurred.
		#
		if ( unknown_no_count > 0 )
		{
			printf "\n%6d articles were rejected with an unrecognised comment\n", unknown_no_count
			printf "about \"no such-and-such\" in the entry.  Those rejections were:\n"
			for ( no_field in unknown_no_field )
				printf "   %s had the reason: %s\n", unknown_no_field[ no_field ], no_field
			printf "Here is up to %d of the entries:\n", max_unknown
			for ( no_entry in unknown_no_entry )
				if ( max_unknown-- > 0 )
					printf "  %s\n", no_entry
				else
					break
		}
		#
		if ( nopath_count > 0 )
		{
			printf "\n%6d articles were rejected with a missing Paths header.\n", nopath_count
			printf "Here are up to %d of the entries:\n", max_nopath
			for ( nopath in nopath_entry )
				if ( max_nopath-- > 0 )
					printf "   %s\n", nopath
				else
					break
		}
		#
		if ( mystery_dash_count > 0 )
		{
			printf "\n%6d articles were rejected with an unrecognised \"-\" field.\n", mystery_dash_count
			printf "Here are up to %d of the entries:\n", max_mystery_dash
			for ( mystery_dash in mystery_dash_entry )
				if ( max_mystery_dash-- > 0 )
					printf "   %s\n", mystery_dash
				else
					break
		}
		if ( mystery_count > 0 )
		{
			printf "\nThere were %d entries which were correctly formatted but with tag\n", mystery_count
			printf "fields that were not recognized.  Those tag fields and frequency were:\n"
			#
			#  Count the distinct tags first so that the last one
			#  can end the line instead of trailing a comma.
			#
			tag_count = 0
			for ( mystery in mystery_tags )
				tag_count++
			for ( mystery in mystery_tags )
			{
				if ( tag_count > 1 )
					printf "     \"%s\" (%d),", mystery, mystery_tags[ mystery ]
				else
					printf "     \"%s\" (%d)\n", mystery, mystery_tags[ mystery ]
				tag_count--
			}
			printf "Here are up to %d of the entries:\n", mystery_max
			for ( mystery in mystery_entry )
				if ( mystery_max-- > 0 )
					printf "   %s\n", mystery
				else
					break
		}
		#
		#  Report on malformed lines in the log.  Data reported here is
		#  almost certainly a bug in the news software.
		#
		if ( defect_count > 0 )
		{
			printf "\nThere were %d entries in the log with too few fields.  Here is a sample:\n", defect_count
			for ( defect in defective_entry )
				if ( defect_max-- > 0 )
					printf "   \"%s\"\n", defect
				else
					break
		}
	}
	#
	#  If nothing was wrong, print a nice reassuring message.  Entries
	#  with an unrecognised "no such-and-such" reason count as
	#  unrecognized too, so they suppress the message.
	#
	if ( ( mystery_count == 0 ) && ( mystery_dash_count == 0 ) && ( unknown_no_count == 0 ) && ( defect_count == 0 ) )
		printf "\nNo defects or unrecognized entries were found in the standard log.\n"
}'
}
if [ ! -f "$NEWSLOG" ] ; then
	cat << EOF
Could not process news log!

The standard news log ($NEWSLOG) was not found.
Please check the news system and this script.

EOF
else
	#
	#  Generate traffic, activity, and oddity report.
	#
	analyze_newslog < "$NEWSLOG"
fi
#
#  Mail the finished report to the news manager.  Standard output was
#  redirected into $TEMPFILE near the top of the script, so that file
#  now holds everything printed above.  $NEWSMGR is left unquoted, so
#  a value containing blanks is split into several recipient arguments.
#
/usr/bin/mail -s "News Log Report for $DATE" $NEWSMGR < $TEMPFILE
