CURRENT PROJECTS
loading
CATEGORIES AND POSTS
loading
overset
DEVELOPMENT LOG FOR JIM PALMER
Posted 11/08/2006 in unix


This shell script uses bare-bones webalizer config files in /etc/webalizer/{NAME}.conf, where {NAME} is the leading (hostname) part of the somewhat standard {NAME}-access_log.1.gz file name produced by Linux log rotation.

This uses the zmergelog tool (thanks to Bertrand Demiddelaer!) which is readily available via

# apt-get install zmergelog

from the stable sarge repository.

This is very handy when you want to process LARGE log files quickly in a clustered environment.

And here's the script (yes I wrote this from scratch):
#!/bin/bash
#
# Build webalizer reports from the rotated access logs of every application
# server.
#
# Expected layout (defaults shown, see overrides below):
#   /web_logs/<app server>/<YYYY/MM/DD>/<NAME>-access_log.1.gz
#   /etc/webalizer/<NAME>.conf     a site without a conf file is skipped
#   /web/htdocs/webalizer/<NAME>/  report directory, created when missing
#
# Usage: <script> [YYYY/MM/DD]     the date defaults to today
#
# Optional environment overrides:
#   WEB_LOGS_ROOT       log root            (/web_logs)
#   WEBALIZER_CONF_DIR  conf directory      (/etc/webalizer)
#   WEBALIZER_OUT_ROOT  report root         (/web/htdocs/webalizer)
#   WEBALIZER_BIN       webalizer command   (/usr/bin/webalizer)
#   ZMERGELOG_BIN       zmergelog command   (/usr/local/bin/zmergelog)
#
# The script relies on arrays and other bash-only syntax, so it must run
# under bash rather than a plain POSIX /bin/sh.

# Make a failing gunzip/zmergelog stage fail its whole pipeline, so the
# failure is noticed and reported instead of being masked by webalizer.
set -o pipefail

# Print a warning on stderr.
warn() {
  printf 'warning: %s\n' "$*" >&2
}

#######################################
# Find the access logs for one day and feed them to webalizer, one report
# per log name. A log present on a single app server is piped through
# gunzip; a log present on several servers is merged with zmergelog first.
# Arguments: $1 - optional date directory (YYYY/MM/DD), default: today
# Outputs:   progress on stdout, warnings on stderr
# Returns:   0 when every site was processed (or there was nothing to do),
#            1 when at least one site failed
#######################################
main() {
  local log_date="${1:-$(date +%Y/%m/%d)}"
  local logs_root="${WEB_LOGS_ROOT:-/web_logs}"
  local conf_dir="${WEBALIZER_CONF_DIR:-/etc/webalizer}"
  local out_root="${WEBALIZER_OUT_ROOT:-/web/htdocs/webalizer}"
  local webalizer_bin="${WEBALIZER_BIN:-/usr/bin/webalizer}"
  local zmergelog_bin="${ZMERGELOG_BIN:-/usr/local/bin/zmergelog}"

  if [[ ! -d "$logs_root" ]]; then
    warn "log root ${logs_root} does not exist; nothing to process"
    return 0
  fi

  # Pass 1: collect the distinct access-log file names seen on any app
  # server for this date. Globbing (instead of parsing ls) keeps names with
  # unusual characters intact; an unmatched glob is dropped by the -f test.
  local -a log_names=()
  local path file_name known is_new
  for path in "$logs_root"/*/"$log_date"/*; do
    [[ -f "$path" ]] || continue
    file_name=${path##*/}
    # Same filter as the former "grep -i access": case-insensitive match.
    case "$file_name" in
      *[Aa][Cc][Cc][Ee][Ss][Ss]*) ;;
      *) continue ;;
    esac
    is_new=1
    for known in "${log_names[@]}"; do
      if [[ "$known" == "$file_name" ]]; then
        is_new=0
        break
      fi
    done
    if (( is_new )); then
      log_names+=("$file_name")
    fi
  done

  # Pass 2: one webalizer run per log name that has a conf file.
  local site conf_file out_dir
  local failures=0
  local -a to_process webalizer_cmd
  for file_name in "${log_names[@]}"; do
    site=${file_name%-access_log.1.gz}
    conf_file="${conf_dir}/${site}.conf"

    # only proceed if there is a conf file for this profile
    [[ -f "$conf_file" ]] || continue

    printf '%s - %s ' "$file_name" "$site"

    # create the web output directory on first use
    out_dir="${out_root}/${site}"
    if [[ ! -d "$out_dir" ]]; then
      if ! mkdir -p -- "$out_dir"; then
        printf '\n'
        warn "cannot create ${out_dir}; skipping ${site}"
        failures=$((failures + 1))
        continue
      fi
      # Ownership is best effort: the report can still be generated.
      chown apache:apache -- "$out_dir" \
        || warn "could not chown ${out_dir} to apache:apache"
    fi

    # The command is kept as an array so paths survive word splitting.
    webalizer_cmd=("$webalizer_bin" -p -c "$conf_file" -o "$out_dir")

    # every app server's copy of this log for the chosen date
    to_process=()
    for path in "$logs_root"/*/"$log_date"/"$file_name"; do
      if [[ -f "$path" ]]; then
        to_process+=("$path")
      fi
    done

    if (( ${#to_process[@]} == 0 )); then
      # Only possible if the file vanished between the two passes.
      printf '\n'
      warn "no readable copy of ${file_name} left; skipping ${site}"
      failures=$((failures + 1))
    elif (( ${#to_process[@]} == 1 )); then
      # simply pipe gunzip to webalizer if only 1 weblog file found
      printf '%s\n' ":: only 1 log file to process. "
      printf 'processing %s:\n' "${to_process[0]}"
      if ! nice gunzip -c -- "${to_process[0]}" | "${webalizer_cmd[@]}"; then
        warn "processing failed for ${site}"
        failures=$((failures + 1))
      fi
    else
      # zmergelog reads the compressed logs and emits one merged stream
      printf '%s\n' ":: multiple log files to process. "
      printf 'processing logs: %s | %s\n' \
        "${to_process[*]}" "${webalizer_cmd[*]}"
      if ! nice "$zmergelog_bin" "${to_process[@]}" | "${webalizer_cmd[@]}"; then
        warn "processing failed for ${site}"
        failures=$((failures + 1))
      fi
    fi
  done

  if (( failures > 0 )); then
    return 1
  fi
  return 0
}

main "$@"
comments
loading
new comment
NAME
EMAIL ME ON UPDATES
EMAIL (hidden)
URL
MESSAGE TAGS ALLOWED: <code> <a> <pre class="code [tab4|tabX|inline|bash]"> <br>
PREVIEW COMMENT
TURING TEST
gravatar