pubvodoolog.pl

download


#!/usr/bin/perl -w
#
#  pubvodoolog.pl v. 0.01
#
#  ***********************************************************
#  *
#  *  This is a perl script that publishes Voodoo Chat logs
#  *  with the IP addresses stripped out. This is accomplished by
#  *  reading log files from the chat log directory, stripping the IPs
#  *  from them and writing the result to a web-accessible directory.
#  *  After a daily log has been processed, the script updates the
#  *  index.html file in that web-accessible directory with a link
#  *  to the new file, e.g.
#  *  <BR><A HREF="2005-01-01.log.html">2005-01-01</a>
#  *
#  *  pubvodoolog.pl http://konstantin.antselovich.com/perl/
#  *  Voodoo Chat     http://vochat.com/index.php
#  *
#  *  Copyright (c) Konstantin Antselovich 2004-2005
#  *  [email protected] konstantin.antselovich.com
#  *
#  *  This program is free software; it may be used, copied, modified
#  *  and distributed under the terms of the GNU General Public License,
#  *  either version 2, or (at your option) any later version.
#  *
#  ***********************************************************

# -- filesystem layout used by the whole script:
#      $LOG_PUB_DIR          web-accessible directory the sanitized logs go to
#      $LOG_DATA_DIR         directory the chat writes its raw "*.log" files to
#      $PUB_DIR_INDEX_F      index page of the publish directory
#      $PUB_DIR_INDEX_F_TMP  scratch copy the new index is built in

my $LOG_PUB_DIR         = '/var/www/test/chatlogs';
my $LOG_DATA_DIR        = '/var/www/test/data/logs';
my $PUB_DIR_INDEX_F     = "$LOG_PUB_DIR/index.html";
my $PUB_DIR_INDEX_F_TMP = "${PUB_DIR_INDEX_F}.tmp";

# -- get_new_log_dates()
#
#    Lists the chat logs found in $LOG_DATA_DIR and blanks out those whose
#    date is already mentioned in the published index ($PUB_DIR_INDEX_F).
#    The index is only read here, never modified.
#
#    Returns: a list with one entry per "*.log" file, in glob() order.
#             An entry is the file's base name without ".log" (normally a
#             date such as 2005-01-01) when that log still has to be
#             published, or the empty string when the index already lists
#             it.  Callers are expected to skip the empty entries.
#    Dies:    when the index file cannot be opened or closed.

sub get_new_log_dates {

    # -- turn every log file path into its date: drop the directory part
    #    (whatever $LOG_DATA_DIR is set to) and the trailing ".log"
    my @log_dates;
    for my $path ( glob("$LOG_DATA_DIR/*.log") ) {
        my ($date) = $path =~ m{ ([^/]+) \.log \z }x;
        push @log_dates, defined $date ? $date : '';
    }

    # -- collect every date that appears in the index of the publish dir
    my %is_published;
    open( my $index_fh, '<', $PUB_DIR_INDEX_F )
        or die "cannot read $PUB_DIR_INDEX_F: $!";

    while ( my $line = <$index_fh> ) {

        # -- the digit guards keep 2005-01-1 from being found inside
        #    2005-01-10 (or any longer run of digits)
        while ( $line =~ /(?<!\d)(\d{4}-\d{1,2}-\d{1,2})(?!\d)/g ) {
            $is_published{$1} = 1;
        }
    }
    close($index_fh) or die "Cannot close $PUB_DIR_INDEX_F: $!";

    # -- exact lookup instead of a pattern match: a date is published only
    #    if precisely that date is listed; published slots become ''
    my @unpublished = map { $is_published{$_} ? '' : $_ } @log_dates;

    return @unpublished;
}

# -- parse_log( $date )
#
#    Publishes one daily log: reads "$LOG_DATA_DIR/$date.log", masks the IP
#    addresses, puts a <BR> in front of the first time stamp of every line
#    and writes the result to "$LOG_PUB_DIR/$date.log.html", replacing any
#    file of that name.
#
#    $date    base name of the log file, e.g. "2005-01-01"
#
#    Returns: 1 after the published file has been written; nothing when
#             called without an argument (a usage line is printed instead).
#    Dies:    when either file cannot be opened or closed.

sub parse_log {
    my ($date) = @_;

    # -- check if we got a log date from the caller
    if ( !@_ ) {
        print "usage: ./parselog <log file name>\n";
        return;
    }

    my $logfile    = "$LOG_DATA_DIR/$date.log";
    my $publogfile = "$LOG_PUB_DIR/$date.log.html";

    # -- open input and output file and die on errors
    open( my $log_in, '<', $logfile )
        or die "cannot read $logfile : $!";
    open( my $html_out, '>', $publogfile )
        or die "cannot write to $publogfile : $!";

    # -- a lexical line variable keeps the caller's $_ untouched
    while ( my $line = <$log_in> ) {

        # -- substitute EVERY IP address on the line with "-IP-"; without
        #    /g a second address on the same line would be published
        $line =~ s/\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/-IP-/g;

        # -- add <BR> in front of the first hh:mm:ss time stamp
        $line =~ s/(\d{2}:\d{2}:\d{2})/<BR>$1/;

        # -- write output file line by line
        print {$html_out} $line;
    }

    close($log_in)   or die "cannot close $logfile : $!";

    # -- buffered write errors only show up here, so this check matters
    close($html_out) or die "cannot close $publogfile : $!";

    return 1;
}
#
# -- main procedure
#
#    1. ask get_new_log_dates() which logs are not in the index yet
#    2. copy index.html to index.html.tmp line by line; at the line that
#       carries the END_OF_LIST marker, publish every new log through
#       parse_log(), add a link for it and finish with a fresh marker
#    3. rename the temporary file over index.html
#    4. answer with a CGI redirect to index.html

# -- get dates of the logs that have not been published yet; already
#    published ones come back as empty strings and are dropped here
my @unpublogdates = get_new_log_dates();
my @new_dates     = grep { $_ } @unpublogdates;

open( my $index_in, '<', $PUB_DIR_INDEX_F )
    or die "Cannot open $PUB_DIR_INDEX_F: $!\n";
open( my $index_out, '>', $PUB_DIR_INDEX_F_TMP )
    or die "Cannot open $PUB_DIR_INDEX_F_TMP: $!\n";

my $marker_seen = 0;

# -- the current line lives in a lexical so that subs called inside the
#    loop cannot overwrite it by reading files through $_
while ( my $line = <$index_in> ) {

    # -- ordinary line: copy it unchanged
    if ( $line !~ /END_OF_LIST/ ) {
        print {$index_out} $line;
        next;
    }

    $marker_seen = 1;

    # -- retire the old END_OF_LIST marker; the line is written exactly
    #    once (it must not be printed again after the new marker)
    $line =~ s/END_OF_LIST/updated/;
    print {$index_out} $line;

    # -- strip new logs from IP addresses, save 'em in to pub directory
    #    and add them to the directory's index
    for my $date (@new_dates) {
        parse_log($date);
        print {$index_out} "<BR><A HREF=\"$date.log.html\">$date</a>\n";
    }

    # -- print new END_OF_LIST so the next run knows where to append
    print {$index_out} "<!-- END_OF_LIST //-->\n";
}

close($index_in)  or die "Cannot close $PUB_DIR_INDEX_F: $!\n";
close($index_out) or die "Cannot close $PUB_DIR_INDEX_F_TMP: $!\n";

# -- without a marker nothing could be appended; say so instead of
#    silently leaving the new logs unpublished
if ( !$marker_seen && @new_dates ) {
    warn "no END_OF_LIST marker in $PUB_DIR_INDEX_F, new logs were not published\n";
}

# -- and finally rename index.html.tmp to index.html
rename( $PUB_DIR_INDEX_F_TMP, $PUB_DIR_INDEX_F ) or die "Cannot rename: $!\n";

# -- handle CGI stuff - redirect to index.html when called from the web

use CGI;
my $query = CGI->new;
print $query->redirect("index.html");

# -- this is it :-)
exit(0);