#!/usr/bin/perl
#
# deepLog:
#   reports on deep visits to ABL's websites
#   correlates Apache and RealServer access logs for visits to Ariadne's Thread
#   uses aplogs-script files:     /* script included at end */
#     al.deepIP as starting point (list of deep visitors)
#     al.extract for Apache details on visit
#   also uses rl.ext for RealServer details
#
# I made this. -- prie@abl.com, 10/15/99
#
# last modified: 4/2/2000
#
# excerpt from report:
#
# 64.14.66.100: ghost.directhit.com
#  07/Apr/2000 15:54:56 -0400 /robots.txt
#  07/Apr/2000 16:15:45 -0400 /
#  07/Apr/2000 19:02:47 -0400 /opt/PlayingCards/index.html
#  07/Apr/2000 19:17:13 -0400 /The_Wireless/index.html
#  07/Apr/2000 19:24:45 -0400 /at/city/index.html
#  07/Apr/2000 19:37:35 -0400 /at/aas/index.html
#  07/Apr/2000 20:09:06 -0400 /The_Wireless/iCryoLife/index.html
#  07/Apr/2000 20:53:52 -0400 /The_Wireless/iBATech/index.html
#  07/Apr/2000 21:02:07 -0400 /opt/ExpecTk/index.html
#  07/Apr/2000 21:14:23 -0400 /at/dotties/index.html
#  07/Apr/2000 22:38:37 -0400 /The_Wireless/BauHaus/index.html
#  07/Apr/2000 22:58:27 -0400 /The_Wireless/iLHS/index.html
#  07/Apr/2000 21:02:50 -0400 /at/Dotties/robots.txt
#  07/Apr/2000 22:18:34 -0400 /at/Dotties/
#
#
use strict;
use warnings;
use Socket;     # for DNS lookups

# Open a log file for reading and return the handle.
# Dies with the path and OS error when the file cannot be opened, so a
# missing input never yields a report that merely looks complete.
sub open_log {
    my ($path) = @_;
    open(my $fh, '<', $path) or die "deepLog: cannot open $path: $!\n";
    return $fh;
}

# Reverse-resolve an IP address to a hostname.
# Returns '' when the address cannot be packed or has no reverse DNS entry,
# which prints exactly as the undefined value did in the report header line.
sub hostname_for {
    my ($ip) = @_;
    my $packed = inet_aton($ip);
    return '' unless defined $packed;
    my $host = gethostbyaddr($packed, AF_INET);
    return defined $host ? $host : '';
}

# Print one report section.
# Takes a reference to a hash of array references; element 0 of each array
# is the hostname, the remaining elements are the recorded visits.
sub print_visits {
    my ($visits_for) = @_;
    foreach my $ip (sort keys %$visits_for) {
        my ($hostname, @visits) = @{ $visits_for->{$ip} };
        print "$ip: $hostname\n";          # IP and hostname
        print " $_\n" foreach @visits;
    }
    return;
}

# open /tmp/al.deepIP, IPs of visitors to Ariadne's Thread, The_Wireless, etc.
# store in a hash, initialized with result from DNS lookup
# (NB: the hash is a hash of array references)
#
my %deepData = ();
my $deepList = open_log('/tmp/al.deepIP');
while (my $line = <$deepList>) {
    # skip blank/malformed lines instead of creating an entry keyed on ''
    next unless $line =~ /^(\S+)$/;
    my $visitorIP = $1;
    $deepData{$visitorIP} = [ hostname_for($visitorIP) ];
}
close($deepList);

# open /tmp/al.extract, Apache log w/o headlines/rules, local, and HEAD requests
# read line-by-line, as described by regular expression
# if a line pertains to a previously saved IP (i.e, a deep visitor), then
#   save info on visit
#
# captures: clientIP, username, passwd, date, time, tzone, requestMethod,
#           fileURL, protocol, status, bytes
# (after Perl Cookbook, p. 727)
my $apache_re = qr/^(\S+) (\S+) (\S+) \[([^:]+):(\d+:\d+:\d+) ([^\]]+)\] "(\S+) (.*?) (\S+)" (\S+) (\S+)$/;

my $apacheLog = open_log('/tmp/al.extract');
while (my $line = <$apacheLog>) {
    # only the fields the report uses are kept; a failed match yields undef
    my ($clientIP, $date, $time, $tzone, $fileURL, $bytes)
        = ($line =~ $apache_re)[0, 3, 4, 5, 7, 10];
    next unless defined $clientIP;              # line did not parse
    next unless exists $deepData{$clientIP};    # not a deep visitor

    # byte count is only reported for mp3 requests
    my $visitInfo = ($fileURL =~ m/mp3/)
                  ? "$date $time $tzone $fileURL $bytes"
                  : "$date $time $tzone $fileURL";
    push( @{ $deepData{$clientIP} }, $visitInfo );
}
close($apacheLog);

# open simplified RealServer access log (local accesses removed)
# read input line-by-line
# remove "][", which separates Stat fields
# describe a typical line by relevant fields
# if Apache visitor, save:
#   date, file, bytes, minutes, bitrate, good resends, bad resends
#   RealAudio stats
# else save separately
#
# captures: clientIP, dateTime, fileURL, bytesTX, stats, seconds, goodRX, badRX
my $real_re = qr/(.*) - - \[(.*)\] "GET (.*) .*" \d+ (\d+) \[.*\] \[.*\] \[(.*)\] \d+ \d+ (\d+) (\d+) (\d+)/;

my %unexpectedData = ();
my $realLog = open_log('/tmp/rl.ext');
while (my $line = <$realLog>) {
    $line =~ s/\]\[/ /;
    my ($clientIP, $dateTime, $fileURL, $bytesTX, $stats, $seconds, $goodRX, $badRX)
        = $line =~ $real_re;
    next unless defined $clientIP;              # line did not parse

    my $mins = int($seconds / 6) / 10;          # sec/60 formatted (1 decimal place)
    my $kbps = $seconds > 0
             ? int(0.08 * $bytesTX / $seconds) / 10     # formatted ...
             : 0;
    my $realInfo = "$dateTime $fileURL $bytesTX ${mins}m ${kbps}x $goodRX $badRX";

    # Append the Stat fields only when they actually match: after a failed
    # match $1/$2 would still hold the captures of the previous successful
    # match and would leak unrelated text into the report.
    if ($stats ne "UNKNOWN" && $stats =~ /Stat1:(.*)Stat2:(.*bps)/) {
        $realInfo = $realInfo."\n".$1."\n".$2;
    }

    if (exists($deepData{$clientIP})) {
        push( @{ $deepData{$clientIP} }, $realInfo );
    }
    else {
        if (!exists($unexpectedData{$clientIP})) {       # initialize hash entry
            $unexpectedData{$clientIP} = [ hostname_for($clientIP) ];
        }
        push( @{ $unexpectedData{$clientIP} }, $realInfo );    # update
    }
}
close($realLog);

# unravel the data structure and generate report
#
print_visits(\%deepData);
#
print "\n\naccessed RealServer but not Apache:\n\n";
print_visits(\%unexpectedData);

exit;

# Everything below is the companion csh script (aplogs) that produces the
# /tmp input files.  It is kept behind __END__ so that perl never tries to
# compile it.
__END__
#!/bin/csh
#
# aplogs:
#   generate a report from Apache access/error logs (stdout)
#   for each server, keep several files for further analysis
#     al.extract - access.log w/o headlines/rules, local, and HEAD requests
#     al.deepIP - IP addresses of "deep" visitors
#     el - error.log sorted by file-requested, w/ tighter line lengths,
#          and w/o common errors
#
# last modified: 4/2/2000
#
cd /tmp

# www.abl.com:
#   extract needed info
#     remove headlines/rules, local, and HEAD requests
#     save as al.extract
#
fgrep -v Pages /var/log/apache/access.log.0 | \
  fgrep -v $WAN_IP_ETH0 | fgrep -v $LAN_IP_ETH1 | \
  fgrep -v HEAD > al.extract

# dotties.abl.com:
#   extract as above
#   add, with unique markings, to the above
#
fgrep -v Pages dotties.access.log.0 | \
  fgrep -v $WWW_IP | \
  fgrep -v HEAD > dotties.al.extract
#
sed 's/ \// \/at\/Dotties\//' dotties.al.extract >> al.extract

# filter IP and file-request info and save (temporarily) as al.ip-request
#
awk '{printf "%15s %s\n", $1, $7}' al.extract > al.ip-request

# count number of unique IP addresses
#
echo -n 'unique visitors:'
awk '{print $1}' al.ip-request | sort | uniq | wc -l

# count number of sticky IP addresses
#   (same as above, but just for "deep" pages)
#
echo -n ' deep visitors:'
fgrep Wireless al.ip-request > al.sticky
grep ' \/at\/' al.ip-request >> al.sticky
grep ' \/opt\/' al.ip-request >> al.sticky
grep ' \/bob\/' al.ip-request >> al.sticky
awk '{print $1}' al.sticky | sort | uniq > al.deepIP
cat al.deepIP | wc -l
#
rm al.sticky

# Top 20: count number of requests per file
#   don't count nonexistent pages
#   count */ as */index.html
#
echo -n ' hits per page:'
awk '{print $2}' al.ip-request | \
  fgrep -v team | \
  fgrep -v forum | \
  sed 's/\/$/\/index\.html/' | \
  sort | uniq -c | sort -nr | head -20
#
rm al.ip-request

# concatenate error logs
#
cp /var/log/apache/error.log.0 els
sed 's/ \// \/at\/Dotties\//' dotties.error.log.0 >> els
#
# count number of errant visitors
#   include common errors in count but don't save for review
#
echo -n 'errant visitors:'
awk '{print $8}' els | sort | uniq | wc -l
#
# remove common errors, tighten line lengths, and sort by file-requested
#   save for review
#
fgrep -v team els | \
  fgrep -v forum | \
  sed 's/error\] \[client //' | \
  sort +10 > el