#!/usr/bin/perl -ws # Created by Ben Okopnik on Thu Jun 28 09:11:52 EDT 2007 # # Copyright (C) 2007 Ben Okopnik # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. =pod ################################## Changelog ############################## 07/28/07 0:41 - v2.2 * Added 'jwhois' as the preferred option, with a warning if it's not installed. Caching for 'jwhois' is disabled when '-X' is in effect; '-H' is no longer a hard-wired argument to 'whois' ('jwhois' doesn't support it), but is still appended if 'whois' is used. * Tweaked a couple of the regexen to process new TLDs (.fi, .ly, etc.) * Giving serious thought to modifying the format of the -F files; it would be nice to be able to specify the whois server for individual domains. 07/20/07 9:36 - v2.1 * Added a bunch of tracing/debugging statements to the date parser, making the '-X' option much more useful * Built a 'switch-case' structure around the parser so that only one regex would apply to any given host * Added a '-H' argument to 'whois' ("elide legal disclaimer") to make debugging output less annoying (and maybe speed things up fractionally) * Made the 'no expiration date found' error into a non-fatal warning (used to break list processing) * Modified the output format slightly (warnings now appear on the same line as the domain name) * Domains without a registrar will no longer be omitted from the mailed notifications 07/19/07 22:28 - v2.0 * Now parsing .ci domains as well (millions of people cheer, world peace can't be far away now...) 07/19/07 20:54 - v1.9 * Added a little regex-fu to accept lines that have whitespace at the end * Added a Big Sekrit Option ('-X' - shhh, don't tell anybody!) for debugging 07/19/07 11:56 - v1.8 * Lots and lots of fixes for many different TLDs; much mangling of regexen. Now handles many more expiration date types than before. Most importantly, domains that don't list a registrar will now be displayed anyway; people probably know where to send their money, but not necessarily _when._ 07/04/07 12:28 - v1.7 * Scrapped previous approach to the .org delay; the .orgs are now sorted to the end of the domain list and all except the first one wait 20 seconds. * Added a cute little time ticker to the delay routine, just because. :) 07/03/07 1:27 - v1.6 * Added a rate limiter (3/minute) for .org domains 06/30/07 18:34 - v1.5 * Added a "domain not parseable; please report" warning * Added an "Unable to read 'whois' info" warning for the 'fgets: connection reset by peer' error. * All expiration warnings are now sent as one email instead of one per domain; ditto the expired domains notifications. * The 'printf' for the 'SKIPPED' error was ignoring the '-q' option; fixed 06/30/07 8:19 - v1.4 * Removed dependency on File::Find; searching PATH 'manually' * Added an 'exit 1' to the silent failure mode of 'croak' 06/30/07 7:06 - v1.3 * Improved the date-parsing regexes (the numerical months part can now only match '01-12' instead of 'any two digits'); this should increase the reliability of resolving 'dd-mm-yyyy' vs. 'mm-dd-yyyy' somewhat. * More accurate reporting for the 'SKIPPED' error (now shows exact reason) * Fixed the regexes that I screwed up while adding the Dotster extension * Added a '-v' option 06/29/07 18:54 - v1.2 * Got rid of an unnecessary system dependency ('which') - 'File::Find' is a bit clunky, but better than depending on unknowns... * Another date-processing regex (ISOC-IL: 'validity: 29-06-2007') 06/29/07 17:07 - v1.1 * Modified output format to include both exp. date and days remaining * Added another date-processing regex (DOTSTER: 'Expires on: 29-Jun-07') 06/29/07 15:06 - v1.0 I'm finally willing to admit that this script is usable. :) Recent changes include: * Parsing routine for "2007/08/12" date format * 'croak' notifies admin of problems encountered in silent mode * Added a fallback email address for 'croak' * Fixed GMT parsing routine miscalc (thanks to Rick Moen for the heads up) ########################################################################### =cut use strict; use Time::Local; $|++; # Command-line variables our ($d, $e, $F, $h, $q, $s, $v, $x, $X); ### FALLBACK ADDRESS FOR NOTIFICATION ############ my $address = 'root@localhost'; ################################################## my ($name) = $0 =~ /([^\/]+)$/; my $usage =<<"+EoT+"; Usage: $name [-e=email] [-x=expir_days] [-q] [-h] <-d=domain_name|-F=domainfile> -d=domain : Domain to analyze -e=email_address : Send a warning message by email -F=domain_list : File with a list of domains, one per line -h : Print this message -q : Don't print to the console (REQUIRES '-e' OPTION) -s=whois server : Use alternate whois server -v : Display current version of this script -x=days : Change default (30d) expiration interval (REQUIRES '-e' OPTION) +EoT+ # Locate 'whois' or (preferred) 'jwhois' my ($whois) = grep -e, map "$_/jwhois", split /:/, $ENV{PATH}; ($whois) = grep -e, map "$_/whois", split /:/, $ENV{PATH} unless $whois; die "'whois' not found in path.\n" unless $whois; if ($whois =~ m#/whois$#){ $q || print "You really should install 'jwhois'; it gives better results.\n"; # Turn down the noise (minimal output option - only works with 'whois') $whois .= " -H"; } else { # Turn off caching for 'jwhois' if the debug option is on $whois .= " -f" if $X; } # Find a mail client (mutt or mailx) my ($mail) = grep -e, map "$_/mutt", split /:/, $ENV{PATH}; # Switch Mutt into 'mailx' mode if found if ($mail){ $mail .= " -x"; } else { ($mail) = grep -e, map "$_/mailx", split /:/, $ENV{PATH}; } die "No 'mailx' or 'mutt' (mail client) found in path.\n" unless $mail; # Read the version number at the top of the changelog if ($v){ seek DATA, 0, 0; while (){ if (m[^\d+/\d+/\d+[^v]+v([0-9.]+)]){ print "Version: $1\nCopyright (C) 2007 Ben Okopnik \n\n"; exit 0; } } } # Email admin if '-q' is on; otherwise, just exit with the error sub croak { if ($q){ # If '-e' wasn't specified, use the fallback address $e ||= $address; # No place to send an error if this fails... :) open Mail, "|$mail -s 'WARNING: $name script error' $e"; print Mail "$name [" . localtime() . "]: ", $_[0]; close Mail; exit 1; } else { die $_[0]; } } # Display the help output if requested or in case of incorrect usage die "$usage\n" if $h; die "\n*ERROR: '$name' requires an email address with the '-q' and the '-x' options*\n\n$usage" if ($q || $x) && ! $e; die "\n*ERROR: '$name' requires either a domain name or a domain list as an argument*\n\n$usage" if ! $d && ! $F; # Set default notification interval to 30 days if ($x){ croak "Expiration interval must be specified in days (0-9999).\n" unless $x =~ /^\d{1,4}$/; } else { $x = 30; } # Add the server to the "whois" command if it's been specified $whois .= " -h $s" if $s; # Read the domain list file my @domains; if ($F){ croak "$F is not a regular file\n" unless -f $F; croak "Can't read $F\n" unless -r _; # Open the file if it exists open F or croak "$F: $!\n"; while (){ # Skip blank lines; ignore comments next if /^\s*(?:#|$)/; # Strip preceding and following blanks s/^\s*(.*?)\s*$/$1/; # Strip URI method and any terminal '/'s s#^.*://##; s#/$##; push @domains, $_; } close F; } # Having a '-F' AND a '-d' is explicitly not excluded if ($d){ # Strip URI method and any terminal '/'s $d =~ s#^.*://##; $d =~ s#/$##; push @domains, $d; } # Sort list to push .orgs to the end; ASCIIbetical sort otherwise @domains = sort { ($a =~ /\.org$/i) <=> ($b =~ /\.org$/i) || $a cmp $b } @domains; # Trim strings to specified length; return '**UNKNOWN**' if undef sub trim { defined $_[0] || return "**UNKNOWN**"; substr($_[0], 0, $_[1]); } # Lookup list for month number->name conversion my (%mth,%mlookup); @mth{map sprintf("%02d", $_), 1..12} = qw/jan feb mar apr may jun jul aug sep oct nov dec/; # Lookup list for month name->abbrev conversion @mlookup{qw/January February March April May June July August September October November December/} = qw/jan feb mar apr may jun jul aug sep oct nov dec/; ########################## DATA COLLECTION SECTION ############################# # Process the domain list my ($seen, %list); for my $host (@domains){ $q || print "\b\nProcessing $host... "; # Delay to avoid triggering rate limiter if ($host =~ /\.org$/i){ $q || print "(NOTE: Subsequent ORG queries will be delayed by 20 seconds each due to rate limiting) " unless $seen; # Show the cute little time ticker :) if ($seen++){ my @chars = split //, '|/-\\'; for (0 .. 19){ $q || print $chars[$_ % 4], "\b"; sleep 1; } print " \b"; } } # Execute the query my $out; open Who, "$whois $host|" or croak "Error executing $whois: $!\n"; { # Read in the entire output of 'whois' as a single string local $/; $out = ; } close Who; # 'fgets: connection reset by peer' - bloody annoying response! if (!$out || $out !~ /domain/i){ $q || print "Unable to read 'whois' info for $host. Skipping... "; next; } # Freak out and run away if there's no match if ($out =~ /no match/i){ $q || print "No match for $host!\n"; next; } # Ditto for bad hostnames if ($out =~ /No whois server is known for this kind of object/i){ $q || print "'whois' doesn't recognize this kind of object. "; next; } # Convert multi-line 'labeled block' output to 'Label: value' my $debug; if ($out =~ /registrar:\n/i){ $out =~ s/:\n(?!\n)/: /gsm; $debug .= "matched on line " . (__LINE__ - 2) . ": Multi-line 'labeled block'\n"; } # Date preprocessing. Desired date format is '29-jun-2007' # 'Fri Jun 29 15:16:00 EDT 2007' if ($out =~ s/(date:\s*| on:\s*)[A-Z][a-z]+\s+(...)\s+(\d+).*?(\d+)\s*$/$1$3-$2-$4/igsm){ $debug .= "matched on line " . (__LINE__ - 2) . ": 'Fri Jun 29 15:16:00 EDT 2007'\n"; } # '29-Jun-07' elsif ($out =~ s/(date:\s*| on:\s*)(\d{2})[\/ -](...)[\/ -](\d{2})\s*$/$1$2-$3-20$4/igsm){ $debug .= "matched on line " . (__LINE__ - 2) . ": '29-Jun-07'\n"; } # '2007-Jun-29' elsif ($out =~ s/[^\n]*(?:date| on|expires on\.+):\s*(\d{4})[\/-](...)[\/-](\d{2})\.?\s*$/Expiration date: $3-$2-$1/igsm){ $debug .= "matched on line " . (__LINE__ - 2) . ": '2007-Jun-29'\n"; } # '2007/06/29' elsif ($out =~ s/(expires:\s*|date\s*:\s*| on:\s*)(\d{4})(?:[\/-]|\. )(0[1-9]|1[0-2])(?:[\/-]|\. )(\d{2})\.?\s*$/$1$4-$mth{$3}-$2/igsm){ $debug .= "matched on line " . (__LINE__ - 2) . ": '2007/06/29'\n"; } # '29-06-2007' elsif ($out =~ s/(?:validity:|expir(?:y|ation) date:|expires (?:on [^:]+:?|at:))\s*(\d{2})[\/.-](0[1-9]|1[0-2])[\/.-](\d{4})\s*[0-9:.]*\s*\w*\s*$/Expiration date: $1-$mth{$2}-$3/igsm){ $debug .= "matched on line " . (__LINE__ - 2) . ": '29-06-2007'\n"; } # '[Expires on] 2007-06-29' (.jp, .ru) elsif ($out =~ s/(?:valid-date|expiration date:|paid-till:|\[expires on\]|expires on :|expired:)\s*(\d{4})[\/.-](0[1-9]|1[0-2])[\/.-](\d{2})\s*[0-9:.]*\s*\w*\s*$/Expiration date: $3-$mth{$2}-$1/igsm){ $debug .= "matched on line " . (__LINE__ - 2) . ": '[Expires on] 2007-06-29' (.jp, .ru)\n"; } # 'expires: June 29 2007' (.is) elsif ($out =~ s/expires:\s*([A-Z][a-z]+)\s+(\d{1,2})\s+(\d{4})\s*$/"Expiration date: " . sprintf("%02d", $2) . "-$mlookup{$1}-$3"/iegsm){ $debug .= "matched on line " . (__LINE__ - 2) . ": 'expires: June 29 2007' (.is)\n"; } # 'renewal: 29-June-2007' (.ie) elsif ($out =~ s/renewal:\s*(\d{1,2})[\/ -]([A-Z][a-z]+)[\/ -](\d{4})\s*$/Expiration date: $1-$mlookup{$2}-$3/igsm){ $debug .= "matched on line " . (__LINE__ - 2) . ": 'renewal: 29-June-2007' (.ie)\n"; } # 'expire: 20080315' (.cz, .ke) elsif ($out =~ s/expir[ey]:\s*(\d{4})(\d{2})(\d{2})\s*$/Expiration date: $3-$mth{$2}-$1/igsm){ $debug .= "matched on line " . (__LINE__ - 2) . ": 'expire: 20080315' (.cz, .ke)\n"; } # 'domain_datebilleduntil: 2007-06-29T00:00:00+12:00' (.nz) elsif ($out =~ s/domain_datebilleduntil:\s*(\d{4})[-\/](\d{2})[-\/](\d{2})T[0-9:.+-]+\s*$/Expiration date: $3-$mth{$2}-$1/igsm){ $debug .= "matched on line " . (__LINE__ - 2) . ": 'domain_datebilleduntil: 2007-06-29T00:00:00+12:00' (.nz)\n"; } # '29 Jun 2007 11:58:42 UTC' (.coop) elsif ($out =~ s/((?:date|expires):\s*)(\d{2})[\/ -](...)[\/ -](\d{4})\s*[0-9:.]*\s*\w*\s*$/$1$2-\L$3\E-$4/igsm){ $debug .= "matched on line " . (__LINE__ - 2) . ": '29 Jun 2007 11:58:42 UTC' (.coop)\n"; } # 'Record expires on 17/8/2100' (.hm, fi) elsif ($out =~ s/(?:expires(?: on|:))\s*(\d{2})[\/.-]([1-9]|0[1-9]|1[0-2])[\/.-](\d{4})\s*[0-9:.]*\s*\w*\s*$/"Expiration date: $1-".$mth{sprintf "%02d", $2} . "-$3"/iegsm){ $debug .= "matched on line " . (__LINE__ - 2) . ": 'Record expires on 17/8/2100' (.hm)\n"; } else { $debug = "No regexes matched.\n"; } # Collect the data from each query for (split /\n/, $out){ # Clip pre- and post- blanks s/^\s*(.*?)\s*$/$1/; # Squash repeated tabs and spaces tr/ \t//s; # This is where it all happens - regexes to capture registrar and expiration $list{$host}{Registrar} ||= $1 if /(?:authorized agency|registrar)(?:\s*|_)(?:name|id)?:\s*(.*)$/i; $list{$host}{Expires} ||= $1 if /(?:expires(?: on)?|expir(?:e|y|ation) date\s*|renewal(?:[- ]date)?)[:\] ]\s*(\d{2}-[a-z]{3}-\d{4})/i; # print "Registrar: $list{$host}{Registrar}\nExpires: $list{$host}{Expires}\n"; } # Assign default message if no registrar was found $list{$host}{Registrar} ||= "[[[ No registrar found ]]]"; $q || print "No expiration date found in 'whois' output. Please report this domain to the author!" unless defined $list{$host}{Expires}; # Debug option (activated by '-X'); exits here with parsed 'whois' output $debug .= "Registrar: $list{$host}{Registrar}\n" if defined $list{$host}{Registrar}; $debug .= "Expires: $list{$host}{Expires}\n" if defined $list{$host}{Expires}; die "\n", "=" x 70, "\n$out", "=" x 70, "\n$debug", "=" x 70, "\n" if $X; } ########################## DATA ANALYSIS SECTION ############################# # Get current time snapshot in UTC my $now = timegm(gmtime); # Convert dates to UTC epoch seconds; *will* fail on 19 Jan 2038. :) my %months; @months{qw/jan feb mar apr may jun jul aug sep oct nov dec/} = 0..11; # Print the header if '$q' is off and there's content in %list $q || %list && printf "\n\n%-24s%-36s%s\n%s\n", "Host", "Registrar", "Exp.date/Days left", "=" x 78; # Process the collected data my (%exp, %end); for my $k (sort keys %list){ unless (defined $list{$k}{Expires}){ $q || printf "%-32s%s\n", trim($k, 31), "*** SKIPPED (missing exp. date) ***"; delete $list{$k}; next; } my @chunks = split /-/, $list{$k}{Expires}; my $epoch = timegm(0, 0, 0, $chunks[0], $months{lc $chunks[1]}, $chunks[2] - 1900); my $diff = int(($epoch - $now) / 86400); $q || printf "%-24s%-36s%-12s/%5s\n", trim($k, 23), trim($list{$k}{Registrar}, 35), $list{$k}{Expires}, $diff; # Prepare alerts if domain is expired or the expiration date is <= $x days if ($e && ($diff <= $x)){ if ($diff <= 0){ $exp{$k} = -$diff; } else { $end{$k} = $diff; } } } # Report expired domains if (%exp){ open Mail, "|$mail -s '$name: Expired domains' $e" or croak "$mail: $!\n"; print Mail "According to 'whois', the following domains have expired:\n\n"; for my $x (sort { $exp{$a} <=> $exp{$b} } keys %exp){ my $s = $exp{$x} == 1 ? "" : "s"; print Mail "$x ($exp{$x} day$s ago)\n"; } close Mail; } # Report domains that will expire within the '-x' period if (%end){ open Mail, "|$mail -s '$name: Domain expiration warning ($x day cutoff)' $e" or croak "$mail: $!\n"; print Mail "According to 'whois', these domains will expire soon:\n\n"; for my $d (sort { $end{$a} <=> $end{$b} } keys %end){ my $s = $end{$d} == 1 ? "" : "s"; print Mail "$d (in $end{$d} day$s)\n"; } close Mail; } __END__