#! /usr/bin/perl

# $Id: dhcp-aggregator.pl,v 1.37 2008/01/18 05:19:36 nicku Exp $

# dhcp-aggregator.pl
# Aggregate dhcpd.leases files.

# Unless the --aggregate-all option is specified, this program will
# aim to avoid the file being written to by the dhcp server.

# This is a partner to the patch to the ISC dhcp server that avoids
# the need for the dhcp server to stop to aggregate leases.  This
# program does that instead.

# The dhcp server writes a

# next file "next-dhcpd.leases-file";

# at the end of its current leases file, closes the current leases
# file, then begins writing to the filename specified in the 'next
# file' statement.

# This program is intended to be run from cron at appropriate
# intervals.  This interval is chosen to reduce the costs of
# restarting the dhcp server.  Perhaps every hour may be appropriate.

# TODO

# This program does not determine whether a lease's IP address is
# within the ranges of the dhcpd.conf.  This program should be changed
# to reject old addresses that are not within the ranges of the
# dhcpd.conf.

# CAVEATS:

# If dhcpd is restarted while this is running, and we have a list of
# lease files like this: a -> b -> c
# then dhcpd can open file a while this program is writing to file
# a.tmp.$date.  When dhcpd reaches the end of file a, it tries to open
# file b, but this program has already named file b to b.backup.$date,
# so dhcpd complains that file b does not exist, and it then
# terminates.

# To avoid this highly undesireable condition, do not let dhcpd
# restart while this program is running.  A simple test would be for
# the existence of /var/run/dhcp-aggregator.db.  A good place to do
# that would be in the /etc/rc.d/init.d/dhcpd startup file.

# Copyright (C) 2007  Nick Urbanik <nicku@nicku.org>

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

use DB_File;
use Getopt::Long;
use File::Spec;
use POSIX qw{strftime};
use Fcntl qw/:seek :flock/;
use File::Spec;
use FindBin;
use lib "$FindBin::Bin/../lib";
use OIE::Debug::OnTheFly ':debug';

use strict;
use warnings;

my $FILE_TO_TOUCH_AFTER_SUCCESS
    = OIE::Debug::OnTheFly::base_dir() . '/var/dhcp-aggregator-last-ran';

my $DHCPD_CONF = '/etc/dhcpd.conf';
my $START_DIR = '/var/lib/dhcp';
my $START_FILEBASE = 'dhcpd.leases';
my $START_LEASE_FILE = "$START_DIR/$START_FILEBASE";
my $STARTUP_LOCK_FILE = '/var/lock/dhcpd-startup';
my $DB = '/var/run/dhcp-aggregator.db';

sub ip_to_int {
    my ( $ip ) = @_;
    return unpack 'N', pack( 'C4', split( /\./, $ip ) );
}

sub int_to_ip {
    my ( $int ) = @_;
    return join q{.}, unpack( 'C4', pack 'N', $int );
}

sub find_ip_range {
    my ( $ip, $ranges_ref ) = @_;
    my $int = ip_to_int $ip;

    my $start = 0;
    my $end = @$ranges_ref;

    my $pos;

    my $lastpos = int( ( $start + $end ) / 2 ) - 1;
    while ( $start <= $end ) {
        $pos = int( ( $start + $end ) / 2 );
        if ( $int == $ranges_ref->[$pos]->{first_ip} ) {
            return $ranges_ref->[$pos];
        }
	last if $pos == $lastpos;

	if ( $int < $ranges_ref->[$pos]->{first_ip} ) {
            $end = $pos;
        } else {
            $start = $pos;
	}
	$lastpos = $pos;
    }

    if ( $ranges_ref->[$pos]
         and $int >= $ranges_ref->[$pos]{first_ip}
         and $int <= $ranges_ref->[$pos]{last_ip} ) {
	return $ranges_ref->[$pos];
    }

    return;
}

sub show_ranges {
    my ( $ranges ) = @_;
    foreach my $r ( @$ranges ) {
	my ( $first, $last )
	    = map { int_to_ip($_) } @$r{ 'first_ip', 'last_ip' };
	debug "$first\t=>\t$last\n";
    }
}

sub read_dhcp_conf {
    open my $conf_fh, '<', $DHCPD_CONF
	or die "$0: Unable to open $DHCPD_CONF: $!";

    my @ip_ranges;

  LINE:
    while ( <$conf_fh> ) {
	# Skip white space and comments:
	next LINE if m{^\s*(#.*)?$}ms;

	# syntax of range statement from dhcpd.conf(5):
	# range [ dynamic-bootp ] low-address [ high-address];

	next LINE unless m{
			      ^[ \t]*
			      range
			      [ \t]+
			      (?:dynamic-bootp \s+)?
			      ([\d\.]+)
			      (?:
				  [ ]
				  ([\d\.]+)
			      )?
			      [ \t]*
			      ;
			      \s*
			      $
		      }xms;
	my ( $first_ip, $last_ip )
	    = map { ip_to_int( $_ ) } ( $1, $2 ? $2 : $1 );

	warn "Bad range '$_' found\n" and next LINE
	    unless $first_ip and $last_ip;

	push @ip_ranges, { first_ip => $first_ip, last_ip => $last_ip };
    }

    close $conf_fh or die "$0: Unable to close $DHCPD_CONF: $!";

    @ip_ranges = sort { $a->{first_ip} <=> $b->{first_ip} } @ip_ranges;
    note "read_dhcp_conf() found ", scalar @ip_ranges, " ranges\n";
    show_ranges \@ip_ranges if $debug;

    return \@ip_ranges;
}

sub chain_len {
    my ( $dir ) = @_;
    $dir .= '/' unless $dir =~ m{/$};
    my $curr_fname = q{};
    my $next_fname = "$dir$START_FILEBASE";
    my $chain_len = 0;
 FILE:
    while ( $next_fname ne $curr_fname) {
	$curr_fname = $next_fname;
	++$chain_len;
	open my $lease_fh, '<', $curr_fname
	    or die "unable to open '$curr_fname': $!\n";

	seek $lease_fh, -500, SEEK_END
	    or warn "unable to seek to 500 bytes before eof: $!\n";
	my $buf;
	read $lease_fh, $buf, 500 or last FILE;

	( $next_fname ) = $buf =~ m{^next[ ]file[ ]"([^"]+)";}xms or last FILE;
	note "Next file; '$next_fname', curr_file: '$curr_fname'\n";
	close $lease_fh or die "unable to close '$curr_fname': $!";
    }
    return $chain_len;
}

sub process_record {
    my ( $record, $show_replaced, $lease_ref, $ip_range_ref ) = @_;

    # strip comments, empty lines
    1 while $record =~ s{^[ \t]*(?:\#[^\n]*)?\n}{}xms;

    return unless $record;

    return if $record =~ m{\Anext \s file \s}xms;

    my ( $key ) = $record =~ m/\A \s* (\S.*) \s \{/xms;
    if ( not defined $key ) {
	warn "No key for this record: '$record'\n";
	return;
    }

    if ( $record !~ m/^}\n \z/xms
	     or $record =~ m/.+\n.+\s \{$/xms ) {
	warn "CORRUPT RECORD: '$record'\n";
	return;
    }

    if ( $key =~ m{\Alease\s([\d.]+)}xms ) {
	my $ip = $1;
	if ( not find_ip_range $ip, $ip_range_ref ) {
	    # change in routers.cfg has eliminated this address:
	    note "Cannot find $ip in DHCPD_CONF: discarding '$record'\n";
	    return;
	}
    }

    if ( $show_replaced and exists $lease_ref->{$key} ) {
	my @msg = ( "Will replace this record: $lease_ref->{$key}",
	    "with this record: $record\n" );
	print STDERR @msg;
	debug @msg;
    }
    $lease_ref->{$key} = $record;
}

sub get_preamble {
    ( my $prog = $0 ) =~ s{\s+.*}{}xms;
    return "# This dhcp lease file was re-written by $prog\n\n";
}

sub write_leasefile {
    my ( $dir, $to_stdout, $agg_start_time, $active_file, $lease_ref ) = @_;
    my $num_leases = keys %$lease_ref;
    note "write_leasefile with active_file = '",
	$active_file ? $active_file : q{},
	"', dir = '$dir', to_stdout = '@{[$to_stdout ? $to_stdout : q{}]}'\n",
	    "Writing $num_leases leases\n";

    my $out_fh = *STDOUT;
    my $out_fname = "$dir/$START_FILEBASE.tmp.$agg_start_time";
    if ( not $to_stdout ) {
	note "Opening $out_fname for writing\n";
	open $out_fh, '>', $out_fname
	    or die "Cannot create $out_fname for writing: $!";
    }
    print $out_fh get_preamble;

    my $progname = $0;
    my $lease_count;
    while ( my ( $key, $record ) = each %$lease_ref ) {
	print $out_fh $record;
	if ( ++$lease_count % 200 == 0 ) {
	    if_debug;
	    $0 = "$progname writing [$lease_count/$num_leases]";
	    debug "output $lease_count/$num_leases leases";
	}
    }
    $0 = $progname;

    if ( $active_file ) {
	note "Writing next file record for '$active_file'\n";
	print $out_fh qq{next file "$active_file";\n};
    }
    if ( not $to_stdout ) {
	close $out_fh or die "Cannot close $out_fname: $!";
    }
}

# The problem with renaming the file immediately after having
# processed it is that if this program is interrupted, there will be
# no file dhcpd.leases, and everything will stop working, including
# the next run of this program, dhcpd when it is restarted, and
# cycle_logs will tidy the backup file away.

# There is still a possibility of that here, but the time of danger is
# much briefer, in the same way as there is with dhcpd itself.

# Using reverse here minimises the time when dhcpd.leases does not exist.

sub backup_and_move_files {
    my ( $dir, $agg_start_time, $leasefiles_ref ) = @_;
    note "Locking '$STARTUP_LOCK_FILE' to begin moving files\n";
    open my $lock_fh, '>', $STARTUP_LOCK_FILE
	or die "unable to open $STARTUP_LOCK_FILE: $!";
    flock $lock_fh, LOCK_EX or die "unable to lock $STARTUP_LOCK_FILE: $!";
    note "Now own lock on '$STARTUP_LOCK_FILE'\n";
    foreach my $file ( reverse @$leasefiles_ref ) {
	note "renaming '$file', '$file.backup.$agg_start_time'\n";
        rename $file, "$file.backup.$agg_start_time"
            or die "unable to rename $file $file.backup.$agg_start_time: $!";
    }
    note "renaming '$dir/$START_FILEBASE.tmp.$agg_start_time', ",
	"'$dir/$START_FILEBASE'\n";
    rename "$dir/$START_FILEBASE.tmp.$agg_start_time", "$dir/$START_FILEBASE"
        or die "Cannot rename $dir/$START_FILEBASE.tmp.$agg_start_time to ",
            "$dir/$START_FILEBASE: $!";
    flock $lock_fh, LOCK_UN or die "unable to unlock $STARTUP_LOCK_FILE: $!";
    note "Have released lock on '$STARTUP_LOCK_FILE'\n";
    close $lock_fh or warn "unable to close $STARTUP_LOCK_FILE: $!";
}

sub usage {
    ( my $prog = $0 ) =~ s{.*/}{};
    die <<DEATH;
$prog \[--dir=lease_file_directory] [--to-stdout] [--aggregate-all]
 [--show-replaced]

default lease file_directory is $START_DIR

If --to-stdout is *not* specified:

then $prog will backup $START_FILEBASE and all the other files
that were aggregated by appending .backup.yyyy-mm-ddThh:mm:ss to the
original file name, where yyyy-mm-ddThh:mm:ss is the time at which the
decision was made as to what files would be aggregated.  $prog will
create a new $START_FILEBASE after backing up the original.

If --to-stdout is specified:

then no files will be touched, and output will be to standard output
only.  For debugging.

--show-replaced shows all the records that will be replaced, and will
also show the record that supercedes it.  This goes to standard error.

--aggregate-all aggregates even the active file, and implies
--to-stdout.  No lease files will be written.

Normal way to run it will be from cron:
51 * * * * $0
DEATH
}

sub main {
    my $dir = $START_DIR;
    my $to_stdout;
    my $aggregate_all;
    my $show_replaced;
    if_debug;

    GetOptions( 'dir=s'         => \$dir,
                'to-stdout'     => \$to_stdout,
                'aggregate-all' => \$aggregate_all,
		'show-replaced' => \$show_replaced,
	        'debug'         => \$debug ) or usage;

    $to_stdout = 1 if $aggregate_all;
    $dir =~ s{/+$}{};
    debug "Starting with options: dir = '$dir', ",
	"to-stdout = ",           $to_stdout     ? 1 : 0,
	    ", aggregate-all = ", $aggregate_all ? 1 : 0,
	    ", show-replaced = ", $show_replaced ? 1 : 0, "\n";

    my $chain_len = chain_len $dir;
    if ( not $aggregate_all and $chain_len < 3 ) {
	note "Chain of files is only $chain_len long; NOT AGGREGATING\n";
	die "Database file $DB exists; ",
	    "assume another aggregator process running"
	    if -f $DB;
	touch_file $FILE_TO_TOUCH_AFTER_SUCCESS;
	# Exit non-zero so don't run cycle_logs.pl in this case.
	exit 1;
    }
    note "Found $chain_len files to aggregate\n";

    die "Database file $DB exists; assume another aggregator process running"
	if -f $DB;

    tie my %lease, 'DB_File', $DB or die "$0: Unable to tie hash to $DB: $!";
    my $agg_start_time = strftime '%FT%T', localtime;
    my $ip_range_ref = read_dhcp_conf;
    my $next_fname = "$dir/$START_FILEBASE";
    my $curr_fname = q{};
    my @leasefiles = ( $next_fname );
    my $morefiles = 1;
    my $progname = $0;
LEASE_FILE:
    while ( $morefiles ) {
	$curr_fname = $next_fname;
	$0 = "$progname reading $curr_fname";
	note "About to open '$curr_fname' and seek to end\n";
	open my $lease_fh, '<', $curr_fname
	    or die "unable to open '$curr_fname': $!";
	seek $lease_fh, -500, SEEK_END
	    or warn "unable to seek to 500 bytes before eof of ",
		"'$curr_fname': $!\n";

	my $buf;
	read $lease_fh, $buf, 500 or last LEASE_FILE;

	if ( $buf =~ m{^next[ ]file[ ]"([^"]+)";}xms ) {
	    $next_fname = $1;
	    push @leasefiles, $next_fname;
	    note "new next file is '$next_fname'\n",
		"new file list is '", join( q{', '}, @leasefiles ), qq{'\n};
	} else {
	    $morefiles = 0;
	    $next_fname = q{};
	    note "No next file in this file: '$curr_fname'\n",
		"Current file list is '", join( q{', '}, @leasefiles ), qq{'\n};
	    # File closed automatically by handle going out of scope:
	    last LEASE_FILE unless $aggregate_all;
	    note "--aggregate-all option enabled, at last file\n";
	}

	debug "About to seek to start of '$curr_fname'\n";
	seek $lease_fh, 0, SEEK_SET
	    or die "Unable to seek to beginning of file $curr_fname: $!";
	local $/ = "}\n";
	my $leases_read;

	note "About to start reading leases from '$curr_fname'\n";
	while ( my $record = <$lease_fh> ) {
	    process_record $record, $show_replaced, \%lease, $ip_range_ref;
	    if ( ++$leases_read % 200 == 0 ) {
		if_debug;
		debug "$leases_read leases read from $curr_fname\n";
		$0 = "$progname $curr_fname read $leases_read";
	    }
	}

	note "Closing '$curr_fname'; next file = '$next_fname'\n";
	close $lease_fh or die "unable to close '$curr_fname': $!";
    }

    note "About to remove the last element from file list '",
	join( q{', '}, @leasefiles ), qq{'\n} unless $aggregate_all;

    my $active_file = pop @leasefiles unless $aggregate_all;

    note "active file = '", $active_file ? $active_file : q{},
	"'; exit if empty: '",
	join( q{', '},  @leasefiles ), qq{'\n};

    exit unless @leasefiles;

    $0 = "$progname";

    write_leasefile $dir, $to_stdout, $agg_start_time, $active_file, \%lease;
    untie %lease;

    backup_and_move_files $dir, $agg_start_time, \@leasefiles unless $to_stdout;

    note "unlinking $DB\n";
    unlink $DB;
    note "Finished.\n";
    touch_file $FILE_TO_TOUCH_AFTER_SUCCESS;
    exit;
}

main
