#! /usr/bin/perl

# Copyright (C) 2007..2009  Nick Urbanik <nicku@nicku.org>

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

use warnings;
use strict;

use XML::Parser;
use XML::Simple;
$XML::Simple::PREFERRED_PARSER = 'XML::Parser';
use Data::Dumper;
use Getopt::Long;
use File::Type;

sub usage {
    ( my $prog = $0 ) =~ s{.*/}{};
    print <<END_USAGE;
$prog [--start=yyyy-mm-dd] [--end=yyyy-mm-dd] [--debug] [--show-zero] [--all] [--levels=n] [gnucash file]

--show-zero\tshows accounts with a value of zero
--all\t\tshows accounts with a value of zero or with no value at all
--levels=n\tshow only n levels of accounts

The gnucash file is compressed; this program detects if that is so and
decompress it with zcat.

You could also use it like this:
zcat gnucash_file | $prog options
END_USAGE
    exit 1;
}

sub sum_in_split {
    my ( $split, $act_ref ) = @_;
    return unless defined $split->{'split:value'};
    if ( $split->{'split:value'} !~ m{^-?\d+/100$} ) {
	warn "FUNNY value: '$split->{'split:value'}'\n";
    }
    my $value = eval $split->{'split:value'};

    if ( defined $value ) {
	$value = sprintf "%.2f", $value;
	$act_ref->{$split->{'split:account'}{content}}{total} += $value;
    } else {
	warn Dumper $split;
	return;
    }
    return 1;
}

# Make money ammounts "nicer" by:
# 1. ensuring have a cents quantity
# 2. Put underscores every three sets of digits
#    (some people like commas, but Perl can read
#    numbers with underscores as numbers).

sub nicen {
    my ( $num ) = @_;
    my $text = reverse sprintf "%.2f", $num;
    $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1_/g;
    return scalar reverse $text;
}

my ( $start, $end, $debug, $all, $show_zero, $display_levels );

GetOptions(
    'start=s'   => \$start,
    'end=s'     => \$end,
    debug       => \$debug,
    all         => \$all,
    'show-zero' => \$show_zero,
    'levels=i'  => \$display_levels,
) or usage;

$start =~ m{^\d{4}-\d\d-\d\d$} or usage if $start;
$end   =~ m{^\d{4}-\d\d-\d\d$} or usage if $end;

# Parse the XML that Gnucash 2.x writes to:
foreach my $file ( @ARGV ) {
    if ( -r $file ) {
	my $ft = File::Type->new;
	my $type = $ft->checktype_filename($file);
	$file = "zcat $file |" if $type eq 'application/x-gzip';
    } else {
	die "Cannot read '$ARGV[ 0 ]'\n";
    }
}
my $blob = do { undef $/; <> };
print STDERR "Read it\n" if $debug;
my $data = XMLin( $blob );
print STDERR "Have parsed it\n" if $debug;
#print Dumper( $data );

foreach my $key ( keys %$data ) {
    print "KEY: '$key'\n" if $debug;
}

foreach my $key ( keys %{$data->{'gnc:book'}} ) {
    print "KEY: '$key'\n" if $debug;
}

# KEY: 'gnc:transaction'
# KEY: 'gnc:template-transactions'
# KEY: 'book:id'
# KEY: 'version'
# KEY: 'gnc:account'
# KEY: 'gnc:count-data'
# KEY: 'gnc:schedxaction'
# KEY: 'gnc:commodity'

# Read all the accounts.
# The key is a 32 character hexadecimal identifier.
# Each entry is a hash ref with the name of the account,
# the key of its parent, and the type of the account.

my %account = map {
    $_->{'act:id'}{content} => {
	name   => $_->{'act:name'},
	parent => $_->{'act:parent'}{content},
	type   => $_->{'act:type'},
    }
} @{$data->{'gnc:book'}{'gnc:account'}};


# Here we determine the full name of each account, by prepending its
# name with that of its parents; each level is separated with a colon.

# We also build an array of arrays of keys at each level.
# If the tree of accounts has four levels, then @levels has
# four arrays at index 1, 2, 3 and 4.  Each contains the keys
# at the corresponding level.  It allows us to traverse the tree
# one level at a time, starting from the lowest level.

my @levels;
my $max_level = 0;
foreach my $key ( keys %account ) {
    #$account{$key}{pname} = $account{$account{$key}{parent}}{name};

    $account{$key}{fullname} = $account{$key}{name};
    my $parent_key = $account{$key}{parent} or next;
    my $local_level = 1;
    while ( defined $account{$parent_key}{name}
		and $account{$parent_key}{name} ne 'Root Account'
	) {
	$account{$key}{fullname}
	    = "$account{$parent_key}{name}:$account{$key}{fullname}";
	$parent_key = $account{$parent_key}{parent};
	++$local_level;
    }
    $account{$key}{level} = $local_level;
    push @{$levels[ $local_level ]}, $key;
    $max_level = $local_level if $local_level > $max_level;
    #print "$account{$key}{fullname}\n" if $local_level == 4;
}

print "MAX LEVEL = $max_level\n" if $debug;


my $acc_count = scalar keys %account;
print "NUM Accounts: $acc_count\n" if $debug;

# Here we add up all the transactions for each account that is
# within the time period we specify with the start and end options.

my $count = 0;
TRANSACTION:
foreach my $item ( @{$data->{'gnc:book'}{'gnc:transaction'}} ) {
    next unless $item->{'trn:date-posted'}{'ts:date'};

    my ( $date ) = $item->{'trn:date-posted'}{'ts:date'}
	=~ m{^(\d{4}-\d\d-\d\d)}
	    or warn "BAD DATE '$item->{'trn:date-posted'}{'ts:date'}'"
		and next TRANSACTION;

    next if $start and $start gt $date;
    next if $end   and $end   lt $date;

    my $split_ref = $item->{'trn:splits'}{'trn:split'};

    if ( ref $split_ref eq 'ARRAY' ) {
      SPLIT:
	foreach my $split ( ( @{$split_ref} ) ) {
	    sum_in_split $split, \%account or next SPLIT;
	}
    } elsif ( ref $split_ref eq 'HASH' ) {
	    sum_in_split $split_ref, \%account or next TRANSACTION;
    } else {
	warn "ERROR: REFTYPE OF SPLIT IS '@{[ref $split_ref]}': ",
	    Dumper $split_ref;
	next TRANSACTION;
    }

    ++$count;
}

# Here we populate the parent entries with the sum of the values
# of their child accounts.

# Want to start from leaves of the tree, add into parent
# do one level at a time, starting furthest from the root.
for ( my $level = $max_level; $level > 0; --$level ) {
    foreach my $key ( @{$levels[ $level ]} ) {
	$account{$account{$key}{parent}}{total} += $account{$key}{total}
	    if defined $account{$key}{total};
    }
}

print Dumper( \%account ) if $debug;

print "Number of Transactions: $count\n" if $debug;

# Here we:
# 1. Sort the accounts alphabetically
# 2. Invert the value if it is of type Income or equity (Gnucash stores
#    these as negative values for reasons perhaps an accountant would
#    possibly understand)
# 3. print the account with its value.

foreach my $key ( sort { $account{$a}{fullname} cmp $account{$b}{fullname} } keys %account ) {
    next if $account{$key}{fullname} eq 'Root Account';
    if ( defined $account{$key}{total} ) {
	next unless $account{$key}{total} or $show_zero;
	next if $display_levels and $account{$key}{level} > $display_levels;
	$account{$key}{total} *= -1
	    if $account{$key}{type} =~ /^(?:INCOME|EQUITY)$/;
	printf "%-52s %10s\n", $account{$key}{fullname},
	    nicen $account{$key}{total};
    } else {
	next unless $all;
	printf "%-52s\n", $account{$key}{fullname};
    }
}

exit;

