#!/usr/bin/perl -CSDAL

use strict;
use warnings;
use utf8;

# Print the usage text and stop when help is requested or the number of
# arguments is wrong.  An explicit help request (-h, --h, -help, --help,
# case-insensitive) writes to STDOUT and exits successfully; a wrong argument
# count is a usage error, so the text goes to STDERR and the exit status is
# non-zero, letting calling scripts detect the misuse.
{
    my $wants_help= @ARGV && $ARGV[0] =~ /^--?h(?:elp)?$/i;
    if( $wants_help || @ARGV != 1 && @ARGV != 2 ) {
        my $usage= <<EOF;
usage: repodiff <git repo directory>[:<commit>] [<git repo directory>][:<commit>]
Prints files with same names differing between commits in two possibly
unrelated git repositories, identical files with different names, and files
existing only in one or the other.  Only files in the given subdirectories are
compared.  The comparison uses git object hashes and therefore applies to the
given or HEAD commits, not the working trees.  The default second argument is
the current directory.
EOF
        if( $wants_help ) {
            print $usage;
            exit;
        }
        print STDERR $usage;
        exit 2;
    }
}

# Split each command-line argument into a repository directory and a commit.
# The second argument is optional and defaults to the current directory; a
# missing ":<commit>" suffix means HEAD.
my ($repo1, $repo2)= @ARGV;
$repo2 //= ".";

my ($commit1, $commit2)= ("HEAD", "HEAD");
$commit1= $1 if $repo1 =~ s/:(.*)$//;
$commit2= $1 if $repo2 =~ s/:(.*)$//;

# A second argument of the form ":<commit>" leaves an empty directory part.
$repo2= "." if $repo2 eq "";

# Normalize both directories in place (the loop variable aliases them).
for my $dir ($repo1, $repo2) {
    # If the argument is not an existing directory, everything from its first
    # slash onwards is dropped.
    # NOTE(review): presumably meant to cut a path back to the repository
    # root -- confirm the intended behaviour for absolute paths.
    $dir =~ s!/.*?$!! unless -d $dir;
    # Ensure exactly one trailing slash so paths can simply be appended.
    $dir =~ s!/*$!/!;
}

# Runs "git -C $repo @args" and captures its standard output.
#
# The command is started with a list-form pipe open, so no shell is involved:
# directory names and commits containing spaces or shell metacharacters reach
# git verbatim instead of being word-split or interpreted.
#
# Returns a two-element list: the captured output (possibly empty) and a flag
# that is true only if git could be started and exited with status 0.
sub run_git {
    my ($repo, @args)= @_;
    open(my $pipe, "-|", "git", "-C", $repo, @args)
        or return ("", 0);
    my $output= do { local $/; <$pipe> };
    my $succeeded= close $pipe;
    return ($output // "", $succeeded ? 1 : 0);
}

# "git describe --always" prints something for any repository that has a
# commit; empty output is taken to mean that the directory is not a repository.
my ($v0)= run_git($repo1, "describe", "--always");
die "Not a git repository: $repo1\n" unless length $v0;
my ($v1)= run_git($repo2, "describe", "--always");
die "Not a git repository: $repo2\n" unless length $v1;

# List every entry of the requested commits, one "ls-tree" line per element.
# A failing ls-tree (e.g. an unknown commit) is fatal: continuing with an empty
# list would report every file of the other repository as existing only there.
my ($listing0, $listed0)= run_git($repo1, "ls-tree", "-r", "--full-tree", $commit1);
die "Cannot list files of commit '$commit1' in $repo1\n" unless $listed0;
my ($listing1, $listed1)= run_git($repo2, "ls-tree", "-r", "--full-tree", $commit2);
die "Cannot list files of commit '$commit2' in $repo2\n" unless $listed1;

my @files0= split /\n/, $listing0;
my @files1= split /\n/, $listing1;

# Indexes the blob entries of one "git ls-tree -r" listing.
#
#   $prefix  string prepended to every path (the repository directory)
#   $lines   array ref of ls-tree output lines,
#            "<mode> <type> <hash>TAB<path>"
#   $byname  hash ref filled as {basename}{hash} => [ prefixed paths ]
#   $byhash  hash ref filled as {hash}{basename} => [ prefixed paths ]
#
# The path is separated from the metadata at the first TAB, so paths that
# contain blanks are kept intact.  Entries that are not blobs, and lines that
# do not have the expected shape, are skipped.
#
# NOTE(review): without -z, git ls-tree quotes path names containing "unusual"
# characters; such paths are indexed here in their quoted form.
sub index_blobs {
    my ($prefix, $lines, $byname, $byhash)= @_;
    for my $line (@$lines) {
        my ($meta, $path)= split /\t/, $line, 2;
        next unless defined $path;
        my (undef, $type, $hash)= split ' ', $meta;
        next unless defined $hash && $type eq "blob";
        (my $basename= $path) =~ s!^.*/!!;
        my $full_path= $prefix . $path;
        push @{ $byname->{$basename}{$hash} }, $full_path;
        push @{ $byhash->{$hash}{$basename} }, $full_path;
    }
    return;
}

my (%byname0, %byname1, %byhash0, %byhash1);

index_blobs($repo1, \@files0, \%byname0, \%byhash0);
index_blobs($repo2, \@files1, \%byname1, \%byhash1);

# Returns the sorted union of the keys of the given hash references.
# Sorting makes the report reproducible: Perl's hash order is randomized and
# would otherwise change the order of rows and paths from run to run.
sub sorted_union_keys {
    my %seen;
    $seen{$_}= 1 for map { keys %$_ } @_;
    my @keys= sort keys %seen;
    return @keys;
}

# Compares two {basename}{hash} => [paths] indexes.
# Returns three array refs:
#   - rows for base names present on both sides whose content differs; each
#     row holds the first path of every hash found only on the left, followed
#     by the first path of every hash found only on the right (a row is
#     produced only if both parts are non-empty),
#   - first path per hash for base names found only on the left,
#   - first path per hash for base names found only on the right.
sub compare_by_name {
    my ($left, $right)= @_;
    my (@differing, @only_left, @only_right);
    for my $name (sorted_union_keys($left, $right)) {
        my ($in_left, $in_right)= ($left->{$name}, $right->{$name});
        if( ! $in_left ) {
            push @only_right, map $in_right->{$_}[0], sort keys %$in_right;
        }
        elsif( ! $in_right ) {
            push @only_left, map $in_left->{$_}[0], sort keys %$in_left;
        }
        else {
            my (@paths_left, @paths_right);
            for my $hash (sorted_union_keys($in_left, $in_right)) {
                # Identical content under the same name: nothing to report.
                next if $in_left->{$hash} && $in_right->{$hash};
                push @paths_left, $in_left->{$hash}[0] if $in_left->{$hash};
                push @paths_right, $in_right->{$hash}[0] if $in_right->{$hash};
            }
            push @differing, [ @paths_left, @paths_right ]
                if @paths_left && @paths_right;
        }
    }
    return (\@differing, \@only_left, \@only_right);
}

# Compares two {hash}{basename} => [paths] indexes.
# Returns an array ref of rows for content present on both sides under
# different base names: the first path of every name used only on the left,
# followed by the first path of every name used only on the right.  Rows are
# ordered by their first path.
sub compare_by_hash {
    my ($left, $right)= @_;
    my @renamed;
    for my $hash (sorted_union_keys($left, $right)) {
        my ($in_left, $in_right)= ($left->{$hash}, $right->{$hash});
        next unless $in_left && $in_right;
        my @names= sorted_union_keys($in_left, $in_right);
        next unless @names > 1;
        my (@paths_left, @paths_right);
        for my $name (@names) {
            # Same content under the same name on both sides: not a rename.
            next if $in_left->{$name} && $in_right->{$name};
            push @paths_left, $in_left->{$name}[0] if $in_left->{$name};
            push @paths_right, $in_right->{$name}[0] if $in_right->{$name};
        }
        push @renamed, [ @paths_left, @paths_right ]
            if @paths_left && @paths_right;
    }
    return [ sort { $a->[0] cmp $b->[0] } @renamed ];
}

my ($diff_rows, $only0_paths, $only1_paths)= compare_by_name(\%byname0, \%byname1);
my @diff= @$diff_rows;
my @only0= @$only0_paths;
my @only1= @$only1_paths;
my @diffname= @{ compare_by_hash(\%byhash0, \%byhash1) };

# Print the report: one section per result category, each preceded by an empty
# line and omitted entirely when it has no entries.  Rows of differing content
# are joined with " != ", rows of renamed content with " == "; the "Only in"
# lists are printed sorted.  A final empty line ends the report.
my @sections= (
    [ "Differing content:",          [ map { join " != ", @$_ } @diff ] ],
    [ "Differing name:",             [ map { join " == ", @$_ } @diffname ] ],
    [ "Only in $repo1 ($commit1):",  [ sort @only0 ] ],
    [ "Only in $repo2 ($commit2):",  [ sort @only1 ] ],
);

for my $section (@sections) {
    my ($heading, $entries)= @$section;
    next unless @$entries;
    print "\n$heading\n", join("\n", @$entries), "\n";
}
print "\n";

