#!/usr/bin/perl -w

use strict;
use warnings;

use Carp;
use RPM::Header;
use Getopt::Long;
use Pod::Usage;
use File::Basename;
use File::Temp qw/tempfile tempdir/;


my ($help,$srpmdir,$rpmdir);
my $format='name';
my $cycleformat='file';
my $cycleprefix='cycle';
my $verbose=1;
my $debug=0;
my $ignore_extra_rpms=0;
my $ignore_extra_srpms=1;
my $use_file_provides=1;
my $use_build_requires=1;
my $set_buildfrom_relation=1;
my $opt_whobuildreq;

my ($transaction_srpm_files,$transaction_srpm_names,
    $transaction_rpm_files,$transaction_rpm_names,
    $mark_req_regexp,$outputfile,
);

GetOptions (
    "debug+"  => \$debug,
    "help"  => \$help,
    "cycle-format=s" => \$cycleformat,
    "cycle-prefix=s" => \$cycleprefix,
    "format=s" => \$format,
    "ignore-extra-rpms!" => \$ignore_extra_rpms,
    "ignore-extra-srpms!" => \$ignore_extra_srpms,
    "mark-req-regexp=s" => \$mark_req_regexp,
    "rpm-files-transaction=s" => \$transaction_rpm_files,
    "rpm-names-transaction=s" => \$transaction_rpm_names,
    "srpm-files-transaction=s" => \$transaction_srpm_files,
    "srpm-names-transaction=s" => \$transaction_srpm_names,
    "use-file-provides!" => \$use_file_provides,
    "use-build-requires!" => \$use_build_requires,
    "verbose+"  => \$verbose,
    "whobuildreq"  => \$opt_whobuildreq,
    "output=s" => \$outputfile,
    'q|quiet'   => sub { $verbose = 0 },
    );
if ($help or not @ARGV) {
    #exec "pod2usage --exit=0 $0";
    pod2usage({ #-message => "the options below are package-specific:" ,
	-exitval => 0  ,
	-verbose => $verbose,
	#-output  => $filehandle
	      } );
}

$set_buildfrom_relation=0 if $opt_whobuildreq;

die "Context is non-empty (good) but transaction is empty (bad).
specify the transaction with one of the options
    --mark-req-regexp
    --rpm-files-transaction
    --rpm-names-transaction
    --srpm-files-transaction
    --srpm-names-transaction

for example, 
--mark-req-regexp='^libglib-2.0.so.0' will mark packages that require libglib-2.0.so.0.
--mark-req-regexp=. will mark ALL packages in the context.
" unless $transaction_srpm_files or $transaction_srpm_names 
  or $transaction_rpm_files or $transaction_rpm_names or $mark_req_regexp;

my (@srpms, @rpms);
sub _add_rpm_file {
    my ($arg)=@_;
    if ($arg=~/.*\.src\.rpm$/) {
	push @srpms, $arg;
    } elsif ($arg=~/.*\.rpm$/) {
	push @rpms, $arg;
    } else {
	die "Invalid argument - not a rpm file: $arg";
    }
}

foreach my $rpmarg (@ARGV) {
    if ( -d "$rpmarg") {
	open (RPMARGS_FILES, "ls -1 $rpmarg |egrep -x '[^.].*[.]rpm' |") || die $!;
	my $f;
	while ($f=<RPMARGS_FILES>) {
	    chomp $f;
	    &_add_rpm_file("$rpmarg/$f") if -e "$rpmarg/$f";
	}
	close RPMARGS_FILES;
    } else {
	&_add_rpm_file($rpmarg);
    }
}

my %pkg_by_srpm;
my %pkg_by_rpm;
my $srpm_by_input=[];

my ($i,$j);
for ($i=0;$i<@srpms;$i++) {
    my $rhref = new RPM::Header $srpms[$i];
    my $pkg={
	NAME => $rhref->{NAME},
	TEXTID => $rhref->{NAME},
	PATH => $srpms[$i],
	FILE => basename($srpms[$i]),
	SRPMHDR => $rhref,
	RPMS => [],
    };
    if ($pkg_by_srpm{$pkg->{FILE}}) {
	die "$pkg->{FILE} is already listed.";
    }
    $pkg_by_srpm{$pkg->{FILE}}=$pkg;
    $srpm_by_input->[$i]=$pkg;
}
foreach my $rpmfile (@rpms) {
    my $rhref = new RPM::Header $rpmfile;
    my $srpmfile=$rhref->{SOURCERPM};
    unless ($pkg_by_srpm{$srpmfile}) {
	next if $ignore_extra_rpms;
	die "source rpm is not specified for rpm $rpmfile\n";
    }
    my $rpm = {
	NAME => $rhref->{NAME},
	TEXTID => $rhref->{NAME}.'.'.$rhref->{ARCH},
	PATH => $rpmfile,
	FILE => basename($rpmfile),
	RPMHDR => $rhref,
    };
    if ($pkg_by_rpm{$rpm->{FILE}}) {
	die "$rpm->{FILE} is already listed.";
    }
    $pkg_by_rpm{$rpm->{FILE}}=$rpm;
    push @{$pkg_by_srpm{$srpmfile}->{RPMS}}, $rpm;
}

my $idcounter=0;
my $pkg_by_id=[];
my %id_is_source;

foreach my $srpmentry (@$srpm_by_input) {
    $id_is_source{$idcounter}=1;
    $srpmentry->{ID}=$idcounter++;
    push @$pkg_by_id, $srpmentry;
    foreach my $rpmentry (@{$srpmentry->{RPMS}}) {
	$rpmentry->{ID}=$idcounter++;
	push @$pkg_by_id, $rpmentry;
    }
}

my $tmpdir;
unless ($debug) {
    $tmpdir = tempdir( CLEANUP => 1 );
} else {
    $tmpdir = '.';
}

my $infile = $tmpdir.'/infile';
my $srpmmarkfile = $tmpdir.'/srpmmarkfile';
my $rpmfile = $tmpdir.'/rpmlistfile';
my $rpmmarkfile = $tmpdir.'/rpmmarkfile';
my $namemapfile = $tmpdir.'/namemapfile';
my $filemapfile = $tmpdir.'/filemapfile';
my $pathmapfile = $tmpdir.'/filemapfile';

my @srpm_mark_flag;
my @rpm_mark_flag;
if ($transaction_srpm_files) {
    foreach my $tsrpm (@{&load_file($transaction_srpm_files)}) {
	my $tsfile=basename($tsrpm);
	my $pkg=$pkg_by_srpm{$tsfile};
	die "can't find src.rpm $tsfile" unless $pkg;
	$srpm_mark_flag[$pkg->{ID}]=1;
    }
} elsif ($transaction_srpm_names) {
    my %srpm_by_name;
    foreach my $srpmpkg (@$srpm_by_input) {
	$srpm_by_name{$srpmpkg->{NAME}}=$srpmpkg;
    }
    foreach my $tsname (@{&load_file($transaction_srpm_names)}) {
	my $pkg=$srpm_by_name{$tsname};
	die "can't find src.rpm $tsname" unless $pkg;
	$srpm_mark_flag[$pkg->{ID}]=1;
    }
}
if ($transaction_rpm_files) {
    foreach my $trpm (@{&load_file($transaction_rpm_files)}) {
	my $tfile=basename($trpm);
	my $pkg=$pkg_by_rpm{$tfile};
	die "can't find rpm file $tfile" unless $pkg;
	$rpm_mark_flag[$pkg->{ID}]=1;
    }
} elsif ($transaction_rpm_names) {
    my %rpm_by_name;
    foreach my $rpmpkg (@$pkg_by_id) {
	$rpm_by_name{$rpmpkg->{NAME}}=$rpmpkg if !$id_is_source{$rpmpkg->{ID}};
    }
    foreach my $tname (@{&load_file($transaction_rpm_names)}) {
	my $pkg=$rpm_by_name{$tname};
	die "can't find rpm name $tname" unless $pkg;
	$rpm_mark_flag[$pkg->{ID}]=1;
    }
}
if ($mark_req_regexp) {
    foreach my $srpmpkg (@$srpm_by_input) {
	my $srpmid=$srpmpkg->{ID};
	foreach my $rpm (@{$srpmpkg->{RPMS}}) {
	    foreach my $reqname (@{$rpm->{RPMHDR}->{REQUIRENAME}}) {
		if ($reqname=~/$mark_req_regexp/o) {
		    print STDERR "marked $srpmpkg->{TEXTID} due to $rpm->{TEXTID}\n" if $verbose>1;
		    $rpm_mark_flag[$rpm->{ID}]=1;
		    $srpm_mark_flag[$srpmid]=1;
		    last;
		}
	    }
	}
    }
}

if ($opt_whobuildreq 
    and ($transaction_srpm_names or $transaction_srpm_files) 
    and not ($transaction_rpm_names or $transaction_rpm_files)) {
    for (my $srpmid=0; $srpmid<@srpm_mark_flag; $srpmid++) {
	next unless $srpm_mark_flag[$srpmid];
	my $srpmpkg=$pkg_by_id->[$srpmid];
	$srpm_mark_flag[$srpmid]=0;
	foreach my $rpm (@{$srpmpkg->{RPMS}}) {
	    $rpm_mark_flag[$rpm->{ID}]=1;
	}
    }
}

my $srpmdim=@$srpm_by_input;
my %global_filereq_cache;
my $global_provides_cache={};
my $global_fileprov_cache={};

for ($i=0;$i<$srpmdim;$i++) {
    &__add_providecache_srpm($srpm_by_input->[$i]);
}

my $fn;
if (@srpm_mark_flag) {
    open $fn, '>', $srpmmarkfile || die "$!: can't open $srpmmarkfile";
    for ($i=0;$i<@$pkg_by_id;$i++) {
	print $fn "$i\n" if $srpm_mark_flag[$i];
    }
    close($fn);
}
if (@rpm_mark_flag) {
    open $fn, '>', $rpmmarkfile || die "$!: can't open $rpmfile";
    for ($i=0;$i<@$pkg_by_id;$i++) {
	print $fn "$i\n" if $rpm_mark_flag[$i];
    }
    close($fn);
}
if (@rpm_mark_flag) {
    open $fn, '>', $rpmfile || die "$!: can't open $rpmfile";
    for ($i=0;$i<@$pkg_by_id;$i++) {
	print $fn "$i\n" if (!$id_is_source{$i});
    }
    close($fn);
}

open my $in, '>', $infile || die "$!: can't open $infile";
for ($i=0;$i<$srpmdim;$i++) {
    my $isrpm=$srpm_by_input->[$i];
    my $iid=$isrpm->{ID};
    my $irpms=$isrpm->{RPMS};
    my ($req);

    if ($set_buildfrom_relation) {
	# print dependencies rpms -> srpm "is build from" rpms
	foreach my $ichildrpm (@{$irpms}) {
	    print $in $ichildrpm->{ID}," ", $iid ,"\n";
	}
    }

    if ($use_build_requires) {
	foreach $req (&buildreq($isrpm)) {
	    my $val=$global_provides_cache->{$req};
	    if ($val) {
		foreach my $slaveid (@$val) {
		    print $in "$iid $slaveid\n";
		}
	    }
	}
    }
    
    foreach my $ichildrpm (@$irpms) {
	my $ichildid=$ichildrpm->{ID};
	foreach $req (&requires($ichildrpm)) {
	    my $val=$global_provides_cache->{$req};
	    if ($val) {
		foreach my $slaveid (@$val) {
		    print $in "$ichildid $slaveid\n";
		}
	    }
	}
    }
}

if ($use_file_provides) {
    # find fileprovides for all files in global_filereq_cache
    for ($i=0;$i<$srpmdim;$i++) {
	&__add_fileprovidecache_srpm($srpm_by_input->[$i]);
    }
    # resolve file requires
    for ($i=0;$i<$srpmdim;$i++) {
	my $isrpm=$srpm_by_input->[$i];
	my $iid=$isrpm->{ID};
	my $irpms=$isrpm->{RPMS};
	my ($req);
	if ($use_build_requires) {
	    foreach $req (&buildreq($isrpm)) {
		my $val=$global_fileprov_cache->{$req};
		if ($val) {
		    foreach my $slaveid (@$val) {
			print $in "$iid $slaveid\n";
		    }
		}
	    }
	}
    
	foreach my $ichildrpm (@$irpms) {
	    my $ichildid=$ichildrpm->{ID};
	    foreach $req (@{$ichildrpm->{FILEREQCACHE}}) {
		my $val=$global_fileprov_cache->{$req};
		if ($val) {
		    foreach my $slaveid (@$val) {
			print $in "$ichildid $slaveid\n";
		    }
		}
	    }
	}
    }
}
close($in);

my ($fn1,$fn2,$fn3,$pkgi);
open $fn1, '>', $namemapfile || die "$!: can't open $namemapfile";
open $fn2, '>', $filemapfile || die "$!: can't open $filemapfile";
open $fn3, '>', $pathmapfile || die "$!: can't open $pathmapfile";
for ($i=0;$i<@$pkg_by_id;$i++) {
    $pkgi=$pkg_by_id->[$i];
    print $fn1 $pkgi->{TEXTID},"\n";
    print $fn2 $pkgi->{FILE},"\n";
    print $fn3 $pkgi->{PATH},"\n";
}
close($fn1);
close($fn2);
close($fn3);

if ($debug) {
    open $fn1, '>', $namemapfile.'2' || die "$!: can't open ${namemapfile}2";
    open $fn2, '>', $filemapfile.'2' || die "$!: can't open ${filemapfile}2";
    for ($i=0;$i<@$pkg_by_id;$i++) {
	$pkgi=$pkg_by_id->[$i];
	print $fn1 "$i $pkgi->{TEXTID}\n";
	print $fn2 "$i $pkgi->{FILE}\n";
    }
    close($fn1);
    close($fn2);
}

my @args=('girar-nmu-helper-pos-sort',
	  '-m',&get_format_file($format),
	  '-i',$infile
);

if ($opt_whobuildreq) {
    push @args, '-g';
} else {
    push @args, '-C',$cycleprefix;
    push @args, '-M',&get_format_file($cycleformat);
}
push @args, '-s', $srpmmarkfile if @srpm_mark_flag;
push @args, '-R', $rpmfile, '-r', $rpmmarkfile if @rpm_mark_flag;
push @args, '-o', $outputfile if $outputfile;

push @args, scalar(@$pkg_by_id);

print STDERR join(' ',@args),"\n" if $verbose>1 or $debug;
die "debug mode: execute the command above manually.\n" if $debug;
system(@args)==0 or die "girar-nmu-helper-pos-sort failed";
unless ($debug) {
    unlink $infile,$namemapfile,$filemapfile,$pathmapfile;
    unlink $srpmmarkfile if @srpm_mark_flag;
    unlink $rpmfile, $rpmmarkfile if @rpm_mark_flag;
}

# destroy
$tmpdir=undef;

if ($verbose and not $opt_whobuildreq) {
    my @cycles=glob("$cycleprefix*");
    unless (@cycles) {
	warn "no cyclic dependencies detected.\n";
    } else {
	warn "cyclic dependencies detected:\n";
	foreach my $cyclefile (@cycles) {
	    warn basename($cyclefile).":\n";
	    &print_stderr($cyclefile);
	}
    }
}

sub get_format_file {
    my ($format)=@_;
    if ($format eq 'name') {
	return $namemapfile;
    } elsif ($format eq 'path') {
	return $pathmapfile;
    } elsif ($format eq 'file') {
	return $filemapfile;
    }
    warn "unknown format $format";
    return $filemapfile;
}

sub print_stderr {
    my ($file)=@_;
    open my $out, '<', $file || die "$!: can't open $file";
    local $/; 
    print STDERR <$out>;
    close($out);
}

sub buildreq {
    my ($pkg)=@_;
    return @{$pkg->{SRPMHDR}->{REQUIRENAME}};
}

sub requires {
    my ($rpm)=@_;
    my @requirecache;
    my @filereq;
    my $provides=$rpm->{PROVIDECACHE};
    die "Oops! cache is lost" unless $rpm->{PROVIDECACHE};

    foreach my $req (@{$rpm->{RPMHDR}->{REQUIRENAME}}) {
	push @requirecache, $req unless $provides->{$req};
	if ('/' eq substr($req,0,1)) {
	    push @filereq, $req;
	    $global_filereq_cache{$req}=1;
	}
    }
    $rpm->{FILEREQCACHE}=\@filereq;
    return @requirecache;
}

sub load_file {
    my ($filename) = @_;
    Carp::confess 'Oops! internal error: filename not specified' unless defined $filename;
    my @out;
    open (my $fn, '<', $filename) || die "can't open $filename: $!";
    while (<$fn>) {
	chomp;
	push @out, $_ if /\S/;
    }
    return \@out;
}

sub __add_to_cache {
    my ($id, $name, $cache)=@_;
    my $val=$cache->{$name};
    if ($val) {
	push @$val,$id;
    } else {
	$cache->{$name}=[$id];
    }
}

sub __add_providecache_srpm {
    my ($srpm)=@_;
    die "no binary rpms found for $srpm->{TEXTID}" unless @{$srpm->{RPMS}} or $ignore_extra_srpms;
    foreach my $rpm (@{$srpm->{RPMS}}) {
	my $id=$rpm->{ID};
	my %providecache;
	foreach my $providename (@{$rpm->{RPMHDR}->{PROVIDES}}) {
	    &__add_to_cache($id,$providename,$global_provides_cache);
	    $providecache{$providename}=1;
	}
	$rpm->{PROVIDECACHE}=\%providecache;
    }
}

sub __add_fileprovidecache_srpm {
    my ($srpm)=@_;
    # optimization: already dead
    #die "no binary rpms found for $srpm->{TEXTID}" unless @{$srpm->{RPMS}} or $ignore_extra_srpms;
    foreach my $rpm (@{$srpm->{RPMS}}) {
	my $id=$rpm->{ID};
	my %fileprovcache;
	my $filenamesref=$rpm->{RPMHDR}->filenames();
	if ($filenamesref) {
	    foreach my $filename (@$filenamesref) {
		&__add_to_cache($id,$filename,$global_fileprov_cache) if $global_filereq_cache{$filename};
		$fileprovcache{$filename}=1;
	    }
	}
	$rpm->{FILEPROVCACHE}=\%fileprovcache;
    }
}


=head1	NAME

girar-nmu-sort-transaction - calculate an order to build a transaction.

=head1	SYNOPSIS

B<girar-nmu-sort-transaction> 
[B<-h, --help>]
[B<--cycle-format> I<name|file|path>]
[B<--cycle-prefix> I<string>]
[B<--debug>]
[B<--format> I<name|file|path>]
[B<--ignore-extra-rpms|--no-ignore-extra-rpms>]
[B<--ignore-extra-srpms|--no-ignore-extra-srpms>]
[B<--mark-req-regexp> I<pcre regexp>]
[B<--rpm-files-transaction> I<file>]
[B<--rpm-names-transaction> I<file>]
[B<--srpm-files-transaction> I<file>]
[B<--srpm-names-transaction> I<file>]
[B<-q|--quiet>]
[B<-v|--verbose>]
[B<-o|--output> I<file>]
<context of the transaction: the list of src.rpms and binary rpms built 
from the src.rpms or/and the list of directories containing those rpms>

=head1	DESCRIPTION

B<girar-nmu-sort-transaction> sorts packages of the transaction 
into the build order according to their dependencies;
finds circular dependencies that are obstacles to replacement transaction.

The main arguments to the utility are transaction and context.
Context is the set of packages that provide the net of dependencies.
It is failsafe to provide the whole repository as the context.
Use the smoller contexts on your own risk.

Transaction is the subset of context that should be sorted according to 
the order generated by BuildRequires and Requires.

Mark the transaction using B<--mark-req-regexp> option or 
B<--(srpm|rpm)-(files|names)-transaction> I<file> option.

=head1	OPTIONS

=over

=item	B<-h, --help>

Display this help and exit.

=item	B<-v, --verbose>, B<-q, --quiet>

Verbosity level. Multiple -v increase the verbosity level, -q sets it to 0.
Default is 1.

=item	B<-o, --output> I<filename>

Output file.

=item	B<--format> I<name>

Output file format. One of: name, file, path.

name prints rpm name; file prints rpm filename; path prints rpm filename with path.

=item	B<--cycle-format> I<name>

Cycle information files format. One of: name, file, path.

=item	B<--cycle-prefix> I<name>

Prefix for cycle information files. 

=item	B<--debug>

Debug mode. Data is collected, but data procession is left to user.

=item	B<--ignore-extra-rpms, --no-ignore-extra-rpms>

Ignore/do not ignore binary rpms that have no corresponding source counterparts.
Default is die on first occurence of mon-matched rpm (B<--no-ignore-extra-rpms>).

=item	B<--ignore-extra-srpms, --no-ignore-extra-srpms>

Ignore/do not ignore source rpms that have no corresponding binary counterparts.
Default is ignore. 

=item B<--mark-req-regexp> I<pcre regexp>

Marks some binary and source rpms as part of the transaction according to I<regexp>.
For example, B<--mark-req-regexp '^libperl.so.5.8'> will mark 
binary rpms that depend on libperl.so.5.8* and their source rpms.

Alternatively, use B<--(s)rpm-(files|names)-transaction> I<file> options
to mark transaction rpms and srpms explicitly.

=item B<--(s)rpm-(files|names)-transaction> I<file>

B<--rpm-files-transaction> I<file>
B<--rpm-names-transaction> I<file>
B<--srpm-files-transaction> I<file>
B<--srpm-names-transaction> I<file>

Alternatively, use B<--mark-req-regexp> I<pcre regexp> to mark transaction rpms 
implicitly based on regexp on their Requires:.

=item	B<--use-file-provides, --no-use-file-provides>

Resolve/do not resolve requires using the list of files in the package.
Default is --use-file-provides.
Disable it if uou are sure it is harmless to save some time.

=back

=head1	AUTHOR

Written by Igor Vlasenko <viy@altlinux.org>.

=head1	COPYING

Copyright (c) 2010 Igor Vlasenko, ALT Linux Team.

This is free software; you can redistribute it and/or modify it under the terms
of the GNU General Public License as published by the Free Software Foundation;
either version 2 of the License, or (at your option) any later version.

=cut

