#!/usr/bin/perl

# Copyright © 2020-2021 Felix Lechner
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

use v5.20;
use warnings;
use utf8;

use Const::Fast;
use DBI;
use Email::Address::XS;
use File::Find::Rule;
use HTTP::Tiny;
use IPC::Run3;
use JSON::MaybeXS;
use List::Compare;
use List::SomeUtils qw(first_value);
use List::UtilsBy qw(uniq_by);
use Path::Tiny;
use Time::Duration;
use Time::Piece;
use Unicode::UTF8 qw(encode_utf8 decode_utf8);
use YAML::XS qw(Load LoadFile);

use Debian::Lintian::Deb822;

const my $EMPTY => q{};
const my $SPACE => q{ };
const my $INDENT => $SPACE x 4;
const my $SLASH => q{/};
const my $HYPHEN => q{-};

const my $LAST_ITEM => -1;

die encode_utf8("Usage $0 [config-file]\n")
  unless @ARGV == 1;

my $configfile = $ARGV[0];
die encode_utf8('No configfile')
  unless length $configfile;
my $config = LoadFile($configfile);

my $mirror_base = $config->{mirror_base};
die encode_utf8('No base directory for archive mirror')
  unless length $mirror_base;

# drop trailing slash
$mirror_base =~ s{/$}{};

# get database config
my $dbconnectstring = $config->{database};
die encode_utf8('No database connect string')
  unless length $dbconnectstring;

my $postgres = DBI->connect(
    'dbi:Pg:' . $dbconnectstring,
    $EMPTY, $EMPTY,
    {
        AutoCommit => 0,
        RaiseError => 1,
        ShowErrorStatement => 1,
        pg_enable_utf8 => 0
    });

synchronize_mirror($postgres, "$mirror_base/debian/dists");

update_autoreject_tags($postgres);

$postgres->commit;

$postgres->disconnect;

exit;

sub synchronize_mirror {
    my ($database, $dist_dir) = @_;

    update_releases($database, $dist_dir);

    update_sources($database, $dist_dir);

    prune_sources($database);

    prune_mailboxes($database);

    update_installables($database, $dist_dir);

    prune_installables($database);

    return;
}

sub get_subfolders {
    my ($dist_dir) = @_;

    my $rule = File::Find::Rule->new;
    $rule->directory->mindepth(1)->maxdepth(1);
    $rule->not($rule->new->symlink);
    my @releases = $rule->relative->in($dist_dir);

    return @releases;
}

sub update_releases {
    my ($database, $dist_dir) = @_;

    my @releases = sort +get_subfolders($dist_dir);

    say encode_utf8('The local mirror has those releases:');
    say encode_utf8($INDENT . $HYPHEN . $SPACE . $_) for @releases;

    my @db_releases;
    for my $release (@releases) {

        my %db_release;
        $db_release{release_name} = $release;
        $db_release{channel} = 'local-mirror';

        push(@db_releases, \%db_release);
    }

    my $update_releases_sql =<<~'END_OF_QUERY';
        WITH data AS (
            SELECT *
            FROM json_populate_recordset(null::archive.releases, $1)
        ),
        d AS (
            DELETE FROM archive.releases AS r
            WHERE NOT EXISTS (
                SELECT * FROM data
                WHERE data.release_name = r.release_name
                AND data.channel = r.channel
            )
        )
        INSERT INTO archive.releases
        SELECT * FROM data
        ON CONFLICT (release_name)
            DO NOTHING
    END_OF_QUERY

    my $update_releases = $database->prepare($update_releases_sql);
    $update_releases->execute(encode_json(\@db_releases));

    say encode_utf8('Elapsed time: ' . duration(time - $^T));

    return;
}

sub update_sources {
    my ($database, $dist_dir) = @_;

    my @db_sources;
    my @db_released_sources;
    my @db_mailboxes;
    my @db_distributors;

    my @releases = sort +get_subfolders($dist_dir);

    for my $release (@releases) {

        my $release_dir = "$dist_dir/$release";

        my $liberty_rule = File::Find::Rule->new;
        $liberty_rule->directory->mindepth(1)->maxdepth(1);
        my @liberties = sort $liberty_rule->relative->in($release_dir);

        for my $liberty (@liberties) {

            my $liberty_dir = "$release_dir/$liberty";

            my $sources_xz = "$liberty_dir/source/Sources.xz";
            unless (-e $sources_xz) {

                say encode_utf8(
                    "Skipping $release/$liberty: Cannot find $sources_xz.");
                return;
            }

            say encode_utf8("Parsing sources for $release/$liberty.");

            my @sources = parse_compressed_deb822($sources_xz);
            for my $source (@sources) {

                my $source_name = $source->value('Package');
                my $source_version = $source->value('Version');

                my %db_source;
                $db_source{source_name} = $source_name;
                $db_source{source_version} = $source_version;

                $db_source{source_liberty} = $liberty;

                $db_source{homepage} = $source->value('Homepage');
                $db_source{vcs_browser} = $source->value('Vcs-Browser');

                my @files = $source->trimmed_list('Files', qr/\n/);
                my $dsc = (split($SPACE, $files[0]))[2];

                my $pool_folder = $source->value('Directory');
                $pool_folder =~ s{^ pool / }{}sx;
                $db_source{pool_path} = $pool_folder . $SLASH . $dsc;

                my $upstream_version = $source_version;
                $upstream_version =~ s{ - [^-]+ $}{}x
                  unless $source->value('Format') =~ /native/;
                $db_source{upstream_version} = $upstream_version;

                push(@db_sources, \%db_source);

                my %db_released_source;

                $db_released_source{$_} = $db_source{$_}
                  for qw{source_name source_version};

                $db_released_source{release} = $release;

                push(@db_released_sources, \%db_released_source);

                my @maintainer_parsed
                  = Email::Address::XS->parse($source->value('Maintainer'));
                my @uploaders_parsed
                  = Email::Address::XS->parse($source->value('Uploaders'));

                for my $distributor (@maintainer_parsed, @uploaders_parsed) {

                    unless ($distributor->is_valid
                        && length $distributor->phrase){

                        my $maintainer = $source->value('Maintainer');
                        say encode_utf8("> Maintainer: $maintainer")
                          if length $maintainer;

                        my $uploaders = $source->value('Uploaders');
                        say encode_utf8("> Uploaders: $uploaders")
                          if length $uploaders;

                        say encode_utf8(
"Skipping invalid distributor in $source_name/$source_version/$liberty: "
                              . $distributor->original);

                        next;
                    }

                    my %db_mailbox = (
                        rfc5322 => $distributor->format,
                        phrase => $distributor->phrase,
                        address => $distributor->address,
                        user => $distributor->user,
                        host => $distributor->host,
                    );

                    push(@db_mailboxes, \%db_mailbox);

                    my %db_distributor;

                    $db_distributor{$_} = $db_source{$_}
                      for qw{source_name source_version};

                    $db_distributor{mailbox} = $distributor->format;

                    push(@db_distributors, \%db_distributor);
                }
            }
        }
    }

    my $update_sources_sql =<<~'END_OF_QUERY';
        WITH data AS (
            SELECT *
            FROM json_populate_recordset(null::archive.sources, $1)
        ),
        d AS (
            DELETE FROM archive.sources AS s
            WHERE NOT EXISTS (
                SELECT * FROM data
                WHERE data.source_name = s.source_name
                AND data.source_version = s.source_version
                AND data.source_liberty = s.source_liberty
            )
        )
        INSERT INTO archive.sources
        SELECT *
        FROM data
        ON CONFLICT (source_name, source_version, source_liberty)
            DO NOTHING
    END_OF_QUERY

    @db_sources
      = uniq_by { $_->{source_name} . $_->{source_version} } @db_sources;

    say encode_utf8('Updating '. scalar @db_sources . ' sources.');

    my $update_sources = $database->prepare($update_sources_sql);
    $update_sources->execute(encode_json(\@db_sources));

    my $update_released_sources_sql =<<~'END_OF_QUERY';
        WITH data AS (
            SELECT *
            FROM json_populate_recordset(null::archive.released_sources, $1)
        ),
        d AS (
            DELETE FROM archive.released_sources AS rs
            WHERE NOT EXISTS (
                SELECT * FROM data
                WHERE data.source_name = rs.source_name
                AND data.source_version = rs.source_version
                AND data.release = rs.release
            )
        )
        INSERT INTO archive.released_sources
        SELECT *
        FROM data
        ON CONFLICT (source_name, source_version, release)
            DO NOTHING
    END_OF_QUERY

    @db_released_sources
      = uniq_by { $_->{source_name} . $_->{source_version} . $_->{release} }
    @db_released_sources;

    say encode_utf8(
        'Updating '. scalar @db_released_sources . ' released sources.');

    my $update_released_sources
      = $database->prepare($update_released_sources_sql);
    $update_released_sources->execute(encode_json(\@db_released_sources));

    my $update_mailboxes_sql =<<~'END_OF_QUERY';
        WITH data AS (
            SELECT *
            FROM json_populate_recordset(null::archive.mailboxes, $1)
        )
        INSERT INTO archive.mailboxes
        SELECT * FROM data
        ON CONFLICT (rfc5322)
            DO NOTHING
    END_OF_QUERY

    @db_mailboxes = uniq_by { $_->{rfc5322} } @db_mailboxes;

    say encode_utf8('Updating ' . scalar @db_mailboxes . ' mailboxes.');

    my $update_mailboxes = $database->prepare($update_mailboxes_sql);
    $update_mailboxes->execute(encode_json(\@db_mailboxes));

    my $update_distributors_sql =<<~'END_OF_QUERY';
        WITH data AS (
            SELECT *
            FROM json_populate_recordset(null::archive.distributors, $1)
        )
        INSERT INTO archive.distributors
        SELECT * FROM data
        ON CONFLICT (source_name, source_version, mailbox)
            DO NOTHING
    END_OF_QUERY

    @db_distributors
      = uniq_by { $_->{source_name} . $_->{source_version} . $_->{mailbox} }
    @db_distributors;

    say encode_utf8('Updating ' . scalar @db_distributors . ' distributors.');

    my $update_distributors = $database->prepare($update_distributors_sql);
    $update_distributors->execute(encode_json(\@db_distributors));

    say encode_utf8('Elapsed time: ' . duration(time - $^T));

    return;
}

sub prune_mailboxes {
    my ($database) = @_;

    my $prune_mailboxes_sql =<<~'END_OF_QUERY';
        DELETE FROM archive.mailboxes AS m
        WHERE NOT EXISTS (
            SELECT
                *
            FROM
                archive.distributors AS d
                WHERE
                    d.mailbox = m.rfc5322
        )
    END_OF_QUERY

    say encode_utf8('Pruning unused mailboxes.');
    $database->do($prune_mailboxes_sql);

    say encode_utf8('Elapsed time: ' . duration(time - $^T));

    return;
}

sub prune_sources {
    my ($database) = @_;

    say encode_utf8('Pruning sources not present in any release.');

    my $prune_sources_sql =<<~'END_OF_QUERY';
        DELETE FROM archive.sources AS s
        WHERE NOT EXISTS (
            SELECT * FROM archive.released_sources AS rs
            WHERE rs.source_name = s.source_name
            AND rs.source_version = s.source_version
        )
    END_OF_QUERY

    $database->do($prune_sources_sql);

    say encode_utf8('Elapsed time: ' . duration(time - $^T));

    return;
}

sub update_installables {
    my ($database, $dist_dir) = @_;

    my @db_installables;
    my @db_released_installables;

    my @releases = sort +get_subfolders($dist_dir);
    for my $release (@releases) {

        my $release_dir = "$dist_dir/$release";
        my @liberties = sort +get_subfolders($release_dir);

        for my $liberty (@liberties) {

            my $liberty_dir = "$release_dir/$liberty";
            my @installable_folders
              = sort grep { /^binary-/ } +get_subfolders($liberty_dir);

            for my $folder (@installable_folders) {

                my $port = $folder;
                $port =~ s/^binary-//;

                if ($port eq 'all') {

                    say encode_utf8(
"Skipping $release/$liberty/$port, which is used only to create new ports."
                    );
                    next;
                }

                my $packages_xz= "$liberty_dir/$folder/Packages.xz";
                unless (-e $packages_xz) {

                    say encode_utf8(
"Skipping $release/$liberty/$folder: Cannot find $packages_xz."
                    );
                    return;
                }

                say encode_utf8(
                    "Parsing installables for $release/$liberty/$port.");

                my @installables = parse_compressed_deb822($packages_xz);
                for my $installable (@installables) {

                    my $installable_name = $installable->value('Package');
                    my $installable_version = $installable->value('Version');

                    my ($source_name, $source_version)
                      = split($SPACE, $installable->value('Source'));

                    $source_name ||= $installable_name;
                    $source_version =~ s{^ [(] (.+) [)] $}{$1}sx
                      if defined $source_version;
                    $source_version ||= $installable_version;

                    my $basename
                      = ($installable->trimmed_list('Filename', qr{/}))
                      [$LAST_ITEM];

                    my %db_installable;

                    $db_installable{installable_name} = $installable_name;
                    $db_installable{installable_version}= $installable_version;
                    $db_installable{installable_architecture}
                      = $installable->value('Architecture');

                    $db_installable{installable_liberty} = $liberty;

                    $db_installable{source_name} = $source_name;
                    $db_installable{source_version} = $source_version;

                    my $pool_path = $installable->value('Filename');
                    $pool_path =~ s{^ pool / }{}sx;
                    $db_installable{pool_path} = $pool_path;

                    $db_installable{description}
                      = $installable->value('Description');
                    $db_installable{installed_size_k}
                      = $installable->value('Installed-Size') || 0;
                    $db_installable{size} = $installable->value('Size');

                    push(@db_installables, \%db_installable);

                    my %db_released_installable;

                    $db_released_installable{$_} = $db_installable{$_}
                      for
                      qw{installable_name installable_version installable_architecture};

                    $db_released_installable{release} = $release;
                    $db_released_installable{port} = $port;

                    push(@db_released_installables, \%db_released_installable);
                }
            }
        }
    }

    my $update_installables_sql =<<~'END_OF_QUERY';
WITH data AS (
    SELECT
        *
    FROM
        json_populate_recordset(NULL::archive.installables, $1)
),
d AS (
    DELETE FROM archive.installables AS i
    WHERE NOT EXISTS (
            SELECT
                *
            FROM
                data
            WHERE
                data.installable_name = i.installable_name
                AND data.installable_version = i.installable_version
                AND data.installable_architecture = i.installable_architecture))
    INSERT INTO archive.installables
    SELECT
        *
    FROM
        data
    ON CONFLICT (installable_name,
        installable_version,
        installable_architecture)
    DO NOTHING
END_OF_QUERY

    @db_installables = uniq_by {
        $_->{installable_name}
          . $_->{installable_version}
          . $_->{installable_architecture}
    }
    @db_installables;

    say encode_utf8('Updating '. scalar @db_installables. ' installables.');

    my $update_installables= $database->prepare($update_installables_sql);
    $update_installables->execute(encode_json(\@db_installables));

    say encode_utf8('Elapsed time: ' . duration(time - $^T));

    my $update_released_installables_sql =<<~'END_OF_QUERY';
WITH data AS (
    SELECT
        *
    FROM
        json_populate_recordset(NULL::archive.released_installables, $1)
),
d AS (
    DELETE FROM archive.released_installables AS ri
    WHERE NOT EXISTS (
            SELECT
                *
            FROM
                data
            WHERE
                data.installable_name = ri.installable_name
                AND data.installable_version = ri.installable_version
                AND data.installable_architecture = ri.installable_architecture
                AND data.release = ri.release
                AND data.port = ri.port))
    INSERT INTO archive.released_installables
    SELECT
        *
    FROM
        data
    ON CONFLICT (installable_name,
        installable_version,
        installable_architecture,
        release,
        port)
    DO NOTHING
END_OF_QUERY

    @db_released_installables = uniq_by {
        $_->{installable_name}
          . $_->{installable_version}
          . $_->{installable_architecture}
          . $_->{release}
          . $_->{port}
    }
    @db_released_installables;

    say encode_utf8('Updating '
          . scalar @db_released_installables
          . ' released installables.');

    my $update_released_installables
      = $database->prepare($update_released_installables_sql);
    $update_released_installables->execute(
        encode_json(\@db_released_installables));

    say encode_utf8('Elapsed time: ' . duration(time - $^T));

    return;
}

sub prune_installables {
    my ($database) = @_;

    say encode_utf8(
        'Pruning installables not shipped in any port or release.');

    my $prune_installables_sql =<<~'END_OF_QUERY';
        DELETE FROM archive.installables AS i
        WHERE NOT EXISTS (
            SELECT * FROM archive.released_installables AS ri
            WHERE ri.installable_name = i.installable_name
            AND ri.installable_version = i.installable_version
            AND ri.installable_architecture = i.installable_architecture
        )
    END_OF_QUERY

    $database->do($prune_installables_sql);

    say encode_utf8('Elapsed time: ' . duration(time - $^T));

    return;
}

sub get_archive_liberties {
    my ($database) = @_;

    my $get_liberties_sql =<<~ 'END_OF_QUERY';
SELECT
    json_agg(c)
FROM (
    SELECT
        unnest(enum_range(NULL::distribution_liberty))::text AS liberty
    ORDER BY
        liberty ASC) AS c
END_OF_QUERY

    my $get_liberties = $database->prepare($get_liberties_sql);
    $get_liberties->execute;
    my $liberty_json = $get_liberties->fetchrow_arrayref->[0];
    $get_liberties->finish;
    my $liberty_results = decode_json($liberty_json);
    my @liberties = sort map { $_->{liberty} } @{$liberty_results};

    return @liberties;
}

sub update_autoreject_tags {
    my ($database) = @_;

    my $http = HTTP::Tiny->new(verify_SSL => 1);

    my $FTP_MASTER_URL = 'https://ftp-master.debian.org/static/lintian.tags';

    my $response = $http->get($FTP_MASTER_URL);

    die 'Cannot read FTP Master auto-reject list'
      unless $response->{success};

    my $yaml = $response->{content};
    my $autoreject = Load($yaml);

    my $mandatory = $autoreject->{lintian}{fatal};
    my $overridable = $autoreject->{lintian}{nonfatal};

    my @autoreject_tags;

    for my $name (@{$mandatory}) {

        my %autoreject_tag;
        $autoreject_tag{tag_name} = $name;
        $autoreject_tag{mandatory} = 1;

        push(@autoreject_tags, \%autoreject_tag);
    }

    for my $name (@{$overridable}) {

        my %autoreject_tag;
        $autoreject_tag{tag_name} = $name;
        $autoreject_tag{mandatory} = 0;

        push(@autoreject_tags, \%autoreject_tag);
    }

    say encode_utf8('Truncating autoreject tags from FTP Master.');
    $database->do('TRUNCATE archive.autoreject_tags');

    my $insert_autoreject_tags_sql =<<~'END_OF_QUERY';
        INSERT INTO archive.autoreject_tags
        SELECT * FROM json_populate_recordset(NULL::archive.autoreject_tags, $1)
        ON CONFLICT (tag_name)
            DO NOTHING
    END_OF_QUERY

    say encode_utf8('Inserting autoreject tags from FTP Master.');
    my $autoreject_tags_json_array = encode_json(\@autoreject_tags);
    my $insert_autoreject_tags
      = $database->prepare($insert_autoreject_tags_sql);
    $insert_autoreject_tags->execute($autoreject_tags_json_array);

    say encode_utf8('Elapsed time: ' . duration(time - $^T));

    return;
}

sub parse_compressed_deb822 {
    my ($path) = @_;

    my @command = (qw{xz --decompress --force --stdout}, $path);
    my $uncompressed;

    run3(\@command, \undef, \$uncompressed);
    die encode_utf8("Cannot run @command")
      if $?;

    my $source_deb822 = Debian::Lintian::Deb822->new;
    my @sections = $source_deb822->parse_string(decode_utf8($uncompressed));
    undef $uncompressed;

    return @sections;
}

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
