#!/usr/bin/perl

# whohas, a Perl utility to display availability of source
# and binary packages from major Linux and BSD distributions
#
# Copyright (C) Philipp L. Wesche 2005-2022
#
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

use strict;
use sigtrap;

#TODO --fetch-unstable switch
#TODO make sure that debian's version numbers are from i386
#TODO get date info about debian, ubuntu (link is to changelog)
#TODO get date info about slackware packages from subsequent links  - postponed until slackware packages is online again
#TODO architecture tests for those that support several
#TODO Gentoo: only report two most recent for each package?
#TODO make more use of the conf directory, e.g. for Fedora, Sourcemage, so we download those indexes only sparingly, and save ourselves processing time; MAKE SURE YOU WRITE THE PROCESSED RESULTS FOR EASY PARSING ON THE NEXT RUN
#TODO we can also cache search results there to drastically reduce query time on subsequent queries
#TODO add option to override the cache (newly fetched file written to cache)
#TODO ubuntu: allow two releases: the long term supported and the most recent
#TODO allow searching on several packages; return results only for those distros that have a hit for each package, possibly in tabular format
#TODO deal gracefully with hyphens that may be present in some distros but not others, i.e. include extra hyphens in regexes, and allow user-specified hyphens to be absent

use Config;
use Env qw(HOME);
eval {
	require if $^O ne 'MSWin32', 'forks';
};
use if $Config{usethreads}, "threads";
use Getopt::Long;
use LWP::UserAgent;
use Thread::Queue;

my $conffile = "whohas.cf";
my $confdir = "$HOME/.whohas";
# make .whohas directory in home directory
unless (-d $confdir) {
	mkdir ($confdir, 0755);
}

my @columns = (11,38,18,4,10,25);
my $cols = 6;

our $fedora_min_release		 =  ""			;
our $fedora_max_release		 =  ""			;
our $debian_current_release	 = "all"		;
our $ubuntu_current_release	 = "all"		;
our $openbsd_release		 = ""		;

my @distrosAvailable = qw(arch cygwin debian fedora fink freebsd gentoo macports mageia mandriva netbsd openbsd opensuse slack sourcemage ubuntu);
my %distrosSelected;
foreach (@distrosAvailable) {
	$distrosSelected{$_} = 1;
}

our @distroSelections;
my $nothreads;
my $shallow;
my $option_help;
my $option_strict;

if ( -s "$confdir/$conffile" ) {
	eval {
		do "$confdir/$conffile";
	};
	if ($@) {
		print STDERR 'Eval of configuration caused errors. Aborting.\n';
		exit;
	}
}

GetOptions(
	"d=s" => \@distroSelections,
	"no-threads" => \$nothreads,
	"shallow" => \$shallow,
	"help|usage|h" => \$option_help,
	"strict|s" => \$option_strict,
);

if ($option_help) {
	print "Usage: $0 [--no-threads] [--shallow] [--strict] [-d Dist1[,Dist2[,Dist3...]]] pkgname\n";
	exit 0;
}

if (@ARGV > 1) {
	die "Error:\tToo many parameters. Usage: $0 [--no-threads] [--shallow] [--strict] [-d Dist1[,Dist2[,Dist3...]]] pkgname\n";
} elsif (@ARGV < 1) {
	die "Error:\tPlease specify a search term.\n";
}

if (!$Config{usethreads} && !$nothreads) {
	$nothreads = 1;
	warn "No threads support, --no-threads is forced.\n";
}

#
# BUILD %distrosSelected
#

if (@distroSelections) {
	foreach (@distrosAvailable) {
		$distrosSelected{$_} = 0;
	}
	@distroSelections = split(/,/,join(',',@distroSelections));
	for my $distro (@distroSelections) {
		$distro =~ tr/A-Z/a-z/;
		if (		$distro =~ /archlinux/i) {	$distrosSelected{arch}	= 1;
		} elsif (	$distro =~ /slackware/i) {	$distrosSelected{slack}	= 1;
		} else {
			if (exists $distrosSelected{$distro}) {	# NB only due to previous setting of hash values for all known distros can we use this test to see if the distro is known
				$distrosSelected{$distro} = 1;
			} else {
				die "Unsupported distribution '$distro'\n";
			}
		}
	}
}

#
# DISPATCH TO SUBROUTINES, THREADED OR UNTHREADED
#

if ($ARGV[0] eq "whohasme") {
	print "Congratulations. You discovered an Easter egg. Maybe you can send a quick email to phi1ipp\@yahoo.com to say hello and tell the developer what you think of the software.\n";
	exit;
}

my $q = Thread::Queue->new();
foreach (keys %distrosSelected) {
	if ($distrosSelected{$_}) {
		if ($_ eq 'arch') {
			$q->enqueue('arch', 'aur');
		} else {
			$q->enqueue($_);
		}
	}
}
$q->end();

if (!$nothreads) {
	my $maxthreads = maxthreads($q);
	my @thrs;
	for (0..$maxthreads - 1) {
		push(@thrs, threads->new(\&worker));
	}
	foreach (@thrs) {
		$_->join;
	}
} else {
	worker();
}

#
# END OF MAIN
#
# DISPATCH HELPER FUNCTIONS BELOW
#

sub worker {
	no strict "refs";
	while (defined(my $distro = $q->dequeue())) {
		# NB this is only safe because we've previously checked for illegal subs
		&$distro($ARGV[0]);
	}
}

sub maxthreads {
	my ($q) = @_;
	my $jobcount = $q->pending();
	eval { require Sys::CPU; };
	if ($@) {
		return $jobcount;
	} else {
		my $cpucount = Sys::CPU::cpu_count();
		return $cpucount < $jobcount ? $cpucount : $jobcount;
	}
}

#
# DISTRO-SPECIFIC MODULES
# (FUNCTIONS THAT QUERY SPECIFIC REPOSITORY/LIST SERVERS)
#

sub fedora {
	my $baseurl = "https://dl.fedoraproject.org/pub/fedora/linux/releases/";
	my $distroname = "Fedora";
	my $arch = "x86_64";
	my $searchy = $_[0];
	my @names;
	my @versions;
	my @dates;
	my @sizes;
	my @repos;
	my @urls;

	if (not $fedora_max_release) {
		my @lines = split /\n/, &fetchdoc($baseurl);
		for (my $li = 0; $li < @lines; $li++) {
			if ($lines[$li] =~ m{<img src="[^"]*folder[^"]*" alt="[^"]*"> *<a href="[0-9]+/"}) {
				my ($release) = ($lines[$li] =~ m{<img src="[^"]*folder[^"]*" alt="[^"]*"> *<a href="([0-9]+)/"});
				if ($release > $fedora_max_release) {
					$fedora_max_release = $release;
				}
			}
		}
		if (not $fedora_max_release) {
			print STDERR "Could not parse Fedora release list, skipping Fedora packages\n";
			return ();
		}
	}
	if (not $fedora_min_release) {
		$fedora_min_release = $fedora_max_release - 2;
	}

	for (my $i = $fedora_max_release; $i >= $fedora_min_release; $i--) {
		my @fed_urls = ("$i/Everything/$arch/os/Packages/");
		my $file = "$confdir/$distroname\_$i.list";
		{
			for (my $a = 0; $a < @fed_urls; $a++) {
				my @lines = split /\n/, &fetchdoc($baseurl.$fed_urls[$a].lc(substr($searchy, 0, 1)).'/');
				for (my $li = 0; $li < @lines; $li++) {
					if ($lines[$li] =~ / +<a href="[^"]+?rpm"/) {
						my ($name, $version, $date, $size) = ($lines[$li] =~ m{ +<a href="(.+?)-([0-9].*)\.fc[0-9]*\.[^.]*\.rpm">[^<]*</a> *([^ ]* [^ ]*) *([^ ]*) *(?:RPM Package)?});
						push @names, $name;
						push @versions, $version;
						push @dates, $date;
						push @sizes, $size;
						push @urls, "https://apps.fedoraproject.org/packages/$name";
					}
				}
			}
		}
	}
	for (my $i = 0; $i < @names; $i++) {
		if ($names[$i] =~ /$searchy/i) {
			&pretty_print($cols,@columns,$distroname,$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
		}
	}
	return ();
}

sub month_to_digits {
	$_[0] =~ s/JAN/01/i;
	$_[0] =~ s/FEB/02/i;
	$_[0] =~ s/MAR/03/i;
	$_[0] =~ s/APR/04/i;
	$_[0] =~ s/MAY/05/i;
	$_[0] =~ s/JUN/06/i;
	$_[0] =~ s/JUL/07/i;
	$_[0] =~ s/AUG/08/i;
	$_[0] =~ s/SEP/09/i;
	$_[0] =~ s/OCT/10/i;
	$_[0] =~ s/NOV/11/i;
	$_[0] =~ s/DEC/12/i;
	return ($_[0]);
}

sub macports {
	my $baseurl = "https://www.macports.org";
	my @names;
	my @versions;
	my @dates;
	my @sizes;
	my @repos;
	my @urls;
	my @lines = split /\n/, &fetchdoc($baseurl."/ports.php?by=name&substr=".$_[0]);
	for (my $i = 70; $i < @lines; $i++) {
		if ($lines[$i] =~ /<dt><b>/) {
			my @parties = split /\<dt\>\<b\>/, $lines[$i];
			for (my $javar = 1; $javar < @parties; $javar++) {
				my @parts = split /href="|">|<\/a><\/b> |<\/dt/, $parties[$javar];
				push @urls,     $parts[1];
				push @names,    $parts[2];
				push @versions, $parts[3];
				push @repos, "";
				push @sizes, "";
				push @dates, "";
			}
		}
	}
	for (my $i = 0; $i < @repos; $i++) {
		&pretty_print($cols,@columns,"MacPorts",$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
	}
	return ();
}


sub fink {
	my $baseurl = "https://pdb.finkproject.org/pdb/";
	my @names;
	my @versions;
	my @dates;
	my @sizes;
	my @repos;
	my @urls;
	my @lines = split /\n/, &fetchdoc($baseurl."browse.php?name=".$_[0]);
	for (my $i = 60; $i < @lines; $i++) {
		if ($lines[$i] =~ /tr class=\"package\"/) {
			if ($lines[$i] =~ /^\<tr class\=\"pdbHeading\"\>/) {
				$lines[$i] =~ s/.*?\<\/tr\>//;
			}
			my @splitty = split /href\=\"|\"\>|\<\/a\>\<\/td\>\<td class=\"packageName\"\>|\<\/td\>\<td\>/, $lines[$i];
			push @urls, $splitty[3];
			push @names, $splitty[4];
			push @versions, $splitty[5];
			push @repos, "";
			push @sizes, "";
			push @dates, "";
		} elsif ($lines[$i] =~ /\<p\>Query took /) {
			last;
		}
	}
	for (my $i = 0; $i < @repos; $i++) {
		&pretty_print($cols,@columns,"Fink",$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
	}
	return ();
}

sub fink_get_details {
	my @repos;
	my @versions;
	my @lines = split /\n/, &fetchdoc($_[0]);
	for (my $i = 60; $i < @lines; $i++) {
		if ($lines[$i] =~ /10\./) {
			my @parts = split /nowrap">|<\/div>/, $lines[$i];
			unless ($parts[5] =~ /not present/ or $parts[5] =~ /unsupported/) {
				if ($parts[1] =~ /<br>/) {
					push @repos, (split /<br>/, $parts[1])[0];
				} else {
					push @repos, $parts[1];
				}
				$parts[5] =~ s/\<\!\-\-.*\-\-\>//;
				push @versions, $parts[5];
			}
		}
		if ($lines[$i] eq '</table>') {
			last;
		}
	}
	return (\@versions,\@repos);
}

sub size_trim {
	# give at least two significant figures; if a 10^3 edge is encountered, put a dot
	my $leave =  length($_[0]) % 3;
	my $threes = (length($_[0]) - $leave) / 3;
	if ($leave == 0) {
		$leave = 3;
		$threes--;
	}
	
	my @parts = split //, $_[0];
	my $retval = join "", @parts[0..($leave-1)];
	if (length($retval)==1 && $threes > 0) {
		# add one more significant figure
		my $add_sf = $parts[$leave];
		if ($parts[$leave+1] > 4) {
			# rounding
			$add_sf++;
			if ($add_sf == 10) {
				$add_sf = 0;
				$retval++;
				if ($retval == 10) {
					return(&size_trim($retval*(1000**$threes)));
				}
			}
		}
		$retval .= ".$add_sf";
	} elsif (defined($parts[$leave]) && $parts[$leave] > 4) { # instead of defined(...), ($threes > 0) is also possible
		my $before = length($retval);
		# rounding
		$retval++;
		if (length($retval) > $before) {
			return(&size_trim($retval*(1000**$threes)));
		}
	}
	my @suffixes = ("k","M","G");
	if ($threes > 0) {
		$retval .= $suffixes[($threes-1)];
	}
	return $retval;
}


sub freebsd {
	my $query = "https://www.freebsd.org/cgi/ports.cgi?query=".$_[0]."&stype=all";
	my @lines = split /\n/, &fetchdoc($query);
	my @names;
	my @versions;
	my @dates;
	my @sizes;
	my @repos;
	my @urls;
	my $now = 0;
	for (my $i = 50; $i<@lines; $i++) {
		if ($lines[$i] =~ /^<dt><b>/) {
			my @parts = split /"/, $lines[$i];
			($names[$now],$versions[$now]) = &combos_freebsd($parts[1]);
			my @subparts = split /\//, $parts[3];
			push @sizes, "";
			push @repos, $subparts[@subparts-2];
			push @urls,  "https://www.freebsd.org/cgi/ports.cgi?stype=all&query=$names[$now]";
			$now++;
			push @dates, "";
		}
	}
	for (my $i = 0; $i < @repos; $i++) {
		&pretty_print($cols,@columns,"FreeBSD",$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
	}
	return ();
}


sub sourcemage {
	my @grimoires = ("test","stable","binary","z-rejected","games");
	my @lines = split /\n/, &fetchdoc("http://codex.sourcemage.org/listing.txt");
	my @inirepos;
	my @ininames;
	my @iniversions;
	my @iniurls;
	my @inidates;
	my @inisizes;
	foreach (@lines) {
		my @comps = split /\^/, $_;
		for (my $a = 0; $a < @grimoires;$a++) {
			if (length($comps[$a+1]) > 0) {
				push @inirepos, $grimoires[$a];
				push @ininames, $comps[0];
				push @iniversions, $comps[$a+1];
				push @inisizes, "";
				push @iniurls,  "";
				push @inidates, "";
			}
		}
	}
	my ($p1,$p2,$p3,$p4,$p5,$p6) = &search_by_name(\@ininames,\@iniversions,\@inisizes,\@inidates,\@inirepos,\@iniurls,$_[0]);
	my @names    = @$p1;
	my @versions = @$p2;
	my @sizes    = @$p3;
	my @dates    = @$p4;
	my @repos    = @$p5;
	my @urls     = @$p6;
	for (my $i = 0; $i < @repos; $i++) {
		&pretty_print($cols,@columns,"Source Mage",$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
	}
	return ();
}


sub search_by_name { # versions, sizes, dates, repos, urls
	my ($p1,$p2,$p3,$p4,$p5,$p6,$search) = @_;
	my @ininames    = @$p1;
	my @iniversions = @$p2;
	my @inisizes    = @$p3;
	my @inidates    = @$p4;
	my @inirepos    = @$p5;
	my @iniurls     = @$p6;
	my @names;
	my @versions;
	my @sizes;
	my @dates;
	my @repos;
	my @urls;
	for (my $i = 0;$i<@ininames;$i++) {
		if ($ininames[$i] =~ /$search/i) {
			push @names,    $ininames[$i];
			push @repos,    $inirepos[$i];
			push @versions, $iniversions[$i];
			push @sizes,    $inisizes[$i];
			push @dates,    $inidates[$i];
			push @urls,     $iniurls[$i];
		}
	}
	return(\@names,\@versions,\@sizes,\@dates,\@repos,\@urls);
}

sub netbsd_old {
	my $netbsdbase = "ftp://ftp.netbsd.org/pub/NetBSD/packages/pkgsrc/";
	my @ininames;
	my @iniversions;
	my @iniurls;
	my @inirepos;
	my @inisizes;
	my @inidates;
	my $now = 0;
	my $distroname = "NetBSD";
	my $file = "$confdir/$distroname.list";
	# if the list file exists and is recent, use its contents, otherwise download and parse a fresh copy
	if (-s $file && `date +%Y-%m-%d` =~ (split / /, `ls -l $file`)[6]) {
		open IN, $file;
		chomp (my @lines = <IN>);
		for (my $i = 0; $i<@lines;$i++) {
			($ininames[$i],$iniversions[$i],$iniurls[$i]) = split /\t/, $lines[$i];
		}
		close IN;
	} else {
		my @lines = split /\n/, &fetchdoc($netbsdbase."README-all.html");
		for (my $i = 10; $i < @lines; $i++) {
			if ($lines[$i] =~ /^<!-- [0-9A-Za-z]/) {
				my @parts = split / /, $lines[$i];
				($ininames[$now],$iniversions[$now]) = &combos($parts[1]);
				$now++;
				@parts = split /a href="|">/, $lines[$i];
				push @iniurls, $netbsdbase.$parts[1];
				push @inirepos, "";
				push @inisizes, "";
				push @inidates, "";
			}
		}
		open OUT, ">$file";
		for (my $i = 0; $i < @iniurls;$i++) {
			print OUT "$ininames[$i]\t$iniversions[$i]\t$iniurls[$i]\n";
		}
		close OUT;
	}
	my ($p1,$p2,$p3,$p4,$p5,$p6) = &search_by_name(\@ininames,\@iniversions,\@inisizes,\@inidates,\@inirepos,\@iniurls,$_[0]);
	my @names    = @$p1;
	my @versions = @$p2;
	my @sizes    = @$p3;
	my @dates    = @$p4;
	my @repos    = @$p5;
	my @urls     = @$p6;
	for (my $i = 0; $i < @repos; $i++) {
		&pretty_print($cols,@columns,$distroname,$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
	}
	return ();
}

sub openbsd_combos {
	my @parts = split /-/, $_[0];
	for (my $i = 1; $i < @parts; $i++) {
		if ($parts[$i] =~ /^[0-9]/) {
			return ( (join '-',@parts[0..($i-1)]), (join '-', @parts[$i..(@parts-1)]) );
		}
	}
}

sub openbsd {
	my $baserepourl = 'https://ftp.openbsd.org/pub/OpenBSD/';
	my $arch = "i386";
	my @names;
	my @versions;
	my @urls;
	my @repos;
	my @sizes;
	my @dates;
	my $distroname = "OpenBSD";

	if (not $openbsd_release) {
		my @lines = split /\n/, &fetchdoc($baserepourl);
		for (my $li = 0; $li < @lines; $li++) {
			if ($lines[$li] =~ m{<a href="\d+\.\d+/"}) {
				my ($release) = ($lines[$li] =~ m{<a href="(\d+\.\d+)/"});
				if ($release > $openbsd_release) {
					$openbsd_release = $release;
				}
			}
		}
		if (not $openbsd_release) {
			print STDERR "Could not parse OpenBSD release list, skipping OpenBSD packages\n";
			return ();
		}
	}

	my $rel = $openbsd_release;
	my $baseurl = 'https://ftp.openbsd.org/pub/OpenBSD/'.$rel.'/packages/'.$arch.'/';
	my $file = "$confdir/$distroname\_$rel.list";

	# if the list file exists and is recent, use its contents, otherwise download and parse a fresh copy
	if (-s $file && `date +%Y-%m-%d` =~ (split / /, `ls -l $file`)[6]) {
		open IN, $file;
		chomp (my @lines = <IN>);
		for (my $i = 0; $i<@lines;$i++) {
			($names[$i],$versions[$i],$dates[$i],$sizes[$i]) = split /\t/, $lines[$i];
		}
		close IN;
	} else {
		my @lines = split /\n/, &fetchdoc($baseurl);
		my $now = 0;
		for (my $i = 0; $i < @lines; $i++) {
			if ($lines[$i] =~ /^<IMG SRC="\/icons\/compressed\.gif|\.tgz/i) {
				my @firstParts = split /<A HREF="|\.tgz">|.tgz<\/A> +|  +/i, $lines[$i];
				my $a = @names;
				($names[$a],$versions[$a]) = &openbsd_combos($firstParts[2]);
				push @dates, $firstParts[4];
				push @sizes, $firstParts[5];
			}
		}
		open OUT, ">$file";
		for (my $i = 0; $i < @names;$i++) {
			print OUT "$names[$i]\t$versions[$i]\t$dates[$i]\t$sizes[$i]\n";
		}
		close OUT;
	}
	my $matcher = $_[0];
	for (my $i = 0; $i < @names; $i++) {
		if ($names[$i] =~ /$matcher/i) {
			&pretty_print($cols,@columns,$distroname,$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
		}
	}
	return ();
}

sub cygwin {
	my $baseurl = "https://www.cygwin.com/packages";
	my @names;
	my @versions;
	my @urls;
	my @repos;
	my @sizes;
	my @dates;
	my @archs;
	my $distroname = "Cygwin";
	my $searchy = $_[0];
	my @lines = split /\n/, &fetchdoc("$baseurl/package_list.html");
	for (my $i = 0; $i < @lines; $i++) {
		my $line = $lines[$i];
		if ($line =~ /<tr><td><a href="/) {
			$line =~ s{^<tr><td>}{};
			$line =~ s{</td></tr>$}{};
			my @parts = split /<\/td><td>/, $line;
			$parts[0] =~ s{^<a href="?}{};
			$parts[0] =~ s{</a>$}{};
			my ($temp, $name) = split /"?>/, $parts[0], 2;
			if ($name =~ /$searchy/i) {
				my @detailLines = split /\n/, &fetchdoc("$baseurl/$temp");
				my @highest;
				for (my $a = 0; $a < @detailLines; $a++) { # incrementing ensures that the highest version number will prevail
									   # (higher ones occur lower down at time of writing)
					if ($detailLines[$a] =~ /\<\/td\>\<td class\=\"right\"\>/ && $detailLines[$a] !~ /\-src\<\/a\>\<\/li\>/) {
						@highest = split /\<td\>|\<\/td\>|\<td class\=\"right\"\>/, $detailLines[$a]; # due to server apache config currently only works when $arch eq 'x86_64' - more work needed
					}
				}
				push @versions, $highest[1];
				my @comps = split /\//, $temp;
				push @names,    $name;
				push @archs,    $comps[0];
				push @dates,    $highest[5];
				push @urls,     "$baseurl/$temp";
				push @sizes,    $highest[3];   #TODO make less accurate (e.g. KiB -> M) to fit column, like Debian
				push @repos,    '';
			}
		}
	}
	for (my $i = 0; $i < @names; $i++) {
		#&pretty_print($cols,@columns,$distroname,$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i],$archs[$i]);
		&pretty_print($cols,@columns,$distroname,$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
	}
	return ();
}

sub gentoo {
	my $gentoobase = "https://gpo.zugaina.org/";
	my $gentoobaseofficial = "https://packages.gentoo.org/package";
	my $distroname = "Gentoo";
	my @names;
	my @versions;
	my @urls;
	my @dates;
	my @lines = split /\n/, &fetchdoc($gentoobase."/Search?search=".$_[0]);
	my $name;
	my @repos;
	my @sizes;
	my @groups;
	for (my $i = 0; $i < @lines; $i++) { # starting value is a speed compromise
		if ($lines[$i] =~ /<div id\=\"search_results\"\>/) {
			for (my $a = $i+1; $a < @lines; $a++) {
				if ($lines[$a] =~ /\<\/div\>/) {
					if ($lines[$a] !~ /\<div\>/) {
						last;
					} else {
						my @parts = split /\<div\>/, $lines[$a];
						my @dosparts = split /\//, $parts[1];
						$dosparts[1] =~ / +$/;
						my $tempurl = $gentoobase."/".$dosparts[0]."/".$dosparts[1];
						my $officialurl = $gentoobaseofficial."/".$dosparts[0]."/".$dosparts[1];
						my @newlines = split /\n/, &fetchdoc($tempurl);
						for (my $li = 0; $li < @newlines; $li++) {
							if ($newlines[$li] =~ /\<li class\=\"[a-z]+ebuildrow\"/) {
								my @tempbreak = split /\<b\>|\<\/b\>|-/, $newlines[$li+2];
								my $vernum;
								for (my $incrementa = 2; $incrementa < @tempbreak; $incrementa++) {
									if ($tempbreak[$incrementa] =~ /^[0-9]/) {
										$vernum = join "-", @tempbreak[$incrementa..(@tempbreak-2)];
									}									
								}
								push @names, $dosparts[1];
								push @groups, $dosparts[0];
								push @urls, $officialurl;
								push @versions, $vernum;
								push @repos, "";
								push @sizes, "";
								push @dates, "";
							}
						}
					}
				}
			}
			last;
		}
	}
	for (my $i = 0; $i < @repos; $i++) {
		&pretty_print($cols,@columns,$distroname,$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
	}
	return ();
}

# this almost works, will make the whole thing a lot faster!
#sub combos {
#	my @parts = split /-/, $_[0];
#	my $name;
#	my $version;
#	for (my $i = 1; $i < @parts-1; $i++) {
#		if ($parts[$i] =~ /^[0-9]/) {
#			$name = join "-", @parts[0..($i-1)];
#			$version = join "-", @parts[$i..(@parts-1)];
#			last;
#		}
#	}
#	return($name,$version);
#}

sub combos {
        my @chars = split //, $_[0];
        my $name;
        my $version; 
        for (my $i = 0; $i < @chars-1; $i++) {
                if ($chars[$i] eq "-"#) {
#			if (
&& $chars[$i+1] =~ /[0-9]/) {
                        	$name = join "", @chars[0..($i-1)];
                        	$version = join "", @chars[($i+1)..(@chars-1)];
                        	last;
#			} else {
#				$i++; #minor speed-up
#			}
                }
        }
        return($name,$version);
}

sub combor {
	my @chars = split //, $_[0];
	my $name;
	my $version;
	for (my $i = @chars - 1; $i >= 0; $i--) {
		if ($chars[$i] !~ /[0-9\-\.]/ && !($chars[$i] eq "i" && $chars[$i-1] eq "-" && $chars[$i+1] =~ /[6543]/)) {
			$name = join "", @chars[0..($i)];
			$version = join "", @chars[($i+2)..(@chars-1)];
			last;
		}
	}
	return($name,$version);
}

sub combos_freebsd {
	my @parts = split /-/, $_[0];
	my $name;
	my $version;
	for (my $i = 1; $i < @parts; $i++) {
		if ($parts[$i] =~ /^[0-9]/) {
			$name = join "-", @parts[0..($i-1)];
			$version = join "-", @parts[$i..(@parts-1)];
		}
	}
	return($name,$version);
}

sub slack_combos {
	$_[0] =~ s/^\s*//;
	my @parts = split /-/, $_[0];
	return ((join '-', @parts[0..(@parts-4)]),$parts[(@parts-3)]);
}

sub slack {
	my $slackbase  = "https://packages.slackware.com/";
	my @repos;
	my @groups;
	my @names;
	my @versions;
	my @urls;
	my @combos;
	my @sizes;
	my @dates;
	my @lines = split /\n|<br>|<\/tr>/, &fetchdoc($slackbase."/?release=slackware-current&mode=package&result=100&extra=on&pasture=on&patches=on&slackware=on&source=on&testing=on&search=".$_[0]);
	my $now = 0;
	for (my $i = 0; $i < @lines; $i++) {
		if ($lines[$i] =~ /<td nowrap><a href=/) {
			my $line = $lines[$i];
			$line =~ s{<td align="right">}{<td>};
			$line =~ s{<td nowrap>}{<td>};
			$line =~ s{^<tr><td>}{};
			$line =~ s{</td>$}{};
			my @parts = split /<\/td><td>/, $line;
			my ($repo, $group) = split '/', $parts[1], 2;
			push @groups, $group;
			push @repos, $repo;
			$parts[2] =~ s{^<a href="?}{};
			$parts[2] =~ s{</a>$}{};
			my ($url, $combo) = split '>', $parts[2], 2;
			my ($name, $version) = &slack_combos($combo);
			($names[$now],$versions[$now]) = ($name, $version);
			$now++;
			push @urls, $slackbase.$url;
			push @dates, "";
			push @sizes, $parts[3];
		}
	}
	for (my $i = 0; $i < @repos; $i++) {
		&pretty_print($cols,@columns,"Slackware",$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
	}
	return ();
}

sub debian_sizes {
	my @lines = split /\n/, &fetchdoc($_[0]);
	for (my $i = 0; $i < @lines; $i++) {
		if ($lines[$i] =~ /download\">i386<\/a/) {
			my @newparts = split /\"size\"\>|<\/td>/, $lines[$i+3];
			return &debian_size_convert($newparts[1]);
		}
	}
	for (my $i = 0; $i < @lines; $i++) {
		if ($lines[$i] =~ /download\">all<\/a/) {
			my @newparts = split /\"size\"\>|<\/td>/, $lines[$i+3];
			return &debian_size_convert($newparts[1]);
		}
	}
}

sub debian_size_convert {
	if ($_[0] =~ s/\&nbsp;kB$//) {
		$_[0] =~ s/,//g;
		my @parts = split /\./, $_[0];
		my @partses = split //, $parts[1];
		if ($partses[0] >= 5) {
			$parts[0]++;
		}
		if ($parts[0] < 1000) {
			return $parts[0]."K";
		} else {
			my $val = round($parts[0]/1024);
			return $val."M";
		}
	} else {
		die "Strange packet size encountered: $_[0]\n";
	}
}

sub debian {
	my @dists = ($debian_current_release);
	&debuntu('https://packages.debian.org','Debian',\@dists,$_[0]);
	return();
}

sub ubuntu {
	my @array = ($ubuntu_current_release);
	&debuntu('https://packages.ubuntu.com','Ubuntu',\@array,$_[0]);
	return();
}

sub debuntu {
	my ($baseurl,$distname,$releaseArrayP,$searchTerm) = @_;
	my @dists = @$releaseArrayP;
	my @names;
	my @repos;
	my @groups;
	my @versions;
	my @urls;
	my @sizes;
	my @dates;
	for (my $x = 0; $x < @dists; $x++) {
		my @lines = split /\n/, &fetchdoc($baseurl."/search?keywords=".$searchTerm."&searchon=names&suite=".$dists[$x]."&section=all");
		for (my $i = 50; $i < @lines; $i++) {
			if ($lines[$i] =~ /<h3>Package /) {
				my $name = (split /h3>Package |<\/h3>/, $lines[$i])[1];
				# There are now one or more 8-line blocks that are approximately
				# $lines[$i]   <li class="intrepid"><a class="resultlink" href="/intrepid/dpkg">intrepid</a> (base):
				# $lines[$i+3] <br>1.14.20ubuntu6: amd64 i386
				# And this list starts with <ul> and ends with </ul>
				$i += 1;
				while (($lines[$i] !~ '</ul>') && ($i < @lines)) {
					if ($lines[$i] =~ /class="resultlink"/) {
						push @names, $name;
						my @parts = split /href\=\"|\"\>|<\/a\>/, $lines[$i];
						$parts[4] =~ s/ \(|\)://g;
						push @groups, $parts[4];
						push @repos, $dists[$x];
						push @urls,  $baseurl.$parts[2];
						push @dates, "";
						my $vline = $lines[$i+3];
						# prune possibly existing link to backports
						$vline =~ s/ *\[.*strong.*\]//g;
						# Split lines e.g. "0.8.4-3+squeeze1: all"
						@parts = split />|: /, $vline;
						push @versions, $parts[1];
						$i += 4; # do not be too greedy
					} else {
						$i += 1;
					}
				}
			}
		}
	}
	unless ($shallow) {
		if (!$nothreads) {
			my @thr;
			for (my $i = 0; $i < @urls; $i++) {
				push @thr, threads->new(\&debian_sizes, $urls[$i]);
			}
			for (my $i = 0; $i < @thr; $i++) {
				push @sizes, $thr[$i]->join;
			}
		} else {
			for (my $i = 0; $i < @urls; $i++) {
				push @sizes, &debian_sizes($urls[$i]);	# TODO but we want installed size - or both?
			}
		}
	} else {
		for (my $i = 0; $i < @urls; $i++) {
			push @sizes, "";
		}
	}
	for (my $i = 0; $i < @repos; $i++) {
		&pretty_print($cols,@columns,$distname,$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
	}
	return ();
}

sub aur {
	my $aurbase    = "https://aur.archlinux.org";
	my @lines = split /\n/, &fetchdoc($aurbase."/packages?O=0&SeB=nd&K=".$_[0]."&outdated=&SB=p&SO=d&PP=1000&submit=Go");
	my @repos;
	my @names;
	my @versions;
	my @dates;
	my @urls;
	my @sizes;
	my $indicator = 0;
	for (my $i = 0; $i < @lines; $i++) {
		if ($lines[$i] =~ /\<a href\=\"\/packages\//) {
			$lines[$i+1] =~ s/^\s+//;
			push @names, $lines[$i+1];
			$lines[$i+5] =~ s/^\s+\<td\>//;
			$lines[$i+5] =~ s/\<\/td\>\s*$//;
			push @versions, $lines[$i+5];
			$lines[$i] =~ s/^\s*\<a href\=\"//;
			$lines[$i] =~ s/\"\>\s*//;
			push @urls, $aurbase.$lines[$i];
			$i += 12;   # conservative in case upstream tightens the layout
		}
	}
	for (my $i = 0; $i < @names; $i++) {
		&pretty_print($cols,@columns,"AUR",$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
	}
	return ();
}

sub human {
        my $size = $_[0];
	my @suffixes = (' ', 'K', 'M', 'G');
	my $index = 0;

	while ($size > 1024) {
		$index++;
		$size = POSIX::floor($size / 1024);
	}
	return("$size@suffixes[$index]");
}

sub arch {
	my $archbase   = "https://archlinux.org";
	# if we directly query i686, redirected to the specific package page
	my @lines = split /\n/, &fetchdoc($archbase."/packages/?q=".$_[0]);
	my @repos;
	my @names;
	my @versions;
	my @dates;
	my @urls;
	my @sizes;
	for (my $i = 0; $i < @lines; $i++) {
		if ($lines[$i] =~ /View package details for/) {
			push @versions, &remove_td($lines[$i + 2]);
			push @dates,    &remove_td($lines[$i + 5]);
			push @names,    &arch_extract_name($lines[$i]);
		}
	}
	for (my $i = 0; $i < @names; $i++) {
		&pretty_print($cols,@columns,"Arch",$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
	}
	return ();
}

sub remove_td {
	my @retsplit = split /\<\/?td\>/, $_[0];
	return $retsplit[1];
}
sub arch_extract_name {
	my @retsplit  = split /\"\>|\<\/a\>/, $_[0];
	return $retsplit[1];
}

sub opensuse {
	my $opensusebase = "https://software.opensuse.org";
	my @names;
	my @repos;
	my @groups;
	my @versions;
	my @urls;
	my @sizes;
	my @dates;
	my $distroname = "openSUSE";

	my @lines = split /\n/, &fetchdoc($opensusebase."/search?q=".$_[0]."&baseproject=".$distroname."&lang=en&exclude_debug=true");
	for (my $i = 0; $i < @lines; $i++) {
		if ($lines[$i] =~ /<div class="search-result-txt"/) {
			my $line = $lines[($i+1)];
			$line =~ s{^.*<h3><a href="}{};
			$line =~ s{</?mark>}{}g;
			$line =~ s{</a>.*}{};
			my ($url, $name) = split /">/, $line, 2;
			push @names, $name;
			push @urls, "$opensusebase$url";
			push @versions, '';
			push @sizes,  '';
			push @dates,  '';
			push @groups, '';
			push @repos, '';
		}
	}
	for (my $i = 0; $i < @repos; $i++) {
		&pretty_print($cols,@columns,$distroname,$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
	}
	return();
}

sub netbsd_pkgsrc_size {
	my @retvals;
	my $continueAt = 0;
	my @lines = split /\n|<br\/>/, &fetchdoc($_[0]);
	for (my $i = 0; $i < @lines; $i++) {
		if ($lines[$i] =~ /Filesize:/) {
			my @parts = split /<\/b> /, $lines[$i];
			push @retvals, &sizeconvert($parts[1]);
			$continueAt = $i;
			last;
		}
	}
	if ($continueAt == 0) {
		push @retvals, "";
	}
	$retvals[1] = ""; #just in case there's no match found
	for (my $i = $continueAt; $i < @lines; $i++) {
		if ($lines[$i] =~ /Updated to version|Package added to/) {
			my @parts = split /<b>|<\/b>/, $lines[$i];
			$retvals[1] = $parts[1];
			last;
		}
	}
	return @retvals;
}

sub round {
    my($number) = shift;
    return int($number + .5);
}

sub sizeconvert {
	if ($_[0] =~ s/ KB$//) {
		my @parts = split /\./, $_[0];
		my @partses = split //, $parts[1];
		if ($partses[0] >= 5) {
			$parts[0]++;
		}
		if ($parts[0] < 1000) {
			return $parts[0]."K";
		} else {
			my $val = round($parts[0]/1024);
			return $val."M";
		}
	} else {
		die "Strange packet size encountered: $_[0]\n";
	}
}

sub netbsd {
	my @lines = split /\n|<br\/>/, &fetchdoc("https://pkgsrc.se/search.php?so=".$_[0]);
	my @names;
	my @versions;
	my @dates;
	my @sizes;
	my @repos;
	my @urls;
	for (my $i = 0; $i < @lines; $i++) {
		if ($lines[$i] =~ /version.+maintainer/) {
			my @parts = split /href="|<\/a>, <em><b>version |<\/b>, maintainer|\">/, $lines[$i];
			push @urls, 	$parts[1];
			push @versions, $parts[3];
			my @subparts = split /\//, $parts[2];
			push @repos, $subparts[0];
			push @names, $subparts[1];
		}
	}
	unless ($shallow) {
		if (!$nothreads) {
			my @thr;
			for (my $i = 0; $i < @urls; $i++) {
				push @thr, threads->new(\&netbsd_pkgsrc_size, $urls[$i]);
			}
			for (my $i = 0; $i < @thr; $i++) {
				($sizes[$i],$dates[$i]) = $thr[$i]->join;
			}
		} else {
			for (my $i = 0; $i < @urls; $i++) {
				($sizes[$i],$dates[$i]) = &netbsd_pkgsrc_size($urls[$i]);
			}
		}
	} else {
		for (my $i = 0; $i < @urls; $i++) {
			($sizes[$i],$dates[$i]) = ("","");
		}
	}
	for (my $i = 0; $i < @repos; $i++) {
		&pretty_print($cols,@columns,"NetBSD",$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
	}
	return();
}

sub fedora_admin {
# THIS is a possibility, but offers no date or size info
	my $query = "https://admin.fedoraproject.org/pkgdb/search/package/?searchwords=".$_[0]."&operator=AND&release=19&searchon=name";
	my @lines = split /\n/, &fetchdoc($query);
	my @names;
	my @versions;
	my @dates;
	my @sizes;
	my @repos;
	my @urls;
	my $now = 0;
	for (my $i = 0; $i<@lines; $i++) {
		if ($lines[$i] =~ /unique_tag/) {
			# typically, this is where most of the text processing goes:
			# getting the info and putting it in appropriate arrays

			# use subroutine "combos" if the name and version are represented as, firefox-1.0.6, with the hyphen, and the version number starting with a digit
			my $anchor = "something";
			($names[$now],$versions[$now]) = &combos($anchor);
			$now++;
			push @names,    "";
			push @versions, "";
			push @repos,    "";
			push @sizes,    "";
			push @urls,     "";
			push @dates,    "";
		}
	}
	for (my $i = 0; $i < @repos; $i++) {
		&pretty_print($cols,@columns,"Distroname",$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
	}
	return ();
}

sub template_query {
	my $query = "url";
	my @lines = split /\n/, &fetchdoc($query);
	my @names;
	my @versions;
	my @dates;
	my @sizes;
	my @repos;
	my @urls;
	my $now = 0;
	for (my $i = 0; $i<@lines; $i++) {
		if ($lines[$i] =~ /unique_tag/) {
			# typically, this is where most of the text processing goes:
			# getting the info and putting it in appropriate arrays

			# use subroutine "combos" if the name and version are represented as, firefox-1.0.6, with the hyphen, and the version number starting with a digit
			my $anchor = "something";
			($names[$now],$versions[$now]) = &combos($anchor);
			$now++;
			push @names,    "";
			push @versions, "";
			push @repos,    "";
			push @sizes,    "";
			push @urls,     "";
			push @dates,    "";
		}
	}
	for (my $i = 0; $i < @repos; $i++) {
		&pretty_print($cols,@columns,"Distroname",$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
	}
	return ();
}

sub template_listing {
	my @ininames;
	my @iniversions;
	my @iniurls;
	my @inirepos;
	my @inisizes;
	my @inidates;
	my $now = 0;
	my $distroname = "mydistro";
	my $base = "url";
	# prepare a list file
	my $file = "$confdir/$distroname.list";
	# if the list file exists and is recent, use its contents, otherwise download and parse a fresh copy
	if (-s $file && `date +%Y-%m-%d` =~ (split / /, `ls -l $file`)[6]) {
		open IN, $file;
		chomp (my @lines = <IN>);
		for (my $i = 0; $i<@lines;$i++) {
			# get back any info that you put in the file
			($ininames[$i],$iniversions[$i],$iniurls[$i]) = split /\t/, $lines[$i];
		}
		close IN;
	} else {
		# download fresh copy
		my @lines = split /\n/, &fetchdoc($base."README-all.html");
		for (my $i = 0; $i < @lines; $i++) {
			if ($lines[$i] =~ /unique_tag/) {
				# extract all info from the downloaded list
				my @parts = split / /, $lines[$i];
				# use subroutine "combos" to separate name and version number
				($ininames[$now],$iniversions[$now]) = &combos($parts[1]);
				$now++;
				# any info you couldn't get, put a blank in
				push @iniurls,  "";
				push @inirepos, "";
				push @inisizes, "";
				push @inidates, "";
			}
		}
		open OUT, ">$file";
		for (my $i = 0; $i < @iniurls;$i++) {
			# store the available info in the file
			print OUT "$ininames[$i]\t$iniversions[$i]\t$iniurls[$i]\n";
		}
		close OUT;
	}
	# search by hand
	my ($p1,$p2,$p3,$p4,$p5,$p6) = &search_by_name(\@ininames,\@iniversions,\@inisizes,\@inidates,\@inirepos,\@iniurls,$_[0]);
	my @names    = @$p1;
	my @versions = @$p2;
	my @sizes    = @$p3;
	my @dates    = @$p4;
	my @repos    = @$p5;
	my @urls     = @$p6;
	for (my $i = 0; $i < @repos; $i++) {
		&pretty_print($cols,@columns,$distroname,$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
	}
	return ();
}

sub mandriva_combos {
	$_[0] =~ s/\<\/a\>.*//;
	my @parts = split /-/, $_[0];
	for (my $i = 1; $i < @parts; $i++) {
		if ($parts[$i] =~ /^[0-9]/) {
			return ( (join '-',@parts[0..($i-1)]), (join '-', @parts[$i..(@parts-1)]) );
		}
	}
}

sub mandriva {
	&zarb($_[0], 'Mandriva', 'current');
}

sub mageia {
	&zarb($_[0], 'Mageia', 'cauldron');
}

sub zarb {
	my $baseurl = "http://sophie.zarb.org";
	my @names;
	my @versions;
	my @dates;
	my @sizes;
	my @repos;
	my @urls;
	# NB this server also supports exact matching
	my @lines = split /\n/, &fetchdoc($baseurl."/search?search=".$_[0]."&type=fuzzyname&deptype=P&distribution=".$_[1]."&release=".$_[2]);
	for (my $i = 350; $i < @lines; $i++) {
		#TODO need to check for possible further pages (lists 20 per page)
		#TODO ajax or xml::rpc access might have advantages w.r.t. paging (i.e. none required) 
		if ($lines[$i] =~ /<div class="sophie_search_list">/) {
			my $a = @names;
			push @urls, (split /"/, $lines[$i+1])[1];
			($names[$a],$versions[$a]) = &mandriva_combos($lines[$i+2]); # name, version, arch
		}
	}
	for (my $i = 0; $i < @urls; $i++) {
		&pretty_print($cols,@columns,$_[1],$names[$i],$versions[$i],$sizes[$i],$dates[$i],$repos[$i],$urls[$i]);
	}
	return ();
}

#
# INFRASTRUCTURE FUNCTIONS
#

sub fetchdoc {
	my $url = $_[0];
	my $silent = 0;
	if (@_ == 2 && $_[1] eq "silent") {
		$silent = 1;
	}

	$url =~ s/\&amp\;/\&/ig;   # convert &amp; to &

	my $ua = LWP::UserAgent->new;
	$ua->env_proxy;
        my @firstline;
        my @response;
        for (my $count = 0; ; ++$count) {   # termination condition inside loop
                my $req = HTTP::Request->new(GET => $url);
                my $res = $ua->request($req)->as_string;
                @response = split (/\n/, $res);
                @firstline = split (/ /, $response[0]);
		my $restest = 0;
		if (@firstline == 3) {
			$restest = $firstline[1];
		} elsif (@firstline > 3) {
			$restest = $firstline[0];
		}
		if ($restest == 200 || $response[0] =~ /200 OK/) { #NB the matching expression added specifically for NetBSD package page!
		# server response 200 is a stringent criterion, but should work
			last;
		} elsif ($count > 4) {   # loop termination condition
			unless ($silent == 1) {
				warn ("Tried fetching \"$url\" five times. Giving up.\n");
			}
			return ();
			last;
		}
	}
    my $end = @response - 1;
    my $finaldoc = join ("\n", @response[14..$end]);
    return ($finaldoc);
}

sub pretty_print {
	if( $option_strict && trim($_[$cols+2]) ne $ARGV[0]) {
		return; # strictness enabled, we should print exact matches only
	}
	my $n = $_[0];
	my @colwidths = @_[1..$n];
	my @colvals = @_[($n+1)..(@_-1)];
	for (my $i = 0; $i < @colwidths;$i++) {
		if (length($colvals[$i]) > $colwidths[$i]) {
			my @letters = split //, $colvals[$i];
			print join "", @letters[0..($colwidths[$i]-1)];
			print " ";
		} else {
			print $colvals[$i];
			for (my $a = 0; $a < $colwidths[$i] + 1 - length($colvals[$i]); $a++) {
				print " ";
			}
		}
	}
	print $colvals[@colvals-1]."\n"; #last column is unrestricted in length
}

sub trim {
	# Perl trim function to remove whitespace from the start and end of the string
	$_[0] =~ s/^\s+//;
	$_[0] =~ s/\s+$//;
	return $_[0];
}
