#!/usr/bin/perl
use warnings;
use strict;


# Load current template
#@`wget s7s8.com/SL/template -O template.html`;
#@open my $in, '< template.html';
#@my $template= join '', <$in>;
#@close $in;
#@
# Load candidates for wrapping
# Command line: [FILE [LIMIT]]
#   FILE  - single HTML file to process; defaults to every *.html in the
#           current directory.
#   LIMIT - total number of keywords to print per file; defaults to 10.
my @files = (defined $ARGV[0] && length $ARGV[0]) ? ($ARGV[0]) : glob('*.html');
my $limit = $ARGV[1] || 10;
die "Usage: $0 [file.html [limit]] - limit must be a positive integer\n"
	unless $limit =~ /\A\d+\z/;

# Local list of keywords: output file name => keywords that are always
# emitted first for that file, ahead of the automatically extracted ones.
my %keywords= (
	#"kerberos.html" => [ qw/MIT 1 2 3 4/],
);

# USE LIST: words shorter than four characters that are still kept.
my %keep_short = map { $_ => 1 } qw(afs);

# DISCARD LIST: words that are never reported as keywords.
my %discard = map { $_ => 1 } qw(
	will that sudo with this root file mirko have your which files from
	there they would just monarch also more then them some first other
	called 8212 found local using network group
);

# top_keywords( $html, $max)
#
# Strips markup from $html, counts the remaining lower-cased words and
# returns at most $max of them, most frequent first. Words with the same
# count are ordered alphabetically so the result is reproducible.
# Returns an empty list when $max is zero or negative. The returned list
# is never padded: a short document simply yields fewer words.
sub top_keywords {
	my ($html, $max) = @_;
	return if $max <= 0;

	my $text = $html;
	# Replace (rather than delete) markup with a space so that text from
	# adjacent elements, e.g. "one</p><p>two", is not glued into one word.
	$text =~ s#<.*?># #gs;
	$text =~ s/&#?\w+;/ /g;            # named and numeric HTML entities
	$text =~ s/[^a-zA-Z0-9_-]/ /g;     # everything that cannot be part of a word

	my %count_of;
	for my $word (split ' ', lc $text) {
		next if length($word) < 4 and not $keep_short{$word};
		next if $discard{$word};
		$count_of{$word}++;
	}

	my @ranked = sort { $count_of{$b} <=> $count_of{$a} or $a cmp $b }
		keys %count_of;
	# Truncate only; never extend the list with undefined entries.
	$#ranked = $max - 1 if @ranked > $max;
	return @ranked;
}

# Print one comma-separated keyword line per input file. The file name is
# prefixed only when more than one file is processed.
for my $file (@files) {
	my $in;
	unless (open $in, '<', $file) {
		# Best effort: report the problem and carry on with the next file.
		warn "Cannot read '$file': $!\n";
		next;
	}
	my $content = do { local $/; <$in> };
	close $in;
	$content = '' unless defined $content;

	# NOTE: the template-wrapping steps below are disabled (commented out)
	# and kept in place unchanged.

	# Remove certainly unneeded stuff
#	$content=~ s#</?html>##g;
#	$content=~ s#<body.*?>##s;
#	$content=~ s#</body>.*##s;

	# Base name of the file; used as the key into %keywords
	(my $outname = $file) =~ s#.*/##;
#	# Final data that'll be output
#	my $data= $template;
#
#	# Extract and insert description
#	if( $content=~ m#<meta name="description" content="(.*?)">#s) {
#		my $desc= $1;
#		$data=~ s/\{DESCRIPTION\}/$desc/s;
#	}

	# Build the keyword list: fixed per-file keywords first, then as many
	# extracted words as are needed to reach $limit entries in total.
	my @local_keywords = $keywords{$outname} ? @{ $keywords{$outname} } : ();
	my $room = $limit - @local_keywords;
	my $keyword_list = join ', ', @local_keywords, top_keywords($content, $room);
#	$data=~ s/\{KEYWORDS\}/$keyword_list/s;
#
#	# Remove whole head after it's been parsed
#	$content=~ s#<head>(.*)</head>##s;
#
#	# Insert content
#	$data=~ s/\{BODY\}/$content/s;
#
#	print $outname, "\n";
#	open my $out, "> $outname";
#	print $out $data;
#	close $out;
	print "$file: " if @files > 1;
	print $keyword_list, "\n";
}

