#!/opt/bin/perl
# ________________________________________________________________
# /\ /\ The Web Developer's Virtual Library
# -{-<*>-}- World-Wide Web
# __\/_\/_________________________________________________________
# Author : Alan Richmond
# Purpose : Check a list of HTML files for problematic links.
# Usage : vlinks.pl < [list of filenames]
# Comment : Output in broken.html
# Disclaimer: This software is provided freely on the understanding
# that the Author will not be held responsible for any
# problems arising from it's use, and that there is no support.
# ________________________________________________________________
#
use HTML::Parse;
use LWP::Simple;
use URI::URL;
%link_elements =
(
'a' => 'href',
'img' => 'src',
'form' => 'action',
'link' => 'href',
);
$base = "/www/wdvl/wdvl";
# $debug = 1;
while (<>) {
chop ;
&parse ($_) unless /\/x\//;
}
foreach (keys %link) {
print "check $_\n" if $debug;
if ($_ =~ /^http:/) {
if (!head($_)) {
if (!$ft) {
$ft = 1;
open (OUT, ">broken.html")||die$!;
print OUT "
Broken Links\n";
}
print OUT "- $_:\n$link{$_}\n";
}
}
}
if (!ft) {
print OUT "
";
close (OUT);
}
sub parse {
( $file ) = @_;
print "file: $file:\n" if $debug;
my $h = parse_htmlfile($file);
$BASE = "http://WDVL.Internet.com/$file";
print "BASE: $BASE:\n" if $debug;
$h->traverse(\&expand_urls, 1);
# print $h->as_HTML;
for (@{ $h->extract_links(qw(a img form)) }) {
($link, $linkelem) = @$_;
$link{$link} .= " $file"
unless $link =~ /^mailto/;
print " $link\n" if $debug;
}
}
sub expand_urls {
my($e, $start) = @_;
return 1 unless $start;
my $attr = $link_elements{$e->tag};
return 1 unless defined $attr;
my $url = $e->attr($attr);
return 1 unless defined $url;
$e->attr($attr, url($url, $BASE)->abs->as_string);
}