initial commit
[emacs-init.git] / nxhtml / nxhtml / html-chklnk / PerlLib / HTML / LinkWalker.pm
1 ### File:       LinkWalker.pm
2 ### Author:     Lennart Borgman
3 ###     All rights reserved
4
5 ##########################################################
6 ### UserAgent module
7 ##########################################################
8 package LWP::WalkerUA;
9 require LWP::UserAgent;
10 @ISA = qw(LWP::UserAgent);
11
12 ### Mirror to another file (why???)
13 sub mirror
14 {
15     my($self, $url, $file, $mirr_tmp) = @_;
16     die "no mirr_tmp" unless defined $mirr_tmp;
17
18     LWP::Debug::trace('()');
19     my $request = new HTTP::Request('GET', $url);
20
21     if (-e $file) {
22         my($mtime) = (stat($file))[9];
23         if($mtime) {
24             $request->header('If-Modified-Since' =>
25                              HTTP::Date::time2str($mtime));
26         }
27     }
28     my $tmpfile = "$file-$$";
29
30     my $response = $self->request($request, $tmpfile);
31     if ($response->is_success) {
32
33         my $file_length = (stat($tmpfile))[7];
34         my($content_length) = $response->header('Content-length');
35
36         if (defined $content_length and $file_length < $content_length) {
37             unlink($tmpfile);
38             die "Transfer truncated: " .
39                 "only $file_length out of $content_length bytes received\n";
40         } elsif (defined $content_length and $file_length > $content_length) {
41             unlink($tmpfile);
42             die "Content-length mismatch: " .
43                 "expected $content_length bytes, got $file_length\n";
44         } else {
45             # OK
46             if (-e $mirr_tmp) {
47                 # Some dosish systems fail to rename if the target exists
48                 chmod 0777, $mirr_tmp;
49                 unlink $mirr_tmp;
50             }
51             rename($tmpfile, $mirr_tmp) or
52                 die "Cannot rename '$tmpfile' to '$mirr_tmp': $!\n";
53
54             if (my $lm = $response->last_modified) {
55                 # make sure the file has the same last modification time
56                 utime $lm, $lm, $mirr_tmp;
57             }
58         }
59     } else {
60         unlink($tmpfile);
61     }
62     return $response;
63 }
64
65
66 ##########################################################
67 ### Parser module
68 ##########################################################
69 package HTML::WalkerParser;
70 require HTML::ParserTagEnd;
71 @ISA = qw(HTML::ParserTagEnd);
72 use strict;
73 use vars qw(%LINK_ELEMENT);
74
75 # Elements that might contain links and the name of the link attribute
76 %LINK_ELEMENT =
77 (
78  body   => 'background',
79  base   => 'href',
80  a      => 'href',
81  img    => [qw(src lowsrc usemap)],   # 'lowsrc' is a Netscape invention
82  form   => 'action',
83  input  => 'src',
84 'link'  => 'href',          # need quoting since link is a perl builtin
85  frame  => 'src',
86  applet => [qw(codebase code)],
87  area   => 'href',
88  iframe  => 'src',   # Netscape 2.0 extention
89  embed  => 'src',   # used in Netscape 2.0 for Shockwave and things like that
90 );
91
92 my %LINKATTRIBS = (
93         "href"                  => 1,
94         "src"                   => 1,
95         "action"                => 1,
96         "background"    => 1,
97         "usemap"                => 1,
98         "code"                  => 1,
99         "codebase"              => 1,
100         "lowsrc"                => 1,
101         );
102 my %MAYBECONT = (
103         a => 'href',
104         area   => 'href',
105         form   => 'action',
106         frame  => 'src',
107         iframe  => 'src',
108         );
109
110 sub maybecont($$) {
111         my $tag = shift;
112         my $att = shift;
113         return unless exists $MAYBECONT{$tag};
114         return ($MAYBECONT{$tag} eq $att);
115 }
116
117 sub new {
118     my($class, $parsed_fh) = @_;
119     my $self = $class->SUPER::new;
120     $self->{parsed_fh} = $parsed_fh;
121     $self;
122 }
123
124
125
126
127
128
129
130 ##########################################################
131 ### Walker module
132 ##########################################################
133 package HTML::LinkWalker;
134 use strict;
135
136 use IO::File;
137 use File::Copy qw();
138 use File::Path qw();
139 use PathSubs qw();
140 use HTML::Entities;
141 use FindBin qw();
142
143
144 ##########################################################
145 ### Globals
146 ##########################################################
147 my $ua;
148 my $m_ua_personality = "LinkWalker/0.9";
149 my %m_is_outside;
150 my %m_is_container;
151 my $m_bOnlyCont;
152 my @m_sLinkRoots;
153 my $m_subReport;
154 my $m_subAction;
155 my $m_subMirrorAction;
156
157
158 #############################
159 ### Collecting info
160 #############################
161 my %m_CheckedLinks;
162 my %m_MissedLinks;
163
164 sub tell_bad_link($$$$$) {
165         my $what = shift;
166         my $file = shift;
167         my $lnum = shift;
168         my $link = shift;
169         my $line = shift;
170         $file = "START" unless defined $file;
171         $lnum = "(start)" unless defined $lnum;
172         my $longMsg = "<<$what>>";
173         my $shortMsg = $what;
174         if (defined $link) {
175                 my @lines = split("\\s+", $line);
176                 my $disp_line = join("\n\t\t  ", @lines);
177                 $longMsg .= ",\n\t\tlink=$link\n\t\t$disp_line";
178         }
179         my @msg = ($shortMsg, $longMsg);
180         $m_CheckedLinks{$file}->{ERR}->{$lnum} = \@msg;
181         &$m_subReport("\t* Error * " . $what . "\n");
182 } # tell_bad_link
183
184
185 #############################
186 ### Helpers
187 #############################
188
189 sub get_contenttype($) {
190         my $response = shift;
191         my @rh = $response->header("Content-Type");
192         for my $r (@rh) {
193                 my $c = $r;
194                 if ((my $iPos = index($r, ";")) > -1) {
195                         $c = substr($r, 0, $iPos);
196                 }
197                 return $c;
198         }
199 }
200 sub is_linked_contenttype($) {
201         my $response = shift;
202         return (get_contenttype($response) eq "text/html");
203 }
204
205 sub ending_is_container($) {
206         my $link_addr = shift;
207         $link_addr =~ s!#.*$!!;
208         $link_addr =~ s!\?.*$!!;
209         return (($link_addr =~ m!\.s?html?$!i) ? 1 : 0);
210 }
211
212 my $m_sMirrorRoot;
213 my $m_bMirror = 1;
214
215 sub mk_mirror_name($) {
216         my $orig_name = shift;
217         $orig_name =~ tr!\\!/!;
218         my $mirr_name = $orig_name;
219         my ($orig_host) = ($orig_name =~ m!(^https?://[^/]*)!i);
220         if (defined $orig_host) {
221                 my $host = $orig_host;
222                 $host =~ tr!:!_!;
223                 $host =~ tr!/!_!;
224                 $mirr_name =~ s!^$orig_host!$host!;
225                 if (substr($mirr_name, -1) eq "/") { $mirr_name .= "default.html"; }
226         } else {
227                 die "Can't find host in $orig_name\n";
228         }
229         my $mirr_full = sMirrorRoot() . $mirr_name;
230         if (!$m_bMirror) {
231                 my $sExt = $mirr_name; $sExt =~ s!.*\.([^\.]*$)!$1!;
232                 $mirr_full = sMirrorRoot() . "temp.$sExt";
233         }
234         my $mirr_fold = $mirr_full;
235         $mirr_fold =~ s![^/]*$!!;
236         File::Path::mkpath($mirr_fold, 0, 0777);
237         return $mirr_full;
238 }
239
240 #############################
241 ### Checks
242 #############################
243 sub is_outside($) {
244         my $uq_link_addr = shift;
245         if (!exists $m_is_outside{$uq_link_addr}) {
246                 $m_is_outside{$uq_link_addr} = test_is_outside($uq_link_addr, \@m_sLinkRoots);
247         }
248         return $m_is_outside{$uq_link_addr};
249 }
250 sub set_is_container($$) {
251         my $uq_link_addr = shift;
252         return if exists $m_is_container{$uq_link_addr};
253         $m_is_container{$uq_link_addr} = shift;
254 }
255 sub is_outside_container($) {
256         my $uq_link_addr = shift;
257         if (exists $m_is_container{$uq_link_addr}) {
258                 if ($m_is_container{$uq_link_addr}) {
259                         return is_outside($uq_link_addr);
260                 }
261         }
262 }
263 sub test_is_outside($$) {
264         my $uq_link_addr = shift;
265         my $link_roots   = shift;
266         if (defined $link_roots) {
267                 my $in_roots;
268                 for my $link_root (@$link_roots) {
269                         if (substr($uq_link_addr, 0, length($link_root)) eq $link_root) {
270                                 return 0;
271                         }
272                 }
273                 return 1;
274         }
275 } # is_outside
276
277
278
279 ##########################################################
280 ### Parsing
281 ##########################################################
282
283
284 ### Parser subs
285 sub HTML::WalkerParser::declaration {
286     my($self, $decl) = @_;
287         return unless defined $self->{parsed_fh};
288         my $fh = $self->{parsed_fh};
289         print $fh "<!" . $decl . ">";
290 }
291 my $m_start_cb;
292 sub HTML::WalkerParser::start {
293     my($self, $tag, $attr, $ended) = @_;
294         &$m_start_cb($tag, $attr);
295         return unless defined $self->{parsed_fh};
296         my $t = "<$tag";
297         for my $k (keys %$attr) {
298                 my $encoded = encode_entities($$attr{$k});
299                 $t .= qq( $k="$encoded");
300         }
301         if ($ended) {
302                 $t .= " />";
303         } else {
304                 $t .= ">";
305         }
306         my $fh = $self->{parsed_fh};
307         print $fh $t;
308 }
309 sub HTML::WalkerParser::end {
310         my ($self, $tag) = @_;
311         return unless defined $self->{parsed_fh};
312         my $fh = $self->{parsed_fh};
313         print $fh "</" . $tag . ">";
314 }
315 sub HTML::WalkerParser::text {
316         my ($self, $txt) = @_;
317         return unless defined $self->{parsed_fh};
318         my $fh = $self->{parsed_fh};
319         print $fh $txt;
320 }
321 sub HTML::WalkerParser::comment {
322     my($self, $comment) = @_;
323         return unless defined $self->{parsed_fh};
324         my $fh = $self->{parsed_fh};
325         print $fh "<!--" . $comment . "-->";
326 }
327
328
329
330
331 ### Main parsing routine
332
333 sub parse_file($$$$$$$$$) {
334         my ($file_name, $parsed_fh, $uq_link_addr, $link_roots,
335                 $ref_links, $ref_anchs, $ref_lines, $ref_tagname, $ref_attname) = @_;
336         my $fh;
337         if (-d $file_name) {
338                 $file_name = PathSubs::uniq_dir($file_name) . "default.html";
339                 $uq_link_addr .= "/" unless substr($uq_link_addr, -1) eq "/";
340                 $uq_link_addr .= "default.html";
341                 &$m_subReport("dir => $file_name\n");
342         }
343         $fh = new IO::File($file_name);
344         die "Can't read $file_name: $!\n" unless defined $fh;
345         my $base_href;
346         my $n;
347         my $line;
348         my $uq_link_fold = $uq_link_addr; $uq_link_fold =~ s![^/]*$!!;
349
350         my $start_cb =
351                 sub {
352                         my ($tag, $attr_hash) = @_;
353                         for my $k (keys %$attr_hash) {
354                                 if (($k eq "id") || ($k eq "name")) {
355                                         my $v = $$attr_hash{$k};
356                                         $$ref_anchs{$v} = $n;
357                                         $$ref_lines{$n} = $line;
358                                 } elsif (exists $LINKATTRIBS{$k}) {
359                                         my $v = $$attr_hash{$k};
360                                         next if $v =~ m!^javascript:!;
361                                         next if $v =~ m!^ftp://!;
362                                         next if $v =~ m!^mailto://!;
363                                         if ($tag eq "base") { $base_href = $v if $k eq "href"; next; }
364                                         my $v_abs; my $v_rel;
365                                         my $v_is_abs = PathSubs::is_abs_path($v);
366                                         if ($v_is_abs) {
367                                                 $v_abs = $v;
368                                                 $v_rel = PathSubs::mk_relative_link($uq_link_addr, $v_abs);
369                                         } else {
370                                                 $v_rel = $v;
371                                                 if (defined $base_href) {
372                                                         $v_abs = PathSubs::mk_abs_link($base_href, $v);
373                                                 } else {
374                                                         if (substr($v_rel, 0, 1) ne "#") {
375                                                                 $v_abs = $uq_link_fold . $v_rel;
376                                                         } else {
377                                                                 $v_abs = $uq_link_addr . $v_rel;
378                                                         }
379                                                         $v_abs = PathSubs::resolve_dotdot($v_abs);
380                                                 }
381                                         }
382                                         next if exists $m_CheckedLinks{$v_abs};
383                                         if (is_outside($v_abs)) {
384                                                 if (!$v_is_abs) {
385                                                         if (ending_is_container($v_abs)) {
386                                                                 $m_CheckedLinks{$v_abs} = {};
387                                                                 tell_bad_link("Outside relative link ($v_rel)",
388                                                                         $uq_link_addr, $n, $v, $line);
389                                                         }
390                                                 }
391                                                 ### Skip outside absolute links
392                                                 ### Could be things like banners etc...
393                                                 next;
394                                         }
395                                         $$ref_links{$v_rel} = $n;
396                                         $$ref_lines{$n} = $line;
397                                         if (substr($v_rel, 0, 1) ne "#") {
398                                                 my $v_rel_name = $v_rel;
399                                                 $v_rel_name =~ s!#.*$!!;
400                                                 $v_rel_name =~ s!\?.*$!!;
401                                                 $$ref_tagname{$v_rel_name} = $tag;
402                                                 $$ref_attname{$v_rel_name} = $k;
403                                         }
404                                         if ($v_is_abs && ($v_rel ne $v)) { $$attr_hash{$k} = $v_rel; }
405                                 }
406                         }
407                 }; # $start_cb
408
409         $m_start_cb = $start_cb;
410         my $p = HTML::WalkerParser->new($parsed_fh);
411         while ($line = <$fh>) {
412                 $n++;
413                 $p->parse($line);
414         }
415         $fh->close();
416 } # parse_file
417
418
419
420 ##########################################################
421 ### Do the walk...
422 ##########################################################
423 sub walk_link($$;$$$$) {
424         die "$#_" unless ($#_ == 1 || $#_ == 5);
425         my $link_fold   = shift;
426         my $link_file   = shift;
427         my $parent_url  = shift;
428         my $parent_lnum = shift;
429         my $parent_link = shift;
430         my $parent_line = shift;
431
432         my $link_addr   = $link_fold . $link_file;
433         my $uq_link_addr;
434         my $is_file = ($link_addr !~ m!^https?://!i);
435         if ($is_file) {
436                 $uq_link_addr = PathSubs::uniq_file($link_addr);
437         } else {
438                 $uq_link_addr = PathSubs::resolve_dotdot($link_addr);
439         }
440         return if exists $m_CheckedLinks{$uq_link_addr};
441         return if exists  $m_MissedLinks{$uq_link_addr};
442         $m_CheckedLinks{$uq_link_addr} = {};
443         my $link_is_container = ending_is_container($uq_link_addr);
444         if ($link_is_container) {
445                 set_is_container($uq_link_addr, 1);
446                 return if is_outside($uq_link_addr);
447         } else {
448                 return if $m_bOnlyCont;
449         }
450         my $response;
451         my $contenttype;
452         my $bDoRewrite;
453         my $file_name;
454         if ($is_file) {
455                 if (!-r $uq_link_addr) {
456                         tell_bad_link("Can't read file ($uq_link_addr)",
457                                 $parent_url, $parent_lnum, $parent_link, $parent_line);
458                         $m_MissedLinks{$uq_link_addr} = 1;
459                         return;
460                 }
461                 $file_name = $uq_link_addr;
462         } else {
463                 $file_name = mk_mirror_name($uq_link_addr);
464                 if (!defined $ua) {
465                         $ua = new LWP::UserAgent;
466                         $ua->agent($m_ua_personality);
467                         #$ua->delay(0.1);
468                 }
469                 if ($m_bMirror) {
470                         $response = $ua->mirror($uq_link_addr, $file_name);
471                         &$m_subMirrorAction($uq_link_addr, $file_name, $response);
472                 } else {
473                         my $request = new HTTP::Request('GET', $uq_link_addr);
474                         $response = $ua->request($request, $file_name);
475                 }
476                 #dump_response($response); exit;
477                 if ($response->code != 304) {
478                         if (!$response->is_success) {
479                                 tell_bad_link($response->status_line . " ($uq_link_addr)",
480                                         $parent_url, $parent_lnum, $parent_link, $parent_line);
481                                 $m_MissedLinks{$uq_link_addr} = 1;
482                                 return;
483                         }
484                         $bDoRewrite = $m_bMirror;
485                         $contenttype = get_contenttype($response);
486                         $link_is_container = is_linked_contenttype($response);
487                 }
488                 if ($uq_link_addr ne $response->base) {
489                         if ($m_bMirror) {
490                                 my $base_file = mk_mirror_name($response->base);
491                                 if (!File::Copy::copy($file_name, $base_file)) {
492                                         die "Can't copy($file_name, $base_file): $!\n";
493                                 }
494                                 if (my $lm = $response->last_modified) { utime $lm, $lm, $base_file; }
495                                 $file_name = $base_file;
496                         }
497                         $uq_link_addr = $response->base;
498                 }
499         }
500         ### Test again, could be new info from net!
501         if ($link_is_container) {
502                 set_is_container($uq_link_addr, 1);
503                 return if is_outside($uq_link_addr);
504         } else {
505                 return if $m_bOnlyCont;
506                 return;
507         }
508         &$m_subReport("$uq_link_addr ...");
509
510         my %links;
511         my %anchs;
512         my %lines;
513         my %tagname;
514         my %attname;
515         my $parsed_fh;
516         my $parsed_file;
517         my $file_to_parse = $file_name;
518         if ($bDoRewrite) {
519                 $parsed_file = $file_to_parse . "-p$$";
520                 &$m_subReport(" <<GET");
521                 $parsed_fh = new IO::File("> $parsed_file");
522                 die "Can't create $parsed_file: $!\n" unless defined $parsed_fh;
523                 print $parsed_fh "<!-- parsed version -->\n";
524         }
525         &$m_subReport("\n");
526         parse_file($file_to_parse, $parsed_fh, $uq_link_addr,
527                 \@m_sLinkRoots,
528                 \%links, \%anchs, \%lines, \%tagname, \%attname);
529         if (defined $parsed_fh) {
530                 $parsed_fh->close();
531                 if (-e $file_name) { unlink $file_name or die "Can't unlink $file_name: $!"; }
532                 rename($parsed_file, $file_name) or die "Can't rename($parsed_file, $file_name): $!\n";
533             if (my $lm = $response->last_modified) { utime $lm, $lm, $file_name; }
534         }
535         ### Now we know...
536         if ($link_is_container) { return if is_outside($uq_link_addr); }
537
538         $m_CheckedLinks{$uq_link_addr}->{ANC} = \%anchs;
539         my $file_dir;
540         if ($is_file) {
541                 $file_dir = $uq_link_addr;
542                 $file_dir =~ s![^/]*$!!;
543                 #chdir $file_dir;
544         }
545         my $container_folder = $uq_link_addr; $container_folder =~ s![^/]*$!!;
546         &$m_subAction($uq_link_addr, $file_name, $contenttype);
547         for my $link (sort keys %links) {
548                 # Next line is for onclick lines in prepared docs
549                 next if ($link eq "#");
550                 my $lnum = $links{$link};
551                 my $line = $lines{$lnum};
552                 if ($link eq "") {
553                         tell_bad_link("Empty link", $uq_link_addr, $lnum, $link, $line);
554                         next;
555                 }
556                 if ($link =~ m!(.*)\?!) { $link = $1; }
557                 my $anchor;
558                 if ($link =~ m!(.*)#(.*)!) { $link = $1; $anchor = $2; }
559                 if ($link eq "") {
560                         if (!exists $anchs{$anchor}) {
561                                 tell_bad_link("Anchor not found ($anchor)", $uq_link_addr, $lnum, $link, $line);
562                         }
563                         next;
564                 }
565                 my $sub_fold;
566                 my $sub_file;
567                 my $uq_sublink; 
568                 if ($link =~ m!^https?://!i) {
569                         $sub_fold = "";
570                         $sub_file = $link;
571                         $uq_sublink = $link;
572                 } else {
573                         $sub_file = $link;
574                         if ($is_file) {
575                                 $sub_fold = $file_dir;
576                                 $uq_sublink = PathSubs::uniq_file($sub_fold . $sub_file);
577                         } else {
578                                 $sub_fold = $container_folder;
579                                 $uq_sublink = $sub_fold . $sub_file;
580                         }
581                 }
582                 next if (exists $m_CheckedLinks{$uq_sublink});
583                 if (defined $anchor) {
584                         $m_CheckedLinks{$uq_link_addr}->{EXTANC}->{$uq_sublink} =
585                                         { ANC=> $anchor, LINE=>$line, LNUM=>$lnum};
586                 }
587                 if ($m_bOnlyCont) {
588                         die "link=$link\tattr=$tagname{$link}\n" unless exists $tagname{$link};
589                         next unless maybecont($tagname{$link}, $attname{$link});
590                 }
591                 if (is_outside($uq_link_addr)) {
592                         if (maybecont($tagname{$link}, $attname{$link}) ) {
593                                 next;
594                         }
595                 }
596                 walk_link($sub_fold, $sub_file, $uq_link_addr, $lnum, $link, $line);
597         }
598 } # walk_link
599
600
601
602
603 ############################################
604 ### Some more checks!
605 ############################################
606 sub check_external_anchors() {
607         &$m_subReport("\nChecking external anchors...\n");
608         for my $f (sort keys %m_CheckedLinks) {
609                 my $fnode = $m_CheckedLinks{$f};
610                 if (exists ${$fnode}{"EXTANC"}) {
611                         my $extanc_hash = ${$fnode}{"EXTANC"};
612                         for my $fx (keys %$extanc_hash) {
613                                 next unless (exists $m_CheckedLinks{$fx});
614                                 my $ea_hash = ${$extanc_hash}{$fx};
615                                 my $ea = ${$ea_hash}{ANC};
616                                 my $fxnode = $m_CheckedLinks{$fx};
617                                 my $fx_anc_hash = ${$fxnode}{"ANC"};
618                                 if (!exists ${$fx_anc_hash}{$ea}) {
619                                         my $line = ${$ea_hash}{LINE};
620                                         my $lnum = ${$ea_hash}{LNUM};
621                                         &$m_subReport("From $f\n");
622                                         tell_bad_link("Ext anchor not found ($fx#$ea)",
623                                                 $f, $lnum, "$fx#$ea", $line);
624                                 }
625                         }
626                 }
627         }
628 } # check_external_anchors
629
630
631
632 #############################
633 ### Reporting
634 #############################
635 sub report_errors($$) {
636         my $bSum = shift;
637         my $bDet = shift;
638         my $errors_reported;
639         my $errors_found;
640         for my $f (sort keys %m_CheckedLinks) {
641                 my $fnode = $m_CheckedLinks{$f};
642                 if (exists ${$fnode}{ERR}) {
643                         $errors_found = 1;
644                         last unless $bSum;
645                         if (!defined $errors_reported) {
646                                 $errors_reported = 1;
647                                 &$m_subReport("\n\n*********** Summary ERRORS and WARNINGS **********\n");
648                         }
649                         &$m_subReport("$f\n");
650                         my $err_hash = ${$fnode}{ERR};
651                         for my $e (sort keys %$err_hash) {
652                                 my $refE = ${$err_hash}{$e};
653                                 &$m_subReport("\t" . ${$refE}[0] . "\n");
654                         }
655                 }
656         }
657         undef $errors_reported;
658         if ($bDet) {
659                 for my $f (sort keys %m_CheckedLinks) {
660                         my $fnode = $m_CheckedLinks{$f};
661                         if (exists ${$fnode}{ERR}) {
662                                 if (!defined $errors_reported) {
663                                         $errors_reported = 1;
664                                         &$m_subReport("\n\n*********** Detailed ERRORS and WARNINGS **********\n");
665                                 }
666                                 &$m_subReport("$f\n");
667                                 my $err_hash = ${$fnode}{ERR};
668                                 for my $e (sort keys %$err_hash) {
669                                         my $refE = ${$err_hash}{$e};
670                                         &$m_subReport("\tat line $e: " .  ${$refE}[1] . "\n");
671                                 }
672                         }
673                 }
674         }
675         if ($errors_found) {
676                 die "\n*** There where errors ***\n";
677         } else {
678                 &$m_subReport("No errors found\n");
679         }
680 } # report_errors
681
682 sub dump_response($) {
683         my $response = shift;
684                 &$m_subReport( $response->code . " " . $response->message . "\n");
685                 &$m_subReport( "****************************************\n");
686                 #&$m_subReport( $response->request . "\n");
687                 #&$m_subReport( "****************************************\n");
688                 #&$m_subReport( $response->previous . "\n");
689                 #&$m_subReport( "****************************************\n");
690                 &$m_subReport(  "  i=" . $response->is_info .
691                                 ", s=" . $response->is_success .
692                                 ", r=" . $response->is_redirect .
693                                 ", e=" . $response->is_error . "\n");
694                 &$m_subReport( "****************************************\n");
695                 &$m_subReport( "content: " . $response->content . "\n");
696                 &$m_subReport( "****************************************\n");
697                 &$m_subReport( "base: " . $response->base . "\n");
698                 &$m_subReport( "****************************************\n");
699                 &$m_subReport( $response->as_string);
700                 &$m_subReport( "****************************************\n");
701                 &$m_subReport( $response->current_age . "\n");
702                 &$m_subReport( "****************************************\n");
703                 my @rh = $response->header("Content-Type");
704                 for my $r (@rh) { &$m_subReport( "ct: $r\n"); }
705                 &$m_subReport( "****************************************\n");
706 } # dump_response
707
708
709 #############################
710 ### Parameters
711 #############################
712 sub sMirrorRoot() {
713         my $val = shift;
714         $m_sMirrorRoot = PathSubs::get_temp_path() . "LinkWalker/" unless defined $m_sMirrorRoot;
715         my $old = $m_sMirrorRoot;
716         $m_sMirrorRoot = PathSubs::uniq_dir($val) if defined $val;
717         return $old;
718 }
719 sub bMirror(;$) {
720         my $val = shift;
721         my $old = $m_bMirror;
722         $m_bMirror = $val if defined $val;
723         $old;
724 }
725
726 sub subReporter(;$) {
727         my $val = shift;
728         my $old = $m_subReport;
729         $m_subReport = $val if defined $val;
730         $old
731 }
732 sub subAction(;$) {
733         my $val = shift;
734         my $old = $m_subAction;
735         $m_subAction = $val if defined $val;
736         $old
737 }
738 sub bOnlyCont(;$) {
739         my $val = shift;
740         my $old = $m_bOnlyCont;
741         $m_bOnlyCont = $val if defined $val;
742         $old
743 }
744 sub ua_personality(;$) {
745         my $val = shift;
746         my $old = $m_ua_personality;
747         $m_ua_personality = $val if defined $val;
748         $old
749 }
750
751 sub clear_roots() { @m_sLinkRoots = (); }
752 sub get_roots() { return \@m_sLinkRoots; }
753 sub add_root($) { push @m_sLinkRoots, shift; }
754 sub add_files_root($) {
755         my $file = shift;
756         my $default_root;
757         my ($host) = ($file =~ m!(^https?://[^/]*)!i);
758         if (defined $host) {
759                 $default_root = $file;
760         } else {
761                 die "Can't find $file\n" unless -e $file;
762                 $default_root = PathSubs::uniq_file($file);
763         }
764         $default_root =~ s![^/]*$!!;
765         add_root($default_root);
766 }
767
768 ### Default actions
769 sub default_sub {}
770 $m_subReport            = \&default_sub;
771 $m_subAction            = \&default_sub;
772 $m_subMirrorAction      = \&default_sub;
773
774 1;