Generated: Tue Feb 2 17:54:56 2010 from sitemap.pl 2005/06/08 5.2 KB.
#!/usr/bin/perl -w # sitemap.pl -- a command-line utility for building an XHTML site map # using XML::XPath. Presumes that the documents being # searched are well-formed XML or XHTML. # Usage: perl --localpath=/file/path/to/serverroot/ # --hostname=http://my.server.tld # --exts="xhtml, html" sitemap.p # use strict; use XML::XPath; use XML::XPath::XMLParser; use XML::Parser; use File::Find; use Getopt::Long; my (%file_list, $local_path, $host_name, $file_exts, $out_file); # parse the command line options: ##GetOptions( "localpath=s" => \$local_path, ## "hostname=s" => \$host_name, ## "exts=s" => \$file_exts); $local_path = shift || '/homepage/ex1'; $host_name = shift || 'http://localhost'; $file_exts = shift || 'htm, html, xhtml'; die "Usage: perl --localpath=/file/path/to/root/ --hostname=http://my.server --exts='xhtml, html' sitemap.pl\n" unless $local_path and $host_name and $file_exts; $host_name =~ s/\/$//; $local_path =~ s/\/$//; my @file_exts = split (/\s?,\s?/, $file_exts); my $ep = XML::Parser->new(Namespaces => 1); my $xp = XML::XPath->new; # create the root elements for the output tree my $root = XML::XPath::Node::Element->new('html'); my $body = XML::XPath::Node::Element->new('body'); %file_list = &find_local_files(\@file_exts, [$local_path]); ## # now, to the hunt... # DIR: foreach my $directory ( sort (keys (%file_list))) { my $directory_container = XML::XPath::Node::Element->new('div'); my $translated_directory = &translate_path($local_path, '', $directory); $translated_directory ||= '/'; $directory_container->appendAttribute( XML::XPath::Node::Attribute->new('class', 'directory') ); $directory_container->appendAttribute( XML::XPath::Node::Attribute->new('ID', $translated_directory) ); my $directory_header = XML::XPath::Node::Element->new('h2'); $directory_header->appendChild( XML::XPath::Node::Text->new($translated_directory) ); $directory_container->appendChild($directory_header); my $file_list = XML::XPath::Node::Element->new('ul'); FILE: foreach my $file (sort (@{$file_list{$directory}})) { my ($root_node, $parser); my $list_item = XML::XPath::Node::Element->new('li'); $xp->set_filename($file); eval { $parser = XML::XPath::XMLParser->new( filename => $xp->get_filename, parser => $ep); $root_node = $parser->parse; $xp->find('/*', $root_node); }; if ($@) { $list_item->appendChild( XML::XPath::Node::Text->new("ERROR parsing file '$file': " . $@) ); $file_list->appendChild($list_item); next FILE; } my $translated_uri = &translate_path($local_path, $host_name, $file); my $file_link = XML::XPath::Node::Element->new('a'); $file_link->appendAttribute( XML::XPath::Node::Attribute->new('href' , $translated_uri) ); my $title = $xp->find('/html/head/title', $root_node)->string_value || "Untitled Document [ $translated_uri ]"; $file_link->appendChild( XML::XPath::Node::Text->new($title) ); my $desc = $xp->find(q{/html/head/meta[@content][@name="description"]}, $root_node)->string_value || 'No Description'; # add the description and keywords to as elements make a seachable index. my $desc_node = XML::XPath::Node::Element->new('description'); $desc_node->appendChild( XML::XPath::Node::Text->new(" $desc ") ); my $keywords_text = $xp->find(q{/html/head/meta[@content][@name="keywords"]}, $root_node)->string_value || ''; my $keywords_node = XML::XPath::Node::Element->new('keywords'); $keywords_node->appendChild( XML::XPath::Node::Text->new($keywords_text) ); # add the child nodes to the 'li' element $list_item->appendChild($file_link); $list_item->appendChild($keywords_node); $list_item->appendChild($desc_node ); # add the anchor to the output $file_list->appendChild($list_item); } # add the list to the direcory container, and # the container to the main body, $directory_container->appendChild($file_list); $body->appendChild($directory_container); } $root->appendChild($body); print $root->toString; print "\n"; # utility subroutines sub find_local_files { my ($extensions, $directories) = @_; my %file_list = (); my $extension_re = '('; $extension_re .= join ('|', @{$extensions}); $extension_re .= ')'; local *wanted_files = sub { return if -d; return if -l; push (@{$file_list{$File::Find::dir}}, $File::Find::name) if $File::Find::name =~ /\.$extension_re$/; }; File::Find::find(\&wanted_files, @{$directories}); return %file_list; } sub translate_path { my ($old_path, $new_path, $file) = @_; $file =~ s|$old_path|$new_path|; return $file; }