#!/usr/bin/perl -w
# Configuration section
$contentNegotiation = 1;
# End configuration section
#use locale;
use DirHandle;
sub uriEscape {
my $in = $_[0];
my $out = "";
my $i = 0;
while ($i < length($in)) {
my $ch = substr($in, $i, 1);
if ($ch =~ m,[-A-Za-z0-9:/.],) {
$out .= $ch;
} else {
$out .= sprintf("%%%02x", ord($ch));
}
$i++;
}
return $out;
}
sub htmlEscape {
my $in = $_[0];
my $out = "";
my $i = 0;
while ($i < length($in)) {
my $ch = substr($in, $i, 1);
if ($ch =~ m,[-A-Za-z0-9:/ .],) {
$out .= $ch;
} else {
$out .= sprintf("%d;", ord($ch));
}
$i++;
}
return $out;
}
# Phase 1: Try to find a matching URL first
# Phase 1a: Is it a directory, try without final /
# Will not result in a loop, as it will no longer match this
# rule after one pass. Also, it is unlikely, that a normal
# process could result with this (.asis redirect should not
# be possible; automatic redirect upon finding a directory
# should then return "Permission denied" without index)
my $string = $ENV{"REQUEST_URI"};
if ($string =~ m,^/.*/$,) {
# Remove trailing slashes
$string =~ s,/*$,,;
# Create redirect
print "Status: 302 Try this instead\r\n";
print "Location: http://" . $ENV{"HTTP_HOST"} . $string . "\r\n\r\n";
exit 0;
}
# Do minimal language negotiation for the following steps
# Recognized languages:
my $langs = "_en_de_";
my $lang = undef;
my $language = $ENV{"HTTP_ACCEPT_LANGUAGE"};
if (defined($language)) {
@languages = split(/,\s*/, $language);
foreach $l (@languages) {
$l =~ tr/A-Z/a-z/;
$l =~ s/_.*//;
if (!defined($lang) && $langs =~ /_\$l_/) {
$lang = $l;
}
}
}
$lang = "en" unless defined($lang);
# Minimal localization
my %notFound = ( en => "Not Found", de => "Nicht gefunden" );
my %search = ( en => "Search ", de => "Suchen nur auf " );
my %searchWWW = ( en => "Search WWW", de => "Durchsuchen des WWW" );
my %googleSearch = ( en => "Google Search", de => "Google-Suche" );
my %instr = ( en => '
The requested URL was not found on this server.
You can try to locate the document by manually navigating through
this site or search for the document using Google:
',
de => '
Die gewünschte URL konnte auf diesem Server nicht gefunden werden.
Sie können selbst durch diesen Server navigieren oder
das Dokument mittels Google suchen:
' );
my %multiple = ( en => "Multiple Choices", de => "Auswahl" );
my %multExplan1 = ( en => "
The document you requested (",
de => "
Das Dokument, welches Sie verlangten (" );
my %multExplan2 = ( en => ") could not be found on this server. However,
we found documents with names similar to the one you requested.
Available documents:
", de => "
), konnte auf diesem Server nicht gefunden werden.
Jedoch haben wir Dokumente mit ähnlichen Namen gefunden.
Diese sind:
" );
# Phase 1b: Try to find the appropriate (tail) name, but considering content negotiation
if ($contentNegotiation) {
my $webDir = $ENV{"SCRIPT_URL"};
$webDir =~ s,/[^/]*$,,;
my $dir = $ENV{"DOCUMENT_ROOT"} . $webDir;
my $typo = lc($ENV{"SCRIPT_URL"});
$typo =~ s,.*/,,;
$typoLen = length($typo);
my $dh = new DirHandle($dir);
my @dirEntries = $dh->read();
# No longer need to check for matches without content negotiation, this was done by mod_speling
my %matchesCN = ();
foreach my $fileEntry (@dirEntries )
{
my $len = length($fileEntry);
if ($len + 1 >= $typoLen && -r $dir . "/" . $fileEntry) {
# Different length would not allow for a match, even with CN
# Here, we do not differentiate between the different match types
# Find first differing char
my $i = 0;
while ($i < $typoLen
&& substr($typo, $i, 1) eq lc(substr($fileEntry, $i, 1))) {
$i++;
}
if ($i == $typoLen && substr($fileEntry, $i, 1) eq ".") {
# Test 1: Case-insensitive match
$matchesCN{substr($fileEntry, 0, $typoLen)} = 1;
} elsif ($i+1 < $typoLen
&& substr($typo, $i, 1) eq lc(substr($fileEntry, $i+1, 1))
&& substr($typo, $i+1, 1) eq lc(substr($fileEntry, $i, 1))
&& substr($typo, $i+2) . "." eq lc(substr($fileEntry, $i+2, $typoLen-$i-1))) {
# Test 2: Transposition
$matchesCN{substr($fileEntry, 0, $typoLen)} = 2;
} elsif ($i < $typoLen
&& substr($typo, $i+1) . "." eq lc(substr($fileEntry, $i+1, $typoLen-$i ))) {
# Test 3: Simple typo
$matchesCN{substr($fileEntry, 0, $typoLen)} = 3;
} elsif ($i < $typoLen
&& substr($typo, $i+1) . "." eq lc(substr($fileEntry, $i, $typoLen-$i ))) {
# Test 4: Insertion (in typo URL)
$matchesCN{substr($fileEntry, 0, $typoLen-1)} = 4;
} elsif ($i <= $typoLen
&& substr($typo, $i ) . "." eq lc(substr($fileEntry, $i+1, $typoLen-$i+1))) {
# Test 5: Deletion (in typo URL)
$matchesCN{substr($fileEntry, 0, $typoLen+1)} = 5;
}
}
}
# Do not rely on the reason; the reason stored is the one of the last file that came close enough,
# not of the closest file
if (keys(%matchesCN) == 1) {
# Single match, simple redirect
print "Status: 302 Try this instead\r\n";
print "Location: http://" . $ENV{"HTTP_HOST"} . $webDir . "/" .
join("", keys(%matchesCN)) . "\r\n\r\n";
exit 0;
} elsif (keys(%matchesCN) > 1) {
# Multiple matches, choice
print 'Status: 300 Multiple close matches
Content-Type: text/html
300 ', $multiple{$lang}, '