2005-08-31 11:58:48 +00:00
|
|
|
#!/usr/bin/perl
|
2005-08-31 14:01:17 +00:00
|
|
|
# Copyright (C) 2005 Alex Schroeder <alex@emacswiki.org>
|
|
|
|
|
#
|
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
# (at your option) any later version.
|
|
|
|
|
#
|
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
|
#
|
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
|
# along with this program; if not, write to the
|
|
|
|
|
# Free Software Foundation, Inc.
|
|
|
|
|
# 59 Temple Place, Suite 330
|
|
|
|
|
# Boston, MA 02111-1307 USA
|
2005-08-31 11:58:48 +00:00
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
|
use warnings;
|
|
|
|
|
use CGI;
|
|
|
|
|
use LWP::UserAgent;
|
|
|
|
|
use XML::LibXML;
|
|
|
|
|
use URI;
|
|
|
|
|
|
|
|
|
|
my @indexes = qw(
|
|
|
|
|
http://www.gnu.org/software/emacs/manual/html_node/Command-Index.html
|
|
|
|
|
http://www.gnu.org/software/emacs/manual/html_node/Variable-Index.html
|
2005-08-31 13:12:13 +00:00
|
|
|
http://www.gnu.org/software/emacs/manual/html_node/Concept-Index.html
|
|
|
|
|
http://www.gnu.org/software/emacs/elisp-manual/html_node/elisp_728.html
|
|
|
|
|
http://www.gnu.org/software/emacs/elisp-manual/html_node/elisp_729.html
|
|
|
|
|
http://www.gnu.org/software/emacs/elisp-manual/html_node/elisp_730.html
|
|
|
|
|
http://www.gnu.org/software/emacs/elisp-manual/html_node/elisp_731.html
|
|
|
|
|
http://www.gnu.org/software/emacs/elisp-manual/html_node/elisp_732.html
|
|
|
|
|
http://www.gnu.org/software/emacs/elisp-manual/html_node/elisp_733.html
|
|
|
|
|
http://www.gnu.org/software/emacs/elisp-manual/html_node/elisp_734.html
|
|
|
|
|
http://www.gnu.org/software/emacs/elisp-manual/html_node/elisp_735.html
|
|
|
|
|
http://www.gnu.org/software/emacs/elisp-manual/html_node/elisp_736.html
|
|
|
|
|
http://www.gnu.org/software/emacs/elisp-manual/html_node/elisp_737.html
|
|
|
|
|
http://www.gnu.org/software/emacs/elisp-manual/html_node/elisp_738.html
|
|
|
|
|
http://www.gnu.org/software/emacs/elisp-manual/html_node/elisp_739.html
|
|
|
|
|
http://www.gnu.org/software/emacs/elisp-manual/html_node/elisp_740.html
|
|
|
|
|
http://www.gnu.org/software/emacs/elisp-manual/html_node/elisp_741.html
|
|
|
|
|
http://www.gnu.org/software/emacs/elisp-manual/html_node/elisp_742.html
|
|
|
|
|
http://www.gnu.org/software/emacs/elisp-manual/html_node/elisp_743.html
|
|
|
|
|
http://www.gnu.org/software/emacs/elisp-manual/html_node/elisp_744.html
|
2005-08-31 11:58:48 +00:00
|
|
|
);
|
|
|
|
|
|
|
|
|
|
my $db = '/org/org.emacswiki/htdocs/emacs/info-ref.dat';
|
|
|
|
|
|
|
|
|
|
my $nl = "\n";
|
|
|
|
|
my $fs = "\023";
|
|
|
|
|
my $gs = "\024";
|
|
|
|
|
my $rs = "\025";
|
|
|
|
|
|
|
|
|
|
my $q = new CGI;
|
|
|
|
|
ProcessRequest();
|
|
|
|
|
|
|
|
|
|
sub ProcessRequest {
|
|
|
|
|
if ($q->param('init')) {
|
|
|
|
|
Initialize();
|
|
|
|
|
} elsif ($q->param('find')) {
|
|
|
|
|
Find($q->param('find'));
|
|
|
|
|
} else {
|
|
|
|
|
ShowForm();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2005-08-31 12:06:36 +00:00
|
|
|
sub ShowForm {
|
|
|
|
|
print $q->header, $q->start_html,
|
2005-08-31 14:01:17 +00:00
|
|
|
$q->start_form, "Index entry: ", $q->textfield('find'), $q->submit, $q->end_form,
|
|
|
|
|
$q->p('$Id: info-ref,v 1.6 2005/08/31 14:01:17 as Exp $'),
|
|
|
|
|
$q->end_html;
|
2005-08-31 12:06:36 +00:00
|
|
|
}
|
|
|
|
|
|
2005-08-31 11:58:48 +00:00
|
|
|
sub Find {
|
|
|
|
|
my $str = shift;
|
|
|
|
|
my %map = ();
|
|
|
|
|
my $data = ReadFileOrDie($db);
|
|
|
|
|
foreach my $line (split(/$nl/, $data)) {
|
|
|
|
|
my ($key, $rest) = split(/$fs/, $line);
|
|
|
|
|
$map{$key} = ();
|
|
|
|
|
foreach my $a (split(/$gs/, $rest)) {
|
|
|
|
|
my ($link, $label) = split(/$rs/, $a);
|
|
|
|
|
$map{$key}{$link} = $label;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
my @links = keys %{$map{$str}};
|
|
|
|
|
if ($#links < 0) {
|
|
|
|
|
ReportError("No matches found for '$str'", '404 Not Found');
|
|
|
|
|
} elsif ($#links == 0) {
|
|
|
|
|
print $q->redirect($links[0]);
|
|
|
|
|
} else {
|
|
|
|
|
my @list = map { $q->a({-href=>$_}, $map{$str}{$_}) } @links;
|
|
|
|
|
print $q->header, $q->h1($str), $q->ol($q->li(\@list));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub Initialize {
|
|
|
|
|
my %map = ();
|
2005-08-31 13:12:13 +00:00
|
|
|
print $q->header, $q->start_html;
|
2005-08-31 11:58:48 +00:00
|
|
|
foreach my $url (@indexes) {
|
2005-08-31 13:12:13 +00:00
|
|
|
print $q->p($url);;
|
2005-08-31 11:58:48 +00:00
|
|
|
# determine base URI
|
|
|
|
|
my $base = URI->new($url);
|
|
|
|
|
# fetch and parse data
|
|
|
|
|
my $data = GetRaw($url);
|
2005-08-31 13:12:13 +00:00
|
|
|
# some markup fixes for the elisp manual
|
|
|
|
|
$data =~ s/&([<"])/&$1/g;
|
|
|
|
|
$data =~ s/<([<"])/<$1/g;
|
|
|
|
|
$data =~ s/="fn_"">/="fn_"">/;
|
|
|
|
|
$data =~ s/<!DOCTYPE.*?>//;
|
|
|
|
|
$data =~ s'</?font.*?>''gi;
|
|
|
|
|
$data =~ s'</table><br></P>'</table><br>';
|
2005-08-31 11:58:48 +00:00
|
|
|
my $parser = XML::LibXML->new();
|
2005-08-31 13:12:13 +00:00
|
|
|
my $doc;
|
|
|
|
|
eval { $doc = $parser->parse_html_string($data); };
|
|
|
|
|
print $q->p($@) if $@;
|
|
|
|
|
next if $@;
|
|
|
|
|
# emacs manual
|
2005-08-31 11:58:48 +00:00
|
|
|
my @nodelist = $doc->findnodes('/html/body/ul/li');
|
|
|
|
|
foreach my $node (@nodelist) {
|
2005-08-31 13:12:13 +00:00
|
|
|
my $text = $node->textContent;
|
2005-08-31 11:58:48 +00:00
|
|
|
my ($key) = split(/: /, $text);
|
|
|
|
|
my $a = $node->findnodes('descendant::a')->[0];
|
|
|
|
|
my $label = $a->textContent;
|
|
|
|
|
my $link = $a->getAttribute('href');
|
|
|
|
|
my $l = URI->new_abs($link, $base);
|
|
|
|
|
# print "$key -> $label $l\n";
|
|
|
|
|
$map{$key} = () unless $map{$key};
|
|
|
|
|
$map{$key}{$l->canonical} = $label;
|
|
|
|
|
}
|
2005-08-31 13:12:13 +00:00
|
|
|
# elisp manual
|
2005-08-31 13:43:38 +00:00
|
|
|
@nodelist = $doc->findnodes('descendant::table[position()=3]/descendant::tr');
|
2005-08-31 13:12:13 +00:00
|
|
|
foreach my $node (@nodelist) {
|
2005-08-31 13:43:38 +00:00
|
|
|
my ($item, $section) = $node->findnodes('td/a');
|
|
|
|
|
next unless $item and $section;
|
2005-08-31 13:12:13 +00:00
|
|
|
my $key = $item->textContent;
|
|
|
|
|
my $label = $section->textContent;
|
|
|
|
|
my $link = $item->getAttribute('href');
|
|
|
|
|
my $l = URI->new_abs($link, $base);
|
2005-08-31 13:43:38 +00:00
|
|
|
# print "$key -> $label $l\n";
|
2005-08-31 13:12:13 +00:00
|
|
|
$map{$key} = () unless $map{$key};
|
|
|
|
|
$map{$key}{$l->canonical} = $label;
|
|
|
|
|
}
|
2005-08-31 11:58:48 +00:00
|
|
|
}
|
|
|
|
|
my $data = join($nl, map {
|
|
|
|
|
my $key = $_;
|
|
|
|
|
$key . $fs . join($gs, map {
|
|
|
|
|
my $link = $_;
|
|
|
|
|
join($rs, $link, $map{$key}{$link});
|
|
|
|
|
} keys %{$map{$_}})
|
|
|
|
|
} keys %map);
|
|
|
|
|
WriteStringToFile($db, $data);
|
2005-08-31 13:12:13 +00:00
|
|
|
print $q->p('Database initialized'), $q->end_html;
|
2005-08-31 11:58:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub GetRaw {
|
|
|
|
|
my $uri = shift;
|
|
|
|
|
return unless eval { require LWP::UserAgent; };
|
|
|
|
|
my $ua = LWP::UserAgent->new;
|
|
|
|
|
my $response = $ua->get($uri);
|
|
|
|
|
return $response->content;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub ReadFile {
|
|
|
|
|
my ($filename) = @_;
|
|
|
|
|
my ($data);
|
|
|
|
|
local $/ = undef; # Read complete files
|
|
|
|
|
if (open(IN, "<$filename")) {
|
|
|
|
|
$data=<IN>;
|
|
|
|
|
close IN;
|
|
|
|
|
return (1, $data);
|
|
|
|
|
}
|
|
|
|
|
return (0, '');
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub ReadFileOrDie {
|
|
|
|
|
my ($filename) = @_;
|
|
|
|
|
my ($status, $data);
|
|
|
|
|
($status, $data) = ReadFile($filename);
|
|
|
|
|
if (!$status) {
|
|
|
|
|
ReportError("Cannot open $filename: $!", '500 Internal Server Error');
|
|
|
|
|
}
|
|
|
|
|
return $data;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub WriteStringToFile {
|
|
|
|
|
my ($file, $string) = @_;
|
|
|
|
|
open(OUT, ">$file")
|
|
|
|
|
or ReportError("Cannot write $file: $!", '500 Internal Server Error');
|
|
|
|
|
print OUT $string;
|
|
|
|
|
close(OUT);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub ReportError { # fatal!
|
|
|
|
|
my ($errmsg, $status, $log) = @_;
|
|
|
|
|
print $q->header(-status => $status);
|
|
|
|
|
print $q->start_html, $q->h2($errmsg), $q->end_html;
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|