Files
oddmuse/phpwiki-search.pl

83 lines
2.6 KiB
Perl
Raw Normal View History

2004-05-16 11:33:10 +00:00
#! /usr/bin/perl
# Copyright (C) 2004 Alex Schroeder <alex@emacswiki.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.
# 59 Temple Place, Suite 330
# Boston, MA 02111-1307 USA
use CGI qw/:standard/;
use CGI::Carp qw(fatalsToBrowser);
use LWP::UserAgent;
2004-05-16 11:37:10 +00:00
use Encode;
2004-05-16 11:33:10 +00:00
if (not param('url')) {
2004-05-16 13:37:18 +00:00
print header(-charset=>'utf-8'),
2004-05-16 11:33:10 +00:00
start_html('PHP Wiki Search RSS 3.0'),
h1('PHP Wiki Search RSS 3.0'),
p('Translates a PHP Wiki Search result into RSS 3.0 usable by Oddmuse.'),
start_form(-method=>'GET'),
2004-05-16 11:37:10 +00:00
p('Search URL: ', textfield('url', '', 40), checkbox('latin-1'), submit()),
2004-05-16 11:33:10 +00:00
end_form(),
end_html();
exit;
}
print header(-type=>'text/plain; charset=UTF-8');
2004-05-16 13:37:18 +00:00
my $url = param('url');
if (param('latin-1')) {
$url =~ s/%([0-9a-f][0-9a-f])/chr(hex($1))/ige;
$url = encode('latin-1', decode('utf-8', $url));
my @letters = split(//, $url);
my @safe = ('a' .. 'z', 'A' .. 'Z', '0' .. '9', '-', '_', '.', '!', '~', '*', "'", '(', ')',
':', '/', '?', ';', '&', '=');
foreach my $letter (@letters) {
my $pattern = quotemeta($letter);
if (not grep(/$pattern/, @safe)) {
$letter = uc(sprintf("%%%02x", ord($letter)));
}
}
$url = join('', @letters);
}
2004-05-16 11:33:10 +00:00
my $ua = new LWP::UserAgent;
2004-05-16 13:37:18 +00:00
my $request = HTTP::Request->new('GET', $url);
2004-05-16 11:33:10 +00:00
my $response = $ua->request($request);
my $data = $response->content;
2004-05-16 11:37:10 +00:00
$data = encode('utf-8', decode('latin-1', $data)) if param('latin-1');
2004-05-16 11:33:10 +00:00
$data =~ /\<title\>([^<]*)/i;
print "title: $1\n" if $1;
2004-05-16 13:37:18 +00:00
print "link: " . param(url) . "\n";
print "debug: $url\n"; # FIXME
print "\n";
2004-05-16 12:00:39 +00:00
while ($data =~ m|<dt>.*?<a href="([^"]*)".*\n((<dd>.*</dd>\n)*)|g) {
2004-05-16 12:30:59 +00:00
my ($title, $desc) = ($1, $2);
$title =~ s/%([0-9a-f][0-9a-f])/chr(hex($1))/ige;
$title = encode('utf-8', decode('latin-1', $title)) if param('latin-1');
print "title: $title\n";
$_ = $desc;
2004-05-16 12:00:39 +00:00
s|<dd>||g;
s|<small[^>]*>||g;
s|<strong[^>]*>||g;
s|</strong>||g;
s|</small>||g;
s|</dd>||g;
2004-05-16 12:03:43 +00:00
s|\n+$||g;
2004-05-16 12:00:39 +00:00
s|\n|\n\t|g;
print "description: $_\n";
print "\n";
2004-05-16 11:33:10 +00:00
}