forked from github/kensanata.oddmuse
1047 lines
30 KiB
Perl
1047 lines
30 KiB
Perl
#!/usr/bin/perl
|
|
# Texi $Id: texi.pl,v 1.3 2003/04/24 21:52:03 as Exp $
|
|
# Copyright (C) 2002, 2003 Alex Schroeder <alex@gnu.org>
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the
|
|
# Free Software Foundation, Inc.
|
|
# 59 Temple Place, Suite 330
|
|
# Boston, MA 02111-1307 USA
|
|
|
|
use CGI qw/:standard/;
|
|
use CGI::Carp 'fatalsToBrowser';
|
|
|
|
$DataDir = '/home/alex/WWW/emacswiki/test' unless $DataDir;
|
|
$PageDir = "$DataDir/page"; # Stores page data
|
|
$InterFile = "$DataDir/intermap"; # Interwiki site->url map
|
|
$ConfigFile = "$DataDir/config" unless $ConfigFile; # Config file with Perl code to execute
|
|
|
|
# Basics
|
|
$HomePage = 'HomePage'; # Home page (change space to _)
|
|
$HttpCharset = 'ISO-8859-1'; # Charset for pages, eg. 'UTF-8'
|
|
|
|
# LinkPattern
|
|
$WikiLinks = 1; # 1 = LinkPattern is a link
|
|
$FreeLinks = 1; # 1 = [[some text]] is a link
|
|
$FreeUpper = 1; # 1 = forces free links to start with upper case
|
|
$SimpleLinks = 0; # 1 = only letters in links, 0 = allow _ and numbers
|
|
$NonEnglish = 0; # 1 = non-ASCII link characters allowed
|
|
$BracketText = 1; # 1 = [URL desc] uses a description for the URL
|
|
$BracketWiki = 0; # 1 = [WikiLink desc] uses a desc for the local link
|
|
$HtmlLinks = 0; # 1 = <a href="foo">desc</a> is a link
|
|
$UseSubpage = 0; # 1 = PageName/SubPage is a link
|
|
$NetworkFile = 1; # 1 = file: is a valid protocol for URLs
|
|
|
|
# TextFormattingRules
|
|
$HtmlTags = 0; # 1 = allow some 'unsafe' HTML tags
|
|
$RawHtml = 0; # 1 = allow <HTML> environment for raw HTML inclusion
|
|
|
|
# RecentChanges and KeptPages
|
|
$DeletedPage = "DeletedPage"; # Pages starting with this can be deleted
|
|
$RCName = 'RecentChanges'; # Name of changes page (change space to _)
|
|
|
|
if (not @HtmlTags) { # don't set if set in the config file
|
|
if ($HtmlTags) {
|
|
# HTML tag lists, enabled if $HtmlTags is set.
|
|
# Scripting is currently possible with these tags,
|
|
# so they are *not* particularly 'safe'.
|
|
# Tags that must be in <tag> ... </tag> pairs:
|
|
@HtmlTags = qw(b i u font big small sub sup h1 h2 h3 h4 h5 h6 cite code
|
|
em s strike strong tt var div center blockquote ol ul dl
|
|
table caption br p hr li dt dd tr td th);
|
|
} else {
|
|
@HtmlTags = qw(b i u em strong tt);
|
|
}
|
|
}
|
|
|
|
%HtmlTagEquivalent = ('b' => 'b',
|
|
'i' => 'i',
|
|
'u' => 'i',
|
|
'em' => 'emph',
|
|
'strong' => 'strong',
|
|
'tt' => 'code',
|
|
);
|
|
|
|
# == You should not have to change anything below this line. ==
|
|
|
|
$IndentLimit = 20; # Maximum depth of nested lists
|
|
|
|
# == Config file ==
|
|
|
|
if (-f $ConfigFile) {
|
|
do $ConfigFile;
|
|
}
|
|
|
|
# == Code ==
|
|
|
|
die "No HomePage set.\n" unless $HomePage;
|
|
|
|
$outdir = $DataDir unless $outdir;
|
|
$outname = 'wiki' unless $outname;
|
|
$outfile = $outdir . '/' . $outname;
|
|
$licensefile = '' unless $licensefile;
|
|
$intro = qq{\\input texinfo \@c -*-texinfo-*-
|
|
\@c %**start of header
|
|
\@setfilename $outname.info
|
|
\@settitle $HomePage
|
|
\@ifnottex
|
|
\@node Top, $HomePage, , (dir)
|
|
\@top $HomePage
|
|
\@end ifnottex
|
|
};
|
|
|
|
$q = new CGI;
|
|
$q->charset($HttpCharset) if $HttpCharset;
|
|
@ScriptPath = split('/', $q->script_name());
|
|
$ScriptName = pop(@ScriptPath);
|
|
$IndexInit = 0;
|
|
$InterSiteInit = 0;
|
|
%InterSite = ();
|
|
$OpenPageName = '';
|
|
@HtmlStack = ();
|
|
|
|
# Internal Variables
|
|
|
|
my (@pages, @orphans, @control, $current);
|
|
my (%file, %up, %down, %next, %prev, %type, %text, %refs, %menu);
|
|
|
|
charset($HttpCharset);
|
|
|
|
print header;
|
|
print start_html('Texinfo Creation');
|
|
print h1("Texinfo Creation");
|
|
print "<p>";
|
|
|
|
print "Starting<br>\n";
|
|
&process;
|
|
print "Gzipping<br>\n";
|
|
system("/bin/rm", "$outfile.texi.gz");
|
|
system("/bin/gzip", "$outfile.texi");
|
|
print "Done<br>\n";
|
|
print p("Download ", a({-href=>"/$outname.texi.gz"}, "$outname.texi.gz"));
|
|
print end_html;
|
|
|
|
# MAIN CODE
|
|
|
|
sub process {
|
|
open(F,">$outfile.texi") or die "Can't create output file: $!";
|
|
&readpages($PageDir);
|
|
&InitLinkPatterns;
|
|
&readtexts;
|
|
print scalar(@pages) . " pages read<br>\n";
|
|
&removedeletedpages;
|
|
&removeredirects;
|
|
print scalar(@pages) . " pages undeleted and not redirected<br>\n";
|
|
&makechapters;
|
|
&countorphans;
|
|
&findremainingchildren;
|
|
&countorphans;
|
|
&findparents(0);
|
|
&countorphans;
|
|
&findparents(1);
|
|
&countorphans;
|
|
&removeorphans();
|
|
&countwalkthrough;
|
|
# &fixmenus;
|
|
print F $intro;
|
|
&printmastermenu();
|
|
&printorphans;
|
|
&printpages;
|
|
&printfooter;
|
|
close(F);
|
|
}
|
|
|
|
# testing
|
|
|
|
sub testpages {
|
|
foreach my $p (@pages) {
|
|
$s = length($text{$p});
|
|
print "$p has $s bytes<br>\n";
|
|
}
|
|
}
|
|
|
|
sub countorphans {
|
|
my $count = 0;
|
|
foreach my $page (@pages) {
|
|
next if $type{$page};
|
|
$count += 1;
|
|
}
|
|
print "$count/" . scalar(@pages) . " orphaned<br>\n";
|
|
}
|
|
|
|
sub countwalkthrough {
|
|
print scalar(@pages) . " remaining<br>\n";
|
|
my $count = &countwalk($HomePage);
|
|
print "$count pages on the walkthrough<br>\n";
|
|
foreach my $page (@pages) {
|
|
if (!grep(/^$page$/, @control)) {
|
|
print "$page is not part of the walkthroguh<br>\n";
|
|
&printnode($page);
|
|
&printnode($up{$page});
|
|
} elsif (!$type{$page}) {
|
|
print "$page on the walk-through has no type<br>\n";
|
|
}
|
|
}
|
|
foreach my $page (@control) {
|
|
if (!grep(/^$page$/, @pages)) {
|
|
print "$page has suddenly appeared in the page list<br>\n";
|
|
}
|
|
}
|
|
}
|
|
|
|
sub countwalk {
|
|
my $count = 0;
|
|
my ($page) = @_;
|
|
while ($page) {
|
|
push(@control, $page);
|
|
$count += 1;
|
|
if ($down{$page}) {
|
|
$count += &countwalk($down{$page});
|
|
}
|
|
last if $down{$page} and $next{$page} and $down{$page} eq $next{$page};
|
|
$page = $next{$page};
|
|
}
|
|
return $count;
|
|
}
|
|
|
|
# writing texinfo
|
|
|
|
sub removeorphans {
|
|
my @result;
|
|
foreach my $page (@pages) {
|
|
if ($type{$page}) {
|
|
push(@result, $page);
|
|
} else {
|
|
push(@orphans, $page);
|
|
}
|
|
}
|
|
@pages = @result;
|
|
}
|
|
|
|
sub removedeletedpages {
|
|
my @result;
|
|
foreach my $page (@pages) {
|
|
push(@result, $page) unless $text{$page} =~ /^DeletedPage/;
|
|
}
|
|
@pages = @result;
|
|
}
|
|
|
|
sub removeredirects {
|
|
my @result;
|
|
foreach my $page (@pages) {
|
|
push(@result, $page) unless $text{$page} =~ /^#REDIRECT/;
|
|
}
|
|
@pages = @result;
|
|
}
|
|
|
|
sub printpages {
|
|
&printwalk($HomePage);
|
|
}
|
|
|
|
sub printwalk {
|
|
my ($page) = @_;
|
|
while ($page) {
|
|
&printtitle($page);
|
|
print F $text{$page};
|
|
&printmenu($page);
|
|
&printwalk($down{$page}) if ($down{$page});
|
|
last if $down{$page} and $next{$page} and $down{$page} eq $next{$page};
|
|
$page = $next{$page};
|
|
}
|
|
}
|
|
|
|
sub printmenu {
|
|
my ($page) = @_;
|
|
return unless $menu{$page};
|
|
my @menu = @{$menu{$page}};
|
|
return unless scalar(@menu) > 0;
|
|
# print "Menu: " . join(" ",@{$menu{$page}}) . "<br>\n";
|
|
print F "\n\n\@menu\n";
|
|
print F "Related:\n";
|
|
foreach my $entry (@menu) {
|
|
print F "* " . $entry . "::\n";
|
|
}
|
|
print F "\@end menu\n";
|
|
}
|
|
|
|
sub fixmenus {
|
|
foreach my $page (@pages) {
|
|
# print "\n$page<br>\n";
|
|
next unless $menu{$page};
|
|
# entries are the pages that have an up pointer to this page
|
|
my @entries = @{$menu{$page}};
|
|
my @used = ();
|
|
# print "Menu: " . join(" ",@entries) . "<br>\n";
|
|
my @lines = split(/\n/, $text{$page});
|
|
# Identify the menus
|
|
my ($start, $ismenu, $idx) = (0, 0, 0);
|
|
while ($idx <= $#lines) {
|
|
my $line = $lines[$idx];
|
|
# determine start of a menu, check wether any of the entries
|
|
# could be part of a menu, and when the end of a menu is
|
|
# reached, go back and change it to a menu if such an entry was
|
|
# found.
|
|
if ($line =~ /^\@itemize/) {
|
|
$start = $idx;
|
|
$ismenu = 0;
|
|
} elsif ($start and not $ismenu) {
|
|
if ($line =~ /^\@item *($LinkPattern)/
|
|
and grep(/^$1$/,@entries)) {
|
|
$ismenu = 1;
|
|
}
|
|
} elsif ($line =~ /^\@end itemize/) {
|
|
if ($ismenu) {
|
|
$lines[$start] = '@menu';
|
|
$lines[$idx] = '@end menu';
|
|
# go through all entries between start and current position
|
|
my $i = $start;
|
|
while ($i < $idx) {
|
|
my $l = $lines[$i];
|
|
if ($l =~ /^\@item *($LinkPattern)/
|
|
and grep(/^$1$/,@entries)) {
|
|
$l =~ s/^\@item *($LinkPattern) *-* */* $1:: /;
|
|
push(@used,$1);
|
|
$lines[$i] = $l;
|
|
} elsif ($l =~ s/^\@item *//) {
|
|
$lines[$i] = $l;
|
|
}
|
|
$i++;
|
|
}
|
|
}
|
|
$start = 0;
|
|
$ismenu = 0;
|
|
}
|
|
$idx++;
|
|
}
|
|
# print "Used: " . join(" ",@used) . "<br>\n";
|
|
my @newmenu = ();
|
|
foreach my $entry (@entries) {
|
|
unless (grep(/^$entry$/,@used)) {
|
|
push(@newmenu, $entry);
|
|
}
|
|
}
|
|
# print "New: " . join(" ",@newmenu) . "<br>\n";
|
|
@{$menu{$page}} = @newmenu;
|
|
$text{$page} = join("\n",@lines);
|
|
}
|
|
}
|
|
|
|
sub printnode {
|
|
my ($page) = @_;
|
|
my $n = $next{$page} || "";
|
|
my $p = $prev{$page} || "";
|
|
my $u = $up{$page} || "";
|
|
# my $d = $down{$page} || "";
|
|
# print "$page, $n, $p, $u, $d<br>\n";
|
|
print F "\n\n";
|
|
print F "\@node $page, $n, $p, $u\n";
|
|
print F "\@cindex $page\n";
|
|
}
|
|
|
|
sub printtitle {
|
|
my ($page) = @_;
|
|
print F "\n";
|
|
&printnode($page);
|
|
print F "\@comment node-name, next, previous, up\n";
|
|
print F "\@$type{$page} $page\n";
|
|
print F "\n";
|
|
}
|
|
|
|
sub makechapters {
|
|
my $old = $HomePage;
|
|
$type{$HomePage} = 'unnumbered';
|
|
$up{$HomePage} = 'Top';
|
|
$prev{$HomePage} = 'Top';
|
|
print "$HomePage is the main page<br>\n";
|
|
foreach my $page (@{$refs{$HomePage}}) {
|
|
print "Examining $page...<br>\n";
|
|
if ($page =~ /^Category/
|
|
and grep(/^$page$/,@pages)) {
|
|
print "$page is a chapter<br>\n";
|
|
$type{$page} = 'unnumbered';
|
|
$up{$page} = $HomePage;
|
|
$down{$HomePage} = $page unless ($down{$HomePage}); # only once!
|
|
push(@{$menu{$HomePage}}, $page);
|
|
$next{$old} = $page;
|
|
$prev{$page} = $old;
|
|
$old = $page;
|
|
# &makechildren($page);
|
|
}
|
|
}
|
|
}
|
|
|
|
sub makechildren {
|
|
my ($parent) = @_;
|
|
my $old = $parent;
|
|
foreach my $page (@{$refs{$parent}}) {
|
|
next if $type{$page} or not grep(/^$page$/,@pages);
|
|
# make sure backref exists
|
|
if (grep(/^$parent$/,@{$refs{$page}})) {
|
|
print "$page added under $parent<br>\n";
|
|
# print " it links to: " . join(" ",@{$refs{$page}}) . "<br>\n";
|
|
$type{$page} = 'unnumberedsec';
|
|
$up{$page} = $parent;
|
|
$down{$parent} = $page unless ($down{$parent}); # only once!
|
|
push(@{$menu{$parent}}, $page);
|
|
$next{$old} = $page unless ($parent eq $old);
|
|
$prev{$page} = $old;
|
|
$old = $page;
|
|
}
|
|
}
|
|
}
|
|
|
|
sub findremainingchildren {
|
|
foreach my $page (@pages) {
|
|
&makechildren($page) if $type{$page} and not $down{$page};
|
|
}
|
|
}
|
|
|
|
sub findparents {
|
|
my ($force) = @_;
|
|
foreach my $page (@pages) {
|
|
if (not $type{$page} and $refs{$page}) {
|
|
my @parents = @{$refs{$page}}; # any page pointed to might be a parent
|
|
if (not $force) {
|
|
@parents = grep(/^Category/,@parents); # if possible, use a category as parent
|
|
}
|
|
@parents = grep(!/^$page$/,@parents); # exclude self
|
|
@parents = grep($type{$_},@parents); # only accept non-orphaned parents
|
|
if (@parents) {
|
|
my $parent = $parents[0];
|
|
my $old = $down{$parent}; # find last child
|
|
if ($old) {
|
|
$old = $next{$old} while ($next{$old});
|
|
} else {
|
|
$old = $parent;
|
|
}
|
|
print "$page adopted under $parent<br>\n";
|
|
$type{$page} = 'unnumbered';
|
|
$up{$page} = $parent;
|
|
$down{$parent} = $page unless ($down{$parent}); # only once!
|
|
push(@{$menu{$parent}}, $page);
|
|
$prev{$page} = $old;
|
|
$next{$old} = $page unless ($next{$old}); # only once!
|
|
&makechildren($page);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
sub printfooter {
|
|
print F &readfile($licensefile);
|
|
print F <<EOT;
|
|
\@node Index, , , Top
|
|
\@unnumbered Index
|
|
\@printindex cp
|
|
\@bye
|
|
EOT
|
|
}
|
|
|
|
sub printorphans {
|
|
print F "\nThe following pages where not included:\n";
|
|
foreach my $page (@orphans) {
|
|
print F "$page\n";
|
|
}
|
|
}
|
|
|
|
sub printmastermenu {
|
|
print F '@menu' . "\n";
|
|
print F "* " . $HomePage . "::\n";
|
|
print F '@detailmenu' . "\n";
|
|
foreach my $page (@pages) {
|
|
next if ($page eq $HomePage);
|
|
print F "* " . $page . "::\n";
|
|
}
|
|
print F "* GNU Free Documentation License::\n";
|
|
print F "* Index::\n";
|
|
print F '@end detailmenu' . "\n";
|
|
print F '@end menu' . "\n";
|
|
}
|
|
|
|
# reading the page database
|
|
|
|
sub readpages {
|
|
my ($parent) = @_;
|
|
print "Starting with $parent...<br>\n";
|
|
my @dirs = readdirectory($parent);
|
|
foreach my $dir (@dirs) {
|
|
next if $dir =~ /^\./ or ! -d "$parent/$dir";
|
|
print "Reading $dir directory...<br>\n";
|
|
my @files = readdirectory("$parent/$dir");
|
|
foreach my $file (@files) {
|
|
next if $dir =~ /^\./ or ! -f "$parent/$dir/$file";
|
|
# print "Reading $file...<br>\n";
|
|
$file =~ /(.*)\.db$/;
|
|
my $page = $1;
|
|
push @pages, $page;
|
|
$file{$page} = "$parent/$dir/$file";
|
|
# print "Page $page stored in $file{$page}<br>\n"
|
|
}
|
|
}
|
|
}
|
|
|
|
sub readtexts {
|
|
local $| = 1;
|
|
print "Reading pages";
|
|
foreach $page (@pages) {
|
|
$current = $page;
|
|
my %p = &splitpage(&readfile($file{$page}));
|
|
my %s = &splitsection(%p);
|
|
my %t = &splittext(%s);
|
|
my $o = $t{text};
|
|
my $w = &wiki2texi($o);
|
|
$text{$page} = $w;
|
|
print ".";
|
|
}
|
|
print "done.<br>\n";
|
|
}
|
|
|
|
sub readdirectory {
|
|
my ($dirname) = @_;
|
|
opendir(DIR, $dirname) or die "can't opendir $dirname: $!";
|
|
@files = readdir(DIR);
|
|
closedir DIR;
|
|
return @files;
|
|
}
|
|
|
|
sub readfile {
|
|
my ($filename) = @_;
|
|
return '' unless $filename;
|
|
my $data;
|
|
local $/ = undef; # Read complete files
|
|
open(IN, "<$filename") or die "can't read $filename: $!";
|
|
$data=<IN>;
|
|
close IN;
|
|
return $data;
|
|
}
|
|
|
|
sub splitpage {
|
|
my ($data) = @_;
|
|
my (%page);
|
|
%page = split(/$FS1/, $data, -1); # -1 keeps trailing null fields
|
|
return %page;
|
|
}
|
|
|
|
sub splitsection {
|
|
my (%page) = @_;
|
|
my (%section);
|
|
%section = split(/$FS2/, $page{text_default}, -1);
|
|
return %section;
|
|
}
|
|
|
|
sub splittext {
|
|
my (%section) = @_;
|
|
my (%text);
|
|
%text = split(/$FS3/, $section{data}, -1);
|
|
return %text;
|
|
}
|
|
|
|
# Code moved over from wiki.pl
|
|
|
|
sub InitLinkPatterns {
|
|
my ($UpperLetter, $LowerLetter, $AnyLetter, $LpA, $LpB, $QDelim);
|
|
# Allow uses to call this from their config file, so do not run twice.
|
|
return if $FS;
|
|
# Field separators are used in the URL-style patterns below.
|
|
if (!$FS) {
|
|
$FS = "\x1e"; # The FS character is the RECORD SEPARATOR control char in ASCII
|
|
$FS0 = "\xb3"; # The old FS character is a superscript "3" in Latin-1
|
|
}
|
|
$FS1 = $FS . '1'; # The FS values are used to separate fields
|
|
$FS2 = $FS . '2'; # in stored hashtables and other data structures.
|
|
$FS3 = $FS . '3'; # The FS character is not allowed in user data.
|
|
$UpperLetter = '[A-Z';
|
|
$LowerLetter = '[a-z';
|
|
$AnyLetter = '[A-Za-z';
|
|
if ($NonEnglish) {
|
|
$UpperLetter .= "\xc0-\xde";
|
|
$LowerLetter .= "\xdf-\xff";
|
|
$AnyLetter .= "\x80-\xff";
|
|
}
|
|
if (!$SimpleLinks) {
|
|
$AnyLetter .= '_0-9';
|
|
}
|
|
$UpperLetter .= ']'; $LowerLetter .= ']'; $AnyLetter .= ']';
|
|
# Main link pattern: lowercase between uppercase, then anything
|
|
$LpA = $UpperLetter . '+' . $LowerLetter . '+' . $UpperLetter
|
|
. $AnyLetter . '*';
|
|
# Optional subpage link pattern: uppercase, lowercase, then anything
|
|
$LpB = $UpperLetter . '+' . $LowerLetter . '+' . $AnyLetter . '*';
|
|
if ($UseSubpage) {
|
|
# Loose pattern: If subpage is used, subpage may be simple name
|
|
$LinkPattern = "((?:(?:$LpA)?\\/$LpB)|$LpA)";
|
|
# Strict pattern: both sides must be the main LinkPattern
|
|
# $LinkPattern = "((?:(?:$LpA)?\\/)?$LpA)";
|
|
} else {
|
|
$LinkPattern = "($LpA)";
|
|
}
|
|
$QDelim = '(?:"")?'; # Optional quote delimiter (not in output)
|
|
$LinkPattern .= $QDelim;
|
|
# Inter-site convention: sites must start with uppercase letter
|
|
# (Uppercase letter avoids confusion with URLs)
|
|
$InterSitePattern = $UpperLetter . $AnyLetter . '+';
|
|
$InterLinkPattern = "($InterSitePattern:[-a-zA-Z0-9\x80-\xff_=!?#$@~`%&*+\\/:;.,]+[-a-zA-Z0-9\x80-\xff_=#$@~`%&*+\\/])$QDelim";
|
|
if ($FreeLinks) {
|
|
# Note: the - character must be first in $AnyLetter definition
|
|
if ($NonEnglish) {
|
|
$AnyLetter = "[-,.()' _0-9A-Za-z\x80-\xff]";
|
|
} else {
|
|
$AnyLetter = "[-,.()' _0-9A-Za-z]";
|
|
}
|
|
}
|
|
$FreeLinkPattern = "($AnyLetter+)";
|
|
if ($UseSubpage) {
|
|
$FreeLinkPattern = "((?:(?:$AnyLetter+)?\\/)?$AnyLetter+)";
|
|
}
|
|
$FreeLinkPattern .= $QDelim;
|
|
# Url-style links are delimited by one of:
|
|
# 1. Whitespace (kept in output)
|
|
# 2. Left or right angle-bracket (< or >) (kept in output)
|
|
# 3. Right square-bracket (]) (kept in output)
|
|
# 4. A single double-quote (") (kept in output)
|
|
# 5. A $FS (field separator) character (kept in output)
|
|
# 6. A double double-quote ("") (removed from output)
|
|
$UrlProtocols = 'http|https|ftp|afs|news|nntp|mid|cid|mailto|wais|'
|
|
. 'prospero|telnet|gopher';
|
|
$UrlProtocols .= '|file' if $NetworkFile;
|
|
$UrlPattern = "((?:$UrlProtocols):(?://[-a-zA-Z0-9_.]+:[0-9]*)?[-a-zA-Z0-9_=!?#$\@~`%&*+\\/:;.,]+[-a-zA-Z0-9_=#$\@~`%&*+\\/])$QDelim";
|
|
$ImageExtensions = '(gif|jpg|png|bmp|jpeg)';
|
|
}
|
|
|
|
# ==== Common wiki markup ====
|
|
sub wiki2texi {
|
|
my ($pageText) = @_;
|
|
%SaveUrl = ();
|
|
%SaveNumUrl = ();
|
|
$SaveUrlIndex = 0;
|
|
$SaveNumUrlIndex = 0;
|
|
return '#REDIRECT' if $pageText =~ /^#REDIRECT/;
|
|
return 'DeletedPage' if $pageText =~ /^DeletedPage/;
|
|
$pageText =~ s/$FS//g; # Remove separators (paranoia)
|
|
$pageText = &QuoteTexi($pageText);
|
|
$pageText = &ApplyTexiRules($pageText, 1);
|
|
return $pageText;
|
|
}
|
|
|
|
sub QuoteTexi {
|
|
my ($text) = @_;
|
|
$text =~ s/\r//g; # remove ^M
|
|
$text =~ s/\@/\@\@/g; # quote @
|
|
$text =~ s/\{/\@\{/g; # quote {
|
|
$text =~ s/\}/\@\}/g; # quote }
|
|
$text =~ s/\\input/\@\}/g; # quote }
|
|
return $text;
|
|
}
|
|
|
|
sub ApplyTexiRules {
|
|
# locallinks: apply rules that create links depending on local config (incl. interlink!)
|
|
my ($text, $locallinks) = @_;
|
|
$text =~ s/\r\n/\n/g; # DOS to Unix
|
|
my $state = ''; # quote, list, or normal ('')
|
|
my $fragment; # the current HTML fragment to be printed
|
|
my $result;
|
|
my $htmlre = join('|',(@HtmlTags));
|
|
my ($oldmatch, $rest);
|
|
local $_ = $text;
|
|
while(1) {
|
|
# first block -- at the beginning of a line. Note that block level elements eat empty lines to prevent empty p elements.
|
|
undef($fragment);
|
|
if (m/\G(?<=\n)/cg or m/\G^/cg) { # at the beginning of a line
|
|
if (m/\G<pre>\n?(.*?\n)<\/pre>[ \t]*\n?/cgs) { # pre must be on column 1
|
|
$fragment = &CloseHtmlEnvironments() . "\@example\n" . $1 . "\@end example\n";
|
|
} elsif (m/\G(\s*\n)*(\*+)[ \t]*/cg) {
|
|
$fragment = &OpenHtmlEnvironment('itemize',length($2)) . "\n\@item ";
|
|
} elsif (m/\G(\s*\n)*(\#+)[ \t]*/cg) {
|
|
$fragment = &OpenHtmlEnvironment('enumerate',length($2)) . "\n\@item ";
|
|
} elsif (m/\G(\s*\n)*(\:+)[ \t]*/cg) {
|
|
$fragment = &OpenHtmlEnvironment('quotation',length($2));
|
|
} elsif (m/\G(\s*\n)*(\=+)[ \t]*(.*?)[ \t]*(=+)[ \t]*\n?/cg) {
|
|
$fragment = &CloseHtmlEnvironments() . &WikiHeading($2, $3);
|
|
} elsif (m/\G(\s*\n)*----+[ \t]*\n?/cg) {
|
|
$fragment = &CloseHtmlEnvironments() . $q->hr();
|
|
} elsif (m/\G(\s*\n)*(([ \t]+.*\n?)+)/cg) {
|
|
$fragment = &OpenHtmlEnvironment('example',1) . $2; # always level 1
|
|
} elsif (m/\G(\s*\n)*(\;+)[ \t]*(?=.*\:)/cg) {
|
|
$fragment = &OpenHtmlEnvironment('table',length($2), '@asis')
|
|
. "\@item "; # the `:' needs special treatment, later
|
|
} elsif (m/\G(\s*\n)+/cg) {
|
|
$fragment = &CloseHtmlEnvironments() . "\n\n"; # there is another one like this further down
|
|
} elsif (m/\G(\<include +"(.*)"\>[ \t]*\n?)/cgi) { # <include "uri..."> includes the text of the given URI verbatim
|
|
$oldmatch = $1;
|
|
my $oldpos = pos;
|
|
&ApplyTexiRules(&QuoteHtml(&GetRaw($2)),0);
|
|
pos = $oldpos;
|
|
} elsif (m/\G(\<rss +"(.*)"\>[ \t]*\n?)/cgi) { # <rss "uri..."> stores the parsed RSS of the given URI
|
|
$result .= "[RSS feed from \@url{$2}]";
|
|
}
|
|
if (defined $fragment) {
|
|
$result .= $fragment;
|
|
next; # skipt the remaining tests
|
|
}
|
|
}
|
|
# second block -- remaining hilighting
|
|
if ($HtmlStack[0] eq 'table' && m/\G:/cg) {
|
|
$fragment = "\n";
|
|
} elsif (m/\G\<nowiki\>(.*?)\<\/nowiki\>/cgis) {
|
|
$fragment = $1;
|
|
} elsif (m/\G\<code\>(.*?)\<\/code\>/cgis) {
|
|
$fragment = $q->code($1);
|
|
} elsif ($RawHtml && m/\G\<html\>(.*?)\<\/html\>/cgis) {
|
|
$fragment = &UnquoteHtml($1);
|
|
} elsif (m/\G'''/cg) { # traditional wiki syntax with '''strong'''
|
|
if ($HtmlStack[0] eq 'strong') {
|
|
$fragment = &CloseHtmlEnvironment();
|
|
} else {
|
|
$fragment = &AddHtmlEnvironment('strong');
|
|
}
|
|
} elsif (m/\G''/cg) { # traditional wiki syntax with ''emph''
|
|
if ($HtmlStack[0] eq 'em') {
|
|
$fragment = &CloseHtmlEnvironment();
|
|
} else {
|
|
$fragment = &AddHtmlEnvironment('em');
|
|
}
|
|
} elsif (m/\G\<($htmlre)\>/cgi) { # opening
|
|
$fragment = &AddHtmlEnvironment($1);
|
|
} elsif (m/\G\<\/($htmlre)\>/cgi) { # closing tags
|
|
$fragment = &CloseHtmlEnvironment($1);
|
|
} elsif ($HtmlLinks && m/\G\<a(\s[^<>]+?)\>(.*?)\<\/a\>/cgi) { # <a ...>text</a>
|
|
$fragment = "<a$1>$2</a>";
|
|
} elsif ($BracketText && $locallinks && m/\G(\[$InterLinkPattern\s+([^\]]+?)\])/cg) { # [InterLink text]
|
|
# Interlinks can change when the intermap file changes (local config, therefore depend on $locallinks).
|
|
# The intermap file is only read if necessary, so if this not an interlink after all,
|
|
# we have to backtrack a bit.
|
|
$oldmatch = $1;
|
|
$fragment = &GetInterLink($2, $3, 1);
|
|
# we may have to backtrack a bit.
|
|
if ($oldmatch eq $fragment) {
|
|
($fragment, $rest) = split(/:/, $oldmatch, 2);
|
|
pos = (pos) - length($rest) - 1;
|
|
} else {
|
|
$result .= $fragment;
|
|
}
|
|
} elsif ($locallinks && m/\G(\[$InterLinkPattern\])/cog) { # [InterWiki:FooBar] makes footnotes [1]
|
|
$oldmatch = $1;
|
|
$fragment = &GetInterLink($2, '', 1);
|
|
if ($oldmatch eq $fragment) {
|
|
($fragment, $rest) = split(/:/, $oldmatch, 2);
|
|
pos = (pos) - length($rest) - 1;
|
|
} else {
|
|
$result .= $fragment;
|
|
}
|
|
} elsif ($locallinks && m/\G$InterLinkPattern/cog) { # InterWiki:FooBar
|
|
$oldmatch = $1;
|
|
$fragment = &GetInterLink($oldmatch, '', 0);
|
|
# we have to backtrack a bit.
|
|
if ($oldmatch eq $fragment) {
|
|
($fragment, $rest) = split(/:/, $oldmatch, 2);
|
|
pos = (pos) - length($rest) - 1;
|
|
} else {
|
|
$result .= $fragment;
|
|
}
|
|
} elsif ($BracketText && m/\G\[$UrlPattern\s+([^\]]+?)\]/cg) { # [URL text] makes [text] link to URL
|
|
$fragment = &GetUrl($1, $2, 1, 0);
|
|
} elsif (m/\G\[$UrlPattern\]/cog) { # [URL] makes footnotes [1]
|
|
$fragment = &GetUrl($1, '', 1, 0);
|
|
} elsif (m/\G$UrlPattern/cg) { # plain URLs after all $UrlPattern, such that [$UrlPattern text] has priority
|
|
$fragment = &GetUrl($1, '', 0, 1);
|
|
} elsif ($WikiLinks && $BracketWiki && $locallinks && m/\G(\[$LinkPattern\s+([^\]]+?)\])/cg) { # [LocalPage text]
|
|
$result .= &GetPageOrEditLink($2, $3, 1);
|
|
} elsif ($WikiLinks && $locallinks && m/\G(\[$LinkPattern\])/cg) { # [LocalPage]
|
|
$result .= &GetPageOrEditLink($2, '', 1);
|
|
} elsif ($WikiLinks && $locallinks && m/\G$LinkPattern/cg) { # LocalPage
|
|
# LinkPattern after all $UrlPattern, such that http//:...?FooBar
|
|
# will not get an additional ? if FooBar is undefined.
|
|
$result .= &GetPageOrEditLink($1, '');
|
|
} elsif ($FreeLinks && $BracketWiki && $locallinks && m/\G(\[\[$FreeLinkPattern\|([^\]]+)\]\])/cg) { # [[Free Link|text]]
|
|
$result .= &GetPageOrEditLink($2, $3, 0 , 1);
|
|
} elsif ($FreeLinks && $locallinks && m/\G(\[\[$FreeLinkPattern\]\])/cg) { # [[Free Link]]
|
|
$result .= &GetPageOrEditLink($2, '', 0, 1);
|
|
} elsif (m/\G\s*\n(s*\n)+/cg) { # paragraphs -- whitespace including at least two newlines
|
|
$fragment = &CloseHtmlEnvironments() . "\n\n"; # there is another one like this further up
|
|
} elsif (m/\G\s+/cgs) { # whitespace -- including (max one) newlines due to previous rules
|
|
$fragment = ' ';
|
|
} elsif (m/\G(\w+)/cgi) { # word -- cannot use \S here because that eats following markup, too: word<b> for example.
|
|
$fragment = $1;
|
|
} elsif (m/\G(\S)/cg) { # punctuation and other stuff, if not matched by previous markup rule. Gotta move slowly, eg. word.</b>
|
|
$fragment = $1;
|
|
} else {
|
|
last;
|
|
}
|
|
if (defined $fragment) {
|
|
$result .= $fragment;
|
|
}
|
|
}
|
|
# last block -- close it, cache it
|
|
$fragment = &CloseHtmlEnvironments();
|
|
if (defined $fragment) {
|
|
$result .= $fragment;
|
|
}
|
|
return $result;
|
|
}
|
|
|
|
sub UnquoteHtml {
|
|
my ($html) = @_;
|
|
$html =~ s/</</g;
|
|
$html =~ s/>/>/g;
|
|
$html =~ s/&/&/g;
|
|
return $html;
|
|
}
|
|
|
|
sub CloseEnv {
|
|
my $tag = shift;
|
|
if (exists $HtmlTagEquivalent{$tag}) {
|
|
return "}";
|
|
} else {
|
|
return "\n\@end " . $tag . "\n";
|
|
}
|
|
}
|
|
|
|
sub OpenEnv {
|
|
my ($tag, $attr) = @_;
|
|
if ($attr) {
|
|
return "\n\@$tag $attr\n";
|
|
} elsif (exists $HtmlTagEquivalent{$tag}) {
|
|
return '@' . $HtmlTagEquivalent{$tag} . '{';
|
|
} else {
|
|
return "\n\@$tag\n";
|
|
}
|
|
}
|
|
|
|
sub CloseHtmlEnvironment { # just close the current one
|
|
my $code = shift;
|
|
my $result = shift(@HtmlStack) if not defined($code) or $HtmlStack[0] eq $code;
|
|
return &CloseEnv($result) if $result;
|
|
return "</$code>"; # to recognize the bug
|
|
}
|
|
|
|
sub AddHtmlEnvironment { # add a new one so that it will be closed!
|
|
my ($code) = @_;
|
|
if ($HtmlStack[0] ne $code) {
|
|
unshift(@HtmlStack, $code);
|
|
return &OpenEnv($code);
|
|
}
|
|
return '';
|
|
}
|
|
|
|
sub CloseHtmlEnvironments { # close all
|
|
my $text = ''; # always return something
|
|
my $tag;
|
|
while (@HtmlStack > 0) {
|
|
$text .= &CloseEnv(shift(@HtmlStack));
|
|
}
|
|
return $text;
|
|
}
|
|
|
|
sub OpenHtmlEnvironment { # close the previous one and open a new one instead
|
|
my ($code, $depth, $class) = @_;
|
|
my ($oldCode, $tag);
|
|
my $text = ''; # always return something
|
|
$depth = @HtmlStack unless defined($depth);
|
|
while (@HtmlStack > $depth) { # Close tags as needed
|
|
$text .= &CloseEnv(shift(@HtmlStack));
|
|
}
|
|
if ($depth > 0) {
|
|
$depth = $IndentLimit if ($depth > $IndentLimit);
|
|
if (@HtmlStack) { # Non-empty stack
|
|
$oldCode = shift(@HtmlStack);
|
|
if ($oldCode ne $code) {
|
|
$text .= &CloseEnv($oldCode);
|
|
$text .= &OpenEnv($code, $class);
|
|
}
|
|
unshift(@HtmlStack, $code);
|
|
}
|
|
while (@HtmlStack < $depth) {
|
|
unshift(@HtmlStack, $code);
|
|
$text .= &OpenEnv($code, $class);
|
|
}
|
|
}
|
|
return $text;
|
|
}
|
|
|
|
sub GetInterLink {
|
|
my ($id, $text, $bracket) = @_;
|
|
my ($site, $page) = split(/:/, $id, 2);
|
|
$page =~ s/&/&/g; # Unquote common URL HTML
|
|
my $url;
|
|
$url = &GetSiteUrl($site) if $page;
|
|
if ($text && $bracket && !$url) {
|
|
return "[$id $text]";
|
|
} elsif ($bracket && !$url) {
|
|
return "[$id]";
|
|
} elsif (!$url) {
|
|
return $id;
|
|
} elsif ($bracket && !$text) {
|
|
$text = ++$FootnoteNumber;
|
|
} elsif (!$text) {
|
|
$text = $id;
|
|
}
|
|
$url .= $page;
|
|
return "\@uref{$url,$text}";
|
|
}
|
|
|
|
sub GetSiteUrl {
|
|
my ($site) = @_;
|
|
my ($data, $url, $status);
|
|
if (!$InterSiteInit) {
|
|
$InterSiteInit = 1;
|
|
($status, $data) = &ReadFile($InterFile);
|
|
return '' if (!$status);
|
|
%InterSite = split(/\s+/, $data); # Later consider defensive code
|
|
}
|
|
$url = $InterSite{$site} if (defined($InterSite{$site}));
|
|
return $url;
|
|
}
|
|
|
|
sub GetUrl {
|
|
my ($url, $text, $bracket, $images) = @_;
|
|
if ($NetworkFile && $url =~ m|^file:///|
|
|
or !$NetworkFile && $url =~ m|^file:|) {
|
|
# Only do remote file:// links. No file:///c|/windows.
|
|
return $url;
|
|
} elsif ($bracket && !$text) {
|
|
return "\@footnote{$url}";
|
|
}
|
|
$url = &UnquoteHtml($url); # links should be unquoted again
|
|
if ($text) {
|
|
return "\@uref{$url, $text}";
|
|
} else {
|
|
return "\@uref{$url}";
|
|
}
|
|
}
|
|
|
|
sub GetPageOrEditLink { # use GetPageLink and GetEditLink if you know the result!
|
|
my ($id, $text, $bracket, $free) = @_;
|
|
local $| = 1;
|
|
$id =~ s/^\s+//; # Trim extra spaces
|
|
$id =~ s/\s+$//;
|
|
$id =~ s|\s*/\s*|/|; # ...also before/after subpages
|
|
$id =~ s|^/|$MainPage/|;
|
|
$id = &FreeToNormal($id) if $free;
|
|
my $exists = grep(/^$id$/, @pages);
|
|
if (!$text && $exists && $bracket) {
|
|
$text = ++$FootnoteNumber;
|
|
}
|
|
if ($exists) {
|
|
$text = $id unless $text;
|
|
$text =~ s/_/ /g if $free;
|
|
$text = "[$text]" if $bracket;
|
|
return &ScriptLink($id, $text);
|
|
} else {
|
|
# $free and $bracket usually exclude each other
|
|
# $text and not $bracket exclude each other
|
|
if ($bracket && $text) {
|
|
return "[$id $text]";
|
|
} elsif ($bracket) {
|
|
return "[$id]";
|
|
} elsif ($free && $text) {
|
|
$id =~ s/_/ /g;
|
|
$text =~ s/_/ /g;
|
|
return "[$id $text]";
|
|
} elsif ($free) {
|
|
$text = $id;
|
|
$text = "[$text]" if $text =~ /_/;
|
|
$text =~ s/_/ /g;
|
|
return $text;
|
|
} else { # plain, no text
|
|
return $id;
|
|
}
|
|
}
|
|
}
|
|
|
|
sub GetPageLink { # shortcut
|
|
my ($id, $name) = @_;
|
|
$name = $id unless $name;
|
|
$id =~ s|^/|$MainPage/|;
|
|
if ($FreeLinks) {
|
|
$id = &FreeToNormal($id);
|
|
$name =~ s/_/ /g;
|
|
}
|
|
return &ScriptLink($id, $name);
|
|
}
|
|
|
|
sub GetEditLink { # shortcut
|
|
my ($id, $name) = @_;
|
|
$id =~ s|^/|$MainPage/|;
|
|
if ($FreeLinks) {
|
|
$id = &FreeToNormal($id);
|
|
$name =~ s/_/ /g;
|
|
}
|
|
return &ScriptLink("action=edit&id=$id", $name);
|
|
}
|
|
|
|
sub ScriptLink {
|
|
my ($action, $text) = @_;
|
|
push @{$refs{$current}}, $text; # remember all forward links
|
|
return $text;
|
|
}
|
|
|
|
sub WikiHeading {
|
|
my ($pre, $depth, $text) = @_;
|
|
return $pre . "\@heading $text\n";
|
|
}
|
|
|
|
sub T {
|
|
my ($text) = @_;
|
|
if (1) { # Later make translation optional?
|
|
if (defined($Translate{$text}) && ($Translate{$text} ne '')) {
|
|
return $Translate{$text};
|
|
}
|
|
}
|
|
return $text;
|
|
}
|
|
|
|
sub Ts {
|
|
my ($text, $string) = @_;
|
|
$text = T($text);
|
|
$text =~ s/\%s/$string/;
|
|
return $text;
|
|
}
|
|
|
|
sub ReadFile {
|
|
my ($fileName) = @_;
|
|
my ($data);
|
|
local $/ = undef; # Read complete files
|
|
if (open(IN, "<$fileName")) {
|
|
$data=<IN>;
|
|
close IN;
|
|
if ($FS0 and $data =~ /$FS0/ and $data !~ /$FS/) {
|
|
$data =~ s/$FS0/$FS/go;
|
|
$FS0used = 1;
|
|
}
|
|
return (1, $data);
|
|
}
|
|
return (0, '');
|
|
}
|
|
|
|
sub FreeToNormal {
|
|
my ($id) = @_;
|
|
$id =~ s/ /_/g;
|
|
$id = ucfirst($id);
|
|
if (index($id, '_') > -1) { # Quick check for any space/underscores
|
|
$id =~ s/__+/_/g;
|
|
$id =~ s/^_//;
|
|
$id =~ s/_$//;
|
|
if ($UseSubpage) {
|
|
$id =~ s|_/|/|g;
|
|
$id =~ s|/_|/|g;
|
|
}
|
|
}
|
|
if ($FreeUpper) {
|
|
# Note that letters after ' are *not* capitalized
|
|
if ($id =~ m|[-_.,\(\)/][a-z]|) { # Quick check for non-canonical case
|
|
$id =~ s|([-_.,\(\)/])([a-z])|$1 . uc($2)|ge;
|
|
}
|
|
}
|
|
return $id;
|
|
}
|