Files
oddmuse/texi.pl
2003-04-24 21:52:03 +00:00

1047 lines
30 KiB
Perl

#!/usr/bin/perl
# Texi $Id: texi.pl,v 1.3 2003/04/24 21:52:03 as Exp $
# Copyright (C) 2002, 2003 Alex Schroeder <alex@gnu.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.
# 59 Temple Place, Suite 330
# Boston, MA 02111-1307 USA
use CGI qw/:standard/;
use CGI::Carp 'fatalsToBrowser';
$DataDir = '/home/alex/WWW/emacswiki/test' unless $DataDir;
$PageDir = "$DataDir/page"; # Stores page data
$InterFile = "$DataDir/intermap"; # Interwiki site->url map
$ConfigFile = "$DataDir/config" unless $ConfigFile; # Config file with Perl code to execute
# Basics
$HomePage = 'HomePage'; # Home page (change space to _)
$HttpCharset = 'ISO-8859-1'; # Charset for pages, eg. 'UTF-8'
# LinkPattern
$WikiLinks = 1; # 1 = LinkPattern is a link
$FreeLinks = 1; # 1 = [[some text]] is a link
$FreeUpper = 1; # 1 = forces free links to start with upper case
$SimpleLinks = 0; # 1 = only letters in links, 0 = allow _ and numbers
$NonEnglish = 0; # 1 = non-ASCII link characters allowed
$BracketText = 1; # 1 = [URL desc] uses a description for the URL
$BracketWiki = 0; # 1 = [WikiLink desc] uses a desc for the local link
$HtmlLinks = 0; # 1 = <a href="foo">desc</a> is a link
$UseSubpage = 0; # 1 = PageName/SubPage is a link
$NetworkFile = 1; # 1 = file: is a valid protocol for URLs
# TextFormattingRules
$HtmlTags = 0; # 1 = allow some 'unsafe' HTML tags
$RawHtml = 0; # 1 = allow <HTML> environment for raw HTML inclusion
# RecentChanges and KeptPages
$DeletedPage = "DeletedPage"; # Pages starting with this can be deleted
$RCName = 'RecentChanges'; # Name of changes page (change space to _)
if (not @HtmlTags) { # don't set if set in the config file
if ($HtmlTags) {
# HTML tag lists, enabled if $HtmlTags is set.
# Scripting is currently possible with these tags,
# so they are *not* particularly 'safe'.
# Tags that must be in <tag> ... </tag> pairs:
@HtmlTags = qw(b i u font big small sub sup h1 h2 h3 h4 h5 h6 cite code
em s strike strong tt var div center blockquote ol ul dl
table caption br p hr li dt dd tr td th);
} else {
@HtmlTags = qw(b i u em strong tt);
}
}
%HtmlTagEquivalent = ('b' => 'b',
'i' => 'i',
'u' => 'i',
'em' => 'emph',
'strong' => 'strong',
'tt' => 'code',
);
# == You should not have to change anything below this line. ==
$IndentLimit = 20; # Maximum depth of nested lists
# == Config file ==
if (-f $ConfigFile) {
do $ConfigFile;
}
# == Code ==
die "No HomePage set.\n" unless $HomePage;
$outdir = $DataDir unless $outdir;
$outname = 'wiki' unless $outname;
$outfile = $outdir . '/' . $outname;
$licensefile = '' unless $licensefile;
$intro = qq{\\input texinfo \@c -*-texinfo-*-
\@c %**start of header
\@setfilename $outname.info
\@settitle $HomePage
\@ifnottex
\@node Top, $HomePage, , (dir)
\@top $HomePage
\@end ifnottex
};
$q = new CGI;
$q->charset($HttpCharset) if $HttpCharset;
@ScriptPath = split('/', $q->script_name());
$ScriptName = pop(@ScriptPath);
$IndexInit = 0;
$InterSiteInit = 0;
%InterSite = ();
$OpenPageName = '';
@HtmlStack = ();
# Internal Variables
my (@pages, @orphans, @control, $current);
my (%file, %up, %down, %next, %prev, %type, %text, %refs, %menu);
charset($HttpCharset);
print header;
print start_html('Texinfo Creation');
print h1("Texinfo Creation");
print "<p>";
print "Starting<br>\n";
&process;
print "Gzipping<br>\n";
system("/bin/rm", "$outfile.texi.gz");
system("/bin/gzip", "$outfile.texi");
print "Done<br>\n";
print p("Download ", a({-href=>"/$outname.texi.gz"}, "$outname.texi.gz"));
print end_html;
# MAIN CODE
sub process {
open(F,">$outfile.texi") or die "Can't create output file: $!";
&readpages($PageDir);
&InitLinkPatterns;
&readtexts;
print scalar(@pages) . " pages read<br>\n";
&removedeletedpages;
&removeredirects;
print scalar(@pages) . " pages undeleted and not redirected<br>\n";
&makechapters;
&countorphans;
&findremainingchildren;
&countorphans;
&findparents(0);
&countorphans;
&findparents(1);
&countorphans;
&removeorphans();
&countwalkthrough;
# &fixmenus;
print F $intro;
&printmastermenu();
&printorphans;
&printpages;
&printfooter;
close(F);
}
# testing
sub testpages {
foreach my $p (@pages) {
$s = length($text{$p});
print "$p has $s bytes<br>\n";
}
}
sub countorphans {
my $count = 0;
foreach my $page (@pages) {
next if $type{$page};
$count += 1;
}
print "$count/" . scalar(@pages) . " orphaned<br>\n";
}
sub countwalkthrough {
print scalar(@pages) . " remaining<br>\n";
my $count = &countwalk($HomePage);
print "$count pages on the walkthrough<br>\n";
foreach my $page (@pages) {
if (!grep(/^$page$/, @control)) {
print "$page is not part of the walkthroguh<br>\n";
&printnode($page);
&printnode($up{$page});
} elsif (!$type{$page}) {
print "$page on the walk-through has no type<br>\n";
}
}
foreach my $page (@control) {
if (!grep(/^$page$/, @pages)) {
print "$page has suddenly appeared in the page list<br>\n";
}
}
}
sub countwalk {
my $count = 0;
my ($page) = @_;
while ($page) {
push(@control, $page);
$count += 1;
if ($down{$page}) {
$count += &countwalk($down{$page});
}
last if $down{$page} and $next{$page} and $down{$page} eq $next{$page};
$page = $next{$page};
}
return $count;
}
# writing texinfo
sub removeorphans {
my @result;
foreach my $page (@pages) {
if ($type{$page}) {
push(@result, $page);
} else {
push(@orphans, $page);
}
}
@pages = @result;
}
sub removedeletedpages {
my @result;
foreach my $page (@pages) {
push(@result, $page) unless $text{$page} =~ /^DeletedPage/;
}
@pages = @result;
}
sub removeredirects {
my @result;
foreach my $page (@pages) {
push(@result, $page) unless $text{$page} =~ /^#REDIRECT/;
}
@pages = @result;
}
sub printpages {
&printwalk($HomePage);
}
sub printwalk {
my ($page) = @_;
while ($page) {
&printtitle($page);
print F $text{$page};
&printmenu($page);
&printwalk($down{$page}) if ($down{$page});
last if $down{$page} and $next{$page} and $down{$page} eq $next{$page};
$page = $next{$page};
}
}
sub printmenu {
my ($page) = @_;
return unless $menu{$page};
my @menu = @{$menu{$page}};
return unless scalar(@menu) > 0;
# print "Menu: " . join(" ",@{$menu{$page}}) . "<br>\n";
print F "\n\n\@menu\n";
print F "Related:\n";
foreach my $entry (@menu) {
print F "* " . $entry . "::\n";
}
print F "\@end menu\n";
}
sub fixmenus {
foreach my $page (@pages) {
# print "\n$page<br>\n";
next unless $menu{$page};
# entries are the pages that have an up pointer to this page
my @entries = @{$menu{$page}};
my @used = ();
# print "Menu: " . join(" ",@entries) . "<br>\n";
my @lines = split(/\n/, $text{$page});
# Identify the menus
my ($start, $ismenu, $idx) = (0, 0, 0);
while ($idx <= $#lines) {
my $line = $lines[$idx];
# determine start of a menu, check wether any of the entries
# could be part of a menu, and when the end of a menu is
# reached, go back and change it to a menu if such an entry was
# found.
if ($line =~ /^\@itemize/) {
$start = $idx;
$ismenu = 0;
} elsif ($start and not $ismenu) {
if ($line =~ /^\@item *($LinkPattern)/
and grep(/^$1$/,@entries)) {
$ismenu = 1;
}
} elsif ($line =~ /^\@end itemize/) {
if ($ismenu) {
$lines[$start] = '@menu';
$lines[$idx] = '@end menu';
# go through all entries between start and current position
my $i = $start;
while ($i < $idx) {
my $l = $lines[$i];
if ($l =~ /^\@item *($LinkPattern)/
and grep(/^$1$/,@entries)) {
$l =~ s/^\@item *($LinkPattern) *-* */* $1:: /;
push(@used,$1);
$lines[$i] = $l;
} elsif ($l =~ s/^\@item *//) {
$lines[$i] = $l;
}
$i++;
}
}
$start = 0;
$ismenu = 0;
}
$idx++;
}
# print "Used: " . join(" ",@used) . "<br>\n";
my @newmenu = ();
foreach my $entry (@entries) {
unless (grep(/^$entry$/,@used)) {
push(@newmenu, $entry);
}
}
# print "New: " . join(" ",@newmenu) . "<br>\n";
@{$menu{$page}} = @newmenu;
$text{$page} = join("\n",@lines);
}
}
sub printnode {
my ($page) = @_;
my $n = $next{$page} || "";
my $p = $prev{$page} || "";
my $u = $up{$page} || "";
# my $d = $down{$page} || "";
# print "$page, $n, $p, $u, $d<br>\n";
print F "\n\n";
print F "\@node $page, $n, $p, $u\n";
print F "\@cindex $page\n";
}
sub printtitle {
my ($page) = @_;
print F "\n";
&printnode($page);
print F "\@comment node-name, next, previous, up\n";
print F "\@$type{$page} $page\n";
print F "\n";
}
sub makechapters {
my $old = $HomePage;
$type{$HomePage} = 'unnumbered';
$up{$HomePage} = 'Top';
$prev{$HomePage} = 'Top';
print "$HomePage is the main page<br>\n";
foreach my $page (@{$refs{$HomePage}}) {
print "Examining $page...<br>\n";
if ($page =~ /^Category/
and grep(/^$page$/,@pages)) {
print "$page is a chapter<br>\n";
$type{$page} = 'unnumbered';
$up{$page} = $HomePage;
$down{$HomePage} = $page unless ($down{$HomePage}); # only once!
push(@{$menu{$HomePage}}, $page);
$next{$old} = $page;
$prev{$page} = $old;
$old = $page;
# &makechildren($page);
}
}
}
sub makechildren {
my ($parent) = @_;
my $old = $parent;
foreach my $page (@{$refs{$parent}}) {
next if $type{$page} or not grep(/^$page$/,@pages);
# make sure backref exists
if (grep(/^$parent$/,@{$refs{$page}})) {
print "$page added under $parent<br>\n";
# print " it links to: " . join(" ",@{$refs{$page}}) . "<br>\n";
$type{$page} = 'unnumberedsec';
$up{$page} = $parent;
$down{$parent} = $page unless ($down{$parent}); # only once!
push(@{$menu{$parent}}, $page);
$next{$old} = $page unless ($parent eq $old);
$prev{$page} = $old;
$old = $page;
}
}
}
sub findremainingchildren {
foreach my $page (@pages) {
&makechildren($page) if $type{$page} and not $down{$page};
}
}
sub findparents {
my ($force) = @_;
foreach my $page (@pages) {
if (not $type{$page} and $refs{$page}) {
my @parents = @{$refs{$page}}; # any page pointed to might be a parent
if (not $force) {
@parents = grep(/^Category/,@parents); # if possible, use a category as parent
}
@parents = grep(!/^$page$/,@parents); # exclude self
@parents = grep($type{$_},@parents); # only accept non-orphaned parents
if (@parents) {
my $parent = $parents[0];
my $old = $down{$parent}; # find last child
if ($old) {
$old = $next{$old} while ($next{$old});
} else {
$old = $parent;
}
print "$page adopted under $parent<br>\n";
$type{$page} = 'unnumbered';
$up{$page} = $parent;
$down{$parent} = $page unless ($down{$parent}); # only once!
push(@{$menu{$parent}}, $page);
$prev{$page} = $old;
$next{$old} = $page unless ($next{$old}); # only once!
&makechildren($page);
}
}
}
}
sub printfooter {
print F &readfile($licensefile);
print F <<EOT;
\@node Index, , , Top
\@unnumbered Index
\@printindex cp
\@bye
EOT
}
sub printorphans {
print F "\nThe following pages where not included:\n";
foreach my $page (@orphans) {
print F "$page\n";
}
}
sub printmastermenu {
print F '@menu' . "\n";
print F "* " . $HomePage . "::\n";
print F '@detailmenu' . "\n";
foreach my $page (@pages) {
next if ($page eq $HomePage);
print F "* " . $page . "::\n";
}
print F "* GNU Free Documentation License::\n";
print F "* Index::\n";
print F '@end detailmenu' . "\n";
print F '@end menu' . "\n";
}
# reading the page database
sub readpages {
my ($parent) = @_;
print "Starting with $parent...<br>\n";
my @dirs = readdirectory($parent);
foreach my $dir (@dirs) {
next if $dir =~ /^\./ or ! -d "$parent/$dir";
print "Reading $dir directory...<br>\n";
my @files = readdirectory("$parent/$dir");
foreach my $file (@files) {
next if $dir =~ /^\./ or ! -f "$parent/$dir/$file";
# print "Reading $file...<br>\n";
$file =~ /(.*)\.db$/;
my $page = $1;
push @pages, $page;
$file{$page} = "$parent/$dir/$file";
# print "Page $page stored in $file{$page}<br>\n"
}
}
}
sub readtexts {
local $| = 1;
print "Reading pages";
foreach $page (@pages) {
$current = $page;
my %p = &splitpage(&readfile($file{$page}));
my %s = &splitsection(%p);
my %t = &splittext(%s);
my $o = $t{text};
my $w = &wiki2texi($o);
$text{$page} = $w;
print ".";
}
print "done.<br>\n";
}
sub readdirectory {
my ($dirname) = @_;
opendir(DIR, $dirname) or die "can't opendir $dirname: $!";
@files = readdir(DIR);
closedir DIR;
return @files;
}
sub readfile {
my ($filename) = @_;
return '' unless $filename;
my $data;
local $/ = undef; # Read complete files
open(IN, "<$filename") or die "can't read $filename: $!";
$data=<IN>;
close IN;
return $data;
}
sub splitpage {
my ($data) = @_;
my (%page);
%page = split(/$FS1/, $data, -1); # -1 keeps trailing null fields
return %page;
}
sub splitsection {
my (%page) = @_;
my (%section);
%section = split(/$FS2/, $page{text_default}, -1);
return %section;
}
sub splittext {
my (%section) = @_;
my (%text);
%text = split(/$FS3/, $section{data}, -1);
return %text;
}
# Code moved over from wiki.pl
sub InitLinkPatterns {
my ($UpperLetter, $LowerLetter, $AnyLetter, $LpA, $LpB, $QDelim);
# Allow uses to call this from their config file, so do not run twice.
return if $FS;
# Field separators are used in the URL-style patterns below.
if (!$FS) {
$FS = "\x1e"; # The FS character is the RECORD SEPARATOR control char in ASCII
$FS0 = "\xb3"; # The old FS character is a superscript "3" in Latin-1
}
$FS1 = $FS . '1'; # The FS values are used to separate fields
$FS2 = $FS . '2'; # in stored hashtables and other data structures.
$FS3 = $FS . '3'; # The FS character is not allowed in user data.
$UpperLetter = '[A-Z';
$LowerLetter = '[a-z';
$AnyLetter = '[A-Za-z';
if ($NonEnglish) {
$UpperLetter .= "\xc0-\xde";
$LowerLetter .= "\xdf-\xff";
$AnyLetter .= "\x80-\xff";
}
if (!$SimpleLinks) {
$AnyLetter .= '_0-9';
}
$UpperLetter .= ']'; $LowerLetter .= ']'; $AnyLetter .= ']';
# Main link pattern: lowercase between uppercase, then anything
$LpA = $UpperLetter . '+' . $LowerLetter . '+' . $UpperLetter
. $AnyLetter . '*';
# Optional subpage link pattern: uppercase, lowercase, then anything
$LpB = $UpperLetter . '+' . $LowerLetter . '+' . $AnyLetter . '*';
if ($UseSubpage) {
# Loose pattern: If subpage is used, subpage may be simple name
$LinkPattern = "((?:(?:$LpA)?\\/$LpB)|$LpA)";
# Strict pattern: both sides must be the main LinkPattern
# $LinkPattern = "((?:(?:$LpA)?\\/)?$LpA)";
} else {
$LinkPattern = "($LpA)";
}
$QDelim = '(?:"")?'; # Optional quote delimiter (not in output)
$LinkPattern .= $QDelim;
# Inter-site convention: sites must start with uppercase letter
# (Uppercase letter avoids confusion with URLs)
$InterSitePattern = $UpperLetter . $AnyLetter . '+';
$InterLinkPattern = "($InterSitePattern:[-a-zA-Z0-9\x80-\xff_=!?#$@~`%&*+\\/:;.,]+[-a-zA-Z0-9\x80-\xff_=#$@~`%&*+\\/])$QDelim";
if ($FreeLinks) {
# Note: the - character must be first in $AnyLetter definition
if ($NonEnglish) {
$AnyLetter = "[-,.()' _0-9A-Za-z\x80-\xff]";
} else {
$AnyLetter = "[-,.()' _0-9A-Za-z]";
}
}
$FreeLinkPattern = "($AnyLetter+)";
if ($UseSubpage) {
$FreeLinkPattern = "((?:(?:$AnyLetter+)?\\/)?$AnyLetter+)";
}
$FreeLinkPattern .= $QDelim;
# Url-style links are delimited by one of:
# 1. Whitespace (kept in output)
# 2. Left or right angle-bracket (< or >) (kept in output)
# 3. Right square-bracket (]) (kept in output)
# 4. A single double-quote (") (kept in output)
# 5. A $FS (field separator) character (kept in output)
# 6. A double double-quote ("") (removed from output)
$UrlProtocols = 'http|https|ftp|afs|news|nntp|mid|cid|mailto|wais|'
. 'prospero|telnet|gopher';
$UrlProtocols .= '|file' if $NetworkFile;
$UrlPattern = "((?:$UrlProtocols):(?://[-a-zA-Z0-9_.]+:[0-9]*)?[-a-zA-Z0-9_=!?#$\@~`%&*+\\/:;.,]+[-a-zA-Z0-9_=#$\@~`%&*+\\/])$QDelim";
$ImageExtensions = '(gif|jpg|png|bmp|jpeg)';
}
# ==== Common wiki markup ====
sub wiki2texi {
my ($pageText) = @_;
%SaveUrl = ();
%SaveNumUrl = ();
$SaveUrlIndex = 0;
$SaveNumUrlIndex = 0;
return '#REDIRECT' if $pageText =~ /^#REDIRECT/;
return 'DeletedPage' if $pageText =~ /^DeletedPage/;
$pageText =~ s/$FS//g; # Remove separators (paranoia)
$pageText = &QuoteTexi($pageText);
$pageText = &ApplyTexiRules($pageText, 1);
return $pageText;
}
sub QuoteTexi {
my ($text) = @_;
$text =~ s/\r//g; # remove ^M
$text =~ s/\@/\@\@/g; # quote @
$text =~ s/\{/\@\{/g; # quote {
$text =~ s/\}/\@\}/g; # quote }
$text =~ s/\\input/\@\}/g; # quote }
return $text;
}
sub ApplyTexiRules {
# locallinks: apply rules that create links depending on local config (incl. interlink!)
my ($text, $locallinks) = @_;
$text =~ s/\r\n/\n/g; # DOS to Unix
my $state = ''; # quote, list, or normal ('')
my $fragment; # the current HTML fragment to be printed
my $result;
my $htmlre = join('|',(@HtmlTags));
my ($oldmatch, $rest);
local $_ = $text;
while(1) {
# first block -- at the beginning of a line. Note that block level elements eat empty lines to prevent empty p elements.
undef($fragment);
if (m/\G(?<=\n)/cg or m/\G^/cg) { # at the beginning of a line
if (m/\G&lt;pre&gt;\n?(.*?\n)&lt;\/pre&gt;[ \t]*\n?/cgs) { # pre must be on column 1
$fragment = &CloseHtmlEnvironments() . "\@example\n" . $1 . "\@end example\n";
} elsif (m/\G(\s*\n)*(\*+)[ \t]*/cg) {
$fragment = &OpenHtmlEnvironment('itemize',length($2)) . "\n\@item ";
} elsif (m/\G(\s*\n)*(\#+)[ \t]*/cg) {
$fragment = &OpenHtmlEnvironment('enumerate',length($2)) . "\n\@item ";
} elsif (m/\G(\s*\n)*(\:+)[ \t]*/cg) {
$fragment = &OpenHtmlEnvironment('quotation',length($2));
} elsif (m/\G(\s*\n)*(\=+)[ \t]*(.*?)[ \t]*(=+)[ \t]*\n?/cg) {
$fragment = &CloseHtmlEnvironments() . &WikiHeading($2, $3);
} elsif (m/\G(\s*\n)*----+[ \t]*\n?/cg) {
$fragment = &CloseHtmlEnvironments() . $q->hr();
} elsif (m/\G(\s*\n)*(([ \t]+.*\n?)+)/cg) {
$fragment = &OpenHtmlEnvironment('example',1) . $2; # always level 1
} elsif (m/\G(\s*\n)*(\;+)[ \t]*(?=.*\:)/cg) {
$fragment = &OpenHtmlEnvironment('table',length($2), '@asis')
. "\@item "; # the `:' needs special treatment, later
} elsif (m/\G(\s*\n)+/cg) {
$fragment = &CloseHtmlEnvironments() . "\n\n"; # there is another one like this further down
} elsif (m/\G(\&lt;include +"(.*)"\&gt;[ \t]*\n?)/cgi) { # <include "uri..."> includes the text of the given URI verbatim
$oldmatch = $1;
my $oldpos = pos;
&ApplyTexiRules(&QuoteHtml(&GetRaw($2)),0);
pos = $oldpos;
} elsif (m/\G(\&lt;rss +"(.*)"\&gt;[ \t]*\n?)/cgi) { # <rss "uri..."> stores the parsed RSS of the given URI
$result .= "[RSS feed from \@url{$2}]";
}
if (defined $fragment) {
$result .= $fragment;
next; # skipt the remaining tests
}
}
# second block -- remaining hilighting
if ($HtmlStack[0] eq 'table' && m/\G:/cg) {
$fragment = "\n";
} elsif (m/\G\&lt;nowiki\&gt;(.*?)\&lt;\/nowiki\&gt;/cgis) {
$fragment = $1;
} elsif (m/\G\&lt;code\&gt;(.*?)\&lt;\/code\&gt;/cgis) {
$fragment = $q->code($1);
} elsif ($RawHtml && m/\G\&lt;html\&gt;(.*?)\&lt;\/html\&gt;/cgis) {
$fragment = &UnquoteHtml($1);
} elsif (m/\G'''/cg) { # traditional wiki syntax with '''strong'''
if ($HtmlStack[0] eq 'strong') {
$fragment = &CloseHtmlEnvironment();
} else {
$fragment = &AddHtmlEnvironment('strong');
}
} elsif (m/\G''/cg) { # traditional wiki syntax with ''emph''
if ($HtmlStack[0] eq 'em') {
$fragment = &CloseHtmlEnvironment();
} else {
$fragment = &AddHtmlEnvironment('em');
}
} elsif (m/\G\&lt;($htmlre)\&gt;/cgi) { # opening
$fragment = &AddHtmlEnvironment($1);
} elsif (m/\G\&lt;\/($htmlre)\&gt;/cgi) { # closing tags
$fragment = &CloseHtmlEnvironment($1);
} elsif ($HtmlLinks && m/\G\&lt;a(\s[^<>]+?)\&gt;(.*?)\&lt;\/a\&gt;/cgi) { # <a ...>text</a>
$fragment = "<a$1>$2</a>";
} elsif ($BracketText && $locallinks && m/\G(\[$InterLinkPattern\s+([^\]]+?)\])/cg) { # [InterLink text]
# Interlinks can change when the intermap file changes (local config, therefore depend on $locallinks).
# The intermap file is only read if necessary, so if this not an interlink after all,
# we have to backtrack a bit.
$oldmatch = $1;
$fragment = &GetInterLink($2, $3, 1);
# we may have to backtrack a bit.
if ($oldmatch eq $fragment) {
($fragment, $rest) = split(/:/, $oldmatch, 2);
pos = (pos) - length($rest) - 1;
} else {
$result .= $fragment;
}
} elsif ($locallinks && m/\G(\[$InterLinkPattern\])/cog) { # [InterWiki:FooBar] makes footnotes [1]
$oldmatch = $1;
$fragment = &GetInterLink($2, '', 1);
if ($oldmatch eq $fragment) {
($fragment, $rest) = split(/:/, $oldmatch, 2);
pos = (pos) - length($rest) - 1;
} else {
$result .= $fragment;
}
} elsif ($locallinks && m/\G$InterLinkPattern/cog) { # InterWiki:FooBar
$oldmatch = $1;
$fragment = &GetInterLink($oldmatch, '', 0);
# we have to backtrack a bit.
if ($oldmatch eq $fragment) {
($fragment, $rest) = split(/:/, $oldmatch, 2);
pos = (pos) - length($rest) - 1;
} else {
$result .= $fragment;
}
} elsif ($BracketText && m/\G\[$UrlPattern\s+([^\]]+?)\]/cg) { # [URL text] makes [text] link to URL
$fragment = &GetUrl($1, $2, 1, 0);
} elsif (m/\G\[$UrlPattern\]/cog) { # [URL] makes footnotes [1]
$fragment = &GetUrl($1, '', 1, 0);
} elsif (m/\G$UrlPattern/cg) { # plain URLs after all $UrlPattern, such that [$UrlPattern text] has priority
$fragment = &GetUrl($1, '', 0, 1);
} elsif ($WikiLinks && $BracketWiki && $locallinks && m/\G(\[$LinkPattern\s+([^\]]+?)\])/cg) { # [LocalPage text]
$result .= &GetPageOrEditLink($2, $3, 1);
} elsif ($WikiLinks && $locallinks && m/\G(\[$LinkPattern\])/cg) { # [LocalPage]
$result .= &GetPageOrEditLink($2, '', 1);
} elsif ($WikiLinks && $locallinks && m/\G$LinkPattern/cg) { # LocalPage
# LinkPattern after all $UrlPattern, such that http//:...?FooBar
# will not get an additional ? if FooBar is undefined.
$result .= &GetPageOrEditLink($1, '');
} elsif ($FreeLinks && $BracketWiki && $locallinks && m/\G(\[\[$FreeLinkPattern\|([^\]]+)\]\])/cg) { # [[Free Link|text]]
$result .= &GetPageOrEditLink($2, $3, 0 , 1);
} elsif ($FreeLinks && $locallinks && m/\G(\[\[$FreeLinkPattern\]\])/cg) { # [[Free Link]]
$result .= &GetPageOrEditLink($2, '', 0, 1);
} elsif (m/\G\s*\n(s*\n)+/cg) { # paragraphs -- whitespace including at least two newlines
$fragment = &CloseHtmlEnvironments() . "\n\n"; # there is another one like this further up
} elsif (m/\G\s+/cgs) { # whitespace -- including (max one) newlines due to previous rules
$fragment = ' ';
} elsif (m/\G(\w+)/cgi) { # word -- cannot use \S here because that eats following markup, too: word<b> for example.
$fragment = $1;
} elsif (m/\G(\S)/cg) { # punctuation and other stuff, if not matched by previous markup rule. Gotta move slowly, eg. word.</b>
$fragment = $1;
} else {
last;
}
if (defined $fragment) {
$result .= $fragment;
}
}
# last block -- close it, cache it
$fragment = &CloseHtmlEnvironments();
if (defined $fragment) {
$result .= $fragment;
}
return $result;
}
sub UnquoteHtml {
my ($html) = @_;
$html =~ s/&lt;/</g;
$html =~ s/&gt;/>/g;
$html =~ s/&amp;/&/g;
return $html;
}
sub CloseEnv {
my $tag = shift;
if (exists $HtmlTagEquivalent{$tag}) {
return "}";
} else {
return "\n\@end " . $tag . "\n";
}
}
sub OpenEnv {
my ($tag, $attr) = @_;
if ($attr) {
return "\n\@$tag $attr\n";
} elsif (exists $HtmlTagEquivalent{$tag}) {
return '@' . $HtmlTagEquivalent{$tag} . '{';
} else {
return "\n\@$tag\n";
}
}
sub CloseHtmlEnvironment { # just close the current one
my $code = shift;
my $result = shift(@HtmlStack) if not defined($code) or $HtmlStack[0] eq $code;
return &CloseEnv($result) if $result;
return "</$code>"; # to recognize the bug
}
sub AddHtmlEnvironment { # add a new one so that it will be closed!
my ($code) = @_;
if ($HtmlStack[0] ne $code) {
unshift(@HtmlStack, $code);
return &OpenEnv($code);
}
return '';
}
sub CloseHtmlEnvironments { # close all
my $text = ''; # always return something
my $tag;
while (@HtmlStack > 0) {
$text .= &CloseEnv(shift(@HtmlStack));
}
return $text;
}
sub OpenHtmlEnvironment { # close the previous one and open a new one instead
my ($code, $depth, $class) = @_;
my ($oldCode, $tag);
my $text = ''; # always return something
$depth = @HtmlStack unless defined($depth);
while (@HtmlStack > $depth) { # Close tags as needed
$text .= &CloseEnv(shift(@HtmlStack));
}
if ($depth > 0) {
$depth = $IndentLimit if ($depth > $IndentLimit);
if (@HtmlStack) { # Non-empty stack
$oldCode = shift(@HtmlStack);
if ($oldCode ne $code) {
$text .= &CloseEnv($oldCode);
$text .= &OpenEnv($code, $class);
}
unshift(@HtmlStack, $code);
}
while (@HtmlStack < $depth) {
unshift(@HtmlStack, $code);
$text .= &OpenEnv($code, $class);
}
}
return $text;
}
sub GetInterLink {
my ($id, $text, $bracket) = @_;
my ($site, $page) = split(/:/, $id, 2);
$page =~ s/&amp;/&/g; # Unquote common URL HTML
my $url;
$url = &GetSiteUrl($site) if $page;
if ($text && $bracket && !$url) {
return "[$id $text]";
} elsif ($bracket && !$url) {
return "[$id]";
} elsif (!$url) {
return $id;
} elsif ($bracket && !$text) {
$text = ++$FootnoteNumber;
} elsif (!$text) {
$text = $id;
}
$url .= $page;
return "\@uref{$url,$text}";
}
sub GetSiteUrl {
my ($site) = @_;
my ($data, $url, $status);
if (!$InterSiteInit) {
$InterSiteInit = 1;
($status, $data) = &ReadFile($InterFile);
return '' if (!$status);
%InterSite = split(/\s+/, $data); # Later consider defensive code
}
$url = $InterSite{$site} if (defined($InterSite{$site}));
return $url;
}
sub GetUrl {
my ($url, $text, $bracket, $images) = @_;
if ($NetworkFile && $url =~ m|^file:///|
or !$NetworkFile && $url =~ m|^file:|) {
# Only do remote file:// links. No file:///c|/windows.
return $url;
} elsif ($bracket && !$text) {
return "\@footnote{$url}";
}
$url = &UnquoteHtml($url); # links should be unquoted again
if ($text) {
return "\@uref{$url, $text}";
} else {
return "\@uref{$url}";
}
}
sub GetPageOrEditLink { # use GetPageLink and GetEditLink if you know the result!
my ($id, $text, $bracket, $free) = @_;
local $| = 1;
$id =~ s/^\s+//; # Trim extra spaces
$id =~ s/\s+$//;
$id =~ s|\s*/\s*|/|; # ...also before/after subpages
$id =~ s|^/|$MainPage/|;
$id = &FreeToNormal($id) if $free;
my $exists = grep(/^$id$/, @pages);
if (!$text && $exists && $bracket) {
$text = ++$FootnoteNumber;
}
if ($exists) {
$text = $id unless $text;
$text =~ s/_/ /g if $free;
$text = "[$text]" if $bracket;
return &ScriptLink($id, $text);
} else {
# $free and $bracket usually exclude each other
# $text and not $bracket exclude each other
if ($bracket && $text) {
return "[$id $text]";
} elsif ($bracket) {
return "[$id]";
} elsif ($free && $text) {
$id =~ s/_/ /g;
$text =~ s/_/ /g;
return "[$id $text]";
} elsif ($free) {
$text = $id;
$text = "[$text]" if $text =~ /_/;
$text =~ s/_/ /g;
return $text;
} else { # plain, no text
return $id;
}
}
}
sub GetPageLink { # shortcut
my ($id, $name) = @_;
$name = $id unless $name;
$id =~ s|^/|$MainPage/|;
if ($FreeLinks) {
$id = &FreeToNormal($id);
$name =~ s/_/ /g;
}
return &ScriptLink($id, $name);
}
sub GetEditLink { # shortcut
my ($id, $name) = @_;
$id =~ s|^/|$MainPage/|;
if ($FreeLinks) {
$id = &FreeToNormal($id);
$name =~ s/_/ /g;
}
return &ScriptLink("action=edit&id=$id", $name);
}
sub ScriptLink {
my ($action, $text) = @_;
push @{$refs{$current}}, $text; # remember all forward links
return $text;
}
sub WikiHeading {
my ($pre, $depth, $text) = @_;
return $pre . "\@heading $text\n";
}
sub T {
my ($text) = @_;
if (1) { # Later make translation optional?
if (defined($Translate{$text}) && ($Translate{$text} ne '')) {
return $Translate{$text};
}
}
return $text;
}
sub Ts {
my ($text, $string) = @_;
$text = T($text);
$text =~ s/\%s/$string/;
return $text;
}
sub ReadFile {
my ($fileName) = @_;
my ($data);
local $/ = undef; # Read complete files
if (open(IN, "<$fileName")) {
$data=<IN>;
close IN;
if ($FS0 and $data =~ /$FS0/ and $data !~ /$FS/) {
$data =~ s/$FS0/$FS/go;
$FS0used = 1;
}
return (1, $data);
}
return (0, '');
}
sub FreeToNormal {
my ($id) = @_;
$id =~ s/ /_/g;
$id = ucfirst($id);
if (index($id, '_') > -1) { # Quick check for any space/underscores
$id =~ s/__+/_/g;
$id =~ s/^_//;
$id =~ s/_$//;
if ($UseSubpage) {
$id =~ s|_/|/|g;
$id =~ s|/_|/|g;
}
}
if ($FreeUpper) {
# Note that letters after ' are *not* capitalized
if ($id =~ m|[-_.,\(\)/][a-z]|) { # Quick check for non-canonical case
$id =~ s|([-_.,\(\)/])([a-z])|$1 . uc($2)|ge;
}
}
return $id;
}