Files
oddmuse/stuff/upgrade-files.pl
Alex Schroeder 5130a92f9c upgrade_files.pl: Get rid of a warning
Using $q->param in list context...
2015-08-16 09:55:36 +02:00

221 lines
7.3 KiB
Perl
Executable File

#! /usr/bin/perl -w
use CGI qw/:standard/;
use CGI::Carp qw(fatalsToBrowser);
if (param('separator') eq 'UseMod 0.92' or param('separator') eq 'UseMod 1.00') {
$FS = "\xb3";
} elsif (param('separator') eq 'UseMod 1.00 with $NewFS set') {
$FS = "\x1e\xff\xfe\x1e";
} else {
$FS = "\x1e";
}
$NewFS = "\x1e";
# override $FS if you want!
print header() . start_html('Upgrading Files'), p;
print q{Upgrade version: $Id: upgrade-files.pl,v 1.16 2010/11/06 11:51:18 as Exp $}, "\n";
if (not param('dir')) {
print start_form, p, '$DataDir: ', textfield('dir', '/tmp/oddmuse'),
p, radio_group('separator', ['Oddmuse', 'UseMod 0.92', 'UseMod 1.00',
'UseMod 1.00 with $NewFS set']),
p, checkbox('convert', 'checked', 'on', 'Convert Latin-1 to UTF-8'),
p, submit('Ok'), "\n", end_form;
} elsif (param('dir') and not param('sure')) {
print start_form, hidden('sure', 'yes'), hidden('dir', param('dir')),
hidden('separator', param('separator')), hidden('convert', param('convert')),
p, '$DataDir: ', param('dir'),
p, 'separator used when reading pages: ',
join(', ', map { sprintf('0x%x', ord($_)) } split (//, $FS)),
p, 'separator used when writing pages: ',
join(', ', map { sprintf('0x%x', ord($_)) } split (//, $NewFS)),
p, 'Convert Latin-1 to UTF-8: ', param('convert') ? 'Yes' : 'No',
p, submit('Confirm'), "\n", end_form;
} else {
rewrite(scalar(param('dir')));
}
print end_html();
sub rewrite {
my ($directory) = @_;
$FS1 = $FS . "1";
$FS2 = $FS . "2";
$FS3 = $FS . "3";
my @files = glob("$directory/page/*/*.db");
if (not @files) {
print "$directory does not seem to be a data directory.\n";
return;
}
print '<pre>';
foreach my $file (@files) {
print "Reading page $file...\n";
my %page = split(/$FS1/, read_file($file), -1);
%section = split(/$FS2/, $page{text_default}, -1);
%text = split(/$FS3/, $section{data}, -1);
$file =~ s!/([A-Z]|other)/!/!;
$file =~ s/\.db$/.pg/ or die "Invalid page name\n";
print "Writing $file...\n";
write_page_file($file);
}
print '</pre>';
@files = glob("$directory/referer/*/*.rb");
print '<pre>';
foreach my $file (@files) {
print "Reading refer $file...\n";
my $data = read_file($file);
$data =~ s/$FS1/$NewFS/g;
$file =~ s!/([A-Z]|other)/!/!;
$file =~ s/\.rb$/.rf/ or die "Invalid page name\n";
print "Writing $file...\n";
write_file($file, $data);
}
print '</pre>';
@files = glob("$directory/keep/*/*.kp");
foreach my $file (@files) {
print '<pre>';
print "Reading keep $file...\n";
my $data = read_file($file);
my @list = split(/$FS1/, $data);
my $out = $file;
$out =~ s!/([A-Z]|other)/!/!;
$out =~ s/\.kp$// or die "Invalid keep name\n";
# We introduce a new variable $dir, here, instead of using $out,
# because $out will be part of the filename later on, and the
# filename will be converted in write_file. To convert $out to
# utf8 would double-encode the directory part of the filename.
my $dir = param('convert') ? utf8($out) : $out;
print "Creating $out...\n";
mkdir($dir) or die "Cannot create directory $dir\n" unless -d $dir;
foreach my $keep (@list) {
next unless $keep;
%section = split(/$FS2/, $keep, -1);
%text = split(/$FS3/, $section{data}, -1);
my $current = "$out/$section{'revision'}.kp";
print "Writing $current...\n";
write_keep_file($current);
}
print '</pre>';
}
@files = glob("$directory/*rclog");
print '<pre>';
foreach my $file (@files) {
print "Reading $file...\n";
my $data = read_file($file);
@rc = split(/\n/, $data);
foreach (@rc) {
my ($ts, $pagename, $summary, $minor, $host, $kind, $extraTemp)
= split(/$FS3/, $_);
my %extra = split(/$FS2/, $extraTemp, -1);
foreach ('name', 'revision', 'languages', 'cluster') {
$extra{$_} = '' unless $extra{$_};
}
$extra{languages} =~ s/$FS1/,/g;
$_ = join($NewFS, $ts, $pagename, $minor, $summary, $host,
$extra{name}, $extra{revision}, $extra{languages}, $extra{cluster});
}
$data = join("\n", @rc) . "\n";
$file =~ s/log$/.log/;
print "Writing $file...\n";
write_file($file, $data);
}
print '</pre>';
print p, "Done.\n";
}
sub read_file {
my ($filename) = @_;
my ($data);
local $/ = undef; # Read complete files
open(F, "<$filename") or die "can't read $filename: $!";
$data=<F>;
close F;
return $data;
}
sub write_file {
my ($filename, $data) = @_;
if (param('convert')) {
$filename = utf8($filename);
$data = utf8($data);
}
open(F, ">$filename") or die "can't write $filename: $!";
print F $data;
close F;
}
sub cache {
$_ = shift;
return "" unless $_;
my ($block, $flag) = split(/$FS2/, $_);
my @blocks = split(/$FS3/, $block);
my @flags = split(/$FS3/, $flag);
return 'blocks: ' . escape_newlines(join($NewFS, @blocks)) . "\n"
. 'flags: ' . escape_newlines(join($NewFS, @flags)) . "\n";
}
sub escape_newlines {
$_ = shift;
$_ =~ s/\n/\n\t/g if $_;
return $_;
}
# Skip the info encoded in the filename (page name). We need the info
# stored in the rclog (summary, ip, host, username) for the history
# page. Don't trust the modification dates of the files themselves,
# which is why we have the timestamp in the file, too. We need the
# timestamp when expiring old keep files. We need all the info in the
# page file that will eventually end up in the keep file.
sub basic_data {
my $data = 'ts: ' . $section{ts} . "\n" if $section{ts};
$data .= 'keep-ts: ' . $section{keepts} . "\n" if $section{keepts};
$data .= 'revision: ' . $section{revision} . "\n" if $section{revision};
$data .= 'summary: ' . $section{summary} . "\n" if $section{summary};
$data .= 'summary: ' . $text{summary} . "\n" if $text{summary} and not $section{summary};
$data .= 'username: ' . $section{username} . "\n" if $section{username};
$data .= 'ip: ' . $section{ip} . "\n" if $section{ip};
$data .= 'host: ' . $section{host} . "\n" if $section{host};
$data .= 'minor: ' . $text{minor} . "\n" if $text{minor};
# $data .= 'oldmajor: ' . $page{cache_oldmajor} . "\n" if $page{cache_oldmajor};
$data .= 'text: ' . escape_newlines($text{text}) . "\n";
return $data;
}
sub write_page_file {
my $file = shift;
my $data = basic_data();
$data .= cache($page{cache_blocks});
$data .= 'diff-major: ' . escape_newlines($page{cache_diff_default_major}) . "\n"
if $page{cache_diff_default_major};
$data .= 'diff-minor: ' . escape_newlines($page{cache_diff_default_minor}) . "\n"
if $page{cache_diff_default_minor};
write_file($file, $data);
}
sub write_keep_file {
my $file = shift;
my $data = basic_data();
write_file($file, $data);
}
# This Latin-1 to UTF-8 conversion was written by Skalman on the
# Oddmuse Wiki. He says: I added a quick, dirty and completely
# unreadable hack to convert all characters above 0x7F:
# s/([\x80-\xff])/chr(0xc0+(ord($1)>>6)).chr(ord($1)&0b00111111|0b10000000)/ge;
# Reading the UTF-8 and Unicode FAQ, I convert every character to
# (binary) 110xxxxx 10xxxxxx where the 'x' marks the bits of the
# original ISO-8859-1 character. That is: take the two most
# significant bits of the caracter and add them to 0xC0 (first byte),
# then replace the first two bits with 10 (second byte).
sub utf8 {
$_ = shift;
s/([\x80-\xff])/chr(0xc0+(ord($1)>>6)).chr(ord($1)&0b00111111|0b10000000)/ge;
return $_;
}