From 90a92ff4cef748f546c19c4e3e9c535429a4e97f Mon Sep 17 00:00:00 2001 From: Alex Schroeder Date: Fri, 11 Jan 2013 10:40:35 +0100 Subject: [PATCH] Reading files using :utf8 instead of :encoding(utf-8). This is normally discouraged because :utf8 does not validate the input. The problem is that in some cases you can end up with invalid UTF-8 if your wiki was created with a copy of Oddmuse that allowed raw bytes. Back then, we asked users to provide UTF-8 input and printed it back claiming that it was UTF-8, but in the end it was just a convention. Spammers and vandals could upload anything they liked. This is why your rc.log (and all other sorts of files) may contain invalid UTF-8 bytes. This is particularly troublesome in the case of your rc.log files as these will never go away and they are read very often. With :encoding(UTF-8), the resulting validation warnings will fill up your web server logs. --- wiki.pl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/wiki.pl b/wiki.pl index d6ef7506..901b25fe 100755 --- a/wiki.pl +++ b/wiki.pl @@ -1500,7 +1500,7 @@ sub GetRcLines { # starttime, hash of seen pages to use as a second return value my %following = (); my @result = (); # check the first timestamp in the default file, maybe read old log file - open(F, '<:encoding(UTF-8)', $RcFile); + open(F, '<:utf8', $RcFile); my $line = <F>; my ($ts) = split(/$FS/o, $line); # the first timestamp in the regular rc file if (not $ts or $ts > $starttime) { # we need to read the old rc file, too @@ -1586,7 +1586,6 @@ sub GetRcLinesFor { rcclusteronly rcfilteronly match lang followup); # parsing and filtering my @result = (); - # using :utf8 instead of :encoding(utf-8) to avoid validation and warnings on corrupt files :( open(F, '<:utf8', $file) or return (); while (my $line = <F>) { chomp($line); @@ -2668,8 +2667,8 @@ sub OpenPage { # Sets global variables $Page{ts} = $Now; $Page{revision} = 0; if ($id eq $HomePage - and (open(F, '<:encoding(UTF-8)', $ReadMe) - or open(F, '<:encoding(UTF-8)', 
'README'))) { + and (open(F, '<:utf8', $ReadMe) + or open(F, '<:utf8', 'README'))) { local $/ = undef; $Page{text} = <F>; close F; @@ -2799,7 +2798,7 @@ sub ExpireKeepFiles { # call with opened page sub ReadFile { my $file = shift; utf8::encode($file); # filenames are bytes! - if (open(IN, '<:encoding(UTF-8)', $file)) { + if (open(IN, '<:utf8', $file)) { local $/ = undef; # Read complete files my $data=<IN>; close IN; @@ -3373,7 +3372,7 @@ sub PageIsUploadedFile { if ($IndexHash{$id}) { my $file = GetPageFile($id); utf8::encode($file); # filenames are bytes! - open(FILE, '<:encoding(UTF-8)', $file) + open(FILE, '<:utf8', $file) or ReportError(Ts('Cannot open %s', $file) . ": $!", '500 INTERNAL SERVER ERROR'); while (defined($_ = <FILE>) and $_ !~ /^text: /) { } # read lines until we get to the text key