From 1fbbfd422ea2fc190dd40ce600d5660b8dfc6b80 Mon Sep 17 00:00:00 2001 From: Alex Schroeder Date: Wed, 16 Sep 2015 03:35:12 +0300 Subject: [PATCH] Workaround for utf8::decode bug (sometimes utf8 chars were not decoded) Remember the problem with toc.pl when the whole page was *sometimes* not utf8-decoded? There were some thoughts that it might be associated with memory files, and that turns out to be correct. Although I was not able to narrow it down last time, now I have (simply because this problem appeared elsewhere). If you look at the $output variable after utf8::decode with Devel::Peek, you will see two variants of flags. This one looks good: FLAGS = (PADMY,POK,pPOK,UTF8) And this one is wrong: FLAGS = (PADMY,POK,pPOK) This problem is weird because it occurs inconsistently. Most of the time you will get correct output, but sometimes it will be broken. Someone has to golf it down to something short in order to submit a perl bug report. This, however, does not look like a simple task. The current workaround is as stupid as it looks, but it works. Somehow, assigning the string to another variable solves the problem (which, by the way, is similar to solving other perl string-related problems). --- modules/toc.pl | 7 +++++-- wiki.pl | 5 +++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/toc.pl b/modules/toc.pl index 52f2038f..0c7c5f48 100644 --- a/modules/toc.pl +++ b/modules/toc.pl @@ -231,12 +231,15 @@ sub NewTocApplyRules { my ($html, $blocks, $flags); { local *STDOUT; - open( STDOUT, '>', \$html) or die "Can't open memory file: $!"; + my $html_unfixed; + open( STDOUT, '>', \$html_unfixed) or die "Can't open memory file: $!"; binmode STDOUT, ":encoding(UTF-8)"; ($blocks, $flags) = OldTocApplyRules(@_); close STDOUT; utf8::decode($blocks); - utf8::decode($html); + # do not delete! + $html = $html_unfixed; # this is a workarond for perl bug + utf8::decode($html); # otherwise UTF8 characters are SOMETIMES not decoded. 
} # If there are at least two HTML headers on this page, insert a table of # contents. diff --git a/wiki.pl b/wiki.pl index 878d132e..c92f9647 100755 --- a/wiki.pl +++ b/wiki.pl @@ -1251,8 +1251,9 @@ sub ToString { $sub_ref->(); select $oldFH; close $outputFH; - utf8::decode($output); - return $output; + my $output_fixed = $output; # do not delete! + utf8::decode($output_fixed); # this is a workarond for perl bug + return $output_fixed; # otherwise UTF8 characters are SOMETIMES not decoded. } sub PageHtml {