2010-10-23 13:08:08 +00:00
|
|
|
# Copyright (C) 2004, 2005, 2006, 2010 Alex Schroeder <alex@gnu.org>
|
2005-01-07 00:50:34 +00:00
|
|
|
#
|
2010-10-23 13:08:08 +00:00
|
|
|
# This program is free software: you can redistribute it and/or modify it under
|
|
|
|
|
# the terms of the GNU General Public License as published by the Free Software
|
|
|
|
|
# Foundation, either version 3 of the License, or (at your option) any later
|
|
|
|
|
# version.
|
2005-01-07 00:50:34 +00:00
|
|
|
#
|
2010-10-23 13:08:08 +00:00
|
|
|
# This program is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
|
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
2005-01-07 00:50:34 +00:00
|
|
|
#
|
2010-10-23 13:08:08 +00:00
|
|
|
# You should have received a copy of the GNU General Public License along with
|
|
|
|
|
# this program. If not, see <http://www.gnu.org/licenses/>.
|
2015-03-27 03:01:01 +02:00
|
|
|
use strict;
|
2015-08-18 10:48:03 +02:00
|
|
|
use v5.10;
|
2005-01-07 00:50:34 +00:00
|
|
|
|
2015-08-17 16:54:07 +02:00
|
|
|
AddModuleDescription('referrer-tracking.pl', 'Automatic Link Back');
|
2006-04-02 17:24:29 +00:00
|
|
|
|
|
|
|
|
use LWP::UserAgent;
|
2005-01-07 00:50:34 +00:00
|
|
|
|
PrintFooter relies on @MyFooters
We already called all the subs on @MyFooters and printed the result,
but this commit moves all the code from PrintFooters into subs and
puts those subs on @MyFooters. This allows us to write modules that
can better control where exactly their output should appear. In this
case the change was required in order to allow the Google +1 module to
coexist with code that maybe prints the comment form for all pages.
For example, knowing that the Google +1 sub is the first on one the
list because of unshift(@MyFooters, \&GooglePlusPrintFooter), we can
now write the following:
splice(@MyFooters, 1, 0, \&MyCommentsInTheFooter);
sub MyCommentsInTheFooter {
my ($id, $rev, $comment) = @_;
if (not $GooglePlusThisPagePrintedJournal
and (GetParam('action', 'browse') eq 'browse'
and $id and $CommentsPrefix
and $id ne $RCName
and $id !~ /^$CommentsPrefix(.*)/o)) {
my $target = $CommentsPrefix . $id;
my $page = '';
$page = PageHtml($target) if $IndexHash{$target};
return $q->div({-class=>'comment'},
$q->h2(T('Comments')),
$page)
. GetCommentForm("$CommentsPrefix$id", $rev, $comment);
}
}
The Google +1 extension was also fixed to not triger the EFF's Privacy
Badger. This is OK because we're using a two step button: The user
needs to click a button before we're loading the script from Google.
2015-08-17 13:39:34 +02:00
|
|
|
our ($q, $Now, $OpenPageName, %Action, @KnownLocks, %AdminPages,
|
|
|
|
|
$ScriptName, $DataDir, $EmbedWiki, $FS, @MyInitVariables,
|
|
|
|
|
@MyAdminCode, $FullUrlPattern, @MyFooters);
|
|
|
|
|
|
2005-01-07 00:50:34 +00:00
|
|
|
push(@KnownLocks, "refer_*");
|
|
|
|
|
$Action{refer} = \&DoPrintAllReferers;
|
|
|
|
|
|
2015-04-10 13:31:28 +03:00
|
|
|
our ($RefererDir, $RefererTimeLimit, $RefererLimit, $RefererFilter,
|
|
|
|
|
$RefererTitleLimit, %Referers);
|
2005-01-07 00:50:34 +00:00
|
|
|
|
|
|
|
|
$RefererTimeLimit = 86400; # How long referrals shall be remembered in seconds
|
|
|
|
|
$RefererLimit = 15; # How many different referer shall be remembered
|
|
|
|
|
$RefererFilter = 'ReferrerFilter'; # Name of the filter page
|
2010-10-23 13:31:07 +00:00
|
|
|
$RefererTitleLimit = 70; # This is used to shorten long titles
|
2005-01-07 00:50:34 +00:00
|
|
|
|
|
|
|
|
push(@MyInitVariables, \&RefererInit);
|
|
|
|
|
|
|
|
|
|
sub RefererInit {
|
|
|
|
|
$RefererFilter = FreeToNormal($RefererFilter); # spaces to underscores
|
2006-07-15 23:14:22 +00:00
|
|
|
$AdminPages{$RefererFilter} = 1;
|
2005-01-07 00:50:34 +00:00
|
|
|
$RefererDir = "$DataDir/referer"; # Stores referer data
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
push(@MyAdminCode, \&RefererMenu);
|
|
|
|
|
|
|
|
|
|
sub RefererMenu {
|
|
|
|
|
my ($id, $menuref, $restref) = @_;
|
2006-08-06 11:48:07 +00:00
|
|
|
push(@$menuref, ScriptLink('action=refer', T('All Referrers'), 'refer'));
|
2005-01-07 00:50:34 +00:00
|
|
|
}
|
|
|
|
|
|
2015-04-11 23:41:33 +03:00
|
|
|
*RefererOldExpireKeepFiles = \&ExpireKeepFiles;
|
|
|
|
|
*ExpireKeepFiles = \&RefererNewExpireKeepFiles;
|
2005-01-07 00:50:34 +00:00
|
|
|
|
|
|
|
|
sub RefererNewExpireKeepFiles {
|
|
|
|
|
RefererOldExpireKeepFiles(@_); # call with opened page
|
2005-01-07 00:54:29 +00:00
|
|
|
ReadReferers($OpenPageName); # clean up reading (expiring) and writing
|
2005-01-07 00:50:34 +00:00
|
|
|
WriteReferers($OpenPageName);
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-11 23:41:33 +03:00
|
|
|
*RefererOldDeletePage = \&DeletePage;
|
|
|
|
|
*DeletePage = \&RefererNewDeletePage;
|
2005-01-07 00:50:34 +00:00
|
|
|
|
|
|
|
|
sub RefererNewDeletePage {
|
|
|
|
|
my $status = RefererOldDeletePage(@_);
|
|
|
|
|
return $status if $status; # this would be the error message
|
|
|
|
|
my $id = shift;
|
|
|
|
|
my $fname = GetRefererFile($id);
|
2016-06-15 23:21:07 +02:00
|
|
|
Unlink($fname) if (IsFile($fname));
|
2005-01-07 00:50:34 +00:00
|
|
|
return ''; # no error
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
## == Actual Code ==
|
|
|
|
|
|
|
|
|
|
sub GetRefererFile {
|
|
|
|
|
my $id = shift;
|
2014-06-06 17:32:44 +02:00
|
|
|
return "$RefererDir/$id.rf";
|
2005-01-07 00:50:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub ReadReferers {
|
|
|
|
|
my $file = GetRefererFile(shift);
|
|
|
|
|
%Referers = ();
|
2016-06-15 23:21:07 +02:00
|
|
|
if (IsFile($file)) {
|
2005-01-07 00:50:34 +00:00
|
|
|
my ($status, $data) = ReadFile($file);
|
|
|
|
|
%Referers = split(/$FS/, $data, -1) if $status;
|
|
|
|
|
}
|
|
|
|
|
ExpireReferers();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub ExpireReferers { # no need to save the pruned list if nothing else changes
|
|
|
|
|
if ($RefererTimeLimit) {
|
|
|
|
|
foreach (keys %Referers) {
|
|
|
|
|
if ($Now - $Referers{$_} > $RefererTimeLimit) {
|
|
|
|
|
delete $Referers{$_};
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if ($RefererLimit) {
|
|
|
|
|
my @list = sort {$Referers{$a} cmp $Referers{$b}} keys %Referers;
|
|
|
|
|
@list = @list[$RefererLimit .. @list-1];
|
|
|
|
|
foreach (@list) {
|
|
|
|
|
delete $Referers{$_};
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2006-04-02 19:48:53 +00:00
|
|
|
# maybe test for valid utf-8 later?
|
|
|
|
|
|
|
|
|
|
# http://www.w3.org/International/questions/qa-forms-utf-8
|
|
|
|
|
|
|
|
|
|
# $field =~
|
|
|
|
|
# m/^(
|
|
|
|
|
# [\x09\x0A\x0D\x20-\x7E] # ASCII
|
|
|
|
|
# | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
|
|
|
|
# | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
|
|
|
|
# | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
|
|
|
|
|
# | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
|
|
|
|
|
# | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|
|
|
|
|
# | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|
|
|
|
|
# | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
|
|
|
|
# )*$/x;
|
|
|
|
|
|
2010-10-23 13:08:08 +00:00
|
|
|
sub UrlToTitle {
|
|
|
|
|
my $title = QuoteHtml(shift);
|
|
|
|
|
$title = $1 if $title =~ /$FullUrlPattern/; # extract valid URL
|
2011-02-24 22:47:11 +00:00
|
|
|
$title =~ s/\%([0-9a-f][0-9a-f])/chr(hex($1))/egi; # decode if possible
|
2010-10-24 12:15:53 +00:00
|
|
|
$title =~ s!^https?://!!;
|
|
|
|
|
$title =~ s!\.html?$!!;
|
2010-10-24 12:16:10 +00:00
|
|
|
$title =~ s!/$!!;
|
2010-10-23 13:31:07 +00:00
|
|
|
# shorten it if necessary
|
|
|
|
|
if (length($title) > $RefererTitleLimit) {
|
|
|
|
|
$title = substr($title, 0, $RefererTitleLimit - 10)
|
|
|
|
|
. "..." . substr($title, -7);
|
|
|
|
|
}
|
2010-10-23 13:08:08 +00:00
|
|
|
return $title;
|
|
|
|
|
}
|
|
|
|
|
|
2005-01-07 00:50:34 +00:00
|
|
|
sub GetReferers {
|
|
|
|
|
my $result = join(' ', map {
|
2011-02-24 22:47:11 +00:00
|
|
|
my ($ts, $title) = split(/ /, $Referers{$_}, 2);
|
|
|
|
|
$title = UrlToTitle($_) unless $title;
|
2006-04-02 17:24:29 +00:00
|
|
|
$q->a({-href=>$_}, $title);
|
|
|
|
|
} keys %Referers);
|
2006-12-22 01:27:48 +00:00
|
|
|
return $q->div({-class=>'refer'}, $q->p(T('Referrers') . ': ' . $result))
|
2006-04-02 17:24:29 +00:00
|
|
|
if $result;
|
2005-01-07 00:50:34 +00:00
|
|
|
}
|
|
|
|
|
|
2010-10-23 13:08:08 +00:00
|
|
|
sub PageContentToTitle {
|
2011-02-24 22:47:11 +00:00
|
|
|
my ($content) = @_;
|
2015-05-02 04:03:30 +03:00
|
|
|
my $title = $content =~ m!<h1.*?>(.*?)</h1>! ? $1 : '';
|
2010-10-23 13:08:08 +00:00
|
|
|
$title = $1 if not $title and $content =~ m!<title>(.*?)</title>!;
|
|
|
|
|
# get rid of extra tags
|
|
|
|
|
$title =~ s!<.*?>!!g;
|
|
|
|
|
# trimming
|
|
|
|
|
$title =~ s!\s+! !g;
|
2010-10-24 12:15:53 +00:00
|
|
|
$title =~ s!^ !!;
|
|
|
|
|
$title =~ s! $!!;
|
2016-06-05 18:00:01 +02:00
|
|
|
$title = substr($title, 0, $RefererTitleLimit) . "..."
|
2010-10-23 13:31:07 +00:00
|
|
|
if length($title) > $RefererTitleLimit;
|
2010-10-23 13:08:08 +00:00
|
|
|
return $title;
|
|
|
|
|
}
|
|
|
|
|
|
2005-01-07 00:50:34 +00:00
|
|
|
sub UpdateReferers {
|
|
|
|
|
my $self = $ScriptName;
|
|
|
|
|
my $referer = $q->referer();
|
|
|
|
|
return unless $referer and $referer !~ /$self/;
|
|
|
|
|
foreach (split(/\n/,GetPageContent($RefererFilter))) {
|
|
|
|
|
if (/^ ([^ ]+)[ \t]*$/) { # only read lines with one word after one space
|
|
|
|
|
my $regexp = $1;
|
|
|
|
|
return if $referer =~ /$regexp/i;
|
|
|
|
|
}
|
|
|
|
|
}
|
2006-04-02 17:24:29 +00:00
|
|
|
my $ua = LWP::UserAgent->new;
|
|
|
|
|
my $response = $ua->get($referer);
|
2012-10-26 16:38:22 +02:00
|
|
|
return unless $response->is_success and $response->decoded_content =~ /$self/;
|
|
|
|
|
my $title = PageContentToTitle($response->decoded_content);
|
2010-10-23 13:08:08 +00:00
|
|
|
# starting with a timestamp makes sure that numerical comparisons still work!
|
2011-02-24 22:47:11 +00:00
|
|
|
$Referers{$referer} = "$Now $title";
|
2005-01-07 00:50:34 +00:00
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub WriteReferers {
|
|
|
|
|
my $id = shift;
|
|
|
|
|
return unless RequestLockDir('refer_' . $id); # not fatal
|
|
|
|
|
my $data = join($FS, %Referers);
|
|
|
|
|
my $file = GetRefererFile($id);
|
|
|
|
|
if ($data) {
|
2014-06-23 12:19:07 +02:00
|
|
|
CreateDir($RefererDir);
|
2005-01-07 00:50:34 +00:00
|
|
|
WriteStringToFile($file, $data);
|
|
|
|
|
} else {
|
2016-06-15 23:21:07 +02:00
|
|
|
Unlink($file); # just try it, doesn't matter if it fails
|
2005-01-07 00:50:34 +00:00
|
|
|
}
|
|
|
|
|
ReleaseLockDir('refer_' . $id);
|
|
|
|
|
}
|
|
|
|
|
|
PrintFooter relies on @MyFooters
We already called all the subs on @MyFooters and printed the result,
but this commit moves all the code from PrintFooters into subs and
puts those subs on @MyFooters. This allows us to write modules that
can better control where exactly their output should appear. In this
case the change was required in order to allow the Google +1 module to
coexist with code that maybe prints the comment form for all pages.
For example, knowing that the Google +1 sub is the first on one the
list because of unshift(@MyFooters, \&GooglePlusPrintFooter), we can
now write the following:
splice(@MyFooters, 1, 0, \&MyCommentsInTheFooter);
sub MyCommentsInTheFooter {
my ($id, $rev, $comment) = @_;
if (not $GooglePlusThisPagePrintedJournal
and (GetParam('action', 'browse') eq 'browse'
and $id and $CommentsPrefix
and $id ne $RCName
and $id !~ /^$CommentsPrefix(.*)/o)) {
my $target = $CommentsPrefix . $id;
my $page = '';
$page = PageHtml($target) if $IndexHash{$target};
return $q->div({-class=>'comment'},
$q->h2(T('Comments')),
$page)
. GetCommentForm("$CommentsPrefix$id", $rev, $comment);
}
}
The Google +1 extension was also fixed to not triger the EFF's Privacy
Badger. This is OK because we're using a two step button: The user
needs to click a button before we're loading the script from Google.
2015-08-17 13:39:34 +02:00
|
|
|
if ($MyFooters[-1] == \&DefaultFooter) {
|
|
|
|
|
splice(@MyFooters, -1, 0, \&RefererTrack);
|
|
|
|
|
} else {
|
|
|
|
|
push(@MyFooters, \&RefererTrack);
|
|
|
|
|
}
|
|
|
|
|
|
2005-01-07 00:50:34 +00:00
|
|
|
sub RefererTrack {
|
|
|
|
|
my $id = shift;
|
2005-09-18 10:26:57 +00:00
|
|
|
return unless $id;
|
2005-01-07 00:50:34 +00:00
|
|
|
ReadReferers($id);
|
|
|
|
|
WriteReferers($id) if UpdateReferers($id);
|
|
|
|
|
return GetReferers();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub DoPrintAllReferers {
|
|
|
|
|
print GetHeader('', T('All Referrers'), ''), $q->start_div({-class=>'content refer'});
|
|
|
|
|
PrintAllReferers(AllPagesList());
|
|
|
|
|
print $q->end_div();
|
|
|
|
|
PrintFooter();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub PrintAllReferers {
|
|
|
|
|
for my $id (@_) {
|
|
|
|
|
ReadReferers($id);
|
2006-12-22 01:27:48 +00:00
|
|
|
print $q->div({-class=>'page'},
|
2009-02-18 23:13:35 +00:00
|
|
|
$q->p(GetPageLink($id)),
|
2006-12-22 01:27:48 +00:00
|
|
|
GetReferers()) if %Referers;
|
2005-01-07 00:50:34 +00:00
|
|
|
}
|
|
|
|
|
}
|