Files
oddmuse/modules/despam.pl
Aleks-Daniel Jakimenko 6f8d281d79 No more "use vars"!
2015-04-10 13:31:28 +03:00

154 lines
4.7 KiB
Perl

# Copyright (C) 2004, 2005, 2006, 2007 Alex Schroeder <alex@emacswiki.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.
# 59 Temple Place, Suite 330
# Boston, MA 02111-1307 USA
use strict;
our ($q, $Now, %IndexHash, %Action, %Page, $OpenPageName, $FS, $BannedContent, $RcFile, $RcDefault, @MyAdminCode, $FullUrlPattern, $DeletedPage, $StrangeBannedContent);
AddModuleDescription('despam.pl', 'Despam Extension');
push(@MyAdminCode, \&DespamMenu);
sub DespamMenu {
my ($id, $menuref, $restref) = @_;
push(@$menuref, ScriptLink('action=spam', T('List spammed pages'), 'spam'));
push(@$menuref, ScriptLink('action=despam', T('Despamming pages'), 'despam'));
}
my @DespamRules = ();
my @DespamStrangeRules = ();
sub DespamRule {
$_ = shift;
s/#.*//; # trim comments
s/^\s+//; # trim leading whitespace
s/\s+$//; # trim trailing whitespace
return $_;
}
sub InitDespamRules {
# read them only once
@DespamRules = grep /./, map { DespamRule($_) }
split(/\n/, GetPageContent($BannedContent));
@DespamStrangeRules = grep /./, map { DespamRule($_) }
split(/\n/, GetPageContent($StrangeBannedContent))
if $IndexHash{$StrangeBannedContent};
}
$Action{despam} = \&DoDespam;
sub DoDespam {
RequestLockOrError();
my $list = GetParam('list', 0);
print GetHeader('', T('Despamming pages'), '') . '<div class="despam content"><p>';
InitDespamRules();
foreach my $id (DespamPages()) {
next if $id eq $BannedContent or $id eq $StrangeBannedContent;
OpenPage($id);
my $rule = $list || DespamBannedContent($Page{text});
print GetPageLink($id, NormalToFree($id));
DespamPage($rule) if $rule and not $list;
print $q->br();
}
print '</p></div>';
PrintFooter();
ReleaseLock();
}
$Action{spam} = \&DoSpam;
sub DoSpam {
print GetHeader('', T('Spammed pages'), '') . '<div class="spam content"><p>';
InitDespamRules();
foreach my $id (AllPagesList()) {
next if $id eq $BannedContent or $id eq $StrangeBannedContent;
OpenPage($id);
my $rule = DespamBannedContent($Page{text});
next unless $rule;
print GetPageLink($id, NormalToFree($id)), ' ', $rule, $q->br();
}
print '</p></div>';
PrintFooter();
}
# Based on BannedContent(), but with caching
sub DespamBannedContent {
my $str = shift;
my @urls = $str =~ /$FullUrlPattern/g;
foreach (@DespamRules) {
my $regexp = $_;
foreach my $url (@urls) {
if ($url =~ /($regexp)/i) {
return Tss('Rule "%1" matched "%2" on this page.',
QuoteHtml($regexp), QuoteHtml($url));
}
}
}
# depends on strange-spam.pl!
foreach (@DespamStrangeRules) {
my $regexp = $_;
if ($str =~ /($regexp)/i) {
my $match = $1;
$match =~ s/\n/ /g;
return Tss('Rule "%1" matched "%2" on this page.',
QuoteHtml($regexp), QuoteHtml($match));
}
}
return 0;
}
sub DespamPages {
# Assume that regular maintenance is happening and just read rc.log.
# This is not optimized like DoRc().
my $starttime = 0;
$starttime = $Now - GetParam('days', $RcDefault) * 86400; # 24*60*60
my $data = ReadFileOrDie($RcFile);
my %files = (); # use a hash map to make it unique
foreach my $line (split(/\n/, $data)) {
my ($ts, $id) = split(/$FS/, $line);
next if $ts < $starttime;
$files{$id} = 1;
}
return keys %files;
}
sub DespamPage {
my $rule = shift;
# from DoHistory()
my @revisions = sort {$b <=> $a} map { m|/([0-9]+).kp$|; $1; } GetKeepFiles($OpenPageName);
foreach my $revision (@revisions) {
my ($text, $rev) = GetTextRevision($revision, 1); # quiet
if (not $rev) {
print ': ' . Ts('Cannot find revision %s.', $revision);
return;
} elsif (not DespamBannedContent($text)) {
my $summary = Tss('Revert to revision %1: %2', $revision, $rule);
print ': ' . $summary;
Save($OpenPageName, $text, $summary) unless GetParam('debug', 0);
return;
}
}
if (grep(/^1$/, @revisions) or not @revisions) { # if there is no kept revision, yet
my $summary = Ts($rule). ' ' . Ts('Marked as %s.', $DeletedPage);
print ': ' . $summary;
Save($OpenPageName, $DeletedPage, $summary) unless GetParam('debug', 0);
} else {
print ': ' . T('Cannot find unspammed revision.');
}
}