Files
oddmuse/modules/despam.pl
Aleks-Daniel Jakimenko 1c4e082755 Return objects where it begs for it
sub ParseData is fully backwards compatible. If some module runs it in list
context, then it will get listified hash like previously. New code should
always run it in scalar context though (everything in our code base
was changed according to that).

sub GetTextRevision is not backwards compatible (don't let “wantarray” usage
to confuse you). Most modules do not touch that subroutine, so we are probably
fine (modules from our git repo that do use were changed accordingly).

“EncodePage(%$page)” looks wrong. It seems like we should change it to accept
hash ref.
2015-09-04 04:55:48 +03:00

155 lines
4.8 KiB
Perl

# Copyright (C) 2004, 2005, 2006, 2007 Alex Schroeder <alex@emacswiki.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.
# 59 Temple Place, Suite 330
# Boston, MA 02111-1307 USA
use strict;
use v5.10;
our ($q, $Now, %IndexHash, %Action, %Page, $OpenPageName, $FS, $BannedContent, $RcFile, $RcDefault, @MyAdminCode, $FullUrlPattern, $DeletedPage, $StrangeBannedContent);
AddModuleDescription('despam.pl', 'Despam Extension');
push(@MyAdminCode, \&DespamMenu);
sub DespamMenu {
my ($id, $menuref, $restref) = @_;
push(@$menuref, ScriptLink('action=spam', T('List spammed pages'), 'spam'));
push(@$menuref, ScriptLink('action=despam', T('Despamming pages'), 'despam'));
}
my @DespamRules = ();
my @DespamStrangeRules = ();
sub DespamRule {
$_ = shift;
s/#.*//; # trim comments
s/^\s+//; # trim leading whitespace
s/\s+$//; # trim trailing whitespace
return $_;
}
sub InitDespamRules {
# read them only once
@DespamRules = grep /./, map { DespamRule($_) }
split(/\n/, GetPageContent($BannedContent));
@DespamStrangeRules = grep /./, map { DespamRule($_) }
split(/\n/, GetPageContent($StrangeBannedContent))
if $IndexHash{$StrangeBannedContent};
}
$Action{despam} = \&DoDespam;
sub DoDespam {
RequestLockOrError();
my $list = GetParam('list', 0);
print GetHeader('', T('Despamming pages'), '') . '<div class="despam content"><p>';
InitDespamRules();
foreach my $id (DespamPages()) {
next if $id eq $BannedContent or $id eq $StrangeBannedContent;
OpenPage($id);
my $rule = $list || DespamBannedContent($Page{text});
print GetPageLink($id, NormalToFree($id));
DespamPage($rule) if $rule and not $list;
print $q->br();
}
print '</p></div>';
PrintFooter();
ReleaseLock();
}
$Action{spam} = \&DoSpam;
sub DoSpam {
print GetHeader('', T('Spammed pages'), '') . '<div class="spam content"><p>';
InitDespamRules();
foreach my $id (AllPagesList()) {
next if $id eq $BannedContent or $id eq $StrangeBannedContent;
OpenPage($id);
my $rule = DespamBannedContent($Page{text});
next unless $rule;
print GetPageLink($id, NormalToFree($id)), ' ', $rule, $q->br();
}
print '</p></div>';
PrintFooter();
}
# Based on BannedContent(), but with caching
sub DespamBannedContent {
my $str = shift;
my @urls = $str =~ /$FullUrlPattern/g;
foreach (@DespamRules) {
my $regexp = $_;
foreach my $url (@urls) {
if ($url =~ /($regexp)/i) {
return Tss('Rule "%1" matched "%2" on this page.',
QuoteHtml($regexp), QuoteHtml($url));
}
}
}
# depends on strange-spam.pl!
foreach (@DespamStrangeRules) {
my $regexp = $_;
if ($str =~ /($regexp)/i) {
my $match = $1;
$match =~ s/\n/ /g;
return Tss('Rule "%1" matched "%2" on this page.',
QuoteHtml($regexp), QuoteHtml($match));
}
}
return 0;
}
sub DespamPages {
# Assume that regular maintenance is happening and just read rc.log.
# This is not optimized like DoRc().
my $starttime = 0;
$starttime = $Now - GetParam('days', $RcDefault) * 86400; # 24*60*60
my $data = ReadFileOrDie($RcFile);
my %files = (); # use a hash map to make it unique
foreach my $line (split(/\n/, $data)) {
my ($ts, $id) = split(/$FS/, $line);
next if $ts < $starttime;
$files{$id} = 1;
}
return keys %files;
}
sub DespamPage {
my $rule = shift;
# from DoHistory()
my @revisions = sort {$b <=> $a} map { m|/([0-9]+).kp$|; $1; } GetKeepFiles($OpenPageName);
foreach my $revision (@revisions) {
my ($revisionPage, $rev) = GetTextRevision($revision, 1); # quiet
if (not $rev) {
print ': ' . Ts('Cannot find revision %s.', $revision);
return;
} elsif (not DespamBannedContent($revisionPage->{text})) {
my $summary = Tss('Revert to revision %1: %2', $revision, $rule);
print ': ' . $summary;
Save($OpenPageName, $revisionPage->{text}, $summary) unless GetParam('debug', 0);
return;
}
}
if (grep(/^1$/, @revisions) or not @revisions) { # if there is no kept revision, yet
my $summary = Ts($rule). ' ' . Ts('Marked as %s.', $DeletedPage);
print ': ' . $summary;
Save($OpenPageName, $DeletedPage, $summary) unless GetParam('debug', 0);
} else {
print ': ' . T('Cannot find unspammed revision.');
}
}