# Source page: User:Whobot/code
# (MediaWiki page furniture removed: "Jump to navigation", "Jump to search" —
# the bare navigation lines were extraction residue and are not valid Perl.)
### IMPORTANT ###
# This code is released into the public domain. CONTRIBUTIONS are
# welcome, but will also hereby be RELEASED TO THE PUBLIC DOMAIN.
# See the documentation distributed with this code for important
# warnings and caveats.
# Cloned from Pearle Wisebot, modifications by User:Who
#################
use strict;
# NOTE(review): "use warnings;" is absent. Enabling it would flood stderr,
# because this script deliberately compares possibly-undef values with
# ne/eq "" throughout (e.g. argument-count checks in interpretCommand).
use Time::HiRes;
# The following may be helpful in debugging character encoding
# problems.
#use utf8;
#use encoding 'utf8';
# Initialization
use LWP::UserAgent;
use HTTP::Cookies;
use HTTP::Request::Common qw(POST);
use HTML::Entities;
print "\n";
# LWP:UserAgent is a library which allows us to create a "user agent"
# object that handles the low-level details of making HTTP requests.
# $::ua is a package global shared by every HTTP helper below
# (getPage, postPage, getCategoryArticles, getSubcategories).
$::ua = LWP::UserAgent->new(timeout => 300);
$::ua->agent("Whobot Wisebot/0.1");
# Persist login cookies to disk so the bot stays logged in across runs.
$::ua->cookie_jar(HTTP::Cookies->new(file => "cookies.whobot.txt", autosave => 1));
$::ua->cookie_jar->load();
# Hot pipes
# ($| = 1 autoflushes STDOUT so progress output appears immediately.)
$| = 1;
# ---
# test();
#sub test
#{
# my ($target, $text, $editTime, $startTime, $token);
#
# $target = "Wikipedia:Sandbox";
# ($text, $editTime, $startTime, $token) = getPage($target);
# print $text;
# $text .= "\Eat my electrons! -- Whobot\n";
# print "---\n";
# postPage ($target, $editTime, $startTime, $token, $text, "Test 008");
# die ("Test complete.");
#}
# ---
# Entry point: interpret the command line as a single bot command.
# With no arguments, interpretCommand falls through to READ_COMMANDS
# and processes one command per line of STDIN.
interpretCommand(@ARGV);
sub interpretCommand
{
    # Dispatch a single bot command.
    #
    # Called once with @ARGV at startup. The READ_COMMANDS command (or an
    # empty command) makes this sub re-enter itself once per line of STDIN,
    # after unwrapping any [[Page name]] wiki links into bare page names.
    #
    # Arguments: ($command, @arguments). Dies on unknown commands or on a
    # wrong argument count. Everything is logged via myLog().
    my ($command, @arguments) = @_;
    my ($line, $from, $to, $page, $pageCopy);
    $command =~ s/\*\s*//;  # tolerate a leading wiki-list bullet ("* ")
    # Timestamp + echo every command ("date /t" is the Windows date command).
    myLog(`date /t`);
    myLog ($command.": ".join(" ", @arguments)."\n");
    print `date /t`;
    print $command.": ".join(" ", @arguments)."\n";
    if ($command eq "POST_STDIN")
    {
        if ($arguments[2] ne "")
        {
            myLog ("Too many arguments to POST_STDIN.\n");
            die ("Too many arguments to POST_STDIN.\n");
        }
        postSTDIN($arguments[0],$arguments[1]);
    }
    elsif ($command eq "POST_STDIN_NULLOK")
    {
        if ($arguments[2] ne "")
        {
            myLog ("Too many arguments to POST_STDIN.\n");
            die ("Too many arguments to POST_STDIN.\n");
        }
        # Temporarily allow fetching/posting over an empty or missing page.
        $::nullOK = "yes";
        postSTDIN($arguments[0],$arguments[1]);
        $::nullOK = "no";
    }
    elsif ($command eq "MOVE_CONTENTS")
    {
        if ($arguments[3] ne "")
        {
            if (($arguments[4] eq "")
                and ($arguments[1] eq "->"))
            {
                # "MOVE_CONTENTS [[From]] -> [[To]] date" form.
                # BUG FIX: the CFD listing date used to be passed in the
                # includeCategories slot and includeSortkey was "".
                # Pass the same "no"/"yes"/date positions as the plain
                # three-argument form below and as MOVE_CONTENTS_INCL_CATS.
                moveCategoryContents($arguments[0],$arguments[2],"no","yes",$arguments[3]);
                return();
            }
            else
            {
                myLog ("Too many arguments to MOVE_CONTENTS.\n");
                die ("Too many arguments to MOVE_CONTENTS.\n");
            }
        }
        moveCategoryContents($arguments[0],$arguments[1],"no","yes",$arguments[2]);
    }
    elsif ($command eq "MOVE_CONTENTS_INCL_CATS")
    {
        if ($arguments[3] ne "")
        {
            if (($arguments[4] eq "")
                and ($arguments[1] eq "->"))
            {
                moveCategoryContents($arguments[0],$arguments[2],"yes","yes",$arguments[3]);
                return();
            }
            else
            {
                myLog ("Too many arguments to MOVE_CONTENTS_INCL_CATS.\n");
                die ("Too many arguments to MOVE_CONTENTS_INCL_CATS.\n");
            }
        }
        moveCategoryContents($arguments[0],$arguments[1],"yes","yes",$arguments[2],"");
    }
    elsif ($command eq "REMOVE_X_FROM_CAT")
    {
        if ($arguments[3] ne "")
        {
            myLog ("Too many arguments to REMOVE_X_FROM_CAT.\n");
            die ("Too many arguments to REMOVE_X_FROM_CAT.\n");
        }
        removeXFromCat($arguments[0],$arguments[1],$arguments[2],"");
    }
    elsif ($command eq "DEPOPULATE_CAT")
    {
        if ($arguments[1] ne "")
        {
            if (($arguments[2] eq "")
                and ($arguments[1] eq "special"))
            {
                depopulateCat($arguments[0],"special");
                # BUG FIX: without this return the code fell through and
                # depopulated the category a second time, without the
                # "special" flag.
                return();
            }
            else
            {
                myLog ("Too many arguments to DEPOPULATE_CAT.\n");
                die ("Too many arguments to DEPOPULATE_CAT.\n");
            }
        }
        depopulateCat($arguments[0]);
    }
    elsif ($command eq "PRINT_WIKITEXT")
    {
        if ($arguments[1] ne "")
        {
            myLog ("Too many arguments to PRINT_WIKITEXT.\n");
            die ("Too many arguments to PRINT_WIKITEXT.\n");
        }
        printWikitext($arguments[0]);
    }
    elsif ($command eq "ADD_CFD_TAG")
    {
        if ($arguments[1] ne "")
        {
            myLog ("Too many arguments to ADD_CFD_TAG.\n");
            die ("Too many arguments to ADD_CFD_TAG.\n");
        }
        addCFDTag($arguments[0]);
    }
    elsif ($command eq "ADD_CFDU_TAG")
    {
        if ($arguments[2] ne "")
        {
            myLog ("Too many arguments to ADD_CFDU_TAG.\n");
            die ("Too many arguments to ADD_CFDU_TAG.\n");
        }
        addCFDUTag($arguments[0],$arguments[1],"");
    }
    elsif ($command eq "REMOVE_CFD_TAG")
    {
        if ($arguments[1] ne "")
        {
            myLog ("Too many arguments to REMOVE_CFD_TAG.\n");
            die ("Too many arguments to REMOVE_CFD_TAG.\n");
        }
        removeCFDTag($arguments[0]);
    }
    elsif ($command eq "REMOVE_CFDU_TAG")
    {
        if ($arguments[1] ne "")
        {
            myLog ("Too many arguments to REMOVE_CFDU_TAG.\n");
            die ("Too many arguments to REMOVE_CFDU_TAG.\n");
        }
        removeCFDUTag($arguments[0]);
    }
    elsif ($command eq "ADD_TO_CAT")
    {
        if ($arguments[2] ne "")
        {
            myLog ("Too many arguments to ADD_TO_CAT.\n");
            die ("Too many arguments to ADD_TO_CAT.\n");
        }
        addToCat($arguments[0],$arguments[1],"");
    }
    elsif ($command eq "ADD_TO_CAT_NULL_OK")
    {
        if ($arguments[2] ne "")
        {
            myLog ("Too many arguments to ADD_TO_CAT_NULL_OK.\n");
            die ("Too many arguments to ADD_TO_CAT_NULL_OK.\n");
        }
        $::nullOK = "yes";
        addToCat($arguments[0],$arguments[1],"");
        $::nullOK = "no";
    }
    elsif ($command eq "TRANSFER_TEXT")
    {
        if ($arguments[2] ne "")
        {
            myLog ("Too many arguments to TRANSFER_TEXT.\n");
            die ("Too many arguments to TRANSFER_TEXT.\n");
        }
        transferText($arguments[0], $arguments[1]);
    }
    # DON'T USE THE BELOW COMMAND; IT'S NOT IMPLEMENTED PROPERLY YET.
    # elsif ($command eq "LIST_TO_CAT_CHECK")
    # {
    #     if ($arguments[2] ne "")
    #     {
    #         myLog ("Too many arguments to LIST_TO_CAT_CHECK.\n");
    #         die ("Too many arguments to LIST_TO_CAT_CHECK.\n");
    #     }
    #     listToCat($arguments[0], $arguments[1], "no");
    # }
    elsif ($command eq "CHANGE_CATEGORY")
    {
        if ($arguments[4] ne "")
        {
            myLog ("Too many arguments to CHANGE_CATEGORY.\n");
            die ("Too many arguments to CHANGE_CATEGORY.\n");
        }
        changeCategory($arguments[0], $arguments[1], $arguments[2], $arguments[3]);
    }
    elsif ($command eq "CLEANUP_DATE")
    {
        if ($arguments[0] ne "")
        {
            myLog ("Too many arguments to CLEANUP_DATE.\n");
            die ("Too many arguments to CLEANUP_DATE.\n");
        }
        cleanupDate();
    }
    elsif ($command eq "OPENTASK_UPDATE")
    {
        if ($arguments[0] ne "")
        {
            myLog ("Too many arguments to OPENTASK_UPDATE.\n");
            die ("Too many arguments to OPENTASK_UPDATE.\n");
        }
        opentaskUpdate();
    }
    # DON'T USE THE BELOW COMMAND; IT'S NOT IMPLEMENTED PROPERLY YET.
    #elsif ($command eq "ENFORCE_CATEGORY_REDIRECTS_CHECK")
    #{
    #    enforceCategoryRedirects("no");
    #}
    # This command is for remedial cleanup only.
    #elsif ($command eq "INTERWIKI_LOOP")
    #{
    #    interwikiLoop();
    #}
    elsif ($command eq "ENFORCE_CATEGORY_INTERWIKI")
    {
        if ($arguments[1] ne "")
        {
            myLog ("Too many arguments to ENFORCE_CATEGORY_INTERWIKI.\n");
            die ("Too many arguments to ENFORCE_CATEGORY_INTERWIKI.\n");
        }
        enforceCategoryInterwiki($arguments[0]);
    }
    ## Broken due to recent changes on WP:CFD
    # elsif ($command eq "ENFORCE_CFD")
    # {
    #     enforceCFD();
    # }
    elsif ($command eq "STOP")
    {
        myLog ("Stopped.");
        die ("Stopped.");
    }
    elsif (($command eq "READ_COMMANDS")
        or ($command eq ""))
    {
        # Batch mode: one command per STDIN line, rate-limited.
        while (<STDIN>)
        {
            $line = $_;
            if ($line =~ m/READ_COMMANDS/)
            {
                myLog ("interpretCommands(): Infinite loop!");
                die ("interpretCommands(): Infinite loop!");
            }
            if ($line =~ m/^\s*$/)
            {
                next;
            }
            $line =~ s/\s+$//s;
            $line =~ s/\*\s*//;
            # "COMMAND [[From]] -> [[To]]" shorthand.
            if ($line =~ m/\[\[:?(.*?)\]\] -> \[\[:?(.*?)\]\]/)
            {
                $line =~ s/\[\[:?(.*?)\]\] -> \[\[:?(.*?)\]\]//;
                $from = $1;
                $to = $2;
                $line =~ s/\s*$//;
                $from =~ s/ /_/g;
                $to =~ s/ /_/g;
                interpretCommand($line, $from, $to);
            }
            else
            {
                # Unwrap every [[Page name]] into an underscored bare name.
                while ($line =~ m/\[\[:?(.*?)\]\]/)
                {
                    $line =~ m/\[\[:?(.*?)\]\]/;
                    $page = $1;
                    $pageCopy = $page;
                    $page =~ s/ /_/g;
                    # BUG FIX: \Q...\E quotes regex metacharacters in the
                    # page name; titles like "Foo (bar)" previously failed
                    # to substitute, leaving this loop spinning forever.
                    $line =~ s/\[\[:?\Q$pageCopy\E\]\]/$page/;
                }
                interpretCommand(split (" ", $line));
            }
            # unless (($line =~ m/TRANSFER_TEXT_CHECK/) or
            # ($line =~ m/ENFORCE_CATEGORY_INTERWIKI/))
            unless ($line =~ m/TRANSFER_TEXT_CHECK/)
            {
                limit();
            }
        }
        myLog ("Execution complete.\n");
        print ("Execution complete.\n");
    }
    else
    {
        myLog ("Unrecognized command '".$command."': ".join(" ", @arguments)."\n");
        die ("Unrecognized command '".$command."': ".join(" ", @arguments));
    }
}
sub limit
{
    # Throttle between edits so the bot never hammers the wiki server.
    # Counts down $::speedLimit seconds on STDERR, then wipes the line.
    # $::speedLimit may be raised by retry() when the server is slow.
    ### ATTENTION ###
    # Increasing the speed of the bot to faster than 1 edit every 10
    # seconds violates English Wikipedia rules as of April, 2005, and
    # will cause your bot to be banned. So don't change $normalDelay
    # unless you know what you are doing. Other sites may have
    # similar policies, and you are advised to check before using your
    # bot at the default speed.
    #################
    # Enforce the hard 10-second floor.
    $::speedLimit = 10 if ($::speedLimit < 10);
    # Sleep one second per tick, announcing the seconds remaining.
    for my $remaining (reverse 0 .. $::speedLimit)
    {
        sleep (1);
        print STDERR "Sleeping $remaining seconds...\r";
    }
    print STDERR " \r";
}
# perl whobot.pl POST_STDIN User:Whobot/categories-alpha "Update from 13 Oct 2004 database dump"
# perl whobot.pl POST_STDIN User:Whobot/categories-alpha "Update from 13 Oct 2004 database dump"
sub postSTDIN
{
    # Replace an article's text with whatever arrives on STDIN.
    # Arguments: ($articleName, $comment). Dies on empty input.
    my ($articleName, $comment) = @_;
    my $text = '';
    $text .= $_ while (<STDIN>);
    if ($text =~ m/^\s*$/)
    {
        myLog ("postSTDIN(): Null input.\n");
        die ("postSTDIN(): Null input.\n");
    }
    # Fetch the page only for its edit-form tokens; discard the old text.
    my ($junk, $editTime, $startTime, $token) = getPage($articleName);
    $comment = "Automated post" if ($comment eq "");
    postPage ($articleName, $editTime, $startTime, $token, $text, $comment);
}
# perl whobot.pl ADD_TO_CAT Page_name Category:Category_name sortkey
# perl whobot.pl ADD_TO_CAT Page_name Category:Category_name sortkey
sub addToCat
{
    # Add one article to a category (with an optional sort key) and save.
    # Silently returns if addCatToText declines (already present, etc.).
    my ($articleName, $category, $sortkey) = @_;
    my ($text, $editTime, $startTime, $token) = getPage($articleName);
    my $comment = "Add ${category} per [[WP:CFD]]";
    my ($status, $newText) = addCatToText($category, $text, $sortkey, $articleName);
    return () unless ($status eq "success");
    postPage ($articleName, $editTime, $startTime, $token, $newText, $comment);
}
sub myLog
{
    # Append one message to the bot's log file (whobot-log.txt in the
    # current directory). Dies if the log cannot be opened or flushed.
    # FIX: replaced the old 2-arg open on a bareword handle with a
    # checked 3-arg open on a lexical filehandle; close is now checked
    # so buffered write errors are not silently dropped.
    my ($message) = @_;
    open my $log, '>>', 'whobot-log.txt'
        or die "Could not append to log!";
    print {$log} $message;
    close $log
        or die "Could not append to log!";
}
# Fetch the current wikitext of a page by screen-scraping its edit form.
# Returns ($text, $editTime, $startTime, $token) — the decoded wikitext
# plus the three hidden edit-form fields postPage() must echo back.
# Dies on a null target, on lost login, on an unparseable edit form, or
# on unhandled HTTP errors; retries (via retry()) on 500/502/503.
# Honors $::nullOK: when "yes", empty text / missing wpEdittime are allowed.
sub getPage
{
my ($target, $request, $response, $reply, $text, $text2,
$editTime, $startTime, $attemptStartTime, $attemptFinishTime,
$token);
$target = $_[0];
if ($target =~ m/^\s*$/)
{
myLog("getPage: Null target.");
die("getPage: Null target.");
}
# urlSafe ($target);
# Monitor wiki server responsiveness
$attemptStartTime = Time::HiRes::time();
# Create a request-object for the page's edit form (action=edit).
print "GET http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit\n";
myLog("GET http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit\n");
$request = HTTP::Request->new(GET => "http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit");
$response = $::ua->request($request);
if ($response->is_success)
{
$reply = $response->content;
# Monitor wiki server responsiveness
$attemptFinishTime = Time::HiRes::time();
retry ("success", "getPage", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));
# This detects whether or not we're logged in.
# (The "My talk" link only renders for a logged-in Whobot session.)
unless ($reply =~ m%<a href="/wiki/User_talk:Whobot">My talk</a>%)
{
# We've lost our identity.
myLog ("Wiki server is not recognizing me (1).\n---\n${reply}\n---\n");
die ("Wiki server is not recognizing me (1).\n");
}
# Pull the raw wikitext out of the edit form's textarea.
# NOTE(review): this regex is tied to the exact attribute order the
# MediaWiki skin emitted in 2005; any skin change breaks it.
#$reply =~ m%<textarea\s*tabindex='1'\s*accesskey=","\s*name="wpTextbox1"\s*rows='25'\s*cols='80'\s*>(.*?)</textarea>%s;
$reply =~ m%<textarea\s*tabindex='1'\s*accesskey=","\s*name="wpTextbox1"\s*id="wpTextbox1"\s*rows='25'\s*cols='80'\s*>(.*?)</textarea>%s;
$text = $1;
# Hidden form fields needed to post the edit back.
$reply =~ m/value="(\d+)" name="wpEdittime"/;
$editTime = $1;
# Added 22 Aug 2005 to correctly handle articles that have
# been undeleted
$reply =~ m/value="(\d+)" name="wpStarttime"/;
$startTime = $1;
# Added 9 Mar 2005 after recent software change.
$reply =~ m/value="(\w+)" name="wpEditToken"/;
$token = $1;
###
# Sanity checks: unless $::nullOK is "yes", empty text or a missing
# edit time means the scrape failed (or the page doesn't exist).
if (($text =~ m/^\s*$/)
and ($::nullOK ne "yes"))
{
myLog ("getPage($target): Null text!\n");
myLog "\n---\n$reply\n---\n";
die ("getPage($target): Null text!\n");
}
if (($editTime =~ m/^\s*$/)
and ($::nullOK ne "yes"))
{
myLog ("getPage($target): Null time!\n");
myLog "\n---\n$reply\n---\n";
die ("getPage($target): Null time!\n");
}
# Raw angle brackets in the captured text mean the textarea regex
# swallowed surrounding HTML — abort rather than post garbage.
if (($text =~ m/>/) or
($text =~ m/</))
{
print $text;
myLog "\n---\n$text\n---\n";
myLog ("getPage($target): Bad text suck!\n");
die ("getPage($target): Bad text suck!\n");
}
# Change ( &quot; -> " ) etc
# This function is from HTML::Entities.
decode_entities($text);
# This may or may not actually work
$::ua->cookie_jar->save();
return ($text, $editTime, $startTime, $token);
}
else
{
myLog ("getPage($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit\n".$response->content."\n");
print ("getPage($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit\n".$response->content."\n");
# 50X HTTP errors mean there is a problem connecting to the wiki server
if (($response->status_line =~ m/^500/)
or ($response->status_line =~ m/^502/)
or ($response->status_line =~ m/^503/))
{
return(retry("getPage", @_));
}
else
{
# Unhandled HTTP response
die ("getPage($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/w/wiki.phtml?title=${target}&action=edit\n");
}
}
}
# Save new wikitext to a page via the edit form's submit endpoint.
# Arguments: ($pageName, $editTime, $startTime, $editToken, $textToPost,
# $summaryEntry, $minor) — the time/token values must come from a prior
# getPage() on the same page, or MediaWiki will reject/conflict the edit.
# Returns the response body on success; dies on edit conflicts,
# deleted-article conflicts, unexpected redirects, or unhandled HTTP
# errors; retries on 500/502/503.
sub postPage
{
my ($request, $response, $pageName, $textToPost, $summaryEntry,
$editTime, $startTime, $actual, $expected, $attemptStartTime,
$attemptFinishTime, $date, $editToken, $minor);
$pageName = $_[0];
$editTime = $_[1];
$startTime = $_[2];
$editToken = $_[3];
$textToPost = $_[4];
$summaryEntry = $_[5]; # Max 200 chars!
$minor = $_[6];
$summaryEntry = substr($summaryEntry, 0, 200);
if ($pageName eq "")
{
myLog ("postPage(): Empty pageName.\n");
die ("postPage(): Empty pageName.\n");
}
if ($summaryEntry eq "")
{
$summaryEntry = "Automated editing.";
}
# Monitor server responsiveness
$attemptStartTime = Time::HiRes::time();
# Build the form POST; the two branches differ only in wpMinoredit.
if ($minor eq "yes")
{
$request = POST "http://en.wikipedia.org/w/wiki.phtml?title=${pageName}&action=submit",
[wpTextbox1 => $textToPost,
wpSummary => $summaryEntry,
wpSave => "Save page",
wpMinoredit => "on",
wpEditToken => $editToken,
wpStarttime => $startTime,
wpEdittime => $editTime];
# Optional: wpWatchthis
}
else
{
$request = POST "http://en.wikipedia.org/w/wiki.phtml?title=${pageName}&action=submit",
[wpTextbox1 => $textToPost,
wpSummary => $summaryEntry,
wpSave => "Save page",
wpEditToken => $editToken,
wpStarttime => $startTime,
wpEdittime => $editTime];
# Optional: wpWatchthis, wpMinoredit
}
# ---
## If posts are failing, you can uncomment the below to see what
## HTTP request is being made.
# myLog($request->as_string());
# print $request->as_string(); $::speedLimit = 60 * 10;
# print $::ua->request($request)->as_string;
# ---
myLog("POSTing...");
print "POSTing...";
# Pass request to the user agent and get a response back
$response = $::ua->request($request);
myLog("POSTed.\n");
print "POSTed.\n";
# MediaWiki shows a confirmation page when recreating a deleted article;
# treat that as fatal rather than silently confirming.
if ($response->content =~ m/Please confirm that really want to recreate this article./)
{
myLog ($response->content."\n");
die ("Deleted article conflict! See log!");
}
# Check the outcome of the response
if (($response->is_success) or ($response->is_redirect))
{
# Monitor server responsiveness
$attemptFinishTime = Time::HiRes::time();
retry ("success", "postPage", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));
# A successful save normally answers "302 Moved Temporarily" redirecting
# to the article; "200 OK" is also tolerated below.
$expected = "302 Moved Temporarily";
$actual = $response->status_line;
if (($expected ne $actual)
and ($actual ne "200 OK"))
{
myLog ("postPage(${pageName}, $editTime)#1 - expected =! actual\n");
myLog ($request->as_string());
myLog ("EXPECTED: '${expected}'\n");
myLog (" ACTUAL: '${actual}'\n");
die ("postPage(${pageName}, $editTime)#1 - expected =! actual - see log\n");
}
# The redirect Location should point straight back at the saved page.
$expected = "http://en.wikipedia.org/wiki/${pageName}";
$expected =~ s/\'/%27/g;
$expected =~ s/\*/%2A/g;
$expected = urlEncode($expected);
$actual = $response->headers->header("Location");
if (($expected ne $actual)
and !(($actual eq "") and ($response->status_line eq "200 OK")))
{
myLog ("postPage(${pageName}, $editTime)#2 - expected =! actual\n");
myLog ("EXPECTED: '${expected}'\n");
myLog (" ACTUAL: '${actual}'\n");
die ("postPage(${pageName}, $editTime)#2 - expected =! actual - see log\n");
}
if ($response->content =~ m/<h1 class="firstHeading">Edit conflict/)
{
myLog ("Edit conflict on '$pageName' at '$editTime'!\n");
die ("Edit conflict on '$pageName' at '$editTime'!\n");
}
$::ua->cookie_jar->save();
return ($response->content);
}
else
{
$date = `date /t`;
$date =~ s/\n//g;
myLog ("Bad response to POST to $pageName at $date.\n".$response->status_line."\n".$response->content."\n");
# 50X HTTP errors mean there is a problem connecting to the wiki server
if (($response->status_line =~ m/^500/)
or ($response->status_line =~ m/^502/)
or ($response->status_line =~ m/^503/))
{
print "Bad response to POST to $pageName at $date.\n".$response->status_line."\n".$response->content."\n";
return(retry("postPage", @_));
}
else
{
# Unhandled HTTP response
die ("Bad response to POST to $pageName at $date.\n".$response->status_line."\n");
}
}
}
sub urlSafe
{
    # Historical validator: dies if the name contains any character that
    # is not URL-safe for this bot. No longer called in most places,
    # because LWP::UserAgent / HTTP::Request handle character escaping.
    my ($candidate) = @_;
    my $residue = $candidate;
    # Delete every allowed character; whatever survives is offending.
    # & may not be included in this list!
    $residue =~ s%[\p{IsWord}\w\-,\(\):\/\'\.\;\!]*%%g;
    if ($residue ne "")
    {
        myLog ("urlSafe(): Bad character in ${candidate}: '${residue}'\n");
        die ("urlSafe(): Bad character in ${candidate}: '${residue}'\n");
    }
}
# perl whobot.pl MOVE_CONTENTS_INCL_CATS Category:From_here Category:To_here CFDListingDay
# perl whobot.pl MOVE_CONTENTS_INCL_CATS Category:From_here Category:To_here CFDListingDay
sub moveCategoryContents
{
    # Move every member of $categoryFrom into $categoryTo: transfer the
    # category page text, optionally re-home subcategories (when
    # $includeCategories is "yes"), then re-home each member article.
    # $cfdlisting is the CFD log day used in edit summaries.
    my ($categoryFrom, $categoryTo, $includeCategories,
        $includeSortkey, $cfdlisting) = @_;

    # -- INITIALIZATION --
    # Normalize both names: unwrap "[[:Category:...]]" syntax and
    # strip any stray brackets, underscoring embedded spaces.
    foreach my $catRef (\$categoryFrom, \$categoryTo)
    {
        if ($$catRef =~ m/^\[\[:(Category:.*?)\]\]/)
        {
            $$catRef =~ s/^\[\[:(Category:.*?)\]\]/$1/;
            $$catRef =~ s/\s+/_/g;
        }
        $$catRef =~ s/^\[\[://;
        $$catRef =~ s/\]\]$//;
    }
    unless (($categoryFrom =~ m/^Category:/) and
        ($categoryTo =~ m/^Category:/))
    {
        myLog ("moveCategoryContents(): Are you sure these are categories? ".$categoryFrom."/".$categoryTo."\n");
        die ("moveCategoryContents(): Are you sure these are categories? ".$categoryFrom."/".$categoryTo."\n");
    }
    # Move the category page's own text first.
    transferText ($categoryFrom, $categoryTo, $cfdlisting);
    # Subcategory transfer
    if ($includeCategories eq "yes")
    {
        foreach my $subcat (getSubcategories($categoryFrom))
        {
            next if ($subcat =~ m/^\s*$/);
            $subcat = urlDecode($subcat);
            print "changeCategory($subcat, $categoryFrom, $categoryTo) c\n";
            myLog "changeCategory($subcat, $categoryFrom, $categoryTo) c\n";
            changeCategory($subcat, $categoryFrom, $categoryTo, $cfdlisting);
            limit();
        }
    }
    # Article transfer (reverse order of the scraped listing).
    foreach my $article (reverse(getCategoryArticles($categoryFrom)))
    {
        next if ($article =~ m/^\s*$/);
        $article = urlDecode($article);
        print "changeCategory($article, $categoryFrom, $categoryTo) a\n";
        myLog "changeCategory($article, $categoryFrom, $categoryTo) a\n";
        changeCategory($article, $categoryFrom, $categoryTo, $cfdlisting);
        limit();
    }
}
# perl whobot.pl DEPOPULATE_CAT Category:To_be_depopulated
# perl whobot.pl DEPOPULATE_CAT Category:To_be_depopulated
sub depopulateCat #($category);
{
    # Remove every member (subcategories first, then articles) from a
    # category. When $doSpecial is set, the member list comes from
    # Special:Whatlinkshere instead, and subcategories are skipped.
    my ($category, $doSpecial) = @_;
    # Unwrap "[[:Category:...]]" link syntax.
    if ($category =~ m/^\[\[:(Category:.*?)\]\]/)
    {
        $category =~ s/^\[\[:(Category:.*?)\]\]/$1/;
        $category =~ s/\s+/_/g;
    }
    unless ($doSpecial)
    {
        unless ($category =~ m/^Category:/)
        {
            myLog ("depopulateCat(): Are you sure '$category' is a category?\n");
            die ("depopulateCat(): Are you sure '$category' is a category?\n");
        }
        # Remove all subcategories
        foreach my $subcat (getSubcategories($category))
        {
            $subcat = urlDecode($subcat);
            print "removeXFromCat($subcat, $category) c\n";
            myLog "removeXFromCat($subcat, $category) c\n";
            my ($status) = removeXFromCat($subcat, $category);
            if ($status != 0)
            {
                myLog ("Status: $status\n");
                print "Status: $status\n";
            }
        }
    }
    # Remove all articles
    foreach my $article (getCategoryArticles($category, $doSpecial))
    {
        $article = urlDecode($article);
        print "removeXFromCat($article, $category, $doSpecial) a\n";
        myLog "removeXFromCat($article, $category, $doSpecial) a\n";
        my ($status) = removeXFromCat($article, $category, $doSpecial);
        if ($status != 0)
        {
            myLog ("Status: $status\n");
            print "Status: $status\n";
        }
    }
}
# perl whobot.pl REMOVE_X_FROM_CAT Article_name Category:Where_the_article_is CFDListingDay
# perl whobot.pl REMOVE_X_FROM_CAT Article_name Category:Where_the_article_is CFDListingDay
# Remove a page from a category by deleting its [[Category:...]] link.
# Arguments: ($articleName, $category, $cfdlisting, $doSpecial).
# Returns (0, $sortkey) on success, 1 if the page was not in the
# category (after performing a near-null edit — see TEMPORARY block),
# 2 if the page is a redirect.
# NOTE(review): several error/log strings below say "addToCat()" — a
# copy-paste artifact; left unchanged here since they are runtime output.
sub removeXFromCat
{
my ($text, $articleName, $category, $editTime, $startTime, $comment, $catTmp,
$sortkey, @junk, $token, $categoryUnd, $categoryHuman, $cfdlisting, $doSpecial);
$articleName = $_[0];
$category = $_[1];
$cfdlisting = $_[2];
$doSpecial = $_[3];
# When only three arguments are given, the third doubles as the
# special-mode flag (see depopulateCat's two-argument call).
if (!$doSpecial)
{
$doSpecial = $cfdlisting;
}
#urlSafe($articleName);
#urlSafe($category);
if (!$doSpecial)
{
unless ($category =~ m/^Category:\w+/)
{
myLog ("addToCat(): Bad format on category.\n");
die ("addToCat(): Bad format on category.\n");
}
}
($text, $editTime, $startTime, $token) = getPage($articleName);
$comment = "Removed ${category} per [[Wikipedia:Categories_for_deletion/Log/${cfdlisting}|WP:CFD]]";
#$comment = "test edits ${cfdlisting}";
# Convert underscore to spaces; this is human-readable.
$category =~ s/_/ /g;
$categoryHuman = $category;
# Insert possible whitespace
# Turn the category name into a regex that tolerates whitespace after
# "Category:" and escapes parens/quotes; $categoryUnd is the same
# pattern with underscores instead of spaces.
$category =~ s/^Category://;
# $category = "Category:\\s*\\Q".$category."\\E"; # THIS DOES NOT WORK
$category = "Category:\\s*".$category;
$category =~ s%\(%\\(%g;
$category =~ s%\)%\\)%g;
$category =~ s%\'%\\\'%g;
$categoryUnd = $category;
$categoryUnd =~ s/ /_/g;
# Bail out (with a near-null touch edit) if no category link is present
# in either spaced or underscored form, with or without a sort key.
unless (($text =~ m/\[\[\s*${category}\s*\]\]/is)
or ($text =~ m/\[\[\s*${category}\s*\|.*?\]\]/is)
or ($text =~ m/\[\[\s*${categoryUnd}\s*\]\]/is)
or ($text =~ m/\[\[\s*${categoryUnd}\s*\|.*?\]\]/is))
{
print "removeXFromCat(): $articleName is not in '$category'.\n";
myLog ("removeXFromCat(): $articleName is not in '$category'.\n");
### TEMPORARY ###
### Uncomment these lines if you want category remove attempts
### to trigger null edits. This is useful if you have have
### changed the category on a template, but due to a bug this
### does not actually move member articles until they are
### edited.
($text, @junk) = fixCategoryInterwiki($text);
postPage ($articleName, $editTime, $startTime, $token, $text, "Mostly null edit to actually remove from ${categoryHuman}", "yes");
limit();
### TEMPORARY ###
return(1);
}
if ($text =~ m/^\s*\#REDIRECT/is)
{
print "addToCat(): $articleName is a redirect!\n";
myLog ("addToCat(): $articleName is a redirect!\n");
return(2);
}
# Capture the sort key (if any) before deleting the link.
$text =~ m/\[\[\s*${category}\s*\|\s*(.*?)\]\]/is;
$sortkey = $1;
if ($sortkey eq "")
{
$text =~ m/\[\[\s*${categoryUnd}\s*\|\s*(.*?)\]\]/is;
}
# Remove the page from the category and any trailing newline.
$text =~ s/\[\[\s*${category}\s*\|?(.*?)\]\]\n?//isg;
$text =~ s/\[\[\s*${categoryUnd}\s*\|?(.*?)\]\]\n?//isg;
($text, @junk) = fixCategoryInterwiki($text);
postPage ($articleName, $editTime, $startTime, $token, $text, $comment);
return(0, $sortkey);
}
# perl whobot.pl PRINT_WIKITEXT Article_you_want_to_get
## Warning: Saves to a file in the current directory with the same name
## as the article, plus another file with the .html extention.
sub printWikitext
{
    # Fetch a page's wikitext and save it to ./<target>, then save the
    # rendered HTML to ./<target>.html via wget.
    # FIX: the wikitext file is now written through a checked 3-arg open
    # on a lexical filehandle (the old bareword open was unchecked, so a
    # write failure silently produced an empty file).
    my ($target) = @_;
    $target =~ s/^\[\[://;
    $target =~ s/\]\]$//;
    my ($text, $editTime, $startTime, $token) = getPage($target);
    # Save the wikicode version to a file.
    open my $wikitext, '>', "./${target}"
        or die "printWikitext(): Could not write ./${target}: $!\n";
    print {$wikitext} $text;
    close $wikitext
        or die "printWikitext(): Could not close ./${target}: $!\n";
    # Save the HTML version to a file.
    # SECURITY NOTE(review): ${target} is interpolated directly into a
    # shell command; a page name containing shell metacharacters could
    # execute arbitrary commands. Prefer list-form system('wget', ...).
    print `wget http://en.wikipedia.org/wiki/${target} -O ./${target}.html`;
}
# Get a list of the names of articles in a given category.
# Get a list of the names of articles in a given category.
sub getCategoryArticles
{
    # Scrape the member-article names of $target from its category page.
    # When $doSpecial is true, the list is taken from
    # Special:Whatlinkshere for $target instead (limit 200).
    # Returns a list of URL-encoded article names; dies on lost login or
    # unhandled HTTP errors; retries on 500/502/503.
    my ($target, $doSpecial) = @_;
    if (!$doSpecial)
    {
        unless ($target =~ m/^Category:/)
        {
            myLog ("getCategoryArticles(): Are you sure '$target' is a category?\n");
            die ("getCategoryArticles(): Are you sure '$target' is a category?\n");
        }
    }
    # Monitor wiki server responsiveness
    my $attemptStartTime = Time::HiRes::time();
    # Create a request-object
    my $request;
    if (!$doSpecial)
    {
        print "GET http://en.wikipedia.org/wiki/${target}\n";
        myLog("GET http://en.wikipedia.org/wiki/${target}\n");
        $request = HTTP::Request->new(GET => "http://en.wikipedia.org/wiki/${target}");
    }
    else
    {
        print "GET http://en.wikipedia.org/w/wiki.phtml?title=Special:Whatlinkshere&target=${target}&limit=200&offset=0\n";
        myLog("GET http://en.wikipedia.org/w/wiki.phtml?title=Special:Whatlinkshere&target=${target}&limit=200&offset=0\n");
        # BUG FIX: the request URL used to end with a literal "\n",
        # producing a malformed URL in the Whatlinkshere request.
        $request = HTTP::Request->new(GET => "http://en.wikipedia.org/w/wiki.phtml?title=Special:Whatlinkshere&target=${target}&limit=200&offset=0");
    }
    my $response = $::ua->request($request);
    if ($response->is_success)
    {
        # Monitor wiki server responsiveness
        my $attemptFinishTime = Time::HiRes::time();
        retry ("success", "getCategoryArticles", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));
        my $reply = $response->content;
        # This detects whether or not we're logged in.
        unless ($reply =~ m%<a href="/wiki/User_talk:Whobot">My talk</a>%)
        {
            # We've lost our identity.
            myLog ("Wiki server is not recognizing me (2).\n---\n${reply}\n---\n");
            die ("Wiki server is not recognizing me (2).\n");
        }
        # Trim to the member listing, then harvest both listing formats.
        my $articles = $reply;
        $articles =~ s%^.*?<h2>Articles in category.*?</h2>%%s;
        $articles =~ s%<div class="printfooter">.*?$%%s;
        my @articles1 = $articles =~ m%<li><a href="/wiki/(.*?)" title=%sg;
        my @articles2 = $articles =~ m%px 0;"><a href="/wiki/(.*?)" title=%sg;
        my @articles = (@articles1, @articles2);
        $::ua->cookie_jar->save();
        return @articles;
    }
    else
    {
        myLog ("getCategoryArticles($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n".$response->content."\n");
        # 50X HTTP errors mean there is a problem connecting to the wiki server
        if (($response->status_line =~ m/^500/)
            or ($response->status_line =~ m/^502/)
            or ($response->status_line =~ m/^503/))
        {
            print "getCategoryArticles($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n".$response->content."\n";
            return(retry("getCategoryArticles", @_));
        }
        else
        {
            # Unhandled HTTP response
            die ("getCategoryArticles($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n");
        }
    }
}
# Get a list of the names of subcategories of a given category.
# Get a list of the names of subcategories of a given category.
sub getSubcategories
{
    # Scrape the subcategory names of $target from its category page.
    # Returns a (possibly empty) list of URL-encoded category names;
    # dies on lost login or unhandled HTTP errors; retries on 500/502/503.
    my ($target) = @_;
    unless ($target =~ m/^Category:/)
    {
        myLog ("getSubcategories(): Are you sure '$target' is a category?\n");
        die ("getSubcategories(): Are you sure '$target' is a category?\n");
    }
    # Monitor wiki server responsiveness
    my $attemptStartTime = Time::HiRes::time();
    # Create a request-object
    print "GET http://en.wikipedia.org/wiki/${target}\n";
    myLog("GET http://en.wikipedia.org/wiki/${target}\n");
    my $request = HTTP::Request->new(GET => "http://en.wikipedia.org/wiki/${target}");
    my $response = $::ua->request($request);
    if ($response->is_success)
    {
        # Monitor wiki server responsiveness
        my $attemptFinishTime = Time::HiRes::time();
        retry ("success", "getSubcategories", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));
        my $reply = $response->content;
        # This detects whether or not we're logged in.
        unless ($reply =~ m%<a href="/wiki/User_talk:Whobot">My talk</a>%)
        {
            # We've lost our identity.
            myLog ("Wikipedia is not recognizing me (3).\n---\n${reply}\n---\n");
            die ("Wikipedia is not recognizing me (3).\n");
        }
        # Keep only the markup between the Subcategories heading and the
        # Articles heading; no heading means no subcategories.
        my $subcats = $reply;
        if ($subcats =~ m%^.*?<h2>Subcategories</h2>(.*?)<h2>Articles in category.*?</h2>.*?$%s)
        {
            $subcats =~ s%^.*?<h2>Subcategories</h2>(.*?)<h2>Articles in category.*?</h2>.*?$%$1%s;
        }
        else
        {
            return ();
        }
        my @subcats = $subcats =~ m%<li><a href="/wiki/(.*?)" title=%sg;
        $::ua->cookie_jar->save();
        return @subcats;
    }
    else
    {
        myLog ("getSubcategories($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n".$response->content."\n");
        # 50X HTTP errors mean there is a problem connecting to the wiki server
        if (($response->status_line =~ m/^500/)
            or ($response->status_line =~ m/^502/)
            or ($response->status_line =~ m/^503/))
        {
            print "getSubcategories($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n".$response->content."\n";
            # BUG FIX: the retry fallback used to name "getCategoryArticles"
            # (a copy-paste error), so a 50X here re-ran the wrong function.
            return(retry("getSubcategories", @_));
        }
        else
        {
            # Unhandled HTTP response
            die ("getSubcategories($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n");
        }
    }
}
# perl whobot.pl ADD_CFD_TAG Category:Category_name
# perl whobot.pl ADD_CFD_TAG Category:Category_name
sub addCFDTag
{
    # Prepend {{cfd}} to a category page, unless it is already tagged
    # with any CFD-family template or is a redirect.
    my ($category) = @_;
    unless ($category =~ m/^Category:\w+/)
    {
        myLog ("addCFDTag(): Bad format on category.\n");
        die ("addCFDTag(): Bad format on category.\n");
    }
    # The category page may legitimately be empty.
    $::nullOK = "yes";
    my ($text, $editTime, $startTime, $token) = getPage($category);
    $::nullOK = "no";
    my $comment = "Nominated for deletion or renaming";
    if (($text =~ m/\{\{cfd\}\}/is) or
        ($text =~ m/\{\{cfm/is) or
        ($text =~ m/\{\{cfr/is) or
        ($text =~ m/\{\{cfr-speedy/is))
    {
        print "addCFDTag(): $category is already tagged.\n";
        myLog ("addCFDTag(): $category is already tagged.\n");
        return ();
    }
    if ($text =~ m/^\s*\#REDIRECT/is)
    {
        print "addCFDTag(): $category is a redirect!\n";
        myLog ("addCFDTag(): $category is a redirect!\n");
        return ();
    }
    # Add the CFD tag to the beginning of the page.
    my ($newText) = fixCategoryInterwiki("{{cfd}}\n" . $text);
    postPage ($category, $editTime, $startTime, $token, $newText, $comment);
}
# perl whobot.pl ADD_CFDU_TAG Category:Category_name
# perl whobot.pl ADD_CFDU_TAG Category:Category_name
sub addCFDUTag
{
    # Prepend a caller-supplied template ({{$stuff}}) to a category page,
    # unless it already carries a CFD-family tag or is a redirect.
    my ($category, $stuff) = @_;
    urlSafe($category);
    unless ($category =~ m/^Category:\w+/)
    {
        myLog ("addCFDUTag(): Bad format on category.\n");
        die ("addCFDUTag(): Bad format on category.\n");
    }
    # The category page may legitimately be empty.
    $::nullOK = "yes";
    my ($text, $editTime, $startTime, $token) = getPage($category);
    $::nullOK = "no";
    my $comment = "Nominated for deletion or renaming";
    #$comment = "Test edit";
    if (($text =~ m/\{\{cfd\}\}/is) or
        ($text =~ m/\{\{cfm/is) or
        ($text =~ m/\{\{cfr/is) or
        ($text =~ m/\{\{cfr-speedy/is))
    {
        print "addCFDUTag(): $category is already tagged.\n";
        myLog ("addCFDUTag(): $category is already tagged.\n");
        return ();
    }
    if ($text =~ m/^\s*\#REDIRECT/is)
    {
        print "addCFDUTag(): $category is a redirect!\n";
        myLog ("addCFDUTag(): $category is a redirect!\n");
        return ();
    }
    # Add the CFDU tag to the beginning of the page.
    my ($newText) = fixCategoryInterwiki("{{" . $stuff . "}}\n" . $text);
    postPage ($category, $editTime, $startTime, $token, $newText, $comment);
}
# perl whobot.pl REMOVE_CFD_TAG Category:Category_name
# perl whobot.pl REMOVE_CFD_TAG Category:Category_name
sub removeCFDTag
{
    # Strip every CFD-family tag from a category page after a CFD closes.
    # No-op (with a log note) when the page is untagged or a redirect.
    my ($category) = @_;
    unless ($category =~ m/^Category:\w+/)
    {
        myLog ("removeCFDTag(): Bad format on category.\n");
        die ("removeCFDTag(): Bad format on category.\n");
    }
    # The category page may legitimately be empty.
    $::nullOK = "yes";
    my ($text, $editTime, $startTime, $token) = getPage($category);
    $::nullOK = "no";
    my $comment = "De-listed from [[Wikipedia:Categories for deletion]]";
    unless (($text =~ m/\{\{cfd\}\}/is) or
        ($text =~ m/\{\{cfm/is) or
        ($text =~ m/\{\{cfr/is) or
        ($text =~ m/\{\{cfr-speedy/is))
    {
        print "removeCFDTag(): $category is not tagged.\n";
        myLog ("removeCFDTag(): $category is not tagged.\n");
        return ();
    }
    if ($text =~ m/^\s*\#REDIRECT/is)
    {
        print "removeCFDTag(): $category is a redirect!\n";
        myLog ("removeCFDTag(): $category is a redirect!\n");
        return ();
    }
    # Remove the CFD tag, then one occurrence of each related template
    # (same order as before: cfr, cfm, cfdu, cfru, cfr-speedy).
    $text =~ s/{{cfd}}\s*//gis;
    foreach my $tag ("cfr", "cfm", "cfdu", "cfru", "cfr-speedy")
    {
        $text =~ s/\{\{$tag.*?\}\}\s*//is;
    }
    my ($newText) = fixCategoryInterwiki($text);
    postPage ($category, $editTime, $startTime, $token, $newText, $comment);
}
# perl whobot.pl REMOVE_CFDU_TAG Category:Containing subs to remove CFDU
sub removeCFDUTag #($category);
{
    # Remove CFD tags from every subcategory of the given category.
    # Accepts either "Category:Foo" or a "[[:Category:Foo]]" wikilink.
    my (@subcats, $category, $subcat, $status, @junk,
        $text, $editTime, $startTime, $comment, $token);
    $category = $_[0];
    if ($category =~ m/^\[\[:(Category:.*?)\]\]/)
    {
        # Unwrap the wikilink form and normalize spaces to underscores.
        $category =~ s/^\[\[:(Category:.*?)\]\]/$1/;
        $category =~ s/\s+/_/g;
    }
    unless ($category =~ m/^Category:/)
    {
        myLog ("removeCFDUtag(): Are you sure '$category' is a category?\n");
        die ("removeCFDUtag(): Are you sure '$category' is a category?\n");
    }
    # Remove all subcategories
    @subcats = getSubcategories($category);
    foreach $subcat (@subcats)
    {
        $subcat = urlDecode($subcat);
        print "removeCFDTag($subcat, $category) c\n";
        # BUGFIX: this call was written as `myLog "..."` with no
        # parentheses -- unlike every other call site -- which only
        # parses if myLog() is declared earlier in the file.
        myLog ("removeCFDTag($subcat, $category) c\n");
        # NOTE(review): removeCFDTag() only inspects its first
        # argument, and does not appear to return a numeric status,
        # so the check below is best-effort -- TODO confirm.
        ($status, @junk) = removeCFDTag($subcat, $category, $editTime, $startTime, $token, $text, $comment);
        unless ($status == 0)
        {
            myLog ("Status: $status\n");
            print "Status: $status\n";
        }
    }
}
# perl whobot.pl TRANSFER_TEXT Category:From_here Category:To_there
## Note that this code is called automatically whenever moving a
## category, so you probably don't need to call it yourself from the
## command line.
sub transferText
{
    # Merge the wikicode of category page $source into $destination as
    # part of an approved CFD rename, leaving a tagged {{cfd}} pointer
    # behind on the source page. Called automatically when moving a
    # category.
    #
    # $_[0] - source category ("Category:...")
    # $_[1] - destination category ("Category:...")
    # $_[2] - CFD log day the move was listed under, or "speedy"
    my ($source, $destination, $sourceText, $destinationText,
        $sourceTime, $destinationTime, @sourceCategories,
        @destinationCategories, $category, $lastCategory,
        $sourceTextOrig, $destinationTextOrig, $comment, $sourceHuman,
        $destinationHuman, $noMergeFlag, $sourceToken,
        $destinationToken, $junk, $sourceStartTime,
        $destinationStartTime, $cfdlisting, $summaryText);
    $source = $_[0];
    $destination = $_[1];
    $cfdlisting = $_[2];
    if ($cfdlisting eq "speedy")
    {
        $comment = "Cleanup per [[Wikipedia:Category_renaming#Speedy_renaming_procedure|CFD Speedy rename]] (moving $source to $destination)";
    }
    else
    {
        $comment = "Cleanup per [[Wikipedia:Categories_for_deletion/Log/${cfdlisting}|WP:CFD]] (moving $source to $destination)";
    }
    # Make human-readable versions of these variables for use in edit summaries
    $sourceHuman = $source;
    $sourceHuman =~ s/_/ /g;
    $destinationHuman = $destination;
    $destinationHuman =~ s/_/ /g;
    unless (($source =~ m/^Category:/) and
            ($destination =~ m/^Category:/))
    {
        myLog ("transferText(): Are you sure these are categories? ".$source."/".$destination."\n");
        die ("transferText(): Are you sure these are categories? ".$source."/".$destination."\n");
    }
    ($sourceText, $sourceTime, $sourceStartTime, $sourceToken) = getPage($source);
    # Avoid double runs!
    # This text must be the same as that which is implanted below, and
    # it should be an HTML comment, so that it's invisible.
    if ($sourceText =~ m/<\!--WHOBOT-MOVE-CATEGORY-CONTENTS-SRC-FLAG-->/)
    {
        return;
    }
    $sourceTextOrig = $sourceText;
    # Strip any CFD-family tags from the working copy of the source.
    $sourceText =~ s/{{cfd}}//;
    $sourceText =~ s/\{\{cfr.*?\}\}\s*//is;
    $sourceText =~ s/\{\{cfm.*?\}\}\s*//is;
    $sourceText =~ s/\{\{cfdu.*?\}\}\s*//is;
    $sourceText =~ s/\{\{cfru.*?\}\}\s*//is;
    $sourceText =~ s/\{\{cfr-speedy.*?\}\}\s*//is;
    $sourceText =~ s/^\s+//s;
    $sourceText =~ s/\s+$//s;
    # The destination page may not exist yet; that's fine.
    $::nullOK = "yes";
    ($destinationText, $destinationTime, $destinationStartTime, $destinationToken)
        = getPage($destination);
    $::nullOK = "no";
    $destinationTextOrig = $destinationText;
    # Same tag-stripping for the destination's working copy.
    $destinationText =~ s/{{cfd}}//;
    $destinationText =~ s/\{\{cfm.*?\}\}\s*//is;
    $destinationText =~ s/\{\{cfr.*?\}\}\s*//is;
    $destinationText =~ s/\{\{cfdu.*?\}\}\s*//is;
    $destinationText =~ s/\{\{cfru.*?\}\}\s*//is;
    $destinationText =~ s/\{\{cfr-speedy.*?\}\}\s*//is;
    $destinationText =~ s/^\s+//s;
    $destinationText =~ s/\s+$//s;
    # To help keep things straight when we're in a loop.
    print STDOUT "\n----\n";
    if ($cfdlisting eq "speedy")
    {
        $summaryText = "[[Wikipedia:Category_renaming#Speedy_renaming_procedure|CFD Speedy rename]]";
    }
    else
    {
        $summaryText = "[[Wikipedia:Categories_for_deletion/Log/${cfdlisting}|WP:CFD]]";
    }
    if (($sourceText eq "") and
        ($destinationText ne ""))
    {
        # The HTML comment must be the same as that above.
        $sourceText = "{{cfd}}\nThis category has been moved to [[:$destinationHuman]]. Any remaining articles and subcategories will soon be moved by a bot unless otherwise noted on $summaryText.\n<!--WHOBOT-MOVE-CATEGORY-CONTENTS-SRC-FLAG-->\n";
    }
    elsif (($sourceText ne "") and
           ($destinationText eq ""))
    {
        $destinationText = $sourceText;
        # The HTML comment must be the same as that above.
        $sourceText = "{{cfd}}\nThis category has been moved to [[:$destinationHuman]]. Any remaining articles and subcategories will soon be moved by a bot unless otherwise noted on $summaryText.\n<!--WHOBOT-MOVE-CATEGORY-CONTENTS-SRC-FLAG-->\n";
    }
    elsif (($sourceText ne "") and
           ($destinationText ne ""))
    {
        # Both pages have content: merge the body text, then rebuild a
        # deduplicated, sorted block of category links at the bottom.
        @sourceCategories = $sourceText =~ m/\[\[\s*(Category:.*?)\s*\]\]/gs;
        @destinationCategories = $destinationText =~ m/\[\[\s*(Category:.*?)\s*\]\]/gs;
        $sourceText =~ s/\[\[\s*(Category:.*?)\s*\]\]\s*//gs;
        $sourceText =~ s/^\s+//s;
        $sourceText =~ s/\s+$//s;
        $destinationText =~ s/\[\[\s*(Category:.*?)\s*\]\]\s*//gs;
        $destinationText =~ s/^\s+//s;
        $destinationText =~ s/\s+$//s;
        $destinationText = $sourceText."\n".$destinationText;
        $destinationText =~ s/^\s+//s;
        $destinationText =~ s/\s+$//s;
        # Sorted input makes duplicates adjacent, so comparing against
        # the previous entry is enough to skip them.
        foreach $category (sort (@sourceCategories, @destinationCategories))
        {
            if ($category eq $lastCategory)
            {
                next;
            }
            $destinationText .= "\n[[${category}]]";
            $lastCategory = $category;
        }
        # The HTML comment must be the same as that above.
        # BUGFIX: use $summaryText here, matching the two branches
        # above; the link was previously hardcoded to the CFD log and
        # produced a broken "Log/speedy" link for speedy renames.
        $sourceText = "{{cfd}}\nThis category has been moved to [[:$destinationHuman]]. Any remaining articles and subcategories will soon be moved by a bot unless otherwise noted on $summaryText.\n<!--WHOBOT-MOVE-CATEGORY-CONTENTS-SRC-FLAG-->\n";
    }
    $sourceText =~ s/\n\s+\n/\n\n/sg;
    $destinationText =~ s/\n\s+\n/\n\n/sg;
    # Safety check: refuse to touch a source page that was never
    # tagged for CFD at all. You may need to futz with this,
    # depending on the templates currently in use.
    # BUGFIX: the cfru/cfdu patterns used to read m/\{\{cfru|/ and
    # m/\{\{cfdu|/ -- the trailing "|" formed an empty alternation
    # that matches ANY string, so this check could never fire.
    unless (($sourceTextOrig =~ m/\{\{cfd/)
            or ($sourceTextOrig =~ m/\{\{cfr/)
            or ($sourceTextOrig =~ m/\{\{cfru/)
            or ($sourceTextOrig =~ m/\{\{cfdu/)
            or ($sourceTextOrig =~ m/\{\{cfr-speedy/)
            or ($sourceTextOrig =~ m/\{\{cfm/))
    {
        print STDOUT "FATAL ERROR: $source was not tagged {{cfd}}, {{cfm}}, {{cfr}}, {{cfdu}}, {{cfru}}, or {{cfr-speedy}}!\n";
        myLog("FATAL ERROR: $source was not tagged {{cfd}}, {{cfr}}, {{cfm}}, {{cfdu}}, {{cfru}}, or {{cfr-speedy}}!\n");
        die("FATAL ERROR: $source was not tagged {{cfd}}, {{cfr}}, {{cfm}}, {{cfdu}}, {{cfru}}, or {{cfr-speedy}}!\n");
    }
    if (($sourceText eq $sourceTextOrig) and
        ($destinationText eq $destinationTextOrig))
    {
        print STDOUT "No changes for $source and $destination.\n";
        return();
    }
    if ($destinationTextOrig =~ m/^\s*$/)
    {
        print "No merging was required from $source into $destination.\n";
        $noMergeFlag = "yes";
    }
    unless ($noMergeFlag eq "yes")
    {
        # A real merge happened; flag the result for human review.
        $destinationText .= "{{pearle-manual-cleanup}}\n";
    }
    # Make sure category and interwiki links conform to style
    # guidelines.
    ($destinationText, $junk) = fixCategoryInterwiki($destinationText);
    # If we did have to change things around, print the changes and post them to the wiki.
    if ($sourceText ne $sourceTextOrig)
    {
        unless ($noMergeFlag eq "yes")
        {
            print STDOUT "SOURCE FROM:\n%%%${sourceTextOrig}%%%\nSOURCE TO:\n%%%${sourceText}%%%\n";
        }
        postPage ($source, $sourceTime, $sourceStartTime, $sourceToken, $sourceText, $comment);
    }
    if ($destinationText ne $destinationTextOrig)
    {
        unless ($noMergeFlag eq "yes")
        {
            print STDOUT "DESTINATION FROM:\n%%%${destinationTextOrig}%%%\nDESTINATION TO:\n%%%${destinationText}%%%\n";
        }
        postPage ($destination, $destinationTime, $destinationStartTime, $destinationToken, $destinationText, $comment);
    }
}
# Translate from HTTP URL encoding to the native character set.
sub urlDecode
{
    # Translate HTTP URL percent-encoding ("%xx") to the native
    # character set and return the decoded string.
    my ($input) = @_;
    # BUGFIX: the character class was [a-f|A-F|0-9], which includes a
    # literal "|" -- sequences like "%|7" were treated as hex escapes
    # (hex() of garbage is 0). Use a proper hex-digit class.
    $input =~ s/\%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
    return ($input);
}
# Translate from the native character set to HTTP URL encoding.
sub urlEncode
{
    # Translate from the native character set to HTTP URL encoding,
    # using Wikipedia's conventions: [A-Za-z0-9-_.*/:] pass through
    # unencoded, space becomes "+", everything else becomes %HH.
    my ($input) = @_;
    my $output = "";
    foreach my $char (split("", $input))
    {
        # The below exclusions should conform to Wikipedia practice
        # (possibly non-standard).
        # BUGFIX: the class previously contained literal "|"
        # separators ([a-z|A-Z|0-9|...]), which let "|" through
        # unencoded; it is now percent-encoded like any other
        # reserved character.
        if ($char =~ m/[a-zA-Z0-9\-_.*\/:]/)
        {
            $output .= $char;
        }
        elsif ($char eq " ")
        {
            $output .= "+";
        }
        else
        {
            # BUGFIX: "%x" produced single-digit escapes (e.g. "%A"
            # for newline), which is invalid percent-encoding; "%02X"
            # zero-pads to the required two hex digits.
            $output .= sprintf("%%%02X", ord($char));
        }
    }
    return ($output);
}
# perl whobot.pl CHANGE_CATEGORY Article_name Category:From Category:To CFDlistingDay
sub changeCategory
{
    # Recategorize a single page: remove [[<categoryFrom>]] and add
    # [[<categoryTo>]], preserving the sort key if one was present,
    # with an edit summary citing the given CFD listing.
    #
    # $_[0] - page to edit
    # $_[1] - category to remove ("Category:...")
    # $_[2] - category to add ("Category:...")
    # $_[3] - CFD log day justifying the edit, or "speedy"
    my ($articleName, $categoryFrom, $categoryTo, $editTime, $startTime, $text,
        $comment, $catTmp, $sortkey, $token, $junk, $categoryFromUnd, $cfdlisting);
    $articleName = $_[0];
    $categoryFrom = $_[1];
    $categoryTo = $_[2];
    $cfdlisting = $_[3];
    #urlSafe($articleName);
    #urlSafe($categoryFrom);
    #urlSafe($categoryTo);
    unless (($categoryFrom =~ m/^Category:/) and
            ($categoryTo =~ m/^Category:/))
    {
        # BUGFIX: these messages previously claimed they came from
        # moveCategoryContents(), which made the log misleading.
        myLog ("changeCategory(): Are you sure these are categories? ".$categoryFrom."/".$categoryTo."\n");
        die ("changeCategory(): Are you sure these are categories? ".$categoryFrom."/".$categoryTo."\n");
    }
    if ($articleName =~ m/^\s*$/)
    {
        myLog("changeCategory(): Null target.");
        die("changeCategory(): Null target.");
    }
    ($text, $editTime, $startTime, $token) = getPage($articleName);
    if ($cfdlisting eq "speedy")
    {
        $comment = "Recat per [[Wikipedia:Category_renaming#Speedy_renaming_procedure|CFD Speedy rename]] ${categoryFrom} to ${categoryTo}";
    }
    else
    {
        $comment = "Recat per [[Wikipedia:Categories_for_deletion/Log/${cfdlisting}|WP:CFD]] ${categoryFrom} to ${categoryTo}";
    }
    # --- Start the removing part ---
    # Convert underscore to spaces; this is human-readable.
    $categoryFrom =~ s/_/ /g;
    # Insert possible whitespace after the namespace prefix.
    $categoryFrom =~ s/^Category://;
    $categoryFrom = "Category:\\s*".$categoryFrom;
    # Escape special characters so the name can be used as a pattern.
    $categoryFrom =~ s%\(%\\(%g;
    $categoryFrom =~ s%\)%\\)%g;
    $categoryFrom =~ s%\'%\\\'%g;
    # Keep an underscore variant: "_" and " " are interchangeable in
    # wikicode.
    $categoryFromUnd = $categoryFrom;
    $categoryFromUnd =~ s/ /_/g;
    unless (($text =~ m/\[\[\s*${categoryFrom}\s*\]\]/is)
            or ($text =~ m/\[\[\s*${categoryFrom}\s*\|.*?\]\]/is)
            or ($text =~ m/\[\[\s*${categoryFromUnd}\s*\]\]/is)
            or ($text =~ m/\[\[\s*${categoryFromUnd}\s*\|.*?\]\]/is))
    {
        myLog ("changeCategory.r(): $articleName is not in '$categoryFrom'.\n");
        my ($nullEditFlag);
        # Set this to "yes" if you want mass category change attempts
        # to trigger null edits automatically. You should check the
        # category later to see if everything worked or not, to see if
        # any templates should be changed. The below will add a small
        # amount of unnecessary server load to try the null edits if
        # template changes haven't already been made.
        $nullEditFlag = "yes";
        if ($nullEditFlag eq "yes")
        {
            myLog ("changeCategory(): Attempting null edit on $articleName.\n");
            print "changeCategory(): Attempting null edit on $articleName.\n";
            nullEdit($articleName);
            return();
        }
        else
        {
            print "###${text}###\n";
            die ("changeCategory.r(): $articleName is not in '$categoryFrom'.\n");
        }
    }
    if ($text =~ m/^\s*\#REDIRECT/is)
    {
        myLog ("changeCategory.r(): $articleName is a redirect!\n");
        die ("changeCategory.r(): $articleName is a redirect!\n");
    }
    # We're lazy and don't fully parse the document to properly check
    # for escaped category tags, so there may be some unnecessary
    # aborts from the following, but they are rare and easily
    # overridden by manually editing the page in question.
    if ($text =~ m/<nowiki>.*?category.*?<\/nowiki>/is)
    {
        myLog ("changeCategory.r(): $articleName has a dangerous nowiki tag!\n");
        die ("changeCategory.r(): $articleName has a dangerous nowiki tag!\n");
    }
    # Capture the sort key, if any.
    # BUGFIX: previously $sortkey was assigned from $1 even when the
    # match failed (picking up a stale capture), and the fallback
    # match on the underscore form discarded its capture entirely.
    $sortkey = "";
    if ($text =~ m/\[\[\s*${categoryFrom}\s*\|\s*(.*?)\]\]/is)
    {
        $sortkey = $1;
    }
    elsif ($text =~ m/\[\[\s*${categoryFromUnd}\s*\|\s*(.*?)\]\]/is)
    {
        $sortkey = $1;
    }
    # Remove the page from the category and any trailing newline.
    $text =~ s/\[\[\s*${categoryFrom}\s*\|?(.*?)\]\]\n?//isg;
    $text =~ s/\[\[\s*${categoryFromUnd}\s*\|?(.*?)\]\]\n?//isg;
    # --- Start the adding part ---
    # Remove any newlines at the end of the document.
    $text =~ s/\n*$//s;
    $catTmp = $categoryTo;
    # _ and spaces are equivalent and may be intermingled in wikicode.
    $catTmp =~ s/Category:\s*/Category:\\s*/g;
    $catTmp =~ s/_/[_ ]/g;
    $catTmp =~ s%\(%\\\(%g;
    $catTmp =~ s%\)%\\\)%g;
    $catTmp =~ s%\.%\\\.%g;
    if (($text =~ m/(\[\[\s*${catTmp}\s*\|.*?\]\])/is)
        or ($text =~ m/(\[\[\s*${catTmp}\s*\]\])/is))
    {
        myLog ("changeCategory.a(): $articleName is already in '$categoryTo'.\n");
        print "\n1: '${1}'\n";
        print "\ncattmp: '${catTmp}'\n";
        print "changeCategory.a(): $articleName is already in '$categoryTo'.\n";
        ## It's generally OK to merge it in, so don't do this:
        # die "changeCategory.a(): $articleName is already in '$categoryTo'.\n";
        # return();
    }
    elsif ($text =~ m/^\s*\#REDIRECT/is)
    {
        print "changeCategory.a(): $articleName is a redirect!\n";
        myLog ("changeCategory.a(): $articleName is a redirect!\n");
        return();
    }
    else
    {
        # Convert underscore to spaces; this is human-readable.
        $categoryTo =~ s/_/ /g;
        # Add the category on a new line, carrying over the sort key.
        if ($sortkey eq "")
        {
            $text .= "\n[[${categoryTo}]]";
        }
        else
        {
            $text .= "\n[[${categoryTo}|${sortkey}]]";
        }
    }
    # --- Post-processing ---
    ($text, $junk) = fixCategoryInterwiki($text);
    postPage ($articleName, $editTime, $startTime, $token, $text, $comment, "yes");
}
# This function is not yet finished. Right now it simply compares the
# membership of a given list and a given category. Eventually, it is
# intended to be used to convert lists into categories. This is not
# yet authorized behavior.
sub listToCat
{
    # Compare the membership of one or more list pages against one or
    # more categories, printing the entries found only on one side.
    # The actual list-to-category conversion is not implemented yet.
    #
    # $_[0] - ";"-separated list page names (optionally [[..]]-wrapped)
    # $_[1] - ";"-separated category names
    # $_[2] - "yes" suppresses the diff banner (conversion mode)
    my ($listNames, $catNames, $implement) = @_;
    my (@listMembers, @catMembers, %inCat, @junk, $listText);

    if ($implement ne "yes")
    {
        print "Diffing membership of '$listNames' and '$catNames'\n";
    }

    # Gather every wikilink target from each list page.
    foreach my $listName (split(";", $listNames))
    {
        $listName =~ s/^\[\[:?//;
        $listName =~ s/\]\]$//;
        ($listText, @junk) = getPage($listName);
        # NOTE(review): with a trailing lazy .*? these substitutions
        # only strip the literal opening tags; presumably entire
        # <nowiki>/<pre> sections were meant to be removed -- TODO
        # confirm before "fixing".
        $listText =~ s%<nowiki>.*?%%gis;
        $listText =~ s%<pre>.*?%%gis; #
        @listMembers = (@listMembers, $listText =~ m%\[\[(.*?)\]\]%sg);
        sleep 1;
    }

    # Gather the member articles of each category.
    foreach my $catName (split(";", $catNames))
    {
        $catName =~ s/^\[\[:?//;
        $catName =~ s/\]\]$//;
        $catName =~ s/^:Category/Category/;
        @catMembers = (@catMembers, getCategoryArticles($catName));
        sleep 1;
    }

    # Index category membership for O(1) lookup.
    foreach my $article (@catMembers)
    {
        $article = urlDecode ($article);
        $inCat{$article} = 1;
    }

    # Cancel out matches; whatever remains is unique to one side.
    foreach my $article (@listMembers)
    {
        $article =~ s/\s+/_/gs;
        $article =~ s/\|.*$//;
        if (exists $inCat{$article})
        {
            delete $inCat{$article};
        }
        else
        {
            print "Only in list(s): $article\n";
        }
    }
    foreach my $article (sort(keys(%inCat)))
    {
        print "Only in cat(s): $article\n";
    }
}
# A little paranoia never hurt anyone.
sub shellfix
{
    # Backslash-escape shell metacharacters in $_[0] and die if
    # anything not on the whitelist survives. A little paranoia never
    # hurt anyone.
    my ($string) = @_;
    # BUGFIX: the escaping pattern was ([...])\"\' -- it only escaped
    # a metacharacter when it was literally followed by "' (and ate
    # the quotes), so in practice nothing was ever escaped. The
    # quote characters now live inside the class with the other
    # metacharacters.
    $string =~ s/([\*\?\!\(\)\&\>\<\"\'])/\\$1/g;
    my $leftover = $string;
    # Strip every whitelisted character (including the backslashes we
    # just introduced); anything left is unsafe.
    $leftover =~ s/[Å\p{IsWord}[:alpha:][:digit:]\*,:_.\'\"\)\(\?\-\/\&\>\<\!\\]//g;
    if ($leftover ne "")
    {
        die ("\nUnsafe character(s) in '${string}': '$leftover'\n");
    }
    return $string;
}
# You will not be able to use this function; it requires a dataset
# processed by scripts which have not been included. (It's not
# finished, anyway.)
sub enforceCategoryRedirects
{
# UNFINISHED (see header note): walks the members of
# Category:Wikipedia_category_redirects in a pre-processed database
# dump, extracts each redirect's {{categoryredirect|...}} target, and
# prints the "Move [[article]] from X to Y" work list. $_[0]
# ($implementActually) is accepted but never used -- presumably it was
# meant to gate actually performing the moves.
my ($implementActually, $line, $lineTmp, $articlesToMove,
$article, $flatResults, $entry, $contents, $catTo, $lineTmp2);
$implementActually = $_[0];
# NOTE(review): this line uses Unix `cat` while the lookup below uses
# Windows `type` on the same file (and the file header uses `date /t`)
# -- one of the two must be wrong for any given platform; confirm
# which OS this is meant to run on.
$flatResults = `cat data/reverse-category-links-sorted.txt | grep ^Category:Wikipedia_category_redirects`;
foreach $line (split("\n", $flatResults))
{
# Each line looks like "Category:Wikipedia_category_redirects <- <cat>".
$line =~ s/^Category:Wikipedia_category_redirects <\- //;
# shellfix() vets/escapes the name before it goes into a shell command.
$lineTmp = shellfix($line);
$lineTmp2 = $lineTmp;
$lineTmp2 =~ s/^Category://;
if ($line =~ m/^Category/)
{
# Articles still filed under the redirecting category.
$articlesToMove = `type data/reverse-category-links-sorted.txt | grep ^${lineTmp}`;
if ($articlesToMove eq "")
{
next;
}
print "ATM: $articlesToMove\n";
# Pull the raw page row (namespace 14 = Category) from the dump and
# read the {{categoryredirect|<target>}} parameter out of its text.
$entry = `egrep \"^\\([0-9]+,14,'$lineTmp2'\" data/entries-categoryredirect.txt `;
$entry =~ m/^\([0-9]+,14,'$lineTmp2','(.*?)',/;
$contents = $1;
$contents =~ m/\{\{categoryredirect\|(.*?)\}\}/;
$catTo = $1;
$catTo = ":Category:".$catTo;
$catTo =~ s/_/ /g;
$lineTmp = $line;
$lineTmp =~ s/^Category/:Category/i;
$lineTmp =~ s/_/ /g;
foreach $article (split("\n", $articlesToMove))
{
print "ARTICLE: $article\n";
print "LINE: $line\n";
# NOTE(review): $line is interpolated into this pattern unescaped, so
# a category name containing regex metacharacters would misbehave --
# consider \Q$line\E if this routine is ever finished.
$article =~ s/^$line <\- //;
print "* Move [[$article]] from [[$lineTmp]] to [[$catTo]]\n";
}
}
}
}
# A call to this recursive function handles any retries necessary to
# wait out network or server problems. It's a bit of a hack.
sub retry
{
# Shared retry/backoff handler for all network operations. Two modes:
#
#   retry("success", $opName, $elapsedSec)
#       Record a successful call: reset the global backoff
#       ($::retryDelay) and, for postPage, tune the global edit-rate
#       limit ($::speedLimit) from the observed response time.
#
#   retry($opName, @args)
#       $opName failed: escalate $::retryDelay one step
#       (10s -> 1m -> 10m -> 1h, then stays at 1h), sleep it off with
#       a countdown, then re-invoke $opName with @args and return its
#       result.
my ($callType, @args, $i, $normalDelay, $firstRetry,
$secondRetry, $thirdRetry);
($callType, @args) = @_;
### ATTENTION ###
# Increasing the speed of the bot to faster than 1 edit every 10
# seconds violates English Wikipedia rules as of April, 2005, and
# will cause your bot to be banned. So don't change $normalDelay
# unless you know what you are doing. Other sites may have
# similar policies, and you are advised to check before using your
# bot at the default speed.
#################
# HTTP failures are usually an indication of high server load.
# The retry settings here are designed to give human editors
# priority use of the server, by allowing it ample recovering time
# when load is high.
# Time to wait before retry on failure, in seconds
$normalDelay = 10; # Normal interval between edits is 10 seconds
$firstRetry = 60; # First delay on fail is 1 minute
$secondRetry = 60 * 10; # Second delay on fail is 10 minutes
$thirdRetry = 60 * 60; # Third delay on fail is 1 hour
# SUCCESS CASE
# e.g. retry ("success", "getPage", "0.23");
if ($callType eq "success")
{
myLog("Response time for ".$args[0]." (sec): ".$args[1]."\n");
$::retryDelay = $normalDelay;
# Only edits (postPage) adjust the pacing; reads do not.
if ($args[0] eq "postPage")
{
# If the response time is greater than 20 seconds...
if ($args[1] > 20)
{
print "Wikipedia is very slow. Increasing minimum wait to 10 min...\n";
myLog("Wikipedia is very slow. Increasing minimum wait to 10 min...\n");
$::speedLimit = 60 * 10;
}
# If the response time is between 10 and 20 seconds...
elsif ($args[1] > 10)
{
print "Wikipedia is somewhat slow. Setting minimum wait to 60 sec...\n";
myLog("Wikipedia is somewhat slow. Setting minimum wait to 60 sec...\n");
$::speedLimit = 60;
}
# If the response time is less than 10 seconds...
else
{
if ($::speedLimit > 10)
{
print "Returning to normal minimum wait time.\n";
myLog("Returning to normal minimum wait time.\n");
$::speedLimit = 10;
}
}
}
return();
}
# e.g. retry ("getPage", "George_Washington")
# FAILURE CASES
# The current value of $::retryDelay tells us which escalation step
# we are on ($::retryDelay == 0 covers the never-initialized case).
elsif (($::retryDelay == $normalDelay)
or ($::retryDelay == 0))
{
print "First retry for ".$args[0]."\n";
myLog("First retry for ".$args[0]."\n");
$::retryDelay = $firstRetry;
$::speedLimit = 60 * 10;
}
elsif ($::retryDelay == $firstRetry)
{
print "Second retry for ".$args[0]."\n";
myLog("Second retry for ".$args[0]."\n");
$::retryDelay = $secondRetry;
$::speedLimit = 60 * 10;
}
elsif ($::retryDelay == $secondRetry)
{
print "Third retry for ".$args[0]."\n";
myLog("Third retry for ".$args[0]."\n");
$::retryDelay = $thirdRetry;
$::speedLimit = 60 * 10;
}
elsif ($::retryDelay == $thirdRetry)
{
# Ceiling reached: keep retrying every hour indefinitely.
print "Nth retry for ".$args[0]."\n";
myLog("Nth retry for ".$args[0]."\n");
$::retryDelay = $thirdRetry;
$::speedLimit = 60 * 10;
}
else
{
die ("retry(): Internal error - unknown delay factor '".$::retryDelay."'\n");
}
# DEFAULT TO FAILURE CASE HANDLING
# Sleep off the backoff with a visible countdown on STDERR.
$i = $::retryDelay;
while ($i >= 0)
{
sleep (1);
print STDERR "Waiting $i seconds for retry...\r";
$i--;
}
print " \r";
# DO THE ACTUAL RETRY
# Dispatch back to the operation that failed.
if ($callType eq "getPage")
{
return(getPage(@args));
}
elsif ($callType eq "postPage")
{
return(postPage(@args));
}
elsif ($callType eq "getCategoryArticles")
{
return(getCategoryArticles(@args));
}
elsif ($callType eq "getSubcategories")
{
return(getSubcategories(@args));
}
elsif ($callType eq "getURL")
{
return(getURL(@args));
}
else
{
myLog ("retry(): Unknown callType: $callType\n");
die ("retry(): Unknown callType: $callType\n");
}
}
# perl pearle ENFORCE_CFD
## This just compares the contents of Category:Categories_for_deletion
## with WP:CFD and /resolved and /unresolved. It is broken now due to
## recent changes which list all nominations on subpages. It also
## does not check above the first 200 members of the category, due to
## recent changes which paginate the listing in 200-page blocks.
sub enforceCFD
{
    # Report members of Category:Categories_for_deletion that are not
    # listed on WP:CFD, noting whether they appear on /resolved or
    # /unresolved instead. (Known-broken per the header comment:
    # nominations now live on subpages, and only the first 200
    # category members are examined.)
    my (@subcats, $subcat, $cfd, $editTime, $startTime, $token, $cfdU, $cfdR);
    @subcats = getSubcategories("Category:Categories_for_deletion");
    ($cfd, $editTime, $startTime, $token) = getPage("Wikipedia:Categories_for_deletion");
    ($cfdU, $editTime, $startTime, $token) = getPage("Wikipedia:Categories_for_deletion/unresolved");
    ($cfdR, $editTime, $startTime, $token) = getPage("Wikipedia:Categories_for_deletion/resolved");
    # Normalize underscores and whitespace so name comparison is
    # uniform on both sides.
    $cfd =~ s/[\r\n_]/ /g;
    $cfd =~ s/\s+/ /g;
    $cfdU =~ s/[\r\n_]/ /g;
    $cfdU =~ s/\s+/ /g;
    $cfdR =~ s/[\r\n_]/ /g;
    $cfdR =~ s/\s+/ /g;
    foreach $subcat (@subcats)
    {
        $subcat =~ s/[\r\n_]/ /g;
        $subcat =~ s/\s+/ /g;
        $subcat = urlDecode ($subcat);
        # BUGFIX: $subcat is plain text, not a pattern; quote it with
        # \Q...\E so names containing regex metacharacters
        # (parentheses, dots, ...) match literally instead of
        # misfiring or throwing.
        unless ($cfd =~ m/\Q$subcat\E/)
        {
            print "$subcat is not in WP:CFD";
            if ($cfdR =~ m/\Q$subcat\E/)
            {
                print " (listed on /resolved)";
            }
            if ($cfdU =~ m/\Q$subcat\E/)
            {
                print " (listed on /unresolved)";
            }
            print "\n";
        }
    }
}
# An internal function that handles the complexity of adding a
# category tag to the wikicode of a page.
sub addCatToText
{
    # Append [[<category>]] to a page's wikicode unless the page is
    # already a member or is a redirect.
    #
    # $_[0] - category to add ("Category:...")
    # $_[1] - page wikicode
    # $_[2] - sort key (currently unused here)
    # $_[3] - page name, for log messages
    #
    # Returns ("success", $newText) or ("fail", $unchangedText).
    my ($category, $text, $sortkey, $articleName) = @_;
    my ($catPattern, $junk);

    unless ($category =~ m/^Category:\w+/)
    {
        myLog ("addCatToText(): Bad format on category.\n");
        die ("addCatToText(): Bad format on category.\n");
    }

    # Build a match pattern from the name: "_" and " " are
    # interchangeable in wikicode, and literal metacharacters must be
    # escaped.
    $catPattern = $category;
    $catPattern =~ s/Category:\s*/Category:\\s*/g;
    $catPattern =~ s/_/[_ ]/g;
    $catPattern =~ s%\(%\\\(%g;
    $catPattern =~ s%\)%\\\)%g;
    $catPattern =~ s%\.%\\\.%g;

    # Already a member (with or without a sort key)?
    if (($text =~ m/(\[\[\s*${catPattern}\s*\|.*?\]\])/is)
        or ($text =~ m/(\[\[\s*${catPattern}\s*\]\])/is))
    {
        print "addCatToText(): $articleName is already in '$category'.\n";
        myLog ("addCatToText(): $articleName is already in '$category'.\n");
        print "\n1: '${1}'\n";
        print "\ncattmp: '${catPattern}'\n";
        return("fail", $text);
    }
    if ($text =~ m/^\s*\#REDIRECT/is)
    {
        print "addCatToText(): $articleName is a redirect!\n";
        myLog ("addCatToText(): $articleName is a redirect!\n");
        return("fail", $text);
    }

    # Convert underscore to spaces; this is human-readable.
    $category =~ s/_/ /g;
    # Append the tag, then let fixCategoryInterwiki() slot it into the
    # conventional position on the page.
    $text .= "\n[[$category]]";
    ($text, $junk) = fixCategoryInterwiki($text);
    return ("success", $text);
}
### THIS ROUTINE IS CURRENTLY UNUSED ###
# It will probably not be useful to you, anyway, since it requires
# pre-processed database dumps which are not included in Whobot.
sub getPageOffline
{
    # Fetch page text for $_[0] from a pre-processed local database
    # dump instead of the live wiki. Returns ($text, "junk") to mimic
    # getPage()'s list shape. Unused; requires datasets not shipped
    # with Whobot.
    my ($target, $result, $targetTmp);
    $target = $_[0];
    # Must run the following before using this function, from 200YMMDD/data:
    # cat entries.txt | perl ../../scripts/rewrite-entries.pl > entries-simple.txt
    # Even after this pre-processing, this routine is incredibly slow.
    # Set up and use MySQL instead if you care about speed.
    $target =~ s/\s/_/g;
    # Double escape the tab, once for Perl, once for the shell
    # -P means "treat as Perl regexp" (yay!)
    # $result = `grep -P '^${target}\\t' /home/beland/wikipedia/20050107/data/entries-simple.txt`;
    $targetTmp = shellfix($target);
    $result = `grep -P '^${targetTmp}\\t' /home/beland/wikipedia/20050107/data/matches2.txt`;
    # BUGFIX: quote the title with \Q...\E when stripping the key --
    # page names may contain regex metacharacters (parentheses, dots)
    # that previously made this substitution fail or throw.
    $result =~ s/^\Q${target}\E\t//;
    # The dump stores newlines as literal "\n"; expand them back.
    $result =~ s/\\n/\n/g;
    return ($result, "junk");
}
# --- CATEGORY AND INTERWIKI STYLE CLEANUP ROUTINES ---
# perl whobot.pl INTERWIKI_LOOP
#
## This command is for remedial cleanup only, and so is probably not
## useful anymore. This loop takes input of the form:
## "ArticleName\tBodyText\n{repeat...}" on STDIN.
#
sub interwikiLoop
{
    # Remedial cleanup driver: read "ArticleName\tBodyText\n" records
    # from STDIN and run each through enforceCategoryInterwiki(),
    # printing a running iteration count.
    my ($article, $text, $iterations);
    while (my $record = <STDIN>)
    {
        # Skip blank lines.
        next if $record =~ m/^\s*$/;
        ($article, $text) = (split ("\t", $record))[0, 1];
        # Body text arrives with newlines escaped as literal "\n".
        $text =~ s/\\n/\n/g;
        enforceCategoryInterwiki($article, $text);
        $iterations++;
        print STDOUT "\r interwikiLoop iteration " . $iterations;
    }
}
# perl whobot.pl ENFORCE_CATEGORY_INTERWIKI Article_name
#
## This function is for both internal and external use. From the command line, use
## it to tidy up a live page's category and interwiki tags, specifying
## only the name of the page. It can also be used by interwikiLoop(),
## which supplies the full text on its own. It will post any changes
## to the live wiki that involve anything more than whitespace
## changes.
##
## This function also does {{msg:foo}} -> {{foo}} conversion, so that
## the article parsing algorithm can be recycled.
#
sub enforceCategoryInterwiki
{
# Tidy one page's category/interwiki tags via fixCategoryInterwiki().
# With only $_[0] (a page name) it works "online": fetches the page
# and posts any substantial change back. With $_[1] (page text,
# supplied by interwikiLoop()) it works offline and appends changed
# pages to a local fixme file instead of posting.
my ($articleName, $text, $editTime, $startTime, $textOrig, @newLines, $line,
$textCopy, $textOrigCopy, $message, @junk, $diff, $token,
$online);
$articleName = $_[0];
myLog("enforceCategoryInterwiki($articleName)\n");
$text = $_[1];
$online = 0;
# No text supplied -> online mode: fetch the live page (and the edit
# credentials needed to post it back).
if ($text eq "")
{
$online = 1;
($text, $editTime, $startTime, $token) = getPage($articleName);
}
$textOrig = $text;
($text, $message) = fixCategoryInterwiki($text);
# Only act when the change is more than whitespace.
if (substantiallyDifferent($text, $textOrig))
{
@newLines = split ("\n", $text);
$textCopy = $text;
$textOrigCopy = $textOrig;
# Log a unified diff of the change via external diff(1).
# NOTE(review): fixed, predictable /tmp names (only $$-qualified);
# consider File::Temp if this ever runs on a shared host.
open (ONE, ">/tmp/article1.$$");
print ONE $textOrig;
close (ONE);
open (TWO, ">/tmp/article2.$$");
print TWO $text;
close (TWO);
$diff = `diff /tmp/article1.$$ /tmp/article2.$$`;
unlink("/tmp/article1.$$");
unlink("/tmp/article2.$$");
myLog("*** $articleName - $message\n");
myLog("*** DIFF FOR $articleName\n");
myLog($diff);
if ($online == 0)
{
# Isolate changed files for later runs
open (FIXME, ">>./fixme.interwiki.txt.$$");
$text =~ s/\t/\\t/g;
$text =~ s/\n/\\n/g;
print FIXME $articleName."\t".$text."\n";
close (FIXME);
}
myLog($articleName." changed by fixCategoryInterwiki(): $message\n");
print STDOUT $articleName." changed by fixCategoryInterwiki(): $message\n";
if ($online == 1)
{
postPage ($articleName, $editTime, $startTime, $token, $text, $message, "yes");
}
}
else
{
print STDOUT "--- No change for ${articleName}.\n";
myLog ("--- No change for ${articleName}.\n");
### TEMPORARY ###
### Uncomment this line if you want category changes to
### trigger null edits. This is useful if you have
### changed the category on a template, but due to a bug this
### does not actually move member articles until they are
### edited.
### NOTE(review): contrary to the note above, the postPage() call
### below is currently LIVE, so every no-change page gets a null
### edit; and when $online == 0, $editTime/$startTime/$token are
### undefined here -- confirm both are intended.
postPage ($articleName, $editTime, $startTime, $token, $textOrig, "null edit", "yes");
### TEMPORARY ###
}
}
sub substantiallyDifferent
{
    # Return true if the two strings differ by more than whitespace
    # (all whitespace is stripped before comparison).
    # IDIOM FIX: the locals were previously named $a/$b, shadowing
    # Perl's global sort comparison variables -- a well-known trap.
    my ($first, $second) = @_;
    $first =~ s/\s//g;
    $second =~ s/\s//g;
    return ($first ne $second);
}
# Given some wikicode as input, this function will tidy up the
# category and interwiki links and return the result and a comment
# suitable for edit summaries.
sub fixCategoryInterwiki
{
my ($input, @segmentNames, @segmentContents, $langlist, $i,
$message, $output, $flagForReview, $interwikiBlock,
$categoryBlock, $flagError, $bodyBlock, $contents, $name,
@interwikiNames, @interwikiContents, @categoryNames,
@categoryContents, @bodyNames, @bodyContents, $bodyFlag,
@bottomNames, @bottomContents, @segmentNamesNew,
@segmentContentsNew, $lastContents, @stubContents,
@stubNames, $stubBlock, $msgFlag);
$input = $_[0];
# The algorithm here is complex. The general idea is to split the
# page in to segments, each of which is assigned a type, and then
# to rearrange, consolidate, and frob the segments as needed.
# Start with one segment that includes the whole page.
@::segmentNames = ("bodyText");
@::segmentContents = ($input);
# Recognize and tag certain types of segments. The order of
# processing is very important.
metaTagInterwiki("nowiki", "^(.*?)(\s*.*?\s*)");
metaTagInterwiki("comment", "^(.*?)(<!.*?>\\n?)");
metaTagInterwiki("html", "^(.*?)(<.*?>\\n?)");
metaTagInterwiki("category", "^(.*?)(\\[\\[\\s*Category\\s*:\\s*.*?\\]\\]\\n?)");
$langlist = `type langlist`;
$langlist =~ s/^\s*//s;
$langlist =~ s/\s*$//s;
$langlist =~ s/\n/\|/gs;
$langlist .= "|minnan|zh\-cn|zh\-tw|nb";
metaTagInterwiki("interwiki", "^(.*?)(\\[\\[\\s*($langlist)\\s*:\\s*.*?\\]\\]\\n?)");
metaTagInterwiki("tag", "^(.*?)(\{\{.*?\}\})");
# Allow category and interwiki segments to be followed by HTML
# comments only (plus any intervening whitespace).
$i = 0;
while ($i < @::segmentNames)
{
$name = $::segmentNames[$i];
$contents = $::segmentContents[$i];
# {{msg:foo}} -> {{foo}} conversion
if (($name eq "tag") and
($contents =~ m/^{{msg:(.*?)}}/))
{
$msgFlag = 1;
$contents =~ s/^{{msg:(.*?)}}/{{$1}}/;
}
if (($name eq "category") or ($name eq "interwiki"))
{
if (!($contents =~ m/\n/) and ($::segmentNames[$i+1] eq "comment"))
{
push (@segmentNamesNew, $name);
push (@segmentContentsNew, $contents.$::segmentContents[$i+1]);
$i += 2;
# DEBUG print "AAA - ".$contents.$::segmentContents[$i+1]);
next;
}
if (!($contents =~ m/\n/)
and ($::segmentNames[$i+1] eq "bodyText")
and ($::segmentContents[$i+1] =~ m/^\s*$/)
and !($::segmentContents[$i+1] =~ m/^\n$/)
and ($::segmentNames[$i+2] eq "comment")
)
{
push (@segmentNamesNew, $name);
push (@segmentContentsNew,
$contents.$::segmentContents[$i+1].$::segmentContents[$i+2]);
$i += 3;
# DEBUG print "BBB".$contents.$::segmentContents[$i+1].$::segmentContents[$i+2]);
next;
}
# Consolidate with any following whitespace
if (($::segmentNames[$i+1] eq "bodyText")
and ($::segmentContents[$i+1] =~ m/^\s*$/)
)
{
push (@segmentNamesNew, $name);
push (@segmentContentsNew,
$contents.$::segmentContents[$i+1]);
$i += 2;
next;
}
}
push (@segmentNamesNew, $name);
push (@segmentContentsNew, $contents);
$i++;
}
# Clean up results
@::segmentNames = @segmentNamesNew;
@::segmentContents = @segmentContentsNew;
@segmentContentsNew = ();
@segmentNamesNew = ();
# Move category and interwiki tags that precede the body text (at
# the top of the page) to the bottom of the page.
$bodyFlag = 0;
foreach $i (0 ... @::segmentNames-1)
{
$name = $::segmentNames[$i];
$contents = $::segmentContents[$i];
if ($bodyFlag == 1)
{
push (@segmentNamesNew, $name);
push (@segmentContentsNew, $contents);
}
elsif (($name eq "category") or ($name eq "interwiki"))
{
push (@bottomNames, $name);
push (@bottomContents, $contents);
}
else
{
push (@segmentNamesNew, $name);
push (@segmentContentsNew, $contents);
$bodyFlag = 1;
}
}
# Clean up results
@::segmentNames = (@segmentNamesNew, @bottomNames);
@::segmentContents = (@segmentContentsNew, @bottomContents);
@segmentContentsNew = ();
@segmentNamesNew = ();
@bottomNames = ();
@bottomContents = ();
# Starting at the bottom of the page, isolate category, interwiki,
# and body text. If categories or interwiki links are mixed with
# body text, flag for human review.
### DEBUG ###
# foreach $i (0 ... @::segmentNames-1)
# {
# print "---$i ".$::segmentNames[$i]."---\n";
# print "%%%".$::segmentContents[$i]."%%%\n";
# }
### DEBUG ###
### DEBUG ###
#my ($first);
#$first = 1;
### DEBUG ###
$bodyFlag = 0;
$flagForReview = 0;
foreach $i (reverse(0 ... @::segmentNames-1))
{
$name = $::segmentNames[$i];
$contents = $::segmentContents[$i];
if (($name eq "category") and ($bodyFlag == 0))
{
# Push in reverse
@categoryNames = ($name, @categoryNames);
@categoryContents = ($contents, @categoryContents);
next;
}
elsif (($name eq "interwiki") and ($bodyFlag == 0))
{
# Push in reverse
@interwikiNames = ($name, @interwikiNames);
@interwikiContents = ($contents, @interwikiContents);
next;
}
elsif (($bodyFlag == 0)
and ($name eq "tag")
and (($contents =~ m/\{\{[ \w\-]+[\- ]?stub\}\}/) or
($contents =~ m/\{\{[ \w\-]+[\- ]?stub\|.*?\}\}/)))
{
### IF THIS IS A STUB TAG AND WE ARE STILL $bodyFlag == 0,
### THEN ADD THIS TO $stubBlock!
# Canonicalize by making {{msg:Foo}} into {{Foo}}
s/^\{\{\s*msg:(.*?)\}\}/\{\{$1\}\}/i;
# Push in reverse
@stubNames = ($name, @stubNames);
@stubContents = ($contents, @stubContents);
next;
}
elsif (($name eq "category") or ($name eq "interwiki"))
# bodyFlag implicitly == 1
{
if ($flagForReview == 0)
{
$flagForReview = 1;
$lastContents =~ s/^\s*//s;
$lastContents =~ s/\s*$//s;
$flagError = substr ($lastContents, 0, 30);
}
# Drop down to push onto main body stack.
}
# Handle this below instead.
## Skip whitespace
#if (($contents =~ m/^\s*$/s) and ($bodyFlag == 0))
#{
# next;
#}
# Delete these comments
if (($bodyFlag == 0) and ($name == "comment"))
{
if (
($contents =~ m/<!--\s*interwiki links\s*-->/i) or
($contents =~ m/<!--\s*interwiki\s*-->/i) or
($contents =~ m/<!--\s*interlanguage links\s*-->/i) or
($contents =~ m/<!--\s*categories\s*-->/i) or
($contents =~ m/<!--\s*other languages\s*-->/i) or
($contents =~ m/<!--\s*The below are interlanguage links.\s*-->/i)
)
{
### DEBUG ###
#print STDOUT ("YELP!\n");
#
#foreach $i (0 ... @bodyNames-1)
#{
# print "---$i ".$bodyNames[$i]."---\n";
# print "%%%".$bodyContents[$i]."%%%\n";
#}
#
#print STDOUT ("END-YELP!");
### DEBUG ###
next;
}
}
# Push onto main body stack (in reverse).
@bodyNames = ($name, @bodyNames);
@bodyContents = ($contents, @bodyContents);
### DEBUG ###
#if (($flagForReview == 1) and ($first == 1))
#{
# $first = 0;
# print "\@\@\@${lastContents}\#\#\#\n";
#}
### DEBUG ###
# This should let tags mixed in with the category and
# interwiki links (not comingled with body text) bubble up.
unless (($contents =~ m/^\s*$/s) or ($name eq "tag"))
{
$bodyFlag = 1;
}
$lastContents = $contents;
}
### DEBUG ###
# foreach $i (0 ... @bodyNames-1)
# {
# print "---$i ".$bodyNames[$i]."---\n";
# print "%%%".$bodyContents[$i]."%%%\n";
# }
# foreach $i (0 ... @categoryNames-1)
# {
# print "---$i ".$categoryNames[$i]."---\n";
# print "^^^".$categoryContents[$i]."^^^\n";
# }
# foreach $i (0 ... @interwikiNames-1)
# {
# print "---$i ".$interwikiNames[$i]."---\n";
# print "&&&".$interwikiContents[$i]."&&&\n";
# }
### DEBUG ###
# Assemble body text, category, interwiki, and stub arrays into strings
foreach $i (0 ... @bodyNames-1)
{
$name = $bodyNames[$i];
$contents = $bodyContents[$i];
$bodyBlock .= $contents;
}
foreach $i (0 ... @categoryNames-1)
{
$name = $categoryNames[$i];
$contents = $categoryContents[$i];
# Enforce style conventions
$contents =~ s/\[\[category\s*:\s*/\[\[Category:/i;
# Enforce a single newline at the end of each category line.
$contents =~ s/\s*$//;
$categoryBlock .= $contents."\n";
}
foreach $i (0 ... @interwikiNames-1)
{
$name = $interwikiNames[$i];
$contents = $interwikiContents[$i];
# Canonicalize minnan to zh-min-nan, since that's what's in
# the officially distributed langlist.
$contents =~ s/^\[\[minnan:/\[\[zh-min-nan:/;
# Canonicalize zh-ch, Chinese (simplified) and zn-tw, Chinese
# (traditional) to "zh"; the distinction is being managed
# implicitly by software now, not explicitly in wikicode.
$contents =~ s/^\[\[zh-cn:/\[\[zh:/g;
$contents =~ s/^\[\[zh-tw:/\[\[zh:/g;
# Canonicalize nb to no
$contents =~ s/^\[\[nb:/\[\[no:/g;
# Canonicalize dk to da
$contents =~ s/^\[\[dk:/\[\[da:/g;
# Enforce a single newline at the end of each interwiki line.
$contents =~ s/\s*$//;
$interwikiBlock .= $contents."\n";
}
foreach $i (0 ... @stubNames-1)
{
$name = $stubNames[$i];
$contents = $stubContents[$i];
# Enforce a single newline at the end of each stub line.
$contents =~ s/\s*$//;
$contents =~ s/^\s*//;
$stubBlock .= $contents."\n";
}
# Minimize interblock whitespace
$bodyBlock =~ s/^\s*//s;
$bodyBlock =~ s/\s*$//s;
$categoryBlock =~ s/^\s*//s;
$categoryBlock =~ s/\s*$//s;
$interwikiBlock =~ s/^\s*//s;
$interwikiBlock =~ s/\s*$//s;
$stubBlock =~ s/^\s*//s;
$stubBlock =~ s/\s*$//s;
# Assemble the three blocks into a single string, flagging for
# human review if necessary.
$output = "";
if ($bodyBlock ne "")
{
$output .= $bodyBlock."\n\n";
}
if (($flagForReview == 1)
and !($input =~ m/\{\{interwiki-category-check/)
and !($input =~ m/\{\{split/)
and !($input =~ m/\[\[Category:Pages for deletion\]\]/))
{
$output .= "{{interwiki-category-check|${flagError}}}\n\n";
}
if ($categoryBlock ne "")
{
$output .= $categoryBlock."\n";
}
if ($interwikiBlock ne "")
{
# $output .= "<!-- The below are interlanguage links. -->\n".$interwikiBlock."\n";
$output .= $interwikiBlock."\n";
}
if ($stubBlock ne "")
{
$output .= $stubBlock."\n";
}
if ($input ne $output)
{
$message = "Minor category, interwiki, or template style cleanup";
if ($flagForReview == 1)
{
$message = "Flagged for manual review of category/interwiki style";
}
if ($msgFlag == 1)
{
$message .= "; {{msg:foo}} -> {{foo}} conversion for MediaWiki 1.5+ compatibility";
}
}
else
{
$message = "No change";
}
return($output, $message);
}
#sub displayInterwiki
#{
# my ($i);
# ## THIS FUNCTION CANNOT BE CALLED DUE TO SCOPING; YOU MUST MANUALLY
# ## COPY THIS TEXT INTO fixCategoryInterwiki(). IT IS ONLY USEFUL
# ## FOR DIAGNOSTIC PURPOSES.
#
# foreach $i (0 ... @::segmentNames-1)
# {
# print "---$i ".$::segmentNames[$i]."---\n";
# print "%%%".$::segmentContents[$i]."%%%\n";
# }
#}
# A subroutine of fixCategoryInterwiki(), this function isolates
# certain parts of existing segments based on a regular expression
# pattern, and tags them with the supplied name (which indicates their
# type). Sorry for the global variables.
sub metaTagInterwiki
{
# $tag     - segment name to assign to each extracted piece (e.g. "interwiki").
# $pattern - regex applied with /i/s to each "bodyText" segment; must contain
#            two capture groups: $1 = body text preceding the match,
#            $2 = the matched meta text (category/interwiki link/etc.).
# Operates in place on the globals @::segmentNames/@::segmentContents.
my ($tag, $pattern, $i, $meta, $body, @segmentNamesNew,
@segmentContentsNew, $name, $contents, $bodyText, );
$tag = $_[0];
$pattern = $_[1];
# Rebuild the segment lists, splitting body-text segments wherever the
# pattern matches.
foreach $i (0 ... @::segmentNames-1)
{
$name = $::segmentNames[$i];
$contents = $::segmentContents[$i];
# Segments that are not body text (comments, tags, ...) pass through
# unchanged.
unless ($name eq "bodyText")
{
push (@segmentNamesNew, $name);
push (@segmentContentsNew, $contents);
next;
}
# Repeatedly peel matches off this body segment until none remain.
while (1)
{
if ($contents =~ m%$pattern%is)
{
$bodyText = $1;
$meta = $2;
# Body text preceding the match survives as its own segment.
if ($bodyText ne "")
{
push (@segmentNamesNew, "bodyText");
push (@segmentContentsNew, $bodyText);
}
# The matched meta text becomes a new segment named $tag.
push (@segmentNamesNew, $tag);
push (@segmentContentsNew, $meta);
# Remove the consumed text from the working copy, matching it
# literally via \Q...\E.
# NOTE(review): this deletes the FIRST literal occurrence, which is
# presumably the one just matched -- confirm for segments where the
# same text could also occur later.
$contents =~ s/\Q${bodyText}${meta}\E//s;
}
else
{
# No more matches: keep whatever body text is left over.
if ($contents ne "")
{
push (@segmentNamesNew, $name);
push (@segmentContentsNew, $contents);
}
last;
}
}
}
# Install the rebuilt lists into the globals and clear the temporaries.
@::segmentNames = @segmentNamesNew;
@::segmentContents = @segmentContentsNew;
@segmentContentsNew = ();
@segmentNamesNew = ();
}
# Perform a "null edit": fetch a page and immediately re-save it with the
# comment "null edit".  Used to force the wiki to refresh the page's
# category membership and caches.
sub nullEdit
{
    my ($pageTitle) = @_;
    print "nullEdit($pageTitle)\n";
    myLog("nullEdit($pageTitle)\n");
    my ($pageText, $editStamp, $startStamp, $editToken) = getPage($pageTitle);
    postPage($pageTitle, $editStamp, $startStamp, $editToken, $pageText, "null edit");
}
# Walk every article in Category:Wikipedia cleanup in alphabetical order,
# skipping non-article namespaces, and date-stamp each one's cleanup tag
# via cleanupDateArticle().  Throttled by limit() between articles.
sub cleanupDate
{
    my @queue = sort(getCategoryArticles("Category:Wikipedia cleanup"));
    foreach my $title (@queue)
    {
        # Skip project, template, user, and all talk pages.
        next if $title =~ m/^Wikipedia:/
             or $title =~ m/^Template:/
             or $title =~ m/^User:/
             or $title =~ m/talk:/i;
        cleanupDateArticle($title);
        limit();
    }
}
# Determine when the cleanup tag was added to $target by walking its
# revision history backward (one probe per calendar month), then replace
# the bare tag with a dated {{cleanup-date|Month Year}} tag.  Pages whose
# tag turns out to be a section-cleanup variant just get a null edit.
# Dies when the 500-row history window is exhausted without an answer.
sub cleanupDateArticle    #($target)
{
    my ($target) = @_;
    print "cleanupDateArticle($target)\n";

    # Tags meaning only a *section* needs cleanup: touch the page so the
    # wiki refreshes its categories, then stop without dating anything.
    # (This table replaces seven copy-pasted checkForTag blocks.)
    my @sectionTags = ("sectionclean", "Sect-Cleanup", "section cleanup",
                       "sectcleanup", "cleanup-section", "cleanup-list",
                       "cleanup-nonsense");
    # Whole-article cleanup tags, probed in the original order.
    my @cleanupTags = ("cleanup", "clean", "CU", "cu", "cleanup-quality", "tidy");

    my @history = parseHistory($target);
    # History lines look like "Month Year link", newest first.
    my ($currentMonth, $currentYear, $junk) = split(" ", $history[0]);
    my $found = "";
    my $lineCounter = 0;    # was previously left uninitialized
    REVISION: foreach my $line (@history)
    {
        $lineCounter++;
        my ($month, $year, $link) = split(" ", $line);
        # Only inspect one revision per calendar month.
        next REVISION if (($month eq $currentMonth) and ($year eq $currentYear));
        # NOTE: we deliberately do NOT skip {{protected}} pages here; doing
        # so produced false positives on articles protected at the end of
        # last month but no longer protected now.
        foreach my $tag (@sectionTags)
        {
            if (checkForTag($tag, $link) eq "yes")
            {
                print "$target has {{$tag}}\n";
                myLog("$target has {{$tag}}\n");
                nullEdit($target);
                return ();
            }
        }
        my $hasCleanupTag = 0;
        foreach my $tag (@cleanupTags)
        {
            if (checkForTag($tag, $link) eq "yes")
            {
                $hasCleanupTag = 1;
                last;
            }
        }
        if ($hasCleanupTag)
        {
            # Tag still present in this month's revision; keep walking back.
            $currentMonth = $month;
            $currentYear = $year;
            next REVISION;
        }
        # First revision without the tag: the tag was added in the month of
        # the most recent tagged revision we saw.
        $found = "Tag added $currentMonth $currentYear\n";
        last REVISION;
    }
    if ($found eq "")
    {
        # History exhausted.  If we saw the complete history (fewer than
        # ~500 rows) the tag dates from the oldest tagged revision;
        # otherwise we genuinely cannot tell, so abort.
        if ($lineCounter < 498)
        {
            $found = "Tag added $currentMonth $currentYear\n";
        }
        else
        {
            myLog("Unable to determine when tag was added to $target.\n");
            die("Unable to determine when tag was added to $target.\n");
        }
    }
    if ($found ne "")
    {
        my $datedTag = "cleanup-date|${currentMonth} ${currentYear}";
        # Try each spelling of the cleanup tag until one converts; fall back
        # to a null edit so the page is at least touched.
        changeTag("cleanup", $datedTag, $target)
            || changeTag("tidy", $datedTag, $target)
            || changeTag("CU", $datedTag, $target)
            || changeTag("cu", $datedTag, $target)
            || changeTag("cleanup-quality", $datedTag, $target)
            || changeTag("clean", $datedTag, $target)
            || nullEdit($target);
    }
}
# Replace {{$tagFrom}} with {{$tagTo}} in $articleName and post the
# result.  Recognizes the {{msg:...}}, {{Template:...}}, ucfirst, and
# parameterized ({{tag|...}}) spellings of the source tag.
# Returns 1 on success, 0 when the tag is absent; dies on redirects, on
# pages already carrying the target tag, and on pages where the tag
# appears inside <nowiki>/<pre>.
sub changeTag
{
    my ($tagFrom, $tagTo, $articleName) = @_;   # e.g. ("cleanup", "cleanup-date|August 2005", title)
    print "changeTag (${tagFrom}, ${tagTo}, ${articleName})\n";
    myLog("changeTag (${tagFrom}, ${tagTo}, ${articleName})\n");
    my $tagFromUpper = ucfirst($tagFrom);
    my $tagToUpper = ucfirst($tagTo);
    if ($articleName =~ m/^\s*$/)
    {
        myLog("changeTag(): Null target.");
        die("changeTag(): Null target.");
    }
    my ($text, $editTime, $startTime, $token) = getPage($articleName);
    # Bail out quietly when none of the recognized spellings is present.
    unless (($text =~ m/\{\{\s*\Q$tagFrom\E\s*\}\}/)
        or ($text =~ m/\{\{\s*\Q$tagFromUpper\E\s*\}\}/)
        or ($text =~ m/\{\{\s*\Qmsg:$tagFrom\E\s*\}\}/)
        or ($text =~ m/\{\{\s*\Qmsg:$tagFromUpper\E\s*\}\}/)
        or ($text =~ m/\{\{\s*\QTemplate:$tagFrom\E\s*\}\}/)
        or ($text =~ m/\{\{\s*\QTemplate:$tagFromUpper\E\s*\}\}/)
        or ($text =~ m/\{\{\s*\Q$tagFrom\E\|.*?\s*\}\}/)
        or ($text =~ m/\{\{\s*\Q$tagFromUpper\E\|.*?\s*\}\}/)
        )
    {
        myLog("changeTag(): {{$tagFrom}} is not in $articleName.\n");
        print "changeTag(): {{$tagFrom}} is not in $articleName.\n";
        # Just skip articles with {{tidy}}, {{clean}} {{sectionclean}}, {{advert}}, etc.
        sleep(1); # READ THROTTLE!
        return(0);
    }
    if (($text =~ m/\{\{\s*\Q$tagTo\E\s*\}\}/)
        or ($text =~ m/\{\{\s*\Q$tagToUpper\E\s*\}\}/))
    {
        myLog("changeTag(): $articleName already contains {{$tagTo}}.");
        die("changeTag(): $articleName already contains {{$tagTo}}.");
    }
    if ($text =~ m/^\s*\#REDIRECT/is)
    {
        # BUGFIX: a sleep(1)/return(0) used to follow this die; that code
        # was unreachable and has been removed.
        myLog ("changeTag.a(): $articleName is a redirect!\n");
        die ("changeTag.a(): $articleName is a redirect!\n");
    }
    # BUGFIX: the old manual escaping of "(", ")" and "'" in $tagFrom has
    # been removed.  Every later use of $tagFrom sits inside \Q...\E,
    # which already quotes metacharacters; pre-inserting backslashes made
    # \Q...\E search for literal backslashes and broke matching for any
    # tag containing those characters ($tagFromUpper was never escaped,
    # either).
    # We're lazy and don't fully parse the document to properly check
    # for escaped tags, so there may be some unnecessary aborts from
    # the following, but they are rare and easily overridden by
    # manually editing the page in question.
    if (($text =~ m/<nowiki>.*?\Q$tagFrom\E.*?<\/nowiki>/is) or
        ($text =~ m/<pre>.*?\Q$tagFrom\E.*?<\/pre>/is))
    # <pre>  (comment balances the literal tag above for editors)
    {
        myLog ("changeTag.r(): $articleName has a dangerous nowiki or pre tag!\n");
        die ("changeTag.r(): $articleName has a dangerous nowiki or pre tag!\n");
    }
    # Make the swap!  One substitution per recognized spelling.
    $text =~ s/\{\{\s*\Q$tagFrom\E\s*\}\}/{{${tagTo}}}/g;
    $text =~ s/\{\{\s*\Q$tagFromUpper\E\s*\}\}/{{${tagTo}}}/g;
    $text =~ s/\{\{\s*\Qmsg:$tagFrom\E\s*\}\}/{{${tagTo}}}/g;
    $text =~ s/\{\{\s*\Qmsg:$tagFromUpper\E\s*\}\}/{{${tagTo}}}/g;
    $text =~ s/\{\{\s*\QTemplate:$tagFrom\E\s*\}\}/{{${tagTo}}}/g;
    $text =~ s/\{\{\s*\QTemplate:$tagFromUpper\E\s*\}\}/{{${tagTo}}}/g;
    $text =~ s/\{\{\s*\Q$tagFrom\E\|(.*?)\s*\}\}/{{${tagTo}}}/g;
    $text =~ s/\{\{\s*\Q$tagFromUpper\E\|(.*?)\s*\}\}/{{${tagTo}}}/g;
    # Tidy up the article in general
    my ($newText, $junk) = fixCategoryInterwiki($text);
    # Post the changes
    my $comment = "Changing \{\{${tagFrom}\}\} to \{\{${tagTo}\}\}";
    postPage ($articleName, $editTime, $startTime, $token, $newText, $comment, "yes");
    return (1);
}
# Fetch the most recent 500 revisions of $pageName and return a list of
# strings "Month YYYY link", newest first, one per history row.  $link is
# the href of the row's revision anchor.  Dies when a row's month cannot
# be recognized.
sub parseHistory
{
    my ($pageName) = @_;
    # Ordered abbreviation => full-name table; replaces the old
    # twelve-branch elsif chain.  Checked in calendar order, first
    # substring match wins (same behavior as before).
    my @monthTable = (
        [ "Jan", "January" ],   [ "Feb", "February" ],
        [ "Mar", "March" ],     [ "Apr", "April" ],
        [ "May", "May" ],       [ "Jun", "June" ],
        [ "Jul", "July" ],      [ "Aug", "August" ],
        [ "Sep", "September" ], [ "Oct", "October" ],
        [ "Nov", "November" ],  [ "Dec", "December" ],
    );
    my $html = getURL("http://en.wikipedia.org/w/wiki.phtml?title=${pageName}&action=history&limit=500");
    # Strip everything outside the history list, then split into rows.
    $html =~ s%^.*?<ul id="pagehistory">%%s;
    $html =~ s%(.*?)</ul>.*$%$1%s;
    $html =~ s%</li>\s*%%s;
    my @result;
    foreach my $line (split ("</li>", $html))
    {
        $line =~ s/\n/ /g;
        next if $line =~ m/^\s*$/;
        # Trim navigation chrome so the first remaining anchor is the
        # revision link.
        $line =~ s%<span class='user'>.*?$%%;
        $line =~ s%^.*?Select a newer version for comparison%%;
        $line =~ s%^.*?Select a older version for comparison%%;
        $line =~ s%^.*?name="diff" />%%;
        $line =~ m%<a href="(.*?)" title="(.*?)">(.*?)</a>%;
        my $link = $1;    # revision URL
        my $date = $3;    # human-readable timestamp (anchor text)
        my $month;
        foreach my $entry (@monthTable)
        {
            if ($date =~ m/$entry->[0]/)
            {
                $month = $entry->[1];
                last;
            }
        }
        unless (defined $month)
        {
            myLog ("Unknown month - parse failure! $line\nHTML:\n$html\n");
            die ("Unknown month - parse failure! (see log) LINE: $line\n");
        }
        # NOTE(review): if no 4-digit year is present, $1 retains the value
        # from the previous successful match (same as the original code) --
        # confirm history rows always carry a year.
        $date =~ m/(\d\d\d\d)/;
        my $year = $1;
        push (@result, "$month $year $link");
    }
    return (@result);
}
# Return "yes" if the edit source of $target (a revision URL, possibly
# relative and HTML-escaped) contains {{$tag}} or its ucfirst form
# ({{Tag}}), "no" otherwise.  The last fetched page is cached in
# $::cachedTarget/$::cachedText so repeated probes of the same revision
# cost only one network fetch.
sub checkForTag    #($targetURLWithOldIDAttached)
{
    my ($tag, $target) = @_;
    # Must be absolute; assuming English Wikipedia here.
    $target =~ s%^/w/wiki.phtml%http://en.wikipedia.org/w/wiki.phtml%;
    # Decode HTML entities in links.
    # BUGFIX: this line previously read s/\&/\&/g -- a no-op that replaced
    # "&" with "&" -- so "&amp;" was never collapsed to "&" and the URL
    # (and the cache key) stayed HTML-escaped.
    $target =~ s/&amp;/&/g;
    my $text;
    if ($target eq $::cachedTarget)
    {
        $text = $::cachedText;
    }
    else
    {
        $text = getURL ($target."&action=edit");
        $::cachedTarget = $target;
        $::cachedText = $text;
    }
    return "yes" if $text =~ m/\{\{\s*\Q$tag\E\s*\}\}/;
    # Retry with the capitalized spelling ({{Cleanup}} vs {{cleanup}}).
    $tag = ucfirst($tag);
    return "yes" if $text =~ m/\{\{\s*\Q$tag\E\s*\}\}/;
    return "no";
}
sub getURL #($target)
{
# Fetch $_[0] with the shared LWP user agent ($::ua) and return the
# response body.  Retries via retry() on 500/502/503; dies on any other
# HTTP error.  Sleeps one second per call as a read throttle.
# Read throttle!
sleep (1);
my ($attemptStartTime, $attemptFinishTime, $request, $response, $reply, $url);
$url = $_[0];
# Monitor wiki server responsiveness
$attemptStartTime = Time::HiRes::time();
# Create a request-object
print "GET ${url}\n";
myLog("GET ${url}\n");
$request = HTTP::Request->new(GET => "${url}");
$response = $::ua->request($request);
if ($response->is_success)
{
$reply = $response->content;
# Monitor wiki server responsiveness
$attemptFinishTime = Time::HiRes::time();
# NOTE(review): retry("success", ...) appears to report the request
# duration / reset failure tracking; retry() is defined elsewhere in
# this file -- confirm its semantics there.
retry ("success", "getURL", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));
# This may or may not actually work
$::ua->cookie_jar->save();
return ($reply);
}
else
{
myLog ("getURL(): HTTP ERR (".$response->status_line.") ${url}\n".$response->content."\n");
print ("getURL(): HTTP ERR (".$response->status_line.") ${url}\n".$response->content."\n");
# 50X HTTP errors mean there is a problem connecting to the wiki server
if (($response->status_line =~ m/^500/)
or ($response->status_line =~ m/^502/)
or ($response->status_line =~ m/^503/))
{
# Transient server trouble: let retry() back off and re-invoke this
# function with the original arguments.
return(retry("getURL", @_));
}
else
{
# Unhandled HTTP response
die ("getURL(): HTTP ERR (".$response->status_line.") ${url}\n");
}
}
}
# Refresh the rotating task lists on [[User:Beland/workspace]] (NPOV,
# copyedit, wikify, merge) and persist the per-article feature counts
# (%::history) to disk between runs.
sub opentaskUpdate
{
    my $target = "User:Beland/workspace";
    my $historyFile = "/home/beland/wikipedia/pearle-wisebot/opentask-history.pl";
    my ($opentaskText, $editTime, $startTime, $token) = getPage($target);
    # Reload %::history from the previous run.  BUGFIX: this used to be
    # eval(`type $historyFile`), which shells out to the Windows-only
    # "type" command (against a Unix path, no less); do() reads and
    # evaluates the file portably with no shell involved.
    do $historyFile;
    $opentaskText = doOpentaskUpdate("NPOV",
                                     "Category:NPOV disputes",
                                     $opentaskText);
    $opentaskText = doOpentaskUpdate("COPYEDIT",
                                     "Category:Wikipedia articles needing copy edit",
                                     $opentaskText);
    $opentaskText = doOpentaskUpdate("WIKIFY",
                                     "Category:Articles that need to be wikified",
                                     $opentaskText);
    $opentaskText = doOpentaskUpdate("MERGE",
                                     "Category:Articles to be merged",
                                     $opentaskText);
    # Serialize %::history as Perl source for the next run's do().
    my $historyDump = "\%::history = (\n";
    foreach my $key (sort(keys(%::history)))
    {
        $historyDump .= "\"${key}\" => \"".$::history{$key}."\",\n";
    }
    $historyDump =~ s/,\n$//s;    # drop the trailing comma
    $historyDump .= "\n)\n";
    # BUGFIX: the old bareword, two-argument open was never checked for
    # failure; a full disk or bad path silently lost the history.
    open (my $historyFH, ">", $historyFile)
        or die "opentaskUpdate(): cannot write $historyFile: $!\n";
    print {$historyFH} $historyDump;
    close ($historyFH)
        or die "opentaskUpdate(): cannot close $historyFile: $!\n";
    postPage ($target, $editTime, $startTime, $token, $opentaskText, "Automatic rotation of NPOV, copyedit, wikify, and merge", "yes");
}
# Rotate the featured-article list for one opentask section.
#   $_[0] - marker ID inside the HTML comments (e.g. "NPOV")
#   $_[1] - source category to draw articles from
#   $_[2] - current opentask page text
#   $_[3] - optional character budget for the list (default 100;
#           generalized from the old hard-coded constant -- existing
#           three-argument callers are unaffected)
# Returns the page text with the span between
# <!--START-WHOBOT-INSERT-ID--> and <!--END-WHOBOT-INSERT-ID-->
# replaced by a fresh random selection.  Bumps %::history counts for
# each article featured.
sub doOpentaskUpdate
{
    my ($categoryID, $sourceCategory, $opentaskText, $characterLimit) = @_;
    $characterLimit = 100 unless defined $characterLimit;
    my %rank;
    my @articles = getCategoryArticles ($sourceCategory);
    # Shuffle and clean up article names; exclude unwanted namespaces.
    foreach my $article (@articles)
    {
        next if ($article =~ m/^Wikipedia:/)
             or ($article =~ m/^Template:/)
             or ($article =~ m/^User:/)
             or ($article =~ m/talk:/i);
        $article = urlDecode($article);
        $article =~ s/_/ /g;
        $rank{$article} = rand();    # random sort key
    }
    # BUGFIX: start from "" rather than undef so length() and .= below do
    # not operate on an uninitialized value.
    my $featuredString = "";
    # Pick as many articles as will fit in the space allowed.
    foreach my $article (sort {$rank{$a} <=> $rank{$b}} (keys(%rank)))
    {
        if (length($article)+1 < $characterLimit - length($featuredString))
        {
            $featuredString .= "[[${article}]],\n";
            # Record how many times each article is featured.
            $::history{"${article}-${categoryID}"}++;
        }
    }
    # Log the selection on one line.
    (my $featuredStringTmp = $featuredString) =~ s/\n/ /g;
    print "Featuring: $featuredStringTmp\n";
    myLog("Featuring: $featuredStringTmp\n");
    # Splice the selection between the marker comments and finish.
    $opentaskText =~ s/(<!--START-WHOBOT-INSERT-$categoryID-->).*?(<!--END-WHOBOT-INSERT-$categoryID-->)/${1}\n$featuredString${2}/gs;
    return ($opentaskText);
}