package tasks::OnThisDayTagger;
=begin metadata
Bot: AnomieBOT
Task: OnThisDayTagger
BRFA: Wikipedia:Bots/Requests for approval/AnomieBOT 30
Status: Approved 2009-05-26
Created: 2009-05-14
Process each anniversary article (e.g. [[Wikipedia:Selected
anniversaries/October 31]] for 2008-10-31, 2007-10-31, 2006-10-31, and so on)
to extract the bolded links and tag the corresponding article talk pages with
{{tl|OnThisDay}}.
=end metadata
use utf8;
use strict;
use AnomieBOT::Task qw/:time/;
use Data::Dumper;
use POSIX;
use Digest::SHA qw/sha256_base64/;
use vars qw/@ISA/;
# English month names in calendar order (index 0 is January).
my @months=qw/January February March April May June July August September October November December/;
# Non-capturing alternation that matches any single month name.
my $monthre="(?:".join("|",@months).")";
# Alternation of link targets that run() filters out of the extracted links
# (applied there as /$skip_links_re/o): "Month day" titles and the list pages
# named below.
my $skip_links_re=join('|',
"$monthre \\d{1,2}",
'List of historical anniversaries',
'List of days of the year',
# Template titles handed to redirects_to_resolved() in tag(); _strip_templates()
# declines to replace any template whose "Template:name" is in this list.
my @skip_templates=(
'Template:Archive box',
# Page passed as Pagename to $api->whine() when links could not be processed.
my $whine_to='User talk:AnomieBOT';
# Selects which branch of run() executes.
my $mode='live'; # 'list' or 'live'
# File opened for reading by run() when $mode is 'list'.
my $list='/tmp/otd-pages.txt';
# Constructor: obtains the object from the parent class's new() and blesses it
# into $class before returning it.
sub new {
my $class=shift;
my $self=$class->SUPER::new();
bless $self, $class;
return $self;
=for info
Approved 2009-05-26<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 30]]
# Returns the constant 2.
# NOTE(review): presumably an approval-status code consumed by the bot
# framework -- confirm against AnomieBOT::Task.
sub approved {
return 2;
# Main task entry point, called with the task object and the API handle.
# Depending on $mode it either works through the page list in $list ('list')
# or processes the anniversary page for day number $self->{'nextday'} ('live').
# Numeric return values are 300 after a 'shutoff' response, 60 after most other
# failures, and $next at the end of the live branch.
# NOTE(review): these are presumably seconds until the framework calls run()
# again -- confirm.
sub run {
my ($self, $api)=@_;
my $res;
$api->task('OnThisDayTagger', 0, 10, qw/d::Talk d::Timestamp d::Templates d::Redirects/);
# Suffix appended to every edit summary, pointing at the task's shutoff page.
my $screwup=' Errors? [[User:'.$api->user.'/shutoff/OnThisDayTagger]]';
# Seed the persistent day counter if it has never been stored.
$api->store->{'nextday'}=5643 unless exists($api->store->{'nextday'});
my $starttime=time;
# Day number (in the numbering used by day()) for the current time.
my $today=day_from_timestamp($starttime);
# ---- 'list' mode ----
if($mode eq 'list'){
die "Could not open $list: $!\n" unless open(X, '<:utf8', $list);
my %revs=();
# Query the revision ids and timestamps of the anniversary page for every
# calendar day of the year.
for(my $m=1; $m<=12; $m++){
for(my $d=1; $d<=31; $d++){
my $md=sprintf("%02d-%02d",$m,$d);
# Skip day numbers that do not exist in month $m: strftime normalizes them to
# a different month-day. Year 100 means 2000, a leap year, so 02-29 is kept.
next unless strftime("%m-%d",0,0,0,$d,$m-1,100) eq $md;
my $res=$api->query([],
titles => strftime("Wikipedia:Selected anniversaries/%B %-d",0,0,0,$d,$m-1,100),
prop => 'revisions',
rvprop => 'ids|timestamp',
rvlimit => 'max',
if($res->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$res->{'content'}."\n");
return 300;
if($res->{'code'} ne 'success'){
$api->warn("Failed to fetch revisions for $md: $res->{error}\n");
my %revids=();
# Walk every day number before today, looking at the revisions recorded for
# that day's month-day; $start is 00:00:00Z of the following day.
for(my $day=0; $day<$today; $day++){
my $start=$api->ISO2timestamp(day("%Y-%m-%dT00:00:00Z",$day+1));
foreach (@{$revs{day('%m-%d',$day)}}){
my $t=$api->ISO2timestamp($_->{'timestamp'});
my %pages=();
# Only text in $_ of the form "* [[Title]]: YYYY-MM-DD, YYYY-MM-DD, ..." is
# processed; $1 is the title and $2 the comma-separated date list.
next unless /^\* \[\[(.*?)\]\]: (\d{4}-\d{2}-\d{2}(?:, \d{4}-\d{2}-\d{2})*)\s*$/;
my $title=$1;
my @dates=split /, /, $2;
my %dates=();
# Every listed date must have an entry in %revids.
foreach (@dates){
die "Bad date: $_" unless exists($revids{$_});
# Check the page
my $res=$api->query(
titles => $title,
prop => 'categories',
cllimit => 'max',
clcategories => 'Category:All disambiguation pages',
if($res->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$res->{'content'}."\n");
return 300;
if($res->{'code'} ne 'success'){
$api->warn(day("Failed to load links for %F: ", $self->{'nextday'}).$res->{'error'}."\n");
# Only one title was requested, so take the single page record. The branches
# below abort on missing pages, redirects, non-article namespaces and
# disambiguation pages.
my $page=(values(%{$res->{'query'}{'pages'}}))[0];
die "$title does not exist";
} elsif(exists($page->{'redirect'})){
die "$title is a redirect";
} elsif($page->{'ns'}!=0){
die "$title is a non-article";
# The categories list is limited to the disambiguation category by
# clcategories above, so a non-empty list means the page is in it.
} elsif(exists($page->{'categories'}) && @{$page->{'categories'}}){
die "$title is a disambiguation page";
# Tag the talk page
$api->log("Tagging $title");
my $tok=$api->edittoken("$title", EditRedirect => 1);
if($tok->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$tok->{'content'}."\n");
return 300;
if($tok->{'code'} ne 'success'){
$api->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
die "$title is a redirect, cannot tag";
# Current wikitext of the page, or the empty string when there is none.
my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'} // '';
my $outtxt=$self->tag($api, $intxt, %dates);
# tag() returns undef when it could not load a redirect list.
return 300 unless defined($outtxt);
# Only save when tag() actually changed the text.
if($intxt ne $outtxt){
$res=$api->edit($tok, $outtxt, "Adding/updating {{OnThisDay}}. $screwup", 0, 1);
if($res->{'code'} ne 'success'){
$api->warn("Write failed on $title: $res->{error}\n");
close X;
return undef;
# ---- 'live' mode ----
if($mode eq 'live'){
# Pick up the persisted day counter when the in-memory one is 0 and the
# stored value is a plain non-negative integer.
if($self->{'nextday'}==0 && exists($api->store->{'nextday'})){
my $t=$api->store->{'nextday'};
$self->{'nextday'}=$t if $t=~/^\d+$/;
# Iterate over all our pages
# First, create the categories if they don't already exist
# Year category for the day being processed.
my $tok=$api->edittoken(day("Category:Selected anniversaries (%Y)", $self->{'nextday'}), EditRedir=>1);
if($tok->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$tok->{'content'}."\n");
return 300;
if($tok->{'code'} ne 'success'){
$api->warn("Failed to get edit token for year category: ".$tok->{'error'}."\n");
} elsif(exists($tok->{'missing'})){
$api->edit($tok, "[[Category:Selected anniversaries]]", "Creating dated subcategory, to prevent redlinks. $screwup", 0, 1);
# Month category for the day being processed.
$tok=$api->edittoken(day("Category:Selected anniversaries (%B %Y)", $self->{'nextday'}), EditRedir=>1);
if($tok->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$tok->{'content'}."\n");
return 300;
if($tok->{'code'} ne 'success'){
$api->warn("Failed to get edit token for month category: ".$tok->{'error'}."\n");
} elsif(exists($tok->{'missing'})){
my $y=day("%Y", $self->{'nextday'});
my $m=day("%m", $self->{'nextday'});
# The new month category is placed in the year category with sort key "0$m".
$api->edit($tok, "[[Category:Selected anniversaries ($y)|0$m]]", "Creating dated subcategory, to prevent redlinks. $screwup", 0, 1);
# Load day page
my $day=$self->{'nextday'};
my $fday=day("%F", $self->{'nextday'});
my $page=load_page_for_day($api, $day);
if($page->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$page->{'content'}."\n");
return 300;
if($page->{'code'} ne 'success'){
$api->warn("Failed to fetch page for $fday: $page->{error}\n");
return 60;
# Revision id of the loaded page; later passed to tag() as the oldid for
# $fday and quoted in the error report.
my $revid=$page->{'revisions'}[0]{'revid'};
# Strip out non-rendered content
my ($txt, $nowiki)=$api->strip_nowiki($page->{'revisions'}[0]{'slots'}{'main'}{'*'});
# Blank every stripped fragment that starts with an HTML comment opener, so
# that it is not restored by replace_nowiki() below.
while(my ($k,$v)=each %$nowiki){
$nowiki->{$k}='' if $v=~/^<!--/;
# If the page has an <onlyinclude> section, keep only the first one's content.
$txt=$1 if $txt=~m!<onlyinclude>(.*?)</onlyinclude>!s;
# Replace time-varying templates
# $x takes, in turn, the day of month, weekday number, month number and year
# of the day being processed.
my $x=day("%-d", $self->{'nextday'});
$x=day("%w", $self->{'nextday'});
$x=day("%m", $self->{'nextday'});
$x=day("%Y", $self->{'nextday'});
$txt=$api->replace_nowiki($txt, $nowiki);
# Expand templates
action => 'expandtemplates',
title => $page->{'title'},
text => $txt,
prop => 'wikitext',
if($res->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$res->{'content'}."\n");
return 300;
if($res->{'code'} ne 'success'){
$api->warn(day("Failed to expand templates for %F: ", $self->{'nextday'}).$res->{'error'}."\n");
$api->debug(2,"Text was $txt\n");
return 60;
# Transform ''' to <b>
$txt=doAllQuotes($api, $res->{'expandtemplates'}{'wikitext'});
# Extract just the bold parts
# Extract the links
action => 'parse',
title => $page->{'title'},
text => $txt,
prop => 'links',
if($res->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$res->{'content'}."\n");
return 300;
if($res->{'code'} ne 'success'){
$api->warn(day("Failed to expand templates for %F: ", $self->{'nextday'}).$res->{'error'}."\n");
$api->debug(2,"Text was $txt\n");
return 60;
# Keep the titles of links that are in namespace 0 and flagged as existing.
my @links=map($_->{'*'}, grep($_->{'ns'}==0 && exists($_->{'exists'}), @{$res->{'parse'}{'links'}}));
# Filter out month/year links
@links=grep(!/$skip_links_re/o, @links);
# Resolve redirects & check for dabs
# %res maps each resolved target title to the list of link titles that led to
# it; @err collects wikitext bullet lines for the error report.
my %res=();
my @err=();
# Take up to 500 titles off the front of @links for one query.
my @l=splice(@links,0,500);
my $res=$api->query(
titles => join('|',@l),
prop => 'categories',
cllimit => 'max',
clcategories => 'Category:All disambiguation pages',
redirects => 1,
if($res->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$res->{'content'}."\n");
return 300;
if($res->{'code'} ne 'success'){
$api->warn(day("Failed to load links for %F: ", $self->{'nextday'}).$res->{'error'}."\n");
return 60;
# Redirect map (from-title => to-title) and page records keyed by title.
my %map=();
$map{$_->{'from'}}=$_->{'to'} foreach (@{$res->{'query'}{'redirects'}});
my %pages=map { $_->{'title'}=>$_ } values %{$res->{'query'}{'pages'}};
# Classify each requested link by the page it resolves to.
foreach my $l (@l){
my $t=$api->apply_redirect_map( $l, \%map );
$api->warn("No result for $l ".day("(%F)", $day)."\n");
return 60;
my $page=$pages{$t};
push @err, "* [[:$l]] does not exist";
} elsif(exists($page->{'redirect'})){
push @err, "* [[:$l]] is a double redirect";
} elsif($page->{'ns'}!=0){
push @err, "* [[:$l]] redirects to a non-article";
} elsif(exists($page->{'categories'}) && @{$page->{'categories'}}){
push @err, "* [[:$l]] is a disambiguation page";
} else {
# Usable article: remember which link(s) pointed at it.
$res{$t}=[] unless exists($res{$t});
push @{$res{$t}}, $l;
# Tag the talk pages
foreach my $otitle (keys %res){
my $title="Talk:$otitle";
$api->log("Tagging $title for ".day("%F", $day));
my $tok=$api->edittoken("$title", EditRedirect => 1);
if($tok->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$tok->{'content'}."\n");
return 300;
} elsif($tok->{'code'} eq 'botexcluded'){
# Bot exclusion is reported in the error list rather than treated as a failure.
push @err, "* I am excluded from editing [[$title]], cannot tag for [[".join(']] / [[', @{$res{$otitle}})."]]";
} elsif($tok->{'code'} ne 'success'){
$api->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
return 60;
push @err, "* [[$title]] is a redirect, cannot tag for [[".join(']] / [[', @{$res{$otitle}})."]]";
# Current talk page wikitext, or the empty string when there is none.
my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'} // '';
my $outtxt=$self->tag($api, $intxt, $fday => $revid);
# tag() returns undef when it could not load a redirect list.
return 300 unless defined($outtxt);
# Only save when tag() actually changed the text.
if($intxt ne $outtxt){
$res=$api->edit($tok, $outtxt, "Adding/updating {{OnThisDay}} for $fday. $screwup", 0, 1);
if($res->{'code'} ne 'success'){
$api->warn("Write failed on $title: $res->{error}\n");
return 60;
# Whine about errors
$api->log("Whining about bad links on ".day("%F", $day));
# $err serves as both the first argument to whine() and the edit summary.
my $err=day("Errors processing [[Wikipedia:Selected anniversaries/%B %-d]]", $self->{'nextday'});
$res=$api->whine($err, "The following links could not be processed:\n".join("\n", @err)."\nPlease add {{tl|OnThisDay}} to them manually using oldid $revid, I will not be retrying. Thanks.", Summary => $err, Pagename => $whine_to, NoSmallPrint => 1);
if($res->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$res->{'content'}."\n");
return 300;
if($res->{'code'} ne 'success'){
$api->warn(day("Could not complain about %F: ", $self->{'nextday'}).$res->{'error'}."\n");
return 60;
# Done! Next day
# Seconds from $starttime to the next multiple of 86400 epoch seconds (UTC
# midnight), minus the time this run has already taken.
my $next=86400-($starttime%86400)-(time-$starttime);
# No delay while the day counter is still behind today.
$next=0 if $self->{'nextday'}<$today;
return $next;
# Reached only when $mode is neither 'list' nor 'live'.
die "Invalid mode '$mode'";
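# The anniversary pages were created 2004-02-26 through 2004-02-28; we count
# 2004-02-26 as day 0. This function does strftime on the day number.
# Thankfully, strftime correctly normalizes out-of-range day-of-month values
# (e.g. "February 57") into the corresponding later calendar date.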
# Format a day number with a strftime format string.
#   $fmt - strftime format
#   $day - day number, where 0 corresponds to 2004-02-26
# Returns the formatted string.
sub day {
my $fmt=shift;
my $day=shift;
# Arguments are sec, min, hour, mday, mon, year: mday is $day+26, mon 1 is
# February and year 104 is 2004. POSIX::strftime normalizes the out-of-range
# mday into the proper later date.
return strftime($fmt,0,0,0,$day+26,1,104);
# Inverse of day(): find the day number whose date equals the UTC date of the
# epoch timestamp $ts. Dates are compared as "%F" (YYYY-MM-DD) strings, which
# order the same way as the dates themselves, so eq/lt are sufficient.
# Returns the matching day number; dies if the search ends without a match.
sub day_from_timestamp {
my $ts=shift;
# Initial search bounds for the day number.
my ($min,$max)=(0,5000);
my $target=strftime("%F", gmtime $ts);
# First compare the target against the date at the current upper bound.
my $day=day("%F",$max);
if($day eq $target){
return $max;
} elsif($day lt $target){
} else {
# Probe the midpoint of the current bounds.
my $d=POSIX::floor(($min+$max)/2);
my $day=day("%F",$d);
if($day eq $target){
return $d;
} elsif($day lt $target){
} else {
die "WTF? Searching for $target, got to $min which is ".day("%F",$min);
# Load the revision of the anniversary page to use for day number $day.
#   $api - API handle
#   $day - day number as understood by day()
# Returns the page record, which run() inspects via its 'code', 'title' and
# 'revisions' fields. A successfully loaded record is cached in $api->store
# under "day $day" and returned from there on later calls.
sub load_page_for_day {
my $api=shift;
my $day=shift;
# Serve from the persistent cache when possible.
return $api->store->{"day $day"} if(exists($api->store->{"day $day"}));
# 00:00:00Z at the start of the following day.
my $start=$api->ISO2timestamp(day("%Y-%m-%dT00:00:00Z",$day+1));
# Ask for a single revision, enumerating from $start towards older revisions,
# with its ids, timestamp and main-slot content.
my $iter=$api->iterator(
titles => day("Wikipedia:Selected anniversaries/%B %-d",$day),
rvlimit => 1,
rvprop => 'ids|timestamp|content',
rvslots => 'main',
rvdir => 'older',
rvstart => $start,
prop => 'revisions',
while(my $page=$iter->next){
# Hand a result whose '_ok_' flag is false straight back to the caller.
return $page unless $page->{'_ok_'};
my $t=$api->ISO2timestamp($page->{'revisions'}[0]{'timestamp'});
# Cache the page and mark the cached copy.
$api->store->{"day $day"}=$page;
$api->store->{"day $day"}{'cached'}=1;
return $page;
# Parse single-quotes in the same way MediaWiki does.
# Convert wikitext quote markup to HTML tags across a whole text.
#   $api  - API handle (used for strip_nowiki/replace_nowiki)
#   text  - second argument, the wikitext to convert
# The nowiki-stripped text is split on newlines and each line is run through
# doQuotes() independently, with "\n" appended after every line, before the
# stripped fragments are put back.
sub doAllQuotes {
my $api=shift;
my ($text,$nowiki)=$api->strip_nowiki(shift);
my $outtxt='';
my @lines=split(/\n/, $text);
$outtxt.=doQuotes($_)."\n" foreach (@lines);
return $api->replace_nowiki($outtxt,$nowiki);
# Convert the quote markup ('' / ''' / ''''') in one line of text into
# <i>/<b> HTML tags. Returns the line unchanged when it contains no run of
# two or more apostrophes.
sub doQuotes {
my $text=shift;
# Split on runs of 2+ apostrophes; because the pattern captures, the runs are
# kept at the odd indices of @arr and the plain text sits at the even indices.
my @arr=split(/(''+)/, $text);
return $text if @arr == 1;
my $numbold=0;
my $numitalics=0;
# First pass: look at the length of each apostrophe run and count how many
# act as italic (2 or 5) and as bold (3 or 5) markers.
for(my $i=1; $i<@arr; $i+=2){
my $l=length($arr[$i]);
# Four 's => assume one plain text + bold
} elsif($l>5){
# More than five 's => assume N-5 plain text + bold + italic
$numitalics++ if($l==2 || $l==5);
$numbold++ if($l==3 || $l==5);
# Odd number of each => guess one of the bolds is really plain + italic
# Somewhat odd, but to match MediaWiki's parser...
if(($numbold&1) && ($numitalics&1)){
# For each ''' run, classify it by the two characters that precede it and
# remember the index of the first run in each class.
my $firstsingleletterword = -1;
my $firstmultiletterword = -1;
my $firstspace = -1;
for(my $i=1; $i<@arr; $i+=2){
next unless length($arr[$i])==3;
# $x1 is the last character of the preceding text, $x2 the one before it.
my $x1=substr($arr[$i-1],-1);
my $x2=substr($arr[$i-1],-2,1);
if($x1 eq ' '){
$firstspace=$i if $firstspace == -1;
} elsif($x2 eq ' '){
$firstsingleletterword=$i if $firstsingleletterword == -1;
} else {
$firstmultiletterword=$i if $firstmultiletterword == -1;
# Order of preference: single-letter word, then multi-letter word, then space.
if($firstsingleletterword != -1){
} elsif($firstmultiletterword != -1){
} elsif($firstspace != -1){
# Now, convert to HTML
# $state records the open tags: '' (none), 'b', 'i', 'bi' (bold opened first,
# italic inside it), 'ib' (italic first, bold inside it) or 'both' (a ''''' run
# was seen and the nesting order is not yet decided). $buffer is the text that
# gets wrapped once a 'both' state is resolved.
my $output='';
my $buffer='';
my $state='';
for(my $i=0; $i<@arr; $i++){
my $r=$arr[$i];
# Italic toggle: close the italic tag if one is open, otherwise open one.
if($state eq 'i'){
$output.='</i>'; $state='';
} elsif($state eq 'bi'){
$output.='</i>'; $state='b';
} elsif($state eq 'ib'){
# Italic is the outer tag here, so bold is closed and reopened around it.
$output.='</b></i><b>'; $state='b';
} elsif($state eq 'both'){
$output.="<b><i>$buffer</i>"; $state='b';
} else {
$output.='<i>'; $state.='i';
# Bold toggle (run of three apostrophes).
} elsif(length($r)==3){
if($state eq 'b'){
$output.='</b>'; $state='';
} elsif($state eq 'bi'){
# Bold is the outer tag here, so italic is closed and reopened around it.
$output.='</i></b><i>'; $state='i';
} elsif($state eq 'ib'){
$output.='</b>'; $state='i';
} elsif($state eq 'both'){
$output.="<i><b>$buffer</b>"; $state='i';
} else {
$output.='<b>'; $state.='b';
# Combined bold+italic toggle (run of five apostrophes).
} elsif(length($r)==5){
if($state eq 'b'){
$output.='</b><i>'; $state='i';
} elsif($state eq 'i'){
$output.='</i><b>'; $state='b';
} elsif($state eq 'bi'){
$output.='</i></b>'; $state='';
} elsif($state eq 'ib'){
$output.='</b></i>'; $state='';
} elsif($state eq 'both'){
$output.="<i><b>$buffer</b></i>"; $state='';
} else {
# Nothing open yet: defer choosing the nesting order.
$buffer=''; $state='both';
} else {
if($state eq 'both'){
} else {
# End of line: close whatever is still open, innermost tag first.
$output.='</b>' if($state eq 'b' || $state eq 'ib');
$output.='</i>' if($state eq 'i' || $state eq 'bi' || $state eq 'ib');
$output.='</b>' if($state eq 'bi');
# An unresolved 'both' state with buffered text is emitted as bold+italic.
$output.="<b><i>$buffer</i></b>" if($state eq 'both' && $buffer ne '');
return $output;
# Add or update the "on this day" information in a page's wikitext.
#   $self  - task object (caches the 'loaded skip redirects' flag)
#   $api   - API handle
#   $txt   - current wikitext of the page
#   %dates - remaining arguments, as date => revision id pairs (run() passes
#            a "%F"-formatted date and the anniversary page's revid)
# Returns the new wikitext, or undef when a list of template redirects could
# not be loaded (run() returns 300 in that case).
sub tag {
my $self=shift;
my $api=shift;
my $txt=shift;
my %dates=@_;
# Titles that resolve to Template:On this day; on failure the error is
# reported under the '' key.
my %redir=$api->redirects_to_resolved('Template:On this day');
$api->warn("Could not load list of redirects to Template:On this day: ".$redir{''}{'error'}."\n");
return undef;
# Same for Template:Article history.
my %redir2=$api->redirects_to_resolved('Template:Article history');
$api->warn("Could not load list of redirects to Template:Article history: ".$redir2{''}{'error'}."\n");
return undef;
# Update an existing OnThisDay template?
my $done=0;
$txt=$api->process_templates($txt, sub {
my $name=shift;
my $params=shift;
shift; # $wikitext
shift; # $data
my $oname=shift;
# Leave templates that are not (a redirect to) On this day untouched.
return unless exists($redir{"Template:$name"});
# Read existing parameters
# %p collects dateN => YYYY-MM-DD values keyed by parameter name; $mx tracks
# the highest dateN/oldidN index seen.
my %p=();
my $mx=0;
foreach (@$params){
$mx=$1 if(/^\s*(?:date|oldid)(\d+)\s*=/ && $mx < $1);
# "Month D, YYYY" form (comma optional, month name case-insensitive).
} elsif(/^\s*(date\d+)\s*=\s*((?i)$monthre)\s+(\d{1,2})(?:\s*,)?\s+(\d{4})\s*$/){
my $m; for($m=0; $m<@months; $m++){ last if lc($months[$m]) eq lc($2); }
$p{$1}=sprintf("%04d-%02d-%02d", $4, $m+1, $3);
# "D Month YYYY" form.
} elsif(/^\s*(date\d+)\s*=\s*(\d{1,2})\s+((?i)$monthre)(?:\s*,)?\s+(\d{4})\s*$/){
my $m; for($m=0; $m<@months; $m++){ last if lc($months[$m]) eq lc($3); }
$p{$1}=sprintf("%04d-%02d-%02d", $4, $m+1, $2);
} elsif(/^\s*(oldid\d+)\s*=\s*(\d+)\s*$/){
# Merge complete existing date/oldid pairs into %dates, without overriding a
# date the caller supplied.
for(my $i=1; $i<=$mx; $i++){
$dates{$p{"date$i"}}=$p{"oldid$i"} if(exists($p{"date$i"}) && exists($p{"oldid$i"}) && !exists($dates{$p{"date$i"}}));
# (Re-)insert parameters
# Advance $i to the first index with no oldidN parameter present.
my $i=1;
$i++ while grep(/^\s*oldid$i\s*=/, @$params);
foreach my $date (sort keys %dates){
push @$params, "date$i=$date", "oldid$i=".$dates{$date};
# Rebuild the template call under the name it was originally written with.
return "{{$oname|".join('|', @$params)."}}";
return $txt if $done;
# Update an existing ArticleHistory template?
$txt=$api->process_templates($txt, sub {
my $name=shift;
my $params=shift;
shift; # $wikitext
shift; # $data
my $oname=shift;
# Leave templates that are not (a redirect to) Article history untouched.
return unless exists($redir2{"Template:$name"});
# Find next unused number, and check which dates aren't already used
my %p=();
my $mx=0;
foreach (@$params){
$mx=$1 if(/^\s*otd(\d+)(?:date|oldid)\s*=/ && $mx < $1);
# Unnumbered otddate/otdoldid count as index 1.
$mx=1 if(/^\s*otd(?:date|oldid)\s*=/ && $mx < 1);
# "Month D, YYYY" form (comma optional, month name case-insensitive).
} elsif(/^\s*(otd\d*date)\s*=\s*((?i)$monthre)\s+(\d{1,2})(?:\s*,)?\s+(\d{4})\s*$/){
my $m; for($m=0; $m<@months; $m++){ last if lc($months[$m]) eq lc($2); }
$p{$1}=sprintf("%04d-%02d-%02d", $4, $m+1, $3);
# "D Month YYYY" form.
} elsif(/^\s*(otd\d*date)\s*=\s*(\d{1,2})\s+((?i)$monthre)(?:\s*,)?\s+(\d{4})\s*$/){
my $m; for($m=0; $m<@months; $m++){ last if lc($months[$m]) eq lc($3); }
$p{$1}=sprintf("%04d-%02d-%02d", $4, $m+1, $2);
} elsif(/^\s*(otd\d*oldid)\s*=\s*(\d+)\s*$/){
# Delete any already-listed dates
delete $dates{$p{"otddate"}} if(exists($p{"otddate"}) && exists($p{"otdoldid"}) && exists($dates{$p{"otddate"}}));
for(my $i=1; $i<=$mx; $i++){
delete $dates{$p{"otd${i}date"}} if(exists($p{"otd${i}date"}) && exists($p{"otd${i}oldid"}) && exists($dates{$p{"otd${i}date"}}));
# Add new parameters
foreach my $date (sort keys %dates){
push @$params, "otd${mx}date=$date", "otd${mx}oldid=".$dates{$date}."\n";
# Rebuild the template call under the name it was originally written with.
return "{{$oname|".join('|', @$params)."}}";
return $txt if $done;
# Resolve redirects for @skip_templates once per task object; the flag on
# $self prevents repeating the lookup.
if(!exists($self->{'loaded skip redirects'})){
my %skip=$api->redirects_to_resolved(@skip_templates);
$api->warn("Could not load list of redirects for skip templates: ".$skip{''}{'error'}."\n");
return undef;
@skip_templates=keys %skip;
$self->{'loaded skip redirects'}=1;
# No, add a new one.
my $templ='{{OnThisDay';
my $i=1;
foreach my $date (sort keys %dates){
my $nowiki;
# Replace templates with \x02...\x03 placeholders (see _strip_templates) so
# the insertion point can be located.
my $outtmpl={};
$txt=$api->process_templates($txt, \&_strip_templates, $outtmpl);
# Insert the new template on its own line after any leading run of
# placeholders that is followed only by spaces/tabs up to the end of the line;
# if the text does not start that way, prepend the template instead.
$txt="$templ\n$txt" unless $txt=~s/^((?:\s*\x02[a-zA-Z0-9_-]+\x03)*[ \t]*)(?:$|(?=\n))/$1\n$templ/;
# Put the original templates back in place of their placeholders.
$txt=_unstrip_templates($txt, $outtmpl);
return $api->replace_nowiki($txt, $nowiki);
# Callback for $api->process_templates(), used by tag() to swap templates for
# opaque placeholders.
#   $name     - template name
#   $params   - array ref of the template's parameter strings
#   $wikitext - wikitext of the template call
#   $data     - hash ref that tag() supplies and later passes to
#               _unstrip_templates()
# Returns undef for names starting with '#', for templates with a "small="
# parameter whose value does not start with no/n/0, and for templates listed
# in @skip_templates; otherwise returns a placeholder of the form
# "\x02<digest>\x03".
# NOTE(review): sha256_base64 uses the standard base64 alphabet, which includes
# '+' and '/', while the placeholder patterns in tag() and _unstrip_templates()
# only accept [a-zA-Z0-9_-]; also, no statement visible here stores $wikitext
# in $data under the placeholder -- verify both.
sub _strip_templates {
my ($name, $params, $wikitext, $data) = @_;
return undef if $name=~/^#/;
return undef if grep(/^\s*small\s*=\s*(?!no|n|0)\S/, @$params);
return undef if grep("Template:$name" eq $_, @skip_templates);
# Digest a byte-encoded copy, leaving $wikitext itself untouched.
my $tmp = $wikitext;
utf8::encode( $tmp ) if utf8::is_utf8( $tmp );
my $tag="\x02".sha256_base64($tmp)."\x03";
return $tag;
# Reverse of _strip_templates(): replace each "\x02...\x03" placeholder in
# $wikitext with the value stored for it in the hash ref $templ. A placeholder
# with no (defined) entry is left as it is. Returns the resulting text.
sub _unstrip_templates {
my $wikitext=shift;
my $templ=shift;
# /e evaluates the replacement as code; "// $1" keeps unknown placeholders.
$wikitext=~s!(\x02[a-zA-Z0-9_-]+\x03)! $templ->{$1} // $1 !gioe;
return $wikitext;