User:AllyUnion/did you know.pl
The following code is licensed under the GPL and the Creative Commons Attribution License. -- AllyUnion (talk) 03:21, 9 Apr 2005 (UTC)
This code is not working. -- AllyUnion (talk) 03:21, 9 Apr 2005 (UTC)
FYI: The system calls to python2.3 are calls into the pywikipediabot framework. I use it because Perl needs a large number of packages just to download files, and I don't have access to a version of the module that allows me to post to the English Wikipedia. -- AllyUnion (talk) 03:27, 9 Apr 2005 (UTC)
Basic idea:
- Leave three blocks of "...that" on Template talk:Did you know
- Move all other blocks of "...that" to Wikipedia:Recent additions
- Move oldest blocks on Wikipedia:Recent additions to an archive page if Wikipedia:Recent additions exceeds 50 "...that" lines.
didyouknow.pl
#!/usr/bin/perl -w
# Author: Jason Y. Lee
# Purpose: Wikipedia's Did you know archival process
# Special thanks to dysprosia, and the person who helped me in #wikipedia
# Assumptions:
# On Template talk:Did you know:
# A did you know line is in the following format:
# *...that <TEXT><br>
# Where <TEXT> is any text of any length, no matter if there is a newline or not.
# On Wikipedia:Recent additions and any archive pages after Wikipedia:Recent additions 25:
# A did you know line starts either with:
# [[Image: OR ...that
# A did you know line ends with either:
# A question mark or a HTML line break (<br>)
use Tie::File;
# Filesystem layout. Every path is built by plain concatenation, so each
# directory component must carry its own trailing slash.
my $HOME = '<insert home directory>';
my $BOTDIR = 'wikipedia/bots/kurando-san/';
my $LOGDIR = 'dyklogs/';
my $botbase = $HOME . $BOTDIR;
my $logbase = $botbase . $LOGDIR;
my $configfile = $botbase . 'didyouknow.cfg';
# Scratch files:
#   $logfile1 - downloaded copy of the talk page
#   $logfile2 - downloaded copy of Wikipedia:Recent additions
#   $logfile3 - downloaded copy of the current archive page
#   $logfile4 - rebuilt Wikipedia:Recent additions text
#   $logfile5 - filename prefix for the per-archive logs ("newra-<n>.log")
my ($logfile1, $logfile2, $logfile3, $logfile4, $logfile5) =
    map { $logbase . $_ } ('dyk1.log', 'dyk2.log', 'dyk3.log', 'newra.log', 'newra-');
# Wiki page titles. $archive and $archivenum are placeholders until the
# config file has been read.
my $talkpage = "Template talk:Did you know";
my $pagename = "Wikipedia:Recent additions";
my $archive = "Wikipedia:Recent additions";
my $archivenum = "";
# The four lines every archive page starts with.
my $archiveheader = join("\n",
    '{{DYK archive header}}',
    '{{DYK archive nav}}',
    '',
    '==Did you know...==',
    '');
# Python page existence program (currently disabled)
#$pageexist1 = "\"import config, wikipedia\nimport sys\nmysite = wikipedia.getSite()\nif (wikipedia.PageLink(mysite, \'";
#$pageexist2 = "\').exists()):\n\tsys.exit(0)\nelse:\n\tsys.exit(1)\n\"";
# Posting a page
# Fragments of an inline Python program. They are concatenated as
#   $postprog1 . ($postprog2 . <log file> . $postprog3 . <page title>
#                 . $postprog4 . <edit comment> . $postprog5)+ . '"'
# $postprog1 opens the double-quoted shell argument; the caller appends
# the closing quote.
$postprog1 = '"' . join("\n",
    'import config, wikipedia',
    'mysite = wikipedia.getSite()',
    '');
$postprog2 = q{logfile = '};
$postprog3 = qq{'\npagename = '};
$postprog4 = qq{'\ncomment = '};
$postprog5 = join("\n",
    q{'},
    q{log = file(logfile, 'r')},
    'page = log.read()',
    'log.close()',
    'wikipedia.PageLink(mysite, pagename).put(page, comment)',
    '');
# Read the bot configuration.
#
# The config file is tied so that the "lastarchive" line can be rewritten in
# place at the end of the run. The last line containing
# "lastarchive = <number>" wins. It sets:
#   $archivenum  - the number of the archive page currently being filled
#   $archive     - the title of that archive page
#   $lastarchive - a reference to the config line, assigned through later
#
# Dies when the file cannot be tied or holds no usable setting. Continuing
# would produce the malformed title "Wikipedia:Recent additions " and would
# later dereference an empty string as a symbolic reference.
tie @config, 'Tie::File', $configfile
    or die "Cannot open config file $configfile: $!\n";
$lastarchive = "";
foreach $line (@config)
{
    # Capture only the digits so stray whitespace or a trailing carriage
    # return never ends up in the page title.
    if ($line =~ /lastarchive\s*=\s*(\d+)/)
    {
        $archivenum = $1;
        $archive = "Wikipedia:Recent additions " . $archivenum;
        $lastarchive = \$line;
    }
}
ref($lastarchive)
    or die "No 'lastarchive = <number>' setting found in $configfile\n";
# Get the pages
#
# Each page is downloaded by an inline Python program (pywikipediabot) that
# writes the page text, encoded as iso-8859-1, into a local log file. The
# fragments below are glued together as
#   $pythonprog1 $pythonprog2 '<log file>' $pythonprog3 '<page title>' $pythonprog4
# and handed to the shell as one double-quoted "python2.3 -c" argument.
$pythonprog1 = "\"import config, wikipedia\nmysite = wikipedia.getSite()\n";
$pythonprog2 = "logfile = ";
$pythonprog3 = "\npagename = ";
$pythonprog4 = "\nlog = file(logfile, 'w')\nlog.write(wikipedia.getPage(mysite, pagename, True, True, False).encode('iso-8859-1'))\nlog.close()\n\"";
# Talk page, Recent additions and the current archive, in that order.
foreach my $download ([$logfile1, $talkpage], [$logfile2, $pagename], [$logfile3, $archive])
{
    my ($target, $title) = @{$download};
    my $command = "python2.3 -c " . $pythonprog1 . $pythonprog2 . '\'' . $target . '\'' . $pythonprog3 . '\'' . $title . '\'' . $pythonprog4;
    # Stop at the first failed download: the rest of the script edits and
    # re-posts these files, so working from missing or stale copies is unsafe.
    system($command) == 0
        or die "Download of '$title' into $target failed (status $?)\n";
}
# Analysis of 'Template talk:Did you know'
#
# Tie the downloaded talk page and move $x to the first line after the
# instructions that open the "archive used suggestions here" section.
# State initialised here and used by the parsing loop that follows:
#   @dykmat        - per-block list of collected entries
#   $dykblockcount - index of the block currently being filled
#   $y             - number of entries collected so far in that block
tie @dyklog, 'Tie::File', $logfile1 or die "Cannot tie $logfile1: $!\n";
@dykmat = ();
$dykblockcount = 0;
$y = 0;
$line = "";
$x = 0;
# Find the section heading. The bound is tested first so that a page
# without the heading ends the loop instead of spinning forever on undef.
while (($x < scalar(@dyklog)) && ($dyklog[$x] !~ m/=+ARCHIVE USED SUGGESTIONS HERE=+/i))
{
    # print $x, ". ", $dyklog[$x], "\n";
    $x++;
}
($x < scalar(@dyklog))
    or die "'ARCHIVE USED SUGGESTIONS HERE' heading not found in $logfile1\n";
# Find the last line of the instructions under that heading.
while (($x < scalar(@dyklog)) && ($dyklog[$x] !~ m/\'\'include a link to the used picture behind the fact in which it has been \'\'\'used\'\'\' on the front page\.\'\'<br>/i))
{
    # print $x, ". ", $dyklog[$x], "\n";
    $x++;
}
($x < scalar(@dyklog))
    or die "End of the archive instructions not found in $logfile1\n";
#print $x, ". ", $dyklog[$x], "\n";
# Step over the instruction line itself.
$x++;
#print $x, ". ", $dyklog[$x], "\n";
# Parse the used suggestions into blocks.
#
# Walks @dyklog from $x until the "All older items have been archived ..."
# footer line or the end of the file. Entries (lines starting "*...that",
# plus their continuation lines) are collected into
# $dykmat[$dykblockcount][1..]; a blank or whitespace-only line closes the
# current block and stores ($y + 1) in slot 0 of that block.
# While $dykblockcount is below 3 the lines are left in place and $x
# advances. From block index 3 onwards each handled line is spliced out of
# @dyklog instead, which removes it from the tied log file.
while (($dyklog[$x] !~ m/All older items have been archived at \[\[Wikipedia:Recent additions\]\]/ig) && ($x < scalar(@dyklog)))
{
# Normalise the line in place (assignments go through the tied array):
# canonical "*...that" prefix
$dyklog[$x] =~ s/^\*\s*\.\.\.\s*that/\*\.\.\.that/ig;
# drop text from "[[User" up to "(UTC)", with an optional leading dash
$dyklog[$x] =~ s/(?:--|&[mn]dash;|[10];|)\s*\[\[User(.*?):(.*?)\(UTC\)//ig;
# drop empty parentheses
$dyklog[$x] =~ s/\(\)//g;
# drop a single whitespace character between "?" and "<br"
$dyklog[$x] =~ s/\?\s<br/\?<br/ig;
# "[[Image" becomes "[[:Image" (presumably turning the image into a link)
$dyklog[$x] =~ s/\[\[Image/\[\[:Image/ig;
$loopflag = 1;
$line = "";
# print "$x. 1\n";
# Case 1: the line starts a new entry.
if ($dyklog[$x] =~ m/^\*\.\.\.that/ig)
{
# print "$x. 2\n";
$line = $dyklog[$x];
# Remove the line from the talk page once three blocks have been kept.
if ($dykblockcount >= 3)
{
splice(@dyklog, $x, 1);
}
else
{
$x++;
}
# Gather continuation lines until the next entry, a blank line, or the
# end of the file.
while (($loopflag == 1) && ($x < scalar(@dyklog)))
{
# print "$x. 3\n";
# Same normalisation as above, applied to the candidate line.
$dyklog[$x] =~ s/^\*\s*\.\.\.\s*that/\*\.\.\.that/ig;
$dyklog[$x] =~ s/(?:--|&[mn]dash;|[10];|)\s*\[\[User(.*?):(.*?)\(UTC\)//ig;
$dyklog[$x] =~ s/\(\)//g;
$dyklog[$x] =~ s/\?\s<br/\?<br/ig;
$dyklog[$x] =~ s/\[\[Image/\[\[:Image/ig;
if ($dyklog[$x] =~ m/^\*\.\.\.that/ig)
{
# print "$x. 4\n";
# Next entry begins here; leave it for the outer loop.
$loopflag = 0;
}
elsif ($dyklog[$x] eq "")
{
# print $x, ". Block detected\n";
# print "$x. 5\n";
# Empty line: the outer loop handles the block boundary.
$loopflag = 0;
}
else
{
# print "$x. 6\n";
$test = $dyklog[$x];
$test =~ s/\s+//g;
# A whitespace-only line is treated like an empty one.
if ($test eq "")
{
# print "$x. 7\n";
# print $x, ". Block detected\n";
$loopflag = 0;
}
else
{
# print "$x. 8\n";
# Continuation text: append it to the entry, separated by a space.
$line .= " " . $dyklog[$x];
if ($dykblockcount >= 3)
{
splice(@dyklog, $x, 1);
}
else
{
$x++;
}
}
}
}
# print "$x. 9\n";
# print $x, ". -> (", $dykblockcount, ", ", $y, "): ", $line, "\n\n";
# Store the finished entry; slots are numbered from 1.
$y++;
$dykmat[$dykblockcount][$y] = $line;
}
# Case 2: an empty line closes the current block.
elsif ($dyklog[$x] eq "")
{
# print "$x. 10\n";
# With no entries collected, the decrement cancels the increment below,
# so empty blocks are not counted.
if ($y == 0)
{
$dykblockcount--;
}
else
{
# Slot 0 holds the entry count plus one.
$dykmat[$dykblockcount][0] = $y + 1;
}
$y = 0;
$dykblockcount++;
if ($dykblockcount >= 3)
{
splice(@dyklog, $x, 1);
}
else
{
$x++;
}
}
# Case 3: any other line. A whitespace-only line closes the block exactly
# like an empty one; every line reaching this branch is then removed or
# skipped.
else
{
# print "$x. 11\n";
$test = $dyklog[$x];
$test =~ s/\s+//g;
if ($test eq "")
{
# print "$x. 12\n";
if ($y == 0)
{
$dykblockcount--;
}
else
{
$dykmat[$dykblockcount][0] = $y + 1;
}
$y = 0;
$dykblockcount++;
}
if ($dykblockcount >= 3)
{
splice(@dyklog, $x, 1);
}
else
{
$x++;
}
}
# print "Exit";
}
#print $dykblockcount, "\n";
#die;
#for ($x = 0; $x < $dykblockcount; $x++)
#{
# for ($y = 1; $y < $dykmat[$x][0]; $y++)
# {
# print $y, ". ", $dykmat[$x][$y], "\n";
# }
# print "\n";
#}
# The first three blocks stay on the talk page, so with three or fewer
# there is nothing to move. Stop before any page is rebuilt or posted,
# and say why.
if ($dykblockcount <= 3)
{
    die "Nothing to archive: block count on '$talkpage' is $dykblockcount (more than 3 needed)\n";
}
# Find the image left and right.
#
# Tie the downloaded 'Wikipedia:Recent additions' page and look at its first
# "[[Image:" line. $side becomes "left" when that line mentions "left" and
# "right" otherwise. It is used as the alignment of the first image link
# generated for the talk-page entries.
tie @wralog, 'Tie::File', $logfile2 or die "Cannot tie $logfile2: $!\n";
$side = "left";
$x = 0;
# The bound is tested first so a page without any image ends the search
# instead of looping forever on undef.
while (($x < scalar(@wralog)) && ($wralog[$x] !~ m/\[\[Image:/i))
{
    $x++;
}
# When no image exists, $side keeps its initial value "left".
if ($x < scalar(@wralog))
{
    $side = ($wralog[$x] =~ m/left/i) ? "left" : "right";
}
# Process the talk page.
#
# Rewrite every collected talk-page entry in @dykmat:
#   - the leading "*" of "*...that" is dropped;
#   - when the entry splits into exactly two pieces around an image
#     reference, that reference is cut out of the text and a
#     "[[Image:<name>|100px|<side>]]" line is put in front of the entry,
#     with $side alternating between "left" and "right";
#   - whitespace and closing parentheses between "?" and "<br" are removed.
for my $block_no (0 .. $dykblockcount-1)
{
    for my $entry_no (1 .. $dykmat[$block_no][0]-1)
    {
        $line = $dykmat[$block_no][$entry_no];
        $line =~ s/^\*\.\.\.that/\.\.\.that/ig;

        my @pieces = split /\(?\[\[:?Image:/i, $line;
        my $thumbnail = "";
        if (scalar(@pieces) == 2)
        {
            # Reduce the text after "Image:" to the bare image name.
            my $file = $pieces[1];
            $file =~ s/\]\]\)?<br\s*\/?>//ig;
            $file =~ s/\]\]\)?//g;
            $file =~ s/\n//g;
            $thumbnail = "[[Image:" . $file . "|100px|" . $side . "]]" . "\n";

            # The next image goes on the opposite side.
            $side = ($side eq "left") ? "right" : "left";

            # Cut the image reference out of the entry text.
            $line =~ s/\(*?\s*?\[\[:*?Image:.*?\]\]\s*?\)*?//ig;
        }

        $line =~ s/\?\s*?\)*?<br/\?<br/ig;
        $dykmat[$block_no][$entry_no] = $thumbnail . $line;
    }
}
# Analysis of 'Wikipedia:Recent additions'
#
# Reset the parser state and move $x to the line just after the first
# "newly archived items should go in at the top" marker.
#   $wrablockcount - index of the block being filled (-1: none started yet)
#   $y             - next free entry slot in that block (slots start at 1)
#   $wracount      - running total of entries found
$wramat = [];
($wrablockcount, $y, $wracount) = (-1, 1, 0);
$line = "";
#$limitflag = False;
#$wralast = 0;
$x = 0;
until (($wralog[$x] =~ m/<!-- newly archived items should go in at the top -->/g) || ($x >= scalar(@wralog)))
{
    $x++;
}
# Step over the marker line itself.
$x++;
# Parse the existing 'Wikipedia:Recent additions' entries into blocks.
#
# Walks @wralog from $x until the second "newly archived items ..." marker or
# the end of the file. Entries are stored in $wramat[$wrablockcount][1..];
# an empty or whitespace-only line closes the current block, stores $y in
# slot 0 of that block and starts the next one. $wracount counts entries.
# NOTE(review): the two inner "while (not ...)" loops advance $x without
# checking it against the array size; an entry with no "?" or <br> before
# the end of the file looks like it would never terminate. Confirm.
while (($wralog[$x] !~ m/<!-- newly archived items should go in at the top -->/g) && ($x < scalar(@wralog)))
{
# Normalise a leading "... that" to "...that" (written back to the tied file).
$wralog[$x] =~ s/^\s*?\.\.\.\s*?that/\.\.\.that/ig;
# print "Processing: ", $wralog[$x], "\n";
# Case 1: an image line. It and the following lines, up to and including
# the first one containing "?" or a <br> tag, form a single entry.
if ($wralog[$x] =~ m/\[\[Image:/ig)
{
# The very first entry opens block 0.
if ($wrablockcount == -1)
{
$wrablockcount++;
}
# print $x, ". (nimage). ", $wralog[$x], "\n";
$line = $wralog[$x] . "\n";
while (not (($wralog[$x] =~ m/\?/i) || ($wralog[$x] =~ m/<br\s*\/{0,1}>/i)))
{
# if ($limitflag)
# {
# splice(@wralog, $x, 1);
# }
# else
# {
$x++;
# }
# print $x, ". (image). ", $wralog[$x], "\n";
$line .= $wralog[$x] . "\n";
}
$wramat[$wrablockcount][$y] = $line;
$y++;
$wracount++;
}
# Case 2: a "...that" line without an image. Lines are joined until one
# contains "?" or a <br> tag; that terminating line is appended after the
# loop.
elsif ($wralog[$x] =~ m/^\.\.\.that/ig)
{
if ($wrablockcount == -1)
{
$wrablockcount++;
}
$line = "";
while (not (($wralog[$x] =~ m/\?/i) || ($wralog[$x] =~ m/<br\s*\/{0,1}>/i)))
{
# print $x, ". (that). ", $wralog[$x], "\n";
$line .= $wralog[$x] . "\n";
# if ($limitflag)
# {
# splice(@wralog, $x, 1);
# }
# else
# {
$x++;
# }
}
# print $x, ". (that). ", $wralog[$x], "\n";
$line .= $wralog[$x] . "\n";
$wramat[$wrablockcount][$y] = $line;
$y++;
$wracount++;
}
# Case 3: an empty line ends the current block.
elsif ($wralog[$x] eq "")
{
# print $x, ". Block detected!\n";
# if ($wracount > 50)
# {
# $limitflag = True;
# }
if ($wrablockcount != -1)
{
$wramat[$wrablockcount][0] = $y; # Save the size
}
$y = 1; # Reset the line count for the block
$wrablockcount++;
}
# Case 4: any other line; only a whitespace-only line has an effect, and it
# ends the block exactly like an empty line.
else
{
$test = $wralog[$x];
$test =~ s/\s+//g;
if ($test eq "")
{
# print $x, ". Block detected!\n";
# if ($wracount > 50)
# {
# $limitflag = True;
# }
if ($wrablockcount != -1)
{
$wramat[$wrablockcount][0] = $y;
}
$y = 1;
$wrablockcount++;
}
}
# if ($limitflag)
# {
# splice(@wralog, $x, 1);
# }
# else
# {
$x++;
# }
}
# Replace the line at $x (the second marker when one was found) with the
# marker preceded by a newline.
# NOTE(review): the Tie::File documentation says records containing the
# record separator are not supported; confirm this record is written as
# intended.
splice(@wralog, $x, 1, "\n<!-- newly archived items should go in at the top -->");
# Append the blocks parsed from "Wikipedia:Recent additions" to the
# "Did you know" matrix. Each of the first $wrablockcount blocks is copied
# slot by slot, slot 0 (the stored size) included, into the next free block
# of @dykmat, and $dykblockcount grows by one per copied block.
for (my $source = 0; $source < $wrablockcount; $source++)
{
    my $slots = $wramat[$source][0];
    for (my $slot = 0; $slot < $slots; $slot++)
    {
        $dykmat[$dykblockcount][$slot] = $wramat[$source][$slot];
    }
    $dykblockcount++;
}
# Build the new 'Wikipedia:Recent additions' text in $logfile4:
#   1. everything up to and including the first "newly archived items"
#      marker, copied from the downloaded page;
#   2. blocks 3, 4, ... of @dykmat, each followed by an empty line, until
#      the running total of stored block sizes reaches 60;
#   3. everything from the second marker to the end of the page.
# After this, $i is the index of the first block that did NOT fit; the
# archive steps further down rely on it.
$x = 0;
open(my $ralog, ">", $logfile4) or die "Cannot create $logfile4: $!\n";
while (($x < scalar(@wralog)) && ($wralog[$x] !~ m/<!-- newly archived items should go in at the top -->/))
{
    print {$ralog} $wralog[$x], "\n";
    $x++;
}
# Without the marker there is no known insertion point; stop rather than
# build (and later post) a malformed page.
($x < scalar(@wralog))
    or die "Insertion marker not found in $logfile2; not rebuilding '$pagename'\n";
print {$ralog} $wralog[$x], "\n";
$x++;
$total = 0;
for ($i = 3; (($i < $dykblockcount) && ($total < 60)); $i++)
{
    foreach my $j (1 .. $dykmat[$i][0]-1)
    {
        print {$ralog} $dykmat[$i][$j], "\n";
        # print "(", $i, ", ", $j, "). ", $dykmat[$i][$j], "\n";
    }
    print {$ralog} "\n";
    $total += $dykmat[$i][0];
}
# Skip the old entries between the markers: they were parsed into @dykmat
# and are written from there.
while (($x < scalar(@wralog)) && ($wralog[$x] !~ m/<!-- newly archived items should go in at the top -->/))
{
    $x++;
}
while ($x < scalar(@wralog))
{
    print {$ralog} $wralog[$x], "\n";
    $x++;
}
# Buffered write errors only surface at close time.
close($ralog) or die "Cannot finish writing $logfile4: $!\n";
#print "----\n";
#print $wracount, "\n";
# Post the new pages
#
# One inline Python program uploads the trimmed talk page ($logfile1) and
# then the rebuilt Recent additions page ($logfile4).
# Tie::File may hold writes back (its autodefer feature); untying flushes
# them so Python reads the finished talk-page file. @dyklog is not used
# after this point.
untie @dyklog;
system("python2.3 -c " . $postprog1 . $postprog2 . $logfile1 . $postprog3 . $talkpage . $postprog4 . "Testing archival bot" . $postprog5 . $postprog2 . $logfile4 . $postprog3 . $pagename . $postprog4 . "Testing archival bot" . $postprog5 . "\"") == 0
    or die "Posting '$talkpage' and '$pagename' failed (status $?)\n";
# Analysis of the archive pages
#
# When some blocks did not fit on Recent additions ($i < $dykblockcount),
# count the entries already on the current archive page into $arccount.
# An entry starts on a line containing "[[Image:" or beginning with
# "...that" and runs up to the first line containing "?" or a <br> tag.
# Otherwise @arclog stays untied and $arccount stays 0.
$arccount = 0;
if ($i < $dykblockcount)
{
    tie @arclog, 'Tie::File', $logfile3 or die "Cannot tie $logfile3: $!\n";
    $x = 0;
    # Skip the page header up to the "==Did you know...==" heading.
    while (($x < scalar(@arclog)) && ($arclog[$x] !~ m/==Did you know\.\.\.==/i))
    {
        $x++;
    }
    # The loop header is the only place that moves to the next line; a
    # second increment inside the body would skip every other line and
    # undercount the entries.
    for ($arccount = 0; $x < scalar(@arclog); $x++)
    {
        if (($arclog[$x] =~ m/\[\[Image:/i) || ($arclog[$x] =~ m/^\.\.\.that/i))
        {
            # Advance to the entry's terminating line, never past the last
            # line of the file.
            while (($x < $#arclog) && ($arclog[$x] !~ m/\?/) && ($arclog[$x] !~ m/<br\s*\/{0,1}>/i))
            {
                $x++;
            }
            $arccount++;
        }
    }
}
# Write the archive logs and post the archive pages.
#
# Blocks $i .. $dykblockcount-1 of @dykmat did not fit on Recent additions.
# They are handled oldest first (highest index first); each block is
# inserted directly below the four header lines, so the newest block ends up
# on top. Once the running entry total reaches 50, a new archive log
# (next archive number) is started.
#
# - Current archive has room ($arccount < 50): archiving starts in a fresh
#   log for that archive, and the old archive text (everything after its
#   first four lines) is appended to it afterwards.
# - Current archive is full: archiving starts with the next archive number
#   and the old archive page is neither rewritten nor re-posted.
# - Nothing to archive ($i == $dykblockcount): no log is written and no page
#   is posted, so the existing archive page is left untouched.
#
# NOTE(review): entries may contain embedded newlines; the Tie::File
# documentation says such records are not supported - confirm the
# resulting files.

# Create (or truncate) an archive log holding only the archive page header.
sub start_archive_log
{
    my ($path) = @_;
    open(my $fh, ">", $path) or die "Cannot create $path: $!\n";
    print {$fh} $archiveheader;
    close($fh) or die "Cannot write $path: $!\n";
    return;
}

$startingarchive = $archivenum;
$testcount = $arccount;
if ($i < $dykblockcount)
{
    my $append_old = ($arccount < 50);
    if (not $append_old)
    {
        # Start with a brand-new archive; the post loop below must then
        # begin at the new number, not at the old archive.
        $archivenum++;
        $startingarchive = $archivenum;
        $testcount = 0;
    }
    $currentlogfile = $logfile5 . $archivenum . ".log";
    $currentlog = $currentlogfile;
    start_archive_log($currentlog);
    tie @newlog, 'Tie::File', $currentlog or die "Cannot tie $currentlog: $!\n";
    for (my $block = $dykblockcount - 1; $block >= $i; $block--)
    {
        # Insert right after the four header lines.
        $start = 4;
        foreach my $entry (1 .. $dykmat[$block][0]-1)
        {
            splice(@newlog, $start, 0, $dykmat[$block][$entry]);
            $start++;
        }
        # Empty line separating this block from the older ones below it.
        splice(@newlog, $start, 0, "");
        $testcount += $dykmat[$block][0];
        if ($testcount >= 50)
        {
            # This archive is full: continue in the next one.
            $testcount = 0;
            $archivenum++;
            $currentlog = $logfile5 . $archivenum . ".log";
            start_archive_log($currentlog);
            untie @newlog;
            tie @newlog, 'Tie::File', $currentlog or die "Cannot tie $currentlog: $!\n";
        }
    }
    # Flush any writes Tie::File may have deferred before the files are
    # appended to or read by Python.
    untie @newlog;
    if ($append_old)
    {
        # Keep the existing archive content below the new blocks. This
        # assumes the old page starts with the same four header lines.
        open(my $old_fh, ">>", $currentlogfile)
            or die "Cannot append to $currentlogfile: $!\n";
        foreach my $n (4 .. $#arclog)
        {
            print {$old_fh} $arclog[$n], "\n";
        }
        close($old_fh) or die "Cannot write $currentlogfile: $!\n";
    }
    #print "\n$testcount\n";
    #print $archivenum, "\n";
    # Post every archive page written during this run.
    foreach my $number ($startingarchive .. $archivenum)
    {
        $currentlog = $logfile5 . $number . ".log";
        system("python2.3 -c " . $postprog1 . $postprog2 . $currentlog . $postprog3 . "Wikipedia:Recent additions " . $number . $postprog4 . "Testing archival bot" . $postprog5 . "\"") == 0
            or die "Posting 'Wikipedia:Recent additions $number' failed (status $?)\n";
    }
}
# Record the archive number reached in this run. $lastarchive refers to the
# config line found at start-up, so the assignment rewrites that line of the
# tied config file. It is only dereferenced when it really is a reference;
# an empty string would otherwise be used as a symbolic reference.
if (ref($lastarchive))
{
    $$lastarchive = "lastarchive = " . $archivenum;
}
else
{
    warn "No 'lastarchive' line was found in $configfile; archive number $archivenum not saved\n";
}
# Remove the scratch logs. Release the tied files first, then delete every
# non-hidden "*.log" file in the log directory. system("rm", "-f", ".../*.log")
# in list form bypasses the shell, so the wildcard is never expanded and
# nothing is deleted.
untie @dyklog;
untie @wralog;
untie @arclog;
untie @newlog;
my $logdir = $HOME . $BOTDIR . $LOGDIR;
if (opendir(my $dir_handle, $logdir))
{
    my @stale_logs = map { $logdir . $_ } grep { /\A[^.].*\.log\z/s } readdir($dir_handle);
    closedir($dir_handle);
    foreach my $stale (@stale_logs)
    {
        unlink($stale) or warn "Cannot remove $stale: $!\n";
    }
}
else
{
    warn "Cannot read log directory $logdir: $!\n";
}
didyouknow.cfg
# Last archive that the program is on, do not attempt to update manually.
lastarchive = 25