User:Misza13/spoilerkill.py
This is an interactive pywikipedia script that aids in the removal of {{spoiler}} (and related) tags from articles. It works through a list of articles read from a given file, shows the context in which each tag is placed, and asks whether to remove the tags. In this (default) mode it is a manually-assisted tool, which means that it doesn't require any approval whatsoever. The --auto
option turns it into a fully automated bot that removes all tags from all given articles.
Help
usage: spoilerkill.py [options] FILE

options:
  -h, --help            show this help message and exit
  -s ARTICLE, --start=ARTICLE
                        skip articles before ARTICLE
  -a, --auto            don't ask for removal confirmation - remove automatically
FILE - name of file that contains a list of articles to browse through (one article per line, inside [[wiki parens]])
A suitable file (updated daily) is located here:
Code
import codecs
import difflib
import re
from optparse import OptionParser
# Shortcut codes that may be typed at the edit-summary prompt; main() expands
# a recognised code to the full summary text before saving the page.
summaries = {
    u'c': u'cleanup',
    u'rm': u'Removing redundant template(s) per [[Wikipedia:Spoiler]]',
    u'ue': u'Removing unencyclopedic template(s)',
}
def bufline(ch=u'*', clr=10, L=40):
    """Print a separator line made of ``ch`` repeated ``L`` times.

    ch  -- text to repeat (a single character in every call in this script)
    clr -- colour code handed to wikipedia.output, once per repetition
    L   -- number of repetitions
    """
    rule = ch * L
    palette = [clr] * L
    wikipedia.output(rule, colors=palette)
def _load_titles(paths):
    """Return the article titles listed in the given UTF-8 text files.

    Each file holds one title per line, optionally wrapped in [[...]].
    Blank lines are dropped so they never turn into empty page titles.
    """
    titles = []
    for path in paths:
        handle = codecs.open(path, 'r', 'utf-8')
        try:
            lines = handle.read().splitlines()
        finally:
            # Close the list file even if reading or decoding fails.
            handle.close()
        for line in lines:
            title = line.strip().strip(u'[]').strip()
            if title:
                titles.append(title)
    return titles


def _show_match(text, match, header_rx, ctx):
    """Print one matched template with up to ``ctx`` characters either side.

    The template itself is printed with colour 12; any section heading in
    the excerpt that ``header_rx`` matches is printed with colour 11.
    """
    st = match.start('tpl')
    en = match.end('tpl')
    pre = text[max(0, st - ctx):st]
    tpl = match.group('tpl')
    post = text[en:en + ctx]
    excerpt = pre + tpl + post
    txtcolors = [None] * len(pre) + [12] * len(tpl) + [None] * len(post)
    for hdr in header_rx.finditer(excerpt):
        txtcolors[hdr.start('hdr'):hdr.end('hdr')] = [11] * len(hdr.group('hdr'))
    wikipedia.output(excerpt, colors=txtcolors)


def main(options, args):
    """Walk the listed articles and remove spoiler templates from them.

    options -- parsed command-line options; ``options.start`` (optional
               title to start from) and ``options.auto`` (remove without
               asking) are read.
    args    -- names of UTF-8 files listing the articles, one per line.

    For every article the spoiler templates found are shown in context.
    Unless ``options.auto`` is set, the user chooses between removing the
    templates ('y'), editing the page by hand ('e') or leaving it ('n').
    """
    ctx = 500  # characters of context shown on each side of a template

    articles = _load_titles(args)
    if options.start:
        # Plain string comparison: this only "skips articles before ARTICLE"
        # in the intended sense when the list is sorted.
        articles = [a for a in articles if a >= options.start]

    # Raw strings avoid the invalid "\|" escape; the compiled patterns are
    # the same as before.
    RX = re.compile(r'\n*(?P<tpl>{{[^}\|]*?spoil[^}]*?}})\n*', re.IGNORECASE)
    RXh = re.compile(r'(?P<hdr>=+.*(?:plot|summary|synopsis|recap|overview|history|character|story|background|biography).*=+)', re.IGNORECASE)

    Site = wikipedia.getSite()
    for a in articles:
        article = wikipedia.Page(Site, a)
        bufline(ch=u'=', clr=13, L=60)
        wikipedia.output(u'Checking for spoilers in [[%s]]...' % article.title())
        try:
            oldtxt = article.get()
        except wikipedia.IsRedirectPage:
            wikipedia.output(u'Skipping redirect page...')
            continue
        except wikipedia.NoPage:
            # A stale list entry should not abort the whole run.
            wikipedia.output(u'Skipping nonexistent page...')
            continue

        if Site.messages:
            msg = u'You have new messages!'
            wikipedia.output(msg, colors=[10] * len(msg))

        found = False
        for match in RX.finditer(oldtxt):
            found = True
            bufline(clr=14)
            _show_match(oldtxt, match, RXh, ctx)

        if not found:
            wikipedia.output(u'Not found.')
            continue

        bufline(clr=14)
        newtxt = oldtxt
        summary = u''
        if options.auto:
            ch = 'y'
        else:
            ch = wikipedia.inputChoice('Remove spoiler templates?', ['y', 'e', 'n'], ['y', 'e', 'n'])

        if ch == 'y':
            # No third argument here: on a compiled pattern it is ``count``,
            # and passing re.IGNORECASE (== 2) capped removal at two
            # templates per page. The flag is already part of RX.
            newtxt = RX.sub('\n\n', oldtxt)
            summary = summaries[u'rm']
        elif ch == 'e':
            edt = editarticle.TextEditor()
            newtxt = edt.edit(oldtxt)
            if newtxt:
                summary = wikipedia.input(u'Edit summary [empty to abort]:')
                # Expand a shortcut code to its full summary text.
                if summary in summaries:
                    summary = summaries[summary]

        # Save only when there is a summary and the text actually changed.
        if summary and newtxt != oldtxt:
            try:
                article.put(newtxt, comment=summary, minorEdit=True)
            except wikipedia.SpamfilterError:
                wikipedia.output(u'Spamfilter error has occured!')
            except wikipedia.EditConflict:
                wikipedia.output(u'An edit conflict has occured!')
if __name__ == '__main__':
    # Command-line entry point: parse the options, then run main() and shut
    # the framework down with wikipedia.stopme() whatever the outcome.
    parser = OptionParser(usage='usage: %prog [options] FILE')
    parser.add_option('-s', '--start', dest='start',
                      help='skip articles before ARTICLE', metavar='ARTICLE')
    parser.add_option('-a', '--auto', action='store_true', dest='auto',
                      help='don\'t ask for removal confirmation - remove automatically')
    options, args = parser.parse_args()
    if not args:
        # Without a list file there is nothing to do; report it as a usage
        # error instead of exiting silently.
        parser.error('no FILE given')

    # Import outside the try block: if the import itself fails, the finally
    # clause must not hit an undefined ``wikipedia`` and mask the real error.
    import wikipedia
    import editarticle
    try:
        main(options, args)
    finally:
        wikipedia.stopme()