User:Brighterorange/punctuation.js
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
/* <nowiki> */
// Version stamp for this script.
var punctuationVersion = "19 April 2008";
// Running counter; puEditExt increments it and uses the new value as the
// unique id of each suggested edit.
var punctuationID = 1;
// Chunk list produced by the most recent doPunctuation run. The undo/hide
// handlers called from the generated HTML read it.
var punctuationEdits = undefined;
// Edit-summary text captured by doPunctuation before the automatic note is
// appended to it.
var punctuationOriginalSummary = undefined;
// Compared with punctuationOriginalSummary in puSummary to decide whether to
// tick "minor edit". NOTE(review): not assigned anywhere in this part of the
// file -- presumably set elsewhere (e.g. on page load); confirm.
var punctuationPageOriginalSummary = undefined;
// Maximum number of context characters shown before/after each edit.
var puCONTEXT = 40;
// Edit-kind codes. Each value is the index of its label in puDESCRIPTIONS.
var puENDASH = 0;
var puSPELL = 1;
var puEMDASH = 2;
var puCOMMA = 3;
var puPERCENT = 4;
var puBORN = 5;
var puLINKSPACE = 6;
var puDECADE = 7;
var puPAREN = 8;
var puXHTML = 9;
var puREF = 10;
var puSEMICOLON = 11;
var puCITYSTATE = 12;
// Human-readable label for each edit kind, indexed by the codes above.
var puDESCRIPTIONS = ["en dash", "spelling", "em dash", "comma", "percent", "born", "link space", "decade", "paren", "xhtml", "ref", "semicolon", "city-state"];
// Number of edit kinds; the counting loops in puSummary and puKindButtons
// run from 0 to puNDESC - 1, so this must match puDESCRIPTIONS.length.
var puNDESC = 13;
// TODO:
// The TODO list is maintained in the development version, at [[User:Brighterorange/punctuation2.js]].
// Feel free to make new suggestions on my [[User talk:Brighterorange|talk page]].
// Entry point: runs every punctuation pass over the edit box contents,
// writes the corrected text and an automatic summary back into the edit
// form, and shows the review interface in the 'siteSub' element.
//
// The document is represented as a list of chunks, where a chunk is either
// raw text (no replacement suggested) or an edit (suggested replacement,
// reason, original text, and a hidden flag). Each pass is run from its own
// 50 ms timeout so the browser can repaint the progress label in between.
function doPunctuation() {
  // just need some prominent element to put our messages in. We use the
  // "From Wikipedia" header.
  var banner = document.getElementById('siteSub');

  function showStatus(label) {
    banner.innerHTML = '<span style="border : 1px solid #333399; padding : 4px; margin : 4px;">' + label + '</span>';
  }

  // [progress label, pass], in execution order. Every pass is wrapped in a
  // function so the helper it names is only looked up when the pass runs.
  var passes = [
    ['References...', function (l) { return puRawMapConcat(puRef, l); }],
    ['Spelling...', function (l) { return puSpell(l); }],
    ['Born style...', function (l) { return puBorn(l); }],
    ['Em dashes...', function (l) { return puRawMapConcat(puEmDash, l); }],
    ['En dashes...', function (l) { return puRawMapConcat(puEnDash, l); }],
    ['Commas...', function (l) { return puRawMapConcat(puComma, l); }],
    ['Semicolons...', function (l) { return puRawMapConcat(puSemicolon, l); }],
    ['Link space...', function (l) { return puRawMapConcat(puLinkSpace, l); }],
    ['Decade...', function (l) { return puRawMapConcat(puDecade, l); }],
    ['Parens...', function (l) { return puRawMapConcat(puParen, l); }],
    ['XHTML...', function (l) { return puXhtml(l); }],
    ['City-State...', function (l) { return puCityState(l); }]
  ];

  showStatus('Running autopunctuation...');
  puDisableEditing(true);

  // start with the singleton raw chunk holding the whole article text
  var edits = puCons(puRaw(document.editform.wpTextbox1.value), undefined);
  var next = 0;

  function runNextPass() {
    edits = passes[next][1](edits);
    next++;
    if (next < passes.length) {
      // announce the upcoming pass, then yield to the browser
      showStatus(passes[next][0]);
      setTimeout(runNextPass, 50);
      return;
    }
    // all passes done: publish the result
    punctuationEdits = edits;
    punctuationOriginalSummary = document.editform.wpSummary.value;
    document.editform.wpTextbox1.value = puRewrite(edits);
    document.editform.wpSummary.value = puSummary(edits);
    // finally, show interface for undos
    puShowChanges("", edits);
  }

  showStatus(passes[0][0]);
  setTimeout(runNextPass, 50);
}
// Visually dims the edit textarea (flag true) or restores its normal
// appearance (flag false).
// We don't use the textbox's "disabled" field, since it makes the form
// submit an empty textbox, blanking the article!
function puDisableEditing(flag) {
  var box = document.editform.wpTextbox1;
  if (flag) {
    box.style.opacity = "0.5";
    // legacy IE equivalent of opacity
    box.style.filter = "Alpha(Opacity=50)";
  } else {
    // Assigning undefined does not clear a style property: the value is
    // stringified to the invalid "undefined" and ignored, leaving the box
    // dimmed. The empty string removes the declaration.
    box.style.opacity = "";
    box.style.filter = "";
  }
}
// Builds the edit-summary string for a chunk list: the user's original
// summary followed by "(auto: N kind; M kind; ...)" counting the non-raw
// chunks of each kind. Returns the original summary unchanged when there
// are no edits. Side effect: ticks the "minor edit" box when the original
// summary equals punctuationPageOriginalSummary.
function puSummary(edits) {
  var tally = [];
  var kind;
  for (kind = 0; kind < puNDESC; kind++) tally.push(0);

  var node = edits;
  while (node != undefined) {
    if (!node.head.israw) tally[node.head.what]++;
    node = node.tail;
  }

  var parts = [];
  for (kind = 0; kind < puNDESC; kind++) {
    if (tally[kind] > 0) parts.push(tally[kind] + " " + puDESCRIPTIONS[kind]);
  }
  var auto = parts.join("; ");

  if (auto == "") return punctuationOriginalSummary;

  if (punctuationOriginalSummary == punctuationPageOriginalSummary) {
    // user never did anything except run punctuation, so minor
    document.editform.wpMinoredit.checked = true;
  }
  var separator = (punctuationOriginalSummary == "") ? "" : " ";
  return punctuationOriginalSummary + separator + "(auto: " + auto + ")";
}
// Returns the HTML for a one-row table with one cell per edit kind that
// occurs in the chunk list. Each cell shows the count and label of the kind
// plus ON / OFF / HIDE controls that call puAllOn, puAllOff and puAllHide
// with the kind code.
function puKindButtons(edits) {
  var tally = [];
  var kind;
  for (kind = 0; kind < puNDESC; kind++) tally.push(0);

  var node = edits;
  while (node != undefined) {
    if (!node.head.israw) tally[node.head.what]++;
    node = node.tail;
  }

  // one clickable label that calls handler(kind)
  var control = function (handler, k, label) {
    return '<span style="cursor : hand; cursor : pointer;" onClick="' + handler + '(' + k + ');">' + label + '</span>';
  };

  // now for any edit kind we did do, give buttons for them.
  var html = "<table><tr>";
  for (kind = 0; kind < puNDESC; kind++) {
    if (!(tally[kind] > 0)) continue;
    html += '<td><div style="padding : 3px; margin-right: 6px; border : 2px solid #333377; background : #DDDDFF"><b><center>';
    html += tally[kind] + " " + puDESCRIPTIONS[kind] + '</center></b>';
    html += '<br/> ' + control('puAllOn', kind, 'ON') + ' ';
    html += control('puAllOff', kind, 'OFF') + ' ';
    html += control('puAllHide', kind, 'HIDE');
    html += '</div></td>';
  }
  return html + '</tr></table>';
}
// Appends ne to the running context ol and returns at most the last
// puCONTEXT characters of the result.
function puContextBefore(ol, ne) {
  var joined = ol + ne;
  var excess = joined.length - puCONTEXT;
  return (excess < 0) ? joined : joined.substring(excess);
}
// Returns up to puCONTEXT characters of the text that the chunk list l
// renders to (raw text for raw chunks, replacement text for edits),
// stopping as soon as enough characters have been collected.
function puContextAfter(l) {
  var collected = "";
  var node = l;
  while (node != undefined) {
    collected += node.head.israw ? node.head.text : node.head.rep;
    if (collected.length >= puCONTEXT) return collected.substr(0, puCONTEXT);
    node = node.tail;
  }
  return collected;
}
// Creates the menu shown while in show-changes mode. For now it is just a
// 'done' button whose click handler is puDoneClick.
function puMenu() {
  var style = [
    'cursor:hand',
    ' cursor:pointer',
    ' border:2px outset #559955',
    'padding:4px',
    'margin:4px',
    'background:#DDFFDD'
  ].join(';');
  return '<div onclick="puDoneClick();" style="' + style + '">click this when done with changes</div>';
}
// Click handler of the 'done' button: restores the textbox appearance and
// removes all the shown changes from the 'siteSub' element.
function puDoneClick() {
  puDisableEditing(false);
  document.getElementById('siteSub').innerHTML = '';
}
// Renders the chunk list l into the 'siteSub' element as an HTML summary
// with edit buttons. c is the context made of some previous characters.
// An empty list shows a "no changes" message instead.
function puShowChanges(c, l) {
  var target = document.getElementById('siteSub');
  // XXX actually, if all are deactivated too...
  target.innerHTML = (l == undefined)
    ? '<p>Punctuation: no changes.</p>'
    : puShowSomeChanges(c, l);
}
// Builds the review HTML for a non-empty chunk list: the menu, the per-kind
// buttons, then one entry per chunk, and the menu again. Raw chunks show as
// "(...)", hidden edits as "(hidden)", and every other edit as a clickable
// "original→replacement" box (click calls puUndo with the edit id)
// surrounded by highlighted context. c is the context preceding the list.
function puShowSomeChanges(c, l) {
  var greyOpen = '<span style="color:#AAAAAA">';
  var html = puMenu();
  html = html + puKindButtons(l) + "<br />";

  var before = c;
  for (var node = l; node != undefined; node = node.tail) {
    var chunk = node.head;

    if (chunk.israw) {
      html += greyOpen + '(...)</span>';
      before = puContextBefore(before, chunk.text);
      continue;
    }

    if (chunk.hidden) {
      html += greyOpen + '(hidden)</span>';
      before = puContextBefore(before, chunk.rep);
      continue;
    }

    // XXX hover could select in edit box??
    var after = puContextAfter(node.tail);
    // an edit may carry display-only versions of its two sides
    var shownSrc = (chunk.dispsrc == undefined) ? chunk.orig : chunk.dispsrc;
    var shownDst = (chunk.dispdst == undefined) ? chunk.rep : chunk.dispdst;

    html += '<br/> (' + puHighlightContext(puEscape(before));
    html += '<span id="puEdit' + chunk.id + '" style="border : 1px solid #FF9999; background : #FFDDDD; cursor : hand; cursor : pointer;"';
    html += ' onClick="puUndo(' + chunk.id + ');">';
    html += puHighlight(puEscape(shownSrc)) + "→" + puHighlight(puEscape(shownDst)) + '</span>';
    html += puHighlightContext(puEscape(after)) + ') ';

    before = puContextBefore(before, chunk.rep);
  }
  return html + puMenu();
}
// Prepares one side of an edit for display. Spaces are shown as light
// underscores, since many edits involve the deletion/insertion of spaces,
// and the __PUREF__ placeholder is shown as a coloured "<REF>" marker.
// Returns an HTML string; s is expected to be HTML-escaped already.
function puHighlight(s) {
  // spaces first, or this would mess up the spaces in our own html
  s = s.replace(/ /g, '<span style="color:#888888">_</span>');
  // The marker must be written with entities: a literal <REF> would be
  // parsed as an unknown element and nothing would be displayed.
  return s.replace(/__PUREF__/g, '<span style="color:#AA55AA">&lt;REF&gt;</span>');
}
// Colours wiki markup characters ([ ] red, { } green, | blue) and the words
// ISSN / ISBN / SCOTUS (orange, shown upper-case) in a context string.
// Returns an HTML string.
function puHighlightContext(s) {
  var tint = function (colour, shown) {
    return '<span style="color:' + colour + '">' + shown + '</span>';
  };
  // applied one after another, in this order
  var rules = [
    [/\[/g, tint('#FF0000', '[')],
    [/\]/g, tint('#FF0000', ']')],
    [/\{/g, tint('#00FF00', '{')],
    [/\}/g, tint('#00FF00', '}')],
    [/\|/g, tint('#0000FF', '|')],
    // these occur next to false positives for en dashes, commonly
    [/issn/gi, tint('#FF7722', 'ISSN')],
    [/isbn/gi, tint('#FF7722', 'ISBN')],
    // template requires literal dash
    [/scotus/gi, tint('#FF7722', 'SCOTUS')]
  ];
  for (var i = 0; i < rules.length; i++) {
    s = s.replace(rules[i][0], rules[i][1]);
  }
  return s;
}
// Escapes wikitext for insertion into HTML. The previous body replaced "<"
// with "<" and ">" with ">", which changed nothing, so tags in the article
// text (<ref>, <br>, ...) were injected into the page as live markup.
// "&" is escaped first so that entities written in the article are shown
// literally and the entities produced here are not escaped twice.
function puEscape(s) {
  return s.replace(/&/g, "&amp;")
          .replace(/</g, "&lt;")
          .replace(/>/g, "&gt;");
}
// Called from generated html. Marks every chunk of kind k in
// punctuationEdits as hidden (it is simply not displayed), then refreshes
// the summary field and the change display. The textbox contents are not
// rewritten.
function puAllHide(k) {
  var node = punctuationEdits;
  while (node != undefined) {
    if (node.head.what == k) node.head.hidden = true;
    node = node.tail;
  }
  // always keep these up to date
  document.editform.wpSummary.value = puSummary(punctuationEdits);
  puShowChanges("", punctuationEdits);
}
// Called from generated html. Undoes the edit whose id is i: the chunk is
// turned into a raw chunk holding its original text, the textarea and the
// summary are rewritten, and the edit's box in the display is greyed out.
// Shows an alert when no chunk has that id.
function puUndo(i) {
  var node = punctuationEdits;
  while (node != undefined && node.head.id != i) node = node.tail;

  if (node == undefined) {
    alert("Oops, can't undo? " + i + " ... " + punctuationEdits);
    return;
  }

  var chunk = node.head;
  chunk.text = chunk.orig;
  chunk.israw = true;

  // undo edit where it matters
  document.editform.wpTextbox1.value = puRewrite(punctuationEdits);
  document.editform.wpSummary.value = puSummary(punctuationEdits);

  var box = document.getElementById('puEdit' + i);
  box.style.border = "none";
  box.style.opacity = "0.5";
  box.style.filter = "Alpha(Opacity=50)";
}
// Generates the article text from a chunk list: the text of raw chunks and
// the replacement of edit chunks, concatenated in order. A chunk whose
// relevant field is missing contributes "???".
function puRewrite(l) {
  var pieces = [];
  for (var node = l; node != undefined; node = node.tail) {
    var chunk = node.head;
    var piece = chunk.israw ? chunk.text : chunk.rep;
    pieces.push(piece != undefined ? "" + piece : "???");
  }
  return pieces.join("");
}
// Given a function (f : string -> chunk list) and (l : chunk list), builds
// a new list in which every raw chunk of l is replaced by the chunks of
// f(its text), flattened in place. Edit chunks are carried over unmodified.
// f is called on the raw chunks in list order.
function puRawMapConcat(f, l) {
  // first pass, left to right: apply f where needed
  var steps = [];
  var node = l;
  while (node != undefined) {
    if (node.head.israw) steps.push({ splice: true, value: f(node.head.text) });
    else steps.push({ splice: false, value: node.head });
    node = node.tail;
  }
  // second pass, right to left: rebuild the list onto the original terminator
  var result = node;
  for (var i = steps.length - 1; i >= 0; i--) {
    result = steps[i].splice
      ? puAppend(steps[i].value, result)
      : puCons(steps[i].value, result);
  }
  return result;
}
// Returns the chunk list made of the chunks of l1 followed by l2. The cells
// are rebuilt with puCons, so l2 is returned as is when l1 is empty.
function puAppend(l1, l2) {
  var heads = [];
  for (var node = l1; node != undefined; node = node.tail) heads.push(node.head);
  var result = l2;
  for (var i = heads.length - 1; i >= 0; i--) result = puCons(heads[i], result);
  return result;
}
// Lists are represented as head/tail cons cells, with nil = undefined.
// Returns a new cell with head h and tail t -- unless h and the first chunk
// of t are both raw, in which case the two are flattened into one raw chunk
// holding the concatenated text.
function puCons(h, t) {
  var bothRaw = t != undefined && t.head.israw && h.israw;
  if (!bothRaw) {
    return { head: h, tail: t };
  }
  return {
    head: { israw: true, text: h.text + t.head.text },
    tail: t.tail
  };
}
// Wraps the string s in a raw chunk (text with no replacement suggested).
function puRaw(s) {
  return { israw: true, text: s };
}
// puCleave(small, large)
// Finds the first match of small in large and returns a two-element array:
// the string preceding the match and the string following it. Returns
// undefined if there is no match.
function puCleave(small, large) {
  var at = large.indexOf(small);
  if (at < 0) return undefined;
  return [large.substring(0, at), large.substring(at + small.length)];
}
// Suggests "(born " for every "(b. " in the raw chunks of edits.
function puBorn(edits) {
  var expandBorn = puSpellRep("(b. ", "(born ", puBORN);
  return puRawMapConcat(expandBorn, edits);
}
// Suggests the XHTML form "<br />" for "<br>" and then for "<BR>" in the
// raw chunks of edits.
function puXhtml(edits) {
  var oldForms = ["<br>", "<BR>"];
  for (var i = 0; i < oldForms.length; i++) {
    edits = puRawMapConcat(puSpellRep(oldForms[i], "<br />", puXHTML), edits);
  }
  return edits;
}
// Runs the fixed list of common misspellings over the raw chunks of edits,
// one replacement pass per entry, in the order listed.
function puSpell(edits) {
  // [misspelled fragment, correction]
  var fixes = [
    ["seperat", "separat"],
    ["embarass", "embarrass"],
    ["existance", "existence"],
    ["supercede", "supersede"],
    ["accomodat", "accommodat"],
    ["foreward", "foreword"],
    ["liason", "liaison"],
    ["millenium", "millennium"],
    ["accomoda", "accommoda"],
    ["occassion", "occasion"],
    ["occurrance", "occurrence"],
    ["privelege", "privilege"],
    ["priviledge", "privilege"],
    ["withold", "withhold"]
  ];
  for (var i = 0; i < fixes.length; i++) {
    edits = puRawMapConcat(puSpellRep(fixes[i][0], fixes[i][1], puSPELL), edits);
  }
  return edits;
}
// Returns a (string -> chunk list) function that suggests dst for every
// occurrence of src, tagging the edits with kind wh.
function puSpellRep(src, dst, wh) {
  return function (t) {
    // spelling is kinda slow, and most misspellings never appear at all,
    // so only do the real work when src occurs in t
    var present = t.indexOf(src) != -1;
    return present ? puSpellOne(t, src, dst, wh) : puCons(puRaw(t), undefined);
  };
}
// Splits t at every occurrence of src and returns the chunk list of the raw
// text in between, with a src -> dst edit of kind wh at each occurrence.
function puSpellOne(t, src, dst, wh) {
  var parts = puCleave(src, t);
  if (parts == undefined) {
    // no (further) occurrence: the rest is plain text
    return puCons(puRaw(t), undefined);
  }
  var fix = puEdit(src, dst, wh);
  var rest = puSpellOne(parts[1], src, dst, wh);
  return puCons(puRaw(parts[0]), puCons(fix, rest));
}
// Runs the city-state link pass over the raw chunks of edits once for every
// US state, in the order listed. (Could do countries here, too.)
function puCityState(edits) {
  var states = [
    "Alabama", "Alaska", "Arizona", "Arkansas", "California",
    "Colorado", "Connecticut", "Delaware", "Florida", "Georgia",
    "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa",
    "Kansas", "Kentucky", "Louisiana", "Maine", "Maryland",
    "Massachusetts", "Michigan", "Minnesota", "Mississippi", "Missouri",
    "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey",
    "New Mexico", "New York", "North Carolina", "North Dakota", "Ohio",
    "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", "South Carolina",
    "South Dakota", "Tennessee", "Texas", "Utah", "Vermont",
    "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming"
  ];
  // states whose link target differs from the state name
  var linkOverride = { "Georgia": "Georgia (U.S. state)|Georgia" };

  for (var i = 0; i < states.length; i++) {
    var name = states[i];
    var link = linkOverride.hasOwnProperty(name) ? linkOverride[name] : undefined;
    edits = puRawMapConcat(puCityStateFn(name, link), edits);
  }
  return edits;
}
// Returns a (string -> chunk list) function that applies the city-state fix
// for one state. statelink is the optional link target to use for the state
// instead of its name.
function puCityStateFn(state, statelink) {
  return function (t) {
    // citystate is kind of slow and there are 50 states; only run a state
    // if it appears at all...
    var present = t.indexOf(', ' + state + ']]') != -1;
    return present ? puCityStateOne(t, state, statelink) : puCons(puRaw(t), undefined);
  };
}
// Splits s into [everything up to the last non-space character, the
// trailing run of spaces]. Only the space character counts as white here.
function puSplitWhiteEnd(s) {
  var end = s.length;
  while (end > 0 && s.charAt(end - 1) == ' ') end--;
  // end == 0 means s is all whitespace (or empty)
  return [s.substring(0, end), s.substring(end)];
}
// Splits s into [the leading run of spaces, everything from the first
// non-space character on]. Only the space character counts as white here.
function puSplitWhiteStart(s) {
  var start = 0;
  while (start < s.length && s.charAt(start) == ' ') start++;
  return [s.substring(0, start), s.substring(start)];
}
// Returns the run of digits and hyphens that ends s, read from the right.
// Square brackets are skipped (years are often linked) and do not end the
// run; any other character does.
// XXX allow decimal places
function puNumberEnd(s) {
  var number = "";
  var pos = s.length;
  while (pos-- > 0) {
    var ch = s.charAt(pos);
    if (ch == '[' || ch == ']') continue;
    var numeric = (ch >= '0' && ch <= '9') || ch == '-';
    if (!numeric) break;
    number = ch + number;
  }
  return number;
}
// Returns the token that starts s: every character up to the first space,
// newline or |, ignoring [[brackets]].
// XXX so this is not restricted to numbers any more
function puNumberStart(s) {
  var token = "";
  for (var pos = 0; pos < s.length; pos++) {
    var ch = s.charAt(pos);
    if (ch == '[' || ch == ']') continue;
    if (ch == ' ' || ch == '\n' || ch == '|') break;
    token += ch;
  }
  return token;
}
// Does this string end with a (partial) http link? True when the last
// "http" in s is not followed by a space or a ].
function puEndsHTTP(s) {
  // only http since we want to catch https too
  var linkStart = s.lastIndexOf('http');
  if (linkStart == -1) return false;
  // a later space or ] terminates the link
  var terminated = s.lastIndexOf(' ') > linkStart || s.lastIndexOf(']') > linkStart;
  return !terminated;
}
// Are we inside an HTML element, i.e. an "&...;" sequence? True when the
// last & in s is not followed by a space or a ;.
function puIsElement(s) {
  var amp = s.lastIndexOf('&');
  if (amp == -1) return false;
  // a later space or ; terminates the element
  var terminated = s.lastIndexOf(' ') > amp || s.lastIndexOf(';') > amp;
  return !terminated;
}
// En dash pass (string -> chunk list): looks at every hyphen in t and
// suggests a Unicode en dash, with any spaces around the hyphen removed,
// when the hyphen separates what looks like a numeric range. Text that is
// not part of a suggestion is returned as raw chunks.
function puEnDash (t) {
// split on every dash
var a = puCleave("-", t);
if (a == undefined) return puCons(puRaw(t), undefined);
// check if dash is preceded by a number and followed by
// a number.
// bef = [text before the dash, spaces directly before it]
// aft = [spaces directly after the dash, text after them]
var bef = puSplitWhiteEnd(a[0]);
var aft = puSplitWhiteStart(a[1]);
// befn: digits/hyphens ending the left side; aftn: first token of the right side
var befn = puNumberEnd(bef[0]);
var aftn = puNumberStart(aft[1]);
// alert("[" + bef[0] + "][" + bef[1] + "]-[" + aft[0] + "][" + aft[1] + "] .. [" + befn + "]–[" + aftn + "]");
// numeric values of both sides (NaN when a side is not a plain number)
var befnn = befn * 1;
var aftnn = aftn * 1;
// exclude ISBNs and certain dates by making sure the number doesn't have dash in it
if (befn.length > 0 && aftn.length > 0 &&
puEnDashBefOK(befn) && puEnDashAftOK(aftn) &&
!(puInLink(a[0], a[1])) &&
!puEndsHTTP(bef[0]) &&
// ranges are usually lo-hi, but sometimes we see 1987-8
// (the last test compares the string aftn with a number, relying on coercion)
(isNaN(befnn) || isNaN(aftnn) || befnn <= aftnn
|| (befnn >= 1000 && befnn <= 9999 && aftn <= 99) )) {
// src has whitespace around dash, replacement does not
// (note unicode en dash)
return puCons(puRaw(bef[0]), puCons(puEdit(bef[1] + "-" + aft[0], "–", puENDASH), puEnDash(aft[1])));
} else {
// don't match. but if we found dashes to the right, we shouldn't look at those
// again. (e.g. in ISBN 01-1234-6789, once we look at the first dash and reject it,
// we don't want to then consider 1234-6789, which looks like a match.)
// skip = [digits/dashes/brackets to pass over untouched, text to keep scanning]
var skip = puEnSkip(aft[1]);
return puCons(puRaw(a[0] + "-" + aft[0] + skip[0]), puEnDash(skip[1]));
}
};
// The number before a dash is acceptable only if it contains no hyphen
// itself (as it would when considering the second dash in
// ISBN 01-1234-6789).
function puEnDashBefOK(s) {
  var hyphenAt = s.indexOf('-');
  return hyphenAt < 0;
}
// Sees if the position between a (text before) and b (text after) is in a
// link. That means in a {{ template }}, or {{ template | with args }} (but
// not in the argument part), or a [[wiki link]], or a
// [[target of a piped|link]] (but not when in the display portion).
function puInLink(a, b) {
  var markers = ["}}", "]]", "{{", "[[", "|"];
  var opener = puFindAnyLeft(a, markers);
  var closer = puFindAnyRight(b, markers);
  if (opener == "{{") return closer == "}}" || closer == "|";
  if (opener == "[[") return closer == "|" || closer == "]]";
  return false;
}
// Returns the element of finds whose last occurrence in str is furthest to
// the right, or undefined when none of them occurs. On a tie the element
// listed first wins.
function puFindAnyLeft(str, finds) {
  var winner = undefined;
  var winnerAt = -1;
  var i = 0;
  while (i < finds.length) {
    var at = str.lastIndexOf(finds[i]);
    if (at > winnerAt) {
      winnerAt = at;
      winner = finds[i];
    }
    i++;
  }
  return winner;
}
// Returns the element of finds whose first occurrence in str is furthest to
// the left, or undefined when none of them occurs. On a tie the element
// listed first wins.
function puFindAnyRight(str, finds) {
  var earliest = undefined;
  var earliesti = str.length;
  for (var i = 0; i < finds.length; i++) {
    var x = str.indexOf(finds[i]);
    // indexOf reports a missing marker as -1. Without the explicit check
    // that -1 compared as "earlier" than every real position, so the first
    // absent marker was returned instead of the nearest present one.
    if (x != -1 && x < earliesti) {
      earliest = finds[i];
      earliesti = x;
    }
  }
  return earliest;
}
// Is the token s acceptable on the right-hand side of a numeric range?
// A token starting with a digit is fine unless it contains a hyphen or one
// of a few file extensions; any other token must start (case-insensitively)
// with a month name, "today", "bc" or "present".
function puEnDashAftOK(s) {
  var first = s.charCodeAt(0);
  var i;

  // some prefix has to be a number...
  if (first >= '0'.charCodeAt(0) && first <= '9'.charCodeAt(0)) {
    // but we should avoid certain stuff...
    var avoid = ['-', '.htm', '.pdf', '.png', '.jpg', '.gif', '.svg', '.stm'];
    for (i = 0; i < avoid.length; i++) {
      if (s.indexOf(avoid[i]) != -1) return false;
    }
    return true;
  }

  // otherwise something special:
  var lowered = s.toLowerCase();
  var starters = [
    "january", "february", "march", "april", "may", "june",
    "july", "august", "september", "october", "november", "december",
    "today", "bc", "present"
  ];
  for (i = 0; i < starters.length; i++) {
    if (puStartswith(lowered, starters[i])) return true;
  }
  return false;
}
// Does the string lng start with the string sht?
function puStartswith(lng, sht) {
  // searching backwards from position 0 can only match at the very start
  return lng.lastIndexOf(sht, 0) == 0;
}
// After not matching a dash for en dash replacement, splits s into two
// parts: the first (the leading run of digits, hyphens and square brackets)
// is what we should skip, the rest is what we should look for more dashes
// within.
function puEnSkip(s) {
  var cut = 0;
  while (cut < s.length) {
    var ch = s.charAt(cut);
    var skippable = (ch >= '0' && ch <= '9') || ch == '-' || ch == '[' || ch == ']';
    if (!skippable) break;
    cut++;
  }
  return [s.substring(0, cut), s.substring(cut)];
}
// Creates an edit chunk suggesting dst for src, of kind what, with no
// display-only overrides for either side.
function puEdit(src, dst, what) {
  var noOverride = undefined;
  return puEditExt(src, dst, what, noOverride, noOverride);
}
// Creates an edit chunk: orig (src) is the text found, rep (dst) the
// suggested replacement, what the edit-kind code. dispsrc / dispdst, when
// defined, are shown in the review list in place of orig / rep. Every edit
// gets a fresh id taken from the global punctuationID counter, which is
// incremented first.
function puEditExt(src, dst, what, dispsrc, dispdst) {
  return {
    orig: src,
    rep: dst,
    israw: false,
    what: what,
    hidden: false,
    dispsrc: dispsrc,
    dispdst: dispdst,
    id: ++punctuationID
  };
}
/* Fix faux em dashes.
   "--" almost anywhere should almost always be a real em dash (unless there
   are four, or it is part of an html comment).
   Takes a string and returns a chunk list; the suggested edit also swallows
   the spaces directly around the "--".
   TODO: " - " between words should usually be an em dash.
*/
function puEmDash(t) {
  var parts = puCleave("--", t);
  if (parts == undefined) return puCons(puRaw(t), undefined);

  // must be preceded by a word and followed by a word
  var left = puSplitWhiteEnd(parts[0]);
  var right = puSplitWhiteStart(parts[1]);

  if (right[1].length == 0 || !puEmOKChar(right[1].charAt(0)) ||
      left[0].length == 0 || !puEmOKChar(left[0].charAt(left[0].length - 1))) {
    /* not an em dash. */
    return puCons(puRaw(parts[0] + "--"), puEmDash(parts[1]));
  }

  var fix = puEdit(left[1] + "--" + right[0], "—", puEMDASH);
  return puCons(puRaw(left[0]), puCons(fix, puEmDash(right[1])));
}
// May the character c sit directly next to an em dash? False for > ! < - |
// (which covers html comments and longer runs of dashes), true otherwise.
function puEmOKChar(c) {
  switch (c) {
    case '>':
    case '!':
    case '<':
    case '-':
    case '|':
      return false;
    default:
      return true;
  }
}
// Is the first character of c one of 0-9? (False for the empty string.)
function puIsDigit(c) {
  var code = c.charCodeAt(0);
  var zero = '0'.charCodeAt(0);
  var nine = '9'.charCodeAt(0);
  return (code >= zero && code <= nine);
}
// [[Pittsburgh, Pennsylvania]] to [[Pittsburgh, Pennsylvania|]], [[Pennsylvania]].
// Returns the chunk list for t with one such edit per ", <state>]]" found.
// statelink, when given, is the link target used for the state.
function puCityStateOne(t, state, statelink) {
  var needle = ", " + state + "]]";
  var parts = puCleave(needle, t);
  // XXX could be improved by generating pipe trick expansion automatically
  // (pipe trick doesn't work in ref tags, etc.)
  // but that makes it a little trickier because we have to find "Pittsburgh" in the above
  // and might fail (because of other edits)
  // XXX when doing that should detect Image: and Category:
  if (parts == undefined) return puCons(puRaw(t), undefined);

  var target = (statelink == undefined) ? state : statelink;
  var fix = puEdit(needle, ", " + state + "|]], [[" + target + "]]", puCITYSTATE);
  var rest = puCityStateOne(parts[1], state, statelink);
  return puCons(puRaw(parts[0]), puCons(fix, rest));
}
// 1980's to 1980s ([[Wikipedia:Manual of Style (dates and numbers)]])
// note this isn't always a mistake:
// "1981 was a cold year compared to 1980's record temperatures" would be okay
// so some context awareness is appropriate (but it is almost always wrong)
//
// Takes a string and returns a chunk list with a "0's" -> "0s" edit wherever
// the "0's" ends a 4-digit or 2-digit number and is followed by a character
// accepted by puDecadeOKChar.
function puDecade(t) {
  var a = puCleave("0's", t);
  if (a == undefined) return puCons(puRaw(t), undefined);

  var before = a[0];
  var n = before.length;
  // Is the character k places before the "0's" a digit? Past the start of
  // the text charAt gives "", which puIsDigit rejects, so the start of the
  // chunk counts as a non-digit boundary.
  var digitBack = function (k) { return puIsDigit(before.charAt(n - k)); };

  // date before? (only do it for 4 or 2 digit dates)
  // The length guards only require the digits themselves (3 resp. 1 before
  // the "0"). Requiring one extra leading character, as before, missed a
  // decade at the very start of a chunk -- e.g. the "1980's" that remains
  // after the en dash edit in "1970-1980's".
  var fourDigit = n >= 3 && digitBack(1) && digitBack(2) && digitBack(3) && !digitBack(4);
  var twoDigit = n >= 1 && digitBack(1) && !digitBack(2);

  if ((fourDigit || twoDigit) &&
      // safe to correct?
      a[1].length > 0 && puDecadeOKChar(a[1].charAt(0))) {
    return puCons(puRaw(a[0]),
                  puCons(puEdit("0's", "0s", puDECADE),
                         puDecade(a[1])));
  } else {
    /* no problem. */
    return puCons(puRaw(a[0] + "0's"), puDecade(a[1]));
  }
}
// May the character c directly follow a decade ("1980's") that is to be
// corrected? It should mark the end of a word.
function puDecadeOKChar(c) {
  switch (c) {
    case '\n': case ' ': case ',': case '.':
    case '&': case '—': case '-': case '–':
    // text in tables?
    case '|': case '\t': case '<': case ')':
    case ';': case '!': case "'": case ':':
    case '/':
      return true;
    default:
      return false;
  }
}
// space before/around(parentheses )
// Closing parens are basically the same as commas: no space before, one
// space after. Takes a string and returns a chunk list with an edit for
// every ")" that needs fixing and is safe to fix.
function puParen(t) {
  var parts = puCleave(")", t);
  if (parts == undefined) return puCons(puRaw(t), undefined);

  // must be preceded by a word and followed by a word
  var left = puSplitWhiteEnd(parts[0]);
  var right = puSplitWhiteStart(parts[1]);

  // needs correction? (space before the paren, or none after it)
  var misspaced = left[1].length > 0 || right[0].length == 0;

  if (!misspaced ||
      // safe to correct?
      right[1].length == 0 || !puRParenOKChar(right[1].charAt(0)) ||
      left[0].length == 0 || !puRParenOKChar(left[0].charAt(left[0].length - 1))) {
    /* no problem. */
    return puCons(puRaw(parts[0] + ")"), puParen(parts[1]));
  }

  var fix = puEdit(left[1] + ")" + right[0], ") ", puPAREN);
  return puCons(puRaw(left[0]), puCons(fix, puParen(right[1])));
}
// Is the character c, found directly next to a closing paren, one that
// makes it safe to correct the spacing around the paren? Returns false for
// the characters listed below and true for everything else.
// XXX perhaps should be okay-on-right and okay-on-left; this may be too conservative
function puRParenOKChar(c) {
  if (c == ")" || c == "(" || c == '|' ||
      // otherwise we undo our linkspace fix ;)
      c == ']' ||
      // title markup
      c == '=' ||
      // sometimes people do
      c == '&' ||
      // quotes, obviously
      c == '"' || c == '”' || c == '’' || c == "'" ||
      // History of Russia (1900-1950)#World War II
      c == "#" ||
      // other stuff
      c == '\n' || c == ':' || c == ';' || c == '.' || c == '-' || c == '—' || c == ',' ||
      // The '{' test used to be a bare '{' literal, which is always truthy:
      // the function returned false for every character and no paren was
      // ever corrected.
      c == '}' || c == '{' || c == '<') return false;
  else return true;
}
// Comma spacing pass (string -> chunk list).
function puComma(t) {
  var mark = ',';
  return puCommaLike(mark, puCOMMA, t);
}
// Semicolon spacing pass (string -> chunk list).
function puSemicolon(t) {
  var mark = ';';
  return puCommaLike(mark, puSEMICOLON, t);
}
// Spacing pass for a comma-like mark ch (no space before it, one space
// after it). Takes the text t and returns a chunk list with an edit of kind
// what for every occurrence that needs fixing and is safe to fix.
// TODO: very important to filter out URL hits, since comma appears in lots of news URLs
function puCommaLike(ch, what, t) {
  var parts = puCleave(ch, t);
  if (parts == undefined) return puCons(puRaw(t), undefined);

  // must be preceded by a word and followed by a word
  var left = puSplitWhiteEnd(parts[0]);
  var right = puSplitWhiteStart(parts[1]);

  // needs correction? (space before the mark, or none after it)
  var misspaced = left[1].length > 0 || right[0].length == 0;

  if (!misspaced ||
      // safe to correct?
      puEndsHTTP(left[0]) ||
      puIsElement(left[0]) ||
      right[1].length == 0 || !puCommaOKChar(right[1].charAt(0)) ||
      left[0].length == 0 || !puCommaOKChar(left[0].charAt(left[0].length - 1))) {
    /* no problem. */
    return puCons(puRaw(parts[0] + ch), puCommaLike(ch, what, parts[1]));
  }

  var fix = puEdit(left[1] + ch + right[0], ch + ' ', what);
  return puCons(puRaw(left[0]), puCons(fix, puCommaLike(ch, what, right[1])));
}
// Link space pass (string -> chunk list): suggests removing the spaces
// directly before the "]]" that closes a link.
function puLinkSpace(t) {
  var a = puCleave(" ]]", t);
  if (a == undefined) return puCons(puRaw(t), undefined);
  // maybe multiple spaces...
  var bef = puSplitWhiteEnd(a[0]);
  // filter out the common idiom <nowiki>[[Category:United States| ]]</nowiki>
  // The character to test is the one before the whole run of spaces: testing
  // the last character of a[0] missed the idiom when it was written with
  // more than one space ("|  ]]"), which was then collapsed to "|]]".
  // (charAt gives "" when nothing precedes the spaces, which is not a pipe.)
  if (a[0].length > 0 && bef[0].charAt(bef[0].length - 1) != '|') {
    return puCons(puRaw(bef[0]),
                  puCons(puEdit(bef[1] + " ]]", "]]", puLINKSPACE),
                         puLinkSpace(a[1])));
  } else {
    return puCons(puRaw(a[0] + " ]]"), puLinkSpace(a[1]));
  }
}
/// XXX not hooked up -- did I finish implementing this?
// Percent pass (string -> chunk list), meant to remove the space between a
// number and %.
// NOTE(review): as written the suggested replacement is "% " (the comma
// rule), and the two predicates puPercentBeforeChar / puPercentAfterChar are
// not defined in the visible part of this file -- confirm before hooking up.
function puPercent(t) {
  var parts = puCleave("%", t);
  if (parts == undefined) return puCons(puRaw(t), undefined);

  // must be preceded by a word and followed by a word
  var left = puSplitWhiteEnd(parts[0]);
  var right = puSplitWhiteStart(parts[1]);

  // needs correction?
  var misspaced = left[1].length > 0 || right[0].length == 0;

  if (!misspaced ||
      // safe to correct?
      right[1].length == 0 || !puPercentBeforeChar(right[1].charAt(0)) ||
      left[0].length == 0 || !puPercentAfterChar(left[0].charAt(left[0].length - 1))) {
    /* no problem. */
    return puCons(puRaw(parts[0] + "%"), puPercent(parts[1]));
  }

  var fix = puEdit(left[1] + "%" + right[0], "% ", puPERCENT);
  return puCons(puRaw(left[0]), puCons(fix, puPercent(right[1])));
}
// Is the character c, found directly next to a comma or semicolon, one that
// makes it safe to correct the spacing? False for digits and for the
// characters listed below, true for everything else.
function puCommaOKChar(c) {
  // definitely not inside numbers
  var code = c.charCodeAt(0);
  if (code >= '0'.charCodeAt(0) && code <= '9'.charCodeAt(0)) return false;

  switch (c) {
    // text in tables?
    case '|':
    // quotes, obviously
    case '"': case '”': case '’': case "'":
    // link w/ underscores instead of spaces
    case '_':
    case '\n': case '&': case ',':
    // ref tags
    case '{': case '<':
      return false;
    default:
      return true;
  }
}
// Character test used by the reference-spacing rules: false for the
// characters listed below, true for everything else.
function puRefSpaceOKChar(c) {
  switch (c) {
    // text in tables?
    case '|':
    // parenthetical
    case ')':
    // or space already...
    case ' ':
    // ending image: tags
    case ']':
    // ending template text
    case '}':
    // before em dashes (see MOS)
    case '—':
    // ending quotes...
    case '"': case '”': case '’': case "'":
    case '\n': case '&': case ',':
    // ref tags
    case '{': case '<':
      return false;
    default:
      return true;
  }
}
// for references, we want to find the ref tags, but
// they can appear in several common forms:
// <ref>...</ref>
// <ref name="first">...</ref>
// <ref name="reused" />
// this function returns a three-element array consisting of
// [the text before the first ref tag, the ref tag, the text following]
// (or it returns undefined if there are no ref tags to be found)
// Finds the first <ref ...> tag in t (a <references ...> tag does not count)
// and splits the text around it.
//   t: the wikitext to search (a string).
// Returns a three-element array
//   [text before the tag, the complete tag, text after the tag]
// where the complete tag ends at "/>" for a unitary tag, or runs through the
// matching "</ref>" for a bracketing tag. Returns undefined when no ref tag
// is found, or when a bracketing tag is never closed.
//
// A '/' inside the opening tag that is not immediately followed by '>' is
// treated as part of an attribute value (e.g. name="a/b") and scanning
// continues, rather than giving up on the whole text.
function puGetRef(t) {
    var opener = '<ref';
    // but not this tag!
    var lookalike = '<references';

    for (var start = t.indexOf(opener); start != -1; start = t.indexOf(opener, start + 1)) {
        if (t.substr(start, lookalike.length) == lookalike) continue;

        // Decide what kind of ref this is: keep reading the opening tag
        // until we see "/>" (unitary) or ">" (bracketing).
        for (var j = start + opener.length; j < t.length; j++) {
            var c = t.charAt(j);

            if (c == '/' && t.charAt(j + 1) == '>') {
                // unitary tag: <ref name="reused" />
                return [t.substring(0, start),
                        t.substring(start, j + 2),
                        t.substring(j + 2)];
            }

            if (c == '>') {
                // bracketing tag: eat everything up to the closing </ref>.
                // The remainder starts at the '>' itself, so the first half
                // returned by puCleave completes the opening tag.
                var pieces = puCleave('</ref>', t.substring(j));
                if (pieces == undefined) {
                    // XXX warn: unclosed ref tag??
                    return undefined;
                }
                return [t.substring(0, start),
                        t.substring(start, j) + pieces[0] + '</ref>',
                        pieces[1]];
            }
        }
        // The opening tag was never terminated; fall through and look for
        // another candidate further on.
    }

    // none found...
    return undefined;
};
// If we find a ref tag, we need to ensure the following:
// 1. there should never be any space before the tag.
// 2. the ref tag should appear after punctuation (except dashes)
// UNLESS the reference is to a specific term rather than
// to the sentence or comma/semicolon-separated phrase
// (we'll leave it up to the user to reject these false positives)
// 3. there shouldn't be double punctuation before/after the ref
// 4. there should be space after the ref
// UNLESS the reference is followed by another reference
// (or a dash, or legal punctuation as above)
//
// (this is according to the manual of style at [[wikipedia:footnotes]];
// and conforms to the Chicago Manual of Style)
//
// So, we grab any punctuation that follows the reference,
// erase all space before the reference,
// insert space after the ref if needed
// and insert any trailing punctuation before the reference,
// unless there is already punctuation there.
// Processes the first ref tag in t and recurses on the text after it.
//   t: the text to scan.
// Returns a chunk list built with puCons and terminated by undefined: raw
// chunks for untouched text, and one puREF edit (via puEditExt) for each ref
// tag whose surrounding whitespace/punctuation should change.
function puRef(t) {
// a = [text before the tag, the tag itself, text after the tag], or
// undefined when puGetRef finds no (well-formed) ref tag.
var a = puGetRef(t);
if (a == undefined) return puCons(puRaw(t), undefined);
// bef[1] is treated below as the whitespace directly before the tag and
// bef[0] as the text in front of that; aft[0] is treated as the whitespace
// directly after the tag and aft[1] as whatever follows it.
var bef = puSplitWhiteEnd(a[0]);
var tag = a[1];
var aft = puSplitWhiteStart(a[2]);
// boolean flags
// insist on two newlines since people frequently put refs on their own lines.
// NOTE(review): this can only be true if puSplitWhiteStart leaves newlines
// in aft[1] rather than counting them as whitespace -- TODO confirm.
var parend = aft[1].length > 1 && aft[1].charAt(0) == '\n' && aft[1].charAt(1) == '\n';
// true when the text before the tag is empty or does not end in one of the
// punctuation characters accepted by puRefPuncChar.
var nopuncbefore = bef[0].length == 0 || !(puRefPuncChar(bef[0].charAt(bef[0].length - 1)));
// true when, in addition, the last character before the tag is one that
// puRefNeedsPunc says should be followed by punctuation.
var needspuncbefore = nopuncbefore && bef[0].length > 0 && puRefNeedsPunc(bef[0].charAt(bef[0].length - 1));
// the punctuation char or undefined if none
var puncafter = (aft[1].length > 0)?aft[1].charAt(0):undefined;
if (puncafter != undefined && !puRefPuncChar(puncafter)) puncafter = undefined;
if (puncafter != undefined) {
// Drop the punctuation character from the remaining text. This must happen
// before needspaceafter is computed, so that the space test looks at the
// character that follows the punctuation.
aft[1] = aft[1].substr(1, aft[1].length - 1);
}
var needspaceafter = aft[1].length > 0 && puRefSpaceOKChar(aft[1].charAt(0));
// DEBUG
// var what = '';
// if (nopuncbefore) what = what + " NOPUNCBEFORE.";
// if (parend) what = what + " PAREND.";
// if (puncafter != undefined) what = what + " puncafter: " + puncafter;
// if (needspaceafter) what = what + " NEEDSPACEAFTER.";
// alert(what);
if (// whitespace before?
bef[1].length > 0 ||
// missing necessary whitespace after?
(aft[0].length == 0 && needspaceafter) ||
// punctuation after?
(puncafter != undefined) ||
// or there is no punctuation at all and this is
// the end of the paragraph
(parend && needspuncbefore)) {
// There's something to fix.
// the before part will be whatever's before, plus any additional punctuation,
// but minus any whitespace.
var befplus;
if (parend // implies no punctuation after ref
&& needspuncbefore) {
// assume period at end of paragraph.
// XXX note, this will put the period before only the last
// reference in a series of references at the end of
// a paragraph, sigh
befplus = '.';
} else if (nopuncbefore && puncafter != undefined) {
// move the punctuation that followed the ref in front of it.
befplus = puncafter;
} else befplus = '';
// punctuation that was consumed from after the tag; it belongs to the
// "old" text of the edit so that the edit covers it.
var aftoldplus = '';
if (puncafter != undefined) aftoldplus = puncafter;
// XXX: should elide contents of ref in display somehow.
// The replacement never keeps aft[0]: the text after the tag becomes a
// single space when needspaceafter is true and nothing otherwise.
return puCons(puRaw(bef[0]),
puCons(puEditExt(// old:
bef[1] + tag + aft[0] + aftoldplus,
// new:
befplus + tag + (needspaceafter?' ':''),
puREF,
// display versions elide the ref itself:
bef[1] + '__PUREF__' + aft[0] + aftoldplus,
befplus + '__PUREF__' + (needspaceafter?' ':'')),
puRef(aft[1]) ));
} else {
// no change: emit the text up to and including the tag as raw text and
// continue with everything after the tag.
return puCons(puRaw(a[0] + a[1]), puRef(a[2]));
}
};
// Tells whether c is one of the punctuation marks the ref rule cares about.
//   c: the character to test.
// Returns true for '.', ';', ',', '?', '!' and ':'; false for anything else.
function puRefPuncChar(c) {
    return c == '.' ||
           c == ';' ||
           c == ',' ||
           c == '?' ||
           c == '!' ||
           c == ':';
};
// Tells whether text ending in character c should be followed by
// punctuation before a ref tag.
//   c: the last character before the reference (a string).
// Returns true when c starts with an ASCII letter or digit, or when c is
// ']'; false otherwise (including for the empty string).
//
// The range tests compare character codes with character codes. Comparing
// a character code with a string such as 'a' coerces the string to a
// number, which made the letter tests always false and the digit test
// match control characters instead of digits.
function puRefNeedsPunc(c) {
    var code = c.charCodeAt(0);
    var isLower = code >= 'a'.charCodeAt(0) && code <= 'z'.charCodeAt(0);
    var isUpper = code >= 'A'.charCodeAt(0) && code <= 'Z'.charCodeAt(0);
    var isDigit = code >= '0'.charCodeAt(0) && code <= '9'.charCodeAt(0);
    return isLower || isUpper || isDigit || c == ']';
};
// ----------------------------------------------
// install it..
// Once the page has loaded, register addPunctuation on edit pages that are
// not talk pages. The page kind is inferred from the document title.
addOnloadHook(function() {
    var title = document.title;
    // not on talk pages...
    // Match case-insensitively: the article talk namespace is spelled
    // "Talk:" while the others are "User talk:", "Wikipedia talk:", etc.,
    // so a lowercase-only search would miss article talk pages.
    if (title.toLowerCase().indexOf("talk:") != -1) {
        return;
    }
    if (title.indexOf("Editing ") != -1) {
        addOnloadHook(addPunctuation);
    }
});
// Installs the "punctuation" tab on the edit page.
// Records the current contents of the edit-summary field in
// punctuationPageOriginalSummary, adds a tab that runs doPunctuation() when
// clicked, then calls akeytt().
function addPunctuation() {
    // need to see later if user has done any editing...
    var summaryField = document.editform.wpSummary;
    punctuationPageOriginalSummary = summaryField.value;

    var tabAction = "javascript:doPunctuation()";
    addTab(tabAction, "punctuation", "ca-punctuation", "Punctuation", "");
    akeytt();
};
/* </nowiki> */