User:N8wilson/EggHunt.js
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
$.when( mw.loader.using( 'mediawiki.util' ), $.ready ).then( function () {
// Cease and desist when outside of article space
if ( mw.config.get( 'wgCanonicalNamespace' ) !== '') return;
mcss = 3; // minimum common substring to count
numEggs = 0; // will exclude "hidden" category
marked = false; // flag to keep from double-marking
// Recursively find the total length of *ordered*, non-overlapping common
// sequences using a greedy approach: take the longest run inside the current
// window, then recurse into the window strictly before it and the window
// strictly after it.
//
// mat    - matrix in which mat[x][y] is the length of the common run that ends
//          at position x of one string and position y of the other (this is
//          how strScore fills it)
// xs, xe - window along the first index: x_start (inclusive), x_end (exclusive)
// ys, ye - same pattern for the second index
//
// Returns the summed length of the selected runs. Runs shorter than mcss, and
// windows narrower than mcss in either direction, contribute 0.
var inCommon = function(mat, xs, xe, ys, ye) {
    if (xe - xs < mcss || ye - ys < mcss) return 0;
    // max -> length of the best run so far, (mx, my) -> cell where that run ends
    var max = 0, mx = xs, my = ys;
    var xi, yi, run;
    for (xi = xs; xi < xe; xi++) {
        for (yi = ys; yi < ye; yi++) {
            // mat holds run lengths measured over the whole strings, so a run
            // ending here may begin before xs or ys. Only the part that lies
            // inside the window is unused, so clamp to it; otherwise characters
            // already consumed by an earlier pick would be counted twice.
            run = Math.min(mat[xi][yi], xi - xs + 1, yi - ys + 1);
            if (run > max) {
                max = run;
                mx = xi;
                my = yi;
            }
        }
    }
    if (max < mcss) return 0;
    // The chosen run covers x in [mx-max+1, mx] and y in [my-max+1, my].
    return max +
        inCommon(mat, xs, mx - max + 1, ys, my - max + 1) +
        inCommon(mat, mx + 1, xe, my + 1, ye);
};
// Score a pair of strings by the total length of the ordered common substrings
// that inCommon selects, as a fraction of the length of the shorter string.
//
// short, long - the two strings to compare (either one may be the longer; the
//               divisor is always the smaller length)
//
// Returns 0 when either argument is not a string or is empty; otherwise
// (shared length) / (length of the shorter string).
var strScore = function(short, long) {
    if (typeof short !== "string" || typeof long !== "string") return 0;
    // Nothing can be shared with an empty string; bail out before building a
    // matrix with no rows/columns and before dividing by a zero length.
    if (short.length === 0 || long.length === 0) return 0;
    // runs[s][l] = length of the common substring ending at short[s] / long[l]
    var runs = [];
    var s, l;
    for (s = 0; s < short.length; s++) {
        runs[s] = [];
        for (l = 0; l < long.length; l++) {
            if (short[s] !== long[l]) {
                runs[s][l] = 0;
            } else if (s === 0 || l === 0) {
                // first row/column: no diagonal predecessor to extend
                runs[s][l] = 1;
            } else {
                runs[s][l] = 1 + runs[s - 1][l - 1];
            }
        }
    }
    var sharedSeqs = inCommon(runs, 0, short.length, 0, long.length);
    return sharedSeqs / Math.min(short.length, long.length);
};
// Categories and scoring. The list is walked from the top and a link is put
// into the first category whose `min` its score reaches.
//   name - suffix of the "eggHunt-" CSS class added to matching links
//   min  - lowest score that still belongs to the category
//   mark - text inserted after each link of the category when marking is triggered
//   cnt  - running count of links placed in the category
// NOTE(review): the non-empty `mark` strings look mis-encoded (presumably they
// were emoji originally) - confirm against the source page before changing them.
var egg_cats = [
    { name: 'hidden',    min: 0.85, mark: '',   cnt: 0 },
    { name: 'unlikely',  min: 0.40, mark: 'đ„', cnt: 0 },
    { name: 'possible',  min: 0.20, mark: 'đŁ', cnt: 0 },
    { name: 'probable',  min: 0.02, mark: 'đ„', cnt: 0 },
    { name: 'unmatched', min: 0.00, mark: 'đ€', cnt: 0 }
];
// Scan the article's links, score each one by how much its visible text shares
// with the title of its target, tag it with an "eggHunt-<category>" class and
// keep per-category and overall counts.
// Filter level 1: links in paragraph tags of the article content
$("#mw-content-text p a").filter(
    function() {
        // Filter level 2: links must have title attribute, visible text, and a target beginning with /wiki/....
        // (mostly so we can use these assumptions later)
        var $link = $(this);
        var href = $link.attr("href");
        // An anchor without an href yields undefined here; treat it as "no match"
        // instead of calling indexOf on it.
        return Boolean($link.attr("title")) && Boolean($link.text()) &&
            typeof href === "string" && href.indexOf("/wiki/") === 0;
    }
).filter(
    function() {
        // Filter level 3: Remove inline templates
        return $(this).parents(".Inline-Template").length === 0;
    }
).each(
    function() {
        var $link = $(this);
        var title = $link.attr("title");
        var text = $link.text();
        // build a lower case text and a title with any disambig clarifiers removed (trailing parens)
        var loc = title.search(/[ _]\(.+\)$/);
        var title_lc = (loc >= 0 ? title.substring(0, loc) : title).toLowerCase();
        var text_lc = text.toLowerCase();
        // short-circuit if either the title or link text is fully contained in the other (not EGG)
        if (text_lc.indexOf(title_lc) >= 0 || title_lc.indexOf(text_lc) >= 0) return;
        // otherwise report possible EGG: first category whose minimum the score reaches
        var score = strScore(text_lc, title_lc);
        var c;
        for (c = 0; c < egg_cats.length; c++) {
            if (score >= egg_cats[c].min) {
                egg_cats[c].cnt++;
                $link.addClass("eggHunt-" + egg_cats[c].name);
                break;
            }
        }
        console.log('[' + score.toFixed(3) + '], "' + text + '", "' + title + '"');
        numEggs++;
    }
);
// remove count of hidden eggs (egg_cats[0] is the category with the empty marker)
numEggs -= egg_cats[0].cnt;
// Install UI hook: a portlet link in 'p-cactions' showing the count; clicking it
// inserts each category's marker after the links tagged with that category.
var node = mw.util.addPortletLink('p-cactions', "#", numEggs + ' possible đ„s', 'ca-egghunt', 'Tag '+numEggs+' possible EASTEREGGs in article');
$(node).on('click', function(e) {
    var c; // local loop index; previously leaked as an implicit global
    if (!marked) {
        for (c = 0; c < egg_cats.length; c++) {
            $("a.eggHunt-" + egg_cats[c].name).after(egg_cats[c].mark);
        }
        // only mark once, however often the link is clicked
        marked = true;
    }
    // returning false from a jQuery handler cancels the default action, so the "#" href is not followed
    return false;
});
});