User:Polygnotus/DuplicateReferences.js
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
//Testpage: https://en.wikipedia.org/wiki/User:Polygnotus/DuplicateReferencesTest
// <nowiki>
mw.loader.using(['mediawiki.util'], function () {
$(document).ready(function () {
// Master switch for diagnostic logging; nothing is printed while this is false.
const DEBUG = false;

/**
 * Logs a diagnostic message to the console, prefixed with the script's tag.
 * A no-op unless DEBUG is true.
 *
 * @param {...*} args - Values passed through to console.log after the tag.
 */
function debug(...args) {
    if (!DEBUG) {
        return;
    }
    console.log('[DuplicateReferences]', ...args);
}
if (
mw.config.get('wgAction') !== 'view' ||
mw.config.get('wgDiffNewId') ||
mw.config.get('wgDiffOldId') ||
(mw.config.get('wgNamespaceNumber') !== 0 && mw.config.get('wgPageName') !== 'User:Polygnotus/DuplicateReferencesTest')
) {
debug("Not the correct page or action, script terminated");
return;
}
debug("Page title:", document.title);
debug("URL:", window.location.href);
/**
 * Scans the element siblings that follow `element` and returns the first
 * <div> carrying the class "reflist" or "mw-references-wrap".
 *
 * @param {Element} element - Element whose following siblings are searched.
 * @returns {Element|null} The matching div, or null when no sibling matches.
 */
function findNextReflistDiv(element) {
    for (let sibling = element.nextElementSibling; sibling; sibling = sibling.nextElementSibling) {
        if (sibling.tagName.toLowerCase() !== 'div') {
            continue;
        }
        const classes = sibling.classList;
        if (classes.contains('reflist') || classes.contains('mw-references-wrap')) {
            return sibling;
        }
    }
    return null;
}
const referencesHeader = document.querySelector("h2#References");
if (!referencesHeader) {
debug("References heading not found, script terminated");
return;
}
const containerDiv = referencesHeader.closest("div");
if (!containerDiv) {
debug("Container div not found, script terminated");
return;
}
const reflistDiv = findNextReflistDiv(containerDiv);
if (!reflistDiv) {
debug("Reflist div not found, script terminated");
return;
}
const referencesList = reflistDiv.querySelector('ol.references');
if (!referencesList) {
debug("ol.references not found within reflist div");
return;
}
// Inject the script's stylesheet: highlight colours for the targeted, hovered,
// related and clicked citations, the toggle's placement, and a rule hiding the
// report table on screens up to 768px wide.
const style = document.createElement('style');
style.textContent = [
    '',
    'li:target { border: 1px dotted red; padding: 2px; background-color: #ffcccc !important;}',
    '.duplicate-citation-highlight { background-color: #e1eeff; }',
    '.duplicate-citation-hover { background-color: #cce0ff; border: 1px dotted blue; }',
    '.duplicate-citation-clicked { border: 1px dotted red; padding: 2px; background-color: #ffe6e6; }',
    '.mw-collapsible-toggle { font-weight: normal; float: right; }',
    '.duplicate-references-table { width: 100%; }',
    '@media only screen and (max-width: 768px) {',
    '.duplicate-references-table { display: none; }',
    '}',
    '',
].join('\n');
document.head.appendChild(style);
/**
 * Adds {{Duplicated citations}} to the current page's wikitext and saves it.
 *
 * The template is inserted after any leading short-description / title /
 * hatnote templates, with a `reason` listing every duplicated URL and the
 * reference numbers that share it, plus a `date` of the current month and
 * year. On success the page is reloaded; on failure the link shows an error
 * and a notification is displayed.
 *
 * @param {HTMLElement} linkElement - The clicked link; its content is replaced
 *     with progress feedback (working / done / error).
 */
function addDuplicateCitationsTemplate(linkElement) {
    debug("Adding duplicate citations template");

    // Ignore further clicks while a save is in flight; otherwise each click
    // would insert another copy of the template.
    if (linkElement.dataset.duplicateReferencesBusy === 'true') {
        return;
    }
    linkElement.dataset.duplicateReferencesBusy = 'true';
    showLoading(linkElement);

    const pageTitle = mw.config.get('wgPageName');
    const duplicateInfo = getDuplicateInfo();

    // Get current date
    const currentDate = new Date();
    const monthNames = ["January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December"
    ];
    const currentMonth = monthNames[currentDate.getMonth()];
    const currentYear = currentDate.getFullYear();
    const dateParam = `|date=${currentMonth} ${currentYear}`;

    // Templates that must stay above the newly inserted one
    const templatesToCheck = [
        '{{short description',
        '{{DISPLAYTITLE',
        '{{Lowercase title',
        '{{Italic title',
        '{{about',
        '{{redirect',
        '{{Distinguish',
        '{{for'
    ];

    let api;
    // The enclosing loader call only requests 'mediawiki.util', so make sure
    // mw.Api is actually available before constructing it.
    mw.loader.using('mediawiki.api').then(function () {
        api = new mw.Api();
        return api.get({
            action: 'query',
            prop: 'revisions',
            titles: pageTitle,
            rvprop: 'content|timestamp',
            rvslots: 'main',
            curtimestamp: true,
            formatversion: 2
        });
    }).then(function (data) {
        const page = data.query.pages[0];
        if (!page || page.missing || !page.revisions || page.revisions.length === 0) {
            throw new Error(`No revision content available for ${pageTitle}`);
        }
        const revision = page.revisions[0];
        const content = revision.slots.main.content;

        // Find the position to insert the new template: directly after the
        // last leading line that starts with one of templatesToCheck. The scan
        // stops at the first non-empty line that is neither a template start
        // nor a "__" magic word.
        let insertPosition = 0;
        const lines = content.split('\n');
        for (let i = 0; i < lines.length; i++) {
            const line = lines[i].trim().toLowerCase();
            if (templatesToCheck.some(template => line.startsWith(template.toLowerCase()))) {
                insertPosition = i + 1;
            } else if (line && !line.startsWith('{{') && !line.startsWith('__')) {
                break;
            }
        }

        // Create the reason string
        let reason = '[[User:Polygnotus/DuplicateReferences|DuplicateReferences]] detected:<br>\n';
        duplicateInfo.forEach((info) => {
            reason += `* ${info.url} (refs: ${info.refs.map(r => r.number).join(', ')})<br>\n`;
        });

        // Insert the new template with the reason parameter
        lines.splice(insertPosition, 0, `{{Duplicated citations|reason=${reason}${dateParam}}}`);
        const newContent = lines.join('\n');
        const summary = `[[User:Polygnotus/DuplicateReferences|DuplicateReferences]] +{{Duplicated citations|reason=${reason}${dateParam}}}`;

        return api.postWithToken('csrf', {
            action: 'edit',
            title: pageTitle,
            text: newContent,
            summary: summary,
            // Let the server detect edits made between our read and this
            // write instead of silently overwriting them.
            basetimestamp: revision.timestamp,
            starttimestamp: data.curtimestamp
        });
    }).then(function () {
        showSuccess(linkElement);
        setTimeout(function () {
            location.reload();
        }, 100); // Reload after 0.1 second
    }).catch(function (error) {
        console.error('Error:', error);
        // Allow the user to retry after a failure.
        linkElement.dataset.duplicateReferencesBusy = 'false';
        showError(linkElement);
        mw.notify('Failed to add the template. See console for details.', {type: 'error'});
    });
}
/**
 * Replaces the element's content with the "working" progress marker.
 *
 * @param {HTMLElement} element - Element whose innerHTML is overwritten.
 */
function showLoading(element) {
    const workingMarkup = '<sup><small>[ Working... ]</small></sup>';
    element.innerHTML = workingMarkup;
}
/**
 * Replaces the element's content with the "done" marker.
 *
 * @param {HTMLElement} element - Element whose innerHTML is overwritten.
 */
function showSuccess(element) {
    const doneMarkup = '<sup><small>[ Done ]</small></sup>';
    element.innerHTML = doneMarkup;
}
/**
 * Replaces the element's content with the "error" marker.
 *
 * @param {HTMLElement} element - Element whose innerHTML is overwritten.
 */
function showError(element) {
    const errorMarkup = '<sup><small>[ Error ]</small></sup>';
    element.innerHTML = errorMarkup;
}
/**
 * Collects the text of an element as a reader would see it.
 *
 * Text nodes are trimmed; child elements are descended into unless their
 * computed style has `display: none` or `visibility: hidden`. The pieces are
 * joined with single spaces and the result is trimmed. Other node types are
 * ignored.
 *
 * @param {Element} element - Root element to read.
 * @returns {string} The space-joined visible text.
 */
function getVisibleText(element) {
    const pieces = [];
    for (const child of element.childNodes) {
        if (child.nodeType === Node.TEXT_NODE) {
            pieces.push(child.textContent.trim());
            continue;
        }
        if (child.nodeType !== Node.ELEMENT_NODE) {
            continue;
        }
        // Skip hidden elements
        const computed = window.getComputedStyle(child);
        const isHidden = computed.display === 'none' || computed.visibility === 'hidden';
        if (!isHidden) {
            pieces.push(getVisibleText(child));
        }
    }
    return pieces.join(' ').trim();
}
/**
 * Computes the Levenshtein (edit) distance between two strings, comparing
 * them one UTF-16 code unit at a time.
 *
 * @param {string} a - First text.
 * @param {string} b - Second text.
 * @returns {number} Minimum number of single-character insertions, deletions
 *     and substitutions needed to turn one string into the other.
 */
function calculateLevenshteinDistance(a, b) {
    debug("Comparing:");
    debug("Text 1:", a);
    debug("Text 2:", b);
    if (a.length === 0) return b.length;
    if (b.length === 0) return a.length;

    // previousRow[j] holds the distance between the first (i - 1) characters
    // of b and the first j characters of a; only two rows are ever needed.
    let previousRow = Array.from({ length: a.length + 1 }, (_, j) => j);
    for (let i = 1; i <= b.length; i++) {
        const currentRow = [i];
        for (let j = 1; j <= a.length; j++) {
            const substitutionCost = b.charAt(i - 1) === a.charAt(j - 1) ? 0 : 1;
            currentRow[j] = Math.min(
                previousRow[j - 1] + substitutionCost, // match or substitution
                currentRow[j - 1] + 1, // insertion
                previousRow[j] + 1 // deletion
            );
        }
        previousRow = currentRow;
    }

    const distance = previousRow[a.length];
    debug("Levenshtein distance:", distance);
    return distance;
}
/**
 * Converts an edit distance into a rounded similarity percentage string.
 *
 * @param {number} distance - Edit distance between the two texts.
 * @param {number} maxLength - Length of the longer of the two texts.
 * @returns {string} Similarity such as "87%"; "100%" when both texts are empty.
 */
function calculateSimilarityPercentage(distance, maxLength) {
    // Two empty texts are identical; without this guard 0 / 0 would produce "NaN%".
    const similarity = maxLength === 0
        ? 100
        : ((maxLength - distance) / maxLength) * 100;
    debug("Similarity percentage:", similarity.toFixed(2) + "%");
    return Math.round(similarity) + '%';
}
/**
 * Groups the items of the reference list by the URL of their first usable
 * external link and reports every URL used by more than one reference.
 *
 * For each reported group, every pair of references is compared and the
 * similarity string is stored on the earlier reference under
 * `similarity[<later reference id>]`.
 *
 * @returns {Array<{url: string, refs: Array<{id: string, number: number, text: string, similarity: (Object|undefined)}>}>}
 *     One entry per duplicated URL, in order of first appearance.
 */
function getDuplicateInfo() {
    debug("Getting duplicate info");

    // Links starting with any of these are never used as a reference's URL.
    const ignoredUrlPrefixes = [
        "https://search.worldcat.org/", // |issn= parameter in cite news
        "https://www.bbc.co.uk/news/live/", // live articles get frequent updates
        "https://www.aljazeera.com/news/liveblog/",
        "https://www.nbcnews.com/news/world/live-blog/",
        "https://www.theguardian.com/world/live/",
        "https://www.nytimes.com/live/",
        "https://edition.cnn.com/world/live-news/",
        "https://www.timesofisrael.com/liveblog",
        "https://www.france24.com/en/live-news/",
        "https://books.google.com/", // may be 2 different pages of the same book
        "https://archive.org/details/isbn_"
    ];
    const isUsableLink = (url, linkText) =>
        linkText !== "Archived" &&
        !url.includes("wikipedia.org") &&
        !url.includes("_(identifier)") && // Templates like ISBN and ISSN and OCLC and S2CID contain (identifier)
        !ignoredUrlPrefixes.some((prefix) => url.startsWith(prefix));

    const urlMap = new Map();
    const referenceItems = Array.from(referencesList.children);
    debug("Number of reference items:", referenceItems.length);

    referenceLoop:
    for (const [index, item] of referenceItems.entries()) {
        if (item.tagName.toLowerCase() !== 'li') {
            continue;
        }
        const refId = item.id;
        const refNumber = index + 1;
        debug(`Processing reference item ${refNumber} (${refId})`);

        // Get the visible text of the entire reference item
        const refText = getVisibleText(item);
        debug(` Reference text: ${refText}`);

        // Find the first valid link in the reference
        let validLink = null;
        for (const link of item.querySelectorAll('a')) {
            const url = link.href;
            // A link whose URL lacks 'http' drops the whole reference, not just that link.
            if (!url.includes('http')) {
                debug(` Skipping reference ${refNumber} - URL does not contain 'http'`);
                continue referenceLoop;
            }
            if (isUsableLink(url, link.textContent.trim())) {
                validLink = link;
                debug(` Valid link found: ${url}`);
                break;
            }
        }

        if (!validLink) {
            debug(` No valid link found in this item`);
            continue;
        }

        const url = validLink.href;
        const entry = {id: refId, number: refNumber, text: refText};
        if (urlMap.has(url)) {
            urlMap.get(url).push(entry);
            debug(` Duplicate found for URL: ${url}`);
        } else {
            urlMap.set(url, [entry]);
            debug(` New URL added to map: ${url}`);
        }
    }

    const duplicates = [];
    for (const [url, refs] of urlMap) {
        if (refs.length < 2) {
            continue;
        }
        // Calculate Levenshtein distance for each pair of refs
        refs.forEach((earlier, position) => {
            refs.slice(position + 1).forEach((later) => {
                debug(`Comparing references ${earlier.number} and ${later.number}:`);
                const distance = calculateLevenshteinDistance(earlier.text, later.text);
                const maxLength = Math.max(earlier.text.length, later.text.length);
                const similarity = calculateSimilarityPercentage(distance, maxLength);
                earlier.similarity = earlier.similarity || {};
                earlier.similarity[later.id] = similarity;
            });
        });
        duplicates.push({url, refs});
    }

    debug("Number of duplicate sets found:", duplicates.length);
    debug("Duplicate sets:", duplicates);
    return duplicates;
}
/**
 * Builds the "Duplicate References" report table.
 *
 * The header row holds the title, a show/hide toggle and, unless a
 * `table.box-Duplicated_citations` element is already on the page, a link
 * that calls addDuplicateCitationsTemplate. Each following row describes one
 * duplicated URL: a "report false positive" link, the URL itself, and a link
 * to every reference using it, with the similarity of each later reference
 * to the first one. Hovering or clicking a reference link highlights the
 * corresponding citations on the page.
 *
 * @param {Array<{url: string, refs: Array<{id: string, number: number, text: string}>}>} duplicateInfo
 *     Duplicate groups as returned by getDuplicateInfo.
 * @returns {HTMLTableElement} The table, not yet attached to the document.
 */
function createCollapsibleTable(duplicateInfo) {
    const table = document.createElement('table');
    table.className = 'wikitable mw-collapsible duplicate-references-table';
    table.setAttribute('role', 'presentation');
    const tbody = document.createElement('tbody');
    table.appendChild(tbody);

    const headerRow = document.createElement('tr');
    const headerCell = document.createElement('td');
    headerCell.innerHTML = '<strong>Duplicate References</strong>';
    const toggleSpan = document.createElement('span');
    toggleSpan.className = 'mw-collapsible-toggle';
    toggleSpan.innerHTML = '[<a href="#" class="mw-collapsible-text">hide</a>]';
    headerCell.appendChild(toggleSpan);

    // Only offer to add the template when it is not already on the page
    const duplicatedCitationsTemplate = document.querySelector('table.box-Duplicated_citations');
    if (!duplicatedCitationsTemplate) {
        const addTemplateLink = document.createElement('a');
        addTemplateLink.textContent = ' add {{duplicated citations}} ';
        addTemplateLink.href = '#';
        addTemplateLink.addEventListener('click', function (e) {
            e.preventDefault();
            addDuplicateCitationsTemplate(addTemplateLink);
        });
        headerCell.appendChild(addTemplateLink);
    }
    headerRow.appendChild(headerCell);
    tbody.appendChild(headerRow);

    const pageTitle = mw.config.get('wgPageName').replace(/_/g, ' ');

    duplicateInfo.forEach(({url, refs}) => {
        const row = document.createElement('tr');
        const cell = document.createElement('td');

        // Create report icon. The query string must read "&section=new";
        // the previous "§ion=new" (a mangled "&sect") dropped the parameter
        // that opens a new talk-page section.
        const reportIcon = document.createElement('a');
        reportIcon.href = `https://en.wikipedia.org/wiki/User_talk:Polygnotus?action=edit&section=new&preloadtitle=Reporting%20%5B%5BUser%3APolygnotus%2FDuplicateReferences%7CDuplicateReferences%5D%5D%20false-positive&preload=User:Polygnotus/$1&preloadparams%5b%5d=${encodeURIComponent(`[[${pageTitle}]] ${url}`)}%20~~~~`;
        reportIcon.innerHTML = '<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/e/ef/Cross_CSS_Red.svg/15px-Cross_CSS_Red.svg.png" width="15" height="15" alt="Report false positive" title="Report false positive" />';
        reportIcon.style.marginRight = '5px';
        cell.appendChild(reportIcon);

        const urlLink = document.createElement('a');
        urlLink.href = url;
        urlLink.textContent = url;
        urlLink.target = "_blank";
        urlLink.rel = "noopener noreferrer";
        cell.appendChild(urlLink);
        cell.appendChild(document.createTextNode(' in refs: '));

        const originalRef = refs[0];
        refs.forEach((ref, index) => {
            const link = document.createElement('a');
            link.href = `#${ref.id}`;
            link.textContent = ref.number;
            cell.appendChild(link);

            // Add similarity information (relative to the first reference of the group)
            if (index > 0) {
                const similarity = calculateSimilarityPercentage(
                    calculateLevenshteinDistance(originalRef.text, ref.text),
                    Math.max(originalRef.text.length, ref.text.length)
                );
                const similarityInfo = document.createElement('span');
                similarityInfo.textContent = ` (${similarity})`;
                cell.appendChild(similarityInfo);
            }

            // Hover: mark the hovered citation and tint the others in its group
            link.addEventListener('mouseover', () => {
                refs.forEach(r => {
                    const citationElement = document.getElementById(r.id);
                    if (citationElement) {
                        if (r.id === ref.id) {
                            citationElement.classList.add('duplicate-citation-hover');
                        } else {
                            citationElement.classList.add('duplicate-citation-highlight');
                        }
                    }
                });
            });
            link.addEventListener('mouseout', () => {
                refs.forEach(r => {
                    const citationElement = document.getElementById(r.id);
                    if (citationElement) {
                        citationElement.classList.remove('duplicate-citation-hover');
                        citationElement.classList.remove('duplicate-citation-highlight');
                    }
                });
            });
            // Click: move the "clicked" marking from any previous group to this one
            link.addEventListener('click', () => {
                document.querySelectorAll('.duplicate-citation-clicked').forEach(el => {
                    el.classList.remove('duplicate-citation-clicked');
                });
                refs.forEach(r => {
                    const citationElement = document.getElementById(r.id);
                    if (citationElement) {
                        citationElement.classList.add('duplicate-citation-clicked');
                    }
                });
            });

            if (index < refs.length - 1) {
                cell.appendChild(document.createTextNode(', '));
            }
        });

        row.appendChild(cell);
        tbody.appendChild(row);
    });

    return table;
}
/**
 * Looks for duplicated reference URLs and, when any exist, inserts the report
 * table right after the references heading container and wires up its
 * show/hide toggle. The collapsed state is remembered in localStorage under
 * 'duplicateReferencesTableState' when storage is usable.
 */
function checkDuplicateReferenceLinks() {
    debug("Checking for duplicate reference links");
    const duplicateInfo = getDuplicateInfo();
    if (duplicateInfo.length === 0) {
        debug("No duplicates found");
        return;
    }

    debug("Duplicates found, creating collapsible table");
    const table = createCollapsibleTable(duplicateInfo);
    containerDiv.after(table);

    // Set up collapsible functionality
    const toggleLink = table.querySelector('.mw-collapsible-toggle a');
    const tableBody = $(table).find('tr:not(:first-child)');
    const storageKey = 'duplicateReferencesTableState';

    // localStorage access can throw (storage blocked or full); persistence is
    // best-effort, so a failure must not stop the toggle from working.
    const readStoredState = () => {
        try {
            return localStorage.getItem(storageKey) === 'true';
        } catch (error) {
            debug("Could not read table state from localStorage:", error);
            return false;
        }
    };
    const writeStoredState = (isCollapsed) => {
        try {
            localStorage.setItem(storageKey, isCollapsed);
        } catch (error) {
            debug("Could not save table state to localStorage:", error);
        }
    };

    const setTableState = (isCollapsed) => {
        if (isCollapsed) {
            tableBody.hide();
            toggleLink.textContent = 'show';
        } else {
            tableBody.show();
            toggleLink.textContent = 'hide';
        }
        writeStoredState(isCollapsed);
    };

    // Initialize state from localStorage (expanded when nothing usable is stored)
    setTableState(readStoredState());

    toggleLink.addEventListener('click', function (e) {
        e.preventDefault();
        const isCurrentlyCollapsed = tableBody.is(':hidden');
        setTableState(!isCurrentlyCollapsed);
    });
}
// Entry point: scan the reference list and, if duplicated URLs are found,
// insert the report table into the page.
checkDuplicateReferenceLinks();
debug("Script execution completed");
});
});
// </nowiki>