MediaWiki:Gadget-FormattingFixer.js: Difference between revisions
Update documentation and comments |
Change WikiEditor icons to articleCheck and code |
||
| (12 intermediate revisions by the same user not shown) | |||
| Line 1: | Line 1: | ||
/** | /** | ||
* FormattingFixer for MediaWiki | |||
* | |||
* A comprehensive tool for standardizing citations and auto-linking content in wikitext. | |||
* Designed to work seamlessly with both VisualEditor (VE) and the classic WikiEditor. | |||
* | |||
* KEY FEATURES: | |||
* | * | ||
* 1. CITATION STANDARDIZATION | * 1. CITATION STANDARDIZATION | ||
| Line 21: | Line 21: | ||
* - Skips the "See also" section to avoid modifying list items. | * - Skips the "See also" section to avoid modifying list items. | ||
* - Ignores text already inside links ([[...]]) or section headers (== ... ==). | * - Ignores text already inside links ([[...]]) or section headers (== ... ==). | ||
* - | * - Link Consistency: Ensures the FIRST occurrence of a term in the body (or Relationships header) is linked. | ||
* | * If a later occurrence was manually linked but the first was not, it links the first and unlinks the later one. | ||
* | * | ||
* 3. CONTENT PRESERVATION (VisualEditor) | * 3. CONTENT PRESERVATION (VisualEditor) | ||
| Line 471: | Line 470: | ||
var database = { | var database = { | ||
targets: {}, // canonical name -> true | targets: {}, // canonical name -> true (characters, locations) | ||
chapters: {}, // chapter title -> true (always link, case-sensitive) | |||
aliases: {}, // alias -> canonical name | aliases: {}, // alias -> canonical name | ||
displayText: {} // search term -> { target: canonical, display: text } | displayText: {} // search term -> { target: canonical, display: text } | ||
| Line 529: | Line 529: | ||
var content = page.revisions[0].slots.main['*']; | var content = page.revisions[0].slots.main['*']; | ||
// Parse {{Chapter|number=X|title=Y|...}} entries | // Parse {{Chapter|number=X|title=Y|...}} entries | ||
// Chapters are stored separately - they're always linked (not just first occurrence) | |||
// and matching is case-sensitive | |||
var chapterPattern = /\{\{Chapter\|[^}]*title=([^|}]+)/gi; | var chapterPattern = /\{\{Chapter\|[^}]*title=([^|}]+)/gi; | ||
var match; | var match; | ||
while ((match = chapterPattern.exec(content)) !== null) { | while ((match = chapterPattern.exec(content)) !== null) { | ||
var title = match[1].trim(); | var title = match[1].trim(); | ||
database. | database.chapters[title] = true; | ||
} | } | ||
} | } | ||
| Line 664: | Line 666: | ||
var intro = wikitext.substring(0, firstSectionIdx); | var intro = wikitext.substring(0, firstSectionIdx); | ||
var body = wikitext.substring(firstSectionIdx); | var body = wikitext.substring(firstSectionIdx); | ||
// Get current page title to prevent self-linking | |||
var currentPageTitle = ''; | |||
if (typeof mw !== 'undefined' && mw.config) { | |||
currentPageTitle = mw.config.get('wgTitle') || ''; | |||
} | |||
// Track which canonical targets have been linked | // Track which canonical targets have been linked | ||
var linked = {}; | var linked = {}; | ||
// Mark current page as already linked to prevent self-linking | |||
if (currentPageTitle) { | |||
linked[currentPageTitle] = true; | |||
} | |||
// Build a combined list of all searchable terms | // Build a combined list of all searchable terms | ||
| Line 679: | Line 692: | ||
} | } | ||
allTerms.sort(function(a, b) { return b.term.length - a.term.length; }); | allTerms.sort(function(a, b) { return b.term.length - a.term.length; }); | ||
// Case-insensitive lookup tables for header linkification. | |||
// Some pages may have headings like "=== jessica ===" or extra whitespace. | |||
// We normalize to lowercase and resolve to the canonical page title. | |||
var targetsByLower = {}; | |||
for (var t in database.targets) { | |||
if (database.targets.hasOwnProperty(t)) { | |||
targetsByLower[t.toLowerCase()] = t; | |||
} | |||
} | |||
var aliasesByLower = {}; | |||
for (var a in database.aliases) { | |||
if (database.aliases.hasOwnProperty(a)) { | |||
aliasesByLower[a.toLowerCase()] = database.aliases[a]; | |||
} | |||
} | |||
// First: Process section headers linearly to handle Relationships linking | // First: Process section headers linearly to handle Relationships linking | ||
| Line 702: | Line 731: | ||
// Check if we are inside a Relationships section hierarchy | // Check if we are inside a Relationships section hierarchy | ||
// Scan up the stack to find if any ancestor is a "Relationships" section | // Scan up the stack to find if any ancestor is a "Relationships" section. | ||
// Simplified regex to match "Relationships", "Major Relationships", "Minor Relationships" more robustly. | |||
var isRelationships = sectionStack.some(function(section) { | var isRelationships = sectionStack.some(function(section) { | ||
return /^(Major | // Matches: "Relationships", "Major Relationships", "Minor Relationships" (case insensitive) | ||
return /^(Major|Minor)?\s*Relationships$/i.test(section.title); | |||
}); | }); | ||
if (isRelationships) { | if (isRelationships) { | ||
// Check if title is a known target/alias. | |||
// Use case-insensitive lookup so "jessica" resolves to "Jessica". | |||
var titleKey = title.toLowerCase(); | |||
var canonical = targetsByLower[titleKey] || aliasesByLower[titleKey] || null; | |||
// Only link if known target/alias and not already linked. | |||
// Use canonical title in the link to ensure proper capitalization. | |||
if (canonical && !/\[\[/.test(line)) { | |||
line = headerMatch[1] + ' [[' + canonical + ']] ' + headerMatch[1]; | |||
fixes.push('Linked "' + canonical + '" in Relationships header'); | |||
} | |||
} | } | ||
| Line 726: | Line 760: | ||
// Second pass: Find all existing links (not in headers) and mark as "linked" | // Second pass: Find all existing links (not in headers) and mark as "linked" | ||
// This prevents us from linking "Rachel" if "[[Rachel]]" is already present later in the text | // This prevents us from linking "Rachel" if "[[Rachel]]" is already present later in the text | ||
var | /* | ||
* PASS REMOVED: We no longer pre-mark existing links. | |||
* Instead, we let the Third Pass link the FIRST occurrence of a term. | |||
* The Fourth Pass (deduplication) will then clean up any subsequent links, | |||
* ensuring the first occurrence is always the one that is linked. | |||
*/ | |||
// Helper: Check if an index in body is on a header line (line-based, not position-based) | |||
// This is more reliable than isInsideSectionHeader after body has been modified | |||
function isOnHeaderLine(text, idx) { | |||
// Find the line containing this index | |||
var lineStart = text.lastIndexOf('\n', idx - 1) + 1; | |||
var lineEnd = text.indexOf('\n', idx); | |||
if (lineEnd === -1) lineEnd = text.length; | |||
var line = text.substring(lineStart, lineEnd); | |||
// Check if this line is a section header (== ... ==) | |||
return /^={2,}[^=].*={2,}\s*$/.test(line); | |||
} | |||
// Chapter Pass: Link ALL occurrences of chapter titles (case-sensitive, exact match) | |||
// Unlike characters which only get first occurrence linked, chapters are always linked. | |||
var chapterTitles = Object.keys(database.chapters).sort(function(a, b) { | |||
return b.length - a.length; // Longer titles first to avoid partial matches | |||
}); | |||
chapterTitles.forEach(function(chapter) { | |||
// Case-sensitive, exact word boundary match | |||
var escapedChapter = chapter.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); | |||
var chapterPattern = new RegExp('\\b(' + escapedChapter + ')\\b', 'g'); | |||
var newBody = ''; | |||
var lastIndex = 0; | |||
var chapterMatch; | |||
while ((chapterMatch = chapterPattern.exec(body)) !== null) { | |||
// Skip if on a header line | |||
if (isOnHeaderLine(body, chapterMatch.index)) continue; | |||
// Skip if in See also section | |||
var absPos = firstSectionIdx + chapterMatch.index; | |||
if (isInSeeAlsoSection(wikitext, absPos)) continue; | |||
// Skip if already inside a link | |||
var before = body.substring(Math.max(0, chapterMatch.index - 50), chapterMatch.index); | |||
var after = body.substring(chapterMatch.index, Math.min(body.length, chapterMatch.index + 50)); | |||
if (/\[\[[^\]]*$/.test(before) && /^[^\[]*\]\]/.test(after)) continue; | |||
// Link this occurrence | |||
var captured = chapterMatch[1]; | |||
var replacement = '[[' + captured + ']]'; | |||
newBody += body.substring(lastIndex, chapterMatch.index) + replacement; | |||
lastIndex = chapterMatch.index + chapterMatch[0].length; | |||
fixes.push('Linked chapter "' + chapter + '"'); | |||
} | |||
if (lastIndex > 0) { | |||
if ( | body = newBody + body.substring(lastIndex); | ||
} | } | ||
} | }); | ||
// Third pass: Add links for unlinked terms (first occurrence only, respecting exclusions) | // Third pass: Add links for unlinked terms (first occurrence only, respecting exclusions) | ||
| Line 759: | Line 837: | ||
while ((termMatch = termPattern.exec(body)) !== null) { | while ((termMatch = termPattern.exec(body)) !== null) { | ||
// Skip if on a header line (use line-based detection, not position-based) | |||
if (isOnHeaderLine(body, termMatch.index)) continue; | |||
// Skip if in See also section (position-based is OK here since wikitext hasn't changed) | |||
var absPos = firstSectionIdx + termMatch.index; | var absPos = firstSectionIdx + termMatch.index; | ||
if (isInSeeAlsoSection(wikitext, absPos)) continue; | if (isInSeeAlsoSection(wikitext, absPos)) continue; | ||
// Skip if already inside a link ( | // Skip if already inside a link or inside single brackets (e.g., [Augustus] in quotes) | ||
// | // Check for [[ ]] (wikilinks) or [ ] (single brackets used in prose) | ||
var before = body.substring(Math.max(0, termMatch.index - 50), termMatch.index); | var before = body.substring(Math.max(0, termMatch.index - 50), termMatch.index); | ||
var after = body.substring(termMatch.index, Math.min(body.length, termMatch.index + 50)); | var after = body.substring(termMatch.index, Math.min(body.length, termMatch.index + 50)); | ||
// Skip if inside wikilink [[ ... ]] | |||
if (/\[\[[^\]]*$/.test(before) && /^[^\[]*\]\]/.test(after)) continue; | if (/\[\[[^\]]*$/.test(before) && /^[^\[]*\]\]/.test(after)) continue; | ||
// Skip if inside single brackets [ ... ] (common in prose like "[Augustus] said") | |||
if (/\[[^\[\]]*$/.test(before) && /^[^\[\]]*\]/.test(after)) continue; | |||
if (!found) { | if (!found) { | ||
| Line 797: | Line 877: | ||
}); | }); | ||
// Fourth pass: Remove duplicate links (keep first, remove subsequent) - but NOT in headers | // Fourth pass: Remove duplicate links (keep first, remove subsequent) - but NOT in headers, See also, or chapters | ||
var seenLinks = {}; | var seenLinks = {}; | ||
var dupPattern = /\[\[([^\]|]+)(\|([^\]]+))?\]\]/g; | var dupPattern = /\[\[([^\]|]+)(\|([^\]]+))?\]\]/g; | ||
| Line 805: | Line 885: | ||
while ((dupMatch = dupPattern.exec(body)) !== null) { | while ((dupMatch = dupPattern.exec(body)) !== null) { | ||
var | var target = dupMatch[1].trim(); | ||
var canonical = database.aliases[target] || target; | |||
// Don't touch links in section headers | // Don't touch links in section headers, and DON'T mark them as seen. | ||
if ( | // Header links (e.g., === [[Augustus]] ===) are independent from body links. | ||
// The first body occurrence should still be linked even if a header link exists. | |||
// Use line-based detection since body has been modified. | |||
if (isOnHeaderLine(body, dupMatch.index)) { | |||
continue; | continue; | ||
} | } | ||
// Don't touch links in See also | // Don't touch links in See also, but DO mark them as seen. | ||
// If a name appears in See also, we don't want to link it again in the body. | |||
var absPos = firstSectionIdx + dupMatch.index; | |||
if (isInSeeAlsoSection(wikitext, absPos)) { | if (isInSeeAlsoSection(wikitext, absPos)) { | ||
seenLinks[canonical] = true; | |||
continue; | continue; | ||
} | } | ||
// Don't deduplicate chapter links - chapters should ALWAYS be linked | |||
if (database.chapters[target]) { | |||
continue; | |||
} | |||
if (seenLinks[canonical]) { | if (seenLinks[canonical]) { | ||
| Line 1,442: | Line 1,531: | ||
label: 'Fix Citations', | label: 'Fix Citations', | ||
type: 'button', | type: 'button', | ||
oouiIcon: ' | oouiIcon: 'articleCheck', | ||
action: { | action: { | ||
type: 'callback', | type: 'callback', | ||
| Line 1,472: | Line 1,561: | ||
label: 'Fix Citations + Auto Add Links', | label: 'Fix Citations + Auto Add Links', | ||
type: 'button', | type: 'button', | ||
oouiIcon: ' | oouiIcon: 'code', | ||
action: { | action: { | ||
type: 'callback', | type: 'callback', | ||
| Line 1,578: | Line 1,667: | ||
function init() { | function init() { | ||
var action = mw.config.get('wgAction'); | var action = mw.config.get('wgAction'); | ||
console.log('FormattingFixer: Initializing, action=' + action); | console.log('FormattingFixer: Initializing, action=' + action); | ||
// | // VisualEditor integration removed - Parsoid causes too many issues with references and formatting | ||
// For source editing (action=edit without VE) | // For source editing (action=edit without VE) | ||
| Line 1,682: | Line 1,760: | ||
})(); | })(); | ||
// Force update timestamp: Mon Jan 5 19:24:00 EST 2026 | |||
Latest revision as of 01:16, 6 January 2026
/**
* FormattingFixer for MediaWiki
*
* A comprehensive tool for standardizing citations and auto-linking content in wikitext.
* Designed to work seamlessly with both VisualEditor (VE) and the classic WikiEditor.
*
* KEY FEATURES:
*
* 1. CITATION STANDARDIZATION
* - Reorders references: Ensures definitions (<ref name="x">...</ref>) appear before reuses (<ref name="x" />).
* - Standardizes formats: Converts raw chapter URLs into {{Cite chapter|url=...}} templates.
* - Validates URLs: Checks that chapter URLs follow the /cXX/pXX pattern.
* - Renames References: Smartly renames generic ref names (e.g., ":0") to chapter-based names (e.g., ":c37p15").
* - Fixes Undefined Refs: Identifies used but undefined references.
*
* 2. INTELLIGENT AUTO-LINKING
* - Database: Dynamically fetches link targets from Category:Characters, Category:Locations, and Template:ChapterList.
* - Alias Support: Respects manual aliases defined in Template:LinkifyAliases.
* - Smart Exclusions:
* - Skips the "Intro" section (text before the first section header) to preserve manual formatting and Infoboxes.
* - Skips the "See also" section to avoid modifying list items.
* - Ignores text already inside links ([[...]]) or section headers (== ... ==).
* - Link Consistency: Ensures the FIRST occurrence of a term in the body (or Relationships header) is linked.
* If a later occurrence was manually linked but the first was not, it links the first and unlinks the later one.
*
* 3. CONTENT PRESERVATION (VisualEditor)
* - Implements a "Split-Process-Merge" strategy for VisualEditor to prevent Parsoid corruption.
* - The Intro section (containing the Infobox and lead paragraph) is DETACHED before processing.
* - Only the body content is round-tripped through Parsoid for linking.
* - The original, untouched Intro is re-attached to the processed body at the end.
* - This guarantees that complex Infobox formatting (linebreaks) and lead text are preserved exactly.
*
* Installation:
* 1. Copy this code to MediaWiki:Gadget-FormattingFixer.js
* 2. Add to MediaWiki:Gadgets-definition:
* * FormattingFixer[ResourceLoader|default]|Gadget-FormattingFixer.js
* 3. All users get it by default; can disable in Special:Preferences > Gadgets
*/
(function() {
'use strict';
// Configuration - adjust these patterns if your wiki uses a different URL structure.
// None of the patterns is anchored, so each can match a URL embedded in longer text.
var config = {
// Full chapter page URL (/cXX/pYY).
// Capture group 1: chapter number, optionally decimal (e.g. 37.1). Group 2: page number.
chapterUrlPattern: /https?:\/\/www\.bittersweetcandybowl\.com\/c(\d+(?:\.\d+)?)\/p(\d+)/,
// Chapter URL whose page part is optional.
// Group 1: chapter number. Group 2: the optional "/" plus page segment. Group 3: the "pYY" segment itself.
chapterUrlLoose: /https?:\/\/www\.bittersweetcandybowl\.com\/c(\d+(?:\.\d+)?)(\/(p\d+)?)?/,
// Any URL on the site (http or https, "www." host only).
siteUrlPattern: /https?:\/\/www\.bittersweetcandybowl\.com\//
};
// Guard to prevent duplicate WikiEditor buttons
var wikiEditorButtonsAdded = false;
/**
 * Scan wikitext for <ref> tags and sort the hits into three buckets.
 *
 * @param {string} wikitext Page source to scan.
 * @returns {{definitions: Array, reuses: Array, anonymous: Array}}
 *   definitions: named refs with content, as { fullMatch, name, content, index }
 *   reuses:      self-closing named refs, as { fullMatch, name, index }
 *   anonymous:   unnamed refs with content, as { fullMatch, content, index }
 *   Each bucket is in document order; `index` is the offset of the tag in wikitext.
 */
function parseRefs(wikitext) {
    // Walk a global pattern across the text and convert every hit into an entry.
    // A null entry means "ignore this hit".
    function collect(pattern, toEntry) {
        var entries = [];
        for (var hit = pattern.exec(wikitext); hit !== null; hit = pattern.exec(wikitext)) {
            var entry = toEntry(hit);
            if (entry !== null) {
                entries.push(entry);
            }
        }
        return entries;
    }

    // <ref name="X">content</ref>
    var definitions = collect(
        /<ref\s+name\s*=\s*["']([^"']+)["']\s*>([\s\S]*?)<\/ref>/gi,
        function(hit) {
            return { fullMatch: hit[0], name: hit[1], content: hit[2], index: hit.index };
        }
    );

    // <ref name="X" />
    var reuses = collect(
        /<ref\s+name\s*=\s*["']([^"']+)["']\s*\/>/gi,
        function(hit) {
            return { fullMatch: hit[0], name: hit[1], index: hit.index };
        }
    );

    // <ref>content</ref> - any hit whose text contains a named <ref ...> tag is discarded
    var anonymous = collect(
        /<ref\s*>([\s\S]*?)<\/ref>/gi,
        function(hit) {
            if (/<ref\s+name\s*=/.test(hit[0])) {
                return null;
            }
            return { fullMatch: hit[0], content: hit[1], index: hit.index };
        }
    );

    return { definitions: definitions, reuses: reuses, anonymous: anonymous };
}
/**
 * Build a ref name from a chapter page URL.
 *
 * The result is ":c<chapter>p<page>", e.g. ":c37p15". A decimal chapter number
 * has its dot swapped for an underscore (37.1 -> ":c37_1p15") so the name stays valid.
 *
 * @param {string} url URL to inspect.
 * @returns {?string} The generated name, or null when the URL does not match
 *   config.chapterUrlPattern.
 */
function generateRefName(url) {
    var parts = config.chapterUrlPattern.exec(url);
    if (parts === null) {
        return null;
    }
    // parts[1] = chapter number, parts[2] = page number
    return ':c' + parts[1].replace('.', '_') + 'p' + parts[2];
}
/**
 * Pull the first URL out of a ref's content.
 *
 * Sources are tried in priority order: {{Cite chapter|url=...}}, then
 * {{Cite web|url=...}}, then any bare http(s) URL.
 *
 * @param {string} content Text found between <ref> and </ref>.
 * @returns {?string} The URL, or null when none of the sources matches.
 */
function extractUrl(content) {
    // Ordered from most to least specific; capture group 1 is always the URL.
    var sources = [
        /\{\{Cite\s*chapter\s*\|\s*url\s*=\s*([^\s\}\|]+)/i,
        /\{\{Cite\s*web\s*\|\s*url\s*=\s*([^\s\}\|]+)/i,
        /(https?:\/\/[^\s\}<]+)/
    ];
    for (var i = 0; i < sources.length; i++) {
        var found = sources[i].exec(content);
        if (found) {
            return found[1];
        }
    }
    return null;
}
/**
 * Turn a URL into citation markup - chapter URLs only.
 *
 * @param {string} url URL to format.
 * @returns {?string} "{{Cite chapter|url=<url>}}" when the URL matches
 *   config.chapterUrlPattern (/cXX/pXX), the URL unchanged otherwise,
 *   or null when no URL was given.
 */
function formatCitation(url) {
    if (!url) {
        return null;
    }
    var isChapterUrl = config.chapterUrlPattern.test(url);
    return isChapterUrl ? '{{Cite chapter|url=' + url + '}}' : url;
}
/**
 * Validate that a chapter URL carries a page number.
 *
 * URLs that do not look like chapter URLs (no match for config.chapterUrlLoose)
 * are not this function's concern and are reported as valid.
 *
 * @param {string} url URL to check.
 * @returns {{valid: boolean, error: ?string}} `error` is a human-readable
 *   message when `valid` is false, otherwise null.
 */
function validateChapterUrl(url) {
    if (!url) {
        return { valid: false, error: 'No URL found' };
    }
    var looseMatch = config.chapterUrlLoose.exec(url);
    if (!looseMatch) {
        return { valid: true, error: null }; // Not a chapter URL, skip validation
    }
    // Group 3 is the "pXX" page segment. Group 2 also captures a bare trailing
    // slash ("/c37/"), so testing it would wrongly accept a URL with no page.
    if (!looseMatch[3]) {
        return {
            valid: false,
            error: 'Chapter URL missing page number: ' + url + ' (needs /pXX)'
        };
    }
    return { valid: true, error: null };
}
/**
 * Report reuses that appear in the text before the definition they refer to.
 *
 * When a name is defined more than once, only its first-listed definition is
 * considered.
 *
 * @param {{definitions: Array, reuses: Array}} refs Result of parseRefs().
 * @returns {Array<{name: string, reuseIndex: number, definitionIndex: number,
 *   definition: Object, reuse: Object}>} One entry per early reuse, in reuse order.
 */
function findOrderIssues(refs) {
    // name -> first definition seen for that name
    var firstDefinition = {};
    for (var i = 0; i < refs.definitions.length; i++) {
        var candidate = refs.definitions[i];
        if (!firstDefinition[candidate.name]) {
            firstDefinition[candidate.name] = candidate;
        }
    }

    return refs.reuses
        .filter(function(use) {
            var owner = firstDefinition[use.name];
            return owner && use.index < owner.index;
        })
        .map(function(use) {
            var owner = firstDefinition[use.name];
            return {
                name: use.name,
                reuseIndex: use.index,
                definitionIndex: owner.index,
                definition: owner,
                reuse: use
            };
        });
}
/**
 * List ref names that are reused but have no definition.
 *
 * @param {{definitions: Array, reuses: Array}} refs Result of parseRefs().
 * @returns {Array<{name: string, usages: Array}>} One entry per undefined name,
 *   ordered by first appearance; `usages` holds every reuse of that name in order.
 */
function findUndefinedRefs(refs) {
    var definedNames = new Set();
    refs.definitions.forEach(function(definition) {
        definedNames.add(definition.name);
    });

    var entryByName = new Map(); // name -> entry, for grouping in a single pass
    var undefinedRefs = [];      // entries in first-seen order
    refs.reuses.forEach(function(use) {
        if (definedNames.has(use.name)) {
            return;
        }
        var entry = entryByName.get(use.name);
        if (entry === undefined) {
            entry = { name: use.name, usages: [] };
            entryByName.set(use.name, entry);
            undefinedRefs.push(entry);
        }
        entry.usages.push(use);
    });
    return undefinedRefs;
}
/**
 * Pair every undefined ref name with every anonymous ref that contains a URL.
 *
 * No attempt is made to decide which anonymous ref belongs to which name:
 * the result is the full cross product, restricted to anonymous refs for which
 * extractUrl() finds a URL.
 *
 * @param {{anonymous: Array}} refs Result of parseRefs().
 * @param {Array<{name: string}>} undefinedRefs Result of findUndefinedRefs().
 * @returns {Array<{undefinedName: string, anonymousRef: Object, url: string}>}
 */
function findPotentialMatches(refs, undefinedRefs) {
    var pairs = [];
    for (var u = 0; u < undefinedRefs.length; u++) {
        var missingName = undefinedRefs[u].name;
        var anonymousRefs = refs.anonymous;
        for (var a = 0; a < anonymousRefs.length; a++) {
            var link = extractUrl(anonymousRefs[a].content);
            if (!link) {
                continue;
            }
            pairs.push({
                undefinedName: missingName,
                anonymousRef: anonymousRefs[a],
                url: link
            });
        }
    }
    return pairs;
}
/**
 * Give every anonymous ref that cites a chapter URL a chapter-based name.
 *
 * Names come from generateRefName(); when a name is already taken by a
 * definition (or by a ref named earlier in this call) a numeric suffix
 * "_2", "_3", ... is appended. Refs are rewritten from the end of the text
 * towards the start so the recorded indices stay valid.
 *
 * @param {string} wikitext Page source the refs were parsed from.
 * @param {{definitions: Array, anonymous: Array}} refs Result of parseRefs(wikitext).
 * @returns {{wikitext: string, fixes: string[]}} Updated source plus one message per named ref.
 */
function assignNamesToAnonymousRefs(wikitext, refs) {
    var takenNames = new Set(refs.definitions.map(function(d) { return d.name; }));
    var log = [];

    // Only anonymous refs whose URL is a full chapter page URL, last one first.
    var queue = refs.anonymous.filter(function(candidate) {
        var candidateUrl = extractUrl(candidate.content);
        return candidateUrl && config.chapterUrlPattern.test(candidateUrl);
    }).sort(function(left, right) { return right.index - left.index; });

    for (var i = 0; i < queue.length; i++) {
        var target = queue[i];
        var stem = generateRefName(extractUrl(target.content));
        if (!stem) {
            continue;
        }

        // First free name among stem, stem_2, stem_3, ...
        var chosen = stem;
        for (var n = 2; takenNames.has(chosen); n++) {
            chosen = stem + '_' + n;
        }
        takenNames.add(chosen);

        var head = wikitext.substring(0, target.index);
        var tail = wikitext.substring(target.index + target.fullMatch.length);
        wikitext = head + '<ref name="' + chosen + '">' + target.content + '</ref>' + tail;
        log.push('Named anonymous ref as "' + chosen + '"');
    }

    return { wikitext: wikitext, fixes: log };
}
/**
 * Rename refs that have non-chapter-style names to proper chapter-based names.
 * E.g., name=":0" with a chapter URL becomes name=":c32p7".
 *
 * A definition is left alone when it already carries the generated name or a
 * de-duplicated variant of it (":c32p7_2", ":c32p7_3", ...). Without that check
 * suffixed names would be renamed again on every run, because every generated
 * name starts with ":" and therefore also looks "non-chapter-style".
 *
 * @param {string} wikitext Page source the refs were parsed from.
 * @param {{definitions: Array}} refs Result of parseRefs(wikitext).
 * @returns {{wikitext: string, fixes: string[]}} Updated source plus one message per rename.
 */
function renameNonChapterStyleRefs(wikitext, refs) {
    var usedNames = new Set(refs.definitions.map(function(d) { return d.name; }));
    var fixes = [];
    var renames = {}; // oldName -> newName mapping for updating reuses
    // Pattern for non-chapter-style names (like :0, :1, etc. or random strings)
    var nonChapterPattern = /^:[0-9]+$|^[^c]|^c[^0-9]/;

    // True when `name` is `baseName` followed by a numeric "_N" suffix.
    function isSuffixedVariant(name, baseName) {
        var prefix = baseName + '_';
        return name.indexOf(prefix) === 0 && /^\d+$/.test(name.substring(prefix.length));
    }

    // Definitions with non-chapter-style names that cite a chapter URL,
    // processed from the end of the text so earlier indices stay valid.
    var defsToRename = refs.definitions.filter(function(def) {
        if (!nonChapterPattern.test(def.name)) return false;
        var url = extractUrl(def.content);
        return url && config.chapterUrlPattern.test(url);
    }).sort(function(a, b) { return b.index - a.index; });

    defsToRename.forEach(function(def) {
        var baseName = generateRefName(extractUrl(def.content));
        if (!baseName) return;
        // Skip if the name is already the generated one or a suffixed variant of it
        if (def.name === baseName || isSuffixedVariant(def.name, baseName)) return;

        // Ensure unique name
        var newName = baseName;
        var suffix = 2;
        while (usedNames.has(newName)) {
            newName = baseName + '_' + suffix;
            suffix++;
        }
        usedNames.add(newName);
        renames[def.name] = newName;

        // Replace the ref definition with new name
        var newRef = '<ref name="' + newName + '">' + def.content + '</ref>';
        wikitext = wikitext.substring(0, def.index) + newRef + wikitext.substring(def.index + def.fullMatch.length);
        fixes.push('Renamed ref "' + def.name + '" to "' + newName + '"');
    });

    // Now update all reuses of renamed refs.
    // Matches both <ref name="oldName" /> and <ref name="oldName"/>.
    Object.keys(renames).forEach(function(oldName) {
        var replacement = '<ref name="' + renames[oldName] + '" />';
        var escapedOld = oldName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        var reusePattern = new RegExp('<ref\\s+name\\s*=\\s*["\']' + escapedOld + '["\']\\s*/>', 'gi');
        // A replacer function keeps any "$" in the new name literal.
        wikitext = wikitext.replace(reusePattern, function() { return replacement; });
    });

    return { wikitext: wikitext, fixes: fixes };
}
/**
 * Fix order issues in wikitext: move each definition to the earliest place
 * where its name is used, leaving a reuse tag where the definition was.
 *
 * All replacements are computed against the unmodified text and then applied
 * from the end towards the start, so one swap cannot shift the indices used by
 * another. When several reuses precede the same definition only the earliest
 * one receives the definition; the others are already correct after the swap.
 *
 * @param {string} wikitext Page source the issues were computed from.
 * @param {Array<{definitionIndex: number, definition: Object, reuse: Object}>} issues
 *   Result of findOrderIssues(). Sorted in place by definition index, descending.
 * @returns {string} The corrected wikitext.
 */
function fixOrderIssues(wikitext, issues) {
    if (issues.length === 0) return wikitext;

    // In-place sort kept for backward compatibility: callers may observe the order.
    issues.sort(function(a, b) { return b.definitionIndex - a.definitionIndex; });

    // definition index -> the issue with the earliest reuse for that definition
    var earliestByDefinition = {};
    issues.forEach(function(issue) {
        var key = issue.definition.index;
        var kept = earliestByDefinition[key];
        if (!kept || issue.reuse.index < kept.reuse.index) {
            earliestByDefinition[key] = issue;
        }
    });

    function spanOf(item) {
        return { start: item.index, end: item.index + item.fullMatch.length };
    }
    function overlaps(a, b) {
        return a.start < b.end && b.start < a.end;
    }
    // The recorded match must still sit at its recorded index; otherwise the
    // indices are stale and replacing there would corrupt the text.
    function isIntact(item) {
        var span = spanOf(item);
        return wikitext.substring(span.start, span.end) === item.fullMatch;
    }

    var edits = []; // { start, end, text } against the ORIGINAL wikitext
    Object.keys(earliestByDefinition).forEach(function(key) {
        var def = earliestByDefinition[key].definition;
        var reuse = earliestByDefinition[key].reuse;
        if (!isIntact(def) || !isIntact(reuse)) return;

        var defSpan = spanOf(def);
        var reuseSpan = spanOf(reuse);
        // A swap is applied whole or not at all, so skip it if either half
        // would overlap a replacement that is already scheduled.
        var blocked = overlaps(defSpan, reuseSpan) || edits.some(function(edit) {
            return overlaps(edit, defSpan) || overlaps(edit, reuseSpan);
        });
        if (blocked) return;

        edits.push({ start: defSpan.start, end: defSpan.end, text: '<ref name="' + def.name + '" />' });
        edits.push({
            start: reuseSpan.start,
            end: reuseSpan.end,
            text: '<ref name="' + def.name + '">' + def.content + '</ref>'
        });
    });

    // Apply back-to-front so earlier offsets remain valid.
    edits.sort(function(a, b) { return b.start - a.start; });
    edits.forEach(function(edit) {
        wikitext = wikitext.substring(0, edit.start) + edit.text + wikitext.substring(edit.end);
    });
    return wikitext;
}
/**
 * Standardize citation format - ONLY for refs whose whole content is a chapter URL.
 *
 * A <ref> (named or anonymous) is rewritten to wrap its URL in
 * {{Cite chapter|url=...}} when, after trimming, its content
 *   - is not already a Cite template,
 *   - starts with a URL matching config.chapterUrlPattern (/cXX/pXX), and
 *   - contains no whitespace, i.e. nothing but the URL.
 * Refs that mix a chapter URL with other text are left untouched; wrapping them
 * whole would put the surrounding prose inside the template's url parameter.
 *
 * @param {string} wikitext Page source.
 * @returns {string} Page source with bare chapter-URL refs converted.
 */
function standardizeCitations(wikitext) {
    var refPattern = /<ref(\s+name\s*=\s*["'][^"']+["'])?\s*>([\s\S]*?)<\/ref>/gi;
    return wikitext.replace(refPattern, function(match, nameAttr, content) {
        var trimmedContent = content.trim();

        // Skip if already in Cite template
        if (/\{\{Cite/i.test(trimmedContent)) {
            return match;
        }

        // Only process content that is just a raw chapter URL
        var urlMatch = config.chapterUrlPattern.exec(trimmedContent);
        var isBareChapterUrl = urlMatch !== null &&
            urlMatch.index === 0 &&
            !/\s/.test(trimmedContent);
        if (!isBareChapterUrl) {
            return match;
        }

        return '<ref' + (nameAttr || '') + '>' + formatCitation(trimmedContent) + '</ref>';
    });
}
// ========================================
// Auto Add Links - Formatting & Linkify
// ========================================
/**
* Cache for linkify database (fetched from wiki).
* fetchLinkifyDatabase() returns linkifyCache while it is younger than
* LINKIFY_CACHE_TTL and otherwise refetches and overwrites it.
*/
// Last fetched database object, or null before the first fetch.
var linkifyCache = null;
// Date.now() timestamp (ms) of the moment linkifyCache was stored.
var linkifyCacheTime = 0;
// Maximum cache age in milliseconds.
var LINKIFY_CACHE_TTL = 5 * 60 * 1000; // 5 minutes
/**
 * Apply formatting cleanup to wikitext.
 *
 * Steps, in order:
 *   1. Trim the whole text.
 *   2. Strip trailing whitespace from every line.
 *   3. Collapse runs of spaces to a single space.
 *   4. Convert paired markdown bold (**text**) to wikitext bold ('''text''').
 *      Leading list markers (*, #, :, ;) are never touched, because "**" at the
 *      start of a line is a second-level bullet in wikitext, not bold.
 *   5. Put a single space after </ref> unless whitespace or the end of the
 *      text follows.
 *
 * @param {string} wikitext Page source.
 * @returns {{wikitext: string, fixes: string[]}} Cleaned source plus a message
 *   for each kind of change that was made.
 */
function applyFormattingCleanup(wikitext) {
    var fixes = [];
    var original = wikitext;
    var convertedBold = false;

    // "**" + text + "**" on one line, where the text neither starts nor ends
    // with whitespace and contains no asterisk.
    var markdownBold = /\*\*([^\s*](?:[^*\n]*[^\s*])?)\*\*/g;

    wikitext = wikitext.trim().split('\n').map(function(line) {
        // Steps 2 and 3
        line = line.replace(/\s+$/, '').replace(/ {2,}/g, ' ');

        // Step 4: keep the list prefix as is, convert bold only in the rest
        var listPrefix = /^[*#:;]*/.exec(line)[0];
        var rest = line.substring(listPrefix.length).replace(markdownBold, function(whole, inner) {
            convertedBold = true;
            return "'''" + inner + "'''";
        });
        return listPrefix + rest;
    }).join('\n');

    if (convertedBold) {
        fixes.push('Converted markdown bold to wikitext bold');
    }

    // Ensure single space after </ref> if not at end of line
    wikitext = wikitext.replace(/<\/ref>(?![\s\n]|$)/g, '</ref> ');
    // Remove any double spaces that might have been created
    wikitext = wikitext.replace(/ {2,}/g, ' ');

    if (wikitext !== original) {
        fixes.push('Applied formatting cleanup');
    }
    return { wikitext: wikitext, fixes: fixes };
}
/**
 * Fetch the linkify database from wiki categories and templates.
 *
 * Sources:
 *   - Category:Characters and Category:Locations -> database.targets
 *     (only the first 500 members of each; no continuation is requested)
 *   - Template:ChapterList ({{Chapter|...|title=Y|...}} entries) -> database.chapters
 *   - Template:LinkifyAliases ("alias1,alias2->CanonicalName" lines) -> database.aliases,
 *     stored both as written and lowercased
 *
 * The returned promise always resolves, with whatever could be loaded, and only
 * after every request has finished. The result is cached for LINKIFY_CACHE_TTL
 * only when every request succeeded, so a transient failure is retried on the
 * next call instead of pinning an incomplete database.
 *
 * @returns {jQuery.Promise} Resolves with
 *   { targets: Object, chapters: Object, aliases: Object, displayText: Object }.
 */
function fetchLinkifyDatabase() {
    var deferred = $.Deferred();

    // Check cache
    if (linkifyCache && (Date.now() - linkifyCacheTime < LINKIFY_CACHE_TTL)) {
        return deferred.resolve(linkifyCache).promise();
    }

    var database = {
        targets: {}, // canonical name -> true (characters, locations)
        chapters: {}, // chapter title -> true (always link, case-sensitive)
        aliases: {}, // alias -> canonical name
        displayText: {} // search term -> { target: canonical, display: text }
    };
    var api = new mw.Api();
    var anyRequestFailed = false;

    // Turn a request into a promise that always resolves, remembering failures.
    // $.when() rejects as soon as ONE input rejects, possibly while the others
    // are still pending, so every input has to be made non-rejecting first.
    function settle(request) {
        return request.then(null, function() {
            anyRequestFailed = true;
            return $.Deferred().resolve().promise();
        });
    }

    // Add every member of a category to database.targets.
    function fetchCategoryMembers(categoryTitle) {
        return api.get({
            action: 'query',
            list: 'categorymembers',
            cmtitle: categoryTitle,
            cmlimit: 500,
            format: 'json'
        }).then(function(data) {
            if (data.query && data.query.categorymembers) {
                data.query.categorymembers.forEach(function(member) {
                    database.targets[member.title] = true;
                });
            }
        });
    }

    // Load the wikitext of a page and hand it to onContent. Pages without a
    // revision (e.g. a template that does not exist yet) are silently skipped.
    function fetchPageContent(pageTitle, onContent) {
        return api.get({
            action: 'query',
            titles: pageTitle,
            prop: 'revisions',
            rvprop: 'content',
            rvslots: 'main',
            format: 'json'
        }).then(function(data) {
            var pages = data.query && data.query.pages;
            if (!pages) return;
            Object.keys(pages).forEach(function(pageId) {
                var revisions = pages[pageId].revisions;
                var mainSlot = revisions && revisions[0] && revisions[0].slots && revisions[0].slots.main;
                if (mainSlot && typeof mainSlot['*'] === 'string') {
                    onContent(mainSlot['*']);
                }
            });
        });
    }

    // Parse {{Chapter|number=X|title=Y|...}} entries.
    // Chapters are stored separately - they're always linked (not just first
    // occurrence) and matching is case-sensitive.
    function parseChapterList(content) {
        var chapterPattern = /\{\{Chapter\|[^}]*title=([^|}]+)/gi;
        var match;
        while ((match = chapterPattern.exec(content)) !== null) {
            var title = match[1].trim();
            if (title) {
                database.chapters[title] = true;
            }
        }
    }

    // Parse alias definitions, one per line: alias1,alias2->CanonicalName
    function parseAliases(content) {
        content.split('\n').forEach(function(line) {
            line = line.trim();
            if (!line || line.startsWith('<!--') || line.startsWith('{{') || line.startsWith('}}')) return;
            // Split at the first "->" so aliases may themselves contain hyphens.
            var arrowMatch = line.match(/^(.+?)->(.+)$/);
            if (!arrowMatch) return;
            var target = arrowMatch[2].trim();
            if (!target) return;
            arrowMatch[1].split(',').forEach(function(rawAlias) {
                var alias = rawAlias.trim();
                // Stray commas would otherwise register an empty search term.
                if (!alias) return;
                database.aliases[alias] = target;
                database.aliases[alias.toLowerCase()] = target;
            });
        });
    }

    var requests = [
        fetchCategoryMembers('Category:Characters'),
        fetchCategoryMembers('Category:Locations'),
        fetchPageContent('Template:ChapterList', parseChapterList),
        fetchPageContent('Template:LinkifyAliases', parseAliases)
    ].map(settle);

    $.when.apply($, requests).always(function() {
        if (!anyRequestFailed) {
            linkifyCache = database;
            linkifyCacheTime = Date.now();
        }
        deferred.resolve(database);
    });

    return deferred.promise();
}
/**
* Find the index where the first section heading starts
*/
function findFirstSectionIndex(wikitext) {
var sectionMatch = /^==\s*[^=]+\s*==/m.exec(wikitext);
return sectionMatch ? sectionMatch.index : -1;
}
/**
 * Tell whether a position lies on a section header line (= ... =, == ... ==, ...).
 *
 * @param {string} wikitext Page source.
 * @param {number} position Offset into wikitext.
 * @returns {boolean} True when the line containing `position` consists of a
 *   heading only (optionally followed by whitespace).
 */
function isInsideSectionHeader(wikitext, position) {
    // Boundaries of the line around `position`
    var from = wikitext.lastIndexOf('\n', position) + 1;
    var to = wikitext.indexOf('\n', position);
    var currentLine = to === -1
        ? wikitext.substring(from, wikitext.length)
        : wikitext.substring(from, to);
    var headerLine = /^=+[^=]+=+\s*$/;
    return headerLine.test(currentLine);
}
/**
 * Tell whether a position falls inside a "See also" section.
 *
 * How it works:
 * 1. Take the last "See also" heading that starts before the position.
 * 2. Walk the headings that follow it, up to the position.
 * 3. A heading of the same level (== See also == ... == References ==) or a
 *    higher level (=== See also === ... == Next ==) closes the section;
 *    deeper sub-headings do not.
 *
 * @param {string} wikitext Page source.
 * @param {number} position Offset into wikitext.
 * @returns {boolean} True when the position is still within the section.
 */
function isInSeeAlsoSection(wikitext, position) {
    var seeAlsoHeading = /^(==+)\s*See\s+also\s*\1\s*$/gim;
    var anyHeading = /^(==+)\s*[^=]+\s*\1\s*$/gim;
    var hit;

    // Step 1: remember the last "See also" heading in the text before the position.
    var preceding = wikitext.substring(0, position);
    var sectionStart = -1;
    var sectionLevel = 0;
    for (hit = seeAlsoHeading.exec(preceding); hit !== null; hit = seeAlsoHeading.exec(preceding)) {
        sectionStart = hit.index;
        sectionLevel = hit[1].length;
    }
    if (sectionStart === -1) {
        return false;
    }

    // Steps 2-3: scan headings from the "See also" heading onwards. The first
    // heading found is discarded (it is expected to be "See also" itself).
    var tail = wikitext.substring(sectionStart);
    anyHeading.exec(tail);
    for (hit = anyHeading.exec(tail); hit !== null; hit = anyHeading.exec(tail)) {
        var headingStart = sectionStart + hit.index;
        if (headingStart >= position) {
            break; // headings at or after the position cannot close the section
        }
        if (hit[1].length <= sectionLevel) {
            return false; // a same-level or higher heading ended the section
        }
    }
    return true;
}
/**
 * Add wikilinks to the body of a page using the linkify database.
 *
 * Everything before the first section heading (the intro) is returned
 * untouched; a page without any section heading is returned unchanged.
 * The body is processed in four passes:
 *   1. Headers nested under a "Relationships" section whose title is a known
 *      target or alias are rewritten as links to the canonical title.
 *   2. Every occurrence of a chapter title is linked (case-sensitive).
 *   3. The first eligible occurrence of each target/alias is linked.
 *   4. Repeated links to the same canonical target are unlinked (first one kept).
 * Passes 2-4 never rewrite header lines or text inside a "See also" section.
 *
 * "See also" membership is always tested against the CURRENT body text.
 * Earlier passes insert "[[" / "]]" and shift offsets, so testing body
 * offsets against the original wikitext would misplace positions near the
 * section boundaries.
 *
 * @param {string} wikitext Full page wikitext.
 * @param {Object} database Linkify database; reads `targets` (canonical -> true),
 *     `aliases` (alias -> canonical) and `chapters` (title -> true, optional).
 * @return {{wikitext: string, fixes: string[]}} Rewritten wikitext and a
 *     human-readable list of the changes made.
 */
function applyLinkify(wikitext, database) {
    var fixes = [];

    // Own-property lookup so names such as "constructor" never resolve to
    // members inherited from Object.prototype.
    function own(map, key) {
        return Object.prototype.hasOwnProperty.call(map, key) ? map[key] : undefined;
    }

    // Find where the first section starts - everything before is intro (don't touch)
    var firstSectionIdx = findFirstSectionIndex(wikitext);
    if (firstSectionIdx === -1) {
        // No sections at all - don't linkify anything
        return { wikitext: wikitext, fixes: [] };
    }
    var intro = wikitext.substring(0, firstSectionIdx);
    var body = wikitext.substring(firstSectionIdx);
    // Tolerate a database object that has no chapter map.
    var chapters = database.chapters || {};

    // Get current page title to prevent self-linking
    var currentPageTitle = '';
    if (typeof mw !== 'undefined' && mw.config) {
        currentPageTitle = mw.config.get('wgTitle') || '';
    }

    // Canonical targets that must not be linked (again) by the third pass.
    var linked = Object.create(null);
    if (currentPageTitle) {
        linked[currentPageTitle] = true;
    }

    // Searchable terms, plus case-insensitive lookup tables for header
    // linkification (headings like "=== jessica ===" resolve to "Jessica").
    var allTerms = [];
    var targetsByLower = Object.create(null);
    Object.keys(database.targets).forEach(function(name) {
        allTerms.push({ term: name, canonical: name, display: null });
        targetsByLower[name.toLowerCase()] = name;
    });
    var aliasesByLower = Object.create(null);
    Object.keys(database.aliases).forEach(function(alias) {
        aliasesByLower[alias.toLowerCase()] = database.aliases[alias];
        if (!own(database.targets, alias)) {
            allTerms.push({ term: alias, canonical: database.aliases[alias], display: alias });
        }
    });
    // Longer terms first so a long name wins over a shorter one it contains.
    allTerms.sort(function(a, b) { return b.term.length - a.term.length; });

    // First pass: walk the lines once, tracking the heading hierarchy with a
    // stack so we know whether a header sits under a "Relationships" section.
    var lines = body.split('\n');
    var newLines = [];
    var sectionStack = []; // Array of { level: number, title: string }
    for (var i = 0; i < lines.length; i++) {
        var line = lines[i];
        var headerMatch = /^(={2,})\s*([^=]+?)\s*\1\s*$/.exec(line);
        if (headerMatch) {
            var level = headerMatch[1].length;
            var title = headerMatch[2].trim();
            // A new heading closes every open section at the same or a deeper level.
            while (sectionStack.length > 0 && sectionStack[sectionStack.length - 1].level >= level) {
                sectionStack.pop();
            }
            // Matches "Relationships", "Major Relationships", "Minor Relationships" (case insensitive)
            var isRelationships = sectionStack.some(function(section) {
                return /^(Major|Minor)?\s*Relationships$/i.test(section.title);
            });
            if (isRelationships) {
                var titleKey = title.toLowerCase();
                var headerCanonical = targetsByLower[titleKey] || aliasesByLower[titleKey] || null;
                // Only link known targets/aliases, and never a header that already has a link.
                if (headerCanonical && !/\[\[/.test(line)) {
                    line = headerMatch[1] + ' [[' + headerCanonical + ']] ' + headerMatch[1];
                    fixes.push('Linked "' + headerCanonical + '" in Relationships header');
                }
            }
            sectionStack.push({ level: level, title: title });
        }
        newLines.push(line);
    }
    body = newLines.join('\n');

    // Existing links are deliberately NOT pre-marked as linked: the third pass
    // links the first occurrence and the fourth pass removes later duplicates.

    // Helper: is the line containing `idx` a section header (== ... ==)?
    function isOnHeaderLine(text, idx) {
        var lineStart = text.lastIndexOf('\n', idx - 1) + 1;
        var lineEnd = text.indexOf('\n', idx);
        if (lineEnd === -1) lineEnd = text.length;
        return /^={2,}[^=].*={2,}\s*$/.test(text.substring(lineStart, lineEnd));
    }

    // Second pass (chapters): link ALL occurrences of chapter titles
    // (case-sensitive, whole word). Longer titles first to avoid partial matches.
    var chapterTitles = Object.keys(chapters).sort(function(a, b) {
        return b.length - a.length;
    });
    chapterTitles.forEach(function(chapter) {
        var escapedChapter = chapter.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        var chapterPattern = new RegExp('\\b(' + escapedChapter + ')\\b', 'g');
        var rebuilt = '';
        var lastIndex = 0;
        var chapterMatch;
        // `body` is only reassigned after the loop, so match offsets stay valid.
        while ((chapterMatch = chapterPattern.exec(body)) !== null) {
            if (isOnHeaderLine(body, chapterMatch.index)) continue;
            if (isInSeeAlsoSection(body, chapterMatch.index)) continue;
            // Skip if already inside a link (checked within a 50-character window)
            var before = body.substring(Math.max(0, chapterMatch.index - 50), chapterMatch.index);
            var after = body.substring(chapterMatch.index, Math.min(body.length, chapterMatch.index + 50));
            if (/\[\[[^\]]*$/.test(before) && /^[^\[]*\]\]/.test(after)) continue;
            rebuilt += body.substring(lastIndex, chapterMatch.index) + '[[' + chapterMatch[1] + ']]';
            lastIndex = chapterMatch.index + chapterMatch[0].length;
            fixes.push('Linked chapter "' + chapter + '"');
        }
        if (lastIndex > 0) {
            body = rebuilt + body.substring(lastIndex);
        }
    });

    // Third pass: link the first eligible occurrence of each term.
    allTerms.forEach(function(item) {
        if (linked[item.canonical]) return;
        var escapedTerm = item.term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        // Allow "Rachel's" to match "Rachel"
        var termPattern = new RegExp('\\b(' + escapedTerm + "(?:'s)?)\\b", 'g');
        var termMatch;
        while ((termMatch = termPattern.exec(body)) !== null) {
            if (isOnHeaderLine(body, termMatch.index)) continue;
            if (isInSeeAlsoSection(body, termMatch.index)) continue;
            var before = body.substring(Math.max(0, termMatch.index - 50), termMatch.index);
            var after = body.substring(termMatch.index, Math.min(body.length, termMatch.index + 50));
            // Skip if inside wikilink [[ ... ]]
            if (/\[\[[^\]]*$/.test(before) && /^[^\[]*\]\]/.test(after)) continue;
            // Skip if inside single brackets [ ... ] (common in prose like "[Augustus] said")
            if (/\[[^\[\]]*$/.test(before) && /^[^\[\]]*\]/.test(after)) continue;

            linked[item.canonical] = true;
            var captured = termMatch[1];
            var replacement;
            // Preserve display text if it differs from canonical (e.g. alias or possessive)
            if (item.display || captured !== item.canonical) {
                replacement = '[[' + item.canonical + '|' + captured + ']]';
            } else {
                replacement = '[[' + captured + ']]';
            }
            body = body.substring(0, termMatch.index) + replacement +
                body.substring(termMatch.index + termMatch[0].length);
            fixes.push('Linked first instance of "' + item.term + '"');
            break; // Only the first occurrence is linked; nothing left to scan for
        }
    });

    // Fourth pass: remove duplicate links (keep first, remove subsequent) -
    // but NOT in headers, See also, or chapters.
    var seenLinks = Object.create(null);
    var dupPattern = /\[\[([^\]|]+)(\|([^\]]+))?\]\]/g;
    var deduped = '';
    var lastIdx = 0;
    var dupMatch;
    while ((dupMatch = dupPattern.exec(body)) !== null) {
        var linkTarget = dupMatch[1].trim();
        var linkCanonical = own(database.aliases, linkTarget) || linkTarget;
        // Header links are independent from body links: leave them alone and
        // do not count them as "seen".
        if (isOnHeaderLine(body, dupMatch.index)) {
            continue;
        }
        // Links in See also are left alone but DO count as seen.
        if (isInSeeAlsoSection(body, dupMatch.index)) {
            seenLinks[linkCanonical] = true;
            continue;
        }
        // Chapters should ALWAYS stay linked.
        if (own(chapters, linkTarget)) {
            continue;
        }
        if (seenLinks[linkCanonical]) {
            // Duplicate - remove link, keep the visible text
            var text = dupMatch[3] || linkTarget;
            deduped += body.substring(lastIdx, dupMatch.index) + text;
            lastIdx = dupMatch.index + dupMatch[0].length;
            fixes.push('Removed duplicate link to "' + linkTarget + '"');
        } else {
            seenLinks[linkCanonical] = true;
        }
    }
    body = deduped + body.substring(lastIdx);

    return {
        wikitext: intro + body,
        fixes: fixes
    };
}
/**
 * Auto-add links - main (asynchronous) entry point.
 *
 * Runs the formatting cleanup, then fetches the linkify database and applies
 * applyLinkify() to the cleaned text. Linkify runs whenever the database holds
 * at least one target OR at least one chapter title, so a wiki that only
 * provides a chapter list still gets chapter links.
 *
 * @param {string} wikitext Page source to process.
 * @return {jQuery.Promise} Always resolves with
 *     { wikitext: string, fixes: string[], warnings: string[] }. If the database
 *     fetch fails, the cleanup result is returned together with a warning.
 */
function autoAddLinks(wikitext) {
    var deferred = $.Deferred();
    var allFixes = [];
    var warnings = [];

    // Step 1: Apply formatting cleanup
    var formatResult = applyFormattingCleanup(wikitext);
    wikitext = formatResult.wikitext;
    allFixes = allFixes.concat(formatResult.fixes);

    function finish() {
        deferred.resolve({
            wikitext: wikitext,
            fixes: allFixes,
            warnings: warnings
        });
    }

    // Step 2: Fetch linkify database and apply
    fetchLinkifyDatabase().then(function(database) {
        var linkableCount = Object.keys(database.targets).length +
            Object.keys(database.chapters || {}).length;
        if (linkableCount === 0) {
            warnings.push('No linkify targets found. Create Category:Characters, Category:Locations, or Template:ChapterList.');
        } else {
            var linkResult = applyLinkify(wikitext, database);
            wikitext = linkResult.wikitext;
            allFixes = allFixes.concat(linkResult.fixes);
        }
        finish();
    }).fail(function() {
        warnings.push('Could not fetch linkify database. Formatting cleanup was still applied.');
        finish();
    });
    return deferred.promise();
}
/**
 * Synchronous counterpart of autoAddLinks, used in source mode.
 *
 * Formatting cleanup always runs. Linkify only runs when a database has
 * already been cached in `linkifyCache`; otherwise a warning is returned.
 *
 * @param {string} wikitext Page source to process.
 * @return {{wikitext: string, fixes: string[], warnings: string[]}}
 */
function autoAddLinksSync(wikitext) {
    var cleaned = applyFormattingCleanup(wikitext);
    var outcome = {
        wikitext: cleaned.wikitext,
        fixes: [].concat(cleaned.fixes),
        warnings: []
    };

    if (!linkifyCache) {
        outcome.warnings.push('Linkify database not cached. Run Auto Add Links in Visual Editor first, or wait a moment.');
        return outcome;
    }

    var linkedResult = applyLinkify(outcome.wikitext, linkifyCache);
    outcome.wikitext = linkedResult.wikitext;
    outcome.fixes = outcome.fixes.concat(linkedResult.fixes);
    return outcome;
}
/**
 * Run Fix Citations followed by Auto Add Links (asynchronous).
 *
 * The citation pass is synchronous; its output is fed to autoAddLinks and the
 * fixes and warnings of both passes are concatenated, citations first.
 *
 * @param {string} wikitext Page source to process.
 * @return {jQuery.Promise} Resolves with { wikitext, fixes, warnings }.
 */
function fixAll(wikitext) {
    var combined = $.Deferred();
    var citations = fixCitations(wikitext);

    autoAddLinks(citations.wikitext).then(function(links) {
        var merged = {
            wikitext: links.wikitext,
            fixes: citations.fixes.concat(links.fixes),
            warnings: citations.warnings.concat(links.warnings)
        };
        combined.resolve(merged);
    });

    return combined.promise();
}
/**
 * Synchronous counterpart of fixAll, used in source mode.
 *
 * @param {string} wikitext Page source to process.
 * @return {{wikitext: string, fixes: string[], warnings: string[]}} Output of
 *     the link pass, with citation fixes/warnings listed before link ones.
 */
function fixAllSync(wikitext) {
    var citations = fixCitations(wikitext);
    var links = autoAddLinksSync(citations.wikitext);
    var mergedFixes = citations.fixes.concat(links.fixes);
    var mergedWarnings = citations.warnings.concat(links.warnings);
    return {
        wikitext: links.wikitext,
        fixes: mergedFixes,
        warnings: mergedWarnings
    };
}
/**
 * Main citation fixer.
 *
 * Runs, in order: definition/usage order repair, citation format
 * standardisation, undefined-reference warnings, chapter URL validation,
 * naming of anonymous refs, renaming of non-chapter-style ref names, and a
 * final undefined-reference report. References are re-parsed after every
 * step that changed the text.
 *
 * @param {string} wikitext Page source to process.
 * @return {{wikitext: string, fixes: string[], warnings: string[]}}
 */
function fixCitations(wikitext) {
    var warnings = [];
    var fixes = [];
    var refs = parseRefs(wikitext);

    // Push a warning when the URL found in `content` fails validation.
    function warnIfInvalidUrl(label, content) {
        var check = validateChapterUrl(extractUrl(content));
        if (!check.valid) {
            warnings.push('Invalid URL in ' + label + ': ' + check.error);
        }
    }

    // Adopt the output of a naming/renaming step, but only if it changed something.
    function adoptIfChanged(step) {
        if (step.fixes.length > 0) {
            wikitext = step.wikitext;
            fixes = fixes.concat(step.fixes);
            refs = parseRefs(wikitext);
        }
    }

    // 1. Find and fix order issues
    var misordered = findOrderIssues(refs);
    if (misordered.length > 0) {
        wikitext = fixOrderIssues(wikitext, misordered);
        misordered.forEach(function(entry) {
            fixes.push('Fixed order: "' + entry.name + '" - moved definition before first usage');
        });
        refs = parseRefs(wikitext);
    }

    // 2. Standardize citation format
    var unstandardized = wikitext;
    wikitext = standardizeCitations(wikitext);
    if (wikitext !== unstandardized) {
        fixes.push('Standardized raw URLs to {{Cite chapter|url=...}} format');
        refs = parseRefs(wikitext);
    }

    // 3. Find undefined refs
    findUndefinedRefs(refs).forEach(function(missing) {
        warnings.push('Undefined reference: "' + missing.name + '" is used ' +
            missing.usages.length + ' time(s) but never defined');
    });

    // 4. Validate chapter URLs (named definitions first, then anonymous refs)
    refs.definitions.forEach(function(def) {
        warnIfInvalidUrl('"' + def.name + '"', def.content);
    });
    refs.anonymous.forEach(function(anon, position) {
        warnIfInvalidUrl('anonymous ref #' + (position + 1), anon.content);
    });

    // 5. Assign names to anonymous refs with chapter URLs
    adoptIfChanged(assignNamesToAnonymousRefs(wikitext, refs));

    // 5.5. Rename refs with non-chapter-style names (like :0) to proper chapter-based names
    adoptIfChanged(renameNonChapterStyleRefs(wikitext, refs));

    // 6. Re-check undefined refs after naming
    var stillMissing = findUndefinedRefs(refs);
    if (stillMissing.length > 0) {
        warnings.push('');
        warnings.push('=== Undefined references requiring manual attention ===');
        stillMissing.forEach(function(missing) {
            warnings.push('Reference "' + missing.name + '" is used ' + missing.usages.length + ' time(s) but never defined.');
            warnings.push(' → Find the correct source and add: <ref name="' + missing.name + '">{{Cite chapter|url=...}}</ref>');
        });
    }

    return {
        wikitext: wikitext,
        fixes: fixes,
        warnings: warnings
    };
}
/**
 * Show the outcome of a fixer run, via mw.notify when available and
 * alert() otherwise.
 *
 * mw.notify treats a plain string as text, so "<br>" markup would be shown
 * literally. The message is therefore wrapped in an element whose
 * `white-space: pre-line` style renders the "\n" separators as line breaks,
 * while `textContent` keeps page-derived text (e.g. ref names) unparsed.
 *
 * @param {{fixes: string[], warnings: string[]}} result Result of a fixer run.
 */
function showResultMessage(result) {
    var message = '';
    if (result.fixes.length > 0) {
        message += 'FIXES APPLIED:\n' + result.fixes.join('\n') + '\n\n';
    }
    if (result.warnings.length > 0) {
        message += 'WARNINGS:\n' + result.warnings.join('\n');
    }
    if (result.fixes.length === 0 && result.warnings.length === 0) {
        message = 'No issues found! Citations look good.';
    }

    if (typeof mw !== 'undefined' && mw.notify) {
        var content = document.createElement('div');
        content.style.whiteSpace = 'pre-line';
        content.textContent = message;
        mw.notify(content, {
            title: 'FormattingFixer',
            autoHide: true,
            // mw.notify expects a key of mw.notification.autoHideSeconds here
            // ('short' or 'long'), not a number; keep warnings visible longer.
            autoHideSeconds: result.warnings.length > 0 ? 'long' : 'short',
            tag: 'formattingfixer'
        });
    } else {
        alert(message);
    }
}
// ========================================
// VisualEditor Integration
// ========================================
/**
 * Report whether a VisualEditor target has been initialised.
 *
 * @return {*} Falsy when `ve`, `ve.init` or `ve.init.target` is missing;
 *     otherwise the target object itself (truthy).
 */
function veIsAvailable() {
    if ( typeof ve === 'undefined' ) {
        return false;
    }
    return ve.init && ve.init.target;
}
/**
 * Fetch the active VisualEditor surface.
 *
 * @return {*} null when VisualEditor is unavailable; otherwise whatever
 *     `target.getSurface()` returns (or the falsy `getSurface` value itself
 *     if the target has no such method).
 */
function veGetSurface() {
    var target;
    if ( veIsAvailable() ) {
        target = ve.init.target;
        return target.getSurface && target.getSurface();
    }
    return null;
}
/**
 * Read the editing mode of the active surface.
 *
 * @return {*} Result of `surface.getMode()`, or null when there is no
 *     surface or it has no `getMode` method.
 */
function veGetMode() {
    var surface = veGetSurface();
    if ( !surface || !surface.getMode ) {
        return null;
    }
    return surface.getMode();
}
/**
 * Read the whole document of a source-mode surface as text.
 *
 * @param {Object} surface Surface whose model document is read.
 * @return {string} Source text covering offsets 0 to the data length.
 */
function veGetFullWikitextFromSourceSurface( surface ) {
    var linearData = surface.getModel().getDocument().data;
    var everything = new ve.Range( 0, linearData.getLength() );
    return linearData.getSourceText( everything );
}
/**
 * Replace the entire content of a source-mode surface with new wikitext.
 *
 * @param {Object} surface Surface to modify.
 * @param {string} newWikitext Text inserted over the expanded 'root' selection.
 */
function veReplaceAllWikitextInSourceSurface( surface, newWikitext ) {
    // Start from a collapsed fragment at offset 0 and grow it to the root.
    var wholeDocument = surface.getModel()
        .getLinearFragment( new ve.Range( 0 ), true )
        .expandLinearSelection( 'root' );
    wholeDocument.insertContent( newWikitext );
}
/**
 * Convert wikitext into a VisualEditor data-model document.
 *
 * Sends the wikitext to `ve.init.target.parseWikitextFragment`, builds an HTML
 * document from the response and imports it relative to `targetDoc`.
 *
 * @param {string} wikitext Wikitext to parse.
 * @param {Object} targetDoc Document passed to the parser and used to create
 *     the new document via `newFromHtml`.
 * @return {Promise} Resolves with the new document; rejects with the raw
 *     response when the 'visualeditor' result is not 'success'.
 */
function veCreateDmDocumentFromWikitext( wikitext, targetDoc ) {
return ve.init.target.parseWikitextFragment( wikitext, false, targetDoc ).then( function ( response ) {
// Treat anything other than an explicit success as a failure.
if ( ve.getProp( response, 'visualeditor', 'result' ) !== 'success' ) {
return $.Deferred().reject( response ).promise();
}
var html = response.visualeditor.content;
var htmlDoc = ve.createDocumentFromHtml( html );
// Mirror VE's own clipboard importer flow for Parsoid HTML
// (both strip helpers are optional and only called when present)
if ( mw.libs && mw.libs.ve && mw.libs.ve.stripRestbaseIds ) {
mw.libs.ve.stripRestbaseIds( htmlDoc );
}
if ( mw.libs && mw.libs.ve && mw.libs.ve.stripParsoidFallbackIds ) {
mw.libs.ve.stripParsoidFallbackIds( htmlDoc.body );
}
// Pass an empty object for importRules to enable clipboard mode
var newDoc = targetDoc.newFromHtml( htmlDoc, {} );
var data = newDoc.data.data;
var surface = new ve.dm.Surface( newDoc );
// Filter out auto-generated items (e.g. reference lists)
// Walks the data from the end towards the start; presumably so a removal
// does not shift the offsets still to be visited - TODO confirm.
for ( var i = data.length - 1; i >= 0; i-- ) {
if ( ve.getProp( data[ i ], 'attributes', 'mw', 'autoGenerated' ) ) {
// Remove the whole node that starts at this element (its outer range).
surface.change(
ve.dm.TransactionBuilder.static.newFromRemoval(
newDoc,
surface.getDocument().getDocumentNode().getNodeFromOffset( i + 1 ).getOuterRange()
)
);
}
}
// Avoid about attribute conflicts
newDoc.data.cloneElements( true );
return newDoc;
} );
}
/**
 * Replace the entire content of a surface with another document.
 *
 * @param {Object} uiSurface Surface whose model is modified.
 * @param {Object} newDoc Document inserted over the expanded 'root' selection.
 */
function veReplaceAllContentWithDocument( uiSurface, newDoc ) {
    // Start from a collapsed fragment at offset 0 and grow it to the root.
    var wholeDocument = uiSurface.getModel()
        .getLinearFragment( new ve.Range( 0 ), true )
        .expandLinearSelection( 'root' );
    wholeDocument.insertDocument( newDoc );
}
/**
 * Make sure the editor is in VisualEditor's wikitext (source) mode.
 *
 * @return {*} An already-resolved promise when the surface reports 'source';
 *     otherwise the return value of `target.switchToWikitextEditor( true )`,
 *     or a rejected promise carrying the error if that call throws.
 */
function veEnsureSourceMode() {
    var editorTarget = ve.init.target;
    var activeSurface = veGetSurface();
    var alreadySource = Boolean( activeSurface && activeSurface.getMode ) &&
        activeSurface.getMode() === 'source';

    if ( !alreadySource ) {
        // Whole-document wikitext transforms need the wikitext editor.
        try {
            return editorTarget.switchToWikitextEditor( true );
        } catch ( switchError ) {
            return $.Deferred().reject( switchError ).promise();
        }
    }
    return $.Deferred().resolve().promise();
}
/**
 * Run a fixer against the document open in VisualEditor.
 *
 * Source mode: the wikitext is read, transformed synchronously and written
 * straight back.
 *
 * Visual mode: the wikitext is fetched and transformed, then only the part
 * from the first section heading onwards is round-tripped through the parser.
 * After a short delay the ORIGINAL intro is prepended again and the combined
 * text is loaded into the surface. Every asynchronous step has a failure
 * handler so a rejected request is reported instead of failing silently.
 *
 * @param {string} mode 'links', 'all', or anything else for citations only.
 */
function runFormattingFixerInVE( mode ) {
    if ( !veIsAvailable() ) {
        mw.notify( 'FormattingFixer: VisualEditor is not available yet.', { type: 'error' } );
        return;
    }
    var target = ve.init.target;
    var surface = veGetSurface();
    if ( !surface ) {
        mw.notify( 'FormattingFixer: Could not access the editor surface.', { type: 'error' } );
        return;
    }

    function notifyApplyFailure() {
        mw.notify( 'FormattingFixer: Could not apply changes. Try using source mode.', { type: 'error' } );
    }

    // In source mode, we can read/write directly
    if ( veGetMode() === 'source' ) {
        var sourceWikitext = veGetFullWikitextFromSourceSurface( surface );
        // Use sync versions for source mode
        var sourceResult;
        if ( mode === 'links' ) {
            sourceResult = autoAddLinksSync( sourceWikitext );
        } else if ( mode === 'all' ) {
            sourceResult = fixAllSync( sourceWikitext );
        } else {
            sourceResult = fixCitations( sourceWikitext );
        }
        if ( sourceResult.wikitext !== sourceWikitext ) {
            veReplaceAllWikitextInSourceSurface( surface, sourceResult.wikitext );
        }
        showResultMessage( sourceResult );
        return;
    }

    // In visual mode, split intro from body to protect intro from Parsoid
    var doc = surface.getModel().getDocument();
    target.getWikitextFragment( doc ).then( function ( originalWikitext ) {
        // Split at first section header - intro stays untouched, only body goes through Parsoid
        var firstHeaderMatch = /^==[^=]/m.exec( originalWikitext );
        var introSection = firstHeaderMatch ?
            originalWikitext.substring( 0, firstHeaderMatch.index ) :
            '';

        // Helper to apply result to VE
        function applyResult( result ) {
            if ( result.wikitext === originalWikitext ) {
                showResultMessage( result );
                return;
            }
            // Split the processed result the same way
            var processedFirstHeader = /^==[^=]/m.exec( result.wikitext );
            var processedBody = result.wikitext;
            if ( processedFirstHeader ) {
                processedBody = result.wikitext.substring( processedFirstHeader.index );
            }
            // Only send the BODY through Parsoid, not the intro
            veCreateDmDocumentFromWikitext( processedBody, doc ).then( function ( newDoc ) {
                veReplaceAllContentWithDocument( surface, newDoc );
                // After Parsoid processes body, prepend the original intro unchanged
                setTimeout( function () {
                    target.getWikitextFragment( surface.getModel().getDocument() ).then( function ( parsoidBody ) {
                        // Combine: original intro (unchanged) + Parsoid-processed body
                        var finalWikitext = introSection + parsoidBody;
                        veCreateDmDocumentFromWikitext( finalWikitext, surface.getModel().getDocument() ).then( function ( finalDoc ) {
                            veReplaceAllContentWithDocument( surface, finalDoc );
                            showResultMessage( result );
                        }, function () {
                            // The body was replaced but the intro could not be put back:
                            // report a failure rather than a success summary.
                            notifyApplyFailure();
                        } );
                    }, notifyApplyFailure );
                }, 500 );
            }, notifyApplyFailure );
        }

        // Process based on mode - some functions are async
        if ( mode === 'links' ) {
            autoAddLinks( originalWikitext ).then( applyResult );
        } else if ( mode === 'all' ) {
            fixAll( originalWikitext ).then( applyResult );
        } else {
            applyResult( fixCitations( originalWikitext ) );
        }
    }, function () {
        mw.notify( 'FormattingFixer: Could not read the document wikitext.', { type: 'error' } );
    } );
}
/**
 * Add the FormattingFixer tools to the VisualEditor toolbar.
 *
 * The three tools only differ in name, title, icon and the mode passed to
 * runFormattingFixerInVE, so they are generated from one spec table.
 * Two integration paths are wired up:
 *   - 've.loadModules': register the tools and append a tool group to every
 *     registered target class (and to classes registered later).
 *   - 've.activationComplete': fallback that adds a list tool group to the
 *     toolbar that already exists, unless one was added before.
 */
function addVisualEditorButtons() {
    var TOOL_GROUP = 'formattingfixer';
    var TOOL_SPECS = [
        { name: 'formattingFixerFix', title: 'Fix Citations', icon: 'check', mode: 'citations' },
        { name: 'formattingFixerLinks', title: 'Auto Add Links', icon: 'link', mode: 'links' },
        { name: 'formattingFixerAll', title: 'Fix Citations + Auto Add Links', icon: 'wikiText', mode: 'all' }
    ];

    // Define and register one OO.ui.Tool subclass for a spec (no-op if the
    // tool factory already knows the name).
    function registerTool( spec ) {
        var toolFactory = ve.ui.toolFactory;
        if ( toolFactory.lookup( spec.name ) ) {
            return;
        }
        function FormattingFixerTool() {
            FormattingFixerTool.super.apply( this, arguments );
        }
        OO.inheritClass( FormattingFixerTool, OO.ui.Tool );
        FormattingFixerTool.static.name = spec.name;
        FormattingFixerTool.static.group = TOOL_GROUP;
        FormattingFixerTool.static.title = spec.title;
        FormattingFixerTool.static.icon = spec.icon;
        FormattingFixerTool.static.autoAddToCatchall = false;
        FormattingFixerTool.static.autoAddToGroup = true;
        FormattingFixerTool.prototype.onSelect = function () {
            runFormattingFixerInVE( spec.mode );
            // These are one-shot actions, so never leave the tool highlighted.
            this.setActive( false );
        };
        FormattingFixerTool.prototype.onUpdateState = function () {
            this.setDisabled( false );
        };
        toolFactory.register( FormattingFixerTool );
    }

    // Register every tool once VisualEditor is available.
    function registerTools() {
        if ( !veIsAvailable() ) {
            return;
        }
        TOOL_SPECS.forEach( registerTool );
    }

    // Ensure our tool group is available in the toolbar (early-load path).
    function addGroupToTarget( targetClass ) {
        if ( !targetClass.static.toolbarGroups ) {
            return;
        }
        var exists = targetClass.static.toolbarGroups.some( function ( g ) {
            return g && g.name === TOOL_GROUP;
        } );
        if ( exists ) {
            return;
        }
        targetClass.static.toolbarGroups.push( {
            name: TOOL_GROUP,
            label: 'FormattingFixer',
            type: 'list',
            indicator: 'down',
            include: [ { group: TOOL_GROUP } ]
        } );
    }

    // Preferred: integrate during VE module loading.
    mw.hook( 've.loadModules' ).add( function ( addPlugin ) {
        addPlugin( function () {
            registerTools();
            mw.loader.using( [ 'ext.visualEditor.mediawiki' ] ).then( function () {
                for ( var n in ve.init.mw.targetFactory.registry ) {
                    addGroupToTarget( ve.init.mw.targetFactory.lookup( n ) );
                }
                ve.init.mw.targetFactory.on( 'register', function ( name, targetClass ) {
                    addGroupToTarget( targetClass );
                } );
            } );
        } );
    } );

    // Fallback: if VE is already active, add a toolgroup to the existing toolbar.
    mw.hook( 've.activationComplete' ).add( function () {
        if ( !veIsAvailable() ) {
            return;
        }
        registerTools();
        var toolbar = ve.init.target.getToolbar && ve.init.target.getToolbar();
        if ( !toolbar ) {
            toolbar = ve.init.target.toolbar;
        }
        if ( !toolbar || toolbar.$element.find( '.formattingfixer-toolgroup' ).length ) {
            return;
        }
        var myToolGroup = new OO.ui.ListToolGroup( toolbar, {
            label: 'FormattingFixer',
            include: TOOL_SPECS.map( function ( spec ) {
                return spec.name;
            } )
        } );
        myToolGroup.$element.addClass( 'formattingfixer-toolgroup' );
        toolbar.addItems( [ myToolGroup ] );
    } );
}
// ========================================
// WikiEditor Integration (Legacy)
// ========================================
/**
 * Add the three FormattingFixer buttons to the WikiEditor toolbar
 * ('advanced' section, 'format' group).
 *
 * @return {boolean} true when the buttons exist (added now or earlier),
 *     false when WikiEditor or the textarea is missing or adding failed.
 */
function addWikiEditorButtons() {
    // Guard against duplicate button creation
    if (wikiEditorButtonsAdded) {
        return true;
    }
    if (typeof $ === 'undefined' || !$.fn.wikiEditor) {
        console.log('FormattingFixer: WikiEditor not available');
        return false;
    }
    var $textarea = $('#wpTextbox1');
    if ($textarea.length === 0) {
        console.log('FormattingFixer: No textarea found');
        return false;
    }
    // A button already present in the DOM counts as "added"
    if ($('.tool[rel="formattingfixer-fix"]').length > 0) {
        wikiEditorButtonsAdded = true;
        return true;
    }

    // Shape of one WikiEditor button definition.
    function toolbarButton(label, icon, run) {
        return {
            label: label,
            type: 'button',
            oouiIcon: icon,
            action: {
                type: 'callback',
                execute: run
            }
        };
    }

    // Handler for the asynchronous fixers: announce the fetch, then write
    // the result back into the textarea once it arrives.
    function asyncHandler(transform) {
        return function() {
            var textarea = document.getElementById('wpTextbox1');
            mw.notify('Fetching linkify database...', { tag: 'formattingfixer' });
            transform(textarea.value).then(function(result) {
                textarea.value = result.wikitext;
                showResultMessage(result);
            });
        };
    }

    try {
        $textarea.wikiEditor('addToToolbar', {
            section: 'advanced',
            group: 'format',
            tools: {
                'formattingfixer-fix': toolbarButton('Fix Citations', 'articleCheck', function() {
                    var textarea = document.getElementById('wpTextbox1');
                    var result = fixCitations(textarea.value);
                    textarea.value = result.wikitext;
                    showResultMessage(result);
                }),
                'formattingfixer-links': toolbarButton('Auto Add Links', 'link', asyncHandler(function(text) {
                    return autoAddLinks(text);
                })),
                'formattingfixer-all': toolbarButton('Fix Citations + Auto Add Links', 'code', asyncHandler(function(text) {
                    return fixAll(text);
                }))
            }
        });
        wikiEditorButtonsAdded = true;
        console.log('FormattingFixer: WikiEditor buttons added');
        return true;
    } catch (e) {
        console.log('FormattingFixer: Error adding WikiEditor buttons:', e);
        return false;
    }
}
// ========================================
// Fallback: Simple Buttons
// ========================================
/**
 * Fallback UI: insert a plain HTML button bar directly above the edit textbox.
 *
 * @return {boolean} false when there is no usable textbox (missing, or
 *     VisualEditor's dummy textbox); true when the bar exists afterwards.
 */
function addSimpleButtons() {
    var textbox = document.getElementById('wpTextbox1');
    if (!textbox) return false;
    // Don't attach to VisualEditor's hidden dummy textbox
    if (textbox.classList && textbox.classList.contains('ve-dummyTextbox')) {
        return false;
    }
    // Nothing to do when the bar is already there
    if (document.getElementById('formattingfixer-buttons')) return true;

    // Create a <button type="button"> with the given caption and inline style.
    function makeButton(caption, css) {
        var btn = document.createElement('button');
        btn.textContent = caption;
        btn.style.cssText = css;
        btn.type = 'button';
        return btn;
    }

    // Click handler for the asynchronous fixers: the button is disabled and
    // relabelled while the request runs, then restored.
    function wireAsync(btn, caption, transform) {
        btn.onclick = function() {
            btn.disabled = true;
            btn.textContent = 'Loading...';
            transform(textbox.value).then(function(result) {
                textbox.value = result.wikitext;
                showResultMessage(result);
                btn.disabled = false;
                btn.textContent = caption;
            });
        };
    }

    var bar = document.createElement('div');
    bar.id = 'formattingfixer-buttons';
    bar.style.cssText = 'margin: 5px 0; padding: 8px; background: #f8f9fa; border: 1px solid #a2a9b1; border-radius: 2px; display: flex; gap: 10px; align-items: center;';

    var caption = document.createElement('span');
    caption.textContent = 'FormattingFixer:';
    caption.style.fontWeight = 'bold';
    bar.appendChild(caption);

    var citationsBtn = makeButton('Fix Citations', 'padding: 5px 10px; cursor: pointer; border: 1px solid #a2a9b1; border-radius: 2px; background: #fff;');
    citationsBtn.onclick = function() {
        var result = fixCitations(textbox.value);
        textbox.value = result.wikitext;
        showResultMessage(result);
    };
    bar.appendChild(citationsBtn);

    var linksButton = makeButton('Auto Add Links', 'padding: 5px 10px; cursor: pointer; border: 1px solid #a2a9b1; border-radius: 2px; background: #fff;');
    wireAsync(linksButton, 'Auto Add Links', function(text) {
        return autoAddLinks(text);
    });
    bar.appendChild(linksButton);

    var everythingBtn = makeButton('Fix All', 'padding: 5px 10px; cursor: pointer; border: 1px solid #a2a9b1; border-radius: 2px; background: #36c; color: #fff;');
    wireAsync(everythingBtn, 'Fix All', function(text) {
        return fixAll(text);
    });
    bar.appendChild(everythingBtn);

    textbox.parentNode.insertBefore(bar, textbox);
    console.log('FormattingFixer: Simple buttons added');
    return true;
}
// ========================================
// Initialization
// ========================================
/**
 * Wire the gadget into the source editor.
 *
 * Only runs for action=edit / action=submit. Buttons are attached through
 * several attempts: the 'wikiEditor.toolbarReady' hook, an immediate attempt
 * that loads ext.wikiEditor, and two delayed fallbacks (250 ms and 1500 ms)
 * that add plain HTML buttons when the WikiEditor toolbar cannot be used.
 * VisualEditor integration is not wired up here (Parsoid caused too many
 * issues with references and formatting).
 */
function init() {
    var action = mw.config.get('wgAction');
    console.log('FormattingFixer: Initializing, action=' + action);
    if (action !== 'edit' && action !== 'submit') {
        return;
    }

    // The edit textbox, or null when it is missing or is VisualEditor's dummy.
    function usableTextbox() {
        var textbox = document.getElementById('wpTextbox1');
        if (!textbox) {
            return null;
        }
        if (textbox.classList && textbox.classList.contains('ve-dummyTextbox')) {
            return null;
        }
        return textbox;
    }

    // Delayed fallback: if no simple button bar exists yet, try the WikiEditor
    // toolbar when it seems present, and fall back to simple buttons otherwise.
    function ensureButtons(wikiEditorSeemsPresent) {
        if (!usableTextbox() || document.getElementById('formattingfixer-buttons')) {
            return;
        }
        if (!wikiEditorSeemsPresent() || !addWikiEditorButtons()) {
            addSimpleButtons();
        }
    }

    // Try WikiEditor first
    mw.hook('wikiEditor.toolbarReady').add(function() {
        console.log('FormattingFixer: WikiEditor toolbar ready');
        addWikiEditorButtons();
    });

    // Classic source editor: try loading WikiEditor explicitly; if the module
    // cannot be loaded, use the simple buttons instead.
    setTimeout(function() {
        if (!usableTextbox()) {
            return;
        }
        mw.loader.using(['ext.wikiEditor']).then(function() {
            addWikiEditorButtons();
        }, function() {
            addSimpleButtons();
        });
    }, 0);

    // Early fallback: the jQuery plugin exists but may not be initialized yet.
    setTimeout(function() {
        ensureButtons(function() {
            return typeof $ !== 'undefined' && $.fn && $.fn.wikiEditor;
        });
    }, 250);

    // Late fallback: look for the rendered WikiEditor toolbar instead.
    setTimeout(function() {
        ensureButtons(function() {
            return $('.wikiEditor-ui-toolbar').length > 0;
        });
    }, 1500);
}
// Start the gadget once the DOM is available: immediately when the document
// has already been parsed, otherwise on DOMContentLoaded. In both cases
// init() only runs after the mediawiki.util module has loaded.
var bootstrapFormattingFixer = function() {
    mw.loader.using(['mediawiki.util']).then(init);
};
if (document.readyState !== 'loading') {
    bootstrapFormattingFixer();
} else {
    document.addEventListener('DOMContentLoaded', bootstrapFormattingFixer);
}
// Expose for testing
// Publishes selected internals on window.FormattingFixer so they can be
// called from the browser console or from tests outside this closure.
window.FormattingFixer = {
// Citation fixer (synchronous)
fixCitations: fixCitations,
// Link fixers: promise-returning and cached-database synchronous variants
autoAddLinks: autoAddLinks,
autoAddLinksSync: autoAddLinksSync,
// Citations followed by links, in both variants
fixAll: fixAll,
fixAllSync: fixAllSync,
// Lower-level helpers
parseRefs: parseRefs,
generateRefName: generateRefName,
fetchLinkifyDatabase: fetchLinkifyDatabase,
applyFormattingCleanup: applyFormattingCleanup
};
})();
// Force update timestamp: Mon Jan 5 19:24:00 EST 2026