Toggle menu
Toggle preferences menu
Toggle personal menu
Not logged in
Your IP address will be publicly visible if you make any edits.

User:DarkMatterMan4500/edit-filter-hit-analyzer.js: Difference between revisions

From WikiOasis Meta
Created page with "// <nowiki> // @ts-check // More information on how an edit filter was tripped importStylesheet('w:en:User:GeneralNotability/edit-filter-hit-analyzer.css' ); /**"
 
mNo edit summary
 
Line 5: Line 5:
importStylesheet('w:en:User:GeneralNotability/edit-filter-hit-analyzer.css' );
importStylesheet('w:en:User:GeneralNotability/edit-filter-hit-analyzer.css' );
/**
/**
* @typedef EditFilterLine
* @type {Object}
* @property {string} text Text of the line
* @property {string} normedText Text with some modifications applied for parsing
* @property {string[]} variables Variables found in the line
* @property {number} indentation how far to indent the line
*/
/**
 * Values of the variables known for this filter hit, keyed by variable name.
 * efa_main() seeds it with the built-in variables and efa_parseRules() adds the
 * string variables assigned inside the filter itself.
 *
 * @type {Object<string, any>}
 */
const efa_knownVars = {};

// Matches the page name of a single abuse log entry (Special:AbuseLog/<id>)
const efa_PAGE_NAME_RE = /Special:AbuseLog\/\d+/;
// Matches a link to a filter page; group 1 is the page title, group 2 the numeric filter ID
const efa_FILTER_PAGE_RE = /\/wiki\/(Special:AbuseFilter\/(\d+))/;
// Vars in this list shouldn't have their full content displayed because they're usually really big
const efa_HIDDEN_VARS = [
	'old_wikitext',
	'new_wikitext',
	'edit_diff',
	'all_links',
	'added_lines',
	'removed_lines',
	'new_html'
];

// Parser regexes
// `name := "value"`: group 1 is the variable name, group 2 the text between the quotes
const efa_REGEX_ASSIGNMENT_RE = /(\w+)\s*:=\s*"(.*)"/;
// `subject rlike pattern`: group 1 is the subject expression, group 2 the operator
// (rlike, irlike or regex) and group 3 the pattern (a bare word or a quoted literal).
// The subject group is lazy and the operator is delimited by word boundaries so that
// the leading "i" of "irlike" stays part of the operator instead of the subject.
const efa_RLIKE_RE = /\b(.*?)\s*\b(i?rlike|regex)\b\s*(\w+|".*")/;
// Future reference: if we want to do ccnorm ourselves, we can pull the conversion list
// from https://phab.wmfusercontent.org/file/data/jcoued3dziiwwwdr53lp/PHID-FILE-lkxia6juxnhqt263dbrj/equivset.json
/**
 * Resolve a filter expression to the value of the known variable it mentions.
 *
 * The expression is always checked as written in the filter, so the content of
 * one variable can never be mistaken for a reference to another variable.
 * Names only count when they appear as whole identifiers; if several known
 * names appear, the longest one wins.
 *
 * TODO: this is still simplistic - function calls around the variable are not
 * evaluated, the raw variable value is used.
 *
 * @param {string} expression Expression taken from a filter rule, e.g. `lcase(added_lines)`
 * @param {Object} knownVars Map of variable name to value
 *
 * @return {string} Value of the matching variable as a string, or the expression
 * itself when it mentions no known variable
 */
function efa_expandVariable(expression, knownVars) {
	let bestName = '';
	Object.keys(knownVars).forEach((name) => {
		if (name.length <= bestName.length) {
			return;
		}
		const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
		if (new RegExp('\\b' + escapedName + '\\b').test(expression)) {
			bestName = name;
		}
	});
	// String() rather than .toString(): variable values may be null
	return bestName ? String(knownVars[bestName]) : expression;
}

/**
 * Analyse the filter behind the abuse log entry being viewed and insert a
 * "Filter rule analysis" section before the "Action parameters" heading.
 *
 * Every rule of the filter becomes a list item showing the rule text and the
 * values of the known variables it mentions. Rules using rlike/irlike/regex
 * also get a regex101 link pre-filled with the pattern and the subject text.
 *
 * Nothing is added to the page when no link to a filter page is found or when
 * the filter pattern cannot be fetched.
 *
 * @return {Promise<void>}
 */
async function efa_main() {
	// populate knownVars with built-in values (the config value may be missing)
	Object.assign(efa_knownVars, mw.config.get('wgAbuseFilterVariables') || {});

	// Find the link which goes to Special:AbuseFilter, then pull out the filter ID
	const $filterLinks = $('a', document).filter(function () {
		return efa_FILTER_PAGE_RE.test(this.getAttribute('href') || '');
	});
	if ($filterLinks.length === 0) {
		// No filter link on this page, nothing to analyse
		return;
	}
	const filterId = $filterLinks.attr('href').match(efa_FILTER_PAGE_RE)[2];
	const filterPattern = await efa_getFilter(filterId);
	if (!filterPattern) {
		// Something went wrong (or we can't access the filter),
		// bail out
		return;
	}
	const filterRules = efa_parseRules(filterPattern);

	// Only touch the page once there is something to show
	const $actionParams = $('h3:contains("Action parameters")', document);
	$('<h3>').text('Filter rule analysis').insertBefore($actionParams);
	const $ruleAnchor = $('<ul>').attr('id', 'efa-anchor').insertBefore($actionParams);

	filterRules.forEach((rule) => {
		const $bullet = $('<li>').attr('style', 'margin-left:' + (10 * rule.indentation + 10) + 'px;');
		$('<span>').addClass('efa-rule').text(rule.text).appendTo($bullet);
		rule.variables.forEach((variable) => {
			const shownValue = efa_HIDDEN_VARS.includes(variable) ? '(not shown)' : efa_knownVars[variable];
			// Variable values are user-controlled (summaries, titles, ...), so they must be
			// inserted as text; passing the string to .append() would parse it as HTML
			$('<span>').addClass('efa-data').text(variable + ': ' + shownValue).appendTo($bullet);
		});

		const rlikeMatch = rule.normedText.match(efa_RLIKE_RE);
		if (rlikeMatch) {
			// If this is a regex, try to expand it and generate a link
			const subjectExpr = rlikeMatch[1];
			const matchType = rlikeMatch[2];
			const patternExpr = rlikeMatch[3];
			// A quoted pattern is a literal regex: strip the quotes and don't substitute.
			// Anything else is treated as a reference to a variable holding the regex.
			const quotedPattern = patternExpr.match(/.*?"(.*)"/);
			const re = quotedPattern ? quotedPattern[1] : efa_expandVariable(patternExpr, efa_knownVars);
			const reText = efa_expandVariable(subjectExpr, efa_knownVars);
			// abusefilter entries are PCRE and by default use the 'u' flag.
			// if irlike is being used, add the i flag as well.
			const flags = matchType === 'irlike' ? 'ui' : 'u';
			const re101url = `https://regex101.com/?regex=${encodeURIComponent(re)}&testString=${encodeURIComponent(reText)}&flags=${flags}`;
			$bullet.append(' ').append($('<a>').attr('href', re101url).text('(view at regex101)'));
		}
		$bullet.appendTo($ruleAnchor);
	});
}
/**
 * Turn a filter's pattern into a list of rules
 *
 * The pattern is split at every `&`, `;` and `|` outside double quotes; the
 * separators are kept as entries of their own and empty pieces are dropped.
 * Each entry is given an indentation level based on parenthesis nesting and
 * the list of known variables it mentions. String variables assigned in the
 * pattern (`name := "value"`) are recorded in efa_knownVars as a side effect.
 *
 * Known limitations: escaped quotes inside string literals are not recognised,
 * and parentheses inside string literals count towards the nesting depth.
 *
 * @param {string} pattern Original text pattern
 *
 * @return {EditFilterLine[]} List of rules
 */
function efa_parseRules(pattern) {
	// Replace line breaks with a space (deleting them would fuse tokens that sit on
	// adjacent lines), then split by statement.
	// The lookahead is taken from https://stackoverflow.com/questions/11502598/how-to-match-something-with-regex-that-is-not-between-two-special-characters
	// It matches all split characters (&, ;, |) as long as they are _not_ between quotes
	const filterLines = pattern
		.replace(/[\r\n]+/g, ' ')
		.split(/([&;|](?=(?:[^"]*"[^"]*")*[^"]*$))/g);
	/** @type {EditFilterLine[]} */
	const annotatedFilterLines = [];
	filterLines.forEach((line) => {
		// Trim, then replace every long whitespace with a single space. The lookahead
		// keeps whitespace inside quoted strings intact, since it may be part of a regex.
		const cleanedUpLine = line.trim().replace(/\s+(?=(?:[^"]*"[^"]*")*[^"]*$)/g, ' ');
		if (cleanedUpLine === '') {
			// e.g. whatever follows a trailing ";" - nothing to show
			return;
		}
		annotatedFilterLines.push({
			text: cleanedUpLine,
			normedText: cleanedUpLine,
			variables: [],
			indentation: 0
		});
	});

	// Indentation pass: figure out how deep each statement is nested in parens,
	// then create a "normed" version which strips the extra paren(s)
	// While we're in there, save variable assignments
	let indent = 0;
	annotatedFilterLines.forEach((line) => {
		// Both counts are off by one in the same direction, so their difference is exact
		const openParens = line.text.split('(').length;
		const closeParens = line.text.split(')').length;
		// Because of how we split the strings, a block of indented text will
		// always start with an extra open paren on the starting rule, and close
		// with an extra one on the ending rule (but we want both of those lines
		// indented)
		const deltaParens = openParens - closeParens;
		if (deltaParens > 0) {
			indent += deltaParens;
			line.indentation = indent;
			// Remove the extra (first) open paren from the normed text
			line.normedText = line.text.replace('(', '');
		} else if (deltaParens < 0) {
			line.indentation = indent;
			indent += deltaParens; // Remember, deltaParens is negative here, so add it
			// Remove the extra (last) close paren from the normed text
			line.normedText = line.text.replace(/\)(?=[^)]*$)/, '');
		} else {
			line.indentation = indent;
		}
		const varAssignment = line.normedText.match(efa_REGEX_ASSIGNMENT_RE);
		if (varAssignment) {
			efa_knownVars[varAssignment[1]] = varAssignment[2];
		}
	});

	// Annotate by going through and identifying variables used in the lines
	Object.keys(efa_knownVars).forEach((varName) => {
		// Escape the name so it is always matched literally
		const escapedName = varName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
		const varRe = new RegExp('\\b' + escapedName + '\\b');
		annotatedFilterLines.forEach((line) => {
			if (!varRe.test(line.text)) {
				return;
			}
			const assignmentMatch = line.text.match(efa_REGEX_ASSIGNMENT_RE);
			if (assignmentMatch && assignmentMatch[1] === varName) {
				// Don't list the variable on the line that assigns
				return;
			}
			line.variables.push(varName);
		});
	});
	return annotatedFilterLines;
}
/**
 * Fetch the pattern (rule text) of a single filter through the action API.
 *
 * @param {string} filterId ID of the filter, used as both start and end of the queried range
 *
 * @return {Promise<string>} Whatever the API returned as the filter's `pattern`,
 * or an empty string when no filter came back or the request failed
 */
async function efa_getFilter(filterId) {
	try {
		const client = new mw.Api();
		const params = {
			action: 'query',
			list: 'abusefilters',
			abfstartid: filterId,
			abfendid: filterId,
			abfprop: 'pattern'
		};
		const { query: { abusefilters: matches } } = await client.get(params);
		// An empty list means there is no such filter
		return matches.length < 1 ? '' : matches[0].pattern;
	} catch (err) {
		// Best effort: log the failure and let the caller treat it as "no pattern"
		console.log(err);
		return '';
	}
}
// On document load, check if this page is an edit filter hit - if so,
// load the EF stuff
$(function () {
	if (efa_PAGE_NAME_RE.test(mw.config.get('wgPageName'))) {
		// efa_main is async: log a failure instead of leaving the rejection unhandled
		efa_main().catch((error) => {
			console.log(error);
		});
	}
});
// </nowiki>

Latest revision as of 18:12, 5 May 2026

// <nowiki>
// @ts-check
// More information on how an edit filter was tripped

importStylesheet('w:en:User:GeneralNotability/edit-filter-hit-analyzer.css' );
/**
 * @typedef EditFilterLine
 * @type {Object}
 * @property {string} text Text of the line
 * @property {string} normedText Text with some modifications applied for parsing
 * @property {string[]} variables Variables found in the line
 * @property {number} indentation how far to indent the line
 */

/**
 * Values of the variables known for this filter hit, keyed by variable name.
 * efa_main() seeds it with the built-in variables and efa_parseRules() adds the
 * string variables assigned inside the filter itself.
 *
 * @type {Object<string, any>}
 */
const efa_knownVars = {};

// Matches the page name of a single abuse log entry (Special:AbuseLog/<id>)
const efa_PAGE_NAME_RE = /Special:AbuseLog\/\d+/;
// Matches a link to a filter page; group 1 is the page title, group 2 the numeric filter ID
const efa_FILTER_PAGE_RE = /\/wiki\/(Special:AbuseFilter\/(\d+))/;
// Vars in this list shouldn't have their full content displayed because they're usually really big
const efa_HIDDEN_VARS = [
	'old_wikitext',
	'new_wikitext',
	'edit_diff',
	'all_links',
	'added_lines',
	'removed_lines',
	'new_html'
];

// Parser regexes
// `name := "value"`: group 1 is the variable name, group 2 the text between the quotes
const efa_REGEX_ASSIGNMENT_RE = /(\w+)\s*:=\s*"(.*)"/;
// `subject rlike pattern`: group 1 is the subject expression, group 2 the operator
// (rlike, irlike or regex) and group 3 the pattern (a bare word or a quoted literal).
// The subject group is lazy and the operator is delimited by word boundaries so that
// the leading "i" of "irlike" stays part of the operator instead of the subject.
const efa_RLIKE_RE = /\b(.*?)\s*\b(i?rlike|regex)\b\s*(\w+|".*")/;

// Future reference: if we want to do ccnorm ourselves, we can pull the conversion list
// from https://phab.wmfusercontent.org/file/data/jcoued3dziiwwwdr53lp/PHID-FILE-lkxia6juxnhqt263dbrj/equivset.json

/**
 * Resolve a filter expression to the value of the known variable it mentions.
 *
 * The expression is always checked as written in the filter, so the content of
 * one variable can never be mistaken for a reference to another variable.
 * Names only count when they appear as whole identifiers; if several known
 * names appear, the longest one wins.
 *
 * TODO: this is still simplistic - function calls around the variable are not
 * evaluated, the raw variable value is used.
 *
 * @param {string} expression Expression taken from a filter rule, e.g. `lcase(added_lines)`
 * @param {Object} knownVars Map of variable name to value
 *
 * @return {string} Value of the matching variable as a string, or the expression
 * itself when it mentions no known variable
 */
function efa_expandVariable(expression, knownVars) {
	let bestName = '';
	Object.keys(knownVars).forEach((name) => {
		if (name.length <= bestName.length) {
			return;
		}
		const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
		if (new RegExp('\\b' + escapedName + '\\b').test(expression)) {
			bestName = name;
		}
	});
	// String() rather than .toString(): variable values may be null
	return bestName ? String(knownVars[bestName]) : expression;
}

/**
 * Analyse the filter behind the abuse log entry being viewed and insert a
 * "Filter rule analysis" section before the "Action parameters" heading.
 *
 * Every rule of the filter becomes a list item showing the rule text and the
 * values of the known variables it mentions. Rules using rlike/irlike/regex
 * also get a regex101 link pre-filled with the pattern and the subject text.
 *
 * Nothing is added to the page when no link to a filter page is found or when
 * the filter pattern cannot be fetched.
 *
 * @return {Promise<void>}
 */
async function efa_main() {
	// populate knownVars with built-in values (the config value may be missing)
	Object.assign(efa_knownVars, mw.config.get('wgAbuseFilterVariables') || {});

	// Find the link which goes to Special:AbuseFilter, then pull out the filter ID
	const $filterLinks = $('a', document).filter(function () {
		return efa_FILTER_PAGE_RE.test(this.getAttribute('href') || '');
	});
	if ($filterLinks.length === 0) {
		// No filter link on this page, nothing to analyse
		return;
	}
	const filterId = $filterLinks.attr('href').match(efa_FILTER_PAGE_RE)[2];
	const filterPattern = await efa_getFilter(filterId);
	if (!filterPattern) {
		// Something went wrong (or we can't access the filter),
		// bail out
		return;
	}
	const filterRules = efa_parseRules(filterPattern);

	// Only touch the page once there is something to show
	const $actionParams = $('h3:contains("Action parameters")', document);
	$('<h3>').text('Filter rule analysis').insertBefore($actionParams);
	const $ruleAnchor = $('<ul>').attr('id', 'efa-anchor').insertBefore($actionParams);

	filterRules.forEach((rule) => {
		const $bullet = $('<li>').attr('style', 'margin-left:' + (10 * rule.indentation + 10) + 'px;');
		$('<span>').addClass('efa-rule').text(rule.text).appendTo($bullet);
		rule.variables.forEach((variable) => {
			const shownValue = efa_HIDDEN_VARS.includes(variable) ? '(not shown)' : efa_knownVars[variable];
			// Variable values are user-controlled (summaries, titles, ...), so they must be
			// inserted as text; passing the string to .append() would parse it as HTML
			$('<span>').addClass('efa-data').text(variable + ': ' + shownValue).appendTo($bullet);
		});

		const rlikeMatch = rule.normedText.match(efa_RLIKE_RE);
		if (rlikeMatch) {
			// If this is a regex, try to expand it and generate a link
			const subjectExpr = rlikeMatch[1];
			const matchType = rlikeMatch[2];
			const patternExpr = rlikeMatch[3];
			// A quoted pattern is a literal regex: strip the quotes and don't substitute.
			// Anything else is treated as a reference to a variable holding the regex.
			const quotedPattern = patternExpr.match(/.*?"(.*)"/);
			const re = quotedPattern ? quotedPattern[1] : efa_expandVariable(patternExpr, efa_knownVars);
			const reText = efa_expandVariable(subjectExpr, efa_knownVars);
			// abusefilter entries are PCRE and by default use the 'u' flag.
			// if irlike is being used, add the i flag as well.
			const flags = matchType === 'irlike' ? 'ui' : 'u';
			const re101url = `https://regex101.com/?regex=${encodeURIComponent(re)}&testString=${encodeURIComponent(reText)}&flags=${flags}`;
			$bullet.append(' ').append($('<a>').attr('href', re101url).text('(view at regex101)'));
		}
		$bullet.appendTo($ruleAnchor);
	});
}

/**
 * Turn a filter's pattern into a list of rules
 *
 * The pattern is split at every `&`, `;` and `|` outside double quotes; the
 * separators are kept as entries of their own and empty pieces are dropped.
 * Each entry is given an indentation level based on parenthesis nesting and
 * the list of known variables it mentions. String variables assigned in the
 * pattern (`name := "value"`) are recorded in efa_knownVars as a side effect.
 *
 * Known limitations: escaped quotes inside string literals are not recognised,
 * and parentheses inside string literals count towards the nesting depth.
 *
 * @param {string} pattern Original text pattern
 *
 * @return {EditFilterLine[]} List of rules
 */
function efa_parseRules(pattern) {
	// Replace line breaks with a space (deleting them would fuse tokens that sit on
	// adjacent lines), then split by statement.
	// The lookahead is taken from https://stackoverflow.com/questions/11502598/how-to-match-something-with-regex-that-is-not-between-two-special-characters
	// It matches all split characters (&, ;, |) as long as they are _not_ between quotes
	const filterLines = pattern
		.replace(/[\r\n]+/g, ' ')
		.split(/([&;|](?=(?:[^"]*"[^"]*")*[^"]*$))/g);
	/** @type {EditFilterLine[]} */
	const annotatedFilterLines = [];
	filterLines.forEach((line) => {
		// Trim, then replace every long whitespace with a single space. The lookahead
		// keeps whitespace inside quoted strings intact, since it may be part of a regex.
		const cleanedUpLine = line.trim().replace(/\s+(?=(?:[^"]*"[^"]*")*[^"]*$)/g, ' ');
		if (cleanedUpLine === '') {
			// e.g. whatever follows a trailing ";" - nothing to show
			return;
		}
		annotatedFilterLines.push({
			text: cleanedUpLine,
			normedText: cleanedUpLine,
			variables: [],
			indentation: 0
		});
	});

	// Indentation pass: figure out how deep each statement is nested in parens,
	// then create a "normed" version which strips the extra paren(s)
	// While we're in there, save variable assignments
	let indent = 0;
	annotatedFilterLines.forEach((line) => {
		// Both counts are off by one in the same direction, so their difference is exact
		const openParens = line.text.split('(').length;
		const closeParens = line.text.split(')').length;
		// Because of how we split the strings, a block of indented text will
		// always start with an extra open paren on the starting rule, and close
		// with an extra one on the ending rule (but we want both of those lines
		// indented)
		const deltaParens = openParens - closeParens;
		if (deltaParens > 0) {
			indent += deltaParens;
			line.indentation = indent;
			// Remove the extra (first) open paren from the normed text
			line.normedText = line.text.replace('(', '');
		} else if (deltaParens < 0) {
			line.indentation = indent;
			indent += deltaParens; // Remember, deltaParens is negative here, so add it
			// Remove the extra (last) close paren from the normed text
			line.normedText = line.text.replace(/\)(?=[^)]*$)/, '');
		} else {
			line.indentation = indent;
		}
		const varAssignment = line.normedText.match(efa_REGEX_ASSIGNMENT_RE);
		if (varAssignment) {
			efa_knownVars[varAssignment[1]] = varAssignment[2];
		}
	});

	// Annotate by going through and identifying variables used in the lines
	Object.keys(efa_knownVars).forEach((varName) => {
		// Escape the name so it is always matched literally
		const escapedName = varName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
		const varRe = new RegExp('\\b' + escapedName + '\\b');
		annotatedFilterLines.forEach((line) => {
			if (!varRe.test(line.text)) {
				return;
			}
			const assignmentMatch = line.text.match(efa_REGEX_ASSIGNMENT_RE);
			if (assignmentMatch && assignmentMatch[1] === varName) {
				// Don't list the variable on the line that assigns
				return;
			}
			line.variables.push(varName);
		});
	});
	return annotatedFilterLines;
}

/**
 * Fetch the pattern (rule text) of a single filter through the action API.
 *
 * @param {string} filterId ID of the filter, used as both start and end of the queried range
 *
 * @return {Promise<string>} Whatever the API returned as the filter's `pattern`,
 * or an empty string when no filter came back or the request failed
 */
async function efa_getFilter(filterId) {
	try {
		const client = new mw.Api();
		const params = {
			action: 'query',
			list: 'abusefilters',
			abfstartid: filterId,
			abfendid: filterId,
			abfprop: 'pattern'
		};
		const { query: { abusefilters: matches } } = await client.get(params);
		// An empty list means there is no such filter
		return matches.length < 1 ? '' : matches[0].pattern;
	} catch (err) {
		// Best effort: log the failure and let the caller treat it as "no pattern"
		console.log(err);
		return '';
	}
}

// On document load, check if this page is an edit filter hit - if so,
// load the EF stuff
$(function () {
	if (efa_PAGE_NAME_RE.test(mw.config.get('wgPageName'))) {
		// efa_main is async: log a failure instead of leaving the rejection unhandled
		efa_main().catch((error) => {
			console.log(error);
		});
	}
});
// </nowiki>
Cookies help us deliver our services. By using our services, you agree to our use of cookies.