User:DarkMatterMan4500/edit-filter-hit-analyzer.js: Difference between revisions
From WikiOasis Meta
More actions
Created page with "// <nowiki> // @ts-check // More information on how an edit filter was tripped importStylesheet('w:en:User:GeneralNotability/edit-filter-hit-analyzer.css' ); /**" |
mNo edit summary |
||
| Line 5: | Line 5: | ||
importStylesheet('w:en:User:GeneralNotability/edit-filter-hit-analyzer.css' ); | importStylesheet('w:en:User:GeneralNotability/edit-filter-hit-analyzer.css' ); | ||
/** | /** | ||
* @typedef EditFilterLine | |||
* @type {Object} | |||
* @property {string} text Text of the line | |||
* @property {string} normedText Text with some modifications applied for parsing | |||
* @property {string[]} variables Variables found in the line | |||
* @property {number} indentation how far to indent the line | |||
*/ | |||
const efa_knownVars = {}; | |||
const efa_PAGE_NAME_RE = /Special:AbuseLog\/\d+/; | |||
const efa_FILTER_PAGE_RE = /\/wiki\/(Special:AbuseFilter\/(\d+))/; | |||
// Vars in this list shouldn't have their full content displayed because they're usually really big | |||
const efa_HIDDEN_VARS = [ 'old_wikitext', 'new_wikitext', 'edit_diff', 'all_links', 'added_lines', 'removed_lines', 'new_html' ]; | |||
// Parser regexes | |||
const efa_REGEX_ASSIGNMENT_RE = /(\w+)\s*:=\s*"(.*)"/; | |||
const efa_RLIKE_RE = /\b(.*)\s*(i?rlike|regex)\s*(\w+|".*")/; | |||
// Future reference: if we want to do ccnorm ourselves, we can pull the conversion list | |||
// from https://phab.wmfusercontent.org/file/data/jcoued3dziiwwwdr53lp/PHID-FILE-lkxia6juxnhqt263dbrj/equivset.json | |||
async function efa_main() { | |||
// populate knownVars with built-in values | |||
Object.entries(mw.config.get('wgAbuseFilterVariables')).forEach(([ key, value ]) => { | |||
efa_knownVars[key] = value; | |||
}); | |||
const $actionParams = $('h3:contains("Action parameters")', document); | |||
$('<h3>').text('Filter rule analysis').insertBefore($actionParams); | |||
const $ruleAnchor = $('<ul>').attr('id', 'efa-anchor').insertBefore($actionParams); | |||
// Find the link which goes to Special:AbuseFilter, then pull out the wikilink part of it | |||
const filterId = $('a', document).filter(function () { | |||
return efa_FILTER_PAGE_RE.test(this.getAttribute('href')); | |||
}).attr('href').match(efa_FILTER_PAGE_RE)[2]; | |||
const filterPattern = await efa_getFilter(filterId); | |||
if (!filterPattern) { | |||
// Something went wrong (or we can't access the filter), | |||
// bail out | |||
return; | |||
} | |||
const filterRules = efa_parseRules(filterPattern); | |||
filterRules.forEach((rule) => { | |||
const $bullet = $('<li>').attr('style', 'margin-left:' + (10 * rule.indentation + 10) + 'px;'); | |||
$('<span>').addClass('efa-rule').text(rule.text).appendTo($bullet); | |||
rule.variables.forEach((variable) => { | |||
const $efaData = $('<span>').addClass('efa-data'); | |||
if (efa_HIDDEN_VARS.includes(variable)) { | |||
$efaData.append(variable + ': (not shown)'); | |||
} else { | |||
$efaData.append(variable + ': ' + efa_knownVars[variable]); | |||
} | |||
$efaData.appendTo($bullet); | |||
}); | |||
const rlikeMatch = rule.normedText.match(efa_RLIKE_RE); | |||
if (rlikeMatch) { | |||
// If this is a regex, try to expand it and generate a link | |||
let reText = rlikeMatch[1]; | |||
const matchType = rlikeMatch[2]; | |||
let re = rlikeMatch[3]; | |||
// Whether to apply substitution on the regex side (don't if ) | |||
let subRe = true; | |||
const reQuoteSearch = re.match(/.*?"(.*)"/); | |||
if (reQuoteSearch) { | |||
// Remove the quotes around a literal regex | |||
re = reQuoteSearch[1]; | |||
// Don't attempt substitution since this is a literal | |||
subRe = false; | |||
} | |||
// Expand variables (or possibly function calls on a variable) | |||
// TODO: this is really simplistic (obviously) - strip function calls and get | |||
// an exact match | |||
for (const entry of Object.entries(efa_knownVars)) { | |||
if (reText.includes(entry[0])) { | |||
reText = entry[1].toString(); | |||
} | |||
if (re.includes(entry[0]) && subRe) { | |||
re = entry[1].toString(); | |||
} | |||
} | |||
// abusefilter entries are PCRE and by default use the 'u' flag. | |||
// if irlike is being used, add the i flag as well. | |||
let flags = 'u'; | |||
if (matchType === 'irlike') { | |||
flags += 'i'; | |||
} | |||
const re101url = `https://regex101.com/?regex=${encodeURIComponent(re)}&testString=${encodeURIComponent(reText)}&flags=${flags}`; | |||
$bullet.append(' ').append($('<a>').attr('href', re101url).text('(view at regex101)')); | |||
} | |||
$bullet.appendTo($ruleAnchor); | |||
}); | |||
} | |||
/** | |||
* Turn a filter's pattern into a list of rules | |||
* | |||
* @param {string} pattern Original text pattern | |||
* | |||
* @return {EditFilterLine[]} List of rules | |||
*/ | |||
function efa_parseRules(pattern) { | |||
// Strip all newline characters and split by statement | |||
// The second part is taken from https://stackoverflow.com/questions/11502598/how-to-match-something-with-regex-that-is-not-between-two-special-characters | |||
// It matches all split characters (&, ;, &) as long as they are _not_ between quotes | |||
const filterLines = pattern.replace(/(\r|\n)/g, '').split(/([&;|](?=(?:[^"]*"[^"]*")*[^"]*$))/g); | |||
/** @type {EditFilterLine[]} */ | |||
const annotatedFilterLines = []; | |||
filterLines.forEach((line) => { | |||
// Trim, then replace long whitespaces with a single space | |||
const cleanedUpLine = line.trim().replace(/\s+/, ' '); | |||
const annotatedLine = { text: cleanedUpLine, normedText: cleanedUpLine, | |||
variables: [], indentation: 0 }; | |||
annotatedFilterLines.push(annotatedLine); | |||
}); | |||
// Indentation pass: figure out how deep each statement is nested in parens, | |||
// then create a "normed" version which strips the extra paren(s) | |||
// While we're in there, save variable assignments | |||
let indent = 0; | |||
annotatedFilterLines.forEach((line) => { | |||
const openParens = line.text.split(/\(/).length; | |||
const closeParens = line.text.split(/\)/).length; | |||
// Because of how we split the strings, a block of indented text will | |||
// always start with an extra open paren on the starting rule, and close | |||
// with an extra one on the ending rule (but we want both of those lines) | |||
// indented | |||
const deltaParens = openParens - closeParens; | |||
if (deltaParens > 0) { | |||
indent += deltaParens; | |||
line.indentation = indent; | |||
// Remove the extra paren from the normed text | |||
line.normedText = line.text.replace('(', ''); | |||
} else if (deltaParens < 0) { | |||
line.indentation = indent; | |||
indent += deltaParens; // Remember, deltaparens is negative here, so add it | |||
// Remove the extra paren from the normed text | |||
line.normedText = line.text.replace(/\)(?=[^)]*$)/, ''); | |||
} else { | |||
line.indentation = indent; | |||
} | |||
const varAssignment = line.normedText.match(efa_REGEX_ASSIGNMENT_RE); | |||
if (varAssignment) { | |||
efa_knownVars[varAssignment[1]] = varAssignment[2]; | |||
} | |||
}); | |||
// Annotate by going through and identifying variables used in the lines | |||
Object.keys(efa_knownVars).forEach((varName) => { | |||
const varRe = new RegExp('\\b' + varName + '\\b'); | |||
annotatedFilterLines.forEach((line) => { | |||
if (line.text.match(varRe)) { | |||
const assignmentMatch = line.text.match(efa_REGEX_ASSIGNMENT_RE); | |||
if (assignmentMatch && assignmentMatch[1] === varName) { | |||
// Don't list the variable on the line that assigns | |||
return; | |||
} | |||
line.variables.push(varName); | |||
} | |||
}); | |||
}); | |||
return annotatedFilterLines; | |||
} | |||
async function efa_getFilter(filterId) { | |||
try { | |||
const api = new mw.Api(); | |||
const response = await api.get({ | |||
action: 'query', | |||
list: 'abusefilters', | |||
abfstartid: filterId, | |||
abfendid: filterId, | |||
abfprop: 'pattern' | |||
}); | |||
if (response.query.abusefilters.length < 1) { | |||
// No match? | |||
return ''; | |||
} | |||
return response.query.abusefilters[0].pattern; | |||
} catch (error) { | |||
console.log(error); | |||
return ''; | |||
} | |||
} | |||
// On document load, check if this page is a edit filter hit - if so, | |||
// load the EF stuff | |||
$(function () { | |||
if (efa_PAGE_NAME_RE.test(mw.config.get('wgPageName'))) { | |||
efa_main(); | |||
} | |||
}); | |||
// </nowiki> | |||
Latest revision as of 18:12, 5 May 2026
// <nowiki>
// @ts-check
// More information on how an edit filter was tripped
importStylesheet('w:en:User:GeneralNotability/edit-filter-hit-analyzer.css' );
/**
* @typedef EditFilterLine
* @type {Object}
* @property {string} text Text of the line
* @property {string} normedText Text with some modifications applied for parsing
* @property {string[]} variables Variables found in the line
* @property {number} indentation how far to indent the line
*/
// Shared map of variable name -> value. efa_main() seeds it from the
// wgAbuseFilterVariables config value, and efa_parseRules() adds every
// `name := "literal"` assignment it finds in the filter pattern.
const efa_knownVars = {};
// Tested against wgPageName to decide whether this page is a single abuse log entry
const efa_PAGE_NAME_RE = /Special:AbuseLog\/\d+/;
// Matches a link href pointing at a filter page; capture group 2 is the numeric filter ID
const efa_FILTER_PAGE_RE = /\/wiki\/(Special:AbuseFilter\/(\d+))/;
// Vars in this list shouldn't have their full content displayed because they're usually really big
const efa_HIDDEN_VARS = [ 'old_wikitext', 'new_wikitext', 'edit_diff', 'all_links', 'added_lines', 'removed_lines', 'new_html' ];
// Parser regexes
// Assignment of a quoted literal: group 1 is the variable name, group 2 the text between the quotes
const efa_REGEX_ASSIGNMENT_RE = /(\w+)\s*:=\s*"(.*)"/;
// Regex comparison: group 1 is the left-hand side, group 2 the operator (rlike, irlike or regex),
// group 3 the right-hand side (either a bare word or a quoted literal including its quotes)
const efa_RLIKE_RE = /\b(.*)\s*(i?rlike|regex)\s*(\w+|".*")/;
// Future reference: if we want to do ccnorm ourselves, we can pull the conversion list
// from https://phab.wmfusercontent.org/file/data/jcoued3dziiwwwdr53lp/PHID-FILE-lkxia6juxnhqt263dbrj/equivset.json
/**
 * Build a regex101 link for a rule that performs a regex comparison.
 *
 * @param {EditFilterLine} rule Parsed rule to inspect
 *
 * @return {string|null} URL pre-filled with the regex and test string,
 * or null when the rule is not a regex comparison
 */
function efa_buildRegex101Url(rule) {
    const rlikeMatch = rule.normedText.match(efa_RLIKE_RE);
    if (!rlikeMatch) {
        return null;
    }
    let reText = rlikeMatch[1];
    const matchType = rlikeMatch[2];
    let re = rlikeMatch[3];
    // Whether to apply substitution on the regex side (don't if the regex is a quoted literal)
    let subRe = true;
    const reQuoteSearch = re.match(/.*?"(.*)"/);
    if (reQuoteSearch) {
        // Remove the quotes around a literal regex
        re = reQuoteSearch[1];
        // Don't attempt substitution since this is a literal
        subRe = false;
    }
    // Expand variables (or possibly function calls on a variable)
    // TODO: this is really simplistic (obviously) - strip function calls and get
    // an exact match
    for (const [ name, value ] of Object.entries(efa_knownVars)) {
        // String() rather than .toString() so a null/undefined value cannot throw
        if (reText.includes(name)) {
            reText = String(value);
        }
        if (subRe && re.includes(name)) {
            re = String(value);
        }
    }
    // abusefilter entries are PCRE and by default use the 'u' flag.
    // if irlike is being used, add the i flag as well.
    let flags = 'u';
    if (matchType === 'irlike') {
        flags += 'i';
    }
    return `https://regex101.com/?regex=${encodeURIComponent(re)}&testString=${encodeURIComponent(reText)}&flags=${flags}`;
}

/**
 * Entry point: fetch the pattern of the filter that was hit, parse it into
 * rules and render a "Filter rule analysis" list before the
 * "Action parameters" heading.
 *
 * Does nothing when the filter link cannot be found on the page or the
 * filter pattern cannot be retrieved.
 *
 * @return {Promise<void>}
 */
async function efa_main() {
    // populate knownVars with built-in values (the config value may be absent)
    Object.assign(efa_knownVars, mw.config.get('wgAbuseFilterVariables') || {});

    // Find the link which goes to Special:AbuseFilter, then pull out the filter ID
    const filterHref = $('a', document).filter(function () {
        return efa_FILTER_PAGE_RE.test(this.getAttribute('href'));
    }).attr('href');
    const filterMatch = filterHref ? filterHref.match(efa_FILTER_PAGE_RE) : null;
    if (!filterMatch) {
        // No link to the filter on this page, nothing to analyze
        return;
    }

    const filterPattern = await efa_getFilter(filterMatch[2]);
    if (!filterPattern) {
        // Something went wrong (or we can't access the filter),
        // bail out
        return;
    }

    // Only add the heading once there is something to show under it
    const $actionParams = $('h3:contains("Action parameters")', document);
    $('<h3>').text('Filter rule analysis').insertBefore($actionParams);
    const $ruleAnchor = $('<ul>').attr('id', 'efa-anchor').insertBefore($actionParams);

    efa_parseRules(filterPattern).forEach((rule) => {
        const $bullet = $('<li>').attr('style', 'margin-left:' + (10 * rule.indentation + 10) + 'px;');
        $('<span>').addClass('efa-rule').text(rule.text).appendTo($bullet);
        rule.variables.forEach((variable) => {
            const shown = efa_HIDDEN_VARS.includes(variable) ? '(not shown)' : efa_knownVars[variable];
            // Variable values come from the logged edit, so they must be inserted
            // as text: jQuery's .append() would parse a string as HTML.
            $('<span>').addClass('efa-data').text(variable + ': ' + shown).appendTo($bullet);
        });
        // If this is a regex, try to expand it and generate a link
        const re101url = efa_buildRegex101Url(rule);
        if (re101url) {
            $bullet.append(' ').append($('<a>').attr('href', re101url).text('(view at regex101)'));
        }
        $bullet.appendTo($ruleAnchor);
    });
}
/**
 * Turn a filter's pattern into a list of rules
 *
 * The pattern is split on the statement separators `&`, `;` and `|` when
 * they appear outside double quotes. The separators themselves are kept as
 * their own entries in the returned list.
 *
 * Side effect: every `name := "literal"` assignment found in the pattern is
 * stored in efa_knownVars.
 *
 * @param {string} pattern Original text pattern
 *
 * @return {EditFilterLine[]} List of rules
 */
function efa_parseRules(pattern) {
    // Lookahead taken from https://stackoverflow.com/questions/11502598/how-to-match-something-with-regex-that-is-not-between-two-special-characters
    // It succeeds only when an even number of double quotes follows, i.e. when
    // the current position is _not_ between quotes
    const outsideQuotes = '(?=(?:[^"]*"[^"]*")*[^"]*$)';
    // Matches all split characters (&, ;, |) as long as they are not between quotes
    const splitRe = new RegExp('([&;|]' + outsideQuotes + ')', 'g');
    // Matches whitespace runs outside quotes, so string literals (usually
    // regexes) keep their exact content
    const whitespaceRe = new RegExp('\\s+' + outsideQuotes, 'g');

    /** @type {EditFilterLine[]} */
    const annotatedFilterLines = pattern
        // Strip all newline characters, then split by statement
        .replace(/(\r|\n)/g, '')
        .split(splitRe)
        .map((line) => {
            // Trim, then replace every long whitespace run with a single space
            const cleanedUpLine = line.trim().replace(whitespaceRe, ' ');
            return { text: cleanedUpLine, normedText: cleanedUpLine,
                variables: [], indentation: 0 };
        });

    // Indentation pass: figure out how deep each statement is nested in parens,
    // then create a "normed" version which strips the extra paren(s)
    // While we're in there, save variable assignments
    let indent = 0;
    annotatedFilterLines.forEach((line) => {
        // split().length is (number of occurrences + 1) for both counts, so
        // the difference is still the paren balance of the line
        const openParens = line.text.split(/\(/).length;
        const closeParens = line.text.split(/\)/).length;
        // Because of how we split the strings, a block of indented text will
        // always start with an extra open paren on the starting rule, and close
        // with an extra one on the ending rule (but we want both of those lines)
        // indented
        const deltaParens = openParens - closeParens;
        if (deltaParens > 0) {
            indent += deltaParens;
            line.indentation = indent;
            // Remove the extra (first) open paren from the normed text
            line.normedText = line.text.replace('(', '');
        } else if (deltaParens < 0) {
            line.indentation = indent;
            indent += deltaParens; // Remember, deltaParens is negative here, so add it
            // Remove the extra (last) close paren from the normed text
            line.normedText = line.text.replace(/\)(?=[^)]*$)/, '');
        } else {
            line.indentation = indent;
        }
        const varAssignment = line.normedText.match(efa_REGEX_ASSIGNMENT_RE);
        if (varAssignment) {
            efa_knownVars[varAssignment[1]] = varAssignment[2];
        }
    });

    // Annotate by going through and identifying variables used in the lines
    Object.keys(efa_knownVars).forEach((varName) => {
        // Escape the name so a key containing regex metacharacters cannot
        // break (or change the meaning of) the generated expression
        const escapedName = varName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        const varRe = new RegExp('\\b' + escapedName + '\\b');
        annotatedFilterLines.forEach((line) => {
            if (!varRe.test(line.text)) {
                return;
            }
            const assignmentMatch = line.text.match(efa_REGEX_ASSIGNMENT_RE);
            if (assignmentMatch && assignmentMatch[1] === varName) {
                // Don't list the variable on the line that assigns
                return;
            }
            line.variables.push(varName);
        });
    });
    return annotatedFilterLines;
}
/**
 * Look up the pattern of a single edit filter through the MediaWiki API.
 *
 * @param {string} filterId ID of the filter to fetch
 *
 * @return {Promise<string>} The filter's pattern, or an empty string when
 * the API returns no matching filter or the request fails
 */
async function efa_getFilter(filterId) {
    try {
        // Start and end ID are identical so the listing is limited to this one filter
        const request = {
            action: 'query',
            list: 'abusefilters',
            abfstartid: filterId,
            abfendid: filterId,
            abfprop: 'pattern'
        };
        const result = await new mw.Api().get(request);
        const matches = result.query.abusefilters;
        // No match?
        return matches.length < 1 ? '' : matches[0].pattern;
    } catch (failure) {
        console.log(failure);
        return '';
    }
}
// On document load, check if this page is an edit filter hit - if so,
// load the EF stuff
$(function () {
    if (efa_PAGE_NAME_RE.test(mw.config.get('wgPageName'))) {
        // efa_main() is async: report a failure instead of leaving the
        // returned promise as an unhandled rejection
        efa_main().catch(function (error) {
            console.error(error);
        });
    }
});
// </nowiki>