User:DarkMatterMan4500/edit-filter-hit-analyzer.js: Difference between revisions
From WikiOasis Meta
More actions
Created page with "// <nowiki> // @ts-check // More information on how an edit filter was tripped importStylesheet('w:en:User:GeneralNotability/edit-filter-hit-analyzer.css' ); /**" |
mNo edit summary |
||
| Line 5: | Line 5: | ||
// Load the stylesheet that provides the efa-* classes used by this script
importStylesheet('w:en:User:GeneralNotability/edit-filter-hit-analyzer.css' );
/**
 * One statement (or operator) of a filter pattern, as produced by efa_parseRules
 *
 * @typedef EditFilterLine
 * @type {Object}
 * @property {string} text Text of the line
 * @property {string} normedText Text with some modifications applied for parsing
 * @property {string[]} variables Variables found in the line
 * @property {number} indentation how far to indent the line
 */
// Variable values known to the analyzer, keyed by variable name.
// Filled in at runtime (built-in variables of the log entry plus
// assignments found while parsing the filter).
const efa_knownVars = {};
// Matches the page name of a single abuse log entry
const efa_PAGE_NAME_RE = /Special:AbuseLog\/\d+/;
// Matches a link to a filter's page; group 2 is the numeric filter ID
const efa_FILTER_PAGE_RE = /\/wiki\/(Special:AbuseFilter\/(\d+))/;
// Vars in this list shouldn't have their full content displayed because they're usually really big
const efa_HIDDEN_VARS = [ 'old_wikitext', 'new_wikitext', 'edit_diff', 'all_links', 'added_lines', 'removed_lines', 'new_html' ];
// Parser regexes
// `name := "literal"`: group 1 is the variable name, group 2 the literal without quotes
const efa_REGEX_ASSIGNMENT_RE = /(\w+)\s*:=\s*"(.*)"/;
// `<subject> rlike|irlike|regex <pattern>`: group 1 is the subject expression,
// group 2 the operator, group 3 the pattern (a variable name or a quoted literal).
// The subject group is lazy and the operator is wrapped in word boundaries so that
// `irlike` is not captured as `... i` + `rlike`, and so that the FIRST operator on
// the line is used rather than an operator-like word inside the pattern literal.
const efa_RLIKE_RE = /\b(.*?)\s*\b(i?rlike|regex)\b\s*(\w+|".*")/;
// Future reference: if we want to do ccnorm ourselves, we can pull the conversion list
// from https://phab.wmfusercontent.org/file/data/jcoued3dziiwwwdr53lp/PHID-FILE-lkxia6juxnhqt263dbrj/equivset.json
/**
 * Annotate an abuse log entry page with a breakdown of the filter that was hit.
 *
 * Inserts a "Filter rule analysis" heading and list before the
 * "Action parameters" heading, fetches the pattern of the filter linked from
 * the page, and renders one bullet per parsed rule showing the rule text, the
 * values of the variables it uses, and (for regex matches) a regex101 link.
 *
 * @return {Promise<void>}
 */
async function efa_main() {
	// populate knownVars with built-in values
	// (the config value may be missing, in which case there is nothing to copy)
	const builtInVars = mw.config.get('wgAbuseFilterVariables') || {};
	Object.entries(builtInVars).forEach(([ key, value ]) => {
		efa_knownVars[key] = value;
	});

	const $actionParams = $('h3:contains("Action parameters")', document);
	$('<h3>').text('Filter rule analysis').insertBefore($actionParams);
	const $ruleAnchor = $('<ul>').attr('id', 'efa-anchor').insertBefore($actionParams);

	// Find the link which goes to Special:AbuseFilter, then pull out the filter ID
	const filterHref = $('a', document).filter(function () {
		return efa_FILTER_PAGE_RE.test(this.getAttribute('href') || '');
	}).attr('href');
	const filterLinkMatch = (filterHref || '').match(efa_FILTER_PAGE_RE);
	if (!filterLinkMatch) {
		// No link to a filter on this page, nothing to analyze
		return;
	}

	const filterPattern = await efa_getFilter(filterLinkMatch[2]);
	if (!filterPattern) {
		// Something went wrong (or we can't access the filter),
		// bail out
		return;
	}

	efa_parseRules(filterPattern).forEach((rule) => {
		const $bullet = $('<li>').attr('style', 'margin-left:' + (10 * rule.indentation + 10) + 'px;');
		$('<span>').addClass('efa-rule').text(rule.text).appendTo($bullet);

		rule.variables.forEach((variable) => {
			const shownValue = efa_HIDDEN_VARS.includes(variable) ?
				'(not shown)' :
				efa_knownVars[variable];
			// Use .text(), not .append(): the values come from the logged action
			// and .append() would parse a string argument as HTML
			$('<span>').addClass('efa-data').text(variable + ': ' + shownValue).appendTo($bullet);
		});

		const rlikeMatch = rule.normedText.match(efa_RLIKE_RE);
		if (rlikeMatch) {
			// If this is a regex, try to expand it and generate a link
			const re101url = efa_buildRegex101Url(rlikeMatch[1], rlikeMatch[2], rlikeMatch[3]);
			$bullet.append(' ').append($('<a>').attr('href', re101url).text('(view at regex101)'));
		}
		$bullet.appendTo($ruleAnchor);
	});
}

/**
 * Replace an expression with the value of the known variable it mentions.
 *
 * TODO: this is really simplistic (obviously) - function calls around the
 * variable are simply dropped. Names are matched as whole words against the
 * original expression only; if several known variables are mentioned, the
 * one appearing first in the expression is used.
 *
 * @param {string} expression Expression text taken from the filter
 *
 * @return {string} The variable's value as a string, or the expression itself
 *  if it mentions no known variable
 */
function efa_expandVariable(expression) {
	let firstName = null;
	let firstIndex = Infinity;
	Object.keys(efa_knownVars).forEach((name) => {
		const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
		const index = expression.search(new RegExp('\\b' + escapedName + '\\b'));
		if (index !== -1 && index < firstIndex) {
			firstName = name;
			firstIndex = index;
		}
	});
	// String() rather than .toString() so null/undefined values don't throw
	return firstName === null ? expression : String(efa_knownVars[firstName]);
}

/**
 * Build a regex101 link for one regex comparison found in a filter.
 *
 * @param {string} subjectExpr Left-hand side of the comparison (the text being tested)
 * @param {string} matchType Operator that was used (rlike, irlike or regex)
 * @param {string} patternExpr Right-hand side: a quoted literal or a variable name
 *
 * @return {string} URL of a regex101 page pre-filled with the regex and test string
 */
function efa_buildRegex101Url(subjectExpr, matchType, patternExpr) {
	// A quoted literal is used as-is (minus the quotes); only a bare
	// expression gets variable substitution
	const literalMatch = patternExpr.match(/.*?"(.*)"/);
	const re = literalMatch ? literalMatch[1] : efa_expandVariable(patternExpr);
	const reText = efa_expandVariable(subjectExpr);

	// abusefilter entries are PCRE and by default use the 'u' flag.
	// if irlike is being used, add the i flag as well.
	let flags = 'u';
	if (matchType === 'irlike') {
		flags += 'i';
	}
	return `https://regex101.com/?regex=${encodeURIComponent(re)}&testString=${encodeURIComponent(reText)}&flags=${flags}`;
}
/**
 * Turn a filter's pattern into a list of rules
 *
 * The pattern is split on the `&`, `;` and `|` operators (unless they are
 * inside a double-quoted string). The operators themselves are kept as
 * entries of their own, since the split regex captures them.
 *
 * Side effect: every `name := "literal"` assignment found in the pattern is
 * stored in efa_knownVars.
 *
 * @param {string} pattern Original text pattern
 *
 * @return {EditFilterLine[]} List of rules
 */
function efa_parseRules(pattern) {
	// Lookahead taken from https://stackoverflow.com/questions/11502598/how-to-match-something-with-regex-that-is-not-between-two-special-characters
	// It only succeeds when an even number of quotes follows, i.e. when the
	// current position is _not_ between quotes
	const notInQuotes = '(?=(?:[^"]*"[^"]*")*[^"]*$)';
	// Matches all split characters (&, ;, |) outside of quotes
	const splitRe = new RegExp('([&;|]' + notInQuotes + ')', 'g');
	// Matches every run of whitespace outside of quotes; whitespace inside a
	// quoted literal is significant (it is part of the regex) and is left alone
	const whitespaceRe = new RegExp('\\s+' + notInQuotes, 'g');

	// Strip all newline characters and split by statement
	const filterLines = pattern.replace(/(\r|\n)/g, '').split(splitRe);

	/** @type {EditFilterLine[]} */
	const annotatedFilterLines = filterLines.map((line) => {
		// Trim, then replace long whitespaces with a single space
		const cleanedUpLine = line.trim().replace(whitespaceRe, ' ');
		return {
			text: cleanedUpLine,
			normedText: cleanedUpLine,
			variables: [],
			indentation: 0
		};
	});

	// Indentation pass: figure out how deep each statement is nested in parens,
	// then create a "normed" version which strips the extra paren(s)
	// While we're in there, save variable assignments
	let indent = 0;
	annotatedFilterLines.forEach((line) => {
		const openParens = line.text.split('(').length - 1;
		const closeParens = line.text.split(')').length - 1;
		// Because of how we split the strings, a block of indented text will
		// always start with an extra open paren on the starting rule, and close
		// with an extra one on the ending rule (but we want both of those lines
		// indented)
		const deltaParens = openParens - closeParens;
		if (deltaParens > 0) {
			indent += deltaParens;
			line.indentation = indent;
			// Remove the extra (first) open paren from the normed text
			line.normedText = line.text.replace('(', '');
		} else if (deltaParens < 0) {
			line.indentation = indent;
			indent += deltaParens; // Remember, deltaParens is negative here, so add it
			// Remove the extra (last) close paren from the normed text
			line.normedText = line.text.replace(/\)(?=[^)]*$)/, '');
		} else {
			line.indentation = indent;
		}

		const varAssignment = line.normedText.match(efa_REGEX_ASSIGNMENT_RE);
		if (varAssignment) {
			efa_knownVars[varAssignment[1]] = varAssignment[2];
		}
	});

	// Annotate by going through and identifying variables used in the lines
	Object.keys(efa_knownVars).forEach((varName) => {
		// Escape the name so that it is always matched literally
		const escapedName = varName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
		const varRe = new RegExp('\\b' + escapedName + '\\b');
		annotatedFilterLines.forEach((line) => {
			if (!varRe.test(line.text)) {
				return;
			}
			const assignmentMatch = line.text.match(efa_REGEX_ASSIGNMENT_RE);
			if (assignmentMatch && assignmentMatch[1] === varName) {
				// Don't list the variable on the line that assigns it
				return;
			}
			line.variables.push(varName);
		});
	});
	return annotatedFilterLines;
}
/**
 * Fetch the pattern (rule text) of a filter through the MediaWiki API
 *
 * Never rejects: any failure is logged and reported as an empty string.
 *
 * @param {string|number} filterId ID of the filter to look up
 *
 * @return {Promise<string>} The filter's pattern, or an empty string if the
 *  filter was not returned, its pattern was not included in the response, or
 *  the request failed
 */
async function efa_getFilter(filterId) {
	try {
		const api = new mw.Api();
		// Start and end at the same ID so that at most this one filter comes back
		const response = await api.get({
			action: 'query',
			list: 'abusefilters',
			abfstartid: filterId,
			abfendid: filterId,
			abfprop: 'pattern'
		});
		const filters = (response && response.query && response.query.abusefilters) || [];
		if (filters.length < 1) {
			// No match?
			return '';
		}
		// The entry may come back without a pattern; keep the return type a string
		const pattern = filters[0].pattern;
		return typeof pattern === 'string' ? pattern : '';
	} catch (error) {
		console.error(error);
		return '';
	}
}
// On document load, check if this page is an edit filter hit - if so,
// load the EF stuff
$(function () {
	if (efa_PAGE_NAME_RE.test(mw.config.get('wgPageName'))) {
		// efa_main is async: report a failure instead of leaving the
		// returned promise unhandled
		efa_main().catch((error) => {
			console.error(error);
		});
	}
});
// </nowiki>