(function (root, factory) { if (typeof define === 'function' && define.amd) { define('html-janitor', factory); } else if (typeof exports === 'object') { module.exports = factory(); } else { root.HTMLJanitor = factory(); } }(this, function () { /** * @param {Object} config.tags Dictionary of allowed tags. * @param {boolean} config.keepNestedBlockElements Default false. */ function HTMLJanitor(config) { this.config = config; } // TODO: not exhaustive? var blockElementNames = ['P', 'LI', 'TD', 'TH', 'DIV', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'PRE']; function isBlockElement(node) { return blockElementNames.indexOf(node.nodeName) !== -1; } var inlineElementNames = ['A', 'B', 'STRONG', 'I', 'EM', 'SUB', 'SUP', 'U', 'STRIKE']; function isInlineElement(node) { return inlineElementNames.indexOf(node.nodeName) !== -1; } HTMLJanitor.prototype.clean = function (html) { var sandbox = document.createElement('div'); sandbox.innerHTML = html; this._sanitize(sandbox); return sandbox.innerHTML; }; HTMLJanitor.prototype._sanitize = function (parentNode) { var treeWalker = createTreeWalker(parentNode); var node = treeWalker.firstChild(); if (!node) { return; } do { var nodeName = node.nodeName.toLowerCase(); var allowedAttrs = this.config.tags[nodeName]; // Ignore nodes that have already been sanitized if (node._sanitized) { continue; } if (node.nodeType === Node.TEXT_NODE) { // If this text node is just whitespace and the previous or next element // sibling is a block element, remove it // N.B.: This heuristic could change. Very specific to a bug with // `contenteditable` in Firefox: http://jsbin.com/EyuKase/1/edit?js,output // FIXME: make this an option? if (node.data.trim() === '' && ((node.previousElementSibling && isBlockElement(node.previousElementSibling)) || (node.nextElementSibling && isBlockElement(node.nextElementSibling)))) { parentNode.removeChild(node); this._sanitize(parentNode); break; } else { continue; } } // Remove all comments if (node.nodeType === Node.COMMENT_NODE) { parentNode.removeChild(node); this._sanitize(parentNode); break; } var isInline = isInlineElement(node); var containsBlockElement; if (isInline) { containsBlockElement = Array.prototype.some.call(node.childNodes, isBlockElement); } var isInvalid = isInline && containsBlockElement; // Block elements should not be nested (e.g.
...); if // they are, we want to unwrap the inner block element. var isNotTopContainer = !! parentNode.parentNode; var isNestedBlockElement = isBlockElement(parentNode) && isBlockElement(node) && isNotTopContainer; // Drop tag entirely according to the whitelist *and* if the markup // is invalid. if (!this.config.tags[nodeName] || isInvalid || (!this.config.keepNestedBlockElements && isNestedBlockElement)) { // Do not keep the inner text of SCRIPT/STYLE elements. if (! (node.nodeName === 'SCRIPT' || node.nodeName === 'STYLE')) { while (node.childNodes.length > 0) { parentNode.insertBefore(node.childNodes[0], node); } } parentNode.removeChild(node); this._sanitize(parentNode); break; } // Sanitize attributes for (var a = 0; a < node.attributes.length; a += 1) { var attr = node.attributes[a]; var attrName = attr.name.toLowerCase(); // Allow attribute? var allowedAttrValue = allowedAttrs[attrName] || allowedAttrs === true; var notInAttrList = ! allowedAttrValue; var valueNotAllowed = allowedAttrValue !== true && attr.value !== allowedAttrValue; if (notInAttrList || valueNotAllowed) { node.removeAttribute(attr.name); // Shift the array to continue looping. a = a - 1; } } // Sanitize children this._sanitize(node); // Mark node as sanitized so it's ignored in future runs node._sanitized = true; } while ((node = treeWalker.nextSibling())); }; function createTreeWalker(node) { return document.createTreeWalker(node, NodeFilter.SHOW_TEXT | NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_COMMENT, null, false); } return HTMLJanitor; }));