Clobbering DOM attributes to bypass HTML filters

寻找注入点

代码分析

payload构造

注入结果

寻找注入点

DOM破坏肯定是出现在js文件中，我们首先来看源码

/resources/labheader/js/labHeader.js这个源码没什么问题我们重点关注在下面两个源码上

/resources/js/loadCommentsWithHtmlJanitor.js这个源码中重要的输入输出都被HTMLJanitor进行了过滤，没有可以直接利用的地方，接下来我们把目光移到最后一个代码上

/resources/js/htmlJanitor.js

代码分析

/resources/js/htmlJanitor.js代码分析

(function (root, factory) {
    if (typeof define === 'function' && define.amd) {
      define('html-janitor', factory);
    } else if (typeof exports === 'object') {
      module.exports = factory();
    } else {
      root.HTMLJanitor = factory();
    }
  }(this, function () {
  
    /**
     * Sanitizer configured with a dictionary of allowed tags.
     *
     * Every value in `config.tags` must be an object, a boolean or a function.
     * `null` is rejected explicitly: `typeof null` is 'object', so it would
     * otherwise pass the type check even though it carries no attribute rules.
     * A valid configuration is stored on the instance as `this.config`.
     *
     * @param {Object} config
     * @param {Object} config.tags Dictionary of allowed tags.
     * @param {boolean} config.keepNestedBlockElements Default false.
     * @throws {Error} "The configuration was invalid" when any tag definition
     *   is null or is not an object, boolean or function.
     */
    function HTMLJanitor(config) {

      var tagDefinitions = config['tags'];
      var tags = Object.keys(tagDefinitions);

      var validConfigValues = tags.every(function (tagName) {
        var definition = tagDefinitions[tagName];
        var type = typeof definition;

        // typeof null === 'object', so null has to be excluded by hand.
        if (definition === null) {
          return false;
        }

        return type === 'object' || type === 'boolean' || type === 'function';
      });

      if(!validConfigValues) {
        throw new Error("The configuration was invalid");
      }

      this.config = config;
    }
    // Upper-case node names that the sanitizer treats as block-level elements.
    var blockElementNames = [
      'P', 'LI', 'TD', 'TH', 'DIV',
      'H1', 'H2', 'H3', 'H4', 'H5', 'H6',
      'PRE'
    ];

    /**
     * Tell whether a node is one of the block-level elements listed above.
     *
     * @param {Object} node Anything exposing a `nodeName` property.
     * @returns {boolean} true when `node.nodeName` is in `blockElementNames`.
     */
    function isBlockElement(node) {
      var candidate = node.nodeName;

      for (var i = 0; i < blockElementNames.length; i += 1) {
        if (blockElementNames[i] === candidate) {
          return true;
        }
      }

      return false;
    }
 
    // Upper-case node names that the sanitizer treats as inline elements.
    var inlineElementNames = [
      'A', 'B', 'STRONG', 'I', 'EM',
      'SUB', 'SUP', 'U', 'STRIKE'
    ];

    /**
     * Tell whether a node is one of the inline elements listed above.
     *
     * @param {Object} node Anything exposing a `nodeName` property.
     * @returns {boolean} true when `node.nodeName` is in `inlineElementNames`.
     */
    function isInlineElement(node) {
      var candidate = node.nodeName;

      return inlineElementNames.some(function (inlineName) {
        return inlineName === candidate;
      });
    }

    /**
     * Sanitize an HTML string.
     *
     * The markup is parsed into a <div> that belongs to a separate document
     * created with `document.implementation.createHTMLDocument('')`, cleaned
     * in place by `_sanitize`, and serialized back to a string.
     *
     * @param {string} html Markup to clean.
     * @returns {string} The container's `innerHTML` after sanitizing.
     */
    HTMLJanitor.prototype.clean = function (html) {
      var scratchDocument = document.implementation.createHTMLDocument('');
      var container = scratchDocument.createElement("div");

      container.innerHTML = html;
      this._sanitize(scratchDocument, container);

      return container.innerHTML;
    };
    /**
     * Sanitize the children of `parentNode` in place.
     *
     * Walks the direct children with a TreeWalker. Whitespace-only text nodes
     * next to a block element, comment nodes and rejected elements are
     * removed; disallowed attributes are stripped from the elements that are
     * kept, and each kept element is sanitized recursively. Whenever a child
     * is removed, the method starts over on `parentNode` (recursive call
     * followed by `break`) instead of continuing the current walk.
     *
     * @param {Document} document Document used to create the TreeWalker.
     * @param {Node} parentNode Node whose children are sanitized.
     */
    HTMLJanitor.prototype._sanitize = function (document, parentNode) {
      var treeWalker = createTreeWalker(document, parentNode);
      // The walker is rooted at parentNode and is used to visit its children.
      var node = treeWalker.firstChild();
      // Start from the first child.
      if (!node) { return; }
      // Nothing to sanitize when parentNode has no matching child.
      do {
        if (node.nodeType === Node.TEXT_NODE) {
          // If this text node is just whitespace and the previous or next element
          // sibling is a block element, remove it
          // N.B.: This heuristic could change. Very specific to a bug with
          // `contenteditable` in Firefox: http://jsbin.com/EyuKase/1/edit?js,output
          // FIXME: make this an option?
          if (node.data.trim() === ''
              && ((node.previousElementSibling && isBlockElement(node.previousElementSibling))
                   || (node.nextElementSibling && isBlockElement(node.nextElementSibling)))) {
            parentNode.removeChild(node);
            this._sanitize(document, parentNode);
            break;
          } else {
            continue;
          }
        }
        // Whitespace-only text next to a block element is removed and
        // parentNode is re-sanitized from the start; any other text node is
        // left untouched and the walk moves on to the next sibling.
  
        // Remove all comments
        if (node.nodeType === Node.COMMENT_NODE) {
          parentNode.removeChild(node);
          this._sanitize(document, parentNode);
          break;
        }
        // Same restart pattern as above: after removing the comment, re-run
        // on parentNode and stop this loop.
        var isInline = isInlineElement(node);
        var containsBlockElement;
        if (isInline) {
          containsBlockElement = Array.prototype.some.call(node.childNodes, isBlockElement);
        }
  
        // Block elements should not be nested (e.g. <li><p>...); if
        // they are, we want to unwrap the inner block element.
        var isNotTopContainer = !! parentNode.parentNode;
        var isNestedBlockElement =
              isBlockElement(parentNode) &&
              isBlockElement(node) &&
              isNotTopContainer;
        var nodeName = node.nodeName.toLowerCase();
  
        var allowedAttrs = getAllowedAttrs(this.config, nodeName, node);
  
        var isInvalid = isInline && containsBlockElement;
        // Summary of the flags computed above:
        //   isInline             - node is one of the inline elements.
        //   containsBlockElement - an inline node has a block-level direct child.
        //   isNotTopContainer    - parentNode itself has a parent.
        //   isNestedBlockElement - block node inside a block parent that is
        //                          not the top container.
        //   allowedAttrs         - the configured rule for this tag name.
        //   isInvalid            - inline node that contains a block element.
        if (isInvalid || shouldRejectNode(node, allowedAttrs)
            || (!this.config.keepNestedBlockElements && isNestedBlockElement)) {
          // Do not keep the inner text of SCRIPT/STYLE elements.
          if (! (node.nodeName === 'SCRIPT' || node.nodeName === 'STYLE')) {
            while (node.childNodes.length > 0) {
              parentNode.insertBefore(node.childNodes[0], node);
            }
          }
          parentNode.removeChild(node);
  
          this._sanitize(document, parentNode);
          break;
        }
  
        // A rejected element is unwrapped: its children are moved in front of
        // it (except for SCRIPT/STYLE, whose content is dropped with the
        // element), the element is removed, and parentNode is re-sanitized
        // from the start so the moved children get checked as well.

        // Sanitize attributes
        // NOTE(review): `node.attributes` is read as a plain property. A <form>
        // containing a control whose id/name is "attributes" reportedly resolves
        // this lookup to that control instead of the attribute map; its `length`
        // is undefined, `0 < undefined` is false, so the loop body never runs
        // and the form keeps every attribute - confirm and harden.
        for (var a = 0; a < node.attributes.length; a += 1) {
          var attr = node.attributes[a];
  
          if (shouldRejectAttr(attr, allowedAttrs, node)) {
            node.removeAttribute(attr.name);
            // Shift the array to continue looping.
            a = a - 1;
          }
        }
        // Each attribute is checked against allowedAttrs; a rejected attribute
        // is removed and the index is stepped back so the attribute that
        // slides into its slot is examined too.
        // Sanitize children
        this._sanitize(document, node);
  
      } while ((node = treeWalker.nextSibling()));
    };
  
    /**
     * Create a TreeWalker rooted at `node` that visits text, element and
     * comment nodes.
     *
     * @param {Document} document Document whose `createTreeWalker` is called.
     * @param {Node} node Root node the walker starts from.
     * @returns {TreeWalker} Whatever `document.createTreeWalker` returns.
     */
    function createTreeWalker(document, node) {
      // Bitmask of the node types the walker should include; the three
      // NodeFilter constants are combined with bitwise OR.
      var whatToShow = NodeFilter.SHOW_TEXT
        | NodeFilter.SHOW_ELEMENT
        | NodeFilter.SHOW_COMMENT;

      // No custom filter: every node of the selected types is visited.
      var customFilter = null;

      // Fourth argument, passed as false; the original notes describe it as
      // the flag controlling whether the walker handles entities.
      var entityFlag = false;

      return document.createTreeWalker(node, whatToShow, customFilter, entityFlag);
    }
    /**
     * Look up the configured rule for a tag.
     *
     * Only own properties of `config.tags` count. A tag name such as
     * `constructor` would otherwise resolve through the prototype chain to
     * `Object`, be called as a rule function and hand back the node itself as
     * its "allowed attributes", which keeps the element.
     *
     * @param {Object} config Janitor configuration; `config.tags` maps
     *   lower-case tag names to rules.
     * @param {string} nodeName Lower-case tag name to look up.
     * @param {Node} node The element, passed to function-valued rules.
     * @returns {*} The rule stored for the tag, the result of calling it with
     *   `node` when the rule is a function, or undefined when the tag is not
     *   configured.
     */
    function getAllowedAttrs(config, nodeName, node){
      var tagDefinitions = config.tags;

      // Ignore anything inherited from Object.prototype (constructor, __proto__, ...).
      if (!Object.prototype.hasOwnProperty.call(tagDefinitions, nodeName)) {
        return undefined;
      }

      if (typeof tagDefinitions[nodeName] === 'function') {
        return tagDefinitions[nodeName](node);
      }

      return tagDefinitions[nodeName];
    }
    /**
     * Decide whether an element has to be removed, based on its tag rule.
     *
     * @param {Node} node The element under inspection (not read here).
     * @param {*} allowedAttrs Rule found for the element's tag.
     * @returns {boolean} true when there is no rule (undefined) or the rule is
     *   the boolean false; false for every other rule.
     */
    function shouldRejectNode(node, allowedAttrs){
      switch (typeof allowedAttrs) {
        case 'undefined':
          // Tag is not configured at all.
          return true;
        case 'boolean':
          // true keeps the element, false rejects it.
          return !allowedAttrs;
        default:
          return false;
      }
    }
    /**
     * Decide whether a single attribute has to be removed from an element.
     *
     * Rules are read from own properties of `allowedAttrs` only, keyed by the
     * lower-cased attribute name. Without that restriction, names such as
     * `constructor` or `__proto__` would match members inherited from
     * Object.prototype and the attribute would be kept.
     *
     * @param {{name: string, value: string}} attr Attribute under inspection.
     * @param {*} allowedAttrs `true` to keep every attribute, otherwise a
     *   dictionary of per-attribute rules: a function (called with the value
     *   and the node; a falsy result rejects), `false` (reject), a string
     *   (the value must be equal to it) or any other value (keep).
     * @param {Node} node The element that owns the attribute.
     * @returns {boolean} true when the attribute must be removed.
     */
    function shouldRejectAttr(attr, allowedAttrs, node){
      var attrName = attr.name.toLowerCase();

      if (allowedAttrs === true){
        return false;
      }

      // No rule set at all: nothing can be allowed.
      if (allowedAttrs === null || typeof allowedAttrs === 'undefined') {
        return true;
      }

      // Ignore anything inherited from Object.prototype.
      if (!Object.prototype.hasOwnProperty.call(allowedAttrs, attrName)) {
        return true;
      }

      var rule = allowedAttrs[attrName];

      if (typeof rule === 'function'){
        return !rule(attr.value, node);
      } else if (typeof rule === 'undefined'){
        return true;
      } else if (rule === false) {
        return true;
      } else if (typeof rule === 'string') {
        return (rule !== attr.value);
      }

      return false;
    }
    return HTMLJanitor;
  
  }));

payload构造

重点在这里：标签如果不在允许列表（config.tags）中就会被删掉，最终只有form和input会被保留。我们可以对照portswigger的Exploiting DOM clobbering to enable XSS（CSDN博客）里面的第二道题，其中有一块和下面的漏洞点很是相像。接着往下看

分析过后可以发现，行内元素里面内嵌块级元素会被判定为无效（isInvalid），这种写法不可以使用，那我们就使用不是行内元素的元素，

使用<form><input>这两个元素，我们就逃过了行内元素的检查；而且form不在块级元素列表中，它所在的容器div也没有父节点（isNotTopContainer为false），所以嵌套块级元素的检查也被绕过了

然后我们看清理属性的for循环：我们构造<form><input id=attributes>，这样form.attributes取到的就不再是属性集合，而是<input id=attributes>这个元素。这个元素没有length属性，node.attributes.length就是undefined，循环条件0 < undefined为false，循环体一次都不会执行，form上的属性也就不会被删除了

然后经过上面的分析我们就要找一个标签没有length属性，让他跳出上面的for循环，所以就使用我们这个喽，然后在form里面构造我们的注入语句

<form tabindex=0 onfocus=alert(123)><input id=attributes>

注入结果

上面的payload使用tabindex属性让form可以获得焦点，需要按Tab键让焦点移到form上才会触发onfocus；可以看到payload已经成功插入到页面中了