深入理解内容匹配-技术圈

之前在了解 Vue 模板编译原理的过程中，看到它是通过正则去匹配模板、然后生成一个树结构的对象；但源码太多，不太好理解。今天重新改写了一下，通过括号匹配来模仿标签匹配的过程。

目标：匹配字符串 [111[2]111]，并输出一个树结构的对象。

// Anchored patterns: each one matches a bracket only at the very start of the
// remaining input.
const endTag = /^\]/;
const startTag = /^\[/;
/**
 * Scans a bracket string from left to right, consuming one token per loop
 * iteration: a closing bracket, an opening bracket, or the run of text that
 * precedes the nearest bracket. Each consumed token is cut off the front of
 * `html` until nothing is left.
 *
 * Relies on the `startTag` and `endTag` patterns declared outside this
 * function.
 *
 * Every callback receives the matched text, its start offset (inclusive) and
 * its end offset (exclusive), both counted from the beginning of the input.
 *
 * @param {string} html - Input such as '[111[2]111]'.
 * @param {Object} [options] - Optional callbacks; omit it to scan only.
 * @param {function(string, number, number)} [options.start] - Called for each opening bracket.
 * @param {function(string, number, number)} [options.end] - Called for each closing bracket.
 * @param {function(string, number, number)} [options.chars] - Called for each text run.
 * @returns {undefined}
 */
function parseHTML(html, options = {}) {
  let index = 0;

  while (html) {
    // Closing bracket at the current position.
    const endTagMatch = html.match(endTag);
    if (endTagMatch) {
      consume(endTagMatch[0], options.end);
      continue;
    }

    // Opening bracket at the current position.
    const startTagMatch = html.match(startTag);
    if (startTagMatch) {
      consume(startTagMatch[0], options.start);
      continue;
    }

    // Text: everything up to whichever bracket comes first. Always preferring
    // "[" would swallow a "]" that appears before it (e.g. "1]2[").
    const bracketPositions = [html.indexOf('['), html.indexOf(']')].filter(
      (position) => position >= 0
    );
    const textEnd =
      bracketPositions.length > 0 ? Math.min(...bracketPositions) : html.length;
    consume(html.substring(0, textEnd), options.chars);
  }

  // Cuts `token` off the front of the input and reports it with its offsets.
  function consume(token, callback) {
    const tokenStart = index;
    advance(token.length);
    if (callback) {
      callback(token, tokenStart, index);
    }
  }

  // Moves the cursor forward by n characters.
  function advance(n) {
    index += n;
    html = html.substring(n);
  }
}

执行代码，可以清楚地看到匹配是从左到右逐段进行的：每匹配到一段内容（一个括号或一段文本），就把它从原字符串的开头截掉，直到整个字符串都被匹配完。

const html = '[111[2]111]';
parseHTML(html);
// Matching process: the remaining input at the start of each loop iteration.
// [111[2]111]
// 111[2]111]
// [2]111]
// 2]111]
// ]111]
// 111]
// ]

根据整个匹配的过程，我们就可以生成树结构的对象：利用栈后进先出的特性，来处理括号之间的嵌套（父子）关系。

// Input to parse, and the anchored patterns that match a bracket only at the
// very start of the remaining input.
const html = '[111[2]111]';
const endTag = /^\]/;
const startTag = /^\[/;

// Parser state shared by start(), end(), chars() and closeElement().
let currentParent; // innermost element that is still open
const stack = []; // elements that are currently open, outermost first
let root; // first element that was opened
/**
 * Handles a closing bracket: pops the innermost open element off `stack`,
 * makes the element below it the new `currentParent`, and hands the popped
 * element to closeElement().
 *
 * @param {*} [tag] - Not read; kept so the signature stays unchanged.
 * @param {*} [start] - Not read; kept so the signature stays unchanged.
 * @returns {undefined}
 */
function end(tag, start) {
  // A "]" without a matching "[" has nothing to close. Ignore it: shrinking
  // the length of an empty array would throw a RangeError.
  if (stack.length === 0) {
    return;
  }

  // Pop the innermost open element.
  const element = stack.pop();
  currentParent = stack[stack.length - 1];
  closeElement(element);
}

/**
 * Handles an opening bracket: creates an element whose parent is the current
 * open element, remembers the first element ever created as `root`, then
 * makes the new element the `currentParent` and pushes it onto `stack`.
 *
 * None of the parameters are read; they are kept so the signature stays
 * unchanged. Note that the `start` and `end` parameters shadow the functions
 * of the same name inside this body.
 *
 * @returns {undefined}
 */
function start(tag, attrs, unary, start, end) {
  const element = createASTElement(currentParent);
  if (!root) {
    root = element;
  }
  currentParent = element;
  // Push onto the stack so the matching closing bracket can pop it.
  stack.push(element);
}

/**
 * Handles a run of text: appends a `{ type: 'text', text }` node to the
 * children of the current open element.
 *
 * @param {string} text - The text run; an empty value adds nothing.
 * @param {*} [start] - Not read; kept so the signature stays unchanged.
 * @param {*} [end] - Not read; kept so the signature stays unchanged.
 * @returns {undefined}
 */
function chars(text, start, end) {
  // Text outside every bracket (before the first "[" or after the outermost
  // "]") has no open element to attach to, so it is dropped instead of
  // throwing on an undefined parent.
  if (!currentParent || !text) {
    return;
  }
  currentParent.children.push({
    type: 'text',
    text: text
  });
}
/**
 * Attaches a finished element to the current open element, if there is one:
 * appends it to that element's children and records it as the parent.
 * Does nothing when no element is open.
 *
 * @param {Object} element - The element that has just been closed.
 * @returns {undefined}
 */
function closeElement(element) {
  const parent = currentParent;
  if (!parent) {
    return;
  }
  parent.children.push(element);
  element.parent = parent;
}
/**
 * Scans a bracket string from left to right and drives the tree-building
 * handlers: end() for each closing bracket, start() for each opening bracket,
 * and chars(text) for each run of text between brackets. Each consumed token
 * is cut off the front of `html` until nothing is left; the remaining input
 * is logged at the start of every iteration.
 *
 * Relies on `startTag`, `endTag`, `start`, `end` and `chars` declared outside
 * this function.
 *
 * @param {string} html - Input such as '[111[2]111]'.
 * @returns {undefined}
 */
function parseHTML(html) {
  while (html) {
    console.log(html);

    // Closing bracket at the current position.
    const endTagMatch = html.match(endTag);
    if (endTagMatch) {
      advance(endTagMatch[0].length);
      end();
      continue;
    }

    // Opening bracket at the current position.
    const startTagMatch = html.match(startTag);
    if (startTagMatch) {
      advance(startTagMatch[0].length);
      start();
      continue;
    }

    // Text: everything up to whichever bracket comes first. Always preferring
    // "[" would swallow a "]" that appears before it (e.g. "1]2[") and attach
    // the following elements to the wrong parent.
    const bracketPositions = [html.indexOf('['), html.indexOf(']')].filter(
      (position) => position >= 0
    );
    const textEnd =
      bracketPositions.length > 0 ? Math.min(...bracketPositions) : html.length;
    const text = html.substring(0, textEnd);
    advance(text.length);
    chars(text);
  }

  // Drops the first n characters of the remaining input.
  function advance(n) {
    html = html.substring(n);
  }
}
// Run the parser over the sample input; start() stores the first element it
// creates in `root`.
parseHTML(html);
/**
 * Builds a new, empty tag node.
 *
 * @param {Object|undefined} parent - Stored as-is in the node's `parent` field.
 * @returns {{type: string, parent: (Object|undefined), children: Array}}
 *   A node with type 'tag' and a fresh, empty children array.
 */
function createASTElement(parent) {
  const element = { type: 'tag' };
  element.parent = parent;
  element.children = [];
  return element;
}
// Print the tree that was built while parsing.
console.log(root);

最后生成的结构如下（省略了指向父节点的 parent 字段）：

{    "type":"tag",    "children":[        {            "type":"text",            "text":"111"        },        {            "type":"tag",            "children":[                {                    "type":"text",                    "text":"2"                }            ]        },        {            "type":"text",            "text":"111"        }    ]}