题目
Leetcode - 10. 正则表达式匹配
解题思路
- 预处理正则表达式,合并同项,比如: "a * b * c * . * " -> " . * "
- 根据预处理后的正则表达式,构建NFA
- 通过NFA状态转移条件一个一个匹配字符串字符
- 不匹配的状态要回退
- 匹配完字符串了,且该状态是可结束的,就true,否则false
c * b * a * 这种正则,构造NFA时,标记a是可结束,但是其实c, b也可结束
a * b * c 这种正则,构造NFA时,a和b都可以是c的父节点
/**
 * Regular-expression matcher supporting '.' (any single character) and 'x*'
 * (zero or more repetitions of x).
 *
 * <p>The pattern is first simplified by merging adjacent starred items, then
 * compiled into an NFA, and finally the input is run through the NFA while
 * tracking the set of all states that are simultaneously reachable.
 */
class Solution {

    /**
     * Reports whether the whole of {@code s} is matched by pattern {@code p}.
     *
     * @param s the input text; may be empty
     * @param p the pattern; a '*' must always follow another character
     * @return {@code true} if the pattern matches the entire input
     */
    public boolean isMatch(String s, String p) {
        if (p.equals(".*")) {
            return true;
        }
        if (p.isEmpty()) {
            // An empty pattern builds an NFA with no accepting state, so handle it here:
            // it matches the empty input and nothing else.
            return s.isEmpty();
        }
        String simplified = pretreatment(p);
        if (simplified.equals(".*")) {
            return true;
        }
        State start = constructNFA(simplified);
        Set<State> initial = new HashSet<>();
        initial.add(start);
        // Starting from index 0 also covers an empty input: the loop body never runs and
        // the result is simply whether the start state is accepting.
        return acceptsFrom(s, 0, initial);
    }

    /** A single NFA state. Identity (not content) distinguishes states. */
    private static class State {
        // Predecessor state in the pattern chain; a starred state also lists itself,
        // which is how the helpers below recognise "this state may be skipped".
        Set<State> parents = new HashSet<>();
        // Transition table: input character (or '.') -> states reachable on it.
        Map<Character, Set<State>> nextStates = new HashMap<>();
        // True when the input may legally end in this state.
        boolean isEnd = false;
    }

    /**
     * Simplifies the pattern by merging runs of adjacent starred items:
     * <ul>
     *   <li>{@code a*b*...c*.*  -> .*}</li>
     *   <li>{@code a*a*...a*a*  -> a*}</li>
     *   <li>{@code .*a*....*b*  -> .*}</li>
     * </ul>
     *
     * @param p the raw pattern
     * @return the simplified pattern
     */
    private String pretreatment(String p) {
        char[] preP = new char[p.length()];
        int prePIndex = -1;
        for (int i = 0; i < p.length(); i++) {
            char ch = p.charAt(i);
            preP[++prePIndex] = ch;
            if (ch != '*') {
                continue;
            }
            // ".*" absorbs every starred item that directly follows it.
            if (p.charAt(i - 1) == '.') {
                while (i + 2 < p.length() && p.charAt(i + 2) == '*') {
                    i += 2;
                }
                continue;
            }
            // "x*": look ahead through the run of starred items for a ".*".
            int j = i;
            boolean flag = false;
            while (j + 2 < p.length() && p.charAt(j + 2) == '*') {
                if (p.charAt(j + 1) == '.') {
                    flag = true;
                    break;
                }
                j += 2;
            }
            if (flag) {
                // The run contains ".*": rewrite the already-emitted "x*" as ".*" and
                // skip the rest of the run.
                preP[prePIndex - 1] = '.';
                i = j + 2;
                while (i + 2 < p.length() && p.charAt(i + 2) == '*') {
                    i += 2;
                }
                continue;
            }
            // Collapse immediate repetitions of the very same starred character.
            while (i + 2 < p.length() && p.charAt(i + 2) == '*') {
                if (p.charAt(i + 1) != preP[prePIndex - 1]) {
                    break;
                }
                i += 2;
            }
        }
        return new String(preP, 0, prePIndex + 1);
    }

    /**
     * Builds the NFA for an already simplified pattern.
     *
     * @param p the simplified pattern
     * @return the start state of the automaton
     */
    private State constructNFA(String p) {
        State start = new State();
        State current = start;
        int last = p.length() - 1;
        for (int i = 0; i < p.length(); i++) {
            char ch = p.charAt(i);
            if (ch != '*') {
                State next = new State();
                current.nextStates.computeIfAbsent(ch, k -> new HashSet<>()).add(next);
                if (i >= 1 && p.charAt(i - 1) == '*') {
                    // The state we are leaving is optional, so every state that can
                    // skip it must also be able to reach the new state directly.
                    repeatedParentAddState(current, ch, next);
                }
                next.parents.add(current);
                current = next;
                if (i == last) {
                    current.isEnd = true;
                }
                continue;
            }
            // "x*": the state created for x loops on x (one or more) ...
            char repeated = p.charAt(i - 1);
            current.nextStates.computeIfAbsent(repeated, k -> new HashSet<>()).add(current);
            // ... and lists itself as parent to flag that it may be skipped (zero times).
            current.parents.add(current);
            if (i == last) {
                current.isEnd = true;
                // A trailing optional state makes its predecessors accepting as well.
                repeatedParentMarkEnd(current);
            }
        }
        return start;
    }

    /**
     * Reports whether the remainder of {@code s} after index {@code cur} is accepted,
     * given that {@code state} was reached by consuming {@code s.charAt(cur)}.
     *
     * @param s     the input text
     * @param cur   index of the character already consumed to reach {@code state}
     * @param state the state the automaton is currently in
     * @return {@code true} if the rest of the input leads to an accepting state
     */
    public boolean doMatch(String s, int cur, State state) {
        Set<State> current = new HashSet<>();
        current.add(state);
        return acceptsFrom(s, cur + 1, current);
    }

    /**
     * Returns the states reachable from {@code root} on character {@code ch}, including
     * those reachable through a '.' transition. The returned set is a fresh copy, so the
     * automaton itself is never modified by a lookup.
     *
     * @param root the state to leave
     * @param ch   the input character
     * @return a new, possibly empty, set of successor states
     */
    public Set<State> matchStates(State root, char ch) {
        Set<State> matched = new HashSet<>();
        Set<State> exact = root.nextStates.get(ch);
        if (exact != null) {
            matched.addAll(exact);
        }
        Set<State> wildcard = root.nextStates.get('.');
        if (wildcard != null) {
            matched.addAll(wildcard);
        }
        return matched;
    }

    /**
     * Runs the automaton over {@code s} starting at index {@code from}, tracking every
     * simultaneously active state instead of backtracking path by path.
     */
    private boolean acceptsFrom(String s, int from, Set<State> current) {
        Set<State> active = current;
        for (int i = from; i < s.length() && !active.isEmpty(); i++) {
            char ch = s.charAt(i);
            Set<State> next = new HashSet<>();
            for (State state : active) {
                next.addAll(matchStates(state, ch));
            }
            active = next;
        }
        // If every path died early, active is empty and nothing can accept.
        for (State state : active) {
            if (state.isEnd) {
                return true;
            }
        }
        return false;
    }

    /**
     * Marks as accepting every predecessor that can reach the end by skipping
     * {@code root}; continues upwards while the predecessors are optional too.
     */
    private void repeatedParentMarkEnd(State root) {
        if (!root.parents.contains(root)) {
            // Not a starred state: it cannot be skipped, so the walk stops here.
            return;
        }
        for (State parent : root.parents) {
            if (parent == root) {
                continue;
            }
            parent.isEnd = true;
            repeatedParentMarkEnd(parent);
        }
    }

    /**
     * Adds a {@code ch} transition to {@code state} from every predecessor that can
     * skip {@code root}; continues upwards while the predecessors are optional too.
     */
    private void repeatedParentAddState(State root, char ch, State state) {
        if (!root.parents.contains(root)) {
            return;
        }
        for (State rootParent : root.parents) {
            Set<State> targets = rootParent.nextStates.computeIfAbsent(ch, k -> new HashSet<>());
            if (!targets.add(state)) {
                // Already wired (this includes the self-parent entry of root itself).
                continue;
            }
            repeatedParentAddState(rootParent, ch, state);
        }
    }
}
优化
无