算法Day09 | KMP，28. 实现 strStr() ，459.重复的子字符串

news2025/4/15 20:06:21

Day09

- KMP
- 28. 实现 strStr()
- 459.重复的子字符串

KMP

KMP是三个人人名缩写，用于在文本字符串text中搜索pattern字符串，返回在text中第一出现的位置。
算法做法就是在暴力匹配的基础上加速匹配。通过对pattern字符串求next数组(该数组也成为前缀表)，来跳过text部分匹配。next数组是为了实现跳过部分字符串的功能来设计的。暴力匹配的时间复杂度为 $O(m\times n)$ ，KMP的时间复杂度为 $O (m + n)$ 。

如何求出next数组？通过求出pattern所有字符子串的最长相等前、后缀构成next数组。具体实现可能存在其他加工，如对next数组减一，位移一位等。

假设有abbabbk，对于字符k有一个信息，k之前字符串中，前、后缀匹配最大长度为3。其中前、后缀为k前字符串的真子字符串，即不包含整体abbabb字符串。为什么不包含，因为是next数组对应长度的下标不能等于它本身，k是第6个元素，next[k] != 6，而且整体字符串也一定相等，无意义。

int next[] = {-1/*人为规定*/, 0/*根据定义*/, 0/1, ...};

求next数组是往前跳的过程，最后用到前三个元素来得到最终数组。

d4779b1360e9149fe4265c7afc4f631
0b8fd333c19d3f228d872827b21f74c
b7ef04f09a89f04411565a5cbf1b608
0e1a3cf49c881dba57b070df93deaff

010f77584f1ff064d7ac05c0b700a6e

//通过回跳得到完整的next数组
void getNext(const string& s, int* next) {
    //初始化next数组
    next[0] = -1;
	//由于next数组是看之前的字符。当s只有一个长度时，之前没有字符。所以只能是在此跳出。
	if (s.size() == 1) return;
    next[1] = 0;

    int i = 2;//遍历next数组，next数组的下标
    int cn = 0;//回跳的值，也是next数组的值
    while (i < s.size()) {
        if (s[i - 1] == s[cn]) {//next[i]是用next[i-1]求
            next[i++] = ++cn;//i为cn+1
        } else if (cn > 0) {//cn还可以往前跳
            cn = next[cn];
        } else {
            next[i++] = 0;//next[i]为0
        }
    }
}
int KMP(const string& text, const string& pattern) {
    int next[pattern.size()];
    getNext(pattern, next);
    int i = 0/*遍历text*/, j = 0/*遍历pattern*/;

    //保持i的位置不变，通过调节j的位置来匹配字符，直到i匹配完毕，匹配i+1
    while (i < text.size() && j < pattern.size()) {
        if (text[i] == pattern[j]) {//匹配上了，对比下一个
            i++;
            j++;
        } else if (j != 0) {//j还可以往回跳
            j = next[j];
        } else {//j跳不动了。i匹配不上，匹配i+1
            i++;
        }
    }
    //是否是根据j跳出的边界
    //j跳出while表示已经配出相同的了，才会导致j++到出界
    //不是因为j跳出的while，说明没有找到
    return j == pattern.size() ? i - j : -1;//i-j对应找到的下标
}

对第六行代码 if (s.size() == 1) return;，没有这一句，当字符串s只有一个字符时，next数组应该直接返回next[1] = {-1}，如果不跳出，则返回next[2] = {-1, 0};所以会造成dynamic-stack-buffer-overflow。
本地编译器来说，不加这一句也能通过，应该是给优化掉了。
LeetCode没有优化掉，不过还是应该填上这一句，保证代码的稳定。

28. 实现 strStr()

题目链接：28. 实现 strStr()
KMP代码同上

class Solution {
public:
    void getNext(const string &s, int* next) {
        next[0] = -1;
        if (s.size() == 1) return;
        next[1] = 0;
        int i = 2;
        int cn = 0;
        while (i < s.size()) {
            if (s[i - 1] == s[cn]) {
                next[i++] = ++cn;
            } else if (cn > 0) {
                cn = next[cn];
            } else {
                next[i++] = 0;
            }
        }
    }
    int strStr(string haystack, string needle) {
        int next[needle.size()];
        getNext(needle, next);
        int i = 0;
        int j = 0;
        while (i < haystack.size() && j < needle.size()) {
            if (haystack[i] == needle[j]) {
                i++;
                j++;
            } else if (j > 0) {
                j = next[j];
            } else {
                i++;
            }
        }
        return j == needle.size() ? i - j : -1;
    }
};

459.重复的子字符串

题目链接： 459.重复的子字符串
移动匹配
字符串s中有重复，s+s也会包含s。erase()时间复杂度为 $O (n)$

class Solution {
public:
    bool repeatedSubstringPattern(string s) {
        string ss = s + s;
        ss.erase(ss.begin());
        ss.erase(ss.end() - 1);
        return ss.find(s) != string::npos ? true : false;
    }
};

KMP
用到了next数组，根据next数组的构成原则可得，重复的子字符串 = 字符串 - 最长相等前后缀。如果没有最长相等前后缀，直接返回false。

class Solution {
public:
    void getNext(const string& s, int* next) {
        next[0] = -1;
        if (s.size() == 1) return;
        next[1] = 0;
        int i = 2;
        int cn = 0;
        while (i < s.size()) {
            if (s[i - 1] == s[cn]) {
                next[i++] = ++cn;
            } else if (cn > 0) {
                cn = next[cn];
            } else {
                next[i++] = 0;
            }
        }
    }
    bool repeatedSubstringPattern(string s) {
        int len = s.size();
        
        int next[len + 1];//多一位是因为next数组的构造不同，需要整个字符串的next数组
        
        getNext(s + 'A'/*只要加一个不满足s要求的字符都行*/, next);
        //比如字符串是"aba",变成"abaA"
        
        return (next[len] != 0/*等于0，直接跳出为false*/ && len % (len - next[len]) == 0) ? 
                true : false;
    }
};