常用数据结构

常用数据结构 - 前缀树

news2024/12/31 21:40:26

应用场景

一般涉及到需要前缀信息来分析问题或者解决问题的时候，就应该考虑一下前缀树。
前缀树是对字符串每个位置上的字符进行编码，记录的是字符本身和字符串的信息，非常适合用于前缀匹配。

数据结构

前缀树的每个节点，是根据字符串的内容记录的路径信息，它有两种实现：

根据字符串中包含的字符种类数 $N$ ，在每个节点上创建对应的 N 叉树。这样一来，每个节点的后继就可以通过树来轻松找到。另外还需要一个布尔型的变量 $e n d$ ，来标记是否有字符串在这里结束（或者用整型来统计次数）。这样的做法，可以参见 Leetcode 208.实现 Trie (前缀树)。
字符串的路径信息，可以通过使用二维数组 $t ree$ 搭配计数变量 $co u n t$ 来维护。其中第一个下标根据某个字符信息来索引相应的类别信息，第二个下标则表示下一个节点的数据。此外，定义 $p a ss$ 和 $e n d$ 数组来描述经过这个节点（包含到这里为止的前缀）的字符串以及以这个节点为终止位置（包含整个字符串）的字符串类别。

具体实现

树记录信息

class TreeNode {
    public int pass; // pass 表示经过当前节点（包含这个字符）的字符串数量
    public int end; // end 表示以当前节点为止（包含整个字符串）的字符串数量
    public TreeNode[] path; // 数组形式的树

    public TreeNode() {
        pass = 0;
        end = 0;
        path = new TreeNode[26];
    }
}

class Trie {
    private final TreeNode root;

    public Trie() {
        root = new TreeNode();
    }

    // 插入新字符串
    public void insert(String word) {
        TreeNode node = root;
        // 更新包含当前节点的字符串数量
        node.pass++;
        for (int i = 0, cur; i < word.length(); i++) {
            // cur 表示当前节点的下标
            cur = word.charAt(i) - 'a';
            if (node.path[cur] == null) {
                node.path[cur] = new TreeNode();
            }
            // 移动工作指针
            node = node.path[cur];
            node.pass++;
        }
        // 更新以当前节点为止的字符串数量
        node.end++;
    }

    // 统计某个字符串出现的次数
    public int countWords(String word) {
        TreeNode node = root;
        for (int i = 0, cur; i < word.length(); i++) {
            cur = word.charAt(i) - 'a';
            // 到某个节点时发现没有往下走的路径，则返回 0
            if (node.path[cur] == null) {
                return 0;
            }
            // 移动工作指针
            node = node.path[cur];
        }
        // 返回以当前节点为止的字符串数量
        return node.end;
    }

    // 统计以某个字符串为前缀的字符串的数量
    public int countPrefixes(String prefix) {
        TreeNode node = root;
        for (int i = 0, cur; i < prefix.length(); i++) {
            cur = prefix.charAt(i) - 'a';
            // 到某个节点时发现没有往下走的路径，则返回 0
            if (node.path[cur] == null) {
                return 0;
            }
            // 移动工作指针
            node = node.path[cur];
        }
        // 返回包含当前节点的字符串数量
        return node.pass;
    }

    // 删除字符串
    public void delete(String word) {
        // 确保该字符串存在，提高代码的健壮性
        if (countWords(word) > 0) {
            TreeNode node = root;
            // 更新包含该节点的字符串数量
            node.pass--;
            for (int i = 0, cur; i < word.length(); i++) {
                cur = word.charAt(i) - 'a';
                // 一旦更新过后经过下个节点的字符串数量为零，那么将走向它的路径制空
                if (--node.path[cur].pass == 0) {
                    node.path[cur] = null;
                    return;
                }
                // 移动工作指针
                node = node.path[cur];
            }
            // 更新以当前节点为止的字符串数量
            node.end--;
        }
    }
}

二维数组维护

import java.util.Arrays;

public class Trie {

    public static int MAX_N = 100;
    public static int[][] tree = new int[MAX_N][26]; // 二维数组维护路径信息
    public static int[] pass = new int[MAX_N]; // pass 表示经过某个节点（包含某个字符）的字符串数量
    public static int[] end = new int[MAX_N]; // end 表示以某个节点为止（包含某个字符串）的字符串数量
    public static int count; // count 维护存在的字符串种类数

    public Trie() {
        count = 1;
    }

    // 插入新字符串
    public static void insert(String word) {
        // 从类别为 1 的字符串开始访问
        int index = 1;
        // 更新包含该节点的字符串数量
        pass[index]++;
        for (int i = 0, cur; i < word.length(); i++) {
            cur = word.charAt(i) - 'a';
            // 如果这种字符串先前不存在，那么 count 先自增
            if (tree[index][cur] == 0) {
                tree[index][cur] = ++count;
            }
            // 移动工作指针
            index = tree[index][cur];
            pass[index]++;
        }
        // 更新以当前节点为止的字符串数量
        end[index]++;
    }

    // 统计某个字符串出现的次数
    public static int countWords(String word) {
        // 从类别为 1 的字符串开始访问
        int index = 1;
        for (int i = 0, cur; i < word.length(); i++) {
            cur = word.charAt(i) - 'a';
            // 如果这种字符串先前不存在，则返回 0
            if (tree[index][cur] == 0) {
                return 0;
            }
            // 移动工作指针
            index = tree[index][cur];
        }
        // 返回以当前节点为止的字符串数量
        return end[index];
    }

    // 统计以某个字符串为前缀的字符串的数量
    public static int countPrefixes(String pre) {
        // 从类别为 1 的字符串开始访问
        int index = 1;
        for (int i = 0, cur; i < pre.length(); i++) {
            cur = pre.charAt(i) - 'a';
            // 到某个节点时发现没有往下走的路径，则返回 0
            if (tree[index][cur] == 0) {
                return 0;
            }
            // 移动工作指针
            index = tree[index][cur];
        }
        // 返回包含当前节点的字符串数量
        return pass[index];
    }

    // 删除字符串
    public static void delete(String word) {
        // 确保该字符串存在，提高代码的健壮性
        if (countWords(word) > 0) {
            // 从类别为 1 的字符串开始访问
            int index = 1;
            for (int i = 0, cur; i < word.length(); i++) {
                cur = word.charAt(i) - 'a';
                // 一旦更新过后经过下个节点的字符串数量为零，那么将走向它的路径制空
                if (--pass[tree[index][cur]] == 0) {
                    tree[index][cur] = 0;
                    return;
                }
                // 移动工作指针
                index = tree[index][cur];
            }
            // 更新以当前节点为止的字符串数量
            end[index]--;
        }
    }

    // 由于定义的是静态变量，在不同测试用例之间需要清理空间防止出现脏数据
    // 清空二维数组
    public static void clear() {
        for (int i = 1; i <= count; i++) {
            Arrays.fill(tree[i], 0);
            end[i] = 0;
            pass[i] = 0;
        }
    }
}