LeetCode刷题笔记
基础数据结构
二叉树

07字典树

# 05 字典树

字典树一般用于判断字符串是否存在或者是否具有某种字符串前缀。如下图一个字典树中，存储了 A、to、tea、ted、ten、i、in 和 inn，以及它们的频率。

为什么需要用字典树解决这类问题呢？假如我们有一个储存了近万个单词的字典，即使我们使用哈希，在其中搜索一个单词的实际开销也是非常大的，且无法轻易支持搜索单词前缀。然而由于一个英文单词的长度 n 通常在 10 以内，如果我们使用字典树，则可以在 O(n)——近似 O(1)的时间内完成搜索，且额外开销非常小。

# 208 实现 Trie (前缀树) (opens new window)

尝试建立一个字典树，支持快速插入单词、查找单词、查找单词前缀的功能

以下是数据结构的调用样例

输入: ["Trie", "insert", "search", "search", "startsWith", "insert", "search"] [[], ["apple"], ["apple"], ["app"], ["app"], ["app"], ["app"]] 输出: [null, null, true, false, true, null, true]

解释: Trie trie = new Trie(); trie.insert("apple"); trie.search("apple"); // 返回 True trie.search("app"); // 返回 False trie.startsWith("app"); // 返回 True trie.insert("app"); trie.search("app"); // 返回 True

class Trie {
private:
    vector<Trie*> children;
    bool isEnd;

    Trie* searchPrefix(string prefix) {
        Trie* node = this;
        for (char ch : prefix) {
            ch -= 'a';
            if (node->children[ch] == nullptr) {
                return nullptr;
            }
            node = node->children[ch];
        }
        return node;
    }

public:
    Trie() : children(26), isEnd(false) {}

    void insert(string word) {
        Trie* node = this;
        for (char ch : word) {
            ch -= 'a';
            if (node->children[ch] == nullptr) {
                node->children[ch] = new Trie();
            }
            node = node->children[ch];
        }
        node->isEnd = true;
    }

    bool search(string word) {
        Trie* node = this->searchPrefix(word);
        return node != nullptr && node->isEnd;
    }

    bool startsWith(string prefix) {
        return this->searchPrefix(prefix) != nullptr;
    }
};

# 820 单词的压缩编码 (opens new window)

单词数组 words 的有效编码由任意助记字符串 s 和下标数组 indices 组成，且满足：

words.length == indices.length
助记字符串 s 以 '#' 字符结尾
对于每个下标 indices[i] ，s 的一个从 indices[i] 开始、到下一个 '#' 字符结束（但不包括 '#'）的子字符串恰好与 words[i] 相等

给定一个单词数组 words ，返回成功对 words 进行编码的最小助记字符串 s 的长度。

输入：words = ["time", "me", "bell"]

输出：10

解释：一组有效编码为 s = "time#bell#" 和 indices = [0, 2, 5] 。 words[0] = "time" ，s 开始于 indices[0] = 0 到下一个 '#' 结束的子字符串，如加粗部分所示 "time#bell#" words[1] = "me" ，s 开始于 indices[1] = 2 到下一个 '#' 结束的子字符串，如加粗部分所示 "time#bell#" words[2] = "bell" ，s 开始于 indices[2] = 5 到下一个 '#' 结束的子字符串，如加粗部分所示 "time#bell#"

解析:

class Trie{
private:
    Trie* child[26];
    bool found;
public:
    Trie(){
        for(int i=0;i<26;++i){
            child[i] = nullptr;
            found = false;
        }
    }

    void insert(string word){
        Trie* node = this;
        for(const auto ch:word){
            if(node->child[ch-'a'] == nullptr){
                node->child[ch-'a'] = new Trie();
            }
            node = node->child[ch-'a'];
        }
        node->found = true;
    }

    bool search(string word){
        Trie* node = this;
        for(const auto ch:word){
            if(node->child[ch-'a']==nullptr){
                return false;
            }
            node = node->child[ch-'a'];
        }
        return node->found;
    }

    bool prefix(string prefix){
        Trie* node = this;
        for(const auto ch:prefix){
            if(node->child[ch-'a']==nullptr){
                return false;
            }
            node = node->child[ch-'a'];
        }
        return true;
    }
};

class Solution {
public:
    int minimumLengthEncoding(vector<string>& words) {
        unordered_set<string> hash;
        // 要从长单词到短单词构建字典树
        sort(words.begin(),words.end(),[](string& a, string& b){
            return a.length() > b.length();
        });
        Trie* trie = new Trie();
        // 遍历所有单词,如果已经存在字典树中则直接跳过,反之生成插入新单词
        for(string& word:words){
            reverse(word.begin(),word.end()); // 后缀检验和插入
            if(trie->prefix(word)){
                continue;
            }
            trie->insert(word);
            hash.insert(word);
        }

        // 计算字典树中表征的单词个数
        int ans = 0;
        for(auto word:hash){
            ans += word.length()+1;
        }
        return ans;
    }
};

上次更新: 2023/11/19, 12:55:48

← 06二叉搜索树的操作 08二叉搜索树BST→