讨论一下Trie树的基本性质、应用场景和具体实现。
基本性质
- 根节点不包含字符,除根节点外每一个节点都只包含一个字符。
- 从根节点到某一节点,路径上经过的字符连接起来,为该节点对应的字符串。
- 每个节点的所有子节点包含的字符都不相同。
应用场景
- 字符串检索:事先将已知的一些字符串(字典)的有关信息保存到trie树里,查找另外一些未知字符串是否出现过或者出现频率。
- 字符串最长公共前缀:Trie树利用多个字符串的公共前缀来节省存储空间,反之,当我们把大量字符串存储到一棵trie树上时,我们可以快速得到某些字符串的公共前缀。
- 排序
- 词频统计
- 字符串搜索的前缀匹配
具体实现
Trie树使用JavaScript来实现实在是简单的很,因为JS可以扩展实例化的对象,直接用下标访问对象,不需要再建一个哈希表了。
构造完字典后,比如要查找"tree",只需要这样访问:root['t']['r']['e']['e'].isWord,任意一步拿不到就说明单词不在字典中。
具体代码如下,看看就懂了,并不是个什么高大上的东西:
function generateUUID() {
var d = new Date().getTime();
var uuid = 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
var r = (d + Math.random() * 16) % 16 | 0;
d = Math.floor(d / 16);
return (c == 'x' ? r : (r & 0x3 | 0x8)).toString(16);
});
return uuid;
}
function TrieNode(key) {
this.key = key;
this.isWord = false;
}
/**
* Initialize your data structure here.
*/
function Trie() {
this.root = new TrieNode('root');
};
/**
* Inserts a word into the trie.
* @param {string} word
* @return {void}
*/
Trie.prototype.insert = function(word) {
var trie = this.root,
cur;
for (var i = 0; i < word.length; i++) {
cur = word[i];
if (!trie[cur]) {
trie[cur] = new TrieNode(cur);
}
trie = trie[word[i]];
}
trie.isWord = true;
};
/**
* Returns if the word is in the trie.
* @param {string} word
* @return {boolean}
*/
Trie.prototype.search = function(word) {
var trie = this.root,
cur;
for (var i = 0; i < word.length; i++) {
cur = word[i];
if (!trie[cur]) {
return false;
}
trie = trie[cur];
}
return trie.isWord;
};
/**
* Returns if there is any word in the trie that starts with the given prefix.
* @param {string} prefix
* @return {boolean}
*/
Trie.prototype.startsWith = function(prefix) {
var trie = this.root,
cur;
for (var i = 0; i < prefix.length; i++) {
cur = prefix[i];
if (!trie[cur]) {
return false;
}
trie = trie[cur];
}
return true;
};
Trie.prototype.smartAlert = function(key) {
var result = [];
var trie = this.root,
cur;
for (var i = 0; i < key.length; i++) {
cur = key[i];
if (!trie[cur]) {
return result;
}
trie = trie[cur];
}
// 还要沿着分支继续走下去
var result = [];
next(trie, key, result);
return result;
}
function next(trie, key, result) {
// 找到一个单词
if (trie.isWord) {
result.push(key)
}
for (var attr in trie) {
if (attr.length == 1) {
// 如果有下属节点
var sonTrie = trie[attr];
var tempKey = key + sonTrie.key;
next(sonTrie, tempKey, result);
}
}
}
// 简单测试
// var obj = new Trie();
// obj.insert('ball');
// obj.insert('balls');
// obj.insert('ballf');
// obj.insert('sense');
// obj.insert('bacd');
// console.log(obj);
// var param_2 = obj.search('abc');
// console.log(param_2);
// var param_3 = obj.startsWith('abd');
// console.log(param_3);
// console.log(obj.smartAlert('ba'));
// 400000字典 10000次查询
function initTest() {
var dirSize = 400000;
var askWdSize = 9999;
var wordMinNum = 5;
var wordMaxNum = 20;
var strMinNum = 2;
var strMaxNum = 12;
var askArray = [];
var dirArray = [];
for (var i = 0; i < dirSize; i++) {
dirArray[i] = generateUUID().toString().toLowerCase().replace(/-/g, "").substring(0, Math.floor(Math.random() * (wordMaxNum - wordMinNum + 1)) + wordMinNum);
}
for (var i = 0; i < askWdSize; i++) {
askArray[i] = generateUUID().toString().toLowerCase().replace(/-/g, "").substring(0, Math.floor(Math.random() * (strMaxNum - strMinNum + 1)) + strMinNum);
}
var startTime = new Date().getTime();
var obj = new Trie();
for (var i = 0; i < dirSize; i++) {
obj.insert(dirArray[i]);
}
for (var i = 0; i < askWdSize; i++) {
console.log(obj.smartAlert(askArray[i]));
}
console.log(new Date().getTime() - startTime);
}
initTest();
Java实现
JS实现起来是比较简单的,那么Java中呢,其实也并不复杂,使用HashMap即可,代码如下:
package cn.com;
import java.util.HashMap;
import java.util.Map;
public class Node {
char content;
boolean isEnd;
int count;
Map<Character, Node> childList; // 不推荐使用List
public Node(char c) {
childList = new HashMap<Character, Node>();
isEnd = false;
content = c;
count = 0;
}
public Node subNode(char c) {
return childList.get(c);
}
}
package cn.com;
public class Trie {
private Node root;
public Trie() {
root = new Node(' ');
}
public void insert(String word) {
if (search(word) == true)
return;
Node current = root;
for (int i = 0; i < word.length(); i++) {
char curChar = word.charAt(i);
Node child = current.subNode(curChar);
if (child != null) {
current = child;
} else {
current.childList.put(curChar, new Node(curChar));
current = current.subNode(curChar);
}
current.count++;
}
// Set isEnd to indicate end of the word
current.isEnd = true;
}
public boolean search(String word) {
Node current = root;
for (int i = 0; i < word.length(); i++) {
char curChar = word.charAt(i);
if (current.subNode(curChar) == null)
return false;
else
current = current.subNode(curChar);
}
return current.isEnd;
}
public void deleteWord(String word) {
if (search(word) == false)
return;
Node current = root;
for (char c : word.toCharArray()) {
Node child = current.subNode(c);
if (child.count == 1) {
current.childList.remove(c);
return;
} else {
child.count--;
current = child;
}
}
current.isEnd = false;
}
}