Merge: Nitsmell : Adding new code smells and print console updated
[nit.git] / lib / trees / trie.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # A trie (or prefix tree) is a datastructure used to perform prefix searches.
16 #
17 # The trie uses an arborescent datastructure to perform searches on a prefix.
18 # With this version of the trie, you can link data to nodes so the trie can
19 # be used as a kind of Map by prefix.
20 #
21 # ~~~
22 # # Associate some integers to Map keys
23 # var trie = new Trie[Int]
24 # trie["foo"] = 1
25 # trie["fooo"] = 2
26 # trie["foooo"] = 3
27 # trie["bar"] = 4
28 # trie["baz"] = 5
29 #
30 # # Get stored values by key
31 # print trie.has_key("foo")
32 # print trie["foo"] == 1
33 #
34 # # Get stored values by prefix
35 # assert trie.has_prefix("fo")
36 # assert not trie.has_prefix("z")
37 # assert trie.find_by_prefix("foo") == [1, 2, 3]
38 # assert trie.find_by_prefix("bar") == [4]
39 # assert trie.find_by_prefix("z").is_empty
40 # ~~~
41 module trie
42
43 import trees
44
45 # Trie data structure for prefix searches
46 #
47 # ~~~
48 # # Associate some integers to Map keys
49 # var trie = new Trie[Int]
50 # trie["foo"] = 1
51 # trie["fooo"] = 2
52 # trie["bar"] = 3
53 #
54 # # Search by key
55 # assert trie.has_key("foo")
56 # assert trie["foo"] == 1
57 #
58 # # Search by prefix
59 # assert trie.find_by_prefix("") == [1, 3, 2]
60 # assert trie.find_by_prefix("foo") == [1, 2]
61 # assert trie.find_by_prefix("baz").is_empty
62 # ~~~
63 class Trie[E]
64 super Map[String, E]
65
66 # Trie root
67 #
68 # Root children are used to store the first letters.
69 private var root = new TrieNode[E]
70
71 redef fun []=(key, value) do
72 var children = root.children
73
74 for i in [0..key.length[ do
75 var c = key.chars[i]
76
77 var node
78 if children.has_key(c) then
79 node = children[c]
80 else
81 node = new TrieNode[E](c)
82 children[c] = node
83 end
84 children = node.children
85
86 if i == key.length - 1 then
87 node.is_leaf = true
88 node.value = value
89 end
90 end
91 end
92
93 # Cache node used between `has_key` and `[]`
94 private var cache: nullable TrieNode[E] = null
95
96 # Cache key used between `has_key` and `[]`
97 private var cache_key: nullable String = null
98
99 redef fun [](key) do
100 if cache_key == key then return cache.as(not null).value.as(not null)
101 var node = search_node(key)
102 assert node != null
103 return node.value
104 end
105
106 redef fun has_key(key) do
107 var node = search_node(key)
108 if node == null then return false
109 var res = node.is_leaf
110 if res then
111 cache = node
112 cache_key = key.as(String)
113 end
114 return res
115 end
116
117 # Find values stored under `prefix`
118 #
119 # ~~~
120 # # Associate some integers to Map keys
121 # var trie = new Trie[Int]
122 # trie["foo"] = 1
123 # trie["fooo"] = 2
124 # trie["foooo"] = 3
125 # trie["bar"] = 4
126 #
127 # assert trie.find_by_prefix("") == [1, 4, 2, 3]
128 # assert trie.find_by_prefix("foo") == [1, 2, 3]
129 # assert trie.find_by_prefix("bar") == [4]
130 # assert trie.find_by_prefix("baz").is_empty
131 # ~~~
132 fun find_by_prefix(prefix: String): Array[E] do
133 var node
134 if prefix == "" then
135 node = root
136 else
137 node = search_node(prefix)
138 end
139 if node == null then return new Array[E]
140 return node.collect_values
141 end
142
143 # Find values stored under `prefix`
144 #
145 # ~~~
146 # # Associate some integers to Map keys
147 # var trie = new Trie[Int]
148 # trie["foo"] = 1
149 # trie["bar"] = 4
150 # trie["baz"] = 5
151 #
152 # assert trie.has_prefix("")
153 # assert trie.has_prefix("f")
154 # assert not trie.has_prefix("z")
155 # ~~~
156 fun has_prefix(prefix: String): Bool do
157 if prefix == "" then return true
158 return search_node(prefix) != null
159 end
160
161 # Search a node by a key or prefix
162 #
163 # Returns `null` if no node matches `string`.
164 private fun search_node(string: nullable Object): nullable TrieNode[E] do
165 if string == null then return null
166 string = string.to_s
167 var children = root.children
168 var node = null
169 for i in [0..string.length[ do
170 var c = string.chars[i]
171 if children.has_key(c) then
172 node = children[c]
173 children = node.children
174 else
175 node = null
176 break
177 end
178 end
179 return node
180 end
181 end
182
183 # TrieNode used to store the Character key of the value
184 private class TrieNode[E]
185 var c: nullable Char
186 var value: nullable E
187 var children = new HashMap[Char, TrieNode[E]]
188 var is_leaf: Bool = false
189
190 fun collect_values: Array[E] do
191 var values = new Array[E]
192
193 var todo = new List[TrieNode[E]]
194 todo.add self
195 while todo.not_empty do
196 var node = todo.shift
197 var value = node.value
198 if value != null then values.add value
199 for child in node.children.values do
200 todo.push child
201 end
202 end
203 return values
204 end
205 end