A trie (or prefix tree) is a datastructure used to perform prefix searches.

The trie uses an arborescent datastructure to perform searches on a prefix. With this version of the trie, you can link data to nodes so the trie can be used as a kind of Map by prefix.

var trie = new Trie[Int]
trie["foo"] = 1
trie["fooo"] = 2
trie["foooo"] = 3
trie["bar"] = 4
trie["baz"] = 5

# Get stored values by key
print trie.has_key("foo")
print trie["foo"] == 1

# Get stored values by prefix
assert trie.has_prefix("fo")
assert not trie.has_prefix("z")
assert trie.find_by_prefix("foo") == [1, 2, 3]
assert trie.find_by_prefix("bar") == [4]
assert trie.find_by_prefix("z").is_empty

Introduced classes

class Trie[E: nullable Object]

trees :: Trie

Trie data structure for prefix searches

All class definitions

class Trie[E: nullable Object]

trees $ Trie

Trie data structure for prefix searches
package_diagram trees::trie trie trees trees trees::trie->trees core core trees->core ...core ... ...core->core a_star-m a_star-m a_star-m->trees::trie

Ancestors

module abstract_collection

core :: abstract_collection

Abstract collection classes and services.
module abstract_text

core :: abstract_text

Abstract class for manipulation of sequences of characters
module abstract_tree

trees :: abstract_tree

Introduce tree structures abstraction
module array

core :: array

This module introduces the standard array structure.
module bintree

trees :: bintree

Binary Tree data-structure
module bitset

core :: bitset

Services to handle BitSet
module bytes

core :: bytes

Services for byte streams and arrays
module circular_array

core :: circular_array

Efficient data structure to access both end of the sequence.
module codec_base

core :: codec_base

Base for codecs to use with streams
module codecs

core :: codecs

Group module for all codec-related manipulations
module collection

core :: collection

This module define several collection classes.
module core

core :: core

Standard classes and methods used by default by Nit programs and libraries.
module environ

core :: environ

Access to the environment variables of the process
module error

core :: error

Standard error-management infrastructure.
module exec

core :: exec

Invocation and management of operating system sub-processes.
module file

core :: file

File manipulations (create, read, write, etc.)
module fixed_ints

core :: fixed_ints

Basic integers of fixed-precision
module fixed_ints_text

core :: fixed_ints_text

Text services to complement fixed_ints
module flat

core :: flat

All the array-based text representations
module gc

core :: gc

Access to the Nit internal garbage collection mechanism
module hash_collection

core :: hash_collection

Introduce HashMap and HashSet.
module iso8859_1

core :: iso8859_1

Codec for ISO8859-1 I/O
module kernel

core :: kernel

Most basic classes and methods.
module list

core :: list

This module handle double linked lists
module math

core :: math

Mathematical operations
module native

core :: native

Native structures for text and bytes
module numeric

core :: numeric

Advanced services for Numeric types
module protocol

core :: protocol

module queue

core :: queue

Queuing data structures and wrappers
module range

core :: range

Module for range of discrete objects.
module rbtree

trees :: rbtree

A red–black tree is a data structure which is a type of self-balancing binary search tree.
module re

core :: re

Regular expression support for all services based on Pattern
module ropes

core :: ropes

Tree-based representation of a String.
module sorter

core :: sorter

This module contains classes used to compare things and sorts arrays.
module stream

core :: stream

Input and output streams of characters
module text

core :: text

All the classes and methods related to the manipulation of text entities
module time

core :: time

Management of time and dates
module union_find

core :: union_find

union–find algorithm using an efficient disjoint-set data structure
module utf8

core :: utf8

Codec for UTF-8 I/O

Parents

module trees

trees :: trees

General module for tree data structures

Children

module a_star-m

a_star-m

# A trie (or prefix tree) is a datastructure used to perform prefix searches.
#
# The trie uses an arborescent datastructure to perform searches on a prefix.
# With this version of the trie, you can link data to nodes so the trie can
# be used as a kind of Map by prefix.
#
# ~~~
# # Associate some integers to Map keys
# var trie = new Trie[Int]
# trie["foo"] = 1
# trie["fooo"] = 2
# trie["foooo"] = 3
# trie["bar"] = 4
# trie["baz"] = 5
#
# # Get stored values by key
# print trie.has_key("foo")
# print trie["foo"] == 1
#
# # Get stored values by prefix
# assert trie.has_prefix("fo")
# assert not trie.has_prefix("z")
# assert trie.find_by_prefix("foo") == [1, 2, 3]
# assert trie.find_by_prefix("bar") == [4]
# assert trie.find_by_prefix("z").is_empty
# ~~~
module trie

import trees

# Trie data structure for prefix searches
#
# ~~~
# # Associate some integers to Map keys
# var trie = new Trie[Int]
# trie["foo"] = 1
# trie["fooo"] = 2
# trie["bar"] = 3
#
# # Search by key
# assert trie.has_key("foo")
# assert trie["foo"] == 1
#
# # Search by prefix
# assert trie.find_by_prefix("") == [1, 3, 2]
# assert trie.find_by_prefix("foo") == [1, 2]
# assert trie.find_by_prefix("baz").is_empty
# ~~~
class Trie[E]
	super Map[String, E]

	# Trie root
	#
	# Root children are used to store the first letters.
	private var root = new TrieNode[E]

	redef fun []=(key, value) do
		var children = root.children

		for i in [0..key.length[ do
			var c = key.chars[i]

			var node
			if children.has_key(c) then
				node = children[c]
			else
				node = new TrieNode[E](c)
				children[c] = node
			end
			children = node.children

			if i == key.length - 1 then
				node.is_leaf = true
				node.value = value
			end
		end
	end

	# Cache node used between `has_key` and `[]`
	private var cache: nullable TrieNode[E] = null

	# Cache key used between `has_key` and `[]`
	private var cache_key: nullable String = null

	redef fun [](key) do
		if cache_key == key then return cache.as(not null).value.as(not null)
		var node = search_node(key)
		assert node != null
		return node.value
	end

	redef fun has_key(key) do
		var node = search_node(key)
		if node == null then return false
		var res = node.is_leaf
		if res then
			cache = node
			cache_key = key.as(String)
		end
		return res
	end

	# Find values stored under `prefix`
	#
	# ~~~
	# # Associate some integers to Map keys
	# var trie = new Trie[Int]
	# trie["foo"] = 1
	# trie["fooo"] = 2
	# trie["foooo"] = 3
	# trie["bar"] = 4
	#
	# assert trie.find_by_prefix("") == [1, 4, 2, 3]
	# assert trie.find_by_prefix("foo") == [1, 2, 3]
	# assert trie.find_by_prefix("bar") == [4]
	# assert trie.find_by_prefix("baz").is_empty
	# ~~~
	fun find_by_prefix(prefix: String): Array[E] do
		var node
		if prefix == "" then
			node = root
		else
			node = search_node(prefix)
		end
		if node == null then return new Array[E]
		return node.collect_values
	end

	# Find values stored under `prefix`
	#
	# ~~~
	# # Associate some integers to Map keys
	# var trie = new Trie[Int]
	# trie["foo"] = 1
	# trie["bar"] = 4
	# trie["baz"] = 5
	#
	# assert trie.has_prefix("")
	# assert trie.has_prefix("f")
	# assert not trie.has_prefix("z")
	# ~~~
	fun has_prefix(prefix: String): Bool do
		if prefix == "" then return true
		return search_node(prefix) != null
	end

	# Search a node by a key or prefix
	#
	# Returns `null` if no node matches `string`.
	private fun search_node(string: nullable Object): nullable TrieNode[E] do
		if string == null then return null
		string = string.to_s
		var children = root.children
		var node = null
		for i in [0..string.length[ do
			var c = string.chars[i]
			if children.has_key(c) then
				node = children[c]
				children = node.children
			else
				node = null
				break
			end
		end
		return node
	end
end

# TrieNode used to store the Character key of the value
private class TrieNode[E]
	var c: nullable Char
	var value: nullable E
	var children = new HashMap[Char, TrieNode[E]]
	var is_leaf: Bool = false

	fun collect_values: Array[E] do
		var values = new Array[E]

		var todo = new List[TrieNode[E]]
		todo.add self
		while todo.not_empty do
			var node = todo.shift
			var value = node.value
			if value != null then values.add value
			for child in node.children.values do
				todo.push child
			end
		end
		return values
	end
end
lib/trees/trie.nit:15,1--205,3