Perfect hashing and perfect numbering

Introduced classes

class Perfecthashing

perfect_hashing :: Perfecthashing

Class for Perfect Hashing operations with perfect numbering

All class definitions

class Perfecthashing

perfect_hashing $ Perfecthashing

Class for Perfect Hashing operations with perfect numbering
package_diagram perfect_hashing::perfect_hashing perfect_hashing core core perfect_hashing::perfect_hashing->core a_star-m a_star-m a_star-m->perfect_hashing::perfect_hashing

Ancestors

module abstract_collection

core :: abstract_collection

Abstract collection classes and services.
module abstract_text

core :: abstract_text

Abstract class for manipulation of sequences of characters
module array

core :: array

This module introduces the standard array structure.
module bitset

core :: bitset

Services to handle BitSet
module bytes

core :: bytes

Services for byte streams and arrays
module circular_array

core :: circular_array

Efficient data structure to access both end of the sequence.
module codec_base

core :: codec_base

Base for codecs to use with streams
module codecs

core :: codecs

Group module for all codec-related manipulations
module collection

core :: collection

This module define several collection classes.
module environ

core :: environ

Access to the environment variables of the process
module error

core :: error

Standard error-management infrastructure.
module exec

core :: exec

Invocation and management of operating system sub-processes.
module file

core :: file

File manipulations (create, read, write, etc.)
module fixed_ints

core :: fixed_ints

Basic integers of fixed-precision
module fixed_ints_text

core :: fixed_ints_text

Text services to complement fixed_ints
module flat

core :: flat

All the array-based text representations
module gc

core :: gc

Access to the Nit internal garbage collection mechanism
module hash_collection

core :: hash_collection

Introduce HashMap and HashSet.
module iso8859_1

core :: iso8859_1

Codec for ISO8859-1 I/O
module kernel

core :: kernel

Most basic classes and methods.
module list

core :: list

This module handle double linked lists
module math

core :: math

Mathematical operations
module native

core :: native

Native structures for text and bytes
module numeric

core :: numeric

Advanced services for Numeric types
module protocol

core :: protocol

module queue

core :: queue

Queuing data structures and wrappers
module range

core :: range

Module for range of discrete objects.
module re

core :: re

Regular expression support for all services based on Pattern
module ropes

core :: ropes

Tree-based representation of a String.
module sorter

core :: sorter

This module contains classes used to compare things and sorts arrays.
module stream

core :: stream

Input and output streams of characters
module text

core :: text

All the classes and methods related to the manipulation of text entities
module time

core :: time

Management of time and dates
module union_find

core :: union_find

union–find algorithm using an efficient disjoint-set data structure
module utf8

core :: utf8

Codec for UTF-8 I/O

Parents

module core

core :: core

Standard classes and methods used by default by Nit programs and libraries.

Children

module a_star-m

a_star-m

# Perfect hashing and perfect numbering
module perfect_hashing

# Class for Perfect Hashing operations with perfect numbering
# It also stores the free identifiers for the perfect numbering
class Perfecthashing

	# Union of interval for implementing perfect numbering
	# Represents the interval of free identifiers
	# A null value represents the upper bound of identifier
	private var interval = new List[Couple[nullable Int, nullable Int]]

	# An array used as a temporary Hashtable for
	# checking there is no collision between identifiers
	private var tempht = new Array[nullable Int]

	# Initialize the structure of free identifiers
	init
	do
		# By default, all identifiers are available
		interval.push(new Couple[nullable Int, nullable Int](1, null))
	end

	# Returns a mask composed by discriminants bits
	# for these given identifiers
	# The mask + 1 value is the size of the hastable to create
	fun phand(ids: Array[Int]): Int
	do
		var mask = 1

		# If ids is null return 1
		if ids.length == 0 then
			return mask
		end

		var orMask = 0
		var andMask = 0
		for i in ids do
			orMask |= i
			andMask &= i
		end

		mask = orMask ^ andMask

		# Re-initialize the hashtable with null values
		for i in [0..(mask+1)] do tempht[i] = null

		# Optimize the already computed mask
		var newmask = 0
		var i = mask.highest_bit
		while i != 0 do
			if mask.getbit(i) == 1 then
				newmask = mask ^ (1 << i)

				# If there is no collision, replace the old mask
				if phandp(ids, newmask) then
					mask = newmask
				end
			end

			i -= 1
		end

		return mask + 1
	end

	# Checks if the mask is a perfect hashing function for
	# these identifiers
	fun phandp(ids: Array[Int], mask: Int): Bool
	do
		for i in ids do
			var hv = i & mask
			if tempht[hv] == mask then
				return false
			else
				tempht[hv] = mask
			end
		end

		return true
	end

	# Perfect numbering : give new free identifiers
	# ids : identifiers to hash (super-classes)
	# n : number of required free identifiers
	# idc : This array will be filled with n free identifiers
	# return the size of hashTable to create for theses identifiers (mask + 1)
	#
	#     var ph = new Perfecthashing
	#     var idc = new Array[Int]
	#     assert ph.pnand([3, 7, 10], 1, idc) == 6
	#     assert idc[0] == 4
	#     var idc1 = new Array[Int]
	#     assert ph.pnand([3, 7, 10], 1, idc1) == 6
	#     assert idc1[0] == 6
	fun pnand(ids: Array[Int], n: Int, idc: Array[Int]): Int
	do
		var mask = phand(ids) -1
		var i = 0
		while n+ids.length > (1 << mask.number_bits(1)) do
			# When there are not enough 1-bits
			if mask.getbit(i) == 0 then
				mask = mask ^ (1 << i)
			end
			i += 1
		end

		# Resetting hashtable
		for j in [0..(mask+1)] do tempht[j] = null

		# Set the temporary hashtable for `compute_least_free_ids`
		phandp(ids, mask)

		# Fill the given array with n free identifiers
		compute_least_free_ids(n, mask, idc)

		return mask + 1
	end


	# Compute free identifiers for future perfect numbering
	# n : number of required ids
	# mask : perfect hashing parameter
	# idc : This array will be filled with n free identifiers
	private fun compute_least_free_ids(n: Int, mask: Int, idc: Array[Int])
	do
		while n != 0 do
			idc.push(first_valid_id(mask))
			n = n - 1
		end
	end

	# Returns the first valid free identifier which correspond to this mask
	private fun first_valid_id(mask: Int): Int
	do
		# Search for the first valid free identifier
		var inter = interval.iterator
		var found = false
		var id = 0
		while inter.is_ok and not found do
			var i = inter.item.first.as(not null)
			while i != inter.item.second and not found do
				# Tests if this id is free for this mask
				var hv = i & mask

				# If the hashtable if full, push an empty item
				if hv >= tempht.length then
					tempht.push(null)
				end

				if tempht[hv] != mask then
					found = true
					id = i
				end
				i = i + 1
			end

			if not found then
				inter.next
			end
		end

		# Updates the structure of union of intervals
		if id == inter.item.first then
			if inter.item.first == inter.item.second then
				inter.delete
			else
				inter.item.first += 1
			end
		else
			if id != inter.item.first and id != inter.item.second then
				# We need to split in two this interval
				var last = inter.item.second
				inter.item.second = id - 1
				inter.next

				var p = new Couple[nullable Int, nullable Int](id + 1, last)
				if inter.is_ok then
					inter.insert_before(p)
				else
					interval.push(p)
				end
			else
				inter.item.second = id - 1
			end
		end

		return id
	end
end
lib/perfect_hashing/perfect_hashing.nit:17,1--206,3