Adding perfect_hashing module (with perfect numbering)
[nit.git] / lib / perfect_hashing.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Copyright 2014 Julien Pagès <julien.pages@lirmm.fr>
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16
17 # Perfect hashing and perfect numbering
18 module perfect_hashing
19
20 # Class for Perfect Hashing operations with perfect numbering
21 # It also stores the free identifiers for the perfect numbering
22 class Perfecthashing
23
24 # Union of interval for implementing perfect numbering
25 # Represents the interval of free identifiers
26 # A null value represents the upper bound of identifier
27 private var interval: List[Couple[nullable Int, nullable Int]]
28
29 # An array used as a temporary Hashtable for
30 # checking there is no collision between identifiers
31 private var tempht: Array[nullable Int]
32
33 # Initialize the structure of free identifiers
34 init
35 do
36 # By default, all identifiers are available
37 interval = new List[Couple[nullable Int, nullable Int]]
38 interval.push(new Couple[nullable Int, nullable Int](0, null))
39 tempht = new Array[nullable Int]
40 end
41
42 # Returns a mask composed by discriminants bits
43 # for these given identifiers
44 # The mask + 1 value is the size of the hastable to create
45 fun phand(ids: Array[Int]): Int
46 do
47 var mask = 1
48
49 # If ids is null return 1
50 if ids.length == 0 then
51 return mask
52 end
53
54 var orMask = 0
55 var andMask = 0
56 for i in ids do
57 orMask = orMask.bin_or(i)
58 andMask = andMask.bin_and(i)
59 end
60
61 mask = orMask.bin_xor(andMask)
62
63 # Re-initialize the hashtable with null values
64 for i in [0..mask+1] do tempht[i] = null
65
66 # Optimize the already computed mask
67 var newmask = 0
68 var i = mask.highest_bit
69 while i != 0 do
70 if mask.getbit(i) == 1 then
71 newmask = mask.bin_xor(1.lshift(i))
72
73 # If there is no collision, replace the old mask
74 if phandp(ids, newmask) then
75 mask = newmask
76 end
77 end
78
79 i -= 1
80 end
81
82 return mask + 1
83 end
84
85 # Checks if the mask is a perfect hashing function for
86 # these identifiers
87 fun phandp(ids: Array[Int], mask: Int): Bool
88 do
89 for i in ids do
90 var hv = i.bin_and(mask)
91 if tempht[hv] == mask then
92 return false
93 else
94 tempht[hv] = mask
95 end
96 end
97
98 return true
99 end
100
101 # Perfect numbering : give new free identifiers
102 # ids : identifiers to hash (super-classes)
103 # n : number of required free identifiers
104 # idc : This array will be filled with n free identifiers
105 # return the size of hashTable to create for theses identifiers (mask + 1)
106 # var ph = new Perfecthashing
107 # var idc = new Array[Int]
108 # assert ph.pnand([3, 7, 10], 1, idc) == 6
109 # assert idc[0] == 4
110 # var idc1 = new Array[Int]
111 # assert ph.pnand([3, 7, 10], 1, idc1) == 6
112 # assert idc1[0] == 6
113 fun pnand(ids: Array[Int], n: Int, idc: Array[Int]): Int
114 do
115 var mask = phand(ids) -1
116 var i = 0
117 while n+ids.length > (1.lshift(mask.number_bits(1))) do
118 # When there are not enough 1-bits
119 if mask.getbit(i) == 0 then
120 mask = mask.bin_xor(1.lshift(i))
121 end
122 i += 1
123 end
124
125 # Resetting hashtable
126 phandp(ids, mask)
127
128 # Fill the given array with n free identifiers
129 compute_least_free_ids(n, mask, idc)
130
131 return mask + 1
132 end
133
134
135 # Compute free identifiers for future perfect numbering
136 # n : number of required ids
137 # mask : perfect hashing parameter
138 # idc : This array will be filled with n free identifiers
139 private fun compute_least_free_ids(n: Int, mask: Int, idc: Array[Int])
140 do
141 while n != 0 do
142 idc.push(first_valid_id(mask))
143 n = n - 1
144 end
145 end
146
147 # Returns the first valid free identifier which correspond to this mask
148 private fun first_valid_id(mask: Int): Int
149 do
150 # Search for the first valid free identifier
151 var inter = interval.iterator
152 var found = false
153 var id = 0
154 while inter.is_ok and not found do
155 var i = inter.item.first.as(not null)
156 while i != inter.item.second and not found do
157 # Tests if this id is free for this mask
158 var hv = i.bin_and(mask)
159
160 # If the hashtable if full, push an empty item
161 if hv >= tempht.length then
162 tempht.push(null)
163 end
164
165 if tempht[hv] != mask then
166 found = true
167 id = i
168 end
169 i = i + 1
170 end
171
172 if not found then
173 inter.next
174 end
175 end
176
177 # Updates the structure of union of intervals
178 if id == inter.item.first then
179 if inter.item.first == inter.item.second then
180 inter.delete
181 else
182 inter.item.first += 1
183 end
184 else
185 if id != inter.item.first and id != inter.item.second then
186 # We need to split in two this interval
187 var last = inter.item.second
188 inter.item.second = id - 1
189 inter.next
190
191 var p = new Couple[nullable Int, nullable Int](id + 1, last)
192 if inter.is_ok then
193 inter.insert_before(p)
194 else
195 interval.push(p)
196 end
197 else
198 inter.item.second = id - 1
199 end
200 end
201
202 return id
203 end
204 end