Merge: Intro an unrolled linked list, and fix `List::insert`
authorJean Privat <jean@pryen.org>
Wed, 27 May 2015 00:54:56 +0000 (20:54 -0400)
committerJean Privat <jean@pryen.org>
Wed, 27 May 2015 10:11:11 +0000 (06:11 -0400)
An unrolled list is a sequence implemented by a linked list of arrays.

I use this list for a very long FIFO. `Array::shift` is very slow and a large `List` can create too many small nodes for the GC.

The test compares the behavior of `UnrolledList` with `List` on random data sets. It allowed me to find a bug in `List::insert`.

Pull-Request: #1379
Reviewed-by: Jean Privat <jean@pryen.org>
Reviewed-by: Lucas Bajolet <r4pass@hotmail.com>
Reviewed-by: Alexandre Terrasa <alexandre@moz-code.org>
Reviewed-by: Romain Chanoir <chanoir.romain@courrier.uqam.ca>

lib/more_collections.nit
lib/standard/collection/list.nit
tests/test_unrolled_list.nit [new file with mode: 0644]

index 4cafb1d..4f75430 100644 (file)
@@ -201,3 +201,332 @@ class DefaultMap[K, V]
 
        redef fun provide_default_value(key) do return default
 end
+
+# An unrolled linked list
+#
+# A sequence implemented as a linked list of arrays.
+#
+# This data structure is similar to the `List` but it can benefit from
+# better cache performance, lower data overhead for the nodes metadata and
+# it spares the GC to allocate many small nodes.
+class UnrolledList[E]
+       super Sequence[E]
+
+       # Desired capacity for each nodes
+       #
+       # By default at `32`, it can be increased for very large lists.
+       #
+       # It can be modified anytime, but newly created nodes may still be assigned
+       # the same capacity of old nodes when created by `insert`.
+       var nodes_length = 32 is writable
+
+       private var head_node: UnrolledNode[E] = new UnrolledNode[E](nodes_length)
+
+       private var tail_node: UnrolledNode[E] = head_node
+
+       redef var length = 0
+
+       redef fun clear
+       do
+               head_node = new UnrolledNode[E](nodes_length)
+               tail_node = head_node
+               length = 0
+       end
+
+       # Out parameter of `node_at`
+       private var index_within_node = 0
+
+       private fun node_at(index: Int): UnrolledNode[E]
+       do
+               assert index >= 0 and index < length
+
+               var node = head_node
+               while index >= node.length do
+                       index -= node.length
+                       node = node.next.as(not null)
+               end
+
+               index_within_node = index
+               return node
+       end
+
+       private fun insert_node(node: UnrolledNode[E], prev, next: nullable UnrolledNode[E])
+       do
+               if prev != null then
+                       prev.next = node
+               else head_node = node
+
+               if next != null then
+                       next.prev = node
+               else tail_node = node
+
+               node.next = next
+               node.prev = prev
+       end
+
+       redef fun [](index)
+       do
+               var node = node_at(index)
+               index = index_within_node + node.head_index
+               return node.items[index]
+       end
+
+       redef fun []=(index, value)
+       do
+               var node = node_at(index)
+               index = index_within_node + node.head_index
+               node.items[index] = value
+       end
+
+       redef fun push(item)
+       do
+               var node = tail_node
+               if not node.full then
+                       if node.tail_index < node.capacity then
+                               # There's room at the tail
+                               node.tail_index += 1
+                       else
+                               # Move everything over by `d`
+                               assert node.head_index > 0
+                               var d = node.head_index / 2 + 1
+                               node.move_head(node.length, d)
+                               for i in d.times do node.items[node.tail_index - i] = null
+                               node.head_index -= d
+                               node.tail_index += -d+1
+                       end
+                       node.items[node.tail_index-1] = item
+               else
+                       # New node!
+                       node = new UnrolledNode[E](nodes_length)
+                       insert_node(node, tail_node, null)
+                       node.tail_index = 1
+                       node.items[0] = item
+               end
+               length += 1
+       end
+
+       redef fun unshift(item)
+       do
+               var node = head_node
+               if not node.full then
+                       if node.head_index > 0 then
+                               # There's room at the head
+                               node.head_index -= 1
+                       else
+                               # Move everything over by `d`
+                               assert node.tail_index < node.capacity
+                               var d = (node.capacity-node.tail_index) / 2 + 1
+                               node.move_tail(0, d)
+                               for i in d.times do node.items[node.head_index+i] = null
+                               node.head_index += d-1
+                               node.tail_index += d
+                       end
+                       node.items[node.head_index] = item
+               else
+                       # New node!
+                       node = new UnrolledNode[E](nodes_length)
+                       insert_node(node, null, head_node)
+                       node.head_index = node.capacity-1
+                       node.tail_index = node.capacity
+                       node.items[node.capacity-1] = item
+               end
+               length += 1
+       end
+
+       redef fun pop
+       do
+               assert not_empty
+
+               var node = tail_node
+               while node.length == 0 do
+                       # Delete empty node
+                       var nullable_node = node.prev
+                       assert is_not_empty: nullable_node != null
+                       node = nullable_node
+                       node.next = null
+                       self.tail_node = node
+               end
+
+               var item = node.items[node.tail_index-1]
+               node.tail_index -= 1
+               length -= 1
+               return item
+       end
+
+       redef fun shift
+       do
+               assert not_empty
+
+               var node = head_node
+               while node.length == 0 do
+                       # Delete empty node
+                       var nullable_node = node.next
+                       assert is_not_empty: nullable_node != null
+                       node = nullable_node
+                       node.prev = null
+                       self.head_node = node
+               end
+
+               var item = node.items[node.head_index]
+               node.head_index += 1
+               length -= 1
+               return item
+       end
+
+       redef fun insert(item, index)
+       do
+               if index == length then
+                       push item
+                       return
+               end
+
+               var node = node_at(index)
+               index = index_within_node
+               if node.full then
+                       # Move half to a new node
+                       var new_node = new UnrolledNode[E](nodes_length.max(node.capacity))
+
+                       # Plug in the new node
+                       var next_node = node.next
+                       insert_node(new_node, node, next_node)
+
+                       # Move items at and after `index` to the new node
+                       var to_displace = node.length-index
+                       var offset = (new_node.capacity-to_displace)/2
+                       for i in [0..to_displace[ do
+                               new_node.items[offset+i] = node.items[index+i]
+                               node.items[index+i] = null
+                       end
+                       new_node.head_index = offset
+                       new_node.tail_index = offset + to_displace
+                       node.tail_index -= to_displace
+
+                       # Store `item`
+                       if index > node.capacity / 2 then
+                               new_node.items[offset-1] = item
+                               new_node.head_index -= 1
+                       else
+                               node.items[node.head_index+index] = item
+                               node.tail_index += 1
+                       end
+               else
+                       if node.tail_index < node.capacity then
+                               # Move items towards the tail
+                               node.move_tail(index, 1)
+                               node.tail_index += 1
+                               node.items[node.head_index + index] = item
+                       else
+                               # Move items towards the head
+                               node.move_head(index, 1)
+                               node.items[node.head_index + index-1] = item
+                               node.head_index -= 1
+                       end
+               end
+               length += 1
+       end
+
+       redef fun remove_at(index)
+       do
+               var node = node_at(index)
+               index = index_within_node + node.head_index
+
+               # Shift left all the elements after `index`
+               for i in [index+1 .. node.tail_index[ do
+                       node.items[i-1] = node.items[i]
+               end
+               node.tail_index -= 1
+               node.items[node.tail_index] = null
+
+               length -= 1
+
+               var next_node = node.next
+               var prev_node = node.prev
+               if node.is_empty and (next_node != null or prev_node != null) then
+                       # Empty and non-head or tail node, delete
+                       if next_node != null then
+                               next_node.prev = node.prev
+                       else tail_node = prev_node.as(not null)
+
+                       if prev_node != null then
+                               prev_node.next = node.next
+                       else head_node = next_node.as(not null)
+               end
+       end
+
+       redef fun iterator do return new UnrolledIterator[E](self)
+end
+
+# Node composing an `UnrolledList`
+#
+# Stores the elements in the `items` array. The elements in the `items` array
+# begin at `head_index` and end right before `tail_index`. The data is contiguous,
+# but there can be empty cells at the beginning and the end of the array.
+private class UnrolledNode[E]
+
+       var prev: nullable UnrolledNode[E] = null
+
+       var next: nullable UnrolledNode[E] = null
+
+       # Desired length of `items`
+       var capacity: Int
+
+       # `Array` of items in this node, filled with `null`
+       var items = new Array[nullable E].filled_with(null, capacity) is lazy
+
+       # Index of the first element in `items`
+       var head_index = 0
+
+       # Index after the last element in `items`
+       var tail_index = 0
+
+       fun length: Int do return tail_index - head_index
+
+       fun full: Bool do return length == capacity
+
+       fun is_empty: Bool do return tail_index == head_index
+
+       # Move towards the head all elements before `index` of `displace` cells
+       fun move_tail(index, displace: Int)
+       do
+               for i in [tail_index-1..head_index+index].step(-1) do
+                       items[i+displace] = items[i]
+               end
+       end
+
+       # Move towards the tail all elements at and after `index` of `displace` cells
+       fun move_head(index, displace: Int)
+       do
+               for i in [head_index..head_index+index[ do
+                       items[i-displace] = items[i]
+               end
+       end
+end
+
+private class UnrolledIterator[E]
+       super IndexedIterator[E]
+
+       var list: UnrolledList[E]
+
+       var node: nullable UnrolledNode[E] = list.head_node is lazy
+
+       # Index of the current `item`
+       redef var index = 0
+
+       # Index within the current `node`
+       var index_in_node: Int = node.head_index is lazy
+
+       redef fun item do return node.items[index_in_node]
+
+       redef fun is_ok do return index < list.length
+
+       redef fun next
+       do
+               index += 1
+               index_in_node += 1
+
+               if index_in_node >= node.tail_index then
+                       node = node.next
+                       if node != null then index_in_node = node.head_index
+               end
+       end
+end
index 66ef4af..2cdc431 100644 (file)
@@ -126,16 +126,7 @@ class List[E]
                        push(e)
                        return
                end
-               var nnode = new ListNode[E](e)
-               var next = node.next
-               if next == null then
-                       _tail = nnode
-               else
-                       next.prev = nnode
-               end
-               nnode.prev = node
-               nnode.next = next
-               node.next = nnode
+               insert_before(e, node)
        end
 
        # Append `l` to `self` but clear `l`.
diff --git a/tests/test_unrolled_list.nit b/tests/test_unrolled_list.nit
new file mode 100644 (file)
index 0000000..df5f6a2
--- /dev/null
@@ -0,0 +1,65 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import more_collections
+
+var ul = new UnrolledList[Object]
+var ll = new List[Object]
+
+for i in 1000.times do
+       var val = 100.rand+1
+
+       var act = 3.rand
+       #print "+"+act.to_s
+       if act == 0 then
+               ll.add val
+               ul.add val
+       else if act == 1 then
+               ll.unshift val
+               ul.unshift val
+       else if act == 2 then
+               var index = ll.length.rand
+               ll.insert(val, index)
+               ul.insert(val, index)
+       else abort
+
+       #assert ll == ul
+end
+
+for i in 200.times do
+       var act = 3.rand
+       #print "-"+act.to_s
+       if act == 0 then
+               var o = ll.pop
+               var c = ul.pop
+               assert o == c
+       else if act == 1 then
+               var o = ll.shift
+               var c = ul.shift
+               assert o == c
+       else if act == 2 then
+               var index = ll.length.rand
+               ll.remove_at(index)
+               ul.remove_at(index)
+       else abort
+
+       #assert ll == ul
+end
+
+while ul.not_empty do
+       var c = ul.shift
+       var o = ll.shift
+       assert c == o else print "{c} vs {o}"
+end
+assert ll.is_empty