From: Jean Privat Date: Mon, 13 Aug 2018 19:25:30 +0000 (-0400) Subject: Merge: src/model/model_index: model index uses BKTree X-Git-Url: http://nitlanguage.org?hp=72867cacdcf3691f4d133313561f8d229ffbcd6d Merge: src/model/model_index: model index uses BKTree This PR makes ModelIndex use a BKTree (#2718) to speed up Nitweb name similarity search queries on large code corpora. Levenstein names comparison with 300k+ entities took a bit long (up to 10s for Nitweb with `contrib` and `src` for some queries). The use of BKTree speeds things up and brings those queries under 0.1s. Pull-Request: #2724 Reviewed-by: Jean Privat --- diff --git a/lib/core/collection/circular_array.nit b/lib/core/collection/circular_array.nit index 9419853..1cc757b 100644 --- a/lib/core/collection/circular_array.nit +++ b/lib/core/collection/circular_array.nit @@ -19,11 +19,11 @@ import array # Efficient data structure to access both end of the sequence. # -# A circular array offers efficient random access, -# efficient manipulation for both ends of the structure (push, pop, ) and +# A circular array offers efficient random access, efficient manipulation +# at both ends of the structure (push, pop, shift and unshift) and # automatic amortized growth. # -# Therefore it can be used as is or as and efficient queue (FIFO/LIFO) +# Therefore it can be used as is or as an efficient queue (FIFO/LIFO). class CircularArray[E] super Sequence[E] diff --git a/lib/core/collection/union_find.nit b/lib/core/collection/union_find.nit index 6c74ca2..864c509 100644 --- a/lib/core/collection/union_find.nit +++ b/lib/core/collection/union_find.nit @@ -13,7 +13,7 @@ module union_find import hash_collection -# Data structure to keeps track of elements partitioned into disjoint subsets +# Data structure to keep track of elements partitioned into disjoint subsets # # var s = new DisjointSet[Int] # s.add(1) @@ -22,15 +22,15 @@ import hash_collection # s.union(1,2) # assert s.in_same_subset(1,2) # -# `in_same_subset` is transitive, reflexive and symetric +# `in_same_subset` is transitive, reflexive and symmetric # # s.add(3) # assert not s.in_same_subset(1,3) # s.union(3,2) # assert s.in_same_subset(1,3) # -# Unkike theorical Disjoint-set data structures, the underling implementation is opaque -# that makes the traditionnal `find` method unavailable for clients. +# Unlike theoretical Disjoint-set data structures, the underling implementation is opaque +# making the traditional `find` method unavailable for clients. # The methods `in_same_subset`, `to_partitions`, and their variations are offered instead. class DisjointSet[E] super SimpleCollection[E] @@ -172,7 +172,7 @@ class DisjointSet[E] return to_subpartition(self) end - # Construct a partitionning on `es`, a subset of elements + # Construct a partitioning on `es`, a subset of elements # # var s = new DisjointSet[Int] # s.add_all([1,2,3,4,5,6]) @@ -237,7 +237,7 @@ private class DisjointSetNode # If parent == self then the node is a root var parent: DisjointSetNode = self - # The rank to no desequilibrate the structure. + # The rank to keep the structure balanced. # The term rank is used instead of depth since # path compression is used, see `DisjointSet::nfind` var rank = 0