Merge remote-tracking branch 'upstream/master' into init_auto

[nit.git] / src / model / model_index.nit
diff --git a/src/model/model_index.nit b/src/model/model_index.nit

new file mode 100644 (file)

index 0000000..f0c12fa
--- /dev/null
+++ b/src/model/model_index.nit
@@ -0,0 +1,672 @@
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Search things from the Model
+#
+# ModelIndex allows you to index mentities then retrieve them by their `name`
+# or `full_name`.
+# It offers a set of `find_*` methods that can be used to match queries
+# against entities name.
+#
+# Because each use is different, ModelIndex only provide base raw search services.
+# All of them return IndexMatches that can be ordered and filtered by the client.
+#
+# ## Indexing mentities
+#
+# Before searching something from the ModelIndex, you have to index it.
+# Use the `ModelIndex::index` method to do that:
+#
+# ~~~nitish
+# var index = new ModelIndex
+# for mentity in model.collect_mentities do
+#      index.index(mentity)
+# end
+# ~~~
+#
+# ## Search mentities
+#
+# You can then run queries on the ModelIndex:
+#
+# ~~~nitish
+# for res in index.find("Array").limit(10) do
+#    print res
+# end
+# ~~~
+#
+# ## Examples
+#
+# Here some examples of how you can use the ModelIndex.
+#
+# ### Smart type prediction
+#
+# Use ModelIndex to implement a smart prediction system based on the typed prefix:
+#
+# ~~~nitish
+# var index = new ModelIndex
+# for mentity in model.collect_mentities do
+#      # We don't really care about definitions
+#      if mentity isa MClassDef or mentity isa MPropDef then continue
+#      index.index(mentity)
+# end
+#
+# var typed_prefix = "Arr"
+# for res in index.find_by_name_prefix(typed_prefix).
+#      uniq. # keep only the best ranked mentity
+#      limit(5). # limit to ten results
+#      sort(new VisibilityComparator, new NameComparator) do # order by visibility then name
+#    print res
+# end
+# ~~~
+#
+# Will output something like:
+#
+# ~~~raw
+# Array (1)
+# ArraySet (2)
+# ArrayMap (3)
+# ArrayCmp (4)
+# ArrayMapKeys (5)
+# ~~~
+#
+# ### Method autocompletion
+#
+# Find methods from a class full_name:
+#
+# ~~~nitish
+# var class_full_name = "core::Array"
+# for res in index.find_by_full_name_prefix("{class_full_name}::").
+#      uniq. # keep only the best ranked mentity
+#      sort(new VisibilityComparator). # put private in the bottom of the list
+#      limit(5). # limit to ten results
+#      sort(new FullNameComparator) do # order by lexicographic order
+#    print res
+# end
+# ~~~
+#
+# Will output something like:
+#
+# ~~~raw
+# * (2)
+# + (1)
+# filled_with (5)
+# from (3)
+# with_items (4)
+# ~~~
+#
+# ### Name typo detection and suggestion
+#
+# Detect unknown names and suggest corrections:
+#
+# ~~~nitish
+# var name = "Zrray"
+#
+# if index.find_by_name_prefix(name).is_empty then
+#      printn "`{name}` not found, did you mean: "
+#      printn index.find_by_name_similarity(name).sort(new ScoreComparator).limit(2).join(" or ")
+#      print "?"
+# end
+# ~~~
+#
+# Will output something like:
+#
+# ~~~raw
+# `Zrray` not found, did you mean: Array (1) or array (1)?
+# ~~~
+module model_index
+
+import model::model_collect
+
+import trees::trie
+import trees::bktree
+
+redef class Model
+
+       # Keep a direct link to mentities by full name to speed up `mentity_from_uri`
+       var mentities_by_full_name: HashMap[String, MEntity] is lazy do
+               var mentities_by_full_name = new HashMap[String, MEntity]
+               for mentity in collect_mentities do
+                       mentities_by_full_name[mentity.full_name] = mentity
+               end
+               return mentities_by_full_name
+       end
+
+       # ModelIndex used to perform searches
+       var index: ModelIndex is lazy, writable do
+               var index = new ModelIndex
+               for mentity in collect_mentities do
+                       if mentity isa MClassDef or mentity isa MPropDef then continue
+                       index.index mentity
+               end
+               return index
+       end
+
+       redef fun mentities_by_name(name, filter) do
+               var res = new Array[MEntity]
+               if not index.names.has_key(name) then return res
+               for mentity in index.names[name] do
+                       if filter == null or filter.accept_mentity(mentity) then res.add mentity
+               end
+               return res
+       end
+
+       redef fun mentity_by_full_name(full_name, filter) do
+               if mentities_by_full_name.has_key(full_name) then
+                       var mentity = mentities_by_full_name[full_name]
+                       if filter == null or filter.accept_mentity(mentity) then return mentity
+               end
+               return null
+       end
+
+       private var score_sorter = new ScoreComparator
+       private var vis_sorter = new VisibilityComparator
+       private var kind_sorter = new MEntityComparator
+       private var name_sorter = new NameComparator
+       private var lname_sorter = new NameLengthComparator
+       private var fname_sorter = new FullNameComparator
+       private var lfname_sorter = new FullNameLengthComparator
+
+       # Search mentities based on a `query` string
+       #
+       # Lookup the index for anything matching `query` and return `limit` results.
+       #
+       # The algorithm used is the following:
+       # 1- lookup by name prefix
+       # 2- lookup by full_name prefix
+       # 3- loopup by levenshtein distance
+       #
+       # At each step if the `limit` is reached, the algorithm stops and returns the results.
+       fun find(query: String, limit: nullable Int, filter: nullable ModelFilter): Array[MEntity] do
+               # Find, lookup by name prefix
+               var matches = index.find_by_name_prefix(query, filter).uniq.
+                       sort(lname_sorter, name_sorter, kind_sorter)
+               if limit != null and matches.length >= limit then
+                       return matches.limit(limit).rerank.sort(vis_sorter, score_sorter).mentities
+               end
+               matches = matches.rerank.sort(vis_sorter, score_sorter)
+
+               # If limit not reached, lookup by full_name prefix
+               var malus = matches.length
+               var full_matches = new IndexMatches
+               for match in index.find_by_full_name_prefix(query, filter).
+                       sort(kind_sorter, lfname_sorter, fname_sorter) do
+                       match.score += malus
+                       full_matches.add match
+               end
+               matches = matches.uniq
+               if limit != null and matches.length + full_matches.length >= limit then
+                       matches.add_all full_matches
+                       matches = matches.uniq.limit(limit).rerank.sort(vis_sorter, score_sorter)
+                       return matches.mentities
+               end
+
+               # If limit not reached, lookup by similarity
+               malus = matches.length
+               var sim_matches = new IndexMatches
+               for match in index.find_by_similarity(query, filter).sort(score_sorter, kind_sorter, lname_sorter, name_sorter) do
+                       match.score += malus
+                       sim_matches.add match
+               end
+               matches.add_all sim_matches
+               matches = matches.uniq
+               if limit != null then matches = matches.limit(limit)
+               return matches.rerank.sort(vis_sorter, score_sorter).mentities
+       end
+end
+
+# ModelIndex indexes mentities by their name and full name
+#
+# It provides methods to find mentities based on a prefix or string similarity.
+#
+# ~~~nitish
+# # Build index
+# var index = new ModelIndex
+# for mentity in model.collect_mentities do
+#      if mentity isa MClassDef or mentity isa MPropDef then continue
+#      index.index(mentity)
+# end
+#
+# for e in index.find("Foo").uniq.sort(new ScoreComparator).limit(10) do
+#      print " * {e.score}: {e.mentity.name} ({e.mentity.full_name})"
+# end
+# ~~~
+class ModelIndex
+
+       # List of all indexed mentities.
+       #
+       # Faster than traversing the tries.
+       var mentities = new Array[MEntity]
+
+       # Map of all mentities indexed by their `name`
+       var names = new HashMap[String, Array[MEntity]]
+
+       # Prefix tree for mentities `name`
+       #
+       # Because multiple mentities can share the same `name`, we use a Trie of
+       # arrays of mentities.
+       #
+       # As for now, we do not index class and property definitions.
+       # TODO add an option.
+       var name_prefixes = new Trie[Array[MEntity]]
+
+       # Distance tree for mentities `name`
+       var name_distances = new BKTree
+
+       # Map of all mentities indexed by their `full_name`
+       var full_names = new HashMap[String, MEntity]
+
+       # Prefix tree for mentities `full_name`
+       #
+       # Even if two mentities cannot share the same `full_name`, we use a Trie of
+       # arrays of mentities to be consistent with `name_prefixes`.
+       var full_name_prefixes = new Trie[Array[MEntity]]
+
+       # Distance tree for mentities `full_name`
+       var full_name_distances = new BKTree
+
+       # Index `mentity` by it's `MEntity::name`
+       #
+       # See `name_prefixes`.
+       private fun index_by_name(mentity: MEntity) do
+               var name = mentity.name
+
+               # Index name
+               if not names.has_key(name) then
+                       names[name] = new Array[MEntity]
+               end
+               names[name].add mentity
+
+               # Index prefix
+               if not name_prefixes.has_key(name) then
+                       name_prefixes[name] = new Array[MEntity]
+               end
+               name_prefixes[name].add mentity
+
+               # Index distance
+               name_distances.add(name)
+       end
+
+       # Index `mentity` by its `MEntity::full_name`
+       private fun index_by_full_name(mentity: MEntity) do
+               var name = mentity.full_name
+
+               # Index full name
+               full_names[name] = mentity
+
+               # Index prefix
+               if not full_name_prefixes.has_key(name) then
+                       full_name_prefixes[name] = new Array[MEntity]
+               end
+               full_name_prefixes[name].add mentity
+
+               # Index distance
+               full_name_distances.add(name)
+       end
+
+       # Index `mentity` so it can be retrieved by a find query
+       #
+       # MEntities are indexed by both name and full_name.
+       fun index(mentity: MEntity) do
+               mentities.add mentity
+               index_by_name mentity
+               index_by_full_name mentity
+       end
+
+       # Translate Trie results to `SearchResult`
+       #
+       # This method is used internally to translate each mentity returned by a prefix
+       # match in a Trie into a SearchResult that can be ranked by score.
+       #
+       # Results from the Trie are returned in a breadth first manner so we get the
+       # matches ordered by prefix.
+       # We preserve that order by giving an incremental score to the `array` items.
+       private fun score_results_incremental(array: Array[Array[MEntity]], filter: nullable ModelFilter): IndexMatches do
+               var results = new IndexMatches
+               var score = 1
+               for mentities in array do
+                       for mentity in mentities do
+                               if filter != null and not filter.accept_mentity(mentity) then continue
+                               results.add new IndexMatch(mentity, score)
+                       end
+                       score += 1
+               end
+               return results
+       end
+
+       # Find all mentities where `MEntity::name` matches the `prefix`
+       fun find_by_name_prefix(prefix: String, filter: nullable ModelFilter): IndexMatches do
+               return score_results_incremental(name_prefixes.find_by_prefix(prefix), filter)
+       end
+
+       # Find all mentities where `MEntity::full_name` matches the `prefix`
+       fun find_by_full_name_prefix(prefix: String, filter: nullable ModelFilter): IndexMatches do
+               return score_results_incremental(full_name_prefixes.find_by_prefix(prefix), filter)
+       end
+
+       # Rank all mentities by the distance between `MEntity::name` and `name`
+       #
+       # Use the Levenshtein algorithm on all the indexed mentities `name`.
+       # Warning: may not scale to large indexes.
+       fun find_by_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
+               var results = new IndexMatches
+               for match in name_distances.search(name) do
+                       var dist = match.distance
+                       var mname = match.key
+                       if not names.has_key(mname) then continue
+                       for mentity in names[mname] do
+                               if mentity isa MClassDef or mentity isa MPropDef then continue
+                               if filter != null and not filter.accept_mentity(mentity) then continue
+                               results.add new IndexMatch(mentity, dist)
+                       end
+               end
+               return results
+       end
+
+       # Rank all mentities by the distance between `MEntity::full_name` and `full_name`
+       #
+       # Use the Levenshtein algorithm on all the indexed mentities `full_name`.
+       # Warning: may not scale to large indexes.
+       fun find_by_full_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
+               var results = new IndexMatches
+               for match in full_name_distances.search(name) do
+                       var dist = match.distance
+                       var mname = match.key
+                       if not full_names.has_key(mname) then continue
+                       var mentity = full_names[mname]
+                       if mentity isa MClassDef or mentity isa MPropDef then continue
+                       if filter != null and not filter.accept_mentity(mentity) then continue
+                       results.add new IndexMatch(mentity, dist)
+               end
+               return results
+       end
+
+       # Rank all mentities by the distance between `name` and both the mentity name and full name
+       fun find_by_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
+               var results = new IndexMatches
+               results.add_all find_by_full_name_similarity(name, filter)
+               results.add_all find_by_name_similarity(name, filter)
+               return results
+       end
+
+       # Find mentities by name trying first by prefix then by similarity
+       fun find_by_name(name: String, filter: nullable ModelFilter): IndexMatches do
+               var results = find_by_name_prefix(name, filter)
+               results.add_all find_by_name_similarity(name, filter)
+               return results
+       end
+
+       # Find mentities by full name trying firt by prefix then by similarity
+       fun find_by_full_name(name: String, filter: nullable ModelFilter): IndexMatches do
+               var results = find_by_full_name_prefix(name)
+               results.add_all find_by_full_name_similarity(name, filter)
+               return results
+       end
+
+       # Find all mentities that matches `name`
+       #
+       # 1. try by name prefix
+       # 2. add full name prefix matches
+       # 3. try similarity by name
+       # 4. try similarity by full_name
+       fun find(name: String, filter: nullable ModelFilter): IndexMatches do
+               var results = find_by_name_prefix(name, filter)
+               results.add_all find_by_full_name_prefix(name, filter)
+               results.add_all find_by_similarity(name, filter)
+               return results
+       end
+end
+
+# An array of IndexMatch instances returned by the ModelIndex
+#
+# Index matches can be sorted, filtered and truncated.
+#
+# Thanks to the fluent interface, the index matches can be manipulated in chain
+# from a model index query:
+#
+# ~~~nitish
+# var res = index.find("Foo").
+#   uniq.
+#      sort(new ScoreComparator, new MEntityComparator).
+#      limit(10).
+#      sort(new VisibilityComparator)
+# ~~~
+class IndexMatches
+       super Array[IndexMatch]
+
+       # Create a new ModelMatches from an array of matches
+       #
+       # Elements are copied.
+       init from_matches(matches: Array[IndexMatch]) do self.add_all matches
+
+       # Sort the matches with `comparator` (or a list of comparators)
+       #
+       # Return a new IndexMatches instance with the sorted results.
+       #
+       # When more than one comparator is given, the comparators are applied in a
+       # pipeline where the `n`th comparator is applied only if the `n-1`th comparator
+       # returned 0.
+       fun sort(comparator: ScoreComparator...): IndexMatches do
+               var res = to_a
+               if comparator.length == 1 then
+                       comparator.first.sort res
+               else
+                       var comparators = new MatchComparators(comparator)
+                       comparators.sort res
+               end
+               return new IndexMatches.from_matches(res)
+       end
+
+       # Limit the matches with `limit`
+       #
+       # Return a new IndexMatches instance with only the `limit` first matches.
+       fun limit(limit: Int): IndexMatches do
+               var res = new Array[IndexMatch]
+               for match in self do
+                       if res.length >= limit then break
+                       res.add match
+               end
+               return new IndexMatches.from_matches(res)
+       end
+
+       # Remove doublons from the matches
+       #
+       # Preverse the lowest score of all the matches for a MEntity.
+       fun uniq: IndexMatches do
+               var scores = new HashMap[MEntity, IndexMatch]
+               var res = new Array[IndexMatch]
+               for match in self do
+                       var mentity = match.mentity
+                       if scores.has_key(mentity) then
+                               var older = scores[mentity]
+                               if match.score < older.score then older.score = match.score
+                       else
+                               scores[mentity] = match
+                               res.add match
+                       end
+               end
+               return new IndexMatches.from_matches(res)
+       end
+
+       # Reset score of each matches to follow `self` order
+       #
+       # Usefull when you need to apply one sorter over another.
+       fun rerank: IndexMatches do
+               var res = new IndexMatches
+               for match in self do
+                       res.add match
+                       match.score = res.length
+               end
+               return res
+       end
+
+       # Aggregate the mentities for all the matches
+       #
+       # Preserve the match order.
+       fun mentities: Array[MEntity] do
+               var res = new Array[MEntity]
+               for match in self do res.add match.mentity
+               return res
+       end
+end
+
+# An MEntity matched from a ModelIndex
+#
+# Each match has a `score`. The score should be seen as the distance of
+# the match from the query. In other words, the lowest is the score, the more
+# relevant is the match.
+class IndexMatch
+       super Comparable
+
+       redef type OTHER: IndexMatch
+
+       # MEntity matches
+       var mentity: MEntity
+
+       # Score allocated by the search method
+       #
+       # A lowest score means a more relevant match.
+       #
+       # Scores values are arbitrary, the meaning of `10` vs `2000` really depends
+       # on the search method producing the match and the comparators used to sort
+       # the matches.
+       # The only universal rule is: low score = relevance.
+       var score: Int is writable
+
+       # By default matches are compared only on their score
+       redef fun <=>(o) do return score <=> o.score
+
+       redef fun to_s do return "{mentity} ({score})"
+end
+
+# Compare two matches by their score
+#
+# Since the score can be seen as a distance, we want the lowest score first.
+class ScoreComparator
+       super Comparator
+
+       redef type COMPARED: IndexMatch
+
+       redef fun compare(o1, o2) do return o1.score <=> o2.score
+end
+
+# A pipeline of comparators executed in inclusion order
+#
+# This class is used internally to agregate the behaviors of multiple comparators.
+# Use `IndexMatches::sort(comparator...)` instead.
+private class MatchComparators
+       super ScoreComparator
+
+       # Comparator to use in the array order
+       var comparators: Array[ScoreComparator]
+
+       # Compare with each comparator
+       #
+       # Return the compare value of the first one to return anything else than 0.
+       redef fun compare(o1, o2) do
+               for comparator in comparators do
+                       var c = comparator.compare(o1, o2)
+                       if c != 0 then return c
+               end
+               return 0
+       end
+end
+
+# Compare two matches by their MEntity kind
+#
+# Usefull to order the mentities by kind in this order:
+# packages, groups, modules and classes, properties.
+class MEntityComparator
+       super ScoreComparator
+
+       # See `MEntity::compare_mentity`
+       redef fun compare(o1, o2) do
+               return o1.mentity.mentity_kind_rank <=> o2.mentity.mentity_kind_rank
+       end
+end
+
+# Compare two matches by their MEntity visibility
+#
+# We reverse the original order so private is at the end of the list.
+class VisibilityComparator
+       super ScoreComparator
+
+       redef fun compare(o1, o2) do return o2.mentity.visibility <=> o1.mentity.visibility
+end
+
+# Compare two matches by their name in lexicographic order
+#
+# Generally, for a same score, we want to put A before Z.
+class NameComparator
+       super ScoreComparator
+
+       redef fun compare(o1, o2) do return o1.mentity.name <=> o2.mentity.name
+end
+
+# Compare two matches by their name length
+class NameLengthComparator
+       super ScoreComparator
+
+       redef fun compare(o1, o2) do return o1.mentity.name.length <=> o2.mentity.name.length
+end
+
+# Compare two matches by their full_name in lexicographic order
+#
+# Generally, for a same score, we want to put A before Z.
+class FullNameComparator
+       super ScoreComparator
+
+       redef fun compare(o1, o2) do return o1.mentity.full_name <=> o2.mentity.full_name
+end
+
+# Compare two matches by their full name length
+class FullNameLengthComparator
+       super ScoreComparator
+
+       redef fun compare(o1, o2) do return o1.mentity.full_name.length <=> o2.mentity.full_name.length
+end
+
+redef class MEntity
+
+       # Compare MEntity class kind
+       #
+       # Unknown kind have a virtually high score.
+       private fun mentity_kind_rank: Int do return 10
+end
+
+redef class MPackage
+       redef fun mentity_kind_rank do return 1
+end
+
+redef class MGroup
+       redef fun mentity_kind_rank do return 2
+end
+
+redef class MModule
+       redef fun mentity_kind_rank do return 3
+end
+
+redef class MClass
+       redef fun mentity_kind_rank do return 4
+end
+
+redef class MClassDef
+       redef fun mentity_kind_rank do return 5
+end
+
+redef class MProperty
+       redef fun mentity_kind_rank do return 6
+end
+
+redef class MPropDef
+       redef fun mentity_kind_rank do return 7
+end