# This file is part of NIT ( http://www.nitlanguage.org ). # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Search things from the Model # # ModelIndex allows you to index mentities then retrieve them by their `name` # or `full_name`. # It offers a set of `find_*` methods that can be used to match queries # against entities name. # # Because each use is different, ModelIndex only provide base raw search services. # All of them return IndexMatches that can be ordered and filtered by the client. # # ## Indexing mentities # # Before searching something from the ModelIndex, you have to index it. # Use the `ModelIndex::index` method to do that: # # ~~~nitish # var index = new ModelIndex # for mentity in model.collect_mentities do # index.index(mentity) # end # ~~~ # # ## Search mentities # # You can then run queries on the ModelIndex: # # ~~~nitish # for res in index.find("Array").limit(10) do # print res # end # ~~~ # # ## Examples # # Here some examples of how you can use the ModelIndex. # # ### Smart type prediction # # Use ModelIndex to implement a smart prediction system based on the typed prefix: # # ~~~nitish # var index = new ModelIndex # for mentity in model.collect_mentities do # # We don't really care about definitions # if mentity isa MClassDef or mentity isa MPropDef then continue # index.index(mentity) # end # # var typed_prefix = "Arr" # for res in index.find_by_name_prefix(typed_prefix). # uniq. # keep only the best ranked mentity # limit(5). # limit to ten results # sort(new VisibilityComparator, new NameComparator) do # order by visibility then name # print res # end # ~~~ # # Will output something like: # # ~~~raw # Array (1) # ArraySet (2) # ArrayMap (3) # ArrayCmp (4) # ArrayMapKeys (5) # ~~~ # # ### Method autocompletion # # Find methods from a class full_name: # # ~~~nitish # var class_full_name = "core::Array" # for res in index.find_by_full_name_prefix("{class_full_name}::"). # uniq. # keep only the best ranked mentity # sort(new VisibilityComparator). # put private in the bottom of the list # limit(5). # limit to ten results # sort(new FullNameComparator) do # order by lexicographic order # print res # end # ~~~ # # Will output something like: # # ~~~raw # * (2) # + (1) # filled_with (5) # from (3) # with_items (4) # ~~~ # # ### Name typo detection and suggestion # # Detect unknown names and suggest corrections: # # ~~~nitish # var name = "Zrray" # # if index.find_by_name_prefix(name).is_empty then # printn "`{name}` not found, did you mean: " # printn index.find_by_name_similarity(name).sort(new ScoreComparator).limit(2).join(" or ") # print "?" # end # ~~~ # # Will output something like: # # ~~~raw # `Zrray` not found, did you mean: Array (1) or array (1)? # ~~~ module model_index import model::model_collect import trees::trie import trees::bktree redef class Model # Keep a direct link to mentities by full name to speed up `mentity_from_uri` var mentities_by_full_name: HashMap[String, MEntity] is lazy do var mentities_by_full_name = new HashMap[String, MEntity] for mentity in collect_mentities do mentities_by_full_name[mentity.full_name] = mentity end return mentities_by_full_name end # ModelIndex used to perform searches var index: ModelIndex is lazy, writable do var index = new ModelIndex for mentity in collect_mentities do if mentity isa MClassDef or mentity isa MPropDef then continue index.index mentity end return index end redef fun mentities_by_name(name, filter) do var res = new Array[MEntity] if not index.names.has_key(name) then return res for mentity in index.names[name] do if filter == null or filter.accept_mentity(mentity) then res.add mentity end return res end redef fun mentity_by_full_name(full_name, filter) do if mentities_by_full_name.has_key(full_name) then var mentity = mentities_by_full_name[full_name] if filter == null or filter.accept_mentity(mentity) then return mentity end return null end private var score_sorter = new ScoreComparator private var vis_sorter = new VisibilityComparator private var kind_sorter = new MEntityComparator private var name_sorter = new NameComparator private var lname_sorter = new NameLengthComparator private var fname_sorter = new FullNameComparator private var lfname_sorter = new FullNameLengthComparator # Search mentities based on a `query` string # # Lookup the index for anything matching `query` and return `limit` results. # # The algorithm used is the following: # 1- lookup by name prefix # 2- lookup by full_name prefix # 3- loopup by levenshtein distance # # At each step if the `limit` is reached, the algorithm stops and returns the results. fun find(query: String, limit: nullable Int, filter: nullable ModelFilter): Array[MEntity] do # Find, lookup by name prefix var matches = index.find_by_name_prefix(query, filter).uniq. sort(lname_sorter, name_sorter, kind_sorter) if limit != null and matches.length >= limit then return matches.limit(limit).rerank.sort(vis_sorter, score_sorter).mentities end matches = matches.rerank.sort(vis_sorter, score_sorter) # If limit not reached, lookup by full_name prefix var malus = matches.length var full_matches = new IndexMatches for match in index.find_by_full_name_prefix(query, filter). sort(kind_sorter, lfname_sorter, fname_sorter) do match.score += malus full_matches.add match end matches = matches.uniq if limit != null and matches.length + full_matches.length >= limit then matches.add_all full_matches matches = matches.uniq.limit(limit).rerank.sort(vis_sorter, score_sorter) return matches.mentities end # If limit not reached, lookup by similarity malus = matches.length var sim_matches = new IndexMatches for match in index.find_by_similarity(query, filter).sort(score_sorter, kind_sorter, lname_sorter, name_sorter) do match.score += malus sim_matches.add match end matches.add_all sim_matches matches = matches.uniq if limit != null then matches = matches.limit(limit) return matches.rerank.sort(vis_sorter, score_sorter).mentities end end # ModelIndex indexes mentities by their name and full name # # It provides methods to find mentities based on a prefix or string similarity. # # ~~~nitish # # Build index # var index = new ModelIndex # for mentity in model.collect_mentities do # if mentity isa MClassDef or mentity isa MPropDef then continue # index.index(mentity) # end # # for e in index.find("Foo").uniq.sort(new ScoreComparator).limit(10) do # print " * {e.score}: {e.mentity.name} ({e.mentity.full_name})" # end # ~~~ class ModelIndex # List of all indexed mentities. # # Faster than traversing the tries. var mentities = new Array[MEntity] # Map of all mentities indexed by their `name` var names = new HashMap[String, Array[MEntity]] # Prefix tree for mentities `name` # # Because multiple mentities can share the same `name`, we use a Trie of # arrays of mentities. # # As for now, we do not index class and property definitions. # TODO add an option. var name_prefixes = new Trie[Array[MEntity]] # Distance tree for mentities `name` var name_distances = new BKTree # Map of all mentities indexed by their `full_name` var full_names = new HashMap[String, MEntity] # Prefix tree for mentities `full_name` # # Even if two mentities cannot share the same `full_name`, we use a Trie of # arrays of mentities to be consistent with `name_prefixes`. var full_name_prefixes = new Trie[Array[MEntity]] # Distance tree for mentities `full_name` var full_name_distances = new BKTree # Index `mentity` by it's `MEntity::name` # # See `name_prefixes`. private fun index_by_name(mentity: MEntity) do var name = mentity.name # Index name if not names.has_key(name) then names[name] = new Array[MEntity] end names[name].add mentity # Index prefix if not name_prefixes.has_key(name) then name_prefixes[name] = new Array[MEntity] end name_prefixes[name].add mentity # Index distance name_distances.add(name) end # Index `mentity` by its `MEntity::full_name` private fun index_by_full_name(mentity: MEntity) do var name = mentity.full_name # Index full name full_names[name] = mentity # Index prefix if not full_name_prefixes.has_key(name) then full_name_prefixes[name] = new Array[MEntity] end full_name_prefixes[name].add mentity # Index distance full_name_distances.add(name) end # Index `mentity` so it can be retrieved by a find query # # MEntities are indexed by both name and full_name. fun index(mentity: MEntity) do mentities.add mentity index_by_name mentity index_by_full_name mentity end # Translate Trie results to `SearchResult` # # This method is used internally to translate each mentity returned by a prefix # match in a Trie into a SearchResult that can be ranked by score. # # Results from the Trie are returned in a breadth first manner so we get the # matches ordered by prefix. # We preserve that order by giving an incremental score to the `array` items. private fun score_results_incremental(array: Array[Array[MEntity]], filter: nullable ModelFilter): IndexMatches do var results = new IndexMatches var score = 1 for mentities in array do for mentity in mentities do if filter != null and not filter.accept_mentity(mentity) then continue results.add new IndexMatch(mentity, score) end score += 1 end return results end # Find all mentities where `MEntity::name` matches the `prefix` fun find_by_name_prefix(prefix: String, filter: nullable ModelFilter): IndexMatches do return score_results_incremental(name_prefixes.find_by_prefix(prefix), filter) end # Find all mentities where `MEntity::full_name` matches the `prefix` fun find_by_full_name_prefix(prefix: String, filter: nullable ModelFilter): IndexMatches do return score_results_incremental(full_name_prefixes.find_by_prefix(prefix), filter) end # Rank all mentities by the distance between `MEntity::name` and `name` # # Use the Levenshtein algorithm on all the indexed mentities `name`. # Warning: may not scale to large indexes. fun find_by_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do var results = new IndexMatches for match in name_distances.search(name) do var dist = match.distance var mname = match.key if not names.has_key(mname) then continue for mentity in names[mname] do if mentity isa MClassDef or mentity isa MPropDef then continue if filter != null and not filter.accept_mentity(mentity) then continue results.add new IndexMatch(mentity, dist) end end return results end # Rank all mentities by the distance between `MEntity::full_name` and `full_name` # # Use the Levenshtein algorithm on all the indexed mentities `full_name`. # Warning: may not scale to large indexes. fun find_by_full_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do var results = new IndexMatches for match in full_name_distances.search(name) do var dist = match.distance var mname = match.key if not full_names.has_key(mname) then continue var mentity = full_names[mname] if mentity isa MClassDef or mentity isa MPropDef then continue if filter != null and not filter.accept_mentity(mentity) then continue results.add new IndexMatch(mentity, dist) end return results end # Rank all mentities by the distance between `name` and both the mentity name and full name fun find_by_similarity(name: String, filter: nullable ModelFilter): IndexMatches do var results = new IndexMatches results.add_all find_by_full_name_similarity(name, filter) results.add_all find_by_name_similarity(name, filter) return results end # Find mentities by name trying first by prefix then by similarity fun find_by_name(name: String, filter: nullable ModelFilter): IndexMatches do var results = find_by_name_prefix(name, filter) results.add_all find_by_name_similarity(name, filter) return results end # Find mentities by full name trying firt by prefix then by similarity fun find_by_full_name(name: String, filter: nullable ModelFilter): IndexMatches do var results = find_by_full_name_prefix(name) results.add_all find_by_full_name_similarity(name, filter) return results end # Find all mentities that matches `name` # # 1. try by name prefix # 2. add full name prefix matches # 3. try similarity by name # 4. try similarity by full_name fun find(name: String, filter: nullable ModelFilter): IndexMatches do var results = find_by_name_prefix(name, filter) results.add_all find_by_full_name_prefix(name, filter) results.add_all find_by_similarity(name, filter) return results end end # An array of IndexMatch instances returned by the ModelIndex # # Index matches can be sorted, filtered and truncated. # # Thanks to the fluent interface, the index matches can be manipulated in chain # from a model index query: # # ~~~nitish # var res = index.find("Foo"). # uniq. # sort(new ScoreComparator, new MEntityComparator). # limit(10). # sort(new VisibilityComparator) # ~~~ class IndexMatches super Array[IndexMatch] # Create a new ModelMatches from an array of matches # # Elements are copied. init from_matches(matches: Array[IndexMatch]) do self.add_all matches # Sort the matches with `comparator` (or a list of comparators) # # Return a new IndexMatches instance with the sorted results. # # When more than one comparator is given, the comparators are applied in a # pipeline where the `n`th comparator is applied only if the `n-1`th comparator # returned 0. fun sort(comparator: ScoreComparator...): IndexMatches do var res = to_a if comparator.length == 1 then comparator.first.sort res else var comparators = new MatchComparators(comparator) comparators.sort res end return new IndexMatches.from_matches(res) end # Limit the matches with `limit` # # Return a new IndexMatches instance with only the `limit` first matches. fun limit(limit: Int): IndexMatches do var res = new Array[IndexMatch] for match in self do if res.length >= limit then break res.add match end return new IndexMatches.from_matches(res) end # Remove doublons from the matches # # Preverse the lowest score of all the matches for a MEntity. fun uniq: IndexMatches do var scores = new HashMap[MEntity, IndexMatch] var res = new Array[IndexMatch] for match in self do var mentity = match.mentity if scores.has_key(mentity) then var older = scores[mentity] if match.score < older.score then older.score = match.score else scores[mentity] = match res.add match end end return new IndexMatches.from_matches(res) end # Reset score of each matches to follow `self` order # # Usefull when you need to apply one sorter over another. fun rerank: IndexMatches do var res = new IndexMatches for match in self do res.add match match.score = res.length end return res end # Aggregate the mentities for all the matches # # Preserve the match order. fun mentities: Array[MEntity] do var res = new Array[MEntity] for match in self do res.add match.mentity return res end end # An MEntity matched from a ModelIndex # # Each match has a `score`. The score should be seen as the distance of # the match from the query. In other words, the lowest is the score, the more # relevant is the match. class IndexMatch super Comparable redef type OTHER: IndexMatch # MEntity matches var mentity: MEntity # Score allocated by the search method # # A lowest score means a more relevant match. # # Scores values are arbitrary, the meaning of `10` vs `2000` really depends # on the search method producing the match and the comparators used to sort # the matches. # The only universal rule is: low score = relevance. var score: Int is writable # By default matches are compared only on their score redef fun <=>(o) do return score <=> o.score redef fun to_s do return "{mentity} ({score})" end # Compare two matches by their score # # Since the score can be seen as a distance, we want the lowest score first. class ScoreComparator super Comparator redef type COMPARED: IndexMatch redef fun compare(o1, o2) do return o1.score <=> o2.score end # A pipeline of comparators executed in inclusion order # # This class is used internally to agregate the behaviors of multiple comparators. # Use `IndexMatches::sort(comparator...)` instead. private class MatchComparators super ScoreComparator # Comparator to use in the array order var comparators: Array[ScoreComparator] # Compare with each comparator # # Return the compare value of the first one to return anything else than 0. redef fun compare(o1, o2) do for comparator in comparators do var c = comparator.compare(o1, o2) if c != 0 then return c end return 0 end end # Compare two matches by their MEntity kind # # Usefull to order the mentities by kind in this order: # packages, groups, modules and classes, properties. class MEntityComparator super ScoreComparator # See `MEntity::compare_mentity` redef fun compare(o1, o2) do return o1.mentity.mentity_kind_rank <=> o2.mentity.mentity_kind_rank end end # Compare two matches by their MEntity visibility # # We reverse the original order so private is at the end of the list. class VisibilityComparator super ScoreComparator redef fun compare(o1, o2) do return o2.mentity.visibility <=> o1.mentity.visibility end # Compare two matches by their name in lexicographic order # # Generally, for a same score, we want to put A before Z. class NameComparator super ScoreComparator redef fun compare(o1, o2) do return o1.mentity.name <=> o2.mentity.name end # Compare two matches by their name length class NameLengthComparator super ScoreComparator redef fun compare(o1, o2) do return o1.mentity.name.length <=> o2.mentity.name.length end # Compare two matches by their full_name in lexicographic order # # Generally, for a same score, we want to put A before Z. class FullNameComparator super ScoreComparator redef fun compare(o1, o2) do return o1.mentity.full_name <=> o2.mentity.full_name end # Compare two matches by their full name length class FullNameLengthComparator super ScoreComparator redef fun compare(o1, o2) do return o1.mentity.full_name.length <=> o2.mentity.full_name.length end redef class MEntity # Compare MEntity class kind # # Unknown kind have a virtually high score. private fun mentity_kind_rank: Int do return 10 end redef class MPackage redef fun mentity_kind_rank do return 1 end redef class MGroup redef fun mentity_kind_rank do return 2 end redef class MModule redef fun mentity_kind_rank do return 3 end redef class MClass redef fun mentity_kind_rank do return 4 end redef class MClassDef redef fun mentity_kind_rank do return 5 end redef class MProperty redef fun mentity_kind_rank do return 6 end redef class MPropDef redef fun mentity_kind_rank do return 7 end