X-Git-Url: http://nitlanguage.org diff --git a/src/model/model_index.nit b/src/model/model_index.nit index f8cfb24..f0c12fa 100644 --- a/src/model/model_index.nit +++ b/src/model/model_index.nit @@ -29,8 +29,7 @@ # # ~~~nitish # var index = new ModelIndex -# var view = new ModelView(model, mainmodule) -# for mentity in view.mentities do +# for mentity in model.collect_mentities do # index.index(mentity) # end # ~~~ @@ -55,8 +54,7 @@ # # ~~~nitish # var index = new ModelIndex -# var view = new ModelView(model, mainmodule) -# for mentity in view.mentities do +# for mentity in model.collect_mentities do # # We don't really care about definitions # if mentity isa MClassDef or mentity isa MPropDef then continue # index.index(mentity) @@ -127,40 +125,45 @@ # ~~~ module model_index -import model::model_views +import model::model_collect + import trees::trie +import trees::bktree -redef class ModelView +redef class Model # Keep a direct link to mentities by full name to speed up `mentity_from_uri` var mentities_by_full_name: HashMap[String, MEntity] is lazy do var mentities_by_full_name = new HashMap[String, MEntity] - for mentity in mentities do + for mentity in collect_mentities do mentities_by_full_name[mentity.full_name] = mentity end return mentities_by_full_name end # ModelIndex used to perform searches - var index: ModelIndex is lazy do + var index: ModelIndex is lazy, writable do var index = new ModelIndex - for mentity in mentities do + for mentity in collect_mentities do if mentity isa MClassDef or mentity isa MPropDef then continue index.index mentity end return index end - redef fun mentities_by_name(name) do - if index.name_prefixes.has_key(name) then - return index.name_prefixes[name] + redef fun mentities_by_name(name, filter) do + var res = new Array[MEntity] + if not index.names.has_key(name) then return res + for mentity in index.names[name] do + if filter == null or filter.accept_mentity(mentity) then res.add mentity end - return new Array[MEntity] + return res end - redef fun mentity_by_full_name(full_name) do + redef fun mentity_by_full_name(full_name, filter) do if mentities_by_full_name.has_key(full_name) then - return mentities_by_full_name[full_name] + var mentity = mentities_by_full_name[full_name] + if filter == null or filter.accept_mentity(mentity) then return mentity end return null end @@ -175,7 +178,7 @@ redef class ModelView # Search mentities based on a `query` string # - # Lookup the view index for anything matching `query` and return `limit` results. + # Lookup the index for anything matching `query` and return `limit` results. # # The algorithm used is the following: # 1- lookup by name prefix @@ -183,9 +186,9 @@ redef class ModelView # 3- loopup by levenshtein distance # # At each step if the `limit` is reached, the algorithm stops and returns the results. - fun find(query: String, limit: nullable Int): Array[MEntity] do + fun find(query: String, limit: nullable Int, filter: nullable ModelFilter): Array[MEntity] do # Find, lookup by name prefix - var matches = index.find_by_name_prefix(query).uniq. + var matches = index.find_by_name_prefix(query, filter).uniq. sort(lname_sorter, name_sorter, kind_sorter) if limit != null and matches.length >= limit then return matches.limit(limit).rerank.sort(vis_sorter, score_sorter).mentities @@ -195,7 +198,7 @@ redef class ModelView # If limit not reached, lookup by full_name prefix var malus = matches.length var full_matches = new IndexMatches - for match in index.find_by_full_name_prefix(query). + for match in index.find_by_full_name_prefix(query, filter). sort(kind_sorter, lfname_sorter, fname_sorter) do match.score += malus full_matches.add match @@ -210,7 +213,7 @@ redef class ModelView # If limit not reached, lookup by similarity malus = matches.length var sim_matches = new IndexMatches - for match in index.find_by_similarity(query).sort(score_sorter, kind_sorter, lname_sorter, name_sorter) do + for match in index.find_by_similarity(query, filter).sort(score_sorter, kind_sorter, lname_sorter, name_sorter) do match.score += malus sim_matches.add match end @@ -228,8 +231,7 @@ end # ~~~nitish # # Build index # var index = new ModelIndex -# var view = new ModelView(model, mainmodule) -# for mentity in view.mentities do +# for mentity in model.collect_mentities do # if mentity isa MClassDef or mentity isa MPropDef then continue # index.index(mentity) # end @@ -245,6 +247,9 @@ class ModelIndex # Faster than traversing the tries. var mentities = new Array[MEntity] + # Map of all mentities indexed by their `name` + var names = new HashMap[String, Array[MEntity]] + # Prefix tree for mentities `name` # # Because multiple mentities can share the same `name`, we use a Trie of @@ -254,30 +259,58 @@ class ModelIndex # TODO add an option. var name_prefixes = new Trie[Array[MEntity]] + # Distance tree for mentities `name` + var name_distances = new BKTree + + # Map of all mentities indexed by their `full_name` + var full_names = new HashMap[String, MEntity] + # Prefix tree for mentities `full_name` # # Even if two mentities cannot share the same `full_name`, we use a Trie of # arrays of mentities to be consistent with `name_prefixes`. var full_name_prefixes = new Trie[Array[MEntity]] + # Distance tree for mentities `full_name` + var full_name_distances = new BKTree + # Index `mentity` by it's `MEntity::name` # # See `name_prefixes`. private fun index_by_name(mentity: MEntity) do var name = mentity.name + + # Index name + if not names.has_key(name) then + names[name] = new Array[MEntity] + end + names[name].add mentity + + # Index prefix if not name_prefixes.has_key(name) then name_prefixes[name] = new Array[MEntity] end name_prefixes[name].add mentity + + # Index distance + name_distances.add(name) end # Index `mentity` by its `MEntity::full_name` private fun index_by_full_name(mentity: MEntity) do var name = mentity.full_name + + # Index full name + full_names[name] = mentity + + # Index prefix if not full_name_prefixes.has_key(name) then full_name_prefixes[name] = new Array[MEntity] end full_name_prefixes[name].add mentity + + # Index distance + full_name_distances.add(name) end # Index `mentity` so it can be retrieved by a find query @@ -297,11 +330,12 @@ class ModelIndex # Results from the Trie are returned in a breadth first manner so we get the # matches ordered by prefix. # We preserve that order by giving an incremental score to the `array` items. - private fun score_results_incremental(array: Array[Array[MEntity]]): IndexMatches do + private fun score_results_incremental(array: Array[Array[MEntity]], filter: nullable ModelFilter): IndexMatches do var results = new IndexMatches var score = 1 for mentities in array do for mentity in mentities do + if filter != null and not filter.accept_mentity(mentity) then continue results.add new IndexMatch(mentity, score) end score += 1 @@ -310,24 +344,30 @@ class ModelIndex end # Find all mentities where `MEntity::name` matches the `prefix` - fun find_by_name_prefix(prefix: String): IndexMatches do - return score_results_incremental(name_prefixes.find_by_prefix(prefix)) + fun find_by_name_prefix(prefix: String, filter: nullable ModelFilter): IndexMatches do + return score_results_incremental(name_prefixes.find_by_prefix(prefix), filter) end # Find all mentities where `MEntity::full_name` matches the `prefix` - fun find_by_full_name_prefix(prefix: String): IndexMatches do - return score_results_incremental(full_name_prefixes.find_by_prefix(prefix)) + fun find_by_full_name_prefix(prefix: String, filter: nullable ModelFilter): IndexMatches do + return score_results_incremental(full_name_prefixes.find_by_prefix(prefix), filter) end # Rank all mentities by the distance between `MEntity::name` and `name` # # Use the Levenshtein algorithm on all the indexed mentities `name`. # Warning: may not scale to large indexes. - fun find_by_name_similarity(name: String): IndexMatches do + fun find_by_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do var results = new IndexMatches - for mentity in mentities do - if mentity isa MClassDef or mentity isa MPropDef then continue - results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name)) + for match in name_distances.search(name) do + var dist = match.distance + var mname = match.key + if not names.has_key(mname) then continue + for mentity in names[mname] do + if mentity isa MClassDef or mentity isa MPropDef then continue + if filter != null and not filter.accept_mentity(mentity) then continue + results.add new IndexMatch(mentity, dist) + end end return results end @@ -336,43 +376,39 @@ class ModelIndex # # Use the Levenshtein algorithm on all the indexed mentities `full_name`. # Warning: may not scale to large indexes. - fun find_by_full_name_similarity(name: String): IndexMatches do + fun find_by_full_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do var results = new IndexMatches - for mentity in mentities do + for match in full_name_distances.search(name) do + var dist = match.distance + var mname = match.key + if not full_names.has_key(mname) then continue + var mentity = full_names[mname] if mentity isa MClassDef or mentity isa MPropDef then continue - results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name)) + if filter != null and not filter.accept_mentity(mentity) then continue + results.add new IndexMatch(mentity, dist) end return results end # Rank all mentities by the distance between `name` and both the mentity name and full name - fun find_by_similarity(name: String): IndexMatches do + fun find_by_similarity(name: String, filter: nullable ModelFilter): IndexMatches do var results = new IndexMatches - for mentity in mentities do - if mentity isa MClassDef or mentity isa MPropDef then continue - results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name)) - results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name)) - end + results.add_all find_by_full_name_similarity(name, filter) + results.add_all find_by_name_similarity(name, filter) return results end # Find mentities by name trying first by prefix then by similarity - fun find_by_name(name: String): IndexMatches do - var results = find_by_name_prefix(name) - for mentity in mentities do - if mentity isa MClassDef or mentity isa MPropDef then continue - results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name)) - end + fun find_by_name(name: String, filter: nullable ModelFilter): IndexMatches do + var results = find_by_name_prefix(name, filter) + results.add_all find_by_name_similarity(name, filter) return results end # Find mentities by full name trying firt by prefix then by similarity - fun find_by_full_name(name: String): IndexMatches do + fun find_by_full_name(name: String, filter: nullable ModelFilter): IndexMatches do var results = find_by_full_name_prefix(name) - for mentity in mentities do - if mentity isa MClassDef or mentity isa MPropDef then continue - results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name)) - end + results.add_all find_by_full_name_similarity(name, filter) return results end @@ -382,18 +418,10 @@ class ModelIndex # 2. add full name prefix matches # 3. try similarity by name # 4. try similarity by full_name - fun find(name: String): IndexMatches do - var results = find_by_name_prefix(name) - - for result in find_by_full_name_prefix(name) do - results.add result - end - - for mentity in mentities do - if mentity isa MClassDef or mentity isa MPropDef then continue - results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name)) - results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name)) - end + fun find(name: String, filter: nullable ModelFilter): IndexMatches do + var results = find_by_name_prefix(name, filter) + results.add_all find_by_full_name_prefix(name, filter) + results.add_all find_by_similarity(name, filter) return results end end