#
# ~~~nitish
# var index = new ModelIndex
-#
-# for mentity in model.private_view.mentities do
+# for mentity in model.collect_mentities do
# index.index(mentity)
# end
# ~~~
#
# ~~~nitish
# var index = new ModelIndex
-#
-# for mentity in model.private_view.mentities do
+# for mentity in model.collect_mentities do
# # We don't really care about definitions
# if mentity isa MClassDef or mentity isa MPropDef then continue
# index.index(mentity)
# ~~~
module model_index
-import model::model_views
+import model::model_collect
+
import trees::trie
+import trees::bktree
+
+redef class Model
+
+ # Keep a direct link to mentities by full name to speed up `mentity_from_uri`
+ var mentities_by_full_name: HashMap[String, MEntity] is lazy do
+ var mentities_by_full_name = new HashMap[String, MEntity]
+ for mentity in collect_mentities do
+ mentities_by_full_name[mentity.full_name] = mentity
+ end
+ return mentities_by_full_name
+ end
+
+ # ModelIndex used to perform searches
+ var index: ModelIndex is lazy, writable do
+ var index = new ModelIndex
+ for mentity in collect_mentities do
+ if mentity isa MClassDef or mentity isa MPropDef then continue
+ index.index mentity
+ end
+ return index
+ end
+
+ redef fun mentities_by_name(name, filter) do
+ var res = new Array[MEntity]
+ if not index.names.has_key(name) then return res
+ for mentity in index.names[name] do
+ if filter == null or filter.accept_mentity(mentity) then res.add mentity
+ end
+ return res
+ end
+
+ redef fun mentity_by_full_name(full_name, filter) do
+ if mentities_by_full_name.has_key(full_name) then
+ var mentity = mentities_by_full_name[full_name]
+ if filter == null or filter.accept_mentity(mentity) then return mentity
+ end
+ return null
+ end
+
+ private var score_sorter = new ScoreComparator
+ private var vis_sorter = new VisibilityComparator
+ private var kind_sorter = new MEntityComparator
+ private var name_sorter = new NameComparator
+ private var lname_sorter = new NameLengthComparator
+ private var fname_sorter = new FullNameComparator
+ private var lfname_sorter = new FullNameLengthComparator
+
+ # Search mentities based on a `query` string
+ #
+ # Lookup the index for anything matching `query` and return `limit` results.
+ #
+ # The algorithm used is the following:
+ # 1- lookup by name prefix
+ # 2- lookup by full_name prefix
+ # 3- loopup by levenshtein distance
+ #
+ # At each step if the `limit` is reached, the algorithm stops and returns the results.
+ fun find(query: String, limit: nullable Int, filter: nullable ModelFilter): Array[MEntity] do
+ # Find, lookup by name prefix
+ var matches = index.find_by_name_prefix(query, filter).uniq.
+ sort(lname_sorter, name_sorter, kind_sorter)
+ if limit != null and matches.length >= limit then
+ return matches.limit(limit).rerank.sort(vis_sorter, score_sorter).mentities
+ end
+ matches = matches.rerank.sort(vis_sorter, score_sorter)
+
+ # If limit not reached, lookup by full_name prefix
+ var malus = matches.length
+ var full_matches = new IndexMatches
+ for match in index.find_by_full_name_prefix(query, filter).
+ sort(kind_sorter, lfname_sorter, fname_sorter) do
+ match.score += malus
+ full_matches.add match
+ end
+ matches = matches.uniq
+ if limit != null and matches.length + full_matches.length >= limit then
+ matches.add_all full_matches
+ matches = matches.uniq.limit(limit).rerank.sort(vis_sorter, score_sorter)
+ return matches.mentities
+ end
+
+ # If limit not reached, lookup by similarity
+ malus = matches.length
+ var sim_matches = new IndexMatches
+ for match in index.find_by_similarity(query, filter).sort(score_sorter, kind_sorter, lname_sorter, name_sorter) do
+ match.score += malus
+ sim_matches.add match
+ end
+ matches.add_all sim_matches
+ matches = matches.uniq
+ if limit != null then matches = matches.limit(limit)
+ return matches.rerank.sort(vis_sorter, score_sorter).mentities
+ end
+end
# ModelIndex indexes mentities by their name and full name
#
# ~~~nitish
# # Build index
# var index = new ModelIndex
-# for mentity in model.private_view.mentities do
+# for mentity in model.collect_mentities do
# if mentity isa MClassDef or mentity isa MPropDef then continue
# index.index(mentity)
# end
# Faster than traversing the tries.
var mentities = new Array[MEntity]
+ # Map of all mentities indexed by their `name`
+ var names = new HashMap[String, Array[MEntity]]
+
# Prefix tree for mentities `name`
#
# Because multiple mentities can share the same `name`, we use a Trie of
# TODO add an option.
var name_prefixes = new Trie[Array[MEntity]]
+ # Distance tree for mentities `name`
+ var name_distances = new BKTree
+
+ # Map of all mentities indexed by their `full_name`
+ var full_names = new HashMap[String, MEntity]
+
# Prefix tree for mentities `full_name`
#
# Even if two mentities cannot share the same `full_name`, we use a Trie of
# arrays of mentities to be consistent with `name_prefixes`.
var full_name_prefixes = new Trie[Array[MEntity]]
+ # Distance tree for mentities `full_name`
+ var full_name_distances = new BKTree
+
# Index `mentity` by it's `MEntity::name`
#
# See `name_prefixes`.
private fun index_by_name(mentity: MEntity) do
var name = mentity.name
+
+ # Index name
+ if not names.has_key(name) then
+ names[name] = new Array[MEntity]
+ end
+ names[name].add mentity
+
+ # Index prefix
if not name_prefixes.has_key(name) then
name_prefixes[name] = new Array[MEntity]
end
name_prefixes[name].add mentity
+
+ # Index distance
+ name_distances.add(name)
end
# Index `mentity` by its `MEntity::full_name`
private fun index_by_full_name(mentity: MEntity) do
var name = mentity.full_name
+
+ # Index full name
+ full_names[name] = mentity
+
+ # Index prefix
if not full_name_prefixes.has_key(name) then
full_name_prefixes[name] = new Array[MEntity]
end
full_name_prefixes[name].add mentity
+
+ # Index distance
+ full_name_distances.add(name)
end
# Index `mentity` so it can be retrieved by a find query
# Results from the Trie are returned in a breadth first manner so we get the
# matches ordered by prefix.
# We preserve that order by giving an incremental score to the `array` items.
- private fun score_results_incremental(array: Array[Array[MEntity]]): IndexMatches do
+ private fun score_results_incremental(array: Array[Array[MEntity]], filter: nullable ModelFilter): IndexMatches do
var results = new IndexMatches
var score = 1
for mentities in array do
for mentity in mentities do
+ if filter != null and not filter.accept_mentity(mentity) then continue
results.add new IndexMatch(mentity, score)
end
score += 1
end
# Find all mentities where `MEntity::name` matches the `prefix`
- fun find_by_name_prefix(prefix: String): IndexMatches do
- return score_results_incremental(name_prefixes.find_by_prefix(prefix))
+ fun find_by_name_prefix(prefix: String, filter: nullable ModelFilter): IndexMatches do
+ return score_results_incremental(name_prefixes.find_by_prefix(prefix), filter)
end
# Find all mentities where `MEntity::full_name` matches the `prefix`
- fun find_by_full_name_prefix(prefix: String): IndexMatches do
- return score_results_incremental(full_name_prefixes.find_by_prefix(prefix))
+ fun find_by_full_name_prefix(prefix: String, filter: nullable ModelFilter): IndexMatches do
+ return score_results_incremental(full_name_prefixes.find_by_prefix(prefix), filter)
end
# Rank all mentities by the distance between `MEntity::name` and `name`
#
# Use the Levenshtein algorithm on all the indexed mentities `name`.
# Warning: may not scale to large indexes.
- fun find_by_name_similarity(name: String): IndexMatches do
+ fun find_by_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
var results = new IndexMatches
- for mentity in mentities do
- if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
+ for match in name_distances.search(name) do
+ var dist = match.distance
+ var mname = match.key
+ if not names.has_key(mname) then continue
+ for mentity in names[mname] do
+ if mentity isa MClassDef or mentity isa MPropDef then continue
+ if filter != null and not filter.accept_mentity(mentity) then continue
+ results.add new IndexMatch(mentity, dist)
+ end
end
return results
end
#
# Use the Levenshtein algorithm on all the indexed mentities `full_name`.
# Warning: may not scale to large indexes.
- fun find_by_full_name_similarity(name: String): IndexMatches do
+ fun find_by_full_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
var results = new IndexMatches
- for mentity in mentities do
+ for match in full_name_distances.search(name) do
+ var dist = match.distance
+ var mname = match.key
+ if not full_names.has_key(mname) then continue
+ var mentity = full_names[mname]
if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
+ if filter != null and not filter.accept_mentity(mentity) then continue
+ results.add new IndexMatch(mentity, dist)
end
return results
end
# Rank all mentities by the distance between `name` and both the mentity name and full name
- fun find_by_similarity(name: String): IndexMatches do
+ fun find_by_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
var results = new IndexMatches
- for mentity in mentities do
- if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
- end
+ results.add_all find_by_full_name_similarity(name, filter)
+ results.add_all find_by_name_similarity(name, filter)
return results
end
# Find mentities by name trying first by prefix then by similarity
- fun find_by_name(name: String): IndexMatches do
- var results = find_by_name_prefix(name)
- for mentity in mentities do
- if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
- end
+ fun find_by_name(name: String, filter: nullable ModelFilter): IndexMatches do
+ var results = find_by_name_prefix(name, filter)
+ results.add_all find_by_name_similarity(name, filter)
return results
end
# Find mentities by full name trying firt by prefix then by similarity
- fun find_by_full_name(name: String): IndexMatches do
+ fun find_by_full_name(name: String, filter: nullable ModelFilter): IndexMatches do
var results = find_by_full_name_prefix(name)
- for mentity in mentities do
- if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
- end
+ results.add_all find_by_full_name_similarity(name, filter)
return results
end
# 2. add full name prefix matches
# 3. try similarity by name
# 4. try similarity by full_name
- fun find(name: String): IndexMatches do
- var results = find_by_name_prefix(name)
-
- for result in find_by_full_name_prefix(name) do
- results.add result
- end
-
- for mentity in mentities do
- if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
- end
+ fun find(name: String, filter: nullable ModelFilter): IndexMatches do
+ var results = find_by_name_prefix(name, filter)
+ results.add_all find_by_full_name_prefix(name, filter)
+ results.add_all find_by_similarity(name, filter)
return results
end
end