module model_index
import model::model_collect
+
import trees::trie
+import trees::bktree
redef class Model
end
# ModelIndex used to perform searches
- var index: ModelIndex is lazy do
+ var index: ModelIndex is lazy, writable do
var index = new ModelIndex
for mentity in collect_mentities do
if mentity isa MClassDef or mentity isa MPropDef then continue
redef fun mentities_by_name(name, filter) do
var res = new Array[MEntity]
- if index.name_prefixes.has_key(name) then
- for mentity in index.name_prefixes[name] do
- if filter == null or filter.accept_mentity(mentity) then
- res.add mentity
- end
- end
+ if not index.names.has_key(name) then return res
+ for mentity in index.names[name] do
+ if filter == null or filter.accept_mentity(mentity) then res.add mentity
end
return res
end
# 3- loopup by levenshtein distance
#
# At each step if the `limit` is reached, the algorithm stops and returns the results.
- fun find(query: String, limit: nullable Int): Array[MEntity] do
+ fun find(query: String, limit: nullable Int, filter: nullable ModelFilter): Array[MEntity] do
# Find, lookup by name prefix
- var matches = index.find_by_name_prefix(query).uniq.
+ var matches = index.find_by_name_prefix(query, filter).uniq.
sort(lname_sorter, name_sorter, kind_sorter)
if limit != null and matches.length >= limit then
return matches.limit(limit).rerank.sort(vis_sorter, score_sorter).mentities
# If limit not reached, lookup by full_name prefix
var malus = matches.length
var full_matches = new IndexMatches
- for match in index.find_by_full_name_prefix(query).
+ for match in index.find_by_full_name_prefix(query, filter).
sort(kind_sorter, lfname_sorter, fname_sorter) do
match.score += malus
full_matches.add match
# If limit not reached, lookup by similarity
malus = matches.length
var sim_matches = new IndexMatches
- for match in index.find_by_similarity(query).sort(score_sorter, kind_sorter, lname_sorter, name_sorter) do
+ for match in index.find_by_similarity(query, filter).sort(score_sorter, kind_sorter, lname_sorter, name_sorter) do
match.score += malus
sim_matches.add match
end
# Faster than traversing the tries.
var mentities = new Array[MEntity]
+ # Map of all mentities indexed by their `name`
+ var names = new HashMap[String, Array[MEntity]]
+
# Prefix tree for mentities `name`
#
# Because multiple mentities can share the same `name`, we use a Trie of
# TODO add an option.
var name_prefixes = new Trie[Array[MEntity]]
+ # Distance tree for mentities `name`
+ var name_distances = new BKTree
+
+ # Map of all mentities indexed by their `full_name`
+ var full_names = new HashMap[String, MEntity]
+
# Prefix tree for mentities `full_name`
#
# Even if two mentities cannot share the same `full_name`, we use a Trie of
# arrays of mentities to be consistent with `name_prefixes`.
var full_name_prefixes = new Trie[Array[MEntity]]
+ # Distance tree for mentities `full_name`
+ var full_name_distances = new BKTree
+
# Index `mentity` by it's `MEntity::name`
#
# See `name_prefixes`.
private fun index_by_name(mentity: MEntity) do
var name = mentity.name
+
+ # Index name
+ if not names.has_key(name) then
+ names[name] = new Array[MEntity]
+ end
+ names[name].add mentity
+
+ # Index prefix
if not name_prefixes.has_key(name) then
name_prefixes[name] = new Array[MEntity]
end
name_prefixes[name].add mentity
+
+ # Index distance
+ name_distances.add(name)
end
# Index `mentity` by its `MEntity::full_name`
private fun index_by_full_name(mentity: MEntity) do
var name = mentity.full_name
+
+ # Index full name
+ full_names[name] = mentity
+
+ # Index prefix
if not full_name_prefixes.has_key(name) then
full_name_prefixes[name] = new Array[MEntity]
end
full_name_prefixes[name].add mentity
+
+ # Index distance
+ full_name_distances.add(name)
end
# Index `mentity` so it can be retrieved by a find query
# Results from the Trie are returned in a breadth first manner so we get the
# matches ordered by prefix.
# We preserve that order by giving an incremental score to the `array` items.
- private fun score_results_incremental(array: Array[Array[MEntity]]): IndexMatches do
+ private fun score_results_incremental(array: Array[Array[MEntity]], filter: nullable ModelFilter): IndexMatches do
var results = new IndexMatches
var score = 1
for mentities in array do
for mentity in mentities do
+ if filter != null and not filter.accept_mentity(mentity) then continue
results.add new IndexMatch(mentity, score)
end
score += 1
end
# Find all mentities where `MEntity::name` matches the `prefix`
- fun find_by_name_prefix(prefix: String): IndexMatches do
- return score_results_incremental(name_prefixes.find_by_prefix(prefix))
+ fun find_by_name_prefix(prefix: String, filter: nullable ModelFilter): IndexMatches do
+ return score_results_incremental(name_prefixes.find_by_prefix(prefix), filter)
end
# Find all mentities where `MEntity::full_name` matches the `prefix`
- fun find_by_full_name_prefix(prefix: String): IndexMatches do
- return score_results_incremental(full_name_prefixes.find_by_prefix(prefix))
+ fun find_by_full_name_prefix(prefix: String, filter: nullable ModelFilter): IndexMatches do
+ return score_results_incremental(full_name_prefixes.find_by_prefix(prefix), filter)
end
# Rank all mentities by the distance between `MEntity::name` and `name`
#
# Use the Levenshtein algorithm on all the indexed mentities `name`.
# Warning: may not scale to large indexes.
- fun find_by_name_similarity(name: String): IndexMatches do
+ fun find_by_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
var results = new IndexMatches
- for mentity in mentities do
- if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
+ for match in name_distances.search(name) do
+ var dist = match.distance
+ var mname = match.key
+ if not names.has_key(mname) then continue
+ for mentity in names[mname] do
+ if mentity isa MClassDef or mentity isa MPropDef then continue
+ if filter != null and not filter.accept_mentity(mentity) then continue
+ results.add new IndexMatch(mentity, dist)
+ end
end
return results
end
#
# Use the Levenshtein algorithm on all the indexed mentities `full_name`.
# Warning: may not scale to large indexes.
- fun find_by_full_name_similarity(name: String): IndexMatches do
+ fun find_by_full_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
var results = new IndexMatches
- for mentity in mentities do
+ for match in full_name_distances.search(name) do
+ var dist = match.distance
+ var mname = match.key
+ if not full_names.has_key(mname) then continue
+ var mentity = full_names[mname]
if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
+ if filter != null and not filter.accept_mentity(mentity) then continue
+ results.add new IndexMatch(mentity, dist)
end
return results
end
# Rank all mentities by the distance between `name` and both the mentity name and full name
- fun find_by_similarity(name: String): IndexMatches do
+ fun find_by_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
var results = new IndexMatches
- for mentity in mentities do
- if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
- end
+ results.add_all find_by_full_name_similarity(name, filter)
+ results.add_all find_by_name_similarity(name, filter)
return results
end
# Find mentities by name trying first by prefix then by similarity
- fun find_by_name(name: String): IndexMatches do
- var results = find_by_name_prefix(name)
- for mentity in mentities do
- if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
- end
+ fun find_by_name(name: String, filter: nullable ModelFilter): IndexMatches do
+ var results = find_by_name_prefix(name, filter)
+ results.add_all find_by_name_similarity(name, filter)
return results
end
# Find mentities by full name trying firt by prefix then by similarity
- fun find_by_full_name(name: String): IndexMatches do
+ fun find_by_full_name(name: String, filter: nullable ModelFilter): IndexMatches do
var results = find_by_full_name_prefix(name)
- for mentity in mentities do
- if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
- end
+ results.add_all find_by_full_name_similarity(name, filter)
return results
end
# 2. add full name prefix matches
# 3. try similarity by name
# 4. try similarity by full_name
- fun find(name: String): IndexMatches do
- var results = find_by_name_prefix(name)
-
- for result in find_by_full_name_prefix(name) do
- results.add result
- end
-
- for mentity in mentities do
- if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
- end
+ fun find(name: String, filter: nullable ModelFilter): IndexMatches do
+ var results = find_by_name_prefix(name, filter)
+ results.add_all find_by_full_name_prefix(name, filter)
+ results.add_all find_by_similarity(name, filter)
return results
end
end