module model_index
import model::model_collect
+
import trees::trie
+import trees::bktree
redef class Model
end
# ModelIndex used to perform searches
- var index: ModelIndex is lazy do
+ var index: ModelIndex is lazy, writable do
var index = new ModelIndex
for mentity in collect_mentities do
if mentity isa MClassDef or mentity isa MPropDef then continue
redef fun mentities_by_name(name, filter) do
var res = new Array[MEntity]
- if index.name_prefixes.has_key(name) then
- for mentity in index.name_prefixes[name] do
- if filter == null or filter.accept_mentity(mentity) then
- res.add mentity
- end
- end
+ if not index.names.has_key(name) then return res
+ for mentity in index.names[name] do
+ if filter == null or filter.accept_mentity(mentity) then res.add mentity
end
return res
end
# Faster than traversing the tries.
var mentities = new Array[MEntity]
+ # Map of all mentities indexed by their `name`
+ var names = new HashMap[String, Array[MEntity]]
+
# Prefix tree for mentities `name`
#
# Because multiple mentities can share the same `name`, we use a Trie of
# TODO add an option.
var name_prefixes = new Trie[Array[MEntity]]
+ # Distance tree for mentities `name`
+ var name_distances = new BKTree
+
+ # Map of all mentities indexed by their `full_name`
+ var full_names = new HashMap[String, MEntity]
+
# Prefix tree for mentities `full_name`
#
# Even if two mentities cannot share the same `full_name`, we use a Trie of
# arrays of mentities to be consistent with `name_prefixes`.
var full_name_prefixes = new Trie[Array[MEntity]]
+ # Distance tree for mentities `full_name`
+ var full_name_distances = new BKTree
+
# Index `mentity` by it's `MEntity::name`
#
# See `name_prefixes`.
private fun index_by_name(mentity: MEntity) do
var name = mentity.name
+
+ # Index name
+ if not names.has_key(name) then
+ names[name] = new Array[MEntity]
+ end
+ names[name].add mentity
+
+ # Index prefix
if not name_prefixes.has_key(name) then
name_prefixes[name] = new Array[MEntity]
end
name_prefixes[name].add mentity
+
+ # Index distance
+ name_distances.add(name)
end
# Index `mentity` by its `MEntity::full_name`
private fun index_by_full_name(mentity: MEntity) do
var name = mentity.full_name
+
+ # Index full name
+ full_names[name] = mentity
+
+ # Index prefix
if not full_name_prefixes.has_key(name) then
full_name_prefixes[name] = new Array[MEntity]
end
full_name_prefixes[name].add mentity
+
+ # Index distance
+ full_name_distances.add(name)
end
# Index `mentity` so it can be retrieved by a find query
# Warning: may not scale to large indexes.
fun find_by_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
var results = new IndexMatches
- for mentity in mentities do
- if filter != null and not filter.accept_mentity(mentity) then continue
- if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
+ for match in name_distances.search(name) do
+ var dist = match.distance
+ var mname = match.key
+ if not names.has_key(mname) then continue
+ for mentity in names[mname] do
+ if mentity isa MClassDef or mentity isa MPropDef then continue
+ if filter != null and not filter.accept_mentity(mentity) then continue
+ results.add new IndexMatch(mentity, dist)
+ end
end
return results
end
# Warning: may not scale to large indexes.
fun find_by_full_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
var results = new IndexMatches
- for mentity in mentities do
- if filter != null and not filter.accept_mentity(mentity) then continue
+ for match in full_name_distances.search(name) do
+ var dist = match.distance
+ var mname = match.key
+ if not full_names.has_key(mname) then continue
+ var mentity = full_names[mname]
if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
+ if filter != null and not filter.accept_mentity(mentity) then continue
+ results.add new IndexMatch(mentity, dist)
end
return results
end
# Rank all mentities by the distance between `name` and both the mentity name and full name
fun find_by_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
var results = new IndexMatches
- for mentity in mentities do
- if filter != null and not filter.accept_mentity(mentity) then continue
- if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
- end
+ results.add_all find_by_full_name_similarity(name, filter)
+ results.add_all find_by_name_similarity(name, filter)
return results
end
# Find mentities by name trying first by prefix then by similarity
fun find_by_name(name: String, filter: nullable ModelFilter): IndexMatches do
- var results = find_by_name_prefix(name)
- for mentity in mentities do
- if filter != null and not filter.accept_mentity(mentity) then continue
- if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
- end
+ var results = find_by_name_prefix(name, filter)
+ results.add_all find_by_name_similarity(name, filter)
return results
end
# Find mentities by full name trying firt by prefix then by similarity
fun find_by_full_name(name: String, filter: nullable ModelFilter): IndexMatches do
var results = find_by_full_name_prefix(name)
- for mentity in mentities do
- if filter != null and not filter.accept_mentity(mentity) then continue
- if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
- end
+ results.add_all find_by_full_name_similarity(name, filter)
return results
end
# 4. try similarity by full_name
fun find(name: String, filter: nullable ModelFilter): IndexMatches do
var results = find_by_name_prefix(name, filter)
-
- for result in find_by_full_name_prefix(name, filter) do
- results.add result
- end
-
- for mentity in mentities do
- if filter != null and not filter.accept_mentity(mentity) then continue
- if mentity isa MClassDef or mentity isa MPropDef then continue
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
- results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
- end
+ results.add_all find_by_full_name_prefix(name, filter)
+ results.add_all find_by_similarity(name, filter)
return results
end
end