1 # This file is part of NIT ( http://www.nitlanguage.org ).
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
15 # Search things from the Model
17 # ModelIndex allows you to index mentities then retrieve them by their `name`
19 # It offers a set of `find_*` methods that can be used to match queries
20 # against entities name.
22 # Because each use is different, ModelIndex only provide base raw search services.
23 # All of them return IndexMatches that can be ordered and filtered by the client.
25 # ## Indexing mentities
27 # Before searching something from the ModelIndex, you have to index it.
28 # Use the `ModelIndex::index` method to do that:
31 # var index = new ModelIndex
32 # for mentity in model.collect_mentities do
33 # index.index(mentity)
39 # You can then run queries on the ModelIndex:
42 # for res in index.find("Array").limit(10) do
49 # Here some examples of how you can use the ModelIndex.
51 # ### Smart type prediction
53 # Use ModelIndex to implement a smart prediction system based on the typed prefix:
56 # var index = new ModelIndex
57 # for mentity in model.collect_mentities do
58 # # We don't really care about definitions
59 # if mentity isa MClassDef or mentity isa MPropDef then continue
60 # index.index(mentity)
63 # var typed_prefix = "Arr"
64 # for res in index.find_by_name_prefix(typed_prefix).
65 # uniq. # keep only the best ranked mentity
66 # limit(5). # limit to ten results
67 # sort(new VisibilityComparator, new NameComparator) do # order by visibility then name
72 # Will output something like:
82 # ### Method autocompletion
84 # Find methods from a class full_name:
87 # var class_full_name = "core::Array"
88 # for res in index.find_by_full_name_prefix("{class_full_name}::").
89 # uniq. # keep only the best ranked mentity
90 # sort(new VisibilityComparator). # put private in the bottom of the list
91 # limit(5). # limit to ten results
92 # sort(new FullNameComparator) do # order by lexicographic order
97 # Will output something like:
107 # ### Name typo detection and suggestion
109 # Detect unknown names and suggest corrections:
114 # if index.find_by_name_prefix(name).is_empty then
115 # printn "`{name}` not found, did you mean: "
116 # printn index.find_by_name_similarity(name).sort(new ScoreComparator).limit(2).join(" or ")
121 # Will output something like:
124 # `Zrray` not found, did you mean: Array (1) or array (1)?
128 import model
::model_collect
135 # Keep a direct link to mentities by full name to speed up `mentity_from_uri`
136 var mentities_by_full_name
: HashMap[String, MEntity] is lazy
do
137 var mentities_by_full_name
= new HashMap[String, MEntity]
138 for mentity
in collect_mentities
do
139 mentities_by_full_name
[mentity
.full_name
] = mentity
141 return mentities_by_full_name
144 # ModelIndex used to perform searches
145 var index
: ModelIndex is lazy
, writable do
146 var index
= new ModelIndex
147 for mentity
in collect_mentities
do
148 if mentity
isa MClassDef or mentity
isa MPropDef then continue
154 redef fun mentities_by_name
(name
, filter
) do
155 var res
= new Array[MEntity]
156 if not index
.names
.has_key
(name
) then return res
157 for mentity
in index
.names
[name
] do
158 if filter
== null or filter
.accept_mentity
(mentity
) then res
.add mentity
163 redef fun mentity_by_full_name
(full_name
, filter
) do
164 if mentities_by_full_name
.has_key
(full_name
) then
165 var mentity
= mentities_by_full_name
[full_name
]
166 if filter
== null or filter
.accept_mentity
(mentity
) then return mentity
171 private var score_sorter
= new ScoreComparator
172 private var vis_sorter
= new VisibilityComparator
173 private var kind_sorter
= new MEntityComparator
174 private var name_sorter
= new NameComparator
175 private var lname_sorter
= new NameLengthComparator
176 private var fname_sorter
= new FullNameComparator
177 private var lfname_sorter
= new FullNameLengthComparator
179 # Search mentities based on a `query` string
181 # Lookup the index for anything matching `query` and return `limit` results.
183 # The algorithm used is the following:
184 # 1- lookup by name prefix
185 # 2- lookup by full_name prefix
186 # 3- loopup by levenshtein distance
188 # At each step if the `limit` is reached, the algorithm stops and returns the results.
189 fun find
(query
: String, limit
: nullable Int, filter
: nullable ModelFilter): Array[MEntity] do
190 # Find, lookup by name prefix
191 var matches
= index
.find_by_name_prefix
(query
, filter
).uniq
.
192 sort
(lname_sorter
, name_sorter
, kind_sorter
)
193 if limit
!= null and matches
.length
>= limit
then
194 return matches
.limit
(limit
).rerank
.sort
(vis_sorter
, score_sorter
).mentities
196 matches
= matches
.rerank
.sort
(vis_sorter
, score_sorter
)
198 # If limit not reached, lookup by full_name prefix
199 var malus
= matches
.length
200 var full_matches
= new IndexMatches
201 for match
in index
.find_by_full_name_prefix
(query
, filter
).
202 sort
(kind_sorter
, lfname_sorter
, fname_sorter
) do
204 full_matches
.add match
206 matches
= matches
.uniq
207 if limit
!= null and matches
.length
+ full_matches
.length
>= limit
then
208 matches
.add_all full_matches
209 matches
= matches
.uniq
.limit
(limit
).rerank
.sort
(vis_sorter
, score_sorter
)
210 return matches
.mentities
213 # If limit not reached, lookup by similarity
214 malus
= matches
.length
215 var sim_matches
= new IndexMatches
216 for match
in index
.find_by_similarity
(query
, filter
).sort
(score_sorter
, kind_sorter
, lname_sorter
, name_sorter
) do
218 sim_matches
.add match
220 matches
.add_all sim_matches
221 matches
= matches
.uniq
222 if limit
!= null then matches
= matches
.limit
(limit
)
223 return matches
.rerank
.sort
(vis_sorter
, score_sorter
).mentities
227 # ModelIndex indexes mentities by their name and full name
229 # It provides methods to find mentities based on a prefix or string similarity.
233 # var index = new ModelIndex
234 # for mentity in model.collect_mentities do
235 # if mentity isa MClassDef or mentity isa MPropDef then continue
236 # index.index(mentity)
239 # for e in index.find("Foo").uniq.sort(new ScoreComparator).limit(10) do
240 # print " * {e.score}: {e.mentity.name} ({e.mentity.full_name})"
245 # List of all indexed mentities.
247 # Faster than traversing the tries.
248 var mentities
= new Array[MEntity]
250 # Map of all mentities indexed by their `name`
251 var names
= new HashMap[String, Array[MEntity]]
253 # Prefix tree for mentities `name`
255 # Because multiple mentities can share the same `name`, we use a Trie of
256 # arrays of mentities.
258 # As for now, we do not index class and property definitions.
259 # TODO add an option.
260 var name_prefixes
= new Trie[Array[MEntity]]
262 # Distance tree for mentities `name`
263 var name_distances
= new BKTree
265 # Map of all mentities indexed by their `full_name`
266 var full_names
= new HashMap[String, MEntity]
268 # Prefix tree for mentities `full_name`
270 # Even if two mentities cannot share the same `full_name`, we use a Trie of
271 # arrays of mentities to be consistent with `name_prefixes`.
272 var full_name_prefixes
= new Trie[Array[MEntity]]
274 # Distance tree for mentities `full_name`
275 var full_name_distances
= new BKTree
277 # Index `mentity` by it's `MEntity::name`
279 # See `name_prefixes`.
280 private fun index_by_name
(mentity
: MEntity) do
281 var name
= mentity
.name
284 if not names
.has_key
(name
) then
285 names
[name
] = new Array[MEntity]
287 names
[name
].add mentity
290 if not name_prefixes
.has_key
(name
) then
291 name_prefixes
[name
] = new Array[MEntity]
293 name_prefixes
[name
].add mentity
296 name_distances
.add
(name
)
299 # Index `mentity` by its `MEntity::full_name`
300 private fun index_by_full_name
(mentity
: MEntity) do
301 var name
= mentity
.full_name
304 full_names
[name
] = mentity
307 if not full_name_prefixes
.has_key
(name
) then
308 full_name_prefixes
[name
] = new Array[MEntity]
310 full_name_prefixes
[name
].add mentity
313 full_name_distances
.add
(name
)
316 # Index `mentity` so it can be retrieved by a find query
318 # MEntities are indexed by both name and full_name.
319 fun index
(mentity
: MEntity) do
320 mentities
.add mentity
321 index_by_name mentity
322 index_by_full_name mentity
325 # Translate Trie results to `SearchResult`
327 # This method is used internally to translate each mentity returned by a prefix
328 # match in a Trie into a SearchResult that can be ranked by score.
330 # Results from the Trie are returned in a breadth first manner so we get the
331 # matches ordered by prefix.
332 # We preserve that order by giving an incremental score to the `array` items.
333 private fun score_results_incremental
(array
: Array[Array[MEntity]], filter
: nullable ModelFilter): IndexMatches do
334 var results
= new IndexMatches
336 for mentities
in array
do
337 for mentity
in mentities
do
338 if filter
!= null and not filter
.accept_mentity
(mentity
) then continue
339 results
.add
new IndexMatch(mentity
, score
)
346 # Find all mentities where `MEntity::name` matches the `prefix`
347 fun find_by_name_prefix
(prefix
: String, filter
: nullable ModelFilter): IndexMatches do
348 return score_results_incremental
(name_prefixes
.find_by_prefix
(prefix
), filter
)
351 # Find all mentities where `MEntity::full_name` matches the `prefix`
352 fun find_by_full_name_prefix
(prefix
: String, filter
: nullable ModelFilter): IndexMatches do
353 return score_results_incremental
(full_name_prefixes
.find_by_prefix
(prefix
), filter
)
356 # Rank all mentities by the distance between `MEntity::name` and `name`
358 # Use the Levenshtein algorithm on all the indexed mentities `name`.
359 # Warning: may not scale to large indexes.
360 fun find_by_name_similarity
(name
: String, filter
: nullable ModelFilter): IndexMatches do
361 var results
= new IndexMatches
362 for match
in name_distances
.search
(name
) do
363 var dist
= match
.distance
364 var mname
= match
.key
365 if not names
.has_key
(mname
) then continue
366 for mentity
in names
[mname
] do
367 if mentity
isa MClassDef or mentity
isa MPropDef then continue
368 if filter
!= null and not filter
.accept_mentity
(mentity
) then continue
369 results
.add
new IndexMatch(mentity
, dist
)
375 # Rank all mentities by the distance between `MEntity::full_name` and `full_name`
377 # Use the Levenshtein algorithm on all the indexed mentities `full_name`.
378 # Warning: may not scale to large indexes.
379 fun find_by_full_name_similarity
(name
: String, filter
: nullable ModelFilter): IndexMatches do
380 var results
= new IndexMatches
381 for match
in full_name_distances
.search
(name
) do
382 var dist
= match
.distance
383 var mname
= match
.key
384 if not full_names
.has_key
(mname
) then continue
385 var mentity
= full_names
[mname
]
386 if mentity
isa MClassDef or mentity
isa MPropDef then continue
387 if filter
!= null and not filter
.accept_mentity
(mentity
) then continue
388 results
.add
new IndexMatch(mentity
, dist
)
393 # Rank all mentities by the distance between `name` and both the mentity name and full name
394 fun find_by_similarity
(name
: String, filter
: nullable ModelFilter): IndexMatches do
395 var results
= new IndexMatches
396 results
.add_all find_by_full_name_similarity
(name
, filter
)
397 results
.add_all find_by_name_similarity
(name
, filter
)
401 # Find mentities by name trying first by prefix then by similarity
402 fun find_by_name
(name
: String, filter
: nullable ModelFilter): IndexMatches do
403 var results
= find_by_name_prefix
(name
, filter
)
404 results
.add_all find_by_name_similarity
(name
, filter
)
408 # Find mentities by full name trying firt by prefix then by similarity
409 fun find_by_full_name
(name
: String, filter
: nullable ModelFilter): IndexMatches do
410 var results
= find_by_full_name_prefix
(name
)
411 results
.add_all find_by_full_name_similarity
(name
, filter
)
415 # Find all mentities that matches `name`
417 # 1. try by name prefix
418 # 2. add full name prefix matches
419 # 3. try similarity by name
420 # 4. try similarity by full_name
421 fun find
(name
: String, filter
: nullable ModelFilter): IndexMatches do
422 var results
= find_by_name_prefix
(name
, filter
)
423 results
.add_all find_by_full_name_prefix
(name
, filter
)
424 results
.add_all find_by_similarity
(name
, filter
)
429 # An array of IndexMatch instances returned by the ModelIndex
431 # Index matches can be sorted, filtered and truncated.
433 # Thanks to the fluent interface, the index matches can be manipulated in chain
434 # from a model index query:
437 # var res = index.find("Foo").
439 # sort(new ScoreComparator, new MEntityComparator).
441 # sort(new VisibilityComparator)
444 super Array[IndexMatch]
446 # Create a new ModelMatches from an array of matches
448 # Elements are copied.
449 init from_matches
(matches
: Array[IndexMatch]) do self.add_all matches
451 # Sort the matches with `comparator` (or a list of comparators)
453 # Return a new IndexMatches instance with the sorted results.
455 # When more than one comparator is given, the comparators are applied in a
456 # pipeline where the `n`th comparator is applied only if the `n-1`th comparator
458 fun sort
(comparator
: ScoreComparator...): IndexMatches do
460 if comparator
.length
== 1 then
461 comparator
.first
.sort res
463 var comparators
= new MatchComparators(comparator
)
466 return new IndexMatches.from_matches
(res
)
469 # Limit the matches with `limit`
471 # Return a new IndexMatches instance with only the `limit` first matches.
472 fun limit
(limit
: Int): IndexMatches do
473 var res
= new Array[IndexMatch]
475 if res
.length
>= limit
then break
478 return new IndexMatches.from_matches
(res
)
481 # Remove doublons from the matches
483 # Preverse the lowest score of all the matches for a MEntity.
484 fun uniq
: IndexMatches do
485 var scores
= new HashMap[MEntity, IndexMatch]
486 var res
= new Array[IndexMatch]
488 var mentity
= match
.mentity
489 if scores
.has_key
(mentity
) then
490 var older
= scores
[mentity
]
491 if match
.score
< older
.score
then older
.score
= match
.score
493 scores
[mentity
] = match
497 return new IndexMatches.from_matches
(res
)
500 # Reset score of each matches to follow `self` order
502 # Usefull when you need to apply one sorter over another.
503 fun rerank
: IndexMatches do
504 var res
= new IndexMatches
507 match
.score
= res
.length
512 # Aggregate the mentities for all the matches
514 # Preserve the match order.
515 fun mentities
: Array[MEntity] do
516 var res
= new Array[MEntity]
517 for match
in self do res
.add match
.mentity
522 # An MEntity matched from a ModelIndex
524 # Each match has a `score`. The score should be seen as the distance of
525 # the match from the query. In other words, the lowest is the score, the more
526 # relevant is the match.
530 redef type OTHER: IndexMatch
535 # Score allocated by the search method
537 # A lowest score means a more relevant match.
539 # Scores values are arbitrary, the meaning of `10` vs `2000` really depends
540 # on the search method producing the match and the comparators used to sort
542 # The only universal rule is: low score = relevance.
543 var score
: Int is writable
545 # By default matches are compared only on their score
546 redef fun <=>(o
) do return score
<=> o
.score
548 redef fun to_s
do return "{mentity} ({score})"
551 # Compare two matches by their score
553 # Since the score can be seen as a distance, we want the lowest score first.
554 class ScoreComparator
557 redef type COMPARED: IndexMatch
559 redef fun compare
(o1
, o2
) do return o1
.score
<=> o2
.score
562 # A pipeline of comparators executed in inclusion order
564 # This class is used internally to agregate the behaviors of multiple comparators.
565 # Use `IndexMatches::sort(comparator...)` instead.
566 private class MatchComparators
567 super ScoreComparator
569 # Comparator to use in the array order
570 var comparators
: Array[ScoreComparator]
572 # Compare with each comparator
574 # Return the compare value of the first one to return anything else than 0.
575 redef fun compare
(o1
, o2
) do
576 for comparator
in comparators
do
577 var c
= comparator
.compare
(o1
, o2
)
578 if c
!= 0 then return c
584 # Compare two matches by their MEntity kind
586 # Usefull to order the mentities by kind in this order:
587 # packages, groups, modules and classes, properties.
588 class MEntityComparator
589 super ScoreComparator
591 # See `MEntity::compare_mentity`
592 redef fun compare
(o1
, o2
) do
593 return o1
.mentity
.mentity_kind_rank
<=> o2
.mentity
.mentity_kind_rank
597 # Compare two matches by their MEntity visibility
599 # We reverse the original order so private is at the end of the list.
600 class VisibilityComparator
601 super ScoreComparator
603 redef fun compare
(o1
, o2
) do return o2
.mentity
.visibility
<=> o1
.mentity
.visibility
606 # Compare two matches by their name in lexicographic order
608 # Generally, for a same score, we want to put A before Z.
610 super ScoreComparator
612 redef fun compare
(o1
, o2
) do return o1
.mentity
.name
<=> o2
.mentity
.name
615 # Compare two matches by their name length
616 class NameLengthComparator
617 super ScoreComparator
619 redef fun compare
(o1
, o2
) do return o1
.mentity
.name
.length
<=> o2
.mentity
.name
.length
622 # Compare two matches by their full_name in lexicographic order
624 # Generally, for a same score, we want to put A before Z.
625 class FullNameComparator
626 super ScoreComparator
628 redef fun compare
(o1
, o2
) do return o1
.mentity
.full_name
<=> o2
.mentity
.full_name
631 # Compare two matches by their full name length
632 class FullNameLengthComparator
633 super ScoreComparator
635 redef fun compare
(o1
, o2
) do return o1
.mentity
.full_name
.length
<=> o2
.mentity
.full_name
.length
640 # Compare MEntity class kind
642 # Unknown kind have a virtually high score.
643 private fun mentity_kind_rank
: Int do return 10
647 redef fun mentity_kind_rank
do return 1
651 redef fun mentity_kind_rank
do return 2
655 redef fun mentity_kind_rank
do return 3
659 redef fun mentity_kind_rank
do return 4
662 redef class MClassDef
663 redef fun mentity_kind_rank
do return 5
666 redef class MProperty
667 redef fun mentity_kind_rank
do return 6
671 redef fun mentity_kind_rank
do return 7