c04c21ab5f1b5891cb07bf15b186d9df8bb185ef
[nit.git] / src / model / model_index.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Search things from the Model
16 #
17 # ModelIndex allows you to index mentities then retrieve them by their `name`
18 # or `full_name`.
19 # It offers a set of `find_*` methods that can be used to match queries
20 # against entities name.
21 #
22 # Because each use is different, ModelIndex only provide base raw search services.
23 # All of them return IndexMatches that can be ordered and filtered by the client.
24 #
25 # ## Indexing mentities
26 #
27 # Before searching something from the ModelIndex, you have to index it.
28 # Use the `ModelIndex::index` method to do that:
29 #
30 # ~~~nitish
31 # var index = new ModelIndex
32 # for mentity in model.collect_mentities do
33 # index.index(mentity)
34 # end
35 # ~~~
36 #
37 # ## Search mentities
38 #
39 # You can then run queries on the ModelIndex:
40 #
41 # ~~~nitish
42 # for res in index.find("Array").limit(10) do
43 # print res
44 # end
45 # ~~~
46 #
47 # ## Examples
48 #
49 # Here some examples of how you can use the ModelIndex.
50 #
51 # ### Smart type prediction
52 #
53 # Use ModelIndex to implement a smart prediction system based on the typed prefix:
54 #
55 # ~~~nitish
56 # var index = new ModelIndex
57 # for mentity in model.collect_mentities do
58 # # We don't really care about definitions
59 # if mentity isa MClassDef or mentity isa MPropDef then continue
60 # index.index(mentity)
61 # end
62 #
63 # var typed_prefix = "Arr"
64 # for res in index.find_by_name_prefix(typed_prefix).
65 # uniq. # keep only the best ranked mentity
66 # limit(5). # limit to ten results
67 # sort(new VisibilityComparator, new NameComparator) do # order by visibility then name
68 # print res
69 # end
70 # ~~~
71 #
72 # Will output something like:
73 #
74 # ~~~raw
75 # Array (1)
76 # ArraySet (2)
77 # ArrayMap (3)
78 # ArrayCmp (4)
79 # ArrayMapKeys (5)
80 # ~~~
81 #
82 # ### Method autocompletion
83 #
84 # Find methods from a class full_name:
85 #
86 # ~~~nitish
87 # var class_full_name = "core::Array"
88 # for res in index.find_by_full_name_prefix("{class_full_name}::").
89 # uniq. # keep only the best ranked mentity
90 # sort(new VisibilityComparator). # put private in the bottom of the list
91 # limit(5). # limit to ten results
92 # sort(new FullNameComparator) do # order by lexicographic order
93 # print res
94 # end
95 # ~~~
96 #
97 # Will output something like:
98 #
99 # ~~~raw
100 # * (2)
101 # + (1)
102 # filled_with (5)
103 # from (3)
104 # with_items (4)
105 # ~~~
106 #
107 # ### Name typo detection and suggestion
108 #
109 # Detect unknown names and suggest corrections:
110 #
111 # ~~~nitish
112 # var name = "Zrray"
113 #
114 # if index.find_by_name_prefix(name).is_empty then
115 # printn "`{name}` not found, did you mean: "
116 # printn index.find_by_name_similarity(name).sort(new ScoreComparator).limit(2).join(" or ")
117 # print "?"
118 # end
119 # ~~~
120 #
121 # Will output something like:
122 #
123 # ~~~raw
124 # `Zrray` not found, did you mean: Array (1) or array (1)?
125 # ~~~
126 module model_index
127
128 import model::model_collect
129 import trees::trie
130
131 redef class Model
132
133 # Keep a direct link to mentities by full name to speed up `mentity_from_uri`
134 var mentities_by_full_name: HashMap[String, MEntity] is lazy do
135 var mentities_by_full_name = new HashMap[String, MEntity]
136 for mentity in collect_mentities do
137 mentities_by_full_name[mentity.full_name] = mentity
138 end
139 return mentities_by_full_name
140 end
141
142 # ModelIndex used to perform searches
143 var index: ModelIndex is lazy, writable do
144 var index = new ModelIndex
145 for mentity in collect_mentities do
146 if mentity isa MClassDef or mentity isa MPropDef then continue
147 index.index mentity
148 end
149 return index
150 end
151
152 redef fun mentities_by_name(name, filter) do
153 var res = new Array[MEntity]
154 if not index.names.has_key(name) then return res
155 for mentity in index.names[name] do
156 if filter == null or filter.accept_mentity(mentity) then res.add mentity
157 end
158 return res
159 end
160
161 redef fun mentity_by_full_name(full_name, filter) do
162 if mentities_by_full_name.has_key(full_name) then
163 var mentity = mentities_by_full_name[full_name]
164 if filter == null or filter.accept_mentity(mentity) then return mentity
165 end
166 return null
167 end
168
169 private var score_sorter = new ScoreComparator
170 private var vis_sorter = new VisibilityComparator
171 private var kind_sorter = new MEntityComparator
172 private var name_sorter = new NameComparator
173 private var lname_sorter = new NameLengthComparator
174 private var fname_sorter = new FullNameComparator
175 private var lfname_sorter = new FullNameLengthComparator
176
177 # Search mentities based on a `query` string
178 #
179 # Lookup the index for anything matching `query` and return `limit` results.
180 #
181 # The algorithm used is the following:
182 # 1- lookup by name prefix
183 # 2- lookup by full_name prefix
184 # 3- loopup by levenshtein distance
185 #
186 # At each step if the `limit` is reached, the algorithm stops and returns the results.
187 fun find(query: String, limit: nullable Int, filter: nullable ModelFilter): Array[MEntity] do
188 # Find, lookup by name prefix
189 var matches = index.find_by_name_prefix(query, filter).uniq.
190 sort(lname_sorter, name_sorter, kind_sorter)
191 if limit != null and matches.length >= limit then
192 return matches.limit(limit).rerank.sort(vis_sorter, score_sorter).mentities
193 end
194 matches = matches.rerank.sort(vis_sorter, score_sorter)
195
196 # If limit not reached, lookup by full_name prefix
197 var malus = matches.length
198 var full_matches = new IndexMatches
199 for match in index.find_by_full_name_prefix(query, filter).
200 sort(kind_sorter, lfname_sorter, fname_sorter) do
201 match.score += malus
202 full_matches.add match
203 end
204 matches = matches.uniq
205 if limit != null and matches.length + full_matches.length >= limit then
206 matches.add_all full_matches
207 matches = matches.uniq.limit(limit).rerank.sort(vis_sorter, score_sorter)
208 return matches.mentities
209 end
210
211 # If limit not reached, lookup by similarity
212 malus = matches.length
213 var sim_matches = new IndexMatches
214 for match in index.find_by_similarity(query, filter).sort(score_sorter, kind_sorter, lname_sorter, name_sorter) do
215 match.score += malus
216 sim_matches.add match
217 end
218 matches.add_all sim_matches
219 matches = matches.uniq
220 if limit != null then matches = matches.limit(limit)
221 return matches.rerank.sort(vis_sorter, score_sorter).mentities
222 end
223 end
224
225 # ModelIndex indexes mentities by their name and full name
226 #
227 # It provides methods to find mentities based on a prefix or string similarity.
228 #
229 # ~~~nitish
230 # # Build index
231 # var index = new ModelIndex
232 # for mentity in model.collect_mentities do
233 # if mentity isa MClassDef or mentity isa MPropDef then continue
234 # index.index(mentity)
235 # end
236 #
237 # for e in index.find("Foo").uniq.sort(new ScoreComparator).limit(10) do
238 # print " * {e.score}: {e.mentity.name} ({e.mentity.full_name})"
239 # end
240 # ~~~
241 class ModelIndex
242
243 # List of all indexed mentities.
244 #
245 # Faster than traversing the tries.
246 var mentities = new Array[MEntity]
247
248 # Map of all mentities indexed by their `name`
249 var names = new HashMap[String, Array[MEntity]]
250
251 # Prefix tree for mentities `name`
252 #
253 # Because multiple mentities can share the same `name`, we use a Trie of
254 # arrays of mentities.
255 #
256 # As for now, we do not index class and property definitions.
257 # TODO add an option.
258 var name_prefixes = new Trie[Array[MEntity]]
259
260 # Map of all mentities indexed by their `full_name`
261 var full_names = new HashMap[String, MEntity]
262
263 # Prefix tree for mentities `full_name`
264 #
265 # Even if two mentities cannot share the same `full_name`, we use a Trie of
266 # arrays of mentities to be consistent with `name_prefixes`.
267 var full_name_prefixes = new Trie[Array[MEntity]]
268
269 # Index `mentity` by it's `MEntity::name`
270 #
271 # See `name_prefixes`.
272 private fun index_by_name(mentity: MEntity) do
273 var name = mentity.name
274
275 # Index name
276 if not names.has_key(name) then
277 names[name] = new Array[MEntity]
278 end
279 names[name].add mentity
280
281 # Index prefix
282 if not name_prefixes.has_key(name) then
283 name_prefixes[name] = new Array[MEntity]
284 end
285 name_prefixes[name].add mentity
286 end
287
288 # Index `mentity` by its `MEntity::full_name`
289 private fun index_by_full_name(mentity: MEntity) do
290 var name = mentity.full_name
291
292 # Index full name
293 full_names[name] = mentity
294
295 # Index prefix
296 if not full_name_prefixes.has_key(name) then
297 full_name_prefixes[name] = new Array[MEntity]
298 end
299 full_name_prefixes[name].add mentity
300 end
301
302 # Index `mentity` so it can be retrieved by a find query
303 #
304 # MEntities are indexed by both name and full_name.
305 fun index(mentity: MEntity) do
306 mentities.add mentity
307 index_by_name mentity
308 index_by_full_name mentity
309 end
310
311 # Translate Trie results to `SearchResult`
312 #
313 # This method is used internally to translate each mentity returned by a prefix
314 # match in a Trie into a SearchResult that can be ranked by score.
315 #
316 # Results from the Trie are returned in a breadth first manner so we get the
317 # matches ordered by prefix.
318 # We preserve that order by giving an incremental score to the `array` items.
319 private fun score_results_incremental(array: Array[Array[MEntity]], filter: nullable ModelFilter): IndexMatches do
320 var results = new IndexMatches
321 var score = 1
322 for mentities in array do
323 for mentity in mentities do
324 if filter != null and not filter.accept_mentity(mentity) then continue
325 results.add new IndexMatch(mentity, score)
326 end
327 score += 1
328 end
329 return results
330 end
331
332 # Find all mentities where `MEntity::name` matches the `prefix`
333 fun find_by_name_prefix(prefix: String, filter: nullable ModelFilter): IndexMatches do
334 return score_results_incremental(name_prefixes.find_by_prefix(prefix), filter)
335 end
336
337 # Find all mentities where `MEntity::full_name` matches the `prefix`
338 fun find_by_full_name_prefix(prefix: String, filter: nullable ModelFilter): IndexMatches do
339 return score_results_incremental(full_name_prefixes.find_by_prefix(prefix), filter)
340 end
341
342 # Rank all mentities by the distance between `MEntity::name` and `name`
343 #
344 # Use the Levenshtein algorithm on all the indexed mentities `name`.
345 # Warning: may not scale to large indexes.
346 fun find_by_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
347 var results = new IndexMatches
348 for mentity in mentities do
349 if filter != null and not filter.accept_mentity(mentity) then continue
350 if mentity isa MClassDef or mentity isa MPropDef then continue
351 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
352 end
353 return results
354 end
355
356 # Rank all mentities by the distance between `MEntity::full_name` and `full_name`
357 #
358 # Use the Levenshtein algorithm on all the indexed mentities `full_name`.
359 # Warning: may not scale to large indexes.
360 fun find_by_full_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
361 var results = new IndexMatches
362 for mentity in mentities do
363 if filter != null and not filter.accept_mentity(mentity) then continue
364 if mentity isa MClassDef or mentity isa MPropDef then continue
365 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
366 end
367 return results
368 end
369
370 # Rank all mentities by the distance between `name` and both the mentity name and full name
371 fun find_by_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
372 var results = new IndexMatches
373 for mentity in mentities do
374 if filter != null and not filter.accept_mentity(mentity) then continue
375 if mentity isa MClassDef or mentity isa MPropDef then continue
376 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
377 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
378 end
379 return results
380 end
381
382 # Find mentities by name trying first by prefix then by similarity
383 fun find_by_name(name: String, filter: nullable ModelFilter): IndexMatches do
384 var results = find_by_name_prefix(name)
385 for mentity in mentities do
386 if filter != null and not filter.accept_mentity(mentity) then continue
387 if mentity isa MClassDef or mentity isa MPropDef then continue
388 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
389 end
390 return results
391 end
392
393 # Find mentities by full name trying firt by prefix then by similarity
394 fun find_by_full_name(name: String, filter: nullable ModelFilter): IndexMatches do
395 var results = find_by_full_name_prefix(name)
396 for mentity in mentities do
397 if filter != null and not filter.accept_mentity(mentity) then continue
398 if mentity isa MClassDef or mentity isa MPropDef then continue
399 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
400 end
401 return results
402 end
403
404 # Find all mentities that matches `name`
405 #
406 # 1. try by name prefix
407 # 2. add full name prefix matches
408 # 3. try similarity by name
409 # 4. try similarity by full_name
410 fun find(name: String, filter: nullable ModelFilter): IndexMatches do
411 var results = find_by_name_prefix(name, filter)
412
413 for result in find_by_full_name_prefix(name, filter) do
414 results.add result
415 end
416
417 for mentity in mentities do
418 if filter != null and not filter.accept_mentity(mentity) then continue
419 if mentity isa MClassDef or mentity isa MPropDef then continue
420 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
421 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
422 end
423 return results
424 end
425 end
426
427 # An array of IndexMatch instances returned by the ModelIndex
428 #
429 # Index matches can be sorted, filtered and truncated.
430 #
431 # Thanks to the fluent interface, the index matches can be manipulated in chain
432 # from a model index query:
433 #
434 # ~~~nitish
435 # var res = index.find("Foo").
436 # uniq.
437 # sort(new ScoreComparator, new MEntityComparator).
438 # limit(10).
439 # sort(new VisibilityComparator)
440 # ~~~
441 class IndexMatches
442 super Array[IndexMatch]
443
444 # Create a new ModelMatches from an array of matches
445 #
446 # Elements are copied.
447 init from_matches(matches: Array[IndexMatch]) do self.add_all matches
448
449 # Sort the matches with `comparator` (or a list of comparators)
450 #
451 # Return a new IndexMatches instance with the sorted results.
452 #
453 # When more than one comparator is given, the comparators are applied in a
454 # pipeline where the `n`th comparator is applied only if the `n-1`th comparator
455 # returned 0.
456 fun sort(comparator: ScoreComparator...): IndexMatches do
457 var res = to_a
458 if comparator.length == 1 then
459 comparator.first.sort res
460 else
461 var comparators = new MatchComparators(comparator)
462 comparators.sort res
463 end
464 return new IndexMatches.from_matches(res)
465 end
466
467 # Limit the matches with `limit`
468 #
469 # Return a new IndexMatches instance with only the `limit` first matches.
470 fun limit(limit: Int): IndexMatches do
471 var res = new Array[IndexMatch]
472 for match in self do
473 if res.length >= limit then break
474 res.add match
475 end
476 return new IndexMatches.from_matches(res)
477 end
478
479 # Remove doublons from the matches
480 #
481 # Preverse the lowest score of all the matches for a MEntity.
482 fun uniq: IndexMatches do
483 var scores = new HashMap[MEntity, IndexMatch]
484 var res = new Array[IndexMatch]
485 for match in self do
486 var mentity = match.mentity
487 if scores.has_key(mentity) then
488 var older = scores[mentity]
489 if match.score < older.score then older.score = match.score
490 else
491 scores[mentity] = match
492 res.add match
493 end
494 end
495 return new IndexMatches.from_matches(res)
496 end
497
498 # Reset score of each matches to follow `self` order
499 #
500 # Usefull when you need to apply one sorter over another.
501 fun rerank: IndexMatches do
502 var res = new IndexMatches
503 for match in self do
504 res.add match
505 match.score = res.length
506 end
507 return res
508 end
509
510 # Aggregate the mentities for all the matches
511 #
512 # Preserve the match order.
513 fun mentities: Array[MEntity] do
514 var res = new Array[MEntity]
515 for match in self do res.add match.mentity
516 return res
517 end
518 end
519
520 # An MEntity matched from a ModelIndex
521 #
522 # Each match has a `score`. The score should be seen as the distance of
523 # the match from the query. In other words, the lowest is the score, the more
524 # relevant is the match.
525 class IndexMatch
526 super Comparable
527
528 redef type OTHER: IndexMatch
529
530 # MEntity matches
531 var mentity: MEntity
532
533 # Score allocated by the search method
534 #
535 # A lowest score means a more relevant match.
536 #
537 # Scores values are arbitrary, the meaning of `10` vs `2000` really depends
538 # on the search method producing the match and the comparators used to sort
539 # the matches.
540 # The only universal rule is: low score = relevance.
541 var score: Int is writable
542
543 # By default matches are compared only on their score
544 redef fun <=>(o) do return score <=> o.score
545
546 redef fun to_s do return "{mentity} ({score})"
547 end
548
549 # Compare two matches by their score
550 #
551 # Since the score can be seen as a distance, we want the lowest score first.
552 class ScoreComparator
553 super Comparator
554
555 redef type COMPARED: IndexMatch
556
557 redef fun compare(o1, o2) do return o1.score <=> o2.score
558 end
559
560 # A pipeline of comparators executed in inclusion order
561 #
562 # This class is used internally to agregate the behaviors of multiple comparators.
563 # Use `IndexMatches::sort(comparator...)` instead.
564 private class MatchComparators
565 super ScoreComparator
566
567 # Comparator to use in the array order
568 var comparators: Array[ScoreComparator]
569
570 # Compare with each comparator
571 #
572 # Return the compare value of the first one to return anything else than 0.
573 redef fun compare(o1, o2) do
574 for comparator in comparators do
575 var c = comparator.compare(o1, o2)
576 if c != 0 then return c
577 end
578 return 0
579 end
580 end
581
582 # Compare two matches by their MEntity kind
583 #
584 # Usefull to order the mentities by kind in this order:
585 # packages, groups, modules and classes, properties.
586 class MEntityComparator
587 super ScoreComparator
588
589 # See `MEntity::compare_mentity`
590 redef fun compare(o1, o2) do
591 return o1.mentity.mentity_kind_rank <=> o2.mentity.mentity_kind_rank
592 end
593 end
594
595 # Compare two matches by their MEntity visibility
596 #
597 # We reverse the original order so private is at the end of the list.
598 class VisibilityComparator
599 super ScoreComparator
600
601 redef fun compare(o1, o2) do return o2.mentity.visibility <=> o1.mentity.visibility
602 end
603
604 # Compare two matches by their name in lexicographic order
605 #
606 # Generally, for a same score, we want to put A before Z.
607 class NameComparator
608 super ScoreComparator
609
610 redef fun compare(o1, o2) do return o1.mentity.name <=> o2.mentity.name
611 end
612
613 # Compare two matches by their name length
614 class NameLengthComparator
615 super ScoreComparator
616
617 redef fun compare(o1, o2) do return o1.mentity.name.length <=> o2.mentity.name.length
618 end
619
620 # Compare two matches by their full_name in lexicographic order
621 #
622 # Generally, for a same score, we want to put A before Z.
623 class FullNameComparator
624 super ScoreComparator
625
626 redef fun compare(o1, o2) do return o1.mentity.full_name <=> o2.mentity.full_name
627 end
628
629 # Compare two matches by their full name length
630 class FullNameLengthComparator
631 super ScoreComparator
632
633 redef fun compare(o1, o2) do return o1.mentity.full_name.length <=> o2.mentity.full_name.length
634 end
635
636 redef class MEntity
637
638 # Compare MEntity class kind
639 #
640 # Unknown kind have a virtually high score.
641 private fun mentity_kind_rank: Int do return 10
642 end
643
644 redef class MPackage
645 redef fun mentity_kind_rank do return 1
646 end
647
648 redef class MGroup
649 redef fun mentity_kind_rank do return 2
650 end
651
652 redef class MModule
653 redef fun mentity_kind_rank do return 3
654 end
655
656 redef class MClass
657 redef fun mentity_kind_rank do return 4
658 end
659
660 redef class MClassDef
661 redef fun mentity_kind_rank do return 5
662 end
663
664 redef class MProperty
665 redef fun mentity_kind_rank do return 6
666 end
667
668 redef class MPropDef
669 redef fun mentity_kind_rank do return 7
670 end