model_views: introduce `mentities_by_name` in views
[nit.git] / src / model / model_index.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Search things from the Model
16 #
17 # ModelIndex allows you to index mentities then retrieve them by their `name`
18 # or `full_name`.
19 # It offers a set of `find_*` methods that can be used to match queries
20 # against entities name.
21 #
22 # Because each use is different, ModelIndex only provide base raw search services.
23 # All of them return IndexMatches that can be ordered and filtered by the client.
24 #
25 # ## Indexing mentities
26 #
27 # Before searching something from the ModelIndex, you have to index it.
28 # Use the `ModelIndex::index` method to do that:
29 #
30 # ~~~nitish
31 # var index = new ModelIndex
32 # var view = new ModelView(model, mainmodule)
33 # for mentity in view.mentities do
34 # index.index(mentity)
35 # end
36 # ~~~
37 #
38 # ## Search mentities
39 #
40 # You can then run queries on the ModelIndex:
41 #
42 # ~~~nitish
43 # for res in index.find("Array").limit(10) do
44 # print res
45 # end
46 # ~~~
47 #
48 # ## Examples
49 #
50 # Here some examples of how you can use the ModelIndex.
51 #
52 # ### Smart type prediction
53 #
54 # Use ModelIndex to implement a smart prediction system based on the typed prefix:
55 #
56 # ~~~nitish
57 # var index = new ModelIndex
58 # var view = new ModelView(model, mainmodule)
59 # for mentity in view.mentities do
60 # # We don't really care about definitions
61 # if mentity isa MClassDef or mentity isa MPropDef then continue
62 # index.index(mentity)
63 # end
64 #
65 # var typed_prefix = "Arr"
66 # for res in index.find_by_name_prefix(typed_prefix).
67 # uniq. # keep only the best ranked mentity
68 # limit(5). # limit to ten results
69 # sort(new VisibilityComparator, new NameComparator) do # order by visibility then name
70 # print res
71 # end
72 # ~~~
73 #
74 # Will output something like:
75 #
76 # ~~~raw
77 # Array (1)
78 # ArraySet (2)
79 # ArrayMap (3)
80 # ArrayCmp (4)
81 # ArrayMapKeys (5)
82 # ~~~
83 #
84 # ### Method autocompletion
85 #
86 # Find methods from a class full_name:
87 #
88 # ~~~nitish
89 # var class_full_name = "core::Array"
90 # for res in index.find_by_full_name_prefix("{class_full_name}::").
91 # uniq. # keep only the best ranked mentity
92 # sort(new VisibilityComparator). # put private in the bottom of the list
93 # limit(5). # limit to ten results
94 # sort(new FullNameComparator) do # order by lexicographic order
95 # print res
96 # end
97 # ~~~
98 #
99 # Will output something like:
100 #
101 # ~~~raw
102 # * (2)
103 # + (1)
104 # filled_with (5)
105 # from (3)
106 # with_items (4)
107 # ~~~
108 #
109 # ### Name typo detection and suggestion
110 #
111 # Detect unknown names and suggest corrections:
112 #
113 # ~~~nitish
114 # var name = "Zrray"
115 #
116 # if index.find_by_name_prefix(name).is_empty then
117 # printn "`{name}` not found, did you mean: "
118 # printn index.find_by_name_similarity(name).sort(new ScoreComparator).limit(2).join(" or ")
119 # print "?"
120 # end
121 # ~~~
122 #
123 # Will output something like:
124 #
125 # ~~~raw
126 # `Zrray` not found, did you mean: Array (1) or array (1)?
127 # ~~~
128 module model_index
129
130 import model::model_views
131 import trees::trie
132
133 redef class ModelView
134
135 # Keep a direct link to mentities by full name to speed up `mentity_from_uri`
136 var mentities_by_full_name: HashMap[String, MEntity] is lazy do
137 var mentities_by_full_name = new HashMap[String, MEntity]
138 for mentity in mentities do
139 mentities_by_full_name[mentity.full_name] = mentity
140 end
141 return mentities_by_full_name
142 end
143
144 # ModelIndex used to perform searches
145 var index: ModelIndex is lazy do
146 var index = new ModelIndex
147 for mentity in mentities do
148 if mentity isa MClassDef or mentity isa MPropDef then continue
149 index.index mentity
150 end
151 return index
152 end
153
154 redef fun mentities_by_name(name) do
155 if index.name_prefixes.has_key(name) then
156 return index.name_prefixes[name]
157 end
158 return new Array[MEntity]
159 end
160
161 redef fun mentity_by_full_name(full_name) do
162 if mentities_by_full_name.has_key(full_name) then
163 return mentities_by_full_name[full_name]
164 end
165 return null
166 end
167
168 private var score_sorter = new ScoreComparator
169 private var vis_sorter = new VisibilityComparator
170 private var kind_sorter = new MEntityComparator
171 private var name_sorter = new NameComparator
172 private var lname_sorter = new NameLengthComparator
173 private var fname_sorter = new FullNameComparator
174 private var lfname_sorter = new FullNameLengthComparator
175
176 # Search mentities based on a `query` string
177 #
178 # Lookup the view index for anything matching `query` and return `limit` results.
179 #
180 # The algorithm used is the following:
181 # 1- lookup by name prefix
182 # 2- lookup by full_name prefix
183 # 3- loopup by levenshtein distance
184 #
185 # At each step if the `limit` is reached, the algorithm stops and returns the results.
186 fun find(query: String, limit: nullable Int): Array[MEntity] do
187 # Find, lookup by name prefix
188 var matches = index.find_by_name_prefix(query).uniq.
189 sort(lname_sorter, name_sorter, kind_sorter)
190 if limit != null and matches.length >= limit then
191 return matches.limit(limit).rerank.sort(vis_sorter, score_sorter).mentities
192 end
193 matches = matches.rerank.sort(vis_sorter, score_sorter)
194
195 # If limit not reached, lookup by full_name prefix
196 var malus = matches.length
197 var full_matches = new IndexMatches
198 for match in index.find_by_full_name_prefix(query).
199 sort(kind_sorter, lfname_sorter, fname_sorter) do
200 match.score += malus
201 full_matches.add match
202 end
203 matches = matches.uniq
204 if limit != null and matches.length + full_matches.length >= limit then
205 matches.add_all full_matches
206 matches = matches.uniq.limit(limit).rerank.sort(vis_sorter, score_sorter)
207 return matches.mentities
208 end
209
210 # If limit not reached, lookup by similarity
211 malus = matches.length
212 var sim_matches = new IndexMatches
213 for match in index.find_by_similarity(query).sort(score_sorter, kind_sorter, lname_sorter, name_sorter) do
214 match.score += malus
215 sim_matches.add match
216 end
217 matches.add_all sim_matches
218 matches = matches.uniq
219 if limit != null then matches = matches.limit(limit)
220 return matches.rerank.sort(vis_sorter, score_sorter).mentities
221 end
222 end
223
224 # ModelIndex indexes mentities by their name and full name
225 #
226 # It provides methods to find mentities based on a prefix or string similarity.
227 #
228 # ~~~nitish
229 # # Build index
230 # var index = new ModelIndex
231 # var view = new ModelView(model, mainmodule)
232 # for mentity in view.mentities do
233 # if mentity isa MClassDef or mentity isa MPropDef then continue
234 # index.index(mentity)
235 # end
236 #
237 # for e in index.find("Foo").uniq.sort(new ScoreComparator).limit(10) do
238 # print " * {e.score}: {e.mentity.name} ({e.mentity.full_name})"
239 # end
240 # ~~~
241 class ModelIndex
242
243 # List of all indexed mentities.
244 #
245 # Faster than traversing the tries.
246 var mentities = new Array[MEntity]
247
248 # Prefix tree for mentities `name`
249 #
250 # Because multiple mentities can share the same `name`, we use a Trie of
251 # arrays of mentities.
252 #
253 # As for now, we do not index class and property definitions.
254 # TODO add an option.
255 var name_prefixes = new Trie[Array[MEntity]]
256
257 # Prefix tree for mentities `full_name`
258 #
259 # Even if two mentities cannot share the same `full_name`, we use a Trie of
260 # arrays of mentities to be consistent with `name_prefixes`.
261 var full_name_prefixes = new Trie[Array[MEntity]]
262
263 # Index `mentity` by it's `MEntity::name`
264 #
265 # See `name_prefixes`.
266 private fun index_by_name(mentity: MEntity) do
267 var name = mentity.name
268 if not name_prefixes.has_key(name) then
269 name_prefixes[name] = new Array[MEntity]
270 end
271 name_prefixes[name].add mentity
272 end
273
274 # Index `mentity` by its `MEntity::full_name`
275 private fun index_by_full_name(mentity: MEntity) do
276 var name = mentity.full_name
277 if not full_name_prefixes.has_key(name) then
278 full_name_prefixes[name] = new Array[MEntity]
279 end
280 full_name_prefixes[name].add mentity
281 end
282
283 # Index `mentity` so it can be retrieved by a find query
284 #
285 # MEntities are indexed by both name and full_name.
286 fun index(mentity: MEntity) do
287 mentities.add mentity
288 index_by_name mentity
289 index_by_full_name mentity
290 end
291
292 # Translate Trie results to `SearchResult`
293 #
294 # This method is used internally to translate each mentity returned by a prefix
295 # match in a Trie into a SearchResult that can be ranked by score.
296 #
297 # Results from the Trie are returned in a breadth first manner so we get the
298 # matches ordered by prefix.
299 # We preserve that order by giving an incremental score to the `array` items.
300 private fun score_results_incremental(array: Array[Array[MEntity]]): IndexMatches do
301 var results = new IndexMatches
302 var score = 1
303 for mentities in array do
304 for mentity in mentities do
305 results.add new IndexMatch(mentity, score)
306 end
307 score += 1
308 end
309 return results
310 end
311
312 # Find all mentities where `MEntity::name` matches the `prefix`
313 fun find_by_name_prefix(prefix: String): IndexMatches do
314 return score_results_incremental(name_prefixes.find_by_prefix(prefix))
315 end
316
317 # Find all mentities where `MEntity::full_name` matches the `prefix`
318 fun find_by_full_name_prefix(prefix: String): IndexMatches do
319 return score_results_incremental(full_name_prefixes.find_by_prefix(prefix))
320 end
321
322 # Rank all mentities by the distance between `MEntity::name` and `name`
323 #
324 # Use the Levenshtein algorithm on all the indexed mentities `name`.
325 # Warning: may not scale to large indexes.
326 fun find_by_name_similarity(name: String): IndexMatches do
327 var results = new IndexMatches
328 for mentity in mentities do
329 if mentity isa MClassDef or mentity isa MPropDef then continue
330 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
331 end
332 return results
333 end
334
335 # Rank all mentities by the distance between `MEntity::full_name` and `full_name`
336 #
337 # Use the Levenshtein algorithm on all the indexed mentities `full_name`.
338 # Warning: may not scale to large indexes.
339 fun find_by_full_name_similarity(name: String): IndexMatches do
340 var results = new IndexMatches
341 for mentity in mentities do
342 if mentity isa MClassDef or mentity isa MPropDef then continue
343 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
344 end
345 return results
346 end
347
348 # Rank all mentities by the distance between `name` and both the mentity name and full name
349 fun find_by_similarity(name: String): IndexMatches do
350 var results = new IndexMatches
351 for mentity in mentities do
352 if mentity isa MClassDef or mentity isa MPropDef then continue
353 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
354 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
355 end
356 return results
357 end
358
359 # Find mentities by name trying first by prefix then by similarity
360 fun find_by_name(name: String): IndexMatches do
361 var results = find_by_name_prefix(name)
362 for mentity in mentities do
363 if mentity isa MClassDef or mentity isa MPropDef then continue
364 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
365 end
366 return results
367 end
368
369 # Find mentities by full name trying firt by prefix then by similarity
370 fun find_by_full_name(name: String): IndexMatches do
371 var results = find_by_full_name_prefix(name)
372 for mentity in mentities do
373 if mentity isa MClassDef or mentity isa MPropDef then continue
374 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
375 end
376 return results
377 end
378
379 # Find all mentities that matches `name`
380 #
381 # 1. try by name prefix
382 # 2. add full name prefix matches
383 # 3. try similarity by name
384 # 4. try similarity by full_name
385 fun find(name: String): IndexMatches do
386 var results = find_by_name_prefix(name)
387
388 for result in find_by_full_name_prefix(name) do
389 results.add result
390 end
391
392 for mentity in mentities do
393 if mentity isa MClassDef or mentity isa MPropDef then continue
394 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
395 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
396 end
397 return results
398 end
399 end
400
401 # An array of IndexMatch instances returned by the ModelIndex
402 #
403 # Index matches can be sorted, filtered and truncated.
404 #
405 # Thanks to the fluent interface, the index matches can be manipulated in chain
406 # from a model index query:
407 #
408 # ~~~nitish
409 # var res = index.find("Foo").
410 # uniq.
411 # sort(new ScoreComparator, new MEntityComparator).
412 # limit(10).
413 # sort(new VisibilityComparator)
414 # ~~~
415 class IndexMatches
416 super Array[IndexMatch]
417
418 # Create a new ModelMatches from an array of matches
419 #
420 # Elements are copied.
421 init from_matches(matches: Array[IndexMatch]) do self.add_all matches
422
423 # Sort the matches with `comparator` (or a list of comparators)
424 #
425 # Return a new IndexMatches instance with the sorted results.
426 #
427 # When more than one comparator is given, the comparators are applied in a
428 # pipeline where the `n`th comparator is applied only if the `n-1`th comparator
429 # returned 0.
430 fun sort(comparator: ScoreComparator...): IndexMatches do
431 var res = to_a
432 if comparator.length == 1 then
433 comparator.first.sort res
434 else
435 var comparators = new MatchComparators(comparator)
436 comparators.sort res
437 end
438 return new IndexMatches.from_matches(res)
439 end
440
441 # Limit the matches with `limit`
442 #
443 # Return a new IndexMatches instance with only the `limit` first matches.
444 fun limit(limit: Int): IndexMatches do
445 var res = new Array[IndexMatch]
446 for match in self do
447 if res.length >= limit then break
448 res.add match
449 end
450 return new IndexMatches.from_matches(res)
451 end
452
453 # Remove doublons from the matches
454 #
455 # Preverse the lowest score of all the matches for a MEntity.
456 fun uniq: IndexMatches do
457 var scores = new HashMap[MEntity, IndexMatch]
458 var res = new Array[IndexMatch]
459 for match in self do
460 var mentity = match.mentity
461 if scores.has_key(mentity) then
462 var older = scores[mentity]
463 if match.score < older.score then older.score = match.score
464 else
465 scores[mentity] = match
466 res.add match
467 end
468 end
469 return new IndexMatches.from_matches(res)
470 end
471
472 # Reset score of each matches to follow `self` order
473 #
474 # Usefull when you need to apply one sorter over another.
475 fun rerank: IndexMatches do
476 var res = new IndexMatches
477 for match in self do
478 res.add match
479 match.score = res.length
480 end
481 return res
482 end
483
484 # Aggregate the mentities for all the matches
485 #
486 # Preserve the match order.
487 fun mentities: Array[MEntity] do
488 var res = new Array[MEntity]
489 for match in self do res.add match.mentity
490 return res
491 end
492 end
493
494 # An MEntity matched from a ModelIndex
495 #
496 # Each match has a `score`. The score should be seen as the distance of
497 # the match from the query. In other words, the lowest is the score, the more
498 # relevant is the match.
499 class IndexMatch
500 super Comparable
501
502 redef type OTHER: IndexMatch
503
504 # MEntity matches
505 var mentity: MEntity
506
507 # Score allocated by the search method
508 #
509 # A lowest score means a more relevant match.
510 #
511 # Scores values are arbitrary, the meaning of `10` vs `2000` really depends
512 # on the search method producing the match and the comparators used to sort
513 # the matches.
514 # The only universal rule is: low score = relevance.
515 var score: Int is writable
516
517 # By default matches are compared only on their score
518 redef fun <=>(o) do return score <=> o.score
519
520 redef fun to_s do return "{mentity} ({score})"
521 end
522
523 # Compare two matches by their score
524 #
525 # Since the score can be seen as a distance, we want the lowest score first.
526 class ScoreComparator
527 super Comparator
528
529 redef type COMPARED: IndexMatch
530
531 redef fun compare(o1, o2) do return o1.score <=> o2.score
532 end
533
534 # A pipeline of comparators executed in inclusion order
535 #
536 # This class is used internally to agregate the behaviors of multiple comparators.
537 # Use `IndexMatches::sort(comparator...)` instead.
538 private class MatchComparators
539 super ScoreComparator
540
541 # Comparator to use in the array order
542 var comparators: Array[ScoreComparator]
543
544 # Compare with each comparator
545 #
546 # Return the compare value of the first one to return anything else than 0.
547 redef fun compare(o1, o2) do
548 for comparator in comparators do
549 var c = comparator.compare(o1, o2)
550 if c != 0 then return c
551 end
552 return 0
553 end
554 end
555
556 # Compare two matches by their MEntity kind
557 #
558 # Usefull to order the mentities by kind in this order:
559 # packages, groups, modules and classes, properties.
560 class MEntityComparator
561 super ScoreComparator
562
563 # See `MEntity::compare_mentity`
564 redef fun compare(o1, o2) do
565 return o1.mentity.mentity_kind_rank <=> o2.mentity.mentity_kind_rank
566 end
567 end
568
569 # Compare two matches by their MEntity visibility
570 #
571 # We reverse the original order so private is at the end of the list.
572 class VisibilityComparator
573 super ScoreComparator
574
575 redef fun compare(o1, o2) do return o2.mentity.visibility <=> o1.mentity.visibility
576 end
577
578 # Compare two matches by their name in lexicographic order
579 #
580 # Generally, for a same score, we want to put A before Z.
581 class NameComparator
582 super ScoreComparator
583
584 redef fun compare(o1, o2) do return o1.mentity.name <=> o2.mentity.name
585 end
586
587 # Compare two matches by their name length
588 class NameLengthComparator
589 super ScoreComparator
590
591 redef fun compare(o1, o2) do return o1.mentity.name.length <=> o2.mentity.name.length
592 end
593
594 # Compare two matches by their full_name in lexicographic order
595 #
596 # Generally, for a same score, we want to put A before Z.
597 class FullNameComparator
598 super ScoreComparator
599
600 redef fun compare(o1, o2) do return o1.mentity.full_name <=> o2.mentity.full_name
601 end
602
603 # Compare two matches by their full name length
604 class FullNameLengthComparator
605 super ScoreComparator
606
607 redef fun compare(o1, o2) do return o1.mentity.full_name.length <=> o2.mentity.full_name.length
608 end
609
610 redef class MEntity
611
612 # Compare MEntity class kind
613 #
614 # Unknown kind have a virtually high score.
615 private fun mentity_kind_rank: Int do return 10
616 end
617
618 redef class MPackage
619 redef fun mentity_kind_rank do return 1
620 end
621
622 redef class MGroup
623 redef fun mentity_kind_rank do return 2
624 end
625
626 redef class MModule
627 redef fun mentity_kind_rank do return 3
628 end
629
630 redef class MClass
631 redef fun mentity_kind_rank do return 4
632 end
633
634 redef class MClassDef
635 redef fun mentity_kind_rank do return 5
636 end
637
638 redef class MProperty
639 redef fun mentity_kind_rank do return 6
640 end
641
642 redef class MPropDef
643 redef fun mentity_kind_rank do return 7
644 end