model_index: update ModeView
[nit.git] / src / model / model_index.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Search things from the Model
16 #
17 # ModelIndex allows you to index mentities then retrieve them by their `name`
18 # or `full_name`.
19 # It offers a set of `find_*` methods that can be used to match queries
20 # against entities name.
21 #
22 # Because each use is different, ModelIndex only provide base raw search services.
23 # All of them return IndexMatches that can be ordered and filtered by the client.
24 #
25 # ## Indexing mentities
26 #
27 # Before searching something from the ModelIndex, you have to index it.
28 # Use the `ModelIndex::index` method to do that:
29 #
30 # ~~~nitish
31 # var index = new ModelIndex
32 # var view = new ModelView(model, mainmodule)
33 # for mentity in view.mentities do
34 # index.index(mentity)
35 # end
36 # ~~~
37 #
38 # ## Search mentities
39 #
40 # You can then run queries on the ModelIndex:
41 #
42 # ~~~nitish
43 # for res in index.find("Array").limit(10) do
44 # print res
45 # end
46 # ~~~
47 #
48 # ## Examples
49 #
50 # Here some examples of how you can use the ModelIndex.
51 #
52 # ### Smart type prediction
53 #
54 # Use ModelIndex to implement a smart prediction system based on the typed prefix:
55 #
56 # ~~~nitish
57 # var index = new ModelIndex
58 # var view = new ModelView(model, mainmodule)
59 # for mentity in view.mentities do
60 # # We don't really care about definitions
61 # if mentity isa MClassDef or mentity isa MPropDef then continue
62 # index.index(mentity)
63 # end
64 #
65 # var typed_prefix = "Arr"
66 # for res in index.find_by_name_prefix(typed_prefix).
67 # uniq. # keep only the best ranked mentity
68 # limit(5). # limit to ten results
69 # sort(new VisibilityComparator, new NameComparator) do # order by visibility then name
70 # print res
71 # end
72 # ~~~
73 #
74 # Will output something like:
75 #
76 # ~~~raw
77 # Array (1)
78 # ArraySet (2)
79 # ArrayMap (3)
80 # ArrayCmp (4)
81 # ArrayMapKeys (5)
82 # ~~~
83 #
84 # ### Method autocompletion
85 #
86 # Find methods from a class full_name:
87 #
88 # ~~~nitish
89 # var class_full_name = "core::Array"
90 # for res in index.find_by_full_name_prefix("{class_full_name}::").
91 # uniq. # keep only the best ranked mentity
92 # sort(new VisibilityComparator). # put private in the bottom of the list
93 # limit(5). # limit to ten results
94 # sort(new FullNameComparator) do # order by lexicographic order
95 # print res
96 # end
97 # ~~~
98 #
99 # Will output something like:
100 #
101 # ~~~raw
102 # * (2)
103 # + (1)
104 # filled_with (5)
105 # from (3)
106 # with_items (4)
107 # ~~~
108 #
109 # ### Name typo detection and suggestion
110 #
111 # Detect unknown names and suggest corrections:
112 #
113 # ~~~nitish
114 # var name = "Zrray"
115 #
116 # if index.find_by_name_prefix(name).is_empty then
117 # printn "`{name}` not found, did you mean: "
118 # printn index.find_by_name_similarity(name).sort(new ScoreComparator).limit(2).join(" or ")
119 # print "?"
120 # end
121 # ~~~
122 #
123 # Will output something like:
124 #
125 # ~~~raw
126 # `Zrray` not found, did you mean: Array (1) or array (1)?
127 # ~~~
128 module model_index
129
130 import model::model_views
131 import trees::trie
132
133 redef class ModelView
134
135 # Keep a direct link to mentities by full name to speed up `mentity_from_uri`
136 var mentities_by_full_name: HashMap[String, MEntity] is lazy do
137 var mentities_by_full_name = new HashMap[String, MEntity]
138 for mentity in mentities do
139 mentities_by_full_name[mentity.full_name] = mentity
140 end
141 return mentities_by_full_name
142 end
143
144 # ModelIndex used to perform searches
145 var index: ModelIndex is lazy do
146 var index = new ModelIndex
147 for mentity in mentities do
148 if mentity isa MClassDef or mentity isa MPropDef then continue
149 index.index mentity
150 end
151 return index
152 end
153
154 # Find mentities by their `name`
155 fun mentities_by_name(name: String): Array[MEntity] do
156 if index.name_prefixes.has_key(name) then
157 return index.name_prefixes[name]
158 end
159 return new Array[MEntity]
160 end
161
162 redef fun mentity_by_full_name(full_name) do
163 if mentities_by_full_name.has_key(full_name) then
164 return mentities_by_full_name[full_name]
165 end
166 return null
167 end
168
169 private var score_sorter = new ScoreComparator
170 private var vis_sorter = new VisibilityComparator
171 private var kind_sorter = new MEntityComparator
172 private var name_sorter = new NameComparator
173 private var lname_sorter = new NameLengthComparator
174 private var fname_sorter = new FullNameComparator
175 private var lfname_sorter = new FullNameLengthComparator
176
177 # Search mentities based on a `query` string
178 #
179 # Lookup the view index for anything matching `query` and return `limit` results.
180 #
181 # The algorithm used is the following:
182 # 1- lookup by name prefix
183 # 2- lookup by full_name prefix
184 # 3- loopup by levenshtein distance
185 #
186 # At each step if the `limit` is reached, the algorithm stops and returns the results.
187 fun find(query: String, limit: nullable Int): Array[MEntity] do
188 # Find, lookup by name prefix
189 var matches = index.find_by_name_prefix(query).uniq.
190 sort(lname_sorter, name_sorter, kind_sorter)
191 if limit != null and matches.length >= limit then
192 return matches.limit(limit).rerank.sort(vis_sorter, score_sorter).mentities
193 end
194 matches = matches.rerank.sort(vis_sorter, score_sorter)
195
196 # If limit not reached, lookup by full_name prefix
197 var malus = matches.length
198 var full_matches = new IndexMatches
199 for match in index.find_by_full_name_prefix(query).
200 sort(kind_sorter, lfname_sorter, fname_sorter) do
201 match.score += malus
202 full_matches.add match
203 end
204 matches = matches.uniq
205 if limit != null and matches.length + full_matches.length >= limit then
206 matches.add_all full_matches
207 matches = matches.uniq.limit(limit).rerank.sort(vis_sorter, score_sorter)
208 return matches.mentities
209 end
210
211 # If limit not reached, lookup by similarity
212 malus = matches.length
213 var sim_matches = new IndexMatches
214 for match in index.find_by_similarity(query).sort(score_sorter, kind_sorter, lname_sorter, name_sorter) do
215 match.score += malus
216 sim_matches.add match
217 end
218 matches.add_all sim_matches
219 matches = matches.uniq
220 if limit != null then matches = matches.limit(limit)
221 return matches.rerank.sort(vis_sorter, score_sorter).mentities
222 end
223 end
224
225 # ModelIndex indexes mentities by their name and full name
226 #
227 # It provides methods to find mentities based on a prefix or string similarity.
228 #
229 # ~~~nitish
230 # # Build index
231 # var index = new ModelIndex
232 # var view = new ModelView(model, mainmodule)
233 # for mentity in view.mentities do
234 # if mentity isa MClassDef or mentity isa MPropDef then continue
235 # index.index(mentity)
236 # end
237 #
238 # for e in index.find("Foo").uniq.sort(new ScoreComparator).limit(10) do
239 # print " * {e.score}: {e.mentity.name} ({e.mentity.full_name})"
240 # end
241 # ~~~
242 class ModelIndex
243
244 # List of all indexed mentities.
245 #
246 # Faster than traversing the tries.
247 var mentities = new Array[MEntity]
248
249 # Prefix tree for mentities `name`
250 #
251 # Because multiple mentities can share the same `name`, we use a Trie of
252 # arrays of mentities.
253 #
254 # As for now, we do not index class and property definitions.
255 # TODO add an option.
256 var name_prefixes = new Trie[Array[MEntity]]
257
258 # Prefix tree for mentities `full_name`
259 #
260 # Even if two mentities cannot share the same `full_name`, we use a Trie of
261 # arrays of mentities to be consistent with `name_prefixes`.
262 var full_name_prefixes = new Trie[Array[MEntity]]
263
264 # Index `mentity` by it's `MEntity::name`
265 #
266 # See `name_prefixes`.
267 private fun index_by_name(mentity: MEntity) do
268 var name = mentity.name
269 if not name_prefixes.has_key(name) then
270 name_prefixes[name] = new Array[MEntity]
271 end
272 name_prefixes[name].add mentity
273 end
274
275 # Index `mentity` by its `MEntity::full_name`
276 private fun index_by_full_name(mentity: MEntity) do
277 var name = mentity.full_name
278 if not full_name_prefixes.has_key(name) then
279 full_name_prefixes[name] = new Array[MEntity]
280 end
281 full_name_prefixes[name].add mentity
282 end
283
284 # Index `mentity` so it can be retrieved by a find query
285 #
286 # MEntities are indexed by both name and full_name.
287 fun index(mentity: MEntity) do
288 mentities.add mentity
289 index_by_name mentity
290 index_by_full_name mentity
291 end
292
293 # Translate Trie results to `SearchResult`
294 #
295 # This method is used internally to translate each mentity returned by a prefix
296 # match in a Trie into a SearchResult that can be ranked by score.
297 #
298 # Results from the Trie are returned in a breadth first manner so we get the
299 # matches ordered by prefix.
300 # We preserve that order by giving an incremental score to the `array` items.
301 private fun score_results_incremental(array: Array[Array[MEntity]]): IndexMatches do
302 var results = new IndexMatches
303 var score = 1
304 for mentities in array do
305 for mentity in mentities do
306 results.add new IndexMatch(mentity, score)
307 end
308 score += 1
309 end
310 return results
311 end
312
313 # Find all mentities where `MEntity::name` matches the `prefix`
314 fun find_by_name_prefix(prefix: String): IndexMatches do
315 return score_results_incremental(name_prefixes.find_by_prefix(prefix))
316 end
317
318 # Find all mentities where `MEntity::full_name` matches the `prefix`
319 fun find_by_full_name_prefix(prefix: String): IndexMatches do
320 return score_results_incremental(full_name_prefixes.find_by_prefix(prefix))
321 end
322
323 # Rank all mentities by the distance between `MEntity::name` and `name`
324 #
325 # Use the Levenshtein algorithm on all the indexed mentities `name`.
326 # Warning: may not scale to large indexes.
327 fun find_by_name_similarity(name: String): IndexMatches do
328 var results = new IndexMatches
329 for mentity in mentities do
330 if mentity isa MClassDef or mentity isa MPropDef then continue
331 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
332 end
333 return results
334 end
335
336 # Rank all mentities by the distance between `MEntity::full_name` and `full_name`
337 #
338 # Use the Levenshtein algorithm on all the indexed mentities `full_name`.
339 # Warning: may not scale to large indexes.
340 fun find_by_full_name_similarity(name: String): IndexMatches do
341 var results = new IndexMatches
342 for mentity in mentities do
343 if mentity isa MClassDef or mentity isa MPropDef then continue
344 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
345 end
346 return results
347 end
348
349 # Rank all mentities by the distance between `name` and both the mentity name and full name
350 fun find_by_similarity(name: String): IndexMatches do
351 var results = new IndexMatches
352 for mentity in mentities do
353 if mentity isa MClassDef or mentity isa MPropDef then continue
354 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
355 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
356 end
357 return results
358 end
359
360 # Find mentities by name trying first by prefix then by similarity
361 fun find_by_name(name: String): IndexMatches do
362 var results = find_by_name_prefix(name)
363 for mentity in mentities do
364 if mentity isa MClassDef or mentity isa MPropDef then continue
365 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
366 end
367 return results
368 end
369
370 # Find mentities by full name trying firt by prefix then by similarity
371 fun find_by_full_name(name: String): IndexMatches do
372 var results = find_by_full_name_prefix(name)
373 for mentity in mentities do
374 if mentity isa MClassDef or mentity isa MPropDef then continue
375 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
376 end
377 return results
378 end
379
380 # Find all mentities that matches `name`
381 #
382 # 1. try by name prefix
383 # 2. add full name prefix matches
384 # 3. try similarity by name
385 # 4. try similarity by full_name
386 fun find(name: String): IndexMatches do
387 var results = find_by_name_prefix(name)
388
389 for result in find_by_full_name_prefix(name) do
390 results.add result
391 end
392
393 for mentity in mentities do
394 if mentity isa MClassDef or mentity isa MPropDef then continue
395 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
396 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
397 end
398 return results
399 end
400 end
401
402 # An array of IndexMatch instances returned by the ModelIndex
403 #
404 # Index matches can be sorted, filtered and truncated.
405 #
406 # Thanks to the fluent interface, the index matches can be manipulated in chain
407 # from a model index query:
408 #
409 # ~~~nitish
410 # var res = index.find("Foo").
411 # uniq.
412 # sort(new ScoreComparator, new MEntityComparator).
413 # limit(10).
414 # sort(new VisibilityComparator)
415 # ~~~
416 class IndexMatches
417 super Array[IndexMatch]
418
419 # Create a new ModelMatches from an array of matches
420 #
421 # Elements are copied.
422 init from_matches(matches: Array[IndexMatch]) do self.add_all matches
423
424 # Sort the matches with `comparator` (or a list of comparators)
425 #
426 # Return a new IndexMatches instance with the sorted results.
427 #
428 # When more than one comparator is given, the comparators are applied in a
429 # pipeline where the `n`th comparator is applied only if the `n-1`th comparator
430 # returned 0.
431 fun sort(comparator: ScoreComparator...): IndexMatches do
432 var res = to_a
433 if comparator.length == 1 then
434 comparator.first.sort res
435 else
436 var comparators = new MatchComparators(comparator)
437 comparators.sort res
438 end
439 return new IndexMatches.from_matches(res)
440 end
441
442 # Limit the matches with `limit`
443 #
444 # Return a new IndexMatches instance with only the `limit` first matches.
445 fun limit(limit: Int): IndexMatches do
446 var res = new Array[IndexMatch]
447 for match in self do
448 if res.length >= limit then break
449 res.add match
450 end
451 return new IndexMatches.from_matches(res)
452 end
453
454 # Remove doublons from the matches
455 #
456 # Preverse the lowest score of all the matches for a MEntity.
457 fun uniq: IndexMatches do
458 var scores = new HashMap[MEntity, IndexMatch]
459 var res = new Array[IndexMatch]
460 for match in self do
461 var mentity = match.mentity
462 if scores.has_key(mentity) then
463 var older = scores[mentity]
464 if match.score < older.score then older.score = match.score
465 else
466 scores[mentity] = match
467 res.add match
468 end
469 end
470 return new IndexMatches.from_matches(res)
471 end
472
473 # Reset score of each matches to follow `self` order
474 #
475 # Usefull when you need to apply one sorter over another.
476 fun rerank: IndexMatches do
477 var res = new IndexMatches
478 for match in self do
479 res.add match
480 match.score = res.length
481 end
482 return res
483 end
484
485 # Aggregate the mentities for all the matches
486 #
487 # Preserve the match order.
488 fun mentities: Array[MEntity] do
489 var res = new Array[MEntity]
490 for match in self do res.add match.mentity
491 return res
492 end
493 end
494
495 # An MEntity matched from a ModelIndex
496 #
497 # Each match has a `score`. The score should be seen as the distance of
498 # the match from the query. In other words, the lowest is the score, the more
499 # relevant is the match.
500 class IndexMatch
501 super Comparable
502
503 redef type OTHER: IndexMatch
504
505 # MEntity matches
506 var mentity: MEntity
507
508 # Score allocated by the search method
509 #
510 # A lowest score means a more relevant match.
511 #
512 # Scores values are arbitrary, the meaning of `10` vs `2000` really depends
513 # on the search method producing the match and the comparators used to sort
514 # the matches.
515 # The only universal rule is: low score = relevance.
516 var score: Int is writable
517
518 # By default matches are compared only on their score
519 redef fun <=>(o) do return score <=> o.score
520
521 redef fun to_s do return "{mentity} ({score})"
522 end
523
524 # Compare two matches by their score
525 #
526 # Since the score can be seen as a distance, we want the lowest score first.
527 class ScoreComparator
528 super Comparator
529
530 redef type COMPARED: IndexMatch
531
532 redef fun compare(o1, o2) do return o1.score <=> o2.score
533 end
534
535 # A pipeline of comparators executed in inclusion order
536 #
537 # This class is used internally to agregate the behaviors of multiple comparators.
538 # Use `IndexMatches::sort(comparator...)` instead.
539 private class MatchComparators
540 super ScoreComparator
541
542 # Comparator to use in the array order
543 var comparators: Array[ScoreComparator]
544
545 # Compare with each comparator
546 #
547 # Return the compare value of the first one to return anything else than 0.
548 redef fun compare(o1, o2) do
549 for comparator in comparators do
550 var c = comparator.compare(o1, o2)
551 if c != 0 then return c
552 end
553 return 0
554 end
555 end
556
557 # Compare two matches by their MEntity kind
558 #
559 # Usefull to order the mentities by kind in this order:
560 # packages, groups, modules and classes, properties.
561 class MEntityComparator
562 super ScoreComparator
563
564 # See `MEntity::compare_mentity`
565 redef fun compare(o1, o2) do
566 return o1.mentity.mentity_kind_rank <=> o2.mentity.mentity_kind_rank
567 end
568 end
569
570 # Compare two matches by their MEntity visibility
571 #
572 # We reverse the original order so private is at the end of the list.
573 class VisibilityComparator
574 super ScoreComparator
575
576 redef fun compare(o1, o2) do return o2.mentity.visibility <=> o1.mentity.visibility
577 end
578
579 # Compare two matches by their name in lexicographic order
580 #
581 # Generally, for a same score, we want to put A before Z.
582 class NameComparator
583 super ScoreComparator
584
585 redef fun compare(o1, o2) do return o1.mentity.name <=> o2.mentity.name
586 end
587
588 # Compare two matches by their name length
589 class NameLengthComparator
590 super ScoreComparator
591
592 redef fun compare(o1, o2) do return o1.mentity.name.length <=> o2.mentity.name.length
593 end
594
595 # Compare two matches by their full_name in lexicographic order
596 #
597 # Generally, for a same score, we want to put A before Z.
598 class FullNameComparator
599 super ScoreComparator
600
601 redef fun compare(o1, o2) do return o1.mentity.full_name <=> o2.mentity.full_name
602 end
603
604 # Compare two matches by their full name length
605 class FullNameLengthComparator
606 super ScoreComparator
607
608 redef fun compare(o1, o2) do return o1.mentity.full_name.length <=> o2.mentity.full_name.length
609 end
610
611 redef class MEntity
612
613 # Compare MEntity class kind
614 #
615 # Unknown kind have a virtually high score.
616 private fun mentity_kind_rank: Int do return 10
617 end
618
619 redef class MPackage
620 redef fun mentity_kind_rank do return 1
621 end
622
623 redef class MGroup
624 redef fun mentity_kind_rank do return 2
625 end
626
627 redef class MModule
628 redef fun mentity_kind_rank do return 3
629 end
630
631 redef class MClass
632 redef fun mentity_kind_rank do return 4
633 end
634
635 redef class MClassDef
636 redef fun mentity_kind_rank do return 5
637 end
638
639 redef class MProperty
640 redef fun mentity_kind_rank do return 6
641 end
642
643 redef class MPropDef
644 redef fun mentity_kind_rank do return 7
645 end