model/model_contract: Move contract model representation
[nit.git] / src / model / model_index.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Search things from the Model
16 #
17 # ModelIndex allows you to index mentities then retrieve them by their `name`
18 # or `full_name`.
19 # It offers a set of `find_*` methods that can be used to match queries
20 # against entities name.
21 #
22 # Because each use is different, ModelIndex only provide base raw search services.
23 # All of them return IndexMatches that can be ordered and filtered by the client.
24 #
25 # ## Indexing mentities
26 #
27 # Before searching something from the ModelIndex, you have to index it.
28 # Use the `ModelIndex::index` method to do that:
29 #
30 # ~~~nitish
31 # var index = new ModelIndex
32 # for mentity in model.collect_mentities do
33 # index.index(mentity)
34 # end
35 # ~~~
36 #
37 # ## Search mentities
38 #
39 # You can then run queries on the ModelIndex:
40 #
41 # ~~~nitish
42 # for res in index.find("Array").limit(10) do
43 # print res
44 # end
45 # ~~~
46 #
47 # ## Examples
48 #
49 # Here some examples of how you can use the ModelIndex.
50 #
51 # ### Smart type prediction
52 #
53 # Use ModelIndex to implement a smart prediction system based on the typed prefix:
54 #
55 # ~~~nitish
56 # var index = new ModelIndex
57 # for mentity in model.collect_mentities do
58 # # We don't really care about definitions
59 # if mentity isa MClassDef or mentity isa MPropDef then continue
60 # index.index(mentity)
61 # end
62 #
63 # var typed_prefix = "Arr"
64 # for res in index.find_by_name_prefix(typed_prefix).
65 # uniq. # keep only the best ranked mentity
66 # limit(5). # limit to ten results
67 # sort(new VisibilityComparator, new NameComparator) do # order by visibility then name
68 # print res
69 # end
70 # ~~~
71 #
72 # Will output something like:
73 #
74 # ~~~raw
75 # Array (1)
76 # ArraySet (2)
77 # ArrayMap (3)
78 # ArrayCmp (4)
79 # ArrayMapKeys (5)
80 # ~~~
81 #
82 # ### Method autocompletion
83 #
84 # Find methods from a class full_name:
85 #
86 # ~~~nitish
87 # var class_full_name = "core::Array"
88 # for res in index.find_by_full_name_prefix("{class_full_name}::").
89 # uniq. # keep only the best ranked mentity
90 # sort(new VisibilityComparator). # put private in the bottom of the list
91 # limit(5). # limit to ten results
92 # sort(new FullNameComparator) do # order by lexicographic order
93 # print res
94 # end
95 # ~~~
96 #
97 # Will output something like:
98 #
99 # ~~~raw
100 # * (2)
101 # + (1)
102 # filled_with (5)
103 # from (3)
104 # with_items (4)
105 # ~~~
106 #
107 # ### Name typo detection and suggestion
108 #
109 # Detect unknown names and suggest corrections:
110 #
111 # ~~~nitish
112 # var name = "Zrray"
113 #
114 # if index.find_by_name_prefix(name).is_empty then
115 # printn "`{name}` not found, did you mean: "
116 # printn index.find_by_name_similarity(name).sort(new ScoreComparator).limit(2).join(" or ")
117 # print "?"
118 # end
119 # ~~~
120 #
121 # Will output something like:
122 #
123 # ~~~raw
124 # `Zrray` not found, did you mean: Array (1) or array (1)?
125 # ~~~
126 module model_index
127
128 import model::model_collect
129
130 import trees::trie
131 import trees::bktree
132
133 redef class Model
134
135 # Keep a direct link to mentities by full name to speed up `mentity_from_uri`
136 var mentities_by_full_name: HashMap[String, MEntity] is lazy do
137 var mentities_by_full_name = new HashMap[String, MEntity]
138 for mentity in collect_mentities do
139 mentities_by_full_name[mentity.full_name] = mentity
140 end
141 return mentities_by_full_name
142 end
143
144 # ModelIndex used to perform searches
145 var index: ModelIndex is lazy, writable do
146 var index = new ModelIndex
147 for mentity in collect_mentities do
148 if mentity isa MClassDef or mentity isa MPropDef then continue
149 index.index mentity
150 end
151 return index
152 end
153
154 redef fun mentities_by_name(name, filter) do
155 var res = new Array[MEntity]
156 if not index.names.has_key(name) then return res
157 for mentity in index.names[name] do
158 if filter == null or filter.accept_mentity(mentity) then res.add mentity
159 end
160 return res
161 end
162
163 redef fun mentity_by_full_name(full_name, filter) do
164 if mentities_by_full_name.has_key(full_name) then
165 var mentity = mentities_by_full_name[full_name]
166 if filter == null or filter.accept_mentity(mentity) then return mentity
167 end
168 return null
169 end
170
171 private var score_sorter = new ScoreComparator
172 private var vis_sorter = new VisibilityComparator
173 private var kind_sorter = new MEntityComparator
174 private var name_sorter = new NameComparator
175 private var lname_sorter = new NameLengthComparator
176 private var fname_sorter = new FullNameComparator
177 private var lfname_sorter = new FullNameLengthComparator
178
179 # Search mentities based on a `query` string
180 #
181 # Lookup the index for anything matching `query` and return `limit` results.
182 #
183 # The algorithm used is the following:
184 # 1- lookup by name prefix
185 # 2- lookup by full_name prefix
186 # 3- loopup by levenshtein distance
187 #
188 # At each step if the `limit` is reached, the algorithm stops and returns the results.
189 fun find(query: String, limit: nullable Int, filter: nullable ModelFilter): Array[MEntity] do
190 # Find, lookup by name prefix
191 var matches = index.find_by_name_prefix(query, filter).uniq.
192 sort(lname_sorter, name_sorter, kind_sorter)
193 if limit != null and matches.length >= limit then
194 return matches.limit(limit).rerank.sort(vis_sorter, score_sorter).mentities
195 end
196 matches = matches.rerank.sort(vis_sorter, score_sorter)
197
198 # If limit not reached, lookup by full_name prefix
199 var malus = matches.length
200 var full_matches = new IndexMatches
201 for match in index.find_by_full_name_prefix(query, filter).
202 sort(kind_sorter, lfname_sorter, fname_sorter) do
203 match.score += malus
204 full_matches.add match
205 end
206 matches = matches.uniq
207 if limit != null and matches.length + full_matches.length >= limit then
208 matches.add_all full_matches
209 matches = matches.uniq.limit(limit).rerank.sort(vis_sorter, score_sorter)
210 return matches.mentities
211 end
212
213 # If limit not reached, lookup by similarity
214 malus = matches.length
215 var sim_matches = new IndexMatches
216 for match in index.find_by_similarity(query, filter).sort(score_sorter, kind_sorter, lname_sorter, name_sorter) do
217 match.score += malus
218 sim_matches.add match
219 end
220 matches.add_all sim_matches
221 matches = matches.uniq
222 if limit != null then matches = matches.limit(limit)
223 return matches.rerank.sort(vis_sorter, score_sorter).mentities
224 end
225 end
226
227 # ModelIndex indexes mentities by their name and full name
228 #
229 # It provides methods to find mentities based on a prefix or string similarity.
230 #
231 # ~~~nitish
232 # # Build index
233 # var index = new ModelIndex
234 # for mentity in model.collect_mentities do
235 # if mentity isa MClassDef or mentity isa MPropDef then continue
236 # index.index(mentity)
237 # end
238 #
239 # for e in index.find("Foo").uniq.sort(new ScoreComparator).limit(10) do
240 # print " * {e.score}: {e.mentity.name} ({e.mentity.full_name})"
241 # end
242 # ~~~
243 class ModelIndex
244
245 # List of all indexed mentities.
246 #
247 # Faster than traversing the tries.
248 var mentities = new Array[MEntity]
249
250 # Map of all mentities indexed by their `name`
251 var names = new HashMap[String, Array[MEntity]]
252
253 # Prefix tree for mentities `name`
254 #
255 # Because multiple mentities can share the same `name`, we use a Trie of
256 # arrays of mentities.
257 #
258 # As for now, we do not index class and property definitions.
259 # TODO add an option.
260 var name_prefixes = new Trie[Array[MEntity]]
261
262 # Distance tree for mentities `name`
263 var name_distances = new BKTree
264
265 # Map of all mentities indexed by their `full_name`
266 var full_names = new HashMap[String, MEntity]
267
268 # Prefix tree for mentities `full_name`
269 #
270 # Even if two mentities cannot share the same `full_name`, we use a Trie of
271 # arrays of mentities to be consistent with `name_prefixes`.
272 var full_name_prefixes = new Trie[Array[MEntity]]
273
274 # Distance tree for mentities `full_name`
275 var full_name_distances = new BKTree
276
277 # Index `mentity` by it's `MEntity::name`
278 #
279 # See `name_prefixes`.
280 private fun index_by_name(mentity: MEntity) do
281 var name = mentity.name
282
283 # Index name
284 if not names.has_key(name) then
285 names[name] = new Array[MEntity]
286 end
287 names[name].add mentity
288
289 # Index prefix
290 if not name_prefixes.has_key(name) then
291 name_prefixes[name] = new Array[MEntity]
292 end
293 name_prefixes[name].add mentity
294
295 # Index distance
296 name_distances.add(name)
297 end
298
299 # Index `mentity` by its `MEntity::full_name`
300 private fun index_by_full_name(mentity: MEntity) do
301 var name = mentity.full_name
302
303 # Index full name
304 full_names[name] = mentity
305
306 # Index prefix
307 if not full_name_prefixes.has_key(name) then
308 full_name_prefixes[name] = new Array[MEntity]
309 end
310 full_name_prefixes[name].add mentity
311
312 # Index distance
313 full_name_distances.add(name)
314 end
315
316 # Index `mentity` so it can be retrieved by a find query
317 #
318 # MEntities are indexed by both name and full_name.
319 fun index(mentity: MEntity) do
320 mentities.add mentity
321 index_by_name mentity
322 index_by_full_name mentity
323 end
324
325 # Translate Trie results to `SearchResult`
326 #
327 # This method is used internally to translate each mentity returned by a prefix
328 # match in a Trie into a SearchResult that can be ranked by score.
329 #
330 # Results from the Trie are returned in a breadth first manner so we get the
331 # matches ordered by prefix.
332 # We preserve that order by giving an incremental score to the `array` items.
333 private fun score_results_incremental(array: Array[Array[MEntity]], filter: nullable ModelFilter): IndexMatches do
334 var results = new IndexMatches
335 var score = 1
336 for mentities in array do
337 for mentity in mentities do
338 if filter != null and not filter.accept_mentity(mentity) then continue
339 results.add new IndexMatch(mentity, score)
340 end
341 score += 1
342 end
343 return results
344 end
345
346 # Find all mentities where `MEntity::name` matches the `prefix`
347 fun find_by_name_prefix(prefix: String, filter: nullable ModelFilter): IndexMatches do
348 return score_results_incremental(name_prefixes.find_by_prefix(prefix), filter)
349 end
350
351 # Find all mentities where `MEntity::full_name` matches the `prefix`
352 fun find_by_full_name_prefix(prefix: String, filter: nullable ModelFilter): IndexMatches do
353 return score_results_incremental(full_name_prefixes.find_by_prefix(prefix), filter)
354 end
355
356 # Rank all mentities by the distance between `MEntity::name` and `name`
357 #
358 # Use the Levenshtein algorithm on all the indexed mentities `name`.
359 # Warning: may not scale to large indexes.
360 fun find_by_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
361 var results = new IndexMatches
362 for match in name_distances.search(name) do
363 var dist = match.distance
364 var mname = match.key
365 if not names.has_key(mname) then continue
366 for mentity in names[mname] do
367 if mentity isa MClassDef or mentity isa MPropDef then continue
368 if filter != null and not filter.accept_mentity(mentity) then continue
369 results.add new IndexMatch(mentity, dist)
370 end
371 end
372 return results
373 end
374
375 # Rank all mentities by the distance between `MEntity::full_name` and `full_name`
376 #
377 # Use the Levenshtein algorithm on all the indexed mentities `full_name`.
378 # Warning: may not scale to large indexes.
379 fun find_by_full_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
380 var results = new IndexMatches
381 for match in full_name_distances.search(name) do
382 var dist = match.distance
383 var mname = match.key
384 if not full_names.has_key(mname) then continue
385 var mentity = full_names[mname]
386 if mentity isa MClassDef or mentity isa MPropDef then continue
387 if filter != null and not filter.accept_mentity(mentity) then continue
388 results.add new IndexMatch(mentity, dist)
389 end
390 return results
391 end
392
393 # Rank all mentities by the distance between `name` and both the mentity name and full name
394 fun find_by_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
395 var results = new IndexMatches
396 results.add_all find_by_full_name_similarity(name, filter)
397 results.add_all find_by_name_similarity(name, filter)
398 return results
399 end
400
401 # Find mentities by name trying first by prefix then by similarity
402 fun find_by_name(name: String, filter: nullable ModelFilter): IndexMatches do
403 var results = find_by_name_prefix(name, filter)
404 results.add_all find_by_name_similarity(name, filter)
405 return results
406 end
407
408 # Find mentities by full name trying firt by prefix then by similarity
409 fun find_by_full_name(name: String, filter: nullable ModelFilter): IndexMatches do
410 var results = find_by_full_name_prefix(name)
411 results.add_all find_by_full_name_similarity(name, filter)
412 return results
413 end
414
415 # Find all mentities that matches `name`
416 #
417 # 1. try by name prefix
418 # 2. add full name prefix matches
419 # 3. try similarity by name
420 # 4. try similarity by full_name
421 fun find(name: String, filter: nullable ModelFilter): IndexMatches do
422 var results = find_by_name_prefix(name, filter)
423 results.add_all find_by_full_name_prefix(name, filter)
424 results.add_all find_by_similarity(name, filter)
425 return results
426 end
427 end
428
429 # An array of IndexMatch instances returned by the ModelIndex
430 #
431 # Index matches can be sorted, filtered and truncated.
432 #
433 # Thanks to the fluent interface, the index matches can be manipulated in chain
434 # from a model index query:
435 #
436 # ~~~nitish
437 # var res = index.find("Foo").
438 # uniq.
439 # sort(new ScoreComparator, new MEntityComparator).
440 # limit(10).
441 # sort(new VisibilityComparator)
442 # ~~~
443 class IndexMatches
444 super Array[IndexMatch]
445
446 # Create a new ModelMatches from an array of matches
447 #
448 # Elements are copied.
449 init from_matches(matches: Array[IndexMatch]) do self.add_all matches
450
451 # Sort the matches with `comparator` (or a list of comparators)
452 #
453 # Return a new IndexMatches instance with the sorted results.
454 #
455 # When more than one comparator is given, the comparators are applied in a
456 # pipeline where the `n`th comparator is applied only if the `n-1`th comparator
457 # returned 0.
458 fun sort(comparator: ScoreComparator...): IndexMatches do
459 var res = to_a
460 if comparator.length == 1 then
461 comparator.first.sort res
462 else
463 var comparators = new MatchComparators(comparator)
464 comparators.sort res
465 end
466 return new IndexMatches.from_matches(res)
467 end
468
469 # Limit the matches with `limit`
470 #
471 # Return a new IndexMatches instance with only the `limit` first matches.
472 fun limit(limit: Int): IndexMatches do
473 var res = new Array[IndexMatch]
474 for match in self do
475 if res.length >= limit then break
476 res.add match
477 end
478 return new IndexMatches.from_matches(res)
479 end
480
481 # Remove doublons from the matches
482 #
483 # Preverse the lowest score of all the matches for a MEntity.
484 fun uniq: IndexMatches do
485 var scores = new HashMap[MEntity, IndexMatch]
486 var res = new Array[IndexMatch]
487 for match in self do
488 var mentity = match.mentity
489 if scores.has_key(mentity) then
490 var older = scores[mentity]
491 if match.score < older.score then older.score = match.score
492 else
493 scores[mentity] = match
494 res.add match
495 end
496 end
497 return new IndexMatches.from_matches(res)
498 end
499
500 # Reset score of each matches to follow `self` order
501 #
502 # Usefull when you need to apply one sorter over another.
503 fun rerank: IndexMatches do
504 var res = new IndexMatches
505 for match in self do
506 res.add match
507 match.score = res.length
508 end
509 return res
510 end
511
512 # Aggregate the mentities for all the matches
513 #
514 # Preserve the match order.
515 fun mentities: Array[MEntity] do
516 var res = new Array[MEntity]
517 for match in self do res.add match.mentity
518 return res
519 end
520 end
521
522 # An MEntity matched from a ModelIndex
523 #
524 # Each match has a `score`. The score should be seen as the distance of
525 # the match from the query. In other words, the lowest is the score, the more
526 # relevant is the match.
527 class IndexMatch
528 super Comparable
529
530 redef type OTHER: IndexMatch
531
532 # MEntity matches
533 var mentity: MEntity
534
535 # Score allocated by the search method
536 #
537 # A lowest score means a more relevant match.
538 #
539 # Scores values are arbitrary, the meaning of `10` vs `2000` really depends
540 # on the search method producing the match and the comparators used to sort
541 # the matches.
542 # The only universal rule is: low score = relevance.
543 var score: Int is writable
544
545 # By default matches are compared only on their score
546 redef fun <=>(o) do return score <=> o.score
547
548 redef fun to_s do return "{mentity} ({score})"
549 end
550
551 # Compare two matches by their score
552 #
553 # Since the score can be seen as a distance, we want the lowest score first.
554 class ScoreComparator
555 super Comparator
556
557 redef type COMPARED: IndexMatch
558
559 redef fun compare(o1, o2) do return o1.score <=> o2.score
560 end
561
562 # A pipeline of comparators executed in inclusion order
563 #
564 # This class is used internally to agregate the behaviors of multiple comparators.
565 # Use `IndexMatches::sort(comparator...)` instead.
566 private class MatchComparators
567 super ScoreComparator
568
569 # Comparator to use in the array order
570 var comparators: Array[ScoreComparator]
571
572 # Compare with each comparator
573 #
574 # Return the compare value of the first one to return anything else than 0.
575 redef fun compare(o1, o2) do
576 for comparator in comparators do
577 var c = comparator.compare(o1, o2)
578 if c != 0 then return c
579 end
580 return 0
581 end
582 end
583
584 # Compare two matches by their MEntity kind
585 #
586 # Usefull to order the mentities by kind in this order:
587 # packages, groups, modules and classes, properties.
588 class MEntityComparator
589 super ScoreComparator
590
591 # See `MEntity::compare_mentity`
592 redef fun compare(o1, o2) do
593 return o1.mentity.mentity_kind_rank <=> o2.mentity.mentity_kind_rank
594 end
595 end
596
597 # Compare two matches by their MEntity visibility
598 #
599 # We reverse the original order so private is at the end of the list.
600 class VisibilityComparator
601 super ScoreComparator
602
603 redef fun compare(o1, o2) do return o2.mentity.visibility <=> o1.mentity.visibility
604 end
605
606 # Compare two matches by their name in lexicographic order
607 #
608 # Generally, for a same score, we want to put A before Z.
609 class NameComparator
610 super ScoreComparator
611
612 redef fun compare(o1, o2) do return o1.mentity.name <=> o2.mentity.name
613 end
614
615 # Compare two matches by their name length
616 class NameLengthComparator
617 super ScoreComparator
618
619 redef fun compare(o1, o2) do return o1.mentity.name.length <=> o2.mentity.name.length
620 end
621
622 # Compare two matches by their full_name in lexicographic order
623 #
624 # Generally, for a same score, we want to put A before Z.
625 class FullNameComparator
626 super ScoreComparator
627
628 redef fun compare(o1, o2) do return o1.mentity.full_name <=> o2.mentity.full_name
629 end
630
631 # Compare two matches by their full name length
632 class FullNameLengthComparator
633 super ScoreComparator
634
635 redef fun compare(o1, o2) do return o1.mentity.full_name.length <=> o2.mentity.full_name.length
636 end
637
638 redef class MEntity
639
640 # Compare MEntity class kind
641 #
642 # Unknown kind have a virtually high score.
643 private fun mentity_kind_rank: Int do return 10
644 end
645
646 redef class MPackage
647 redef fun mentity_kind_rank do return 1
648 end
649
650 redef class MGroup
651 redef fun mentity_kind_rank do return 2
652 end
653
654 redef class MModule
655 redef fun mentity_kind_rank do return 3
656 end
657
658 redef class MClass
659 redef fun mentity_kind_rank do return 4
660 end
661
662 redef class MClassDef
663 redef fun mentity_kind_rank do return 5
664 end
665
666 redef class MProperty
667 redef fun mentity_kind_rank do return 6
668 end
669
670 redef class MPropDef
671 redef fun mentity_kind_rank do return 7
672 end