perf analysis: customize float precision in reports
[nit.git] / src / model / model_index.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Search things from the Model
16 #
17 # ModelIndex allows you to index mentities then retrieve them by their `name`
18 # or `full_name`.
19 # It offers a set of `find_*` methods that can be used to match queries
20 # against entities name.
21 #
22 # Because each use is different, ModelIndex only provide base raw search services.
23 # All of them return IndexMatches that can be ordered and filtered by the client.
24 #
25 # ## Indexing mentities
26 #
27 # Before searching something from the ModelIndex, you have to index it.
28 # Use the `ModelIndex::index` method to do that:
29 #
30 # ~~~nitish
31 # var index = new ModelIndex
32 #
33 # for mentity in model.private_view.mentities do
34 # index.index(mentity)
35 # end
36 # ~~~
37 #
38 # ## Search mentities
39 #
40 # You can then run queries on the ModelIndex:
41 #
42 # ~~~nitish
43 # for res in index.find("Array").limit(10) do
44 # print res
45 # end
46 # ~~~
47 #
48 # ## Examples
49 #
50 # Here some examples of how you can use the ModelIndex.
51 #
52 # ### Smart type prediction
53 #
54 # Use ModelIndex to implement a smart prediction system based on the typed prefix:
55 #
56 # ~~~nitish
57 # var index = new ModelIndex
58 #
59 # for mentity in model.private_view.mentities do
60 # # We don't really care about definitions
61 # if mentity isa MClassDef or mentity isa MPropDef then continue
62 # index.index(mentity)
63 # end
64 #
65 # var typed_prefix = "Arr"
66 # for res in index.find_by_name_prefix(typed_prefix).
67 # uniq. # keep only the best ranked mentity
68 # limit(5). # limit to ten results
69 # sort(new VisibilityComparator, new NameComparator) do # order by visibility then name
70 # print res
71 # end
72 # ~~~
73 #
74 # Will output something like:
75 #
76 # ~~~raw
77 # Array (1)
78 # ArraySet (2)
79 # ArrayMap (3)
80 # ArrayCmp (4)
81 # ArrayMapKeys (5)
82 # ~~~
83 #
84 # ### Method autocompletion
85 #
86 # Find methods from a class full_name:
87 #
88 # ~~~nitish
89 # var class_full_name = "core::Array"
90 # for res in index.find_by_full_name_prefix("{class_full_name}::").
91 # uniq. # keep only the best ranked mentity
92 # sort(new VisibilityComparator). # put private in the bottom of the list
93 # limit(5). # limit to ten results
94 # sort(new FullNameComparator) do # order by lexicographic order
95 # print res
96 # end
97 # ~~~
98 #
99 # Will output something like:
100 #
101 # ~~~raw
102 # * (2)
103 # + (1)
104 # filled_with (5)
105 # from (3)
106 # with_items (4)
107 # ~~~
108 #
109 # ### Name typo detection and suggestion
110 #
111 # Detect unknown names and suggest corrections:
112 #
113 # ~~~nitish
114 # var name = "Zrray"
115 #
116 # if index.find_by_name_prefix(name).is_empty then
117 # printn "`{name}` not found, did you mean: "
118 # printn index.find_by_name_similarity(name).sort(new ScoreComparator).limit(2).join(" or ")
119 # print "?"
120 # end
121 # ~~~
122 #
123 # Will output something like:
124 #
125 # ~~~raw
126 # `Zrray` not found, did you mean: Array (1) or array (1)?
127 # ~~~
128 module model_index
129
130 import model::model_views
131 import trees::trie
132
133 redef class ModelView
134
135 # Keep a direct link to mentities by full name to speed up `mentity_from_uri`
136 var mentities_by_full_name: HashMap[String, MEntity] is lazy do
137 var mentities_by_full_name = new HashMap[String, MEntity]
138 for mentity in model.private_view.mentities do
139 mentities_by_full_name[mentity.full_name] = mentity
140 end
141 return mentities_by_full_name
142 end
143
144 # ModelIndex used to perform searches
145 var index: ModelIndex is lazy do
146 var index = new ModelIndex
147 for mentity in model.private_view.mentities do
148 if mentity isa MClassDef or mentity isa MPropDef then continue
149 index.index mentity
150 end
151 return index
152 end
153
154 # Find mentities by their `name`
155 fun mentities_by_name(name: String): Array[MEntity] do
156 if index.name_prefixes.has_key(name) then
157 return index.name_prefixes[name]
158 end
159 return new Array[MEntity]
160 end
161
162 redef fun mentity_by_full_name(full_name) do
163 if mentities_by_full_name.has_key(full_name) then
164 return mentities_by_full_name[full_name]
165 end
166 return null
167 end
168
169 private var score_sorter = new ScoreComparator
170 private var vis_sorter = new VisibilityComparator
171 private var kind_sorter = new MEntityComparator
172 private var name_sorter = new NameComparator
173 private var lname_sorter = new NameLengthComparator
174 private var fname_sorter = new FullNameComparator
175 private var lfname_sorter = new FullNameLengthComparator
176
177 # Search mentities based on a `query` string
178 #
179 # Lookup the view index for anything matching `query` and return `limit` results.
180 #
181 # The algorithm used is the following:
182 # 1- lookup by name prefix
183 # 2- lookup by full_name prefix
184 # 3- loopup by levenshtein distance
185 #
186 # At each step if the `limit` is reached, the algorithm stops and returns the results.
187 fun find(query: String, limit: nullable Int): Array[MEntity] do
188 # Find, lookup by name prefix
189 var matches = index.find_by_name_prefix(query).uniq.
190 sort(lname_sorter, name_sorter, kind_sorter)
191 if limit != null and matches.length >= limit then
192 return matches.limit(limit).rerank.sort(vis_sorter, score_sorter).mentities
193 end
194 matches = matches.rerank.sort(vis_sorter, score_sorter)
195
196 # If limit not reached, lookup by full_name prefix
197 var malus = matches.length
198 var full_matches = new IndexMatches
199 for match in index.find_by_full_name_prefix(query).
200 sort(kind_sorter, lfname_sorter, fname_sorter) do
201 match.score += malus
202 full_matches.add match
203 end
204 matches = matches.uniq
205 if limit != null and matches.length + full_matches.length >= limit then
206 matches.add_all full_matches
207 matches = matches.uniq.limit(limit).rerank.sort(vis_sorter, score_sorter)
208 return matches.mentities
209 end
210
211 # If limit not reached, lookup by similarity
212 malus = matches.length
213 var sim_matches = new IndexMatches
214 for match in index.find_by_similarity(query).sort(score_sorter, kind_sorter, lname_sorter, name_sorter) do
215 match.score += malus
216 sim_matches.add match
217 end
218 matches.add_all sim_matches
219 matches = matches.uniq
220 if limit != null then matches = matches.limit(limit)
221 return matches.rerank.sort(vis_sorter, score_sorter).mentities
222 end
223 end
224
225 # ModelIndex indexes mentities by their name and full name
226 #
227 # It provides methods to find mentities based on a prefix or string similarity.
228 #
229 # ~~~nitish
230 # # Build index
231 # var index = new ModelIndex
232 # for mentity in model.private_view.mentities do
233 # if mentity isa MClassDef or mentity isa MPropDef then continue
234 # index.index(mentity)
235 # end
236 #
237 # for e in index.find("Foo").uniq.sort(new ScoreComparator).limit(10) do
238 # print " * {e.score}: {e.mentity.name} ({e.mentity.full_name})"
239 # end
240 # ~~~
241 class ModelIndex
242
243 # List of all indexed mentities.
244 #
245 # Faster than traversing the tries.
246 var mentities = new Array[MEntity]
247
248 # Prefix tree for mentities `name`
249 #
250 # Because multiple mentities can share the same `name`, we use a Trie of
251 # arrays of mentities.
252 #
253 # As for now, we do not index class and property definitions.
254 # TODO add an option.
255 var name_prefixes = new Trie[Array[MEntity]]
256
257 # Prefix tree for mentities `full_name`
258 #
259 # Even if two mentities cannot share the same `full_name`, we use a Trie of
260 # arrays of mentities to be consistent with `name_prefixes`.
261 var full_name_prefixes = new Trie[Array[MEntity]]
262
263 # Index `mentity` by it's `MEntity::name`
264 #
265 # See `name_prefixes`.
266 private fun index_by_name(mentity: MEntity) do
267 var name = mentity.name
268 if not name_prefixes.has_key(name) then
269 name_prefixes[name] = new Array[MEntity]
270 end
271 name_prefixes[name].add mentity
272 end
273
274 # Index `mentity` by its `MEntity::full_name`
275 private fun index_by_full_name(mentity: MEntity) do
276 var name = mentity.full_name
277 if not full_name_prefixes.has_key(name) then
278 full_name_prefixes[name] = new Array[MEntity]
279 end
280 full_name_prefixes[name].add mentity
281 end
282
283 # Index `mentity` so it can be retrieved by a find query
284 #
285 # MEntities are indexed by both name and full_name.
286 fun index(mentity: MEntity) do
287 mentities.add mentity
288 index_by_name mentity
289 index_by_full_name mentity
290 end
291
292 # Translate Trie results to `SearchResult`
293 #
294 # This method is used internally to translate each mentity returned by a prefix
295 # match in a Trie into a SearchResult that can be ranked by score.
296 #
297 # Results from the Trie are returned in a breadth first manner so we get the
298 # matches ordered by prefix.
299 # We preserve that order by giving an incremental score to the `array` items.
300 private fun score_results_incremental(array: Array[Array[MEntity]]): IndexMatches do
301 var results = new IndexMatches
302 var score = 1
303 for mentities in array do
304 for mentity in mentities do
305 results.add new IndexMatch(mentity, score)
306 end
307 score += 1
308 end
309 return results
310 end
311
312 # Find all mentities where `MEntity::name` matches the `prefix`
313 fun find_by_name_prefix(prefix: String): IndexMatches do
314 return score_results_incremental(name_prefixes.find_by_prefix(prefix))
315 end
316
317 # Find all mentities where `MEntity::full_name` matches the `prefix`
318 fun find_by_full_name_prefix(prefix: String): IndexMatches do
319 return score_results_incremental(full_name_prefixes.find_by_prefix(prefix))
320 end
321
322 # Rank all mentities by the distance between `MEntity::name` and `name`
323 #
324 # Use the Levenshtein algorithm on all the indexed mentities `name`.
325 # Warning: may not scale to large indexes.
326 fun find_by_name_similarity(name: String): IndexMatches do
327 var results = new IndexMatches
328 for mentity in mentities do
329 if mentity isa MClassDef or mentity isa MPropDef then continue
330 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
331 end
332 return results
333 end
334
335 # Rank all mentities by the distance between `MEntity::full_name` and `full_name`
336 #
337 # Use the Levenshtein algorithm on all the indexed mentities `full_name`.
338 # Warning: may not scale to large indexes.
339 fun find_by_full_name_similarity(name: String): IndexMatches do
340 var results = new IndexMatches
341 for mentity in mentities do
342 if mentity isa MClassDef or mentity isa MPropDef then continue
343 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
344 end
345 return results
346 end
347
348 # Rank all mentities by the distance between `name` and both the mentity name and full name
349 fun find_by_similarity(name: String): IndexMatches do
350 var results = new IndexMatches
351 for mentity in mentities do
352 if mentity isa MClassDef or mentity isa MPropDef then continue
353 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
354 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
355 end
356 return results
357 end
358
359 # Find mentities by name trying first by prefix then by similarity
360 fun find_by_name(name: String): IndexMatches do
361 var results = find_by_name_prefix(name)
362 for mentity in mentities do
363 if mentity isa MClassDef or mentity isa MPropDef then continue
364 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
365 end
366 return results
367 end
368
369 # Find mentities by full name trying firt by prefix then by similarity
370 fun find_by_full_name(name: String): IndexMatches do
371 var results = find_by_full_name_prefix(name)
372 for mentity in mentities do
373 if mentity isa MClassDef or mentity isa MPropDef then continue
374 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
375 end
376 return results
377 end
378
379 # Find all mentities that matches `name`
380 #
381 # 1. try by name prefix
382 # 2. add full name prefix matches
383 # 3. try similarity by name
384 # 4. try similarity by full_name
385 fun find(name: String): IndexMatches do
386 var results = find_by_name_prefix(name)
387
388 for result in find_by_full_name_prefix(name) do
389 results.add result
390 end
391
392 for mentity in mentities do
393 if mentity isa MClassDef or mentity isa MPropDef then continue
394 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
395 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
396 end
397 return results
398 end
399 end
400
401 # An array of IndexMatch instances returned by the ModelIndex
402 #
403 # Index matches can be sorted, filtered and truncated.
404 #
405 # Thanks to the fluent interface, the index matches can be manipulated in chain
406 # from a model index query:
407 #
408 # ~~~nitish
409 # var res = index.find("Foo").
410 # uniq.
411 # sort(new ScoreComparator, new MEntityComparator).
412 # limit(10).
413 # sort(new VisibilityComparator)
414 # ~~~
415 class IndexMatches
416 super Array[IndexMatch]
417
418 # Create a new ModelMatches from an array of matches
419 #
420 # Elements are copied.
421 init from_matches(matches: Array[IndexMatch]) do self.add_all matches
422
423 # Sort the matches with `comparator` (or a list of comparators)
424 #
425 # Return a new IndexMatches instance with the sorted results.
426 #
427 # When more than one comparator is given, the comparators are applied in a
428 # pipeline where the `n`th comparator is applied only if the `n-1`th comparator
429 # returned 0.
430 fun sort(comparator: ScoreComparator...): IndexMatches do
431 var res = to_a
432 if comparator.length == 1 then
433 comparator.first.sort res
434 else
435 var comparators = new MatchComparators(comparator)
436 comparators.sort res
437 end
438 return new IndexMatches.from_matches(res)
439 end
440
441 # Limit the matches with `limit`
442 #
443 # Return a new IndexMatches instance with only the `limit` first matches.
444 fun limit(limit: Int): IndexMatches do
445 var res = new Array[IndexMatch]
446 for match in self do
447 if res.length >= limit then break
448 res.add match
449 end
450 return new IndexMatches.from_matches(res)
451 end
452
453 # Remove doublons from the matches
454 #
455 # Preverse the lowest score of all the matches for a MEntity.
456 fun uniq: IndexMatches do
457 var scores = new HashMap[MEntity, IndexMatch]
458 var res = new Array[IndexMatch]
459 for match in self do
460 var mentity = match.mentity
461 if scores.has_key(mentity) then
462 var older = scores[mentity]
463 if match.score < older.score then older.score = match.score
464 else
465 scores[mentity] = match
466 res.add match
467 end
468 end
469 return new IndexMatches.from_matches(res)
470 end
471
472 # Reset score of each matches to follow `self` order
473 #
474 # Usefull when you need to apply one sorter over another.
475 fun rerank: IndexMatches do
476 var res = new IndexMatches
477 for match in self do
478 res.add match
479 match.score = res.length
480 end
481 return res
482 end
483
484 # Aggregate the mentities for all the matches
485 #
486 # Preserve the match order.
487 fun mentities: Array[MEntity] do
488 var res = new Array[MEntity]
489 for match in self do res.add match.mentity
490 return res
491 end
492 end
493
494 # An MEntity matched from a ModelIndex
495 #
496 # Each match has a `score`. The score should be seen as the distance of
497 # the match from the query. In other words, the lowest is the score, the more
498 # relevant is the match.
499 class IndexMatch
500 super Comparable
501
502 redef type OTHER: IndexMatch
503
504 # MEntity matches
505 var mentity: MEntity
506
507 # Score allocated by the search method
508 #
509 # A lowest score means a more relevant match.
510 #
511 # Scores values are arbitrary, the meaning of `10` vs `2000` really depends
512 # on the search method producing the match and the comparators used to sort
513 # the matches.
514 # The only universal rule is: low score = relevance.
515 var score: Int is writable
516
517 # By default matches are compared only on their score
518 redef fun <=>(o) do return score <=> o.score
519
520 redef fun to_s do return "{mentity} ({score})"
521 end
522
523 # Compare two matches by their score
524 #
525 # Since the score can be seen as a distance, we want the lowest score first.
526 class ScoreComparator
527 super Comparator
528
529 redef type COMPARED: IndexMatch
530
531 redef fun compare(o1, o2) do return o1.score <=> o2.score
532 end
533
534 # A pipeline of comparators executed in inclusion order
535 #
536 # This class is used internally to agregate the behaviors of multiple comparators.
537 # Use `IndexMatches::sort(comparator...)` instead.
538 private class MatchComparators
539 super ScoreComparator
540
541 # Comparator to use in the array order
542 var comparators: Array[ScoreComparator]
543
544 # Compare with each comparator
545 #
546 # Return the compare value of the first one to return anything else than 0.
547 redef fun compare(o1, o2) do
548 for comparator in comparators do
549 var c = comparator.compare(o1, o2)
550 if c != 0 then return c
551 end
552 return 0
553 end
554 end
555
556 # Compare two matches by their MEntity kind
557 #
558 # Usefull to order the mentities by kind in this order:
559 # packages, groups, modules and classes, properties.
560 class MEntityComparator
561 super ScoreComparator
562
563 # See `MEntity::compare_mentity`
564 redef fun compare(o1, o2) do
565 return o1.mentity.mentity_kind_rank <=> o2.mentity.mentity_kind_rank
566 end
567 end
568
569 # Compare two matches by their MEntity visibility
570 #
571 # We reverse the original order so private is at the end of the list.
572 class VisibilityComparator
573 super ScoreComparator
574
575 redef fun compare(o1, o2) do return o2.mentity.visibility <=> o1.mentity.visibility
576 end
577
578 # Compare two matches by their name in lexicographic order
579 #
580 # Generally, for a same score, we want to put A before Z.
581 class NameComparator
582 super ScoreComparator
583
584 redef fun compare(o1, o2) do return o1.mentity.name <=> o2.mentity.name
585 end
586
587 # Compare two matches by their name length
588 class NameLengthComparator
589 super ScoreComparator
590
591 redef fun compare(o1, o2) do return o1.mentity.name.length <=> o2.mentity.name.length
592 end
593
594 # Compare two matches by their full_name in lexicographic order
595 #
596 # Generally, for a same score, we want to put A before Z.
597 class FullNameComparator
598 super ScoreComparator
599
600 redef fun compare(o1, o2) do return o1.mentity.full_name <=> o2.mentity.full_name
601 end
602
603 # Compare two matches by their full name length
604 class FullNameLengthComparator
605 super ScoreComparator
606
607 redef fun compare(o1, o2) do return o1.mentity.full_name.length <=> o2.mentity.full_name.length
608 end
609
610 redef class MEntity
611
612 # Compare MEntity class kind
613 #
614 # Unknown kind have a virtually high score.
615 private fun mentity_kind_rank: Int do return 10
616 end
617
618 redef class MPackage
619 redef fun mentity_kind_rank do return 1
620 end
621
622 redef class MGroup
623 redef fun mentity_kind_rank do return 2
624 end
625
626 redef class MModule
627 redef fun mentity_kind_rank do return 3
628 end
629
630 redef class MClass
631 redef fun mentity_kind_rank do return 4
632 end
633
634 redef class MClassDef
635 redef fun mentity_kind_rank do return 5
636 end
637
638 redef class MProperty
639 redef fun mentity_kind_rank do return 6
640 end
641
642 redef class MPropDef
643 redef fun mentity_kind_rank do return 7
644 end