src/model: introduce ModelIndex
[nit.git] / src / model / model_index.nit
1 # This file is part of NIT ( http://www.nitlanguage.org ).
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 # Search things from the Model
16 #
17 # ModelIndex allows you to index mentities then retrieve them by their `name`
18 # or `full_name`.
19 # It offers a set of `find_*` methods that can be used to match queries
20 # against entities name.
21 #
22 # Because each use is different, ModelIndex only provide base raw search services.
23 # All of them return IndexMatches that can be ordered and filtered by the client.
24 #
25 # ## Indexing mentities
26 #
27 # Before searching something from the ModelIndex, you have to index it.
28 # Use the `ModelIndex::index` method to do that:
29 #
30 # ~~~nitish
31 # var index = new ModelIndex
32 #
33 # for mentity in model.private_view.mentities do
34 # index.index(mentity)
35 # end
36 # ~~~
37 #
38 # ## Search mentities
39 #
40 # You can then run queries on the ModelIndex:
41 #
42 # ~~~nitish
43 # for res in index.find("Array").limit(10) do
44 # print res
45 # end
46 # ~~~
47 #
48 # ## Examples
49 #
50 # Here some examples of how you can use the ModelIndex.
51 #
52 # ### Smart type prediction
53 #
54 # Use ModelIndex to implement a smart prediction system based on the typed prefix:
55 #
56 # ~~~nitish
57 # var index = new ModelIndex
58 #
59 # for mentity in model.private_view.mentities do
60 # # We don't really care about definitions
61 # if mentity isa MClassDef or mentity isa MPropDef then continue
62 # index.index(mentity)
63 # end
64 #
65 # var typed_prefix = "Arr"
66 # for res in index.find_by_name_prefix(typed_prefix).
67 # uniq. # keep only the best ranked mentity
68 # limit(5). # limit to ten results
69 # sort(new VisibilityComparator, new NameComparator) do # order by visibility then name
70 # print res
71 # end
72 # ~~~
73 #
74 # Will output something like:
75 #
76 # ~~~raw
77 # Array (1)
78 # ArraySet (2)
79 # ArrayMap (3)
80 # ArrayCmp (4)
81 # ArrayMapKeys (5)
82 # ~~~
83 #
84 # ### Method autocompletion
85 #
86 # Find methods from a class full_name:
87 #
88 # ~~~nitish
89 # var class_full_name = "core::Array"
90 # for res in index.find_by_full_name_prefix("{class_full_name}::").
91 # uniq. # keep only the best ranked mentity
92 # sort(new VisibilityComparator). # put private in the bottom of the list
93 # limit(5). # limit to ten results
94 # sort(new FullNameComparator) do # order by lexicographic order
95 # print res
96 # end
97 # ~~~
98 #
99 # Will output something like:
100 #
101 # ~~~raw
102 # * (2)
103 # + (1)
104 # filled_with (5)
105 # from (3)
106 # with_items (4)
107 # ~~~
108 #
109 # ### Name typo detection and suggestion
110 #
111 # Detect unknown names and suggest corrections:
112 #
113 # ~~~nitish
114 # var name = "Zrray"
115 #
116 # if index.find_by_name_prefix(name).is_empty then
117 # printn "`{name}` not found, did you mean: "
118 # printn index.find_by_name_similarity(name).sort(new ScoreComparator).limit(2).join(" or ")
119 # print "?"
120 # end
121 # ~~~
122 #
123 # Will output something like:
124 #
125 # ~~~raw
126 # `Zrray` not found, did you mean: Array (1) or array (1)?
127 # ~~~
128 module model_index
129
130 import model::model_views
131 import trees::trie
132
133 # ModelIndex indexes mentities by their name and full name
134 #
135 # It provides methods to find mentities based on a prefix or string similarity.
136 #
137 # ~~~nitish
138 # # Build index
139 # var index = new ModelIndex
140 # for mentity in model.private_view.mentities do
141 # if mentity isa MClassDef or mentity isa MPropDef then continue
142 # index.index(mentity)
143 # end
144 #
145 # for e in index.find("Foo").uniq.sort(new ScoreComparator).limit(10) do
146 # print " * {e.score}: {e.mentity.name} ({e.mentity.full_name})"
147 # end
148 # ~~~
149 class ModelIndex
150
151 # List of all indexed mentities.
152 #
153 # Faster than traversing the tries.
154 var mentities = new Array[MEntity]
155
156 # Prefix tree for mentities `name`
157 #
158 # Because multiple mentities can share the same `name`, we use a Trie of
159 # arrays of mentities.
160 #
161 # As for now, we do not index class and property definitions.
162 # TODO add an option.
163 var name_prefixes = new Trie[Array[MEntity]]
164
165 # Prefix tree for mentities `full_name`
166 #
167 # Even if two mentities cannot share the same `full_name`, we use a Trie of
168 # arrays of mentities to be consistent with `name_prefixes`.
169 var full_name_prefixes = new Trie[Array[MEntity]]
170
171 # Index `mentity` by it's `MEntity::name`
172 #
173 # See `name_prefixes`.
174 private fun index_by_name(mentity: MEntity) do
175 var name = mentity.name
176 if not name_prefixes.has_key(name) then
177 name_prefixes[name] = new Array[MEntity]
178 end
179 name_prefixes[name].add mentity
180 end
181
182 # Index `mentity` by its `MEntity::full_name`
183 private fun index_by_full_name(mentity: MEntity) do
184 var name = mentity.full_name
185 if not full_name_prefixes.has_key(name) then
186 full_name_prefixes[name] = new Array[MEntity]
187 end
188 full_name_prefixes[name].add mentity
189 end
190
191 # Index `mentity` so it can be retrieved by a find query
192 #
193 # MEntities are indexed by both name and full_name.
194 fun index(mentity: MEntity) do
195 mentities.add mentity
196 index_by_name mentity
197 index_by_full_name mentity
198 end
199
200 # Translate Trie results to `SearchResult`
201 #
202 # This method is used internally to translate each mentity returned by a prefix
203 # match in a Trie into a SearchResult that can be ranked by score.
204 #
205 # Results from the Trie are returned in a breadth first manner so we get the
206 # matches ordered by prefix.
207 # We preserve that order by giving an incremental score to the `array` items.
208 private fun score_results_incremental(array: Array[Array[MEntity]]): IndexMatches do
209 var results = new IndexMatches
210 var score = 1
211 for mentities in array do
212 for mentity in mentities do
213 results.add new IndexMatch(mentity, score)
214 end
215 score += 1
216 end
217 return results
218 end
219
220 # Find all mentities where `MEntity::name` matches the `prefix`
221 fun find_by_name_prefix(prefix: String): IndexMatches do
222 return score_results_incremental(name_prefixes.find_by_prefix(prefix))
223 end
224
225 # Find all mentities where `MEntity::full_name` matches the `prefix`
226 fun find_by_full_name_prefix(prefix: String): IndexMatches do
227 return score_results_incremental(full_name_prefixes.find_by_prefix(prefix))
228 end
229
230 # Rank all mentities by the distance between `MEntity::name` and `name`
231 #
232 # Use the Levenshtein algorithm on all the indexed mentities `name`.
233 # Warning: may not scale to large indexes.
234 fun find_by_name_similarity(name: String): IndexMatches do
235 var results = new IndexMatches
236 for mentity in mentities do
237 if mentity isa MClassDef or mentity isa MPropDef then continue
238 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
239 end
240 return results
241 end
242
243 # Rank all mentities by the distance between `MEntity::full_name` and `full_name`
244 #
245 # Use the Levenshtein algorithm on all the indexed mentities `full_name`.
246 # Warning: may not scale to large indexes.
247 fun find_by_full_name_similarity(name: String): IndexMatches do
248 var results = new IndexMatches
249 for mentity in mentities do
250 if mentity isa MClassDef or mentity isa MPropDef then continue
251 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
252 end
253 return results
254 end
255
256 # Rank all mentities by the distance between `name` and both the mentity name and full name
257 fun find_by_similarity(name: String): IndexMatches do
258 var results = new IndexMatches
259 for mentity in mentities do
260 if mentity isa MClassDef or mentity isa MPropDef then continue
261 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
262 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
263 end
264 return results
265 end
266
267 # Find mentities by name trying first by prefix then by similarity
268 fun find_by_name(name: String): IndexMatches do
269 var results = find_by_name_prefix(name)
270 for mentity in mentities do
271 if mentity isa MClassDef or mentity isa MPropDef then continue
272 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
273 end
274 return results
275 end
276
277 # Find mentities by full name trying firt by prefix then by similarity
278 fun find_by_full_name(name: String): IndexMatches do
279 var results = find_by_full_name_prefix(name)
280 for mentity in mentities do
281 if mentity isa MClassDef or mentity isa MPropDef then continue
282 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
283 end
284 return results
285 end
286
287 # Find all mentities that matches `name`
288 #
289 # 1. try by name prefix
290 # 2. add full name prefix matches
291 # 3. try similarity by name
292 # 4. try similarity by full_name
293 fun find(name: String): IndexMatches do
294 var results = find_by_name_prefix(name)
295
296 for result in find_by_full_name_prefix(name) do
297 results.add result
298 end
299
300 for mentity in mentities do
301 if mentity isa MClassDef or mentity isa MPropDef then continue
302 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
303 results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
304 end
305 return results
306 end
307 end
308
309 # An array of IndexMatch instances returned by the ModelIndex
310 #
311 # Index matches can be sorted, filtered and truncated.
312 #
313 # Thanks to the fluent interface, the index matches can be manipulated in chain
314 # from a model index query:
315 #
316 # ~~~nitish
317 # var res = index.find("Foo").
318 # uniq.
319 # sort(new ScoreComparator, new MEntityComparator).
320 # limit(10).
321 # sort(new VisibilityComparator)
322 # ~~~
323 class IndexMatches
324 super Array[IndexMatch]
325
326 # Create a new ModelMatches from an array of matches
327 #
328 # Elements are copied.
329 init from_matches(matches: Array[IndexMatch]) do self.add_all matches
330
331 # Sort the matches with `comparator` (or a list of comparators)
332 #
333 # Return a new IndexMatches instance with the sorted results.
334 #
335 # When more than one comparator is given, the comparators are applied in a
336 # pipeline where the `n`th comparator is applied only if the `n-1`th comparator
337 # returned 0.
338 fun sort(comparator: ScoreComparator...): IndexMatches do
339 var res = to_a
340 if comparator.length == 1 then
341 comparator.first.sort res
342 else
343 var comparators = new MatchComparators(comparator)
344 comparators.sort res
345 end
346 return new IndexMatches.from_matches(res)
347 end
348
349 # Limit the matches with `limit`
350 #
351 # Return a new IndexMatches instance with only the `limit` first matches.
352 fun limit(limit: Int): IndexMatches do
353 var res = new Array[IndexMatch]
354 for match in self do
355 if res.length >= limit then break
356 res.add match
357 end
358 return new IndexMatches.from_matches(res)
359 end
360
361 # Remove doublons from the matches
362 #
363 # Preverse the lowest score of all the matches for a MEntity.
364 fun uniq: IndexMatches do
365 var scores = new HashMap[MEntity, IndexMatch]
366 var res = new Array[IndexMatch]
367 for match in self do
368 var mentity = match.mentity
369 if scores.has_key(mentity) then
370 var older = scores[mentity]
371 if match.score < older.score then older.score = match.score
372 else
373 scores[mentity] = match
374 res.add match
375 end
376 end
377 return new IndexMatches.from_matches(res)
378 end
379
380 # Reset score of each matches to follow `self` order
381 #
382 # Usefull when you need to apply one sorter over another.
383 fun rerank: IndexMatches do
384 var res = new IndexMatches
385 for match in self do
386 res.add match
387 match.score = res.length
388 end
389 return res
390 end
391
392 # Aggregate the mentities for all the matches
393 #
394 # Preserve the match order.
395 fun mentities: Array[MEntity] do
396 var res = new Array[MEntity]
397 for match in self do res.add match.mentity
398 return res
399 end
400 end
401
402 # An MEntity matched from a ModelIndex
403 #
404 # Each match has a `score`. The score should be seen as the distance of
405 # the match from the query. In other words, the lowest is the score, the more
406 # relevant is the match.
407 class IndexMatch
408 super Comparable
409
410 redef type OTHER: IndexMatch
411
412 # MEntity matches
413 var mentity: MEntity
414
415 # Score allocated by the search method
416 #
417 # A lowest score means a more relevant match.
418 #
419 # Scores values are arbitrary, the meaning of `10` vs `2000` really depends
420 # on the search method producing the match and the comparators used to sort
421 # the matches.
422 # The only universal rule is: low score = relevance.
423 var score: Int is writable
424
425 # By default matches are compared only on their score
426 redef fun <=>(o) do return score <=> o.score
427
428 redef fun to_s do return "{mentity} ({score})"
429 end
430
431 # Compare two matches by their score
432 #
433 # Since the score can be seen as a distance, we want the lowest score first.
434 class ScoreComparator
435 super Comparator
436
437 redef type COMPARED: IndexMatch
438
439 redef fun compare(o1, o2) do return o1.score <=> o2.score
440 end
441
442 # A pipeline of comparators executed in inclusion order
443 #
444 # This class is used internally to agregate the behaviors of multiple comparators.
445 # Use `IndexMatches::sort(comparator...)` instead.
446 private class MatchComparators
447 super ScoreComparator
448
449 # Comparator to use in the array order
450 var comparators: Array[ScoreComparator]
451
452 # Compare with each comparator
453 #
454 # Return the compare value of the first one to return anything else than 0.
455 redef fun compare(o1, o2) do
456 for comparator in comparators do
457 var c = comparator.compare(o1, o2)
458 if c != 0 then return c
459 end
460 return 0
461 end
462 end
463
464 # Compare two matches by their MEntity kind
465 #
466 # Usefull to order the mentities by kind in this order:
467 # packages, groups, modules and classes, properties.
468 class MEntityComparator
469 super ScoreComparator
470
471 # See `MEntity::compare_mentity`
472 redef fun compare(o1, o2) do
473 return o1.mentity.mentity_kind_rank <=> o2.mentity.mentity_kind_rank
474 end
475 end
476
477 # Compare two matches by their MEntity visibility
478 #
479 # We reverse the original order so private is at the end of the list.
480 class VisibilityComparator
481 super ScoreComparator
482
483 redef fun compare(o1, o2) do return o2.mentity.visibility <=> o1.mentity.visibility
484 end
485
486 # Compare two matches by their name in lexicographic order
487 #
488 # Generally, for a same score, we want to put A before Z.
489 class NameComparator
490 super ScoreComparator
491
492 redef fun compare(o1, o2) do return o1.mentity.name <=> o2.mentity.name
493 end
494
495 # Compare two matches by their name length
496 class NameLengthComparator
497 super ScoreComparator
498
499 redef fun compare(o1, o2) do return o1.mentity.name.length <=> o2.mentity.name.length
500 end
501
502 # Compare two matches by their full_name in lexicographic order
503 #
504 # Generally, for a same score, we want to put A before Z.
505 class FullNameComparator
506 super ScoreComparator
507
508 redef fun compare(o1, o2) do return o1.mentity.full_name <=> o2.mentity.full_name
509 end
510
511 # Compare two matches by their full name length
512 class FullNameLengthComparator
513 super ScoreComparator
514
515 redef fun compare(o1, o2) do return o1.mentity.full_name.length <=> o2.mentity.full_name.length
516 end
517
518 redef class MEntity
519
520 # Compare MEntity class kind
521 #
522 # Unknown kind have a virtually high score.
523 private fun mentity_kind_rank: Int do return 10
524 end
525
526 redef class MPackage
527 redef fun mentity_kind_rank do return 1
528 end
529
530 redef class MGroup
531 redef fun mentity_kind_rank do return 2
532 end
533
534 redef class MModule
535 redef fun mentity_kind_rank do return 3
536 end
537
538 redef class MClass
539 redef fun mentity_kind_rank do return 4
540 end
541
542 redef class MClassDef
543 redef fun mentity_kind_rank do return 5
544 end
545
546 redef class MProperty
547 redef fun mentity_kind_rank do return 6
548 end
549
550 redef class MPropDef
551 redef fun mentity_kind_rank do return 7
552 end