src/model/model_index.nit

   1 # This file is part of NIT ( http://www.nitlanguage.org ).
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 # http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14
  15 # Search things from the Model
  16 #
  17 # ModelIndex allows you to index mentities then retrieve them by their `name`
  18 # or `full_name`.
  19 # It offers a set of `find_*` methods that can be used to match queries
  20 # against entities name.
  21 #
  22 # Because each use is different, ModelIndex only provide base raw search services.
  23 # All of them return IndexMatches that can be ordered and filtered by the client.
  24 #
  25 # ## Indexing mentities
  26 #
  27 # Before searching something from the ModelIndex, you have to index it.
  28 # Use the `ModelIndex::index` method to do that:
  29 #
  30 # ~~~nitish
  31 # var index = new ModelIndex
  32 # for mentity in model.collect_mentities do
  33 #       index.index(mentity)
  34 # end
  35 # ~~~
  36 #
  37 # ## Search mentities
  38 #
  39 # You can then run queries on the ModelIndex:
  40 #
  41 # ~~~nitish
  42 # for res in index.find("Array").limit(10) do
  43 #    print res
  44 # end
  45 # ~~~
  46 #
  47 # ## Examples
  48 #
  49 # Here some examples of how you can use the ModelIndex.
  50 #
  51 # ### Smart type prediction
  52 #
  53 # Use ModelIndex to implement a smart prediction system based on the typed prefix:
  54 #
  55 # ~~~nitish
  56 # var index = new ModelIndex
  57 # for mentity in model.collect_mentities do
  58 #       # We don't really care about definitions
  59 #       if mentity isa MClassDef or mentity isa MPropDef then continue
  60 #       index.index(mentity)
  61 # end
  62 #
  63 # var typed_prefix = "Arr"
  64 # for res in index.find_by_name_prefix(typed_prefix).
  65 #       uniq. # keep only the best ranked mentity
  66 #       limit(5). # limit to ten results
  67 #       sort(new VisibilityComparator, new NameComparator) do # order by visibility then name
  68 #    print res
  69 # end
  70 # ~~~
  71 #
  72 # Will output something like:
  73 #
  74 # ~~~raw
  75 # Array (1)
  76 # ArraySet (2)
  77 # ArrayMap (3)
  78 # ArrayCmp (4)
  79 # ArrayMapKeys (5)
  80 # ~~~
  81 #
  82 # ### Method autocompletion
  83 #
  84 # Find methods from a class full_name:
  85 #
  86 # ~~~nitish
  87 # var class_full_name = "core::Array"
  88 # for res in index.find_by_full_name_prefix("{class_full_name}::").
  89 #       uniq. # keep only the best ranked mentity
  90 #       sort(new VisibilityComparator). # put private in the bottom of the list
  91 #       limit(5). # limit to ten results
  92 #       sort(new FullNameComparator) do # order by lexicographic order
  93 #    print res
  94 # end
  95 # ~~~
  96 #
  97 # Will output something like:
  98 #
  99 # ~~~raw
 100 # * (2)
 101 # + (1)
 102 # filled_with (5)
 103 # from (3)
 104 # with_items (4)
 105 # ~~~
 106 #
 107 # ### Name typo detection and suggestion
 108 #
 109 # Detect unknown names and suggest corrections:
 110 #
 111 # ~~~nitish
 112 # var name = "Zrray"
 113 #
 114 # if index.find_by_name_prefix(name).is_empty then
 115 #       printn "`{name}` not found, did you mean: "
 116 #       printn index.find_by_name_similarity(name).sort(new ScoreComparator).limit(2).join(" or ")
 117 #       print "?"
 118 # end
 119 # ~~~
 120 #
 121 # Will output something like:
 122 #
 123 # ~~~raw
 124 # `Zrray` not found, did you mean: Array (1) or array (1)?
 125 # ~~~
 126 module model_index
 127
 128 import model::model_collect
 129 import trees::trie
 130
 131 redef class Model
 132
 133         # Keep a direct link to mentities by full name to speed up `mentity_from_uri`
 134         var mentities_by_full_name: HashMap[String, MEntity] is lazy do
 135                 var mentities_by_full_name = new HashMap[String, MEntity]
 136                 for mentity in collect_mentities do
 137                         mentities_by_full_name[mentity.full_name] = mentity
 138                 end
 139                 return mentities_by_full_name
 140         end
 141
 142         # ModelIndex used to perform searches
 143         var index: ModelIndex is lazy, writable do
 144                 var index = new ModelIndex
 145                 for mentity in collect_mentities do
 146                         if mentity isa MClassDef or mentity isa MPropDef then continue
 147                         index.index mentity
 148                 end
 149                 return index
 150         end
 151
 152         redef fun mentities_by_name(name, filter) do
 153                 var res = new Array[MEntity]
 154                 if not index.names.has_key(name) then return res
 155                 for mentity in index.names[name] do
 156                         if filter == null or filter.accept_mentity(mentity) then res.add mentity
 157                 end
 158                 return res
 159         end
 160
 161         redef fun mentity_by_full_name(full_name, filter) do
 162                 if mentities_by_full_name.has_key(full_name) then
 163                         var mentity = mentities_by_full_name[full_name]
 164                         if filter == null or filter.accept_mentity(mentity) then return mentity
 165                 end
 166                 return null
 167         end
 168
 169         private var score_sorter = new ScoreComparator
 170         private var vis_sorter = new VisibilityComparator
 171         private var kind_sorter = new MEntityComparator
 172         private var name_sorter = new NameComparator
 173         private var lname_sorter = new NameLengthComparator
 174         private var fname_sorter = new FullNameComparator
 175         private var lfname_sorter = new FullNameLengthComparator
 176
 177         # Search mentities based on a `query` string
 178         #
 179         # Lookup the index for anything matching `query` and return `limit` results.
 180         #
 181         # The algorithm used is the following:
 182         # 1- lookup by name prefix
 183         # 2- lookup by full_name prefix
 184         # 3- loopup by levenshtein distance
 185         #
 186         # At each step if the `limit` is reached, the algorithm stops and returns the results.
 187         fun find(query: String, limit: nullable Int, filter: nullable ModelFilter): Array[MEntity] do
 188                 # Find, lookup by name prefix
 189                 var matches = index.find_by_name_prefix(query, filter).uniq.
 190                         sort(lname_sorter, name_sorter, kind_sorter)
 191                 if limit != null and matches.length >= limit then
 192                         return matches.limit(limit).rerank.sort(vis_sorter, score_sorter).mentities
 193                 end
 194                 matches = matches.rerank.sort(vis_sorter, score_sorter)
 195
 196                 # If limit not reached, lookup by full_name prefix
 197                 var malus = matches.length
 198                 var full_matches = new IndexMatches
 199                 for match in index.find_by_full_name_prefix(query, filter).
 200                         sort(kind_sorter, lfname_sorter, fname_sorter) do
 201                         match.score += malus
 202                         full_matches.add match
 203                 end
 204                 matches = matches.uniq
 205                 if limit != null and matches.length + full_matches.length >= limit then
 206                         matches.add_all full_matches
 207                         matches = matches.uniq.limit(limit).rerank.sort(vis_sorter, score_sorter)
 208                         return matches.mentities
 209                 end
 210
 211                 # If limit not reached, lookup by similarity
 212                 malus = matches.length
 213                 var sim_matches = new IndexMatches
 214                 for match in index.find_by_similarity(query, filter).sort(score_sorter, kind_sorter, lname_sorter, name_sorter) do
 215                         match.score += malus
 216                         sim_matches.add match
 217                 end
 218                 matches.add_all sim_matches
 219                 matches = matches.uniq
 220                 if limit != null then matches = matches.limit(limit)
 221                 return matches.rerank.sort(vis_sorter, score_sorter).mentities
 222         end
 223 end
 224
 225 # ModelIndex indexes mentities by their name and full name
 226 #
 227 # It provides methods to find mentities based on a prefix or string similarity.
 228 #
 229 # ~~~nitish
 230 # # Build index
 231 # var index = new ModelIndex
 232 # for mentity in model.collect_mentities do
 233 #       if mentity isa MClassDef or mentity isa MPropDef then continue
 234 #       index.index(mentity)
 235 # end
 236 #
 237 # for e in index.find("Foo").uniq.sort(new ScoreComparator).limit(10) do
 238 #       print " * {e.score}: {e.mentity.name} ({e.mentity.full_name})"
 239 # end
 240 # ~~~
 241 class ModelIndex
 242
 243         # List of all indexed mentities.
 244         #
 245         # Faster than traversing the tries.
 246         var mentities = new Array[MEntity]
 247
 248         # Map of all mentities indexed by their `name`
 249         var names = new HashMap[String, Array[MEntity]]
 250
 251         # Prefix tree for mentities `name`
 252         #
 253         # Because multiple mentities can share the same `name`, we use a Trie of
 254         # arrays of mentities.
 255         #
 256         # As for now, we do not index class and property definitions.
 257         # TODO add an option.
 258         var name_prefixes = new Trie[Array[MEntity]]
 259
 260         # Map of all mentities indexed by their `full_name`
 261         var full_names = new HashMap[String, MEntity]
 262
 263         # Prefix tree for mentities `full_name`
 264         #
 265         # Even if two mentities cannot share the same `full_name`, we use a Trie of
 266         # arrays of mentities to be consistent with `name_prefixes`.
 267         var full_name_prefixes = new Trie[Array[MEntity]]
 268
 269         # Index `mentity` by it's `MEntity::name`
 270         #
 271         # See `name_prefixes`.
 272         private fun index_by_name(mentity: MEntity) do
 273                 var name = mentity.name
 274
 275                 # Index name
 276                 if not names.has_key(name) then
 277                         names[name] = new Array[MEntity]
 278                 end
 279                 names[name].add mentity
 280
 281                 # Index prefix
 282                 if not name_prefixes.has_key(name) then
 283                         name_prefixes[name] = new Array[MEntity]
 284                 end
 285                 name_prefixes[name].add mentity
 286         end
 287
 288         # Index `mentity` by its `MEntity::full_name`
 289         private fun index_by_full_name(mentity: MEntity) do
 290                 var name = mentity.full_name
 291
 292                 # Index full name
 293                 full_names[name] = mentity
 294
 295                 # Index prefix
 296                 if not full_name_prefixes.has_key(name) then
 297                         full_name_prefixes[name] = new Array[MEntity]
 298                 end
 299                 full_name_prefixes[name].add mentity
 300         end
 301
 302         # Index `mentity` so it can be retrieved by a find query
 303         #
 304         # MEntities are indexed by both name and full_name.
 305         fun index(mentity: MEntity) do
 306                 mentities.add mentity
 307                 index_by_name mentity
 308                 index_by_full_name mentity
 309         end
 310
 311         # Translate Trie results to `SearchResult`
 312         #
 313         # This method is used internally to translate each mentity returned by a prefix
 314         # match in a Trie into a SearchResult that can be ranked by score.
 315         #
 316         # Results from the Trie are returned in a breadth first manner so we get the
 317         # matches ordered by prefix.
 318         # We preserve that order by giving an incremental score to the `array` items.
 319         private fun score_results_incremental(array: Array[Array[MEntity]], filter: nullable ModelFilter): IndexMatches do
 320                 var results = new IndexMatches
 321                 var score = 1
 322                 for mentities in array do
 323                         for mentity in mentities do
 324                                 if filter != null and not filter.accept_mentity(mentity) then continue
 325                                 results.add new IndexMatch(mentity, score)
 326                         end
 327                         score += 1
 328                 end
 329                 return results
 330         end
 331
 332         # Find all mentities where `MEntity::name` matches the `prefix`
 333         fun find_by_name_prefix(prefix: String, filter: nullable ModelFilter): IndexMatches do
 334                 return score_results_incremental(name_prefixes.find_by_prefix(prefix), filter)
 335         end
 336
 337         # Find all mentities where `MEntity::full_name` matches the `prefix`
 338         fun find_by_full_name_prefix(prefix: String, filter: nullable ModelFilter): IndexMatches do
 339                 return score_results_incremental(full_name_prefixes.find_by_prefix(prefix), filter)
 340         end
 341
 342         # Rank all mentities by the distance between `MEntity::name` and `name`
 343         #
 344         # Use the Levenshtein algorithm on all the indexed mentities `name`.
 345         # Warning: may not scale to large indexes.
 346         fun find_by_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
 347                 var results = new IndexMatches
 348                 for mentity in mentities do
 349                         if filter != null and not filter.accept_mentity(mentity) then continue
 350                         if mentity isa MClassDef or mentity isa MPropDef then continue
 351                         results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
 352                 end
 353                 return results
 354         end
 355
 356         # Rank all mentities by the distance between `MEntity::full_name` and `full_name`
 357         #
 358         # Use the Levenshtein algorithm on all the indexed mentities `full_name`.
 359         # Warning: may not scale to large indexes.
 360         fun find_by_full_name_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
 361                 var results = new IndexMatches
 362                 for mentity in mentities do
 363                         if filter != null and not filter.accept_mentity(mentity) then continue
 364                         if mentity isa MClassDef or mentity isa MPropDef then continue
 365                         results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
 366                 end
 367                 return results
 368         end
 369
 370         # Rank all mentities by the distance between `name` and both the mentity name and full name
 371         fun find_by_similarity(name: String, filter: nullable ModelFilter): IndexMatches do
 372                 var results = new IndexMatches
 373                 for mentity in mentities do
 374                         if filter != null and not filter.accept_mentity(mentity) then continue
 375                         if mentity isa MClassDef or mentity isa MPropDef then continue
 376                         results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
 377                         results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
 378                 end
 379                 return results
 380         end
 381
 382         # Find mentities by name trying first by prefix then by similarity
 383         fun find_by_name(name: String, filter: nullable ModelFilter): IndexMatches do
 384                 var results = find_by_name_prefix(name)
 385                 for mentity in mentities do
 386                         if filter != null and not filter.accept_mentity(mentity) then continue
 387                         if mentity isa MClassDef or mentity isa MPropDef then continue
 388                         results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
 389                 end
 390                 return results
 391         end
 392
 393         # Find mentities by full name trying firt by prefix then by similarity
 394         fun find_by_full_name(name: String, filter: nullable ModelFilter): IndexMatches do
 395                 var results = find_by_full_name_prefix(name)
 396                 for mentity in mentities do
 397                         if filter != null and not filter.accept_mentity(mentity) then continue
 398                         if mentity isa MClassDef or mentity isa MPropDef then continue
 399                         results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
 400                 end
 401                 return results
 402         end
 403
 404         # Find all mentities that matches `name`
 405         #
 406         # 1. try by name prefix
 407         # 2. add full name prefix matches
 408         # 3. try similarity by name
 409         # 4. try similarity by full_name
 410         fun find(name: String, filter: nullable ModelFilter): IndexMatches do
 411                 var results = find_by_name_prefix(name, filter)
 412
 413                 for result in find_by_full_name_prefix(name, filter) do
 414                         results.add result
 415                 end
 416
 417                 for mentity in mentities do
 418                         if filter != null and not filter.accept_mentity(mentity) then continue
 419                         if mentity isa MClassDef or mentity isa MPropDef then continue
 420                         results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.name))
 421                         results.add new IndexMatch(mentity, name.levenshtein_distance(mentity.full_name))
 422                 end
 423                 return results
 424         end
 425 end
 426
 427 # An array of IndexMatch instances returned by the ModelIndex
 428 #
 429 # Index matches can be sorted, filtered and truncated.
 430 #
 431 # Thanks to the fluent interface, the index matches can be manipulated in chain
 432 # from a model index query:
 433 #
 434 # ~~~nitish
 435 # var res = index.find("Foo").
 436 #   uniq.
 437 #       sort(new ScoreComparator, new MEntityComparator).
 438 #       limit(10).
 439 #       sort(new VisibilityComparator)
 440 # ~~~
 441 class IndexMatches
 442         super Array[IndexMatch]
 443
 444         # Create a new ModelMatches from an array of matches
 445         #
 446         # Elements are copied.
 447         init from_matches(matches: Array[IndexMatch]) do self.add_all matches
 448
 449         # Sort the matches with `comparator` (or a list of comparators)
 450         #
 451         # Return a new IndexMatches instance with the sorted results.
 452         #
 453         # When more than one comparator is given, the comparators are applied in a
 454         # pipeline where the `n`th comparator is applied only if the `n-1`th comparator
 455         # returned 0.
 456         fun sort(comparator: ScoreComparator...): IndexMatches do
 457                 var res = to_a
 458                 if comparator.length == 1 then
 459                         comparator.first.sort res
 460                 else
 461                         var comparators = new MatchComparators(comparator)
 462                         comparators.sort res
 463                 end
 464                 return new IndexMatches.from_matches(res)
 465         end
 466
 467         # Limit the matches with `limit`
 468         #
 469         # Return a new IndexMatches instance with only the `limit` first matches.
 470         fun limit(limit: Int): IndexMatches do
 471                 var res = new Array[IndexMatch]
 472                 for match in self do
 473                         if res.length >= limit then break
 474                         res.add match
 475                 end
 476                 return new IndexMatches.from_matches(res)
 477         end
 478
 479         # Remove doublons from the matches
 480         #
 481         # Preverse the lowest score of all the matches for a MEntity.
 482         fun uniq: IndexMatches do
 483                 var scores = new HashMap[MEntity, IndexMatch]
 484                 var res = new Array[IndexMatch]
 485                 for match in self do
 486                         var mentity = match.mentity
 487                         if scores.has_key(mentity) then
 488                                 var older = scores[mentity]
 489                                 if match.score < older.score then older.score = match.score
 490                         else
 491                                 scores[mentity] = match
 492                                 res.add match
 493                         end
 494                 end
 495                 return new IndexMatches.from_matches(res)
 496         end
 497
 498         # Reset score of each matches to follow `self` order
 499         #
 500         # Usefull when you need to apply one sorter over another.
 501         fun rerank: IndexMatches do
 502                 var res = new IndexMatches
 503                 for match in self do
 504                         res.add match
 505                         match.score = res.length
 506                 end
 507                 return res
 508         end
 509
 510         # Aggregate the mentities for all the matches
 511         #
 512         # Preserve the match order.
 513         fun mentities: Array[MEntity] do
 514                 var res = new Array[MEntity]
 515                 for match in self do res.add match.mentity
 516                 return res
 517         end
 518 end
 519
 520 # An MEntity matched from a ModelIndex
 521 #
 522 # Each match has a `score`. The score should be seen as the distance of
 523 # the match from the query. In other words, the lowest is the score, the more
 524 # relevant is the match.
 525 class IndexMatch
 526         super Comparable
 527
 528         redef type OTHER: IndexMatch
 529
 530         # MEntity matches
 531         var mentity: MEntity
 532
 533         # Score allocated by the search method
 534         #
 535         # A lowest score means a more relevant match.
 536         #
 537         # Scores values are arbitrary, the meaning of `10` vs `2000` really depends
 538         # on the search method producing the match and the comparators used to sort
 539         # the matches.
 540         # The only universal rule is: low score = relevance.
 541         var score: Int is writable
 542
 543         # By default matches are compared only on their score
 544         redef fun <=>(o) do return score <=> o.score
 545
 546         redef fun to_s do return "{mentity} ({score})"
 547 end
 548
 549 # Compare two matches by their score
 550 #
 551 # Since the score can be seen as a distance, we want the lowest score first.
 552 class ScoreComparator
 553         super Comparator
 554
 555         redef type COMPARED: IndexMatch
 556
 557         redef fun compare(o1, o2) do return o1.score <=> o2.score
 558 end
 559
 560 # A pipeline of comparators executed in inclusion order
 561 #
 562 # This class is used internally to agregate the behaviors of multiple comparators.
 563 # Use `IndexMatches::sort(comparator...)` instead.
 564 private class MatchComparators
 565         super ScoreComparator
 566
 567         # Comparator to use in the array order
 568         var comparators: Array[ScoreComparator]
 569
 570         # Compare with each comparator
 571         #
 572         # Return the compare value of the first one to return anything else than 0.
 573         redef fun compare(o1, o2) do
 574                 for comparator in comparators do
 575                         var c = comparator.compare(o1, o2)
 576                         if c != 0 then return c
 577                 end
 578                 return 0
 579         end
 580 end
 581
 582 # Compare two matches by their MEntity kind
 583 #
 584 # Usefull to order the mentities by kind in this order:
 585 # packages, groups, modules and classes, properties.
 586 class MEntityComparator
 587         super ScoreComparator
 588
 589         # See `MEntity::compare_mentity`
 590         redef fun compare(o1, o2) do
 591                 return o1.mentity.mentity_kind_rank <=> o2.mentity.mentity_kind_rank
 592         end
 593 end
 594
 595 # Compare two matches by their MEntity visibility
 596 #
 597 # We reverse the original order so private is at the end of the list.
 598 class VisibilityComparator
 599         super ScoreComparator
 600
 601         redef fun compare(o1, o2) do return o2.mentity.visibility <=> o1.mentity.visibility
 602 end
 603
 604 # Compare two matches by their name in lexicographic order
 605 #
 606 # Generally, for a same score, we want to put A before Z.
 607 class NameComparator
 608         super ScoreComparator
 609
 610         redef fun compare(o1, o2) do return o1.mentity.name <=> o2.mentity.name
 611 end
 612
 613 # Compare two matches by their name length
 614 class NameLengthComparator
 615         super ScoreComparator
 616
 617         redef fun compare(o1, o2) do return o1.mentity.name.length <=> o2.mentity.name.length
 618 end
 619
 620 # Compare two matches by their full_name in lexicographic order
 621 #
 622 # Generally, for a same score, we want to put A before Z.
 623 class FullNameComparator
 624         super ScoreComparator
 625
 626         redef fun compare(o1, o2) do return o1.mentity.full_name <=> o2.mentity.full_name
 627 end
 628
 629 # Compare two matches by their full name length
 630 class FullNameLengthComparator
 631         super ScoreComparator
 632
 633         redef fun compare(o1, o2) do return o1.mentity.full_name.length <=> o2.mentity.full_name.length
 634 end
 635
 636 redef class MEntity
 637
 638         # Compare MEntity class kind
 639         #
 640         # Unknown kind have a virtually high score.
 641         private fun mentity_kind_rank: Int do return 10
 642 end
 643
 644 redef class MPackage
 645         redef fun mentity_kind_rank do return 1
 646 end
 647
 648 redef class MGroup
 649         redef fun mentity_kind_rank do return 2
 650 end
 651
 652 redef class MModule
 653         redef fun mentity_kind_rank do return 3
 654 end
 655
 656 redef class MClass
 657         redef fun mentity_kind_rank do return 4
 658 end
 659
 660 redef class MClassDef
 661         redef fun mentity_kind_rank do return 5
 662 end
 663
 664 redef class MProperty
 665         redef fun mentity_kind_rank do return 6
 666 end
 667
 668 redef class MPropDef
 669         redef fun mentity_kind_rank do return 7
 670 end