This PR replaces accesses to key `total` by `count` in all Nitweb views.
So the display in not broken.
Signed-off-by: Alexandre Terrasa <alexandre@moz-code.org>
Pull-Request: #2717
--- /dev/null
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Implementation of BKTree
+#
+# As proposed by W. A. Burkhard and R. M. Keller in
+# "Some approaches to best-match file searching" the BKTree data structure
+# is usefull to speed-up spell checking based on the Levenshtein distance between
+# words.
+#
+# With a BKTree, the complexity to find mispelled words in a dictionnary is *O(log n)*
+# instead of *O(n)* with a dummy list.
+#
+# Example:
+#
+# ~~~
+# # Create a new BKTree
+# var tree = new BKTree
+#
+# # Index some words in the dictionnary for spell checking
+# tree.add("book")
+# tree.add("books")
+# tree.add("cake")
+# tree.add("boo")
+# tree.add("cape")
+# tree.add("boon")
+# tree.add("cook")
+# tree.add("cart")
+#
+# # Search suggestions in the BKTree
+# assert tree.search("caqe").join(", ") == "\{1: cake\}, \{1: cape\}"
+# ~~~
+#
+# See <https://dl.acm.org/citation.cfm?id=362003.362025>.
+module bktree
+
+import abstract_tree
+
+# A BKTree implementation
+#
+# See `add` to insert a new word.
+# See `search` to find matches from a `key`.
+class BKTree
+
+ # Default tolerance used to find matches
+ #
+ # Default is `2`.
+ var default_tolerance = 2 is writable
+
+ # Tree root
+ private var root: nullable BKNode = null
+
+ # Add a `key` in the tree
+ fun add(key: String) do
+ var root = self.root
+ if root == null then
+ self.root = new BKNode(key)
+ return
+ end
+
+ var node = root
+ var dist = node.key.levenshtein_distance(key)
+
+ while node.has_key(dist) do
+ if dist == 0 then return
+ node = node[dist]
+ dist = node.key.levenshtein_distance(key)
+ end
+ node[dist] = new BKNode(key)
+ end
+
+ # Search `key` with a distance of `tolerance` in `self`.
+ #
+ # If `tolerance` is null, the use `default_tolerance` instead.
+ fun search(key: String, tolerance: nullable Int): Array[BKMatch] do
+ var res = new Array[BKMatch]
+ var root = self.root
+ if root != null then
+ if tolerance == null then tolerance = self.default_tolerance
+ search_recursive(root, res, key, tolerance)
+ end
+ default_comparator.sort(res)
+ return res
+ end
+
+ private fun search_recursive(node: BKNode, res: Array[BKMatch], key: String, tolerance: Int) do
+ var dist = node.key.levenshtein_distance(key)
+ var min = dist - tolerance
+ var max = dist + tolerance
+
+ if dist < tolerance then
+ res.add new BKMatch(dist, node.key)
+ end
+
+ for odist, child in node do
+ if odist < min or odist > max then continue
+ search_recursive(child, res, key, tolerance)
+ end
+ end
+end
+
+# A node that goes in a BKTree
+#
+# Each child is mapped from `self` by its distance as an Int.
+private class BKNode
+ super HashMap[Int, BKNode]
+
+ # Key stored in `self`
+ var key: String
+
+ redef fun to_s do return "\{{key}\}"
+end
+
+# A match in a BKTree
+#
+# Used to order results returned by `BKTree::search`.
+class BKMatch
+ super Comparable
+
+ redef type OTHER: BKMatch
+
+ # Distance of `key` from the search query
+ var distance: Int
+
+ # Matched `key`
+ var key: String
+
+ redef fun ==(o) do return o isa BKMatch and distance == o.distance and key == o.key
+
+ redef fun <=>(o) do
+ if distance == o.distance then
+ return key <=> o.key
+ end
+ return distance <=> o.distance
+ end
+
+ redef fun to_s do return "\{{distance}: {key}\}"
+end
return super
end
+ # Increment value for `obj` term
+ #
+ # If the term isn't already in the vector, the new value is 1.0.
+ fun inc(obj: nullable Object) do
+ if has_key(obj) then
+ self[obj] += 1.0
+ else
+ self[obj] = 1.0
+ end
+ end
+
# The norm of the vector.
#
# `||x|| = (x1 ** 2 ... + xn ** 2).sqrt`
# Documents can then be matched to query vectors.
class VSMIndex
+ # Kind of documents stored in this index
+ #
+ # Clients can redefine this type to specialize the index.
+ type DOC: Document
+
# Documents index
+ var documents = new HashSet[DOC]
+
+ # Inversed index
#
- # TODO use a more efficient representation.
- var documents = new HashSet[Document]
+ # Link documents to existing terms.
+ var inversed_index = new HashMap[nullable Object, Array[DOC]]
# Count for all terms in all indexed documents
#
#
# Returns an `IndexMatch` for each indexed document.
# Results are ordered by descending similarity.
- fun match_vector(query: Vector): Array[IndexMatch] do
- var matches = new Array[IndexMatch]
+ fun match_vector(query: Vector): Array[IndexMatch[DOC]] do
+ var documents = new HashSet[DOC]
+ for term, count in query do
+ if inversed_index.has_key(term) then
+ documents.add_all inversed_index[term]
+ end
+ end
+ var matches = new Array[IndexMatch[DOC]]
for doc in documents do
var sim = query.cosine_similarity(doc.tfidf)
if sim == 0.0 then continue
- matches.add new IndexMatch(doc, sim)
+ matches.add new IndexMatch[DOC](doc, sim)
end
sorter.sort(matches)
return matches
#
# When processing batch documents, use `auto_update = false` to disable
# the auto update of the index.
- fun index_document(doc: Document, auto_update: nullable Bool) do
+ fun index_document(doc: DOC, auto_update: nullable Bool) do
for term, count in doc.terms_count do
- if not terms_doc_count.has_key(term) then
- terms_doc_count[term] = 1.0
- else
- terms_doc_count[term] += 1.0
+ terms_doc_count.inc(term)
+ if not inversed_index.has_key(term) then
+ inversed_index[term] = new Array[DOC]
end
+ inversed_index[term].add doc
end
documents.add doc
if auto_update == null or auto_update then update_index
# Return the Document created.
#
# See `index_document`.
- fun index_string(title, uri, string: String, auto_update: nullable Bool): Document do
+ fun index_string(title, uri, string: String, auto_update: nullable Bool): DOC do
var vector = parse_string(string)
var doc = new Document(title, uri, vector)
index_document(doc, auto_update)
# Match the `query` string against all indexed documents
#
# See `match_vector`.
- fun match_string(query: String): Array[IndexMatch] do
+ fun match_string(query: String): Array[IndexMatch[DOC]] do
var vector = parse_string(query)
var doc = new Document("", "", vector)
return match_vector(doc.terms_frequency)
loop
var token = reader.read_word
if token == "" then break
-
- if not vector.has_key(token) then
- vector[token] = 1.0
- else
- vector[token] += 1.0
- end
+ vector.inc(token)
end
return vector
end
# Return the created document or null if `path` is not accepted by `accept_file`.
#
# See `index_document`.
- fun index_file(path: String, auto_update: nullable Bool): nullable Document do
+ fun index_file(path: String, auto_update: nullable Bool): nullable DOC do
if not accept_file(path) then return null
var vector = parse_file(path)
var doc = new Document(path, path, vector)
# A high weight in tf–idf is reached by a high term frequency
# (in the given document) and a low document frequency of the term in the
# whole collection of documents
- var tfidf = new Vector
+ var tfidf: Vector = terms_count is lazy
redef fun to_s do return "{title}"
end
# A match to a `request` in an `Index`
-class IndexMatch
+class IndexMatch[DOC: Document]
super Comparable
# Document matching the `request_vector`
- var document: Document
+ var document: DOC
# Similarity between the `request` and the `doc`.
#
class IndexMatchSorter
super DefaultComparator
- redef type COMPARED: IndexMatch
+ redef type COMPARED: IndexMatch[Document]
redef fun compare(a, b) do
return b.similarity <=> a.similarity
--- /dev/null
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# An index that contains Nit code
+#
+# Model entities are indexed by their ANode.
+#
+# Vectorization is based on model usage such as:
+# * modules importation
+# * classes spcialization and refinement
+# * methods calls and refinements
+#
+# Example:
+# ~~~nitish
+# # Create the index
+# var index = new CodeIndex(toolcontext)
+# for mentity in mentities do
+# index.index_mentity(mentity)
+# end
+#
+# # Match a piece of code
+# var matches = index.match_code("print \"Hello, World!\"")
+# for match in matches do
+# print match
+# end
+# ~~~
+module code_index
+
+import vsm
+import semantize
+import parser_util
+
+# Index for Nit doc
+class CodeIndex
+ super VSMIndex
+
+ redef type DOC: CodeDocument
+
+ # ToolContext used to parse pieces of code
+ var toolcontext: ToolContext
+
+ # Index `mentity`
+ fun index_mentity(mentity: MEntity) do
+ var terms = vectorize_mentity(mentity)
+ var doc = new CodeDocument(mentity, terms)
+ index_document(doc, false)
+ end
+
+ # Match `code` with the index
+ fun match_code(code: String): Array[IndexMatch[DOC]] do
+ var node = parse_code(code)
+ if node == null then return new Array[IndexMatch[DOC]]
+ return match_node(node)
+ end
+
+ # Match `node` with the index
+ fun match_node(node: ANode): Array[IndexMatch[DOC]] do
+ var vector = vectorize_node(node)
+ return match_vector(vector)
+ end
+
+ # Parse a piece of code
+ private fun parse_code(code: String): nullable AModule do
+ # Try to parse code
+ var node = toolcontext.parse_something(code)
+ if not node isa AModule then return null
+
+ # Load code into model
+ var mbuilder = toolcontext.modelbuilder
+ mbuilder.load_rt_module(null, node, "tmp")
+ mbuilder.run_phases
+ return node
+ end
+
+ # Transform `node` in a Vector
+ private fun vectorize_node(node: ANode): Vector do
+ var visitor = new CodeIndexVisitor
+ visitor.enter_visit(node)
+ return visitor.vector
+ end
+
+ # Transform `mentity` in a Vector
+ private fun vectorize_mentity(mentity: MEntity): Vector do
+ var node = toolcontext.modelbuilder.mentity2node(mentity)
+ if node == null then return new Vector
+ return vectorize_node(node)
+ end
+end
+
+# A specific document for mentities code
+class CodeDocument
+ super Document
+ autoinit(mentity, terms_count)
+
+ # MEntity related to this document
+ var mentity: MEntity
+
+ redef var title = mentity.full_name is lazy
+
+ redef var uri = mentity.location.to_s is lazy
+end
+
+# Code index visitor
+#
+# Used to build a VSM Vector from a Nit ANode.
+private class CodeIndexVisitor
+ super Visitor
+
+ var vector = new Vector
+
+ redef fun visit(node) do
+ node.accept_code_index_visitor(self)
+ end
+end
+
+redef class ANode
+ private fun accept_code_index_visitor(v: CodeIndexVisitor) do
+ visit_all(v)
+ end
+end
+
+redef class AStdImport
+ redef fun accept_code_index_visitor(v) do
+ var mmodule = self.mmodule
+ if mmodule != null then
+ v.vector.inc "import#{mmodule.full_name}"
+ end
+ end
+end
+
+redef class AStdClassdef
+ redef fun accept_code_index_visitor(v) do
+ var mclassdef = self.mclassdef
+ if mclassdef != null then
+ if not mclassdef.is_intro then
+ v.vector.inc "redef#{mclassdef.full_name}"
+ v.vector.inc "redef#{mclassdef.mclass.full_name}"
+ end
+ end
+ visit_all(v)
+ end
+end
+
+redef class ASuperPropdef
+ redef fun accept_code_index_visitor(v) do
+ var mtype = self.n_type.mtype
+ if mtype isa MClassType then
+ v.vector.inc "super#{mtype.mclass.intro.full_name}"
+ v.vector.inc "super#{mtype.mclass.full_name}"
+ end
+ end
+end
+
+redef class APropdef
+ redef fun accept_code_index_visitor(v) do
+ var mpropdef = self.mpropdef
+ if mpropdef != null then
+ if not mpropdef.is_intro then
+ v.vector.inc "redef#{mpropdef.mproperty.intro.full_name}"
+ v.vector.inc "redef#{mpropdef.mproperty.full_name}"
+ end
+ end
+ visit_all(v)
+ end
+end
+
+redef class ASendExpr
+ redef fun accept_code_index_visitor(v) do
+ var callsite = self.callsite
+ if callsite != null then
+ var args = callsite.signaturemap.as(not null).map.length
+ v.vector.inc "call#{callsite.mpropdef.full_name}({args})"
+ v.vector.inc "call#{callsite.mpropdef.mproperty.full_name}({args})"
+ v.vector.inc "call#{callsite.mpropdef.mclassdef.full_name}({args})"
+ v.vector.inc "call#{callsite.mpropdef.mclassdef.mclass.full_name}({args})"
+ end
+ visit_all(v)
+ end
+end
+
+redef class ANewExpr
+ redef fun accept_code_index_visitor(v) do
+ var callsite = self.callsite
+ if callsite != null then
+ var args = callsite.signaturemap.as(not null).map.length
+ v.vector.inc "call#{callsite.mpropdef.full_name}({args})"
+ v.vector.inc "call#{callsite.mpropdef.mproperty.full_name}({args})"
+ v.vector.inc "new#{callsite.mpropdef.mclassdef.full_name}({args})"
+ v.vector.inc "new#{callsite.mpropdef.mclassdef.mclass.full_name}({args})"
+ end
+ visit_all(v)
+ end
+end
--- /dev/null
+# This file is part of NIT ( http://www.nitlanguage.org ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+module test_code_index is test
+
+import code_index
+import frontend
+
+class TestCodeIndex
+ test
+
+ # CodeIndex used in tests
+ var test_index: CodeIndex is noinit
+
+ # Initialize test variables
+ #
+ # Must be called before test execution.
+ # FIXME should be before_all
+ fun build_test_env is before do
+ var test_path = "NIT_TESTING_PATH".environ.dirname
+ var test_src = test_path / "../../../tests/test_prog"
+
+ # build model
+ var toolcontext = new ToolContext
+ var model = new Model
+ var modelbuilder = new ModelBuilder(model, toolcontext)
+ var mmodules = modelbuilder.parse_full([test_src])
+ modelbuilder.run_phases
+ toolcontext.run_global_phases(mmodules)
+
+ # create index
+ var index = new CodeIndex(toolcontext)
+ for mmodule in mmodules do
+ index.index_mentity(mmodule)
+ end
+ test_index = index
+ modelbuilder.paths.add test_src
+ end
+
+ fun test_find1 is test do
+ var query = "import game\n"
+ var matches = test_index.match_code(query)
+ assert matches.first.document.mentity.full_name == "test_prog::test_prog"
+ end
+
+ fun test_find2 is test do
+ var query = "import game\nimport rpg\n"
+ var matches = test_index.match_code(query)
+ assert matches.first.document.mentity.full_name == "test_prog::game"
+ end
+
+ fun test_find3 is test do
+ var query = "import game\nclass MyGame\nsuper Game\nredef fun start_game do end\nend\n"
+ var matches = test_index.match_code(query)
+ assert matches.first.document.mentity.full_name == "test_prog::game_examples"
+ end
+
+ fun test_find_error is test do
+ var query = "error"
+ var matches = test_index.match_code(query)
+ assert matches.is_empty
+ end
+end