vsm :: Document :: defaultinit
# A document that can be added to a `VSMIndex`
class Document

	# Title of the document
	var title: String

	# URI of the document
	var uri: String

	# Count associated with each term found in the document
	#
	# This is the raw data from which `terms_frequency` is derived.
	var terms_count: Vector

	# Relative frequency of each term found in the document
	#
	# Computed lazily on first access: every count of `terms_count` is divided
	# by the sum of all the counts.
	#
	# Used to match the document against the `VSMIndex::inverse_doc_frequency`.
	var terms_frequency: Vector is lazy do
		# First pass: sum of all the counts, used as the common denominator.
		var total = 0.0
		for term, count in terms_count do
			total += count
		end

		# Second pass: one normalized entry per term.
		var frequencies = new Vector
		for term, count in terms_count do frequencies[term] = count / total
		return frequencies
	end

	# Term frequency–inverse document frequency (tf–idf) weight of each term
	#
	# A term gets a high tf–idf weight when it is frequent in this document
	# and appears in few documents of the whole collection.
	#
	# Lazily initialized with the `terms_count` vector itself (no copy is made
	# here).
	# NOTE(review): presumably reweighted later by the index; confirm that
	# sharing the instance with `terms_count` is intended.
	var tfidf: Vector = terms_count is lazy

	# A document is displayed as its `title`.
	redef fun to_s do
		return "{title}"
	end
end
lib/vsm/vsm.nit:337,1--373,3