A VSM index to store strings

Introduced properties

fun index_string(title: String, uri: String, string: String, auto_update: nullable Bool): DOC

vsm :: StringIndex :: index_string

Index a new Document from `title`, `uri` and the string `string`.
fun match_string(query: String): Array[IndexMatch[DOC]]

vsm :: StringIndex :: match_string

Match the query string against all indexed documents
fun parse_string(string: String): Vector

vsm :: StringIndex :: parse_string

Parse the string as a Vector

Redefined properties

redef type SELF: StringIndex

vsm $ StringIndex :: SELF

Type of this instance, automatically specialized in every class

All properties

fun !=(other: nullable Object): Bool

core :: Object :: !=

Do self and other have different values?
fun ==(other: nullable Object): Bool

core :: Object :: ==

Do self and other have the same value?
type CLASS: Class[SELF]

core :: Object :: CLASS

The type of the class of self.
type DOC: Document

vsm :: VSMIndex :: DOC

Kind of documents stored in this index
type SELF: Object

core :: Object :: SELF

Type of this instance, automatically specialized in every class
protected fun class_factory(name: String): CLASS

core :: Object :: class_factory

Implementation used by get_class to create the specific class.
fun class_name: String

core :: Object :: class_name

The class name of the object.
fun documents: HashSet[DOC]

vsm :: VSMIndex :: documents

Documents index
protected fun documents=(documents: HashSet[DOC])

vsm :: VSMIndex :: documents=

Documents index
fun get_class: CLASS

core :: Object :: get_class

The meta-object representing the dynamic type of self.
fun hash: Int

core :: Object :: hash

The hash code of the object.
fun index_document(doc: DOC, auto_update: nullable Bool)

vsm :: VSMIndex :: index_document

Index a document
fun index_string(title: String, uri: String, string: String, auto_update: nullable Bool): DOC

vsm :: StringIndex :: index_string

Index a new Document from `title`, `uri` and the string `string`.
init init

core :: Object :: init

fun inspect: String

core :: Object :: inspect

Developer readable representation of self.
protected fun inspect_head: String

core :: Object :: inspect_head

Return "CLASSNAME:#OBJECTID".
fun inverse_doc_frequency: Vector

vsm :: VSMIndex :: inverse_doc_frequency

Inverse document frequency
protected fun inverse_doc_frequency=(inverse_doc_frequency: Vector)

vsm :: VSMIndex :: inverse_doc_frequency=

Inverse document frequency
fun inversed_index: HashMap[nullable Object, Array[DOC]]

vsm :: VSMIndex :: inversed_index

Inversed index
protected fun inversed_index=(inversed_index: HashMap[nullable Object, Array[DOC]])

vsm :: VSMIndex :: inversed_index=

Inversed index
intern fun is_same_instance(other: nullable Object): Bool

core :: Object :: is_same_instance

Return true if self and other are the same instance (i.e. same identity).
fun is_same_serialized(other: nullable Object): Bool

core :: Object :: is_same_serialized

Is self the same as other in a serialization context?
intern fun is_same_type(other: Object): Bool

core :: Object :: is_same_type

Return true if self and other have the same dynamic type.
fun match_string(query: String): Array[IndexMatch[DOC]]

vsm :: StringIndex :: match_string

Match the query string against all indexed documents
fun match_vector(query: Vector): Array[IndexMatch[DOC]]

vsm :: VSMIndex :: match_vector

Match query vector to all index document vectors
intern fun object_id: Int

core :: Object :: object_id

An internal hash code for the object based on its identity.
fun output

core :: Object :: output

Display self on stdout (debug only).
intern fun output_class_name

core :: Object :: output_class_name

Display class name on stdout (debug only).
fun parse_string(string: String): Vector

vsm :: StringIndex :: parse_string

Parse the string as a Vector
fun serialization_hash: Int

core :: Object :: serialization_hash

Hash value used for serialization
fun sorter: IndexMatchSorter

vsm :: VSMIndex :: sorter

Used to sort matches
protected fun sorter=(sorter: IndexMatchSorter)

vsm :: VSMIndex :: sorter=

Used to sort matches
intern fun sys: Sys

core :: Object :: sys

Return the global sys object, the only instance of the Sys class.
fun terms_doc_count: Vector

vsm :: VSMIndex :: terms_doc_count

Count for all terms in all indexed documents
protected fun terms_doc_count=(terms_doc_count: Vector)

vsm :: VSMIndex :: terms_doc_count=

Count for all terms in all indexed documents
abstract fun to_jvalue(env: JniEnv): JValue

core :: Object :: to_jvalue

fun to_s: String

core :: Object :: to_s

User readable representation of self.
fun update_index

vsm :: VSMIndex :: update_index

Update the index
package_diagram vsm::StringIndex StringIndex vsm::VSMIndex VSMIndex vsm::StringIndex->vsm::VSMIndex core::Object Object vsm::VSMIndex->core::Object ...core::Object ... ...core::Object->core::Object nlp::NLPIndex NLPIndex nlp::NLPIndex->vsm::StringIndex vsm::FileIndex FileIndex vsm::FileIndex->vsm::StringIndex nlp::NLPFileIndex NLPFileIndex nlp::NLPFileIndex->nlp::NLPIndex nlp::NLPFileIndex->vsm::FileIndex nlp::NLPFileIndex... ... nlp::NLPFileIndex...->nlp::NLPFileIndex

Ancestors

interface Object

core :: Object

The root of the class hierarchy.

Parents

class VSMIndex

vsm :: VSMIndex

A Document index based on VSM

Children

class FileIndex

vsm :: FileIndex

A VSMIndex to index files
class NLPIndex

nlp :: NLPIndex

A StringIndex using a NLPProcessor to parse and vectorize strings

Descendants

class NLPFileIndex

nlp :: NLPFileIndex

A FileIndex using a NLPProcessor

Class definitions

vsm $ StringIndex
# A VSM index to store strings
#
# Specializes `VSMIndex` with services that accept raw strings: every string
# is first turned into a `Vector` by `parse_string`.
class StringIndex
	super VSMIndex

	# Create a Document for `title`, `uri` and the content `string`, then index it.
	#
	# The content is vectorized with `parse_string` and the resulting Document
	# is handed to `index_document` together with `auto_update`.
	#
	# Return the Document created.
	fun index_string(title, uri, string: String, auto_update: nullable Bool): DOC do
		var document = new Document(title, uri, parse_string(string))
		index_document(document, auto_update)
		return document
	end

	# Look up the indexed documents matching the `query` string
	#
	# The query is vectorized with `parse_string`, then the matching itself is
	# delegated to `match_vector`.
	fun match_string(query: String): Array[IndexMatch[DOC]] do
		# Wrap the query in a Document with empty title and uri: only its
		# `terms_frequency` vector is passed on to `match_vector`.
		var query_document = new Document("", "", parse_string(query))
		return match_vector(query_document.terms_frequency)
	end

	# Build a Vector from the words of `string`
	#
	# Words are read one by one with `read_word` and each of them is recorded
	# in the returned vector through `inc`.
	fun parse_string(string: String): Vector do
		var input = new StringReader(string)
		var terms = new Vector
		var word = input.read_word
		# An empty word is the signal used to stop reading.
		while word != "" do
			terms.inc(word)
			word = input.read_word
		end
		return terms
	end
end
lib/vsm/vsm.nit:215,1--253,3