A VSMIndex to index files

Introduced properties

fun accept_file(path: String): Bool

vsm :: FileIndex :: accept_file

Is path accepted depending on whitelist_exts and blacklist_exts?
fun blacklist_exts: Array[String]

vsm :: FileIndex :: blacklist_exts

File extensions black list
fun blacklist_exts=(blacklist_exts: Array[String])

vsm :: FileIndex :: blacklist_exts=

File extensions black list
fun index_dir(dir: String, auto_update: nullable Bool)

vsm :: FileIndex :: index_dir

Index all files in dir recursively
fun index_file(path: String, auto_update: nullable Bool): nullable DOC

vsm :: FileIndex :: index_file

Index a file from its path.
fun index_files(paths: Collection[String], auto_update: nullable Bool)

vsm :: FileIndex :: index_files

Index multiple files
fun parse_file(file: String): Vector

vsm :: FileIndex :: parse_file

Parse the file content as a Vector
fun whitelist_exts: Array[String]

vsm :: FileIndex :: whitelist_exts

File extensions white list
fun whitelist_exts=(whitelist_exts: Array[String])

vsm :: FileIndex :: whitelist_exts=

File extensions white list

Redefined properties

redef type SELF: FileIndex

vsm $ FileIndex :: SELF

Type of this instance, automatically specialized in every class

All properties

fun !=(other: nullable Object): Bool

core :: Object :: !=

Have self and other different values?
fun ==(other: nullable Object): Bool

core :: Object :: ==

Have self and other the same value?
type CLASS: Class[SELF]

core :: Object :: CLASS

The type of the class of self.
type DOC: Document

vsm :: VSMIndex :: DOC

Kind of documents stored in this index
type SELF: Object

core :: Object :: SELF

Type of this instance, automatically specialized in every class
fun accept_file(path: String): Bool

vsm :: FileIndex :: accept_file

Is path accepted depending on whitelist_exts and blacklist_exts?
fun blacklist_exts: Array[String]

vsm :: FileIndex :: blacklist_exts

File extensions black list
fun blacklist_exts=(blacklist_exts: Array[String])

vsm :: FileIndex :: blacklist_exts=

File extensions black list
protected fun class_factory(name: String): CLASS

core :: Object :: class_factory

Implementation used by get_class to create the specific class.
fun class_name: String

core :: Object :: class_name

The class name of the object.
fun documents: HashSet[DOC]

vsm :: VSMIndex :: documents

Documents index
protected fun documents=(documents: HashSet[DOC])

vsm :: VSMIndex :: documents=

Documents index
fun get_class: CLASS

core :: Object :: get_class

The meta-object representing the dynamic type of self.
fun hash: Int

core :: Object :: hash

The hash code of the object.
fun index_dir(dir: String, auto_update: nullable Bool)

vsm :: FileIndex :: index_dir

Index all files in dir recursively
fun index_document(doc: DOC, auto_update: nullable Bool)

vsm :: VSMIndex :: index_document

Index a document
fun index_file(path: String, auto_update: nullable Bool): nullable DOC

vsm :: FileIndex :: index_file

Index a file from its path.
fun index_files(paths: Collection[String], auto_update: nullable Bool)

vsm :: FileIndex :: index_files

Index multiple files
fun index_string(title: String, uri: String, string: String, auto_update: nullable Bool): DOC

vsm :: StringIndex :: index_string

Index a new Document from title, uri and the content of string.
init init

core :: Object :: init

fun inspect: String

core :: Object :: inspect

Developer readable representation of self.
protected fun inspect_head: String

core :: Object :: inspect_head

Return "CLASSNAME:#OBJECTID".
fun inverse_doc_frequency: Vector

vsm :: VSMIndex :: inverse_doc_frequency

Inverse document frequency
protected fun inverse_doc_frequency=(inverse_doc_frequency: Vector)

vsm :: VSMIndex :: inverse_doc_frequency=

Inverse document frequency
fun inversed_index: HashMap[nullable Object, Array[DOC]]

vsm :: VSMIndex :: inversed_index

Inversed index
protected fun inversed_index=(inversed_index: HashMap[nullable Object, Array[DOC]])

vsm :: VSMIndex :: inversed_index=

Inversed index
intern fun is_same_instance(other: nullable Object): Bool

core :: Object :: is_same_instance

Return true if self and other are the same instance (i.e. same identity).
fun is_same_serialized(other: nullable Object): Bool

core :: Object :: is_same_serialized

Is self the same as other in a serialization context?
intern fun is_same_type(other: Object): Bool

core :: Object :: is_same_type

Return true if self and other have the same dynamic type.
fun match_string(query: String): Array[IndexMatch[DOC]]

vsm :: StringIndex :: match_string

Match the query string against all indexed documents
fun match_vector(query: Vector): Array[IndexMatch[DOC]]

vsm :: VSMIndex :: match_vector

Match query vector to all index document vectors
intern fun object_id: Int

core :: Object :: object_id

An internal hash code for the object based on its identity.
fun output

core :: Object :: output

Display self on stdout (debug only).
intern fun output_class_name

core :: Object :: output_class_name

Display class name on stdout (debug only).
fun parse_file(file: String): Vector

vsm :: FileIndex :: parse_file

Parse the file content as a Vector
fun parse_string(string: String): Vector

vsm :: StringIndex :: parse_string

Parse the string as a Vector
fun serialization_hash: Int

core :: Object :: serialization_hash

Hash value used for serialization
fun sorter: IndexMatchSorter

vsm :: VSMIndex :: sorter

Used to sort matches
protected fun sorter=(sorter: IndexMatchSorter)

vsm :: VSMIndex :: sorter=

Used to sort matches
intern fun sys: Sys

core :: Object :: sys

Return the global sys object, the only instance of the Sys class.
fun terms_doc_count: Vector

vsm :: VSMIndex :: terms_doc_count

Count for all terms in all indexed documents
protected fun terms_doc_count=(terms_doc_count: Vector)

vsm :: VSMIndex :: terms_doc_count=

Count for all terms in all indexed documents
abstract fun to_jvalue(env: JniEnv): JValue

core :: Object :: to_jvalue

fun to_s: String

core :: Object :: to_s

User readable representation of self.
fun update_index

vsm :: VSMIndex :: update_index

Update the index
fun whitelist_exts: Array[String]

vsm :: FileIndex :: whitelist_exts

File extensions white list
fun whitelist_exts=(whitelist_exts: Array[String])

vsm :: FileIndex :: whitelist_exts=

File extensions white list
package_diagram vsm::FileIndex FileIndex vsm::StringIndex StringIndex vsm::FileIndex->vsm::StringIndex vsm::VSMIndex VSMIndex vsm::StringIndex->vsm::VSMIndex ...vsm::VSMIndex ... ...vsm::VSMIndex->vsm::VSMIndex nlp::NLPFileIndex NLPFileIndex nlp::NLPFileIndex->vsm::FileIndex

Ancestors

interface Object

core :: Object

The root of the class hierarchy.
class VSMIndex

vsm :: VSMIndex

A Document index based on VSM

Parents

class StringIndex

vsm :: StringIndex

A VSM index to store strings

Children

class NLPFileIndex

nlp :: NLPFileIndex

A FileIndex based on an NLPProcessor

Class definitions

vsm $ FileIndex
# A VSMIndex to index files
#
# Files are filtered by extension through `whitelist_exts` and
# `blacklist_exts` (see `accept_file`), read from disk and parsed with
# `parse_file`, then stored with `index_document`.
class FileIndex
	super StringIndex

	# Index a file from its `path`.
	#
	# Return the created document or null if `path` is not accepted by `accept_file`.
	#
	# `path` is given as both of the first two arguments of the new document
	# (presumably its title and its URI, to be confirmed against `Document`).
	# `auto_update` is forwarded as is to `index_document`.
	#
	# See `index_document`.
	fun index_file(path: String, auto_update: nullable Bool): nullable DOC do
		if not accept_file(path) then return null
		var vector = parse_file(path)
		var doc = new Document(path, path, vector)
		index_document(doc, auto_update)
		return doc
	end

	# Index multiple files
	#
	# The recursive method `index_dir` will be called for each directory found
	# in `paths`.
	#
	# Each path is indexed with `auto_update` set to `false`; `update_index` is
	# then called once, after all `paths` were processed, only if `auto_update`
	# is `true`.
	#
	# See `index_file`
	fun index_files(paths: Collection[String], auto_update: nullable Bool) do
		for path in paths do
			if path.to_path.is_dir then
				index_dir(path, false)
			else
				index_file(path, false)
			end
		end
		if auto_update != null and auto_update then update_index
	end

	# Index all files in `dir` recursively
	#
	# Do nothing if `dir` is not a directory.
	#
	# Each entry is indexed with `auto_update` set to `false`; `update_index` is
	# then called once, after the whole directory was visited, only if
	# `auto_update` is `true`.
	#
	# See `index_file`.
	fun index_dir(dir: String, auto_update: nullable Bool) do
		if not dir.to_path.is_dir then return
		for file in dir.files do
			var path = dir / file
			if path.to_path.is_dir then
				index_dir(path, false)
			else
				index_file(path, false)
			end
		end
		if auto_update != null and auto_update then update_index
	end

	# Is `path` accepted depending on `whitelist_exts` and `blacklist_exts`?
	#
	# The extension of `path` is converted to lower case before being looked up,
	# so both lists are expected to contain lower case extensions.
	#
	# * An extension found in `blacklist_exts` is always refused.
	# * If `whitelist_exts` is empty, any other extension is accepted.
	# * If `whitelist_exts` is not empty, the extension must be in it.
	# * A path without extension is accepted only if `whitelist_exts` is empty.
	fun accept_file(path: String): Bool do
		var ext = path.file_extension
		if ext != null then
			ext = ext.to_lower
			# The blacklist prevails over the whitelist.
			if blacklist_exts.has(ext) then return false
			# Decide here: falling through to the final `return` would refuse
			# whitelisted extensions as soon as the whitelist is not empty.
			return whitelist_exts.is_empty or whitelist_exts.has(ext)
		end
		# No extension: nothing can match a non-empty whitelist.
		return whitelist_exts.is_empty
	end

	# Parse the `file` content as a Vector
	#
	# The whole content of `file` is read then given to `parse_string`.
	#
	# See `parse_string`.
	fun parse_file(file: String): Vector do
		return parse_string(file.to_path.read_all)
	end

	# File extensions white list
	#
	# If not empty, only files with these extensions will be indexed.
	#
	# If an extension is in both `whitelist_exts` and `blacklist_exts`, the
	# blacklist will prevail and the file will be ignored.
	var whitelist_exts = new Array[String] is writable

	# File extensions black list
	#
	# Files with these extensions will not be indexed.
	var blacklist_exts = new Array[String] is writable
end
lib/vsm/vsm.nit:255,1--335,3