A FileIndex based on a NLPProcessor

Introduced properties

init defaultinit(nlp_processor: NLPProcessor)

nlp :: NLPFileIndex :: defaultinit

Redefined properties

redef type SELF: NLPFileIndex

nlp $ NLPFileIndex :: SELF

Type of this instance, automatically specialized in every class

All properties

fun !=(other: nullable Object): Bool

core :: Object :: !=

Have self and other different values?
fun ==(other: nullable Object): Bool

core :: Object :: ==

Have self and other the same value?
type CLASS: Class[SELF]

core :: Object :: CLASS

The type of the class of self.
type DOC: Document

vsm :: VSMIndex :: DOC

Kind of documents stored in this index
type SELF: Object

core :: Object :: SELF

Type of this instance, automatically specialized in every class
fun accept_file(path: String): Bool

vsm :: FileIndex :: accept_file

Is path accepted depending on whitelist_exts and blacklist_exts?
fun accept_token(token: NLPToken): Bool

nlp :: NLPIndex :: accept_token

Is token accepted by this index?
fun blacklist_exts: Array[String]

vsm :: FileIndex :: blacklist_exts

File extensions black list
fun blacklist_exts=(blacklist_exts: Array[String])

vsm :: FileIndex :: blacklist_exts=

File extensions black list
fun blacklist_pos: Array[String]

nlp :: NLPIndex :: blacklist_pos

Part-Of-Speech blacklist
fun blacklist_pos=(blacklist_pos: Array[String])

nlp :: NLPIndex :: blacklist_pos=

Part-Of-Speech blacklist
protected fun class_factory(name: String): CLASS

core :: Object :: class_factory

Implementation used by get_class to create the specific class.
fun class_name: String

core :: Object :: class_name

The class name of the object.
init defaultinit(nlp_processor: NLPProcessor)

nlp :: NLPIndex :: defaultinit

init defaultinit(nlp_processor: NLPProcessor)

nlp :: NLPFileIndex :: defaultinit

fun documents: HashSet[DOC]

vsm :: VSMIndex :: documents

Documents index
protected fun documents=(documents: HashSet[DOC])

vsm :: VSMIndex :: documents=

Documents index
fun get_class: CLASS

core :: Object :: get_class

The meta-object representing the dynamic type of self.
fun hash: Int

core :: Object :: hash

The hash code of the object.
fun index_dir(dir: String, auto_update: nullable Bool)

vsm :: FileIndex :: index_dir

Index all files in dir recursively
fun index_document(doc: DOC, auto_update: nullable Bool)

vsm :: VSMIndex :: index_document

Index a document
fun index_file(path: String, auto_update: nullable Bool): nullable DOC

vsm :: FileIndex :: index_file

Index a file from its path.
fun index_files(paths: Collection[String], auto_update: nullable Bool)

vsm :: FileIndex :: index_files

Index multiple files
fun index_string(title: String, uri: String, string: String, auto_update: nullable Bool): DOC

vsm :: StringIndex :: index_string

Index a new Document from title, uri and the string content string.
init init

core :: Object :: init

fun inspect: String

core :: Object :: inspect

Developer readable representation of self.
protected fun inspect_head: String

core :: Object :: inspect_head

Return "CLASSNAME:#OBJECTID".
fun inverse_doc_frequency: Vector

vsm :: VSMIndex :: inverse_doc_frequency

Inverse document frequency
protected fun inverse_doc_frequency=(inverse_doc_frequency: Vector)

vsm :: VSMIndex :: inverse_doc_frequency=

Inverse document frequency
fun inversed_index: HashMap[nullable Object, Array[DOC]]

vsm :: VSMIndex :: inversed_index

Inversed index
protected fun inversed_index=(inversed_index: HashMap[nullable Object, Array[DOC]])

vsm :: VSMIndex :: inversed_index=

Inversed index
intern fun is_same_instance(other: nullable Object): Bool

core :: Object :: is_same_instance

Return true if self and other are the same instance (i.e. same identity).
fun is_same_serialized(other: nullable Object): Bool

core :: Object :: is_same_serialized

Is self the same as other in a serialization context?
intern fun is_same_type(other: Object): Bool

core :: Object :: is_same_type

Return true if self and other have the same dynamic type.
fun match_string(query: String): Array[IndexMatch[DOC]]

vsm :: StringIndex :: match_string

Match the query string against all indexed documents
fun match_vector(query: Vector): Array[IndexMatch[DOC]]

vsm :: VSMIndex :: match_vector

Match query vector to all index document vectors
fun nlp_processor: NLPProcessor

nlp :: NLPIndex :: nlp_processor

NLP Processor used to tokenize, lemmatize and POS tag documents
protected fun nlp_processor=(nlp_processor: NLPProcessor)

nlp :: NLPIndex :: nlp_processor=

NLP Processor used to tokenize, lemmatize and POS tag documents
intern fun object_id: Int

core :: Object :: object_id

An internal hash code for the object based on its identity.
fun output

core :: Object :: output

Display self on stdout (debug only).
intern fun output_class_name

core :: Object :: output_class_name

Display class name on stdout (debug only).
fun parse_file(file: String): Vector

vsm :: FileIndex :: parse_file

Parse the file content as a Vector
fun parse_string(string: String): Vector

vsm :: StringIndex :: parse_string

Parse the string as a Vector
fun serialization_hash: Int

core :: Object :: serialization_hash

Hash value used for serialization
fun sorter: IndexMatchSorter

vsm :: VSMIndex :: sorter

Used to sort matches
protected fun sorter=(sorter: IndexMatchSorter)

vsm :: VSMIndex :: sorter=

Used to sort matches
fun stoplist: Array[String]

nlp :: NLPIndex :: stoplist

List of lemmas that must not be indexed
fun stoplist=(stoplist: Array[String])

nlp :: NLPIndex :: stoplist=

List of lemmas that must not be indexed
intern fun sys: Sys

core :: Object :: sys

Return the global sys object, the only instance of the Sys class.
fun terms_doc_count: Vector

vsm :: VSMIndex :: terms_doc_count

Count for all terms in all indexed documents
protected fun terms_doc_count=(terms_doc_count: Vector)

vsm :: VSMIndex :: terms_doc_count=

Count for all terms in all indexed documents
abstract fun to_jvalue(env: JniEnv): JValue

core :: Object :: to_jvalue

fun to_s: String

core :: Object :: to_s

User readable representation of self.
fun update_index

vsm :: VSMIndex :: update_index

Update the index
fun whitelist_exts: Array[String]

vsm :: FileIndex :: whitelist_exts

File extensions white list
fun whitelist_exts=(whitelist_exts: Array[String])

vsm :: FileIndex :: whitelist_exts=

File extensions white list
fun whitelist_pos: Array[String]

nlp :: NLPIndex :: whitelist_pos

Part-Of-Speech whitelist
fun whitelist_pos=(whitelist_pos: Array[String])

nlp :: NLPIndex :: whitelist_pos=

Part-Of-Speech whitelist
package_diagram nlp::NLPFileIndex NLPFileIndex nlp::NLPIndex NLPIndex nlp::NLPFileIndex->nlp::NLPIndex vsm::FileIndex FileIndex nlp::NLPFileIndex->vsm::FileIndex vsm::StringIndex StringIndex nlp::NLPIndex->vsm::StringIndex vsm::FileIndex->vsm::StringIndex ...vsm::StringIndex ... ...vsm::StringIndex->vsm::StringIndex

Ancestors

interface Object

core :: Object

The root of the class hierarchy.
class StringIndex

vsm :: StringIndex

A VSM index to store strings
class VSMIndex

vsm :: VSMIndex

A Document index based on VSM

Parents

class FileIndex

vsm :: FileIndex

A VSMIndex to index files
class NLPIndex

nlp :: NLPIndex

A StringIndex using a NLPProcessor to parse and vectorize strings

Class definitions

nlp $ NLPFileIndex
# A `FileIndex` based on a `NLPProcessor`
#
# Specializes both `NLPIndex` and `FileIndex`; the class body adds no
# members beyond these two `super` declarations.
class NLPFileIndex
	super NLPIndex
	super FileIndex
end
lib/nlp/nlp.nit:73,1--77,3