A Document represent a text input given to the NLP processor.

Once processed, it contains a list of sentences that contain tokens.

Introduced properties

init from_xml(xml: XMLDocument)

nlp :: NLPDocument :: from_xml

Init self from an xml element.
init from_xml_file(path: String)

nlp :: NLPDocument :: from_xml_file

Init self from a XML file.
fun sentences: Array[NLPSentence]

nlp :: NLPDocument :: sentences

NLPSentences contained in self
protected fun sentences=(sentences: Array[NLPSentence])

nlp :: NLPDocument :: sentences=

NLPSentences contained in self

Redefined properties

redef type SELF: NLPDocument

nlp $ NLPDocument :: SELF

Type of this instance, automatically specialized in every class

All properties

fun !=(other: nullable Object): Bool

core :: Object :: !=

Have self and other different values?
fun ==(other: nullable Object): Bool

core :: Object :: ==

Have self and other the same value?
type CLASS: Class[SELF]

core :: Object :: CLASS

The type of the class of self.
type SELF: Object

core :: Object :: SELF

Type of this instance, automatically specialized in every class
protected fun class_factory(name: String): CLASS

core :: Object :: class_factory

Implementation used by get_class to create the specific class.
fun class_name: String

core :: Object :: class_name

The class name of the object.
init from_xml(xml: XMLDocument)

nlp :: NLPDocument :: from_xml

Init self from an xml element.
init from_xml_file(path: String)

nlp :: NLPDocument :: from_xml_file

Init self from a XML file.
fun get_class: CLASS

core :: Object :: get_class

The meta-object representing the dynamic type of self.
fun hash: Int

core :: Object :: hash

The hash code of the object.
init init

core :: Object :: init

fun inspect: String

core :: Object :: inspect

Developer readable representation of self.
protected fun inspect_head: String

core :: Object :: inspect_head

Return "CLASSNAME:#OBJECTID".
intern fun is_same_instance(other: nullable Object): Bool

core :: Object :: is_same_instance

Return true if self and other are the same instance (i.e. same identity).
fun is_same_serialized(other: nullable Object): Bool

core :: Object :: is_same_serialized

Is self the same as other in a serialization context?
intern fun is_same_type(other: Object): Bool

core :: Object :: is_same_type

Return true if self and other have the same dynamic type.
intern fun object_id: Int

core :: Object :: object_id

An internal hash code for the object based on its identity.
fun output

core :: Object :: output

Display self on stdout (debug only).
intern fun output_class_name

core :: Object :: output_class_name

Display class name on stdout (debug only).
fun sentences: Array[NLPSentence]

nlp :: NLPDocument :: sentences

NLPSentences contained in self
protected fun sentences=(sentences: Array[NLPSentence])

nlp :: NLPDocument :: sentences=

NLPSentences contained in self
fun serialization_hash: Int

core :: Object :: serialization_hash

Hash value use for serialization
intern fun sys: Sys

core :: Object :: sys

Return the global sys object, the only instance of the Sys class.
abstract fun to_jvalue(env: JniEnv): JValue

core :: Object :: to_jvalue

fun to_s: String

core :: Object :: to_s

User readable representation of self.
package_diagram nlp::NLPDocument NLPDocument core::Object Object nlp::NLPDocument->core::Object

Parents

interface Object

core :: Object

The root of the class hierarchy.

Class definitions

nlp $ NLPDocument
# A `Document` represent a text input given to the NLP processor.
#
# Once processed, it contains a list of sentences that contain tokens.
class NLPDocument

	#  NLPSentences contained in `self`
	var sentences = new Array[NLPSentence]

	# Init `self` from an xml element.
	#
	# ~~~
	# var xml = """
	# <root>
	#   <document>
	#     <sentences>
	#       <sentence id="1">
	#         <tokens>
	#           <token id="1">
	#             <word>Stanford</word>
	#             <lemma>Stanford</lemma>
	#             <CharacterOffsetBegin>0</CharacterOffsetBegin>
	#             <CharacterOffsetEnd>8</CharacterOffsetEnd>
	#             <POS>NNP</POS>
	#           </token>
	#           <token id="2">
	#             <word>University</word>
	#             <lemma>University</lemma>
	#             <CharacterOffsetBegin>9</CharacterOffsetBegin>
	#             <CharacterOffsetEnd>19</CharacterOffsetEnd>
	#             <POS>NNP</POS>
	#           </token>
	#         </tokens>
	#       </sentence>
	#       <sentence id="2">
	#         <tokens>
	#           <token id="1">
	#             <word>UQAM</word>
	#             <lemma>UQAM</lemma>
	#             <CharacterOffsetBegin>0</CharacterOffsetBegin>
	#             <CharacterOffsetEnd>4</CharacterOffsetEnd>
	#             <POS>NNP</POS>
	#           </token>
	#           <token id="2">
	#             <word>University</word>
	#             <lemma>University</lemma>
	#             <CharacterOffsetBegin>5</CharacterOffsetBegin>
	#             <CharacterOffsetEnd>15</CharacterOffsetEnd>
	#             <POS>NNP</POS>
	#           </token>
	#         </tokens>
	#       </sentence>
	#     </sentences>
	#   </document>
	# </root>""".to_xml.as(XMLDocument)
	#
	# var document = new NLPDocument.from_xml(xml)
	# assert document.sentences.length == 2
	# assert document.sentences.first.tokens.first.word == "Stanford"
	# assert document.sentences.last.tokens.first.word == "UQAM"
	# ~~~
	init from_xml(xml: XMLDocument) do
		for obj in xml["root"].first["document"].first["sentences"].first["sentence"] do
			if obj isa XMLStartTag then
				sentences.add new NLPSentence.from_xml(obj)
			else
				print "Warning: malformed xml, `sentences` is supposed to contain `sencence` tags"
			end
		end
	end

	# Init `self` from a XML file.
	init from_xml_file(path: String) do
		var file = new FileReader.open(path)
		var xml = file.read_lines
		file.close
		xml.shift # remove xml doctype
		xml.shift # remove xslt link
		from_xml(xml.join("\n").to_xml.as(XMLDocument))
	end
end
lib/nlp/stanford.nit:109,1--188,3